commit 5e601d04015cc5c502053468535f5ac4116cfaa2 Author: Yaossg Date: Sat Jan 18 21:09:52 2025 +0800 initial commit diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..8edfb9b --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.pbxproj binary merge=union diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7f38173 --- /dev/null +++ b/.gitignore @@ -0,0 +1,48 @@ +*~ +*.dSYM +.DS_Store +tags +*-debug +*-s +*-l +hisat2.xcodeproj/project.xcworkspace +hisat2.xcodeproj/xcuserdata +hisat2.xcodeproj/xcshareddata +*.patch + +build_automaton +build_index +clean_alignment +determinize +gcsa_alignment +gcsa_test +hisat2-repeat + +hisat2_test/*.bt2 +hisat2_test/*.ht2 +hisat2_test/*.sam +hisat2_test/paper_example.malignment.automaton +hisat2_test/paper_example.malignment.backbone +hisat2_test/paper_example.malignment.gcsa +hisat2_test/kim_example*.malignment.automaton +hisat2_test/kim_example*.malignment.backbone +hisat2_test/kim_example*.malignment.gcsa +hisat2_test/genome* +hisat2_test/2* +hisat2_test/snp142* +hisat2_test/testset* + +.idea +.vscode + +.ht2lib-obj* +*.a +*.so +docs/_site +docs/*.lock +docs/.*-cache +*.tar.gz +*.ipynb +*.pyc + +cmake* \ No newline at end of file diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..d22b8b2 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,29 @@ +Ben Langmead wrote Bowtie 2, which is based partially on +Bowtie. Bowtie was written by Ben Langmead and Cole Trapnell. + + Bowtie & Bowtie 2: http://bowtie-bio.sf.net + +A DLL from the pthreads for Win32 library is distributed with the Win32 version +of Bowtie 2. The pthreads for Win32 library and the GnuWin32 package have many +contributors (see their respective web sites). + + pthreads for Win32: http://sourceware.org/pthreads-win32 + GnuWin32: http://gnuwin32.sf.net + +The ForkManager.pm perl module is used in Bowtie 2's random testing framework, +and is included as scripts/sim/contrib/ForkManager.pm. 
ForkManager.pm is +written by dLux (Szabo, Balazs), with contributions by others. See the perldoc +in ForkManager.pm for the complete list. + +The file ls.h includes an implementation of the Larsson-Sadakane suffix sorting +algorithm. The implementation is by N. Jesper Larsson and was adapted somewhat +for use in Bowtie 2. + +TinyThreads is a portable thread implementation with a fairly compatible subset +of C++11 thread management classes written by Marcus Geelnard. For more info +check http://tinythreadpp.bitsnbites.eu/ + +Various users have kindly supplied patches, bug reports and feature requests +over the years. Many, many thanks go to them. + +September 2011 diff --git a/HISAT2-genotype.png b/HISAT2-genotype.png new file mode 100644 index 0000000..1327c4c Binary files /dev/null and b/HISAT2-genotype.png differ diff --git a/HISAT2_VERSION b/HISAT2_VERSION new file mode 100644 index 0000000..ddd34cc --- /dev/null +++ b/HISAT2_VERSION @@ -0,0 +1 @@ +2.2.1-3n-0.0.3 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..94a9ed0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. 
+ + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. 
Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. 
+ + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/MANUAL b/MANUAL new file mode 100644 index 0000000..56eb557 --- /dev/null +++ b/MANUAL @@ -0,0 +1,1467 @@ + +Introduction +============ + +What is HISAT2? +----------------- + +HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads +(whole-genome, transcriptome, and exome sequencing data) against the general human population +(as well as against a single reference genome). Based on [GCSA] (an extension of [BWT] for a graph), we designed and implemented a graph FM index (GFM), +an original approach and its first implementation to the best of our knowledge. +In addition to using one global GFM index that represents the general population, +HISAT2 uses a large set of small GFM indexes that collectively cover the whole genome +(each index representing a genomic region of 56 Kbp, with 55,000 indexes needed to cover the human population). +These small indexes (called local indexes) combined with several alignment strategies enable effective alignment of sequencing reads. +This new indexing scheme is called the Hierarchical Graph FM index (HGFM). +We have developed HISAT2 based on the [HISAT] and [Bowtie2] implementations. +HISAT2 outputs alignments in [SAM] format, enabling interoperation with a large number of other tools (e.g. [SAMtools], [GATK]) that use SAM. +HISAT2 is distributed under the [GPLv3 license], and it runs on the command line under +Linux, Mac OS X and Windows.
+ +[HISAT2]: http://ccb.jhu.edu/software/hisat2 +[HISAT]: http://ccb.jhu.edu/software/hisat +[Bowtie2]: http://bowtie-bio.sf.net/bowtie2 +[Bowtie]: http://bowtie-bio.sf.net +[Bowtie1]: http://bowtie-bio.sf.net +[GCSA]: http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6698337&tag=1 +[Burrows-Wheeler Transform]: http://en.wikipedia.org/wiki/Burrows-Wheeler_transform +[BWT]: http://en.wikipedia.org/wiki/Burrows-Wheeler_transform +[FM Index]: http://en.wikipedia.org/wiki/FM-index +[SAM]: http://samtools.sourceforge.net/SAM1.pdf +[SAMtools]: http://samtools.sourceforge.net +[GATK]: http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit +[TopHat2]: http://ccb.jhu.edu/software/tophat +[Cufflinks]: http://cufflinks.cbcb.umd.edu/ +[Crossbow]: http://bowtie-bio.sf.net/crossbow +[Myrna]: http://bowtie-bio.sf.net/myrna +[Bowtie paper]: http://genomebiology.com/2009/10/3/R25 +[GPLv3 license]: http://www.gnu.org/licenses/gpl-3.0.html + +Obtaining HISAT2 +================== + +Download HISAT2 sources and binaries from the Releases sections on the right side. +Binaries are available for Intel architectures (`x86_64`) running Linux, and Mac OS X. + +Building from source +-------------------- + +Building HISAT2 from source requires a GNU-like environment with GCC, GNU Make +and other basics. It should be possible to build HISAT2 on most vanilla Linux +installations or on a Mac installation with [Xcode] installed. HISAT2 can +also be built on Windows using [Cygwin] or [MinGW] (MinGW recommended). For a +MinGW build the choice of what compiler is to be used is important since this +will determine if a 32 or 64 bit code can be successfully compiled using it. If +there is a need to generate both 32 and 64 bit on the same machine then a multilib +MinGW has to be properly installed. [MSYS], the [zlib] library, and depending on +architecture [pthreads] library are also required. 
We recommend a 64-bit +build since it has some clear advantages in real-life research problems. In order +to simplify the MinGW setup it might be worth investigating popular MinGW personal +builds since these come already prepared with most of the toolchains needed. + +First, download the [source package] from the Releases section on the right side. +Unzip the file, change to the unzipped directory, and build the +HISAT2 tools by running GNU `make` (usually with the command `make`, but +sometimes with `gmake`) with no arguments. If building with MinGW, run `make` +from the MSYS environment. + +HISAT2 uses the multithreading software model in order to speed up +execution times on SMP architectures where this is possible. On POSIX +platforms (like Linux, Mac OS, etc.) it needs the pthread library. Although +it is possible to use the pthread library on non-POSIX platforms like Windows, for +performance reasons HISAT2 will try to use Windows native multithreading +if possible. + +For the support of SRA data access in HISAT2, please download and install the [NCBI-NGS] toolkit. +When running `make`, specify additional variables as follows. +`make USE_SRA=1 NCBI_NGS_DIR=/path/to/NCBI-NGS-directory NCBI_VDB_DIR=/path/to/NCBI-NGS-directory`, +where `NCBI_NGS_DIR` and `NCBI_VDB_DIR` will be used in Makefile for -I and -L compilation options. +For example, $(NCBI_NGS_DIR)/include and $(NCBI_NGS_DIR)/lib64 will be used.
+ +[Cygwin]: http://www.cygwin.com/ +[MinGW]: http://www.mingw.org/ +[MSYS]: http://www.mingw.org/wiki/msys +[zlib]: http://cygwin.com/packages/mingw-zlib/ +[pthreads]: http://sourceware.org/pthreads-win32/ +[GnuWin32]: http://gnuwin32.sf.net/packages/coreutils.htm +[Download]: https://sourceforge.net/projects/bowtie-bio/files/bowtie2/ +[sourceforge site]: https://sourceforge.net/projects/bowtie-bio/files/bowtie2/ +[source package]: http://ccb.jhu.edu/software/hisat2/downloads/hisat2-2.0.0-beta-source.zip +[Xcode]: http://developer.apple.com/xcode/ +[NCBI-NGS]: https://github.com/ncbi/ngs/wiki/Downloads + +Running HISAT2 +============= + +Adding to PATH +-------------- + +By adding your new HISAT2 directory to your [PATH environment variable], you +ensure that whenever you run `hisat2`, `hisat2-build` or `hisat2-inspect` +from the command line, you will get the version you just installed without +having to specify the entire path. This is recommended for most users. To do +this, follow your operating system's instructions for adding the directory to +your [PATH]. + +If you would like to install HISAT2 by copying the HISAT2 executable files +to an existing directory in your [PATH], make sure that you copy all the +executables, including `hisat2`, `hisat2-align-s`, `hisat2-align-l`, `hisat2-build`, `hisat2-build-s`, `hisat2-build-l`, `hisat2-inspect`, `hisat2-inspect-s` and +`hisat2-inspect-l`. + +[PATH environment variable]: http://en.wikipedia.org/wiki/PATH_(variable) +[PATH]: http://en.wikipedia.org/wiki/PATH_(variable) + +Reporting +--------- + +The reporting mode governs how many alignments HISAT2 looks for, and how to +report them. + +In general, when we say that a read has an alignment, we mean that it has a +[valid alignment]. When we say that a read has multiple alignments, we mean +that it has multiple alignments that are valid and distinct from one another. + +By default, HISAT2 may soft-clip reads near their 5' and 3' ends. 
Users can control this behavior by setting different penalties for soft-clipping (`--sp`) or by disallowing soft-clipping (`--no-softclip`). + +### Distinct alignments map a read to different places + +Two alignments for the same individual read are "distinct" if they map the same +read to different places. Specifically, we say that two alignments are distinct +if there are no alignment positions where a particular read offset is aligned +opposite a particular reference offset in both alignments with the same +orientation. E.g. if the first alignment is in the forward orientation and +aligns the read character at read offset 10 to the reference character at +chromosome 3, offset 3,445,245, and the second alignment is also in the forward +orientation and also aligns the read character at read offset 10 to the +reference character at chromosome 3, offset 3,445,245, they are not distinct +alignments. + +Two alignments for the same pair are distinct if either the mate 1s in the two +paired-end alignments are distinct or the mate 2s in the two alignments are +distinct or both. + +### Default mode: search for one or more alignments, report each + +HISAT2 searches for up to N distinct, primary alignments for +each read, where N equals the integer specified with the `-k` parameter. +Primary alignments mean alignments whose alignment score is equal or higher than any other alignments. +It is possible that multiple distinct alignments have the same score. +That is, if `-k 2` is specified, HISAT2 will search for at most 2 distinct +alignments. The alignment score for a paired-end alignment equals the sum of the +alignment scores of the individual mates. Each reported read or pair alignment +beyond the first has the SAM 'secondary' bit (which equals 256) set in its FLAGS +field. See the [SAM specification] for details. 
+ +HISAT2 does not "find" alignments in any specific order, so for reads that +have more than N distinct, valid alignments, HISAT2 does not guarantee that +the N alignments reported are the best possible in terms of alignment score. +Still, this mode can be effective and fast in situations where the user cares +more about whether a read aligns (or aligns a certain number of times) than +where exactly it originated. + +[SAM specification]: http://samtools.sourceforge.net/SAM1.pdf + +Alignment summary +------------------ + +When HISAT2 finishes running, it prints messages summarizing what happened. +These messages are printed to the "standard error" ("stderr") filehandle. For +datasets consisting of unpaired reads, the summary might look like this: + + 20000 reads; of these: + 20000 (100.00%) were unpaired; of these: + 1247 (6.24%) aligned 0 times + 18739 (93.69%) aligned exactly 1 time + 14 (0.07%) aligned >1 times + 93.77% overall alignment rate + +For datasets consisting of pairs, the summary might look like this: + + 10000 reads; of these: + 10000 (100.00%) were paired; of these: + 650 (6.50%) aligned concordantly 0 times + 8823 (88.23%) aligned concordantly exactly 1 time + 527 (5.27%) aligned concordantly >1 times + ---- + 650 pairs aligned concordantly 0 times; of these: + 34 (5.23%) aligned discordantly 1 time + ---- + 616 pairs aligned 0 times concordantly or discordantly; of these: + 1232 mates make up the pairs; of these: + 660 (53.57%) aligned 0 times + 571 (46.35%) aligned exactly 1 time + 1 (0.08%) aligned >1 times + 96.70% overall alignment rate + +The indentation indicates how subtotals relate to totals. + +Wrapper +------- + +The `hisat2`, `hisat2-build` and `hisat2-inspect` executables are actually +wrapper scripts that call binary programs as appropriate. The wrappers shield +users from having to distinguish between "small" and "large" index formats, +discussed briefly in the following section. 
Also, the `hisat2` wrapper +provides some key functionality, like the ability to handle compressed inputs, +and the functionality for `--un`, `--al` and related options. + +It is recommended that you always run the hisat2 wrappers and not run the +binaries directly. + +Small and large indexes +----------------------- + +`hisat2-build` can index reference genomes of any size. For genomes less than +about 4 billion nucleotides in length, `hisat2-build` builds a "small" index +using 32-bit numbers in various parts of the index. When the genome is longer, +`hisat2-build` builds a "large" index using 64-bit numbers. Small indexes are +stored in files with the `.ht2` extension, and large indexes are stored in +files with the `.ht2l` extension. The user need not worry about whether a +particular index is small or large; the wrapper scripts will automatically build +and use the appropriate index. + +Performance tuning +------------------ + +1. If your computer has multiple processors/cores, use `-p` + + The `-p` option causes HISAT2 to launch a specified number of parallel + search threads. Each thread runs on a different processor/core and all + threads find alignments in parallel, increasing alignment throughput by + approximately a multiple of the number of threads (though in practice, + speedup is somewhat worse than linear). + +Command Line +------------ + +### Setting function options + +Some HISAT2 options specify a function rather than an individual number or +setting. In these cases the user specifies three parameters: (a) a function +type `F`, (b) a constant term `B`, and (c) a coefficient `A`. The available +function types are constant (`C`), linear (`L`), square-root (`S`), and natural +log (`G`). The parameters are specified as `F,B,A` - that is, the function type, +the constant term, and the coefficient are separated by commas with no +whitespace. The constant term and coefficient may be negative and/or +floating-point numbers. 
+ +For example, if the function specification is `L,-0.4,-0.6`, then the function +defined is: + + f(x) = -0.4 + -0.6 * x + +If the function specification is `G,1,5.4`, then the function defined is: + + f(x) = 1.0 + 5.4 * ln(x) + +See the documentation for the option in question to learn what the parameter `x` +is for. For example, in the case of the `--score-min` option, the function +`f(x)` sets the minimum alignment score necessary for an alignment to be +considered valid, and `x` is the read length. + +### Usage + + hisat2 [options]* -x <hisat2-idx> {-1 <m1> -2 <m2> | -U <r> | --sra-acc <SRA accession number>} [-S <hit>] + +### Main arguments + + -x <hisat2-idx> + +The basename of the index for the reference genome. The basename is the name of +any of the index files up to but not including the final `.1.ht2` / etc. +`hisat2` looks for the specified index first in the current directory, +then in the directory specified in the `HISAT2_INDEXES` environment variable. + + -1 <m1> + +Comma-separated list of files containing mate 1s (filename usually includes +`_1`), e.g. `-1 flyA_1.fq,flyB_1.fq`. Sequences specified with this option must +correspond file-for-file and read-for-read with those specified in `<m2>`. Reads +may be a mix of different lengths. If `-` is specified, `hisat2` will read the +mate 1s from the "standard in" or "stdin" filehandle. + + -2 <m2> + +Comma-separated list of files containing mate 2s (filename usually includes +`_2`), e.g. `-2 flyA_2.fq,flyB_2.fq`. Sequences specified with this option must +correspond file-for-file and read-for-read with those specified in `<m1>`. Reads +may be a mix of different lengths. If `-` is specified, `hisat2` will read the +mate 2s from the "standard in" or "stdin" filehandle. + + -U <r> + +Comma-separated list of files containing unpaired reads to be aligned, e.g. +`lane1.fq,lane2.fq,lane3.fq,lane4.fq`. Reads may be a mix of different lengths. +If `-` is specified, `hisat2` gets the reads from the "standard in" or "stdin" +filehandle.
+ + --sra-acc + +Comma-separated list of SRA accession numbers, e.g. `--sra-acc SRR353653,SRR353654`. +Information about read types is available at http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?sp=runinfo&acc=sra-acc&retmode=xml, +where sra-acc is SRA accession number. If users run HISAT2 on a computer cluster, it is recommended to disable SRA-related caching (see the instruction at [SRA-MANUAL]). + +[SRA-MANUAL]: https://github.com/ncbi/sra-tools/wiki/Toolkit-Configuration + + -S + +File to write SAM alignments to. By default, alignments are written to the +"standard out" or "stdout" filehandle (i.e. the console). + +### Options + +#### Input options + + -q + +Reads (specified with ``, ``, ``) are FASTQ files. FASTQ files +usually have extension `.fq` or `.fastq`. FASTQ is the default format. See +also: `--solexa-quals` and `--int-quals`. + + --qseq + +Reads (specified with ``, ``, ``) are QSEQ files. QSEQ files usually +end in `_qseq.txt`. See also: `--solexa-quals` and `--int-quals`. + + -f + +Reads (specified with ``, ``, ``) are FASTA files. FASTA files +usually have extension `.fa`, `.fasta`, `.mfa`, `.fna` or similar. FASTA files +do not have a way of specifying quality values, so when `-f` is set, the result +is as if `--ignore-quals` is also set. + + -r + +Reads (specified with ``, ``, ``) are files with one input sequence +per line, without any other information (no read names, no qualities). When +`-r` is set, the result is as if `--ignore-quals` is also set. + + -c + +The read sequences are given on command line. I.e. ``, `` and +`` are comma-separated lists of reads rather than lists of read files. +There is no way to specify read names or qualities, so `-c` also implies +`--ignore-quals`. + + -s/--skip + +Skip (i.e. do not align) the first `` reads or pairs in the input. + + -u/--qupto + +Align the first `` reads or read pairs from the input (after the +`-s`/`--skip` reads or pairs have been skipped), then stop. Default: no limit. 
+ + -5/--trim5 <int> + +Trim `<int>` bases from 5' (left) end of each read before alignment (default: 0). + + -3/--trim3 <int> + +Trim `<int>` bases from 3' (right) end of each read before alignment (default: +0). + + --phred33 + +Input qualities are ASCII chars equal to the [Phred quality] plus 33. This is +also called the "Phred+33" encoding, which is used by the very latest Illumina +pipelines. + +[Phred quality]: http://en.wikipedia.org/wiki/Phred_quality_score + + --phred64 + +Input qualities are ASCII chars equal to the [Phred quality] plus 64. This is +also called the "Phred+64" encoding. + + --solexa-quals + +Convert input qualities from [Solexa][Phred quality] (which can be negative) to +[Phred][Phred quality] (which can't). This scheme was used in older Illumina GA +Pipeline versions (prior to 1.3). Default: off. + + --int-quals + +Quality values are represented in the read input file as space-separated ASCII +integers, e.g., `40 40 30 40`..., rather than ASCII characters, e.g., `II?I`.... + Integers are treated as being on the [Phred quality] scale unless +`--solexa-quals` is also specified. Default: off. + +#### Alignment options + + --n-ceil <func> + +Sets a function governing the maximum number of ambiguous characters (usually +`N`s and/or `.`s) allowed in a read as a function of read length. For instance, +specifying `L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, +where x is the read length. See also: [setting function options]. Reads +exceeding this ceiling are [filtered out]. Default: `L,0,0.15`. + + --ignore-quals + +When calculating a mismatch penalty, always consider the quality value at the +mismatched position to be the highest possible, regardless of the actual value. +I.e. input is treated as though all quality values are high. This is also the +default behavior when the input doesn't specify quality values (e.g. in `-f`, +`-r`, or `-c` modes).
+ + --nofw/--norc + +If `--nofw` is specified, `hisat2` will not attempt to align unpaired reads to +the forward (Watson) reference strand. If `--norc` is specified, `hisat2` will +not attempt to align unpaired reads against the reverse-complement (Crick) +reference strand. In paired-end mode, `--nofw` and `--norc` pertain to the +fragments; i.e. specifying `--nofw` causes `hisat2` to explore only those +paired-end configurations corresponding to fragments from the reverse-complement +(Crick) strand. Default: both strands enabled. + +#### Scoring options + + --mp MX,MN + +Sets the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers. A +number less than or equal to `MX` and greater than or equal to `MN` is +subtracted from the alignment score for each position where a read character +aligns to a reference character, the characters do not match, and neither is an +`N`. If `--ignore-quals` is specified, the number subtracted equals `MX`. +Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` +where Q is the Phred quality value. Default: `MX` = 6, `MN` = 2. + + --sp MX,MN + +Sets the maximum (`MX`) and minimum (`MN`) penalties for soft-clipping per base, +both integers. A number less than or equal to `MX` and greater than or equal to `MN` is +subtracted from the alignment score for each position. +The number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` +where Q is the Phred quality value. Default: `MX` = 2, `MN` = 1. + + --no-softclip + +Disallow soft-clipping. + + --np <int> + +Sets penalty for positions where the read, reference, or both, contain an +ambiguous character such as `N`. Default: 1. + + --rdg <int1>,<int2> + +Sets the read gap open (`<int1>`) and extend (`<int2>`) penalties. A read gap of +length N gets a penalty of `<int1>` + N * `<int2>`. Default: 5, 3. + + --rfg <int1>,<int2> + +Sets the reference gap open (`<int1>`) and extend (`<int2>`) penalties. A +reference gap of length N gets a penalty of `<int1>` + N * `<int2>`. Default: +5, 3.
+ + --score-min + +Sets a function governing the minimum alignment score needed for an alignment to +be considered "valid" (i.e. good enough to report). This is a function of read +length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` +to `f(x) = 0 + -0.6 * x`, where `x` is the read length. See also: [setting +function options]. The default is `L,0,-0.2`. + +#### Spliced alignment options + + --pen-cansplice + +Sets the penalty for each pair of canonical splice sites (e.g. GT/AG). Default: 0. + + --pen-noncansplice + +Sets the penalty for each pair of non-canonical splice sites (e.g. non-GT/AG). Default: 12. + + --pen-canintronlen + +Sets the penalty for long introns with canonical splice sites so that alignments with shorter introns are preferred +to those with longer ones. Default: G,-8,1 + + --pen-noncanintronlen + +Sets the penalty for long introns with noncanonical splice sites so that alignments with shorter introns are preferred +to those with longer ones. Default: G,-8,1 + + --min-intronlen + +Sets minimum intron length. Default: 20 + + --max-intronlen + +Sets maximum intron length. Default: 500000 + + --known-splicesite-infile + +With this mode, you can provide a list of known splice sites, which HISAT2 makes use of to align reads with small anchors. +You can create such a list using `python hisat2_extract_splice_sites.py genes.gtf > splicesites.txt`, +where `hisat2_extract_splice_sites.py` is included in the HISAT2 package, `genes.gtf` is a gene annotation file, +and `splicesites.txt` is a list of splice sites with which you provide HISAT2 in this mode. +Note that it is better to use indexes built using annotated transcripts (such as genome_tran or genome_snp_tran), which works better +than using this option. It has no effect to provide splice sites that are already included in the indexes. 
+ + --novel-splicesite-outfile + +In this mode, HISAT2 reports a list of splice sites in the file : + chromosome name `` genomic position of the flanking base on the left side of an intron `` genomic position of the flanking base on the right `` strand (+, -, and .) + '.' indicates an unknown strand for non-canonical splice sites. + + --novel-splicesite-infile + +With this mode, you can provide a list of novel splice sites that were generated from the above option "--novel-splicesite-outfile". + + --no-temp-splicesite + +HISAT2, by default, makes use of splice sites found by earlier reads to align later reads in the same run, +in particular, reads with small anchors (<= 15 bp). +The option disables this default alignment strategy. + + --no-spliced-alignment + +Disable spliced alignment. + + --rna-strandness + +Specify strand-specific information: the default is unstranded. +For single-end reads, use F or R. + 'F' means a read corresponds to a transcript. + 'R' means a read corresponds to the reverse complemented counterpart of a transcript. +For paired-end reads, use either FR or RF. +With this option being used, every read alignment will have an XS attribute tag: + '+' means a read belongs to a transcript on '+' strand of genome. + '-' means a read belongs to a transcript on '-' strand of genome. + +(TopHat has a similar option, --library-type option, where fr-firststrand corresponds to R and RF; fr-secondstrand corresponds to F and FR.) + + --tmo/--transcriptome-mapping-only + +Report only those alignments within known transcripts. + + --dta/--downstream-transcriptome-assembly + +Report alignments tailored for transcript assemblers including StringTie. +With this option, HISAT2 requires longer anchor lengths for de novo discovery of splice sites. +This leads to fewer alignments with short-anchors, +which helps transcript assemblers improve significantly in computation and memory usage. + + --dta-cufflinks + +Report alignments tailored specifically for Cufflinks. 
In addition to what HISAT2 does with the above option (--dta), +with this option, HISAT2 looks for novel splice sites with three signals (GT/AG, GC/AG, AT/AC), but all user-provided splice sites are used irrespective of their signals. +HISAT2 produces an optional field, XS:A:[+-], for every spliced alignment. + + --no-templatelen-adjustment + +Disables template length adjustment for RNA-seq reads. + +#### Reporting options + + -k <int> + +It searches for at most `<int>` distinct, primary alignments for each read. +Primary alignments mean alignments whose alignment score is equal to or higher than that of any other alignment. +The search terminates when it can't find more distinct valid alignments, or when it +finds `<int>`, whichever happens first. The alignment score for a paired-end +alignment equals the sum of the alignment scores of the individual mates. Each +reported read or pair alignment beyond the first has the SAM 'secondary' bit +(which equals 256) set in its FLAGS field. For reads that have more than +`<int>` distinct, valid alignments, `hisat2` does not guarantee that the +`<int>` alignments reported are the best possible in terms of alignment score. Default: 5 (HFM) or 10 (HGFM) + +Note: HISAT2 is not designed with large values for `-k` in mind, and when +aligning reads to long, repetitive genomes large `-k` can be very, very slow. + + --max-seeds <int> + +HISAT2, like other aligners, uses seed-and-extend approaches. HISAT2 tries to extend seeds to full-length alignments. In HISAT2, --max-seeds is used to control the maximum number of seeds that will be extended. HISAT2 extends up to this many seeds and skips the rest of the seeds. Large values for `--max-seeds` may improve alignment sensitivity, but HISAT2 is not designed with large values for `--max-seeds` in mind, and when aligning reads to long, repetitive genomes large `--max-seeds` can be very, very slow. The default value is the maximum of 5 and the value that comes with `-k`. + + --secondary + +Report secondary alignments.
+ +#### Paired-end options + + -I/--minins <int> + +The minimum fragment length for valid paired-end alignments. This option is valid only with --no-spliced-alignment. +E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the +appropriate orientation with a 20-bp gap between them, that alignment is +considered valid (as long as `-X` is also satisfied). A 19-bp gap would not +be valid in that case. If trimming options `-3` or `-5` are also used, the +`-I` constraint is applied with respect to the untrimmed mates. + +The larger the difference between `-I` and `-X`, the slower HISAT2 will +run. This is because larger differences between `-I` and `-X` require that +HISAT2 scan a larger window to determine if a concordant alignment exists. +For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very +efficient. + +Default: 0 (essentially imposing no minimum) + + -X/--maxins <int> + +The maximum fragment length for valid paired-end alignments. This option is valid only with --no-spliced-alignment. +E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the +proper orientation with a 60-bp gap between them, that alignment is considered +valid (as long as `-I` is also satisfied). A 61-bp gap would not be valid in +that case. If trimming options `-3` or `-5` are also used, the `-X` +constraint is applied with respect to the untrimmed mates, not the trimmed +mates. + +The larger the difference between `-I` and `-X`, the slower HISAT2 will +run. This is because larger differences between `-I` and `-X` require that +HISAT2 scan a larger window to determine if a concordant alignment exists. +For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very +efficient. + +Default: 500. + + --fr/--rf/--ff + +The upstream/downstream mate orientations for a valid paired-end alignment +against the forward reference strand.
E.g., if `--fr` is specified and there is +a candidate paired-end alignment where mate 1 appears upstream of the reverse +complement of mate 2 and the fragment length constraints (`-I` and `-X`) are +met, that alignment is valid. Also, if mate 2 appears upstream of the reverse +complement of mate 1 and all other constraints are met, that too is valid. +`--rf` likewise requires that an upstream mate 1 be reverse-complemented and a +downstream mate 2 be forward-oriented. `--ff` requires both an upstream mate 1 +and a downstream mate 2 to be forward-oriented. Default: `--fr` (appropriate +for Illumina's Paired-end Sequencing Assay). + + --no-mixed + +By default, when `hisat2` cannot find a concordant or discordant alignment for +a pair, it then tries to find alignments for the individual mates. This option +disables that behavior. + + --no-discordant + +By default, `hisat2` looks for discordant alignments if it cannot find any +concordant alignments. A discordant alignment is an alignment where both mates +align uniquely, but that does not satisfy the paired-end constraints +(`--fr`/`--rf`/`--ff`, `-I`, `-X`). This option disables that behavior. + +#### Output options + + -t/--time + +Print the wall-clock time required to load the index files and align the reads. +This is printed to the "standard error" ("stderr") filehandle. Default: off. + + --un + --un-gz + --un-bz2 + +Write unpaired reads that fail to align to file at ``. These reads +correspond to the SAM records with the FLAGS `0x4` bit set and neither the +`0x40` nor `0x80` bits set. If `--un-gz` is specified, output will be gzip +compressed. If `--un-bz2` is specified, output will be bzip2 compressed. Reads +written in this way will appear exactly as they did in the input file, without +any modification (same sequence, same name, same quality string, same quality +encoding). Reads will not necessarily appear in the same order as they did in +the input. 
+ + --al + --al-gz + --al-bz2 + +Write unpaired reads that align at least once to file at ``. These reads +correspond to the SAM records with the FLAGS `0x4`, `0x40`, and `0x80` bits +unset. If `--al-gz` is specified, output will be gzip compressed. If `--al-bz2` +is specified, output will be bzip2 compressed. Reads written in this way will +appear exactly as they did in the input file, without any modification (same +sequence, same name, same quality string, same quality encoding). Reads will +not necessarily appear in the same order as they did in the input. + + --un-conc + --un-conc-gz + --un-conc-bz2 + +Write paired-end reads that fail to align concordantly to file(s) at ``. +These reads correspond to the SAM records with the FLAGS `0x4` bit set and +either the `0x40` or `0x80` bit set (depending on whether it's mate #1 or #2). +`.1` and `.2` strings are added to the filename to distinguish which file +contains mate #1 and mate #2. If a percent symbol, `%`, is used in ``, +the percent symbol is replaced with `1` or `2` to make the per-mate filenames. +Otherwise, `.1` or `.2` are added before the final dot in `` to make the +per-mate filenames. Reads written in this way will appear exactly as they did +in the input files, without any modification (same sequence, same name, same +quality string, same quality encoding). Reads will not necessarily appear in +the same order as they did in the inputs. + + --al-conc + --al-conc-gz + --al-conc-bz2 + +Write paired-end reads that align concordantly at least once to file(s) at +``. These reads correspond to the SAM records with the FLAGS `0x4` bit +unset and either the `0x40` or `0x80` bit set (depending on whether it's mate #1 +or #2). `.1` and `.2` strings are added to the filename to distinguish which +file contains mate #1 and mate #2. If a percent symbol, `%`, is used in +``, the percent symbol is replaced with `1` or `2` to make the per-mate +filenames. 
Otherwise, `.1` or `.2` are added before the final dot in `` to +make the per-mate filenames. Reads written in this way will appear exactly as +they did in the input files, without any modification (same sequence, same name, +same quality string, same quality encoding). Reads will not necessarily appear +in the same order as they did in the inputs. + + --quiet + +Print nothing besides alignments and serious errors. + + --summary-file + +Print alignment summary to this file. + + --new-summary + +Print alignment summary in a new style, which is more machine-friendly. + + --met-file + +Write `hisat2` metrics to file ``. Having alignment metric can be useful +for debugging certain problems, especially performance issues. See also: +`--met`. Default: metrics disabled. + + --met-stderr + +Write `hisat2` metrics to the "standard error" ("stderr") filehandle. This is +not mutually exclusive with `--met-file`. Having alignment metric can be +useful for debugging certain problems, especially performance issues. See also: +`--met`. Default: metrics disabled. + + --met + +Write a new `hisat2` metrics record every `` seconds. Only matters if +either `--met-stderr` or `--met-file` are specified. Default: 1. + +#### SAM options + + --no-unal + +Suppress SAM records for reads that failed to align. + + --no-hd + +Suppress SAM header lines (starting with `@`). + + --no-sq + +Suppress `@SQ` SAM header lines. + + --rg-id + +Set the read group ID to ``. This causes the SAM `@RG` header line to be +printed, with `` as the value associated with the `ID:` tag. It also +causes the `RG:Z:` extra field to be attached to each SAM output record, with +value set to ``. + + --rg + +Add `` (usually of the form `TAG:VAL`, e.g. `SM:Pool1`) as a field on the +`@RG` header line. Note: in order for the `@RG` line to appear, `--rg-id` +must also be specified. This is because the `ID` tag is required by the [SAM +Spec][SAM]. Specify `--rg` multiple times to set multiple fields. 
See the +[SAM Spec][SAM] for details about what fields are legal. + + --remove-chrname + +Remove 'chr' from reference names in alignment (e.g., chr18 to 18) + + --add-chrname + +Add 'chr' to reference names in alignment (e.g., 18 to chr18) + + --omit-sec-seq + +When printing secondary alignments, HISAT2 by default will write out the `SEQ` +and `QUAL` strings. Specifying this option causes HISAT2 to print an asterisk +in those fields instead. + +#### Performance options + + -o/--offrate + +Override the offrate of the index with ``. If `` is greater +than the offrate used to build the index, then some row markings are +discarded when the index is read into memory. This reduces the memory +footprint of the aligner but requires more time to calculate text +offsets. `` must be greater than the value used to build the +index. + + -p/--threads NTHREADS + +Launch `NTHREADS` parallel search threads (default: 1). Threads will run on +separate processors/cores and synchronize when parsing reads and outputting +alignments. Searching for alignments is highly parallel, and speedup is close +to linear. Increasing `-p` increases HISAT2's memory footprint. E.g. when +aligning to a human genome index, increasing `-p` from 1 to 8 increases the +memory footprint by a few hundred megabytes. This option is only available if +`hisat2` is linked with the `pthreads` library (i.e. if `HISAT2_PTHREADS=0` is +not specified at build time). + + --reorder + +Guarantees that output SAM records are printed in an order corresponding to the +order of the reads in the original input file, even when `-p` is set greater +than 1. Specifying `--reorder` and setting `-p` greater than 1 causes HISAT2 +to run somewhat slower and use somewhat more memory than if `--reorder` were +not specified. Has no effect if `-p` is set to 1, since output order will +naturally correspond to input order in that case. + + --mm + +Use memory-mapped I/O to load the index, rather than typical file I/O. 
+Memory-mapping allows many concurrent `hisat2` processes on the same computer to +share the same memory image of the index (i.e. you pay the memory overhead just +once). This facilitates memory-efficient parallelization of `hisat2` in +situations where using `-p` is not possible or not preferable. + +#### Other options + + --qc-filter + +Filter out reads for which the QSEQ filter field is non-zero. Only has an +effect when read format is `--qseq`. Default: off. + + --seed + +Use `` as the seed for pseudo-random number generator. Default: 0. + + --non-deterministic + +Normally, HISAT2 re-initializes its pseudo-random generator for each read. It +seeds the generator with a number derived from (a) the read name, (b) the +nucleotide sequence, (c) the quality sequence, (d) the value of the `--seed` +option. This means that if two reads are identical (same name, same +nucleotides, same qualities) HISAT2 will find and report the same alignment(s) +for both, even if there was ambiguity. When `--non-deterministic` is specified, +HISAT2 re-initializes its pseudo-random generator for each read using the +current time. This means that HISAT2 will not necessarily report the same +alignment for two identical reads. This is counter-intuitive for some users, +but might be more appropriate in situations where the input consists of many +identical reads. + + --version + +Print version information and quit. + + -h/--help + +Print usage information and quit. + +SAM output +---------- + +Following is a brief description of the [SAM] format as output by `hisat2`. +For more details, see the [SAM format specification][SAM]. + +By default, `hisat2` prints a SAM header with `@HD`, `@SQ` and `@PG` lines. +When one or more `--rg` arguments are specified, `hisat2` will also print +an `@RG` line that includes all user-specified `--rg` tokens separated by +tabs. + +Each subsequent line describes an alignment or, if the read failed to align, a +read. 
Each line is a collection of at least 12 fields separated by tabs; from +left to right, the fields are: + +1. Name of read that aligned. + + Note that the [SAM specification] disallows whitespace in the read name. + If the read name contains any whitespace characters, HISAT2 will truncate + the name at the first whitespace character. This is similar to the + behavior of other tools. + +2. Sum of all applicable flags. Flags relevant to HISAT2 are: + + 1 + + The read is one of a pair + + 2 + + The alignment is one end of a proper paired-end alignment + + 4 + + The read has no reported alignments + + 8 + + The read is one of a pair and has no reported alignments + + 16 + + The alignment is to the reverse reference strand + + 32 + + The other mate in the paired-end alignment is aligned to the + reverse reference strand + + 64 + + The read is mate 1 in a pair + + 128 + + The read is mate 2 in a pair + + Thus, an unpaired read that aligns to the reverse reference strand + will have flag 16. A paired-end read that aligns and is the first + mate in the pair will have flag 83 (= 64 + 16 + 2 + 1). + +3. Name of reference sequence where alignment occurs + +4. 1-based offset into the forward reference strand where leftmost + character of the alignment occurs + +5. Mapping quality + +6. CIGAR string representation of alignment + +7. Name of reference sequence where mate's alignment occurs. Set to `=` if the +mate's reference sequence is the same as this alignment's, or `*` if there is no +mate. + +8. 1-based offset into the forward reference strand where leftmost character of +the mate's alignment occurs. Offset is 0 if there is no mate. + +9. Inferred fragment length. Size is negative if the mate's alignment occurs +upstream of this alignment. Size is 0 if the mates did not align concordantly. +However, size is non-0 if the mates aligned discordantly to the same +chromosome. + +10. Read sequence (reverse-complemented if aligned to the reverse strand) + +11. 
ASCII-encoded read qualities (reverse-complemented if the read aligned to +the reverse strand). The encoded quality values are on the [Phred quality] +scale and the encoding is ASCII-offset by 33 (ASCII char `!`), similarly to a +[FASTQ] file. + +12. Optional fields. Fields are tab-separated. `hisat2` outputs zero or more +of these optional fields for each alignment, depending on the type of the +alignment: + + AS:i: + + Alignment score. Can be negative. Only present if SAM record is for + an aligned read. + + ZS:i: + + Alignment score for the best-scoring alignment found other than the + alignment reported. Can be negative. Only present if the SAM record is + for an aligned read and more than one alignment was found for the read. + Note that, when the read is part of a concordantly-aligned pair, this score + could be greater than `AS:i`. + + YS:i: + + Alignment score for opposite mate in the paired-end alignment. Only present + if the SAM record is for a read that aligned as part of a paired-end + alignment. + + XN:i: + + The number of ambiguous bases in the reference covering this alignment. + Only present if SAM record is for an aligned read. + + XM:i: + + The number of mismatches in the alignment. Only present if SAM record is + for an aligned read. + + XO:i: + + The number of gap opens, for both read and reference gaps, in the alignment. + Only present if SAM record is for an aligned read. + + XG:i: + + The number of gap extensions, for both read and reference gaps, in the + alignment. Only present if SAM record is for an aligned read. + + NM:i: + + The edit distance; that is, the minimal number of one-nucleotide edits + (substitutions, insertions and deletions) needed to transform the read + string into the reference string. Only present if SAM record is for an + aligned read. + + YF:Z: + + String indicating reason why the read was filtered out. See also: + [Filtering]. Only appears for reads that were filtered out. 
+ + YT:Z: + + Value of `UU` indicates the read was not part of a pair. Value of `CP` + indicates the read was part of a pair and the pair aligned concordantly. + Value of `DP` indicates the read was part of a pair and the pair aligned + discordantly. Value of `UP` indicates the read was part of a pair but the + pair failed to align either concordantly or discordantly. + + MD:Z: + + A string representation of the mismatched reference bases in the alignment. + See [SAM] format specification for details. Only present if SAM record is + for an aligned read. + + XS:A: + + Values of `+` and `-` indicate the read is mapped to transcripts on sense and anti-sense + strands, respectively. Spliced alignments need to have this field, which is required in Cufflinks and StringTie. + We can report this field for the canonical-splice site (GT/AG), but not for non-canonical splice sites. + You can direct HISAT2 not to output such alignments (involving non-canonical splice sites) using "--pen-noncansplice 1000000". + + NH:i: + + The number of mapped locations for the read or the pair. + + Zs:Z: + + When the alignment of a read involves SNPs that are in the index, this option is used to indicate where exactly the read involves the SNPs. + This optional field is similar to the above MD:Z field. + For example, `Zs:Z:1|S|rs3747203,97|S|rs16990981` indicates the second base of the read corresponds to a known SNP (ID: rs3747203). + 97 bases after the third base (the base after the second one), the read at 100th base involves another known SNP (ID: rs16990981). + 'S' indicates a single nucleotide polymorphism. 'D' and 'I' indicate a deletion and an insertion, respectively. + +[SAM format specification]: http://samtools.sf.net/SAM1.pdf +[FASTQ]: http://en.wikipedia.org/wiki/FASTQ_format + +The `hisat2-build` indexer +=========================== + +`hisat2-build` builds a HISAT2 index from a set of DNA sequences. 
+`hisat2-build` outputs a set of 8 files with suffixes `.1.ht2`, `.2.ht2`, +`.3.ht2`, `.4.ht2`, `.5.ht2`, `.6.ht2`, `.7.ht2`, and `.8.ht2`. In the case of a large +index these suffixes will have a `ht2l` termination. These files together +constitute the index: they are all that is needed to align reads to that +reference. The original sequence FASTA files are no longer used by HISAT2 +once the index is built. + +Use of Karkkainen's [blockwise algorithm] allows `hisat2-build` to trade off +between running time and memory usage. `hisat2-build` has three options +governing how it makes this trade: `-p`/`--packed`, `--bmax`/`--bmaxdivn`, +and `--dcv`. By default, `hisat2-build` will automatically search for the +settings that yield the best running time without exhausting memory. This +behavior can be disabled using the `-a`/`--noauto` option. + +The indexer provides options pertaining to the "shape" of the index, e.g. +`--offrate` governs the fraction of [Burrows-Wheeler] +rows that are "marked" (i.e., the density of the suffix-array sample; see the +original [FM Index] paper for details). All of these options are potentially +profitable trade-offs depending on the application. They have been set to +defaults that are reasonable for most cases according to our experiments. See +[Performance tuning] for details. + +`hisat2-build` can generate either [small or large indexes]. The wrapper +will decide which based on the length of the input genome. If the reference +does not exceed 4 billion characters but a large index is preferred, the user +can specify `--large-index` to force `hisat2-build` to build a large index +instead. + +The HISAT2 index is based on the [FM Index] of Ferragina and Manzini, which in +turn is based on the [Burrows-Wheeler] transform. The algorithm used to build +the index is based on the [blockwise algorithm] of Karkkainen. 
+ +[Blockwise algorithm]: http://portal.acm.org/citation.cfm?id=1314852 +[Burrows-Wheeler]: http://en.wikipedia.org/wiki/Burrows-Wheeler_transform + +Command Line +------------ + +Usage: + + hisat2-build [options]* + +### Notes + If you use --snp, --ss, and/or --exon, hisat2-build will need about 200GB RAM for the human genome size as index building involves a graph construction. + Otherwise, you will be able to build an index on your desktop with 8GB RAM. + +### Main arguments + +A comma-separated list of FASTA files containing the reference sequences to be +aligned to, or, if `-c` is specified, the sequences +themselves. E.g., `` might be `chr1.fa,chr2.fa,chrX.fa,chrY.fa`, +or, if `-c` is specified, this might be +`GGTCATCCT,ACGGGTCGT,CCGTTCTATGCGGCTTA`. + +The basename of the index files to write. By default, `hisat2-build` writes +files named `NAME.1.ht2`, `NAME.2.ht2`, `NAME.3.ht2`, `NAME.4.ht2`, +`NAME.5.ht2`, `NAME.6.ht2`, `NAME.7.ht2`, and `NAME.8.ht2` where `NAME` is ``. + +### Options + + -f + +The reference input files (specified as ``) are FASTA files +(usually having extension `.fa`, `.mfa`, `.fna` or similar). + + -c + +The reference sequences are given on the command line. I.e. `` is +a comma-separated list of sequences rather than a list of FASTA files. + + --large-index + +Force `hisat2-build` to build a [large index], even if the reference is less +than ~ 4 billion nucleotides long. + + -a/--noauto + +Disable the default behavior whereby `hisat2-build` automatically selects +values for the `--bmax`, `--dcv` and `--packed` parameters according to +available memory. Instead, user may specify values for those parameters. If +memory is exhausted during indexing, an error message will be printed; it is up +to the user to try new parameters. + + --bmax + +The maximum number of suffixes allowed in a block. Allowing more suffixes per +block makes indexing faster, but increases peak memory usage. 
Setting this +option overrides any previous setting for `--bmax`, or `--bmaxdivn`. +Default (in terms of the `--bmaxdivn` parameter) is `--bmaxdivn` 4. This is +configured automatically by default; use `-a`/`--noauto` to configure manually. + + --bmaxdivn + +The maximum number of suffixes allowed in a block, expressed as a fraction of +the length of the reference. Setting this option overrides any previous setting +for `--bmax`, or `--bmaxdivn`. Default: `--bmaxdivn` 4. This is +configured automatically by default; use `-a`/`--noauto` to configure manually. + + --dcv + +Use `` as the period for the difference-cover sample. A larger period +yields less memory overhead, but may make suffix sorting slower, especially if +repeats are present. Must be a power of 2 no greater than 4096. Default: 1024. + This is configured automatically by default; use `-a`/`--noauto` to configure +manually. + + --nodc + +Disable use of the difference-cover sample. Suffix sorting becomes +quadratic-time in the worst case (where the worst case is an extremely +repetitive reference). Default: off. + + -r/--noref + +Do not build the `NAME.3.ht2` and `NAME.4.ht2` portions of the index, which +contain a bitpacked version of the reference sequences and are used for +paired-end alignment. + + -3/--justref + +Build only the `NAME.3.ht2` and `NAME.4.ht2` portions of the index, which +contain a bitpacked version of the reference sequences and are used for +paired-end alignment. + + -o/--offrate + +To map alignments back to positions on the reference sequences, it's necessary +to annotate ("mark") some or all of the [Burrows-Wheeler] rows with their +corresponding location on the genome. +`-o`/`--offrate` governs how many rows get marked: +the indexer will mark every 2^`` rows. Marking more rows makes +reference-position lookups faster, but requires more memory to hold the +annotations at runtime. The default is 4 (every 16th row is marked; for human +genome, annotations occupy about 680 megabytes). 
+ + -t/--ftabchars + +The ftab is the lookup table used to calculate an initial [Burrows-Wheeler] +range with respect to the first `` characters of the query. A larger +`` yields a larger lookup table but faster query times. The ftab has size +4^(``+1) bytes. The default setting is 10 (ftab is 4MB). + + --localoffrate + +This option governs how many rows get marked in a local index: +the indexer will mark every 2^`` rows. Marking more rows makes +reference-position lookups faster, but requires more memory to hold the +annotations at runtime. The default is 3 (every 8th row is marked, +this occupies about 16KB per local index). + + --localftabchars + +The local ftab is the lookup table in a local index. +The default setting is 6 (ftab is 8KB per local index). + + -p + +Launch `NTHREADS` parallel build threads (default: 1). + + --snp + +Provide a list of SNPs (in the HISAT2's own format) as follows (five columns). + + SNP ID `` snp type (single, deletion, or insertion) `` chromosome name `` zero-offset based genomic position of a SNP `` alternative base (single), the length of SNP (deletion), or insertion sequence (insertion) + + For example, + rs58784443 single 13 18447947 T + +Use `hisat2_extract_snps_haplotypes_UCSC.py` (in the HISAT2 package) to extract SNPs and haplotypes from a dbSNP file (e.g. http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/snp144Common.txt.gz). +or `hisat2_extract_snps_haplotypes_VCF.py` to extract SNPs and haplotypes from a VCF file (e.g. ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions/ALL.chr22.phase3_shapeit2_mvncall_integrated_v3plus_nounphased.rsID.genotypes.GRCh38_dbSNP_no_SVs.vcf.gz). + + --haplotype + +Provide a list of haplotypes (in the HISAT2's own format) as follows (five columns). 
+ + Haplotype ID `` chromosome name `` zero-offset based left coordinate of haplotype `` zero-offset based right coordinate of haplotype `` a comma separated list of SNP ids in the haplotype + + For example, + ht35 13 18446877 18446945 rs12381094,rs12381056,rs192016659,rs538569910 + +See the above option, --snp, about how to extract haplotypes. This option is not required, but haplotype information can keep the index construction from exploding and reduce the index size substantially. + + --ss + +Note this option should be used with the following --exon option. +Provide a list of splice sites (in the HISAT2's own format) as follows (four columns). + + chromosome name `` zero-offset based genomic position of the flanking base on the left side of an intron `` zero-offset based genomic position of the flanking base on the right `` strand + +Use `hisat2_extract_splice_sites.py` (in the HISAT2 package) to extract splice sites from a GTF file. + + --exon + +Note this option should be used with the above --ss option. +Provide a list of exons (in the HISAT2's own format) as follows (three columns). + + chromosome name `` zero-offset based left genomic position of an exon `` zero-offset based right genomic position of an exon + +Use `hisat2_extract_exons.py` (in the HISAT2 package) to extract exons from a GTF file. + + --seed + +Use `` as the seed for pseudo-random number generator. + + --cutoff + +Index only the first `` bases of the reference sequences (cumulative across +sequences) and ignore the rest. + + -q/--quiet + +`hisat2-build` is verbose by default. With this option `hisat2-build` will +print only error messages. + + -h/--help + +Print usage information and quit. + + --version + +Print version information and quit. + +The `hisat2-inspect` index inspector +===================================== + +`hisat2-inspect` extracts information from a HISAT2 index about what kind of +index it is and what reference sequences were used to build it. 
When run without +any options, the tool will output a FASTA file containing the sequences of the +original references (with all non-`A`/`C`/`G`/`T` characters converted to `N`s). + It can also be used to extract just the reference sequence names using the +`-n`/`--names` option or a more verbose summary using the `-s`/`--summary` +option. + +Command Line +------------ + +Usage: + + hisat2-inspect [options]* + +### Main arguments + +The basename of the index to be inspected. The basename is name of any of the +index files but with the `.X.ht2` suffix omitted. +`hisat2-inspect` first looks in the current directory for the index files, then +in the directory specified in the `HISAT2_INDEXES` environment variable. + +### Options + + -a/--across + +When printing FASTA output, output a newline character every `` bases +(default: 60). + + -n/--names + +Print reference sequence names, one per line, and quit. + + -s/--summary + +Print a summary that includes information about index settings, as well as the +names and lengths of the input sequences. The summary has this format: + + Colorspace <0 or 1> + SA-Sample 1 in + FTab-Chars + Sequence-1 + Sequence-2 + ... + Sequence-N + +Fields are separated by tabs. Colorspace is always set to 0 for HISAT2. + + --snp + +Print SNPs, and quit. + + --ss + +Print splice sites, and quit. + + --ss-all + +Print splice sites including those not in the global index, and quit. + + --exon + +Print exons, and quit. + + -v/--verbose + +Print verbose output (for debugging). + + --version + +Print version information and quit. + + -h/--help + +Print usage information and quit. + +Getting started with HISAT2 +=================================================== + +HISAT2 comes with some example files to get you started. The example files +are not scientifically significant; these files will simply let you start running HISAT2 and +downstream tools right away. + +First follow the manual instructions to [obtain HISAT2]. 
Set the `HISAT2_HOME` +environment variable to point to the new HISAT2 directory containing the +`hisat2`, `hisat2-build` and `hisat2-inspect` binaries. This is important, +as the `HISAT2_HOME` variable is used in the commands below to refer to that +directory. + +Indexing a reference genome +--------------------------- + +To create an index for the genomic region (1 million bps from the human chromosome 22 between 20,000,000 and 20,999,999) +included with HISAT2, create a new temporary directory (it doesn't matter where), change into that directory, and run: + + $HISAT2_HOME/hisat2-build $HISAT2_HOME/example/reference/22_20-21M.fa --snp $HISAT2_HOME/example/reference/22_20-21M.snp 22_20-21M_snp + +The command should print many lines of output then quit. When the command +completes, the current directory will contain eight new files that all start with +`22_20-21M_snp` and end with `.1.ht2`, `.2.ht2`, `.3.ht2`, `.4.ht2`, `.5.ht2`, `.6.ht2`, +`.7.ht2`, and `.8.ht2`. These files constitute the index - you're done! + +You can use `hisat2-build` to create an index for a set of FASTA files obtained +from any source, including sites such as [UCSC], [NCBI], and [Ensembl]. When +indexing multiple FASTA files, specify all the files using commas to separate +file names. For more details on how to create an index with `hisat2-build`, +see the [manual section on index building]. You may also want to bypass this +process by obtaining a pre-built index. + +[UCSC]: http://genome.ucsc.edu/cgi-bin/hgGateway +[NCBI]: http://www.ncbi.nlm.nih.gov/sites/genome +[Ensembl]: http://www.ensembl.org/ + +Aligning example reads +---------------------- + +Stay in the directory created in the previous step, which now contains the +`22_20-21M` index files. 
Next, run: + + $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -U $HISAT2_HOME/example/reads/reads_1.fa -S eg1.sam + +This runs the HISAT2 aligner, which aligns a set of unpaired reads to the +genome region using the index generated in the previous step. +The alignment results in SAM format are written to the file `eg1.sam`, and a +short alignment summary is written to the console. (Actually, the summary is +written to the "standard error" or "stderr" filehandle, which is typically +printed to the console.) + +To see the first few lines of the SAM output, run: + + head eg1.sam + +You will see something like this: + + @HD VN:1.0 SO:unsorted + @SQ SN:22:20000001-21000000 LN:1000000 + @PG ID:hisat2 PN:hisat2 VN:2.0.0-beta + 1 0 22:20000001-21000000 397984 255 100M * 0 0 GCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU NH:i:1 + 2 16 22:20000001-21000000 398131 255 100M * 0 0 ATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:80A19 YT:Z:UU NH:i:1 Zs:Z:80|S|rs576159895 + 3 16 22:20000001-21000000 398222 255 100M * 0 0 TGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGTCAGAGCTGCAGTACTTGGCGATCTCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:16A83 YT:Z:UU NH:i:1 Zs:Z:16|S|rs2629364 + 4 16 22:20000001-21000000 398247 255 90M200N10M * 0 0 CAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGTCAGAGCTGCAGTACTTGGCGATCTCAAACCGCTGCACCAGGAAGTCGATCCAG 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU XS:A:- NH:i:1 + 5 16 22:20000001-21000000 398194 255 100M * 0 0 GGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:17A26A55 YT:Z:UU NH:i:1 Zs:Z:17|S|rs576159895,26|S|rs2629364 + 6 0 22:20000001-21000000 398069 255 100M * 0 0 CAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU NH:i:1 + 7 0 22:20000001-21000000 397896 255 100M * 0 0 GTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:31G68 YT:Z:UU NH:i:1 Zs:Z:31|S|rs562662261 + 8 0 22:20000001-21000000 398150 255 100M * 0 0 AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:61A26A11 YT:Z:UU NH:i:1 Zs:Z:61|S|rs576159895,26|S|rs2629364 + 9 16 22:20000001-21000000 398329 255 8M200N92M * 0 0 ACCAGGAAGTCGATCCAGATGTAGTGGGGGGTCACTTCGGGGGGACAGGGTTTGGGTTGACTTGCTTCCGAGGCAGCCAGGGGGTCTGCTTCCTTTATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU XS:A:- NH:i:1 + 10 16 22:20000001-21000000 398184 255 100M * 0 0 
CTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:27A26A45 YT:Z:UU NH:i:1 Zs:Z:27|S|rs576159895,26|S|rs2629364 + +The first few lines (beginning with `@`) are SAM header lines, and the rest of +the lines are SAM alignments, one line per read or mate. See the [HISAT2 +manual section on SAM output] and the [SAM specification] for details about how +to interpret the SAM file format. + +Paired-end example +------------------ + +To align paired-end reads included with HISAT2, stay in the same directory and +run: + + $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -1 $HISAT2_HOME/example/reads/reads_1.fa -2 $HISAT2_HOME/example/reads/reads_2.fa -S eg2.sam + +This aligns a set of paired-end reads to the reference genome, with results +written to the file `eg2.sam`. + +Using SAMtools/BCFtools downstream +---------------------------------- + +[SAMtools] is a collection of tools for manipulating and analyzing SAM and BAM +alignment files. [BCFtools] is a collection of tools for calling variants and +manipulating VCF and BCF files, and it is typically distributed with [SAMtools]. +Using these tools together allows you to get from alignments in SAM format to +variant calls in VCF format. This example assumes that `samtools` and +`bcftools` are installed and that the directories containing these binaries are +in your [PATH environment variable]. + +Run the paired-end example: + + $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -1 $HISAT2_HOME/example/reads/reads_1.fa -2 $HISAT2_HOME/example/reads/reads_2.fa -S eg2.sam + +Use `samtools view` to convert the SAM file into a BAM file. BAM is the +binary format corresponding to the SAM text format. 
Run: + + samtools view -bS eg2.sam > eg2.bam + +Use `samtools sort` to convert the BAM file to a sorted BAM file. The following command requires samtools version 1.2 or higher. + + samtools sort eg2.bam -o eg2.sorted.bam + +We now have a sorted BAM file called `eg2.sorted.bam`. Sorted BAM is a useful +format because the alignments are (a) compressed, which is convenient for +long-term storage, and (b) sorted, which is convenient for variant discovery. +To generate variant calls in VCF format, run: + + samtools mpileup -uf $HISAT2_HOME/example/reference/22_20-21M.fa eg2.sorted.bam | bcftools view -bvcg - > eg2.raw.bcf + +Then to view the variants, run: + + bcftools view eg2.raw.bcf + +See the official SAMtools guide to [Calling SNPs/INDELs with SAMtools/BCFtools] +for more details and variations on this process. + +[BCFtools]: http://samtools.sourceforge.net/mpileup.shtml +[Calling SNPs/INDELs with SAMtools/BCFtools]: http://samtools.sourceforge.net/mpileup.shtml diff --git a/MANUAL.markdown b/MANUAL.markdown new file mode 100644 index 0000000..a88b0f6 --- /dev/null +++ b/MANUAL.markdown @@ -0,0 +1,2437 @@ + + +Introduction +============ + +What is HISAT2? +----------------- + +HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads +(whole-genome, transcriptome, and exome sequencing data) against the general human population +(as well as against a single reference genome). Based on [GCSA] (an extension of [BWT] for a graph), we designed and implemented a graph FM index (GFM), +an original approach and its first implementation to the best of our knowledge. +In addition to using one global GFM index that represents general population, +HISAT2 uses a large set of small GFM indexes that collectively cover the whole genome +(each index representing a genomic region of 56 Kbp, with 55,000 indexes needed to cover human population). 
+These small indexes (called local indexes) combined with several alignment strategies enable effective alignment of sequencing reads. +This new indexing scheme is called Hierarchical Graph FM index (HGFM). +We have developed HISAT 2 based on the [HISAT] and [Bowtie2] implementations. +HISAT2 outputs alignments in [SAM] format, enabling interoperation with a large number of other tools (e.g. [SAMtools], [GATK]) that use SAM. +HISAT2 is distributed under the [GPLv3 license], and it runs on the command line under +Linux, Mac OS X and Windows. + +[HISAT2]: http://ccb.jhu.edu/software/hisat2 +[HISAT]: http://ccb.jhu.edu/software/hisat +[Bowtie2]: http://bowtie-bio.sf.net/bowtie2 +[Bowtie]: http://bowtie-bio.sf.net +[Bowtie1]: http://bowtie-bio.sf.net +[GCSA]: http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6698337&tag=1 +[Burrows-Wheeler Transform]: http://en.wikipedia.org/wiki/Burrows-Wheeler_transform +[BWT]: http://en.wikipedia.org/wiki/Burrows-Wheeler_transform +[FM Index]: http://en.wikipedia.org/wiki/FM-index +[SAM]: http://samtools.sourceforge.net/SAM1.pdf +[SAMtools]: http://samtools.sourceforge.net +[GATK]: http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit +[TopHat2]: http://ccb.jhu.edu/software/tophat +[Cufflinks]: http://cufflinks.cbcb.umd.edu/ +[Crossbow]: http://bowtie-bio.sf.net/crossbow +[Myrna]: http://bowtie-bio.sf.net/myrna +[Bowtie paper]: http://genomebiology.com/2009/10/3/R25 +[GPLv3 license]: http://www.gnu.org/licenses/gpl-3.0.html + + +Obtaining HISAT2 +================== + +Download HISAT2 sources and binaries from the Releases sections on the right side. +Binaries are available for Intel architectures (`x86_64`) running Linux, and Mac OS X. + +Building from source +-------------------- + +Building HISAT2 from source requires a GNU-like environment with GCC, GNU Make +and other basics. It should be possible to build HISAT2 on most vanilla Linux +installations or on a Mac installation with [Xcode] installed. 
HISAT2 can +also be built on Windows using [Cygwin] or [MinGW] (MinGW recommended). For a +MinGW build the choice of what compiler is to be used is important since this +will determine if a 32 or 64 bit code can be successfully compiled using it. If +there is a need to generate both 32 and 64 bit on the same machine then a multilib +MinGW has to be properly installed. [MSYS], the [zlib] library, and depending on +architecture [pthreads] library are also required. We are recommending a 64 bit +build since it has some clear advantages in real life research problems. In order +to simplify the MinGW setup it might be worth investigating popular MinGW personal +builds since these are coming already prepared with most of the toolchains needed. + +First, download the [source package] from the Releases section on the right side. +Unzip the file, change to the unzipped directory, and build the +HISAT2 tools by running GNU `make` (usually with the command `make`, but +sometimes with `gmake`) with no arguments. If building with MinGW, run `make` +from the MSYS environment. + +HISAT2 is using the multithreading software model in order to speed up +execution times on SMP architectures where this is possible. On POSIX +platforms (like Linux, Mac OS, etc.) it needs the pthread library. Although +it is possible to use pthread library on non-POSIX platform like Windows, due +to performance reasons HISAT2 will try to use Windows native multithreading +if possible. + +For the support of SRA data access in HISAT2, please download and install the [NCBI-NGS] toolkit. +When running `make`, specify additional variables as follows: +`make USE_SRA=1 NCBI_NGS_DIR=/path/to/NCBI-NGS-directory NCBI_VDB_DIR=/path/to/NCBI-NGS-directory`, +where `NCBI_NGS_DIR` and `NCBI_VDB_DIR` will be used in Makefile for -I and -L compilation options. +For example, $(NCBI_NGS_DIR)/include and $(NCBI_NGS_DIR)/lib64 will be used. 
+ +[Cygwin]: http://www.cygwin.com/ +[MinGW]: http://www.mingw.org/ +[MSYS]: http://www.mingw.org/wiki/msys +[zlib]: http://cygwin.com/packages/mingw-zlib/ +[pthreads]: http://sourceware.org/pthreads-win32/ +[GnuWin32]: http://gnuwin32.sf.net/packages/coreutils.htm +[Download]: https://sourceforge.net/projects/bowtie-bio/files/bowtie2/ +[sourceforge site]: https://sourceforge.net/projects/bowtie-bio/files/bowtie2/ +[source package]: http://ccb.jhu.edu/software/hisat2/downloads/hisat2-2.0.0-beta-source.zip +[Xcode]: http://developer.apple.com/xcode/ +[NCBI-NGS]: https://github.com/ncbi/ngs/wiki/Downloads + +Running HISAT2 +============= + +Adding to PATH +-------------- + +By adding your new HISAT2 directory to your [PATH environment variable], you +ensure that whenever you run `hisat2`, `hisat2-build` or `hisat2-inspect` +from the command line, you will get the version you just installed without +having to specify the entire path. This is recommended for most users. To do +this, follow your operating system's instructions for adding the directory to +your [PATH]. + +If you would like to install HISAT2 by copying the HISAT2 executable files +to an existing directory in your [PATH], make sure that you copy all the +executables, including `hisat2`, `hisat2-align-s`, `hisat2-align-l`, `hisat2-build`, `hisat2-build-s`, `hisat2-build-l`, `hisat2-inspect`, `hisat2-inspect-s` and +`hisat2-inspect-l`. + +[PATH environment variable]: http://en.wikipedia.org/wiki/PATH_(variable) +[PATH]: http://en.wikipedia.org/wiki/PATH_(variable) + +Reporting +--------- + +The reporting mode governs how many alignments HISAT2 looks for, and how to +report them. + +In general, when we say that a read has an alignment, we mean that it has a +[valid alignment]. When we say that a read has multiple alignments, we mean +that it has multiple alignments that are valid and distinct from one another. 
+ +[valid alignment]: #valid-alignments-meet-or-exceed-the-minimum-score-threshold + +By default, HISAT2 may soft-clip reads near their 5' and 3' ends. Users can control this behavior by setting different penalties for soft-clipping ([`--sp`]) or by disallowing soft-clipping ([`--no-softclip`]). + +### Distinct alignments map a read to different places + +Two alignments for the same individual read are "distinct" if they map the same +read to different places. Specifically, we say that two alignments are distinct +if there are no alignment positions where a particular read offset is aligned +opposite a particular reference offset in both alignments with the same +orientation. E.g. if the first alignment is in the forward orientation and +aligns the read character at read offset 10 to the reference character at +chromosome 3, offset 3,445,245, and the second alignment is also in the forward +orientation and also aligns the read character at read offset 10 to the +reference character at chromosome 3, offset 3,445,245, they are not distinct +alignments. + +Two alignments for the same pair are distinct if either the mate 1s in the two +paired-end alignments are distinct or the mate 2s in the two alignments are +distinct or both. + +### Default mode: search for one or more alignments, report each + +HISAT2 searches for up to N distinct, primary alignments for +each read, where N equals the integer specified with the `-k` parameter. +Primary alignments mean alignments whose alignment score is equal or higher than any other alignments. +It is possible that multiple distinct alignments have the same score. +That is, if `-k 2` is specified, HISAT2 will search for at most 2 distinct +alignments. The alignment score for a paired-end alignment equals the sum of the +alignment scores of the individual mates. Each reported read or pair alignment +beyond the first has the SAM 'secondary' bit (which equals 256) set in its FLAGS +field. See the [SAM specification] for details. 
+ +HISAT2 does not "find" alignments in any specific order, so for reads that +have more than N distinct, valid alignments, HISAT2 does not guarantee that +the N alignments reported are the best possible in terms of alignment score. +Still, this mode can be effective and fast in situations where the user cares +more about whether a read aligns (or aligns a certain number of times) than +where exactly it originated. + + +[SAM specification]: http://samtools.sourceforge.net/SAM1.pdf + +Alignment summary +------------------ + +When HISAT2 finishes running, it prints messages summarizing what happened. +These messages are printed to the "standard error" ("stderr") filehandle. For +datasets consisting of unpaired reads, the summary might look like this: + + 20000 reads; of these: + 20000 (100.00%) were unpaired; of these: + 1247 (6.24%) aligned 0 times + 18739 (93.69%) aligned exactly 1 time + 14 (0.07%) aligned >1 times + 93.77% overall alignment rate + +For datasets consisting of pairs, the summary might look like this: + + 10000 reads; of these: + 10000 (100.00%) were paired; of these: + 650 (6.50%) aligned concordantly 0 times + 8823 (88.23%) aligned concordantly exactly 1 time + 527 (5.27%) aligned concordantly >1 times + ---- + 650 pairs aligned concordantly 0 times; of these: + 34 (5.23%) aligned discordantly 1 time + ---- + 616 pairs aligned 0 times concordantly or discordantly; of these: + 1232 mates make up the pairs; of these: + 660 (53.57%) aligned 0 times + 571 (46.35%) aligned exactly 1 time + 1 (0.08%) aligned >1 times + 96.70% overall alignment rate + +The indentation indicates how subtotals relate to totals. + +Wrapper +------- + +The `hisat2`, `hisat2-build` and `hisat2-inspect` executables are actually +wrapper scripts that call binary programs as appropriate. The wrappers shield +users from having to distinguish between "small" and "large" index formats, +discussed briefly in the following section. 
Also, the `hisat2` wrapper +provides some key functionality, like the ability to handle compressed inputs, +and the functionality for [`--un`], [`--al`] and related options. + +It is recommended that you always run the hisat2 wrappers and not run the +binaries directly. + +Small and large indexes +----------------------- + +`hisat2-build` can index reference genomes of any size. For genomes less than +about 4 billion nucleotides in length, `hisat2-build` builds a "small" index +using 32-bit numbers in various parts of the index. When the genome is longer, +`hisat2-build` builds a "large" index using 64-bit numbers. Small indexes are +stored in files with the `.ht2` extension, and large indexes are stored in +files with the `.ht2l` extension. The user need not worry about whether a +particular index is small or large; the wrapper scripts will automatically build +and use the appropriate index. + +Performance tuning +------------------ + +1. If your computer has multiple processors/cores, use `-p` + + The [`-p`] option causes HISAT2 to launch a specified number of parallel + search threads. Each thread runs on a different processor/core and all + threads find alignments in parallel, increasing alignment throughput by + approximately a multiple of the number of threads (though in practice, + speedup is somewhat worse than linear). + +Command Line +------------ + +### Setting function options + +Some HISAT2 options specify a function rather than an individual number or +setting. In these cases the user specifies three parameters: (a) a function +type `F`, (b) a constant term `B`, and (c) a coefficient `A`. The available +function types are constant (`C`), linear (`L`), square-root (`S`), and natural +log (`G`). The parameters are specified as `F,B,A` - that is, the function type, +the constant term, and the coefficient are separated by commas with no +whitespace. The constant term and coefficient may be negative and/or +floating-point numbers. 
+ +For example, if the function specification is `L,-0.4,-0.6`, then the function +defined is: + + f(x) = -0.4 + -0.6 * x + +If the function specification is `G,1,5.4`, then the function defined is: + + f(x) = 1.0 + 5.4 * ln(x) + +See the documentation for the option in question to learn what the parameter `x` +is for. For example, in the case of the [`--score-min`] option, the function +`f(x)` sets the minimum alignment score necessary for an alignment to be +considered valid, and `x` is the read length. + +### Usage + + hisat2 [options]* -x <hisat2-idx> {-1 <m1> -2 <m2> | -U <r> | --sra-acc <SRA accession number>} [-S <hit>] + +### Main arguments + +
+ +[`-x`]: #hisat2-options-x + + -x + + + +The basename of the index for the reference genome. The basename is the name of +any of the index files up to but not including the final `.1.ht2` / etc. +`hisat2` looks for the specified index first in the current directory, +then in the directory specified in the `HISAT2_INDEXES` environment variable. + +
+ +[`-1`]: #hisat2-options-1 + + -1 <m1> + + + +Comma-separated list of files containing mate 1s (filename usually includes +`_1`), e.g. `-1 flyA_1.fq,flyB_1.fq`. Sequences specified with this option must +correspond file-for-file and read-for-read with those specified in `<m2>`. Reads +may be a mix of different lengths. If `-` is specified, `hisat2` will read the +mate 1s from the "standard in" or "stdin" filehandle. + +
+ +[`-2`]: #hisat2-options-2 + + -2 <m2> + + + +Comma-separated list of files containing mate 2s (filename usually includes +`_2`), e.g. `-2 flyA_2.fq,flyB_2.fq`. Sequences specified with this option must +correspond file-for-file and read-for-read with those specified in `<m1>`. Reads +may be a mix of different lengths. If `-` is specified, `hisat2` will read the +mate 2s from the "standard in" or "stdin" filehandle. + +
+ +[`-U`]: #hisat2-options-U + + -U + + + +Comma-separated list of files containing unpaired reads to be aligned, e.g. +`lane1.fq,lane2.fq,lane3.fq,lane4.fq`. Reads may be a mix of different lengths. +If `-` is specified, `hisat2` gets the reads from the "standard in" or "stdin" +filehandle. + +
+ +[`--sra-acc`]: #hisat2-options-sra-acc + + --sra-acc + + + +Comma-separated list of SRA accession numbers, e.g. `--sra-acc SRR353653,SRR353654`. +Information about read types is available at http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?sp=runinfo&acc=sra-acc&retmode=xml, +where sra-acc is SRA accession number. If users run HISAT2 on a computer cluster, it is recommended to disable SRA-related caching (see the instruction at [SRA-MANUAL]). + +[SRA-MANUAL]: https://github.com/ncbi/sra-tools/wiki/Toolkit-Configuration + +
+ +[`-S`]: #hisat2-options-S + + -S + + + +File to write SAM alignments to. By default, alignments are written to the +"standard out" or "stdout" filehandle (i.e. the console). + +
+ +### Options + +#### Input options + + + + + + + + + + + + + +
+ +[`-q`]: #hisat2-options-q + + -q + + + +Reads (specified with ``, ``, ``) are FASTQ files. FASTQ files +usually have extension `.fq` or `.fastq`. FASTQ is the default format. See +also: [`--solexa-quals`] and [`--int-quals`]. + +
+ +[`--qseq`]: #hisat2-options-qseq + + --qseq + + + +Reads (specified with ``, ``, ``) are QSEQ files. QSEQ files usually +end in `_qseq.txt`. See also: [`--solexa-quals`] and [`--int-quals`]. + +
+ +[`-f`]: #hisat2-options-f + + -f + + + +Reads (specified with ``, ``, ``) are FASTA files. FASTA files +usually have extension `.fa`, `.fasta`, `.mfa`, `.fna` or similar. FASTA files +do not have a way of specifying quality values, so when `-f` is set, the result +is as if `--ignore-quals` is also set. + +
+ +[`-r`]: #hisat2-options-r + + -r + + + +Reads (specified with ``, ``, ``) are files with one input sequence +per line, without any other information (no read names, no qualities). When +`-r` is set, the result is as if `--ignore-quals` is also set. + +
+ +[`-c`]: #hisat2-options-c + + -c + + + +The read sequences are given on command line. I.e. ``, `` and +`` are comma-separated lists of reads rather than lists of read files. +There is no way to specify read names or qualities, so `-c` also implies +`--ignore-quals`. + +
+ +[`-s`/`--skip`]: #hisat2-options-s +[`-s`]: #hisat2-options-s + + -s/--skip <int> + + + +Skip (i.e. do not align) the first `<int>` reads or pairs in the input. + +
+ +[`-u`/`--qupto`]: #hisat2-options-u +[`-u`]: #hisat2-options-u + + -u/--qupto <int> + + + +Align the first `<int>` reads or read pairs from the input (after the +[`-s`/`--skip`] reads or pairs have been skipped), then stop. Default: no limit. + +
+ +[`-5`/`--trim5`]: #hisat2-options-5 +[`-5`]: #hisat2-options-5 + + -5/--trim5 <int> + + + +Trim `<int>` bases from 5' (left) end of each read before alignment (default: 0). + +
+ +[`-3`/`--trim3`]: #hisat2-options-3 +[`-3`]: #hisat2-options-3 + + -3/--trim3 <int> + + + +Trim `<int>` bases from 3' (right) end of each read before alignment (default: +0). + +
+ +[`--phred33`]: #hisat2-options-phred33-quals + + --phred33 + + + +Input qualities are ASCII chars equal to the [Phred quality] plus 33. This is +also called the "Phred+33" encoding, which is used by the very latest Illumina +pipelines. + +[Phred quality]: http://en.wikipedia.org/wiki/Phred_quality_score + +
+ +[`--phred64`]: #hisat2-options-phred64-quals + + --phred64 + + + +Input qualities are ASCII chars equal to the [Phred quality] plus 64. This is +also called the "Phred+64" encoding. + +
+ +[`--solexa-quals`]: #hisat2-options-solexa-quals + + --solexa-quals + + + +Convert input qualities from [Solexa][Phred quality] (which can be negative) to +[Phred][Phred quality] (which can't). This scheme was used in older Illumina GA +Pipeline versions (prior to 1.3). Default: off. + +
+ +[`--int-quals`]: #hisat2-options-int-quals + + --int-quals + + + +Quality values are represented in the read input file as space-separated ASCII +integers, e.g., `40 40 30 40`..., rather than ASCII characters, e.g., `II?I`.... + Integers are treated as being on the [Phred quality] scale unless +[`--solexa-quals`] is also specified. Default: off. + +
+ +#### Alignment options + + + + + + + + +
+ +[`--n-ceil`]: #hisat2-options-n-ceil + + --n-ceil <func> + + + +Sets a function governing the maximum number of ambiguous characters (usually +`N`s and/or `.`s) allowed in a read as a function of read length. For instance, +specifying `L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, +where x is the read length. See also: [setting function options]. Reads +exceeding this ceiling are [filtered out]. Default: `L,0,0.15`. + +[filtered out]: #filtering + +
+ +[`--ignore-quals`]: #hisat2-options-ignore-quals + + --ignore-quals + + + +When calculating a mismatch penalty, always consider the quality value at the +mismatched position to be the highest possible, regardless of the actual value. +I.e. input is treated as though all quality values are high. This is also the +default behavior when the input doesn't specify quality values (e.g. in [`-f`], +[`-r`], or [`-c`] modes). + +
+ +[`--nofw`]: #hisat2-options-nofw + + --nofw/--norc + + + +If `--nofw` is specified, `hisat2` will not attempt to align unpaired reads to +the forward (Watson) reference strand. If `--norc` is specified, `hisat2` will +not attempt to align unpaired reads against the reverse-complement (Crick) +reference strand. In paired-end mode, `--nofw` and `--norc` pertain to the +fragments; i.e. specifying `--nofw` causes `hisat2` to explore only those +paired-end configurations corresponding to fragments from the reverse-complement +(Crick) strand. Default: both strands enabled. + +
+ +#### Scoring options + + + + + + + + + + + +
+ +[`--mp`]: #hisat2-options-mp + + --mp MX,MN + + + +Sets the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers. A +number less than or equal to `MX` and greater than or equal to `MN` is +subtracted from the alignment score for each position where a read character +aligns to a reference character, the characters do not match, and neither is an +`N`. If [`--ignore-quals`] is specified, the number subtracted equals `MX`. +Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` +where Q is the Phred quality value. Default: `MX` = 6, `MN` = 2. + +
+ +[`--sp`]: #hisat2-options-sp + + --sp MX,MN + + + +Sets the maximum (`MX`) and minimum (`MN`) penalties for soft-clipping per base, +both integers. A number less than or equal to `MX` and greater than or equal to `MN` is +subtracted from the alignment score for each position. +The number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` +where Q is the Phred quality value. Default: `MX` = 2, `MN` = 1. + +
+ +[`--no-softclip`]: #hisat2-options-no-softclip + + --no-softclip + + + +Disallow soft-clipping. + +
+ +[`--np`]: #hisat2-options-np + + --np + + + +Sets penalty for positions where the read, reference, or both, contain an +ambiguous character such as `N`. Default: 1. + +
+ +[`--rdg`]: #hisat2-options-rdg + + --rdg <int1>,<int2> + + + +Sets the read gap open (`<int1>`) and extend (`<int2>`) penalties. A read gap of +length N gets a penalty of `<int1>` + N * `<int2>`. Default: 5, 3. + +
+ +[`--rfg`]: #hisat2-options-rfg + + --rfg <int1>,<int2> + + + +Sets the reference gap open (`<int1>`) and extend (`<int2>`) penalties. A +reference gap of length N gets a penalty of `<int1>` + N * `<int2>`. Default: +5, 3. + +
+ +[`--score-min`]: #hisat2-options-score-min + + --score-min + + + +Sets a function governing the minimum alignment score needed for an alignment to +be considered "valid" (i.e. good enough to report). This is a function of read +length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` +to `f(x) = 0 + -0.6 * x`, where `x` is the read length. See also: [setting +function options]. The default is `L,0,-0.2`. + +
+ +#### Spliced alignment options + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +[`--pen-cansplice`]: #hisat2-options-pen-cansplice + + --pen-cansplice + + + +Sets the penalty for each pair of canonical splice sites (e.g. GT/AG). Default: 0. + +
+ +[`--pen-noncansplice`]: #hisat2-options-pen-noncansplice + + --pen-noncansplice + + + +Sets the penalty for each pair of non-canonical splice sites (e.g. non-GT/AG). Default: 12. + +
+ +[`--pen-canintronlen`]: #hisat2-options-pen-canintronlen + + --pen-canintronlen + + + +Sets the penalty for long introns with canonical splice sites so that alignments with shorter introns are preferred +to those with longer ones. Default: G,-8,1 + +
+ +[`--pen-noncanintronlen`]: #hisat2-options-pen-noncanintronlen + + --pen-noncanintronlen + + + +Sets the penalty for long introns with noncanonical splice sites so that alignments with shorter introns are preferred +to those with longer ones. Default: G,-8,1 + + +
+ +[`--min-intronlen`]: #hisat2-options-min-intronlen + + --min-intronlen + + + +Sets minimum intron length. Default: 20 + +
+ +[`--max-intronlen`]: #hisat2-options-max-intronlen + + --max-intronlen + + + +Sets maximum intron length. Default: 500000 + +
+ +[`--known-splicesite-infile`]: #hisat2-options-known-splicesite-infile + + --known-splicesite-infile <path> + + + +With this mode, you can provide a list of known splice sites, which HISAT2 makes use of to align reads with small anchors. +You can create such a list using `python hisat2_extract_splice_sites.py genes.gtf > splicesites.txt`, +where `hisat2_extract_splice_sites.py` is included in the HISAT2 package, `genes.gtf` is a gene annotation file, +and `splicesites.txt` is a list of splice sites with which you provide HISAT2 in this mode. +Note that it is better to use indexes built using annotated transcripts (such as genome_tran or genome_snp_tran), which work better +than using this option. Providing splice sites that are already included in the indexes has no effect. + +
+ +[`--novel-splicesite-outfile`]: #hisat2-options-novel-splicesite-outfile + + --novel-splicesite-outfile <path> + + + +In this mode, HISAT2 reports a list of splice sites in the file `<path>`: + chromosome name `<tab>` genomic position of the flanking base on the left side of an intron `<tab>` genomic position of the flanking base on the right `<tab>` strand (+, -, and .) + '.' indicates an unknown strand for non-canonical splice sites. + +
+ +[`--novel-splicesite-infile`]: #hisat2-options-novel-splicesite-infile + + --novel-splicesite-infile + + + +With this mode, you can provide a list of novel splice sites that were generated from the above option "--novel-splicesite-outfile". + +
+ +[`--no-temp-splicesite`]: #hisat2-options-no-temp-splicesite + + --no-temp-splicesite + + + +HISAT2, by default, makes use of splice sites found by earlier reads to align later reads in the same run, +in particular, reads with small anchors (<= 15 bp). +The option disables this default alignment strategy. + +
+ +[`--no-spliced-alignment`]: #hisat2-options-no-spliced-alignment + + --no-spliced-alignment + + + +Disable spliced alignment. + +
+[`--rna-strandness`]: #hisat2-options-rna-strandness + + --rna-strandness + + + +Specify strand-specific information: the default is unstranded. +For single-end reads, use F or R. + 'F' means a read corresponds to a transcript. + 'R' means a read corresponds to the reverse complemented counterpart of a transcript. +For paired-end reads, use either FR or RF. +With this option being used, every read alignment will have an XS attribute tag: + '+' means a read belongs to a transcript on '+' strand of genome. + '-' means a read belongs to a transcript on '-' strand of genome. + +(TopHat has a similar option, --library-type option, where fr-firststrand corresponds to R and RF; fr-secondstrand corresponds to F and FR.) +
+[`--tmo/--transcriptome-mapping-only`]: #hisat2-options-tmo + + --tmo/--transcriptome-mapping-only + + + +Report only those alignments within known transcripts. + +
+[`--dta/--downstream-transcriptome-assembly`]: #hisat2-options-dta + + --dta/--downstream-transcriptome-assembly + + + +Report alignments tailored for transcript assemblers including StringTie. +With this option, HISAT2 requires longer anchor lengths for de novo discovery of splice sites. +This leads to fewer alignments with short-anchors, +which helps transcript assemblers improve significantly in computation and memory usage. + +
+ +Report alignments tailored specifically for Cufflinks. In addition to what HISAT2 does with the above option (--dta), +with this option, HISAT2 looks for novel splice sites with three signals (GT/AG, GC/AG, AT/AC), but all user-provided splice sites are used irrespective of their signals. +HISAT2 produces an optional field, XS:A:[+-], for every spliced alignment. + +
+[`--avoid-pseudogene`]: #hisat2-options-avoid-pseudogene + + --avoid-pseudogene + + + +Try to avoid aligning reads to pseudogenes. Note this option is experimental and needs further investigation. + +
+[`--no-templatelen-adjustment`]: #hisat2-options-no-templatelen-adjustment + + --no-templatelen-adjustment + + + +Disables template length adjustment for RNA-seq reads. + +
+ +#### Reporting options + + + + + + + + +
+ +[`-k`]: #hisat2-options-k + + -k <int> + + + +It searches for at most `<int>` distinct, primary alignments for each read. +Primary alignments are alignments whose alignment score is equal to or higher than that of any other alignment. +The search terminates when it can't find more distinct valid alignments, or when it +finds `<int>`, whichever happens first. The alignment score for a paired-end +alignment equals the sum of the alignment scores of the individual mates. Each +reported read or pair alignment beyond the first has the SAM 'secondary' bit +(which equals 256) set in its FLAGS field. For reads that have more than +`<int>` distinct, valid alignments, `hisat2` does not guarantee that the +`<int>` alignments reported are the best possible in terms of alignment score. Default: 5 (HFM) or 10 (HGFM) + +Note: HISAT2 is not designed with large values for `-k` in mind, and when +aligning reads to long, repetitive genomes large `-k` can be very, very slow. + +
+ +[`--max-seeds`]: #hisat2-options-max-seeds + + --max-seeds <int> + + + +HISAT2, like other aligners, uses seed-and-extend approaches. HISAT2 tries to extend seeds to full-length alignments. In HISAT2, --max-seeds is used to control the maximum number of seeds that will be extended. HISAT2 extends up to this many seeds and skips the rest of the seeds. Large values for `--max-seeds` may improve alignment sensitivity, but HISAT2 is not designed with large values for `--max-seeds` in mind, and when aligning reads to long, repetitive genomes large `--max-seeds` can be very, very slow. The default value is the maximum of 5 and the value that comes with `-k`. + +
+ +[`--secondary`]: #hisat2-options-secondary + + --secondary + + + +Report secondary alignments. + +
+ +#### Paired-end options + + + + + + + +
+ +[`-I`/`--minins`]: #hisat2-options-I +[`-I`]: #hisat2-options-I + + -I/--minins + + + +The minimum fragment length for valid paired-end alignments. This option is valid only with --no-spliced-alignment. +E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the +appropriate orientation with a 20-bp gap between them, that alignment is +considered valid (as long as [`-X`] is also satisfied). A 19-bp gap would not +be valid in that case. If trimming options [`-3`] or [`-5`] are also used, the +[`-I`] constraint is applied with respect to the untrimmed mates. + +The larger the difference between [`-I`] and [`-X`], the slower HISAT2 will +run. This is because larger differences between [`-I`] and [`-X`] require that +HISAT2 scan a larger window to determine if a concordant alignment exists. +For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very +efficient. + +Default: 0 (essentially imposing no minimum) + +
+ +[`-X`/`--maxins`]: #hisat2-options-X +[`-X`]: #hisat2-options-X + + -X/--maxins + + + +The maximum fragment length for valid paired-end alignments. This option is valid only with --no-spliced-alignment. +E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the +proper orientation with a 60-bp gap between them, that alignment is considered +valid (as long as [`-I`] is also satisfied). A 61-bp gap would not be valid in +that case. If trimming options [`-3`] or [`-5`] are also used, the `-X` +constraint is applied with respect to the untrimmed mates, not the trimmed +mates. + +The larger the difference between [`-I`] and [`-X`], the slower HISAT2 will +run. This is because larger differences between [`-I`] and [`-X`] require that +HISAT2 scan a larger window to determine if a concordant alignment exists. +For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very +efficient. + +Default: 500. + +
+ +[`--fr`/`--rf`/`--ff`]: #hisat2-options-fr +[`--fr`]: #hisat2-options-fr +[`--rf`]: #hisat2-options-fr +[`--ff`]: #hisat2-options-fr + + --fr/--rf/--ff + + + +The upstream/downstream mate orientations for a valid paired-end alignment +against the forward reference strand. E.g., if `--fr` is specified and there is +a candidate paired-end alignment where mate 1 appears upstream of the reverse +complement of mate 2 and the fragment length constraints ([`-I`] and [`-X`]) are +met, that alignment is valid. Also, if mate 2 appears upstream of the reverse +complement of mate 1 and all other constraints are met, that too is valid. +`--rf` likewise requires that an upstream mate1 be reverse-complemented and a +downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 +and a downstream mate 2 to be forward-oriented. Default: `--fr` (appropriate +for Illumina's Paired-end Sequencing Assay). + +
+ +[`--no-mixed`]: #hisat2-options-no-mixed + + --no-mixed + + + +By default, when `hisat2` cannot find a concordant or discordant alignment for +a pair, it then tries to find alignments for the individual mates. This option +disables that behavior. + +
+ +[`--no-discordant`]: #hisat2-options-no-discordant + + --no-discordant + + + +By default, `hisat2` looks for discordant alignments if it cannot find any +concordant alignments. A discordant alignment is an alignment where both mates +align uniquely, but that does not satisfy the paired-end constraints +([`--fr`/`--rf`/`--ff`], [`-I`], [`-X`]). This option disables that behavior. + +
+ +#### Output options + + + + + + + + + + + + + + + + + +
+ +[`-t`/`--time`]: #hisat2-options-t +[`-t`]: #hisat2-options-t + + -t/--time + + + +Print the wall-clock time required to load the index files and align the reads. +This is printed to the "standard error" ("stderr") filehandle. Default: off. + +
+ +[`--un`]: #hisat2-options-un +[`--un-gz`]: #hisat2-options-un +[`--un-bz2`]: #hisat2-options-un + + --un + --un-gz + --un-bz2 + + + +Write unpaired reads that fail to align to file at ``. These reads +correspond to the SAM records with the FLAGS `0x4` bit set and neither the +`0x40` nor `0x80` bits set. If `--un-gz` is specified, output will be gzip +compressed. If `--un-bz2` is specified, output will be bzip2 compressed. Reads +written in this way will appear exactly as they did in the input file, without +any modification (same sequence, same name, same quality string, same quality +encoding). Reads will not necessarily appear in the same order as they did in +the input. + +
+ +[`--al`]: #hisat2-options-al +[`--al-gz`]: #hisat2-options-al +[`--al-bz2`]: #hisat2-options-al + + --al + --al-gz + --al-bz2 + + + +Write unpaired reads that align at least once to file at ``. These reads +correspond to the SAM records with the FLAGS `0x4`, `0x40`, and `0x80` bits +unset. If `--al-gz` is specified, output will be gzip compressed. If `--al-bz2` +is specified, output will be bzip2 compressed. Reads written in this way will +appear exactly as they did in the input file, without any modification (same +sequence, same name, same quality string, same quality encoding). Reads will +not necessarily appear in the same order as they did in the input. + +
+ +[`--un-conc`]: #hisat2-options-un-conc +[`--un-conc-gz`]: #hisat2-options-un-conc +[`--un-conc-bz2`]: #hisat2-options-un-conc + + --un-conc + --un-conc-gz + --un-conc-bz2 + + + +Write paired-end reads that fail to align concordantly to file(s) at ``. +These reads correspond to the SAM records with the FLAGS `0x4` bit set and +either the `0x40` or `0x80` bit set (depending on whether it's mate #1 or #2). +`.1` and `.2` strings are added to the filename to distinguish which file +contains mate #1 and mate #2. If a percent symbol, `%`, is used in ``, +the percent symbol is replaced with `1` or `2` to make the per-mate filenames. +Otherwise, `.1` or `.2` are added before the final dot in `` to make the +per-mate filenames. Reads written in this way will appear exactly as they did +in the input files, without any modification (same sequence, same name, same +quality string, same quality encoding). Reads will not necessarily appear in +the same order as they did in the inputs. + +
+ +[`--al-conc`]: #hisat2-options-al-conc +[`--al-conc-gz`]: #hisat2-options-al-conc +[`--al-conc-bz2`]: #hisat2-options-al-conc + + --al-conc + --al-conc-gz + --al-conc-bz2 + + + +Write paired-end reads that align concordantly at least once to file(s) at +``. These reads correspond to the SAM records with the FLAGS `0x4` bit +unset and either the `0x40` or `0x80` bit set (depending on whether it's mate #1 +or #2). `.1` and `.2` strings are added to the filename to distinguish which +file contains mate #1 and mate #2. If a percent symbol, `%`, is used in +``, the percent symbol is replaced with `1` or `2` to make the per-mate +filenames. Otherwise, `.1` or `.2` are added before the final dot in `` to +make the per-mate filenames. Reads written in this way will appear exactly as +they did in the input files, without any modification (same sequence, same name, +same quality string, same quality encoding). Reads will not necessarily appear +in the same order as they did in the inputs. + +
+ +[`--quiet`]: #hisat2-options-quiet + + --quiet + + + +Print nothing besides alignments and serious errors. + +
+ +[`--summary-file`]: #hisat2-options-summary-file + + --summary-file + + + +Print alignment summary to this file. + +
+ +[`--new-summary`]: #hisat2-options-new-summary + + --new-summary + + + +Print alignment summary in a new style, which is more machine-friendly. + +
+ +[`--met-file`]: #hisat2-options-met-file + + --met-file + + + +Write `hisat2` metrics to file ``. Having alignment metrics can be useful +for debugging certain problems, especially performance issues. See also: +[`--met`]. Default: metrics disabled. + +
+ +[`--met-stderr`]: #hisat2-options-met-stderr + + --met-stderr + + + +Write `hisat2` metrics to the "standard error" ("stderr") filehandle. This is +not mutually exclusive with [`--met-file`]. Having alignment metrics can be +useful for debugging certain problems, especially performance issues. See also: +[`--met`]. Default: metrics disabled. + +
+ +[`--met`]: #hisat2-options-met + + --met + + + +Write a new `hisat2` metrics record every `` seconds. Only matters if +either [`--met-stderr`] or [`--met-file`] are specified. Default: 1. + +
+ +#### SAM options + + + + + + + + + + + + + +
+ +[`--no-unal`]: #hisat2-options-no-unal + + --no-unal + + + +Suppress SAM records for reads that failed to align. + +
+ +[`--no-hd`]: #hisat2-options-no-hd + + --no-hd + + + +Suppress SAM header lines (starting with `@`). + +
+ +[`--no-sq`]: #hisat2-options-no-sq + + --no-sq + + + +Suppress `@SQ` SAM header lines. + +
+ +[`--rg-id`]: #hisat2-options-rg-id + + --rg-id + + + +Set the read group ID to ``. This causes the SAM `@RG` header line to be +printed, with `` as the value associated with the `ID:` tag. It also +causes the `RG:Z:` extra field to be attached to each SAM output record, with +value set to ``. + +
+ +[`--rg`]: #hisat2-options-rg + + --rg + + + +Add `` (usually of the form `TAG:VAL`, e.g. `SM:Pool1`) as a field on the +`@RG` header line. Note: in order for the `@RG` line to appear, [`--rg-id`] +must also be specified. This is because the `ID` tag is required by the [SAM +Spec][SAM]. Specify `--rg` multiple times to set multiple fields. See the +[SAM Spec][SAM] for details about what fields are legal. + + +
+ +[`--remove-chrname`]: #hisat2-remove-chrname + + --remove-chrname + + + +Remove 'chr' from reference names in alignment (e.g., chr18 to 18) + +
+ +[`--add-chrname`]: #hisat2-options-add-chrname + + --add-chrname + + + +Add 'chr' to reference names in alignment (e.g., 18 to chr18) + +
+ +[`--omit-sec-seq`]: #hisat2-options-omit-sec-seq + + --omit-sec-seq + + + +When printing secondary alignments, HISAT2 by default will write out the `SEQ` +and `QUAL` strings. Specifying this option causes HISAT2 to print an asterisk +in those fields instead. + +
+ +#### Performance options + + + + + + +
+ +[`-o`/`--offrate`]: #hisat2-options-o +[`-o`]: #hisat2-options-o +[`--offrate`]: #hisat2-options-o + + -o/--offrate + + + +Override the offrate of the index with ``. If `` is greater +than the offrate used to build the index, then some row markings are +discarded when the index is read into memory. This reduces the memory +footprint of the aligner but requires more time to calculate text +offsets. `` must be greater than the value used to build the +index. + +
+ +[`-p`/`--threads`]: #hisat2-options-p +[`-p`]: #hisat2-options-p + + -p/--threads NTHREADS + + + +Launch `NTHREADS` parallel search threads (default: 1). Threads will run on +separate processors/cores and synchronize when parsing reads and outputting +alignments. Searching for alignments is highly parallel, and speedup is close +to linear. Increasing `-p` increases HISAT2's memory footprint. E.g. when +aligning to a human genome index, increasing `-p` from 1 to 8 increases the +memory footprint by a few hundred megabytes. This option is only available if +`hisat2` is linked with the `pthreads` library (i.e. if `BOWTIE_PTHREADS=0` is +not specified at build time). + +
+ +[`--reorder`]: #hisat2-options-reorder + + --reorder + + + +Guarantees that output SAM records are printed in an order corresponding to the +order of the reads in the original input file, even when [`-p`] is set greater +than 1. Specifying `--reorder` and setting [`-p`] greater than 1 causes HISAT2 +to run somewhat slower and use somewhat more memory than if `--reorder` were +not specified. Has no effect if [`-p`] is set to 1, since output order will +naturally correspond to input order in that case. + +
+ +[`--mm`]: #hisat2-options-mm + + --mm + + + +Use memory-mapped I/O to load the index, rather than typical file I/O. +Memory-mapping allows many concurrent `hisat2` processes on the same computer to +share the same memory image of the index (i.e. you pay the memory overhead just +once). This facilitates memory-efficient parallelization of `hisat2` in +situations where using [`-p`] is not possible or not preferable. + +
+ +#### Other options + + + + + + +
+ +[`--qc-filter`]: #hisat2-options-qc-filter + + --qc-filter + + + +Filter out reads for which the QSEQ filter field is non-zero. Only has an +effect when read format is [`--qseq`]. Default: off. + +
+ +[`--seed`]: #hisat2-options-seed + + --seed + + + +Use `` as the seed for pseudo-random number generator. Default: 0. + +
+ +[`--non-deterministic`]: #hisat2-options-non-deterministic + + --non-deterministic + + + +Normally, HISAT2 re-initializes its pseudo-random generator for each read. It +seeds the generator with a number derived from (a) the read name, (b) the +nucleotide sequence, (c) the quality sequence, (d) the value of the [`--seed`] +option. This means that if two reads are identical (same name, same +nucleotides, same qualities) HISAT2 will find and report the same alignment(s) +for both, even if there was ambiguity. When `--non-deterministic` is specified, +HISAT2 re-initializes its pseudo-random generator for each read using the +current time. This means that HISAT2 will not necessarily report the same +alignment for two identical reads. This is counter-intuitive for some users, +but might be more appropriate in situations where the input consists of many +identical reads. + +
+ +[`--version`]: #hisat2-options-version + + --version + + + +Print version information and quit. + +
+ + -h/--help + + + +Print usage information and quit. + +
+ +SAM output +---------- + +Following is a brief description of the [SAM] format as output by `hisat2`. +For more details, see the [SAM format specification][SAM]. + +By default, `hisat2` prints a SAM header with `@HD`, `@SQ` and `@PG` lines. +When one or more [`--rg`] arguments are specified, `hisat2` will also print +an `@RG` line that includes all user-specified [`--rg`] tokens separated by +tabs. + +Each subsequent line describes an alignment or, if the read failed to align, a +read. Each line is a collection of at least 12 fields separated by tabs; from +left to right, the fields are: + +1. Name of read that aligned. + + Note that the [SAM specification] disallows whitespace in the read name. + If the read name contains any whitespace characters, HISAT2 will truncate + the name at the first whitespace character. This is similar to the + behavior of other tools. + +2. Sum of all applicable flags. Flags relevant to HISAT2 are: + +
+ + 1 + + + + The read is one of a pair + +
+ + 2 + + + + The alignment is one end of a proper paired-end alignment + +
+ + 4 + + + + The read has no reported alignments + +
+ + 8 + + + + The read is one of a pair and has no reported alignments + +
+ + 16 + + + + The alignment is to the reverse reference strand + +
+ + 32 + + + + The other mate in the paired-end alignment is aligned to the + reverse reference strand + +
+ + 64 + + + + The read is mate 1 in a pair + +
+ + 128 + + + + The read is mate 2 in a pair + +
+ + Thus, an unpaired read that aligns to the reverse reference strand + will have flag 16. A paired-end read that aligns and is the first + mate in the pair will have flag 83 (= 64 + 16 + 2 + 1). + +3. Name of reference sequence where alignment occurs + +4. 1-based offset into the forward reference strand where leftmost + character of the alignment occurs + +5. Mapping quality. Mapping quality of HISAT2 + +6. CIGAR string representation of alignment + +7. Name of reference sequence where mate's alignment occurs. Set to `=` if the +mate's reference sequence is the same as this alignment's, or `*` if there is no +mate. + +8. 1-based offset into the forward reference strand where leftmost character of +the mate's alignment occurs. Offset is 0 if there is no mate. + +9. Inferred fragment length. Size is negative if the mate's alignment occurs +upstream of this alignment. Size is 0 if the mates did not align concordantly. +However, size is non-0 if the mates aligned discordantly to the same +chromosome. + +10. Read sequence (reverse-complemented if aligned to the reverse strand) + +11. ASCII-encoded read qualities (reversed if the read aligned to +the reverse strand). The encoded quality values are on the [Phred quality] +scale and the encoding is ASCII-offset by 33 (ASCII char `!`), similarly to a +[FASTQ] file. + +12. Optional fields. Fields are tab-separated. `hisat2` outputs zero or more +of these optional fields for each alignment, depending on the type of the +alignment: + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + AS:i: + + + + Alignment score. Can be negative. Only present if SAM record is for + an aligned read. + +
+ + ZS:i: + + + Alignment score for the best-scoring alignment found other than the + alignment reported. Can be negative. Only present if the SAM record is + for an aligned read and more than one alignment was found for the read. + Note that, when the read is part of a concordantly-aligned pair, this score + could be greater than [`AS:i`]. + +
+ + YS:i: + + + + Alignment score for opposite mate in the paired-end alignment. Only present + if the SAM record is for a read that aligned as part of a paired-end + alignment. + +
+ + XN:i: + + + + The number of ambiguous bases in the reference covering this alignment. + Only present if SAM record is for an aligned read. + +
+ + XM:i: + + + + The number of mismatches in the alignment. Only present if SAM record is + for an aligned read. + +
+ + XO:i: + + + + The number of gap opens, for both read and reference gaps, in the alignment. + Only present if SAM record is for an aligned read. + +
+ + XG:i: + + + + The number of gap extensions, for both read and reference gaps, in the + alignment. Only present if SAM record is for an aligned read. + +
+ + NM:i: + + + + The edit distance; that is, the minimal number of one-nucleotide edits + (substitutions, insertions and deletions) needed to transform the read + string into the reference string. Only present if SAM record is for an + aligned read. + +
+ + YF:Z: + + + + String indicating reason why the read was filtered out. See also: + [Filtering]. Only appears for reads that were filtered out. + +
+ + YT:Z: + + + + Value of `UU` indicates the read was not part of a pair. Value of `CP` + indicates the read was part of a pair and the pair aligned concordantly. + Value of `DP` indicates the read was part of a pair and the pair aligned + discordantly. Value of `UP` indicates the read was part of a pair but the + pair failed to align either concordantly or discordantly. + +
+ + MD:Z: + + + + A string representation of the mismatched reference bases in the alignment. + See [SAM] format specification for details. Only present if SAM record is + for an aligned read. + +
+ + XS:A: + + + + Values of `+` and `-` indicate the read is mapped to transcripts on sense and anti-sense + strands, respectively. Spliced alignments need to have this field, which is required in Cufflinks and StringTie. + We can report this field for the canonical-splice site (GT/AG), but not for non-canonical splice sites. + You can direct HISAT2 not to output such alignments (involving non-canonical splice sites) using "--pen-noncansplice 1000000". + +
+ + NH:i: + + + + The number of mapped locations for the read or the pair. + +
+ + Zs:Z: + + + + When the alignment of a read involves SNPs that are in the index, this field indicates where exactly the read involves the SNPs. + This optional field is similar to the above MD:Z field. + For example, `Zs:Z:1|S|rs3747203,97|S|rs16990981` indicates the second base of the read corresponds to a known SNP (ID: rs3747203). + 97 bases after the third base (the base after the second one), the read at 100th base involves another known SNP (ID: rs16990981). + 'S' indicates a single nucleotide polymorphism. 'D' and 'I' indicate a deletion and an insertion, respectively. +
+ +[SAM format specification]: http://samtools.sf.net/SAM1.pdf +[FASTQ]: http://en.wikipedia.org/wiki/FASTQ_format +[`-S`/`--sam`]: #hisat2-options-S +[`-m`]: #hisat2-options-m + +The `hisat2-build` indexer +=========================== + +`hisat2-build` builds a HISAT2 index from a set of DNA sequences. +`hisat2-build` outputs a set of 8 files with suffixes `.1.ht2`, `.2.ht2`, +`.3.ht2`, `.4.ht2`, `.5.ht2`, `.6.ht2`, `.7.ht2`, and `.8.ht2`. In the case of a large +index these suffixes will have a `ht2l` termination. These files together +constitute the index: they are all that is needed to align reads to that +reference. The original sequence FASTA files are no longer used by HISAT2 +once the index is built. + +Use of Karkkainen's [blockwise algorithm] allows `hisat2-build` to trade off +between running time and memory usage. `hisat2-build` has three options +governing how it makes this trade: [`-p`/`--packed`], [`--bmax`]/[`--bmaxdivn`], +and [`--dcv`]. By default, `hisat2-build` will automatically search for the +settings that yield the best running time without exhausting memory. This +behavior can be disabled using the [`-a`/`--noauto`] option. + +The indexer provides options pertaining to the "shape" of the index, e.g. +[`--offrate`](#hisat2-build-options-o) governs the fraction of [Burrows-Wheeler] +rows that are "marked" (i.e., the density of the suffix-array sample; see the +original [FM Index] paper for details). All of these options are potentially +profitable trade-offs depending on the application. They have been set to +defaults that are reasonable for most cases according to our experiments. See +[Performance tuning] for details. + +`hisat2-build` can generate either [small or large indexes](#small-and-large-indexes). The wrapper +will decide which based on the length of the input genome. 
If the reference +does not exceed 4 billion characters but a large index is preferred, the user +can specify [`--large-index`] to force `hisat2-build` to build a large index +instead. + +The HISAT2 index is based on the [FM Index] of Ferragina and Manzini, which in +turn is based on the [Burrows-Wheeler] transform. The algorithm used to build +the index is based on the [blockwise algorithm] of Karkkainen. + +[Blockwise algorithm]: http://portal.acm.org/citation.cfm?id=1314852 +[Burrows-Wheeler]: http://en.wikipedia.org/wiki/Burrows-Wheeler_transform +[Performance tuning]: #performance-tuning + +Command Line +------------ + +Usage: + + hisat2-build [options]* + +### Notes + If you use --snp, --ss, and/or --exon, hisat2-build will need about 200GB RAM for the human genome size as index building involves a graph construction. + Otherwise, you will be able to build an index on your desktop with 8GB RAM. + +### Main arguments + +
+ + + + + +A comma-separated list of FASTA files containing the reference sequences to be +aligned to, or, if [`-c`](#hisat2-build-options-c) is specified, the sequences +themselves. E.g., `` might be `chr1.fa,chr2.fa,chrX.fa,chrY.fa`, +or, if [`-c`](#hisat2-build-options-c) is specified, this might be +`GGTCATCCT,ACGGGTCGT,CCGTTCTATGCGGCTTA`. + +
+ + + + + +The basename of the index files to write. By default, `hisat2-build` writes +files named `NAME.1.ht2`, `NAME.2.ht2`, `NAME.3.ht2`, `NAME.4.ht2`, +`NAME.5.ht2`, `NAME.6.ht2`, `NAME.7.ht2`, and `NAME.8.ht2` where `NAME` is ``. + +
+ +### Options + + + +
+ + -f + + + +The reference input files (specified as ``) are FASTA files +(usually having extension `.fa`, `.mfa`, `.fna` or similar). + +
+ + -c + + + +The reference sequences are given on the command line. I.e. `` is +a comma-separated list of sequences rather than a list of FASTA files. + +
+ +[`--large-index`]: #hisat2-build-options-large-index + + --large-index + + + +Force `hisat2-build` to build a [large index](#small-and-large-indexes), even if the reference is less +than ~ 4 billion nucleotides long. + +
+ +[`-a`/`--noauto`]: #hisat2-build-options-a + + -a/--noauto + + + +Disable the default behavior whereby `hisat2-build` automatically selects +values for the [`--bmax`], [`--dcv`] and [`--packed`] parameters according to +available memory. Instead, the user may specify values for those parameters. If +memory is exhausted during indexing, an error message will be printed; it is up +to the user to try new parameters. + +
+ +[`--bmax`]: #hisat2-build-options-bmax + + --bmax + + + +The maximum number of suffixes allowed in a block. Allowing more suffixes per +block makes indexing faster, but increases peak memory usage. Setting this +option overrides any previous setting for [`--bmax`], or [`--bmaxdivn`]. +Default (in terms of the [`--bmaxdivn`] parameter) is [`--bmaxdivn`] 4. This is +configured automatically by default; use [`-a`/`--noauto`] to configure manually. + +
+ +[`--bmaxdivn`]: #hisat2-build-options-bmaxdivn + + --bmaxdivn + + + +The maximum number of suffixes allowed in a block, expressed as a fraction of +the length of the reference. Setting this option overrides any previous setting +for [`--bmax`], or [`--bmaxdivn`]. Default: [`--bmaxdivn`] 4. This is +configured automatically by default; use [`-a`/`--noauto`] to configure manually. + +
+ +[`--dcv`]: #hisat2-build-options-dcv + + --dcv + + + +Use `` as the period for the difference-cover sample. A larger period +yields less memory overhead, but may make suffix sorting slower, especially if +repeats are present. Must be a power of 2 no greater than 4096. Default: 1024. + This is configured automatically by default; use [`-a`/`--noauto`] to configure +manually. + +
+ +[`--nodc`]: #hisat2-build-options-nodc + + --nodc + + + +Disable use of the difference-cover sample. Suffix sorting becomes +quadratic-time in the worst case (where the worst case is an extremely +repetitive reference). Default: off. + +
+ + -r/--noref + + + +Do not build the `NAME.3.ht2` and `NAME.4.ht2` portions of the index, which +contain a bitpacked version of the reference sequences and are used for +paired-end alignment. + +
+ + -3/--justref + + + +Build only the `NAME.3.ht2` and `NAME.4.ht2` portions of the index, which +contain a bitpacked version of the reference sequences and are used for +paired-end alignment. + +
+ + -o/--offrate + + + +To map alignments back to positions on the reference sequences, it's necessary +to annotate ("mark") some or all of the [Burrows-Wheeler] rows with their +corresponding location on the genome. +[`-o`/`--offrate`](#hisat2-build-options-o) governs how many rows get marked: +the indexer will mark every 2^`` rows. Marking more rows makes +reference-position lookups faster, but requires more memory to hold the +annotations at runtime. The default is 4 (every 16th row is marked; for human +genome, annotations occupy about 680 megabytes). + +
+ + -t/--ftabchars + + + +The ftab is the lookup table used to calculate an initial [Burrows-Wheeler] +range with respect to the first `` characters of the query. A larger +`` yields a larger lookup table but faster query times. The ftab has size +4^(``+1) bytes. The default setting is 10 (ftab is 4MB). + + +
+ + --localoffrate + + + +This option governs how many rows get marked in a local index: +the indexer will mark every 2^`` rows. Marking more rows makes +reference-position lookups faster, but requires more memory to hold the +annotations at runtime. The default is 3 (every 8th row is marked, +this occupies about 16KB per local index). + +
+ + --localftabchars + + + +The local ftab is the lookup table in a local index. +The default setting is 6 (ftab is 8KB per local index). + +
+ + -p + + + +Launch `NTHREADS` parallel build threads (default: 1). + +
+ + --snp + + + +Provide a list of SNPs (in the HISAT2's own format) as follows (five columns). + + SNP ID `` snp type (single, deletion, or insertion) `` chromosome name `` zero-offset based genomic position of a SNP `` alternative base (single), the length of SNP (deletion), or insertion sequence (insertion) + + For example, + rs58784443 single 13 18447947 T + +Use `hisat2_extract_snps_haplotypes_UCSC.py` (in the HISAT2 package) to extract SNPs and haplotypes from a dbSNP file (e.g. http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/snp144Common.txt.gz). +or `hisat2_extract_snps_haplotypes_VCF.py` to extract SNPs and haplotypes from a VCF file (e.g. ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions/ALL.chr22.phase3_shapeit2_mvncall_integrated_v3plus_nounphased.rsID.genotypes.GRCh38_dbSNP_no_SVs.vcf.gz). + +
+ + --haplotype + + + +Provide a list of haplotypes (in the HISAT2's own format) as follows (five columns). + + Haplotype ID `` chromosome name `` zero-offset based left coordinate of haplotype `` zero-offset based right coordinate of haplotype `` a comma separated list of SNP ids in the haplotype + + For example, + ht35 13 18446877 18446945 rs12381094,rs12381056,rs192016659,rs538569910 + +See the above option, --snp, about how to extract haplotypes. This option is not required, but haplotype information can keep the index construction from exploding and reduce the index size substantially. + +
+ + --ss + + + +Note this option should be used with the following --exon option. +Provide a list of splice sites (in the HISAT2's own format) as follows (four columns). + + chromosome name `` zero-offset based genomic position of the flanking base on the left side of an intron `` zero-offset based genomic position of the flanking base on the right `` strand + +Use `hisat2_extract_splice_sites.py` (in the HISAT2 package) to extract splice sites from a GTF file. + +
+ + --exon + + + +Note this option should be used with the above --ss option. +Provide a list of exons (in the HISAT2's own format) as follows (three columns). + + chromosome name `` zero-offset based left genomic position of an exon `` zero-offset based right genomic position of an exon + +Use `hisat2_extract_exons.py` (in the HISAT2 package) to extract exons from a GTF file. + +
+ + --seed + + + +Use `` as the seed for pseudo-random number generator. + +
+ + --cutoff + + + +Index only the first `` bases of the reference sequences (cumulative across +sequences) and ignore the rest. + +
+ + -q/--quiet + + + +`hisat2-build` is verbose by default. With this option `hisat2-build` will +print only error messages. + +
+ + -h/--help + + + +Print usage information and quit. + +
+ + --version + + + +Print version information and quit. + +
+ +The `hisat2-inspect` index inspector +===================================== + +`hisat2-inspect` extracts information from a HISAT2 index about what kind of +index it is and what reference sequences were used to build it. When run without +any options, the tool will output a FASTA file containing the sequences of the +original references (with all non-`A`/`C`/`G`/`T` characters converted to `N`s). + It can also be used to extract just the reference sequence names using the +[`-n`/`--names`] option or a more verbose summary using the [`-s`/`--summary`] +option. + +Command Line +------------ + +Usage: + + hisat2-inspect [options]* + +### Main arguments + +
+ + + + + +The basename of the index to be inspected. The basename is the name of any of the +index files but with the `.X.ht2` suffix omitted. +`hisat2-inspect` first looks in the current directory for the index files, then +in the directory specified in the `HISAT2_INDEXES` environment variable. + +
+ +### Options + +
+ + -a/--across + + + +When printing FASTA output, output a newline character every `` bases +(default: 60). + +
+ +[`-n`/`--names`]: #hisat2-inspect-options-n + + -n/--names + + + +Print reference sequence names, one per line, and quit. + +
+ +[`-s`/`--summary`]: #hisat2-inspect-options-s + + -s/--summary + + + +Print a summary that includes information about index settings, as well as the +names and lengths of the input sequences. The summary has this format: + + Colorspace <0 or 1> + SA-Sample 1 in + FTab-Chars + Sequence-1 + Sequence-2 + ... + Sequence-N + +Fields are separated by tabs. Colorspace is always set to 0 for HISAT2. + +
+ +[`--snp`]: #hisat2-inspect-options-snp + + --snp + + + +Print SNPs, and quit. + +
+ +[`--ss`]: #hisat2-inspect-options-ss + + --ss + + + +Print splice sites, and quit. + +
+ +[`--ss-all`]: #hisat2-inspect-options-ss-all + + --ss-all + + + +Print splice sites including those not in the global index, and quit. + +
+ +[`--exon`]: #hisat2-inspect-options-exon + + --exon + + + +Print exons, and quit. + +
+ + -v/--verbose + + + +Print verbose output (for debugging). + +
+ + --version + + + +Print version information and quit. + +
+ + -h/--help + + + +Print usage information and quit. + +
+ +Getting started with HISAT2 +=================================================== + +HISAT2 comes with some example files to get you started. The example files +are not scientifically significant; these files will simply let you start running HISAT2 and +downstream tools right away. + +First follow the manual instructions to [obtain HISAT2]. Set the `HISAT2_HOME` +environment variable to point to the new HISAT2 directory containing the +`hisat2`, `hisat2-build` and `hisat2-inspect` binaries. This is important, +as the `HISAT2_HOME` variable is used in the commands below to refer to that +directory. + +[obtain HISAT2]: #obtaining-hisat2 + +Indexing a reference genome +--------------------------- + +To create an index for the genomic region (1 million bps from the human chromosome 22 between 20,000,000 and 20,999,999) +included with HISAT2, create a new temporary directory (it doesn't matter where), change into that directory, and run: + + $HISAT2_HOME/hisat2-build $HISAT2_HOME/example/reference/22_20-21M.fa --snp $HISAT2_HOME/example/reference/22_20-21M.snp 22_20-21M_snp + +The command should print many lines of output then quit. When the command +completes, the current directory will contain eight new files that all start with +`22_20-21M_snp` and end with `.1.ht2`, `.2.ht2`, `.3.ht2`, `.4.ht2`, `.5.ht2`, `.6.ht2`, +`.7.ht2`, and `.8.ht2`. These files constitute the index - you're done! + +You can use `hisat2-build` to create an index for a set of FASTA files obtained +from any source, including sites such as [UCSC], [NCBI], and [Ensembl]. When +indexing multiple FASTA files, specify all the files using commas to separate +file names. For more details on how to create an index with `hisat2-build`, +see the [manual section on index building]. You may also want to bypass this +process by obtaining a pre-built index. 
+ +[UCSC]: http://genome.ucsc.edu/cgi-bin/hgGateway +[NCBI]: http://www.ncbi.nlm.nih.gov/sites/genome +[Ensembl]: http://www.ensembl.org/ +[manual section on index building]: #the-hisat2-build-indexer +[using a pre-built index]: #using-a-pre-built-index + +Aligning example reads +---------------------- + +Stay in the directory created in the previous step, which now contains the +`22_20-21M` index files. Next, run: + + $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -U $HISAT2_HOME/example/reads/reads_1.fa -S eg1.sam + +This runs the HISAT2 aligner, which aligns a set of unpaired reads to the +genome region using the index generated in the previous step. +The alignment results in SAM format are written to the file `eg1.sam`, and a +short alignment summary is written to the console. (Actually, the summary is +written to the "standard error" or "stderr" filehandle, which is typically +printed to the console.) + +To see the first few lines of the SAM output, run: + + head eg1.sam + +You will see something like this: + + @HD VN:1.0 SO:unsorted + @SQ SN:22:20000001-21000000 LN:1000000 + @PG ID:hisat2 PN:hisat2 VN:2.0.0-beta + 1 0 22:20000001-21000000 397984 255 100M * 0 0 GCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU NH:i:1 + 2 16 22:20000001-21000000 398131 255 100M * 0 0 ATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:80A19 YT:Z:UU NH:i:1 Zs:Z:80|S|rs576159895 + 3 16 22:20000001-21000000 398222 255 100M * 0 0 TGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGTCAGAGCTGCAGTACTTGGCGATCTCAAA 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:16A83 YT:Z:UU NH:i:1 Zs:Z:16|S|rs2629364 + 4 16 22:20000001-21000000 398247 255 90M200N10M * 0 0 CAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGTCAGAGCTGCAGTACTTGGCGATCTCAAACCGCTGCACCAGGAAGTCGATCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU XS:A:- NH:i:1 + 5 16 22:20000001-21000000 398194 255 100M * 0 0 GGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:17A26A55 YT:Z:UU NH:i:1 Zs:Z:17|S|rs576159895,26|S|rs2629364 + 6 0 22:20000001-21000000 398069 255 100M * 0 0 CAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU NH:i:1 + 7 0 22:20000001-21000000 397896 255 100M * 0 0 GTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:31G68 YT:Z:UU NH:i:1 Zs:Z:31|S|rs562662261 + 8 0 22:20000001-21000000 398150 255 100M * 0 0 AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:61A26A11 YT:Z:UU NH:i:1 Zs:Z:61|S|rs576159895,26|S|rs2629364 + 9 16 22:20000001-21000000 398329 255 8M200N92M * 0 0 
ACCAGGAAGTCGATCCAGATGTAGTGGGGGGTCACTTCGGGGGGACAGGGTTTGGGTTGACTTGCTTCCGAGGCAGCCAGGGGGTCTGCTTCCTTTATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU XS:A:- NH:i:1 + 10 16 22:20000001-21000000 398184 255 100M * 0 0 CTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:27A26A45 YT:Z:UU NH:i:1 Zs:Z:27|S|rs576159895,26|S|rs2629364 + +The first few lines (beginning with `@`) are SAM header lines, and the rest of +the lines are SAM alignments, one line per read or mate. See the [HISAT2 +manual section on SAM output] and the [SAM specification] for details about how +to interpret the SAM file format. + +[HISAT2 manual section on SAM output]: #sam-output + +Paired-end example +------------------ + +To align paired-end reads included with HISAT2, stay in the same directory and +run: + + $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -1 $HISAT2_HOME/example/reads/reads_1.fa -2 $HISAT2_HOME/example/reads/reads_2.fa -S eg2.sam + +This aligns a set of paired-end reads to the reference genome, with results +written to the file `eg2.sam`. + +Using SAMtools/BCFtools downstream +---------------------------------- + +[SAMtools] is a collection of tools for manipulating and analyzing SAM and BAM +alignment files. [BCFtools] is a collection of tools for calling variants and +manipulating VCF and BCF files, and it is typically distributed with [SAMtools]. +Using these tools together allows you to get from alignments in SAM format to +variant calls in VCF format. This example assumes that `samtools` and +`bcftools` are installed and that the directories containing these binaries are +in your [PATH environment variable]. 
+ +Run the paired-end example: + + $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -1 $HISAT2_HOME/example/reads/reads_1.fa -2 $HISAT2_HOME/example/reads/reads_2.fa -S eg2.sam + +Use `samtools view` to convert the SAM file into a BAM file. BAM is the +binary format corresponding to the SAM text format. Run: + + samtools view -bS eg2.sam > eg2.bam + +Use `samtools sort` to convert the BAM file to a sorted BAM file. The following command requires samtools version 1.2 or higher. + + samtools sort eg2.bam -o eg2.sorted.bam + +We now have a sorted BAM file called `eg2.sorted.bam`. Sorted BAM is a useful +format because the alignments are (a) compressed, which is convenient for +long-term storage, and (b) sorted, which is convenient for variant discovery. +To generate variant calls in VCF format, run: + + samtools mpileup -uf $HISAT2_HOME/example/reference/22_20-21M.fa eg2.sorted.bam | bcftools view -bvcg - > eg2.raw.bcf + +Then to view the variants, run: + + bcftools view eg2.raw.bcf + +See the official SAMtools guide to [Calling SNPs/INDELs with SAMtools/BCFtools] +for more details and variations on this process. + +[BCFtools]: http://samtools.sourceforge.net/mpileup.shtml +[Calling SNPs/INDELs with SAMtools/BCFtools]: http://samtools.sourceforge.net/mpileup.shtml diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0abfad9 --- /dev/null +++ b/Makefile @@ -0,0 +1,590 @@ +# +# Copyright 2015, Daehwan Kim +# +# This file is part of HISAT2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with HISAT. If not, see . +# +# +# Makefile for hisat2-align, hisat2-build, hisat2-inspect +# + +INC = +GCC_PREFIX = $(shell dirname `which gcc`) +GCC_SUFFIX = +CC = $(GCC_PREFIX)/gcc$(GCC_SUFFIX) +CPP = $(GCC_PREFIX)/g++$(GCC_SUFFIX) +CXX = $(CPP) +HEADERS = $(wildcard *.h) +BOWTIE_MM = 1 +BOWTIE_SHARED_MEM = 0 + +# Detect Cygwin or MinGW +WINDOWS = 0 +CYGWIN = 0 +MINGW = 0 +ifneq (,$(findstring CYGWIN,$(shell uname))) + WINDOWS = 1 + CYGWIN = 1 + # POSIX memory-mapped files not currently supported on Windows + BOWTIE_MM = 0 + BOWTIE_SHARED_MEM = 0 +else + ifneq (,$(findstring MINGW,$(shell uname))) + WINDOWS = 1 + MINGW = 1 + # POSIX memory-mapped files not currently supported on Windows + BOWTIE_MM = 0 + BOWTIE_SHARED_MEM = 0 + endif +endif + +MACOS = 0 +ifneq (,$(findstring Darwin,$(shell uname))) + MACOS = 1 +endif + +EXTRA_FLAGS += -DPOPCNT_CAPABILITY -std=c++11 +INC += -I. -I third_party + +MM_DEF = + +ifeq (1,$(BOWTIE_MM)) + MM_DEF = -DBOWTIE_MM +endif + +SHMEM_DEF = + +ifeq (1,$(BOWTIE_SHARED_MEM)) + SHMEM_DEF = -DBOWTIE_SHARED_MEM +endif + +PTHREAD_PKG = +PTHREAD_LIB = + +ifeq (1,$(MINGW)) + PTHREAD_LIB = +else + PTHREAD_LIB = -lpthread +endif + +SEARCH_LIBS = +BUILD_LIBS = +INSPECT_LIBS = + +ifeq (1,$(MINGW)) + BUILD_LIBS = + INSPECT_LIBS = +endif + +USE_SRA = 0 +SRA_DEF = +SRA_LIB = +SERACH_INC = +ifeq (1,$(USE_SRA)) + SRA_DEF = -DUSE_SRA + SRA_LIB = -lncbi-ngs-c++-static -lngs-c++-static -lncbi-vdb-static -ldl + SEARCH_INC += -I$(NCBI_NGS_DIR)/include -I$(NCBI_VDB_DIR)/include + SEARCH_LIBS += -L$(NCBI_NGS_DIR)/lib64 -L$(NCBI_VDB_DIR)/lib64 +endif + +LIBS = $(PTHREAD_LIB) + +HT2LIB_DIR = hisat2lib + +HT2LIB_CPPS = $(HT2LIB_DIR)/ht2_init.cpp \ + $(HT2LIB_DIR)/ht2_repeat.cpp \ + $(HT2LIB_DIR)/ht2_index.cpp + +SHARED_CPPS = ccnt_lut.cpp ref_read.cpp alphabet.cpp shmem.cpp \ + edit.cpp gfm.cpp \ + reference.cpp ds.cpp multikey_qsort.cpp limit.cpp \ + random_source.cpp 
tinythread.cpp utility_3n.cpp +SEARCH_CPPS = qual.cpp pat.cpp \ + read_qseq.cpp aligner_seed_policy.cpp \ + aligner_seed.cpp \ + aligner_seed2.cpp \ + aligner_sw.cpp \ + aligner_sw_driver.cpp aligner_cache.cpp \ + aligner_result.cpp ref_coord.cpp mask.cpp \ + pe.cpp aln_sink.cpp dp_framer.cpp \ + scoring.cpp presets.cpp unique.cpp \ + simple_func.cpp \ + random_util.cpp \ + aligner_bt.cpp sse_util.cpp \ + aligner_swsse.cpp outq.cpp \ + aligner_swsse_loc_i16.cpp \ + aligner_swsse_ee_i16.cpp \ + aligner_swsse_loc_u8.cpp \ + aligner_swsse_ee_u8.cpp \ + aligner_driver.cpp \ + splice_site.cpp \ + alignment_3n.cpp \ + position_3n.cpp \ + $(HT2LIB_CPPS) + +BUILD_CPPS = diff_sample.cpp + +REPEAT_CPPS = \ + mask.cpp \ + qual.cpp \ + aligner_bt.cpp \ + scoring.cpp \ + simple_func.cpp \ + dp_framer.cpp \ + aligner_result.cpp \ + aligner_sw_driver.cpp \ + aligner_sw.cpp \ + aligner_swsse_ee_i16.cpp \ + aligner_swsse_ee_u8.cpp \ + aligner_swsse_loc_i16.cpp \ + aligner_swsse_loc_u8.cpp \ + aligner_swsse.cpp \ + bit_packed_array.cpp \ + repeat_builder.cpp + +THREE_N_HEADERS = \ + position_3n_table.h \ + alignment_3n_table.h \ + utility_3n_table.h + +HISAT2_CPPS_MAIN = $(SEARCH_CPPS) hisat2_main.cpp +HISAT2_BUILD_CPPS_MAIN = $(BUILD_CPPS) hisat2_build_main.cpp +HISAT2_REPEAT_CPPS_MAIN = $(REPEAT_CPPS) $(BUILD_CPPS) hisat2_repeat_main.cpp + +SEARCH_FRAGMENTS = $(wildcard search_*_phase*.c) +VERSION := $(shell cat HISAT2_VERSION) + +# Convert BITS=?? to a -m flag +BITS=32 +ifeq (x86_64,$(shell uname -m)) +BITS=64 +endif +# msys will always be 32 bit so look at the cpu arch instead. 
+ifneq (,$(findstring AMD64,$(PROCESSOR_ARCHITEW6432))) + ifeq (1,$(MINGW)) + BITS=64 + endif +endif +BITS_FLAG = + +ifeq (32,$(BITS)) + BITS_FLAG = -m32 +endif + +ifeq (64,$(BITS)) + BITS_FLAG = -m64 +endif +SSE_FLAG=-msse2 + +DEBUG_FLAGS = -O0 -g3 $(BITS_FLAG) $(SSE_FLAG) +DEBUG_DEFS = -DCOMPILER_OPTIONS="\"$(DEBUG_FLAGS) $(EXTRA_FLAGS)\"" +RELEASE_FLAGS = -O3 $(BITS_FLAG) $(SSE_FLAG) -funroll-loops -g3 +RELEASE_DEFS = -DCOMPILER_OPTIONS="\"$(RELEASE_FLAGS) $(EXTRA_FLAGS)\"" +NOASSERT_FLAGS = -DNDEBUG +FILE_FLAGS = -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE +HT2LIB_FLAGS = -DHISAT2_BUILD_LIB +ifeq (1,$(USE_SRA)) + ifeq (1, $(MACOS)) + SRA_LIB += -stdlib=libc++ + DEBUG_FLAGS += -mmacosx-version-min=10.10 + RELEASE_FLAGS += -mmacosx-version-min=10.10 + endif +endif + + +HISAT2_BIN_LIST = hisat2-build-s \ + hisat2-build-l \ + hisat2-align-s \ + hisat2-align-l \ + hisat2-inspect-s \ + hisat2-inspect-l \ + hisat2-repeat \ + hisat-3n-table + +HISAT2_BIN_LIST_AUX = hisat2-build-s-debug \ + hisat2-build-l-debug \ + hisat2-align-s-debug \ + hisat2-align-l-debug \ + hisat2-inspect-s-debug \ + hisat2-inspect-l-debug \ + hisat2-repeat-debug + +HT2LIB_SRCS = $(SHARED_CPPS) \ + $(HT2LIB_CPPS) + +HT2LIB_OBJS = $(HT2LIB_SRCS:.cpp=.o) + +HT2LIB_DEBUG_OBJS = $(addprefix .ht2lib-obj-debug/,$(HT2LIB_OBJS)) +HT2LIB_RELEASE_OBJS = $(addprefix .ht2lib-obj-release/,$(HT2LIB_OBJS)) +HT2LIB_SHARED_DEBUG_OBJS = $(addprefix .ht2lib-obj-debug-shared/,$(HT2LIB_OBJS)) +HT2LIB_SHARED_RELEASE_OBJS = $(addprefix .ht2lib-obj-release-shared/,$(HT2LIB_OBJS)) + +HT2LIB_PKG_SRC = \ + $(HT2LIB_DIR)/ht2_init.cpp \ + $(HT2LIB_DIR)/ht2_repeat.cpp \ + $(HT2LIB_DIR)/ht2_index.cpp \ + $(HT2LIB_DIR)/ht2.h \ + $(HT2LIB_DIR)/ht2_handle.h \ + $(HT2LIB_DIR)/java_jni/Makefile \ + $(HT2LIB_DIR)/java_jni/ht2module.c \ + $(HT2LIB_DIR)/java_jni/HT2Module.java \ + $(HT2LIB_DIR)/java_jni/HT2ModuleExample.java \ + $(HT2LIB_DIR)/pymodule/Makefile \ + $(HT2LIB_DIR)/pymodule/ht2module.c \ + 
$(HT2LIB_DIR)/pymodule/setup.py \ + $(HT2LIB_DIR)/pymodule/ht2example.py + + +GENERAL_LIST = $(wildcard scripts/*.sh) \ + $(wildcard scripts/*.pl) \ + $(wildcard *.py) \ + $(wildcard example/index/*.ht2) \ + $(wildcard example/reads/*.fa) \ + example/reference/22_20-21M.fa \ + example/reference/22_20-21M.snp \ + $(PTHREAD_PKG) \ + hisat2 \ + hisat2-build \ + hisat2-inspect \ + AUTHORS \ + LICENSE \ + NEWS \ + MANUAL \ + MANUAL.markdown \ + TUTORIAL \ + HISAT2_VERSION + +ifeq (1,$(WINDOWS)) + HISAT2_BIN_LIST := $(HISAT2_BIN_LIST) hisat2.bat hisat2-build.bat hisat2-inspect.bat +endif + +# This is helpful on Windows under MinGW/MSYS, where Make might go for +# the Windows FIND tool instead. +FIND=$(shell which find) + +SRC_PKG_LIST = $(wildcard *.h) \ + $(wildcard *.hh) \ + $(wildcard *.c) \ + $(wildcard *.cpp) \ + $(HT2LIB_PKG_SRC) \ + Makefile \ + CMakeLists.txt \ + $(GENERAL_LIST) + +BIN_PKG_LIST = $(GENERAL_LIST) + +.PHONY: all allall both both-debug + +all: $(HISAT2_BIN_LIST) + +allall: $(HISAT2_BIN_LIST) $(HISAT2_BIN_LIST_AUX) + +both: hisat2-align-s hisat2-align-l hisat2-build-s hisat2-build-l + +both-debug: hisat2-align-s-debug hisat2-align-l-debug hisat2-build-s-debug hisat2-build-l-debug + +repeat: hisat2-repeat + +repeat-debug: hisat2-repeat-debug + +DEFS :=-fno-strict-aliasing \ + -DHISAT2_VERSION="\"`cat HISAT2_VERSION`\"" \ + -DBUILD_HOST="\"`hostname`\"" \ + -DBUILD_TIME="\"`date`\"" \ + -DCOMPILER_VERSION="\"`$(CXX) -v 2>&1 | tail -1`\"" \ + $(FILE_FLAGS) \ + $(PREF_DEF) \ + $(MM_DEF) \ + $(SHMEM_DEF) + +# +# hisat-bp targets +# + +hisat-bp-bin: hisat_bp.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS) + $(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 $(NOASSERT_FLAGS) -Wall \ + $(INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT_CPPS_MAIN) \ + $(LIBS) $(SEARCH_LIBS) + +hisat-bp-bin-debug: hisat_bp.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS) + $(CXX) $(DEBUG_FLAGS) \ + $(DEBUG_DEFS) 
$(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 -Wall \ + $(INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT_CPPS_MAIN) \ + $(LIBS) $(SEARCH_LIBS) + +# +# hisat2-repeat targets +# + +hisat2-repeat: hisat2_repeat.cpp $(REPEAT_CPPS) $(SHARED_CPPS) $(HEADERS) + $(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX $(NOASSERT_FLAGS) -Wall \ + $(INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT2_REPEAT_CPPS_MAIN) \ + $(LIBS) $(BUILD_LIBS) + +hisat2-repeat-debug: hisat2_repeat.cpp $(REPEAT_CPPS) $(SHARED_CPPS) $(HEADERS) + $(CXX) $(DEBUG_FLAGS) $(DEBUG_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX -Wall \ + $(INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT2_REPEAT_CPPS_MAIN) \ + $(LIBS) $(BUILD_LIBS) + + +# +# hisat2-build targets +# + +hisat2-build-s: hisat2_build.cpp $(SHARED_CPPS) $(HEADERS) + $(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 $(NOASSERT_FLAGS) -Wall -DMASSIVE_DATA_RLCSA \ + $(INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT2_BUILD_CPPS_MAIN) \ + $(LIBS) $(BUILD_LIBS) + +hisat2-build-l: hisat2_build.cpp $(SHARED_CPPS) $(HEADERS) + $(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX $(NOASSERT_FLAGS) -Wall \ + $(INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT2_BUILD_CPPS_MAIN) \ + $(LIBS) $(BUILD_LIBS) + +hisat2-build-s-debug: hisat2_build.cpp $(SHARED_CPPS) $(HEADERS) + $(CXX) $(DEBUG_FLAGS) $(DEBUG_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 -Wall -DMASSIVE_DATA_RLCSA \ + $(INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT2_BUILD_CPPS_MAIN) \ + $(LIBS) $(BUILD_LIBS) + +hisat2-build-l-debug: hisat2_build.cpp $(SHARED_CPPS) $(HEADERS) + $(CXX) $(DEBUG_FLAGS) $(DEBUG_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX -Wall \ + $(INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT2_BUILD_CPPS_MAIN) \ + $(LIBS) $(BUILD_LIBS) + +# +# hisat2 targets +# + +hisat2-align-s: hisat2.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS) + $(CXX) 
$(RELEASE_FLAGS) $(RELEASE_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) $(SRA_DEF) -DBOWTIE2 $(NOASSERT_FLAGS) -Wall \ + $(INC) $(SEARCH_INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT2_CPPS_MAIN) \ + $(LIBS) $(SRA_LIB) $(SEARCH_LIBS) + +hisat2-align-l: hisat2.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS) + $(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) $(SRA_DEF) -DBOWTIE2 -DBOWTIE_64BIT_INDEX $(NOASSERT_FLAGS) -Wall \ + $(INC) $(SEARCH_INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT2_CPPS_MAIN) \ + $(LIBS) $(SRA_LIB) $(SEARCH_LIBS) + +hisat2-align-s-debug: hisat2.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS) + $(CXX) $(DEBUG_FLAGS) \ + $(DEBUG_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) $(SRA_DEF) -DBOWTIE2 -Wall \ + $(INC) $(SEARCH_INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT2_CPPS_MAIN) \ + $(LIBS) $(SRA_LIB) $(SEARCH_LIBS) + +hisat2-align-l-debug: hisat2.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS) + $(CXX) $(DEBUG_FLAGS) \ + $(DEBUG_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) $(SRA_DEF) -DBOWTIE2 -DBOWTIE_64BIT_INDEX -Wall \ + $(INC) $(SEARCH_INC) \ + -o $@ $< \ + $(SHARED_CPPS) $(HISAT2_CPPS_MAIN) \ + $(LIBS) $(SRA_LIB) $(SEARCH_LIBS) + +# +# hisat2-inspect targets +# + +hisat2-inspect-s: hisat2_inspect.cpp $(HEADERS) $(SHARED_CPPS) + $(CXX) $(RELEASE_FLAGS) \ + $(RELEASE_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 -DHISAT2_INSPECT_MAIN -Wall \ + $(INC) -I . \ + -o $@ $< \ + $(SHARED_CPPS) \ + $(LIBS) $(INSPECT_LIBS) + +hisat2-inspect-l: hisat2_inspect.cpp $(HEADERS) $(SHARED_CPPS) + $(CXX) $(RELEASE_FLAGS) \ + $(RELEASE_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX -DHISAT2_INSPECT_MAIN -Wall \ + $(INC) -I . \ + -o $@ $< \ + $(SHARED_CPPS) \ + $(LIBS) $(INSPECT_LIBS) + +hisat2-inspect-s-debug: hisat2_inspect.cpp $(HEADERS) $(SHARED_CPPS) + $(CXX) $(DEBUG_FLAGS) \ + $(DEBUG_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 -DHISAT2_INSPECT_MAIN -Wall \ + $(INC) -I . 
\ + -o $@ $< \ + $(SHARED_CPPS) \ + $(LIBS) $(INSPECT_LIBS) + +hisat2-inspect-l-debug: hisat2_inspect.cpp $(HEADERS) $(SHARED_CPPS) + $(CXX) $(DEBUG_FLAGS) \ + $(DEBUG_DEFS) $(EXTRA_FLAGS) \ + $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX -DHISAT2_INSPECT_MAIN -Wall \ + $(INC) -I . \ + -o $@ $< \ + $(SHARED_CPPS) \ + $(LIBS) $(INSPECT_LIBS) + +# +# hisat-3n-table targets +# + +hisat-3n-table: hisat_3n_table.cpp $(THREE_N_HEADERS) + $(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(EXTRA_FLAGS) $(NOASSERT_FLAGS) $(DEFS) -pthread -o $@ $< + +# +# HT2LIB targets +# + +ht2lib: libhisat2lib-debug.a libhisat2lib.a libhisat2lib-debug.so libhisat2lib.so + +libhisat2lib-debug.a: $(HT2LIB_DEBUG_OBJS) + ar rc $@ $(HT2LIB_DEBUG_OBJS) + +libhisat2lib.a: $(HT2LIB_RELEASE_OBJS) + ar rc $@ $(HT2LIB_RELEASE_OBJS) + +libhisat2lib-debug.so: $(HT2LIB_SHARED_DEBUG_OBJS) + $(CXX) $(DEBUG_FLAGS) $(DEBUG_DEFS) $(EXTRA_FLAGS) $(DEFS) $(SRA_DEF) -DBOWTIE2 -Wall $(INC) $(SEARCH_INC) \ + -shared -o $@ $(HT2LIB_SHARED_DEBUG_OBJS) $(LIBS) $(SRA_LIB) $(SEARCH_LIBS) + +libhisat2lib.so: $(HT2LIB_SHARED_RELEASE_OBJS) + $(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(EXTRA_FLAGS) $(DEFS) $(SRA_DEF) -DBOWTIE2 $(NOASSERT_FLAGS) -Wall $(INC) $(SEARCH_INC)\ + -shared -o $@ $(HT2LIB_SHARED_RELEASE_OBJS) $(LIBS) $(SRA_LIB) $(SEARCH_LIBS) + +.ht2lib-obj-debug/%.o: %.cpp + @mkdir -p $(dir $@)/$(dir $<) + $(CXX) -fPIC $(DEBUG_FLAGS) $(DEBUG_DEFS) $(EXTRA_FLAGS) $(DEFS) $(SRA_DEF) $(HT2LIB_FLAGS) -DBOWTIE2 -Wall $(INC) $(SEARCH_INC) \ + -c -o $@ $< + +.ht2lib-obj-release/%.o: %.cpp + @mkdir -p $(dir $@)/$(dir $<) + $(CXX) -fPIC $(RELEASE_FLAGS) $(RELEASE_DEFS) $(EXTRA_FLAGS) $(DEFS) $(SRA_DEF) $(HT2LIB_FLAGS) -DBOWTIE2 $(NOASSERT_FLAGS) -Wall $(INC) $(SEARCH_INC) \ + -c -o $@ $< + +.ht2lib-obj-debug-shared/%.o: %.cpp + @mkdir -p $(dir $@)/$(dir $<) + $(CXX) -fPIC $(DEBUG_FLAGS) $(DEBUG_DEFS) $(EXTRA_FLAGS) $(DEFS) $(SRA_DEF) $(HT2LIB_FLAGS) -DBOWTIE2 -Wall $(INC) $(SEARCH_INC) \ + -c -o $@ $< + +.ht2lib-obj-release-shared/%.o: 
%.cpp + @mkdir -p $(dir $@)/$(dir $<) + $(CXX) -fPIC $(RELEASE_FLAGS) $(RELEASE_DEFS) $(EXTRA_FLAGS) $(DEFS) $(SRA_DEF) $(HT2LIB_FLAGS) -DBOWTIE2 $(NOASSERT_FLAGS) -Wall $(INC) $(SEARCH_INC) \ + -c -o $@ $< + +# +# repeatexp +# +repeatexp: + g++ -o repeatexp repeatexp.cpp -I hisat2lib libhisat2lib.a + +hisat2: ; + +hisat2.bat: + echo "@echo off" > hisat2.bat + echo "perl %~dp0/hisat2 %*" >> hisat2.bat + +hisat2-build.bat: + echo "@echo off" > hisat2-build.bat + echo "python %~dp0/hisat2-build %*" >> hisat2-build.bat + +hisat2-inspect.bat: + echo "@echo off" > hisat2-inspect.bat + echo "python %~dp0/hisat2-inspect %*" >> hisat2-inspect.bat + + +.PHONY: hisat2-src +hisat2-src: $(SRC_PKG_LIST) + chmod a+x scripts/*.sh scripts/*.pl + mkdir .src.tmp + mkdir .src.tmp/hisat2-$(VERSION) + zip tmp.zip $(SRC_PKG_LIST) + mv tmp.zip .src.tmp/hisat2-$(VERSION) + cd .src.tmp/hisat2-$(VERSION) ; unzip tmp.zip ; rm -f tmp.zip + cd .src.tmp ; zip -r hisat2-$(VERSION)-source.zip hisat2-$(VERSION) + cp .src.tmp/hisat2-$(VERSION)-source.zip . + rm -rf .src.tmp + +.PHONY: hisat2-bin +hisat2-bin: $(BIN_PKG_LIST) $(HISAT2_BIN_LIST) $(HISAT2_BIN_LIST_AUX) + chmod a+x scripts/*.sh scripts/*.pl + rm -rf .bin.tmp + mkdir .bin.tmp + mkdir .bin.tmp/hisat2-$(VERSION) + if [ -f hisat2.exe ] ; then \ + zip tmp.zip $(BIN_PKG_LIST) $(addsuffix .exe,$(HISAT2_BIN_LIST) $(HISAT2_BIN_LIST_AUX)) ; \ + else \ + zip tmp.zip $(BIN_PKG_LIST) $(HISAT2_BIN_LIST) $(HISAT2_BIN_LIST_AUX) ; \ + fi + mv tmp.zip .bin.tmp/hisat2-$(VERSION) + cd .bin.tmp/hisat2-$(VERSION) ; unzip tmp.zip ; rm -f tmp.zip + cd .bin.tmp ; zip -r hisat2-$(VERSION)-$(BITS).zip hisat2-$(VERSION) + cp .bin.tmp/hisat2-$(VERSION)-$(BITS).zip . 
+ rm -rf .bin.tmp + +.PHONY: doc +doc: doc/manual.inc.html MANUAL + +doc/manual.inc.html: MANUAL.markdown + pandoc -T "HISAT2 Manual" -o $@ \ + --from markdown --to HTML --toc $^ + perl -i -ne \ + '$$w=0 if m|^|;print if $$w;$$w=1 if m|^|;' $@ + +MANUAL: MANUAL.markdown + perl doc/strip_markdown.pl < $^ > $@ + +.PHONY: clean +clean: + rm -f $(HISAT2_BIN_LIST) $(HISAT2_BIN_LIST_AUX) \ + $(addsuffix .exe,$(HISAT2_BIN_LIST) $(HISAT2_BIN_LIST_AUX)) \ + hisat2-src.zip hisat2-bin.zip + rm -f core.* .tmp.head + rm -rf *.dSYM + rm -rf .ht2lib-obj* + rm -f libhisat2lib*.a libhisat2lib*.so + + +.PHONY: push-doc +push-doc: doc/manual.inc.html + scp doc/*.*html doc/indexes.txt salz-dmz:/ccb/salz7-data/www/ccb.jhu.edu/html/software/hisat2/ diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..56be177 --- /dev/null +++ b/NEWS @@ -0,0 +1,16 @@ +HISAT 2 NEWS +============= + +HISAT 2 is now available for download from the project website, +http://bowtie-bio.sf.net/bowtie2. 2.0.0-beta is the first version released to +the public and 2.0.7 is the latest version. HISAT 2 is licensed under +the GPLv3 license. See `LICENSE' file for details. + + +Version Release History +======================= + +Version 2.0.0-beta - August XX, 2015 + * Improved multithreading support so that Bowtie 2 now uses native Windows + threads when compiled on Windows and uses a faster mutex. Threading + performance should improve on all platforms. diff --git a/README.md b/README.md new file mode 100644 index 0000000..1278115 --- /dev/null +++ b/README.md @@ -0,0 +1,247 @@ +HISAT-3N +============ + +Overview +----------------- +HISAT-3N (hierarchical indexing for spliced alignment of transcripts - 3 nucleotides) +is an ultrafast and memory-efficient sequence aligner designed for nucleotide conversion +sequencing technologies. HISAT-3N index contains two HISAT2 indexes which require memory small: +for the human genome, it requires 9 GB for standard 3N-index and 10.5 GB for repeat 3N-index. 
+The repeat 3N-index can be used to align one read to thousands of positions 3 times faster than the standard 3N-index. +HISAT-3N is developed based on [HISAT2], +which is particularly optimized for RNA sequencing technology. HISAT-3N supports both strand-specific and non-strand-specific reads. +HISAT-3N can be used for any base-converted sequencing reads, including [BS-seq], [SLAM-seq], [scBS-seq], [scSLAM-seq], and [TAPS]. +See the [HISAT-3N] website for more information. + +[HISAT2]:https://github.com/DaehwanKimLab/hisat2 +[BS-seq]: https://en.wikipedia.org/wiki/Bisulfite_sequencing +[SLAM-seq]: https://www.nature.com/articles/nmeth.4435 +[scBS-seq]: https://www.nature.com/articles/nmeth.3035 +[scSLAM-seq]: https://www.nature.com/articles/s41586-019-1369-y +[TAPS]: https://www.nature.com/articles/s41587-019-0041-2 +[HISAT-3N]:https://daehwankimlab.github.io/hisat2/hisat-3n + + +Getting started +============ +HISAT-3N requires a 64-bit computer running either Linux or Mac OS X and at least 16 GB of RAM. + +A few notes: + +1. Building the standard 3N index requires 16GB of RAM or less. +2. Building the repeat 3N index requires 256GB of RAM. +3. The alignment process using either the standard or repeat index requires less than 16GB of RAM. +4. [SAMtools] is required to sort SAM files in order to generate a HISAT-3N table. + +Install +------------ + + git clone https://github.com/DaehwanKimLab/hisat2.git hisat-3n + cd hisat-3n + git checkout -b hisat-3n origin/hisat-3n + make + +Build a HISAT-3N index with `hisat-3n-build` +----------- +`hisat-3n-build` builds a 3N-index, which contains two hisat2 indexes, from a set of DNA sequences. For standard 3N-index, +each index contains 16 files with suffix `.3n.*.*.ht2`. +For repeat 3N-index, there are 16 more files in addition to the standard 3N-index, and they have the suffix +`.3n.*.rep.*.ht2`. +These files constitute the hisat-3n index and no other file is needed to align reads to the reference. 
+ +* `--base-change <chr1,chr2>` argument is required for `hisat-3n-build` and `hisat-3n`. + Specify which base is converted to another base during the sequencing process. Please enter + 2 letters separated by ',' for this argument. The first letter (chr1) should be the converted base, the second letter (chr2) should be + the base it is converted to. For example, during slam-seq, some 'T' is converted to 'C', + please enter `--base-change T,C`. During bisulfite-seq, some 'C' is converted to 'T', please enter `--base-change C,T`. +* Different conversion types may build the same hisat-3n index. Please check the table below for more detail. + Once you build the hisat-3n index with C to T conversion (for example BS-seq), + you can align the T to C conversion reads (for example SLAM-seq reads) with the same index. + + +| Conversion Types | HISAT-3N index suffix | + |:----------------------------------:|:-----------------------------:| +|C -> T
T -> C
A -> G
G -> A|.3n.CT.\*.ht2
.3n.GA.\*.ht2| +|A -> C
C -> A
G -> T
T -> G|.3n.AC.\*.ht2
.3n.TG.\*.ht2| +|A -> T
T -> A |.3n.AT.\*.ht2
.3n.TA.\*.ht2| +|C -> G
G -> C |.3n.CG.\*.ht2
.3n.GC.\*.ht2| + +#### Examples: + # Build the standard HISAT-3N index (with C to T conversion): + hisat-3n-build --base-change C,T genome.fa genome + + # Build the repeat HISAT-3N index (with T to C conversion, requires 256 GB memory for human genome index): + hisat-3n-build --base-change T,C --repeat-index genome.fa genome + +It is optional to make the graph index and add SNP or splice site information to the index, to increase the alignment accuracy. +The graph index building may require more memory than the linear index building. +For more detail, please check the [HISAT2 manual]. + +[HISAT2 manual]:https://daehwankimlab.github.io/hisat2/manual/ + +#### Examples: + # Build the standard HISAT-3N integrated index with SNP information + hisat-3n-build --base-change C,T --snp genome.snp genome.fa genome + + # Build the standard HISAT-3N integrated index with splice site information + hisat-3n-build --base-change C,T --ss genome.ss --exon genome.exon genome.fa genome + + # Build the repeat HISAT-3N integrated index with SNP information + hisat-3n-build --base-change C,T --repeat-index --snp genome.snp genome.fa genome + + # Build the repeat HISAT-3N integrated index with splice site information + hisat-3n-build --base-change C,T --repeat-index --ss genome.ss --exon genome.exon genome.fa genome + + +Alignment with `hisat-3n` +------------ +After building the HISAT-3N index, you are ready to use `hisat-3n` for alignment. +HISAT-3N has the same set of parameters as in HISAT2 with some additional arguments. Please refer to the [HISAT2 manual] for more details. + +For the human reference genome, HISAT-3N requires about 9GB for alignment with the standard 3N-index and 10.5GB for the repeat 3N-index. + +* `--base-change <nt1,nt2>` + Specify the nucleotide conversion type (e.g., C to T in bisulfite-sequencing reads). The parameter option is two characters separated by ','. 
Type the original nucleotide as the first character (nt1) and the converted nucleotide as the second character (nt2). For example, if performing [SLAM-seq] where some 'T's are converted to 'C's, input `--base-change T,C`. + As another example, if performing bisulfite-seq, where some 'C's are converted to 'T's, please input `--base-change C,T`. + If you want to align non-converted reads to the regular HISAT2 index, then omit this option. + +* `--index/-x <basename>` + Specify the index file basename for HISAT-3N. The basename is the name of the index files up to but not including the suffix `.3n.*.*.ht2` / etc. + For example, if you built your index with basename 'genome' using `hisat-3n-build`, please input `--index genome`. + +* `--directional-mapping` + Perform directional mapping. Please use this option only if your sequencing reads are generated from a strand-specific library. + The directional mapping mode is about 2x faster than the standard (non-directional) mapping mode. + +* `--repeat-limit <int>` + Set the number of alignments to be checked for each repeat alignment. You may increase the number to direct hisat-3n + to output more, if a read has multiple mapping locations. We suggest that you limit the repeat number for paired-end read alignment to no more + than 1,000,000. Default: 1000. + +* `--unique-only` + Only output uniquely aligned reads. 
+ + +#### Examples: +* Single-end [SLAM-seq] read (T to C conversion) alignment with standard 3N-index: + `hisat-3n --index genome -f -U read.fa -S output.sam --base-change T,C` + +* Paired-end strand-specific bisulfite-seq read (C to T conversion) alignment with repeat 3N-index: + `hisat-3n --index genome -f -1 read_1.fa -2 read_2.fa -S output.sam --base-change C,T --directional-mapping` + +* Single-end TAPS read (C to T conversion) alignment with repeat 3N-index, outputting only uniquely aligned results: + `hisat-3n --index genome -q -U read.fq -S output.sam --base-change C,T --unique-only` + + + +#### Extra SAM tags generated by HISAT-3N: + +* `Yf:i:`: Number of conversions detected in the read. +* `Zf:i:`: Number of un-converted bases detected in the read. Yf + Zf = total number of bases which can be converted in the read sequence. +* `YZ:A:
`: The value `+` or `-` indicates the read is mapped to REF-3N (`+`) or REF-RC-3N (`-`), respectively. + +Generate a 3N-conversion-table with `hisat-3n-table` +------------ +### Preparation + +To generate a 3N-conversion-table, users need to sort the `hisat-3n` generated SAM alignment file. + +[SAMtools] is required for this sorting process. + +Use `samtools sort` to convert the SAM file into a sorted SAM file. + + samtools sort output.sam -o output_sorted.sam -O sam + +Generate 3N-conversion-table with `hisat-3n-table`: + +### Usage + hisat-3n-table [options]* --alignments <sam-file> --ref <fasta-file> --base-change <nt1,nt2> + +#### Main arguments +* `--alignments <sam-file>` + SORTED SAM file. Please enter `-` for standard input. + +* `--ref <fasta-file>` + The reference genome file (FASTA format) that was used to generate the HISAT-3N index. + +* `--output-name <tsv-file>` + Filename to write the 3N-conversion-table (tsv format) to. By default, the table is written to the "standard out" or "stdout" filehandle (i.e. the console). + +* `--base-change <nt1,nt2>` + The base-change rule. Enter exactly the same `--base-change` argument that was used for `hisat-3n`. + For example, please enter `--base-change C,T` for bisulfite sequencing reads. + +#### Input options +* `-u/--unique-only` + Only count the uniquely aligned reads into the 3N-conversion-table. + +* `-m/--multiple-only` + Only count the multiple aligned reads into the 3N-conversion-table. + +* `-c/--CG-only` + Only count the CpG sites in the reference genome. This option is designed for bisulfite sequencing reads. + +* `--added-chrname` + Please add this option if you used `--add-chrname` during `hisat-3n` alignment. + During `hisat-3n` alignment, the prefix "chr" is added in front of the chromosome name and shown in the SAM output when the user chooses `--add-chrname`. + `hisat-3n-table` cannot find the chromosome name in the reference because it has an additional "chr" prefix. This option helps `hisat-3n-table` + find the matching chromosome name in the reference file. The 3n-table provides the same chromosome name as the SAM file. 
+ +* `--removed-chrname` + Please add this option if you use `--remove-chrname` during `hisat-3n` alignment. + During `hisat-3n` alignment, the prefix "chr" is removed in front of chromosome name and shows on SAM output, when user choose `--remove-chrname`. + `hisat-3n-table` cannot find the chromosome name on reference because it has no "chr" prefix. This option is to help `hisat-3n-table` + find the matching chromosome name on reference file. The 3n-table provides the same chromosome name as SAM file. + +#### Other options: +* `-p/--threads ` + Launch `int` parallel threads (default: 1) for table building. + +* `-h/--help` + Print usage information and quit. + +#### Examples: + # Generate the 3N-conversion-table for bisulfite sequencing data: + hisat-3n-table -p 16 --alignments sorted_alignment_result.sam --ref genome.fa --output-name output.tsv --base-change C,T + + # Generate the 3N-conversion-table for TAPS data and only count base in CpG site and uniquely aligned: + hisat-3n-table -p 16 --alignments sorted_alignment_result.sam --ref genome.fa --output-name output.tsv --base-change C,T --CG-only --unique-only + + # Generate the 3N-conversion-table for bisulfite sequencing data from sorted BAM file: + samtools view -h sorted_alignment_result.bam | hisat-3n-table --ref genome.fa --alignments - --output-name output.tsv --base-change C,T + + # Generate the 3N-conversion-table for bisulfite sequencing data from unsorted BAM file: + samtools sort alignment_result.bam -O sam | hisat-3n-table --ref genome.fa --alignments - --output-name output.tsv --base-change C,T + + +#### Note: +There are 7 columns in the 3N-conversion-table: + +1. `ref`: the chromosome name. +2. `pos`: 1-based position in `ref`. +3. `strand`: '+' for forward strand. '-' for reverse strand. +4. `convertedBaseQualities`: the qualities of the converted bases in read-level measurement. The length of this string is equal to the number of converted bases. +5. 
`convertedBaseCount`: the number of distinct read positions where converted bases in read-level measurements were found. + this number is equal to the length of convertedBaseQualities. +6. `unconvertedBaseQualities`: the qualities of the unconverted bases in read-level measurement. The length of this string is equal to the number of unconverted bases in read-level measurement. +7. `unconvertedBaseCount`: the number of distinct read positions where unconverted bases in read-level measurements were found. + this number is equal to the length of unconvertedBaseQualities. + +##### Sample 3N-conversion-table: + ref pos strand convertedBaseQualities convertedBaseCount unconvertedBaseQualities unconvertedBaseCount + 1 11874 + FFFFFB + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "aligner_bt.h" +#include "mask.h" + +using namespace std; + +#define CHECK_ROW_COL(rowc, colc) \ + if(rowc >= 0 && colc >= 0) { \ + if(!sawcell_[colc].insert(rowc)) { \ + /* was already in there */ \ + abort = true; \ + return; \ + } \ + assert(local || prob_.cper_->debugCell(rowc, colc, hefc)); \ + } + +/** + * Fill in a triangle of the DP table and backtrace from the given cell to + * a cell in the previous checkpoint, or to the terminal cell. 
+ */ +void BtBranchTracer::triangleFill( + int64_t rw, // row of cell to backtrace from + int64_t cl, // column of cell to backtrace from + int hef, // cell to backtrace from is H (0), E (1), or F (2) + TAlScore targ, // score of cell to backtrace from + TAlScore targ_final, // score of alignment we're looking for + RandomSource& rnd, // pseudo-random generator + int64_t& row_new, // out: row we ended up in after backtrace + int64_t& col_new, // out: column we ended up in after backtrace + int& hef_new, // out: H/E/F after backtrace + TAlScore& targ_new, // out: score up to cell we ended up in + bool& done, // out: finished tracing out an alignment? + bool& abort) // out: aborted b/c cell was seen before? +{ + assert_geq(rw, 0); + assert_geq(cl, 0); + assert_range(0, 2, hef); + assert_lt(rw, (int64_t)prob_.qrylen_); + assert_lt(cl, (int64_t)prob_.reflen_); + assert(prob_.usecp_ && prob_.fill_); + int64_t row = rw, col = cl; + const int64_t colmin = 0; + const int64_t rowmin = 0; + const int64_t colmax = prob_.reflen_ - 1; + const int64_t rowmax = prob_.qrylen_ - 1; + assert_leq(prob_.reflen_, (TRefOff)sawcell_.size()); + assert_leq(col, (int64_t)prob_.cper_->hicol()); + assert_geq(col, (int64_t)prob_.cper_->locol()); + assert_geq(prob_.cper_->per(), 2); + size_t mod = (row + col) & prob_.cper_->lomask(); + assert_lt(mod, prob_.cper_->per()); + // Allocate room for diags + size_t depth = mod+1; + assert_leq(depth, prob_.cper_->per()); + size_t breadth = depth; + tri_.resize(depth); + // Allocate room for each diag + for(size_t i = 0; i < depth; i++) { + tri_[i].resize(breadth - i); + } + bool upperleft = false; + size_t off = (row + col) >> prob_.cper_->perpow2(); + if(off == 0) { + upperleft = true; + } else { + off--; + } + const TAlScore sc_rdo = prob_.sc_->readGapOpen(); + const TAlScore sc_rde = prob_.sc_->readGapExtend(); + const TAlScore sc_rfo = prob_.sc_->refGapOpen(); + const TAlScore sc_rfe = prob_.sc_->refGapExtend(); + const bool local = 
!prob_.sc_->monotone; + int64_t row_lo = row - (int64_t)mod; + const CpQuad *prev2 = NULL, *prev1 = NULL; + if(!upperleft) { + // Read-only pointer to cells in diagonal -2. Start one row above the + // target row. + prev2 = prob_.cper_->qdiag1sPtr() + (off * prob_.cper_->nrow() + row_lo - 1); + // Read-only pointer to cells in diagonal -1. Start one row above the + // target row + prev1 = prob_.cper_->qdiag2sPtr() + (off * prob_.cper_->nrow() + row_lo - 1); +#ifndef NDEBUG + if(row >= (int64_t)mod) { + size_t rowc = row - mod, colc = col; + if(rowc > 0 && prob_.cper_->isCheckpointed(rowc-1, colc)) { + TAlScore al = prev1[0].sc[0]; + if(al == MIN_I16) al = MIN_I64; + assert_eq(prob_.cper_->scoreTriangle(rowc-1, colc, 0), al); + } + if(rowc > 0 && colc > 0 && prob_.cper_->isCheckpointed(rowc-1, colc-1)) { + TAlScore al = prev2[0].sc[0]; + if(al == MIN_I16) al = MIN_I64; + assert_eq(prob_.cper_->scoreTriangle(rowc-1, colc-1, 0), al); + } + } +#endif + } + // Pointer to cells in current diagonal + // For each diagonal we need to fill in + for(size_t i = 0; i < depth; i++) { + CpQuad * cur = tri_[i].ptr(); + CpQuad * curc = cur; + size_t doff = mod - i; // # diagonals we are away from target diag + //assert_geq(row, (int64_t)doff); + int64_t rowc = row - doff; + int64_t colc = col; + size_t neval = 0; // # cells evaluated in this diag + ASSERT_ONLY(const CpQuad *last = NULL); + // Fill this diagonal from upper right to lower left + for(size_t j = 0; j < breadth; j++) { + if(rowc >= rowmin && rowc <= rowmax && + colc >= colmin && colc <= colmax) + { + neval++; + int64_t fromend = prob_.qrylen_ - rowc - 1; + bool allowGaps = fromend >= prob_.sc_->gapbar && rowc >= prob_.sc_->gapbar; + // Fill this cell + // Some things we might want to calculate about this cell up front: + // 1. 
How many matches are possible from this cell to the cell in + // row, col, in case this allows us to prune + // Get character from read + int qc = prob_.qry_[rowc]; + // Get quality value from read + int qq = prob_.qual_[rowc]; + assert_geq(qq, 33); + // Get character from reference + int rc = prob_.ref_[colc]; + assert_range(0, 16, rc); + int16_t sc_diag = prob_.sc_->score(qc, rc, qq - 33); + int16_t sc_h_up = MIN_I16; + int16_t sc_f_up = MIN_I16; + int16_t sc_h_lf = MIN_I16; + int16_t sc_e_lf = MIN_I16; + if(allowGaps) { + if(rowc > 0) { + assert(local || prev1[j+0].sc[2] < 0); + if(prev1[j+0].sc[0] > MIN_I16) { + sc_h_up = prev1[j+0].sc[0] - sc_rfo; + if(local) sc_h_up = max(sc_h_up, 0); + } + if(prev1[j+0].sc[2] > MIN_I16) { + sc_f_up = prev1[j+0].sc[2] - sc_rfe; + if(local) sc_f_up = max(sc_f_up, 0); + } +#ifndef NDEBUG + TAlScore hup = prev1[j+0].sc[0]; + TAlScore fup = prev1[j+0].sc[2]; + if(hup == MIN_I16) hup = MIN_I64; + if(fup == MIN_I16) fup = MIN_I64; + if(local) { + hup = max(hup, 0); + fup = max(fup, 0); + } + if(prob_.cper_->isCheckpointed(rowc-1, colc)) { + assert_eq(hup, prob_.cper_->scoreTriangle(rowc-1, colc, 0)); + assert_eq(fup, prob_.cper_->scoreTriangle(rowc-1, colc, 2)); + } +#endif + } + if(colc > 0) { + assert(local || prev1[j+1].sc[1] < 0); + if(prev1[j+1].sc[0] > MIN_I16) { + sc_h_lf = prev1[j+1].sc[0] - sc_rdo; + if(local) sc_h_lf = max(sc_h_lf, 0); + } + if(prev1[j+1].sc[1] > MIN_I16) { + sc_e_lf = prev1[j+1].sc[1] - sc_rde; + if(local) sc_e_lf = max(sc_e_lf, 0); + } +#ifndef NDEBUG + TAlScore hlf = prev1[j+1].sc[0]; + TAlScore elf = prev1[j+1].sc[1]; + if(hlf == MIN_I16) hlf = MIN_I64; + if(elf == MIN_I16) elf = MIN_I64; + if(local) { + hlf = max(hlf, 0); + elf = max(elf, 0); + } + if(prob_.cper_->isCheckpointed(rowc, colc-1)) { + assert_eq(hlf, prob_.cper_->scoreTriangle(rowc, colc-1, 0)); + assert_eq(elf, prob_.cper_->scoreTriangle(rowc, colc-1, 1)); + } +#endif + } + } + assert(rowc <= 1 || colc <= 0 || prev2 != NULL); + int16_t 
sc_h_dg = ((rowc > 0 && colc > 0) ? prev2[j+0].sc[0] : 0); + if(colc == 0 && rowc > 0 && !local) { + sc_h_dg = MIN_I16; + } + if(sc_h_dg > MIN_I16) { + sc_h_dg += sc_diag; + } + if(local) sc_h_dg = max(sc_h_dg, 0); + // cerr << sc_diag << " " << sc_h_dg << " " << sc_h_up << " " << sc_f_up << " " << sc_h_lf << " " << sc_e_lf << endl; + int mask = 0; + // Calculate best ways into H, E, F cells starting with H. + // Mask bits: + // H: 1=diag, 2=hhoriz, 4=ehoriz, 8=hvert, 16=fvert + // E: 32=hhoriz, 64=ehoriz + // F: 128=hvert, 256=fvert + int16_t sc_best = sc_h_dg; + if(sc_h_dg > MIN_I64) { + mask = 1; + } + if(colc > 0 && sc_h_lf >= sc_best && sc_h_lf > MIN_I64) { + if(sc_h_lf > sc_best) mask = 0; + mask |= 2; + sc_best = sc_h_lf; + } + if(colc > 0 && sc_e_lf >= sc_best && sc_e_lf > MIN_I64) { + if(sc_e_lf > sc_best) mask = 0; + mask |= 4; + sc_best = sc_e_lf; + } + if(rowc > 0 && sc_h_up >= sc_best && sc_h_up > MIN_I64) { + if(sc_h_up > sc_best) mask = 0; + mask |= 8; + sc_best = sc_h_up; + } + if(rowc > 0 && sc_f_up >= sc_best && sc_f_up > MIN_I64) { + if(sc_f_up > sc_best) mask = 0; + mask |= 16; + sc_best = sc_f_up; + } + // Calculate best way into E cell + int16_t sc_e_best = sc_h_lf; + if(colc > 0) { + if(sc_h_lf >= sc_e_lf && sc_h_lf > MIN_I64) { + if(sc_h_lf == sc_e_lf) { + mask |= 64; + } + mask |= 32; + } else if(sc_e_lf > MIN_I64) { + sc_e_best = sc_e_lf; + mask |= 64; + } + } + if(sc_e_best > sc_best) { + sc_best = sc_e_best; + mask &= ~31; // don't go diagonal + } + // Calculate best way into F cell + int16_t sc_f_best = sc_h_up; + if(rowc > 0) { + if(sc_h_up >= sc_f_up && sc_h_up > MIN_I64) { + if(sc_h_up == sc_f_up) { + mask |= 256; + } + mask |= 128; + } else if(sc_f_up > MIN_I64) { + sc_f_best = sc_f_up; + mask |= 256; + } + } + if(sc_f_best > sc_best) { + sc_best = sc_f_best; + mask &= ~127; // don't go horizontal or diagonal + } + // Install results in cur + assert(!prob_.sc_->monotone || sc_best <= 0); + assert(!prob_.sc_->monotone || sc_e_best <= 
0); + assert(!prob_.sc_->monotone || sc_f_best <= 0); + curc->sc[0] = sc_best; + assert( local || sc_e_best < 0); + assert( local || sc_f_best < 0); + assert(!local || sc_e_best >= 0 || sc_e_best == MIN_I16); + assert(!local || sc_f_best >= 0 || sc_f_best == MIN_I16); + curc->sc[1] = sc_e_best; + curc->sc[2] = sc_f_best; + curc->sc[3] = mask; + // cerr << curc->sc[0] << " " << curc->sc[1] << " " << curc->sc[2] << " " << curc->sc[3] << endl; + ASSERT_ONLY(last = curc); +#ifndef NDEBUG + if(prob_.cper_->isCheckpointed(rowc, colc)) { + if(local) { + sc_e_best = max(sc_e_best, 0); + sc_f_best = max(sc_f_best, 0); + } + TAlScore sc_best64 = sc_best; if(sc_best == MIN_I16) sc_best64 = MIN_I64; + TAlScore sc_e_best64 = sc_e_best; if(sc_e_best == MIN_I16) sc_e_best64 = MIN_I64; + TAlScore sc_f_best64 = sc_f_best; if(sc_f_best == MIN_I16) sc_f_best64 = MIN_I64; + assert_eq(prob_.cper_->scoreTriangle(rowc, colc, 0), sc_best64); + assert_eq(prob_.cper_->scoreTriangle(rowc, colc, 1), sc_e_best64); + assert_eq(prob_.cper_->scoreTriangle(rowc, colc, 2), sc_f_best64); + } +#endif + } + // Update row, col + assert_lt(rowc, (int64_t)prob_.qrylen_); + rowc++; + colc--; + curc++; + } // for(size_t j = 0; j < breadth; j++) + if(i == depth-1) { + // Final iteration + assert(last != NULL); + assert_eq(1, neval); + assert_neq(0, last->sc[3]); + assert_eq(targ, last->sc[hef]); + } else { + breadth--; + prev2 = prev1 + 1; + prev1 = cur; + } + } // for(size_t i = 0; i < depth; i++) + // + // Now backtrack through the triangle. Abort as soon as we enter a cell + // that was visited by a previous backtrace. 
+ // + int64_t rowc = row, colc = col; + size_t curid; + int hefc = hef; + if(bs_.empty()) { + // Start an initial branch + CHECK_ROW_COL(rowc, colc); + curid = bs_.alloc(); + assert_eq(0, curid); + Edit e; + bs_[curid].init( + prob_, + 0, // parent ID + 0, // penalty + 0, // score_en + rowc, // row + colc, // col + e, // edit + 0, // hef + true, // I am the root + false); // don't try to extend with exact matches + bs_[curid].len_ = 0; + } else { + curid = bs_.size()-1; + } + size_t idx_orig = (row + col) >> prob_.cper_->perpow2(); + while(true) { + // What depth are we? + size_t mod = (rowc + colc) & prob_.cper_->lomask(); + assert_lt(mod, prob_.cper_->per()); + CpQuad * cur = tri_[mod].ptr(); + int64_t row_off = rowc - row_lo - mod; + assert(!local || cur[row_off].sc[0] > 0); + assert_geq(row_off, 0); + int mask = cur[row_off].sc[3]; + assert_gt(mask, 0); + int sel = -1; + // Select what type of move to make, which depends on whether we're + // currently in H, E, F: + if(hefc == 0) { + if( (mask & 1) != 0) { + // diagonal + sel = 0; + } else if((mask & 8) != 0) { + // up to H + sel = 3; + } else if((mask & 16) != 0) { + // up to F + sel = 4; + } else if((mask & 2) != 0) { + // left to H + sel = 1; + } else if((mask & 4) != 0) { + // left to E + sel = 2; + } + } else if(hefc == 1) { + if( (mask & 32) != 0) { + // left to H + sel = 5; + } else if((mask & 64) != 0) { + // left to E + sel = 6; + } + } else { + assert_eq(2, hefc); + if( (mask & 128) != 0) { + // up to H + sel = 7; + } else if((mask & 256) != 0) { + // up to F + sel = 8; + } + } + assert_geq(sel, 0); + // Get character from read + int qc = prob_.qry_[rowc], qq = prob_.qual_[rowc]; + // Get character from reference + int rc = prob_.ref_[colc]; + assert_range(0, 16, rc); + // Now that we know what type of move to make, make it, updating our + // row and column and moving updating the branch. 
+ if(sel == 0) { + assert_geq(rowc, 0); + assert_geq(colc, 0); + TAlScore scd = prob_.sc_->score(qc, rc, qq - 33); + if((rc & (1 << qc)) == 0) { + // Mismatch + size_t id = curid; + // Check if the previous branch was the initial (bottommost) + // branch with no matches. If so, the mismatch should be added + // to the initial branch, instead of starting a new branch. + bool empty = (bs_[curid].len_ == 0 && curid == 0); + if(!empty) { + id = bs_.alloc(); + } + Edit e((int)rowc, mask2dna[rc], "ACGTN"[qc], EDIT_TYPE_MM); + assert_lt(scd, 0); + TAlScore score_en = bs_[curid].score_st_ + scd; + bs_[id].init( + prob_, + curid, // parent ID + -scd, // penalty + score_en, // score_en + rowc, // row + colc, // col + e, // edit + hefc, // hef + empty, // root? + false); // don't try to extend with exact matches + //assert(!local || bs_[id].score_st_ >= 0); + curid = id; + } else { + // Match + bs_[curid].score_st_ += prob_.sc_->match(); + bs_[curid].len_++; + assert_leq((int64_t)bs_[curid].len_, bs_[curid].row_ + 1); + } + rowc--; + colc--; + assert(local || bs_[curid].score_st_ >= targ_final); + hefc = 0; + } else if((sel >= 1 && sel <= 2) || (sel >= 5 && sel <= 6)) { + assert_gt(colc, 0); + // Read gap + size_t id = bs_.alloc(); + Edit e((int)rowc+1, mask2dna[rc], '-', EDIT_TYPE_READ_GAP); + TAlScore gapp = prob_.sc_->readGapOpen(); + if(bs_[curid].len_ == 0 && bs_[curid].e_.inited() && bs_[curid].e_.isReadGap()) { + gapp = prob_.sc_->readGapExtend(); + } + TAlScore score_en = bs_[curid].score_st_ - gapp; + bs_[id].init( + prob_, + curid, // parent ID + gapp, // penalty + score_en, // score_en + rowc, // row + colc-1, // col + e, // edit + hefc, // hef + false, // root? 
+ false); // don't try to extend with exact matches + colc--; + curid = id; + assert( local || bs_[curid].score_st_ >= targ_final); + //assert(!local || bs_[curid].score_st_ >= 0); + if(sel == 1 || sel == 5) { + hefc = 0; + } else { + hefc = 1; + } + } else { + assert_gt(rowc, 0); + // Reference gap + size_t id = bs_.alloc(); + Edit e((int)rowc, '-', "ACGTN"[qc], EDIT_TYPE_REF_GAP); + TAlScore gapp = prob_.sc_->refGapOpen(); + if(bs_[curid].len_ == 0 && bs_[curid].e_.inited() && bs_[curid].e_.isRefGap()) { + gapp = prob_.sc_->refGapExtend(); + } + TAlScore score_en = bs_[curid].score_st_ - gapp; + bs_[id].init( + prob_, + curid, // parent ID + gapp, // penalty + score_en, // score_en + rowc-1, // row + colc, // col + e, // edit + hefc, // hef + false, // root? + false); // don't try to extend with exact matches + rowc--; + curid = id; + //assert(!local || bs_[curid].score_st_ >= 0); + if(sel == 3 || sel == 7) { + hefc = 0; + } else { + hefc = 2; + } + } + CHECK_ROW_COL(rowc, colc); + size_t mod_new = (rowc + colc) & prob_.cper_->lomask(); + size_t idx = (rowc + colc) >> prob_.cper_->perpow2(); + assert_lt(mod_new, prob_.cper_->per()); + int64_t row_off_new = rowc - row_lo - mod_new; + CpQuad * cur_new = NULL; + if(colc >= 0 && rowc >= 0 && idx == idx_orig) { + cur_new = tri_[mod_new].ptr(); + } + bool hit_new_tri = (idx < idx_orig && colc >= 0 && rowc >= 0); + // Check whether we made it to the top row or to a cell with score 0 + if(colc < 0 || rowc < 0 || + (cur_new != NULL && (local && cur_new[row_off_new].sc[0] == 0))) + { + done = true; + assert(bs_[curid].isSolution(prob_)); + addSolution(curid); +#ifndef NDEBUG + // A check to see if any two adjacent branches in the backtrace + // overlap. If they do, the whole alignment will be filtered out + // in trySolution(...) 
+ size_t cur = curid; + if(!bs_[cur].root_) { + size_t next = bs_[cur].parentId_; + while(!bs_[next].root_) { + assert_neq(cur, next); + if(bs_[next].len_ != 0 || bs_[cur].len_ == 0) { + assert(!bs_[cur].overlap(prob_, bs_[next])); + } + cur = next; + next = bs_[cur].parentId_; + } + } +#endif + return; + } + if(hit_new_tri) { + assert(rowc < 0 || colc < 0 || prob_.cper_->isCheckpointed(rowc, colc)); + row_new = rowc; col_new = colc; + hef_new = hefc; + done = false; + if(rowc < 0 || colc < 0) { + assert(local); + targ_new = 0; + } else { + targ_new = prob_.cper_->scoreTriangle(rowc, colc, hefc); + } + if(local && targ_new == 0) { + done = true; + assert(bs_[curid].isSolution(prob_)); + addSolution(curid); + } + assert((row_new >= 0 && col_new >= 0) || done); + return; + } + } + assert(false); +} + +#ifndef NDEBUG +#define DEBUG_CHECK(ss, row, col, hef) { \ + if(prob_.cper_->debug() && row >= 0 && col >= 0) { \ + TAlScore s = ss; \ + if(s == MIN_I16) s = MIN_I64; \ + if(local && s < 0) s = 0; \ + TAlScore deb = prob_.cper_->debugCell(row, col, hef); \ + if(local && deb < 0) deb = 0; \ + assert_eq(s, deb); \ + } \ +} +#else +#define DEBUG_CHECK(ss, row, col, hef) +#endif + + +/** + * Fill in a square of the DP table and backtrace from the given cell to + * a cell in the previous checkpoint, or to the terminal cell. + */ +void BtBranchTracer::squareFill( + int64_t rw, // row of cell to backtrace from + int64_t cl, // column of cell to backtrace from + int hef, // cell to backtrace from is H (0), E (1), or F (2) + TAlScore targ, // score of cell to backtrace from + TAlScore targ_final, // score of alignment we're looking for + RandomSource& rnd, // pseudo-random generator + int64_t& row_new, // out: row we ended up in after backtrace + int64_t& col_new, // out: column we ended up in after backtrace + int& hef_new, // out: H/E/F after backtrace + TAlScore& targ_new, // out: score up to cell we ended up in + bool& done, // out: finished tracing out an alignment? 
+ bool& abort) // out: aborted b/c cell was seen before? +{ + assert_geq(rw, 0); + assert_geq(cl, 0); + assert_range(0, 2, hef); + assert_lt(rw, (int64_t)prob_.qrylen_); + assert_lt(cl, (int64_t)prob_.reflen_); + assert(prob_.usecp_ && prob_.fill_); + const bool is8_ = prob_.cper_->is8_; + int64_t row = rw, col = cl; + assert_leq(prob_.reflen_, (TRefOff)sawcell_.size()); + assert_leq(col, (int64_t)prob_.cper_->hicol()); + assert_geq(col, (int64_t)prob_.cper_->locol()); + assert_geq(prob_.cper_->per(), 2); + size_t xmod = col & prob_.cper_->lomask(); + size_t ymod = row & prob_.cper_->lomask(); + size_t xdiv = col >> prob_.cper_->perpow2(); + size_t ydiv = row >> prob_.cper_->perpow2(); + size_t sq_ncol = xmod+1, sq_nrow = ymod+1; + sq_.resize(sq_ncol * sq_nrow); + bool upper = ydiv == 0; + bool left = xdiv == 0; + const TAlScore sc_rdo = prob_.sc_->readGapOpen(); + const TAlScore sc_rde = prob_.sc_->readGapExtend(); + const TAlScore sc_rfo = prob_.sc_->refGapOpen(); + const TAlScore sc_rfe = prob_.sc_->refGapExtend(); + const bool local = !prob_.sc_->monotone; + const CpQuad *qup = NULL; + const __m128i *qlf = NULL; + size_t per = prob_.cper_->per_; + ASSERT_ONLY(size_t nrow = prob_.cper_->nrow()); + size_t ncol = prob_.cper_->ncol(); + assert_eq(prob_.qrylen_, nrow); + assert_eq(prob_.reflen_, (TRefOff)ncol); + size_t niter = prob_.cper_->niter_; + if(!upper) { + qup = prob_.cper_->qrows_.ptr() + (ncol * (ydiv-1)) + xdiv * per; + } + if(!left) { + // Set up the column pointers to point to the first __m128i word in the + // relevant column + size_t off = (niter << 2) * (xdiv-1); + qlf = prob_.cper_->qcols_.ptr() + off; + } + size_t xedge = xdiv * per; // absolute offset of leftmost cell in square + size_t yedge = ydiv * per; // absolute offset of topmost cell in square + size_t xi = xedge, yi = yedge; // iterators for columns, rows + size_t ii = 0; // iterator into packed square + // Iterate over rows, then over columns + size_t m128mod = yi % prob_.cper_->niter_; 
+ size_t m128div = yi / prob_.cper_->niter_; + int16_t sc_h_dg_lastrow = MIN_I16; + for(size_t i = 0; i <= ymod; i++, yi++) { + assert_lt(yi, nrow); + xi = xedge; + // Handling for first column is done outside the loop + size_t fromend = prob_.qrylen_ - yi - 1; + bool allowGaps = fromend >= (size_t)prob_.sc_->gapbar && yi >= (size_t)prob_.sc_->gapbar; + // Get character, quality from read + int qc = prob_.qry_[yi], qq = prob_.qual_[yi]; + assert_geq(qq, 33); + int16_t sc_h_lf_last = MIN_I16; + int16_t sc_e_lf_last = MIN_I16; + for(size_t j = 0; j <= xmod; j++, xi++) { + assert_lt(xi, ncol); + // Get character from reference + int rc = prob_.ref_[xi]; + assert_range(0, 16, rc); + int16_t sc_diag = prob_.sc_->score(qc, rc, qq - 33); + int16_t sc_h_up = MIN_I16, sc_f_up = MIN_I16, + sc_h_lf = MIN_I16, sc_e_lf = MIN_I16, + sc_h_dg = MIN_I16; + int16_t sc_h_up_c = MIN_I16, sc_f_up_c = MIN_I16, + sc_h_lf_c = MIN_I16, sc_e_lf_c = MIN_I16, + sc_h_dg_c = MIN_I16; + if(yi == 0) { + // If I'm in the first first row or column set it to 0 + sc_h_dg = 0; + } else if(xi == 0) { + // Do nothing; leave it at min + if(local) { + sc_h_dg = 0; + } + } else if(i == 0 && j == 0) { + // Otherwise, if I'm in the upper-left square corner, I can get + // it from the checkpoint + sc_h_dg = qup[-1].sc[0]; + } else if(j == 0) { + // Otherwise, if I'm in the leftmost cell of this row, I can + // get it from sc_h_lf in first column of previous row + sc_h_dg = sc_h_dg_lastrow; + } else { + // Otherwise, I can get it from qup + sc_h_dg = qup[j-1].sc[0]; + } + if(yi > 0 && xi > 0) DEBUG_CHECK(sc_h_dg, yi-1, xi-1, 2); + + // If we're in the leftmost column, calculate sc_h_lf regardless of + // allowGaps. 
+ if(j == 0 && xi > 0) { + // Get values for left neighbors from the checkpoint + if(is8_) { + size_t vecoff = (m128mod << 6) + m128div; + sc_e_lf = ((uint8_t*)(qlf + 0))[vecoff]; + sc_h_lf = ((uint8_t*)(qlf + 2))[vecoff]; + if(local) { + // No adjustment + } else { + if(sc_h_lf == 0) sc_h_lf = MIN_I16; + else sc_h_lf -= 0xff; + if(sc_e_lf == 0) sc_e_lf = MIN_I16; + else sc_e_lf -= 0xff; + } + } else { + size_t vecoff = (m128mod << 5) + m128div; + sc_e_lf = ((int16_t*)(qlf + 0))[vecoff]; + sc_h_lf = ((int16_t*)(qlf + 2))[vecoff]; + if(local) { + sc_h_lf += 0x8000; assert_geq(sc_h_lf, 0); + sc_e_lf += 0x8000; assert_geq(sc_e_lf, 0); + } else { + if(sc_h_lf != MIN_I16) sc_h_lf -= 0x7fff; + if(sc_e_lf != MIN_I16) sc_e_lf -= 0x7fff; + } + } + DEBUG_CHECK(sc_e_lf, yi, xi-1, 0); + DEBUG_CHECK(sc_h_lf, yi, xi-1, 2); + sc_h_dg_lastrow = sc_h_lf; + } + + if(allowGaps) { + if(j == 0 /* at left edge */ && xi > 0 /* not extreme */) { + sc_h_lf_c = sc_h_lf; + sc_e_lf_c = sc_e_lf; + if(sc_h_lf_c != MIN_I16) sc_h_lf_c -= sc_rdo; + if(sc_e_lf_c != MIN_I16) sc_e_lf_c -= sc_rde; + assert_leq(sc_h_lf_c, prob_.cper_->perf_); + assert_leq(sc_e_lf_c, prob_.cper_->perf_); + } else if(xi > 0) { + // Get values for left neighbors from the previous iteration + if(sc_h_lf_last != MIN_I16) { + sc_h_lf = sc_h_lf_last; + sc_h_lf_c = sc_h_lf - sc_rdo; + } + if(sc_e_lf_last != MIN_I16) { + sc_e_lf = sc_e_lf_last; + sc_e_lf_c = sc_e_lf - sc_rde; + } + } + if(yi > 0 /* not extreme */) { + // Get column values + assert(qup != NULL); + assert(local || qup[j].sc[2] < 0); + if(qup[j].sc[0] > MIN_I16) { + DEBUG_CHECK(qup[j].sc[0], yi-1, xi, 2); + sc_h_up = qup[j].sc[0]; + sc_h_up_c = sc_h_up - sc_rfo; + } + if(qup[j].sc[2] > MIN_I16) { + DEBUG_CHECK(qup[j].sc[2], yi-1, xi, 1); + sc_f_up = qup[j].sc[2]; + sc_f_up_c = sc_f_up - sc_rfe; + } + } + if(local) { + sc_h_up_c = max(sc_h_up_c, 0); + sc_f_up_c = max(sc_f_up_c, 0); + sc_h_lf_c = max(sc_h_lf_c, 0); + sc_e_lf_c = max(sc_e_lf_c, 0); + } + } + + 
if(sc_h_dg > MIN_I16) { + sc_h_dg_c = sc_h_dg + sc_diag; + } + if(local) sc_h_dg_c = max(sc_h_dg_c, 0); + + int mask = 0; + // Calculate best ways into H, E, F cells starting with H. + // Mask bits: + // H: 1=diag, 2=hhoriz, 4=ehoriz, 8=hvert, 16=fvert + // E: 32=hhoriz, 64=ehoriz + // F: 128=hvert, 256=fvert + int16_t sc_best = sc_h_dg_c; + if(sc_h_dg_c > MIN_I64) { + mask = 1; + } + if(xi > 0 && sc_h_lf_c >= sc_best && sc_h_lf_c > MIN_I64) { + if(sc_h_lf_c > sc_best) mask = 0; + mask |= 2; + sc_best = sc_h_lf_c; + } + if(xi > 0 && sc_e_lf_c >= sc_best && sc_e_lf_c > MIN_I64) { + if(sc_e_lf_c > sc_best) mask = 0; + mask |= 4; + sc_best = sc_e_lf_c; + } + if(yi > 0 && sc_h_up_c >= sc_best && sc_h_up_c > MIN_I64) { + if(sc_h_up_c > sc_best) mask = 0; + mask |= 8; + sc_best = sc_h_up_c; + } + if(yi > 0 && sc_f_up_c >= sc_best && sc_f_up_c > MIN_I64) { + if(sc_f_up_c > sc_best) mask = 0; + mask |= 16; + sc_best = sc_f_up_c; + } + // Calculate best way into E cell + int16_t sc_e_best = sc_h_lf_c; + if(xi > 0) { + if(sc_h_lf_c >= sc_e_lf_c && sc_h_lf_c > MIN_I64) { + if(sc_h_lf_c == sc_e_lf_c) { + mask |= 64; + } + mask |= 32; + } else if(sc_e_lf_c > MIN_I64) { + sc_e_best = sc_e_lf_c; + mask |= 64; + } + } + if(sc_e_best > sc_best) { + sc_best = sc_e_best; + mask &= ~31; // don't go diagonal + } + // Calculate best way into F cell + int16_t sc_f_best = sc_h_up_c; + if(yi > 0) { + if(sc_h_up_c >= sc_f_up_c && sc_h_up_c > MIN_I64) { + if(sc_h_up_c == sc_f_up_c) { + mask |= 256; + } + mask |= 128; + } else if(sc_f_up_c > MIN_I64) { + sc_f_best = sc_f_up_c; + mask |= 256; + } + } + if(sc_f_best > sc_best) { + sc_best = sc_f_best; + mask &= ~127; // don't go horizontal or diagonal + } + // Install results in cur + assert( local || sc_best <= 0); + sq_[ii+j].sc[0] = sc_best; + assert( local || sc_e_best < 0); + assert( local || sc_f_best < 0); + assert(!local || sc_e_best >= 0 || sc_e_best == MIN_I16); + assert(!local || sc_f_best >= 0 || sc_f_best == MIN_I16); + 
sq_[ii+j].sc[1] = sc_e_best; + sq_[ii+j].sc[2] = sc_f_best; + sq_[ii+j].sc[3] = mask; + DEBUG_CHECK(sq_[ii+j].sc[0], yi, xi, 2); // H + DEBUG_CHECK(sq_[ii+j].sc[1], yi, xi, 0); // E + DEBUG_CHECK(sq_[ii+j].sc[2], yi, xi, 1); // F + // Update sc_h_lf_last, sc_e_lf_last + sc_h_lf_last = sc_best; + sc_e_lf_last = sc_e_best; + } + // Update m128mod, m128div + m128mod++; + if(m128mod == prob_.cper_->niter_) { + m128mod = 0; + m128div++; + } + // update qup + ii += sq_ncol; + // dimensions of sq_ + qup = sq_.ptr() + sq_ncol * i; + } + assert_eq(targ, sq_[ymod * sq_ncol + xmod].sc[hef]); + // + // Now backtrack through the triangle. Abort as soon as we enter a cell + // that was visited by a previous backtrace. + // + int64_t rowc = row, colc = col; + size_t curid; + int hefc = hef; + if(bs_.empty()) { + // Start an initial branch + CHECK_ROW_COL(rowc, colc); + curid = bs_.alloc(); + assert_eq(0, curid); + Edit e; + bs_[curid].init( + prob_, + 0, // parent ID + 0, // penalty + 0, // score_en + rowc, // row + colc, // col + e, // edit + 0, // hef + true, // root? + false); // don't try to extend with exact matches + bs_[curid].len_ = 0; + } else { + curid = bs_.size()-1; + } + size_t ymodTimesNcol = ymod * sq_ncol; + while(true) { + // What depth are we? 
+ assert_eq(ymodTimesNcol, ymod * sq_ncol); + CpQuad * cur = sq_.ptr() + ymodTimesNcol + xmod; + int mask = cur->sc[3]; + assert_gt(mask, 0); + int sel = -1; + // Select what type of move to make, which depends on whether we're + // currently in H, E, F: + if(hefc == 0) { + if( (mask & 1) != 0) { + // diagonal + sel = 0; + } else if((mask & 8) != 0) { + // up to H + sel = 3; + } else if((mask & 16) != 0) { + // up to F + sel = 4; + } else if((mask & 2) != 0) { + // left to H + sel = 1; + } else if((mask & 4) != 0) { + // left to E + sel = 2; + } + } else if(hefc == 1) { + if( (mask & 32) != 0) { + // left to H + sel = 5; + } else if((mask & 64) != 0) { + // left to E + sel = 6; + } + } else { + assert_eq(2, hefc); + if( (mask & 128) != 0) { + // up to H + sel = 7; + } else if((mask & 256) != 0) { + // up to F + sel = 8; + } + } + assert_geq(sel, 0); + // Get character from read + int qc = prob_.qry_[rowc], qq = prob_.qual_[rowc]; + // Get character from reference + int rc = prob_.ref_[colc]; + assert_range(0, 16, rc); + bool xexit = false, yexit = false; + // Now that we know what type of move to make, make it, updating our + // row and column and moving updating the branch. + if(sel == 0) { + assert_geq(rowc, 0); + assert_geq(colc, 0); + TAlScore scd = prob_.sc_->score(qc, rc, qq - 33); + if((rc & (1 << qc)) == 0) { + // Mismatch + size_t id = curid; + // Check if the previous branch was the initial (bottommost) + // branch with no matches. If so, the mismatch should be added + // to the initial branch, instead of starting a new branch. + bool empty = (bs_[curid].len_ == 0 && curid == 0); + if(!empty) { + id = bs_.alloc(); + } + Edit e((int)rowc, mask2dna[rc], "ACGTN"[qc], EDIT_TYPE_MM); + assert_lt(scd, 0); + TAlScore score_en = bs_[curid].score_st_ + scd; + bs_[id].init( + prob_, + curid, // parent ID + -scd, // penalty + score_en, // score_en + rowc, // row + colc, // col + e, // edit + hefc, // hef + empty, // root? 
+ false); // don't try to extend with exact matches + curid = id; + //assert(!local || bs_[curid].score_st_ >= 0); + } else { + // Match + bs_[curid].score_st_ += prob_.sc_->match(); + bs_[curid].len_++; + assert_leq((int64_t)bs_[curid].len_, bs_[curid].row_ + 1); + } + if(xmod == 0) xexit = true; + if(ymod == 0) yexit = true; + rowc--; ymod--; ymodTimesNcol -= sq_ncol; + colc--; xmod--; + assert(local || bs_[curid].score_st_ >= targ_final); + hefc = 0; + } else if((sel >= 1 && sel <= 2) || (sel >= 5 && sel <= 6)) { + assert_gt(colc, 0); + // Read gap + size_t id = bs_.alloc(); + Edit e((int)rowc+1, mask2dna[rc], '-', EDIT_TYPE_READ_GAP); + TAlScore gapp = prob_.sc_->readGapOpen(); + if(bs_[curid].len_ == 0 && bs_[curid].e_.inited() && bs_[curid].e_.isReadGap()) { + gapp = prob_.sc_->readGapExtend(); + } + //assert(!local || bs_[curid].score_st_ >= gapp); + TAlScore score_en = bs_[curid].score_st_ - gapp; + bs_[id].init( + prob_, + curid, // parent ID + gapp, // penalty + score_en, // score_en + rowc, // row + colc-1, // col + e, // edit + hefc, // hef + false, // root? + false); // don't try to extend with exact matches + if(xmod == 0) xexit = true; + colc--; xmod--; + curid = id; + assert( local || bs_[curid].score_st_ >= targ_final); + //assert(!local || bs_[curid].score_st_ >= 0); + if(sel == 1 || sel == 5) { + hefc = 0; + } else { + hefc = 1; + } + } else { + assert_gt(rowc, 0); + // Reference gap + size_t id = bs_.alloc(); + Edit e((int)rowc, '-', "ACGTN"[qc], EDIT_TYPE_REF_GAP); + TAlScore gapp = prob_.sc_->refGapOpen(); + if(bs_[curid].len_ == 0 && bs_[curid].e_.inited() && bs_[curid].e_.isRefGap()) { + gapp = prob_.sc_->refGapExtend(); + } + //assert(!local || bs_[curid].score_st_ >= gapp); + TAlScore score_en = bs_[curid].score_st_ - gapp; + bs_[id].init( + prob_, + curid, // parent ID + gapp, // penalty + score_en, // score_en + rowc-1, // row + colc, // col + e, // edit + hefc, // hef + false, // root? 
+ false); // don't try to extend with exact matches + if(ymod == 0) yexit = true; + rowc--; ymod--; ymodTimesNcol -= sq_ncol; + curid = id; + assert( local || bs_[curid].score_st_ >= targ_final); + //assert(!local || bs_[curid].score_st_ >= 0); + if(sel == 3 || sel == 7) { + hefc = 0; + } else { + hefc = 2; + } + } + CHECK_ROW_COL(rowc, colc); + CpQuad * cur_new = NULL; + if(!xexit && !yexit) { + cur_new = sq_.ptr() + ymodTimesNcol + xmod; + } + // Check whether we made it to the top row or to a cell with score 0 + if(colc < 0 || rowc < 0 || + (cur_new != NULL && local && cur_new->sc[0] == 0)) + { + done = true; + assert(bs_[curid].isSolution(prob_)); + addSolution(curid); +#ifndef NDEBUG + // A check to see if any two adjacent branches in the backtrace + // overlap. If they do, the whole alignment will be filtered out + // in trySolution(...) + size_t cur = curid; + if(!bs_[cur].root_) { + size_t next = bs_[cur].parentId_; + while(!bs_[next].root_) { + assert_neq(cur, next); + if(bs_[next].len_ != 0 || bs_[cur].len_ == 0) { + assert(!bs_[cur].overlap(prob_, bs_[next])); + } + cur = next; + next = bs_[cur].parentId_; + } + } +#endif + return; + } + assert(!xexit || hefc == 0 || hefc == 1); + assert(!yexit || hefc == 0 || hefc == 2); + if(xexit || yexit) { + //assert(rowc < 0 || colc < 0 || prob_.cper_->isCheckpointed(rowc, colc)); + row_new = rowc; col_new = colc; + hef_new = hefc; + done = false; + if(rowc < 0 || colc < 0) { + assert(local); + targ_new = 0; + } else { + // TODO: Don't use scoreSquare + targ_new = prob_.cper_->scoreSquare(rowc, colc, hefc); + assert(local || targ_new >= targ); + assert(local || targ_new >= targ_final); + } + if(local && targ_new == 0) { + assert_eq(0, hefc); + done = true; + assert(bs_[curid].isSolution(prob_)); + addSolution(curid); + } + assert((row_new >= 0 && col_new >= 0) || done); + return; + } + } + assert(false); +} + +/** + * Caller gives us score_en, row and col. 
We figure out score_st and len_ + * by comparing characters from the strings. + * + * If this branch comes after a mismatch, (row, col) describe the cell that the + * mismatch occurs in. len_ is initially set to 1, and the next cell we test + * is the next cell up and to the left (row-1, col-1). + * + * If this branch comes after a read gap, (row, col) describe the leftmost cell + * involved in the gap. len_ is initially set to 0, and the next cell we test + * is the current cell (row, col). + * + * If this branch comes after a reference gap, (row, col) describe the upper + * cell involved in the gap. len_ is initially set to 0, and the next cell we + * test is the current cell (row, col). + */ +void BtBranch::init( + const BtBranchProblem& prob, + size_t parentId, + TAlScore penalty, + TAlScore score_en, + int64_t row, + int64_t col, + Edit e, + int hef, + bool root, + bool extend) +{ + score_en_ = score_en; + penalty_ = penalty; + score_st_ = score_en_; + row_ = row; + col_ = col; + parentId_ = parentId; + e_ = e; + root_ = root; + assert(!root_ || parentId == 0); + assert_lt(row, (int64_t)prob.qrylen_); + assert_lt(col, (int64_t)prob.reflen_); + // First match to check is diagonally above and to the left of the cell + // where the edit occurs + int64_t rowc = row; + int64_t colc = col; + len_ = 0; + if(e.inited() && e.isMismatch()) { + rowc--; colc--; + len_ = 1; + } + int64_t match = prob.sc_->match(); + bool cp = prob.usecp_; + size_t iters = 0; + curtailed_ = false; + if(extend) { + while(rowc >= 0 && colc >= 0) { + int rfm = prob.ref_[colc]; + assert_range(0, 16, rfm); + int rdc = prob.qry_[rowc]; + bool matches = (rfm & (1 << rdc)) != 0; + if(!matches) { + // What's the mismatch penalty? 
+ break; + } + // Get score from checkpointer + score_st_ += match; + if(cp && rowc - 1 >= 0 && colc - 1 >= 0 && + prob.cper_->isCheckpointed(rowc - 1, colc - 1)) + { + // Possibly prune + int16_t cpsc; + cpsc = prob.cper_->scoreTriangle(rowc - 1, colc - 1, hef); + if(cpsc + score_st_ < prob.targ_) { + curtailed_ = true; + break; + } + } + iters++; + rowc--; colc--; + } + } + assert_geq(rowc, -1); + assert_geq(colc, -1); + len_ = (int64_t)row - rowc; + assert_leq((int64_t)len_, row_+1); + assert_leq((int64_t)len_, col_+1); + assert_leq((int64_t)score_st_, (int64_t)prob.qrylen_ * match); +} + +/** + * Given a potential branch to add to the queue, see if we can follow the + * branch a little further first. If it's still valid, or if we reach a + * choice between valid outgoing paths, go ahead and add it to the queue. + */ +void BtBranchTracer::examineBranch( + int64_t row, + int64_t col, + const Edit& e, + TAlScore pen, // penalty associated with edit + TAlScore sc, + size_t parentId) +{ + size_t id = bs_.alloc(); + bs_[id].init(prob_, parentId, pen, sc, row, col, e, 0, false, true); + if(bs_[id].isSolution(prob_)) { + assert(bs_[id].isValid(prob_)); + addSolution(id); + } else { + // Check if this branch is legit + if(bs_[id].isValid(prob_)) { + add(id); + } else { + bs_.pop(); + } + } +} + +/** + * Take all possible ways of leaving the given branch and add them to the + * branch queue. + */ +void BtBranchTracer::addOffshoots(size_t bid) { + BtBranch& b = bs_[bid]; + TAlScore sc = b.score_en_; + int64_t match = prob_.sc_->match(); + int64_t scoreFloor = prob_.sc_->monotone ? MIN_I64 : 0; + bool cp = prob_.usecp_; // Are there are any checkpoints? 
+ ASSERT_ONLY(TAlScore perfectScore = prob_.sc_->perfectScore(prob_.qrylen_)); + assert_leq(prob_.targ_, perfectScore); + // For each cell in the branch + for(size_t i = 0 ; i < b.len_; i++) { + assert_leq((int64_t)i, b.row_+1); + assert_leq((int64_t)i, b.col_+1); + int64_t row = b.row_ - i, col = b.col_ - i; + int64_t bonusLeft = (row + 1) * match; + int64_t fromend = prob_.qrylen_ - row - 1; + bool allowGaps = fromend >= prob_.sc_->gapbar && row >= prob_.sc_->gapbar; + if(allowGaps && row >= 0 && col >= 0) { + if(col > 0) { + // Try a read gap - it's either an extension or an open + bool extend = b.e_.inited() && b.e_.isReadGap() && i == 0; + TAlScore rdgapPen = extend ? + prob_.sc_->readGapExtend() : prob_.sc_->readGapOpen(); + bool prune = false; + assert_gt(rdgapPen, 0); + if(cp && prob_.cper_->isCheckpointed(row, col - 1)) { + // Possibly prune + int16_t cpsc = (int16_t)prob_.cper_->scoreTriangle(row, col - 1, 0); + assert_leq(cpsc, perfectScore); + assert_geq(prob_.sc_->readGapOpen(), prob_.sc_->readGapExtend()); + TAlScore bonus = prob_.sc_->readGapOpen() - prob_.sc_->readGapExtend(); + assert_geq(bonus, 0); + if(cpsc + bonus + sc - rdgapPen < prob_.targ_) { + prune = true; + } + } + if(prune) { + if(extend) { nrdexPrune_++; } else { nrdopPrune_++; } + } else if(sc - rdgapPen >= scoreFloor && sc - rdgapPen + bonusLeft >= prob_.targ_) { + // Yes, we can introduce a read gap here + Edit e((int)row + 1, mask2dna[(int)prob_.ref_[col]], '-', EDIT_TYPE_READ_GAP); + assert(e.isReadGap()); + examineBranch(row, col - 1, e, rdgapPen, sc - rdgapPen, bid); + if(extend) { nrdex_++; } else { nrdop_++; } + } + } + if(row > 0) { + // Try a reference gap - it's either an extension or an open + bool extend = b.e_.inited() && b.e_.isRefGap() && i == 0; + TAlScore rfgapPen = (b.e_.inited() && b.e_.isRefGap()) ? 
+ prob_.sc_->refGapExtend() : prob_.sc_->refGapOpen(); + bool prune = false; + assert_gt(rfgapPen, 0); + if(cp && prob_.cper_->isCheckpointed(row - 1, col)) { + // Possibly prune + int16_t cpsc = (int16_t)prob_.cper_->scoreTriangle(row - 1, col, 0); + assert_leq(cpsc, perfectScore); + assert_geq(prob_.sc_->refGapOpen(), prob_.sc_->refGapExtend()); + TAlScore bonus = prob_.sc_->refGapOpen() - prob_.sc_->refGapExtend(); + assert_geq(bonus, 0); + if(cpsc + bonus + sc - rfgapPen < prob_.targ_) { + prune = true; + } + } + if(prune) { + if(extend) { nrfexPrune_++; } else { nrfopPrune_++; } + } else if(sc - rfgapPen >= scoreFloor && sc - rfgapPen + bonusLeft >= prob_.targ_) { + // Yes, we can introduce a ref gap here + Edit e((int)row, '-', "ACGTN"[(int)prob_.qry_[row]], EDIT_TYPE_REF_GAP); + assert(e.isRefGap()); + examineBranch(row - 1, col, e, rfgapPen, sc - rfgapPen, bid); + if(extend) { nrfex_++; } else { nrfop_++; } + } + } + } + // If we're at the top of the branch but not yet at the top of + // the DP table, a mismatch branch is also possible. 
+ if(i == b.len_ && !b.curtailed_ && row >= 0 && col >= 0) { + int rfm = prob_.ref_[col]; + assert_lt(row, (int64_t)prob_.qrylen_); + int rdc = prob_.qry_[row]; + int rdq = prob_.qual_[row]; + int scdiff = prob_.sc_->score(rdc, rfm, rdq - 33); + assert_lt(scdiff, 0); // at end of branch, so can't match + bool prune = false; + if(cp && row > 0 && col > 0 && prob_.cper_->isCheckpointed(row - 1, col - 1)) { + // Possibly prune + int16_t cpsc = prob_.cper_->scoreTriangle(row - 1, col - 1, 0); + assert_leq(cpsc, perfectScore); + assert_leq(cpsc + scdiff + sc, perfectScore); + if(cpsc + scdiff + sc < prob_.targ_) { + prune = true; + } + } + if(prune) { + nmm_++; + } else { + // Yes, we can introduce a mismatch here + if(sc + scdiff >= scoreFloor && sc + scdiff + bonusLeft >= prob_.targ_) { + Edit e((int)row, mask2dna[rfm], "ACGTN"[rdc], EDIT_TYPE_MM); + bool nmm = (mask2dna[rfm] == 'N' || rdc > 4); + assert_neq(e.chr, e.qchr); + assert_lt(scdiff, 0); + examineBranch(row - 1, col - 1, e, -scdiff, sc + scdiff, bid); + if(nmm) { nnmm_++; } else { nmm_++; } + } + } + } + sc += match; + } +} + +/** + * Sort unsorted branches, merge them with master sorted list. + */ +void BtBranchTracer::flushUnsorted() { + if(unsorted_.empty()) { + return; + } + unsorted_.sort(); + unsorted_.reverse(); +#ifndef NDEBUG + for(size_t i = 1; i < unsorted_.size(); i++) { + assert_leq(bs_[unsorted_[i].second].score_st_, bs_[unsorted_[i-1].second].score_st_); + } +#endif + EList *src2 = sortedSel_ ? &sorted1_ : &sorted2_; + EList *dest = sortedSel_ ? &sorted2_ : &sorted1_; + // Merge src1 and src2 into dest + dest->clear(); + size_t cur1 = 0, cur2 = cur_; + while(cur1 < unsorted_.size() || cur2 < src2->size()) { + // Take from 1 or 2 next? 
+ bool take1 = true; + if(cur1 == unsorted_.size()) { + take1 = false; + } else if(cur2 == src2->size()) { + take1 = true; + } else { + assert_neq(unsorted_[cur1].second, (*src2)[cur2]); + take1 = bs_[unsorted_[cur1].second] < bs_[(*src2)[cur2]]; + } + if(take1) { + dest->push_back(unsorted_[cur1++].second); // Take from list 1 + } else { + dest->push_back((*src2)[cur2++]); // Take from list 2 + } + } + assert_eq(cur1, unsorted_.size()); + assert_eq(cur2, src2->size()); + sortedSel_ = !sortedSel_; + cur_ = 0; + unsorted_.clear(); +} + +/** + * Try all the solutions accumulated so far. Solutions might be rejected + * if they, for instance, overlap a previous solution, have too many Ns, + * fail to overlap a core diagonal, etc. + */ +bool BtBranchTracer::trySolutions( + bool lookForOlap, + SwResult& res, + size_t& off, + size_t& nrej, + RandomSource& rnd, + bool& success) +{ + if(solutions_.size() > 0) { + for(size_t i = 0; i < solutions_.size(); i++) { + int ret = trySolution(solutions_[i], lookForOlap, res, off, nrej, rnd); + if(ret == BT_FOUND) { + success = true; + return true; // there were solutions and one was good + } + } + solutions_.clear(); + success = false; + return true; // there were solutions but none were good + } + return false; // there were no solutions to check +} + +/** + * Given the id of a branch that completes a successful backtrace, turn the + * chain of branches into + */ +int BtBranchTracer::trySolution( + size_t id, + bool lookForOlap, + SwResult& res, + size_t& off, + size_t& nrej, + RandomSource& rnd) +{ + AlnScore score; + BtBranch *br = &bs_[id]; + // 'br' corresponds to the leftmost edit in a right-to-left + // chain of edits. 
+ EList& ned = res.alres.ned(); + const BtBranch *cur = br, *prev = NULL; + size_t ns = 0, nrefns = 0; + size_t ngap = 0; + while(true) { + if(cur->e_.inited()) { + if(cur->e_.isMismatch()) { + if(cur->e_.qchr == 'N' || cur->e_.chr == 'N') { + ns++; + } + } else if(cur->e_.isGap()) { + ngap++; + } + if(cur->e_.chr == 'N') { + nrefns++; + } + ned.push_back(cur->e_); + } + if(cur->root_) { + break; + } + cur = &bs_[cur->parentId_]; + } + if(ns > prob_.nceil_) { + // Alignment has too many Ns in it! + res.reset(); + assert(res.alres.ned().empty()); + nrej++; + return BT_REJECTED_N; + } + // Update 'seenPaths_' + cur = br; + bool rejSeen = false; // set =true if we overlap prev path + bool rejCore = true; // set =true if we don't touch core diag + while(true) { + // Consider row, col, len, then do something + int64_t row = cur->row_, col = cur->col_; + assert_lt(row, (int64_t)prob_.qrylen_); + size_t fromend = prob_.qrylen_ - row - 1; + size_t diag = fromend + col; + // Calculate the diagonal within the *trimmed* rectangle, + // i.e. the rectangle we dealt with in align, gather and + // backtrack. + int64_t diagi = col - row; + // Now adjust to the diagonal within the *untrimmed* + // rectangle by adding on the amount trimmed from the left. + diagi += prob_.rect_->triml; + assert_lt(diag, seenPaths_.size()); + // Does it overlap a core diagonal? + if(diagi >= 0) { + size_t diag = (size_t)diagi; + if(diag >= prob_.rect_->corel && + diag <= prob_.rect_->corer) + { + // Yes it does - it's OK + rejCore = false; + } + } + if(lookForOlap) { + int64_t newlo, newhi; + if(cur->len_ == 0) { + if(prev != NULL && prev->len_ > 0) { + // If there's a gap at the base of a non-0 length branch, the + // gap will appear to overlap the branch if we give it length 1. 
+ newhi = newlo = 0; + } else { + // Read or ref gap with no matches coming off of it + newlo = row; + newhi = row + 1; + } + } else { + // Diagonal with matches + newlo = row - (cur->len_ - 1); + newhi = row + 1; + } + assert_geq(newlo, 0); + assert_geq(newhi, 0); + // Does the diagonal cover cells? + if(newhi > newlo) { + // Check whether there is any overlap with previously traversed + // cells + bool added = false; + const size_t sz = seenPaths_[diag].size(); + for(size_t i = 0; i < sz; i++) { + // Does the new interval overlap this already-seen + // interval? Also of interest: does it abut this + // already-seen interval? If so, we should merge them. + size_t lo = seenPaths_[diag][i].first; + size_t hi = seenPaths_[diag][i].second; + assert_lt(lo, hi); + size_t lo_sm = newlo, hi_sm = newhi; + if(hi - lo < hi_sm - lo_sm) { + swap(lo, lo_sm); + swap(hi, hi_sm); + } + if((lo <= lo_sm && hi > lo_sm) || + (lo < hi_sm && hi >= hi_sm)) + { + // One or both of the shorter interval's end points + // are contained in the longer interval - so they + // overlap. 
+ rejSeen = true; + // Merge them into one longer interval + seenPaths_[diag][i].first = min(lo, lo_sm); + seenPaths_[diag][i].second = max(hi, hi_sm); +#ifndef NDEBUG + for(int64_t ii = seenPaths_[diag][i].first; + ii < (int64_t)seenPaths_[diag][i].second; + ii++) + { + //cerr << "trySolution rejected (" << ii << ", " << (ii + col - row) << ")" << endl; + } +#endif + added = true; + break; + } else if(hi == lo_sm || lo == hi_sm) { + // Merge them into one longer interval + seenPaths_[diag][i].first = min(lo, lo_sm); + seenPaths_[diag][i].second = max(hi, hi_sm); +#ifndef NDEBUG + for(int64_t ii = seenPaths_[diag][i].first; + ii < (int64_t)seenPaths_[diag][i].second; + ii++) + { + //cerr << "trySolution rejected (" << ii << ", " << (ii + col - row) << ")" << endl; + } +#endif + added = true; + // Keep going in case it overlaps one of the other + // intervals + } + } + if(!added) { + seenPaths_[diag].push_back(make_pair(newlo, newhi)); + } + } + } + // After the merging that may have occurred above, it's no + // longer guarnateed that all the overlapping intervals in + // the list have been merged. That's OK though. We'll + // still get correct answers to overlap queries. 
+ if(cur->root_) { + assert_eq(0, cur->parentId_); + break; + } + prev = cur; + cur = &bs_[cur->parentId_]; + } // while(cur->e_.inited()) + if(rejSeen) { + res.reset(); + assert(res.alres.ned().empty()); + nrej++; + return BT_NOT_FOUND; + } + if(rejCore) { + res.reset(); + assert(res.alres.ned().empty()); + nrej++; + return BT_REJECTED_CORE_DIAG; + } + off = br->leftmostCol(); + score.score_ = prob_.targ_; + score.ns_ = ns; + score.gaps_ = ngap; + res.alres.setScore(score); + res.alres.setRefNs(nrefns); + size_t trimBeg = br->uppermostRow(); + size_t trimEnd = prob_.qrylen_ - prob_.row_ - 1; + assert_leq(trimBeg, prob_.qrylen_); + assert_leq(trimEnd, prob_.qrylen_); + TRefOff refoff = off + prob_.refoff_ + prob_.rect_->refl; + res.alres.setShape( + prob_.refid_, // ref id + refoff, // 0-based ref offset + prob_.treflen(), // ref length + prob_.fw_, // aligned to Watson? + prob_.qrylen_, // read length + 0, // read id + true, // pretrim soft? + 0, // pretrim 5' end + 0, // pretrim 3' end + true, // alignment trim soft? + prob_.fw_ ? trimBeg : trimEnd, // alignment trim 5' end + prob_.fw_ ? trimEnd : trimBeg); // alignment trim 3' end + return BT_FOUND; +} + +/** + * Get the next valid alignment given a backtrace problem. Return false + * if there is no valid solution. Use a backtracking search to find the + * solution. This can be very slow. + */ +bool BtBranchTracer::nextAlignmentBacktrace( + size_t maxiter, + SwResult& res, + size_t& off, + size_t& nrej, + size_t& niter, + RandomSource& rnd) +{ + assert(!empty() || !emptySolution()); + assert(prob_.inited()); + // There's a subtle case where we might fail to backtracing in + // local-alignment mode. The basic fact to remember is that when we're + // backtracing from the highest-scoring cell in the table, we're guaranteed + // to be able to backtrace without ever dipping below 0. But if we're + // backtracing from a cell other than the highest-scoring cell in the + // table, we might dip below 0. 
Dipping below 0 implies that there's a + // shorted local alignment with a better score. In which case, it's + // perfectly fair for us to abandon any path that dips below the floor, and + // this might result in the queue becoming empty before we finish. + bool result = false; + niter = 0; + while(!empty()) { + if(trySolutions(true, res, off, nrej, rnd, result)) { + return result; + } + if(niter++ >= maxiter) { + break; + } + size_t brid = best(rnd); // put best branch in 'br' + assert(!seen_.contains(brid)); + ASSERT_ONLY(seen_.insert(brid)); +#if 0 + BtBranch *br = &bs_[brid]; + cerr << brid + << ": targ:" << prob_.targ_ + << ", sc:" << br->score_st_ + << ", row:" << br->uppermostRow() + << ", nmm:" << nmm_ + << ", nnmm:" << nnmm_ + << ", nrdop:" << nrdop_ + << ", nrfop:" << nrfop_ + << ", nrdex:" << nrdex_ + << ", nrfex:" << nrfex_ + << ", nrdop_pr: " << nrdopPrune_ + << ", nrfop_pr: " << nrfopPrune_ + << ", nrdex_pr: " << nrdexPrune_ + << ", nrfex_pr: " << nrfexPrune_ + << endl; +#endif + addOffshoots(brid); + } + if(trySolutions(true, res, off, nrej, rnd, result)) { + return result; + } + return false; +} + +/** + * Get the next valid alignment given a backtrace problem. Return false + * if there is no valid solution. Use a triangle-fill backtrace to find + * the solution. This is usually fast (it's O(m + n)). + */ +bool BtBranchTracer::nextAlignmentFill( + size_t maxiter, + SwResult& res, + size_t& off, + size_t& nrej, + size_t& niter, + RandomSource& rnd) +{ + assert(prob_.inited()); + assert(!emptySolution()); + bool result = false; + if(trySolutions(false, res, off, nrej, rnd, result)) { + return result; + } + return false; +} + +/** + * Get the next valid alignment given the backtrace problem. 
Return false + * if there is no valid solution, e.g., if + */ +bool BtBranchTracer::nextAlignment( + size_t maxiter, + SwResult& res, + size_t& off, + size_t& nrej, + size_t& niter, + RandomSource& rnd) +{ + if(prob_.fill_) { + return nextAlignmentFill( + maxiter, + res, + off, + nrej, + niter, + rnd); + } else { + return nextAlignmentBacktrace( + maxiter, + res, + off, + nrej, + niter, + rnd); + } +} + +#ifdef MAIN_ALIGNER_BT + +#include + +int main(int argc, char **argv) { + size_t off = 0; + RandomSource rnd(77); + BtBranchTracer tr; + Scoring sc = Scoring::base1(); + SwResult res; + tr.init( + "ACGTACGT", // in: read sequence + "IIIIIIII", // in: quality sequence + 8, // in: read sequence length + "ACGTACGT", // in: reference sequence + 8, // in: reference sequence length + 0, // in: reference id + 0, // in: reference offset + true, // in: orientation + sc, // in: scoring scheme + 0, // in: N ceiling + 8, // in: alignment score + 7, // start in this row + 7, // start in this column + rnd); // random gen, to choose among equal paths + size_t nrej = 0; + tr.nextAlignment( + res, + off, + nrej, + rnd); +} + +#endif /*def MAIN_ALIGNER_BT*/ diff --git a/aligner_bt.h b/aligner_bt.h new file mode 100644 index 0000000..8056b7a --- /dev/null +++ b/aligner_bt.h @@ -0,0 +1,947 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#ifndef ALIGNER_BT_H_ +#define ALIGNER_BT_H_ + +#include +#include +#include "aligner_sw_common.h" +#include "aligner_result.h" +#include "scoring.h" +#include "edit.h" +#include "limit.h" +#include "dp_framer.h" +#include "sse_util.h" + +/* Say we've filled in a DP matrix in a cost-only manner, not saving the scores + * for each of the cells. At the end, we obtain a list of candidate cells and + * we'd like to backtrace from them. The per-cell scores are gone, but we have + * to re-create the correct path somehow. Hopefully we can do this without + * recreating most or al of the score matrix, since this takes too much memory. + * + * Approach 1: Naively refill the matrix. + * + * Just refill the matrix, perhaps backwards starting from the backtrace cell. + * Since this involves recreating all or most of the score matrix, this is not + * a good approach. + * + * Approach 2: Naive backtracking. + * + * Conduct a search through the space of possible backtraces, rooted at the + * candidate cell. To speed things along, we can prioritize paths that have a + * high score and that align more characters from the read. + * + * The approach is simple, but it's neither fast nor memory-efficient in + * general. + * + * Approach 3: Refilling with checkpoints. + * + * Refill the matrix "backwards" starting from the candidate cell, but use + * checkpoints to ensure that only a series of relatively small triangles or + * rectangles need to be refilled. The checkpoints must include elements from + * the H, E and F matrices; not just H. After each refill, we backtrace + * through the refilled area, then discard/reuse the fill memory. I call each + * such fill/backtrace a mini-fill/backtrace. + * + * If there's only one path to be found, then this is O(m+n). But what if + * there are many? And what if we would like to avoid paths that overlap in + * one or more cells? There are two ways we can make this more efficient: + * + * 1. 
Remember the re-calculated E/F/H values and try to retrieve them + * 2. Keep a record of cells that have already been traversed + * + * Legend: + * + * 1: Candidate cell + * 2: Final cell from first mini-fill/backtrace + * 3: Final cell from second mini-fill/backtrace (third not shown) + * +: Checkpointed cell + * *: Cell filled from first or second mini-fill/backtrace + * -: Unfilled cell + * + * ---++--------++--------++---- + * --++--------++*-------++----- + * -++--(etc)-++**------++------ + * ++--------+3***-----++------- + * +--------++****----++-------- + * --------++*****---++--------+ + * -------++******--++--------++ + * ------++*******-++*-------++- + * -----++********++**------++-- + * ----++********2+***-----++--- + * ---++--------++****----++---- + * --++--------++*****---++----- + * -++--------++*****1--++------ + * ++--------++--------++------- + * + * Approach 4: Backtracking with checkpoints. + * + * Conduct a search through the space of possible backtraces, rooted at the + * candidate cell. Use "checkpoints" to prune. That is, when a backtrace + * moves through a cell with a checkpointed score, consider the score + * accumulated so far and the cell's saved score; abort if those two scores + * add to something less than a valid score. Note we're only checkpointing H + * in this case (possibly; see "subtle point"), not E or F. + * + * Subtle point: checkpoint scores are a result of moving forward through + * the matrix whereas backtracking scores result from moving backward. This + * matters because the two paths that meet up at a cell might have both + * factored in a gap open penalty for the same gap, in which case we will + * underestimate the overall score and prune a good path.
Here are two ideas + * for how to resolve this: + * + * Idea 1: when we combine the forward and backward scores to find an overall + * score, and our backtrack procedure *just* made a horizontal or vertical + * move, add in a "bonus" equal to the gap open penalty of the appropriate + * type (read gap open for horizontal, ref gap open for vertical). This might + * overcompensate, since + * + * Idea 2: keep the E and F values for the checkpoints around, in addition to + * the H values. When it comes time to combine the score from the forward + * and backward paths, we consider the last move we made in the backward + * backtrace. If it's a read gap (horizontal move), then we calculate the + * overall score as: + * + * max(Score-backward + H-forward, Score-backward + E-forward + read-open) + * + * If it's a reference gap (vertical move), then we calculate the overall + * score as: + * + * max(Score-backward + H-forward, Score-backward + F-forward + ref-open) + * + * What does it mean to abort a backtrack? If we're starting a new branch + * and there is a checkpoint in the bottommost cell of the branch, and the + * overall score is less than the target, then we can simply ignore the + * branch. If the checkpoint occurs in the middle of a string of matches, we + * need to curtail the branch such that it doesn't include the checkpointed + * cell and we won't ever try to enter the checkpointed cell, e.g., on a + * mismatch. + * + * Approaches 3 and 4 seem reasonable, and could be combined. For simplicity, + * we implement only approach 4 for now.
+ * + * Checkpoint information is propagated from the fill process to the backtracer + * via a + */ + +enum { + BT_NOT_FOUND = 1, // could not obtain the backtrace because it + // overlapped a previous solution + BT_FOUND, // obtained a valid backtrace + BT_REJECTED_N, // backtrace rejected because it had too many Ns + BT_REJECTED_CORE_DIAG // backtrace rejected because it failed to overlap a + // core diagonal +}; + +/** + * Parameters for a matrix of potential backtrace problems to solve. + * Encapsulates information about: + * + * The problem given a particular reference substring: + * + * - The query string (nucleotides and qualities) + * - The reference substring (incl. orientation, offset into overall sequence) + * - Checkpoints (i.e. values of matrix cells) + * - Scoring scheme and other thresholds + * + * The problem given a particular reference substring AND a particular row and + * column from which to backtrace: + * + * - The row and column + * - The target score + */ +class BtBranchProblem { + +public: + + /** + * Create new uninitialized problem. + */ + BtBranchProblem() { reset(); } + + /** + * Initialize a new problem. 
+ */ + void initRef( + const char *qry, // query string (along rows) + const char *qual, // query quality string (along rows) + size_t qrylen, // query string (along rows) length + const char *ref, // reference string (along columns) + TRefOff reflen, // in-rectangle reference string length + TRefOff treflen,// total reference string length + TRefId refid, // reference id + TRefOff refoff, // reference offset + bool fw, // orientation of problem + const DPRect* rect, // dynamic programming rectangle filled out + const Checkpointer* cper, // checkpointer + const Scoring *sc, // scoring scheme + size_t nceil) // max # Ns allowed in alignment + { + qry_ = qry; + qual_ = qual; + qrylen_ = qrylen; + ref_ = ref; + reflen_ = reflen; + treflen_ = treflen; + refid_ = refid; + refoff_ = refoff; + fw_ = fw; + rect_ = rect; + cper_ = cper; + sc_ = sc; + nceil_ = nceil; + } + + /** + * Initialize a new problem. + */ + void initBt( + size_t row, // row + size_t col, // column + bool fill, // use a filling rather than a backtracking strategy + bool usecp, // use checkpoints to short-circuit while backtracking + TAlScore targ) // target score + { + row_ = row; + col_ = col; + targ_ = targ; + fill_ = fill; + usecp_ = usecp; + if(fill) { + assert(usecp_); + } + } + + /** + * Reset to uninitialized state. + */ + void reset() { + qry_ = qual_ = ref_ = NULL; + cper_ = NULL; + rect_ = NULL; + sc_ = NULL; + qrylen_ = reflen_ = treflen_ = refid_ = refoff_ = row_ = col_ = targ_ = nceil_ = 0; + fill_ = fw_ = usecp_ = false; + } + + /** + * Return true iff the BtBranchProblem has been initialized. + */ + bool inited() const { + return qry_ != NULL; + } + +#ifndef NDEBUG + /** + * Sanity-check the problem. 
+ */ + bool repOk() const { + assert_gt(qrylen_, 0); + assert_gt(reflen_, 0); + assert_gt(treflen_, 0); + assert_lt(row_, qrylen_); + assert_lt((TRefOff)col_, reflen_); + return true; + } +#endif + + size_t reflen() const { return reflen_; } + size_t treflen() const { return treflen_; } + +protected: + + const char *qry_; // query string (along rows) + const char *qual_; // query quality string (along rows) + size_t qrylen_; // query string (along rows) length + const char *ref_; // reference string (along columns) + TRefOff reflen_; // in-rectangle reference string length + TRefOff treflen_;// total reference string length + TRefId refid_; // reference id + TRefOff refoff_; // reference offset + bool fw_; // orientation of problem + const DPRect* rect_; // dynamic programming rectangle filled out + size_t row_; // starting row + size_t col_; // starting column + TAlScore targ_; // target score + const Checkpointer *cper_; // checkpointer + bool fill_; // use mini-fills + bool usecp_; // use checkpointing? + const Scoring *sc_; // scoring scheme + size_t nceil_; // max # Ns allowed in alignment + + friend class BtBranch; + friend class BtBranchQ; + friend class BtBranchTracer; +}; + +/** + * Encapsulates a "branch" which is a diagonal of cells (possibly of length 0) + * in the matrix where all the cells are matches. These stretches are linked + * together by edits to form a full backtrace path through the matrix. Lengths + * are measured w/r/t to the number of rows traversed by the path, so a branch + * that represents a read gap extension could have length = 0. + * + * At the end of the day, the full backtrace path is represented as a list of + * BtBranch's where each BtBranch represents a stretch of matching cells (and + * up to one mismatching cell at its bottom extreme) ending in an edit (or in + * the bottommost row, in which case the edit is uninitialized). 
Each + * BtBranch's row and col fields indicate the bottommost cell involved in the + * diagonal stretch of matches, and the len_ field indicates the length of the + * stretch of matches. Note that the edits themselves also correspond to + * movement through the matrix. + * + * A related issue is how we record which cells have been visited so that we + * never report a pair of paths both traversing the same (row, col) of the + * overall DP matrix. This gets a little tricky because we have to take into + * account the cells covered by *edits* in addition to the cells covered by the + * stretches of matches. For instance: imagine a mismatch. That takes up a + * cell of the DP matrix, but it may or may not be preceded by a string of + * matches. It's hard to imagine how to represent this unless we let the + * mismatch "count toward" the len_ of the branch and let (row, col) refer to + * the cell where the mismatch occurs. + * + * We need BtBranches to "live forever" so that we can make some BtBranches + * parents of others using parent pointers. For this reason, BtBranch's are + * stored in an EFactory object in the BtBranchTracer class. + */ +class BtBranch { + +public: + + BtBranch() { reset(); } + + BtBranch( + const BtBranchProblem& prob, + size_t parentId, + TAlScore penalty, + TAlScore score_en, + int64_t row, + int64_t col, + Edit e, + int hef, + bool root, + bool extend) + { + init(prob, parentId, penalty, score_en, row, col, e, hef, root, extend); + } + + /** + * Reset to uninitialized state. + */ + void reset() { + parentId_ = 0; + score_st_ = score_en_ = len_ = row_ = col_ = 0; + curtailed_ = false; + e_.reset(); + } + + /** + * Caller gives us score_en, row and col. We figure out score_st and len_ + * by comparing characters from the strings. 
+ */ + void init( + const BtBranchProblem& prob, + size_t parentId, + TAlScore penalty, + TAlScore score_en, + int64_t row, + int64_t col, + Edit e, + int hef, + bool root, + bool extend); + + /** + * Return true iff this branch ends in a solution to the backtrace problem. + */ + bool isSolution(const BtBranchProblem& prob) const { + const bool end2end = prob.sc_->monotone; + return score_st_ == prob.targ_ && (!end2end || endsInFirstRow()); + } + + /** + * Return true iff this branch could potentially lead to a valid alignment. + */ + bool isValid(const BtBranchProblem& prob) const { + int64_t scoreFloor = prob.sc_->monotone ? MIN_I64 : 0; + if(score_st_ < scoreFloor) { + // Dipped below the score floor + return false; + } + if(isSolution(prob)) { + // It's a solution, so it's also valid + return true; + } + if((int64_t)len_ > row_) { + // Went all the way to the top row + //assert_leq(score_st_, prob.targ_); + return score_st_ == prob.targ_; + } else { + int64_t match = prob.sc_->match(); + int64_t bonusLeft = (row_ + 1 - len_) * match; + return score_st_ + bonusLeft >= prob.targ_; + } + } + + /** + * Return true iff this branch overlaps with the given branch. 
+ */ + bool overlap(const BtBranchProblem& prob, const BtBranch& bt) const { + // Calculate this branch's diagonal + assert_lt(row_, (int64_t)prob.qrylen_); + size_t fromend = prob.qrylen_ - row_ - 1; + size_t diag = fromend + col_; + int64_t lo = 0, hi = row_ + 1; + if(len_ == 0) { + lo = row_; + } else { + lo = row_ - (len_ - 1); + } + // Calculate other branch's diagonal + assert_lt(bt.row_, (int64_t)prob.qrylen_); + size_t ofromend = prob.qrylen_ - bt.row_ - 1; + size_t odiag = ofromend + bt.col_; + if(diag != odiag) { + return false; + } + int64_t olo = 0, ohi = bt.row_ + 1; + if(bt.len_ == 0) { + olo = bt.row_; + } else { + olo = bt.row_ - (bt.len_ - 1); + } + int64_t losm = olo, hism = ohi; + if(hi - lo < ohi - olo) { + swap(lo, losm); + swap(hi, hism); + } + if((lo <= losm && hi > losm) || (lo < hism && hi >= hism)) { + return true; + } + return false; + } + + /** + * Return true iff this branch is higher priority than the branch 'o'. + */ + bool operator<(const BtBranch& o) const { + // Prioritize uppermost above score + if(uppermostRow() != o.uppermostRow()) { + return uppermostRow() < o.uppermostRow(); + } + if(score_st_ != o.score_st_) return score_st_ > o.score_st_; + if(row_ != o.row_) return row_ < o.row_; + if(col_ != o.col_) return col_ > o.col_; + if(parentId_ != o.parentId_) return parentId_ > o.parentId_; + assert(false); + return false; + } + + /** + * Return true iff the topmost cell involved in this branch is in the top + * row. + */ + bool endsInFirstRow() const { + assert_leq((int64_t)len_, row_ + 1); + return (int64_t)len_ == row_+1; + } + + /** + * Return the uppermost row covered by this branch. + */ + size_t uppermostRow() const { + assert_geq(row_ + 1, (int64_t)len_); + return row_ + 1 - (int64_t)len_; + } + + /** + * Return the leftmost column covered by this branch. + */ + size_t leftmostCol() const { + assert_geq(col_ + 1, (int64_t)len_); + return col_ + 1 - (int64_t)len_; + } + +#ifndef NDEBUG + /** + * Sanity-check this BtBranch. 
+ */ + bool repOk() const { + assert(root_ || e_.inited()); + assert_gt(len_, 0); + assert_geq(col_ + 1, (int64_t)len_); + assert_geq(row_ + 1, (int64_t)len_); + return true; + } +#endif + +protected: + + // ID of the parent branch. + size_t parentId_; + + // Penalty associated with the edit at the bottom of this branch (0 if + // there is no edit) + TAlScore penalty_; + + // Score at the beginning of the branch + TAlScore score_st_; + + // Score at the end of the branch (taking the edit into account) + TAlScore score_en_; + + // Length of the branch. That is, the total number of diagonal cells + // involved in all the matches and in the edit (if any). Should always be + // > 0. + size_t len_; + + // The row of the final (bottommost) cell in the branch. This might be the + // bottommost match if the branch has no associated edit. Otherwise, it's + // the cell occupied by the edit. + int64_t row_; + + // The column of the final (bottommost) cell in the branch. + int64_t col_; + + // The edit at the bottom of the branch. If this is the bottommost branch + // in the alignment and it does not end in an edit, then this remains + // uninitialized. + Edit e_; + + // True iff this is the bottommost branch in the alignment. We can't just + // use row_ to tell us this because local alignments don't necessarily end + // in the last row. + bool root_; + + bool curtailed_; // true -> pruned at a checkpoint where we otherwise + // would have had a match + +friend class BtBranchQ; +friend class BtBranchTracer; + +}; + +/** + * Instantiate and solve best-first branch-based backtraces. + */ +class BtBranchTracer { + +public: + + explicit BtBranchTracer() : + prob_(), bs_(), seenPaths_(DP_CAT), sawcell_(DP_CAT), doTri_() { } + + /** + * Add a branch to the queue. + */ + void add(size_t id) { + assert(!bs_[id].isSolution(prob_)); + unsorted_.push_back(make_pair(bs_[id].score_st_, id)); + } + + /** + * Add a branch to the list of solutions. 
+ */ + void addSolution(size_t id) { + assert(bs_[id].isSolution(prob_)); + solutions_.push_back(id); + } + + /** + * Given a potential branch to add to the queue, see if we can follow the + * branch a little further first. If it's still valid, or if we reach a + * choice between valid outgoing paths, go ahead and add it to the queue. + */ + void examineBranch( + int64_t row, + int64_t col, + const Edit& e, + TAlScore pen, + TAlScore sc, + size_t parentId); + + /** + * Take all possible ways of leaving the given branch and add them to the + * branch queue. + */ + void addOffshoots(size_t bid); + + /** + * Get the best branch and remove it from the priority queue. + */ + size_t best(RandomSource& rnd) { + assert(!empty()); + flushUnsorted(); + assert_gt(sortedSel_ ? sorted1_.size() : sorted2_.size(), cur_); + // Perhaps shuffle everyone who's tied for first? + size_t id = sortedSel_ ? sorted1_[cur_] : sorted2_[cur_]; + cur_++; + return id; + } + + /** + * Return true iff there are no branches left to try. + */ + bool empty() const { + return size() == 0; + } + + /** + * Return the size, i.e. the total number of branches contained. + */ + size_t size() const { + return unsorted_.size() + + (sortedSel_ ? sorted1_.size() : sorted2_.size()) - cur_; + } + + /** + * Return true iff there are no solutions left to try. + */ + bool emptySolution() const { + return sizeSolution() == 0; + } + + /** + * Return the size of the solution set so far. + */ + size_t sizeSolution() const { + return solutions_.size(); + } + + /** + * Sort unsorted branches, merge them with master sorted list. + */ + void flushUnsorted(); + +#ifndef NDEBUG + /** + * Sanity-check the queue. + */ + bool repOk() const { + assert_lt(cur_, (sortedSel_ ? sorted1_.size() : sorted2_.size())); + return true; + } +#endif + + /** + * Initialize the tracer with respect to a new read. 
This involves + * resetting all the state relating to the set of cells already visited + */ + void initRef( + const char* rd, // in: read sequence + const char* qu, // in: quality sequence + size_t rdlen, // in: read sequence length + const char* rf, // in: reference sequence + size_t rflen, // in: in-rectangle reference sequence length + TRefOff trflen, // in: total reference sequence length + TRefId refid, // in: reference id + TRefOff refoff, // in: reference offset + bool fw, // in: orientation + const DPRect *rect, // in: DP rectangle + const Checkpointer *cper, // in: checkpointer + const Scoring& sc, // in: scoring scheme + size_t nceil) // in: N ceiling + { + prob_.initRef(rd, qu, rdlen, rf, rflen, trflen, refid, refoff, fw, rect, cper, &sc, nceil); + const size_t ndiag = rflen + rdlen - 1; + seenPaths_.resize(ndiag); + for(size_t i = 0; i < ndiag; i++) { + seenPaths_[i].clear(); + } + // clear each of the per-column sets + if(sawcell_.size() < rflen) { + size_t isz = sawcell_.size(); + sawcell_.resize(rflen); + for(size_t i = isz; i < rflen; i++) { + sawcell_[i].setCat(DP_CAT); + } + } + for(size_t i = 0; i < rflen; i++) { + sawcell_[i].setCat(DP_CAT); + sawcell_[i].clear(); // clear the set + } + } + + /** + * Initialize with a new backtrace. + */ + void initBt( + TAlScore escore, // in: alignment score + size_t row, // in: start in this row + size_t col, // in: start in this column + bool fill, // in: use mini-filling? + bool usecp, // in: use checkpointing? + bool doTri, // in: triangle-shaped mini-fills? 
+ RandomSource& rnd) // in: random gen, to choose among equal paths + { + prob_.initBt(row, col, fill, usecp, escore); + Edit e; e.reset(); + unsorted_.clear(); + solutions_.clear(); + sorted1_.clear(); + sorted2_.clear(); + cur_ = 0; + nmm_ = 0; // number of mismatches attempted + nnmm_ = 0; // number of mismatches involving N attempted + nrdop_ = 0; // number of read gap opens attempted + nrfop_ = 0; // number of ref gap opens attempted + nrdex_ = 0; // number of read gap extensions attempted + nrfex_ = 0; // number of ref gap extensions attempted + nmmPrune_ = 0; // number of mismatches attempted + nnmmPrune_ = 0; // number of mismatches involving N attempted + nrdopPrune_ = 0; // number of read gap opens attempted + nrfopPrune_ = 0; // number of ref gap opens attempted + nrdexPrune_ = 0; // number of read gap extensions attempted + nrfexPrune_ = 0; // number of ref gap extensions attempted + row_ = row; + col_ = col; + doTri_ = doTri; + bs_.clear(); + if(!prob_.fill_) { + size_t id = bs_.alloc(); + bs_[id].init( + prob_, + 0, // parent id + 0, // penalty + 0, // starting score + row, // row + col, // column + e, + 0, + true, // this is the root + true); // this should be extend with exact matches + if(bs_[id].isSolution(prob_)) { + addSolution(id); + } else { + add(id); + } + } else { + int64_t row = row_, col = col_; + TAlScore targsc = prob_.targ_; + int hef = 0; + bool done = false, abort = false; + size_t depth = 0; + while(!done && !abort) { + // Accumulate edits as we go. We can do this by adding + // BtBranches to the bs_ structure. Each step of the backtrace + // either involves an edit (thereby starting a new branch) or + // extends the previous branch by one more position. + // + // Note: if the BtBranches are in line, then trySolution can be + // used to populate the SwResult and check for various + // situations where we might reject the alignment (i.e. due to + // a cell having been visited previously). 
+ if(doTri_) { + triangleFill( + row, // row of cell to backtrace from + col, // column of cell to backtrace from + hef, // cell to bt from: H (0), E (1), or F (2) + targsc, // score of cell to backtrace from + prob_.targ_, // score of alignment we're looking for + rnd, // pseudo-random generator + row, // out: row we ended up in after bt + col, // out: column we ended up in after bt + hef, // out: H/E/F after backtrace + targsc, // out: score up to cell we ended up in + done, // out: finished tracing out an alignment? + abort); // out: aborted b/c cell was seen before? + } else { + squareFill( + row, // row of cell to backtrace from + col, // column of cell to backtrace from + hef, // cell to bt from: H (0), E (1), or F (2) + targsc, // score of cell to backtrace from + prob_.targ_, // score of alignment we're looking for + rnd, // pseudo-random generator + row, // out: row we ended up in after bt + col, // out: column we ended up in after bt + hef, // out: H/E/F after backtrace + targsc, // out: score up to cell we ended up in + done, // out: finished tracing out an alignment? + abort); // out: aborted b/c cell was seen before? + } + if(depth >= ndep_.size()) { + ndep_.resize(depth+1); + ndep_[depth] = 1; + } else { + ndep_[depth]++; + } + depth++; + assert((row >= 0 && col >= 0) || done); + } + } + ASSERT_ONLY(seen_.clear()); + } + + /** + * Get the next valid alignment given the backtrace problem. Return false + * if there is no valid solution, e.g., if + */ + bool nextAlignment( + size_t maxiter, + SwResult& res, + size_t& off, + size_t& nrej, + size_t& niter, + RandomSource& rnd); + + /** + * Return true iff this tracer has been initialized + */ + bool inited() const { + return prob_.inited(); + } + + /** + * Return true iff the mini-fills are triangle-shaped. + */ + bool doTri() const { return doTri_; } + + /** + * Fill in a triangle of the DP table and backtrace from the given cell to + * a cell in the previous checkpoint, or to the terminal cell. 
+ */ + void triangleFill( + int64_t rw, // row of cell to backtrace from + int64_t cl, // column of cell to backtrace from + int hef, // cell to backtrace from is H (0), E (1), or F (2) + TAlScore targ, // score of cell to backtrace from + TAlScore targ_final, // score of alignment we're looking for + RandomSource& rnd, // pseudo-random generator + int64_t& row_new, // out: row we ended up in after backtrace + int64_t& col_new, // out: column we ended up in after backtrace + int& hef_new, // out: H/E/F after backtrace + TAlScore& targ_new, // out: score up to cell we ended up in + bool& done, // out: finished tracing out an alignment? + bool& abort); // out: aborted b/c cell was seen before? + + /** + * Fill in a square of the DP table and backtrace from the given cell to + * a cell in the previous checkpoint, or to the terminal cell. + */ + void squareFill( + int64_t rw, // row of cell to backtrace from + int64_t cl, // column of cell to backtrace from + int hef, // cell to backtrace from is H (0), E (1), or F (2) + TAlScore targ, // score of cell to backtrace from + TAlScore targ_final, // score of alignment we're looking for + RandomSource& rnd, // pseudo-random generator + int64_t& row_new, // out: row we ended up in after backtrace + int64_t& col_new, // out: column we ended up in after backtrace + int& hef_new, // out: H/E/F after backtrace + TAlScore& targ_new, // out: score up to cell we ended up in + bool& done, // out: finished tracing out an alignment? + bool& abort); // out: aborted b/c cell was seen before? + +protected: + + /** + * Get the next valid alignment given a backtrace problem. Return false + * if there is no valid solution. Use a backtracking search to find the + * solution. This can be very slow. + */ + bool nextAlignmentBacktrace( + size_t maxiter, + SwResult& res, + size_t& off, + size_t& nrej, + size_t& niter, + RandomSource& rnd); + + /** + * Get the next valid alignment given a backtrace problem. 
Return false + * if there is no valid solution. Use a triangle-fill backtrace to find + * the solution. This is usually fast (it's O(m + n)). + */ + bool nextAlignmentFill( + size_t maxiter, + SwResult& res, + size_t& off, + size_t& nrej, + size_t& niter, + RandomSource& rnd); + + /** + * Try all the solutions accumulated so far. Solutions might be rejected + * if they, for instance, overlap a previous solution, have too many Ns, + * fail to overlap a core diagonal, etc. + */ + bool trySolutions( + bool lookForOlap, + SwResult& res, + size_t& off, + size_t& nrej, + RandomSource& rnd, + bool& success); + + /** + * See if a given solution branch works as a solution (i.e. doesn't overlap + * another one, have too many Ns, fail to overlap a core diagonal, etc.) + */ + int trySolution( + size_t id, + bool lookForOlap, + SwResult& res, + size_t& off, + size_t& nrej, + RandomSource& rnd); + + BtBranchProblem prob_; // problem configuration + EFactory bs_; // global BtBranch factory + + // already reported alignments going through these diagonal segments + ELList > seenPaths_; + ELSet sawcell_; // cells already backtraced through + + EList > unsorted_; // unsorted list of as-yet-unflished BtBranches + EList sorted1_; // list of BtBranch, sorted by score + EList sorted2_; // list of BtBranch, sorted by score + EList solutions_; // list of solution branches + bool sortedSel_; // true -> 1, false -> 2 + size_t cur_; // cursor into sorted list to start from + + size_t nmm_; // number of mismatches attempted + size_t nnmm_; // number of mismatches involving N attempted + size_t nrdop_; // number of read gap opens attempted + size_t nrfop_; // number of ref gap opens attempted + size_t nrdex_; // number of read gap extensions attempted + size_t nrfex_; // number of ref gap extensions attempted + + size_t nmmPrune_; // + size_t nnmmPrune_; // + size_t nrdopPrune_; // + size_t nrfopPrune_; // + size_t nrdexPrune_; // + size_t nrfexPrune_; // + + size_t row_; // row + size_t col_; 
// column + + bool doTri_; // true -> fill in triangles; false -> squares + EList sq_; // square to fill when doing mini-fills + ELList tri_; // triangle to fill when doing mini-fills + EList ndep_; // # triangles mini-filled at various depths + +#ifndef NDEBUG + ESet seen_; // seedn branch ids; should never see same twice +#endif +}; + +#endif /*ndef ALIGNER_BT_H_*/ diff --git a/aligner_cache.cpp b/aligner_cache.cpp new file mode 100644 index 0000000..7a8de26 --- /dev/null +++ b/aligner_cache.cpp @@ -0,0 +1,181 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#include "aligner_cache.h" +#include "tinythread.h" + +#ifdef ALIGNER_CACHE_MAIN + +#include +#include +#include +#include "random_source.h" + +using namespace std; + +enum { + ARG_TESTS = 256 +}; + +static const char *short_opts = "vCt"; +static struct option long_opts[] = { + {(char*)"verbose", no_argument, 0, 'v'}, + {(char*)"tests", no_argument, 0, ARG_TESTS}, +}; + +static void printUsage(ostream& os) { + os << "Usage: sawhi-cache [options]*" << endl; + os << "Options:" << endl; + os << " --tests run unit tests" << endl; + os << " -v/--verbose talkative mode" << endl; +} + +int gVerbose = 0; + +static void add( + RedBlack& t, + Pool& p, + const char *dna) +{ + QKey qk; + qk.init(BTDnaString(dna, true)); + t.add(p, qk, NULL); +} + +/** + * Small tests for the AlignmentCache. + */ +static void aligner_cache_tests() { + RedBlack rb(1024); + Pool p(64 * 1024, 1024); + // Small test + add(rb, p, "ACGTCGATCGT"); + add(rb, p, "ACATCGATCGT"); + add(rb, p, "ACGACGATCGT"); + add(rb, p, "ACGTAGATCGT"); + add(rb, p, "ACGTCAATCGT"); + add(rb, p, "ACGTCGCTCGT"); + add(rb, p, "ACGTCGAACGT"); + assert_eq(7, rb.size()); + rb.clear(); + p.clear(); + // Another small test + add(rb, p, "ACGTCGATCGT"); + add(rb, p, "CCGTCGATCGT"); + add(rb, p, "TCGTCGATCGT"); + add(rb, p, "GCGTCGATCGT"); + add(rb, p, "AAGTCGATCGT"); + assert_eq(5, rb.size()); + rb.clear(); + p.clear(); + // Regression test (attempt to make it smaller) + add(rb, p, "CCTA"); + add(rb, p, "AGAA"); + add(rb, p, "TCTA"); + add(rb, p, "GATC"); + add(rb, p, "CTGC"); + add(rb, p, "TTGC"); + add(rb, p, "GCCG"); + add(rb, p, "GGAT"); + rb.clear(); + p.clear(); + // Regression test + add(rb, p, "CCTA"); + add(rb, p, "AGAA"); + add(rb, p, "TCTA"); + add(rb, p, "GATC"); + add(rb, p, "CTGC"); + add(rb, p, "CATC"); + add(rb, p, "CAAA"); + add(rb, p, "CTAT"); + add(rb, p, "CTCA"); + add(rb, p, "TTGC"); + add(rb, p, "GCCG"); + add(rb, p, "GGAT"); + assert_eq(12, rb.size()); + rb.clear(); + p.clear(); + // Larger random 
test + EList strs; + char buf[5]; + for(int i = 0; i < 4; i++) { + for(int j = 0; j < 4; j++) { + for(int k = 0; k < 4; k++) { + for(int m = 0; m < 4; m++) { + buf[0] = "ACGT"[i]; + buf[1] = "ACGT"[j]; + buf[2] = "ACGT"[k]; + buf[3] = "ACGT"[m]; + buf[4] = '\0'; + strs.push_back(BTDnaString(buf, true)); + } + } + } + } + // Add all of the 4-mers in several different random orders + RandomSource rand; + for(uint32_t runs = 0; runs < 100; runs++) { + rb.clear(); + p.clear(); + assert_eq(0, rb.size()); + rand.init(runs); + EList used; + used.resize(256); + for(int i = 0; i < 256; i++) used[i] = false; + for(int i = 0; i < 256; i++) { + int r = rand.nextU32() % (256-i); + int unused = 0; + bool added = false; + for(int j = 0; j < 256; j++) { + if(!used[j] && unused == r) { + used[j] = true; + QKey qk; + qk.init(strs[j]); + rb.add(p, qk, NULL); + added = true; + break; + } + if(!used[j]) unused++; + } + assert(added); + } + } +} + +/** + * A way of feeding simply tests to the seed alignment infrastructure. + */ +int main(int argc, char **argv) { + int option_index = 0; + int next_option; + do { + next_option = getopt_long(argc, argv, short_opts, long_opts, &option_index); + switch (next_option) { + case 'v': gVerbose = true; break; + case ARG_TESTS: aligner_cache_tests(); return 0; + case -1: break; + default: { + cerr << "Unknown option: " << (char)next_option << endl; + printUsage(cerr); + exit(1); + } + } + } while(next_option != -1); +} +#endif diff --git a/aligner_cache.h b/aligner_cache.h new file mode 100644 index 0000000..2237071 --- /dev/null +++ b/aligner_cache.h @@ -0,0 +1,1013 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ALIGNER_CACHE_H_ +#define ALIGNER_CACHE_H_ + +/** + * CACHING + * + * By caching the results of some alignment sub-problems, we hope to + * enable a "fast path" for read alignment whereby answers are mostly + * looked up rather than calculated from scratch. This is particularly + * effective when the input is sorted or otherwise grouped in a way + * that brings together reads with (at least some) seed sequences in + * common. + * + * But the cache is also where results are held, regardless of whether + * the results are maintained & re-used across reads. + * + * The cache consists of two linked portions: + * + * 1. A multimap from seed strings (i.e. read substrings) to reference strings + * that are within some edit distance (roughly speaking). This is the "seed + * multimap". + * + * Key: Read substring (2-bit-per-base encoded + length) + * Value: Set of reference substrings (i.e. keys into the suffix + * array multimap). + * + * 2. A multimap from reference strings to the corresponding elements of the + * suffix array. Elements are filled in with reference-offset info as it's + * calculated. This is the "suffix array multimap" + * + * Key: Reference substring (2-bit-per-base encoded + length) + * Value: (a) top from BWT, (b) length of range, (c) offset of first + * range element in + * + * For both multimaps, we use a combo Red-Black tree and EList. The payload in + * the Red-Black tree nodes points to a range in the EList. 
+ */ + +#include +#include "ds.h" +#include "read.h" +#include "threading.h" +#include "mem_ids.h" +#include "simple_func.h" +#include "btypes.h" + +#define CACHE_PAGE_SZ (16 * 1024) + +typedef PListSlice TSlice; + +/** + * Key for the query multimap: the read substring and its length. + */ +struct QKey { + + /** + * Initialize invalid QKey. + */ + QKey() { reset(); } + + /** + * Initialize QKey from DNA string. + */ + QKey(const BTDnaString& s ASSERT_ONLY(, BTDnaString& tmp)) { + init(s ASSERT_ONLY(, tmp)); + } + + /** + * Initialize QKey from DNA string. Rightmost character is placed in the + * least significant bitpair. + */ + bool init( + const BTDnaString& s + ASSERT_ONLY(, BTDnaString& tmp)) + { + seq = 0; + len = (uint32_t)s.length(); + ASSERT_ONLY(tmp.clear()); + if(len > 32) { + len = 0xffffffff; + return false; // wasn't cacheable + } else { + // Rightmost char of 's' goes in the least significant bitpair + for(size_t i = 0; i < 32 && i < s.length(); i++) { + int c = (int)s.get(i); + assert_range(0, 4, c); + if(c == 4) { + len = 0xffffffff; + return false; + } + seq = (seq << 2) | s.get(i); + } + ASSERT_ONLY(toString(tmp)); + assert(sstr_eq(tmp, s)); + assert_leq(len, 32); + return true; // was cacheable + } + } + + /** + * Convert this key to a DNA string. + */ + void toString(BTDnaString& s) { + s.resize(len); + uint64_t sq = seq; + for(int i = (len)-1; i >= 0; i--) { + s.set((uint32_t)(sq & 3), i); + sq >>= 2; + } + } + + /** + * Return true iff the read substring is cacheable. + */ + bool cacheable() const { return len != 0xffffffff; } + + /** + * Reset to uninitialized state. + */ + void reset() { seq = 0; len = 0xffffffff; } + + /** + * True -> my key is less than the given key. + */ + bool operator<(const QKey& o) const { + return seq < o.seq || (seq == o.seq && len < o.len); + } + + /** + * True -> my key is greater than the given key. 
+ */ + bool operator>(const QKey& o) const { + return !(*this < o || *this == o); + } + + /** + * True -> my key is equal to the given key. + */ + bool operator==(const QKey& o) const { + return seq == o.seq && len == o.len; + } + + + /** + * True -> my key is not equal to the given key. + */ + bool operator!=(const QKey& o) const { + return !(*this == o); + } + +#ifndef NDEBUG + /** + * Check that this is a valid, initialized QKey. + */ + bool repOk() const { + return len != 0xffffffff; + } +#endif + + uint64_t seq; // sequence + uint32_t len; // length of sequence +}; + +template +class AlignmentCache; + +/** + * Payload for the query multimap: a range of elements in the reference + * string list. + */ +template +class QVal { + +public: + + QVal() { reset(); } + + /** + * Return the offset of the first reference substring in the qlist. + */ + index_t offset() const { return i_; } + + /** + * Return the number of reference substrings associated with a read + * substring. + */ + index_t numRanges() const { + assert(valid()); + return rangen_; + } + + /** + * Return the number of elements associated with all associated + * reference substrings. + */ + index_t numElts() const { + assert(valid()); + return eltn_; + } + + /** + * Return true iff the read substring is not associated with any + * reference substrings. + */ + bool empty() const { + assert(valid()); + return numRanges() == 0; + } + + /** + * Return true iff the QVal is valid. + */ + bool valid() const { return rangen_ != (index_t)OFF_MASK; } + + /** + * Reset to invalid state. + */ + void reset() { + i_ = 0; rangen_ = eltn_ = (index_t)OFF_MASK; + } + + /** + * Initialize Qval. + */ + void init(index_t i, index_t ranges, index_t elts) { + i_ = i; rangen_ = ranges; eltn_ = elts; + } + + /** + * Tally another range with given number of elements. 
+ */ + void addRange(index_t numElts) { + rangen_++; + eltn_ += numElts; + } + +#ifndef NDEBUG + /** + * Check that this QVal is internally consistent and consistent + * with the contents of the given cache. + */ + bool repOk(const AlignmentCache& ac) const; +#endif + +protected: + + index_t i_; // idx of first elt in qlist + index_t rangen_; // # ranges (= # associated reference substrings) + index_t eltn_; // # elements (total) +}; + +/** + * Key for the suffix array multimap: the reference substring and its + * length. Same as QKey so I typedef it. + */ +typedef QKey SAKey; + +/** + * Payload for the suffix array multimap: (a) the top element of the + * range in BWT, (b) the offset of the first elt in the salist, (c) + * length of the range. + */ +template +struct SAVal { + + SAVal() : topf(), topb(), i(), len(OFF_MASK) { } + + /** + * Return true iff the SAVal is valid. + */ + bool valid() { return len != (index_t)OFF_MASK; } + +#ifndef NDEBUG + /** + * Check that this SAVal is internally consistent and consistent + * with the contents of the given cache. + */ + bool repOk(const AlignmentCache& ac) const; +#endif + + /** + * Initialize the SAVal. + */ + void init( + index_t tf, + index_t tb, + index_t ii, + index_t ln) + { + topf = tf; + topb = tb; + i = ii; + len = ln; + } + + index_t topf; // top in BWT + index_t topb; // top in BWT' + index_t i; // idx of first elt in salist + index_t len; // length of range +}; + +/** + * One data structure that encapsulates all of the cached information + * associated with a particular reference substring. This is useful + * for summarizing what info should be added to the cache for a partial + * alignment. 
+ */ +template +class SATuple { + +public: + + SATuple() { reset(); }; + + SATuple(SAKey k, index_t tf, index_t tb, TSlice o) { + init(k, tf, tb, o); + } + + void init(SAKey k, index_t tf, index_t tb, TSlice o) { + key = k; topf = tf; topb = tb; offs = o; + } + + /** + * Initialize this SATuple from a subrange of the SATuple 'src'. + */ + void init(const SATuple& src, index_t first, index_t last) { + assert_neq((index_t)OFF_MASK, src.topb); + key = src.key; + topf = (index_t)(src.topf + first); + topb = (index_t)OFF_MASK; // unknown! + offs.init(src.offs, first, last); + } + +#ifndef NDEBUG + /** + * Check that this SATuple is internally consistent and that its + * PListSlice is consistent with its backing PList. + */ + bool repOk() const { + assert(offs.repOk()); + return true; + } +#endif + + /** + * Function for ordering SATuples. This is used when prioritizing which to + * explore first when extending seed hits into full alignments. Smaller + * ranges get higher priority and we use 'top' to break ties, though any + * way of breaking a tie would be fine. + */ + bool operator<(const SATuple& o) const { + if(offs.size() < o.offs.size()) { + return true; + } + if(offs.size() > o.offs.size()) { + return false; + } + return topf < o.topf; + } + bool operator>(const SATuple& o) const { + if(offs.size() < o.offs.size()) { + return false; + } + if(offs.size() > o.offs.size()) { + return true; + } + return topf > o.topf; + } + + bool operator==(const SATuple& o) const { + return key == o.key && topf == o.topf && topb == o.topb && offs == o.offs; + } + + void reset() { topf = topb = (index_t)OFF_MASK; offs.reset(); } + + /** + * Set the length to be at most the original length. + */ + void setLength(index_t nlen) { + assert_leq(nlen, offs.size()); + offs.setLength(nlen); + } + + /** + * Return the number of times this reference substring occurs in the + * reference, which is also the size of the 'offs' TSlice. 
+ */ + index_t size() const { return (index_t)offs.size(); } + + // bot/length of SA range equals offs.size() + SAKey key; // sequence key + index_t topf; // top in BWT index + index_t topb; // top in BWT' index + TSlice offs; // offsets +}; + +/** + * Encapsulate the data structures and routines that constitute a + * particular cache, i.e., a particular stratum of the cache system, + * which might comprise many strata. + * + * Each thread has a "current-read" AlignmentCache which is used to + * build and store subproblem results as alignment is performed. When + * we're finished with a read, we might copy the cached results for + * that read (and perhaps a bundle of other recently-aligned reads) to + * a higher-level "across-read" cache. Higher-level caches may or may + * not be shared among threads. + * + * A cache consists chiefly of two multimaps, each implemented as a + * Red-Black tree map backed by an EList. A 'version' counter is + * incremented every time the cache is cleared. + */ +template +class AlignmentCache { + + typedef RedBlackNode > QNode; + typedef RedBlackNode > SANode; + + typedef PList TQList; + typedef PList TSAList; + +public: + + AlignmentCache( + uint64_t bytes, + bool shared) : + pool_(bytes, CACHE_PAGE_SZ, CA_CAT), + qmap_(CACHE_PAGE_SZ, CA_CAT), + qlist_(CA_CAT), + samap_(CACHE_PAGE_SZ, CA_CAT), + salist_(CA_CAT), + shared_(shared), + mutex_m(), + version_(0) + { + } + + /** + * Given a QVal, populate the given EList of SATuples with records + * describing all of the cached information about the QVal's + * reference substrings. + */ + template + void queryQval( + const QVal& qv, + EList, S>& satups, + index_t& nrange, + index_t& nelt, + bool getLock = true) + { + ThreadSafe ts(lockPtr(), shared_ && getLock); + assert(qv.repOk(*this)); + const index_t refi = qv.offset(); + const index_t reff = refi + qv.numRanges(); + // For each reference sequence sufficiently similar to the + // query sequence in the QKey... 
+ for(index_t i = refi; i < reff; i++) { + // Get corresponding SAKey, containing similar reference + // sequence & length + SAKey sak = qlist_.get(i); + // Shouldn't have identical keys in qlist_ + assert(i == refi || qlist_.get(i) != qlist_.get(i-1)); + // Get corresponding SANode + SANode *n = samap_.lookup(sak); + assert(n != NULL); + const SAVal& sav = n->payload; + assert(sav.repOk(*this)); + if(sav.len > 0) { + nrange++; + satups.expand(); + satups.back().init(sak, sav.topf, sav.topb, TSlice(salist_, sav.i, sav.len)); + nelt += sav.len; +#ifndef NDEBUG + // Shouldn't add consecutive identical entries too satups + if(i > refi) { + const SATuple b1 = satups.back(); + const SATuple b2 = satups[satups.size()-2]; + assert(b1.key != b2.key || b1.topf != b2.topf || b1.offs != b2.offs); + } +#endif + } + } + } + + /** + * Return true iff the cache has no entries in it. + */ + bool empty() const { + bool ret = qmap_.empty(); + assert(!ret || qlist_.empty()); + assert(!ret || samap_.empty()); + assert(!ret || salist_.empty()); + return ret; + } + + /** + * Add a new query key ('qk'), usually a 2-bit encoded substring of + * the read) as the key in a new Red-Black node in the qmap and + * return a pointer to the node's QVal. + * + * The expectation is that the caller is about to set about finding + * associated reference substrings, and that there will be future + * calls to addOnTheFly to add associations to reference substrings + * found. + */ + QVal* add( + const QKey& qk, + bool *added, + bool getLock = true) + { + ThreadSafe ts(lockPtr(), shared_ && getLock); + assert(qk.cacheable()); + QNode *n = qmap_.add(pool(), qk, added); + return (n != NULL ? 
&n->payload : NULL); + } + + /** + * Add a new association between a read sequnce ('seq') and a + * reference sequence ('') + */ + bool addOnTheFly( + QVal& qv, // qval that points to the range of reference substrings + const SAKey& sak, // the key holding the reference substring + index_t topf, // top range elt in BWT index + index_t botf, // bottom range elt in BWT index + index_t topb, // top range elt in BWT' index + index_t botb, // bottom range elt in BWT' index + bool getLock = true); + + /** + * Clear the cache, i.e. turn it over. All HitGens referring to + * ranges in this cache will become invalid and the corresponding + * reads will have to be re-aligned. + */ + void clear(bool getLock = true) { + ThreadSafe ts(lockPtr(), shared_ && getLock); + pool_.clear(); + qmap_.clear(); + qlist_.clear(); + samap_.clear(); + salist_.clear(); + version_++; + } + + /** + * Return the number of keys in the query multimap. + */ + index_t qNumKeys() const { return (index_t)qmap_.size(); } + + /** + * Return the number of keys in the suffix array multimap. + */ + index_t saNumKeys() const { return (index_t)samap_.size(); } + + /** + * Return the number of elements in the reference substring list. + */ + index_t qSize() const { return (index_t)qlist_.size(); } + + /** + * Return the number of elements in the SA range list. + */ + index_t saSize() const { return (index_t)salist_.size(); } + + /** + * Return the pool. + */ + Pool& pool() { return pool_; } + + /** + * Return the lock object. + */ + MUTEX_T& lock() { + return mutex_m; + } + + /** + * Return a const pointer to the lock object. This allows us to + * write const member functions that grab the lock. + */ + MUTEX_T* lockPtr() const { + return const_cast(&mutex_m); + } + + /** + * Return true iff this cache is shared among threads. + */ + bool shared() const { return shared_; } + + /** + * Return the current "version" of the cache, i.e. the total number + * of times it has turned over since its creation. 
+ */ + uint32_t version() const { return version_; } + +protected: + + Pool pool_; // dispenses memory pages + RedBlack > qmap_; // map from query substrings to reference substrings + TQList qlist_; // list of reference substrings + RedBlack > samap_; // map from reference substrings to SA ranges + TSAList salist_; // list of SA ranges + + bool shared_; // true -> this cache is global + MUTEX_T mutex_m; // mutex used for syncronization in case the the cache is shared. + uint32_t version_; // cache version +}; + +/** + * Interface used to query and update a pair of caches: one thread- + * local and unsynchronized, another shared and synchronized. One or + * both can be NULL. + */ +template +class AlignmentCacheIface { + +public: + + AlignmentCacheIface( + AlignmentCache *current, + AlignmentCache *local, + AlignmentCache *shared) : + qk_(), + qv_(NULL), + cacheable_(false), + rangen_(0), + eltsn_(0), + current_(current), + local_(local), + shared_(shared) + { + assert(current_ != NULL); + } + +#if 0 + /** + * Query the relevant set of caches, looking for a QVal to go with + * the provided QKey. If the QVal is found in a cache other than + * the current-read cache, it is copied into the current-read cache + * first and the QVal pointer for the current-read cache is + * returned. This function never returns a pointer from any cache + * other than the current-read cache. If the QVal could not be + * found in any cache OR if the QVal was found in a cache other + * than the current-read cache but could not be copied into the + * current-read cache, NULL is returned. 
+ */ + QVal* queryCopy(const QKey& qk, bool getLock = true) { + assert(qk.cacheable()); + AlignmentCache* caches[3] = { current_, local_, shared_ }; + for(int i = 0; i < 3; i++) { + if(caches[i] == NULL) continue; + QVal* qv = caches[i]->query(qk, getLock); + if(qv != NULL) { + if(i == 0) return qv; + if(!current_->copy(qk, *qv, *caches[i], getLock)) { + // Exhausted memory in the current cache while + // attempting to copy in the qk + return NULL; + } + QVal* curqv = current_->query(qk, getLock); + assert(curqv != NULL); + return curqv; + } + } + return NULL; + } + + /** + * Query the relevant set of caches, looking for a QVal to go with + * the provided QKey. If a QVal is found and which is non-NULL, + * *which is set to 0 if the qval was found in the current-read + * cache, 1 if it was found in the local across-read cache, and 2 + * if it was found in the shared across-read cache. + */ + inline QVal* query( + const QKey& qk, + AlignmentCache** which, + bool getLock = true) + { + assert(qk.cacheable()); + AlignmentCache* caches[3] = { current_, local_, shared_ }; + for(int i = 0; i < 3; i++) { + if(caches[i] == NULL) continue; + QVal* qv = caches[i]->query(qk, getLock); + if(qv != NULL) { + if(which != NULL) *which = caches[i]; + return qv; + } + } + return NULL; + } +#endif + + /** + * This function is called whenever we start to align a new read or + * read substring. We make key for it and store the key in qk_. + * If the sequence is uncacheable, we don't actually add it to the + * map but the corresponding reference substrings are still added + * to the qlist_. 
+ * + * Returns: + * -1 if out of memory + * 0 if key was found in cache + * 1 if key was not found in cache (and there's enough memory to + * add a new key) + */ + int beginAlign( + const BTDnaString& seq, + const BTString& qual, + QVal& qv, // out: filled in if we find it in the cache + bool getLock = true) + { + assert(repOk()); + qk_.init(seq ASSERT_ONLY(, tmpdnastr_)); + //if(qk_.cacheable() && (qv_ = current_->query(qk_, getLock)) != NULL) { + // // qv_ holds the answer + // assert(qv_->valid()); + // qv = *qv_; + // resetRead(); + // return 1; // found in cache + //} else + if(qk_.cacheable()) { + // Make a QNode for this key and possibly add the QNode to the + // Red-Black map; but if 'seq' isn't cacheable, just create the + // QNode (without adding it to the map). + qv_ = current_->add(qk_, &cacheable_, getLock); + } else { + qv_ = &qvbuf_; + } + if(qv_ == NULL) { + resetRead(); + return -1; // Not in memory + } + qv_->reset(); + return 0; // Need to search for it + } + ASSERT_ONLY(BTDnaString tmpdnastr_); + + /** + * Called when is finished aligning a read (and so is finished + * adding associated reference strings). Returns a copy of the + * final QVal object and resets the alignment state of the + * current-read cache. + * + * Also, if the alignment is cacheable, it commits it to the next + * cache up in the cache hierarchy. + */ + QVal finishAlign(bool getLock = true) { + if(!qv_->valid()) { + qv_->init(0, 0, 0); + } + // Copy this pointer because we're about to reset the qv_ field + // to NULL + QVal* qv = qv_; + // Commit the contents of the current-read cache to the next + // cache up in the hierarchy. 
+ // If qk is cacheable, then it must be in the cache +#if 0 + if(qk_.cacheable()) { + AlignmentCache* caches[3] = { current_, local_, shared_ }; + ASSERT_ONLY(AlignmentCache* which); + ASSERT_ONLY(QVal* qv2 = query(qk_, &which, true)); + assert(qv2 == qv); + assert(which == current_); + for(int i = 1; i < 3; i++) { + if(caches[i] != NULL) { + // Copy this key/value pair to the to the higher + // level cache and, if its memory is exhausted, + // clear the cache and try again. + caches[i]->clearCopy(qk_, *qv_, *current_, getLock); + break; + } + } + } +#endif + // Reset the state in this iface in preparation for the next + // alignment. + resetRead(); + assert(repOk()); + return *qv; + } + + /** + * A call to this member indicates that the caller has finished + * with the last read (if any) and is ready to work on the next. + * This gives the cache a chance to reset some of its state if + * necessary. + */ + void nextRead() { + current_->clear(); + resetRead(); + assert(!aligning()); + } + + /** + * Return true iff we're in the middle of aligning a sequence. + */ + bool aligning() const { + return qv_ != NULL; + } + + /** + * Clears both the local and shared caches. + */ + void clear() { + if(current_ != NULL) current_->clear(); + if(local_ != NULL) local_->clear(); + if(shared_ != NULL) shared_->clear(); + } + + /** + * Add an alignment to the running list of alignments being + * compiled for the current read in the local cache. 
+ */ + bool addOnTheFly( + const BTDnaString& rfseq, // reference sequence close to read seq + index_t topf, // top in BWT index + index_t botf, // bot in BWT index + index_t topb, // top in BWT' index + index_t botb, // bot in BWT' index + bool getLock = true) // true -> lock is not held by caller + { + + assert(aligning()); + assert(repOk()); + ASSERT_ONLY(BTDnaString tmp); + SAKey sak(rfseq ASSERT_ONLY(, tmp)); + //assert(sak.cacheable()); + if(current_->addOnTheFly((*qv_), sak, topf, botf, topb, botb, getLock)) { + rangen_++; + eltsn_ += (botf-topf); + return true; + } + return false; + } + + /** + * Given a QVal, populate the given EList of SATuples with records + * describing all of the cached information about the QVal's + * reference substrings. + */ + template + void queryQval( + const QVal& qv, + EList, S>& satups, + index_t& nrange, + index_t& nelt, + bool getLock = true) + { + current_->queryQval(qv, satups, nrange, nelt, getLock); + } + + /** + * Return a pointer to the current-read cache object. + */ + const AlignmentCache* currentCache() const { return current_; } + + index_t curNumRanges() const { return rangen_; } + index_t curNumElts() const { return eltsn_; } + +#ifndef NDEBUG + /** + * Check that AlignmentCacheIface is internally consistent. + */ + bool repOk() const { + assert(current_ != NULL); + assert_geq(eltsn_, rangen_); + if(qv_ == NULL) { + assert_eq(0, rangen_); + assert_eq(0, eltsn_); + } + return true; + } +#endif + + /** + * Return the alignment cache for the current read. + */ + const AlignmentCache& current() { + return *current_; + } + +protected: + + /** + * Reset fields encoding info about the in-process read. 
+ */ + void resetRead() { + cacheable_ = false; + rangen_ = eltsn_ = 0; + qv_ = NULL; + } + + QKey qk_; // key representation for current read substring + QVal *qv_; // pointer to value representation for current read substring + QVal qvbuf_; // buffer for when key is uncacheable but we need a qv + bool cacheable_; // true iff the read substring currently being aligned is cacheable + + index_t rangen_; // number of ranges since last alignment job began + index_t eltsn_; // number of elements since last alignment job began + + AlignmentCache *current_; // cache dedicated to the current read + AlignmentCache *local_; // local, unsynchronized cache + AlignmentCache *shared_; // shared, synchronized cache +}; + +#ifndef NDEBUG +/** + * Check that this QVal is internally consistent and consistent + * with the contents of the given cache. + */ +template +bool QVal::repOk(const AlignmentCache& ac) const { + if(rangen_ > 0) { + assert_lt(i_, ac.qSize()); + assert_leq(i_ + rangen_, ac.qSize()); + } + assert_geq(eltn_, rangen_); + return true; +} +#endif + +#ifndef NDEBUG +/** + * Check that this SAVal is internally consistent and consistent + * with the contents of the given cache. 
+ */ +template +bool SAVal::repOk(const AlignmentCache& ac) const { + assert(len == 0 || i < ac.saSize()); + assert_leq(i + len, ac.saSize()); + return true; +} +#endif + +/** + * Add a new association between a read sequnce ('seq') and a + * reference sequence ('') + */ +template +bool AlignmentCache::addOnTheFly( + QVal& qv, // qval that points to the range of reference substrings + const SAKey& sak, // the key holding the reference substring + index_t topf, // top range elt in BWT index + index_t botf, // bottom range elt in BWT index + index_t topb, // top range elt in BWT' index + index_t botb, // bottom range elt in BWT' index + bool getLock) +{ + ThreadSafe ts(lockPtr(), shared_ && getLock); + bool added = true; + // If this is the first reference sequence we're associating with + // the query sequence, initialize the QVal. + if(!qv.valid()) { + qv.init((index_t)qlist_.size(), 0, 0); + } + qv.addRange(botf-topf); // update tally for # ranges and # elts + if(!qlist_.add(pool(), sak)) { + return false; // Exhausted pool memory + } +#ifndef NDEBUG + for(index_t i = qv.offset(); i < qlist_.size(); i++) { + if(i > qv.offset()) { + assert(qlist_.get(i) != qlist_.get(i-1)); + } + } +#endif + assert_eq(qv.offset() + qv.numRanges(), qlist_.size()); + SANode *s = samap_.add(pool(), sak, &added); + if(s == NULL) { + return false; // Exhausted pool memory + } + assert(s->key.repOk()); + if(added) { + s->payload.i = (index_t)salist_.size(); + s->payload.len = botf - topf; + s->payload.topf = topf; + s->payload.topb = topb; + for(size_t j = 0; j < (botf-topf); j++) { + if(!salist_.add(pool(), (index_t)0xffffffff)) { + // Change the payload's len field + s->payload.len = (uint32_t)j; + return false; // Exhausted pool memory + } + } + assert(s->payload.repOk(*this)); + } + // Now that we know all allocations have succeeded, we can do a few final + // updates + + return true; +} + +#endif /*ALIGNER_CACHE_H_*/ diff --git a/aligner_driver.cpp b/aligner_driver.cpp new file mode 
100644 index 0000000..00703de --- /dev/null +++ b/aligner_driver.cpp @@ -0,0 +1,80 @@ +/* + * Copyright 2012, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "aligner_driver.h" + +void AlignerDriverRootSelector::select( + const Read& q, + const Read* qo, + bool nofw, + bool norc, + EList& confs, + EList& roots) +{ + // Calculate interval length for both mates + int interval = rootIval_.f((double)q.length()); + if(qo != NULL) { + // Boost interval length by 20% for paired-end reads + interval = (int)(interval * 1.2 + 0.5); + } + float pri = 0.0f; + for(int fwi = 0; fwi < 2; fwi++) { + bool fw = (fwi == 0); + if((fw && nofw) || (!fw && norc)) { + continue; + } + // Put down left-to-right roots w/r/t forward and reverse-complement reads + { + bool first = true; + size_t i = 0; + while(first || (i + landing_ <= q.length())) { + confs.expand(); + confs.back().cons.init(landing_, consExp_); + roots.expand(); + roots.back().init( + i, // offset from 5' end + true, // left-to-right? + fw, // fw? 
+ q.length(), // query length + pri); // root priority + i += interval; + first = false; + } + } + // Put down right-to-left roots w/r/t forward and reverse-complement reads + { + bool first = true; + size_t i = 0; + while(first || (i + landing_ <= q.length())) { + confs.expand(); + confs.back().cons.init(landing_, consExp_); + roots.expand(); + roots.back().init( + q.length() - i - 1, // offset from 5' end + false, // left-to-right? + fw, // fw? + q.length(), // query length + pri); // root priority + i += interval; + first = false; + } + } + } +} + diff --git a/aligner_driver.h b/aligner_driver.h new file mode 100644 index 0000000..97f06bf --- /dev/null +++ b/aligner_driver.h @@ -0,0 +1,247 @@ +/* + * Copyright 2012, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +/* + * aligner_driver.h + * + * REDUNDANT SEED HITS + * + * We say that two seed hits are redundant if they trigger identical + * seed-extend dynamic programming problems. Put another way, they both lie on + * the same diagonal of the overall read/reference dynamic programming matrix. + * Detecting redundant seed hits is simple when the seed hits are ungapped. We + * do this after offset resolution but before the offset is converted to genome + * coordinates (see uses of the seenDiags1_/seenDiags2_ fields for examples). 
+ * + * REDUNDANT ALIGNMENTS + * + * In an unpaired context, we say that two alignments are redundant if they + * share any cells in the global DP table. Roughly speaking, this is like + * saying that two alignments are redundant if any read character aligns to the + * same reference character (same reference sequence, same strand, same offset) + * in both alignments. + * + * In a paired-end context, we say that two paired-end alignments are redundant + * if the mate #1s are redundant and the mate #2s are redundant. + * + * How do we enforce this? In the unpaired context, this is relatively simple: + * the cells from each alignment are checked against a set containing all cells + * from all previous alignments. Given a new alignment, for each cell in the + * new alignment we check whether it is in the set. If there is any overlap, + * the new alignment is rejected as redundant. Otherwise, the new alignment is + * accepted and its cells are added to the set. + * + * Enforcement in a paired context is a little trickier. Consider the + * following approaches: + * + * 1. Skip anchors that are redundant with any previous anchor or opposite + * alignment. This is sufficient to ensure no two concordant alignments + * found are redundant. + * + * 2. Same as scheme 1, but with a "transitive closure" scheme for finding all + * concordant pairs in the vicinity of an anchor. Consider the AB/AC + * scenario from the previous paragraph. If B is the anchor alignment, we + * will find AB but not AC. But under this scheme, once we find AB we then + * let B be a new anchor and immediately look for its opposites. Likewise, + * if we find any opposite, we make them anchors and continue searching. We + * don't stop searching until every opposite is used as an anchor. + * + * 3. Skip anchors that are redundant with any previous anchor alignment (but + * allow anchors that are redundant with previous opposite alignments). 
+ * This isn't sufficient to avoid redundant concordant alignments. To avoid + * redundant concordants, we need an additional procedure that checks each + * new concordant alignment one-by-one against a list of previous concordant + * alignments to see if it is redundant. + * + * We take approach 1. + */ + +#ifndef ALIGNER_DRIVER_H_ +#define ALIGNER_DRIVER_H_ + +#include "aligner_seed2.h" +#include "simple_func.h" +#include "aln_sink.h" + +/** + * Concrete subclass of DescentRootSelector. Puts a root every 'ival' chars, + * where 'ival' is determined by user-specified parameters. A root is filtered + * out if the end of the read is less than 'landing' positions away, in the + * direction of the search. + */ +class AlignerDriverRootSelector : public DescentRootSelector { + +public: + + AlignerDriverRootSelector( + double consExp, + const SimpleFunc& rootIval, + size_t landing) + { + consExp_ = consExp; + rootIval_ = rootIval; + landing_ = landing; + } + + virtual ~AlignerDriverRootSelector() { } + + virtual void select( + const Read& q, // read that we're selecting roots for + const Read* qo, // opposite mate, if applicable + bool nofw, // don't add roots for fw read + bool norc, // don't add roots for rc read + EList& confs, // put DescentConfigs here + EList& roots); // put DescentRoot here + +protected: + + double consExp_; + SimpleFunc rootIval_; + size_t landing_; +}; + +/** + * Return values from extendSeeds and extendSeedsPaired. + */ +enum { + // Candidates were examined exhaustively + ALDRIVER_EXHAUSTED_CANDIDATES = 1, + // The policy does not need us to look any further + ALDRIVER_POLICY_FULFILLED, + // We stopped because we ran up against a limit on how much work we should + // do for one set of seed ranges, e.g. the limit on number of consecutive + // unproductive DP extensions + ALDRIVER_EXCEEDED_LIMIT +}; + +/** + * This class is the glue between a DescentDriver and the dynamic programming + * implementations in Bowtie 2. 
The DescentDriver is used to find some very + * high-scoring alignments, but is additionally used to rank partial alignments + * so that they can be extended using dynamic programming. + */ +template +class AlignerDriver { + +public: + + AlignerDriver( + double consExp, + const SimpleFunc& rootIval, + size_t landing, + bool veryVerbose, + const SimpleFunc& totsz, + const SimpleFunc& totfmops) : + sel_(consExp, rootIval, landing), + alsel_(), + dr1_(veryVerbose), + dr2_(veryVerbose) + { + totsz_ = totsz; + totfmops_ = totfmops; + } + + /** + * Initialize driver with respect to a new read or pair. + */ + void initRead( + const Read& q1, + bool nofw, + bool norc, + TAlScore minsc, + TAlScore maxpen, + const Read* q2) + { + dr1_.initRead(q1, nofw, norc, minsc, maxpen, q2, &sel_); + red1_.init(q1.length()); + paired_ = false; + if(q2 != NULL) { + dr2_.initRead(*q2, nofw, norc, minsc, maxpen, &q1, &sel_); + red2_.init(q2->length()); + paired_ = true; + } else { + dr2_.reset(); + } + size_t totsz = totsz_.f(q1.length()); + size_t totfmops = totfmops_.f(q1.length()); + stop_.init( + totsz, + 0, + true, + totfmops); + } + + /** + * Start the driver. The driver will begin by conducting a best-first, + * index-assisted search through the space of possible full and partial + * alignments. This search may be followed up with a dynamic programming + * extension step, taking a prioritized set of partial SA ranges found + * during the search and extending each with DP. The process might also be + * iterated, with the search being occasioanally halted so that DPs can be + * tried, then restarted, etc. + */ + int go( + const Scoring& sc, + const GFM& gfmFw, + const GFM& gfmBw, + const BitPairReference& ref, + DescentMetrics& met, + WalkMetrics& wlm, + PerReadMetrics& prm, + RandomSource& rnd, + AlnSinkWrap& sink); + + /** + * Reset state of all DescentDrivers. 
+ */ + void reset() { + dr1_.reset(); + dr2_.reset(); + red1_.reset(); + red2_.reset(); + } + +protected: + + AlignerDriverRootSelector sel_; // selects where roots should go + DescentAlignmentSelector alsel_; // one selector can deal with >1 drivers + DescentDriver dr1_; // driver for mate 1/unpaired reads + DescentDriver dr2_; // driver for paired-end reads + DescentStoppingConditions stop_; // when to pause index-assisted BFS + bool paired_; // current read is paired? + + SimpleFunc totsz_; // memory limit on best-first search data + SimpleFunc totfmops_; // max # FM ops for best-first search + + // For detecting redundant alignments + RedundantAlns red1_; // database of cells used for mate 1 alignments + RedundantAlns red2_; // database of cells used for mate 2 alignments + + // For AlnRes::matchesRef + ASSERT_ONLY(SStringExpandable raw_refbuf_); + ASSERT_ONLY(SStringExpandable raw_destU32_); + ASSERT_ONLY(EList raw_matches_); + ASSERT_ONLY(BTDnaString tmp_rf_); + ASSERT_ONLY(BTDnaString tmp_rdseq_); + ASSERT_ONLY(BTString tmp_qseq_); + ASSERT_ONLY(EList tmp_reflens_); + ASSERT_ONLY(EList tmp_refoffs_); +}; + +#endif /* defined(ALIGNER_DRIVER_H_) */ diff --git a/aligner_metrics.h b/aligner_metrics.h new file mode 100644 index 0000000..c0b0182 --- /dev/null +++ b/aligner_metrics.h @@ -0,0 +1,352 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ALIGNER_METRICS_H_ +#define ALIGNER_METRICS_H_ + +#include +#include +#include "alphabet.h" +#include "timer.h" +#include "sstring.h" + +using namespace std; + +/** + * Borrowed from http://www.johndcook.com/standard_deviation.html, + * which in turn is borrowed from Knuth. + */ +class RunningStat { +public: + RunningStat() : m_n(0), m_tot(0.0) { } + + void clear() { + m_n = 0; + m_tot = 0.0; + } + + void push(float x) { + m_n++; + m_tot += x; + // See Knuth TAOCP vol 2, 3rd edition, page 232 + if (m_n == 1) { + m_oldM = m_newM = x; + m_oldS = 0.0; + } else { + m_newM = m_oldM + (x - m_oldM)/m_n; + m_newS = m_oldS + (x - m_oldM)*(x - m_newM); + // set up for next iteration + m_oldM = m_newM; + m_oldS = m_newS; + } + } + + int num() const { + return m_n; + } + + double tot() const { + return m_tot; + } + + double mean() const { + return (m_n > 0) ? m_newM : 0.0; + } + + double variance() const { + return ( (m_n > 1) ? m_newS/(m_n - 1) : 0.0 ); + } + + double stddev() const { + return sqrt(variance()); + } + +private: + int m_n; + double m_tot; + double m_oldM, m_newM, m_oldS, m_newS; +}; + +/** + * Encapsulates a set of metrics that we would like an aligner to keep + * track of, so that we can possibly use it to diagnose performance + * issues. 
+ */ +class AlignerMetrics { + +public: + + AlignerMetrics() : + curBacktracks_(0), + curBwtOps_(0), + first_(true), + curIsLowEntropy_(false), + curIsHomoPoly_(false), + curHadRanges_(false), + curNumNs_(0), + reads_(0), + homoReads_(0), + lowEntReads_(0), + hiEntReads_(0), + alignedReads_(0), + unalignedReads_(0), + threeOrMoreNReads_(0), + lessThanThreeNRreads_(0), + bwtOpsPerRead_(), + backtracksPerRead_(), + bwtOpsPerHomoRead_(), + backtracksPerHomoRead_(), + bwtOpsPerLoEntRead_(), + backtracksPerLoEntRead_(), + bwtOpsPerHiEntRead_(), + backtracksPerHiEntRead_(), + bwtOpsPerAlignedRead_(), + backtracksPerAlignedRead_(), + bwtOpsPerUnalignedRead_(), + backtracksPerUnalignedRead_(), + bwtOpsPer0nRead_(), + backtracksPer0nRead_(), + bwtOpsPer1nRead_(), + backtracksPer1nRead_(), + bwtOpsPer2nRead_(), + backtracksPer2nRead_(), + bwtOpsPer3orMoreNRead_(), + backtracksPer3orMoreNRead_(), + timer_(cout, "", false) + { } + + void printSummary() { + if(!first_) { + finishRead(); + } + cout << "AlignerMetrics:" << endl; + cout << " # Reads: " << reads_ << endl; + float hopct = (reads_ > 0) ? (((float)homoReads_)/((float)reads_)) : (0.0f); + hopct *= 100.0f; + cout << " % homo-polymeric: " << (hopct) << endl; + float lopct = (reads_ > 0) ? ((float)lowEntReads_/(float)(reads_)) : (0.0f); + lopct *= 100.0f; + cout << " % low-entropy: " << (lopct) << endl; + float unpct = (reads_ > 0) ? ((float)unalignedReads_/(float)(reads_)) : (0.0f); + unpct *= 100.0f; + cout << " % unaligned: " << (unpct) << endl; + float npct = (reads_ > 0) ? 
((float)threeOrMoreNReads_/(float)(reads_)) : (0.0f); + npct *= 100.0f; + cout << " % with 3 or more Ns: " << (npct) << endl; + cout << endl; + cout << " Total BWT ops: avg: " << bwtOpsPerRead_.mean() << ", stddev: " << bwtOpsPerRead_.stddev() << endl; + cout << " Total Backtracks: avg: " << backtracksPerRead_.mean() << ", stddev: " << backtracksPerRead_.stddev() << endl; + time_t elapsed = timer_.elapsed(); + cout << " BWT ops per second: " << (bwtOpsPerRead_.tot()/elapsed) << endl; + cout << " Backtracks per second: " << (backtracksPerRead_.tot()/elapsed) << endl; + cout << endl; + cout << " Homo-poly:" << endl; + cout << " BWT ops: avg: " << bwtOpsPerHomoRead_.mean() << ", stddev: " << bwtOpsPerHomoRead_.stddev() << endl; + cout << " Backtracks: avg: " << backtracksPerHomoRead_.mean() << ", stddev: " << backtracksPerHomoRead_.stddev() << endl; + cout << " Low-entropy:" << endl; + cout << " BWT ops: avg: " << bwtOpsPerLoEntRead_.mean() << ", stddev: " << bwtOpsPerLoEntRead_.stddev() << endl; + cout << " Backtracks: avg: " << backtracksPerLoEntRead_.mean() << ", stddev: " << backtracksPerLoEntRead_.stddev() << endl; + cout << " High-entropy:" << endl; + cout << " BWT ops: avg: " << bwtOpsPerHiEntRead_.mean() << ", stddev: " << bwtOpsPerHiEntRead_.stddev() << endl; + cout << " Backtracks: avg: " << backtracksPerHiEntRead_.mean() << ", stddev: " << backtracksPerHiEntRead_.stddev() << endl; + cout << endl; + cout << " Unaligned:" << endl; + cout << " BWT ops: avg: " << bwtOpsPerUnalignedRead_.mean() << ", stddev: " << bwtOpsPerUnalignedRead_.stddev() << endl; + cout << " Backtracks: avg: " << backtracksPerUnalignedRead_.mean() << ", stddev: " << backtracksPerUnalignedRead_.stddev() << endl; + cout << " Aligned:" << endl; + cout << " BWT ops: avg: " << bwtOpsPerAlignedRead_.mean() << ", stddev: " << bwtOpsPerAlignedRead_.stddev() << endl; + cout << " Backtracks: avg: " << backtracksPerAlignedRead_.mean() << ", stddev: " << backtracksPerAlignedRead_.stddev() << endl; + 
cout << endl; + cout << " 0 Ns:" << endl; + cout << " BWT ops: avg: " << bwtOpsPer0nRead_.mean() << ", stddev: " << bwtOpsPer0nRead_.stddev() << endl; + cout << " Backtracks: avg: " << backtracksPer0nRead_.mean() << ", stddev: " << backtracksPer0nRead_.stddev() << endl; + cout << " 1 N:" << endl; + cout << " BWT ops: avg: " << bwtOpsPer1nRead_.mean() << ", stddev: " << bwtOpsPer1nRead_.stddev() << endl; + cout << " Backtracks: avg: " << backtracksPer1nRead_.mean() << ", stddev: " << backtracksPer1nRead_.stddev() << endl; + cout << " 2 Ns:" << endl; + cout << " BWT ops: avg: " << bwtOpsPer2nRead_.mean() << ", stddev: " << bwtOpsPer2nRead_.stddev() << endl; + cout << " Backtracks: avg: " << backtracksPer2nRead_.mean() << ", stddev: " << backtracksPer2nRead_.stddev() << endl; + cout << " >2 Ns:" << endl; + cout << " BWT ops: avg: " << bwtOpsPer3orMoreNRead_.mean() << ", stddev: " << bwtOpsPer3orMoreNRead_.stddev() << endl; + cout << " Backtracks: avg: " << backtracksPer3orMoreNRead_.mean() << ", stddev: " << backtracksPer3orMoreNRead_.stddev() << endl; + cout << endl; + } + + /** + * + */ + void nextRead(const BTDnaString& read) { + if(!first_) { + finishRead(); + } + first_ = false; + //float ent = entropyDna5(read); + float ent = 0.0f; + curIsLowEntropy_ = (ent < 0.75f); + curIsHomoPoly_ = (ent < 0.001f); + curHadRanges_ = false; + curBwtOps_ = 0; + curBacktracks_ = 0; + // Count Ns + curNumNs_ = 0; + const size_t len = read.length(); + for(size_t i = 0; i < len; i++) { + if((int)read[i] == 4) curNumNs_++; + } + } + + /** + * + */ + void setReadHasRange() { + curHadRanges_ = true; + } + + /** + * Commit the running statistics for this read to + */ + void finishRead() { + reads_++; + if(curIsHomoPoly_) homoReads_++; + else if(curIsLowEntropy_) lowEntReads_++; + else hiEntReads_++; + if(curHadRanges_) alignedReads_++; + else unalignedReads_++; + bwtOpsPerRead_.push((float)curBwtOps_); + backtracksPerRead_.push((float)curBacktracks_); + // Drill down by entropy + 
if(curIsHomoPoly_) { + bwtOpsPerHomoRead_.push((float)curBwtOps_); + backtracksPerHomoRead_.push((float)curBacktracks_); + } else if(curIsLowEntropy_) { + bwtOpsPerLoEntRead_.push((float)curBwtOps_); + backtracksPerLoEntRead_.push((float)curBacktracks_); + } else { + bwtOpsPerHiEntRead_.push((float)curBwtOps_); + backtracksPerHiEntRead_.push((float)curBacktracks_); + } + // Drill down by whether it aligned + if(curHadRanges_) { + bwtOpsPerAlignedRead_.push((float)curBwtOps_); + backtracksPerAlignedRead_.push((float)curBacktracks_); + } else { + bwtOpsPerUnalignedRead_.push((float)curBwtOps_); + backtracksPerUnalignedRead_.push((float)curBacktracks_); + } + if(curNumNs_ == 0) { + lessThanThreeNRreads_++; + bwtOpsPer0nRead_.push((float)curBwtOps_); + backtracksPer0nRead_.push((float)curBacktracks_); + } else if(curNumNs_ == 1) { + lessThanThreeNRreads_++; + bwtOpsPer1nRead_.push((float)curBwtOps_); + backtracksPer1nRead_.push((float)curBacktracks_); + } else if(curNumNs_ == 2) { + lessThanThreeNRreads_++; + bwtOpsPer2nRead_.push((float)curBwtOps_); + backtracksPer2nRead_.push((float)curBacktracks_); + } else { + threeOrMoreNReads_++; + bwtOpsPer3orMoreNRead_.push((float)curBwtOps_); + backtracksPer3orMoreNRead_.push((float)curBacktracks_); + } + } + + // Running-total of the number of backtracks and BWT ops for the + // current read + uint32_t curBacktracks_; + uint32_t curBwtOps_; + +protected: + + bool first_; + + // true iff the current read is low entropy + bool curIsLowEntropy_; + // true if current read is all 1 char (or very close) + bool curIsHomoPoly_; + // true iff the current read has had one or more ranges reported + bool curHadRanges_; + // number of Ns in current read + int curNumNs_; + + // # reads + uint32_t reads_; + // # homo-poly reads + uint32_t homoReads_; + // # low-entropy reads + uint32_t lowEntReads_; + // # high-entropy reads + uint32_t hiEntReads_; + // # reads with alignments + uint32_t alignedReads_; + // # reads without alignments + 
uint32_t unalignedReads_; + // # reads with 3 or more Ns + uint32_t threeOrMoreNReads_; + // # reads with < 3 Ns + uint32_t lessThanThreeNRreads_; + + // Distribution of BWT operations per read + RunningStat bwtOpsPerRead_; + RunningStat backtracksPerRead_; + + // Distribution of BWT operations per homo-poly read + RunningStat bwtOpsPerHomoRead_; + RunningStat backtracksPerHomoRead_; + + // Distribution of BWT operations per low-entropy read + RunningStat bwtOpsPerLoEntRead_; + RunningStat backtracksPerLoEntRead_; + + // Distribution of BWT operations per high-entropy read + RunningStat bwtOpsPerHiEntRead_; + RunningStat backtracksPerHiEntRead_; + + // Distribution of BWT operations per read that "aligned" (for + // which a range was arrived at - range may not have necessarily + // lead to an alignment) + RunningStat bwtOpsPerAlignedRead_; + RunningStat backtracksPerAlignedRead_; + + // Distribution of BWT operations per read that didn't align + RunningStat bwtOpsPerUnalignedRead_; + RunningStat backtracksPerUnalignedRead_; + + // Distribution of BWT operations/backtracks per read with no Ns + RunningStat bwtOpsPer0nRead_; + RunningStat backtracksPer0nRead_; + + // Distribution of BWT operations/backtracks per read with one N + RunningStat bwtOpsPer1nRead_; + RunningStat backtracksPer1nRead_; + + // Distribution of BWT operations/backtracks per read with two Ns + RunningStat bwtOpsPer2nRead_; + RunningStat backtracksPer2nRead_; + + // Distribution of BWT operations/backtracks per read with three or + // more Ns + RunningStat bwtOpsPer3orMoreNRead_; + RunningStat backtracksPer3orMoreNRead_; + + Timer timer_; +}; + +#endif /* ALIGNER_METRICS_H_ */ diff --git a/aligner_report.h b/aligner_report.h new file mode 100644 index 0000000..c5cd8db --- /dev/null +++ b/aligner_report.h @@ -0,0 +1,35 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ALIGNER_REPORT_H_ +#define ALIGNER_REPORT_H_ + +#include "aligner_cache.h" + +class Reporter { +public: + /** + * + */ + bool report(const AlignmentCacheIface& cache, const QVal& qv) { + return true; // don't retry + } +}; + +#endif /*ALIGNER_REPORT_H_*/ diff --git a/aligner_result.cpp b/aligner_result.cpp new file mode 100644 index 0000000..9043a11 --- /dev/null +++ b/aligner_result.cpp @@ -0,0 +1,2162 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +#include "reference.h" +#include "aligner_result.h" +#include "read.h" +#include "edit.h" +#include "sstring.h" +#include "ds.h" +#include "util.h" +#include "alphabet.h" + +using namespace std; + +/** + * Clear all contents. 
+ */ +void AlnRes::reset() { + if(ned_ != NULL) { + assert(aed_ != NULL); + ned_->clear(); + aed_->clear(); + } + score_.invalidate(); + refcoord_.reset(); + refival_.reset(); + shapeSet_ = false; + rdlen_ = 0; + rdid_ = 0; + reflen_ = 0; + rdrows_ = 0; + rdextent_ = 0; + rdexrows_ = 0; + rfextent_ = 0; + refns_ = 0; + type_ = ALN_RES_TYPE_UNPAIRED; + fraglen_ = -1; + trimSoft_ = false; + trim5p_ = 0; + trim3p_ = 0; + pretrimSoft_ = true; + pretrim5p_ = 0; + pretrim3p_ = 0; + seedmms_ = 0; // number of mismatches allowed in seed + seedlen_ = 0; // length of seed + seedival_ = 0; // interval between seeds + minsc_ = 0; // minimum score + nuc5p_ = 0; + nuc3p_ = 0; + fraglenSet_ = false; + num_spliced_ = 0; + assert(!refcoord_.inited()); + assert(!refival_.inited()); +} + +/** + * Set the upstream-most reference offset involved in the alignment, and + * the extent of the alignment (w/r/t the reference) + */ +void AlnRes::setShape( + TRefId id, // id of reference aligned to + TRefOff off, // offset of first aligned char into ref seq + TRefOff reflen, // length of reference sequence aligned to + bool fw, // aligned to Watson strand? + size_t rdlen, // length of read after hard trimming, before soft + TReadId rdid, // read ID + bool pretrimSoft, // whether trimming prior to alignment was soft + size_t pretrim5p, // # poss trimmed form 5p end before alignment + size_t pretrim3p, // # poss trimmed form 3p end before alignment + bool trimSoft, // whether local-alignment trimming was soft + size_t trim5p, // # poss trimmed form 5p end during alignment + size_t trim3p) // # poss trimmed form 3p end during alignment +{ + rdlen_ = rdlen; + rdid_ = rdid; + rdrows_ = rdlen; + refcoord_.init(id, off, fw); + pretrimSoft_ = pretrimSoft; + pretrim5p_ = pretrim5p; + pretrim3p_ = pretrim3p; + trimSoft_ = trimSoft; + trim5p_ = trim5p; + trim3p_ = trim3p; + // Propagate trimming to the edits. 
We assume that the pos fields of the + // edits are set w/r/t to the rows of the dynamic programming table, and + // haven't taken trimming into account yet. + // + // TODO: The division of labor between the aligner and the AlnRes is not + // clean. Perhaps the trimming and *all* of its side-effects should be + // handled by the aligner. + + // daehwan - check this out - this doesn't seem to work with SAWHI + // size_t trimBeg = fw ? trim5p : trim3p; + size_t trimBeg = trim5p; + if(trimBeg > 0) { + for(size_t i = 0; i < ned_->size(); i++) { + // Shift by trim5p, since edits are w/r/t 5p end + assert_geq((*ned_)[i].pos, trimBeg); + (*ned_)[i].pos -= (uint32_t)trimBeg; + } + } + // Length after all soft trimming and any hard trimming that occurred + // during alignment + rdextent_ = rdlen; + if(pretrimSoft_) { + rdextent_ -= (pretrim5p + pretrim3p); // soft trim + } + rdextent_ -= (trim5p + trim3p); // soft or hard trim from alignment + assert_gt(rdextent_, 0); + rdexrows_ = rdextent_; + calcRefExtent(); + refival_.init(id, off, fw, rfextent_); + reflen_ = reflen; + shapeSet_ = true; +} + +/** + * Initialize new AlnRes. 
+ */ +void AlnRes::init( + size_t rdlen, // # chars after hard trimming + TReadId rdid, // read ID + AlnScore score, // alignment score + const EList* ned, // nucleotide edits + size_t ned_i, // first position to copy + size_t ned_n, // # positions to copy + const EList* aed, // ambiguous base resolutions + size_t aed_i, // first position to copy + size_t aed_n, // # positions to copy + Coord refcoord, // leftmost ref pos of 1st al char + TRefOff reflen, // length of ref aligned to + LinkedEList >* raw_edits, + int seedmms, // # seed mms allowed + int seedlen, // seed length + int seedival, // space between seeds + int64_t minsc, // minimum score for valid aln + int nuc5p, + int nuc3p, + bool pretrimSoft, + size_t pretrim5p, // trimming prior to alignment + size_t pretrim3p, // trimming prior to alignment + bool trimSoft, + size_t trim5p, // trimming from alignment + size_t trim3p, // trimming from alignment + bool repeat) // repeat +{ + assert(raw_edits != NULL); + assert(raw_edits_ == NULL || raw_edits_ == raw_edits); + raw_edits_ = raw_edits; + if(ned_ != NULL) { + assert(aed_ != NULL); + ned_->clear(); + aed_->clear(); + } else if(raw_edits_ != NULL) { + assert(aed_ == NULL); + assert(ned_node_ == NULL && aed_node_ == NULL); + ned_node_ = raw_edits_->new_node(); + aed_node_ = raw_edits_->new_node(); + assert(ned_node_ != NULL && aed_node_ != NULL); + ned_ = &(ned_node_->payload); + aed_ = &(aed_node_->payload); + } + + rdlen_ = rdlen; + rdid_ = rdid; + rdrows_ = rdlen; + score_ = score; + ned_->clear(); + aed_->clear(); + if(ned != NULL) { + for(size_t i = ned_i; i < ned_i + ned_n; i++) { + ned_->push_back((*ned)[i]); + } + } + if(aed != NULL) { + for(size_t i = aed_i; i < aed_i + aed_n; i++) { + aed_->push_back((*aed)[i]); + } + } + refcoord_ = refcoord; + reflen_ = reflen; + seedmms_ = seedmms; + seedlen_ = seedlen; + seedival_ = seedival; + minsc_ = minsc; + nuc5p_ = nuc5p; + nuc3p_ = nuc3p; + pretrimSoft_ = pretrimSoft; + pretrim5p_ = pretrim5p; + 
pretrim3p_ = pretrim3p; + trimSoft_ = trimSoft; + trim5p_ = trim5p; + trim3p_ = trim3p; + repeat_ = repeat; + rdextent_ = rdlen; // # read characters after any hard trimming + if(pretrimSoft) { + rdextent_ -= (pretrim5p + pretrim3p); + } + if(trimSoft) { + rdextent_ -= (trim5p + trim3p); + } + rdexrows_ = rdextent_; + calcRefExtent(); + setShape( + refcoord.ref(), // id of reference aligned to + refcoord.off(), // offset of first aligned char into ref seq + reflen, // length of reference sequence aligned to + refcoord.fw(), // aligned to Watson strand? + rdlen, // length of read after hard trimming, before soft + rdid, // read ID + pretrimSoft, // whether trimming prior to alignment was soft + pretrim5p, // # poss trimmed form 5p end before alignment + pretrim3p, // # poss trimmed form 3p end before alignment + trimSoft, // whether local-alignment trimming was soft + trim5p, // # poss trimmed form 5p end during alignment + trim3p); // # poss trimmed form 3p end during alignment + shapeSet_ = true; + + num_spliced_ = 0; + for(size_t i = 0; i < ned_->size(); i++) { + if((*ned_)[i].type == EDIT_TYPE_SPL) { + num_spliced_++; + } + } +} + +/** + * Clip given number of characters from the Watson-upstream end of the + * alignment. + */ +void AlnRes::clipLeft(size_t rd_amt, size_t rf_amt) { + assert_geq(rd_amt, 0); + assert_geq(rf_amt, 0); + assert_leq(rd_amt, rdexrows_); + assert_leq(rf_amt, rfextent_); + assert(trimSoft_); + if(fw()) { + trim5p_ += rd_amt; + Edit::clipLo(*ned_, rdexrows_, rd_amt); + Edit::clipLo(*aed_, rdexrows_, rd_amt); + } else { + trim3p_ += rd_amt; + Edit::clipHi(*ned_, rdexrows_, rd_amt); + Edit::clipHi(*aed_, rdexrows_, rd_amt); + } + rdexrows_ -= rd_amt; + rdextent_ -= rd_amt; + rfextent_ -= rf_amt; + refcoord_.adjustOff(rf_amt); + refival_.adjustOff(rf_amt); + // Adjust refns_? +} + +/** + * Clip given number of characters from the Watson-downstream end of the + * alignment. 
+ */ +void AlnRes::clipRight(size_t rd_amt, size_t rf_amt) { + assert_geq(rd_amt, 0); + assert_geq(rf_amt, 0); + assert_leq(rd_amt, rdexrows_); + assert_leq(rf_amt, rfextent_); + assert(trimSoft_); + if(fw()) { + trim3p_ += rd_amt; + Edit::clipHi(*ned_, rdexrows_, rd_amt); + Edit::clipHi(*aed_, rdexrows_, rd_amt); + } else { + trim5p_ += rd_amt; + Edit::clipLo(*ned_, rdexrows_, rd_amt); + Edit::clipLo(*aed_, rdexrows_, rd_amt); + } + rdexrows_ -= rd_amt; + rdextent_ -= rd_amt; + rfextent_ -= rf_amt; + // Adjust refns_? +} + +/** + * Clip away portions of the alignment that are outside the given bounds. + * Clipping is soft if soft == true, hard otherwise. Assuming for now that + * there isn't any other clipping. + * + * Note that all clipping is expressed in terms of read positions. So if there + * are reference gaps in the overhanging portion, we must + */ +void AlnRes::clipOutside(bool soft, TRefOff refi, TRefOff reff) { + // Overhang on LHS + TRefOff left = refcoord_.off(); + if(left < refi) { + size_t rf_amt = (size_t)(refi - left); + size_t rf_i = rf_amt; + size_t nedsz = ned_->size(); + if(!fw()) { + Edit::invertPoss(*ned_, rdexrows_, false); + } + for(size_t i = 0; i < nedsz; i++) { + assert_lt((*ned_)[i].pos, rdexrows_); + if((*ned_)[i].pos > rf_i) break; + if((*ned_)[i].isRefGap()) rf_i++; + } + if(!fw()) { + Edit::invertPoss(*ned_, rdexrows_, false); + } + clipLeft(rf_i, rf_amt); + } + // Overhang on RHS + TRefOff right = refcoord_.off() + refNucExtent(); + if(right > reff) { + size_t rf_amt = (size_t)(right - reff); + size_t rf_i = rf_amt; + size_t nedsz = ned_->size(); + if(fw()) { + Edit::invertPoss(*ned_, rdexrows_, false); + } + for(size_t i = 0; i < nedsz; i++) { + assert_lt((*ned_)[i].pos, rdexrows_); + if((*ned_)[i].pos > rf_i) break; + if((*ned_)[i].isRefGap()) rf_i++; + } + if(fw()) { + Edit::invertPoss(*ned_, rdexrows_, false); + } + clipRight(rf_i, rf_amt); + } +} + +/** + * Return true iff this AlnRes and the given AlnRes overlap. 
/**
 * Return true iff this AlnRes and the given AlnRes overlap.  Two AlnRess
 * overlap if they share a cell in the overall dynamic programming table:
 * i.e. if there exists a read position s.t. that position in both reads
 * matches up with the same reference character.  Two alignments drawn as
 * paths through the table are redundant when, at some row, their paths
 * occupy the same column (the schematic that originally illustrated this
 * could not be recovered from this copy of the file).
 *
 * We iterate over each read position that hasn't been hard-trimmed, but
 * only overlaps at positions that have also not been soft-trimmed are
 * considered.
 *
 * Both objects' edit lists are temporarily inverted for reverse-strand
 * alignments and are inverted back before every return that follows the
 * inversion, which is why 'res' is taken by non-const reference.
 */
bool AlnRes::overlap(AlnRes& res) {
    if(fw() != res.fw() || refid() != res.refid()) {
        // Must be same reference and same strand in order to overlap
        return false;
    }
    TRefOff my_left     = refoff();     // my leftmost aligned char
    TRefOff other_left  = res.refoff(); // other leftmost aligned char
    TRefOff my_right    = my_left    + refExtent();
    TRefOff other_right = other_left + res.refExtent();
    if(my_right < other_left || other_right < my_left) {
        // The rectangular hulls of the two alignments don't overlap, so
        // they can't overlap at any cell
        return false;
    }
    // Reference and strand are the same and hulls overlap.  Now go read
    // position by read position testing if any align identically with the
    // reference.

    // Edits are ordered and indexed from 5' to 3' to start with.  We
    // reorder them to go from left to right along the Watson strand.
    if(!fw()) {
        invertEdits();
    }
    if(!res.fw()) {
        res.invertEdits();
    }
    size_t nedidx = 0, onedidx = 0;
    bool olap = false;
    // For each row, going left to right along Watson reference strand...
    for(size_t i = 0; i < rdexrows_; i++) {
        size_t diff = 1;  // amount to shift my_left to the right for next round
        size_t odiff = 1; // amount to shift other_left to the right for next round
        // Unless there are insertions before the next position, we say
        // that there is one cell in this row involved in the alignment
        my_right = my_left + 1;
        other_right = other_left + 1;
        while(nedidx < ned_->size() && (*ned_)[nedidx].pos == i) {
            if((*ned_)[nedidx].isRefGap()) {
                // Next my_left will be in same column as this round
                diff = 0;
            }
            nedidx++;
        }
        while(onedidx < res.ned_->size() && (*res.ned_)[onedidx].pos == i) {
            if((*res.ned_)[onedidx].isRefGap()) {
                // Next other_left will be in same column as this round
                odiff = 0;
            }
            onedidx++;
        }
        if(i < rdexrows_ - 1) {
            // See how many inserts there are before the next read
            // character; each read gap at the next row widens this row's
            // span by one reference column
            size_t nedidx_next  = nedidx;
            size_t onedidx_next = onedidx;
            while(nedidx_next < ned_->size() &&
                  (*ned_)[nedidx_next].pos == i+1)
            {
                if((*ned_)[nedidx_next].isReadGap()) {
                    my_right++;
                }
                nedidx_next++;
            }
            while(onedidx_next < res.ned_->size() &&
                  (*res.ned_)[onedidx_next].pos == i+1)
            {
                if((*res.ned_)[onedidx_next].isReadGap()) {
                    other_right++;
                }
                onedidx_next++;
            }
        }
        // Contained?
        olap =
            (my_left >= other_left && my_right <= other_right) ||
            (other_left >= my_left && other_right <= my_right);
        // Overlapping but not contained?
        if(!olap) {
            olap =
                (my_left <= other_left && my_right > other_left) ||
                (other_left <= my_left && other_right > my_left);
        }
        if(olap) {
            break;
        }
        // Advance both spans to the next row: start where this row ended,
        // unless a ref gap keeps the path in the same column
        my_left = my_right + diff - 1;
        other_left = other_right + odiff - 1;
    }
    // Undo the inversions done above so both edit lists are 5'-to-3' again
    if(!fw()) {
        invertEdits();
    }
    if(!res.fw()) {
        res.invertEdits();
    }
    return olap;
}

#ifndef NDEBUG

/**
 * Assuming this AlnRes is an alignment for 'rd', check that the alignment and
 * 'rd' are compatible with the corresponding reference sequence.
 *
 * Debug-only (compiled out under NDEBUG).  The reference stretch(es) covered
 * by the alignment are fetched from 'ref', the reference string implied by
 * the read plus edits is rebuilt with Edit::toRef, and the two are compared
 * position by position.  On a mismatch a diagnostic is written to cerr.
 * All non-const reference parameters are scratch buffers that are cleared
 * and overwritten.
 *
 * @return true iff every compared position agrees
 *
 * NOTE(review): several template argument lists in this function appear to
 * have been lost in this copy of the file (the element types of the
 * SStringExpandable / EList parameters, the target types of const_cast and
 * reinterpret_cast, and possibly an explicit argument on max).  The tokens
 * are left exactly as found; restore them from the declaration in the
 * header before compiling.
 * NOTE(review): 'qseq' is not referenced in the body.
 */
bool AlnRes::matchesRef(
    const Read& rd,
    const BitPairReference& ref,
    BTDnaString& rf,
    BTDnaString& rdseq,
    BTString& qseq,
    SStringExpandable& raw_refbuf,
    SStringExpandable& destU32,
    EList& matches,
    SStringExpandable& raw_refbuf2,
    EList& reflens,
    EList& refoffs)
{
    assert(!empty());
    assert(repOk());
    assert(refcoord_.inited());
    size_t rdlen = rd.length();
    bool fw = refcoord_.fw();
    if(!fw) {
        // Temporarily flip edit positions so they can be walked in
        // increasing order below; flipped back after the walk
        assert_lt(trim3p_, rdlen);
        Edit::invertPoss(const_cast&>(*ned_), rdlen - trim5p_ - trim3p_, false);
    }
    size_t refallen = 0; // total reference length over all segments
    reflens.clear(); refoffs.clear();
    int64_t reflen = 0;                 // length of the current segment
    int64_t refoff = refcoord_.off();   // reference offset being tracked
    refoffs.push_back((uint32_t)refoff);
    size_t eidx = 0;
    assert_lt(trim5p_ + trim3p_, rdlen);
    // Walk the untrimmed read rows; each row normally consumes one
    // reference position, adjusted by gaps; a splice closes the current
    // segment and starts a new one splLen further along
    for(size_t i = 0; i < rdlen - trim5p_ - trim3p_; i++, reflen++, refoff++) {
        while(eidx < ned_->size() && (*ned_)[eidx].pos == i) {
            if((*ned_)[eidx].isReadGap()) {
                reflen++;
                refoff++;
            } else if((*ned_)[eidx].isRefGap()) {
                reflen--;
                refoff--;
            }
            if((*ned_)[eidx].isSpliced()) {
                assert_gt(reflen, 0);
                refallen += (uint32_t)reflen;
                reflens.push_back((uint32_t)reflen);
                reflen = 0;
                refoff += (*ned_)[eidx].splLen;
                assert_gt(refoff, 0);
                refoffs.push_back((uint32_t)refoff);
            }
            eidx++;
        }
    }
    assert_gt(reflen, 0);
    refallen += (uint32_t)reflen;
    reflens.push_back((uint32_t)reflen);
    assert_gt(reflens.size(), 0);
    assert_gt(refoffs.size(), 0);
    assert_eq(reflens.size(), refoffs.size());
    if(!fw) {
        // Restore the original edit orientation
        assert_lt(trim3p_, rdlen);
        Edit::invertPoss(const_cast&>(*ned_), rdlen - trim5p_ - trim3p_, false);
    }

    // Adjust reference string length according to edits
#ifndef NDEBUG
    if(reflens.size() == 1) {
        assert_eq(refallen, refNucExtent());
    }
#endif

    assert_geq(refcoord_.ref(), 0);
    // A negative starting offset means the alignment hangs off the left end
    // of the reference by that many positions
    int nsOnLeft = 0;
    if(refcoord_.off() < 0) {
        nsOnLeft = -((int)refcoord_.off());
    }
    raw_refbuf.resize(refallen);
    raw_refbuf.clear();
    raw_refbuf2.clear();
    // Fetch each segment and concatenate the segments into raw_refbuf
    for(size_t i = 0; i < reflens.size(); i++) {
        assert_gt(reflens[i], 0);
#ifndef NDEBUG
        if(i > 0) {
            assert_gt(refoffs[i], refoffs[i-1]);
        }
#endif
        // 16 extra characters of slack: getStretch returns an offset (at
        // most 16, per the assert below) at which the data begins
        raw_refbuf2.resize(reflens[i] + 16);
        raw_refbuf2.clear();
        int off = ref.getStretch(
            reinterpret_cast(raw_refbuf2.wbuf()),
            (size_t)refcoord_.ref(),
            (size_t)max(refoffs[i], 0),
            reflens[i],
            destU32);
        assert_leq(off, 16);
        raw_refbuf.append(raw_refbuf2.wbuf() + off, reflens[i]);
    }
    char *refbuf = raw_refbuf.wbuf();
    // Only soft trimming counts here
    size_t trim5 = 0, trim3 = 0;
    if(trimSoft_) {
        trim5 += trim5p_;
        trim3 += trim3p_;
    }
    if(pretrimSoft_) {
        trim5 += pretrim5p_;
        trim3 += pretrim3p_;
    }
    rf.clear();
    rdseq.clear();
    rdseq = rd.patFw;
    if(!fw) {
        rdseq.reverseComp(false);
    }
    assert_eq(rdrows_, rdseq.length());
    // rdseq is the nucleotide sequence from upstream to downstream on the
    // Watson strand.  ned_ are the nucleotide edits from upstream to
    // downstream.  rf contains the reference characters.
    assert(Edit::repOk(*ned_, rdseq, fw, trim5, trim3));
    Edit::toRef(rdseq, *ned_, rf, fw, trim5, trim3);
    assert_eq(refallen, rf.length());
    matches.clear();
    bool matchesOverall = true;
    matches.resize(refallen);
    matches.fill(true);
    for(size_t i = 0; i < refallen; i++) {
        if((int)i < nsOnLeft) {
            // Off the left end of the reference: the rebuilt reference
            // character must be 4 (N)
            if((int)rf[i] != 4) {
                matches[i] = false;
                matchesOverall = false;
            }
        } else {
            if((int)rf[i] != (int)refbuf[i-nsOnLeft]) {
                matches[i] = false;
                matchesOverall = false;
            }
        }
    }
    if(!matchesOverall) {
        // Print a friendly message showing the difference between the
        // reference sequence obtained with Edit::toRef and the actual
        // reference sequence
        cerr << endl;
        Edit::printQAlignNoCheck(
            cerr,
            " ",
            rdseq,
            *ned_);
        cerr << " ";
        // One marker per reference position: '*' where the two disagree
        for(size_t i = 0; i < refallen; i++) {
            cerr << (matches[i] ? " " : "*");
        }
        cerr << endl;
        cerr << " ";
        for(size_t i = 0; i < refallen-nsOnLeft; i++) {
            cerr << "ACGTN"[(int)refbuf[i]];
        }
        cerr << endl;
        Edit::printQAlign(
            cerr,
            " ",
            rdseq,
            *ned_);
        cerr << endl;
    }
    return matchesOverall;
}

#endif /*ndef NDEBUG*/

/*
 * Copy the NUL-terminated contents of a local 'buf' to the position pointed
 * to by a local 'occ', advancing 'occ' past the copied characters.  The
 * terminating NUL is not copied.  Both names must be in scope at the point
 * of use.
 */
#define COPY_BUF() { \
    char *bufc = buf; \
    while(*bufc != '\0') { \
        *occ = *bufc; \
        occ++; \
        bufc++; \
    } \
}

/**
 * Initialize the stacked alignment with respect to a read string, a list of
 * edits (expressed left-to-right), and integers indicating how much hard and
 * soft trimming has occurred on either end of the read.
 *
 * s: read sequence
 * ed: all relevant edits, including ambiguous nucleotides
 * trimLS: # bases soft-trimmed from LHS
 * trimLH: # bases hard-trimmed from LHS
 * trimRS: # bases soft-trimmed from RHS
 * trimRH: # bases hard-trimmed from RHS
 *
 * One column is appended to stackRef_ / stackRel_ / stackSNP_ / stackRead_
 * per alignment column.  The relation codes written to stackRel_ are
 * '=' (match), 'X' (mismatch), 'I' (ref gap), 'D' (read gap) and 'N'
 * (splice, with its length appended to stackSkip_).
 *
 * NOTE(review): the element type of 'ed' appears to have been lost in this
 * copy of the file; the token is left as found.
 * NOTE(review): stackSkip_ is appended to but, unlike the other four stacks,
 * is not cleared here - confirm it is cleared elsewhere before reuse.
 */
void StackedAln::init(
    const BTDnaString& s,
    const EList& ed,
    size_t trimLS,
    size_t trimLH,
    size_t trimRS,
    size_t trimRH)
{
    trimLS_ = trimLS;
    trimLH_ = trimLH;
    trimRS_ = trimRS;
    trimRH_ = trimRH;
    ASSERT_ONLY(size_t ln_postsoft = s.length() - trimLS - trimRS);
    stackRef_.clear();
    stackRel_.clear();
    stackSNP_.clear();
    stackRead_.clear();
    size_t rdoff = trimLS; // next read character to emit
    for(size_t i = 0; i < ed.size(); i++) {
        assert_lt(ed[i].pos, ln_postsoft);
        // Edit positions are relative to the soft-trimmed read
        size_t pos = ed[i].pos + trimLS;
        // Emit matching columns up to the edit
        while(rdoff < pos) {
            int c = s[rdoff++];
            assert_range(0, 4, c);
            stackRef_.push_back("ACGTN"[c]);
            stackRel_.push_back('=');
            stackSNP_.push_back(false);
            stackRead_.push_back("ACGTN"[c]);
        }
        if(ed[i].isMismatch()) {
            int c = s[rdoff++];
            assert_range(0, 4, c);
            assert_eq(c, asc2dna[(int)ed[i].qchr]);
            assert_neq(c, asc2dna[(int)ed[i].chr]);
            stackRef_.push_back(ed[i].chr);
            stackRel_.push_back('X');
            stackSNP_.push_back(ed[i].snpID != (uint32_t)INDEX_MAX);
            stackRead_.push_back("ACGTN"[c]);
        } else if(ed[i].isRefGap()) {
            // Read character with no reference counterpart
            int c = s[rdoff++];
            assert_range(0, 4, c);
            assert_eq(c, asc2dna[(int)ed[i].qchr]);
            stackRef_.push_back('-');
            stackRel_.push_back('I');
            stackSNP_.push_back(ed[i].snpID != (uint32_t)INDEX_MAX);
            stackRead_.push_back("ACGTN"[c]);
        } else if(ed[i].isReadGap()) {
            // Reference character with no read counterpart; no read
            // character is consumed
            stackRef_.push_back(ed[i].chr);
            stackRel_.push_back('D');
            stackSNP_.push_back(ed[i].snpID != (uint32_t)INDEX_MAX);
            stackRead_.push_back('-');
        } else if(ed[i].isSpliced()) {
            // A splice occupies a single 'N' column; its length is kept
            // separately in stackSkip_
            stackRef_.push_back('N');
            stackRel_.push_back('N');
            stackSNP_.push_back(false);
            stackRead_.push_back('N');
            assert_gt(ed[i].splLen, 0);
            stackSkip_.push_back(ed[i].splLen);
        }
    }
    // Emit matching columns after the last edit, up to the RHS soft trim
    while(rdoff < s.length() - trimRS) {
        int c = s[rdoff++];
        assert_range(0, 4, c);
        stackRef_.push_back("ACGTN"[c]);
        stackRel_.push_back('=');
        stackSNP_.push_back(false);
        stackRead_.push_back("ACGTN"[c]);
    }
    inited_ = true;
}
+ */ +void StackedAln::leftAlign(bool pastMms) { + assert(inited_); + bool changed = false; + size_t ln = stackRef_.size(); + // Scan left-to-right + for(size_t i = 0; i < ln; i++) { + int rel = stackRel_[i]; + if(rel != '=' && rel != 'X' && rel != 'N') { + // Neither a match nor a mismatch - must be a gap + assert(rel == 'I' || rel == 'D'); + if(stackSNP_[i]) continue; + size_t glen = 1; + // Scan further right to measure length of gap + for(size_t j = i+1; j < ln; j++) { + if(rel != (int)stackRel_[j]) break; + glen++; + } + // We've identified a gap of type 'rel' (D = deletion or read + // gap, I = insertion or ref gap) with length 'glen'. Now we + // can try to slide it to the left repeatedly. + size_t l = i - 1; + size_t r = l + glen; + EList& gp = ((rel == 'I') ? stackRef_ : stackRead_); + const EList& ngp = ((rel == 'I') ? stackRead_ : stackRef_); + while(l > 0 && ngp[l] == ngp[r]) { + if(stackRel_[l] == 'I' || stackRel_[l] == 'D') break; + assert(stackRel_[l] == '=' || stackRel_[l] == 'X' || stackRel_[l] == 'N'); + assert(stackRel_[r] == 'D' || stackRel_[r] == 'I'); + if(!pastMms && (stackRel_[l] == 'X' || stackRel_[l] == 'N')) { + break; + } + swap(gp[l], gp[r]); + swap(stackRel_[l], stackRel_[r]); + assert_neq('-', gp[r]); + assert_eq('-', gp[l]); + l--; r--; + changed = true; + } + i += (glen-1); + } + } + if(changed) { + cigCalc_ = mdzCalc_ = false; + } +} + +/** + * Build the CIGAR list, if it hasn't already built. Returns true iff it + * was built for the first time. 
+ */ +bool StackedAln::buildCigar(bool xeq) { + assert(inited_); + if(cigCalc_) { + return false; // already done + } + cigOp_.clear(); + cigRun_.clear(); + if(trimLS_ > 0) { + cigOp_.push_back('S'); + cigRun_.push_back(trimLS_); + } + size_t numSkips = 0; + size_t ln = stackRef_.size(); + for(size_t i = 0; i < ln; i++) { + char op = stackRel_[i]; + if(!xeq && (op == 'X' || op == '=')) { + op = 'M'; + } + size_t run; + if(op != 'N') { + run = 1; + for(; i + run < ln; run++) { + char op2 = stackRel_[i + run]; + if(!xeq && (op2 == 'X' || op2 == '=')) { + op2 = 'M'; + } + if(op2 != op) { + break; + } + } + i += (run-1); + } else { + assert_lt(numSkips, stackSkip_.size()); + run = stackSkip_[numSkips]; + numSkips++; + } + cigOp_.push_back(op); + cigRun_.push_back(run); + } + if(trimRS_ > 0) { + cigOp_.push_back('S'); + cigRun_.push_back(trimRS_); + } + cigCalc_ = true; + return true; +} + +/** + * Build the CIGAR list, if it hasn't already built. Returns true iff it + * was built for the first time. 
+ */ +bool StackedAln::buildMdz() { + assert(inited_); + if(mdzCalc_) { + return false; // already done + } + mdzOp_.clear(); + mdzChr_.clear(); + mdzRun_.clear(); + size_t ln = stackRef_.size(); + for(size_t i = 0; i < ln; i++) { + char op = stackRel_[i]; + if(op == '=') { + size_t run = 1; + size_t ninserts = 0; + size_t nskips = 0; + // Skip over matches and insertions (ref gaps) + for(; i+run < ln; run++) { + if(stackRel_[i + run] == '=') { + // do nothing + } else if(stackRel_[i + run] == 'I') { + ninserts++; + } else if(stackRel_[i + run] == 'N') { + nskips++; + } else { + break; + } + } + i += (run - 1); + mdzOp_.push_back('='); // = X or G + mdzChr_.push_back('-'); + mdzRun_.push_back(run - ninserts - nskips); + } else if(op == 'X') { + assert_neq(stackRef_[i], stackRead_[i]); + mdzOp_.push_back('X'); // = X or G + mdzChr_.push_back(stackRef_[i]); + mdzRun_.push_back(1); + } else if(op == 'D') { + assert_neq('-', stackRef_[i]); + mdzOp_.push_back('G'); // = X or G + mdzChr_.push_back(stackRef_[i]); + mdzRun_.push_back(1); + } + } + mdzCalc_ = true; + return true; +} + +/** + * Write a CIGAR representation of the alignment to the given string and/or + * char buffer. 
+ */ +void StackedAln::writeCigar( + BTString* o, // if non-NULL, string to append to + char* occ) const // if non-NULL, character string to append to +{ + const EList& op = cigOp_; + const EList& run = cigRun_; + assert_eq(op.size(), run.size()); + if(o != NULL || occ != NULL) { + char buf[128]; + ASSERT_ONLY(bool printed = false); + for(size_t i = 0; i < op.size(); i++) { + size_t r = run[i]; + if(r > 0) { + itoa10(r, buf); + ASSERT_ONLY(printed = true); + if(o != NULL) { + o->append(buf); + o->append(op[i]); + } + if(occ != NULL) { + COPY_BUF(); + *occ = op[i]; + occ++; + } + } + } + assert(printed); + if(occ != NULL) { + *occ = '\0'; + } + } +} + +void StackedAln::writeCigar(Alignment* o, char* occ) const { + const EList& op = cigOp_; + const EList& run = cigRun_; + assert_eq(op.size(), run.size()); + if(o != NULL || occ != NULL) { + char buf[128]; + ASSERT_ONLY(bool printed = false); + o->cigarSegments.reserve(op.size()); + for(size_t i = 0; i < op.size(); i++) { + size_t r = run[i]; + if(r > 0) { + itoa10(r, buf); + ASSERT_ONLY(printed = true); + if(o != NULL) { + o->cigarString.append(buf); + o->cigarString.append(op[i]); + o->cigarSegments.emplace_back(r, op[i]); + o->cigarLength += r; + } + if(occ != NULL) { + COPY_BUF(); + *occ = op[i]; + occ++; + } + } + } + assert(printed); + if(occ != NULL) { + *occ = '\0'; + } + } +} + +/** + * Write an MD:Z representation of the alignment to the given string and/or + * char buffer. 
+ */ +void StackedAln::writeMdz(BTString* o, char* occ) const { + char buf[128]; + bool mm_last = false; + bool rdgap_last = false; + bool first_print = true; + const EList& op = mdzOp_; + const EList& ch = mdzChr_; + const EList& run = mdzRun_; + for(size_t i = 0; i < op.size(); i++) { + size_t r = run[i]; + if(r > 0) { + if(op[i] == '=') { + // Write run length + itoa10(r, buf); + if(o != NULL) { o->append(buf); } + if(occ != NULL) { COPY_BUF(); } + first_print = false; + mm_last = false; + rdgap_last = false; + } else if(op[i] == 'X') { + if(o != NULL) { + if(rdgap_last || mm_last || first_print) { + o->append('0'); + } + o->append(ch[i]); + } + if(occ != NULL) { + if(rdgap_last || mm_last || first_print) { + *occ = '0'; + occ++; + } + *occ = ch[i]; + occ++; + } + first_print = false; + mm_last = true; + rdgap_last = false; + } else if(op[i] == 'G') { + if(o != NULL) { + if(mm_last || first_print) { + o->append('0'); + } + if(!rdgap_last) { + o->append('^'); + } + o->append(ch[i]); + } + if(occ != NULL) { + if(mm_last || first_print) { + *occ = '0'; occ++; + } + if(!rdgap_last) { + *occ = '^'; occ++; + } + *occ = ch[i]; + occ++; + } + first_print = false; + mm_last = false; + rdgap_last = true; + } + } // if r > 0 + } // for loop over ops + if(mm_last || rdgap_last) { + if(o != NULL) { o->append('0'); } + if(occ != NULL) { *occ = '0'; occ++; } + } + if(occ != NULL) { *occ = '\0'; } +} + +/** + * Print the sequence for the read that aligned using A, C, G and + * T. This will simply print the read sequence (or its reverse + * complement). 
+ */ +void AlnRes::printSeq( + const Read& rd, // read + const BTDnaString* dns, // already-decoded nucleotides + BTString& o) const // buffer to write to +{ + assert(!rd.patFw.empty()); + ASSERT_ONLY(size_t written = 0); + // Print decoded nucleotides + assert(dns != NULL); + size_t len = dns->length(); + size_t st = 0; + size_t en = len; + for(size_t i = st; i < en; i++) { + int c = dns->get(i); + assert_range(0, 3, c); + o.append("ACGT"[c]); + ASSERT_ONLY(written++); + } +#ifndef NDEBUG + for(size_t i = 0; i < ned_->size(); i++) { + if((*ned_)[i].isReadGap()) { + assert_leq((*ned_)[i].pos, dns->length()); + } else { + assert_lt((*ned_)[i].pos, dns->length()); + } + } +#endif +} + +/** + * Print the quality string for the read that aligned. This will simply print + * the read qualities (or their reverse). + */ +void AlnRes::printQuals( + const Read& rd, // read + const BTString* dqs, // already-decoded qualities + BTString& o) const // output stream to write to +{ + assert(dqs != NULL); + size_t len = dqs->length(); + // Print decoded qualities from upstream to downstream Watson + for(size_t i = 1; i < len-1; i++) { + o.append(dqs->get(i)); + } +} + +/** + * Add all of the cells involved in the given alignment to the database. + */ +void RedundantAlns::add(const AlnRes& res) { + assert(!cells_.empty()); + TRefOff left = res.refoff(), right; + const size_t len = res.readExtentRows(); + if(!res.fw()) { + const_cast(res).invertEdits(); + } + const EList& ned = res.ned(); + size_t nedidx = 0; + assert_leq(len, cells_.size()); + // For each row... 
+ for(size_t i = 0; i < len; i++) { + size_t diff = 1; // amount to shift to right for next round + right = left + 1; + while(nedidx < ned.size() && ned[nedidx].pos == i) { + if(ned[nedidx].isRefGap()) { + // Next my_left will be in same column as this round + diff = 0; + } + nedidx++; + } + if(i < len - 1) { + // See how many inserts there are before the next read + // character + size_t nedidx_next = nedidx; + while(nedidx_next < ned.size() && ned[nedidx_next].pos == i+1) + { + if(ned[nedidx_next].isReadGap()) { + right++; + } + nedidx_next++; + } + } + for(TRefOff j = left; j < right; j++) { + // Add to db + RedundantCell c(res.refid(), res.fw(), j, i); + ASSERT_ONLY(bool ret =) cells_[i].insert(c); + assert(ret); + } + left = right + diff - 1; + } + if(!res.fw()) { + const_cast(res).invertEdits(); + } +} + +/** + * Return true iff the given alignment has at least one cell that overlaps + * one of the cells in the database. + */ +bool RedundantAlns::overlap(const AlnRes& res) { + assert(!cells_.empty()); + TRefOff left = res.refoff(), right; + const size_t len = res.readExtentRows(); + if(!res.fw()) { + const_cast(res).invertEdits(); + } + const EList& ned = res.ned(); + size_t nedidx = 0; + // For each row... 
+ bool olap = false; + assert_leq(len, cells_.size()); + for(size_t i = 0; i < len; i++) { + size_t diff = 1; // amount to shift to right for next round + right = left + 1; + while(nedidx < ned.size() && ned[nedidx].pos == i) { + if(ned[nedidx].isRefGap()) { + // Next my_left will be in same column as this round + diff = 0; + } + nedidx++; + } + if(i < len - 1) { + // See how many inserts there are before the next read + // character + size_t nedidx_next = nedidx; + while(nedidx_next < ned.size() && ned[nedidx_next].pos == i+1) + { + if(ned[nedidx_next].isReadGap()) { + right++; + } + nedidx_next++; + } + } + for(TRefOff j = left; j < right; j++) { + // Add to db + RedundantCell c(res.refid(), res.fw(), j, i); + if(cells_[i].contains(c)) { + olap = true; + break; + } + } + if(olap) { + break; + } + left = right + diff - 1; + } + if(!res.fw()) { + const_cast(res).invertEdits(); + } + return olap; +} + +/** + * Given all the paired and unpaired results involving mates #1 and #2, + * calculate best and second-best scores for both mates. These are + * used for future MAPQ calculations. 
+ */ +void AlnSetSumm::init( + const Read* rd1, + const Read* rd2, + const EList* rs1, + const EList* rs2, + const EList* rs1u, + const EList* rs2u, + bool exhausted1, + bool exhausted2, + TRefId orefid, + TRefOff orefoff, + bool repeat) +{ + assert(rd1 != NULL || rd2 != NULL); + assert((rs1 == NULL) == (rs2 == NULL)); + AlnScore best[2], secbest[2], bestPaired, secbestPaired; + size_t szs[2]; + best[0].invalidate(); secbest[0].invalidate(); + best[1].invalidate(); secbest[1].invalidate(); + bestPaired.invalidate(); secbestPaired.invalidate(); + bool paired = (rs1 != NULL && rs2 != NULL); + szs[0] = szs[1] = 0; + TNumAlns numAlns1 = 0, numAlns2 = 0, numAlnsPaired = 0; + if(paired) { + // Paired alignments + assert_eq(rs1->size(), rs2->size()); + szs[0] = szs[1] = rs1->size(); + assert_gt(szs[0], 0); + numAlnsPaired = szs[0]; + for(size_t i = 0; i < rs1->size(); i++) { + AlnScore sc = (*rs1)[i].score() + (*rs2)[i].score(); + if(sc > bestPaired) { + secbestPaired = bestPaired; + bestPaired = sc; + assert(VALID_AL_SCORE(bestPaired)); + } else if(sc > secbestPaired) { + secbestPaired = sc; + assert(VALID_AL_SCORE(bestPaired)); + assert(VALID_AL_SCORE(secbestPaired)); + } + } + } + for(int j = 0; j < 2; j++) { + const EList* rs = (j == 0 ? rs1u : rs2u); + if(rs == NULL) { + continue; + } + assert(rs != NULL); + szs[j] = rs->size(); + if(j == 0) { + numAlns1 = szs[j]; + } else { + numAlns2 = szs[j]; + } + //assert_gt(szs[j], 0); + for(size_t i = 0; i < rs->size(); i++) { + AlnScore sc = (*rs)[i].score(); + if(sc > best[j]) { + secbest[j] = best[j]; + best[j] = sc; + assert(VALID_AL_SCORE(best[j])); + } else if(sc > secbest[j]) { + secbest[j] = sc; + assert(VALID_AL_SCORE(best[j])); + assert(VALID_AL_SCORE(secbest[j])); + } + } + } + if(szs[0] > 0 || szs[1] > 0) { + init( + best[0], + secbest[0], + best[1], + secbest[1], + bestPaired, + secbestPaired, + (szs[0] == 0) ? 0 : (szs[0] - 1), + (szs[1] == 0) ? 
0 : (szs[1] - 1), + paired, + exhausted1, + exhausted2, + orefid, + orefoff, + repeat, + numAlns1, + numAlns2, + numAlnsPaired); + } else { + reset(); + orefid_ = orefid; + orefoff_ = orefoff; + repeat_ = repeat; + } +} + +/** + * Print out string representation of YF:i flag for indicating whether and + * why the mate was filtered. + */ +bool AlnFlags::printYF(BTString& o, bool first) const { + const char *flag = ""; + if (!lenfilt_) flag = "LN"; + else if(!nfilt_ ) flag = "NS"; + else if(!scfilt_ ) flag = "SC"; + else if(!qcfilt_ ) flag = "QC"; + if(*flag > 0) { + if(!first) o.append('\t'); + o.append("YF:Z:"); + o.append(flag); + return false; + } + return true; +} + + +/** + * Print out string representation of YM:i flag for indicating with the + * mate per se aligned repetitively. + */ +void AlnFlags::printYM(BTString& o) const { + o.append("YM:i:"); + o.append(maxed() ? '1' : '0'); +} + +/** + * Print out string representation of YM:i flag for indicating with the + * pair containing the mate aligned repetitively. + */ +void AlnFlags::printYP(BTString& o) const { + o.append("YP:i:"); + o.append(maxedPair() ? '1' : '0'); +} + +/** + * Print out string representation of these flags. + */ +void AlnFlags::printYT(BTString& o) const { + o.append("YT:Z:"); + if(alignedConcordant()) { + o.append("CP"); + } else if(alignedDiscordant()) { + o.append("DP"); + } else if(alignedUnpairedMate()) { + o.append("UP"); + } else if(alignedUnpaired()) { + o.append("UU"); + } else { throw 1; } +} + +#ifdef ALIGNER_RESULT_MAIN + +#include "mem_ids.h" + +int main() { + EList op; + EList ch; + EList run; + { + // On top of each other, same length + cerr << "Test case 1, simple overlap 1 ... 
"; + AlnRes res1; + res1.init( + 10, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 0, true), + false); + AlnRes res2; + res2.init( + 10, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 0, true), + false); + assert(res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "10M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "10=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "10M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "10=")); + + char buf3[1024]; + res1.printMD(false, false, op, ch, run, NULL, buf3); + assert_eq(0, strcmp(buf3, "10")); + res1.printMD(false, true, op, ch, run, NULL, buf3); + assert_eq(0, strcmp(buf3, "8")); + + char buf4[1024]; + res2.printMD(false, false, op, ch, run, NULL, buf4); + assert_eq(0, strcmp(buf4, "10")); + res2.printMD(false, true, op, ch, run, NULL, buf4); + assert_eq(0, strcmp(buf4, "8")); + + cerr << "PASSED" << endl; + } + + { + // On top of each other, different lengths + cerr << "Test case 2, simple overlap 2 ... 
"; + AlnRes res1; + res1.init( + 10, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 0, true), + false); + AlnRes res2; + res2.init( + 11, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 0, true), + false); + assert(res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(11); + ra.add(res1); + assert(ra.overlap(res1)); + assert(ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "10M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "10=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "11M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "11=")); + + char buf3[1024]; + res1.printMD(false, false, op, ch, run, NULL, buf3); + assert_eq(0, strcmp(buf3, "10")); + res1.printMD(false, true, op, ch, run, NULL, buf3); + assert_eq(0, strcmp(buf3, "8")); + + char buf4[1024]; + res2.printMD(false, false, op, ch, run, NULL, buf4); + assert_eq(0, strcmp(buf4, "11")); + res2.printMD(false, true, op, ch, run, NULL, buf4); + assert_eq(0, strcmp(buf4, "9")); + + cerr << "PASSED" << endl; + } + + { + // Different references + cerr << "Test case 3, simple overlap 3 ... "; + AlnRes res1; + res1.init( + 10, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 1, true), + false); + AlnRes res2; + res2.init( + 11, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 0, true), + false); + assert(!res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(11); + ra.add(res1); + assert(ra.overlap(res1)); + assert(!ra.overlap(res2)); + + cerr << "PASSED" << endl; + } + + { + // Different references + cerr << "Test case 4, simple overlap 4 ... 
"; + AlnRes res1; + res1.init( + 10, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 0, true), + false); + AlnRes res2; + res2.init( + 10, + AlnScore(), + NULL, + NULL, + NULL, + Coord(1, 0, true), + false); + assert(!res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(!ra.overlap(res2)); + + cerr << "PASSED" << endl; + } + + { + // Different strands + cerr << "Test case 5, simple overlap 5 ... "; + AlnRes res1; + res1.init( + 10, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 0, true), + false); + AlnRes res2; + res2.init( + 10, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 0, false), + false); + assert(!res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(!ra.overlap(res2)); + + cerr << "PASSED" << endl; + } + + { + // Different strands + cerr << "Test case 6, simple overlap 6 ... 
"; + EList ned1(RES_CAT); + ned1.expand(); + // 1 step to the right in the middle of the alignment + ned1.back().init(5, 'A' /*chr*/, '-' /*qchr*/, EDIT_TYPE_READ_GAP); + AlnRes res1; + res1.init( + 10, + AlnScore(), + &ned1, + NULL, + NULL, + Coord(0, 5, false), + false); + AlnRes res2; + res2.init( + 10, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 6, false), + false); + assert(res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5M1D5M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5=1D5=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "10M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "10=")); + + char buf3[1024]; + res1.printMD(false, false, op, ch, run, NULL, buf3); + assert_eq(0, strcmp(buf3, "5^A5")); + res1.printMD(false, true, op, ch, run, NULL, buf3); + assert_eq(0, strcmp(buf3, "4^A4")); + + char buf4[1024]; + res2.printMD(false, false, op, ch, run, NULL, buf4); + assert_eq(0, strcmp(buf4, "10")); + res2.printMD(false, true, op, ch, run, NULL, buf4); + assert_eq(0, strcmp(buf4, "8")); + + cerr << "PASSED" << endl; + } + + { + // Different strands + cerr << "Test case 7, simple overlap 7 ... 
"; + EList ned1(RES_CAT); + // 3 steps to the right in the middle of the alignment + ned1.push_back(Edit(5, 'A', '-', EDIT_TYPE_READ_GAP)); + ned1.push_back(Edit(5, 'C', '-', EDIT_TYPE_READ_GAP)); + ned1.push_back(Edit(5, 'G', '-', EDIT_TYPE_READ_GAP)); + AlnRes res1; + res1.init( + 10, + AlnScore(), + &ned1, + NULL, + NULL, + Coord(0, 5, false), + false); + AlnRes res2; + res2.init( + 10, + AlnScore(), + NULL, + NULL, + NULL, + Coord(0, 6, false), + false); + assert(res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5M3D5M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5=3D5=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "10M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "10=")); + + char buf3[1024]; + res1.printMD(false, false, op, ch, run, NULL, buf3); + assert_eq(0, strcmp(buf3, "5^GCA5")); + res1.printMD(false, true, op, ch, run, NULL, buf3); + assert_eq(0, strcmp(buf3, "4^GCA4")); + + char buf4[1024]; + res2.printMD(false, false, op, ch, run, NULL, buf4); + assert_eq(0, strcmp(buf4, "10")); + res2.printMD(false, true, op, ch, run, NULL, buf4); + assert_eq(0, strcmp(buf4, "8")); + + cerr << "PASSED" << endl; + } + + { + // Both with horizontal movements; overlap + cerr << "Test case 8, simple overlap 8 ... 
"; + EList ned1(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned1.push_back(Edit(5, 'A', '-', EDIT_TYPE_READ_GAP)); + ned1.push_back(Edit(5, 'C', '-', EDIT_TYPE_READ_GAP)); + AlnRes res1; + res1.init( + 10, + AlnScore(), + &ned1, + NULL, + NULL, + Coord(0, 5, false), + false); + EList ned2(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned2.push_back(Edit(5, 'A', '-', EDIT_TYPE_READ_GAP)); + ned2.push_back(Edit(5, 'C', '-', EDIT_TYPE_READ_GAP)); + AlnRes res2; + res2.init( + 10, + AlnScore(), + &ned2, + NULL, + NULL, + Coord(0, 6, false), + false); + assert(res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5M2D5M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5=2D5=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "5M2D5M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "5=2D5=")); + + cerr << "PASSED" << endl; + } + + { + // Both with horizontal movements; no overlap + cerr << "Test case 9, simple overlap 9 ... 
"; + EList ned1(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned1.push_back(Edit(6, 'A', '-', EDIT_TYPE_READ_GAP)); + ned1.push_back(Edit(6, 'C', '-', EDIT_TYPE_READ_GAP)); + AlnRes res1; + res1.init( + 10, + AlnScore(), + &ned1, + NULL, + NULL, + Coord(0, 5, true), + false); + EList ned2(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned2.push_back(Edit(5, 'A', '-', EDIT_TYPE_READ_GAP)); + ned2.push_back(Edit(5, 'C', '-', EDIT_TYPE_READ_GAP)); + AlnRes res2; + res2.init( + 10, + AlnScore(), + &ned2, + NULL, + NULL, + Coord(0, 6, true), + false); + assert(!res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(!ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "6M2D4M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "6=2D4=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "5M2D5M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "5=2D5=")); + + cerr << "PASSED" << endl; + } + + { + // Both with horizontal movements; no overlap. Reverse strand. + cerr << "Test case 10, simple overlap 10 ... 
"; + EList ned1(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned1.push_back(Edit(5, 'A', '-', EDIT_TYPE_READ_GAP)); + ned1.push_back(Edit(5, 'C', '-', EDIT_TYPE_READ_GAP)); + AlnRes res1; + res1.init( + 10, + AlnScore(), + &ned1, + NULL, + NULL, + Coord(0, 5, false), + false); + EList ned2(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned2.push_back(Edit(6, 'A', '-', EDIT_TYPE_READ_GAP)); + ned2.push_back(Edit(6, 'C', '-', EDIT_TYPE_READ_GAP)); + AlnRes res2; + res2.init( + 10, + AlnScore(), + &ned2, + NULL, + NULL, + Coord(0, 6, false), + false); + assert(!res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(!ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5M2D5M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5=2D5=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "4M2D6M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "4=2D6=")); + + cerr << "PASSED" << endl; + } + + { + // Both with vertical movements; no overlap + cerr << "Test case 11, simple overlap 11 ... 
"; + EList ned1(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned1.push_back(Edit(5, '-', 'A', EDIT_TYPE_REF_GAP)); + ned1.push_back(Edit(6, '-', 'C', EDIT_TYPE_REF_GAP)); + AlnRes res1; + res1.init( + 10, + AlnScore(), + &ned1, + NULL, + NULL, + Coord(0, 5, true), + false); + EList ned2(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned2.push_back(Edit(6, '-', 'A', EDIT_TYPE_REF_GAP)); + ned2.push_back(Edit(7, '-', 'C', EDIT_TYPE_REF_GAP)); + AlnRes res2; + res2.init( + 10, + AlnScore(), + &ned2, + NULL, + NULL, + Coord(0, 6, true), + false); + assert(!res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(!ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5M2I3M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5=2I3=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "6M2I2M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "6=2I2=")); + + cerr << "PASSED" << endl; + } + + { + // Both with vertical movements; no overlap + cerr << "Test case 12, simple overlap 12 ... 
"; + EList ned1(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned1.push_back(Edit(5, '-', 'A', EDIT_TYPE_REF_GAP)); + ned1.push_back(Edit(6, '-', 'C', EDIT_TYPE_REF_GAP)); + AlnRes res1; + res1.init( + 10, + AlnScore(), + &ned1, + NULL, + NULL, + Coord(0, 5, true), + false); + EList ned2(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned2.push_back(Edit(5, '-', 'A', EDIT_TYPE_REF_GAP)); + ned2.push_back(Edit(6, '-', 'C', EDIT_TYPE_REF_GAP)); + AlnRes res2; + res2.init( + 10, + AlnScore(), + &ned2, + NULL, + NULL, + Coord(0, 6, true), + false); + assert(!res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(!ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5M2I3M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5=2I3=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "5M2I3M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "5=2I3=")); + + cerr << "PASSED" << endl; + } + + { + // Both with vertical movements; overlap + cerr << "Test case 13, simple overlap 13 ... 
"; + EList ned1(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned1.push_back(Edit(5, '-', 'A', EDIT_TYPE_REF_GAP)); + ned1.push_back(Edit(6, '-', 'C', EDIT_TYPE_REF_GAP)); + AlnRes res1; + res1.init( + 10, + AlnScore(), + &ned1, + NULL, + NULL, + Coord(0, 5, true), + false); + EList ned2(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned2.push_back(Edit(4, '-', 'A', EDIT_TYPE_REF_GAP)); + ned2.push_back(Edit(5, '-', 'C', EDIT_TYPE_REF_GAP)); + AlnRes res2; + res2.init( + 10, + AlnScore(), + &ned2, + NULL, + NULL, + Coord(0, 6, true), + false); + assert(res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5M2I3M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5=2I3=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "4M2I4M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "4=2I4=")); + + cerr << "PASSED" << endl; + } + + { + // Not even close + cerr << "Test case 14, simple overlap 14 ... 
"; + EList ned1(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned1.push_back(Edit(5, '-', 'A', EDIT_TYPE_REF_GAP)); + ned1.push_back(Edit(6, '-', 'C', EDIT_TYPE_REF_GAP)); + AlnRes res1; + res1.init( + 10, + AlnScore(), + &ned1, + NULL, + NULL, + Coord(0, 5, true), + false); + EList ned2(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned2.push_back(Edit(4, '-', 'A', EDIT_TYPE_REF_GAP)); + ned2.push_back(Edit(5, '-', 'C', EDIT_TYPE_REF_GAP)); + AlnRes res2; + res2.init( + 10, + AlnScore(), + &ned2, + NULL, + NULL, + Coord(0, 400, true), + false); + assert(!res1.overlap(res2)); + + // Try again, but using the redundant-alignment database + RedundantAlns ra; + ra.reset(); + ra.init(10); + ra.add(res1); + assert(ra.overlap(res1)); + assert(!ra.overlap(res2)); + + char buf1[1024]; + res1.printCigar(false, false, false, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5M2I3M")); + res1.printCigar(false, false, true, op, run, NULL, buf1); + assert_eq(0, strcmp(buf1, "5=2I3=")); + + char buf2[1024]; + res2.printCigar(false, false, false, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "4M2I4M")); + res2.printCigar(false, false, true, op, run, NULL, buf2); + assert_eq(0, strcmp(buf2, "4=2I4=")); + + cerr << "PASSED" << endl; + } + + { + cerr << "Test case 15, CIGAR string with mismatches ... 
"; + EList ned(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned.push_back(Edit(0, 'C', 'A', EDIT_TYPE_MM)); + ned.push_back(Edit(4, '-', 'C', EDIT_TYPE_REF_GAP)); + ned.push_back(Edit(6, '-', 'C', EDIT_TYPE_REF_GAP)); + ned.push_back(Edit(7, '-', 'C', EDIT_TYPE_REF_GAP)); + ned.push_back(Edit(9, '-', 'A', EDIT_TYPE_READ_GAP)); + ned.push_back(Edit(9, '-', 'A', EDIT_TYPE_READ_GAP)); + ned.push_back(Edit(9, '-', 'A', EDIT_TYPE_READ_GAP)); + ned.push_back(Edit(9, '-', 'A', EDIT_TYPE_READ_GAP)); + ned.push_back(Edit(10, '-', 'A', EDIT_TYPE_MM)); + AlnRes res; res.init( + 11, + AlnScore(), + &ned, + NULL, + NULL, + Coord(0, 44, true), + false); + char buf[1024]; + res.printCigar(false, false, false, op, run, NULL, buf); + assert_eq(0, strcmp(buf, "4M1I1M2I1M4D2M")); + res.printCigar(false, false, true, op, run, NULL, buf); + assert_eq(0, strcmp(buf, "1X3=1I1=2I1=4D1=1X")); + cerr << "PASSED" << endl; + } + + { + cerr << "Test case 17, Overhang ... "; + EList ned(RES_CAT); + // 2 steps to the right in the middle of the alignment + ned.push_back(Edit(0, 'N', 'A', EDIT_TYPE_MM)); + ned.push_back(Edit(5, 'C', 'A', EDIT_TYPE_MM)); + AlnRes res; res.init( + 10, + AlnScore(), + &ned, + NULL, + NULL, + Coord(0, -1, true), + false); + + char buf[1024]; + res.printCigar(false, false, false, op, run, NULL, buf); + assert_eq(0, strcmp(buf, "10M")); + res.printCigar(false, false, true, op, run, NULL, buf); + assert_eq(0, strcmp(buf, "1X4=1X4=")); + res.printMD(false, false, op, ch, run, NULL, buf); + assert_eq(0, strcmp(buf, "0N4C4")); + + #if 0 + AlnRes res2(res); + // Now soft-clip away the overhang + res2.clipOutside( + true, // soft clip + 0, // ref begins + 40); // ref ends (excl) + res2.printCigar(false, false, false, op, run, NULL, buf); + assert_eq(0, strcmp(buf, "1S9M")); + res2.printCigar(false, false, true, op, run, NULL, buf); + assert_eq(0, strcmp(buf, "4=1X4=")); + res2.printMD(false, false, op, ch, run, NULL, buf); + assert_eq(0, strcmp(buf, 
"4C4")); + + AlnRes res3 = res; + // Now hard-clip away the overhang + res3.clipOutside( + false, // hard clip + 0, // ref begins + 40); // ref ends (excl) + res3.printCigar(false, false, false, op, run, NULL, buf); + assert_eq(0, strcmp(buf, "9M")); + res3.printCigar(false, false, true, op, run, NULL, buf); + assert_eq(0, strcmp(buf, "4=1X4=")); + res3.printMD(false, false, op, ch, run, NULL, buf); + assert_eq(0, strcmp(buf, "4C4")); + #endif + + cerr << "PASSED" << endl; + } +} + +#endif /*def ALIGNER_RESULT_MAIN*/ diff --git a/aligner_result.h b/aligner_result.h new file mode 100644 index 0000000..745647e --- /dev/null +++ b/aligner_result.h @@ -0,0 +1,2325 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ALIGNER_RESULT_H_ +#define ALIGNER_RESULT_H_ + +#include +#include +#include "mem_ids.h" +#include "ref_coord.h" +#include "read.h" +#include "filebuf.h" +#include "ds.h" +#include "edit.h" +#include "limit.h" +#include "splice_site.h" +#include "alignment_3n.h" + +typedef int64_t TAlScore; + +#define VALID_AL_SCORE(x) ((x).score_ > MIN_I64) +#define VALID_SCORE(x) ((x) > MIN_I64) +#define INVALIDATE_SCORE(x) ((x) = MIN_I64) + +/** + * A generic score object for an alignment. Used for accounting during + * SW and elsewhere. 
Encapsulates the score, the number of N positions + * and the number gaps in the alignment. + * + * The scale for 'score' is such that a perfect alignment score is 0 + * and a score with non-zero penalty is less than 0. So differences + * between scores work as expected, but interpreting an individual + * score (larger is better) as a penalty (smaller is better) requires + * taking the absolute value. + */ +class AlnScore { + +public: + + /** + * Gapped scores are invalid until proven valid. + */ + inline AlnScore() { + reset(); + invalidate(); + assert(!valid()); + } + + /** + * Gapped scores are invalid until proven valid. + */ + inline AlnScore( + TAlScore score, + TAlScore ns, + TAlScore gaps, + bool repeat = false, + TAlScore splicescore = 0, + bool knownTranscripts = false, + bool nearSpliceSites = false, + int leftTrim = 0, + int rightTrim = 0) { + score_ = score; + ns_ = ns; + gaps_ = gaps; + repeat_ = repeat; + splicescore_ = splicescore; + knownTranscripts_ = knownTranscripts; + nearSpliceSites_ = nearSpliceSites; + leftTrim_ = leftTrim; + rightTrim_ = rightTrim; + hisat2_score_ = calculate_hisat2_score(); + assert(valid()); + } + + /** + * Reset the score. + */ + void reset() { + score_ = hisat2_score_ = ns_ = gaps_ = 0; + repeat_ = false; + splicescore_ = 0; + knownTranscripts_ = false; + nearSpliceSites_ = false; + leftTrim_ = 0; + rightTrim_ = 0; + } + + /** + * Return an invalid SwScore. + */ + inline static AlnScore INVALID() { + AlnScore s; + s.invalidate(); + assert(!s.valid()); + return s; + } + + /** + * Return true iff this score has a valid value. + */ + inline bool valid() const { + return score_ != MIN_I64; + } + + /** + * Make this score invalid (and therefore <= all other scores). + */ + inline void invalidate() { + score_ = MIN_I64; + assert(!valid()); + } + + /** + * Increment the number of gaps. If currently invalid, this makes + * the score valid with gaps == 1. 
+ */ + inline void incNs(int nceil) { + if(++ns_ > nceil) { + invalidate(); + } + assert_lt(ns_, 0x7fffffff); + } + + /** + * Return true iff this score is > score o. + * Note: An "invalid" score is <= all other scores. + */ + inline bool operator>(const AlnScore& o) const { + if(!VALID_AL_SCORE(o)) { + if(!VALID_AL_SCORE(*this)) { + // both invalid + return false; + } else { + // I'm valid, other is invalid + return true; + } + } else if(!VALID_AL_SCORE(*this)) { + // I'm invalid, other is valid + return false; + } + return score_ > o.score_ || (score_ == o.score_ && hisat2_score_ > o.hisat2_score_); + } + + /** + * Scores are equal iff they're bitwise equal. + */ + inline AlnScore& operator=(const AlnScore& o) { + // Profiling shows many cache misses on following lines + gaps_ = o.gaps_; + ns_ = o.ns_; + score_ = o.score_; + repeat_ = o.repeat_; + hisat2_score_ = o.hisat2_score_; + splicescore_ = o.splicescore_; + knownTranscripts_ = o.knownTranscripts_; + nearSpliceSites_ = o.nearSpliceSites_; + leftTrim_ = o.leftTrim_; + rightTrim_ = o.rightTrim_; + assert_lt(ns_, 0x7fffffff); + return *this; + } + + /** + * Scores are equal iff they're bitwise equal. + */ + inline bool operator==(const AlnScore& o) const { + // Profiling shows cache misses on following line + return VALID_AL_SCORE(*this) && VALID_AL_SCORE(o) && score_ == o.score_ && hisat2_score_ == o.hisat2_score_; + } + + /** + * Return true iff the two scores are unequal. + */ + inline bool operator!=(const AlnScore& o) const { + return !(*this == o); + } + + /** + * Return true iff this score is >= score o. 
+ */ + inline bool operator>=(const AlnScore& o) const { + if(!VALID_AL_SCORE(o)) { + if(!VALID_AL_SCORE(*this)) { + // both invalid + return false; + } else { + // I'm valid, other is invalid + return true; + } + } else if(!VALID_AL_SCORE(*this)) { + // I'm invalid, other is valid + return false; + } + return score_ > o.score_ || (score_ == o.score_ && hisat2_score_ >= o.hisat2_score_); + } + + /** + * Return true iff this score is < score o. + */ + inline bool operator<(const AlnScore& o) const { + return !operator>=(o); + } + + /** + * Return true iff this score is <= score o. + */ + inline bool operator<=(const AlnScore& o) const { + return !operator>(o); + } + + /** + * Calculate difference between two SwScores. + */ + inline AlnScore operator-(const AlnScore& o) const { + if(!VALID_AL_SCORE(*this)) return *this; + AlnScore s; + s.gaps_ = gaps_ - o.gaps_; + s.ns_ = ns_; + s.score_ = score_ - o.score_; + s.splicescore_ = splicescore_ - o.splicescore_; + assert_lt(s.ns_, 0x7fffffff); + return s; + } + + /** + * Calculate sum of two SwScores. + */ + inline AlnScore operator+(const AlnScore& o) const { + if(!VALID_AL_SCORE(*this)) return *this; + AlnScore s; + s.gaps_ = gaps_ + o.gaps_; + s.ns_ = ns_; + s.score_ = score_ + o.score_; + s.repeat_ = repeat_ | o.repeat_; + s.splicescore_ = splicescore_ + o.splicescore_; + s.hisat2_score_ = hisat2_score_ + o.hisat2_score_; + s.knownTranscripts_ = knownTranscripts_ | o.knownTranscripts_; + s.nearSpliceSites_ = nearSpliceSites_ | o.nearSpliceSites_; + s.leftTrim_ = leftTrim_ + o.leftTrim_; + s.rightTrim_ = rightTrim_ + o.rightTrim_; + assert_lt(s.ns_, 0x7fffffff); + return s; + } + + /** + * Add given SwScore into this one. 
+ */ + inline AlnScore operator+=(const AlnScore& o) { + if(VALID_AL_SCORE(*this)) { + gaps_ += o.gaps_; + score_ += o.score_; + repeat_ |= o.repeat_; + splicescore_ += o.splicescore_; + hisat2_score_ += o.hisat2_score_; + knownTranscripts_ |= o.knownTranscripts_; + nearSpliceSites_ |= o.nearSpliceSites_; + leftTrim_ += o.leftTrim_; + rightTrim_ += o.rightTrim_; + } + return (*this); + } + + /** + * Subtract given SwScore from this one. + */ + inline AlnScore operator-=(const AlnScore& o) { + if(VALID_AL_SCORE(*this)) { + gaps_ -= o.gaps_; + score_ -= o.score_; + // splicescore_ -= o.splicescore_; + } + return (*this); + } + + /** + * Calculate difference between two SwScores. + */ + inline AlnScore operator-(int o) const { + return (*this) + -o; + } + + /** + * Calculate sum of a SwScore and an integer. + */ + inline AlnScore operator+(int o) const { + if(!VALID_AL_SCORE(*this)) return *this; + AlnScore s; + s.gaps_ = gaps_; + s.ns_ = ns_; + s.score_ = score_ + o; + // s.splicescore_ = splicescore_; + assert_lt(s.ns_, 0x7fffffff); + return s; + } + + TAlScore score() const { return score_; } + TAlScore hisat2_score() const { return hisat2_score_; } + TAlScore penalty() const { return -score_; } + TAlScore gaps() const { return gaps_; } + TAlScore ns() const { return ns_; } + bool repeat() const { return repeat_;} + TAlScore splicescore() const { return splicescore_; } + bool knownTranscripts() const { return knownTranscripts_; } + bool nearSpliceSites() const { return nearSpliceSites_; } + bool trimed() const { return leftTrim_ > 0 || rightTrim_ > 0; } + + TAlScore calculate_hisat2_score() const + { + // TAlScore 32 bits used for score_ + TAlScore score = score_; + if(score > MAX_I32) score = MAX_I32; + else if(score < MIN_I32) score = MIN_I32; + + // Next 4 bits for repeat score + TAlScore repeat_score = 0; + if(repeat_) repeat_score = 1; + + // Next 4 bits for alignments against transcripts + TAlScore transcript_score = 0; + if(knownTranscripts_) 
transcript_score = 2; + else if(nearSpliceSites_) transcript_score = 1; + + // Next 8 bits for splice site score + TAlScore splicescore = splicescore_ / 100; + if(splicescore > MAX_U8) splicescore = 0; + else splicescore = MAX_U8 - splicescore; + + // Remaining 16 bits (rightmost 16 bits) for sum of left and right trim lengths + TAlScore trim = leftTrim_ + rightTrim_; + if(trim > MAX_U16) trim = 0; + else trim = MAX_U16 - trim; + return (score << 32) | (repeat_score << 28) | (transcript_score << 24) | (splicescore << 16) | trim; + } + + // Score accumulated so far (penalties are subtracted starting at 0) + TAlScore score_; + + // HISAT2 score, which is used internally to distinguish the alignments of RNA-seq reads + TAlScore hisat2_score_; + + // Ns accumulated so far. An N opposite a non-gap counts as 1 N + // (even if it's N-to-N) + TAlScore ns_; + + // # gaps encountered so far, unless that number exceeds the + // target, in which case the score becomes invalid and therefore <= + // all other scores + TAlScore gaps_; + + bool repeat_; + + // splice scores + TAlScore splicescore_; + + // mapped to known transcripts? + bool knownTranscripts_; + + // continuous alignment near (known) splice sites? 
+ bool nearSpliceSites_; + + int leftTrim_; + int rightTrim_; +}; + +enum { + // This alignment is one of a pair of alignments that form a concordant + // alignment for a read + ALN_FLAG_PAIR_CONCORD_MATE1 = 1, + ALN_FLAG_PAIR_CONCORD_MATE2, + + // This alignment is one of a pair of alignments that form a discordant + // alignment for a read + ALN_FLAG_PAIR_DISCORD_MATE1, + ALN_FLAG_PAIR_DISCORD_MATE2, + + // This is an unpaired alignment but the read in question is a pair; + // usually, this happens because the read had no reportable paired-end + // alignments + ALN_FLAG_PAIR_UNPAIRED_MATE1, + ALN_FLAG_PAIR_UNPAIRED_MATE2, + + // This is an unpaired alignment of an unpaired read + ALN_FLAG_PAIR_UNPAIRED +}; + +/** + * Encapsulates some general information about an alignment that doesn't belong + * in AlnRes. Specifically: + * + * 1. Whether the alignment is paired + * 2. If it's paried, whether it's concordant or discordant + * 3. Whether this alignment was found after the paired-end categories were + * maxed out + * 4. Whether the relevant unpaired category was maxed out + */ +class AlnFlags { + +public: + + AlnFlags() { + init( + ALN_FLAG_PAIR_UNPAIRED, + false, // canMax + false, // maxed + false, // maxedPair + false, // nfilt + false, // scfilt + false, // lenfilt + false, // qcfilt + false, // mixedMode + false, // primary + false, // oppAligned + false); // oppFw + } + + AlnFlags( + int pairing, + bool canMax, + bool maxed, + bool maxedPair, + bool nfilt, + bool scfilt, + bool lenfilt, + bool qcfilt, + bool mixedMode, + bool primary, + bool oppAligned, // opposite mate aligned? + bool oppFw) // opposite mate aligned forward? + { + init(pairing, canMax, maxed, maxedPair, nfilt, scfilt, + lenfilt, qcfilt, mixedMode, primary, oppAligned, oppFw); + } + + /** + * Initialize given values for all settings. 
+ */ + void init( + int pairing, + bool canMax, + bool maxed, + bool maxedPair, + bool nfilt, + bool scfilt, + bool lenfilt, + bool qcfilt, + bool mixedMode, + bool primary, + bool oppAligned, + bool oppFw) + { + assert_gt(pairing, 0); + assert_leq(pairing, ALN_FLAG_PAIR_UNPAIRED); + pairing_ = pairing; + canMax_ = canMax; + maxed_ = maxed; + maxedPair_ = maxedPair; + nfilt_ = nfilt; + scfilt_ = scfilt; + lenfilt_ = lenfilt; + qcfilt_ = qcfilt; + mixedMode_ = mixedMode; + primary_ = primary; + oppAligned_ = oppAligned; + } + + /** + * Return true iff this alignment is from a paired-end read. + */ + bool partOfPair() const { + assert_gt(pairing_, 0); + return pairing_ < ALN_FLAG_PAIR_UNPAIRED; + } + +#ifndef NDEBUG + /** + * Check that the flags are internally consistent. + */ + bool repOk() const { + assert(partOfPair() || !maxedPair_); + return true; + } +#endif + + /** + * Print out string representation of YF:i flag for indicating whether and + * why the mate was filtered. + */ + bool printYF(BTString& o, bool first) const; + + /** + * Print out string representation of YM:i flag for indicating with the + * mate per se aligned repetitively. + */ + void printYM(BTString& o) const; + + /** + * Print out string representation of YM:i flag for indicating with the + * pair containing the mate aligned repetitively. + */ + void printYP(BTString& o) const; + + /** + * Print out string representation of these flags. + */ + void printYT(BTString& o) const; + + inline int pairing() const { return pairing_; } + inline bool maxed() const { return maxed_; } + inline bool maxedPair() const { return maxedPair_; } + + /** + * Return true iff the alignment is not the primary alignment; i.e. not the + * first reported alignment for the fragment. + */ + inline bool isPrimary() const { + return primary_; + } + + /** + * Set the primary flag. 
+ */ + void setPrimary(bool primary) { + primary_ = primary; + } + + /** + * Return whether both paired and unpaired alignments are considered for + * pairs & their constituent mates + */ + inline bool isMixedMode() const { + return mixedMode_; + } + + /** + * Return true iff the alignment params are such that it's possible for a + * read to be suppressed for being repetitive. + */ + inline bool canMax() const { + return canMax_; + } + + /** + * Return true iff the alignment was filtered out. + */ + bool filtered() const { + return !nfilt_ || !scfilt_ || !lenfilt_ || !qcfilt_; + } + + /** + * Return true iff the read is mate #1 of a pair, regardless of whether it + * aligned as a pair. + */ + bool readMate1() const { + return pairing_ == ALN_FLAG_PAIR_CONCORD_MATE1 || + pairing_ == ALN_FLAG_PAIR_DISCORD_MATE1 || + pairing_ == ALN_FLAG_PAIR_UNPAIRED_MATE1; + } + + /** + * Return true iff the read is mate #2 of a pair, regardless of whether it + * aligned as a pair. + */ + bool readMate2() const { + return pairing_ == ALN_FLAG_PAIR_CONCORD_MATE2 || + pairing_ == ALN_FLAG_PAIR_DISCORD_MATE2 || + pairing_ == ALN_FLAG_PAIR_UNPAIRED_MATE2; + } + + /** + * Return true iff the read aligned as either mate of a concordant pair. + */ + bool alignedConcordant() const { + return pairing_ == ALN_FLAG_PAIR_CONCORD_MATE1 || + pairing_ == ALN_FLAG_PAIR_CONCORD_MATE2; + } + + /** + * Return true iff the read aligned as either mate of a discordant pair. + */ + bool alignedDiscordant() const { + return pairing_ == ALN_FLAG_PAIR_DISCORD_MATE1 || + pairing_ == ALN_FLAG_PAIR_DISCORD_MATE2; + } + + /** + * Return true iff the read aligned as either mate of a pair, concordant or + * discordant. + */ + bool alignedPaired() const { + return alignedConcordant() || alignedDiscordant(); + } + + /** + * Return true iff the read aligned as an unpaired read. 
+ */ + bool alignedUnpaired() const { + return pairing_ == ALN_FLAG_PAIR_UNPAIRED; + } + + /** + * Return true iff the read aligned as an unpaired mate from a paired read. + */ + bool alignedUnpairedMate() const { + return pairing_ == ALN_FLAG_PAIR_UNPAIRED_MATE1 || + pairing_ == ALN_FLAG_PAIR_UNPAIRED_MATE2; + } + + bool mateAligned() const { + return oppAligned_; + } + +protected: + + // See ALN_FLAG_PAIR_* above + int pairing_; + + // True iff the alignment params are such that it's possible for a read to + // be suppressed for being repetitive + bool canMax_; + + // This alignment is sampled from among many alignments that, taken + // together, cause this mate to align non-uniquely + bool maxed_; + + // The paired-end read of which this mate is part has repetitive concordant + // alignments + bool maxedPair_; + + bool nfilt_; // read/mate filtered b/c proportion of Ns exceeded ceil + bool scfilt_; // read/mate filtered b/c length can't provide min score + bool lenfilt_; // read/mate filtered b/c less than or equal to seed mms + bool qcfilt_; // read/mate filtered by upstream qc + + // Whether both paired and unpaired alignments are considered for pairs & + // their constituent mates + bool mixedMode_; + + // The read is the primary read + bool primary_; + + // True iff the opposite mate aligned + bool oppAligned_; +}; + +static inline ostream& operator<<(ostream& os, const AlnScore& o) { + os << o.score(); + return os; +} + +// Forward declaration +class BitPairReference; + +// A given AlnRes can be one of these three types +enum { + ALN_RES_TYPE_UNPAIRED = 1, // unpaired alignment + ALN_RES_TYPE_UNPAIRED_MATE1, // mate #1 in pair, aligned unpaired + ALN_RES_TYPE_UNPAIRED_MATE2, // mate #2 in pair, aligned unpaired + ALN_RES_TYPE_MATE1, // mate #1 in paired-end alignment + ALN_RES_TYPE_MATE2 // mate #2 in paired-end alignment +}; + +/** + * Seed alignment summary + */ +struct SeedAlSumm { + + SeedAlSumm() { reset(); } + + void reset() { + nonzTot = nonzFw = 
nonzRc = 0; + nrangeTot = nrangeFw = nrangeRc = 0; + neltTot = neltFw = neltRc = 0; + minNonzRangeFw = minNonzRangeRc = 0; + maxNonzRangeFw = maxNonzRangeRc = 0; + minNonzEltFw = minNonzEltRc = 0; + maxNonzEltFw = maxNonzEltRc = 0; + } + + size_t nonzTot; + size_t nonzFw; + size_t nonzRc; + + size_t nrangeTot; + size_t nrangeFw; + size_t nrangeRc; + + size_t neltTot; + size_t neltFw; + size_t neltRc; + + size_t minNonzRangeFw; + size_t minNonzRangeRc; + + size_t maxNonzRangeFw; + size_t maxNonzRangeRc; + + size_t minNonzEltFw; + size_t minNonzEltRc; + + size_t maxNonzEltFw; + size_t maxNonzEltRc; +}; + +/** + * Encapsulates a stacked alignment, a nice intermediate format for alignments + * from which to left-align gaps, print CIGAR strings, and print MD:Z strings. + */ +class StackedAln { + +public: + + StackedAln() : + stackRef_(RES_CAT), + stackRel_(RES_CAT), + stackSNP_(RES_CAT), + stackRead_(RES_CAT), + stackSkip_(RES_CAT), + cigOp_(RES_CAT), + cigRun_(RES_CAT), + mdzOp_(RES_CAT), + mdzChr_(RES_CAT), + mdzRun_(RES_CAT) + { + reset(); + } + + /** + * Reset to an uninitialized state. + */ + void reset() { + inited_ = false; + trimLS_ = trimLH_ = trimRS_ = trimRH_ = 0; + stackRef_.clear(); + stackRel_.clear(); + stackSNP_.clear(); + stackRead_.clear(); + stackSkip_.clear(); + cigDistMm_ = cigCalc_ = false; + cigOp_.clear(); + cigRun_.clear(); + mdzCalc_ = false; + mdzOp_.clear(); + mdzChr_.clear(); + mdzRun_.clear(); + } + + /** + * Return true iff the stacked alignment has been initialized. + */ + bool inited() const { return inited_; } + + /** + * Initialized the stacked alignment with respect to a read string, a list of + * edits (expressed left-to-right), and integers indicating how much hard and + * soft trimming has occurred on either end of the read. 
+ * + * s: read sequence + * ed: all relevant edits, including ambiguous nucleotides + * trimLS: # bases soft-trimmed from LHS + * trimLH: # bases hard-trimmed from LHS + * trimRS: # bases soft-trimmed from RHS + * trimRH: # bases hard-trimmed from RHS + */ + void init( + const BTDnaString& s, + const EList& ed, + size_t trimLS, + size_t trimLH, + size_t trimRS, + size_t trimRH); + + /** + * Left-align all the gaps. If this changes the alignment and the CIGAR or + * MD:Z strings have already been calculated, this renders them invalid. + * + * We left-align gaps with in the following way: for each gap, we check + * whether the character opposite the rightmost gap character is the same + * as the character opposite the character just to the left of the gap. If + * this is the case, we can slide the gap to the left and make the + * rightmost position previously covered by the gap into a non-gap. + * + * This scheme allows us to push the gap past a mismatch. BWA does seem to + * allow this. It's not clear that Bowtie 2 should, since moving the + * mismatch could cause a mismatch with one base quality to be replaced + * with a mismatch with a different base quality. + */ + void leftAlign(bool pastMms); + + /** + * Build the CIGAR list, if it hasn't already built. Returns true iff it + * was built for the first time. + */ + bool buildCigar(bool xeq); + + /** + * Build the MD:Z list, if it hasn't already built. Returns true iff it + * was built for the first time. + */ + bool buildMdz(); + + /** + * Write a CIGAR representation of the alignment to the given string and/or + * char buffer. + */ + void writeCigar(BTString* o, char* oc) const; + + /** + * Write a CIGAR representation of the alignment to the given string and/or + * char buffer. This function is for HISAT-3N. + */ + void writeCigar(Alignment* o, char* oc) const; + + /** + * Write an MD:Z representation of the alignment to the given string and/or + * char buffer. 
+ */ + void writeMdz(BTString* o, char* oc) const; + + /** + * Check internal consistency. + */ +#ifndef NDEBUG + bool repOk() const { + if(inited_) { + assert_eq(stackRef_.size(), stackRead_.size()); + assert_eq(stackRef_.size(), stackRel_.size()); + } + return true; + } +#endif + +protected: + + bool inited_; // true iff stacked alignment is initialized + + size_t trimLS_; // amount soft-trimmed from the LHS + size_t trimLH_; // amount hard-trimmed from the LHS + size_t trimRS_; // amount soft-trimmed from the RHS + size_t trimRH_; // amount hard-trimmed from the RHS + + EList stackRef_; // reference characters + EList stackRel_; // bars relating reference to read characters + EList stackSNP_; // known SNP? + EList stackRead_; // read characters + EList stackSkip_; + + bool cigDistMm_; // distinguish between =/X, rather than just M + bool cigCalc_; // whether we've calculated CIGAR ops/runs + EList cigOp_; // CIGAR operations + EList cigRun_; // CIGAR run lengths + + bool mdzCalc_; // whether we've calculated MD:Z ops/runs + EList mdzOp_; // MD:Z operations + EList mdzChr_; // MD:Z operations + EList mdzRun_; // MD:Z run lengths +}; + +/** + * Encapsulates an alignment result. The result comprises: + * + * 1. All the nucleotide edits for both mates ('ned'). + * 2. All "edits" where an ambiguous reference char is resolved to an + * unambiguous char ('aed'). + * 3. The score for the alginment, including summary information about the + * number of gaps and Ns involved. + * 4. The reference id, strand, and 0-based offset of the leftmost character + * involved in the alignment. + * 5. Information about trimming prior to alignment and whether it was hard or + * soft. + * 6. Information about trimming during alignment and whether it was hard or + * soft. Local-alignment trimming is usually soft when aligning nucleotide + * reads. 
+ * + * Note that the AlnRes, together with the Read and an AlnSetSumm (*and* the + * opposite mate's AlnRes and Read in the case of a paired-end alignment), + * should contain enough information to print an entire alignment record. + * + * TRIMMING + * + * Accounting for trimming is tricky. Trimming affects: + * + * 1. The values of the trim* and pretrim* fields. + * 2. The offsets of the Edits in the ELists. + * 3. The read extent, if the trimming is soft. + * 4. The read extent and the read sequence and length, if trimming is hard. + * + * Handling 1. is not too difficult. 2., 3., and 4. are handled in setShape(). + */ +class AlnRes { + +public: + + AlnRes() : + // ned_(RES_CAT), + // aed_(RES_CAT) + ned_(NULL), + aed_(NULL), + ned_node_(NULL), + aed_node_(NULL), + raw_edits_(NULL) + { + reset(); + } + + AlnRes(const AlnRes& other) : + ned_(NULL), + aed_(NULL), + ned_node_(NULL), + aed_node_(NULL), + raw_edits_(NULL) + { + shapeSet_ = other.shapeSet_; + rdlen_ = other.rdlen_; + rdid_ = other.rdid_; + rdrows_ = other.rdrows_; + score_ = other.score_; + oscore_ = other.oscore_; + refcoord_ = other.refcoord_; + reflen_ = other.reflen_; + refival_ = other.refival_; + rdextent_ = other.rdextent_; + rdexrows_ = other.rdexrows_; + rfextent_ = other.rfextent_; + seedmms_ = other.seedmms_; + seedlen_ = other.seedlen_; + minsc_ = other.minsc_; + nuc5p_ = other.nuc5p_; + nuc3p_ = other.nuc3p_; + refns_ = other.refns_; + type_ = other.type_; + fraglenSet_ = other.fraglenSet_; + fraglen_ = other.fraglen_; + pretrimSoft_ = other.pretrimSoft_; + pretrim5p_ = other.pretrim5p_; + pretrim3p_ = other.pretrim3p_; + trimSoft_ = other.trimSoft_; + trim5p_ = other.trim5p_; + trim3p_ = other.trim3p_; + repeat_ = other.repeat_; + + num_spliced_ = other.num_spliced_; + raw_edits_ = other.raw_edits_; + if(raw_edits_ != NULL) { + assert(ned_ == NULL && aed_ == NULL); + assert(ned_node_ == NULL && aed_node_ == NULL); + ned_node_ = raw_edits_->new_node(); + aed_node_ = 
raw_edits_->new_node(); + assert(ned_node_ != NULL && aed_node_ != NULL); + ned_ = &(ned_node_->payload); + aed_ = &(aed_node_->payload); + assert(other.ned_ != NULL && other.aed_ != NULL); + *ned_ = *(other.ned_); + *aed_ = *(other.aed_); + } + } + + AlnRes& operator=(const AlnRes& other) { + if(this == &other) return *this; + shapeSet_ = other.shapeSet_; + rdlen_ = other.rdlen_; + rdid_ = other.rdid_; + rdrows_ = other.rdrows_; + score_ = other.score_; + oscore_ = other.oscore_; + refcoord_ = other.refcoord_; + reflen_ = other.reflen_; + refival_ = other.refival_; + rdextent_ = other.rdextent_; + rdexrows_ = other.rdexrows_; + rfextent_ = other.rfextent_; + seedmms_ = other.seedmms_; + seedlen_ = other.seedlen_; + minsc_ = other.minsc_; + nuc5p_ = other.nuc5p_; + nuc3p_ = other.nuc3p_; + refns_ = other.refns_; + type_ = other.type_; + fraglenSet_ = other.fraglenSet_; + fraglen_ = other.fraglen_; + pretrimSoft_ = other.pretrimSoft_; + pretrim5p_ = other.pretrim5p_; + pretrim3p_ = other.pretrim3p_; + trimSoft_ = other.trimSoft_; + trim5p_ = other.trim5p_; + trim3p_ = other.trim3p_; + repeat_ = other.repeat_; + + num_spliced_ = other.num_spliced_; + assert(raw_edits_ == NULL || raw_edits_ == other.raw_edits_); + raw_edits_ = other.raw_edits_; + if(ned_ != NULL) { + assert(aed_ != NULL); + ned_->clear(); + aed_->clear(); + } else if(raw_edits_ != NULL) { + assert(aed_ == NULL); + assert(ned_node_ == NULL && aed_node_ == NULL); + ned_node_ = raw_edits_->new_node(); + aed_node_ = raw_edits_->new_node(); + assert(ned_node_ != NULL && aed_node_ != NULL); + ned_ = &(ned_node_->payload); + aed_ = &(aed_node_->payload); + } + + if(other.ned_ != NULL) { + assert(other.aed_ != NULL); + *ned_ = *(other.ned_); + *aed_ = *(other.aed_); + } + + return *this; + } + + ~AlnRes() + { +#ifndef NDEBUG + if(ned_node_ == NULL || aed_node_ == NULL) { + assert(ned_node_ == NULL && aed_node_ == NULL); + assert(ned_ == NULL && aed_ == NULL); + assert(raw_edits_ == NULL); + } else { + 
assert(ned_node_ != NULL && aed_node_ != NULL); + assert(ned_ != NULL && aed_ != NULL); + assert(raw_edits_ != NULL); + } +#endif + if(ned_ != NULL) { + ned_->clear(); aed_->clear(); + raw_edits_->delete_node(ned_node_); + raw_edits_->delete_node(aed_node_); + ned_ = aed_ = NULL; + ned_node_ = aed_node_ = NULL; + raw_edits_ = NULL; + } + } + + /* DK - temporary implementation */ + void init_raw_edits(LinkedEList >* raw_edits) { + if(raw_edits == NULL) + return; + raw_edits_ = raw_edits; + assert(ned_ == NULL && aed_ == NULL); + assert(ned_node_ == NULL && aed_node_ == NULL); + ned_node_ = raw_edits_->new_node(); + aed_node_ = raw_edits_->new_node(); + assert(ned_node_ != NULL && aed_node_ != NULL); + ned_ = &(ned_node_->payload); + aed_ = &(aed_node_->payload); + } + + /** + * Clear all contents. + */ + void reset(); + + /** + * Reverse all edit lists. + */ + void reverseEdits() { + (*ned_).reverse(); + (*aed_).reverse(); + } + + /** + * Invert positions of edits so that they're with respect to the other end + * of the alignment. The assumption is that the .pos fields of the edits + * in the ned_/aed_/ced_ structures are offsets with respect to the first + * aligned character (i.e. after all trimming). + */ + void invertEdits() { + assert(shapeSet_); + assert_gt(rdlen_, 0); + assert_gt(rdrows_, 0); + Edit::invertPoss(*ned_, rdexrows_, false); + Edit::invertPoss(*aed_, rdexrows_, false); + } + + /** + * Return true iff no result has been installed. + */ + bool empty() const { + if(!VALID_AL_SCORE(score_)) { + assert(ned_ == NULL || ned_->empty()); + assert(aed_ == NULL || aed_->empty()); + assert(!refcoord_.inited()); + assert(!refival_.inited()); + return true; + } else { + return false; + } + } + + /** + * Return the identifier for the reference that the alignment + * occurred in. + */ + inline TRefId refid() const { + assert(shapeSet_); + return refcoord_.ref(); + } + + /** + * Return the orientation that the alignment occurred in. 
+ */ + inline int orient() const { + assert(shapeSet_); + return refcoord_.orient(); + } + + /** + * Return the 0-based offset of the alignment into the reference + * sequence it aligned to. + */ + inline TRefOff refoff() const { + assert(shapeSet_); + return refcoord_.off(); + } + + /** + * Set arguments to coordinates for the upstream-most and downstream-most + * reference positions involved in the alignment. + */ + inline void getCoords( + Coord& st, // out: install starting coordinate here + Coord& en, // out: install ending coordinate here + Coord& st2, + Coord& en2) + const + { + assert(shapeSet_); + st.init(refcoord_); + en.init(refcoord_); + en.adjustOff(refExtent() - 1); + Coord right = refcoord_right(); + st2.init(right); + st2.adjustOff(1 - refExtent()); + en2.init(right); + } + + /** + * Set arguments to coordinates for the upstream-most and downstream-most + * reference positions covered by the read taking any read trimming into + * account. I.e. if the upstream-most offset involved in an alignment is + * 40 but the read was hard-trimmed by 5 on that end, the inferred + * upstream-most covered position is 35. + */ + inline void getExtendedCoords( + Coord& st, // out: install starting coordinate here + Coord& en, // out: install ending coordinate here + Coord& st2, + Coord& en2) + const + { + getCoords(st, en, st2, en2); + // Take trimming into account + int64_t trim_st = (fw() ? trim5p_ : trim3p_); + int64_t trim_en = (fw() ? trim3p_ : trim5p_); + trim_st += (fw() ? pretrim5p_ : pretrim3p_); + trim_en += (fw() ? 
pretrim3p_ : pretrim5p_); + st.adjustOff(-trim_st); + en.adjustOff( trim_en); + st2.adjustOff(-trim_st); + en2.adjustOff( trim_en); + } + + /** + * Set the upstream-most reference offset involved in the alignment, and + * the extent of the alignment (w/r/t the reference) + */ + void setShape( + TRefId id, // id of reference aligned to + TRefOff off, // offset of first aligned char into ref seq + TRefOff reflen, // length of reference sequence aligned to + bool fw, // aligned to Watson strand? + size_t rdlen, // length of read after hard trimming, before soft + TReadId rdid, // read ID + bool pretrimSoft, // whether trimming prior to alignment was soft + size_t pretrim5p, // # poss trimmed form 5p end before alignment + size_t pretrim3p, // # poss trimmed form 3p end before alignment + bool trimSoft, // whether local-alignment trimming was soft + size_t trim5p, // # poss trimmed form 5p end during alignment + size_t trim3p); // # poss trimmed form 3p end during alignment + + /** + * Return true iff the reference chars involved in this alignment result + * are entirely within with given bounds. + */ + bool within( + TRefId id, + TRefOff off, + bool fw, + size_t extent) const + { + if(refcoord_.ref() == id && + refcoord_.off() >= off && + refcoord_.off() + refExtent() <= off + extent && + refcoord_.fw() == fw) + { + return true; + } + return false; + } + + /** + * Set alignment score for this alignment. + */ + void setScore(AlnScore score) { + score_ = score; + } + + /** + * Set the upstream-most and downstream-most nucleotides. + */ + void setNucs(bool fw, int nup, int ndn) { + nuc5p_ = fw ? nup : ndn; + nuc3p_ = fw ? ndn : nup; + } + + /** + * Return the 0-based offset of the leftmost reference position involved in + * the alignment. + */ + const Coord& refcoord() const { + return refcoord_; + } + + /** + * Return the 0-based offset of the leftmost reference position involved in + * the alignment. 
+ */ + const Interval& refival() const { + return refival_; + } + + /** + * Return the 0-based offset of the leftmost reference position involved in + * the alignment. + */ + Coord& refcoord() { + return refcoord_; + } + + /** + * Return the 0-based offset of the rightmost reference position involved in + * the alignment. + */ + Coord refcoord_right() const { + Coord coord_right = refcoord_; + TRefOff right = coord_right.off() + rfextent_ - 1; + for(size_t i = 0; i < ned_->size(); i++) { + const Edit& ed = (*ned_)[i]; + if(ed.type == EDIT_TYPE_SPL) { + right += ed.splLen; + } + } + + coord_right.setOff(right); + return coord_right; + } + + /** + * Return true if this alignment is to the Watson strand. + */ + inline bool fw() const { + return refcoord_.fw(); + } + + AlnScore score() const { return score_; } + AlnScore oscore() const { return oscore_; } + EList& ned() { return *ned_; } + EList& aed() { return *aed_; } + const EList& ned() const { return *ned_; } + const EList& aed() const { return *aed_; } + size_t readExtent() const { return rdextent_; } + size_t readExtentRows() const { return rdexrows_; } + size_t readLength() const { return rdlen_; } + TReadId readID() const { return rdid_; } + bool spliced() const { return num_spliced_ > 0; } + size_t num_spliced() const { return num_spliced_; } + uint8_t spliced_whichsense_transcript() const { + uint8_t whichsense = SPL_UNKNOWN; + if(spliced()) { + for(size_t i = 0; i < ned_->size(); i++) { + const Edit& ed = (*ned_)[i]; + if(ed.type != EDIT_TYPE_SPL) continue; + if(whichsense == SPL_UNKNOWN) { + whichsense = ed.splDir; + } else if(ed.splDir != SPL_UNKNOWN) { + assert_neq(whichsense, SPL_UNKNOWN); + if(whichsense == SPL_FW || whichsense == SPL_SEMI_FW) { + if(ed.splDir != SPL_FW && ed.splDir != SPL_SEMI_FW) { + whichsense = SPL_UNKNOWN; + break; + } + } + if(whichsense == SPL_RC || whichsense == SPL_SEMI_RC) { + if(ed.splDir != SPL_RC && ed.splDir != SPL_SEMI_RC) { + whichsense = SPL_UNKNOWN; + break; + } + } + 
} + } + } + + return whichsense; + } + + /** + * Return the number of reference nucleotides involved in the alignment + * (i.e. the number of characters in the inclusive range from the first + * matched-up ref char to the last). + */ + size_t refExtent() const { + return rfextent_; + } + + /** + * Return length of reference sequence aligned to. + */ + TRefOff reflen() const { + return reflen_; + } + + /** + * Return the number of reference nucleotides in the alignment (i.e. the + * number of characters in the inclusive range from the first matched-up + * ref char to the last). + */ + size_t refNucExtent() const { + return rfextent_; + } + + /** + * Print the sequence for the read that aligned using A, C, G and + * T. This will simply print the read sequence (or its reverse + * complement). + */ + void printSeq( + const Read& rd, + const BTDnaString* dns, + BTString& o) const; + + /** + * Print the quality string for the read that aligned. This will + * simply print the read qualities (or their reverse). + */ + void printQuals( + const Read& rd, + const BTString* dqs, + BTString& o) const; + + /** + * Print a stacked alignment with the reference on bottom, query on top, + * and lines connecting matched-up positions (delegates to the overload below). + */ + void printStacked( + const Read& rd, + std::ostream& o) const + { + printStacked(refcoord_.fw() ? rd.patFw : rd.patRc, o); + } + + /** + * Print a stacked alignment with the reference on bottom, query on top, + * and lines connecting matched-up positions. + */ + void printStacked( + const BTDnaString& seq, + std::ostream& o) const + { + Edit::printQAlign(o, seq, *ned_); + // Print reference offset below reference string + o << "^" << std::endl; + o << "(" << refcoord_.ref() << "," << refcoord_.off() << ")" << std::endl; + } + +#ifndef NDEBUG + /** + * Check that alignment score is internally consistent.
+ */ + bool repOk() const { + assert(refcoord_.repOk()); + if(shapeSet_) { + assert_lt(refoff(), reflen_); + } + assert(refival_.repOk()); + assert(VALID_AL_SCORE(score_) || ned_ == NULL || ned_->empty()); + assert(VALID_AL_SCORE(score_) || aed_ == NULL || aed_->empty()); + assert(empty() || refcoord_.inited()); + assert(empty() || refival_.inited()); + assert_geq(rdexrows_, rdextent_); + assert(empty() || rdextent_ > 0); + assert(empty() || rfextent_ > 0); + return true; + } + + /** + * Check that alignment score is internally consistent. + */ + bool repOk(const Read& rd) const { + assert(Edit::repOk(*ned_, refcoord_.fw() ? rd.patFw : rd.patRc, + refcoord_.fw(), trimmed5p(true), trimmed3p(true))); + return repOk(); + } +#endif + +#ifndef NDEBUG + /** + * Assuming this AlnRes is an alignment for 'rd', check that the + * alignment and 'rd' are compatible with the corresponding + * reference sequence. + */ + bool matchesRef( + const Read& rd, + const BitPairReference& ref, + BTDnaString& rf, + BTDnaString& rdseq, + BTString& qseq, + SStringExpandable& raw_refbuf, + SStringExpandable& destU32, + EList& matches, + SStringExpandable& raw_refbuf2, + EList& reflens, + EList& refoffs); +#endif + + /** + * Set information about the alignment parameters that led to this + * alignment. + */ + void setParams( + int seedmms, + int seedlen, + int seedival, + int64_t minsc) + { + seedmms_ = seedmms; + seedlen_ = seedlen; + seedival_ = seedival; + minsc_ = minsc; + } + + // Accessors for alignment parameters + int seedmms() const { return seedmms_; } + int seedlen() const { return seedlen_; } + int seedival() const { return seedival_; } + int64_t minScore() const { return minsc_; } + + /** + * Is the ith row from the 5' end of the DP table one of the ones + * soft-trimmed away by local alignment? 
+ */ + inline bool trimmedRow5p(size_t i) const { + return i < trim5p_ || rdrows_ - i - 1 < trim3p_; + } + + /** + * Is the ith character from the 5' end of read sequence one of the ones + * soft-trimmed away by local alignment? + */ + inline bool trimmedPos5p(size_t i) const { + return i < trim5p_ || rdlen_ - i - 1 < trim3p_; + } + + /** + * Is the ith row from the 5' end of the DP table one of the ones that + * survived local-alignment soft trimming? + */ + inline bool alignedRow5p(size_t i) const { + return !trimmedRow5p(i); + } + + /** + * Is the ith character from the 5' end of the read sequence one of the + * ones that survived local-alignment soft trimming? + */ + inline bool alignedPos5p(size_t i) const { + return !trimmedPos5p(i); + } + + /** + * Return true iff this AlnRes and the given AlnRes overlap. Two AlnRess + * overlap if they share a cell in the overall dynamic programming table: + * i.e. if there exists a read position s.t. that position in both reads + * matches up with the same reference character. E.g., the following + * alignments (drawn schematically as paths through a dynamic programming + * table) are redundant: + * + * a b a b + * \ \ \ \ + * \ \ \ \ + * \ \ \ \ + * ---\ \ \ + * \ ---\--- + * ---\ \ \ + * \ \ \ \ + * \ \ \ \ + * \ \ \ \ + * a b b a + * + * We iterate over each read position that hasn't been hard-trimmed, but + * only overlaps at positions that have also not been soft-trimmed are + * considered. + */ + bool overlap(AlnRes& res); + + /** + * Return true iff this read was unpaired to begin with. + */ + inline bool readUnpaired() const { + assert_gt(type_, 0); + return type_ == ALN_RES_TYPE_UNPAIRED; + } + + /** + * Return true iff this alignment aligned in an unpaired fashion; not part + * of a concordant or discordant pair. 
+ */ + inline bool alignedUnpaired() const { + assert_gt(type_, 0); + return type_ == ALN_RES_TYPE_UNPAIRED || + type_ == ALN_RES_TYPE_UNPAIRED_MATE1 || + type_ == ALN_RES_TYPE_UNPAIRED_MATE2; + } + + /** + * Return true iff this alignment aligned as mate #1 or mate #2 in a pair, + * either concordant or discordant. + */ + inline bool alignedPaired() const { + assert_gt(type_, 0); + return type_ == ALN_RES_TYPE_MATE1 || + type_ == ALN_RES_TYPE_MATE2; + } + + /** + * Return true iff this read started as mate #1 in a pair. + */ + inline bool readMate1() const { + assert_gt(type_, 0); + return type_ == ALN_RES_TYPE_MATE1 || + type_ == ALN_RES_TYPE_UNPAIRED_MATE1; + } + + /** + * Return true iff this read aligned as mate #1 in a concordant or + * discordant pair. + */ + inline bool alignedMate1() const { + assert_gt(type_, 0); + return type_ == ALN_RES_TYPE_MATE1; + } + + /** + * Return true iff this read started as mate #2 in a pair, whether it then + * aligned as part of a pair or on its own. + */ + inline bool readMate2() const { + assert_gt(type_, 0); + return type_ == ALN_RES_TYPE_MATE2 || + type_ == ALN_RES_TYPE_UNPAIRED_MATE2; + } + + /** + * Return true iff this read aligned as mate #2 in a concordant or + * discordant pair. + */ + inline bool alignedMate2() const { + assert_gt(type_, 0); + return type_ == ALN_RES_TYPE_MATE2; + } + + /** + * Return true iff fragment length is set. + */ + bool isFraglenSet() const { + return fraglenSet_; + } + + /** + * Set whether this alignment is unpaired, or is mate #1 or mate #2 in a + * paired-end alignment. + */ + void setMateParams( + int type, + const AlnRes* omate, // alignment result for the opposite mate + const AlnFlags& flags, // flags for this mate + const SpliceSiteDB* ssdb = NULL, // splice sites + uint64_t threads_rids_mindist = 0, + EList* spliceSites = NULL) + { + assert_gt(type, 0); + type_ = type; + fraglen_ = 0; + if(omate != NULL) { + oscore_ = omate->score_; + // When should we calculate a fragment length here?
There are a + // couple reasonable ideas: + // 1. When mates align concordantly + // 2. When both mates align to the same reference string + // BWA seems to do 2., so that's what we'll do here. + bool sameChr = true; + if((sameChr && refcoord_.ref() == omate->refcoord_.ref()) || + flags.alignedConcordant()) + { + setFragmentLength(*omate, ssdb, threads_rids_mindist, spliceSites); + } else { + assert(!isFraglenSet()); + } + } + } + + /** + * Assuming this alignment and the given alignment are at the extreme ends + * of a fragment, return the length of the fragment. We take all clipping, + * both hard and soft, into account here. Any clipping that occurred + * earlier and isn't accounted for within Bowtie2 should be accounted for + * by the user in how they set the maximum and minimum fragment length + * settings. + */ + int64_t setFragmentLength(const AlnRes& omate, + const SpliceSiteDB* ssdb = NULL, // splice sites + uint64_t threads_rids_mindist = 0, + EList* spliceSites = NULL) { + Coord st, en, st2, en2; + Coord ost, oen, ost2, oen2; + assert_eq(refid(), omate.refid()); + getExtendedCoords(st, en, st2, en2); + omate.getExtendedCoords(ost, oen, ost2, oen2); + bool imUpstream = false; + + if(st.off() < ost.off()) { + imUpstream = true; + } else if(st.off() == ost.off()) { + if(st.fw() && ost.fw() && readMate1()) { + imUpstream = true; + } else if(st.fw() && !ost.fw()) { + imUpstream = true; + } else { + imUpstream = false; + } + } else { + imUpstream = false; + } + + TRefOff up, dn, up_right, dn_left; + if(imUpstream) { + up = std::min(st2.off(), ost.off()); + up_right = std::min(en2.off(), oen.off()); + dn_left = std::max(st2.off(), ost.off()); + dn = std::max(en2.off(), oen.off()); + } else { + up = std::min(st.off(), ost2.off()); + up_right = std::min(en.off(), oen2.off()); + dn_left = std::max(st.off(), ost2.off()); + dn = std::max(en.off(), oen2.off()); + } + assert_geq(dn, up); + TRefOff intron_len = 0; + if(ssdb != NULL && + !repeat() && + up_right + 100 < 
dn_left) { + assert(spliceSites != NULL); + if(spliceSites->size() == 0) { + ssdb->getRightSpliceSites(refid(), up_right, dn_left - up_right, *spliceSites); + } + for(size_t si = 0; si < spliceSites->size(); si++) { + const SpliceSite& ss = (*spliceSites)[si]; + if(!ss._fromfile && ss._readid + threads_rids_mindist > rdid_) continue; + if(ss.left() <= up || ss.right() >= dn) continue; + TRefOff tmp_intron_len = ss.intron_len(); + if(intron_len < tmp_intron_len) { + intron_len = tmp_intron_len; + } + } + } + fraglen_ = 1 + dn - up; + assert_geq(fraglen_, intron_len); + fraglen_ -= intron_len; + if(!imUpstream) { + fraglen_ = -fraglen_; + } + fraglenSet_ = true; + return fraglen_; + } + + /** + * Return the fragment length stored by setFragmentLength(); asserts that it + * has been set. The value is negated when this mate is not upstream. + */ + int64_t fragmentLength() const { + assert_gt(type_, 0); + assert(fraglenSet_); + return fraglen_; + } + + /** + * Initialize new AlnRes. + */ + void init( + size_t rdlen, // # chars after hard trimming + TReadId rdid, // read ID + AlnScore score, // alignment score + const EList* ned, // nucleotide edits + size_t ned_i, // first position to copy + size_t ned_n, // # positions to copy + const EList* aed, // ambiguous base resolutions + size_t aed_i, // first position to copy + size_t aed_n, // # positions to copy + Coord refcoord, // leftmost ref pos of 1st al char + TRefOff reflen, // length of the reference + LinkedEList >* raw_edits, + int seedmms = -1,// # seed mms allowed + int seedlen = -1,// seed length + int seedival = -1,// space between seeds + int64_t minsc = -1,// minimum score for valid aln + int nuc5p = -1,// + int nuc3p = -1, + bool pretrimSoft = false, + size_t pretrim5p = 0, // trimming prior to alignment + size_t pretrim3p = 0, // trimming prior to alignment + bool trimSoft = true, + size_t trim5p = 0, // trimming from alignment + size_t trim3p = 0, // trimming from alignment + bool repeat = false); // repeat + + /** + * Return number of bases
trimmed from the 5' end. Argument determines + * whether we're counting hard- or soft-trimmed bases. + */ + size_t trimmed5p(bool soft) const { + size_t trim = 0; + if(pretrimSoft_ == soft) trim += pretrim5p_; + if(trimSoft_ == soft) trim += trim5p_; + return trim; + } + + /** + * Return number of bases trimmed from the 3' end. Argument determines + * whether we're counting hard- or soft-trimmed bases. + */ + size_t trimmed3p(bool soft) const { + size_t trim = 0; + if(pretrimSoft_ == soft) trim += pretrim3p_; + if(trimSoft_ == soft) trim += trim3p_; + return trim; + } + + /** + * Return number of bases trimmed from the left end. Argument determines + * whether we're counting hard- or soft-trimmed bases. + */ + size_t trimmedLeft(bool soft) const { + return fw() ? trimmed5p(soft) : trimmed3p(soft); + } + + /** + * Return number of bases trimmed from the right end. Argument determines + * whether we're counting hard- or soft-trimmed bases. + */ + size_t trimmedRight(bool soft) const { + return fw() ? trimmed3p(soft) : trimmed5p(soft); + } + + bool repeat() const { return repeat_; } + + /** + * Set the number of reference Ns covered by the alignment. + */ + void setRefNs(size_t refns) { + refns_ = refns; + } + + /** + * Return the number of reference Ns covered by the alignment. + */ + size_t refNs() const { return refns_; } + + /** + * Clip away portions of the alignment that are outside the given bounds. + * Clipping is soft if soft == true, hard otherwise. + */ + void clipOutside(bool soft, TRefOff refi, TRefOff reff); + + /** + * Soft trim bases from the LHS of the alignment. + */ + void clipLeft(size_t rd_amt, size_t rf_amt); + + /** + * Soft trim bases from the RHS of the alignment. + */ + void clipRight(size_t rd_amt, size_t rf_amt); + + /** + * In debug mode, we put a copy of the decoded nucleotide sequence here. 
+ */ + ASSERT_ONLY(BTDnaString drd); + + /** + * Return true iff this AlnRes should come before the given AlnRes in a + * prioritized list of results. + */ + bool operator<(const AlnRes& o) const { + return score_ > o.score_; + } + + bool operator==(const AlnRes& o) const { + return + shapeSet_ == o.shapeSet_ && + rdlen_ == o.rdlen_ && + rdid_ == o.rdid_ && + rdrows_ == o.rdrows_ && + score_ == o.score_ && + //oscore_ == o.oscore_ && + *ned_ == *(o.ned_) && + *aed_ == *(o.aed_) && + refcoord_ == o.refcoord_ && + reflen_ == o.reflen_ && + refival_ == o.refival_ && + rdextent_ == o.rdextent_ && + rdexrows_ == o.rdexrows_ && + rfextent_ == o.rfextent_ && + seedmms_ == o.seedmms_ && + seedlen_ == o.seedlen_ && + seedival_ == o.seedival_ && + minsc_ == o.minsc_ && + nuc5p_ == o.nuc5p_ && + nuc3p_ == o.nuc3p_ && + refns_ == o.refns_ && + type_ == o.type_ && + fraglen_ == o.fraglen_ && + pretrimSoft_ == o.pretrimSoft_ && + pretrim5p_ == o.pretrim5p_ && + pretrim3p_ == o.pretrim3p_ && + trimSoft_ == o.trimSoft_ && + trim5p_ == o.trim5p_ && + trim3p_ == o.trim3p_ && + repeat_ == o.repeat_ && + num_spliced_ == o.num_spliced_; + } + + /** + * Initialize a StackedAln (stacked alignment) object w/r/t this alignment. + */ + void initStacked(const Read& rd, StackedAln& st) const { + size_t trimLS = trimmed5p(true); + size_t trimLH = trimmed5p(false); + size_t trimRS = trimmed3p(true); + size_t trimRH = trimmed3p(false); + size_t len_trimmed = rd.length() - trimLS - trimRS; + if(!fw()) { + Edit::invertPoss(const_cast&>(*ned_), len_trimmed, false); + swap(trimLS, trimRS); + swap(trimLH, trimRH); + } + st.init( + fw() ? rd.patFw : rd.patRc, + *ned_, trimLS, trimLH, trimRS, trimRH); + if(!fw()) { + Edit::invertPoss(const_cast&>(*ned_), len_trimmed, false); + } + } + +protected: + + /** + * Given that rdextent_ and ned_ are already set, calculate rfextent_. 
+ */ + void calcRefExtent() { + assert_gt(rdextent_, 0); + rfextent_ = rdextent_; + for(size_t i = 0; i < ned_->size(); i++) { + if((*ned_)[i].isRefGap()) rfextent_--; + if((*ned_)[i].isReadGap()) rfextent_++; + } + } + + bool shapeSet_; // true iff setShape() has been called + size_t rdlen_; // length of the original read + TReadId rdid_; // read id + size_t rdrows_; // # rows in alignment problem + AlnScore score_; // best SW score found + AlnScore oscore_; // score of opposite mate + EList* ned_; // base edits + EList* aed_; // ambiguous base resolutions + Coord refcoord_; // ref coordinates (seq idx, offset, orient) + TRefOff reflen_; // reference length + Interval refival_; // ref interval (coord + length) + size_t rdextent_; // number of read chars involved in alignment + size_t rdexrows_; // number of read rows involved in alignment + size_t rfextent_; // number of ref chars involved in alignment + int seedmms_; // number of mismatches allowed in seed + int seedlen_; // length of seed + int seedival_; // interval between seeds + int64_t minsc_; // minimum score + int nuc5p_; // 5'-most decoded base; clipped if excluding end + int nuc3p_; // 3'-most decoded base; clipped if excluding end + size_t refns_; // # of reference Ns overlapped + int type_; // unpaired or mate #1 or mate #2? + bool fraglenSet_; // true iff a fragment length has been inferred + int64_t fraglen_; // inferred fragment length + + // A tricky aspect of trimming is that we have to decide what the units are: + // read positions, reference positions??? We choose read positions here. + // In other words, if an alignment overhangs the end of the reference and + // part of the overhanging portion is a reference gap, we have to make sure + // the trim amount reflects the number of *read characters* to trim + // including the character opposite the reference gap. + + // Nucleotide-sequence trimming + bool pretrimSoft_; // trimming prior to alignment is soft? 
+ size_t pretrim5p_; // # bases trimmed from 5p end prior to alignment + size_t pretrim3p_; // # bases trimmed from 3p end prior to alignment + bool trimSoft_; // trimming by local alignment is soft? + size_t trim5p_; // # bases trimmed from 5p end by local alignment + size_t trim3p_; // # bases trimmed from 3p end by local alignment + bool repeat_; // repeat? + + size_t num_spliced_; + LinkedEListNode >* ned_node_; + LinkedEListNode >* aed_node_; + LinkedEList >* raw_edits_; +}; + +/** + * Unique ID for a cell in the overall DP table. This is a helpful concept + * because of our definition of "redundant". Two alignments are redundant iff + * they have at least one cell in common in the overall DP table. + */ +struct RedundantCell { + + RedundantCell() { + rfid = 0; + fw = true; + rfoff = 0; + rdoff = 0; + } + + RedundantCell( + TRefId rfid_, + bool fw_, + TRefOff rfoff_, + size_t rdoff_) + { + init(rfid_, fw_, rfoff_, rdoff_); + } + + void init( + TRefId rfid_, + bool fw_, + TRefOff rfoff_, + size_t rdoff_) + { + rfid = rfid_; + fw = fw_; + rfoff = rfoff_; + rdoff = rdoff_; + } + + /** + * Return true iff this RedundantCell is less than the given RedundantCell. + */ + inline bool operator<(const RedundantCell& c) const { + if(rfid < c.rfid) return true; + if(rfid > c.rfid) return false; + if(!fw && c.fw) return true; + if( fw && !c.fw) return false; + if(rfoff < c.rfoff) return true; + if(rfoff > c.rfoff) return false; + return rdoff < c.rdoff; + } + + /** + * Return true iff this RedundantCell is greater than the given + * RedundantCell. + */ + inline bool operator>(const RedundantCell& c) const { + if(rfid > c.rfid) return true; + if(rfid < c.rfid) return false; + if( fw && !c.fw) return true; + if(!fw && c.fw) return false; + if(rfoff > c.rfoff) return true; + if(rfoff < c.rfoff) return false; + return rdoff > c.rdoff; + } + + /** + * Return true iff this RedundantCell is equal to the given RedundantCell.
+ */ + inline bool operator==(const RedundantCell& c) const { + return + rfid == c.rfid && + fw == c.fw && + rfoff == c.rfoff && + rdoff == c.rdoff; + } + + TRefId rfid; // reference id + bool fw; // orientation + TRefOff rfoff; // column + size_t rdoff; // row +}; + +/** + * Encapsulates data structures and routines allowing client to determine + * whether one alignment is redundant (has a DP cell in common with) with a set + * of others. + * + * Adding cells to and checking cell against this data structure can get rather + * slow when there are many alignments in play. Dividing the burden over + * read-position bins helps some. + */ +class RedundantAlns { + +public: + + RedundantAlns(int cat = DP_CAT) : cells_(cat) { } + + /** + * Empty the cell database. + */ + void reset() { cells_.clear(); } + + /** + * Initialize and set the list of sets to equal the read length. + */ + void init(size_t npos) { + cells_.resize(npos); + for(size_t i = 0; i < npos; i++) { + cells_[i].clear(); + } + } + + /** + * Add all of the cells involved in the given alignment to the database. + */ + void add(const AlnRes& res); + + /** + * Return true iff the given alignment has at least one cell that overlaps + * one of the cells in the database. + */ + bool overlap(const AlnRes& res); + +protected: + + EList > cells_; +}; + +typedef uint64_t TNumAlns; + +/** + * Encapsulates a concise summary of a set of alignment results for a + * given pair or mate. Referring to the fields of this object should + * provide enough information to print output records for the read. + */ +class AlnSetSumm { + +public: + + AlnSetSumm() { reset(); } + + /** + * Given an unpaired read (in either rd1 or rd2) or a read pair + * (mate 1 in rd1, mate 2 in rd2). 
+ */ + explicit AlnSetSumm( + const Read* rd1, + const Read* rd2, + const EList* rs1, + const EList* rs2, + const EList* rs1u, + const EList* rs2u, + bool exhausted1, + bool exhausted2, + TRefId orefid, + TRefOff orefoff, + bool repeat) + { + init(rd1, rd2, rs1, rs2, rs1u, rs2u, exhausted1, exhausted2, + orefid, orefoff, repeat); + } + + explicit AlnSetSumm( + AlnScore best1, + AlnScore secbest1, + AlnScore best2, + AlnScore secbest2, + AlnScore bestPaired, + AlnScore secbestPaired, + TNumAlns other1, + TNumAlns other2, + bool paired, + bool exhausted1, + bool exhausted2, + TRefId orefid, + TRefOff orefoff, + bool repeat, + TNumAlns numAlns1, + TNumAlns numAlns2, + TNumAlns numAlnsPaired) + { + init( + best1, + secbest1, + best2, + secbest2, + bestPaired, + secbestPaired, + other1, + other2, + paired, + exhausted1, + exhausted2, + orefid, + orefoff, + repeat, + numAlns1, + numAlns2, + numAlnsPaired); + } + + /** + * Set to uninitialized state. + */ + void reset() { + best1_.invalidate(); + secbest1_.invalidate(); + best2_.invalidate(); + secbest2_.invalidate(); + bestPaired_.invalidate(); + secbestPaired_.invalidate(); + other1_ = other2_ = 0; + paired_ = false; + exhausted1_ = exhausted2_ = false; + orefid_ = -1; + orefoff_ = -1; + repeat_ = false; + numAlns1_ = numAlns2_= numAlnsPaired_ = 0; + } + + void init( + const Read* rd1, + const Read* rd2, + const EList* rs1, + const EList* rs2, + const EList* rs1u, + const EList* rs2u, + bool exhausted1, + bool exhausted2, + TRefId orefid, + TRefOff orefoff, + bool repeat); + + /** + * Initialize given fields. See constructor for how fields are set. 
+ */ + void init( + AlnScore best1, + AlnScore secbest1, + AlnScore best2, + AlnScore secbest2, + AlnScore bestPaired, + AlnScore secbestPaired, + TNumAlns other1, + TNumAlns other2, + bool paired, + bool exhausted1, + bool exhausted2, + TRefId orefid, + TRefOff orefoff, + bool repeat, + TNumAlns numAlns1, + TNumAlns numAlns2, + TNumAlns numAlnsPaired) + { + best1_ = best1; + secbest1_ = secbest1; + best2_ = best2; + secbest2_ = secbest2; + bestPaired_ = bestPaired; + secbestPaired_ = secbestPaired; + other1_ = other1; + other2_ = other2; + paired_ = paired; + exhausted1_ = exhausted1; + exhausted2_ = exhausted2; + orefid_ = orefid; + orefoff_ = orefoff; + repeat_ = repeat; + numAlns1_ = numAlns1; + numAlns2_ = numAlns2; + numAlnsPaired_ = numAlnsPaired; + assert(repOk()); + } + + /** + * Return true iff the best score for mate 1 (best1_) is not valid. + */ + bool empty() const { + assert(repOk()); + return !VALID_AL_SCORE(best1_); + } + +#ifndef NDEBUG + /** + * Check that the summary is internally consistent.
+ */ + bool repOk() const { + assert(other1_ == 0 || VALID_AL_SCORE(secbest1_)); + assert(other1_ != 0 || !VALID_AL_SCORE(secbest1_)); + assert(other2_ == 0 || VALID_AL_SCORE(secbest2_)); + assert(other2_ != 0 || !VALID_AL_SCORE(secbest2_)); + return true; + } +#endif + + AlnScore best1() const { return best1_; } + AlnScore secbest1() const { return secbest1_; } + AlnScore best2() const { return best2_; } + AlnScore secbest2() const { return secbest2_; } + AlnScore bestPaired() const { return bestPaired_; } + AlnScore secbestPaired() const { return secbestPaired_; } + TNumAlns other1() const { return other1_; } + TNumAlns other2() const { return other2_; } + bool paired() const { return paired_; } + bool exhausted1() const { return exhausted1_; } + bool exhausted2() const { return exhausted2_; } + TRefId orefid() const { return orefid_; } + TRefOff orefoff() const { return orefoff_; } + bool repeat() const { return repeat_; } + + TNumAlns numAlns1() const { return numAlns1_; } + TNumAlns numAlns2() const { return numAlns2_; } + TNumAlns numAlnsPaired() const { return numAlnsPaired_; } + + void numAlns1(TNumAlns numAlns1) { numAlns1_ = numAlns1; } + void numAlns2(TNumAlns numAlns2) { numAlns2_ = numAlns2; } + void numAlnsPaired(TNumAlns numAlnsPaired) { numAlnsPaired_ = numAlnsPaired; } + + /** + * Return the best score for the specified mate (mate 1 iff mate1 is true). + */ + AlnScore best(bool mate1) const { return mate1 ? best1_ : best2_; } + + bool exhausted(bool mate1) const { + return mate1 ? exhausted1_ : exhausted2_; + } + + /** + * Return the second-best score for the specified mate. If the alignment + * is paired and the specified mate aligns uniquely, return an invalid + * second-best score. This allows us to treat mates separately, so that + * repetitive paired-end alignments don't trump potentially unique unpaired + * alignments. + */ + AlnScore secbestMate(bool mate1) const { + return mate1 ? secbest1_ : secbest2_; + } + + /** + * Return the second-best score for the specified mate.
If the alignment + * is paired and the specified mate aligns uniquely, return an invalid + * second-best score. This allows us to treat mates separately, so that + * repetitive paired-end alignments don't trump potentially unique unpaired + * alignments. + */ + AlnScore secbest(bool mate1) const { + if(paired_) { + if(mate1) { + //if(!secbest1_.valid()) { + return secbest1_; + //} + } else { + //if(!secbest2_.valid()) { + return secbest2_; + //} + } + //return secbestPaired_; + } else { + return mate1 ? secbest1_ : secbest2_; + } + } + +protected: + + AlnScore bestPaired_; // best full-alignment score found for this read + AlnScore secbestPaired_; // second-best + AlnScore best1_; // best full-alignment score found for this read + AlnScore secbest1_; // second-best + AlnScore best2_; // best full-alignment score found for this read + AlnScore secbest2_; // second-best + TNumAlns other1_; // # more alignments within N points of second-best + TNumAlns other2_; // # more alignments within N points of second-best + bool paired_; // results are paired + bool exhausted1_; // searched exhaustively for mate 1 alignments? + bool exhausted2_; // searched exhaustively for mate 2 alignments? + TRefId orefid_; + TRefOff orefoff_; + bool repeat_; + + TNumAlns numAlns1_; // number of alignments for mate 1 as singleton or discordantly mapped + TNumAlns numAlns2_; // number of alignments for mate 2 as singleton or discordantly mapped + TNumAlns numAlnsPaired_; // number of concordant pair alignments +}; + +#endif diff --git a/aligner_seed.cpp b/aligner_seed.cpp new file mode 100644 index 0000000..5fe0419 --- /dev/null +++ b/aligner_seed.cpp @@ -0,0 +1,530 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "aligner_cache.h" +#include "aligner_seed.h" +#include "search_globals.h" +#include "gfm.h" + +using namespace std; + +/** + * Construct a constraint with no edits of any kind allowed. + */ +Constraint Constraint::exact() { + Constraint c; + c.edits = c.mms = c.ins = c.dels = c.penalty = 0; + return c; +} + +/** + * Construct a constraint where the only constraint is a total + * penalty constraint. + */ +Constraint Constraint::penaltyBased(int pen) { + Constraint c; + c.penalty = pen; + return c; +} + +/** + * Construct a constraint where the only constraint is a total + * penalty constraint related to the length of the read. + */ +Constraint Constraint::penaltyFuncBased(const SimpleFunc& f) { + Constraint c; + c.penFunc = f; + return c; +} + +/** + * Construct a mismatch-based constraint: sets mms to the given value + * and zeroes edits, dels and ins. + */ +Constraint Constraint::mmBased(int mms) { + Constraint c; + c.mms = mms; + c.edits = c.dels = c.ins = 0; + return c; +} + +/** + * Construct an edit-based constraint: sets edits to the given value + * and zeroes dels, ins and mms.
+ */ +Constraint Constraint::editBased(int edits) { + Constraint c; + c.edits = edits; + c.dels = c.ins = c.mms = 0; + return c; +} + +// +// Some static methods for constructing some standard SeedPolicies +// + +/** + * Given a read, depth and orientation, extract a seed data structure + * from the read and fill in the steps & zones arrays. The Seed + * contains the sequence and quality values. + */ +bool +Seed::instantiate( + const Read& read, + const BTDnaString& seq, // seed read sequence + const BTString& qual, // seed quality sequence + const Scoring& pens, + int depth, + int seedoffidx, + int seedtypeidx, + bool fw, + InstantiatedSeed& is) const +{ + assert(overall != NULL); + int seedlen = len; + if((int)read.length() < seedlen) { + // Shrink seed length to fit read if necessary + seedlen = (int)read.length(); + } + assert_gt(seedlen, 0); + is.steps.resize(seedlen); + is.zones.resize(seedlen); + // Fill in 'steps' and 'zones' + // + // The 'steps' list indicates which read character should be + // incorporated at each step of the search process. Often we will + // simply proceed from one end to the other, in which case the + // 'steps' list is ascending or descending. In some cases (e.g. + // the 2mm case), we might want to switch directions at least once + // during the search, in which case 'steps' will jump in the + // middle. When an element of the 'steps' list is negative, this + // indicates a right-to-left step; abs(step)-1 is the read offset. + // + // The 'zones' list indicates which zone constraint is active at + // each step. Each element of the 'zones' list is a pair; the + // first pair element indicates the applicable zone when + // considering either mismatch or delete (ref gap) events, while + // the second pair element indicates the applicable zone when + // considering insertion (read gap) events.
When either pair + // element is a negative number, that indicates that we are about + // to leave the zone for good, at which point we may need to + // evaluate whether we have reached the zone's budget. + // + switch(type) { + case SEED_TYPE_EXACT: { + for(int k = 0; k < seedlen; k++) { + is.steps[k] = -(seedlen - k); + // Zone 0 all the way + is.zones[k].first = is.zones[k].second = 0; + } + break; + } + case SEED_TYPE_LEFT_TO_RIGHT: { + for(int k = 0; k < seedlen; k++) { + is.steps[k] = k+1; + // Zone 0 from 0 up to ceil(len/2), then 1 + is.zones[k].first = is.zones[k].second = ((k < (seedlen+1)/2) ? 0 : 1); + } + // Zone 1 ends at the RHS + is.zones[seedlen-1].first = is.zones[seedlen-1].second = -1; + break; + } + case SEED_TYPE_RIGHT_TO_LEFT: { + for(int k = 0; k < seedlen; k++) { + is.steps[k] = -(seedlen - k); + // Zone 0 from 0 up to floor(len/2), then 1 + is.zones[k].first = ((k < seedlen/2) ? 0 : 1); + // Inserts: Zone 0 from 0 up to ceil(len/2)-1, then 1 + is.zones[k].second = ((k < (seedlen+1)/2+1) ? 
0 : 1); + } + is.zones[seedlen-1].first = is.zones[seedlen-1].second = -1; + break; + } + case SEED_TYPE_INSIDE_OUT: { + // Zone 0 from ceil(N/4) up to N-floor(N/4) + int step = 0; + for(int k = (seedlen+3)/4; k < seedlen - (seedlen/4); k++) { + is.zones[step].first = is.zones[step].second = 0; + is.steps[step++] = k+1; + } + // Zone 1 from N-floor(N/4) up + for(int k = seedlen - (seedlen/4); k < seedlen; k++) { + is.zones[step].first = is.zones[step].second = 1; + is.steps[step++] = k+1; + } + // No Zone 1 if seedlen is short (like 2) + //assert_eq(1, is.zones[step-1].first); + is.zones[step-1].first = is.zones[step-1].second = -1; + // Zone 2 from ((seedlen+3)/4)-1 down to 0 + for(int k = ((seedlen+3)/4)-1; k >= 0; k--) { + is.zones[step].first = is.zones[step].second = 2; + is.steps[step++] = -(k+1); + } + assert_eq(2, is.zones[step-1].first); + is.zones[step-1].first = is.zones[step-1].second = -2; + assert_eq(seedlen, step); + break; + } + default: + throw 1; + } + // Instantiate constraints + for(int i = 0; i < 3; i++) { + is.cons[i] = zones[i]; + is.cons[i].instantiate(read.length()); + } + is.overall = *overall; + is.overall.instantiate(read.length()); + // Take a sweep through the seed sequence. Consider where the Ns + // occur and how zones are laid out. Calculate the maximum number + // of positions we can jump over initially (e.g. with the ftab) and + // perhaps set this function's return value to false, indicating + // that the arrangements of Ns prevents the seed from aligning. 
+ bool streak = true; + is.maxjump = 0; + bool ret = true; + bool ltr = (is.steps[0] > 0); // true -> left-to-right + for(size_t i = 0; i < is.steps.size(); i++) { + assert_neq(0, is.steps[i]); + int off = is.steps[i]; + off = abs(off)-1; + Constraint& cons = is.cons[abs(is.zones[i].first)]; + int c = seq[off]; assert_range(0, 4, c); + int q = qual[off]; + if(ltr != (is.steps[i] > 0) || // changed direction + is.zones[i].first != 0 || // changed zone + is.zones[i].second != 0) // changed zone + { + streak = false; + } + if(c == 4) { + // Induced mismatch + if(cons.canN(q, pens)) { + cons.chargeN(q, pens); + } else { + // Seed disqualified due to arrangement of Ns + return false; + } + } + if(streak) is.maxjump++; + } + is.seedoff = depth; + is.seedoffidx = seedoffidx; + is.fw = fw; + is.s = *this; + return ret; +} + +/** + * Return a set consisting of 1 seed encapsulating an exact matching + * strategy. + */ +void +Seed::zeroMmSeeds(int ln, EList& pols, Constraint& oall) { + oall.init(); + // Seed policy 1: left-to-right search + pols.expand(); + pols.back().len = ln; + pols.back().type = SEED_TYPE_EXACT; + pols.back().zones[0] = Constraint::exact(); + pols.back().zones[1] = Constraint::exact(); + pols.back().zones[2] = Constraint::exact(); // not used + pols.back().overall = &oall; +} + +/** + * Return a set of 2 seeds encapsulating a half-and-half 1mm strategy. 
+ */ +void +Seed::oneMmSeeds(int ln, EList& pols, Constraint& oall) { + oall.init(); + // Seed policy 1: left-to-right search (zone 0 exact, zone 1 built by mmBased(1)) + pols.expand(); + pols.back().len = ln; + pols.back().type = SEED_TYPE_LEFT_TO_RIGHT; + pols.back().zones[0] = Constraint::exact(); + pols.back().zones[1] = Constraint::mmBased(1); + pols.back().zones[2] = Constraint::exact(); // not used + pols.back().overall = &oall; + // Seed policy 2: right-to-left search (zone 0 exact, zone 1 built by mmBased(1)) + pols.expand(); + pols.back().len = ln; + pols.back().type = SEED_TYPE_RIGHT_TO_LEFT; + pols.back().zones[0] = Constraint::exact(); + pols.back().zones[1] = Constraint::mmBased(1); + pols.back().zones[1].mmsCeil = 0; // at most 0 unused mismatches may remain in zone 1 (checked by Constraint::acceptable) + pols.back().zones[2] = Constraint::exact(); // not used + pols.back().overall = &oall; +} + +/** + * Return a set of 3 seeds encapsulating search roots for: + * + * 1. Starting from the left-hand side and searching toward the + * right-hand side allowing 2 mismatches in the right half. + * 2. Starting from the right-hand side and searching toward the + * left-hand side allowing 2 mismatches in the left half. + * 3. Starting (effectively) from the center and searching out toward + * both the left and right-hand sides, allowing one mismatch on + * either side. + * + * This is not exhaustive. There are 2 mismatch cases missed; if you + * imagine the seed as divided into four successive quarters A, B, C + * and D, the cases we miss are when mismatches occur in A and C or B + * and D.
+ */ +void +Seed::twoMmSeeds(int ln, EList& pols, Constraint& oall) { + oall.init(); + // Seed policy 1: left-to-right search; zone 0 exact, zone 1 built by mmBased(2) + pols.expand(); + pols.back().len = ln; + pols.back().type = SEED_TYPE_LEFT_TO_RIGHT; + pols.back().zones[0] = Constraint::exact(); + pols.back().zones[1] = Constraint::mmBased(2); + pols.back().zones[2] = Constraint::exact(); // not used + pols.back().overall = &oall; + // Seed policy 2: right-to-left search; zone 0 exact, zone 1 built by mmBased(2) + pols.expand(); + pols.back().len = ln; + pols.back().type = SEED_TYPE_RIGHT_TO_LEFT; + pols.back().zones[0] = Constraint::exact(); + pols.back().zones[1] = Constraint::mmBased(2); + pols.back().zones[1].mmsCeil = 1; // Must have used at least 1 mismatch + pols.back().zones[2] = Constraint::exact(); // not used + pols.back().overall = &oall; + // Seed policy 3: inside-out search; zone 0 exact, zones 1 and 2 each built by mmBased(1) + pols.expand(); + pols.back().len = ln; + pols.back().type = SEED_TYPE_INSIDE_OUT; + pols.back().zones[0] = Constraint::exact(); + pols.back().zones[1] = Constraint::mmBased(1); + pols.back().zones[1].mmsCeil = 0; // Must have used at least 1 mismatch + pols.back().zones[2] = Constraint::mmBased(1); + pols.back().zones[2].mmsCeil = 0; // Must have used at least 1 mismatch + pols.back().overall = &oall; +} + +/** + * Types of actions that can be taken by the SeedAligner. + */ +enum { + SA_ACTION_TYPE_RESET = 1, + SA_ACTION_TYPE_SEARCH_SEED, // 2 + SA_ACTION_TYPE_FTAB, // 3 + SA_ACTION_TYPE_FCHR, // 4 + SA_ACTION_TYPE_MATCH, // 5 + SA_ACTION_TYPE_EDIT // 6 +}; + +/** + * Smaller of x and y. Both arguments are parenthesized in the expansion so + * that compound expressions keep their intended precedence. An argument may + * still be evaluated twice, so avoid arguments with side effects. + */ +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) + +#ifdef ALIGNER_SEED_MAIN + +#include +#include + +/** + * Parse 'arg' as a base-10 integer with strtol and return it narrowed to + * int. The failure path prints 'errmsg' to cerr and throws 1. No lower + * bound is enforced; this function takes no such parameter.
+ */ +static int parseInt(const char *errmsg, const char *arg) { + long l; + char *endPtr = NULL; + l = strtol(arg, &endPtr, 10); + /* strtol always stores an end pointer here, so comparing it with NULL can never detect a bad parse. Accept only when at least one character was consumed and nothing is left over. */ + if (endPtr != arg && *endPtr == '\0') { + return (int32_t)l; + } + cerr << errmsg << endl; + throw 1; + return -1; +} + +/* Codes returned by getopt_long for options that have no short form; numbering starts at 256. */ +enum { + ARG_NOFW = 256, + ARG_NORC, + ARG_MM, + ARG_SHMEM, + ARG_TESTS, + ARG_RANDOM_TESTS, + ARG_SEED +}; + +static const char *short_opts = "vCt"; +static struct option long_opts[] = { + {(char*)"verbose", no_argument, 0, 'v'}, + {(char*)"color", no_argument, 0, 'C'}, + {(char*)"timing", no_argument, 0, 't'}, + {(char*)"nofw", no_argument, 0, ARG_NOFW}, + {(char*)"norc", no_argument, 0, ARG_NORC}, + {(char*)"mm", no_argument, 0, ARG_MM}, + {(char*)"shmem", no_argument, 0, ARG_SHMEM}, + {(char*)"tests", no_argument, 0, ARG_TESTS}, + {(char*)"random", required_argument, 0, ARG_RANDOM_TESTS}, + {(char*)"seed", required_argument, 0, ARG_SEED}, + {0, 0, 0, 0} /* all-zero terminator; getopt_long scans the array until it finds one */ +}; + +/* Write the command-line usage summary to 'os'. */ +static void printUsage(ostream& os) { + os << "Usage: ac [options]* " << endl; + os << "Options:" << endl; + os << " --mm memory-mapped mode" << endl; + os << " --shmem shared memory mode" << endl; + os << " --nofw don't align forward-oriented read" << endl; + os << " --norc don't align reverse-complemented read" << endl; + os << " -t/--timing show timing information" << endl; + os << " -C/--color colorspace mode" << endl; + os << " -v/--verbose talkative mode" << endl; +} + +bool gNorc = false; +bool gNofw = false; +bool gColor = false; +int gVerbose = 0; +int gGapBarrier = 1; +bool gColorExEnds = true; +int gSnpPhred = 30; +bool gReportOverhangs = true; + +extern void aligner_seed_tests(); +extern void aligner_random_seed_tests( + int num_tests, + uint32_t qslo, + uint32_t qshi, + bool color, + uint32_t seed); + +/** + * A way of feeding simple tests to the seed alignment infrastructure.
+ */ +int main(int argc, char **argv) { + bool useMm = false; + bool useShmem = false; + bool mmSweep = false; + bool noRefNames = false; + bool sanity = false; + bool timing = false; + int option_index = 0; + int seed = 777; + int next_option; + do { + next_option = getopt_long( + argc, argv, short_opts, long_opts, &option_index); + switch (next_option) { + case 'v': gVerbose = true; break; + case 'C': gColor = true; break; + case 't': timing = true; break; + case ARG_NOFW: gNofw = true; break; + case ARG_NORC: gNorc = true; break; + case ARG_MM: useMm = true; break; + case ARG_SHMEM: useShmem = true; break; + case ARG_SEED: seed = parseInt("", optarg); break; + case ARG_TESTS: { + aligner_seed_tests(); + aligner_random_seed_tests( + 100, // num references + 100, // queries per reference lo + 400, // queries per reference hi + false, // true -> generate colorspace reference/reads + 18); // pseudo-random seed + return 0; + } + case ARG_RANDOM_TESTS: { + seed = parseInt("", optarg); + aligner_random_seed_tests( + 100, // num references + 100, // queries per reference lo + 400, // queries per reference hi + false, // true -> generate colorspace reference/reads + seed); // pseudo-random seed + return 0; + } + case -1: break; + default: { + cerr << "Unknown option: " << (char)next_option << endl; + printUsage(cerr); + exit(1); + } + } + } while(next_option != -1); + char *reffn; + if(optind >= argc) { + cerr << "No reference; quitting..." << endl; + return 1; + } + reffn = argv[optind++]; + if(optind >= argc) { + cerr << "No reads; quitting..." << endl; + return 1; + } + string gfmBase(reffn); + BitPairReference ref( + gfmBase, // base path + gColor, // whether we expect it to be colorspace + sanity, // whether to sanity-check reference as it's loaded + NULL, // fasta files to sanity check reference against + NULL, // another way of specifying original sequences + false, // true -> infiles (2 args ago) contains raw seqs + useMm, // use memory mapping to load index? 
+ useShmem, // use shared memory (not memory mapping) + mmSweep, // touch all the pages after memory-mapping the index + gVerbose, // verbose + gVerbose); // verbose but just for startup messages + Timer *t = new Timer(cerr, "Time loading fw index: ", timing); + GFM gfmFw( + gfmBase, + 0, // don't need entireReverse for fw index + true, // index is for the forward direction + -1, // offrate (irrelevant) + useMm, // whether to use memory-mapped files + useShmem, // whether to use shared memory + mmSweep, // sweep memory-mapped files + !noRefNames, // load names? + false, // load SA sample? + true, // load ftab? + true, // load rstarts? + NULL, // reference map, or NULL if none is needed + gVerbose, // whether to be talkative + gVerbose, // talkative during initialization + false, // handle memory exceptions, don't pass them up + sanity); + delete t; + t = new Timer(cerr, "Time loading bw index: ", timing); + GFM gfmBw( + gfmBase + ".rev", + 1, // need entireReverse + false, // index is for the backward direction + -1, // offrate (irrelevant) + useMm, // whether to use memory-mapped files + useShmem, // whether to use shared memory + mmSweep, // sweep memory-mapped files + !noRefNames, // load names? + false, // load SA sample? + true, // load ftab? + false, // load rstarts? + NULL, // reference map, or NULL if none is needed + gVerbose, // whether to be talkative + gVerbose, // talkative during initialization + false, // handle memory exceptions, don't pass them up + sanity); + delete t; + for(int i = optind; i < argc; i++) { + } +} +#endif diff --git a/aligner_seed.h b/aligner_seed.h new file mode 100644 index 0000000..a832fd4 --- /dev/null +++ b/aligner_seed.h @@ -0,0 +1,2922 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ALIGNER_SEED_H_ +#define ALIGNER_SEED_H_ + +#include +#include +#include +#include "qual.h" +#include "ds.h" +#include "sstring.h" +#include "alphabet.h" +#include "edit.h" +#include "read.h" +// Threading is necessary to synchronize the classes that dump +// intermediate alignment results to files. Otherwise, all data herein +// is constant and shared, or per-thread. +#include "threading.h" +#include "aligner_result.h" +#include "aligner_cache.h" +#include "scoring.h" +#include "mem_ids.h" +#include "simple_func.h" +#include "btypes.h" + +/** + * A constraint to apply to an alignment zone, or to an overall + * alignment. + * + * The constraint can put both caps and ceilings on the number and + * types of edits allowed. + */ +struct Constraint { + + Constraint() { init(); } + + /** + * Initialize Constraint to be fully permissive. + */ + void init() { + edits = mms = ins = dels = penalty = editsCeil = mmsCeil = + insCeil = delsCeil = penaltyCeil = MAX_I; + penFunc.reset(); + instantiated = false; + } + + /** + * Return true iff penalities and constraints prevent us from + * adding any edits. + */ + bool mustMatch() { + assert(instantiated); + return (mms == 0 && edits == 0) || + penalty == 0 || + (mms == 0 && dels == 0 && ins == 0); + } + + /** + * Return true iff a mismatch of the given quality is permitted. 
+ */ + bool canMismatch(int q, const Scoring& cm) { + assert(instantiated); + return (mms > 0 || edits > 0) && + penalty >= cm.mm(q); + } + + /** + * Return true iff an N mismatch of the given quality is permitted: + * some mismatch or edit budget remains and the remaining penalty + * covers cm.n(q). + */ + bool canN(int q, const Scoring& cm) { + assert(instantiated); + return (mms > 0 || edits > 0) && + penalty >= cm.n(q); + } + + /** + * Return true iff a mismatch of *any* quality (even qual=1) is + * permitted. + */ + bool canMismatch() { + assert(instantiated); + return (mms > 0 || edits > 0) && penalty > 0; + } + + /** + * Return true iff an N mismatch is permitted by the mismatch/edit + * budgets alone. Unlike canMismatch(), the remaining penalty is not + * consulted here. + */ + bool canN() { + assert(instantiated); + return (mms > 0 || edits > 0); + } + + /** + * Return true iff a deletion of the given extension (0=open, 1=1st + * extension, etc) is permitted. + * + * NOTE(review): this overload requires dels > 0 AND edits > 0, while + * canDelete() and both canInsert() overloads accept either budget + * (OR). Confirm which rule is intended. + */ + bool canDelete(int ex, const Scoring& cm) { + assert(instantiated); + return (dels > 0 && edits > 0) && + penalty >= cm.del(ex); + } + + /** + * Return true iff a deletion of any extension is permitted. + */ + bool canDelete() { + assert(instantiated); + return (dels > 0 || edits > 0) && + penalty > 0; + } + + /** + * Return true iff an insertion of the given extension (0=open, + * 1=1st extension, etc) is permitted. + */ + bool canInsert(int ex, const Scoring& cm) { + assert(instantiated); + return (ins > 0 || edits > 0) && + penalty >= cm.ins(ex); + } + + /** + * Return true iff an insertion of any extension is permitted. + */ + bool canInsert() { + assert(instantiated); + return (ins > 0 || edits > 0) && + penalty > 0; + } + + /** + * Return true iff a gap of any extension is permitted + */ + bool canGap() { + assert(instantiated); + return ((ins > 0 || dels > 0) || edits > 0) && penalty > 0; + } + + /** + * Charge a mismatch of the given quality.
+ */ + void chargeMismatch(int q, const Scoring& cm) { + assert(instantiated); + if(mms == 0) { assert_gt(edits, 0); edits--; } + else mms--; + penalty -= cm.mm(q); + assert_geq(mms, 0); + assert_geq(edits, 0); + assert_geq(penalty, 0); + } + + /** + * Charge an N mismatch of the given quality. + */ + void chargeN(int q, const Scoring& cm) { + assert(instantiated); + if(mms == 0) { assert_gt(edits, 0); edits--; } + else mms--; + penalty -= cm.n(q); + assert_geq(mms, 0); + assert_geq(edits, 0); + assert_geq(penalty, 0); + } + + /** + * Charge a deletion of the given extension. + */ + void chargeDelete(int ex, const Scoring& cm) { + assert(instantiated); + dels--; + edits--; + penalty -= cm.del(ex); + assert_geq(dels, 0); + assert_geq(edits, 0); + assert_geq(penalty, 0); + } + + /** + * Charge an insertion of the given extension. + */ + void chargeInsert(int ex, const Scoring& cm) { + assert(instantiated); + ins--; + edits--; + penalty -= cm.ins(ex); + assert_geq(ins, 0); + assert_geq(edits, 0); + assert_geq(penalty, 0); + } + + /** + * Once the constrained area is completely explored, call this + * function to check whether there were *at least* as many + * dissimilarities as required by the constraint. Bounds like this + * are helpful to resolve instances where two search roots would + * otherwise overlap in what alignments they can find. + */ + bool acceptable() { + assert(instantiated); + return edits <= editsCeil && + mms <= mmsCeil && + ins <= insCeil && + dels <= delsCeil && + penalty <= penaltyCeil; + } + + /** + * Instantiate a constraint w/r/t the read length and the constant + * and linear coefficients for the penalty function. + */ + static int instantiate(size_t rdlen, const SimpleFunc& func) { + return func.f((double)rdlen); + } + + /** + * Instantiate this constraint w/r/t the read length. 
+ */ + void instantiate(size_t rdlen) { + assert(!instantiated); + if(penFunc.initialized()) { + penalty = Constraint::instantiate(rdlen, penFunc); + } + instantiated = true; + } + + int edits; // # edits permitted + int mms; // # mismatches permitted + int ins; // # insertions permitted + int dels; // # deletions permitted + int penalty; // penalty total permitted + int editsCeil; // <= this many edits can be left at the end + int mmsCeil; // <= this many mismatches can be left at the end + int insCeil; // <= this many inserts can be left at the end + int delsCeil; // <= this many deletions can be left at the end + int penaltyCeil;// <= this much leftover penalty can be left at the end + SimpleFunc penFunc;// penalty function; function of read len + bool instantiated; // whether constraint is instantiated w/r/t read len + + // + // Some static methods for constructing some standard Constraints + // + + /** + * Construct a constraint with no edits of any kind allowed. + */ + static Constraint exact(); + + /** + * Construct a constraint where the only constraint is a total + * penalty constraint. + */ + static Constraint penaltyBased(int pen); + + /** + * Construct a constraint where the only constraint is a total + * penalty constraint related to the length of the read. + */ + static Constraint penaltyFuncBased(const SimpleFunc& func); + + /** + * Construct a constraint where the only constraint is a total + * penalty constraint. + */ + static Constraint mmBased(int mms); + + /** + * Construct a constraint where the only constraint is a total + * penalty constraint. + */ + static Constraint editBased(int edits); +}; + +/** + * We divide seed search strategies into three categories: + * + * 1. A left-to-right search where the left half of the read is + * constrained to match exactly and the right half is subject to + * some looser constraint (e.g. 1mm or 2mm) + * 2. Same as 1, but going right to left with the exact matching half + * on the right. + * 3. 
Inside-out search where the center half of the read is + * constrained to match exactly, and the extreme quarters of the + * read are subject to a looser constraint. + */ +enum { + SEED_TYPE_EXACT = 1, + SEED_TYPE_LEFT_TO_RIGHT, + SEED_TYPE_RIGHT_TO_LEFT, + SEED_TYPE_INSIDE_OUT +}; + +struct InstantiatedSeed; + +/** + * Policy dictating how to size and arrange seeds along the length of + * the read, and what constraints to force on the zones of the seed. + * We assume that seeds are plopped down at regular intervals from the + * 5' to 3' ends, with the first seed flush to the 5' end. + * + * If the read is shorter than a single seed, one seed is used and it + * is shrunk to accommodate the read. + */ +struct Seed { + + int len; // length of a seed + int type; // dictates anchor portion, direction of search + Constraint *overall; // for the overall alignment + + Seed() { init(0, 0, NULL); } + + /** + * Construct and initialize this seed with given length and type. + */ + Seed(int ln, int ty, Constraint* oc) { + init(ln, ty, oc); + } + + /** + * Initialize this seed with given length and type. + */ + void init(int ln, int ty, Constraint* oc) { + len = ln; + type = ty; + overall = oc; + } + + // If the seed is split into halves, we just use zones[0] and + // zones[1]; 0 is the near half and 1 is the far half. If the seed + // is split into thirds (i.e. inside-out) then 0 is the center, 1 + // is the far portion on the left, and 2 is the far portion on the + // right. + Constraint zones[3]; + + /** + * Once the constrained seed is completely explored, call this + * function to check whether there were *at least* as many + * dissimilarities as required by all constraints. Bounds like this + * are helpful to resolve instances where two search roots would + * otherwise overlap in what alignments they can find. 
+ */ + bool acceptable() { + assert(overall != NULL); + return zones[0].acceptable() && + zones[1].acceptable() && + zones[2].acceptable() && + overall->acceptable(); + } + + /** + * Given a read, depth and orientation, extract a seed data structure + * from the read and fill in the steps & zones arrays. The Seed + * contains the sequence and quality values. + */ + bool instantiate( + const Read& read, + const BTDnaString& seq, // already-extracted seed sequence + const BTString& qual, // already-extracted seed quality sequence + const Scoring& pens, + int depth, + int seedoffidx, + int seedtypeidx, + bool fw, + InstantiatedSeed& si) const; + + /** + * Add to 'pols' the Seed objects for a search allowing 'mms' + * mismatches by dispatching to zeroMmSeeds, oneMmSeeds or + * twoMmSeeds. Throws 1 if 'mms' is not 0, 1 or 2. + */ + static void mmSeeds( + int mms, + int ln, + EList& pols, + Constraint& oall) + { + if(mms == 0) { + zeroMmSeeds(ln, pols, oall); + } else if(mms == 1) { + oneMmSeeds(ln, pols, oall); + } else if(mms == 2) { + twoMmSeeds(ln, pols, oall); + } else throw 1; + } + + static void zeroMmSeeds(int ln, EList&, Constraint&); + static void oneMmSeeds (int ln, EList&, Constraint&); + static void twoMmSeeds (int ln, EList&, Constraint&); +}; + +/** + * An instantiated seed is a seed (perhaps modified to fit the read) + * plus all data needed to conduct a search of the seed. + */ +struct InstantiatedSeed { + + InstantiatedSeed() : steps(AL_CAT), zones(AL_CAT) { } + + // Steps map. There are as many steps as there are positions in + // the seed. The map is a helpful abstraction because we sometimes + // visit seed positions in an irregular order (e.g. inside-out + // search). + EList steps; + + // Zones map. For each step, records what constraint to charge an + // edit to. The first entry in each pair gives the constraint for + // non-insert edits and the second entry in each pair gives the + // constraint for insert edits. If the value stored is negative, + // this indicates that the zone is "closed out" after this + // position, so zone acceptability should be checked.
+ EList > zones; + + // Nucleotide sequence covering the seed, extracted from read + BTDnaString *seq; + + // Quality sequence covering the seed, extracted from read + BTString *qual; + + // Initial constraints governing zones 0, 1, 2. We precalculate + // the effect of Ns on these. + Constraint cons[3]; + + // Overall constraint, tailored to the read length. + Constraint overall; + + // Maximum number of positions that the aligner may advance before + // its first step. This lets the aligner know whether it can use + // the ftab or not. + int maxjump; + + // Offset of seed from 5' end of read + int seedoff; + + // Id for seed offset; ids are such that the smallest index is the + // closest to the 5' end and consecutive ids are adjacent (i.e. + // there are no intervening offsets with seeds) + int seedoffidx; + + // Type of seed (left-to-right, etc) + int seedtypeidx; + + // Seed comes from forward-oriented read? + bool fw; + + // Filtered out due to the pattern of Ns present. If true, this + // seed should be ignored by searchAllSeeds(). + bool nfiltered; + + // Seed this was instantiated from + Seed s; + +#ifndef NDEBUG + /** + * Check that InstantiatedSeed is internally consistent. + */ + bool repOk() const { + return true; + } +#endif +}; + +/** + * Simple struct for holding a end-to-end alignments for the read with at most + * 2 edits. + */ +template +struct EEHit { + + EEHit() { reset(); } + + void reset() { + top = bot = 0; + fw = false; + e1.reset(); + e2.reset(); + score = MIN_I64; + } + + void init( + index_t top_, + index_t bot_, + const Edit* e1_, + const Edit* e2_, + bool fw_, + int64_t score_) + { + top = top_; bot = bot_; + if(e1_ != NULL) { + e1 = *e1_; + } else { + e1.reset(); + } + if(e2_ != NULL) { + e2 = *e2_; + } else { + e2.reset(); + } + fw = fw_; + score = score_; + } + + /** + * Return number of mismatches in the alignment. 
+ */ + int mms() const { + if (e2.inited()) return 2; + else if(e1.inited()) return 1; + else return 0; + } + + /** + * Return the number of edits for which hasN() is true. The second + * edit (e2) is only examined when the first (e1) is itself counted. + * NOTE(review): an N reported only by e2 is therefore not counted -- + * confirm that this nesting is intended. + */ + int ns() const { + int ns = 0; + if(e1.inited() && e1.hasN()) { + ns++; + if(e2.inited() && e2.hasN()) { + ns++; + } + } + return ns; + } + + /** + * Return the number of edits whose 'chr' field is 'N' (presumably the + * reference character -- confirm against Edit). As in ns(), e2 is only + * examined when e1 is itself counted. + */ + int refns() const { + int ns = 0; + if(e1.inited() && e1.chr == 'N') { + ns++; + if(e2.inited() && e2.chr == 'N') { + ns++; + } + } + return ns; + } + + /** + * Return true iff there is no hit. + */ + bool empty() const { + return bot <= top; + } + + /** + * Higher score = higher priority. + */ + bool operator<(const EEHit& o) const { + return score > o.score; + } + + /** + * Return the size of the alignment's SA range (bot - top). + */ + index_t size() const { return bot - top; } + +#ifndef NDEBUG + /** + * Check that hit is sane w/r/t read. + */ + bool repOk(const Read& rd) const { + assert_gt(bot, top); + if(e1.inited()) { + assert_lt(e1.pos, rd.length()); + if(e2.inited()) { + assert_lt(e2.pos, rd.length()); + } + } + return true; + } +#endif + + index_t top; + index_t bot; + Edit e1; + Edit e2; + bool fw; + int64_t score; +}; + +/** + * Data structure for holding all of the seed hits associated with a read. All + * the seed hits for a given read are encapsulated in a single QVal object. A + * QVal refers to a range of values in the qlist, where each qlist value is a + * BW range and a slot to hold the hit's suffix array offset. QVals are kept + * in two lists (hitsFw_ and hitsRc_), one for seeds on the forward read strand, + * one for seeds on the reverse read strand. The list is indexed by read + * offset index (e.g. 0=closest-to-5', 1=second-closest, etc). + * + * An assumption behind this data structure is that all the seeds are found + * first, then downstream analyses try to extend them.
In between finding the + * seed hits and extending them, the sort() member function is called, which + * ranks QVals according to the order they should be extended. Right now the + * policy is that QVals with fewer elements (hits) should be tried first. + */ +template +class SeedResults { + +public: + SeedResults() : + seqFw_(AL_CAT), + seqRc_(AL_CAT), + qualFw_(AL_CAT), + qualRc_(AL_CAT), + hitsFw_(AL_CAT), + hitsRc_(AL_CAT), + isFw_(AL_CAT), + isRc_(AL_CAT), + sortedFw_(AL_CAT), + sortedRc_(AL_CAT), + offIdx2off_(AL_CAT), + rankOffs_(AL_CAT), + rankFws_(AL_CAT), + mm1Hit_(AL_CAT) + { + clear(); + } + + /** + * Set the current read. + */ + void nextRead(const Read& read) { + read_ = &read; + } + + /** + * Set the appropriate element of either hitsFw_ or hitsRc_ to the given + * QVal. A QVal encapsulates all the BW ranges for reference substrings + * that are within some distance of the seed string. + */ + void add( + const QVal& qv, // range of ranges in cache + const AlignmentCache& ac, // cache + index_t seedIdx, // seed index (from 5' end) + bool seedFw) // whether seed is from forward read + { + assert(qv.repOk(ac)); + assert(repOk(&ac)); + assert_lt(seedIdx, hitsFw_.size()); + assert_gt(numOffs_, 0); // if this fails, probably failed to call reset + if(qv.empty()) return; + if(seedFw) { + assert(!hitsFw_[seedIdx].valid()); + hitsFw_[seedIdx] = qv; + numEltsFw_ += qv.numElts(); + numRangesFw_ += qv.numRanges(); + if(qv.numRanges() > 0) nonzFw_++; + } else { + assert(!hitsRc_[seedIdx].valid()); + hitsRc_[seedIdx] = qv; + numEltsRc_ += qv.numElts(); + numRangesRc_ += qv.numRanges(); + if(qv.numRanges() > 0) nonzRc_++; + } + numElts_ += qv.numElts(); + numRanges_ += qv.numRanges(); + if(qv.numRanges() > 0) { + nonzTot_++; + } + assert(repOk(&ac)); + } + + /** + * Clear buffered seed hits and state. Set the number of seed + * offsets and the read. 
+ */ + void reset( + const Read& read, + const EList& offIdx2off, + size_t numOffs) + { + assert_gt(numOffs, 0); + clearSeeds(); + numOffs_ = numOffs; + seqFw_.resize(numOffs_); + seqRc_.resize(numOffs_); + qualFw_.resize(numOffs_); + qualRc_.resize(numOffs_); + hitsFw_.resize(numOffs_); + hitsRc_.resize(numOffs_); + isFw_.resize(numOffs_); + isRc_.resize(numOffs_); + sortedFw_.resize(numOffs_); + sortedRc_.resize(numOffs_); + offIdx2off_ = offIdx2off; + for(size_t i = 0; i < numOffs_; i++) { + sortedFw_[i] = sortedRc_[i] = false; + hitsFw_[i].reset(); + hitsRc_[i].reset(); + isFw_[i].clear(); + isRc_[i].clear(); + } + read_ = &read; + sorted_ = false; + } + + /** + * Clear seed-hit state. + */ + void clearSeeds() { + sortedFw_.clear(); + sortedRc_.clear(); + rankOffs_.clear(); + rankFws_.clear(); + offIdx2off_.clear(); + hitsFw_.clear(); + hitsRc_.clear(); + isFw_.clear(); + isRc_.clear(); + seqFw_.clear(); + seqRc_.clear(); + nonzTot_ = 0; + nonzFw_ = 0; + nonzRc_ = 0; + numOffs_ = 0; + numRanges_ = 0; + numElts_ = 0; + numRangesFw_ = 0; + numEltsFw_ = 0; + numRangesRc_ = 0; + numEltsRc_ = 0; + } + + /** + * Clear seed-hit state and end-to-end alignment state. + */ + void clear() { + clearSeeds(); + read_ = NULL; + exactFwHit_.reset(); + exactRcHit_.reset(); + mm1Hit_.clear(); + mm1Sorted_ = false; + mm1Elt_ = 0; + assert(empty()); + } + + /** + * Extract key summaries from this SeedResults and put into 'ssum'. 
+ */ + void toSeedAlSumm(SeedAlSumm& ssum) const { + // Number of positions with at least 1 range + ssum.nonzTot = nonzTot_; + ssum.nonzFw = nonzFw_; + ssum.nonzRc = nonzRc_; + + // Number of ranges + ssum.nrangeTot = numRanges_; + ssum.nrangeFw = numRangesFw_; + ssum.nrangeRc = numRangesRc_; + + // Number of elements + ssum.neltTot = numElts_; + ssum.neltFw = numEltsFw_; + ssum.neltRc = numEltsRc_; + + // Other summaries + ssum.maxNonzRangeFw = ssum.minNonzRangeFw = 0; + ssum.maxNonzRangeRc = ssum.minNonzRangeRc = 0; + ssum.maxNonzEltFw = ssum.minNonzEltFw = 0; + ssum.maxNonzEltRc = ssum.minNonzEltRc = 0; + for(size_t i = 0; i < numOffs_; i++) { + if(hitsFw_[i].valid()) { + if(ssum.minNonzEltFw == 0 || hitsFw_[i].numElts() < ssum.minNonzEltFw) { + ssum.minNonzEltFw = hitsFw_[i].numElts(); + } + if(ssum.maxNonzEltFw == 0 || hitsFw_[i].numElts() > ssum.maxNonzEltFw) { + ssum.maxNonzEltFw = hitsFw_[i].numElts(); + } + if(ssum.minNonzRangeFw == 0 || hitsFw_[i].numRanges() < ssum.minNonzRangeFw) { + ssum.minNonzRangeFw = hitsFw_[i].numRanges(); + } + if(ssum.maxNonzRangeFw == 0 || hitsFw_[i].numRanges() > ssum.maxNonzRangeFw) { + ssum.maxNonzRangeFw = hitsFw_[i].numRanges(); + } + } + if(hitsRc_[i].valid()) { + if(ssum.minNonzEltRc == 0 || hitsRc_[i].numElts() < ssum.minNonzEltRc) { + ssum.minNonzEltRc = hitsRc_[i].numElts(); + } + if(ssum.maxNonzEltRc == 0 || hitsRc_[i].numElts() > ssum.maxNonzEltRc) { + ssum.maxNonzEltRc = hitsRc_[i].numElts(); + } + if(ssum.minNonzRangeRc == 0 || hitsRc_[i].numRanges() < ssum.minNonzRangeRc) { + ssum.minNonzRangeRc = hitsRc_[i].numRanges(); + } + if(ssum.maxNonzRangeRc == 0 || hitsRc_[i].numRanges() > ssum.maxNonzRangeRc) { + ssum.maxNonzRangeRc = hitsRc_[i].numRanges(); + } + } + } + } + + /** + * Return average number of hits per seed. 
+ */ + float averageHitsPerSeed() const { + /* Plain float division; there is no guard for nonzTot_ == 0. */ + return (float)numElts_ / (float)nonzTot_; + } + + /** + * Return median of all the non-zero per-seed # hits, or 0 when there + * are none. For an even count this is the mean of the two middle + * values. + */ + float medianHitsPerSeed() const { + EList& median = const_cast&>(tmpMedian_); + median.clear(); + for(size_t i = 0; i < numOffs_; i++) { + if(hitsFw_[i].valid() && hitsFw_[i].numElts() > 0) { + median.push_back(hitsFw_[i].numElts()); + } + if(hitsRc_[i].valid() && hitsRc_[i].numElts() > 0) { + median.push_back(hitsRc_[i].numElts()); + } + } + if(tmpMedian_.empty()) { + return 0.0f; + } + median.sort(); + float med1 = (float)median[tmpMedian_.size() >> 1]; + float med2 = med1; + if((median.size() & 1) == 0) { + med2 = (float)median[(tmpMedian_.size() >> 1) - 1]; + } + /* Average the two middle values; with an odd count med2 == med1, so this yields med1. */ + return (med1 + med2) * 0.5f; + } + + /** + * Return a number that's meant to quantify how hopeful we are that this + * set of seed hits will lead to good alignments. It is the sum of + * 1/(nelt*nelt) over every valid forward and reverse-complement QVal. + * NOTE(review): a valid QVal with numElts() == 0 would divide by zero -- + * confirm that valid() rules this out. + */ + double uniquenessFactor() const { + double result = 0.0; + for(size_t i = 0; i < numOffs_; i++) { + if(hitsFw_[i].valid()) { + size_t nelt = hitsFw_[i].numElts(); + result += (1.0 / (double)(nelt * nelt)); + } + if(hitsRc_[i].valid()) { + size_t nelt = hitsRc_[i].numElts(); + result += (1.0 / (double)(nelt * nelt)); + } + } + return result; + } + + /** + * Return the number of ranges being held. + */ + index_t numRanges() const { return numRanges_; } + + /** + * Return the number of elements being held. + */ + index_t numElts() const { return numElts_; } + + /** + * Return the number of ranges being held for seeds on the forward + * read strand. + */ + index_t numRangesFw() const { return numRangesFw_; } + + /** + * Return the number of elements being held for seeds on the + * forward read strand. + */ + index_t numEltsFw() const { return numEltsFw_; } + + /** + * Return the number of ranges being held for seeds on the + * reverse-complement read strand.
     */
    index_t numRangesRc() const { return numRangesRc_; }

    /**
     * Return the number of elements being held for seeds on the
     * reverse-complement read strand.
     */
    index_t numEltsRc() const { return numEltsRc_; }

    /**
     * Given an offset index, return the read offset stored for that index
     * in offIdx2off_.  Note that the parameter, although named 'off', is
     * the index, not the offset.  No bounds check is performed here.
     */
    index_t idx2off(size_t off) const {
        return offIdx2off_[off];
    }

    /**
     * Return true iff there are 0 ranges being held.
     */
    bool empty() const { return numRanges() == 0; }

    /**
     * Get the QVal representing all the reference hits for the given
     * orientation and seed offset index.
     *
     * NOTE(review): template argument lists appear to have been lost from
     * this text (e.g. "const QVal&", "EList >&"); restore them from the
     * original header before compiling.
     */
    const QVal& hitsAtOffIdx(bool fw, size_t seedoffidx) const {
        assert_lt(seedoffidx, numOffs_);
        assert(repOk(NULL));
        return fw ? hitsFw_[seedoffidx] : hitsRc_[seedoffidx];
    }

    /**
     * Get the list of instantiated seeds for the given orientation and
     * seed offset index.  The list is returned by non-const reference, so
     * callers may modify it.
     */
    EList& instantiatedSeeds(bool fw, size_t seedoffidx) {
        assert_lt(seedoffidx, numOffs_);
        assert(repOk(NULL));
        return fw ? isFw_[seedoffidx] : isRc_[seedoffidx];
    }

    /**
     * Return the number of different seed offsets possible.
     */
    index_t numOffs() const { return numOffs_; }

    /**
     * Return the read from which seeds were extracted.  Dereferences read_
     * without a NULL check.
     */
    const Read& read() const { return *read_; }

#ifndef NDEBUG
    /**
     * Check that this SeedResults is internally consistent.  Only compiled
     * in debug builds; every check is an assert, and the function always
     * returns true so it can itself be wrapped in assert().
     *
     * ac:            if non-NULL, each valid QVal is also checked against
     *                this cache
     * requireInited: if true, additionally require that read_ is set
     */
    bool repOk(
        const AlignmentCache* ac,
        bool requireInited = false) const
    {
        if(requireInited) {
            assert(read_ != NULL);
        }
        if(numOffs_ > 0) {
            assert_eq(numOffs_, hitsFw_.size());
            assert_eq(numOffs_, hitsRc_.size());
            assert_leq(numRanges_, numElts_);
            assert_leq(nonzTot_, numRanges_);
            // Recount the valid QVals holding at least one range, over both
            // strands, and compare against the cached nonzTot_
            size_t nonzs = 0;
            for(int fw = 0; fw <= 1; fw++) {
                const EList >& rrs = (fw ? hitsFw_ : hitsRc_);
                for(size_t i = 0; i < numOffs_; i++) {
                    if(rrs[i].valid()) {
                        if(rrs[i].numRanges() > 0) nonzs++;
                        if(ac != NULL) {
                            assert(rrs[i].repOk(*ac));
                        }
                    }
                }
            }
            assert_eq(nonzs, nonzTot_);
            // Once ranked, there is exactly one rank entry per counted QVal
            assert(!sorted_ || nonzTot_ == rankFws_.size());
            assert(!sorted_ || nonzTot_ == rankOffs_.size());
        }
        return true;
    }
#endif

    /**
     * Populate rankOffs_ and rankFws_ with the list of QVals that need to be
     * examined for this SeedResults, in order.  The order is ascending by
     * number of elements, so QVals with fewer elements (i.e. seed sequences
     * that are more unique) are ranked first and QVals with more elements
     * are ranked later.
     *
     * Each pass of the outer loop selects the not-yet-ranked, valid,
     * non-empty QVal with the fewest elements and appends it.  Ties go to
     * whichever candidate the scan meets first; the scan starts on a
     * randomly chosen strand and, within each strand, at a random offset
     * that wraps around.
     */
    void rankSeedHits(RandomSource& rnd) {
        while(rankOffs_.size() < nonzTot_) {
            // 0xffffffff is the "no candidate found yet" sentinel
            index_t minsz = (index_t)0xffffffff;
            index_t minidx = 0;
            bool minfw = true;
            // Rank seed-hit positions in ascending order by number of elements
            // in all BW ranges
            bool rb = rnd.nextBool();
            assert(rb == 0 || rb == 1);
            for(int fwi = 0; fwi <= 1; fwi++) {
                // rb decides which strand is scanned first: rc when rb is
                // true, fw when it is false
                bool fw = (fwi == (rb ? 1 : 0));
                EList >& rrs = (fw ? hitsFw_ : hitsRc_);
                EList& sorted = (fw ? sortedFw_ : sortedRc_);
                // Random starting offset; i wraps to 0 at numOffs_
                index_t i = (rnd.nextU32() % (index_t)numOffs_);
                for(index_t ii = 0; ii < numOffs_; ii++) {
                    if(rrs[i].valid() &&         // valid QVal
                       rrs[i].numElts() > 0 &&   // non-empty
                       !sorted[i] &&             // not already sorted
                       rrs[i].numElts() < minsz) // least elts so far?
                    {
                        minsz = rrs[i].numElts();
                        minidx = i;
                        minfw = (fw == 1);
                    }
                    if((++i) == numOffs_) {
                        i = 0;
                    }
                }
            }
            assert_neq((index_t)0xffffffff, minsz);
            // Mark the winner so later passes skip it
            if(minfw) {
                sortedFw_[minidx] = true;
            } else {
                sortedRc_[minidx] = true;
            }
            rankOffs_.push_back(minidx);
            rankFws_.push_back(minfw);
        }
        assert_eq(rankOffs_.size(), rankFws_.size());
        sorted_ = true;
    }

    /**
     * Return the number of orientation/offsets into the read that have
     * at least one seed hit.
+ */ + size_t nonzeroOffsets() const { + assert(!sorted_ || nonzTot_ == rankFws_.size()); + assert(!sorted_ || nonzTot_ == rankOffs_.size()); + return nonzTot_; + } + + /** + * Return true iff all seeds hit for forward read. + */ + bool allFwSeedsHit() const { + return nonzFw_ == numOffs(); + } + + /** + * Return true iff all seeds hit for revcomp read. + */ + bool allRcSeedsHit() const { + return nonzRc_ == numOffs(); + } + + /** + * Return the minimum number of edits that an end-to-end alignment of the + * fw read could have. Uses knowledge of how many seeds have exact hits + * and how the seeds overlap. + */ + index_t fewestEditsEE(bool fw, int seedlen, int per) const { + assert_gt(seedlen, 0); + assert_gt(per, 0); + index_t nonz = fw ? nonzFw_ : nonzRc_; + if(nonz < numOffs()) { + int maxdepth = (seedlen + per - 1) / per; + int missing = (int)(numOffs() - nonz); + return (missing + maxdepth - 1) / maxdepth; + } else { + // Exact hit is possible (not guaranteed) + return 0; + } + } + + /** + * Return the number of offsets into the forward read that have at + * least one seed hit. + */ + index_t nonzeroOffsetsFw() const { + return nonzFw_; + } + + /** + * Return the number of offsets into the reverse-complement read + * that have at least one seed hit. + */ + index_t nonzeroOffsetsRc() const { + return nonzRc_; + } + + /** + * Return a QVal of seed hits of the given rank 'r'. 'offidx' gets the id + * of the offset from 5' from which it was extracted (0 for the 5-most + * offset, 1 for the next closes to 5', etc). 'off' gets the offset from + * the 5' end. 'fw' gets true iff the seed was extracted from the forward + * read. 
     *
     * 'seedlen' gets the length of the seed sequence stored for that
     * offset and orientation.  Requires that the hits have already been
     * ranked (sorted_ is asserted) and that r < nonzTot_.
     *
     * NOTE(review): template argument lists appear to have been lost from
     * this text (e.g. "const QVal&", "EList&", "EList >&"); restore them
     * from the original header before compiling.
     */
    const QVal& hitsByRank(
        index_t  r,       // in
        index_t& offidx,  // out
        index_t& off,     // out
        bool&    fw,      // out
        index_t& seedlen) // out
    {
        assert(sorted_);
        assert_lt(r, nonzTot_);
        if(rankFws_[r]) {
            // Rank r refers to a forward-read seed
            fw = true;
            offidx = rankOffs_[r];
            assert_lt(offidx, offIdx2off_.size());
            off = offIdx2off_[offidx];
            seedlen = (index_t)seqFw_[rankOffs_[r]].length();
            return hitsFw_[rankOffs_[r]];
        } else {
            // Rank r refers to a reverse-complement seed
            fw = false;
            offidx = rankOffs_[r];
            assert_lt(offidx, offIdx2off_.size());
            off = offIdx2off_[offidx];
            seedlen = (index_t)seqRc_[rankOffs_[r]].length();
            return hitsRc_[rankOffs_[r]];
        }
    }

    /**
     * Return the seed sequence belonging to the seed of the given rank.
     * Requires that the hits have already been ranked.
     */
    const BTDnaString& seqByRank(index_t r) {
        assert(sorted_);
        assert_lt(r, nonzTot_);
        return rankFws_[r] ? seqFw_[rankOffs_[r]] : seqRc_[rankOffs_[r]];
    }

    /**
     * Return the quality string belonging to the seed of the given rank.
     * Requires that the hits have already been ranked.
     */
    const BTString& qualByRank(index_t r) {
        assert(sorted_);
        assert_lt(r, nonzTot_);
        return rankFws_[r] ? qualFw_[rankOffs_[r]] : qualRc_[rankOffs_[r]];
    }

    /**
     * Return the list of extracted seed sequences for seeds on either
     * the forward or reverse strand.  Returned by non-const reference.
     */
    EList& seqs(bool fw) { return fw ? seqFw_ : seqRc_; }

    /**
     * Return the list of extracted quality sequences for seeds on
     * either the forward or reverse strand.  Returned by non-const
     * reference.
     */
    EList& quals(bool fw) { return fw ? qualFw_ : qualRc_; }

    /**
     * Return (a copy of) the exact end-to-end alignment of the fw read.
     */
    EEHit exactFwEEHit() const { return exactFwHit_; }

    /**
     * Return (a copy of) the exact end-to-end alignment of the rc read.
     */
    EEHit exactRcEEHit() const { return exactRcHit_; }

    /**
     * Return const ref to list of 1-mismatch end-to-end alignments.
     */
    const EList >& mm1EEHits() const { return mm1Hit_; }

    /**
     * Sort the end-to-end 1-mismatch alignments, prioritizing by score (higher
     * score = higher priority).
+ */ + void sort1mmEe(RandomSource& rnd) { + assert(!mm1Sorted_); + mm1Hit_.sort(); + size_t streak = 0; + for(size_t i = 1; i < mm1Hit_.size(); i++) { + if(mm1Hit_[i].score == mm1Hit_[i-1].score) { + if(streak == 0) { streak = 1; } + streak++; + } else { + if(streak > 1) { + assert_geq(i, streak); + mm1Hit_.shufflePortion(i-streak, streak, rnd); + } + streak = 0; + } + } + if(streak > 1) { + mm1Hit_.shufflePortion(mm1Hit_.size() - streak, streak, rnd); + } + mm1Sorted_ = true; + } + + /** + * Add an end-to-end 1-mismatch alignment. + */ + void add1mmEe( + index_t top, + index_t bot, + const Edit* e1, + const Edit* e2, + bool fw, + int64_t score) + { + mm1Hit_.expand(); + mm1Hit_.back().init(top, bot, e1, e2, fw, score); + mm1Elt_ += (bot - top); + } + + /** + * Add an end-to-end exact alignment. + */ + void addExactEeFw( + index_t top, + index_t bot, + const Edit* e1, + const Edit* e2, + bool fw, + int64_t score) + { + exactFwHit_.init(top, bot, e1, e2, fw, score); + } + + /** + * Add an end-to-end exact alignment. + */ + void addExactEeRc( + index_t top, + index_t bot, + const Edit* e1, + const Edit* e2, + bool fw, + int64_t score) + { + exactRcHit_.init(top, bot, e1, e2, fw, score); + } + + /** + * Clear out the end-to-end exact alignments. + */ + void clearExactE2eHits() { + exactFwHit_.reset(); + exactRcHit_.reset(); + } + + /** + * Clear out the end-to-end 1-mismatch alignments. + */ + void clear1mmE2eHits() { + mm1Hit_.clear(); // 1-mismatch end-to-end hits + mm1Elt_ = 0; // number of 1-mismatch hit rows + mm1Sorted_ = false; // true iff we've sorted the mm1Hit_ list + } + + /** + * Return the number of distinct exact and 1-mismatch end-to-end hits + * found. + */ + index_t numE2eHits() const { + return (index_t)(exactFwHit_.size() + exactRcHit_.size() + mm1Elt_); + } + + /** + * Return the number of distinct exact end-to-end hits found. 
     */
    index_t numExactE2eHits() const {
        return (index_t)(exactFwHit_.size() + exactRcHit_.size());
    }

    /**
     * Return the number of distinct 1-mismatch end-to-end hits found.
     */
    index_t num1mmE2eHits() const {
        return mm1Elt_;
    }

    /**
     * Return the length of the read that yielded the seed hits.  Requires
     * that read_ has been set (asserted).
     */
    index_t readLength() const {
        assert(read_ != NULL);
        return read_->length();
    }

protected:

    // NOTE(review): template argument lists appear to have been lost from
    // this text (e.g. "EList seqFw_", "EList > hitsFw_"); restore them from
    // the original header before compiling.

    // As seed hits and edits are added they're sorted into these
    // containers
    EList seqFw_;           // seqs for seeds from forward read
    EList seqRc_;           // seqs for seeds from revcomp read
    EList qualFw_;          // quals for seeds from forward read
    EList qualRc_;          // quals for seeds from revcomp read
    EList > hitsFw_;        // hits for forward read
    EList > hitsRc_;        // hits for revcomp read
    EList > isFw_;          // instantiated seeds for forward read
    EList > isRc_;          // instantiated seeds for revcomp read
    EList sortedFw_;        // true iff fw QVal was sorted/ranked
    EList sortedRc_;        // true iff rc QVal was sorted/ranked
    index_t nonzTot_;       // # offsets with non-zero size
    index_t nonzFw_;        // # offsets into fw read with non-0 size
    index_t nonzRc_;        // # offsets into rc read with non-0 size
    index_t numRanges_;     // # ranges added
    index_t numElts_;       // # elements added
    index_t numRangesFw_;   // # ranges added for fw seeds
    index_t numEltsFw_;     // # elements added for fw seeds
    index_t numRangesRc_;   // # ranges added for rc seeds
    index_t numEltsRc_;     // # elements added for rc seeds

    EList offIdx2off_;      // map from offset indexes to offsets from 5' end

    // When the sort routine is called, the seed hits collected so far
    // are sorted into another set of containers that allow easy access
    // to hits from the lowest-ranked offset (the one with the fewest
    // BW elements) to the greatest-ranked offset.  Offsets with 0 hits
    // are ignored.
    EList rankOffs_;        // sorted offsets of seeds to try
    EList rankFws_;         // sorted orientations assoc.
                            // with rankOffs_
    bool sorted_;           // true iff rankSeedHits() ran since last reset

    // These fields set once per read
    index_t numOffs_;       // # different seed offsets possible
    const Read* read_;      // read from which seeds were extracted

    EEHit exactFwHit_;      // end-to-end exact hit for fw read
    EEHit exactRcHit_;      // end-to-end exact hit for rc read
    EList > mm1Hit_;        // 1-mismatch end-to-end hits
    index_t mm1Elt_;        // number of 1-mismatch hit rows
    bool mm1Sorted_;        // true iff we've sorted the mm1Hit_ list

    EList tmpMedian_;       // temporary storage for calculating median
};


// Forward decl
// NOTE(review): template parameter lists appear to have been lost from this
// text (e.g. "template class Ebwt;"); restore them from the original header
// before compiling.
template class Ebwt;
template struct SideLocus;

/**
 * Encapsulates a summary of what the searchAllSeeds aligner did.
 */
struct SeedSearchMetrics {

    SeedSearchMetrics() : mutex_m() {
        reset();
    }

    /**
     * Merge this metrics object with the given object, i.e., sum each
     * category.  This is the only safe way to update a
     * SeedSearchMetrics object shared by multiple threads.
     *
     * getLock is forwarded to the ThreadSafe guard built on mutex_m;
     * presumably the mutex is only taken when it is true -- confirm
     * against ThreadSafe.
     */
    void merge(const SeedSearchMetrics& m, bool getLock = false) {
        ThreadSafe ts(&mutex_m, getLock);
        seedsearch   += m.seedsearch;
        possearch    += m.possearch;
        intrahit     += m.intrahit;
        interhit     += m.interhit;
        filteredseed += m.filteredseed;
        ooms         += m.ooms;
        bwops        += m.bwops;
        bweds        += m.bweds;
        bestmin0     += m.bestmin0;
        bestmin1     += m.bestmin1;
        bestmin2     += m.bestmin2;
    }

    /**
     * Set all counters to 0.  Does not touch the mutex.
     */
    void reset() {
        seedsearch =
        possearch =
        intrahit =
        interhit =
        filteredseed =
        ooms =
        bwops =
        bweds =
        bestmin0 =
        bestmin1 =
        bestmin2 = 0;
    }

    uint64_t seedsearch;   // # times we executed strategy in InstantiatedSeed
    uint64_t possearch;    // # offsets where aligner executed >= 1 strategy
    uint64_t intrahit;     // # offsets where current-read cache gave answer
    uint64_t interhit;     // # offsets where across-read cache gave answer
    uint64_t filteredseed; // # seed instantiations skipped due to Ns
    uint64_t ooms;         // out-of-memory errors
    uint64_t bwops;        // Burrows-Wheeler operations
    uint64_t bweds;        // Burrows-Wheeler edits
    uint64_t bestmin0;     // # times the best min # edits was 0
    uint64_t bestmin1;     // # times the best min # edits was 1
    uint64_t bestmin2;     // # times the best min # edits was 2
    MUTEX_T  mutex_m;      // guards merge() when a lock is requested
};

/**
 * Given an index and a seeding scheme, searches for seed hits.
 */
template
class SeedAligner {

public:

    /**
     * Construct an aligner with no index attached.  The index pointers
     * and the other per-read fields are assigned later, in
     * searchAllSeeds().
     */
    SeedAligner() : edits_(AL_CAT), offIdx2off_(AL_CAT) { }

    /**
     * Given a read and a few coordinates that describe a substring of the
     * read (or its reverse complement), fill in 'seq' and 'qual' objects
     * with the seed sequence and qualities.
     */
    void instantiateSeq(
        const Read& read, // input read
        BTDnaString& seq, // output sequence
        BTString& qual,   // output qualities
        int len,          // seed length
        int depth,        // seed's 0-based offset from 5' end
        bool fw) const;   // seed's orientation

    /**
     * Iterate through the seeds that cover the read and instantiate each
     * one into 'sr'.  Returns a pair of counters; the first is the number
     * of instantiated seeds that require alignment.
     */
    std::pair instantiateSeeds(
        const EList& seeds,         // search seeds
        index_t off,                // offset into read to start extracting
        int per,                    // interval between seeds
        const Read& read,           // read to align
        const Scoring& pens,        // scoring scheme
        bool nofw,                  // don't align forward read
        bool norc,                  // don't align revcomp read
        AlignmentCacheIface& cache, // holds some seed hits from previous reads
        SeedResults& sr,            // holds all the seed hits
        SeedSearchMetrics& met);    // metrics

    /**
     * Iterate through the seeds that cover the read and initiate a
     * search for each seed.
     */
    void searchAllSeeds(
        const EList& seeds,         // search seeds
        const Ebwt* ebwtFw,         // BWT index
        const Ebwt* ebwtBw,         // BWT' index
        const Read& read,           // read to align
        const Scoring& pens,        // scoring scheme
        AlignmentCacheIface& cache, // local seed alignment cache
        SeedResults& hits,          // holds all the seed hits
        SeedSearchMetrics& met,     // metrics
        PerReadMetrics& prm);       // per-read metrics

    /**
     * Sanity-check a partial alignment produced during oneMmSearch.
     */
    bool sanityPartial(
        const Ebwt* ebwtFw, // BWT index
        const Ebwt* ebwtBw, // BWT' index
        const BTDnaString& seq,
        index_t dep,
        index_t len,
        bool do1mm,
        index_t topfw,
        index_t botfw,
        index_t topbw,
        index_t botbw);

    /**
     * Do an exact-matching sweep to establish a lower bound on number of edits
     * and to find exact alignments.
     */
    size_t exactSweep(
        const Ebwt& ebwt,        // BWT index
        const Read& read,        // read to align
        const Scoring& sc,       // scoring scheme
        bool nofw,               // don't align forward read
        bool norc,               // don't align revcomp read
        size_t mineMax,          // don't care about edit bounds > this
        size_t& mineFw,          // minimum # edits for forward read
        size_t& mineRc,          // minimum # edits for revcomp read
        bool repex,              // report 0mm hits?
        SeedResults& hits,       // holds all the seed hits (and exact hit)
        SeedSearchMetrics& met); // metrics

    /**
     * Search for end-to-end alignments with up to 1 mismatch.
     */
    bool oneMmSearch(
        const Ebwt* ebwtFw,      // BWT index
        const Ebwt* ebwtBw,      // BWT' index
        const Read& read,        // read to align
        const Scoring& sc,       // scoring
        int64_t minsc,           // minimum score
        bool nofw,               // don't align forward read
        bool norc,               // don't align revcomp read
        bool local,              // 1mm hits must be legal local alignments
        bool repex,              // report 0mm hits?
        bool rep1mm,             // report 1mm hits?
        SeedResults& hits,       // holds all the seed hits (and exact hit)
        SeedSearchMetrics& met); // metrics

protected:

    /**
     * Report a seed hit found by searchSeedBi(), but first try to extend it out in
     * either direction as far as possible without hitting any edits.  This will
     * allow us to prioritize the seed hits better later on.  Call reportHit() when
     * we're done, which actually adds the hit to the cache.  Returns result from
     * calling reportHit().
     */
    bool extendAndReportHit(
        index_t topf,                // top in BWT
        index_t botf,                // bot in BWT
        index_t topb,                // top in BWT'
        index_t botb,                // bot in BWT'
        index_t len,                 // length of hit
        DoublyLinkedList *prevEdit); // previous edit

    /**
     * Report a seed hit found by searchSeedBi() by adding it to the cache.  Return
     * false if the hit could not be reported because of, e.g., cache exhaustion.
     */
    bool reportHit(
        index_t topf,                // top in BWT
        index_t botf,                // bot in BWT
        index_t topb,                // top in BWT'
        index_t botb,                // bot in BWT'
        index_t len,                 // length of hit
        DoublyLinkedList *prevEdit); // previous edit

    /**
     * Given an instantiated seed (in s_ and other fields), search for it.
     * searchAllSeeds() treats a false return as memory exhaustion.
     */
    bool searchSeedBi();

    /**
     * Main, recursive implementation of the seed search.
     */
    bool searchSeedBi(
        int step,                    // depth into steps_[] array
        int depth,                   // recursion depth
        index_t topf,                // top in BWT
        index_t botf,                // bot in BWT
        index_t topb,                // top in BWT'
        index_t botb,                // bot in BWT'
        SideLocus tloc,              // locus for top (perhaps uninitialized)
        SideLocus bloc,              // locus for bot (perhaps uninitialized)
        Constraint c0,               // constraints to enforce in seed zone 0
        Constraint c1,               // constraints to enforce in seed zone 1
        Constraint c2,               // constraints to enforce in seed zone 2
        Constraint overall,          // overall constraints
        DoublyLinkedList *prevEdit); // previous edit

    /**
     * Get tloc and bloc ready for the next step.
     */
    inline void nextLocsBi(
        SideLocus& tloc, // top locus
        SideLocus& bloc, // bot locus
        index_t topf,    // top in BWT
        index_t botf,    // bot in BWT
        index_t topb,    // top in BWT'
        index_t botb,    // bot in BWT'
        int step);       // step to get ready for

    // Following are set in searchAllSeeds then used by searchSeedBi()
    // and other protected members.
    const Ebwt* ebwtFw_;        // forward index (BWT)
    const Ebwt* ebwtBw_;        // backward/mirror index (BWT')
    const Scoring* sc_;         // scoring scheme
    const InstantiatedSeed* s_; // current instantiated seed

    const Read* read_;          // read whose seeds are currently being aligned

    // The following are set just before a call to searchSeedBi()
    const BTDnaString* seq_;    // sequence of current seed
    const BTString* qual_;      // quality string for current seed
    index_t off_;               // offset of seed currently being searched
    bool fw_;                   // orientation of seed currently being searched

    EList edits_;               // temporary place to sort edits
    AlignmentCacheIface *ca_;   // local alignment cache for seed alignments
    EList offIdx2off_;          // offset idx to read offset map, set up in instantiateSeeds()
    uint64_t bwops_;            // Burrows-Wheeler operations
    uint64_t bwedits_;          // Burrows-Wheeler edits
    BTDnaString tmprfdnastr_;   // used in reportHit

    ASSERT_ONLY(ESet hits_);    // Ref hits so far for seed being aligned
    BTDnaString tmpdnastr_;     // scratch string; used by sanityPartial()
};

// Initialize the SideLocus pair (tloc, bloc) for the BW range [top, bot) of
// index 'e'.  A range of exactly one row initializes only tloc and
// invalidates bloc, which callers test via bloc.valid().
// NOTE(review): 'top' and 'bot' are expanded unparenthesized and more than
// once; pass simple identifiers.
#define INIT_LOCS(top, bot, tloc, bloc, e) { \
    if(bot - top == 1) { \
        tloc.initFromRow(top, (e).eh(), (e).ebwt()); \
        bloc.invalidate(); \
    } else { \
        SideLocus::initFromTopBot(top, bot, (e).eh(), (e).ebwt(), tloc, bloc); \
        assert(bloc.valid()); \
    } \
}

// Debug-only check that the four per-character ranges (t, b) in one index
// cover the same total number of rows as the four ranges (tp, bp) in the
// other.
#define SANITY_CHECK_4TUP(t, b, tp, bp) { \
    ASSERT_ONLY(index_t tot = (b[0]-t[0])+(b[1]-t[1])+(b[2]-t[2])+(b[3]-t[3])); \
    ASSERT_ONLY(index_t totp = (bp[0]-tp[0])+(bp[1]-tp[1])+(bp[2]-tp[2])+(bp[3]-tp[3])); \
    assert_eq(tot, totp); \
}

/**
 * Given a read and a few coordinates that describe a substring of the read (or
 * its reverse complement), fill in 'seq' and 'qual' objects with the seed
 * sequence and qualities.
 *
 * The seq field is filled with the sequence as it would align to the Watson
 * reference strand.  I.e. if fw is false, then the sequence that appears in
 * 'seq' is the reverse complement of the raw read substring.
 *
 * Both outputs are resized to 'len' and filled one position at a time from
 * a window of 'len' characters starting 'depth' characters into the read.
 *
 * NOTE(review): template parameter/argument lists appear to have been lost
 * from this text (e.g. "template", "SeedAligner::", "pair ret;"); restore
 * them from the original source before compiling.
 */
template
void SeedAligner::instantiateSeq(
    const Read& read, // input read
    BTDnaString& seq, // output sequence
    BTString& qual,   // output qualities
    int len,          // seed length
    int depth,        // seed's 0-based offset from 5' end
    bool fw) const    // seed's orientation
{
    // Fill in 'seq' and 'qual'
    // NOTE(review): seedlen is clamped to the read length here but never
    // used afterwards; everything below works with 'len'.  The visible
    // caller already passes min(seed length, read length).
    int seedlen = len;
    if((int)read.length() < seedlen) seedlen = (int)read.length();
    seq.resize(len);
    qual.resize(len);
    // If fw is false, we take characters starting at the 3' end of the
    // reverse complement of the read.
    for(int i = 0; i < len; i++) {
        seq.set(read.patFw.windowGetDna(i, fw, read.color, depth, len), i);
        qual.set(read.qual.windowGet(i, fw, depth, len), i);
    }
}

/**
 * We assume that all seeds are the same length.
 *
 * For each seed offset and each permitted orientation, extract the seed
 * sequence and qualities into 'sr' and instantiate every search strategy
 * in 'seeds' against it.
 *
 * Returns a pair whose first member counts the seeds that instantiated
 * successfully.  The second member is initialized to 0 and never
 * incremented in this function.
 */
template
pair SeedAligner::instantiateSeeds(
    const EList& seeds,        // search seeds
    index_t off,               // offset into read to start extracting
    int per,                   // interval between seeds
    const Read& read,          // read to align
    const Scoring& pens,       // scoring scheme
    bool nofw,                 // don't align forward read
    bool norc,                 // don't align revcomp read
    AlignmentCacheIface& cache,// holds some seed hits from previous reads
    SeedResults& sr,           // holds all the seed hits
    SeedSearchMetrics& met)    // metrics
{
    assert(!seeds.empty());
    assert_gt(read.length(), 0);
    // Start a fresh offset-index -> read-offset map for this read
    offIdx2off_.clear();
    int len = seeds[0].len; // assume they're all the same length
#ifndef NDEBUG
    for(size_t i = 1; i < seeds.size(); i++) {
        assert_eq(len, seeds[i].len);
    }
#endif
    // Calc # seeds within read interval: always one, plus one more for
    // every further full step of 'per' that still leaves room for a seed
    int nseeds = 1;
    if((int)read.length() - (int)off > len) {
        nseeds += ((int)read.length() - (int)off - len) / per;
    }
    for(int i = 0; i < nseeds; i++) {
        offIdx2off_.push_back(per * i + (int)off);
    }
    pair ret;
    ret.first = 0;  // # seeds that require alignment
    ret.second = 0; // # seeds that hit in cache with non-empty results
    sr.reset(read, offIdx2off_, nseeds);
    assert(sr.repOk(&cache.current(), true)); // require that SeedResult be initialized
    // For each orientation (forward first, then reverse complement)
    for(int fwi = 0; fwi < 2; fwi++) {
        bool fw = (fwi == 0);
        if((fw && nofw) || (!fw && norc)) {
            // Skip this orientation b/c user specified --nofw or --norc
            continue;
        }
        // For each seed position
        for(int i = 0; i < nseeds; i++) {
            int depth = i * per + (int)off;
            int seedlen = seeds[0].len;
            // Extract the seed sequence at this offset: a window that
            // starts 'depth' characters in and spans the seed length,
            // capped at the read length
            instantiateSeq(
                read,
                sr.seqs(fw)[i],
                sr.quals(fw)[i],
                std::min((int)seedlen, (int)read.length()),
                depth,
                fw);
            //QKey qk(sr.seqs(fw)[i] ASSERT_ONLY(, tmpdnastr_));
            // For each search strategy
            EList& iss = sr.instantiatedSeeds(fw, i);
            for(int j = 0; j < (int)seeds.size(); j++) {
                // Reserve a slot up front; it is popped again below if the
                // seed fails to instantiate
                iss.expand();
                assert_eq(seedlen, seeds[j].len);
                InstantiatedSeed* is = &iss.back();
                if(seeds[j].instantiate(
                    read,
                    sr.seqs(fw)[i],
                    sr.quals(fw)[i],
                    pens,
                    depth,
                    i,
                    j,
                    fw,
                    *is))
                {
                    // Instantiated; this seed will need to be aligned
                    ret.first++;
                } else {
                    // Seed may fail to instantiate if there are Ns
                    // that prevent it from matching
                    met.filteredseed++;
                    iss.pop_back();
                }
            }
        }
    }
    return ret;
}

/**
 * We assume that all seeds are the same length.
 *
 * For each seed:
 *
 * 1. Instantiate all seeds, retracting them if necessary.
 * 2.
 Calculate zone boundaries for each seed
 *
 * As written, this function walks the seeds already instantiated in 'sr'
 * (one list per offset index and orientation), searches each list unless
 * the cache already holds an answer for that seed sequence, adds every
 * valid result to 'sr', and finally updates the per-read and global
 * metrics.
 *
 * NOTE(review): template parameter/argument lists appear to have been lost
 * from this text (e.g. "template", "SeedAligner::", "QVal qv;"); restore
 * them from the original source before compiling.
 */
template
void SeedAligner::searchAllSeeds(
    const EList& seeds,         // search seeds
    const Ebwt* ebwtFw,         // BWT index
    const Ebwt* ebwtBw,         // BWT' index
    const Read& read,           // read to align
    const Scoring& pens,        // scoring scheme
    AlignmentCacheIface& cache, // local cache for seed alignments
    SeedResults& sr,            // holds all the seed hits
    SeedSearchMetrics& met,     // metrics
    PerReadMetrics& prm)        // per-read metrics
{
    assert(!seeds.empty());
    assert(ebwtFw != NULL);
    assert(ebwtFw->isInMemory());
    assert(sr.repOk(&cache.current()));
    // Publish the search context for searchSeedBi() and its helpers
    ebwtFw_ = ebwtFw;
    ebwtBw_ = ebwtBw;
    sc_ = &pens;
    read_ = &read;
    ca_ = &cache;
    bwops_ = bwedits_ = 0;
    // NOTE(review): interhits is never incremented below, so met.interhit
    // is left unchanged by this function.
    uint64_t possearches = 0, seedsearches = 0, intrahits = 0, interhits = 0, ooms = 0;
    // For each instantiated seed
    for(int i = 0; i < (int)sr.numOffs(); i++) {
        size_t off = sr.idx2off(i);
        for(int fwi = 0; fwi < 2; fwi++) {
            bool fw = (fwi == 0);
            assert(sr.repOk(&cache.current()));
            EList& iss = sr.instantiatedSeeds(fw, i);
            if(iss.empty()) {
                // Nothing was instantiated for this offset/orientation;
                // skip it
                continue;
            }
            QVal qv;
            seq_  = &sr.seqs(fw)[i];  // seed sequence
            qual_ = &sr.quals(fw)[i]; // seed qualities
            off_  = off;              // seed offset (from 5')
            fw_   = fw;               // seed orientation
            // Tell the cache that we've started aligning, so the cache can
            // expect a series of on-the-fly updates.  Return value as
            // handled below: -1 = out of memory, 0 = not in cache,
            // 1 = already in cache (qv filled in).
            int ret = cache.beginAlign(*seq_, *qual_, qv);
            ASSERT_ONLY(hits_.clear());
            if(ret == -1) {
                // Out of memory when we tried to add key to map
                ooms++;
                continue;
            }
            bool abort = false;
            if(ret == 0) {
                // Not already in cache
                assert(cache.aligning());
                possearches++;
                for(size_t j = 0; j < iss.size(); j++) {
                    // seq_ and qual_ were set above from the seed sequences
                    // and qualities already installed in SeedResults
                    assert_eq(fw, iss[j].fw);
                    assert_eq(i, (int)iss[j].seedoffidx);
                    s_ = &iss[j];
                    // Do the search with respect to seq_, qual_ and s_.
                    if(!searchSeedBi()) {
                        // Memory exhausted during search
                        ooms++;
                        abort = true;
                        break;
                    }
                    seedsearches++;
                    assert(cache.aligning());
                }
                // finishAlign() is skipped after an abort, leaving qv in
                // its default-constructed state
                if(!abort) {
                    qv = cache.finishAlign();
                }
            } else {
                // Already in cache
                assert_eq(1, ret);
                assert(qv.valid());
                intrahits++;
            }
            assert(abort || !cache.aligning());
            if(qv.valid()) {
                sr.add(
                    qv,              // range of ranges in cache
                    cache.current(), // cache
                    i,               // seed index (from 5' end)
                    fw);             // whether seed is from forward read
            }
        }
    }
    // Per-read summary of what ended up in 'sr'
    prm.nSeedRanges = sr.numRanges();
    prm.nSeedElts = sr.numElts();
    prm.nSeedRangesFw = sr.numRangesFw();
    prm.nSeedRangesRc = sr.numRangesRc();
    prm.nSeedEltsFw = sr.numEltsFw();
    prm.nSeedEltsRc = sr.numEltsRc();
    prm.seedMedian = (uint64_t)(sr.medianHitsPerSeed() + 0.5);
    // NOTE(review): averageHitsPerSeed() divides by the number of non-empty
    // offsets; confirm it cannot yield NaN here before the integer cast.
    prm.seedMean = (uint64_t)sr.averageHitsPerSeed();

    // Fold this read's counters into the running totals
    prm.nSdFmops += bwops_;
    met.seedsearch += seedsearches;
    met.possearch += possearches;
    met.intrahit += intrahits;
    met.interhit += interhits;
    met.ooms += ooms;
    met.bwops += bwops_;
    met.bweds += bwedits_;
}

/**
 * Debug helper: re-derive the BW range for the substring seq[dep, len) with
 * an independent lookup (Ebwt::contains) and assert that it matches the
 * range the caller computed incrementally.  When do1mm is set and a mirror
 * index is supplied, the reversed substring is checked against the mirror
 * index as well.  Always returns true so it can be wrapped in assert().
 * Overwrites the scratch member tmpdnastr_.
 */
template
bool SeedAligner::sanityPartial(
    const Ebwt* ebwtFw, // BWT index
    const Ebwt* ebwtBw, // BWT' index
    const BTDnaString& seq,
    index_t dep,
    index_t len,
    bool do1mm,
    index_t topfw,
    index_t botfw,
    index_t topbw,
    index_t botbw)
{
    tmpdnastr_.clear();
    for(size_t i = dep; i < len; i++) {
        tmpdnastr_.append(seq[i]);
    }
    index_t top_fw = 0, bot_fw = 0;
    ebwtFw->contains(tmpdnastr_, &top_fw, &bot_fw);
    assert_eq(top_fw, topfw);
    assert_eq(bot_fw, botfw);
    if(do1mm && ebwtBw != NULL) {
        tmpdnastr_.reverse();
        index_t top_bw = 0, bot_bw = 0;
        ebwtBw->contains(tmpdnastr_, &top_bw, &bot_bw);
        assert_eq(top_bw, topbw);
        assert_eq(bot_bw, botbw);
    }
    return true;
}

/**
 * Sweep right-to-left and left-to-right using exact matching.  Remember all
 * the SA ranges encountered along the way.  Report exact matches if there are
 * any.
 Calculate a lower bound on the number of edits in an end-to-end
 * alignment.
 *
 * As written, each strand is swept from the last character of the sequence
 * towards the first.  Whenever the BW range becomes empty, one edit is
 * counted, the offending character is skipped and matching restarts from
 * scratch.  The return value is the total number of rows (bot - top) of
 * the exact end-to-end ranges found, summed over both strands; it is
 * accumulated whether or not 'repex' is set.
 *
 * NOTE(review): 'met' is not used in this function.
 * NOTE(review): template parameter/argument lists appear to have been lost
 * from this text (e.g. "template", "SeedAligner::", "const Ebwt&"); restore
 * them from the original source before compiling.
 */
template
size_t SeedAligner::exactSweep(
    const Ebwt& ebwt,        // BWT index
    const Read& read,        // read to align
    const Scoring& sc,       // scoring scheme
    bool nofw,               // don't align forward read
    bool norc,               // don't align revcomp read
    size_t mineMax,          // don't care about edit bounds > this
    size_t& mineFw,          // minimum # edits for forward read
    size_t& mineRc,          // minimum # edits for revcomp read
    bool repex,              // report 0mm hits?
    SeedResults& hits,       // holds all the seed hits (and exact hit)
    SeedSearchMetrics& met)  // metrics
{
    assert_gt(mineMax, 0);
    index_t top = 0, bot = 0;
    SideLocus tloc, bloc;
    const size_t len = read.length();
    size_t nelt = 0;
    for(int fwi = 0; fwi < 2; fwi++) {
        bool fw = (fwi == 0);
        if( fw && nofw) continue;
        if(!fw && norc) continue;
        const BTDnaString& seq = fw ? read.patFw : read.patRc;
        assert(!seq.empty());
        int ftabLen = ebwt.eh().ftabChars();
        size_t dep = 0;   // # characters consumed so far, counted from the end
        size_t nedit = 0; // # times the range became empty on this strand
        bool done = false;
        // Each iteration starts a fresh exact match at position len-dep-1
        // NOTE(review): if the range becomes empty on the step that
        // consumes the last character while nedit < mineMax, the loop ends
        // without writing mineFw/mineRc -- confirm callers pre-initialize
        // them.
        while(dep < len && !done) {
            top = bot = 0;
            size_t left = len - dep;
            assert_gt(left, 0);
            bool doFtab = ftabLen > 1 && left >= (size_t)ftabLen;
            if(doFtab) {
                // Does N interfere with use of Ftab?
                for(size_t i = 0; i < (size_t)ftabLen; i++) {
                    int c = seq[len-dep-1-i];
                    if(c > 3) {
                        doFtab = false;
                        break;
                    }
                }
            }
            if(doFtab) {
                // Use ftab to consume ftabLen characters in one lookup
                ebwt.ftabLoHi(seq, len - dep - ftabLen, false, top, bot);
                dep += (size_t)ftabLen;
            } else {
                // Use fchr to consume a single character; a character
                // code >= 4 leaves the range empty
                int c = seq[len-dep-1];
                if(c < 4) {
                    top = ebwt.fchr()[c];
                    bot = ebwt.fchr()[c+1];
                }
                dep++;
            }
            if(bot <= top) {
                // Empty range right at the restart point: count an edit
                nedit++;
                if(nedit >= mineMax) {
                    if(fw) { mineFw = nedit; } else { mineRc = nedit; }
                    break;
                }
                continue;
            }
            INIT_LOCS(top, bot, tloc, bloc, ebwt);
            // Keep going, one character at a time
            while(dep < len) {
                int c = seq[len-dep-1];
                if(c > 3) {
                    top = bot = 0;
                } else {
                    if(bloc.valid()) {
                        // Range of more than one row: map both ends
                        bwops_ += 2;
                        top = ebwt.mapLF(tloc, c);
                        bot = ebwt.mapLF(bloc, c);
                    } else {
                        // Single-row range
                        bwops_++;
                        top = ebwt.mapLF1(top, tloc, c);
                        if(top == (index_t)OFF_MASK) {
                            top = bot = 0;
                        } else {
                            bot = top+1;
                        }
                    }
                }
                if(bot <= top) {
                    // Range became empty: count an edit; dep is left
                    // pointing at the offending character
                    nedit++;
                    if(nedit >= mineMax) {
                        if(fw) { mineFw = nedit; } else { mineRc = nedit; }
                        done = true;
                    }
                    break;
                }
                INIT_LOCS(top, bot, tloc, bloc, ebwt);
                dep++;
            }
            if(done) {
                break;
            }
            if(dep == len) {
                // Consumed the whole sequence with a non-empty range
                // Set the minimum # edits
                if(fw) { mineFw = nedit; } else { mineRc = nedit; }
                // Done
                if(nedit == 0 && bot > top) {
                    if(repex) {
                        // This is an exact hit
                        int64_t score = len * sc.match();
                        if(fw) {
                            hits.addExactEeFw(top, bot, NULL, NULL, fw, score);
                            assert(ebwt.contains(seq, NULL, NULL));
                        } else {
                            hits.addExactEeRc(top, bot, NULL, NULL, fw, score);
                            assert(ebwt.contains(seq, NULL, NULL));
                        }
                    }
                    nelt += (bot - top);
                }
                break;
            }
            // Skip the character that emptied the range and restart
            dep++;
        }
    }
    return nelt;
}

/**
 * Search for end-to-end exact hit for read.  Return true iff one is found.
+ */ +template +bool SeedAligner::oneMmSearch( + const Ebwt* ebwtFw, // BWT index + const Ebwt* ebwtBw, // BWT' index + const Read& read, // read to align + const Scoring& sc, // scoring + int64_t minsc, // minimum score + bool nofw, // don't align forward read + bool norc, // don't align revcomp read + bool local, // 1mm hits must be legal local alignments + bool repex, // report 0mm hits? + bool rep1mm, // report 1mm hits? + SeedResults& hits, // holds all the seed hits (and exact hit) + SeedSearchMetrics& met) // metrics +{ + assert(!rep1mm || ebwtBw != NULL); + const size_t len = read.length(); + int nceil = sc.nCeil.f((double)len); + size_t ns = read.ns(); + if(ns > 1) { + // Can't align this with <= 1 mismatches + return false; + } else if(ns == 1 && !rep1mm) { + // Can't align this with 0 mismatches + return false; + } + assert_geq(len, 2); + assert(!rep1mm || ebwtBw->eh().ftabChars() == ebwtFw->eh().ftabChars()); +#ifndef NDEBUG + if(ebwtBw != NULL) { + for(int i = 0; i < 4; i++) { + assert_eq(ebwtBw->fchr()[i], ebwtFw->fchr()[i]); + } + } +#endif + size_t halfFw = len >> 1; + size_t halfBw = len >> 1; + if((len & 1) != 0) { + halfBw++; + } + assert_geq(halfFw, 1); + assert_geq(halfBw, 1); + SideLocus tloc, bloc; + index_t t[4], b[4]; // dest BW ranges for BWT + t[0] = t[1] = t[2] = t[3] = 0; + b[0] = b[1] = b[2] = b[3] = 0; + index_t tp[4], bp[4]; // dest BW ranges for BWT' + tp[0] = tp[1] = tp[2] = tp[3] = 0; + bp[0] = bp[1] = bp[2] = bp[3] = 0; + index_t top = 0, bot = 0, topp = 0, botp = 0; + // Align fw read / rc read + bool results = false; + for(int fwi = 0; fwi < 2; fwi++) { + bool fw = (fwi == 0); + if( fw && nofw) continue; + if(!fw && norc) continue; + // Align going right-to-left, left-to-right + int lim = rep1mm ? 2 : 1; + for(int ebwtfwi = 0; ebwtfwi < lim; ebwtfwi++) { + bool ebwtfw = (ebwtfwi == 0); + const Ebwt* ebwt = (ebwtfw ? ebwtFw : ebwtBw); + const Ebwt* ebwtp = (ebwtfw ? 
ebwtBw : ebwtFw); + assert(rep1mm || ebwt->fw()); + const BTDnaString& seq = + (fw ? (ebwtfw ? read.patFw : read.patFwRev) : + (ebwtfw ? read.patRc : read.patRcRev)); + assert(!seq.empty()); + const BTString& qual = + (fw ? (ebwtfw ? read.qual : read.qualRev) : + (ebwtfw ? read.qualRev : read.qual)); + int ftabLen = ebwt->eh().ftabChars(); + size_t nea = ebwtfw ? halfFw : halfBw; + // Check if there's an N in the near portion + bool skip = false; + for(size_t dep = 0; dep < nea; dep++) { + if(seq[len-dep-1] > 3) { + skip = true; + break; + } + } + if(skip) { + continue; + } + size_t dep = 0; + // Align near half + if(ftabLen > 1 && (size_t)ftabLen <= nea) { + // Use ftab to jump partway into near half + bool rev = !ebwtfw; + ebwt->ftabLoHi(seq, len - ftabLen, rev, top, bot); + if(rep1mm) { + ebwtp->ftabLoHi(seq, len - ftabLen, rev, topp, botp); + assert_eq(bot - top, botp - topp); + } + if(bot - top == 0) { + continue; + } + int c = seq[len - ftabLen]; + t[c] = top; b[c] = bot; + tp[c] = topp; bp[c] = botp; + dep = ftabLen; + // initialize tloc, bloc?? + } else { + // Use fchr to jump in by 1 pos + int c = seq[len-1]; + assert_range(0, 3, c); + top = topp = tp[c] = ebwt->fchr()[c]; + bot = botp = bp[c] = ebwt->fchr()[c+1]; + if(bot - top == 0) { + continue; + } + dep = 1; + // initialize tloc, bloc?? 
+ } + INIT_LOCS(top, bot, tloc, bloc, *ebwt); + assert(sanityPartial(ebwt, ebwtp, seq, len-dep, len, rep1mm, top, bot, topp, botp)); + bool do_continue = false; + for(; dep < nea; dep++) { + assert_lt(dep, len); + int rdc = seq[len - dep - 1]; + tp[0] = tp[1] = tp[2] = tp[3] = topp; + bp[0] = bp[1] = bp[2] = bp[3] = botp; + if(bloc.valid()) { + bwops_++; + t[0] = t[1] = t[2] = t[3] = b[0] = b[1] = b[2] = b[3] = 0; + ebwt->mapBiLFEx(tloc, bloc, t, b, tp, bp); + SANITY_CHECK_4TUP(t, b, tp, bp); + top = t[rdc]; bot = b[rdc]; + if(bot <= top) { + do_continue = true; + break; + } + topp = tp[rdc]; botp = bp[rdc]; + assert(!rep1mm || bot - top == botp - topp); + } else { + assert_eq(bot, top+1); + assert(!rep1mm || botp == topp+1); + bwops_++; + top = ebwt->mapLF1(top, tloc, rdc); + if(top == (index_t)OFF_MASK) { + do_continue = true; + break; + } + bot = top + 1; + t[rdc] = top; b[rdc] = bot; + tp[rdc] = topp; bp[rdc] = botp; + assert(!rep1mm || b[rdc] - t[rdc] == bp[rdc] - tp[rdc]); + // topp/botp stay the same + } + INIT_LOCS(top, bot, tloc, bloc, *ebwt); + assert(sanityPartial(ebwt, ebwtp, seq, len - dep - 1, len, rep1mm, top, bot, topp, botp)); + } + if(do_continue) { + continue; + } + // Align far half + for(; dep < len; dep++) { + int rdc = seq[len-dep-1]; + int quc = qual[len-dep-1]; + if(rdc > 3 && nceil == 0) { + break; + } + tp[0] = tp[1] = tp[2] = tp[3] = topp; + bp[0] = bp[1] = bp[2] = bp[3] = botp; + int clo = 0, chi = 3; + bool match = true; + if(bloc.valid()) { + bwops_++; + t[0] = t[1] = t[2] = t[3] = b[0] = b[1] = b[2] = b[3] = 0; + ebwt->mapBiLFEx(tloc, bloc, t, b, tp, bp); + SANITY_CHECK_4TUP(t, b, tp, bp); + match = rdc < 4; + top = t[rdc]; bot = b[rdc]; + topp = tp[rdc]; botp = bp[rdc]; + } else { + assert_eq(bot, top+1); + assert(!rep1mm || botp == topp+1); + bwops_++; + clo = ebwt->mapLF1(top, tloc); + match = (clo == rdc); + assert_range(-1, 3, clo); + if(clo < 0) { + break; // Hit the $ + } else { + t[clo] = top; + b[clo] = bot = top + 1; + } + 
bp[clo] = botp; + tp[clo] = topp; + assert(!rep1mm || bot - top == botp - topp); + assert(!rep1mm || b[clo] - t[clo] == bp[clo] - tp[clo]); + chi = clo; + } + //assert(sanityPartial(ebwt, ebwtp, seq, len - dep - 1, len, rep1mm, top, bot, topp, botp)); + if(rep1mm && (ns == 0 || rdc > 3)) { + for(int j = clo; j <= chi; j++) { + if(j == rdc || b[j] == t[j]) { + // Either matches read or isn't a possibility + continue; + } + // Potential mismatch - next, try + size_t depm = dep + 1; + index_t topm = t[j], botm = b[j]; + index_t topmp = tp[j], botmp = bp[j]; + assert_eq(botm - topm, botmp - topmp); + index_t tm[4], bm[4]; // dest BW ranges for BWT + tm[0] = t[0]; tm[1] = t[1]; + tm[2] = t[2]; tm[3] = t[3]; + bm[0] = b[0]; bm[1] = t[1]; + bm[2] = b[2]; bm[3] = t[3]; + index_t tmp[4], bmp[4]; // dest BW ranges for BWT' + tmp[0] = tp[0]; tmp[1] = tp[1]; + tmp[2] = tp[2]; tmp[3] = tp[3]; + bmp[0] = bp[0]; bmp[1] = tp[1]; + bmp[2] = bp[2]; bmp[3] = tp[3]; + SideLocus tlocm, blocm; + INIT_LOCS(topm, botm, tlocm, blocm, *ebwt); + for(; depm < len; depm++) { + int rdcm = seq[len - depm - 1]; + tmp[0] = tmp[1] = tmp[2] = tmp[3] = topmp; + bmp[0] = bmp[1] = bmp[2] = bmp[3] = botmp; + if(blocm.valid()) { + bwops_++; + tm[0] = tm[1] = tm[2] = tm[3] = + bm[0] = bm[1] = bm[2] = bm[3] = 0; + ebwt->mapBiLFEx(tlocm, blocm, tm, bm, tmp, bmp); + SANITY_CHECK_4TUP(tm, bm, tmp, bmp); + topm = tm[rdcm]; botm = bm[rdcm]; + topmp = tmp[rdcm]; botmp = bmp[rdcm]; + if(botm <= topm) { + break; + } + } else { + assert_eq(botm, topm+1); + assert_eq(botmp, topmp+1); + bwops_++; + topm = ebwt->mapLF1(topm, tlocm, rdcm); + if(topm == (index_t)0xffffffff) { + break; + } + botm = topm + 1; + // topp/botp stay the same + } + INIT_LOCS(topm, botm, tlocm, blocm, *ebwt); + } + if(depm == len) { + // Success; this is a 1MM hit + size_t off5p = dep; // offset from 5' end of read + size_t offstr = dep; // offset into patFw/patRc + if(fw == ebwtfw) { + off5p = len - off5p - 1; + } + if(!ebwtfw) { + offstr = 
len - offstr - 1; + } + Edit e((uint32_t)off5p, j, rdc, EDIT_TYPE_MM, false); + results = true; + int64_t score = (len - 1) * sc.match(); + // In --local mode, need to double-check that + // end-to-end alignment doesn't violate local + // alignment principles. Specifically, it + // shouldn't to or below 0 anywhere in the middle. + int pen = sc.score(rdc, (int)(1 << j), quc - 33); + score += pen; + bool valid = true; + if(local) { + int64_t locscore_fw = 0, locscore_bw = 0; + for(size_t i = 0; i < len; i++) { + if(i == dep) { + if(locscore_fw + pen <= 0) { + valid = false; + break; + } + locscore_fw += pen; + } else { + locscore_fw += sc.match(); + } + if(len-i-1 == dep) { + if(locscore_bw + pen <= 0) { + valid = false; + break; + } + locscore_bw += pen; + } else { + locscore_bw += sc.match(); + } + } + } + if(valid) { + valid = score >= minsc; + } + if(valid) { +#ifndef NDEBUG + BTDnaString& rf = tmprfdnastr_; + rf.clear(); + edits_.clear(); + edits_.push_back(e); + if(!fw) Edit::invertPoss(edits_, len, false); + Edit::toRef(fw ? read.patFw : read.patRc, edits_, rf); + if(!fw) Edit::invertPoss(edits_, len, false); + assert_eq(len, rf.length()); + for(size_t i = 0; i < len; i++) { + assert_lt((int)rf[i], 4); + } + ASSERT_ONLY(index_t toptmp = 0); + ASSERT_ONLY(index_t bottmp = 0); + assert(ebwtFw->contains(rf, &toptmp, &bottmp)); +#endif + index_t toprep = ebwtfw ? topm : topmp; + index_t botrep = ebwtfw ? botm : botmp; + assert_eq(toprep, toptmp); + assert_eq(botrep, bottmp); + hits.add1mmEe(toprep, botrep, &e, NULL, fw, score); + } + } + } + } + if(bot > top && match) { + assert_lt(rdc, 4); + if(dep == len-1) { + // Success; this is an exact hit + if(ebwtfw && repex) { + if(fw) { + results = true; + int64_t score = len * sc.match(); + hits.addExactEeFw( + ebwtfw ? top : topp, + ebwtfw ? bot : botp, + NULL, NULL, fw, score); + assert(ebwtFw->contains(seq, NULL, NULL)); + } else { + results = true; + int64_t score = len * sc.match(); + hits.addExactEeRc( + ebwtfw ? 
top : topp, + ebwtfw ? bot : botp, + NULL, NULL, fw, score); + assert(ebwtFw->contains(seq, NULL, NULL)); + } + } + break; // End of far loop + } else { + INIT_LOCS(top, bot, tloc, bloc, *ebwt); + assert(sanityPartial(ebwt, ebwtp, seq, len - dep - 1, len, rep1mm, top, bot, topp, botp)); + } + } else { + break; // End of far loop + } + } // for(; dep < len; dep++) + } // for(int ebwtfw = 0; ebwtfw < 2; ebwtfw++) + } // for(int fw = 0; fw < 2; fw++) + return results; +} + +/** + * Wrapper for initial invcation of searchSeed. + */ +template +bool SeedAligner::searchSeedBi() { + return searchSeedBi( + 0, 0, + 0, 0, 0, 0, + SideLocus(), SideLocus(), + s_->cons[0], s_->cons[1], s_->cons[2], s_->overall, + NULL); +} + +/** + * Get tloc, bloc ready for the next step. If the new range is under + * the ceiling. + */ +template +inline void SeedAligner::nextLocsBi( + SideLocus& tloc, // top locus + SideLocus& bloc, // bot locus + index_t topf, // top in BWT + index_t botf, // bot in BWT + index_t topb, // top in BWT' + index_t botb, // bot in BWT' + int step // step to get ready for +#if 0 + , const SABWOffTrack* prevOt, // previous tracker + SABWOffTrack& ot // current tracker +#endif + ) +{ + assert_gt(botf, 0); + assert(ebwtBw_ == NULL || botb > 0); + assert_geq(step, 0); // next step can't be first one + assert(ebwtBw_ == NULL || botf-topf == botb-topb); + if(step == (int)s_->steps.size()) return; // no more steps! + // Which direction are we going in next? 
+ if(s_->steps[step] > 0) { + // Left to right; use BWT' + if(botb - topb == 1) { + // Already down to 1 row; just init top locus + tloc.initFromRow(topb, ebwtBw_->eh(), ebwtBw_->ebwt()); + bloc.invalidate(); + } else { + SideLocus::initFromTopBot( + topb, botb, ebwtBw_->eh(), ebwtBw_->ebwt(), tloc, bloc); + assert(bloc.valid()); + } + } else { + // Right to left; use BWT + if(botf - topf == 1) { + // Already down to 1 row; just init top locus + tloc.initFromRow(topf, ebwtFw_->eh(), ebwtFw_->ebwt()); + bloc.invalidate(); + } else { + SideLocus::initFromTopBot( + topf, botf, ebwtFw_->eh(), ebwtFw_->ebwt(), tloc, bloc); + assert(bloc.valid()); + } + } + // Check if we should update the tracker with this refinement +#if 0 + if(botf-topf <= BW_OFF_TRACK_CEIL) { + if(ot.size() == 0 && prevOt != NULL && prevOt->size() > 0) { + // Inherit state from the predecessor + ot = *prevOt; + } + bool ltr = s_->steps[step-1] > 0; + int adj = abs(s_->steps[step-1])-1; + const Ebwt* ebwt = ltr ? ebwtBw_ : ebwtFw_; + ot.update( + ltr ? topb : topf, // top + ltr ? botb : botf, // bot + adj, // adj (to be subtracted from offset) + ebwt->offs(), // offs array + ebwt->eh().offRate(), // offrate (sample = every 1 << offrate elts) + NULL // dead + ); + assert_gt(ot.size(), 0); + } +#endif + assert(botf - topf == 1 || bloc.valid()); + assert(botf - topf > 1 || !bloc.valid()); +} + +/** + * Report a seed hit found by searchSeedBi(), but first try to extend it out in + * either direction as far as possible without hitting any edits. This will + * allow us to prioritize the seed hits better later on. Call reportHit() when + * we're done, which actually adds the hit to the cache. Returns result from + * calling reportHit(). 
+ */ +template +bool SeedAligner::extendAndReportHit( + index_t topf, // top in BWT + index_t botf, // bot in BWT + index_t topb, // top in BWT' + index_t botb, // bot in BWT' + index_t len, // length of hit + DoublyLinkedList *prevEdit) // previous edit +{ + index_t nlex = 0, nrex = 0; + index_t t[4], b[4]; + index_t tp[4], bp[4]; + SideLocus tloc, bloc; + if(off_ > 0) { + const Ebwt *ebwt = ebwtFw_; + assert(ebwt != NULL); + // Extend left using forward index + const BTDnaString& seq = fw_ ? read_->patFw : read_->patRc; + // See what we get by extending + index_t top = topf, bot = botf; + t[0] = t[1] = t[2] = t[3] = 0; + b[0] = b[1] = b[2] = b[3] = 0; + tp[0] = tp[1] = tp[2] = tp[3] = topb; + bp[0] = bp[1] = bp[2] = bp[3] = botb; + SideLocus tloc, bloc; + INIT_LOCS(top, bot, tloc, bloc, *ebwt); + for(size_t ii = off_; ii > 0; ii--) { + size_t i = ii-1; + // Get char from read + int rdc = seq.get(i); + // See what we get by extending + if(bloc.valid()) { + bwops_++; + t[0] = t[1] = t[2] = t[3] = + b[0] = b[1] = b[2] = b[3] = 0; + ebwt->mapBiLFEx(tloc, bloc, t, b, tp, bp); + SANITY_CHECK_4TUP(t, b, tp, bp); + int nonz = -1; + bool abort = false; + for(int j = 0; j < 4; j++) { + if(b[i] > t[i]) { + if(nonz >= 0) { + abort = true; + break; + } + nonz = j; + top = t[i]; bot = b[i]; + } + } + if(abort || nonz != rdc) { + break; + } + } else { + assert_eq(bot, top+1); + bwops_++; + int c = ebwt->mapLF1(top, tloc); + if(c != rdc) { + break; + } + bot = top + 1; + } + if(++nlex == 255) { + break; + } + INIT_LOCS(top, bot, tloc, bloc, *ebwt); + } + } + size_t rdlen = read_->length(); + size_t nright = rdlen - off_ - len; + if(nright > 0 && ebwtBw_ != NULL) { + const Ebwt *ebwt = ebwtBw_; + assert(ebwt != NULL); + // Extend right using backward index + const BTDnaString& seq = fw_ ? 
read_->patFw : read_->patRc; + // See what we get by extending + index_t top = topb, bot = botb; + t[0] = t[1] = t[2] = t[3] = 0; + b[0] = b[1] = b[2] = b[3] = 0; + tp[0] = tp[1] = tp[2] = tp[3] = topb; + bp[0] = bp[1] = bp[2] = bp[3] = botb; + INIT_LOCS(top, bot, tloc, bloc, *ebwt); + for(size_t i = off_ + len; i < rdlen; i++) { + // Get char from read + int rdc = seq.get(i); + // See what we get by extending + if(bloc.valid()) { + bwops_++; + t[0] = t[1] = t[2] = t[3] = + b[0] = b[1] = b[2] = b[3] = 0; + ebwt->mapBiLFEx(tloc, bloc, t, b, tp, bp); + SANITY_CHECK_4TUP(t, b, tp, bp); + int nonz = -1; + bool abort = false; + for(int j = 0; j < 4; j++) { + if(b[i] > t[i]) { + if(nonz >= 0) { + abort = true; + break; + } + nonz = j; + top = t[i]; bot = b[i]; + } + } + if(abort || nonz != rdc) { + break; + } + } else { + assert_eq(bot, top+1); + bwops_++; + int c = ebwt->mapLF1(top, tloc); + if(c != rdc) { + break; + } + bot = top + 1; + } + if(++nrex == 255) { + break; + } + INIT_LOCS(top, bot, tloc, bloc, *ebwt); + } + } + assert_lt(nlex, rdlen); + assert_leq(nlex, off_); + assert_lt(nrex, rdlen); + return reportHit(topf, botf, topb, botb, len, prevEdit); +} + +/** + * Report a seed hit found by searchSeedBi() by adding it to the cache. Return + * false if the hit could not be reported because of, e.g., cache exhaustion. + */ +template +bool SeedAligner::reportHit( + index_t topf, // top in BWT + index_t botf, // bot in BWT + index_t topb, // top in BWT' + index_t botb, // bot in BWT' + index_t len, // length of hit + DoublyLinkedList *prevEdit) // previous edit +{ + // Add information about the seed hit to AlignmentCache. This + // information eventually makes its way back to the SeedResults + // object when we call finishAlign(...). 
+ BTDnaString& rf = tmprfdnastr_; + rf.clear(); + edits_.clear(); + if(prevEdit != NULL) { + prevEdit->toList(edits_); + Edit::sort(edits_); + assert(Edit::repOk(edits_, *seq_)); + Edit::toRef(*seq_, edits_, rf); + } else { + rf = *seq_; + } + // Sanity check: shouldn't add the same hit twice. If this + // happens, it may be because our zone Constraints are not set up + // properly and erroneously return true from acceptable() when they + // should return false in some cases. + assert_eq(hits_.size(), ca_->curNumRanges()); + assert(hits_.insert(rf)); + if(!ca_->addOnTheFly(rf, topf, botf, topb, botb)) { + return false; + } + assert_eq(hits_.size(), ca_->curNumRanges()); +#ifndef NDEBUG + // Sanity check that the topf/botf and topb/botb ranges really + // correspond to the reference sequence aligned to + { + BTDnaString rfr; + index_t tpf, btf, tpb, btb; + tpf = btf = tpb = btb = 0; + assert(ebwtFw_->contains(rf, &tpf, &btf)); + if(ebwtBw_ != NULL) { + rfr = rf; + rfr.reverse(); + assert(ebwtBw_->contains(rfr, &tpb, &btb)); + assert_eq(tpf, topf); + assert_eq(btf, botf); + assert_eq(tpb, topb); + assert_eq(btb, botb); + } + } +#endif + return true; +} + +/** + * Given a seed, search. Assumes zone 0 = no backtracking. + * + * Return a list of Seed hits. + * 1. Edits + * 2. 
Bidirectional BWT range(s) on either end
+ *
+ * Walks s_->steps from index 'step' onward, narrowing the BWT range
+ * (topf/botf) and the BWT' range (topb/botb) by one read character per step.
+ * At step 0 the search first jumps in using ftab (when its length is > 1 and
+ * <= s.maxjump), else fchr (when s.maxjump > 0), else starts from the whole
+ * index. Where the zone constraints allow a mismatch, the function recurses
+ * once per alternative base with the new edit linked after prevEdit, then
+ * restores the constraints and loci and continues along the matching path.
+ * Completed seeds are handed to reportHit().
+ *
+ * Returns false only when reportHit() returns false, either here or in a
+ * recursive call; every other exit, including dead ends, returns true.
+ * Throws the int 1 when both cons.canGap() and overall.canGap() hold: gapped
+ * seed search is marked TODO and the statements after the throw never run.
+ *
+ * NOTE(review): when a recursive call returns false, prevEdit->next is left
+ * pointing at the local 'editl' - confirm no caller walks the list afterwards.
+ */
+template
+bool SeedAligner::searchSeedBi(
+    int step, // depth into steps_[] array
+    int depth, // recursion depth
+    index_t topf, // top in BWT
+    index_t botf, // bot in BWT
+    index_t topb, // top in BWT'
+    index_t botb, // bot in BWT'
+    SideLocus tloc, // locus for top (perhaps uninitialized)
+    SideLocus bloc, // locus for bot (perhaps uninitialized)
+    Constraint c0, // constraints to enforce in seed zone 0
+    Constraint c1, // constraints to enforce in seed zone 1
+    Constraint c2, // constraints to enforce in seed zone 2
+    Constraint overall, // overall constraints to enforce
+    DoublyLinkedList *prevEdit // previous edit
+#if 0
+    , const SABWOffTrack* prevOt // prev off tracker (if tracking started)
+#endif
+    )
+{
+    assert(s_ != NULL);
+    const InstantiatedSeed& s = *s_;
+    assert_gt(s.steps.size(), 0);
+    assert(ebwtBw_ == NULL || ebwtBw_->eh().ftabChars() == ebwtFw_->eh().ftabChars());
+#ifndef NDEBUG
+    for(int i = 0; i < 4; i++) {
+        assert(ebwtBw_ == NULL || ebwtBw_->fchr()[i] == ebwtFw_->fchr()[i]);
+    }
+#endif
+    if(step == (int)s.steps.size()) {
+        // Finished aligning seed
+        assert(c0.acceptable());
+        assert(c1.acceptable());
+        assert(c2.acceptable());
+        if(!reportHit(topf, botf, topb, botb, seq_->length(), prevEdit)) {
+            return false; // Memory exhausted
+        }
+        return true;
+    }
+#ifndef NDEBUG
+    if(depth > 0) {
+        assert(botf - topf == 1 || bloc.valid());
+        assert(botf - topf > 1 || !bloc.valid());
+    }
+#endif
+    int off; // signed step value first, then 0-based offset into *seq_ / *qual_
+    index_t tp[4], bp[4]; // dest BW ranges for "prime" index
+    if(step == 0) {
+        // Just starting
+        assert(prevEdit == NULL);
+        assert(!tloc.valid());
+        assert(!bloc.valid());
+        off = s.steps[0];
+        bool ltr = off > 0; // sign of the step encodes the direction
+        off = abs(off)-1; // magnitude minus one is the sequence offset
+        // Check whether/how far we can jump using ftab or fchr
+        int ftabLen = ebwtFw_->eh().ftabChars();
+        if(ftabLen > 1 && ftabLen <= s.maxjump) {
+            if(!ltr) {
+                assert_geq(off+1, ftabLen-1);
+                off = off - ftabLen + 1; // leftmost offset of the ftab window
+            }
+            ebwtFw_->ftabLoHi(*seq_, off, false, topf, botf);
+#ifdef NDEBUG
+            if(botf - topf == 0) return true; // empty range: dead end
+#endif
+#ifdef NDEBUG
+            if(ebwtBw_ != NULL) {
+                topb = ebwtBw_->ftabHi(*seq_, off);
+                botb = topb + (botf-topf); // same width as the BWT range
+            }
+#else
+            if(ebwtBw_ != NULL) {
+                ebwtBw_->ftabLoHi(*seq_, off, false, topb, botb);
+                assert_eq(botf-topf, botb-topb);
+            }
+            if(botf - topf == 0) return true; // empty range: dead end
+#endif
+            step += ftabLen; // the jump consumed ftabLen steps
+        } else if(s.maxjump > 0) {
+            // Use fchr
+            int c = (*seq_)[off];
+            assert_range(0, 3, c);
+            topf = topb = ebwtFw_->fchr()[c];
+            botf = botb = ebwtFw_->fchr()[c+1];
+            if(botf - topf == 0) return true; // empty range: dead end
+            step++;
+        } else {
+            assert_eq(0, s.maxjump);
+            topf = topb = 0; // no jump: range runs from 0 to fchr()[4]
+            botf = botb = ebwtFw_->fchr()[4];
+        }
+        if(step == (int)s.steps.size()) {
+            // Finished aligning seed
+            assert(c0.acceptable());
+            assert(c1.acceptable());
+            assert(c2.acceptable());
+            if(!reportHit(topf, botf, topb, botb, seq_->length(), prevEdit)) {
+                return false; // Memory exhausted
+            }
+            return true;
+        }
+        nextLocsBi(tloc, bloc, topf, botf, topb, botb, step);
+        assert(tloc.valid());
+    } else assert(prevEdit != NULL);
+    assert(tloc.valid());
+    assert(botf - topf == 1 || bloc.valid());
+    assert(botf - topf > 1 || !bloc.valid());
+    assert_geq(step, 0);
+    index_t t[4], b[4]; // dest BW ranges
+    Constraint* zones[3] = { &c0, &c1, &c2 }; // indexed by |s.zones[i]| below
+    ASSERT_ONLY(index_t lasttot = botf - topf);
+    for(int i = step; i < (int)s.steps.size(); i++) {
+        assert_gt(botf, topf);
+        assert(botf - topf == 1 || bloc.valid());
+        assert(botf - topf > 1 || !bloc.valid());
+        assert(ebwtBw_ == NULL || botf-topf == botb-topb);
+        assert(tloc.valid());
+        off = s.steps[i];
+        bool ltr = off > 0;
+        const Ebwt* ebwt = ltr ? ebwtBw_ : ebwtFw_; // index queried on this step
+        assert(ebwt != NULL);
+        if(ltr) {
+            tp[0] = tp[1] = tp[2] = tp[3] = topf;
+            bp[0] = bp[1] = bp[2] = bp[3] = botf;
+        } else {
+            tp[0] = tp[1] = tp[2] = tp[3] = topb;
+            bp[0] = bp[1] = bp[2] = bp[3] = botb;
+        }
+        t[0] = t[1] = t[2] = t[3] = b[0] = b[1] = b[2] = b[3] = 0;
+        if(bloc.valid()) {
+            // Range delimited by tloc/bloc has size >1. If size == 1,
+            // we use a simpler query (see if(!bloc.valid()) blocks below)
+            bwops_++;
+            ebwt->mapBiLFEx(tloc, bloc, t, b, tp, bp);
+            ASSERT_ONLY(index_t tot = (b[0]-t[0])+(b[1]-t[1])+(b[2]-t[2])+(b[3]-t[3]));
+            ASSERT_ONLY(index_t totp = (bp[0]-tp[0])+(bp[1]-tp[1])+(bp[2]-tp[2])+(bp[3]-tp[3]));
+            assert_eq(tot, totp);
+            assert_leq(tot, lasttot);
+            ASSERT_ONLY(lasttot = tot);
+        }
+        index_t *tf = ltr ? tp : t, *tb = ltr ? t : tp; // tf/bf: BWT ranges; tb/bb: BWT' ranges
+        index_t *bf = ltr ? bp : b, *bb = ltr ? b : bp;
+        off = abs(off)-1;
+        //
+        bool leaveZone = s.zones[i].first < 0; // negative zone id: this step leaves the zone
+        //bool leaveZoneIns = zones_[i].second < 0;
+        Constraint& cons = *zones[abs(s.zones[i].first)];
+        Constraint& insCons = *zones[abs(s.zones[i].second)];
+        int c = (*seq_)[off]; assert_range(0, 4, c); // 4 is handled as an N below
+        int q = (*qual_)[off];
+        // Is it legal for us to advance on characters other than 'c'?
+        if(!(cons.mustMatch() && !overall.mustMatch()) || c == 4) {
+            // There may be legal edits
+            bool bail = false;
+            if(!bloc.valid()) {
+                // Range delimited by tloc/bloc has size 1
+                index_t ntop = ltr ? topb : topf;
+                bwops_++;
+                int cc = ebwt->mapLF1(ntop, tloc);
+                assert_range(-1, 3, cc);
+                if(cc < 0) bail = true; // negative: no base to extend with
+                else { t[cc] = ntop; b[cc] = ntop+1; }
+            }
+            if(!bail) {
+                if((cons.canMismatch(q, *sc_) && overall.canMismatch(q, *sc_)) || c == 4) {
+                    Constraint oldCons = cons, oldOvCons = overall; // saved; restored after the recursion
+                    SideLocus oldTloc = tloc, oldBloc = bloc;
+                    if(c != 4) {
+                        cons.chargeMismatch(q, *sc_);
+                        overall.chargeMismatch(q, *sc_);
+                    }
+                    // Can leave the zone as-is
+                    if(!leaveZone || (cons.acceptable() && overall.acceptable())) {
+                        for(int j = 0; j < 4; j++) {
+                            if(j == c || b[j] == t[j]) continue; // read's own base, or empty range
+                            // Potential mismatch
+                            nextLocsBi(tloc, bloc, tf[j], bf[j], tb[j], bb[j], i+1);
+                            int loff = off; // not read after the next statement
+                            if(!ltr) loff = (int)(s.steps.size() - loff - 1);
+                            assert(prevEdit == NULL || prevEdit->next == NULL);
+                            Edit edit(off, j, c, EDIT_TYPE_MM, false);
+                            DoublyLinkedList editl; // list node lives on this stack frame
+                            editl.payload = edit;
+                            if(prevEdit != NULL) {
+                                prevEdit->next = &editl;
+                                editl.prev = prevEdit;
+                            }
+                            assert(editl.next == NULL);
+                            bwedits_++;
+                            if(!searchSeedBi(
+                                i+1, // depth into steps_[] array
+                                depth+1, // recursion depth
+                                tf[j], // top in BWT
+                                bf[j], // bot in BWT
+                                tb[j], // top in BWT'
+                                bb[j], // bot in BWT'
+                                tloc, // locus for top (perhaps uninitialized)
+                                bloc, // locus for bot (perhaps uninitialized)
+                                c0, // constraints to enforce in seed zone 0
+                                c1, // constraints to enforce in seed zone 1
+                                c2, // constraints to enforce in seed zone 2
+                                overall, // overall constraints to enforce
+                                &editl)) // latest edit
+                            {
+                                return false;
+                            }
+                            if(prevEdit != NULL) prevEdit->next = NULL; // unlink editl before it goes out of scope
+                        }
+                    } else {
+                        // Not enough edits to make this path
+                        // non-redundant with other seeds
+                    }
+                    cons = oldCons;
+                    overall = oldOvCons;
+                    tloc = oldTloc;
+                    bloc = oldBloc;
+                }
+                if(cons.canGap() && overall.canGap()) {
+                    throw 1; // TODO: gapped seeds unimplemented; the rest of this block is unreachable
+                    int delEx = 0;
+                    if(cons.canDelete(delEx, *sc_) && overall.canDelete(delEx, *sc_)) {
+                        // Try delete
+                    }
+                    int insEx = 0;
+                    if(insCons.canInsert(insEx, *sc_) && overall.canInsert(insEx, *sc_)) {
+                        // Try insert
+                    }
+                }
+            } // if(!bail)
+        }
+        if(c == 4) {
+            return true; // couldn't handle the N
+        }
+        if(leaveZone && (!cons.acceptable() || !overall.acceptable())) {
+            // Not enough edits to make this path non-redundant with
+            // other seeds
+            return true;
+        }
+        if(!bloc.valid()) {
+            assert(ebwtBw_ == NULL || bp[c] == tp[c]+1);
+            // Range delimited by tloc/bloc has size 1
+            index_t top = ltr ? topb : topf;
+            bwops_++;
+            t[c] = ebwt->mapLF1(top, tloc, c);
+            if(t[c] == (index_t)OFF_MASK) {
+                return true; // sentinel: cannot extend with the read's base
+            }
+            assert_geq(t[c], ebwt->fchr()[c]);
+            assert_lt(t[c], ebwt->fchr()[c+1]);
+            b[c] = t[c]+1;
+            assert_gt(b[c], 0);
+        }
+        assert(ebwtBw_ == NULL || bf[c]-tf[c] == bb[c]-tb[c]);
+        assert_leq(bf[c]-tf[c], lasttot);
+        ASSERT_ONLY(lasttot = bf[c]-tf[c]);
+        if(b[c] == t[c]) {
+            return true; // empty range for the read's base: dead end
+        }
+        topf = tf[c]; botf = bf[c];
+        topb = tb[c]; botb = bb[c];
+        if(i+1 == (int)s.steps.size()) {
+            // Finished aligning seed
+            assert(c0.acceptable());
+            assert(c1.acceptable());
+            assert(c2.acceptable());
+            if(!reportHit(topf, botf, topb, botb, seq_->length(), prevEdit)) {
+                return false; // Memory exhausted
+            }
+            return true;
+        }
+        nextLocsBi(tloc, bloc, tf[c], bf[c], tb[c], bb[c], i+1);
+    }
+    return true;
+}
+
+#endif /*ALIGNER_SEED_H_*/
diff --git a/aligner_seed2.cpp b/aligner_seed2.cpp
new file mode 100644
index 0000000..685fbb0
--- /dev/null
+++ b/aligner_seed2.cpp
@@ -0,0 +1,1245 @@
+/*
+ * Copyright 2011, Ben Langmead
+ *
+ * This file is part of Bowtie 2.
+ *
+ * Bowtie 2 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Bowtie 2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +#include +#include "aligner_seed2.h" +#include "assert_helpers.h" +#include "gfm.h" + +#ifdef ALIGNER_SEED2_MAIN + +#include +#include "sstring.h" + +using namespace std; + +/** + * A way of feeding simply tests to the seed alignment infrastructure. + */ +int main(int argc, char **argv) { + + EList strs; + // GCTATATAGCGCGCTCGCATCATTTTGTGT + strs.push_back(string("CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA" + "NNNNNNNNNN" + "CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA")); + // GCTATATAGCGCGCTTGCATCATTTTGTGT + // ^ + bool packed = false; + int color = 0; + pair gfms = GFM::fromStrings >( + strs, + packed, + REF_READ_REVERSE, + Ebwt::default_bigEndian, + Ebwt::default_lineRate, + Ebwt::default_offRate, + Ebwt::default_ftabChars, + ".aligner_seed2.cpp.tmp", + Ebwt::default_useBlockwise, + Ebwt::default_bmax, + Ebwt::default_bmaxMultSqrt, + Ebwt::default_bmaxDivN, + Ebwt::default_dcv, + Ebwt::default_seed, + false, // verbose + false, // autoMem + false); // sanity + + gfms.first->loadIntoMemory (-1, true, true, true, true, false); + gfms.second->loadIntoMemory(1, true, true, true, true, false); + + int testnum = 0; + + // Query is longer than ftab and matches exactly twice + for(int rc = 0; rc < 2; rc++) { + for(int i = 0; i < 2; i++) { + cerr << "Test " << (++testnum) << endl; + cerr << " Query with length greater than ftab" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + // Set up the read + BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true); + BTString qual("ABCDEFGHIabcdefghiABCDEFGHIabc"); + if(rc) { + seq.reverseComp(); + qual.reverse(); + } + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + conf.cons.init(GFM::default_ftabChars, 1.0); + conf.expol = DESC_EX_NONE; + 
+ // Set up the search roots + dr.addRoot( + conf, // DescentConfig + (i == 0) ? 0 : (seq.length() - 1), // 5' offset into read of root + (i == 0) ? true : false, // left-to-right? + rc == 0, // forward? + 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert_eq(2, dr.sink().nelt()); + } + } + + // Query has length euqal to ftab and matches exactly twice + for(int i = 0; i < 2; i++) { + cerr << "Test " << (++testnum) << endl; + cerr << " Query with length equal to ftab" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + // Set up the read + BTDnaString seq ("GCTATATAGC", true); + BTString qual("ABCDEFGHIa"); + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + conf.cons.init(GFM::default_ftabChars, 1.0); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + (i == 0) ? 0 : (seq.length() - 1), // 5' offset into read of root + (i == 0) ? true : false, // left-to-right? + true, // forward? 
+ 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert_eq(2, dr.sink().nelt()); + } + + // Query has length less than ftab length and matches exactly twice + for(int i = 0; i < 2; i++) { + cerr << "Test " << (++testnum) << endl; + cerr << " Query with length less than ftab" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + // Set up the read + BTDnaString seq ("GCTATATAG", true); + BTString qual("ABCDEFGHI"); + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + conf.cons.init(GFM::default_ftabChars, 1.0); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + (i == 0) ? 0 : (seq.length() - 1), // 5' offset into read of root + (i == 0) ? true : false, // left-to-right? + true, // forward? 
+ 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert_eq(2, dr.sink().nelt()); + } + + // Search root is in the middle of the read, requiring a bounce + for(int i = 0; i < 2; i++) { + cerr << "Test " << (++testnum) << endl; + cerr << " Search root in middle of read" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + // Set up the read + // 012345678901234567890123456789 + BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true); + BTString qual("ABCDEFGHIabcdefghiABCDEFGHIabc"); + TIndexOffU top, bot; + top = bot = 0; + bool ret = gfms.first->contains("GCGCTCGCATCATTTTGTGT", &top, &bot); + cerr << ret << ", " << top << ", " << bot << endl; + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + conf.cons.init(GFM::default_ftabChars, 1.0); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + (i == 0) ? 10 : (seq.length() - 1 - 10), // 5' offset into read of root + (i == 0) ? true : false, // left-to-right? + true, // forward? 
+ 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert_eq(2, dr.sink().nelt()); + } + + delete gfms.first; + delete gfms.second; + + strs.clear(); + strs.push_back(string("CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA" + "NNNNNNNNNN" + "CATGTCAGCTATATAGCG")); + gfms = GFM::fromStrings >( + strs, + packed, + REF_READ_REVERSE, + GFM::default_bigEndian, + GFM::default_lineRate, + GFM::default_offRate, + GFM::default_ftabChars, + ".aligner_seed2.cpp.tmp", + GFM::default_useBlockwise, + GFM::default_bmax, + GfM::default_bmaxMultSqrt, + GFM::default_bmaxDivN, + GFM::default_dcv, + GFM::default_seed, + false, // verbose + false, // autoMem + false); // sanity + + gfms.first->loadIntoMemory (-1, true, true, true, true, false); + gfms.second->loadIntoMemory(1, true, true, true, true, false); + + // Query is longer than ftab and matches exactly once. One search root for + // forward read. + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true); + BTString qual("ABCDEFGHIabcdefghiABCDEFGHIabc"); + for(size_t j = 0; j < seq.length(); j++) { + cerr << "Test " << (++testnum) << endl; + cerr << " Query with length greater than ftab and matches exactly once" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + // Set up the read + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + conf.cons.init(GFM::default_ftabChars, 1.0); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + true, // forward? 
+ 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + assert_eq(1, dr.sink().nelt()); + } + } + } + + // Query is longer than ftab and its reverse complement matches exactly + // once. Search roots on forward and reverse-comp reads. + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true); + BTString qual("ABCDEFGHIabcdefghiABCDEFGHIabc"); + for(size_t j = 0; j < seq.length(); j++) { + cerr << "Test " << (++testnum) << endl; + cerr << " Query with length greater than ftab and reverse complement matches exactly once" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + // Set up the read + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + conf.cons.init(GFM::default_ftabChars, 1.0); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + true, // forward? + 0.0f); // root priority + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + false, // forward? 
+ 1.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + assert_eq(1, dr.sink().nelt()); + } + } + } + + // Query is longer than ftab and matches exactly once with one mismatch + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + // Set up the read + // Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA + // |||||||||||||||||||||||||||||| + BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true); + // 012345678901234567890123456789 + BTString qual("ABCDEFGHIabcdefghiABCDEFGHIabc"); + for(size_t k = 0; k < orig.length(); k++) { + BTDnaString seq = orig; + seq.set(seq[k] ^ 3, k); + for(size_t j = 0; j < seq.length(); j++) { + // Assume left-to-right + size_t beg = j; + size_t end = j + GFM::default_ftabChars; + // Mismatch penalty is 3, so we have to skip starting + // points that are within 2 from the mismatch + if((i > 0 && j > 0) || j == seq.length()-1) { + // Right-to-left + if(beg < GFM::default_ftabChars) { + beg = 0; + } else { + beg -= GFM::default_ftabChars; + } + end -= GFM::default_ftabChars; + } + size_t kk = k; + //if(rc) { + // kk = seq.length() - k - 1; + //} + if(beg <= kk && end > kk) { + continue; + } + if((j > kk) ? 
(j - kk <= 2) : (kk - j <= 2)) { + continue; + } + cerr << "Test " << (++testnum) << endl; + cerr << " Query with length greater than ftab and matches exactly once with 1mm" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(0, 1.0); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + true, // forward? + 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl; + assert_eq(1, dr.sink().nelt()); + last_topf = dr.sink()[0].topf; + last_botf = dr.sink()[0].botf; + } + } + } + } + + // Query is longer than ftab and matches exactly once with one N mismatch + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + // Set up the read + // Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA + // |||||||||||||||||||||||||||||| + BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true); + // 012345678901234567890123456789 + BTString qual("ABCDEFGHIabcdefghiABCDEFGHIabc"); + for(size_t k = 0; k < orig.length(); k++) { + BTDnaString seq = orig; + seq.set(4, k); + for(size_t j = 0; j < seq.length(); j++) { + // Assume left-to-right + size_t beg = j; + size_t end = j + GFM::default_ftabChars; + // Mismatch penalty is 3, so we have to skip starting + // points that are within 2 from the mismatch + if((i > 0 && j > 0) || j == 
seq.length()-1) { + // Right-to-left + if(beg < GFM::default_ftabChars) { + beg = 0; + } else { + beg -= GFM::default_ftabChars; + } + end -= GFM::default_ftabChars; + } + if(beg <= k && end > k) { + continue; + } + if((j > k) ? (j - k <= 2) : (k - j <= 2)) { + continue; + } + cerr << "Test " << (++testnum) << endl; + cerr << " Query with length greater than ftab and matches exactly once with 1mm" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(0, 1.0); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + true, // forward? + 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert_eq(sc.n(40), dr.sink()[0].pen); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl; + assert_eq(1, dr.sink().nelt()); + last_topf = dr.sink()[0].topf; + last_botf = dr.sink()[0].botf; + } + } + } + } + + // Throw a bunch of queries with a bunch of Ns in and try to force an assert + { + RandomSource rnd(79); + for(int i = 0; i < 2; i++) { + // Set up the read + // Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA + // |||||||||||||||||||||||||||||| + BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true); + // 012345678901234567890123456789 + BTString qual("ABCDEFGHIabcdefghiABCDEFGHIabc"); + if(i == 1) { + orig.reverseComp(); + qual.reverse(); + } + for(size_t trials = 0; trials < 100; trials++) { + BTDnaString seq = orig; + size_t ns = 10; + 
for(size_t k = 0; k < ns; k++) { + size_t pos = rnd.nextU32() % seq.length(); + seq.set(4, pos); + } + + cerr << "Test " << (++testnum) << endl; + cerr << " Query with a bunch of Ns" << endl; + + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(GFM::default_ftabChars, 1.0); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + for(size_t k = 0; k < ns; k++) { + size_t j = rnd.nextU32() % seq.length(); + bool ltr = (rnd.nextU2() == 0) ? true : false; + bool fw = (rnd.nextU2() == 0) ? true : false; + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + ltr, // left-to-right? + fw, // forward? + 0.0f); // root priority + } + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + } + } + } + + // Query is longer than ftab and matches exactly once with one mismatch + { + RandomSource rnd(77); + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + // Set up the read + // Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA + // |||||||||||||||||||||||||||||| + BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true); + // 012345678901234567890123456789 + BTString qual("ABCDEFGHIabcdefghiABCDEFGHIabc"); + // revcomp: ACACAAAATGATGCGAGCGCGCTATATAGC + // revqual: cbaIHGFEDCBAihgfedcbaIHGFEDCBA + bool fwi = (i == 0); + if(!fwi) { + orig.reverseComp(); + } + for(size_t k = 0; k < orig.length(); k++) { + BTDnaString seq = orig; + seq.set(seq[k] ^ 3, k); + cerr << "Test " << (++testnum) << endl; + cerr << " Query with length greater than ftab and matches exactly once with 1mm. Many search roots." 
<< endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(0, 1.0); + conf.expol = DESC_EX_NONE; + + // Set up several random search roots + bool onegood = false; + for(size_t y = 0; y < 10; y++) { + size_t j = rnd.nextU32() % seq.length(); + bool ltr = (rnd.nextU2() == 0) ? true : false; + bool fw = (rnd.nextU2() == 0) ? true : false; + dr.addRoot( + conf, // DescentConfig + (TReadOff)j, // 5' offset into read of root + ltr, // left-to-right? + fw, // forward? + (float)((float)y * 1.0f)); // root priority + // Assume left-to-right + size_t beg = j; + size_t end = j + GFM::default_ftabChars; + // Mismatch penalty is 3, so we have to skip starting + // points that are within 2 from the mismatch + if(!ltr) { + // Right-to-left + if(beg < GFM::default_ftabChars) { + beg = 0; + } else { + beg -= GFM::default_ftabChars; + } + end -= GFM::default_ftabChars; + } + bool good = true; + if(fw != fwi) { + good = false; + } + if(beg <= k && end > k) { + good = false; + } + if((j > k) ? 
(j - k <= 2) : (k - j <= 2)) { + good = false; + } + if(good) { + onegood = true; + } + } + if(!onegood) { + continue; + } + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl; + assert_eq(1, dr.sink().nelt()); + last_topf = dr.sink()[0].topf; + last_botf = dr.sink()[0].botf; + } + } + } + + // Query is longer than ftab and matches exactly once with one read gap + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + for(int k = 0; k < 2; k++) { + // Set up the read + // GCTATATAGCGCGCCTGCATCATTTTGTGT + // Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA + // |||||||||||||||/////////////// + BTDnaString seq ("GCTATATAGCGCGCTGCATCATTTTGTGT", true); + // 01234567890123456789012345678 + // 87654321098765432109876543210 + BTString qual("ABCDEFGHIabcdefghiABCDEFGHIab"); + if(k == 1) { + seq.reverseComp(); + qual.reverse(); + } + assert_eq(seq.length(), qual.length()); + // js iterate over offsets from 5' end for the search root + for(size_t j = 0; j < seq.length(); j++) { + // Assume left-to-right + size_t beg = j; + if(k == 1) { + beg = seq.length() - beg - 1; + } + size_t end = beg + GFM::default_ftabChars; + // Mismatch penalty is 3, so we have to skip starting + // points that are within 2 from the mismatch + if((i > 0 && j > 0) || j == seq.length()-1) { + // Right-to-left + if(beg < GFM::default_ftabChars) { + beg = 0; + } else { + beg -= GFM::default_ftabChars; + } + end -= GFM::default_ftabChars; + } + assert_geq(end, beg); + if(beg <= 15 && end >= 15) { + continue; + } + cerr << "Test " << (++testnum) << 
endl; + cerr << " Query matches once with a read gap of length 1" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + Read q("test", seq.toZBuf(), qual.toZBuf()); + assert(q.repOk()); + dr.initRead(q, -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(0, 0.5); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + k == 0, // forward? + 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert_eq(sc.readGapOpen() + 0 * sc.readGapExtend(), dr.sink()[0].pen); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl; + assert_eq(1, dr.sink().nelt()); + last_topf = dr.sink()[0].topf; + last_botf = dr.sink()[0].botf; + } + }} + } + + // Query is longer than ftab and matches exactly once with one read gap of + // length 3 + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + for(int k = 0; k < 2; k++) { + // Set up the read + // GCTATATAGCGCGCGCTCATCATTTTGTGT + // Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA + // |||||||||||||| ||||||||||||| + BTDnaString seq ("GCTATATAGCGCGC" "CATCATTTTGTGT", true); + // 01234567890123 4567890123456 + // 65432109876543 2109876543210 + BTString qual("ABCDEFGHIabcde" "fghiABCDEFGHI"); + if(k == 1) { + seq.reverseComp(); + qual.reverse(); + } + for(size_t j = 0; j < seq.length(); j++) { + // Assume left-to-right + size_t beg = j; + if(k == 1) { + beg = seq.length() - beg - 1; + } + size_t end = beg + GFM::default_ftabChars; + // 
Mismatch penalty is 3, so we have to skip starting + // points that are within 2 from the mismatch + if((i > 0 && j > 0) || j == seq.length()-1) { + // Right-to-left + if(beg < GFM::default_ftabChars) { + beg = 0; + } else { + beg -= GFM::default_ftabChars; + } + end -= GFM::default_ftabChars; + } + if(beg <= 14 && end >= 14) { + continue; + } + cerr << "Test " << (++testnum) << endl; + cerr << " Query matches once with a read gap of length 3" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(0, 0.2); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + k == 0, // forward? + 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + // Need to adjust the mismatch penalty up to avoid alignments + // with lots of mismatches. 
+ sc.setMmPen(COST_MODEL_CONSTANT, 6, 6); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert_eq(sc.readGapOpen() + 2 * sc.readGapExtend(), dr.sink()[0].pen); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl; + assert_eq(1, dr.sink().nelt()); + last_topf = dr.sink()[0].topf; + last_botf = dr.sink()[0].botf; + } + }} + } + + // Query is longer than ftab and matches exactly once with one reference gap + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + // Set up the read + // Ref: CATGTCAGCTATATAGCGCGC" "TCGCATCATTTTGTGTGTAAACCA + // |||||||||||||| |||||||||||||||| + BTDnaString seq ("GCTATATAGCGCGCA""TCGCATCATTTTGTGT", true); + // 012345678901234 5678901234567890 + BTString qual("ABCDEFGHIabcdef""ghiABCDEFGHIabcd"); + for(size_t j = 0; j < seq.length(); j++) { + // Assume left-to-right + size_t beg = j; + size_t end = j + GFM::default_ftabChars; + // Mismatch penalty is 3, so we have to skip starting + // points that are within 2 from the mismatch + if((i > 0 && j > 0) || j == seq.length()-1) { + // Right-to-left + if(beg < GFM::default_ftabChars) { + beg = 0; + } else { + beg -= GFM::default_ftabChars; + } + end -= GFM::default_ftabChars; + } + if(beg <= 14 && end >= 14) { + continue; + } + cerr << "Test " << (++testnum) << endl; + cerr << " Query matches once with a reference gap of length 1" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(1, 0.5); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + 
conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + true, // forward? + 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + // Need to adjust the mismatch penalty up to avoid alignments + // with lots of mismatches. + sc.setMmPen(COST_MODEL_CONSTANT, 6, 6); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert_eq(sc.refGapOpen() + 0 * sc.refGapExtend(), dr.sink()[0].pen); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl; + assert_eq(1, dr.sink().nelt()); + last_topf = dr.sink()[0].topf; + last_botf = dr.sink()[0].botf; + } + } + } + + // Query is longer than ftab and matches exactly once with one reference gap + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + // Set up the read + // Ref: CATGTCAGCTATATAGCGCGC" "TCGCATCATTTTGTGTGTAAACCA + // |||||||||||||| |||||||||||||||| + BTDnaString seq ("GCTATATAGCGCGCATG""TCGCATCATTTTGTGT", true); + // 01234567890123456 7890123456789012 + BTString qual("ABCDEFGHIabcdefgh""iABCDEFGHIabcdef"); + for(size_t j = 0; j < seq.length(); j++) { + // Assume left-to-right + size_t beg = j; + size_t end = j + GFM::default_ftabChars; + // Mismatch penalty is 3, so we have to skip starting + // points that are within 2 from the mismatch + if((i > 0 && j > 0) || j == seq.length()-1) { + // Right-to-left + if(beg < GFM::default_ftabChars) { + beg = 0; + } else { + beg -= GFM::default_ftabChars; + } + end -= GFM::default_ftabChars; + } + if(beg <= 14 && end >= 14) { + continue; + } + if(beg <= 15 && end >= 15) { + continue; + } + if(beg <= 16 && end >= 16) { + continue; + } + cerr << "Test " << (++testnum) << endl; 
+ cerr << " Query matches once with a reference gap of length 1" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(1, 0.25); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + true, // forward? + 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + // Need to adjust the mismatch penalty up to avoid alignments + // with lots of mismatches. + sc.setMmPen(COST_MODEL_CONSTANT, 6, 6); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert_eq(sc.refGapOpen() + 2 * sc.refGapExtend(), dr.sink()[0].pen); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl; + assert_eq(1, dr.sink().nelt()); + last_topf = dr.sink()[0].topf; + last_botf = dr.sink()[0].botf; + } + } + } + + // Query is longer than ftab and matches exactly once with one read gap, + // one ref gap, and one mismatch + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + // Set up the read + // Ref: CATGTCAGCT ATATAGCGCGCT CGCATCATTTTGTGTGTAAACCA + // |||||||||| |||||||||||| |||||| ||||||||||||| + BTDnaString seq ("CATGTCAGCT""GATATAGCGCGCT" "GCATCAATTTGTGTGTAAAC", true); + // 0123456789 0123456789012 34567890123456789012 + BTString qual("ABCDEFGHIa""bcdefghiACDEF" "GHIabcdefghijkABCDEF"); + for(size_t j = 0; j < seq.length(); j++) { + // Assume left-to-right + size_t beg = j; + size_t end = j + GFM::default_ftabChars; + // Mismatch penalty is 3, 
so we have to skip starting + // points that are within 2 from the mismatch + if((i > 0 && j > 0) || j == seq.length()-1) { + // Right-to-left + if(beg < GFM::default_ftabChars) { + beg = 0; + } else { + beg -= GFM::default_ftabChars; + } + end -= GFM::default_ftabChars; + } + if(beg <= 10 && end >= 10) { + continue; + } + if(beg <= 22 && end >= 22) { + continue; + } + if(beg <= 30 && end >= 30) { + continue; + } + cerr << "Test " << (++testnum) << endl; + cerr << " Query matches once with a read gap of length 1" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -50, 50); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(1, 0.5); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + true, // forward? + 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(1, dr.sink().nrange()); + assert_eq(sc.readGapOpen() + sc.refGapOpen() + sc.mm((int)'d' - 33), dr.sink()[0].pen); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl; + assert_eq(1, dr.sink().nelt()); + last_topf = dr.sink()[0].topf; + last_botf = dr.sink()[0].botf; + } + } + } + + delete gfms.first; + delete gfms.second; + + // Ref CATGTCAGCT-ATATAGCGCGCTCGCATCATTTTGTGTGTAAAC + // |||||||||| |||||||||||| |||||| ||||||||||||| + // Rd CATGTCAGCTGATATAGCGCGCT-GCATCAATTTGTGTGTAAAC + strs.clear(); + strs.push_back(string("CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAAC" + "NNNNNNNNNN" + "CATGTCAGCTGATATAGCGCGCTCGCATCATTTTGTGTGTAAAC" // same but without first ref gap + "N" + 
"CATGTCAGCTATATAGCGCGCTGCATCATTTTGTGTGTAAAC" // same but without first read gap + "N" + "CATGTCAGCTATATAGCGCGCTCGCATCAATTTGTGTGTAAAC" // same but without first mismatch + "N" + "CATGTCAGCTGATATAGCGCGCTGCATCAATTTGTGTGTAAAC" // Exact match for read + )); + gfms = GFM::fromStrings >( + strs, + packed, + REF_READ_REVERSE, + GFM::default_bigEndian, + GFM::default_lineRate, + GFM::default_offRate, + GFM::default_ftabChars, + ".aligner_seed2.cpp.tmp", + GFM::default_useBlockwise, + GFM::default_bmax, + GFM::default_bmaxMultSqrt, + GFM::default_bmaxDivN, + GFM::default_dcv, + GFM::default_seed, + false, // verbose + false, // autoMem + false); // sanity + + gfms.first->loadIntoMemory (color, -1, true, true, true, true, false); + gfms.second->loadIntoMemory(color, 1, true, true, true, true, false); + + // Query is longer than ftab and matches exactly once with one read gap, + // one ref gap, and one mismatch + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + // Set up the read + // Ref: CATGTCAGCT ATATAGCGCGCT CGCATCATTTTGTGTGTAAACCA + // |||||||||| |||||||||||| |||||| ||||||||||||| + BTDnaString seq ("CATGTCAGCT""GATATAGCGCGCT" "GCATCAATTTGTGTGTAAAC", true); + // 0123456789 0123456789012 34567890123456789012 + BTString qual("ABCDEFGHIa""bcdefghiACDEF" "GHIabcdefghijkABCDEF"); + for(size_t j = 0; j < seq.length(); j++) { + // Assume left-to-right + size_t beg = j; + size_t end = j + GFM::default_ftabChars; + // Mismatch penalty is 3, so we have to skip starting + // points that are within 2 from the mismatch + if((i > 0 && j > 0) || j == seq.length()-1) { + // Right-to-left + if(beg < GFM::default_ftabChars) { + beg = 0; + } else { + beg -= GFM::default_ftabChars; + } + end -= GFM::default_ftabChars; + } + if(beg <= 10 && end >= 10) { + continue; + } + if(beg <= 22 && end >= 22) { + continue; + } + if(beg <= 30 && end >= 30) { + continue; + } + cerr << "Test " << (++testnum) << endl; + cerr 
<< " Query matches once with a read gap of length 1" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -50, 50); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(1, 0.5); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + true, // forward? + 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(5, dr.sink().nrange()); + assert_eq(0, dr.sink()[0].pen); + assert_eq(min(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[1].pen); + assert_eq(max(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[2].pen); + assert_eq(sc.readGapOpen() + sc.refGapOpen(), dr.sink()[3].pen); + assert_eq(sc.readGapOpen() + sc.refGapOpen() + sc.mm((int)'d' - 33), dr.sink()[4].pen); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl; + assert_eq(5, dr.sink().nelt()); + last_topf = dr.sink()[0].topf; + last_botf = dr.sink()[0].botf; + } + } + } + + // Query is longer than ftab and matches exactly once with one read gap, + // one ref gap, one mismatch, and one N + { + size_t last_topf = std::numeric_limits::max(); + size_t last_botf = std::numeric_limits::max(); + for(int i = 0; i < 2; i++) { + // Set up the read + // Ref: CATGTCAGCT ATATAGCGCGCT CGCATCATTTTGTGTGTAAACCA + // |||||||||| |||||||||||| |||||| |||||| |||||| + BTDnaString seq ("CATGTCAGCT""GATATAGCGCGCT" "GCATCAATTTGTGNGTAAAC", true); + // 0123456789 0123456789012 34567890123456789012 + BTString qual("ABCDEFGHIa""bcdefghiACDEF" 
"GHIabcdefghijkABCDEF"); + for(size_t j = 0; j < seq.length(); j++) { + // Assume left-to-right + size_t beg = j; + size_t end = j + GFM::default_ftabChars; + // Mismatch penalty is 3, so we have to skip starting + // points that are within 2 from the mismatch + if((i > 0 && j > 0) || j == seq.length()-1) { + // Right-to-left + if(beg < GFM::default_ftabChars) { + beg = 0; + } else { + beg -= GFM::default_ftabChars; + } + end -= GFM::default_ftabChars; + } + if(beg <= 10 && end >= 10) { + continue; + } + if(beg <= 22 && end >= 22) { + continue; + } + if(beg <= 30 && end >= 30) { + continue; + } + if(beg <= 36 && end >= 36) { + continue; + } + cerr << "Test " << (++testnum) << endl; + cerr << " Query matches with various patterns of gaps, mismatches and Ns" << endl; + DescentMetrics mets; + PerReadMetrics prm; + DescentDriver dr; + + dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -50, 50); + + // Set up the DescentConfig + DescentConfig conf; + // Changed + conf.cons.init(1, 0.5); + conf.expol = DESC_EX_NONE; + + // Set up the search roots + dr.addRoot( + conf, // DescentConfig + j, // 5' offset into read of root + i == 0, // left-to-right? + true, // forward? 
+ 0.0f); // root priority + + // Do the search + Scoring sc = Scoring::base1(); + sc.setNPen(COST_MODEL_CONSTANT, 1); + dr.go(sc, *gfms.first, *gfms.second, mets, prm); + + // Confirm that an exact-matching alignment was found + assert_eq(5, dr.sink().nrange()); + assert_eq(sc.n(40), dr.sink()[0].pen); + assert_eq(sc.n(40) + min(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[1].pen); + assert_eq(sc.n(40) + max(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[2].pen); + assert_eq(sc.n(40) + sc.readGapOpen() + sc.refGapOpen(), dr.sink()[3].pen); + assert_eq(sc.n(40) + sc.readGapOpen() + sc.refGapOpen() + sc.mm((int)'d' - 33), dr.sink()[4].pen); + assert(last_topf == std::numeric_limits::max() || last_topf == dr.sink()[0].topf); + assert(last_botf == std::numeric_limits::max() || last_botf == dr.sink()[0].botf); + cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl; + assert_eq(5, dr.sink().nelt()); + last_topf = dr.sink()[0].topf; + last_botf = dr.sink()[0].botf; + } + } + } + + delete gfms.first; + delete gfms.second; + + cerr << "DONE" << endl; +} +#endif + diff --git a/aligner_seed2.h b/aligner_seed2.h new file mode 100644 index 0000000..552f7be --- /dev/null +++ b/aligner_seed2.h @@ -0,0 +1,4291 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#ifndef ALIGNER_SEED2_H_ +#define ALIGNER_SEED2_H_ + +/** + * The user of the DescentDriver class specifies a collection of search roots. + * Logic for picking these search roots is located elsewhere, not in this + * module. The search roots are annotated with a priority score, which + * + * The heap is a min-heap over pairs, where the first element of each pair is + * the score associated with a descent and the second element of each pair is + * the descent ID. + * + * Weeding out redundant descents is key; otherwise we end up reporting slight + * variations on the same alignment repeatedly, including variations with poor + * scores. What criteria do we use to determine whether two paths are + * redundant? + * + * Here's an example where the same set of read characters have been aligned in + * all three cases: + * + * Alignment 1 (sc = 0): + * Rd: GCTATATAGCGCGCTCGCATCATTTTGTGT + * |||||||||||||||||||||||||||||| + * Rf: GCTATATAGCGCGCTCGCATCATTTTGTGT + * + * Alignment 2 (sc = -22): + * Rd: GCTATATAGCGCGCTCGCATCATTTTGTGT + * ||||||||||||||||||||||| | ||| + * Rf: GCTATATAGCGCGCTCGCATCAT--TTTGT + * + * Alignment 3 (sc = -22): + * Rd: GCTATATAGCGCGCTCGCATCATT--TTGTGT + * |||||||||||||||||||||||| ||||| + * Rf: GCTATATAGCGCGCTCGCATCATTTTGTGTGT + * + * Rf from aln 1: GCTATATAGCGCGCTCGCATCATTTTGTGT + * Rf from aln 2: GCTATATAGCGCGCTCGCATCATTTTGT + * Rf from aln 3: GCTATATAGCGCGCTCGCATCATTTTGTGTGT + * + * Are alignments 2 and 3 redundant with alignment 1? We can't totally say + * without knowing the associated SA ranges. Take alignments 1 and 2. Either + * the SA ranges are the same or the SA range for 2 contains the SA range for + * 1. If they're the same, then alignment 2 is redundant with alignment 1. + * Otherwise, *some* of the elements in the SA range for alignment 2 are not + * redundant. + * + * In that example, the same read characters are aligned in all three + * alignments. 
Is it possible and profitable to consider scenarios where an + * alignment might be redundant with another alignment? + * + * Another question is *when* do we try to detect the redundancy? Before we + * try to extend through the matches, or after. After is easier, but less work + * has been avoided. + * + * What data structure do we query to determine whether there's redundancy? + * The situation is harder when we try to detect overlaps between SA ranges + * rather than identical SA ranges. Maybe: read intervals -> intersection tree -> penalties. + * + * 1. If we're introducing a gap and we could have introduced it deeper in the + * descent with the same effect w/r/t homopolymer length. + * 2. If we have Descent A with penalty B and Descent a with penalty b, and A + * aligns read characters [X, Y] to SA range [Z, W], and a aligns read + * characters [x, y] to SA range [z, w], then A is redundant with a if + * [x, y] is within [X, Y]. + * + * Found an alignment with total penalty = 3 + * GCAATATAGCGCGCTCGCATCATTTTGTGT + * || ||||||||||||||||||||||||||| + * GCTATATAGCGCGCTCGCATCATTTTGTGT + * + * Found an alignment with total penalty = 27 + * gCAATATAGCGCGCTCGCATCATTTTGTGT + * | |||||||||||||||||||||||| + * TATA-TAGCGCGCTCGCATCATTTTGTGT + */ + +#include +#include +#include +#include +#include "assert_helpers.h" +#include "random_util.h" +#include "aligner_result.h" +#include "gfm.h" +#include "simple_func.h" +#include "scoring.h" +#include "edit.h" +#include "read.h" +#include "ds.h" +#include "group_walk.h" +#include "btypes.h" + +typedef size_t TReadOff; +typedef int64_t TScore; +typedef float TRootPri; +typedef size_t TDescentId; +typedef size_t TRootId; + +/** + * enum encapsulating a few different policies for how we might extend descents + * in the direction opposite from their primary direction. + */ +enum { + // Never extend in the direction opposite from the primary. Just go in + // the primary direction until the bounce. 
+ DESC_EX_NONE = 1, + + // When we're finished extending out the matches for a descent, try to + // extend in the opposite direction in a way that extends all branches + // simultaneously. The Descent.nex_ field contains the number of positions + // we were able to extend through in this way. + DESC_EX_FROM_1ST_BRANCH = 2, + + // Each time we add an edge to the summary, extend it in the opposite + // direction. The DescentEdge.nex field contains the number of positions + // we were able to extend through, and this in turn gets propagated to + // Descent.nex_ if and when we branch from the DescentEdge. + DESC_EX_EACH_EDGE = 3 +}; + +/** + * Counters tracking how much work is being done; all are zeroed by reset(). + */ +struct DescentMetrics { + + DescentMetrics() { reset(); } + + void reset() { + bwops = bwops_1 = bwops_bi = recalc = branch = branch_mm = + branch_del = branch_ins = heap_max = descent_max = descentpos_max = + nex = 0; + } + + uint64_t bwops; // # FM Index ops + uint64_t bwops_1; // # LF1 FM Index ops + uint64_t bwops_bi; // # BiEx FM Index ops + uint64_t recalc; // # times outgoing edge summary was recalculated + uint64_t branch; // # times we descended from another descent + uint64_t branch_mm; // # times branch was on a mismatch + uint64_t branch_del; // # times branch was on a deletion + uint64_t branch_ins; // # times branch was on an insertion + uint64_t heap_max; // maximum size of Descent heap + uint64_t descent_max; // maximum size of Descent factory + uint64_t descentpos_max; // maximum size of DescentPos factory + uint64_t nex; // # extensions +}; + +/** + * Priority used to rank which descent we should branch from next. Right now, + * priority is governed by a 4-tuple. From higher to lower priority: + * + * 1. Penalty accumulated so far + * 2. Depth into the search space, including extensions + * 3. Width of the SA range (i.e. uniqueness) + * 4. 
Root priority + */ +struct DescentPriority { + + DescentPriority() { reset(); } + + DescentPriority( + TScore pen_, + size_t depth_, + TIndexOffU width_, + float rootpri_) + { + pen = pen_; + depth = depth_; + width = width_; + rootpri = rootpri_; + } + + /** + * Initialize new DescentPriority. + */ + void init(TScore pen_, size_t depth_, TIndexOffU width_, float rootpri_) { + pen = pen_; + depth = depth_; + width = width_; + rootpri = rootpri_; + } + + /** + * Reset to uninitialized state. + */ + void reset() { + width = 0; + } + + /** + * Return true iff DescentPriority is initialized. + */ + bool inited() const { + return width > 0; + } + + /** + * Return true iff this priority is prior to given priority. + */ + bool operator<(const DescentPriority& o) const { + assert(inited()); + assert(o.inited()); + // 1st priority: penalty accumulated so far + if(pen < o.pen) return true; + if(pen > o.pen) return false; + // 2nd priority: depth into the search space, including extensions + if(depth > o.depth) return true; + if(depth < o.depth) return false; + // 3rd priority: width of the SA range (i.e. uniqueness) + if(width < o.width) return true; + if(width > o.width) return false; + // 4th priority: root priority + if(rootpri > o.rootpri) return true; + return false; + } + + /** + * Return true iff this priority is prior to or equal to given priority. + */ + bool operator<=(const DescentPriority& o) const { + assert(inited()); + assert(o.inited()); + // 1st priority: penalty accumulated so far + if(pen < o.pen) return true; + if(pen > o.pen) return false; + // 2nd priority: depth into the search space, including extensions + if(depth > o.depth) return true; + if(depth < o.depth) return false; + // 3rd priority: width of the SA range (i.e. 
uniqueness) + if(width < o.depth) return true; + if(width > o.width) return false; + // 4th priority: root priority + if(rootpri > o.rootpri) return true; + return true; + } + + /** + * Return true iff this priority is prior to or equal to given priority. + */ + bool operator==(const DescentPriority& o) const { + assert(inited()); + assert(o.inited()); + return pen == o.pen && depth == o.depth && width == o.width && rootpri == o.rootpri; + } + + TScore pen; // total penalty accumulated so far + size_t depth; // depth from root of descent + TIndexOffU width; // width of the SA range + float rootpri; // priority of the root +}; + +static inline std::ostream& operator<<( + std::ostream& os, + const DescentPriority& o) +{ + os << "[" << o.pen << ", " << o.depth << ", " << o.width << ", " << o.rootpri << "]"; + return os; +} + +static inline std::ostream& operator<<( + std::ostream& os, + const std::pair& o) +{ + os << "{[" << o.first.pen << ", " << o.first.depth << ", " + << o.first.width << ", " << o.first.rootpri << "], " << o.second << "}"; + return os; +} + +typedef std::pair TDescentPair; + +/** + * Encapsulates the constraints limiting which outgoing edges are permitted. + * Specifically, we constrain the total penalty accumulated so far so that some + * outgoing edges will exceed the limit and be pruned. The limit is set + * according to our "depth" into the search, as measured by the number of read + * characters aligned so far. We divide the depth domain into two pieces, a + * piece close to the root, where the penty is constrained to be 0, and the + * remainder, where the maximum penalty is an interpolation between 0 and the + * maximum penalty + */ +struct DescentConstraints { + + DescentConstraints() { reset(); } + + /** + * Initialize with new constraint function. + */ + DescentConstraints(size_t nzero, double exp) { + init(nzero, exp); + } + + /** + * Initialize with given function. + */ + void init(size_t nzero_, double exp_) { + nzero = nzero_ > 0 ? 
nzero_ : 1; + exp = exp_; +#ifndef NDEBUG + for(size_t i = 1; i < nzero_ + 5; i++) { + assert_geq(get(i, nzero_ + 10, 100), get(i-1, nzero_ + 10, 100)); + } +#endif + } + + /** + * Reset to uninitialized state. + */ + void reset() { + nzero = 0; + exp = -1.0f; + } + + /** + * Return true iff the DescentConstraints has been initialized. + */ + bool inited() const { + return exp >= 0.0f; + } + + /** + * Get the maximum penalty total for depth 'off'. + */ + inline TScore get(TReadOff off, TReadOff rdlen, TAlScore maxpen) const { + if(off < nzero || nzero >= rdlen) { + return 0; + } + double frac = (double)(off - nzero) / (rdlen - nzero); + if(fabs(exp - 1.0f) > 0.00001) { + if(fabs(exp - 2.0f) < 0.00001) { + frac *= frac; + } else { + frac = pow(frac, exp); + } + } + return (TAlScore)(frac * maxpen + 0.5f); + } + + size_t nzero; + double exp; +}; + +/** + * Encapsulates settings governing how we descent. + */ +struct DescentConfig { + + DescentConfig() { reset(); } + + /** + * Reset the DescentConfig to an uninitialized state. + */ + void reset() { expol = 0; } + + /** + * Return true iff this DescentConfig is initialized. + */ + bool inited() const { return expol != 0; } + + DescentConstraints cons; // constraints + int expol; // extend policy +}; + +/** + * Encapsulates the state of a Descent that allows us to determine whether it + * is redundant with another Descent. Two Descents are redundant if: + * + * 1. Both are aligning the same read orientation (fw or rc) + * 2. Both are growing the alignment in the same direction (left-to-right or + * right-to-left) + * 3. They have aligned exactly the same read characters (which are always + * consecutive in the read) + * 4. 
The corresponding reference strings are identical + */ +struct DescentRedundancyKey { + + DescentRedundancyKey() { reset(); } + + DescentRedundancyKey( + TReadOff al5pf_, + size_t rflen_, + TIndexOffU topf_, + TIndexOffU botf_) + { + init(al5pf_, rflen_, topf_, botf_); + } + + void reset() { + al5pf = 0; + rflen = 0; + topf = botf = 0; + } + + bool inited() const { return rflen > 0; } + + void init( + TReadOff al5pf_, + size_t rflen_, + TIndexOffU topf_, + TIndexOffU botf_) + { + al5pf = al5pf_; + rflen = rflen_; + topf = topf_; + botf = botf_; + } + + bool operator==(const DescentRedundancyKey& o) const { + return al5pf == o.al5pf && rflen == o.rflen && topf == o.topf && botf == o.botf; + } + + bool operator<(const DescentRedundancyKey& o) const { + if(al5pf < o.al5pf) return true; + if(al5pf > o.al5pf) return false; + if(rflen < o.rflen) return true; + if(rflen > o.rflen) return false; + if(topf < o.topf) return true; + if(topf > o.topf) return false; + return botf < o.botf; + } + + TReadOff al5pf; // 3'-most aligned char, as offset from 5' end + size_t rflen; // number of reference characters involved in alignment + TIndexOffU topf; // top w/r/t forward index + TIndexOffU botf; // bot w/r/t forward index +}; + +/** + * Map from pairs to top, bot, penalty triples. + */ +class DescentRedundancyChecker { + +public: + + DescentRedundancyChecker() { reset(); } + + void clear() { reset(); } + + /** + * Reset to uninitialized state. + */ + void reset() { + bits_.reset(); + inited_ = false; + totsz_ = 0; // total size + totcap_ = 0; // total capacity + } + + const static int NPARTS = 8; + const static int PART_MASK = 7; + const static int NBITS = (1 << 16); + + /** + * Initialize using given read length. 
+ */ + void init(TReadOff rdlen) { + reset(); + // daehwan - for debugging purposes +#if 0 + bits_.resize(NBITS); + maplist_fl_.resize(NPARTS); + maplist_fr_.resize(NPARTS); + maplist_rl_.resize(NPARTS); + maplist_rr_.resize(NPARTS); + for(int i = 0; i < NPARTS; i++) { + maplist_fl_[i].resize(rdlen); + maplist_fr_[i].resize(rdlen); + maplist_rl_[i].resize(rdlen); + maplist_rr_[i].resize(rdlen); + totcap_ += maplist_fl_[i].totalCapacityBytes(); + totcap_ += maplist_fr_[i].totalCapacityBytes(); + totcap_ += maplist_rl_[i].totalCapacityBytes(); + totcap_ += maplist_rr_[i].totalCapacityBytes(); + for(size_t j = 0; j < rdlen; j++) { + maplist_fl_[i][j].clear(); + maplist_fr_[i][j].clear(); + maplist_rl_[i][j].clear(); + maplist_rr_[i][j].clear(); + totcap_ += maplist_fl_[i][j].totalCapacityBytes(); + totcap_ += maplist_fr_[i][j].totalCapacityBytes(); + totcap_ += maplist_rl_[i][j].totalCapacityBytes(); + totcap_ += maplist_rr_[i][j].totalCapacityBytes(); + } + } +#endif + inited_ = true; + } + + /** + * Return true iff the checker is initialized. + */ + bool inited() const { + return inited_; + } + + /** + * Check if this partial alignment is redundant with one that we've already + * explored. + */ + bool check( + bool fw, + bool l2r, + TReadOff al5pi, + TReadOff al5pf, + size_t rflen, + TIndexOffU topf, + TIndexOffU botf, + TScore pen) + { + // daehwan - for debugging purposes + return true; + + assert(inited_); + assert(topf > 0 || botf > 0); + DescentRedundancyKey k(al5pf, rflen, topf, botf); + size_t i = std::numeric_limits::max(); + size_t mask = topf & PART_MASK; + EMap& map = + (fw ? (l2r ? maplist_fl_[mask][al5pi] : maplist_fr_[mask][al5pi]) : + (l2r ? 
maplist_rl_[mask][al5pi] : maplist_rr_[mask][al5pi])); + size_t key = (topf & 255) | ((botf & 255) << 8); + if(bits_.test(key) && map.containsEx(k, i)) { + // Already contains the key + assert_lt(i, map.size()); + assert_geq(pen, map[i].second); + return false; + } + assert(!map.containsEx(k, i)); + size_t oldsz = map.totalSizeBytes(); + size_t oldcap = map.totalCapacityBytes(); + map.insert(make_pair(k, pen)); + bits_.set(key); + totsz_ += (map.totalSizeBytes() - oldsz); + totcap_ += (map.totalCapacityBytes() - oldcap); + return true; + } + + /** + * Check if this partial alignment is redundant with one that we've already + * explored using the Bw index SA range. + */ + bool contains( + bool fw, + bool l2r, + TReadOff al5pi, + TReadOff al5pf, + size_t rflen, + TIndexOffU topf, + TIndexOffU botf, + TScore pen) + { + // daehwan - for debugging purposes + return false; + + assert(inited_); + size_t key = (topf & 255) | ((botf & 255) << 8); + if(!bits_.test(key)) { + return false; + } + DescentRedundancyKey k(al5pf, rflen, topf, botf); + size_t mask = topf & PART_MASK; + EMap& map = + (fw ? (l2r ? maplist_fl_[mask][al5pi] : maplist_fr_[mask][al5pi]) : + (l2r ? maplist_rl_[mask][al5pi] : maplist_rr_[mask][al5pi])); + return map.contains(k); + } + + /** + * Return the total size of the redundancy map. + */ + size_t totalSizeBytes() const { + return totsz_; + } + + /** + * Return the total capacity of the redundancy map. + */ + size_t totalCapacityBytes() const { + return totcap_; + } + +protected: + + bool inited_; // initialized? + size_t totsz_; // total size + size_t totcap_; // total capacity + + // List of maps. Each entry is a map for all the DescentRedundancyKeys + // with al5pi equal to the offset into the list. + ELList, NPARTS, 100> maplist_fl_; // fw, l2r + ELList, NPARTS, 100> maplist_rl_; // !fw, l2r + ELList, NPARTS, 100> maplist_fr_; // fw, !l2r + ELList, NPARTS, 100> maplist_rr_; // !fw, !l2r + + EBitList<128> bits_; +}; + +/** + * A search root. 
Consists of an offset from the 5' end read and flags + * indicating (a) whether we're initially heading left-to-right or + * right-to-left, and (b) whether we're examining the read or its reverse + * complement. + * + * A root also comes with a priority ("pri") score indicating how promising it + * is as a root. Promising roots have long stretches of high-quality, + * non-repetitive nucleotides in the first several ply of the search tree. + * Also, roots beginning at the 5' end of the read may receive a higher + * priority. + */ +struct DescentRoot { + + DescentRoot() { reset(); } + + DescentRoot(size_t off5p_, bool l2r_, bool fw_, size_t len, float pri_) { + init(off5p_, l2r_, fw_, len, pri_); + } + + /** + * Reset this DescentRoot to uninitialized state. + */ + void reset() { + off5p = std::numeric_limits::max(); + } + + /** + * Return true iff this DescentRoot is uninitialized. + */ + bool inited() const { + return off5p == std::numeric_limits::max(); + } + + /** + * Initialize a new descent root. + */ + void init(size_t off5p_, bool l2r_, bool fw_, size_t len, float pri_) { + off5p = off5p_; + l2r = l2r_; + fw = fw_; + pri = pri_; + assert_lt(off5p, len); + } + + TReadOff off5p; // root origin offset, expressed as offset from 5' end + bool l2r; // true -> move in left-to-right direction + bool fw; // true -> work with forward read, false -> revcomp + float pri; // priority of seed +}; + +/** + * Set of flags indicating outgoing edges we've tried from a DescentPos. + */ +struct DescentPosFlags { + + DescentPosFlags() { reset(); } + + /** + * Set all flags to 1, indicating all outgoing edges are yet to be + * explored. + */ + void reset() { + mm_a = mm_c = mm_g = mm_t = rdg_a = rdg_c = rdg_g = rdg_t = rfg = 1; + reserved = 0; + } + + /** + * Return true iff all outgoing edges have already been explored. + */ + bool exhausted() const { + return ((uint16_t*)this)[0] == 0; + } + + /** + * Return false iff the specified mismatch has already been explored. 
+ */ + bool mmExplore(int c) { + assert_range(0, 3, c); + if(c == 0) { + return mm_a; + } else if(c == 1) { + return mm_c; + } else if(c == 2) { + return mm_g; + } else { + return mm_t; + } + } + + /** + * Try to explore a mismatch. Return false iff it has already been + * explored. + */ + bool mmSet(int c) { + assert_range(0, 3, c); + if(c == 0) { + bool ret = mm_a; mm_a = 0; return ret; + } else if(c == 1) { + bool ret = mm_c; mm_c = 0; return ret; + } else if(c == 2) { + bool ret = mm_g; mm_g = 0; return ret; + } else { + bool ret = mm_t; mm_t = 0; return ret; + } + } + + /** + * Return false iff specified read gap has already been explored. + */ + bool rdgExplore(int c) { + assert_range(0, 3, c); + if(c == 0) { + return rdg_a; + } else if(c == 1) { + return rdg_c; + } else if(c == 2) { + return rdg_g; + } else { + return rdg_t; + } + } + + /** + * Try to explore a read gap. Return false iff it has already been + * explored. + */ + bool rdgSet(int c) { + assert_range(0, 3, c); + if(c == 0) { + bool ret = rdg_a; rdg_a = 0; return ret; + } else if(c == 1) { + bool ret = rdg_c; rdg_c = 0; return ret; + } else if(c == 2) { + bool ret = rdg_g; rdg_g = 0; return ret; + } else { + bool ret = rdg_t; rdg_t = 0; return ret; + } + } + + /** + * Return false iff the reference gap has already been explored. + */ + bool rfgExplore() { + return rfg; + } + + /** + * Try to explore a reference gap. Return false iff it has already been + * explored. + */ + bool rfgSet() { + bool ret = rfg; rfg = 0; return ret; + } + + uint16_t mm_a : 1; + uint16_t mm_c : 1; + uint16_t mm_g : 1; + uint16_t mm_t : 1; + + uint16_t rdg_a : 1; + uint16_t rdg_c : 1; + uint16_t rdg_g : 1; + uint16_t rdg_t : 1; + + uint16_t rfg : 1; + + uint16_t reserved : 7; +}; + +/** + * FM Index state associated with a single position in a descent. For both the + * forward and backward indexes, it stores the four SA ranges corresponding to + * the four nucleotides. 
+ */ +struct DescentPos { + + /** + * Reset all tops and bots to 0. + */ + void reset() { + topf[0] = topf[1] = topf[2] = topf[3] = 0; + botf[0] = botf[1] = botf[2] = botf[3] = 0; + topb[0] = topb[1] = topb[2] = topb[3] = 0; + botb[0] = botb[1] = botb[2] = botb[3] = 0; + c = -1; + flags.reset(); + } + + /** + * Return true iff DescentPos has been initialized. + */ + bool inited() const { + return c >= 0; + } + +#ifndef NDEBUG + /** + * Check that DescentPos is internally consistent. + */ + bool repOk() const { + assert_range(0, 3, (int)c); + return true; + } +#endif + + TIndexOffU topf[4]; // SA range top indexes in fw index + TIndexOffU botf[4]; // SA range bottom indexes (exclusive) in fw index + TIndexOffU topb[4]; // SA range top indexes in bw index + TIndexOffU botb[4]; // SA range bottom indexes (exclusive) in bw index + char c; // read char that would yield match + DescentPosFlags flags; // flags +}; + +/** + * Encapsulates an edge outgoing from a descent. + */ +struct DescentEdge { + + DescentEdge() { reset(); } + + DescentEdge( + Edit e_, + TReadOff off5p_, + DescentPriority pri_, + size_t posFlag_, + TReadOff nex_ +#ifndef NDEBUG + , + size_t d_, + TIndexOffU topf_, + TIndexOffU botf_, + TIndexOffU topb_, + TIndexOffU botb_ +#endif + ) + { + init(e_, off5p_, pri_, posFlag_ +#ifndef NDEBUG + , d_, topf_, botf_, topb_, botb_ +#endif + ); + } + + /** + * Return true iff edge is initialized. + */ + bool inited() const { return e.inited(); } + + /** + * Reset to uninitialized state. + */ + void reset() { e.reset(); } + + /** + * Initialize DescentEdge given 5' offset, nucleotide, and priority. 
+ */ + void init( + Edit e_, + TReadOff off5p_, + DescentPriority pri_, + size_t posFlag_ +#ifndef NDEBUG + , + size_t d_, + TIndexOffU topf_, + TIndexOffU botf_, + TIndexOffU topb_, + TIndexOffU botb_ +#endif + ) + { + e = e_; + off5p = off5p_; + pri = pri_; + posFlag = posFlag_; +#ifndef NDEBUG + d = d_; + topf = topf_; + botf = botf_; + topb = topb_; + botb = botb_; +#endif + } + + /** + * Update flags to show this edge as visited. + */ + void updateFlags(EFactory& pf) { + if(inited()) { + if(e.isReadGap()) { + assert_neq('-', e.chr); + pf[posFlag].flags.rdgSet(asc2dna[e.chr]); + } else if(e.isRefGap()) { + pf[posFlag].flags.rfgSet(); + } else { + assert_neq('-', e.chr); + pf[posFlag].flags.mmSet(asc2dna[e.chr]); + } + } + } + + /** + * Return true iff this edge has higher priority than the given edge. + */ + bool operator<(const DescentEdge& o) const { + if(inited() && !o.inited()) { + return true; + } else if(!inited()) { + return false; + } + return pri < o.pri; + } + + DescentPriority pri; // priority of the edge + //TReadOff nex; // # extends possible from this edge + size_t posFlag; // depth of DescentPos where flag should be set + + +#ifndef NDEBUG + // This can be recreated by looking at the edit, the paren't descent's + // len_, al5pi_, al5pf_. I have it here so we can sanity check. + size_t d; + TIndexOffU topf, botf, topb, botb; +#endif + + Edit e; + TReadOff off5p; +}; + +/** + * Encapsulates an incomplete summary of the outgoing edges from a descent. We + * don't try to store information about all outgoing edges, because doing so + * will generally be wasteful. We'll typically only try a handful of them per + * descent. + */ +class DescentOutgoing { + +public: + + /** + * Return the best edge and rotate in preparation for next call. 
+ */ + DescentEdge rotate() { + DescentEdge tmp = best1; + assert(!(best2 < tmp)); + best1 = best2; + assert(!(best3 < best2)); + best2 = best3; + assert(!(best4 < best3)); + best3 = best4; + assert(!(best5 < best4)); + best4 = best5; + best5.reset(); + return tmp; + } + + /** + * Given a potental outgoing edge, place it where it belongs in the running + * list of best 5 outgoing edges from this descent. + */ + void update(DescentEdge e) { + if(!best1.inited()) { + best1 = e; + } else if(e < best1) { + best5 = best4; + best4 = best3; + best3 = best2; + best2 = best1; + best1 = e; + } else if(!best2.inited()) { + best2 = e; + } else if(e < best2) { + best5 = best4; + best4 = best3; + best3 = best2; + best2 = e; + } else if(!best3.inited()) { + best3 = e; + } else if(e < best3) { + best5 = best4; + best4 = best3; + best3 = e; + } else if(!best4.inited()) { + best4 = e; + } else if(e < best4) { + best5 = best4; + best4 = e; + } else if(!best5.inited() || e < best5) { + best5 = e; + } + } + + /** + * Clear all the outgoing edges stored here. + */ + void clear() { + best1.reset(); + best2.reset(); + best3.reset(); + best4.reset(); + best5.reset(); + } + + /** + * Return true iff there are no outgoing edges currently represented in + * this summary. There may still be outgoing edges, they just haven't + * been added to the summary. + */ + bool empty() const { + return !best1.inited(); + } + + /** + * Return the DescentPriority of the best outgoing edge. + */ + DescentPriority bestPri() const { + assert(!empty()); + return best1.pri; + } + + DescentEdge best1; // best + DescentEdge best2; // 2nd-best + DescentEdge best3; // 3rd-best + DescentEdge best4; // 4th-best + DescentEdge best5; // 5th-best +}; + +template +class DescentAlignmentSink; + +/** + * Encapsulates a descent through a search tree, along a path of matches. + * Descents that are part of the same alignment form a chain. 
Two aligments + * adjacent in the chain are connected either by an edit, or by a switch in + * direction. Because a descent might have a different direction from the + * DescentRoot it ultimately came from, it has its own 'l2r' field, which might + * differ from the root's. + */ +template +class Descent { + +public: + + Descent() { reset(); } + + /** + * Initialize a new descent branching from the given descent via the given + * edit. Return false if the Descent has no outgoing edges (and can + * therefore have its memory freed), true otherwise. + */ + bool init( + const Read& q, // query + TRootId rid, // root id + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + TAlScore maxpen, // maximum penalty + TReadOff al5pi, // offset from 5' of 1st aligned char + TReadOff al5pf, // offset from 5' of last aligned char + TIndexOffU topf, // SA range top in FW index + TIndexOffU botf, // SA range bottom in FW index + TIndexOffU topb, // SA range top in BW index + TIndexOffU botb, // SA range bottom in BW index + bool l2r, // direction this descent will go in + size_t descid, // my ID + TDescentId parent, // parent ID + TScore pen, // total penalties so far + const Edit& e, // edit for incoming edge + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + DescentRedundancyChecker& re, // redundancy checker + EFactory& df, // Descent factory + EFactory& pf, // DescentPos factory + const EList& rs, // roots + const EList& cs, // configs + EHeap& heap, // heap + DescentAlignmentSink& alsink, // alignment sink + DescentMetrics& met, // metrics + PerReadMetrics& prm); // per-read metrics + + /** + * Initialize a new descent beginning at the given root. Return false if + * the Descent has no outgoing edges (and can therefore have its memory + * freed), true otherwise. 
+ */ + bool init( + const Read& q, // query + TRootId rid, // root id + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + TAlScore maxpen, // maximum penalty + size_t descid, // id of this Descent + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + DescentRedundancyChecker& re, // redundancy checker + EFactory& df, // Descent factory + EFactory& pf, // DescentPos factory + const EList& rs, // roots + const EList& cs, // configs + EHeap& heap, // heap + DescentAlignmentSink& alsink, // alignment sink + DescentMetrics& met, // metrics + PerReadMetrics& prm); // per-read metrics + + /** + * Return true iff this Descent has been initialized. + */ + bool inited() const { + return descid_ != std::numeric_limits::max(); + } + + /** + * Reset to uninitialized state. + */ + void reset() { + lastRecalc_ = true; + descid_ = std::numeric_limits::max(); + } + + /** + * Return true iff this Descent is a search root. + */ + bool root() const { + return parent_ == std::numeric_limits::max(); + } + + /** + * Return the edit. + */ + const Edit& edit() const { + return edit_; + } + + /** + * Return id of parent. + */ + TDescentId parent() const { + return parent_; + } + + /** + * Take the best outgoing edge and follow it. + */ + void followBestOutgoing( + const Read& q, // read + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + TAlScore maxpen, // maximum penalty + DescentRedundancyChecker& re, // redundancy checker + EFactory& df, // factory with Descent + EFactory& pf, // factory with DescentPoss + const EList& rs, // roots + const EList& cs, // configs + EHeap& heap, // heap of descents + DescentAlignmentSink& alsink, // alignment sink + DescentMetrics& met, // metrics + PerReadMetrics& prm); // per-read metrics + + /** + * Return true iff no outgoing edges from this descent remain unexplored. 
+ */ + bool empty() const { return lastRecalc_ && out_.empty(); } + +#ifndef NDEBUG + /** + * Return true iff the Descent is internally consistent. + */ + bool repOk(const Read *q) const { + // A non-root can have an uninitialized edit_ if it is from a bounce + //assert( root() || edit_.inited()); + assert(!root() || !edit_.inited()); + assert_eq(botf_ - topf_, botb_ - topb_); + if(q != NULL) { + assert_leq(len_, q->length()); + } + return true; + } +#endif + + size_t al5pi() const { return al5pi_; } + size_t al5pf() const { return al5pf_; } + bool l2r() const { return l2r_; } + + /** + * Print a stacked representation of this descent and all its parents. Assumes that + */ + void print( + std::ostream* os, + const char *prefix, + const Read& q, + size_t trimLf, + size_t trimRg, + bool fw, + const EList& edits, + size_t ei, + size_t en, + BTDnaString& rf) const; + + /** + * Collect all the edits + */ + void collectEdits( + EList& edits, + const Edit *e, + EFactory& df) + { + // Take just the portion of the read that has aligned up until this + // point + size_t nuninited = 0; + size_t ei = edits.size(); + size_t en = 0; + if(e != NULL && e->inited()) { + edits.push_back(*e); + en++; + } + size_t cur = descid_; + while(cur != std::numeric_limits::max()) { + if(!df[cur].edit().inited()) { + nuninited++; + assert_leq(nuninited, 2); + } else { + edits.push_back(df[cur].edit()); + en++; + } + cur = df[cur].parent(); + } + // Sort just the edits we just added + edits.sortPortion(ei, en); + } + +protected: + + /** + * + */ + bool bounce( + const Read& q, // query string + TIndexOffU topf, // SA range top in fw index + TIndexOffU botf, // SA range bottom in fw index + TIndexOffU topb, // SA range top in bw index + TIndexOffU botb, // SA range bottom in bw index + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + TAlScore maxpen, // maximum penalty + DescentRedundancyChecker& re, 
// redundancy checker + EFactory& df, // factory with Descent + EFactory& pf, // factory with DescentPoss + const EList& rs, // roots + const EList& cs, // configs + EHeap& heap, // heap of descents + DescentAlignmentSink& alsink, // alignment sink + DescentMetrics& met, // metrics + PerReadMetrics& prm); // per-read metrics + + /** + * Given the forward and backward indexes, and given topf/botf/topb/botb, + * get tloc, bloc ready for the next step. + */ + void nextLocsBi( + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + SideLocus& tloc, // top locus + SideLocus& bloc, // bot locus + index_t topf, // top in BWT + index_t botf, // bot in BWT + index_t topb, // top in BWT' + index_t botb); // bot in BWT' + + /** + * Advance this descent by following read matches as far as possible. + */ + bool followMatches( + const Read& q, // query string + const Scoring& sc, // scoring scheme + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + DescentRedundancyChecker& re, // redundancy checker + EFactory& df, // Descent factory + EFactory& pf, // DescentPos factory + const EList& rs, // roots + const EList& cs, // configs + EHeap& heap, // heap + DescentAlignmentSink& alsink, // alignment sink + DescentMetrics& met, // metrics + PerReadMetrics& prm, // per-read metrics + bool& branches, // out: true -> there are > 0 ways to branch + bool& hitEnd, // out: true -> hit read end with non-empty range + bool& done, // out: true -> we made a full alignment + TReadOff& off5p_i, // out: initial 5' offset + TIndexOffU& topf_bounce, // out: top of SA range for fw idx for bounce + TIndexOffU& botf_bounce, // out: bot of SA range for fw idx for bounce + TIndexOffU& topb_bounce, // out: top of SA range for bw idx for bounce + TIndexOffU& botb_bounce); // out: bot of SA range for bw idx for bounce + + /** + * Recalculate our summary of the outgoing edges from this descent. When + * deciding what outgoing edges are legal, we abide by constraints. 
+ * Typically, they limit the total of the penalties accumulated so far, as + * a function of distance from the search root. E.g. a constraint might + * disallow any gaps or mismatches within 20 ply of the search root, then + * allow 1 mismatch within 30 ply, then allow up to 1 mismatch or 1 gap + * within 40 ply, etc. + */ + size_t recalcOutgoing( + const Read& q, // query string + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + TAlScore maxpen, // maximum penalty + DescentRedundancyChecker& re, // redundancy checker + EFactory& pf, // factory with DescentPoss + const EList& rs, // roots + const EList& cs, // configs + PerReadMetrics& prm); // per-read metrics + + TRootId rid_; // root id + + TReadOff al5pi_; // lo offset from 5' end of aligned read char + TReadOff al5pf_; // hi offset from 5' end of aligned read char + bool l2r_; // left-to-right? + int gapadd_; // net ref characters additional + TReadOff off5p_i_; // offset we started out at for this descent + + TIndexOffU topf_, botf_; // incoming SA range w/r/t forward index + TIndexOffU topb_, botb_; // incoming SA range w/r/t mirror (backward) index + + size_t descid_; // ID of this descent + TDescentId parent_; // ID of parent descent + TScore pen_; // total penalties accumulated so far + size_t posid_; // ID of 1st elt of the DescentPos factory w/ + // descent pos info for this descent + size_t len_; // length of stretch of matches + DescentOutgoing out_; // summary of outgoing edges + Edit edit_; // edit joining this descent with parent + bool lastRecalc_; // set by recalcOutgoing if out edges empty +}; + +/** + * An alignment result from a Descent. + */ +struct DescentAlignment { + + DescentAlignment() { reset(); } + + /** + * Reset DescentAlignment to be uninitialized, i.e. with an empty SA range + * (topf == botf == 0), zero penalty and no edits. + */ + void reset() { + topf = botf = 0; + pen = 0; + fw = false; + ei = en = 0; + } + + /** + * Initialize this DescentAlignment. 
+ */ + void init( + TScore pen_, + bool fw_, + TIndexOffU topf_, + TIndexOffU botf_, + size_t ei_, + size_t en_) + { + assert_gt(botf_, topf_); + pen = pen_; + fw = fw_; + topf = topf_; + botf = botf_; + ei = ei_; + en = en_; + } + + /** + * Return true iff DescentAlignment is initialized, i.e. its SA range is + * non-empty. + */ + bool inited() const { + return botf > topf; + } + + /** + * Return true iff the alignment is perfect, i.e. its penalty is 0. + */ + bool perfect() const { + return pen == 0; + } + + /** + * Return the number of elements in this range. + */ + size_t size() const { + return botf - topf; + } + + TScore pen; // penalty (0 for a perfect alignment) + + bool fw; // forward or revcomp aligned? + + TIndexOffU topf; // top in forward index + TIndexOffU botf; // bot in forward index + + size_t ei; // First edit in DescentAlignmentSink::edits_ involved in aln + size_t en; // # edits in DescentAlignmentSink::edits_ involved in aln +}; + +/** + * A partial alignment result from a Descent where the reference offset has + * been resolved. + */ +struct DescentPartialResolvedAlignment { + + DescentPartialResolvedAlignment() { reset(); } + + /** + * Reset DescentPartialResolvedAlignment to be uninitialized, i.e. with an + * empty SA range and a reset reference coordinate. + */ + void reset() { + topf = botf = 0; + pen = 0; + fw = false; + ei = en = 0; + refcoord.reset(); + } + + /** + * Initialize this DescentPartialResolvedAlignment. The SA range must be + * non-empty (botf_ > topf_). + */ + void init( + TScore pen_, + bool fw_, + TIndexOffU topf_, + TIndexOffU botf_, + size_t ei_, + size_t en_, + const Coord& refcoord_) + { + assert_gt(botf_, topf_); + pen = pen_; + fw = fw_; + topf = topf_; + botf = botf_; + ei = ei_; + en = en_; + refcoord = refcoord_; + } + + /** + * Return true iff DescentPartialResolvedAlignment is initialized, i.e. its + * SA range is non-empty. + */ + bool inited() const { + return botf > topf; + } + + /** + * Return the number of elements in this range. + */ + size_t size() const { + return botf - topf; + } + + TScore pen; // penalty + + bool fw; // forward or revcomp aligned? 
+ + TIndexOffU topf; // top in forward index + TIndexOffU botf; // bot in forward index + + size_t ei; // First edit in DescentAlignmentSink::edits_ involved in aln + size_t en; // # edits in DescentAlignmentSink::edits_ involved in aln + + Coord refcoord; // reference coord of leftmost ref char involved +}; + +/** + * Class that accepts alignments found during descent and maintains the state + * required to dispense them to consumers in an appropriate order. + * + * As for order in which they are dispensed, in order to maintain uniform + * distribution over equal-scoring alignments, a good policy may be not to + * dispense alignments at a given score stratum until *all* alignments at that + * stratum have been accumulated (i.e. until our best-first search has moved on + * to a worse stratum). This also has the advantage that, for each alignment, + * we can also report the number of other alignments in that cost stratum. + * + * A lazier alternative is to assume that the order in which alignments in a + * given stratum arrive is already pseudo-random, which frees us from having to + * wait until the entire stratum has been explored. But there is reason to + * think that this order is not truly pseudo-random, since our root placement + * and root priorities will tend to first lead us to alignments with certain + * patterns of edits. + */ +template +class DescentAlignmentSink { + +public: + + /** + * If this is the final descent in a complete end-to-end alignment, report + * the alignment. 
+ */ + bool reportAlignment( + const Read& q, // query string + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + TIndexOffU topf, // SA range top in forward index + TIndexOffU botf, // SA range bottom in forward index + TIndexOffU topb, // SA range top in backward index + TIndexOffU botb, // SA range bottom in backward index + TDescentId id, // id of leaf Descent + TRootId rid, // id of search root + const Edit& e, // final edit, if needed + TScore pen, // total penalty + EFactory >& df, // factory with Descent + EFactory& pf, // factory with DescentPoss + const EList& rs, // roots + const EList& cs); // configs + + /** + * Reset to uninitialized state. + */ + void reset() { + edits_.clear(); + als_.clear(); + lhs_.clear(); + rhs_.clear(); + nelt_ = 0; + bestPen_ = worstPen_ = std::numeric_limits::max(); + } + + /** + * Return the total size occupied by this alignment sink: its edit list, + * alignment list and lhs_/rhs_ sets, plus one size_t. + */ + size_t totalSizeBytes() const { + return edits_.totalSizeBytes() + + als_.totalSizeBytes() + + lhs_.totalSizeBytes() + + rhs_.totalSizeBytes() + + sizeof(size_t); + } + + /** + * Return the total capacity of this alignment sink: its edit list, + * alignment list and lhs_/rhs_ sets, plus one size_t. + */ + size_t totalCapacityBytes() const { + return edits_.totalCapacityBytes() + + als_.totalCapacityBytes() + + lhs_.totalCapacityBytes() + + rhs_.totalCapacityBytes() + + sizeof(size_t); + } + + /** + * Return the number of SA ranges involved in hits. + */ + size_t nrange() const { + return als_.size(); + } + + /** + * Return the number of SA elements involved in hits. + */ + size_t nelt() const { + return nelt_; + } + + /** + * The caller provides 'i', which is an offset of a particular element in + * one of the SA ranges in the current stratum. This function returns, in + * 'al', 'ri' and 'off', information about the element in terms of the range + * it's part of, that range's index, and its offset into that range. 
+ */ + void elt(size_t i, DescentAlignment& al, size_t& ri, size_t& off) const { + assert_lt(i, nelt()); + for(size_t j = 0; j < als_.size(); j++) { + if(i < als_[j].size()) { + al = als_[j]; + ri = j; + off = i; + return; + } + i -= als_[j].size(); + } + assert(false); + } + + /** + * Get a particular alignment. + */ + const DescentAlignment& operator[](size_t i) const { + return als_[i]; + } + + /** + * Return true iff (a) we found an alignment since the sink was initialized + * or since the last time advanceStratum() was called, and (b) the penalty + * associated with the current-best task on the heap ('best') is worse + * (higher) than the penalty associated with the alignments found most + * recently (worstPen_). + */ + bool stratumDone(TAlScore bestPen) const { + if(nelt_ > 0 && bestPen > worstPen_) { + return true; + } + return false; + } + + /** + * The alignment consumer calls this to indicate that they are done with + * all the alignments in the current best non-empty stratum. We can + * therefore mark all those alignments as "reported" and start collecting + * results for the next stratum. + */ + void advanceStratum() { + assert_gt(nelt_, 0); + edits_.clear(); + als_.clear(); + // Don't reset lhs_ or rhs_ + nelt_ = 0; + bestPen_ = worstPen_ = std::numeric_limits::max(); + } + +#ifndef NDEBUG + /** + * Check that alignment sink is internally consistent. 
+ */ + bool repOk() const { + assert_geq(nelt_, als_.size()); + for(size_t i = 1; i < als_.size(); i++) { + assert_geq(als_[i].pen, als_[i-1].pen); + } + assert(bestPen_ == std::numeric_limits::max() || worstPen_ >= bestPen_); + return true; + } +#endif + + TAlScore bestPenalty() const { return bestPen_; } + TAlScore worstPenalty() const { return worstPen_; } + + size_t editsSize() const { return edits_.size(); } + size_t alsSize() const { return als_.size(); } + size_t lhsSize() const { return lhs_.size(); } + size_t rhsSize() const { return rhs_.size(); } + + const EList& edits() const { return edits_; } + +protected: + + EList edits_; + EList als_; + ESet > lhs_; + ESet > rhs_; + size_t nelt_; + TAlScore bestPen_; // best (smallest) penalty among as-yet-unreported alns + TAlScore worstPen_; // worst (greatest) penalty among as-yet-unreported alns +#ifndef NDEBUG + BTDnaString tmprfdnastr_; +#endif + +}; + +/** + * Class that aggregates partial alignments taken from a snapshot of the + * DescentDriver heap. + */ +class DescentPartialResolvedAlignmentSink { + +public: + + /** + * Reset to uninitialized state. + */ + void reset() { + edits_.clear(); + als_.clear(); + nelt_ = 0; + bestPen_ = worstPen_ = std::numeric_limits::max(); + } + + /** + * Return the total size occupied by this partial-alignment sink: its edit + * list and alignment list, plus one size_t. + */ + size_t totalSizeBytes() const { + return edits_.totalSizeBytes() + + als_.totalSizeBytes() + + sizeof(size_t); + } + + /** + * Return the total capacity of this partial-alignment sink: its edit list + * and alignment list, plus one size_t. + */ + size_t totalCapacityBytes() const { + return edits_.totalCapacityBytes() + + als_.totalCapacityBytes() + + sizeof(size_t); + } + + /** + * Return the number of SA ranges involved in hits. + */ + size_t nrange() const { + return als_.size(); + } + + /** + * Return the number of SA elements involved in hits. 
+ */ + size_t nelt() const { + return nelt_; + } + + /** + * The caller provides 'i', which is an offset of a particular element in + * one of the SA ranges in the current stratum. This function walks the + * ranges in order and returns a copy of the containing range in 'al', that + * range's index in 'ri', and the element's offset within the range in + * 'off'. 'i' must be less than nelt(). + */ + void elt(size_t i, DescentPartialResolvedAlignment& al, size_t& ri, size_t& off) const { + assert_lt(i, nelt()); + for(size_t j = 0; j < als_.size(); j++) { + if(i < als_[j].size()) { + al = als_[j]; + ri = j; + off = i; + return; + } + i -= als_[j].size(); + } + assert(false); + } + + /** + * Get a particular alignment. + */ + const DescentPartialResolvedAlignment& operator[](size_t i) const { + return als_[i]; + } + + /** + * Return true iff (a) we found an alignment since the sink was initialized + * or since the last time advanceStratum() was called, and (b) the penalty + * associated with the current-best task on the heap ('bestPen') is worse + * (higher) than the penalty associated with the alignments found most + * recently (worstPen_). + */ + bool stratumDone(TAlScore bestPen) const { + if(nelt_ > 0 && bestPen > worstPen_) { + return true; + } + return false; + } + + /** + * The alignment consumer calls this to indicate that they are done with + * all the alignments in the current best non-empty stratum. We can + * therefore mark all those alignments as "reported" and start collecting + * results for the next stratum. + */ + void advanceStratum() { + assert_gt(nelt_, 0); + edits_.clear(); + als_.clear(); + nelt_ = 0; + bestPen_ = worstPen_ = std::numeric_limits::max(); + } + +#ifndef NDEBUG + /** + * Check that partial alignment sink is internally consistent. 
+ */ + bool repOk() const { + assert_geq(nelt_, als_.size()); + //for(size_t i = 1; i < als_.size(); i++) { + // assert_geq(als_[i].pen, als_[i-1].pen); + //} + assert(bestPen_ == std::numeric_limits::max() || worstPen_ >= bestPen_); + return true; + } +#endif + + TAlScore bestPenalty() const { return bestPen_; } + TAlScore worstPenalty() const { return worstPen_; } + + size_t editsSize() const { return edits_.size(); } + size_t alsSize() const { return als_.size(); } + + const EList& edits() const { return edits_; } + +protected: + + EList edits_; + EList als_; + size_t nelt_; + TAlScore bestPen_; // best (smallest) penalty among as-yet-unreported alns + TAlScore worstPen_; // worst (greatest) penalty among as-yet-unreported alns +}; + +/** + * Abstract parent for classes that select descent roots and descent + * configurations given information about the read. + */ +class DescentRootSelector { + +public: + + virtual ~DescentRootSelector() { } + + virtual void select( + const Read& q, // read that we're selecting roots for + const Read* qo, // opposite mate, if applicable + bool nofw, // don't add roots for fw read + bool norc, // don't add roots for rc read + EList& confs, // put DescentConfigs here + EList& roots) = 0; // put DescentRoot here +}; + +/** + * Encapsulates a set of conditions governing when the DescentDriver should + * stop. + */ +struct DescentStoppingConditions { + + DescentStoppingConditions() { reset(); } + + DescentStoppingConditions( + size_t totsz_, + size_t nfound_, + bool stra_, + size_t nbwop_) + { + init(totsz_, nfound_, stra_, nbwop_); + } + + /** + * Reset to uninitialized state. + */ + void reset() { + totsz = nfound = nbwop = std::numeric_limits::max(); + stra = false; + assert(!inited()); + } + + /** + * Initialize this DescentStoppingConditions. 
+ */ + void init( + size_t totsz_, + size_t nfound_, + bool stra_, + size_t nbwop_) + { + totsz = totsz_; + nfound = nfound_; + stra = stra_; + nbwop = nbwop_; + assert(inited()); + } + + /** + * Return true iff this instance is initialized. + */ + bool inited() const { + return totsz != std::numeric_limits::max(); + } + + size_t totsz; // total size of all the expandable data structures in bytes + size_t nfound; // # alignments found + bool stra; // stop after each non-empty stratum + size_t nbwop; // # Burrows-Wheeler (rank) operations performed +}; + +enum { + DESCENT_DRIVER_ALN = 1, + DESCENT_DRIVER_STRATA = 2, + DESCENT_DRIVER_MEM = 4, + DESCENT_DRIVER_BWOPS = 8, + DESCENT_DRIVER_DONE = 16 +}; + +/** + * Class responsible for advancing all the descents. The initial descents may + * emanate from several different locations in the read. Note that descents + * may become redundant with each other, and should then be eliminated. + */ +template +class DescentDriver { +public: + + DescentDriver(bool veryVerbose) : + veryVerbose_(veryVerbose) + { + reset(); + } + + /** + * Initialize driver with respect to a new read. If a DescentRootSelector + * is specified, then it is used to obtain roots as well. + */ + void initRead( + const Read& q, + bool nofw, + bool norc, + TAlScore minsc, + TAlScore maxpen, + const Read* qu = NULL, + DescentRootSelector *sel = NULL) + { + reset(); + q_ = q; + minsc_ = minsc; + maxpen_ = maxpen; + if(sel != NULL) { + sel->select(q_, qu, nofw, norc, confs_, roots_); + } + re_.init(q.length()); + } + + /** + * Add a new search root, which might (a) prefer to move in a left-to-right + * direction, and might (b) be with respect to the read or its reverse + * complement. 
+ */ + void addRoot( + const DescentConfig& conf, + TReadOff off, + bool l2r, + bool fw, + float pri) + { + confs_.push_back(conf); + assert_lt(off, q_.length()); + // Flip the direction if the root sits at the read end it would move toward + if(l2r && off == q_.length()-1) { + l2r = !l2r; + } else if(!l2r && off == 0) { + l2r = !l2r; + } + roots_.push_back(DescentRoot(off, l2r, fw, q_.length(), pri)); + } + + /** + * Clear out the DescentRoots currently configured, along with their + * DescentConfigs. + */ + void clearRoots() { + confs_.clear(); + roots_.clear(); + } + + /** + * Clear the Descent driver so that we're ready to re-start seed alignment + * for the current read. Note that this also discards the configured roots + * and configs, so they must be added again afterwards. + */ + void resetRead() { + df_.clear(); // clear Descents + assert_leq(df_.totalSizeBytes(), 100); + pf_.clear(); // clear DescentPoss + assert_leq(pf_.totalSizeBytes(), 100); + heap_.clear(); // clear Heap + assert_leq(heap_.totalSizeBytes(), 100); + roots_.clear(); // clear roots + assert_leq(roots_.totalSizeBytes(), 100); + confs_.clear(); // clear confs + assert_leq(confs_.totalSizeBytes(), 100); + alsink_.reset(); // clear alignment sink + assert_leq(alsink_.totalSizeBytes(), 100); + re_.reset(); // clear redundancy checker + assert_leq(re_.totalSizeBytes(), 100); + rootsInited_ = 0; // haven't yet created initial descents + curPen_ = 0; // current penalty starts at 0 + } + + /** + * Reset the Descent driver. Currently this simply forwards to + * resetRead(). + */ + void reset() { + resetRead(); + } + + /** + * Perform seed alignment. + */ + void go( + const Scoring& sc, // scoring scheme + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + DescentMetrics& met, // metrics + PerReadMetrics& prm); // per-read metrics + + /** + * Perform seed alignment until some stopping condition is satisfied. 
+ */ + int advance( + const DescentStoppingConditions& stopc, // stopping conditions + const Scoring& sc, // scoring scheme + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + DescentMetrics& met, // metrics + PerReadMetrics& prm); // per-read metrics + +#ifndef NDEBUG + /** + * Return true iff this DescentDriver is well formed. Throw an assertion + * otherwise. + */ + bool repOk() const { + return true; + } +#endif + + /** + * Return the number of end-to-end alignments reported. + */ + size_t numAlignments() const { + return alsink_.nelt(); + } + + /** + * Return the associated DescentAlignmentSink object. + */ + const DescentAlignmentSink& sink() const { + return alsink_; + } + + /** + * Return the associated DescentAlignmentSink object. + */ + DescentAlignmentSink& sink() { + return alsink_; + } + + /** + * Return the total size occupued by the Descent driver and all its + * constituent parts. + */ + size_t totalSizeBytes() const { + return df_.totalSizeBytes() + + pf_.totalSizeBytes() + + heap_.totalSizeBytes() + + roots_.totalSizeBytes() + + confs_.totalSizeBytes() + + alsink_.totalSizeBytes() + + re_.totalSizeBytes(); + } + + /** + * Return the total capacity of the Descent driver and all its constituent + * parts. + */ + size_t totalCapacityBytes() const { + return df_.totalCapacityBytes() + + pf_.totalCapacityBytes() + + heap_.totalCapacityBytes() + + roots_.totalCapacityBytes() + + confs_.totalCapacityBytes() + + alsink_.totalCapacityBytes() + + re_.totalCapacityBytes(); + } + + /** + * Return a const ref to the query. + */ + const Read& query() const { + return q_; + } + + /** + * Return the minimum score that must be achieved by an alignment in order + * for it to be considered "valid". 
+ */ + TAlScore minScore() const { + return minsc_; + } + +protected: + + Read q_; // query nucleotide and quality strings + TAlScore minsc_; // minimum score + TAlScore maxpen_; // maximum penalty + EFactory > df_; // factory holding all the Descents, which + // must be referred to by ID + EFactory pf_; // factory holding all the DescentPoss, which + // must be referred to by ID + EList roots_; // search roots + EList confs_; // configuration params for each root + size_t rootsInited_; // # initial Descents already created + EHeap heap_; // priority queue of Descents + DescentAlignmentSink alsink_; // alignment sink + DescentRedundancyChecker re_; // redundancy checker + TAlScore curPen_; // current penalty + bool veryVerbose_; // print lots of partial alignments + + EList tmpedit_; + BTDnaString tmprfdnastr_; +}; + +/** + * Selects alignments to report from a complete non-empty stratum of + * alignments stored in the DescentAlignmentSink. + */ +template +class DescentAlignmentSelector { + +public: + + DescentAlignmentSelector() : gwstate_(GW_CAT) { reset(); } + + /** + * Initialize a new selector w/r/t a DescentAlignmentSink holding a + * non-empty alignment stratum. + */ + void init( + const Read& q, + const DescentAlignmentSink& sink, + const GFM& gfmFw, // forward Bowtie index for walking left + const BitPairReference& ref, // bitpair-encoded reference + RandomSource& rnd, // pseudo-random generator for sampling rows + WalkMetrics& met) + { + // We're going to sample from space of *alignments*, not ranges. So + // when we extract a sample, we'll have to do a little extra work to + // convert it to a coordinate. 
+ rnd_.init( + sink.nelt(), // # elements to choose from + true); // without replacement + offs_.resize(sink.nelt()); + offs_.fill(std::numeric_limits::max()); + sas_.resize(sink.nrange()); + gws_.resize(sink.nrange()); + size_t ei = 0; + for(size_t i = 0; i < sas_.size(); i++) { + size_t en = sink[i].botf - sink[i].topf; + sas_[i].init(sink[i].topf, q.length(), EListSlice(offs_, ei, en)); + gws_[i].init(gfmFw, ref, sas_[i], rnd, met); + ei += en; + } + } + + /** + * Reset the selector. + */ + void reset() { + rnd_.reset(); + } + + /** + * Return true iff the selector is currently initialized. + */ + bool inited() const { + return rnd_.size() > 0; + } + + /** + * Get next alignment and convert it to an AlnRes. + */ + bool next( + const DescentDriver& dr, + const GFM& gfmFw, // forward Bowtie index for walking left + const BitPairReference& ref, // bitpair-encoded reference + RandomSource& rnd, + AlnRes& rs, + WalkMetrics& met, + PerReadMetrics& prm) + { + // Sample one alignment randomly from pool of remaining alignments + size_t ri = (size_t)rnd_.next(rnd); + size_t off = 0; + DescentAlignment al; + size_t rangei = 0; + // Convert random alignment index into a coordinate + dr.sink().elt(ri, al, rangei, off); + assert_lt(off, al.size()); + Coord refcoord; + WalkResult wr; + TIndexOffU tidx = 0, toff = 0, tlen = 0; + gws_[rangei].advanceElement( + (TIndexOffU)off, + gfmFw, // forward Bowtie index for walking left + ref, // bitpair-encoded reference + sas_[rangei], // SA range with offsets + gwstate_, // GroupWalk state; scratch space + wr, // put the result here + met, // metrics + prm); // per-read metrics + assert_neq(OFF_MASK, wr.toff); + bool straddled = false; + gfmFw.joinedToTextOff( + wr.elt.len, + wr.toff, + tidx, + toff, + tlen, + true, // reject straddlers? + straddled); // straddled? 
+ if(tidx == OFF_MASK) { + // The seed hit straddled a reference boundary so the seed + // hit isn't valid + return false; + } + // Coordinate of the seed hit w/r/t the pasted reference string + refcoord.init(tidx, (int64_t)toff, dr.sink()[rangei].fw); + const EList& edits = dr.sink().edits(); + size_t ns = 0, ngap = 0, nrefn = 0; + for(size_t i = al.ei; i < al.ei + al.en; i++) { + if(edits[i].qchr == 'N' || edits[i].chr == 'N') ns++; + if(edits[i].chr == 'N') nrefn++; + if(edits[i].isGap()) ngap++; + } + AlnScore asc( + -dr.sink().bestPenalty(), // numeric score + ns, // # Ns + ngap); // # gaps + rs.init( + dr.query().length(), // # chars after hard trimming + asc, // alignment score + &dr.sink().edits(), // nucleotide edits array + al.ei, // nucleotide edits first pos + al.en, // nucleotide edits last pos + NULL, // ambig base array + 0, // ambig base first pos + 0, // ambig base last pos + refcoord, // coord of leftmost aligned char in ref + tlen, // length of reference aligned to + -1, // # seed mms allowed + -1, // seed length + -1, // seed interval + dr.minScore(), // minimum score for valid alignment + -1, // nuc5p (for colorspace) + -1, // nuc3p (for colorspace) + false, // soft pre-trimming? + 0, // 5p pre-trimming + 0, // 3p pre-trimming + false, // soft trimming? + 0, // 5p trimming + 0); // 3p trimming + rs.setRefNs(nrefn); + return true; + } + + /** + * Return true iff all elements have been reported. + */ + bool done() const { + return rnd_.done(); + } + + /** + * Return the total size occupued by the Descent driver and all its + * constituent parts. + */ + size_t totalSizeBytes() const { + return rnd_.totalSizeBytes() + + offs_.totalSizeBytes() + + sas_.totalSizeBytes() + + gws_.totalSizeBytes(); + } + + /** + * Return the total capacity of the Descent driver and all its constituent + * parts. 
+ */ + size_t totalCapacityBytes() const { + return rnd_.totalCapacityBytes() + + offs_.totalCapacityBytes() + + sas_.totalCapacityBytes() + + gws_.totalCapacityBytes(); + } + +protected: + + Random1toN rnd_; + EList offs_; + EList, index_t> > sas_; + EList, 16> > gws_; + GroupWalkState gwstate_; +}; + +/** + * Selects and prioritizes partial alignments from the heap of the + * DescentDriver. We assume that the heap is no longer changing (i.e. that the + * DescentDriver is done). Usually, the user will then attempt to extend the + * partial alignments into full alignments. This can happen incrementally; + * that is, the user might ask for the partial alignments one "batch" at a + * time, and the selector will only do as much work as is necessary to supply + * each requested batch. + * + * The actual work done here includes: (a) scanning the heap for high-priority + * partial alignments, (b) setting up the rnd_, offs_, sas_, gws_, and gwstate_ + * fields and resolving offsets of partial alignments, (c) packaging and + * delivering batches of results to the caller. + * + * How to prioritize partial alignments? One idea is to use the same + * penalty-based prioritization used in the heap. This has pros: (a) maintains + * the guarantee that we're visiting alignments in best-to-worst order in + * end-to-end alignment mode, (b) the heap is already prioritized this way, so + * it's easier for us to compile high-priority partial alignments. But the con + * is that it doesn't take depth into account, which could mean that we're + * extending a lot of very short partial alignments first. + * + * A problem we should keep in mind is that some [sentence left unfinished in + * the original source] + */ +template +class DescentPartialAlignmentSelector { + +public: + + DescentPartialAlignmentSelector() : gwstate_(GW_CAT) { reset(); } + + /** + * Initialize a new selector w/r/t a read, index and heap of partial + * alignments. 
+ */ + void init( + const Read& q, // read + const EHeap& heap, // the heap w/ the partial alns + TAlScore depthBonus, // use depth when prioritizing + size_t nbatch, // # of alignments in a batch + const GFM& gfmFw, // forward Bowtie index for walk-left + const BitPairReference& ref, // bitpair-encoded reference + RandomSource& rnd, // pseudo-randoms for sampling rows + WalkMetrics& met) // metrics re: offset resolution + { + // Make our internal heap. With a positive depthBonus every entry is + // re-inserted with depthBonus * depth added to its penalty; otherwise + // the caller's heap is copied unchanged. + if(depthBonus > 0) { + heap_.clear(); + for(size_t i = 0; i < heap.size(); i++) { + TDescentPair p = heap[i]; + p.first.pen += depthBonus * p.first.depth; + heap_.insert(p); + } + } else { + heap_ = heap; + } + // NOTE: the setup below is compiled out (#if 0); it refers to a 'sink' + // that is not a parameter of this function, so rnd_, offs_, sas_ and + // gws_ are not set up here. +#if 0 + // We're going to sample from space of *alignments*, not ranges. So + // when we extract a sample, we'll have to do a little extra work to + // convert it to a coordinate. + rnd_.init( + sink.nelt(), // # elements to choose from + true); // without replacement + offs_.resize(sink.nelt()); + offs_.fill(std::numeric_limits::max()); + sas_.resize(sink.nrange()); + gws_.resize(sink.nrange()); + size_t ei = 0; + for(size_t i = 0; i < sas_.size(); i++) { + size_t en = sink[i].botf - sink[i].topf; + sas_[i].init(sink[i].topf, q.length(), EListSlice(offs_, ei, en)); + gws_[i].init(gfmFw, ref, sas_[i], rnd, met); + ei += en; + } +#endif + } + + /** + * Placeholder: the body is empty, so this currently does nothing. + */ + void compileBatch() { + } + + /** + * Reset the selector by clearing its working heap. + */ + void reset() { + heap_.clear(); + } + + /** + * Return true iff the selector is currently initialized, i.e. iff its + * working heap is non-empty. + */ + bool inited() const { + return !heap_.empty(); + } + + /** + * Get next alignment and convert it to an AlnRes. 
+ */ + bool next( + const DescentDriver& dr, + const GFM& gfmFw, // forward Bowtie index for walking left + const BitPairReference& ref, // bitpair-encoded reference + RandomSource& rnd, + AlnRes& rs, + WalkMetrics& met, + PerReadMetrics& prm) + { + // Sample one alignment randomly from pool of remaining alignments + size_t ri = (size_t)rnd_.next(rnd); + size_t off = 0; + DescentAlignment al; + size_t rangei = 0; + // Convert random alignment index into a coordinate + dr.sink().elt(ri, al, rangei, off); + assert_lt(off, al.size()); + Coord refcoord; + WalkResult wr; + // Use the index offset type and its OFF_MASK sentinel, as + // DescentAlignmentSelector::next() does, rather than hard-coded 32-bit + // values that are wrong when offsets are wider than 32 bits + TIndexOffU tidx = 0, toff = 0, tlen = 0; + gws_[rangei].advanceElement( + (TIndexOffU)off, + gfmFw, // forward Bowtie index for walking left + ref, // bitpair-encoded reference + sas_[rangei], // SA range with offsets + gwstate_, // GroupWalk state; scratch space + wr, // put the result here + met, // metrics + prm); // per-read metrics + assert_neq(OFF_MASK, wr.toff); + bool straddled = false; + gfmFw.joinedToTextOff( + wr.elt.len, + wr.toff, + tidx, + toff, + tlen, + true, // reject straddlers? + straddled); // straddled? + if(tidx == OFF_MASK) { + // The seed hit straddled a reference boundary so the seed + // hit isn't valid + return false; + } + // Coordinate of the seed hit w/r/t the pasted reference string + refcoord.init(tidx, (int64_t)toff, dr.sink()[rangei].fw); + const EList& edits = dr.sink().edits(); + size_t ns = 0, ngap = 0, nrefn = 0; + for(size_t i = al.ei; i < al.ei + al.en; i++) { + if(edits[i].qchr == 'N' || edits[i].chr == 'N') ns++; + if(edits[i].chr == 'N') nrefn++; + if(edits[i].isGap()) ngap++; + } + // NOTE: unlike DescentAlignmentSelector::next(), 'rs' is never filled in + // here; refcoord and the N/gap counts above are computed but unused. + return true; + } + + /** + * Return true iff all elements have been reported. + */ + bool done() const { + return rnd_.done(); + } + + /** + * Return the total size occupied by this selector and all its + * constituent parts. 
+ */ + size_t totalSizeBytes() const { + return heap_.totalSizeBytes() + + rnd_.totalSizeBytes() + + offs_.totalSizeBytes() + + sas_.totalSizeBytes() + + gws_.totalSizeBytes(); + } + + /** + * Return the total capacity of the Descent driver and all its constituent + * parts. + */ + size_t totalCapacityBytes() const { + return heap_.totalCapacityBytes() + + rnd_.totalCapacityBytes() + + offs_.totalCapacityBytes() + + sas_.totalCapacityBytes() + + gws_.totalCapacityBytes(); + } + +protected: + + // This class's working heap. This might simply be a copy of the original + // heap, or it might be re-prioritized in some way. + EHeap heap_; + + Random1toN rnd_; + EList offs_; + EList, index_t> > sas_; + EList, 16> > gws_; + GroupWalkState gwstate_; +}; + +/** + * Drive the process of descending from all search roots. + */ +template +void DescentDriver::go( + const Scoring& sc, // scoring scheme + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + DescentMetrics& met, // metrics + PerReadMetrics& prm) // per-read metrics +{ + assert(q_.repOk()); + // Convert DescentRoots to the initial Descents + for(size_t i = 0; i < roots_.size(); i++) { + size_t dfsz = df_.size(); + size_t pfsz = pf_.size(); + TDescentId id = df_.alloc(); + Edit e_null; + assert(!e_null.inited()); + bool succ = df_[id].init( + q_, // read + i, // root and conf id + sc, // scoring scheme + minsc_, // minimum score + maxpen_, // maximum penalty + id, // new Descent's id + gfmFw, // forward index + gfmBw, // mirror index + re_, // redundancy checker + df_, // Descent factory + pf_, // DescentPos factory + roots_, // DescentRoots + confs_, // DescentConfs + heap_, // heap + alsink_, // alignment sink + met, // metrics + prm); // per-read metrics + if(veryVerbose_) { + bool fw = roots_[i].fw; + tmpedit_.clear(); + df_[id].print( + &cerr, + "", + q_, + 0, + 0, + fw, + tmpedit_, + 0, + tmpedit_.size(), + tmprfdnastr_); + } + if(!succ) { + // Reclaim memory we had used for this descent and 
its DescentPos info + df_.resize(dfsz); + pf_.resize(pfsz); + } + } + // Advance until some stopping condition + bool stop = heap_.empty(); + while(!stop) { + // Pop off the highest-priority descent. Note that some outgoing edges + // might have since been explored, which could reduce the priority of + // the descent once we . + TDescentPair p = heap_.pop(); + df_.alloc(); df_.pop(); + df_[p.second].followBestOutgoing( + q_, // read + gfmFw, // index over text + gfmBw, // index over reverse text + sc, // scoring scheme + minsc_, // minimum score + maxpen_, // maximum penalty + re_, // redundancy checker + df_, // Descent factory + pf_, // DescentPos factory + roots_, // + confs_, // + heap_, // priority queue for Descents + alsink_, // alignment sink + met, // metrics + prm); // per-read metrics + stop = heap_.empty(); + } +} + +/** + * Perform seed alignment until some stopping condition is satisfied. + */ +template +int DescentDriver::advance( + const DescentStoppingConditions& stopc, // stopping conditions + const Scoring& sc, // scoring scheme + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + DescentMetrics& met, // metrics + PerReadMetrics& prm) // per-read metrics +{ + size_t nbwop_i = met.bwops; + while(rootsInited_ < roots_.size()) { + size_t dfsz = df_.size(); + size_t pfsz = pf_.size(); + TDescentId id = df_.alloc(); + Edit e_null; + assert(!e_null.inited()); + bool succ = df_[id].init( + q_, // query + rootsInited_, // root and conf id + sc, // scoring scheme + minsc_, // minimum score + maxpen_, // maximum penalty + id, // new Descent's id + gfmFw, // forward index + gfmBw, // mirror index + re_, // redundancy checker + df_, // Descent factory + pf_, // DescentPos factory + roots_, // DescentRoots + confs_, // DescentConfs + heap_, // heap + alsink_, // alignment sink + met, // metrics + prm); // per-read metrics + if(!succ) { + // Reclaim memory we had used for this descent and its DescentPos info + df_.resize(dfsz); + 
pf_.resize(pfsz); + } + rootsInited_++; + TAlScore best = std::numeric_limits::max(); + if(!heap_.empty()) { + best = heap_.top().first.pen; + } + if(stopc.nfound > 0 && alsink_.nelt() > stopc.nfound) { + return DESCENT_DRIVER_ALN; + } + if(alsink_.stratumDone(best)) { + return DESCENT_DRIVER_STRATA; + } + if(stopc.nbwop > 0 && (met.bwops - nbwop_i) > stopc.nbwop) { + return DESCENT_DRIVER_BWOPS; + } + if(stopc.totsz > 0 && totalSizeBytes() > stopc.totsz) { + return DESCENT_DRIVER_MEM; + } + } + // Advance until some stopping condition + bool stop = heap_.empty(); + while(!stop) { + // Pop off the highest-priority descent. Note that some outgoing edges + // might have since been explored, which could reduce the priority of + // the descent once we . + TDescentPair p = heap_.pop(); + df_.alloc(); df_.pop(); + df_[p.second].followBestOutgoing( + q_, + gfmFw, + gfmBw, + sc, + minsc_, // minimum score + maxpen_, // maximum penalty + re_, // redundancy checker + df_, // Descent factory + pf_, // DescentPos factory + roots_, + confs_, + heap_, + alsink_, + met, + prm); // per-read metrics + TAlScore best = std::numeric_limits::max(); + if(!heap_.empty()) { + best = heap_.top().first.pen; + } + if(stopc.nfound > 0 && alsink_.nelt() > stopc.nfound) { + return DESCENT_DRIVER_ALN; + } + if(alsink_.stratumDone(best)) { + return DESCENT_DRIVER_STRATA; + } + if(stopc.nbwop > 0 && (met.bwops - nbwop_i) > stopc.nbwop) { + return DESCENT_DRIVER_BWOPS; + } + if(stopc.totsz > 0 && totalSizeBytes() > stopc.totsz) { + return DESCENT_DRIVER_MEM; + } + stop = heap_.empty(); + } + return DESCENT_DRIVER_DONE; +} + +/** + * If this is the final descent in a complete end-to-end alignment, report + * the alignment. 
+ */ +template +bool DescentAlignmentSink::reportAlignment( + const Read& q, // query string + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + TIndexOffU topf, // SA range top in forward index + TIndexOffU botf, // SA range bottom in forward index + TIndexOffU topb, // SA range top in backward index + TIndexOffU botb, // SA range bottom in backward index + TDescentId id, // id of leaf Descent + TRootId rid, // id of search root + const Edit& e, // final edit, if needed + TScore pen, // total penalty + EFactory >& df, // factory with Descent + EFactory& pf, // factory with DescentPoss + const EList& rs, // roots + const EList& cs) // configs +{ + TDescentId cur = id; + ASSERT_ONLY(const Descent& desc = df[id]); + const bool fw = rs[rid].fw; + ASSERT_ONLY(size_t len = q.length()); + assert(q.repOk()); + assert_lt(desc.al5pf(), len); + // Adjust al5pi and al5pf to take the final edit into account (if + // there is one) + // Check if this is redundant with a previous reported alignment + Triple lhs(topf, botf, 0); + Triple rhs(topb, botb, q.length()-1); + if(!lhs_.insert(lhs)) { + rhs_.insert(rhs); + return false; // Already there + } + if(!rhs_.insert(rhs)) { + return false; // Already there + } + size_t ei = edits_.size(); + df[cur].collectEdits(edits_, &e, df); + size_t en = edits_.size() - ei; +#ifndef NDEBUG + { + for(size_t i = 1; i < en; i++) { + assert_geq(edits_[ei+i].pos, edits_[ei+i-1].pos); + } + // Now figure out how much we refrained from aligning on either + // side. 
+ size_t trimLf = 0; + size_t trimRg = 0; + BTDnaString& rf = tmprfdnastr_; + rf.clear(); + if(!fw) { + // Edit offsets are w/r/t 5' end, but desc.print wants them w/r/t + // the *left* end of the read sequence that aligned + Edit::invertPoss(edits_, len, ei, en, true); + } + desc.print(NULL, "", q, trimLf, trimRg, fw, edits_, ei, en, rf); + if(!fw) { + // Invert them back to how they were before + Edit::invertPoss(edits_, len, ei, en, true); + } + ASSERT_ONLY(TIndexOffU toptmp = 0); + ASSERT_ONLY(TIndexOffU bottmp = 0); + // Check that the edited string occurs in the reference + if(!gfmFw.contains(rf, &toptmp, &bottmp)) { + std::cerr << rf << std::endl; + assert(false); + } + } +#endif + als_.expand(); + als_.back().init(pen, fw, topf, botf, ei, en); + nelt_ += (botf - topf); + if(bestPen_ == std::numeric_limits::max() || pen < bestPen_) { + bestPen_ = pen; + } + if(worstPen_ == std::numeric_limits::max() || pen > worstPen_) { + worstPen_ = pen; + } + return true; +} + +/** + * Initialize a new descent branching from the given descent via the given + * edit. Return false if the Descent has no outgoing edges (and can + * therefore have its memory freed), true otherwise. 
+ */ +template +bool Descent::init( + const Read& q, // query + TRootId rid, // root id + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + TAlScore maxpen, // maximum penalty + TReadOff al5pi, // offset from 5' of 1st aligned char + TReadOff al5pf, // offset from 5' of last aligned char + TIndexOffU topf, // SA range top in FW index + TIndexOffU botf, // SA range bottom in FW index + TIndexOffU topb, // SA range top in BW index + TIndexOffU botb, // SA range bottom in BW index + bool l2r, // direction this descent will go in + size_t descid, // my ID + TDescentId parent, // parent ID + TScore pen, // total penalties so far + const Edit& e, // edit for incoming edge + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + DescentRedundancyChecker& re, // redundancy checker + EFactory& df, // Descent factory + EFactory& pf, // DescentPos factory + const EList& rs, // roots + const EList& cs, // configs + EHeap& heap, // heap + DescentAlignmentSink& alsink, // alignment sink + DescentMetrics& met, // metrics + PerReadMetrics& prm) // per-read metrics +{ + assert(q.repOk()); + rid_ = rid; + al5pi_ = al5pi; + al5pf_ = al5pf; + l2r_ = l2r; + topf_ = topf; + botf_ = botf; + topb_ = topb; + botb_ = botb; + descid_ = descid; + parent_ = parent; + pen_ = pen; + posid_ = std::numeric_limits::max(); + len_ = 0; + out_.clear(); + edit_ = e; + lastRecalc_ = true; + gapadd_ = df[parent].gapadd_; + if(e.inited()) { + if(e.isReadGap()) { + gapadd_++; + } else if(e.isRefGap()) { + gapadd_--; + } + } + bool branches = false, hitEnd = false, done = false; + TIndexOffU topf_new = 0, botf_new = 0, topb_new = 0, botb_new = 0; + off5p_i_ = 0; +#ifndef NDEBUG + size_t depth = al5pf_ - al5pi_ + 1; + TAlScore maxpend = cs[rid_].cons.get(depth, q.length(), maxpen); + assert_geq(maxpend, pen_); // can't have already exceeded max penalty +#endif + bool matchSucc = followMatches( + q, + sc, + gfmFw, + gfmBw, + re, + df, + pf, + rs, + cs, + heap, + 
alsink, + met, + prm, + branches, + hitEnd, + done, + off5p_i_, + topf_new, + botf_new, + topb_new, + botb_new); + bool bounceSucc = false; + if(matchSucc && hitEnd && !done) { + assert(topf_new > 0 || botf_new > 0); + bounceSucc = bounce( + q, + topf_new, + botf_new, + topb_new, + botb_new, + gfmFw, + gfmBw, + sc, + minsc, // minimum score + maxpen, // maximum penalty + re, + df, + pf, + rs, + cs, + heap, + alsink, + met, // descent metrics + prm); // per-read metrics + } + if(matchSucc) { + // Calculate info about outgoing edges + recalcOutgoing(q, sc, minsc, maxpen, re, pf, rs, cs, prm); + if(!empty()) { + heap.insert(make_pair(out_.bestPri(), descid)); // Add to heap + } + } + return !empty() || bounceSucc; +} + +/** + * Initialize a new descent beginning at the given root. Return false if + * the Descent has no outgoing edges (and can therefore have its memory + * freed), true otherwise. + */ +template +bool Descent::init( + const Read& q, // query + TRootId rid, // root id + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + TAlScore maxpen, // maximum penalty + size_t descid, // id of this Descent + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + DescentRedundancyChecker& re, // redundancy checker + EFactory >& df, // Descent factory + EFactory& pf, // DescentPos factory + const EList& rs, // roots + const EList& cs, // configs + EHeap& heap, // heap + DescentAlignmentSink& alsink, // alignment sink + DescentMetrics& met, // metrics + PerReadMetrics& prm) // per-read metrics +{ + rid_ = rid; + al5pi_ = rs[rid].off5p; + al5pf_ = rs[rid].off5p; + assert_lt(al5pi_, q.length()); + assert_lt(al5pf_, q.length()); + l2r_ = rs[rid].l2r; + topf_ = botf_ = topb_ = botb_ = 0; + descid_ = descid; + parent_ = std::numeric_limits::max(); + pen_ = 0; + posid_ = std::numeric_limits::max(); + len_ = 0; + out_.clear(); + edit_.reset(); + lastRecalc_ = true; + gapadd_ = 0; + bool branches = false, hitEnd = false, done = false; + 
TIndexOffU topf_new = 0, botf_new = 0, topb_new = 0, botb_new = 0; + off5p_i_ = 0; + bool matchSucc = followMatches( + q, + sc, + gfmFw, + gfmBw, + re, + df, + pf, + rs, + cs, + heap, + alsink, + met, + prm, + branches, + hitEnd, + done, + off5p_i_, + topf_new, + botf_new, + topb_new, + botb_new); + bool bounceSucc = false; + if(matchSucc && hitEnd && !done) { + assert(topf_new > 0 || botf_new > 0); + bounceSucc = bounce( + q, + topf_new, + botf_new, + topb_new, + botb_new, + gfmFw, + gfmBw, + sc, + minsc, // minimum score + maxpen, // maximum penalty + re, + df, + pf, + rs, + cs, + heap, + alsink, + met, // descent metrics + prm); // per-read metrics + } + // Calculate info about outgoing edges + assert(empty()); + if(matchSucc) { + recalcOutgoing(q, sc, minsc, maxpen, re, pf, rs, cs, prm); + if(!empty()) { + heap.insert(make_pair(out_.bestPri(), descid)); // Add to heap + } + } + return !empty() || bounceSucc; +} + +/** + * Recalculate our summary of the outgoing edges from this descent. When + * deciding what outgoing edges are legal, we abide by constraints. + * Typically, they limit the total of the penalties accumulated so far, as + * a function of distance from the search root. E.g. a constraint might + * disallow any gaps or mismatches within 20 ply of the search root, then + * allow 1 mismatch within 30 ply, then allow up to 1 mismatch or 1 gap + * within 40 ply, etc. + * + * Return the total number of valid outgoing edges found. + * + * TODO: Eliminate outgoing gap edges that are redundant with others owing to + * the DNA sequence and the fact that we don't care to distinguish among + * "equivalent" homopolymer extensinos and retractions. 
+ */ +template +size_t Descent::recalcOutgoing( + const Read& q, // query string + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + TAlScore maxpen, // maximum penalty + DescentRedundancyChecker& re, // redundancy checker + EFactory& pf, // factory with DescentPoss + const EList& rs, // roots + const EList& cs, // configs + PerReadMetrics& prm) // per-read metrics +{ + assert_eq(botf_ - topf_, botb_ - topb_); + assert(out_.empty()); + assert(repOk(&q)); + // Get initial 5' and 3' offsets + bool fw = rs[rid_].fw; + float rootpri = rs[rid_].pri; + bool toward3p = (l2r_ == fw); + size_t off5p = off5p_i_; + assert_geq(al5pf_, al5pi_); + size_t off3p = q.length() - off5p - 1; + // By "depth" we essentially mean the number of characters already aligned + size_t depth, extrai = 0, extraf = 0; + size_t cur5pi = al5pi_, cur5pf = al5pf_; + if(toward3p) { + // Toward 3' + cur5pf = off5p; + depth = off5p - al5pi_; + // Failed to match out to the end? + if(al5pf_ < q.length() - 1) { + extraf = 1; // extra + } + } else { + // Toward 5' + cur5pi = off5p; + depth = al5pf_ - off5p; + if(al5pi_ > 0) { + extrai = 1; + } + } + // Get gap penalties + TScore pen_rdg_ex = sc.readGapExtend(), pen_rfg_ex = sc.refGapExtend(); + TScore pen_rdg_op = sc.readGapOpen(), pen_rfg_op = sc.refGapOpen(); + // Top and bot in the direction of the descent + TIndexOffU top = l2r_ ? topb_ : topf_; + TIndexOffU bot = l2r_ ? botb_ : botf_; + // Top and bot in the opposite direction + TIndexOffU topp = l2r_ ? topf_ : topb_; + TIndexOffU botp = l2r_ ? botf_ : botb_; + assert_eq(botp - topp, bot - top); + DescentEdge edge; + size_t nout = 0; + // Enumerate all outgoing edges, starting at the root and going out + size_t d = posid_; + // At first glance, we might think we should be bounded by al5pi_ and + // al5pf_, but those delimit the positions that matched between reference + // and read. 
If we hit a position that failed to match as part of + // followMatches, then we also want to evaluate ways of leaving that + // position, which adds one more position to viist. + while(off5p >= al5pi_ - extrai && off5p <= al5pf_ + extraf) { + assert_lt(off5p, q.length()); + assert_lt(off3p, q.length()); + TScore maxpend = cs[rid_].cons.get(depth, q.length(), maxpen); + assert(depth > 0 || maxpend == 0); + assert_geq(maxpend, pen_); // can't have already exceeded max penalty + TScore diff = maxpend - pen_; // room we have left + // Get pointer to SA ranges in the direction of descent + const TIndexOffU *t = l2r_ ? pf[d].topb : pf[d].topf; + const TIndexOffU *b = l2r_ ? pf[d].botb : pf[d].botf; + const TIndexOffU *tp = l2r_ ? pf[d].topf : pf[d].topb; + const TIndexOffU *bp = l2r_ ? pf[d].botf : pf[d].botb; + assert_eq(pf[d].botf - pf[d].topf, pf[d].botb - pf[d].topb); + // What are the read char / quality? + std::pair p = q.get(off5p, fw); + int c = p.first; + assert_range(0, 4, c); + // Only entertain edits if there is at least one type of edit left and + // there is some penalty budget left + if(!pf[d].flags.exhausted() && diff > 0) { + // What would the penalty be if we mismatched at this position? + // This includes the case where the mismatch is for an N in the + // read. 
+ int qq = p.second; + assert_geq(qq, 0); + TScore pen_mm = sc.mm(c, qq); + if(pen_mm <= diff) { + for(int j = 0; j < 4; j++) { + if(j == c) continue; // Match, not mismatch + if(b[j] <= t[j]) { + continue; // No outgoing edge with this nucleotide + } + if(!pf[d].flags.mmExplore(j)) { + continue; // Already been explored + } + TIndexOffU topf = pf[d].topf[j], botf = pf[d].botf[j]; + ASSERT_ONLY(TIndexOffU topb = pf[d].topb[j], botb = pf[d].botb[j]); + if(re.contains(fw, l2r_, cur5pi, cur5pf, cur5pf - cur5pi + 1 + gapadd_, topf, botf, pen_ + pen_mm)) { + prm.nRedSkip++; + continue; // Redundant with a path already explored + } + prm.nRedFail++; + TIndexOffU width = b[j] - t[j]; + Edit edit((uint32_t)off5p, (int)("ACGTN"[j]), (int)("ACGTN"[c]), EDIT_TYPE_MM); + DescentPriority pri(pen_ + pen_mm, depth, width, rootpri); + assert(topf != 0 || botf != 0); + assert(topb != 0 || botb != 0); + assert_eq(botb - topb, botf - topf); + edge.init(edit, off5p, pri, d +#ifndef NDEBUG + , d, topf, botf, topb, botb +#endif + ); + out_.update(edge); + nout++; + } + } + bool gapsAllowed = (off5p >= (size_t)sc.gapbar && off3p >= (size_t)sc.gapbar); + if(gapsAllowed) { + assert_gt(depth, 0); + // An easy redundancy check is: if all ways of proceeding are + // matches, then there's no need to entertain gaps here. + // Shifting the gap one position further downstream is + // guarnteed not to be worse. + size_t totwidth = (b[0] - t[0]) + + (b[1] - t[1]) + + (b[2] - t[2]) + + (b[3] - t[3]); + assert(c > 3 || b[c] - t[c] <= totwidth); + bool allmatch = c < 4 && (totwidth == (b[c] - t[c])); + bool rdex = false, rfex = false; + size_t cur5pi_i = cur5pi, cur5pf_i = cur5pf; + if(toward3p) { + cur5pf_i--; + } else { + cur5pi_i++; + } + if(off5p == off5p_i_ && edit_.inited()) { + // If we're at the root of the descent, and the descent + // branched on a gap, then this could be scored as an + // extension of that gap. 
+ if(pen_rdg_ex <= diff && edit_.isReadGap()) { + // Extension of a read gap + rdex = true; + for(int j = 0; j < 4; j++) { + if(b[j] <= t[j]) { + continue; // No outgoing edge with this nucleotide + } + if(!pf[d].flags.rdgExplore(j)) { + continue; // Already been explored + } + TIndexOffU topf = pf[d].topf[j], botf = pf[d].botf[j]; + ASSERT_ONLY(TIndexOffU topb = pf[d].topb[j], botb = pf[d].botb[j]); + assert(topf != 0 || botf != 0); + assert(topb != 0 || botb != 0); + if(re.contains(fw, l2r_, cur5pi_i, cur5pf_i, cur5pf - cur5pi + 1 + gapadd_, topf, botf, pen_ + pen_rdg_ex)) { + prm.nRedSkip++; + continue; // Redundant with a path already explored + } + prm.nRedFail++; + TIndexOffU width = b[j] - t[j]; + // off5p holds the offset from the 5' of the next + // character we were trying to align when we decided to + // introduce a read gap (before that character). If we + // were walking toward the 5' end, we need to increment + // by 1. + uint32_t off = (uint32_t)off5p + (toward3p ? 0 : 1); + Edit edit(off, (int)("ACGTN"[j]), '-', EDIT_TYPE_READ_GAP); + assert(edit.pos2 != std::numeric_limits::max()); + edit.pos2 = edit_.pos2 + (toward3p ? 1 : -1); + DescentPriority pri(pen_ + pen_rdg_ex, depth, width, rootpri); + assert(topf != 0 || botf != 0); + assert(topb != 0 || botb != 0); + assert_eq(botb - topb, botf - topf); + edge.init(edit, off5p, pri, d +#ifndef NDEBUG + , d, + topf, botf, topb, botb +#endif + ); + out_.update(edge); + nout++; + } + } + if(pen_rfg_ex <= diff && edit_.isRefGap()) { + // Extension of a reference gap + rfex = true; + if(pf[d].flags.rfgExplore()) { + TIndexOffU topf = l2r_ ? topp : top; + TIndexOffU botf = l2r_ ? botp : bot; + ASSERT_ONLY(TIndexOffU topb = l2r_ ? top : topp); + ASSERT_ONLY(TIndexOffU botb = l2r_ ? 
bot : botp); + assert(topf != 0 || botf != 0); + assert(topb != 0 || botb != 0); + size_t nrefal = cur5pf - cur5pi + gapadd_; + if(!re.contains(fw, l2r_, cur5pi, cur5pf, nrefal, topf, botf, pen_ + pen_rfg_ex)) { + TIndexOffU width = bot - top; + Edit edit((uint32_t)off5p, '-', (int)("ACGTN"[c]), EDIT_TYPE_REF_GAP); + DescentPriority pri(pen_ + pen_rfg_ex, depth, width, rootpri); + assert(topf != 0 || botf != 0); + assert(topb != 0 || botb != 0); + edge.init(edit, off5p, pri, d +#ifndef NDEBUG + // It's a little unclear what the depth ought to be. + // Is it the depth we were at when we did the ref + // gap? I.e. the depth of the flags where rfgExplore() + // returned true? Or is it the depth where we can + // retrieve the appropriate top/bot? We make it the + // latter, might wrap around, indicating we should get + // top/bot from the descent's topf_, ... fields. + , (d == posid_) ? std::numeric_limits::max() : (d - 1), + topf, botf, topb, botb +#endif + ); + out_.update(edge); + nout++; + prm.nRedFail++; + } else { + prm.nRedSkip++; + } + } + } + } + if(!allmatch && pen_rdg_op <= diff && !rdex) { + // Opening a new read gap + for(int j = 0; j < 4; j++) { + if(b[j] <= t[j]) { + continue; // No outgoing edge with this nucleotide + } + if(!pf[d].flags.rdgExplore(j)) { + continue; // Already been explored + } + TIndexOffU topf = pf[d].topf[j], botf = pf[d].botf[j]; + ASSERT_ONLY(TIndexOffU topb = pf[d].topb[j], botb = pf[d].botb[j]); + assert(topf != 0 || botf != 0); + assert(topb != 0 || botb != 0); + if(re.contains(fw, l2r_, cur5pi_i, cur5pf_i, cur5pf - cur5pi + 1 + gapadd_, topf, botf, pen_ + pen_rdg_op)) { + prm.nRedSkip++; + continue; // Redundant with a path already explored + } + prm.nRedFail++; + TIndexOffU width = b[j] - t[j]; + // off5p holds the offset from the 5' of the next + // character we were trying to align when we decided to + // introduce a read gap (before that character). If we + // were walking toward the 5' end, we need to increment + // by 1. 
+ uint32_t off = (uint32_t)off5p + (toward3p ? 0 : 1); + Edit edit(off, (int)("ACGTN"[j]), '-', EDIT_TYPE_READ_GAP); + assert(edit.pos2 != std::numeric_limits::max()); + DescentPriority pri(pen_ + pen_rdg_op, depth, width, rootpri); + assert(topf != 0 || botf != 0); + assert(topb != 0 || botb != 0); + assert_eq(botb - topb, botf - topf); + edge.init(edit, off5p, pri, d +#ifndef NDEBUG + , d, topf, botf, topb, botb +#endif + ); + out_.update(edge); + nout++; + } + } + if(!allmatch && pen_rfg_op <= diff && !rfex) { + // Opening a new reference gap + if(pf[d].flags.rfgExplore()) { + TIndexOffU topf = l2r_ ? topp : top; + TIndexOffU botf = l2r_ ? botp : bot; + ASSERT_ONLY(TIndexOffU topb = l2r_ ? top : topp); + ASSERT_ONLY(TIndexOffU botb = l2r_ ? bot : botp); + assert(topf != 0 || botf != 0); + assert(topb != 0 || botb != 0); + size_t nrefal = cur5pf - cur5pi + gapadd_; + if(!re.contains(fw, l2r_, cur5pi, cur5pf, nrefal, topf, botf, pen_ + pen_rfg_op)) { + TIndexOffU width = bot - top; + Edit edit((uint32_t)off5p, '-', (int)("ACGTN"[c]), EDIT_TYPE_REF_GAP); + DescentPriority pri(pen_ + pen_rfg_op, depth, width, rootpri); + assert(topf != 0 || botf != 0); + assert(topb != 0 || botb != 0); + edge.init(edit, off5p, pri, d +#ifndef NDEBUG + // It's a little unclear what the depth ought to be. + // Is it the depth we were at when we did the ref + // gap? I.e. the depth of the flags where rfgExplore() + // returned true? Or is it the depth where we can + // retrieve the appropriate top/bot? We make it the + // latter, might wrap around, indicating we should get + // top/bot from the descent's topf_, ... fields. + , (d == posid_) ? 
std::numeric_limits::max() : (d - 1), + topf, botf, topb, botb +#endif + ); + out_.update(edge); + nout++; + prm.nRedFail++; + } else { + prm.nRedSkip++; + } + } + } + } + } + // Update off5p, off3p, depth + d++; + depth++; + assert_leq(depth, al5pf_ - al5pi_ + 2); + if(toward3p) { + if(off3p == 0) { + break; + } + off5p++; + off3p--; + cur5pf++; + } else { + if(off5p == 0) { + break; + } + off3p++; + off5p--; + cur5pi--; + } + // Update top and bot + if(off5p >= al5pi_ - extrai && off5p <= al5pf_ + extraf) { + assert_range(0, 3, c); + top = t[c]; topp = tp[c]; + bot = b[c]; botp = bp[c]; + assert_eq(bot-top, botp-topp); + } + } + lastRecalc_ = (nout <= 5); + out_.best1.updateFlags(pf); + out_.best2.updateFlags(pf); + out_.best3.updateFlags(pf); + out_.best4.updateFlags(pf); + out_.best5.updateFlags(pf); + return nout; +} + +template +void Descent::print( + std::ostream *os, + const char *prefix, + const Read& q, + size_t trimLf, + size_t trimRg, + bool fw, + const EList& edits, + size_t ei, + size_t en, + BTDnaString& rf) const +{ + const BTDnaString& read = fw ? 
q.patFw : q.patRc; + size_t eidx = ei; + if(os != NULL) { *os << prefix; } + // Print read + for(size_t i = 0; i < read.length(); i++) { + if(i < trimLf || i >= read.length() - trimRg) { + if(os != NULL) { *os << (char)tolower(read.toChar(i)); } + continue; + } + bool del = false, mm = false; + while(eidx < ei + en && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + if(os != NULL) { *os << '-'; } + } else if(edits[eidx].isRefGap()) { + del = true; + assert_eq((int)edits[eidx].qchr, read.toChar(i)); + if(os != NULL) { *os << read.toChar(i); } + } else { + mm = true; + assert(edits[eidx].isMismatch()); + assert_eq((int)edits[eidx].qchr, read.toChar(i)); + if(os != NULL) { *os << (char)edits[eidx].qchr; } + } + eidx++; + } + if(!del && !mm) { + // Print read character + if(os != NULL) { *os << read.toChar(i); } + } + } + if(os != NULL) { + *os << endl; + *os << prefix; + } + eidx = ei; + // Print match bars + for(size_t i = 0; i < read.length(); i++) { + if(i < trimLf || i >= read.length() - trimRg) { + if(os != NULL) { *os << ' '; } + continue; + } + bool del = false, mm = false; + while(eidx < ei + en && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + if(os != NULL) { *os << ' '; } + } else if(edits[eidx].isRefGap()) { + del = true; + if(os != NULL) { *os << ' '; } + } else { + mm = true; + assert(edits[eidx].isMismatch()); + if(os != NULL) { *os << ' '; } + } + eidx++; + } + if(!del && !mm && os != NULL) { *os << '|'; } + } + if(os != NULL) { + *os << endl; + *os << prefix; + } + eidx = ei; + // Print reference + for(size_t i = 0; i < read.length(); i++) { + if(i < trimLf || i >= read.length() - trimRg) { + if(os != NULL) { *os << ' '; } + continue; + } + bool del = false, mm = false; + while(eidx < ei + en && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + rf.appendChar((char)edits[eidx].chr); + if(os != NULL) { *os << (char)edits[eidx].chr; } + } else if(edits[eidx].isRefGap()) { + del = true; + if(os != NULL) { *os << '-'; } + } else 
{ + mm = true; + assert(edits[eidx].isMismatch()); + rf.appendChar((char)edits[eidx].chr); + if(os != NULL) { *os << (char)edits[eidx].chr; } + } + eidx++; + } + if(!del && !mm) { + rf.append(read[i]); + if(os != NULL) { *os << read.toChar(i); } + } + } + if(os != NULL) { *os << endl; } +} + +/** + * Create a new Descent + */ +template +bool Descent::bounce( + const Read& q, // query string + TIndexOffU topf, // SA range top in fw index + TIndexOffU botf, // SA range bottom in fw index + TIndexOffU topb, // SA range top in bw index + TIndexOffU botb, // SA range bottom in bw index + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + TAlScore maxpen, // maximum penalty + DescentRedundancyChecker& re, // redundancy checker + EFactory >& df, // factory with Descent + EFactory& pf, // factory with DescentPoss + const EList& rs, // roots + const EList& cs, // configs + EHeap& heap, // heap of descents + DescentAlignmentSink& alsink, // alignment sink + DescentMetrics& met, // metrics + PerReadMetrics& prm) // per-read metrics +{ + assert_gt(botf, topf); + assert(al5pi_ == 0 || al5pf_ == q.length()-1); + assert(!(al5pi_ == 0 && al5pf_ == q.length()-1)); + size_t dfsz = df.size(); + size_t pfsz = pf.size(); + TDescentId id = df.alloc(); + Edit e_null; + assert(!e_null.inited()); + // Follow matches + bool succ = df[id].init( + q, // query + rid_, // root id + sc, // scoring scheme + minsc, // minimum score + maxpen, // maximum penalty + al5pi_, // new near-5' extreme + al5pf_, // new far-5' extreme + topf, // SA range top in FW index + botf, // SA range bottom in FW index + topb, // SA range top in BW index + botb, // SA range bottom in BW index + !l2r_, // direction this descent will go in; opposite from parent + id, // my ID + descid_, // parent ID + pen_, // total penalties so far - same as parent + e_null, // edit for incoming edge; uninitialized if bounced + gfmFw, // 
forward index + gfmBw, // mirror index + re, // redundancy checker + df, // Descent factory + pf, // DescentPos factory + rs, // DescentRoot list + cs, // DescentConfig list + heap, // heap + alsink, // alignment sink + met, // metrics + prm); // per-read metrics + if(!succ) { + // Reclaim memory we had used for this descent and its DescentPos info + df.resize(dfsz); + pf.resize(pfsz); + } + return succ; +} + +/** + * Take the best outgoing edge and place it in the heap. When deciding what + * outgoing edges exist, abide by constraints in DescentConfig. These + * constraints limit total penalty accumulated so far versus distance from + * search root. E.g. a constraint might disallow any gaps or mismatches within + * 20 ply of the root, then allow 1 mismatch within 30 ply, 1 mismatch or 1 gap + * within 40 ply, etc. + */ +template +void Descent::followBestOutgoing( + const Read& q, // query string + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + TAlScore maxpen, // maximum penalty + DescentRedundancyChecker& re, // redundancy checker + EFactory >& df, // factory with Descent + EFactory& pf, // factory with DescentPoss + const EList& rs, // roots + const EList& cs, // configs + EHeap& heap, // heap of descents + DescentAlignmentSink& alsink, // alignment sink + DescentMetrics& met, // metrics + PerReadMetrics& prm) // per-read metrics +{ + // We assume this descent has been popped off the heap. We'll re-add it if + // it hasn't been exhausted. 
+ assert(q.repOk()); + assert(!empty()); + assert(!out_.empty()); + while(!out_.empty()) { + DescentPriority best = out_.bestPri(); + DescentEdge e = out_.rotate(); + TReadOff al5pi_new = al5pi_, al5pf_new = al5pf_; + bool fw = rs[rid_].fw; + bool toward3p = (l2r_ == fw); + TReadOff edoff = e.off5p; // 5' offset of edit + assert_leq(edoff, al5pf_ + 1); + assert_geq(edoff + 1, al5pi_); + if(out_.empty()) { + if(!lastRecalc_) { + // This might allocate new Descents + recalcOutgoing(q, sc, minsc, maxpen, re, pf, rs, cs, prm); + if(empty()) { + // Could happen, since some outgoing edges may have become + // redundant in the meantime. + break; + } + } else { + assert(empty()); + } + } + TReadOff doff; // edit's offset into this descent + int chr = asc2dna[e.e.chr]; + // hitEnd is set to true iff this edit pushes us to the extreme 5' or 3' + // end of the alignment + bool hitEnd = false; + // done is set to true iff this edit aligns the only remaining character of + // the read + bool done = false; + if(toward3p) { + // The 3' extreme of the new Descent is further in (away from the 3' + // end) than the parent's. + al5pf_new = doff = edoff; + if(e.e.isReadGap()) { + // We didn't actually consume the read character at 'edoff', so + // retract al5pf_new by one position. This doesn't effect the + // "depth" (doff) of the SA range we took, though. + assert_gt(al5pf_new, 0); + al5pf_new--; + } + assert_lt(al5pf_new, q.length()); + hitEnd = (al5pf_new == q.length() - 1); + done = (hitEnd && al5pi_new == 0); + assert_geq(doff, off5p_i_); + doff = doff - off5p_i_; + assert_leq(doff, len_); + } else { + // The 5' extreme of the new Descent is further in (away from the 5' + // end) than the parent's. + al5pi_new = doff = edoff; + if(e.e.isReadGap()) { + // We didn't actually consume the read character at 'edoff', so + // move al5pi_new closer to the 3' end by one position. This + // doesn't effect the "depth" (doff) of the SA range we took, + // though. 
+ al5pi_new++; + } + hitEnd = (al5pi_new == 0); + done = (hitEnd && al5pf_new == q.length() - 1); + assert_geq(off5p_i_, doff); + doff = off5p_i_ - doff; + assert_leq(doff, len_); + } + // Check if this is redundant with an already-explored path + bool l2r = l2r_; // gets overridden if we bounce + if(!done && hitEnd) { + // Alignment finsihed extending in one direction + l2r = !l2r; + } + size_t dfsz = df.size(); + size_t pfsz = pf.size(); + TIndexOffU topf, botf, topb, botb; + size_t d = posid_ + doff; + if(e.e.isRefGap()) { + d--; // might underflow + if(doff == 0) { + topf = topf_; + botf = botf_; + topb = topb_; + botb = botb_; + d = std::numeric_limits::max(); + assert_eq(botf-topf, botb-topb); + } else { + assert_gt(al5pf_new, 0); + assert_gt(d, 0); + chr = pf[d].c; + assert(pf[d].inited()); + assert_range(0, 3, chr); + topf = pf[d].topf[chr]; + botf = pf[d].botf[chr]; + topb = pf[d].topb[chr]; + botb = pf[d].botb[chr]; + assert_eq(botf-topf, botb-topb); + } + } else { + // A read gap or a mismatch + assert(pf[d].inited()); + topf = pf[d].topf[chr]; + botf = pf[d].botf[chr]; + topb = pf[d].topb[chr]; + botb = pf[d].botb[chr]; + assert_eq(botf-topf, botb-topb); + } + assert_eq(d, e.d); + assert_eq(topf, e.topf); + assert_eq(botf, e.botf); + assert_eq(topb, e.topb); + assert_eq(botb, e.botb); + if(done) { + // Aligned the entire read end-to-end. Presumably there's no need to + // create a new Descent object. We just report the alignment. 
+ alsink.reportAlignment( + q, // query + gfmFw, // forward index + gfmBw, // backward index + topf, // top of SA range in forward index + botf, // bottom of SA range in forward index + topb, // top of SA range in backward index + botb, // bottom of SA range in backward index + descid_, // Descent at the leaf + rid_, // root id + e.e, // extra edit, if necessary + best.pen, // penalty + df, // factory with Descent + pf, // factory with DescentPoss + rs, // roots + cs); // configs + assert(alsink.repOk()); + return; + } + assert(al5pi_new != 0 || al5pf_new != q.length() - 1); + TDescentId id = df.alloc(); + bool succ = df[id].init( + q, // query + rid_, // root id + sc, // scoring scheme + minsc, // minimum score + maxpen, // maximum penalty + al5pi_new, // new near-5' extreme + al5pf_new, // new far-5' extreme + topf, // SA range top in FW index + botf, // SA range bottom in FW index + topb, // SA range top in BW index + botb, // SA range bottom in BW index + l2r, // direction this descent will go in + id, // my ID + descid_, // parent ID + best.pen, // total penalties so far + e.e, // edit for incoming edge; uninitialized if bounced + gfmFw, // forward index + gfmBw, // mirror index + re, // redundancy checker + df, // Descent factory + pf, // DescentPos factory + rs, // DescentRoot list + cs, // DescentConfig list + heap, // heap + alsink, // alignment sink + met, // metrics + prm); // per-read metrics + if(!succ) { + // Reclaim memory we had used for this descent and its DescentPos info + df.resize(dfsz); + pf.resize(pfsz); + } + break; + } + if(!empty()) { + // Re-insert this Descent with its new priority + heap.insert(make_pair(out_.bestPri(), descid_)); + } +} + +/** + * Given the forward and backward indexes, and given topf/botf/topb/botb, get + * tloc, bloc ready for the next step. 
+ */ +template +void Descent::nextLocsBi( + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + SideLocus& tloc, // top locus + SideLocus& bloc, // bot locus + index_t topf, // top in BWT + index_t botf, // bot in BWT + index_t topb, // top in BWT' + index_t botb) // bot in BWT' +{ + assert_gt(botf, 0); + // Which direction are we going in next? + if(l2r_) { + // Left to right; use BWT' + if(botb - topb == 1) { + // Already down to 1 row; just init top locus + tloc.initFromRow(topb, gfmBw.gh(), gfmBw.gfm()); + bloc.invalidate(); + } else { + SideLocus::initFromTopBot( + topb, botb, gfmBw.gh(), gfmBw.gfm(), tloc, bloc); + assert(bloc.valid()); + } + } else { + // Right to left; use BWT + if(botf - topf == 1) { + // Already down to 1 row; just init top locus + tloc.initFromRow(topf, gfmFw.gh(), gfmFw.gfm()); + bloc.invalidate(); + } else { + SideLocus::initFromTopBot( + topf, botf, gfmFw.gh(), gfmFw.gfm(), tloc, bloc); + assert(bloc.valid()); + } + } + // Check if we should update the tracker with this refinement + assert(botf - topf == 1 || bloc.valid()); + assert(botf - topf > 1 || !bloc.valid()); +} + +/** + * Advance this descent by following read matches as far as possible. + * + * This routine doesn't have to consider the whole gamut of constraints on + * which outgoing edges can be followed. If it is a root descent, it does have + * to know how deep the no-edit constraint goes, though, so we can decide + * whether using the ftab would potentially jump over relevant branch points. + * Apart from that, though, we simply proceed as far as it can go by matching + * characters in the query, irrespective of the constraints. + * recalcOutgoing(...) and followBestOutgoing(...) do have to consider these + * constraints, though. + * + * Conceptually, as we make descending steps, we have: + * 1. Before each step, a single range indicating how we departed the previous + * step + * 2. 
As part of each step, a quad of ranges indicating what range would result + * if we proceeded on an a, c, g ot t + * + * Return true iff it is possible to branch from this descent. If we haven't + * exceeded the no-branch depth. + */ +template +bool Descent::followMatches( + const Read& q, // query string + const Scoring& sc, // scoring scheme + const GFM& gfmFw, // forward index + const GFM& gfmBw, // mirror index + DescentRedundancyChecker& re, // redundancy checker + EFactory >& df, // Descent factory + EFactory& pf, // DescentPos factory + const EList& rs, // roots + const EList& cs, // configs + EHeap& heap, // heap + DescentAlignmentSink& alsink, // alignment sink + DescentMetrics& met, // metrics + PerReadMetrics& prm, // per-read metrics + bool& branches, // out: true -> there are > 0 ways to branch + bool& hitEnd, // out: true -> hit read end with non-empty range + bool& done, // out: true -> we made a full alignment + TReadOff& off5p_i, // out: initial 5' offset + TIndexOffU& topf_bounce, // out: top of SA range for fw idx for bounce + TIndexOffU& botf_bounce, // out: bot of SA range for fw idx for bounce + TIndexOffU& topb_bounce, // out: top of SA range for bw idx for bounce + TIndexOffU& botb_bounce) // out: bot of SA range for bw idx for bounce +{ + // TODO: make these full-fledged parameters + size_t nobranchDepth = 20; + bool stopOnN = true; + assert(q.repOk()); + assert(repOk(&q)); + assert_eq(gfmFw.eh().ftabChars(), gfmBw.gh().ftabChars()); +#ifndef NDEBUG + for(int i = 0; i < 4; i++) { + assert_eq(gfmFw.fchr()[i], gfmBw.fchr()[i]); + } +#endif + SideLocus tloc, bloc; + TIndexOffU topf = topf_, botf = botf_, topb = topb_, botb = botb_; + bool fw = rs[rid_].fw; + bool toward3p; + size_t off5p; + assert_lt(al5pi_, q.length()); + assert_lt(al5pf_, q.length()); + while(true) { + toward3p = (l2r_ == fw); + assert_geq(al5pf_, al5pi_); + assert(al5pi_ != 0 || al5pf_ != q.length() - 1); + if(toward3p) { + if(al5pf_ == q.length()-1) { + l2r_ = !l2r_; + 
continue; + } + if(al5pi_ == al5pf_ && root()) { + off5p = off5p_i = al5pi_; + } else { + off5p = off5p_i = (al5pf_ + 1); + } + } else { + if(al5pi_ == 0) { + l2r_ = !l2r_; + continue; + } + assert_gt(al5pi_, 0); + if(al5pi_ == al5pf_ && root()) { + off5p = off5p_i = al5pi_; + } else { + off5p = off5p_i = (al5pi_ - 1); + } + } + break; + } + size_t off3p = q.length() - off5p - 1; + assert_lt(off5p, q.length()); + assert_lt(off3p, q.length()); + bool firstPos = true; + assert_eq(0, len_); + + // Number of times pf.alloc() is called. So we can sanity check it. + size_t nalloc = 0; + // Set to true as soon as we encounter a branch point along this descent. + branches = false; + // hitEnd is set to true iff this edit pushes us to the extreme 5' or 3' + // end of the alignment + hitEnd = false; + // done is set to true iff this edit aligns the only remaining character of + // the read + done = false; + if(root()) { + assert_eq(al5pi_, al5pf_); + // Check whether/how far we can jump using ftab + int ftabLen = gfmFw.gh().ftabChars(); + bool ftabFits = true; + if(toward3p && ftabLen + off5p > q.length()) { + ftabFits = false; + } else if(!toward3p && off5p < (size_t)ftabLen) { + ftabFits = false; + } + bool useFtab = ftabLen > 1 && (size_t)ftabLen <= nobranchDepth && ftabFits; + bool ftabFailed = false; + if(useFtab) { + prm.nFtabs++; + // Forward index: right-to-left + size_t off_r2l = fw ? off5p : q.length() - off5p - 1; + if(l2r_) { + // + } else { + assert_geq((int)off_r2l, ftabLen - 1); + off_r2l -= (ftabLen - 1); + } + bool ret = gfmFw.ftabLoHi(fw ? q.patFw : q.patRc, off_r2l, + false, // reverse + topf, botf); + if(!ret) { + // Encountered an N or something else that made it impossible + // to use the ftab + ftabFailed = true; + } else { + if(botf - topf == 0) { + return false; + } + int c_r2l = fw ? q.patFw[off_r2l] : q.patRc[off_r2l]; + // Backward index: left-to-right + size_t off_l2r = fw ? 
off5p : q.length() - off5p - 1; + if(l2r_) { + // + } else { + assert_geq((int)off_l2r, ftabLen - 1); + off_l2r -= (ftabLen - 1); + } + ASSERT_ONLY(bool ret2 = ) + gfmBw.ftabLoHi(fw ? q.patFw : q.patRc, off_l2r, + false, // don't reverse + topb, botb); + assert(ret == ret2); + int c_l2r = fw ? q.patFw[off_l2r + ftabLen - 1] : + q.patRc[off_l2r + ftabLen - 1]; + assert_eq(botf - topf, botb - topb); + if(toward3p) { + assert_geq((int)off3p, ftabLen - 1); + off5p += ftabLen; off3p -= ftabLen; + } else { + assert_geq((int)off5p, ftabLen - 1); + off5p -= ftabLen; off3p += ftabLen; + } + len_ += ftabLen; + if(toward3p) { + // By convention, al5pf_ and al5pi_ start out equal, so we only + // advance al5pf_ by ftabLen - 1 (not ftabLen) + al5pf_ += (ftabLen - 1); // -1 accounts for inclusive al5pf_ + if(al5pf_ == q.length() - 1) { + hitEnd = true; + done = (al5pi_ == 0); + } + } else { + // By convention, al5pf_ and al5pi_ start out equal, so we only + // advance al5pi_ by ftabLen - 1 (not ftabLen) + al5pi_ -= (ftabLen - 1); + if(al5pi_ == 0) { + hitEnd = true; + done = (al5pf_ == q.length()-1); + } + } + // Allocate DescentPos data structures and leave them empty. We + // jumped over them by doing our lookup in the ftab, so we have no + // info about outgoing edges from them, besides the matching + // outgoing edge from the last pos which is in topf/botf and + // topb/botb. + size_t id = 0; + if(firstPos) { + posid_ = pf.alloc(); + pf[posid_].reset(); + firstPos = false; + for(int i = 1; i < ftabLen; i++) { + id = pf.alloc(); + pf[id].reset(); + } + } else { + for(int i = 0; i < ftabLen; i++) { + id = pf.alloc(); + pf[id].reset(); + } + } + assert_eq(botf-topf, botb-topb); + pf[id].c = l2r_ ? c_l2r : c_r2l; + pf[id].topf[l2r_ ? c_l2r : c_r2l] = topf; + pf[id].botf[l2r_ ? c_l2r : c_r2l] = botf; + pf[id].topb[l2r_ ? c_l2r : c_r2l] = topb; + pf[id].botb[l2r_ ? 
c_l2r : c_r2l] = botb; + assert(pf[id].inited()); + nalloc += ftabLen; + } + } + if(!useFtab || ftabFailed) { + // Can't use ftab, use fchr instead + int rdc = q.getc(off5p, fw); + // If rdc is N, that's pretty bad! That means we placed a root + // right on an N. The only thing we can reasonably do is to pick a + // nucleotide at random and proceed. + if(rdc > 3) { + return false; + } + assert_range(0, 3, rdc); + topf = topb = gfmFw.fchr()[rdc]; + botf = botb = gfmFw.fchr()[rdc+1]; + if(botf - topf == 0) { + return false; + } + if(toward3p) { + off5p++; off3p--; + } else { + off5p--; off3p++; + } + len_++; + if(toward3p) { + if(al5pf_ == q.length()-1) { + hitEnd = true; + done = (al5pi_ == 0); + } + } else { + if(al5pi_ == 0) { + hitEnd = true; + done = (al5pf_ == q.length()-1); + } + } + // Allocate DescentPos data structure. We could fill it with the + // four ranges from fchr if we wanted to, but that will never be + // relevant. + size_t id = 0; + if(firstPos) { + posid_ = id = pf.alloc(); + firstPos = false; + } else { + id = pf.alloc(); + } + assert_eq(botf-topf, botb-topb); + pf[id].c = rdc; + pf[id].topf[rdc] = topf; + pf[id].botf[rdc] = botf; + pf[id].topb[rdc] = topb; + pf[id].botb[rdc] = botb; + assert(pf[id].inited()); + nalloc++; + } + assert_gt(botf, topf); + assert_eq(botf - topf, botb - topb); + // Check if this is redundant with an already-explored path + if(!re.check(fw, l2r_, al5pi_, al5pf_, al5pf_ - al5pi_ + 1 + gapadd_, + topf, botf, pen_)) + { + prm.nRedSkip++; + return false; + } + prm.nRedFail++; // not pruned by redundancy list + prm.nRedIns++; // inserted into redundancy list + } + if(done) { + Edit eempty; + alsink.reportAlignment( + q, // query + gfmFw, // forward index + gfmBw, // backward index + topf, // top of SA range in forward index + botf, // bottom of SA range in forward index + topb, // top of SA range in backward index + botb, // bottom of SA range in backward index + descid_, // Descent at the leaf + rid_, // root id + 
eempty, // extra edit, if necessary + pen_, // penalty + df, // factory with Descent + pf, // factory with DescentPoss + rs, // roots + cs); // configs + assert(alsink.repOk()); + return true; + } else if(hitEnd) { + assert(botf > 0 || topf > 0); + assert_gt(botf, topf); + topf_bounce = topf; + botf_bounce = botf; + topb_bounce = topb; + botb_bounce = botb; + return true; // Bounced + } + // We just advanced either ftabLen characters, or 1 character, + // depending on whether we used ftab or fchr. + nextLocsBi(gfmFw, gfmBw, tloc, bloc, topf, botf, topb, botb); + assert(tloc.valid()); + assert(botf - topf == 1 || bloc.valid()); + assert(botf - topf > 1 || !bloc.valid()); + TIndexOffU t[4], b[4]; // dest BW ranges + TIndexOffU tp[4], bp[4]; // dest BW ranges for "prime" index + ASSERT_ONLY(TIndexOff lasttot = botf - topf); + bool fail = false; + while(!fail && !hitEnd) { + assert(!done); + int rdc = q.getc(off5p, fw); + int rdq = q.getq(off5p); + assert_range(0, 4, rdc); + assert_gt(botf, topf); + assert(botf - topf == 1 || bloc.valid()); + assert(botf - topf > 1 || !bloc.valid()); + assert(tloc.valid()); + TIndexOffU width = botf - topf; + bool ltr = l2r_; + const GFM& gfm = ltr ? gfmBw : gfmFw; + t[0] = t[1] = t[2] = t[3] = b[0] = b[1] = b[2] = b[3] = 0; + int only = -1; // if we only get 1 non-empty range, this is the char + size_t nopts = 1; + if(bloc.valid()) { + // Set up initial values for the primes + if(ltr) { + tp[0] = tp[1] = tp[2] = tp[3] = topf; + bp[0] = bp[1] = bp[2] = bp[3] = botf; + } else { + tp[0] = tp[1] = tp[2] = tp[3] = topb; + bp[0] = bp[1] = bp[2] = bp[3] = botb; + } + // Range delimited by tloc/bloc has size >1. 
If size == 1, + // we use a simpler query (see if(!bloc.valid()) blocks below) + met.bwops++; + met.bwops_bi++; + prm.nSdFmops++; + if(prm.doFmString) { + prm.fmString.add(false, pen_, 1); + } + gfm.mapBiLFEx(tloc, bloc, t, b, tp, bp); + // t, b, tp and bp now filled + ASSERT_ONLY(TIndexOffU tot = (b[0]-t[0])+(b[1]-t[1])+(b[2]-t[2])+(b[3]-t[3])); + ASSERT_ONLY(TIndexOffU totp = (bp[0]-tp[0])+(bp[1]-tp[1])+(bp[2]-tp[2])+(bp[3]-tp[3])); + assert_eq(tot, totp); + assert_leq(tot, lasttot); + ASSERT_ONLY(lasttot = tot); + fail = (rdc > 3 || b[rdc] <= t[rdc]); + size_t nopts = 0; + if(b[0] > t[0]) { nopts++; only = 0; } + if(b[1] > t[1]) { nopts++; only = 1; } + if(b[2] > t[2]) { nopts++; only = 2; } + if(b[3] > t[3]) { nopts++; only = 3; } + if(!fail && b[rdc] - t[rdc] < width) { + branches = true; + } + } else { + tp[0] = tp[1] = tp[2] = tp[3] = bp[0] = bp[1] = bp[2] = bp[3] = 0; + // Range delimited by tloc/bloc has size 1 + TIndexOffU ntop = ltr ? topb : topf; + met.bwops++; + met.bwops_1++; + prm.nSdFmops++; + if(prm.doFmString) { + prm.fmString.add(false, pen_, 1); + } + int cc = gfm.mapLF1(ntop, tloc); + assert_range(-1, 3, cc); + fail = (cc != rdc); + if(fail) { + branches = true; + } + if(cc >= 0) { + only = cc; + t[cc] = ntop; b[cc] = ntop+1; + tp[cc] = ltr ? topf : topb; + bp[cc] = ltr ? botf : botb; + } + } + // Now figure out what to do with our N. + int origRdc = rdc; + if(rdc == 4) { + fail = true; + } else { + topf = ltr ? tp[rdc] : t[rdc]; + botf = ltr ? bp[rdc] : b[rdc]; + topb = ltr ? t[rdc] : tp[rdc]; + botb = ltr ? b[rdc] : bp[rdc]; + assert_eq(botf - topf, botb - topb); + } + // The trouble with !stopOnN is that we don't have a way to store the N + // edits. There could be several per Descent. + if(rdc == 4 && !stopOnN && nopts == 1) { + fail = false; + rdc = only; + int pen = sc.n(rdq); + assert_gt(pen, 0); + pen_ += pen; + } + assert_range(0, 4, origRdc); + assert_range(0, 4, rdc); + // If 'fail' is true, we failed to align this read character. 
We still + // install the SA ranges into the DescentPos and increment len_ in this + // case. + + // Convert t, tp, b, bp info tf, bf, tb, bb + TIndexOffU *tf = ltr ? tp : t; + TIndexOffU *bf = ltr ? bp : b; + TIndexOffU *tb = ltr ? t : tp; + TIndexOffU *bb = ltr ? b : bp; + // Allocate DescentPos data structure. + if(firstPos) { + posid_ = pf.alloc(); + firstPos = false; + } else { + pf.alloc(); + } + nalloc++; + pf[posid_ + len_].reset(); + pf[posid_ + len_].c = origRdc; + for(size_t i = 0; i < 4; i++) { + pf[posid_ + len_].topf[i] = tf[i]; + pf[posid_ + len_].botf[i] = bf[i]; + pf[posid_ + len_].topb[i] = tb[i]; + pf[posid_ + len_].botb[i] = bb[i]; + assert_eq(pf[posid_ + len_].botf[i] - pf[posid_ + len_].topf[i], + pf[posid_ + len_].botb[i] - pf[posid_ + len_].topb[i]); + } + if(!fail) { + // Check if this is redundant with an already-explored path + size_t al5pf = al5pf_, al5pi = al5pi_; + if(toward3p) { + al5pf++; + } else { + al5pi--; + } + fail = !re.check(fw, l2r_, al5pi, al5pf, + al5pf - al5pi + 1 + gapadd_, topf, botf, pen_); + if(fail) { + prm.nRedSkip++; + } else { + prm.nRedFail++; // not pruned by redundancy list + prm.nRedIns++; // inserted into redundancy list + } + } + if(!fail) { + len_++; + if(toward3p) { + al5pf_++; + off5p++; + off3p--; + if(al5pf_ == q.length() - 1) { + hitEnd = true; + done = (al5pi_ == 0); + } + } else { + assert_gt(al5pi_, 0); + al5pi_--; + off5p--; + off3p++; + if(al5pi_ == 0) { + hitEnd = true; + done = (al5pf_ == q.length() - 1); + } + } + } + if(!fail && !hitEnd) { + nextLocsBi(gfmFw, gfmBw, tloc, bloc, tf[rdc], bf[rdc], tb[rdc], bb[rdc]); + } + } + assert_geq(al5pf_, al5pi_); + assert(!root() || al5pf_ - al5pi_ + 1 == nalloc || al5pf_ - al5pi_ + 2 == nalloc); + assert_geq(pf.size(), nalloc); + if(done) { + Edit eempty; + alsink.reportAlignment( + q, // query + gfmFw, // forward index + gfmBw, // backward index + topf, // top of SA range in forward index + botf, // bottom of SA range in forward index + topb, // top of 
SA range in backward index + botb, // bottom of SA range in backward index + descid_, // Descent at the leaf + rid_, // root id + eempty, // extra edit, if necessary + pen_, // penalty + df, // factory with Descent + pf, // factory with DescentPoss + rs, // roots + cs); // configs + assert(alsink.repOk()); + return true; + } else if(hitEnd) { + assert(botf > 0 || topf > 0); + assert_gt(botf, topf); + topf_bounce = topf; + botf_bounce = botf; + topb_bounce = topb; + botb_bounce = botb; + return true; // Bounced + } + assert(repOk(&q)); + assert(!hitEnd || topf_bounce > 0 || botf_bounce > 0); + return true; +} + +#endif diff --git a/aligner_seed_policy.cpp b/aligner_seed_policy.cpp new file mode 100644 index 0000000..204e66e --- /dev/null +++ b/aligner_seed_policy.cpp @@ -0,0 +1,916 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#include +#include +#include +#include +#include "ds.h" +#include "aligner_seed_policy.h" +#include "mem_ids.h" + +using namespace std; + +static int parseFuncType(const std::string& otype) { + string type = otype; + if(type == "C" || type == "Constant") { + return SIMPLE_FUNC_CONST; + } else if(type == "L" || type == "Linear") { + return SIMPLE_FUNC_LINEAR; + } else if(type == "S" || type == "Sqrt") { + return SIMPLE_FUNC_SQRT; + } else if(type == "G" || type == "Log") { + return SIMPLE_FUNC_LOG; + } + std::cerr << "Error: Bad function type '" << otype.c_str() + << "'. Should be C (constant), L (linear), " + << "S (square root) or G (natural log)." << std::endl; + throw 1; +} + +#define PARSE_FUNC(fv) { \ + if(ctoks.size() >= 1) { \ + fv.setType(parseFuncType(ctoks[0])); \ + } \ + if(ctoks.size() >= 2) { \ + double co; \ + istringstream tmpss(ctoks[1]); \ + tmpss >> co; \ + fv.setConst(co); \ + } \ + if(ctoks.size() >= 3) { \ + double ce; \ + istringstream tmpss(ctoks[2]); \ + tmpss >> ce; \ + fv.setCoeff(ce); \ + } \ + if(ctoks.size() >= 4) { \ + double mn; \ + istringstream tmpss(ctoks[3]); \ + tmpss >> mn; \ + fv.setMin(mn); \ + } \ + if(ctoks.size() >= 5) { \ + double mx; \ + istringstream tmpss(ctoks[4]); \ + tmpss >> mx; \ + fv.setMin(mx); \ + } \ +} + +/** + * Parse alignment policy when provided in this format: + * =;=;=... + * + * And label=value possibilities are: + * + * Bonus for a match + * ----------------- + * + * MA=xx (default: MA=0, or MA=2 if --local is set) + * + * xx = Each position where equal read and reference characters match up + * in the alignment contriubtes this amount to the total score. + * + * Penalty for a mismatch + * ---------------------- + * + * MMP={Cxx|Q|RQ} (default: MMP=C6) + * + * Cxx = Each mismatch costs xx. If MMP=Cxx is specified, quality + * values are ignored when assessing penalities for mismatches. + * Q = Each mismatch incurs a penalty equal to the mismatched base's + * value. 
+ * R = Each mismatch incurs a penalty equal to the mismatched base's + * rounded quality value. Qualities are rounded off to the + * nearest 10, and qualities greater than 30 are rounded to 30. + * + * Penalty for position with N (in either read or reference) + * --------------------------------------------------------- + * + * NP={Cxx|Q|RQ} (default: NP=C1) + * + * Cxx = Each alignment position with an N in either the read or the + * reference costs xx. If NP=Cxx is specified, quality values are + * ignored when assessing penalties for Ns. + * Q = Each alignment position with an N in either the read or the + * reference incurs a penalty equal to the read base's quality + * value. + * R = Each alignment position with an N in either the read or the + * reference incurs a penalty equal to the read base's rounded + * quality value. Qualities are rounded off to the nearest 10, + * and qualities greater than 30 are rounded to 30. + * + * Penalty for a read gap + * ---------------------- + * + * RDG=xx,yy (default: RDG=5,3) + * + * xx = Read gap open penalty. + * yy = Read gap extension penalty. + * + * Total cost incurred by a read gap = xx + (yy * gap length) + * + * Penalty for a reference gap + * --------------------------- + * + * RFG=xx,yy (default: RFG=5,3) + * + * xx = Reference gap open penalty. + * yy = Reference gap extension penalty. + * + * Total cost incurred by a reference gap = xx + (yy * gap length) + * + * Minimum score for valid alignment + * --------------------------------- + * + * MIN=xx,yy (defaults: MIN=-0.6,-0.6, or MIN=0.0,0.66 if --local is set) + * + * xx,yy = For a read of length N, the total score must be at least + * xx + (read length * yy) for the alignment to be valid. The + * total score is the sum of all negative penalties (from + * mismatches and gaps) and all positive bonuses. The minimum + * can be negative (and is by default in global alignment mode). 
+ * + * Score floor for local alignment + * ------------------------------- + * + * FL=xx,yy (defaults: FL=-Infinity,0.0, or FL=0.0,0.0 if --local is set) + * + * xx,yy = If a cell in the dynamic programming table has a score less + * than xx + (read length * yy), then no valid alignment can go + * through it. Defaults are highly recommended. + * + * N ceiling + * --------- + * + * NCEIL=xx,yy (default: NCEIL=0.0,0.15) + * + * xx,yy = For a read of length N, the number of alignment + * positions with an N in either the read or the + * reference cannot exceed + * ceiling = xx + (read length * yy). If the ceiling is + * exceeded, the alignment is considered invalid. + * + * Seeds + * ----- + * + * SEED=mm,len,ival (default: SEED=0,22) + * + * mm = Maximum number of mismatches allowed within a seed. + * Must be >= 0 and <= 2. Note that 2-mismatch mode is + * not fully sensitive; i.e. some 2-mismatch seed + * alignments may be missed. + * len = Length of seed. + * ival = Interval between seeds. If not specified, seed + * interval is determined by IVAL. + * + * Seed interval + * ------------- + * + * IVAL={L|S|C},xx,yy (default: IVAL=S,1.0,0.0) + * + * L = let interval between seeds be a linear function of the + * read length. xx and yy are the constant and linear + * coefficients respectively. In other words, the interval + * equals a * len + b, where len is the read length. + * Intervals less than 1 are rounded up to 1. + * S = let interval between seeds be a function of the square + * root of the read length. xx and yy are the + * coefficients. In other words, the interval equals + * a * sqrt(len) + b, where len is the read length. + * Intervals less than 1 are rounded up to 1. + * C = Like S but uses cube root of length instead of square + * root. 
+ * + * Example 1: + * + * SEED=1,10,5 and read sequence is TGCTATCGTACGATCGTAC: + * + * The following seeds are extracted from the forward + * representation of the read and aligned to the reference + * allowing up to 1 mismatch: + * + * Read: TGCTATCGTACGATCGTACA + * + * Seed 1+: TGCTATCGTA + * Seed 2+: TCGTACGATC + * Seed 3+: CGATCGTACA + * + * ...and the following are extracted from the reverse-complement + * representation of the read and align to the reference allowing + * up to 1 mismatch: + * + * Seed 1-: TACGATAGCA + * Seed 2-: GATCGTACGA + * Seed 3-: TGTACGATCG + * + * Example 2: + * + * SEED=1,20,20 and read sequence is TGCTATCGTACGATC. The seed + * length is 20 but the read is only 15 characters long. In this + * case, Bowtie2 automatically shrinks the seed length to be equal + * to the read length. + * + * Read: TGCTATCGTACGATC + * + * Seed 1+: TGCTATCGTACGATC + * Seed 1-: GATCGTACGATAGCA + * + * Example 3: + * + * SEED=1,10,10 and read sequence is TGCTATCGTACGATC. Only one seed + * fits on the read; a second seed would overhang the end of the read + * by 5 positions. In this case, Bowtie2 extracts one seed. + * + * Read: TGCTATCGTACGATC + * + * Seed 1+: TGCTATCGTA + * Seed 1-: TACGATAGCA + */ +void SeedAlignmentPolicy::parseString( + const std::string& s, + bool local, + bool noisyHpolymer, + bool ignoreQuals, + int& bonusMatchType, + int& bonusMatch, + int& penMmcType, + int& penMmcMax, + int& penMmcMin, + int& penScMax, + int& penScMin, + int& penNType, + int& penN, + int& penRdExConst, + int& penRfExConst, + int& penRdExLinear, + int& penRfExLinear, + SimpleFunc& costMin, + SimpleFunc& nCeil, + bool& nCatPair, + int& multiseedMms, + int& multiseedLen, + SimpleFunc& multiseedIval, + size_t& failStreak, + size_t& seedRounds, + SimpleFunc* penCanIntronLen, + SimpleFunc* penNoncanIntronLen) +{ + + bonusMatchType = local ? DEFAULT_MATCH_BONUS_TYPE_LOCAL : DEFAULT_MATCH_BONUS_TYPE; + bonusMatch = local ? 
DEFAULT_MATCH_BONUS_LOCAL : DEFAULT_MATCH_BONUS; + penMmcType = ignoreQuals ? DEFAULT_MM_PENALTY_TYPE_IGNORE_QUALS : + DEFAULT_MM_PENALTY_TYPE; + penMmcMax = DEFAULT_MM_PENALTY_MAX; + penMmcMin = DEFAULT_MM_PENALTY_MIN; + penNType = DEFAULT_N_PENALTY_TYPE; + penN = DEFAULT_N_PENALTY; + + penScMax = DEFAULT_SC_PENALTY_MAX; + penScMin = DEFAULT_SC_PENALTY_MIN; + + const double DMAX = std::numeric_limits::max(); + costMin.init( + local ? SIMPLE_FUNC_LOG : SIMPLE_FUNC_LINEAR, + local ? DEFAULT_MIN_CONST_LOCAL : 0.0f, + local ? DEFAULT_MIN_LINEAR_LOCAL : -0.2f); + nCeil.init( + SIMPLE_FUNC_LINEAR, 0.0f, DMAX, + DEFAULT_N_CEIL_CONST, DEFAULT_N_CEIL_LINEAR); + multiseedIval.init( + DEFAULT_IVAL, 1.0f, DMAX, + DEFAULT_IVAL_B, DEFAULT_IVAL_A); + nCatPair = DEFAULT_N_CAT_PAIR; + + if(!noisyHpolymer) { + penRdExConst = DEFAULT_READ_GAP_CONST; + penRdExLinear = DEFAULT_READ_GAP_LINEAR; + penRfExConst = DEFAULT_REF_GAP_CONST; + penRfExLinear = DEFAULT_REF_GAP_LINEAR; + } else { + penRdExConst = DEFAULT_READ_GAP_CONST_BADHPOLY; + penRdExLinear = DEFAULT_READ_GAP_LINEAR_BADHPOLY; + penRfExConst = DEFAULT_REF_GAP_CONST_BADHPOLY; + penRfExLinear = DEFAULT_REF_GAP_LINEAR_BADHPOLY; + } + + multiseedMms = DEFAULT_SEEDMMS; + multiseedLen = DEFAULT_SEEDLEN; + + EList toks(MISC_CAT); + string tok; + istringstream ss(s); + int setting = 0; + // Get each ;-separated token + while(getline(ss, tok, ';')) { + setting++; + EList etoks(MISC_CAT); + string etok; + // Divide into tokens on either side of = + istringstream ess(tok); + while(getline(ess, etok, '=')) { + etoks.push_back(etok); + } + // Must be exactly 1 = + if(etoks.size() != 2) { + cerr << "Error parsing alignment policy setting " << setting + << "; must be bisected by = sign" << endl + << "Policy: " << s.c_str() << endl; + assert(false); throw 1; + } + // LHS is tag, RHS value + string tag = etoks[0], val = etoks[1]; + // Separate value into comma-separated tokens + EList ctoks(MISC_CAT); + string ctok; + istringstream css(val); + 
while(getline(css, ctok, ',')) { + ctoks.push_back(ctok); + } + if(ctoks.size() == 0) { + cerr << "Error parsing alignment policy setting " << setting + << "; RHS must have at least 1 token" << endl + << "Policy: " << s.c_str() << endl; + assert(false); throw 1; + } + for(size_t i = 0; i < ctoks.size(); i++) { + if(ctoks[i].length() == 0) { + cerr << "Error parsing alignment policy setting " << setting + << "; token " << i+1 << " on RHS had length=0" << endl + << "Policy: " << s.c_str() << endl; + assert(false); throw 1; + } + } + // Bonus for a match + // MA=xx (default: MA=0, or MA=10 if --local is set) + if(tag == "MA") { + if(ctoks.size() != 1) { + cerr << "Error parsing alignment policy setting " << setting + << "; RHS must have 1 token" << endl + << "Policy: " << s.c_str() << endl; + assert(false); throw 1; + } + string tmp = ctoks[0]; + istringstream tmpss(tmp); + tmpss >> bonusMatch; + } + // Scoring for mismatches + // MMP={Cxx|Q|RQ} + // Cxx = constant, where constant is integer xx + // Qxx = equal to quality, scaled + // R = equal to maq-rounded quality value (rounded to nearest + // 10, can't be greater than 30) + else if(tag == "MMP") { + if(ctoks.size() > 3) { + cerr << "Error parsing alignment policy setting " + << "'" << tag.c_str() << "'" + << "; RHS must have at most 3 tokens" << endl + << "Policy: '" << s.c_str() << "'" << endl; + assert(false); throw 1; + } + if(ctoks[0][0] == 'C') { + string tmp = ctoks[0].substr(1); + // Parse constant penalty + istringstream tmpss(tmp); + tmpss >> penMmcMax; + penMmcMin = penMmcMax; + // Parse constant penalty + penMmcType = COST_MODEL_CONSTANT; + } else if(ctoks[0][0] == 'Q') { + if(ctoks.size() >= 2) { + string tmp = ctoks[1]; + istringstream tmpss(tmp); + tmpss >> penMmcMax; + } else { + penMmcMax = DEFAULT_MM_PENALTY_MAX; + } + if(ctoks.size() >= 3) { + string tmp = ctoks[2]; + istringstream tmpss(tmp); + tmpss >> penMmcMin; + } else { + penMmcMin = DEFAULT_MM_PENALTY_MIN; + } + if(penMmcMin > penMmcMax) 
{ + cerr << "Error: Maximum mismatch penalty (" << penMmcMax + << ") is less than minimum penalty (" << penMmcMin + << endl; + throw 1; + } + // Set type to =quality + penMmcType = COST_MODEL_QUAL; + } else if(ctoks[0][0] == 'R') { + // Set type to=Maq-quality + penMmcType = COST_MODEL_ROUNDED_QUAL; + } else { + cerr << "Error parsing alignment policy setting " + << "'" << tag.c_str() << "'" + << "; RHS must start with C, Q or R" << endl + << "Policy: '" << s.c_str() << "'" << endl; + assert(false); throw 1; + } + } + else if(tag == "SCP") { + if(ctoks.size() > 3) { + cerr << "Error parsing alignment policy setting " + << "'" << tag.c_str() << "'" + << "; SCP must have at most 3 tokens" << endl + << "Policy: '" << s.c_str() << "'" << endl; + assert(false); throw 1; + } + istringstream tmpMax(ctoks[1]); + tmpMax >> penScMax; + istringstream tmpMin(ctoks[1]); + tmpMin >> penScMin; + if(penScMin > penScMax) { + cerr << "max (" << penScMax << ") should be >= min (" << penScMin << ")" << endl; + assert(false); throw 1; + } + if(penScMin < 1) { + cerr << "min (" << penScMin << ") should be greater than 0" << endl; + assert(false); throw 1; + } + } + // Scoring for mismatches where read char=N + // NP={Cxx|Q|RQ} + // Cxx = constant, where constant is integer xx + // Q = equal to quality + // R = equal to maq-rounded quality value (rounded to nearest + // 10, can't be greater than 30) + else if(tag == "NP") { + if(ctoks.size() != 1) { + cerr << "Error parsing alignment policy setting " + << "'" << tag.c_str() << "'" + << "; RHS must have 1 token" << endl + << "Policy: '" << s.c_str() << "'" << endl; + assert(false); throw 1; + } + if(ctoks[0][0] == 'C') { + string tmp = ctoks[0].substr(1); + // Parse constant penalty + istringstream tmpss(tmp); + tmpss >> penN; + // Parse constant penalty + penNType = COST_MODEL_CONSTANT; + } else if(ctoks[0][0] == 'Q') { + // Set type to =quality + penNType = COST_MODEL_QUAL; + } else if(ctoks[0][0] == 'R') { + // Set type to=Maq-quality 
+ penNType = COST_MODEL_ROUNDED_QUAL; + } else { + cerr << "Error parsing alignment policy setting " + << "'" << tag.c_str() << "'" + << "; RHS must start with C, Q or R" << endl + << "Policy: '" << s.c_str() << "'" << endl; + assert(false); throw 1; + } + } + // Scoring for read gaps + // RDG=xx,yy,zz + // xx = read gap open penalty + // yy = read gap extension penalty constant coefficient + // (defaults to open penalty) + // zz = read gap extension penalty linear coefficient + // (defaults to 0) + else if(tag == "RDG") { + if(ctoks.size() >= 1) { + istringstream tmpss(ctoks[0]); + tmpss >> penRdExConst; + } else { + penRdExConst = noisyHpolymer ? + DEFAULT_READ_GAP_CONST_BADHPOLY : + DEFAULT_READ_GAP_CONST; + } + if(ctoks.size() >= 2) { + istringstream tmpss(ctoks[1]); + tmpss >> penRdExLinear; + } else { + penRdExLinear = noisyHpolymer ? + DEFAULT_READ_GAP_LINEAR_BADHPOLY : + DEFAULT_READ_GAP_LINEAR; + } + } + // Scoring for reference gaps + // RFG=xx,yy,zz + // xx = ref gap open penalty + // yy = ref gap extension penalty constant coefficient + // (defaults to open penalty) + // zz = ref gap extension penalty linear coefficient + // (defaults to 0) + else if(tag == "RFG") { + if(ctoks.size() >= 1) { + istringstream tmpss(ctoks[0]); + tmpss >> penRfExConst; + } else { + penRfExConst = noisyHpolymer ? + DEFAULT_REF_GAP_CONST_BADHPOLY : + DEFAULT_REF_GAP_CONST; + } + if(ctoks.size() >= 2) { + istringstream tmpss(ctoks[1]); + tmpss >> penRfExLinear; + } else { + penRfExLinear = noisyHpolymer ? 
+ DEFAULT_REF_GAP_LINEAR_BADHPOLY : + DEFAULT_REF_GAP_LINEAR; + } + } + // Minimum score as a function of read length + // MIN=xx,yy + // xx = constant coefficient + // yy = linear coefficient + else if(tag == "MIN") { + PARSE_FUNC(costMin); + } + // Per-read N ceiling as a function of read length + // NCEIL=xx,yy + // xx = N ceiling constant coefficient + // yy = N ceiling linear coefficient (set to 0 if unspecified) + else if(tag == "NCEIL") { + PARSE_FUNC(nCeil); + } + /* + * Seeds + * ----- + * + * SEED=mm,len,ival (default: SEED=0,22) + * + * mm = Maximum number of mismatches allowed within a seed. + * Must be >= 0 and <= 2. Note that 2-mismatch mode is + * not fully sensitive; i.e. some 2-mismatch seed + * alignments may be missed. + * len = Length of seed. + * ival = Interval between seeds. If not specified, seed + * interval is determined by IVAL. + */ + else if(tag == "SEED") { + if(ctoks.size() > 2) { + cerr << "Error parsing alignment policy setting " + << "'" << tag.c_str() << "'; RHS must have 1 or 2 tokens, " + << "had " << ctoks.size() << ". " + << "Policy: '" << s.c_str() << "'" << endl; + assert(false); throw 1; + } + if(ctoks.size() >= 1) { + istringstream tmpss(ctoks[0]); + tmpss >> multiseedMms; + if(multiseedMms > 1) { + cerr << "Error: -N was set to " << multiseedMms << ", but cannot be set greater than 1" << endl; + throw 1; + } + if(multiseedMms < 0) { + cerr << "Error: -N was set to a number less than 0 (" << multiseedMms << ")" << endl; + throw 1; + } + } + if(ctoks.size() >= 2) { + istringstream tmpss(ctoks[1]); + tmpss >> multiseedLen; + } else { + multiseedLen = DEFAULT_SEEDLEN; + } + } + else if(tag == "SEEDLEN") { + if(ctoks.size() > 1) { + cerr << "Error parsing alignment policy setting " + << "'" << tag.c_str() << "'; RHS must have 1 token, " + << "had " << ctoks.size() << ". 
" + << "Policy: '" << s.c_str() << "'" << endl; + assert(false); throw 1; + } + if(ctoks.size() >= 1) { + istringstream tmpss(ctoks[0]); + tmpss >> multiseedLen; + } + } + else if(tag == "DPS") { + if(ctoks.size() > 1) { + cerr << "Error parsing alignment policy setting " + << "'" << tag.c_str() << "'; RHS must have 1 token, " + << "had " << ctoks.size() << ". " + << "Policy: '" << s.c_str() << "'" << endl; + assert(false); throw 1; + } + if(ctoks.size() >= 1) { + istringstream tmpss(ctoks[0]); + tmpss >> failStreak; + } + } + else if(tag == "ROUNDS") { + if(ctoks.size() > 1) { + cerr << "Error parsing alignment policy setting " + << "'" << tag.c_str() << "'; RHS must have 1 token, " + << "had " << ctoks.size() << ". " + << "Policy: '" << s.c_str() << "'" << endl; + assert(false); throw 1; + } + if(ctoks.size() >= 1) { + istringstream tmpss(ctoks[0]); + tmpss >> seedRounds; + } + } + /* + * Seed interval + * ------------- + * + * IVAL={L|S|C},a,b (default: IVAL=S,1.0,0.0) + * + * L = let interval between seeds be a linear function of the + * read length. xx and yy are the constant and linear + * coefficients respectively. In other words, the interval + * equals a * len + b, where len is the read length. + * Intervals less than 1 are rounded up to 1. + * S = let interval between seeds be a function of the sqaure + * root of the read length. xx and yy are the + * coefficients. In other words, the interval equals + * a * sqrt(len) + b, where len is the read length. + * Intervals less than 1 are rounded up to 1. + * C = Like S but uses cube root of length instead of square + * root. 
+ */ + else if(tag == "IVAL") { + PARSE_FUNC(multiseedIval); + } + else if(tag == "CANINTRONLEN") { + assert(penCanIntronLen != NULL); + PARSE_FUNC((*penCanIntronLen)); + } + else if(tag == "NONCANINTRONLEN") { + assert(penNoncanIntronLen != NULL); + PARSE_FUNC((*penNoncanIntronLen)); + } + else { + // Unknown tag + cerr << "Unexpected alignment policy setting " + << "'" << tag.c_str() << "'" << endl + << "Policy: '" << s.c_str() << "'" << endl; + assert(false); throw 1; + } + } +} + +#ifdef ALIGNER_SEED_POLICY_MAIN +int main() { + + int bonusMatchType; + int bonusMatch; + int penMmcType; + int penMmc; + int penScMax; + int penScMin; + int penNType; + int penN; + int penRdExConst; + int penRfExConst; + int penRdExLinear; + int penRfExLinear; + SimpleFunc costMin; + SimpleFunc costFloor; + SimpleFunc nCeil; + bool nCatPair; + int multiseedMms; + int multiseedLen; + SimpleFunc msIval; + SimpleFunc posfrac; + SimpleFunc rowmult; + uint32_t mhits; + + { + cout << "Case 1: Defaults 1 ... "; + const char *pol = ""; + SeedAlignmentPolicy::parseString( + string(pol), + false, // --local? + false, // noisy homopolymers a la 454? + false, // ignore qualities? 
+ bonusMatchType, + bonusMatch, + penMmcType, + penMmc, + penScMax, + penScMin, + penNType, + penN, + penRdExConst, + penRfExConst, + penRdExLinear, + penRfExLinear, + costMin, + costFloor, + nCeil, + nCatPair, + multiseedMms, + multiseedLen, + msIval, + mhits); + + assert_eq(DEFAULT_MATCH_BONUS_TYPE, bonusMatchType); + assert_eq(DEFAULT_MATCH_BONUS, bonusMatch); + assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType); + assert_eq(DEFAULT_MM_PENALTY_MAX, penMmcMax); + assert_eq(DEFAULT_MM_PENALTY_MIN, penMmcMin); + assert_eq(DEFAULT_N_PENALTY_TYPE, penNType); + assert_eq(DEFAULT_N_PENALTY, penN); + assert_eq(DEFAULT_MIN_CONST, costMin.getConst()); + assert_eq(DEFAULT_MIN_LINEAR, costMin.getCoeff()); + assert_eq(DEFAULT_FLOOR_CONST, costFloor.getConst()); + assert_eq(DEFAULT_FLOOR_LINEAR, costFloor.getCoeff()); + assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst()); + assert_eq(DEFAULT_N_CAT_PAIR, nCatPair); + + assert_eq(DEFAULT_READ_GAP_CONST, penRdExConst); + assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear); + assert_eq(DEFAULT_REF_GAP_CONST, penRfExConst); + assert_eq(DEFAULT_REF_GAP_LINEAR, penRfExLinear); + assert_eq(DEFAULT_SEEDMMS, multiseedMms); + assert_eq(DEFAULT_SEEDLEN, multiseedLen); + assert_eq(DEFAULT_IVAL, msIval.getType()); + assert_eq(DEFAULT_IVAL_A, msIval.getCoeff()); + assert_eq(DEFAULT_IVAL_B, msIval.getConst()); + + cout << "PASSED" << endl; + } + + { + cout << "Case 2: Defaults 2 ... "; + const char *pol = ""; + SeedAlignmentPolicy::parseString( + string(pol), + false, // --local? + true, // noisy homopolymers a la 454? + false, // ignore qualities? 
+ bonusMatchType, + bonusMatch, + penMmcType, + penMmc, + + penNType, + penN, + penRdExConst, + penRfExConst, + penRdExLinear, + penRfExLinear, + costMin, + costFloor, + nCeil, + nCatPair, + multiseedMms, + multiseedLen, + msIval, + mhits); + + assert_eq(DEFAULT_MATCH_BONUS_TYPE, bonusMatchType); + assert_eq(DEFAULT_MATCH_BONUS, bonusMatch); + assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType); + assert_eq(DEFAULT_MM_PENALTY_MAX, penMmc); + assert_eq(DEFAULT_MM_PENALTY_MIN, penMmc); + assert_eq(DEFAULT_N_PENALTY_TYPE, penNType); + assert_eq(DEFAULT_N_PENALTY, penN); + assert_eq(DEFAULT_MIN_CONST, costMin.getConst()); + assert_eq(DEFAULT_MIN_LINEAR, costMin.getCoeff()); + assert_eq(DEFAULT_FLOOR_CONST, costFloor.getConst()); + assert_eq(DEFAULT_FLOOR_LINEAR, costFloor.getCoeff()); + assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst()); + assert_eq(DEFAULT_N_CAT_PAIR, nCatPair); + + assert_eq(DEFAULT_READ_GAP_CONST_BADHPOLY, penRdExConst); + assert_eq(DEFAULT_READ_GAP_LINEAR_BADHPOLY, penRdExLinear); + assert_eq(DEFAULT_REF_GAP_CONST_BADHPOLY, penRfExConst); + assert_eq(DEFAULT_REF_GAP_LINEAR_BADHPOLY, penRfExLinear); + assert_eq(DEFAULT_SEEDMMS, multiseedMms); + assert_eq(DEFAULT_SEEDLEN, multiseedLen); + assert_eq(DEFAULT_IVAL, msIval.getType()); + assert_eq(DEFAULT_IVAL_A, msIval.getCoeff()); + assert_eq(DEFAULT_IVAL_B, msIval.getConst()); + + cout << "PASSED" << endl; + } + + { + cout << "Case 3: Defaults 3 ... "; + const char *pol = ""; + SeedAlignmentPolicy::parseString( + string(pol), + true, // --local? + false, // noisy homopolymers a la 454? + false, // ignore qualities? 
+ bonusMatchType, + bonusMatch, + penMmcType, + penMmc, + penNType, + penN, + penRdExConst, + penRfExConst, + penRdExLinear, + penRfExLinear, + costMin, + costFloor, + nCeil, + nCatPair, + multiseedMms, + multiseedLen, + msIval, + mhits); + + assert_eq(DEFAULT_MATCH_BONUS_TYPE_LOCAL, bonusMatchType); + assert_eq(DEFAULT_MATCH_BONUS_LOCAL, bonusMatch); + assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType); + assert_eq(DEFAULT_MM_PENALTY_MAX, penMmcMax); + assert_eq(DEFAULT_MM_PENALTY_MIN, penMmcMin); + assert_eq(DEFAULT_N_PENALTY_TYPE, penNType); + assert_eq(DEFAULT_N_PENALTY, penN); + assert_eq(DEFAULT_MIN_CONST_LOCAL, costMin.getConst()); + assert_eq(DEFAULT_MIN_LINEAR_LOCAL, costMin.getCoeff()); + assert_eq(DEFAULT_FLOOR_CONST_LOCAL, costFloor.getConst()); + assert_eq(DEFAULT_FLOOR_LINEAR_LOCAL, costFloor.getCoeff()); + assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst()); + assert_eq(DEFAULT_N_CEIL_LINEAR, nCeil.getCoeff()); + assert_eq(DEFAULT_N_CAT_PAIR, nCatPair); + + assert_eq(DEFAULT_READ_GAP_CONST, penRdExConst); + assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear); + assert_eq(DEFAULT_REF_GAP_CONST, penRfExConst); + assert_eq(DEFAULT_REF_GAP_LINEAR, penRfExLinear); + assert_eq(DEFAULT_SEEDMMS, multiseedMms); + assert_eq(DEFAULT_SEEDLEN, multiseedLen); + assert_eq(DEFAULT_IVAL, msIval.getType()); + assert_eq(DEFAULT_IVAL_A, msIval.getCoeff()); + assert_eq(DEFAULT_IVAL_B, msIval.getConst()); + + cout << "PASSED" << endl; + } + + { + cout << "Case 4: Simple string 1 ... "; + const char *pol = "MMP=C44;MA=4;RFG=24,12;FL=C,8;RDG=2;NP=C4;MIN=C,7"; + SeedAlignmentPolicy::parseString( + string(pol), + true, // --local? + false, // noisy homopolymers a la 454? + false, // ignore qualities? 
+ bonusMatchType, + bonusMatch, + penMmcType, + penMmc, + penNType, + penN, + penRdExConst, + penRfExConst, + penRdExLinear, + penRfExLinear, + costMin, + costFloor, + nCeil, + nCatPair, + multiseedMms, + multiseedLen, + msIval, + mhits); + + assert_eq(COST_MODEL_CONSTANT, bonusMatchType); + assert_eq(4, bonusMatch); + assert_eq(COST_MODEL_CONSTANT, penMmcType); + assert_eq(44, penMmc); + assert_eq(COST_MODEL_CONSTANT, penNType); + assert_eq(4.0f, penN); + assert_eq(7, costMin.getConst()); + assert_eq(DEFAULT_MIN_LINEAR_LOCAL, costMin.getCoeff()); + assert_eq(8, costFloor.getConst()); + assert_eq(DEFAULT_FLOOR_LINEAR_LOCAL, costFloor.getCoeff()); + assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst()); + assert_eq(DEFAULT_N_CEIL_LINEAR, nCeil.getCoeff()); + assert_eq(DEFAULT_N_CAT_PAIR, nCatPair); + + assert_eq(2.0f, penRdExConst); + assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear); + assert_eq(24.0f, penRfExConst); + assert_eq(12.0f, penRfExLinear); + assert_eq(DEFAULT_SEEDMMS, multiseedMms); + assert_eq(DEFAULT_SEEDLEN, multiseedLen); + assert_eq(DEFAULT_IVAL, msIval.getType()); + assert_eq(DEFAULT_IVAL_A, msIval.getCoeff()); + assert_eq(DEFAULT_IVAL_B, msIval.getConst()); + + cout << "PASSED" << endl; + } +} +#endif /*def ALIGNER_SEED_POLICY_MAIN*/ diff --git a/aligner_seed_policy.h b/aligner_seed_policy.h new file mode 100644 index 0000000..b8d7fc6 --- /dev/null +++ b/aligner_seed_policy.h @@ -0,0 +1,234 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ALIGNER_SEED_POLICY_H_ +#define ALIGNER_SEED_POLICY_H_ + +#include "scoring.h" +#include "simple_func.h" + +#define DEFAULT_SEEDMMS 0 +#define DEFAULT_SEEDLEN 22 + +#define DEFAULT_IVAL SIMPLE_FUNC_SQRT +#define DEFAULT_IVAL_A 1.15f +#define DEFAULT_IVAL_B 0.0f + +#define DEFAULT_UNGAPPED_HITS 6 + +/** + * Encapsulates the set of all parameters that affect what the + * SeedAligner does with reads. + */ +class SeedAlignmentPolicy { + +public: + + /** + * Parse alignment policy when provided in this format: + * =;=;=... + * + * And label=value possibilities are: + * + * Bonus for a match + * ----------------- + * + * MA=xx (default: MA=0, or MA=2 if --local is set) + * + * xx = Each position where equal read and reference characters match up + * in the alignment contriubtes this amount to the total score. + * + * Penalty for a mismatch + * ---------------------- + * + * MMP={Cxx|Q|RQ} (default: MMP=C6) + * + * Cxx = Each mismatch costs xx. If MMP=Cxx is specified, quality + * values are ignored when assessing penalities for mismatches. + * Q = Each mismatch incurs a penalty equal to the mismatched base's + * value. + * R = Each mismatch incurs a penalty equal to the mismatched base's + * rounded quality value. Qualities are rounded off to the + * nearest 10, and qualities greater than 30 are rounded to 30. + * + * Penalty for position with N (in either read or reference) + * --------------------------------------------------------- + * + * NP={Cxx|Q|RQ} (default: NP=C1) + * + * Cxx = Each alignment position with an N in either the read or the + * reference costs xx. If NP=Cxx is specified, quality values are + * ignored when assessing penalities for Ns. 
+ * Q = Each alignment position with an N in either the read or the + * reference incurs a penalty equal to the read base's quality + * value. + * R = Each alignment position with an N in either the read or the + * reference incurs a penalty equal to the read base's rounded + * quality value. Qualities are rounded off to the nearest 10, + * and qualities greater than 30 are rounded to 30. + * + * Penalty for a read gap + * ---------------------- + * + * RDG=xx,yy (default: RDG=5,3) + * + * xx = Read gap open penalty. + * yy = Read gap extension penalty. + * + * Total cost incurred by a read gap = xx + (yy * gap length) + * + * Penalty for a reference gap + * --------------------------- + * + * RFG=xx,yy (default: RFG=5,3) + * + * xx = Reference gap open penalty. + * yy = Reference gap extension penalty. + * + * Total cost incurred by a reference gap = xx + (yy * gap length) + * + * Minimum score for valid alignment + * --------------------------------- + * + * MIN=xx,yy (defaults: MIN=-0.6,-0.6, or MIN=0.0,0.66 if --local is set) + * + * xx,yy = For a read of length N, the total score must be at least + * xx + (read length * yy) for the alignment to be valid. The + * total score is the sum of all negative penalties (from + * mismatches and gaps) and all positive bonuses. The minimum + * can be negative (and is by default in global alignment mode). + * + * N ceiling + * --------- + * + * NCEIL=xx,yy (default: NCEIL=0.0,0.15) + * + * xx,yy = For a read of length N, the number of alignment + * positions with an N in either the read or the + * reference cannot exceed + * ceiling = xx + (read length * yy). If the ceiling is + * exceeded, the alignment is considered invalid. + * + * Seeds + * ----- + * + * SEED=mm,len,ival (default: SEED=0,22) + * + * mm = Maximum number of mismatches allowed within a seed. + * Must be >= 0 and <= 2. Note that 2-mismatch mode is + * not fully sensitive; i.e. some 2-mismatch seed + * alignments may be missed. + * len = Length of seed. 
+ * ival = Interval between seeds. If not specified, seed + * interval is determined by IVAL. + * + * Seed interval + * ------------- + * + * IVAL={L|S|C},xx,yy (default: IVAL=S,1.0,0.0) + * + * L = let interval between seeds be a linear function of the + * read length. xx and yy are the constant and linear + * coefficients respectively. In other words, the interval + * equals a * len + b, where len is the read length. + * Intervals less than 1 are rounded up to 1. + * S = let interval between seeds be a function of the sqaure + * root of the read length. xx and yy are the + * coefficients. In other words, the interval equals + * a * sqrt(len) + b, where len is the read length. + * Intervals less than 1 are rounded up to 1. + * C = Like S but uses cube root of length instead of square + * root. + * + * Example 1: + * + * SEED=1,10,5 and read sequence is TGCTATCGTACGATCGTAC: + * + * The following seeds are extracted from the forward + * representation of the read and aligned to the reference + * allowing up to 1 mismatch: + * + * Read: TGCTATCGTACGATCGTACA + * + * Seed 1+: TGCTATCGTA + * Seed 2+: TCGTACGATC + * Seed 3+: CGATCGTACA + * + * ...and the following are extracted from the reverse-complement + * representation of the read and align to the reference allowing + * up to 1 mismatch: + * + * Seed 1-: TACGATAGCA + * Seed 2-: GATCGTACGA + * Seed 3-: TGTACGATCG + * + * Example 2: + * + * SEED=1,20,20 and read sequence is TGCTATCGTACGATC. The seed + * length is 20 but the read is only 15 characters long. In this + * case, Bowtie2 automatically shrinks the seed length to be equal + * to the read length. + * + * Read: TGCTATCGTACGATC + * + * Seed 1+: TGCTATCGTACGATC + * Seed 1-: GATCGTACGATAGCA + * + * Example 3: + * + * SEED=1,10,10 and read sequence is TGCTATCGTACGATC. Only one seed + * fits on the read; a second seed would overhang the end of the read + * by 5 positions. In this case, Bowtie2 extracts one seed. 
+ * + * Read: TGCTATCGTACGATC + * + * Seed 1+: TGCTATCGTA + * Seed 1-: TACGATAGCA + */ + static void parseString( + const std::string& s, + bool local, + bool noisyHpolymer, + bool ignoreQuals, + int& bonusMatchType, + int& bonusMatch, + int& penMmcType, + int& penMmcMax, + int& penMmcMin, + int& penScMax, + int& penScMin, + int& penNType, + int& penN, + int& penRdExConst, + int& penRfExConst, + int& penRdExLinear, + int& penRfExLinear, + SimpleFunc& costMin, + SimpleFunc& nCeil, + bool& nCatPair, + int& multiseedMms, + int& multiseedLen, + SimpleFunc& multiseedIval, + size_t& failStreak, + size_t& seedRounds, + SimpleFunc* penCanIntronLen = NULL, + SimpleFunc* penNoncanIntronLen = NULL); +}; + +#endif /*ndef ALIGNER_SEED_POLICY_H_*/ diff --git a/aligner_sw.cpp b/aligner_sw.cpp new file mode 100644 index 0000000..9341a35 --- /dev/null +++ b/aligner_sw.cpp @@ -0,0 +1,3214 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +// -- BTL remove -- +//#include +//#include +// -- -- +#include "aligner_sw.h" +#include "aligner_result.h" +#include "search_globals.h" +#include "scoring.h" +#include "mask.h" + +/** + * Initialize with a new read. 
+ */ +void SwAligner::initRead( + const BTDnaString& rdfw, // forward read sequence + const BTDnaString& rdrc, // revcomp read sequence + const BTString& qufw, // forward read qualities + const BTString& qurc, // reverse read qualities + size_t rdi, // offset of first read char to align + size_t rdf, // offset of last read char to align + const Scoring& sc) // scoring scheme +{ + assert_gt(rdf, rdi); + int nceil = sc.nCeil.f((double)rdfw.length()); + rdfw_ = &rdfw; // read sequence + rdrc_ = &rdrc; // read sequence + qufw_ = &qufw; // read qualities + qurc_ = &qurc; // read qualities + rdi_ = rdi; // offset of first read char to align + rdf_ = rdf; // offset of last read char to align + sc_ = ≻ // scoring scheme + nceil_ = nceil; // max # Ns allowed in ref portion of aln + readSse16_ = false; // true -> sse16 from now on for this read + initedRead_ = true; +#ifndef NO_SSE + sseU8fwBuilt_ = false; // built fw query profile, 8-bit score + sseU8rcBuilt_ = false; // built rc query profile, 8-bit score + sseI16fwBuilt_ = false; // built fw query profile, 16-bit score + sseI16rcBuilt_ = false; // built rc query profile, 16-bit score +#endif +} + +/** + * Initialize with a new alignment problem. + */ +void SwAligner::initRef( + bool fw, // whether to forward or revcomp read is aligning + TRefId refidx, // id of reference aligned against + const DPRect& rect, // DP rectangle + char *rf, // reference sequence + size_t rfi, // offset of first reference char to align to + size_t rff, // offset of last reference char to align to + TRefOff reflen, // length of reference sequence + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + bool enable8, // use 8-bit SSE if possible? + size_t cminlen, // minimum length for using checkpointing scheme + size_t cpow2, // interval b/t checkpointed diags; 1 << this + bool doTri, // triangular mini-fills? + bool extend) // is this a seed extension? 
+{ + size_t readGaps = sc.maxReadGaps(minsc, rdfw_->length()); + size_t refGaps = sc.maxRefGaps(minsc, rdfw_->length()); + assert_geq(readGaps, 0); + assert_geq(refGaps, 0); + assert_gt(rff, rfi); + rdgap_ = readGaps; // max # gaps in read + rfgap_ = refGaps; // max # gaps in reference + state_ = STATE_INITED; + fw_ = fw; // orientation + rd_ = fw ? rdfw_ : rdrc_; // read sequence + qu_ = fw ? qufw_ : qurc_; // quality sequence + refidx_ = refidx; // id of reference aligned against + rf_ = rf; // reference sequence + rfi_ = rfi; // offset of first reference char to align to + rff_ = rff; // offset of last reference char to align to + reflen_ = reflen; // length of entire reference sequence + rect_ = ▭ // DP rectangle + minsc_ = minsc; // minimum score + cural_ = 0; // idx of next alignment to give out + initedRef_ = true; // indicate we've initialized the ref portion + enable8_ = enable8; // use 8-bit SSE if possible? + extend_ = extend; // true iff this is a seed extension + cperMinlen_ = cminlen; // reads shorter than this won't use checkpointer + cperPerPow2_ = cpow2; // interval b/t checkpointed diags; 1 << this + cperEf_ = true; // whether to checkpoint H, E, and F + cperTri_ = doTri; // triangular mini-fills? + bter_.initRef( + fw_ ? rdfw_->buf() : // in: read sequence + rdrc_->buf(), + fw_ ? 
qufw_->buf() : // in: quality sequence + qurc_->buf(), + // daehwan + // rd_->length(), // in: read sequence length + rdf_ - rdi_, + rf_ + rfi_, // in: reference sequence + rff_ - rfi_, // in: in-rectangle reference sequence length + reflen, // in: total reference sequence length + refidx_, // in: reference id + rfi_, // in: reference offset + fw_, // in: orientation + rect_, // in: DP rectangle + &cper_, // in: checkpointer + *sc_, // in: scoring scheme + nceil_); // in: N ceiling +} + +/** + * Given a read, an alignment orientation, a range of characters in a referece + * sequence, and a bit-encoded version of the reference, set up and execute the + * corresponding dynamic programming problem. + * + * The caller has already narrowed down the relevant portion of the reference + * using, e.g., the location of a seed hit, or the range of possible fragment + * lengths if we're searching for the opposite mate in a pair. + */ +void SwAligner::initRef( + bool fw, // whether to forward or revcomp read is aligning + TRefId refidx, // reference aligned against + const DPRect& rect, // DP rectangle + const BitPairReference& refs, // Reference strings + TRefOff reflen, // length of reference sequence + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + bool enable8, // use 8-bit SSE if possible? + size_t cminlen, // minimum length for using checkpointing scheme + size_t cpow2, // interval b/t checkpointed diags; 1 << this + bool doTri, // triangular mini-fills? 
+ bool extend, // true iff this is a seed extension + size_t upto, // count the number of Ns up to this offset + size_t& nsUpto) // output: the number of Ns up to 'upto' +{ + TRefOff rfi = rect.refl; + TRefOff rff = rect.refr + 1; + assert_gt(rff, rfi); + // Capture an extra reference character outside the rectangle so that we + // can check matches in the next column over to the right + rff++; + // rflen = full length of the reference substring to consider, including + // overhang off the boundaries of the reference sequence + const size_t rflen = (size_t)(rff - rfi); + // Figure the number of Ns we're going to add to either side + size_t leftNs = + (rfi >= 0 ? 0 : (size_t)std::abs(static_cast(rfi))); + leftNs = min(leftNs, rflen); + size_t rightNs = + (rff <= (TRefOff)reflen ? 0 : (size_t)std::abs(static_cast(rff - reflen))); + rightNs = min(rightNs, rflen); + // rflenInner = length of just the portion that doesn't overhang ref ends + assert_geq(rflen, leftNs + rightNs); + const size_t rflenInner = rflen - (leftNs + rightNs); +#ifndef NDEBUG + bool haveRfbuf2 = false; + EList rfbuf2(rflen); + // This is really slow, so only do it some of the time + if((rand() % 10) == 0) { + TRefOff rfii = rfi; + for(size_t i = 0; i < rflen; i++) { + if(rfii < 0 || (TRefOff)rfii >= reflen) { + rfbuf2.push_back(4); + } else { + rfbuf2.push_back(refs.getBase(refidx, (uint32_t)rfii)); + } + rfii++; + } + haveRfbuf2 = true; + } +#endif + // rfbuf_ = uint32_t list large enough to accommodate both the reference + // sequence and any Ns we might add to either side. + rfwbuf_.resize((rflen + 16) / 4); + int offset = refs.getStretch( + rfwbuf_.ptr(), // buffer to store words in + refidx, // which reference + (rfi < 0) ? 
0 : (size_t)rfi, // starting offset (can't be < 0) + rflenInner // length to grab (exclude overhang) + ASSERT_ONLY(, tmp_destU32_));// for BitPairReference::getStretch() + assert_leq(offset, 16); + rf_ = (char*)rfwbuf_.ptr() + offset; + // Shift ref chars away from 0 so we can stick Ns at the beginning + if(leftNs > 0) { + // Slide everyone down + for(size_t i = rflenInner; i > 0; i--) { + rf_[i+leftNs-1] = rf_[i-1]; + } + // Add Ns + for(size_t i = 0; i < leftNs; i++) { + rf_[i] = 4; + } + } + if(rightNs > 0) { + // Add Ns to the end + for(size_t i = 0; i < rightNs; i++) { + rf_[i + leftNs + rflenInner] = 4; + } + } +#ifndef NDEBUG + // Sanity check reference characters + for(size_t i = 0; i < rflen; i++) { + assert(!haveRfbuf2 || rf_[i] == rfbuf2[i]); + assert_range(0, 4, (int)rf_[i]); + } +#endif + // Count Ns and convert reference characters into A/C/G/T masks. Ambiguous + // nucleotides (IUPAC codes) have more than one mask bit set. If a + // reference scanner was provided, use it to opportunistically resolve seed + // hits. + nsUpto = 0; + for(size_t i = 0; i < rflen; i++) { + // rf_[i] gets mask version of refence char, with N=16 + if(i < upto && rf_[i] > 3) { + nsUpto++; + } + rf_[i] = (1 << rf_[i]); + } + // Correct for having captured an extra reference character + rff--; + initRef( + fw, // whether to forward or revcomp read is aligning + refidx, // id of reference aligned against + rect, // DP rectangle + rf_, // reference sequence, wrapped up in BTString object + 0, // use the whole thing + (size_t)(rff - rfi), // ditto + reflen, // reference length + sc, // scoring scheme + minsc, // minimum score + enable8, // use 8-bit SSE if possible? + cminlen, // minimum length for using checkpointing scheme + cpow2, // interval b/t checkpointed diags; 1 << this + doTri, // triangular mini-fills? 
+ extend); // true iff this is a seed extension +} + +/** + * Given a read, an alignment orientation, a range of characters in a referece + * sequence, and a bit-encoded version of the reference, set up and execute the + * corresponding ungapped alignment problem. There can only be one solution. + * + * The caller has already narrowed down the relevant portion of the reference + * using, e.g., the location of a seed hit, or the range of possible fragment + * lengths if we're searching for the opposite mate in a pair. + */ +int SwAligner::ungappedAlign( + const BTDnaString& rd, // read sequence (could be RC) + const BTString& qu, // qual sequence (could be rev) + const Coord& coord, // coordinate aligned to + const BitPairReference& refs, // Reference strings + size_t reflen, // length of reference sequence + const Scoring& sc, // scoring scheme + bool ohang, // allow overhang? + TAlScore minsc, // minimum score + SwResult& res) // put alignment result here +{ + const size_t len = rd.length(); + int nceil = sc.nCeil.f((double)len); + int ns = 0; + TRefOff rfi = coord.off(); + TRefOff rff = rfi + (TRefOff)len; + TRefId refidx = coord.ref(); + assert_gt(rff, rfi); + // Figure the number of Ns we're going to add to either side + size_t leftNs = 0; + if(rfi < 0) { + if(ohang) { + leftNs = (size_t)(-rfi); + } else { + return 0; + } + } + size_t rightNs = 0; + if(rff > (TRefOff)reflen) { + if(ohang) { + rightNs = (size_t)(rff - (TRefOff)reflen); + } else { + return 0; + } + } + if((leftNs + rightNs) > (size_t)nceil) { + return 0; + } + // rflenInner = length of just the portion that doesn't overhang ref ends + assert_geq(len, leftNs + rightNs); + const size_t rflenInner = len - (leftNs + rightNs); +#ifndef NDEBUG + bool haveRfbuf2 = false; + EList rfbuf2(len); + // This is really slow, so only do it some of the time + if((rand() % 10) == 0) { + TRefOff rfii = rfi; + for(size_t i = 0; i < len; i++) { + if(rfii < 0 || (size_t)rfii >= reflen) { + rfbuf2.push_back(4); + } 
else { + rfbuf2.push_back(refs.getBase(refidx, (uint32_t)rfii)); + } + rfii++; + } + haveRfbuf2 = true; + } +#endif + // rfbuf_ = uint32_t list large enough to accommodate both the reference + // sequence and any Ns we might add to either side. + rfwbuf_.resize((len + 16) / 4); + int offset = refs.getStretch( + rfwbuf_.ptr(), // buffer to store words in + refidx, // which reference + (rfi < 0) ? 0 : (size_t)rfi, // starting offset (can't be < 0) + rflenInner // length to grab (exclude overhang) + ASSERT_ONLY(, tmp_destU32_));// for BitPairReference::getStretch() + assert_leq(offset, 16); + rf_ = (char*)rfwbuf_.ptr() + offset; + // Shift ref chars away from 0 so we can stick Ns at the beginning + if(leftNs > 0) { + // Slide everyone down + for(size_t i = rflenInner; i > 0; i--) { + rf_[i+leftNs-1] = rf_[i-1]; + } + // Add Ns + for(size_t i = 0; i < leftNs; i++) { + rf_[i] = 4; + } + } + if(rightNs > 0) { + // Add Ns to the end + for(size_t i = 0; i < rightNs; i++) { + rf_[i + leftNs + rflenInner] = 4; + } + } +#ifndef NDEBUG + // Sanity check reference characters + for(size_t i = 0; i < len; i++) { + assert(!haveRfbuf2 || rf_[i] == rfbuf2[i]); + assert_range(0, 4, (int)rf_[i]); + } +#endif + // Count Ns and convert reference characters into A/C/G/T masks. Ambiguous + // nucleotides (IUPAC codes) have more than one mask bit set. If a + // reference scanner was provided, use it to opportunistically resolve seed + // hits. + TAlScore score = 0; + res.alres.reset(); + size_t rowi = 0; + size_t rowf = len-1; + if(sc.monotone) { + for(size_t i = 0; i < len; i++) { + // rf_[i] gets mask version of refence char, with N=16 + assert_geq(qu[i], 33); + score += sc.score(rd[i], (int)(1 << rf_[i]), qu[i] - 33, ns); + assert_leq(score, 0); + if(score < minsc || ns > nceil) { + // Fell below threshold + return 0; + } + } + // Got a result! Fill in the rest of the result object. + } else { + // Definitely ways to short-circuit this. E.g. 
if diff between cur + // score and minsc can't be met by matches. + TAlScore floorsc = 0; + TAlScore scoreMax = floorsc; + size_t lastfloor = 0; + rowi = MAX_SIZE_T; + size_t sols = 0; + for(size_t i = 0; i < len; i++) { + score += sc.score(rd[i], (int)(1 << rf_[i]), qu[i] - 33, ns); + if(score >= minsc && score >= scoreMax) { + scoreMax = score; + rowf = i; + if(rowi != lastfloor) { + rowi = lastfloor; + sols++; + } + } + if(score <= floorsc) { + score = floorsc; + lastfloor = i+1; + } + } + if(ns > nceil || scoreMax < minsc) { + // Too many Ns + return 0; + } + if(sols > 1) { + // >1 distinct solution in this diag; defer to DP aligner + return -1; + } + score = scoreMax; + // Got a result! Fill in the rest of the result object. + } + // Now fill in the edits + res.alres.setScore(AlnScore(score, ns, 0)); + assert_geq(rowf, rowi); + EList& ned = res.alres.ned(); + size_t refns = 0; + ASSERT_ONLY(BTDnaString refstr); + for(size_t i = rowi; i <= rowf; i++) { + ASSERT_ONLY(refstr.append((int)rf_[i])); + if(rf_[i] > 3 || rd[i] != rf_[i]) { + // Add edit + Edit e((int)i, + mask2dna[1 << (int)rf_[i]], + "ACGTN"[(int)rd[i]], + EDIT_TYPE_MM); + ned.push_back(e); + if(rf_[i] > 3) { + refns++; + } + } + } + assert(Edit::repOk(ned, rd)); + bool fw = coord.fw(); + assert_leq(rowf, len-1); + size_t trimEnd = (len-1) - rowf; + res.alres.setShape( + coord.ref(), // ref id + coord.off()+rowi, // 0-based ref offset + reflen, // length of reference sequence aligned to + fw, // aligned to Watson? + len, // read length + 0, // read ID + true, // pretrim soft? + 0, // pretrim 5' end + 0, // pretrim 3' end + true, // alignment trim soft? + fw ? rowi : trimEnd, // alignment trim 5' end + fw ? 
trimEnd : rowi); // alignment trim 3' end + res.alres.setRefNs(refns); + assert(res.repOk()); +#ifndef NDEBUG + BTDnaString editstr; + Edit::toRef(rd, ned, editstr, true, rowi, trimEnd); + if(refstr != editstr) { + cerr << "Decoded nucleotides and edits don't match reference:" << endl; + cerr << " score: " << res.alres.score().score() << endl; + cerr << " edits: "; + Edit::print(cerr, ned); + cerr << endl; + cerr << " decoded nucs: " << rd << endl; + cerr << " edited nucs: " << editstr << endl; + cerr << " reference nucs: " << refstr << endl; + assert(0); + } +#endif + if(!fw) { + // All edits are currently w/r/t upstream end; if read aligned to Crick + // strand, invert them to be w/r/t 5' end instead. + res.alres.invertEdits(); + } + return 1; +} + +/** + * Align read 'rd' to reference using read & reference information given + * last time init() was called. + */ +bool SwAligner::align( + RandomSource& rnd, // source of pseudo-randoms + TAlScore& best) // best alignment score observed in DP matrix +{ + assert(initedRef() && initedRead()); + assert_eq(STATE_INITED, state_); + state_ = STATE_ALIGNED; + // Reset solutions lists + btncand_.clear(); + btncanddone_.clear(); + btncanddoneSucc_ = btncanddoneFail_ = 0; + best = std::numeric_limits::min(); + sse8succ_ = sse16succ_ = false; + int flag = 0; + size_t rdlen = rdf_ - rdi_; + bool checkpointed = rdlen >= cperMinlen_; + bool gathered = false; // Did gathering happen along with alignment? 
+ if(sc_->monotone) { + // End-to-end + if(enable8_ && !readSse16_ && minsc_ >= -254) { + // 8-bit end-to-end + if(checkpointed) { + best = alignGatherEE8(flag, false); + if(flag == 0) { + gathered = true; + } + } else { + best = alignNucleotidesEnd2EndSseU8(flag, false); +#ifndef NDEBUG + int flagtmp = 0; + TAlScore besttmp = alignGatherEE8(flagtmp, true); // debug + assert_eq(flagtmp, flag); + assert_eq(besttmp, best); +#endif + } + sse8succ_ = (flag == 0); +#ifndef NDEBUG + { + int flag2 = 0; + TAlScore best2 = alignNucleotidesEnd2EndSseI16(flag2, true); + { + int flagtmp = 0; + TAlScore besttmp = alignGatherEE16(flagtmp, true); + assert_eq(flagtmp, flag2); + assert(flag2 != 0 || best2 == besttmp); + } + assert(flag < 0 || best == best2); + sse16succ_ = (flag2 == 0); + } +#endif /*ndef NDEBUG*/ + } else { + // 16-bit end-to-end + if(checkpointed) { + best = alignGatherEE16(flag, false); + if(flag == 0) { + gathered = true; + } + } else { + best = alignNucleotidesEnd2EndSseI16(flag, false); +#ifndef NDEBUG + int flagtmp = 0; + TAlScore besttmp = alignGatherEE16(flagtmp, true); + assert_eq(flagtmp, flag); + assert_eq(besttmp, best); +#endif + } + sse16succ_ = (flag == 0); + } + } else { + // Local + flag = -2; + if(enable8_ && !readSse16_) { + // 8-bit local + if(checkpointed) { + best = alignGatherLoc8(flag, false); + if(flag == 0) { + gathered = true; + } + } else { + best = alignNucleotidesLocalSseU8(flag, false); +#ifndef NDEBUG + int flagtmp = 0; + TAlScore besttmp = alignGatherLoc8(flagtmp, true); + assert_eq(flag, flagtmp); + assert_eq(best, besttmp); +#endif + } + } + if(flag == -2) { + // 16-bit local + flag = 0; + if(checkpointed) { + best = alignNucleotidesLocalSseI16(flag, false); + best = alignGatherLoc16(flag, false); + if(flag == 0) { + gathered = true; + } + } else { + best = alignNucleotidesLocalSseI16(flag, false); +#ifndef NDEBUG + int flagtmp = 0; + TAlScore besttmp = alignGatherLoc16(flagtmp, true); + assert_eq(flag, flagtmp); + 
assert_eq(best, besttmp); +#endif + } + sse16succ_ = (flag == 0); + } else { + sse8succ_ = (flag == 0); +#ifndef NDEBUG + int flag2 = 0; + TAlScore best2 = alignNucleotidesLocalSseI16(flag2, true); + { + int flagtmp = 0; + TAlScore besttmp = alignGatherLoc16(flagtmp, true); + assert_eq(flag2, flagtmp); + assert(flag2 != 0 || best2 == besttmp); + } + assert(flag2 < 0 || best == best2); + sse16succ_ = (flag2 == 0); +#endif /*ndef NDEBUG*/ + } + } +#ifndef NDEBUG + if(!checkpointed && (rand() & 15) == 0 && sse8succ_ && sse16succ_) { + SSEData& d8 = fw_ ? sseU8fw_ : sseU8rc_; + SSEData& d16 = fw_ ? sseI16fw_ : sseI16rc_; + assert_eq(d8.mat_.nrow(), d16.mat_.nrow()); + assert_eq(d8.mat_.ncol(), d16.mat_.ncol()); + for(size_t i = 0; i < d8.mat_.nrow(); i++) { + for(size_t j = 0; j < colstop_; j++) { + int h8 = d8.mat_.helt(i, j); + int h16 = d16.mat_.helt(i, j); + int e8 = d8.mat_.eelt(i, j); + int e16 = d16.mat_.eelt(i, j); + int f8 = d8.mat_.felt(i, j); + int f16 = d16.mat_.felt(i, j); + TAlScore h8s = + (sc_->monotone ? (h8 - 0xff ) : h8); + TAlScore h16s = + (sc_->monotone ? (h16 - 0x7fff) : (h16 + 0x8000)); + TAlScore e8s = + (sc_->monotone ? (e8 - 0xff ) : e8); + TAlScore e16s = + (sc_->monotone ? (e16 - 0x7fff) : (e16 + 0x8000)); + TAlScore f8s = + (sc_->monotone ? (f8 - 0xff ) : f8); + TAlScore f16s = + (sc_->monotone ? 
(f16 - 0x7fff) : (f16 + 0x8000)); + if(h8s < minsc_) { + h8s = minsc_ - 1; + } + if(h16s < minsc_) { + h16s = minsc_ - 1; + } + if(e8s < minsc_) { + e8s = minsc_ - 1; + } + if(e16s < minsc_) { + e16s = minsc_ - 1; + } + if(f8s < minsc_) { + f8s = minsc_ - 1; + } + if(f16s < minsc_) { + f16s = minsc_ - 1; + } + if((h8 != 0 || (int16_t)h16 != (int16_t)0x8000) && h8 > 0) { + assert_eq(h8s, h16s); + } + if((e8 != 0 || (int16_t)e16 != (int16_t)0x8000) && e8 > 0) { + assert_eq(e8s, e16s); + } + if((f8 != 0 || (int16_t)f16 != (int16_t)0x8000) && f8 > 0) { + assert_eq(f8s, f16s); + } + } + } + } +#endif + assert(repOk()); + cural_ = 0; + if(best == MIN_I64 || best < minsc_) { + return false; + } + if(!gathered) { + // Look for solutions using SSE matrix + assert(sse8succ_ || sse16succ_); + if(sc_->monotone) { + if(sse8succ_) { + gatherCellsNucleotidesEnd2EndSseU8(best); +#ifndef NDEBUG + if(sse16succ_) { + cand_tmp_ = btncand_; + gatherCellsNucleotidesEnd2EndSseI16(best); + cand_tmp_.sort(); + btncand_.sort(); + assert(cand_tmp_ == btncand_); + } +#endif /*ndef NDEBUG*/ + } else { + gatherCellsNucleotidesEnd2EndSseI16(best); + } + } else { + if(sse8succ_) { + gatherCellsNucleotidesLocalSseU8(best); +#ifndef NDEBUG + if(sse16succ_) { + cand_tmp_ = btncand_; + gatherCellsNucleotidesLocalSseI16(best); + cand_tmp_.sort(); + btncand_.sort(); + assert(cand_tmp_ == btncand_); + } +#endif /*ndef NDEBUG*/ + } else { + gatherCellsNucleotidesLocalSseI16(best); + } + } + } + if(!btncand_.empty()) { + btncand_.sort(); + } + return !btncand_.empty(); +} + +/** + * Populate the given SwResult with information about the "next best" + * alignment if there is one. If there isn't one, false is returned. Note + * that false might be returned even though a call to done() would have + * returned false. 
 */
// Overview of the control flow below:
//  - If done() is already true, 'res' is reset and false is returned.
//  - Otherwise candidates in btncand_ are tried in order, starting at cural_.
//    A candidate is skipped when its score is below 'minsc', when (in the
//    non-checkpointed case) its starting cell was already reported through,
//    or (local mode only) when it lies within SQ rows and SQ columns of a
//    candidate already recorded in btncanddone_.
//  - The first candidate whose backtrace succeeds ends the loop; its edits are
//    inverted if the read is not forward (fw_ == false), cural_ is advanced
//    past it and true is returned.
//  - If every remaining candidate is skipped or fails, false is returned.
// Each candidate's 'fate' field records which of these outcomes applied.
bool SwAligner::nextAlignment(
	SwResult& res,
	TAlScore minsc,
	RandomSource& rnd)
{
	assert(initedRead() && initedRef());
	assert_eq(STATE_ALIGNED, state_);
	assert(repOk());
	if(done()) {
		res.reset();
		return false;
	}
	assert(!done());
	size_t off = 0, nbts = 0;
	assert_lt(cural_, btncand_.size());
	assert(res.repOk());
	// For each candidate cell that we should try to backtrack from...
	const size_t candsz = btncand_.size();
	// SQ is the half-width of the square used by the local-mode domination
	// filter: 1/16th of the number of DP rows, but at least 1.
	size_t SQ = dpRows() >> 4;
	if(SQ == 0) SQ = 1;
	size_t rdlen = rdf_ - rdi_;
	// Reads at least cperMinlen_ long take the checkpointed backtrace() path
	bool checkpointed = rdlen >= cperMinlen_;
	while(cural_ < candsz) {
		// Doing 'continue' anywhere in here simply causes us to move on to the
		// next candidate
		if(btncand_[cural_].score < minsc) {
			btncand_[cural_].fate = BT_CAND_FATE_FILT_SCORE;
			nbtfiltsc_++; cural_++; continue;
		}
		nbts = 0;
		assert(sse8succ_ || sse16succ_);
		size_t row = btncand_[cural_].row;
		size_t col = btncand_[cural_].col;
		assert_lt(row, dpRows());
		assert_lt((TRefOff)col, rff_-rfi_);
		// Start-cell filter: consult the 16-bit matrix when it succeeded,
		// otherwise the 8-bit one.  Only applies when not checkpointed.
		if(sse16succ_) {
			SSEData& d = fw_ ? sseI16fw_ : sseI16rc_;
			if(!checkpointed && d.mat_.reset_[row] && d.mat_.reportedThrough(row, col)) {
				// Skipping this candidate because a previous candidate already
				// moved through this cell
				btncand_[cural_].fate = BT_CAND_FATE_FILT_START;
				//cerr << " skipped because starting cell was covered" << endl;
				nbtfiltst_++; cural_++; continue;
			}
		} else if(sse8succ_) {
			SSEData& d = fw_ ? sseU8fw_ : sseU8rc_;
			if(!checkpointed && d.mat_.reset_[row] && d.mat_.reportedThrough(row, col)) {
				// Skipping this candidate because a previous candidate already
				// moved through this cell
				btncand_[cural_].fate = BT_CAND_FATE_FILT_START;
				//cerr << " skipped because starting cell was covered" << endl;
				nbtfiltst_++; cural_++; continue;
			}
		}
		if(sc_->monotone) {
			// End-to-end alignment
			bool ret = false;
			if(sse8succ_) {
				// Draw a seed and re-initialize 'rnd' with it so that the
				// debug-only re-runs below can replay the same random choices
				uint32_t reseed = rnd.nextU32() + 1;
				rnd.init(reseed);
				res.reset();
				if(checkpointed) {
					size_t maxiter = MAX_SIZE_T;
					size_t niter = 0;
					ret = backtrace(
						btncand_[cural_].score, // in: expected score
						true,     // in: use mini-fill?
						true,     // in: use checkpoints?
						res,      // out: store results (edits and scores) here
						off,      // out: store diagonal projection of origin
						row,      // start in this rectangle row
						col,      // start in this rectangle column
						maxiter,  // max # extensions to try
						niter,    // # extensions tried
						rnd);     // random gen, to choose among equal paths
				} else {
					ret = backtraceNucleotidesEnd2EndSseU8(
						btncand_[cural_].score, // in: expected score
						res,    // out: store results (edits and scores) here
						off,    // out: store diagonal projection of origin
						nbts,   // out: # backtracks
						row,    // start in this rectangle row
						col,    // start in this rectangle column
						rnd);   // random gen, to choose among equal paths
				}
#ifndef NDEBUG
				// if(...) statement here should check not whether the primary
				// alignment was checkpointed, but whether a checkpointed
				// alignment was done at all.
				if(!checkpointed) {
					// Debug cross-check: repeat with the generic backtrace()
					// into a scratch result.  Note it also writes 'off'.
					SwResult res2;
					res2.alres = res.alres; res2.alres.reset();
					size_t maxiter2 = MAX_SIZE_T;
					size_t niter2 = 0;
					bool ret2 = backtrace(
						btncand_[cural_].score, // in: expected score
						true,     // in: use mini-fill?
						true,     // in: use checkpoints?
						res2,     // out: store results (edits and scores) here
						off,      // out: store diagonal projection of origin
						row,      // start in this rectangle row
						col,      // start in this rectangle column
						maxiter2, // max # extensions to try
						niter2,   // # extensions tried
						rnd);     // random gen, to choose among equal paths
					// After the first alignment, there's no guarantee we'll
					// get the same answer from both backtrackers because of
					// differences in how they handle marking cells as
					// reported-through.
					assert(cural_ > 0 || !ret || ret == ret2);
					assert(cural_ > 0 || !ret || res.alres == res2.alres);
				}
				if(sse16succ_ && !checkpointed) {
					// Debug cross-check: the 16-bit backtracker, replayed from
					// the same seed, must agree with the 8-bit one
					SwResult res2;
					res2.alres = res.alres; res2.alres.reset();
					size_t off2, nbts2 = 0;
					rnd.init(reseed);
					bool ret2 = backtraceNucleotidesEnd2EndSseI16(
						btncand_[cural_].score, // in: expected score
						res2,   // out: store results (edits and scores) here
						off2,   // out: store diagonal projection of origin
						nbts2,  // out: # backtracks
						row,    // start in this rectangle row
						col,    // start in this rectangle column
						rnd);   // random gen, to choose among equal paths
					assert_eq(ret, ret2);
					assert_eq(nbts, nbts2);
					assert(!ret || res2.alres.score() == res.alres.score());
#if 0
					if(!checkpointed && (rand() & 15) == 0) {
						// Check that same cells are reported through
						SSEData& d8  = fw_ ? sseU8fw_  : sseU8rc_;
						SSEData& d16 = fw_ ? sseI16fw_ : sseI16rc_;
						for(size_t i = d8.mat_.nrow(); i > 0; i--) {
							for(size_t j = 0; j < d8.mat_.ncol(); j++) {
								assert_eq(d8.mat_.reportedThrough(i-1, j),
								          d16.mat_.reportedThrough(i-1, j));
							}
						}
					}
#endif
				}
#endif
				rnd.init(reseed+1); // debug/release pseudo-randoms in lock step
			} else if(sse16succ_) {
				// NOTE(review): unlike the 8-bit branch, 'rnd' is not
				// re-initialized before the backtrace here, only afterwards
				// (with reseed rather than reseed+1) - confirm this asymmetry
				// is intended.
				uint32_t reseed = rnd.nextU32() + 1;
				res.reset();
				if(checkpointed) {
					size_t maxiter = MAX_SIZE_T;
					size_t niter = 0;
					ret = backtrace(
						btncand_[cural_].score, // in: expected score
						true,     // in: use mini-fill?
						true,     // in: use checkpoints?
						res,      // out: store results (edits and scores) here
						off,      // out: store diagonal projection of origin
						row,      // start in this rectangle row
						col,      // start in this rectangle column
						maxiter,  // max # extensions to try
						niter,    // # extensions tried
						rnd);     // random gen, to choose among equal paths
				} else {
					ret = backtraceNucleotidesEnd2EndSseI16(
						btncand_[cural_].score, // in: expected score
						res,    // out: store results (edits and scores) here
						off,    // out: store diagonal projection of origin
						nbts,   // out: # backtracks
						row,    // start in this rectangle row
						col,    // start in this rectangle column
						rnd);   // random gen, to choose among equal paths
				}
#ifndef NDEBUG
				// if(...) statement here should check not whether the primary
				// alignment was checkpointed, but whether a checkpointed
				// alignment was done at all.
				if(!checkpointed) {
					SwResult res2;
					size_t maxiter2 = MAX_SIZE_T;
					size_t niter2 = 0;
					bool ret2 = backtrace(
						btncand_[cural_].score, // in: expected score
						true,     // in: use mini-fill?
						true,     // in: use checkpoints?
						res2,     // out: store results (edits and scores) here
						off,      // out: store diagonal projection of origin
						row,      // start in this rectangle row
						col,      // start in this rectangle column
						maxiter2, // max # extensions to try
						niter2,   // # extensions tried
						rnd);     // random gen, to choose among equal paths
					// After the first alignment, there's no guarantee we'll
					// get the same answer from both backtrackers because of
					// differences in how they handle marking cells as
					// reported-through.
					assert(cural_ > 0 || !ret || ret == ret2);
					assert(cural_ > 0 || !ret || res.alres == res2.alres);
				}
#endif
				rnd.init(reseed); // debug/release pseudo-randoms in lock step
			}
			if(ret) {
				// Success: leave the loop with cural_ still pointing at this
				// candidate; it is advanced after the loop
				btncand_[cural_].fate = BT_CAND_FATE_SUCCEEDED;
				break;
			} else {
				btncand_[cural_].fate = BT_CAND_FATE_FAILED;
			}
		} else {
			// Local alignment
			// Check if this solution is "dominated" by a prior one.
			// Domination is a heuristic designed to eliminate the vast
			// majority of valid-but-redundant candidates lying in the
			// "penumbra" of a high-scoring alignment.
			bool dom = false;
			{
				size_t donesz = btncanddone_.size();
				// These shadow the outer row/col with the same values
				const size_t col = btncand_[cural_].col;
				const size_t row = btncand_[cural_].row;
				for(size_t i = 0; i < donesz; i++) {
					assert_gt(btncanddone_[i].fate, 0);
					size_t colhi = col, rowhi = row;
					size_t rowlo = btncanddone_[i].row;
					size_t collo = btncanddone_[i].col;
					// Order each pair so the unsigned differences below
					// cannot wrap around
					if(colhi < collo) swap(colhi, collo);
					if(rowhi < rowlo) swap(rowhi, rowlo);
					if(colhi - collo <= SQ && rowhi - rowlo <= SQ) {
						// Skipping this candidate because it's "dominated" by
						// a previous candidate
						dom = true;
						break;
					}
				}
			}
			if(dom) {
				btncand_[cural_].fate = BT_CAND_FATE_FILT_DOMINATED;
				nbtfiltdo_++;
				cural_++;
				continue;
			}
			bool ret = false;
			if(sse8succ_) {
				uint32_t reseed = rnd.nextU32() + 1;
				res.reset();
				rnd.init(reseed);
				if(checkpointed) {
					size_t maxiter = MAX_SIZE_T;
					size_t niter = 0;
					ret = backtrace(
						btncand_[cural_].score, // in: expected score
						true,     // in: use mini-fill?
						true,     // in: use checkpoints?
						res,      // out: store results (edits and scores) here
						off,      // out: store diagonal projection of origin
						row,      // start in this rectangle row
						col,      // start in this rectangle column
						maxiter,  // max # extensions to try
						niter,    // # extensions tried
						rnd);     // random gen, to choose among equal paths
				} else {
					ret = backtraceNucleotidesLocalSseU8(
						btncand_[cural_].score, // in: expected score
						res,    // out: store results (edits and scores) here
						off,    // out: store diagonal projection of origin
						nbts,   // out: # backtracks
						row,    // start in this rectangle row
						col,    // start in this rectangle column
						rnd);   // random gen, to choose among equal paths
				}
#ifndef NDEBUG
				// if(...) statement here should check not whether the primary
				// alignment was checkpointed, but whether a checkpointed
				// alignment was done at all.
				if(!checkpointed) {
					SwResult res2;
					size_t maxiter2 = MAX_SIZE_T;
					size_t niter2 = 0;
					bool ret2 = backtrace(
						btncand_[cural_].score, // in: expected score
						true,     // in: use mini-fill?
						true,     // in: use checkpoints?
						res2,     // out: store results (edits and scores) here
						off,      // out: store diagonal projection of origin
						row,      // start in this rectangle row
						col,      // start in this rectangle column
						maxiter2, // max # extensions to try
						niter2,   // # extensions tried
						rnd);     // random gen, to choose among equal paths
					// After the first alignment, there's no guarantee we'll
					// get the same answer from both backtrackers because of
					// differences in how they handle marking cells as
					// reported-through.
					assert(cural_ > 0 || !ret || ret == ret2);
					assert(cural_ > 0 || !ret || res.alres == res2.alres);
				}
				if(!checkpointed && sse16succ_) {
					SwResult res2;
					size_t off2, nbts2 = 0;
					rnd.init(reseed); // same b/t backtrace calls
					bool ret2 = backtraceNucleotidesLocalSseI16(
						btncand_[cural_].score, // in: expected score
						res2,   // out: store results (edits and scores) here
						off2,   // out: store diagonal projection of origin
						nbts2,  // out: # backtracks
						row,    // start in this rectangle row
						col,    // start in this rectangle column
						rnd);   // random gen, to choose among equal paths
					assert_eq(ret, ret2);
					assert_eq(nbts, nbts2);
					assert(!ret || res2.alres.score() == res.alres.score());
#if 0
					if(!checkpointed && (rand() & 15) == 0) {
						// Check that same cells are reported through
						SSEData& d8  = fw_ ? sseU8fw_  : sseU8rc_;
						SSEData& d16 = fw_ ? sseI16fw_ : sseI16rc_;
						for(size_t i = d8.mat_.nrow(); i > 0; i--) {
							for(size_t j = 0; j < d8.mat_.ncol(); j++) {
								assert_eq(d8.mat_.reportedThrough(i-1, j),
								          d16.mat_.reportedThrough(i-1, j));
							}
						}
					}
#endif
				}
#endif
				rnd.init(reseed+1); // debug/release pseudo-randoms in lock step
			} else if(sse16succ_) {
				uint32_t reseed = rnd.nextU32() + 1;
				res.reset();
				if(checkpointed) {
					size_t maxiter = MAX_SIZE_T;
					size_t niter = 0;
					ret = backtrace(
						btncand_[cural_].score, // in: expected score
						true,     // in: use mini-fill?
						true,     // in: use checkpoints?
						res,      // out: store results (edits and scores) here
						off,      // out: store diagonal projection of origin
						row,      // start in this rectangle row
						col,      // start in this rectangle column
						maxiter,  // max # extensions to try
						niter,    // # extensions tried
						rnd);     // random gen, to choose among equal paths
				} else {
					ret = backtraceNucleotidesLocalSseI16(
						btncand_[cural_].score, // in: expected score
						res,    // out: store results (edits and scores) here
						off,    // out: store diagonal projection of origin
						nbts,   // out: # backtracks
						row,    // start in this rectangle row
						col,    // start in this rectangle column
						rnd);   // random gen, to choose among equal paths
				}
#ifndef NDEBUG
				// if(...) statement here should check not whether the primary
				// alignment was checkpointed, but whether a checkpointed
				// alignment was done at all.
				if(!checkpointed) {
					SwResult res2;
					size_t maxiter2 = MAX_SIZE_T;
					size_t niter2 = 0;
					bool ret2 = backtrace(
						btncand_[cural_].score, // in: expected score
						true,     // in: use mini-fill?
						true,     // in: use checkpoints?
						res2,     // out: store results (edits and scores) here
						off,      // out: store diagonal projection of origin
						row,      // start in this rectangle row
						col,      // start in this rectangle column
						maxiter2, // max # extensions to try
						niter2,   // # extensions tried
						rnd);     // random gen, to choose among equal paths
					// After the first alignment, there's no guarantee we'll
					// get the same answer from both backtrackers because of
					// differences in how they handle marking cells as
					// reported-through.
					assert(cural_ > 0 || !ret || ret == ret2);
					assert(cural_ > 0 || !ret || res.alres == res2.alres);
				}
#endif
				rnd.init(reseed); // same b/t backtrace calls
			}
			// In local mode every attempted candidate, successful or not, is
			// remembered in btncanddone_ so later candidates can be tested
			// for domination against it
			if(ret) {
				btncand_[cural_].fate = BT_CAND_FATE_SUCCEEDED;
				btncanddone_.push_back(btncand_[cural_]);
				btncanddoneSucc_++;
				assert(res.repOk());
				break;
			} else {
				btncand_[cural_].fate = BT_CAND_FATE_FAILED;
				btncanddone_.push_back(btncand_[cural_]);
				btncanddoneFail_++;
			}
		}
		cural_++;
	} // while(cural_ < btncand_.size())
	// Reaching the end of the list means no candidate produced an alignment
	// (a success breaks out with cural_ still inside the list)
	if(cural_ == btncand_.size()) {
		assert(res.repOk());
		return false;
	}
	assert(!res.alres.empty());
	assert(res.repOk());
	if(!fw_) {
		// All edits are currently w/r/t upstream end; if read aligned
		// to Crick strand, we need to invert them so that they're
		// w/r/t the read's 5' end instead.
		res.alres.invertEdits();
	}
	// Step past the candidate that just succeeded
	cural_++;
	assert(res.repOk());
	return true;
}

#ifdef MAIN_ALIGNER_SW

// Everything from here on is a stand-alone test driver that is only compiled
// when MAIN_ALIGNER_SW is defined.
// NOTE(review): the names of the first three headers below are missing in
// this copy of the file (angle-bracket text appears to have been stripped);
// restore them from the upstream source.
#include
#include
#include
#include "scoring.h"
#include "aligner_seed_policy.h"

int gGapBarrier;
int gSnpPhred;
static int bonusMatchType;   // how to reward matches
static int bonusMatch;       // constant if match bonus is a constant
static int penMmcType;       // how to penalize mismatches
static int penMmc;           // constant if mm penalty is a constant
static int penNType;         // how to penalize Ns in the read
static int penN;             // constant if N penalty is a constant
static bool nPairCat;        // true -> concatenate mates before N filter
static int penRdExConst;     // constant coeff for cost of gap in read
static int penRfExConst;     // constant coeff for cost of gap in ref
static int penRdExLinear;    // linear coeff for cost of gap in read
static int penRfExLinear;    // linear coeff for cost of gap in ref
static float costMinConst;   // constant coeff for min score w/r/t read len
static float costMinLinear;  // linear coeff for min score w/r/t read len
static float costFloorConst; // constant coeff for score floor w/r/t read len
static float costFloorLinear;// linear coeff for score floor w/r/t read len
static float nCeilConst;     // constant coeff for N ceiling w/r/t read len
static float nCeilLinear;    // linear coeff for N ceiling w/r/t read len
static bool nCatPair;        // concat mates before applying N filter?
+static int multiseedMms; // mismatches permitted in a multiseed seed +static int multiseedLen; // length of multiseed seeds +static int multiseedIvalType; +static float multiseedIvalA; +static float multiseedIvalB; +static float posmin; +static float posfrac; +static float rowmult; + +enum { + ARG_TESTS = 256 +}; + +static const char *short_opts = "s:m:r:d:i:"; +static struct option long_opts[] = { + {(char*)"snppen", required_argument, 0, 's'}, + {(char*)"misspen", required_argument, 0, 'm'}, + {(char*)"seed", required_argument, 0, 'r'}, + {(char*)"align-policy", no_argument, 0, 'A'}, + {(char*)"test", no_argument, 0, ARG_TESTS}, +}; + +static void printUsage(ostream& os) { + os << "Usage: aligner_sw [options]*" << endl; + os << "Options:" << endl; + os << " -s/--snppen penalty incurred by SNP; used for decoding" + << endl; + os << " -m/--misspen quality to use for read chars" << endl; + os << " -r/-seed seed for pseudo-random generator" << endl; +} + +/** + * Parse a T from a string 's' + */ +template +T parse(const char *s) { + T tmp; + stringstream ss(s); + ss >> tmp; + return tmp; +} + +static EList stbuf, enbuf; +static BTDnaString btread; +static BTString btqual; +static BTString btref; +static BTString btref2; + +static BTDnaString readrc; +static BTString qualrc; + +/** + * Helper function for running a case consisting of a read (sequence + * and quality), a reference string, and an offset that anchors the 0th + * character of the read to a reference position. 
+ */ +static void doTestCase( + SwAligner& al, + const BTDnaString& read, + const BTString& qual, + const BTString& refin, + TRefOff off, + EList *en, + const Scoring& sc, + TAlScore minsc, + SwResult& res, + bool nsInclusive, + bool filterns, + uint32_t seed) +{ + RandomSource rnd(seed); + btref2 = refin; + assert_eq(read.length(), qual.length()); + size_t nrow = read.length(); + TRefOff rfi, rff; + // Calculate the largest possible number of read and reference gaps given + // 'minsc' and 'pens' + size_t maxgaps; + size_t padi, padf; + { + int readGaps = sc.maxReadGaps(minsc, read.length()); + int refGaps = sc.maxRefGaps(minsc, read.length()); + assert_geq(readGaps, 0); + assert_geq(refGaps, 0); + int maxGaps = max(readGaps, refGaps); + padi = 2 * maxGaps; + padf = maxGaps; + maxgaps = (size_t)maxGaps; + } + size_t nceil = (size_t)sc.nCeil.f((double)read.length()); + size_t width = 1 + padi + padf; + rfi = off; + off = 0; + // Pad the beginning of the reference with Ns if necessary + if(rfi < padi) { + size_t beginpad = (size_t)(padi - rfi); + for(size_t i = 0; i < beginpad; i++) { + btref2.insert('N', 0); + off--; + } + rfi = 0; + } else { + rfi -= padi; + } + assert_geq(rfi, 0); + // Pad the end of the reference with Ns if necessary + while(rfi + nrow + padi + padf > btref2.length()) { + btref2.append('N'); + } + rff = rfi + nrow + padi + padf; + // Convert reference string to masks + for(size_t i = 0; i < btref2.length(); i++) { + if(toupper(btref2[i]) == 'N' && !nsInclusive) { + btref2.set(16, i); + } else { + int num = 0; + int alts[] = {4, 4, 4, 4}; + decodeNuc(toupper(btref2[i]), num, alts); + assert_leq(num, 4); + assert_gt(num, 0); + btref2.set(0, i); + for(int j = 0; j < num; j++) { + btref2.set(btref2[i] | (1 << alts[j]), i); + } + } + } + bool fw = true; + uint32_t refidx = 0; + size_t solwidth = width; + if(maxgaps >= solwidth) { + solwidth = 0; + } else { + solwidth -= maxgaps; + } + if(en == NULL) { + enbuf.resize(solwidth); + enbuf.fill(true); + en 
= &enbuf; + } + assert_geq(rfi, 0); + assert_gt(rff, rfi); + readrc = read; + qualrc = qual; + al.initRead( + read, // read sequence + readrc, + qual, // read qualities + qualrc, + 0, // offset of first character within 'read' to consider + read.length(), // offset of last char (exclusive) in 'read' to consider + floorsc); // local-alignment score floor + al.initRef( + fw, // 'read' is forward version of read? + refidx, // id of reference aligned to + off, // offset of upstream ref char aligned against + btref2.wbuf(), // reference sequence (masks) + rfi, // offset of first char in 'ref' to consider + rff, // offset of last char (exclusive) in 'ref' to consider + width, // # bands to do (width of parallelogram) + solwidth, // # rightmost cols where solns can end + sc, // scoring scheme + minsc, // minimum score for valid alignment + maxgaps, // max of max # read gaps, ref gaps + 0, // amount to truncate on left-hand side + en); // mask indicating which columns we can end in + if(filterns) { + al.filter((int)nceil); + } + al.align(rnd); +} + +/** + * Another interface for running a case. + */ +static void doTestCase2( + SwAligner& al, + const char *read, + const char *qual, + const char *refin, + TRefOff off, + const Scoring& sc, + float costMinConst, + float costMinLinear, + SwResult& res, + bool nsInclusive = false, + bool filterns = false, + uint32_t seed = 0) +{ + btread.install(read, true); + TAlScore minsc = (TAlScore)(Scoring::linearFunc( + btread.length(), + costMinConst, + costMinLinear)); + TAlScore floorsc = (TAlScore)(Scoring::linearFunc( + btread.length(), + costFloorConst, + costFloorLinear)); + btqual.install(qual); + btref.install(refin); + doTestCase( + al, + btread, + btqual, + btref, + off, + NULL, + sc, + minsc, + floorsc, + res, + nsInclusive, + filterns, + seed + ); +} + +/** + * Another interface for running a case. 
+ */ +static void doTestCase3( + SwAligner& al, + const char *read, + const char *qual, + const char *refin, + TRefOff off, + Scoring& sc, + float costMinConst, + float costMinLinear, + float nCeilConst, + float nCeilLinear, + SwResult& res, + bool nsInclusive = false, + bool filterns = false, + uint32_t seed = 0) +{ + btread.install(read, true); + // Calculate the penalty ceiling for the read + TAlScore minsc = (TAlScore)(Scoring::linearFunc( + btread.length(), + costMinConst, + costMinLinear)); + TAlScore floorsc = (TAlScore)(Scoring::linearFunc( + btread.length(), + costFloorConst, + costFloorLinear)); + btqual.install(qual); + btref.install(refin); + sc.nCeil.setType(SIMPLE_FUNC_LINEAR); + sc.nCeil.setConst(costMinConst); + sc.nCeil.setCoeff(costMinLinear); + doTestCase( + al, + btread, + btqual, + btref, + off, + NULL, + sc, + minsc, + floorsc, + res, + nsInclusive, + filterns, + seed + ); +} + +/** + * Another interface for running a case. Like doTestCase3 but caller specifies + * st_ and en_ lists. + */ +static void doTestCase4( + SwAligner& al, + const char *read, + const char *qual, + const char *refin, + TRefOff off, + EList& en, + Scoring& sc, + float costMinConst, + float costMinLinear, + float nCeilConst, + float nCeilLinear, + SwResult& res, + bool nsInclusive = false, + bool filterns = false, + uint32_t seed = 0) +{ + btread.install(read, true); + // Calculate the penalty ceiling for the read + TAlScore minsc = (TAlScore)(Scoring::linearFunc( + btread.length(), + costMinConst, + costMinLinear)); + TAlScore floorsc = (TAlScore)(Scoring::linearFunc( + btread.length(), + costFloorConst, + costFloorLinear)); + btqual.install(qual); + btref.install(refin); + sc.nCeil.setType(SIMPLE_FUNC_LINEAR); + sc.nCeil.setConst(costMinConst); + sc.nCeil.setCoeff(costMinLinear); + doTestCase( + al, + btread, + btqual, + btref, + off, + &en, + sc, + minsc, + floorsc, + res, + nsInclusive, + filterns, + seed + ); +} + +/** + * Do a set of unit tests. 
+ */ +static void doTests() { + bonusMatchType = DEFAULT_MATCH_BONUS_TYPE; + bonusMatch = DEFAULT_MATCH_BONUS; + penMmcType = DEFAULT_MM_PENALTY_TYPE; + penMmc = DEFAULT_MM_PENALTY; + penSnp = DEFAULT_SNP_PENALTY; + penNType = DEFAULT_N_PENALTY_TYPE; + penN = DEFAULT_N_PENALTY; + nPairCat = DEFAULT_N_CAT_PAIR; + penRdExConst = DEFAULT_READ_GAP_CONST; + penRfExConst = DEFAULT_REF_GAP_CONST; + penRdExLinear = DEFAULT_READ_GAP_LINEAR; + penRfExLinear = DEFAULT_REF_GAP_LINEAR; + costMinConst = DEFAULT_MIN_CONST; + costMinLinear = DEFAULT_MIN_LINEAR; + costFloorConst = DEFAULT_FLOOR_CONST; + costFloorLinear = DEFAULT_FLOOR_LINEAR; + nCeilConst = 1.0f; // constant factor in N ceil w/r/t read len + nCeilLinear = 0.1f; // coeff of linear term in N ceil w/r/t read len + multiseedMms = DEFAULT_SEEDMMS; + multiseedLen = DEFAULT_SEEDLEN; + // Set up penalities + Scoring sc( + bonusMatch, + penMmcType, // how to penalize mismatches + 30, // constant if mm pelanty is a constant + 30, // penalty for decoded SNP + costMinConst, // constant factor in N ceiling w/r/t read length + costMinLinear, // coeff of linear term in N ceiling w/r/t read length + costFloorConst, // constant factor in N ceiling w/r/t read length + costFloorLinear, // coeff of linear term in N ceiling w/r/t read length + nCeilConst, // constant factor in N ceiling w/r/t read length + nCeilLinear, // coeff of linear term in N ceiling w/r/t read length + penNType, // how to penalize Ns in the read + penN, // constant if N pelanty is a constant + nPairCat, // true -> concatenate mates before N filtering + 25, // constant coeff for cost of gap in read + 25, // constant coeff for cost of gap in ref + 15, // linear coeff for cost of gap in read + 15, // linear coeff for cost of gap in ref + 1, // # rows at top/bot can only be entered diagonally + -1, // min row idx to backtrace from; -1 = no limit + false // sort results first by row then by score? 
+ ); + // Set up alternative penalities + Scoring sc2( + bonusMatch, + COST_MODEL_QUAL, // how to penalize mismatches + 30, // constant if mm pelanty is a constant + 30, // penalty for decoded SNP + costMinConst, // constant factor in N ceiling w/r/t read length + costMinLinear, // coeff of linear term in N ceiling w/r/t read length + costFloorConst, // constant factor in N ceiling w/r/t read length + costFloorLinear, // coeff of linear term in N ceiling w/r/t read length + 1.0f, // constant factor in N ceiling w/r/t read length + 1.0f, // coeff of linear term in N ceiling w/r/t read length + penNType, // how to penalize Ns in the read + penN, // constant if N pelanty is a constant + nPairCat, // true -> concatenate mates before N filtering + 25, // constant coeff for cost of gap in read + 25, // constant coeff for cost of gap in ref + 15, // linear coeff for cost of gap in read + 15, // linear coeff for cost of gap in ref + 1, // # rows at top/bot can only be entered diagonally + -1, // min row idx to backtrace from; -1 = no limit + false // sort results first by row then by score? + ); + SwResult res; + + // + // Basic nucleotide-space tests + // + cerr << "Running tests..." 
<< endl; + int tests = 1; + bool nIncl = false; + bool nfilter = false; + + SwAligner al; + RandomSource rnd(73); + for(int i = 0; i < 3; i++) { + cerr << " Test " << tests++ << " (nuc space, offset " + << (i*4) << ", exact)..."; + sc.rdGapConst = 40; + sc.rfGapConst = 40; + sc.rdGapLinear = 15; + sc.rfGapLinear = 15; + // A C G T A C G T + // H E F H E F H E F H E F H E F H E F H E F H E F + // A 0 lo lo -30 lo lo -30 lo lo -30 lo lo 0 lo lo -30 lo lo-30 lo lo-30 lo lo + // C -30 lo -55 0 -85 -85 -55 -55 -85 + // G -30 lo -70 -55 -85 -55 0 -100-100 + // T -30 lo -85 -60 -85 -70 -55-100 -55 + // A 0 lo -85 -55 -55 -85 -70 -70 -70 + // C -30 lo -55 0 -85-100 -55 -55 -85 + // G -30 lo -70 -55 -85 -55 0 -100-100 + // T -30 lo -85 -60 -85 -70 -55-100 -55 + doTestCase2( + al, + "ACGTACGT", // read + "IIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 0); + assert_eq(res.alres.score().ns(), 0); + assert(res.alres.ned().empty()); + assert(res.alres.aed().empty()); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1mm allowed by minsc)..."; + sc.setMmPen(COST_MODEL_CONSTANT, 30); + //sc.setMatchBonus(10); + doTestCase2( + al, + "ACGTTCGT", // read + "IIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); 
// filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), -30); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1mm allowed by minsc, check qual 1)..."; + doTestCase2( + al, + "ACGTTCGT", // read + "ABCDEFGH", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc2, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + size_t lo, hi; + if(i == 0) { + lo = 0; hi = 1; + } else if(i == 1) { + lo = 1; hi = 2; + } else { + lo = 2; hi = 3; + } + for(size_t j = lo; j < hi; j++) { + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(j*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), -36); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + } + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1mm allowed by minsc, check qual 2)..."; + doTestCase2( + al, + "ACGAACGT", // read + "ABCDEFGH", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc2, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + 
assert_eq(res.alres.score().score(), -35); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + assert(res.empty()); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1mm allowed by minsc, check qual )..."; + assert(res.empty()); + doTestCase2( + al, + "TCGTACGT", // read + "ABCDEFGH", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc2, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), -32); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + assert(res.empty()); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1mm at the beginning, allowed by minsc)..."; + doTestCase2( + al, + "CCGTACGT", // read + "IIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), -30); + assert_eq(res.alres.score().ns(), 0); + assert_eq(1, res.alres.ned().size()); + assert_eq(0, res.alres.aed().size()); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << 
" (nuc space, offset " << (i*4) + << ", 1 n in read, allowed)..."; + doTestCase3( + al, + "ACGTNCGT", // read + "IIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + 1.0f, // allow 1 N + 0.0f, // allow 1 N + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), -1); + assert_eq(res.alres.score().ns(), 1); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 2 n in read, allowed)..."; + doTestCase3( + al, + "ACGNNCGT", // read + "IIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + 2.0f, // const coeff for N ceiling + 0.0f, // linear coeff for N ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), -2); + assert_eq(res.alres.score().ns(), 2); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 2 n in read, 1 at beginning, allowed)..."; + doTestCase2( + al, + "NCGTNCGT", // read + "IIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost 
ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), -2); + assert_eq(res.alres.score().ns(), 2); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1 n in ref, allowed)..."; + doTestCase2( + al, + "ACGTACGT", // read + "IIIIIIII", // qual + "ACGTNCGTACGTANGT", // ref in + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), -1); + assert_eq(res.alres.score().ns(), 1); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1mm disallowed by minsc)..."; + doTestCase2( + al, + "ACGTTCGT", // read + "IIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -10.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + // Read gap with equal read and ref gap penalties + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", read gap allowed by minsc)..."; + assert(res.empty()); + sc.rfGapConst = 25; + 
sc.rdGapConst = 25; + sc.rfGapLinear = 15; + sc.rdGapLinear = 15; + doTestCase2( + al, + "ACGTCGT", // read + "IIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -40); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", read gap disallowed by minsc)..."; + sc.rfGapConst = 25; + sc.rdGapConst = 25; + sc.rfGapLinear = 15; + sc.rdGapLinear = 15; + doTestCase2( + al, + "ACGTCGT", // read + "IIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + res.reset(); + + cerr << "PASSED" << endl; + // Ref gap with equal read and ref gap penalties + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", ref gap allowed by minsc)..."; + doTestCase2( + al, + "ACGTAACGT", // read + "IIIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + 
assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -40); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", read gap disallowed by gap barrier)..."; + sc.rfGapConst = 25; + sc.rdGapConst = 25; + sc.rfGapLinear = 15; + sc.rdGapLinear = 15; + sc.gapbar = 4; + doTestCase2( + al, + "ACGTCGT", // read + "IIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + sc.gapbar = 1; + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + res.reset(); + + cerr << "PASSED" << endl; + // Ref gap with equal read and ref gap penalties + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", ref gap allowed by minsc, gapbar=3)..."; + sc.gapbar = 3; + doTestCase2( + al, + "ACGTAACGT", // read + "IIIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + sc.gapbar = 1; + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -40); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + // Ref gap with equal read and ref gap penalties + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", ref gap allowed by minsc, gapbar=4)..."; + sc.gapbar = 4; + doTestCase2( + al, + 
"ACGTAACGT", // read + "IIIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + sc.gapbar = 1; + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -40); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", ref gap disallowed by minsc)..."; + doTestCase2( + al, + "ACGTAACGT", // read + "IIIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + assert(al.done()); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", ref gap disallowed by gap barrier)..."; + sc.gapbar = 5; + doTestCase2( + al, + "ACGTAACGT", // read + "IIIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + sc.gapbar = 1; + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + assert(al.done()); + cerr << "PASSED" << endl; + + // Read gap with one read gap and zero ref gaps allowed + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1 read gap, ref gaps disallowed by minsc)..."; + sc.rfGapConst = 35; + 
sc.rdGapConst = 25; + sc.rfGapLinear = 20; + sc.rdGapLinear = 10; + doTestCase2( + al, + "ACGTCGT", // read + "IIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -35); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", gaps disallowed by minsc)..."; + sc.rfGapConst = 25; + sc.rdGapConst = 25; + sc.rfGapLinear = 10; + sc.rdGapLinear = 10; + doTestCase2( + al, + "ACGTCGT", // read + "IIIIIII", // qual + "ACGTACGTACGTACGT", // ref + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + assert(res.empty()); + cerr << "PASSED" << endl; + + // Ref gap with one ref gap and zero read gaps allowed + sc.rfGapConst = 25; + sc.rdGapConst = 35; + sc.rfGapLinear = 12; + sc.rdGapLinear = 22; + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1 ref gap, read gaps disallowed by minsc)..."; + assert(res.empty()); + doTestCase2( + al, + "ACGTAACGT", + "IIIIIIIII", + "ACGTACGTACGTACGT", + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + 
assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -37); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", gaps disallowed by minsc)..."; + doTestCase2( + al, + "ACGTAACGT", + "IIIIIIIII", + "ACGTACGTACGTACGT", + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + cerr << "PASSED" << endl; + + // Read gap with one read gap and two ref gaps allowed + sc.rfGapConst = 20; + sc.rdGapConst = 25; + sc.rfGapLinear = 10; + sc.rdGapLinear = 15; + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1 read gap, 2 ref gaps allowed by minsc)..."; + doTestCase2( + al, + "ACGTCGT", + "IIIIIII", + "ACGTACGTACGTACGT", + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -40); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", gaps disallowed by minsc)..."; + doTestCase2( + al, + "ACGTCGT", + "IIIIIII", + "ACGTACGTACGTACGT", + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for 
cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + cerr << "PASSED" << endl; + + // Ref gap with one ref gap and two read gaps allowed + sc.rfGapConst = 25; + sc.rdGapConst = 11; // if this were 10, we'd have ties + sc.rfGapLinear = 15; + sc.rdGapLinear = 10; + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1 ref gap, 2 read gaps allowed by minsc)..."; + doTestCase2( + al, + "ACGTAACGT", + "IIIIIIIII", + "ACGTACGTACGTACGT", + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -40); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", gaps disallowed by minsc)..."; + doTestCase2( + al, + "ACGTAACGT", + "IIIIIIIII", + "ACGTACGTACGTACGT", + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(res.empty()); + res.reset(); + assert(al.done()); + cerr << "PASSED" << endl; + + // Read gap with two read gaps and two ref gaps allowed + sc.rfGapConst = 15; + sc.rdGapConst = 15; + sc.rfGapLinear = 10; + sc.rdGapLinear = 10; + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 2 ref gaps, 2 read gaps allowed by minsc)..."; + doTestCase3( + 
al, + "ACGTCGT", + "IIIIIII", + "ACGTACGTACGTACGT", + i*4, // off + sc, // scoring scheme + -40.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + 1.0, // const coeff for N ceiling + 0.0, // linear coeff for N ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + true); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + if(!res.empty()) { + //al.printResultStacked(res, cerr); cerr << endl; + } + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -25); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + // The following alignment is possible when i == 2: + // ACGTACGTACGTACGTN + // A x + // C x + // G x + // T x + // C x + // G x + // T x + assert(i == 2 || res.empty()); + res.reset(); + cerr << "PASSED" << endl; + + sc.rfGapConst = 10; + sc.rdGapConst = 10; + sc.rfGapLinear = 10; + sc.rdGapLinear = 10; + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1 ref gap, 1 read gap allowed by minsc)..."; + doTestCase2( + al, + "ACGTCGT", + "IIIIIII", + "ACGTACGTACGTACGT", + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -20); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + cerr << "PASSED" << endl; + + // Ref gap with two ref gaps and zero read gaps allowed + sc.rfGapConst = 15; + sc.rdGapConst = 15; + sc.rfGapLinear = 5; + sc.rdGapLinear = 5; + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 2 ref gaps, 2 read gaps allowed by minsc)..."; + 
// Careful: it might be possible for the read to align with overhang + // instead of with a gap + doTestCase3( + al, + "ACGTAACGT", + "IIIIIIIII", + "ACGTACGTACGTACGT", + i*4, // off + sc, // scoring scheme + -35.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + 1.0f, // needed to avoid overhang alignments + 0.0f, // needed to avoid overhang alignments + res, // result + nIncl, // Ns inclusive (not mismatches) + true); // filter Ns + if(i == 0) { + lo = 0; hi = 1; + } else if(i == 1) { + lo = 1; hi = 2; + } else { + lo = 2; hi = 3; + } + for(size_t j = lo; j < hi; j++) { + al.nextAlignment(res, rnd); + assert(!res.empty()); + //al.printResultStacked(res, cerr); cerr << endl; + assert(res.alres.refoff() == 0 || + res.alres.refoff() == 4 || + res.alres.refoff() == 8); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -20); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + } + al.nextAlignment(res, rnd); + //assert(res.empty()); + //res.reset(); + cerr << "PASSED" << endl; + + sc.rfGapConst = 25; + sc.rdGapConst = 25; + sc.rfGapLinear = 4; + sc.rdGapLinear = 4; + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1 ref gap, 1 read gap allowed by minsc)..."; + doTestCase2( + al, + "ACGTAACGT", + "IIIIIIIII", + "ACGTACGTACGTACGT", + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 1); + assert_eq(res.alres.score().score(), -29); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", short read)..."; + doTestCase2( + al, + "A", + 
"I", + "AAAAAAAAAAAA", + i*4, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 0); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + cerr << "PASSED" << endl; + + if(i == 0) { + cerr << " Test " << tests++ + << " (nuc space, offset 0, short read & ref)..."; + doTestCase2( + al, + "A", + "I", + "A", + 0, // off + sc, // scoring scheme + -30.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 0); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + cerr << "PASSED" << endl; + } + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", short read, many allowed gaps)..."; + doTestCase2( + al, + "A", + "I", + "AAAAAAAAAAAA", + i*4, // off + sc, // scoring scheme + -150.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 0); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + cerr << "PASSED" << endl; + + if(i == 0) { + cerr << " Test " << tests++ + << " (nuc space, offset 0, short read & ref, " + << "many allowed gaps)..."; + doTestCase2( + al, + "A", + "I", + "A", + 0, // off + sc, // scoring scheme + -150.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + 
al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 0); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + cerr << "PASSED" << endl; + } + } + + // A test case where a valid alignment with a worse score should be + // accepted over a valid alignment with a better score but too many + // Ns + cerr << " Test " << tests++ << " (N ceiling 1)..."; + sc.mmcostType = COST_MODEL_CONSTANT; + sc.mmcost = 10; + sc.snp = 30; + sc.nCeilConst = 0.0f; + sc.nCeilLinear = 0.0f; + sc.rfGapConst = 10; + sc.rdGapLinear = 10; + sc.rfGapConst = 10; + sc.rfGapLinear = 10; + sc.setNPen(COST_MODEL_CONSTANT, 2); + sc.gapbar = 1; + // No Ns allowed, so this hit should be filtered + doTestCase2( + al, + "ACGTACGT", // read seq + "IIIIIIII", // read quals + "NCGTACGT", // ref seq + 0, // offset + sc, // scoring scheme + -25.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + false, // ns are in inclusive + true, // nfilter + 0); + al.nextAlignment(res, rnd); + assert(res.empty()); + cerr << "PASSED" << endl; + res.reset(); + + // 1 N allowed, so this hit should stand + cerr << " Test " << tests++ << " (N ceiling 2)..."; + doTestCase3( + al, + "ACGTACGT", // read seq + "IIIIIIII", // read quals + "NCGTACGT", // ref seq + 0, // offset + sc, // scoring scheme + -25.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + 1.0f, // constant coefficient for # Ns allowed + 0.0f, // linear coefficient for # Ns allowed + res, // result + false, // ns are in inclusive + false, // nfilter - NOTE: FILTER OFF + 0); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(0, res.alres.score().gaps()); + assert_eq(-2, res.alres.score().score()); + assert_eq(1, res.alres.score().ns()); + cerr << "PASSED" << endl; + res.reset(); + + // 1 N allowed, but we set st_ such that this hit should not stand + for(size_t i = 0; i < 2; i++) { + cerr << " Test " 
<< tests++ << " (N ceiling 2 with st_ override)..."; + EList en; // NOTE(review): element type appears to have been lost here (en.fill(true) suggests bool) - confirm + en.resize(3); en.fill(true); + if(i == 1) { + en[1] = false; + } + sc.rfGapConst = 10; + sc.rdGapLinear = 10; + sc.rfGapConst = 10; // NOTE(review): rfGapConst is assigned twice and rdGapConst never; presumably one of these was meant to be rdGapConst - confirm + sc.rfGapLinear = 10; + doTestCase4( + al, + "ACGTACGT", // read seq + "IIIIIIII", // read quals + "NCGTACGT", // ref seq + 0, // offset + en, // rectangle columns where solution can end + sc, // scoring scheme + -25.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + 1.0f, // constant coefficient for # Ns allowed + 0.0f, // linear coefficient for # Ns allowed + res, // result + false, // ns are in inclusive + false, // nfilter - NOTE: FILTER OFF + 0); + al.nextAlignment(res, rnd); + if(i > 0) { + assert(res.empty()); + } else { + assert(!res.empty()); + } + cerr << "PASSED" << endl; + res.reset(); + } + + // N ceiling is 1 (nCeilConst = 1.0f), so this 1-N hit should stand even with the filter on + cerr << " Test " << tests++ << " (N ceiling 3)..."; + sc.nCeilConst = 1.0f; + sc.nCeilLinear = 0.0f; + doTestCase2( + al, + "ACGTACGT", // read seq + "IIIIIIII", // read quals + "NCGTACGT", // ref seq + 0, // offset + sc, // scoring scheme + -25.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + false, // ns are in inclusive + true, // nfilter - NOTE: FILTER ON + 0); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(0, res.alres.score().gaps()); + assert_eq(-2, res.alres.score().score()); + assert_eq(1, res.alres.score().ns()); + cerr << "PASSED" << endl; + res.reset(); + + // Exact hit expected at offset 8; a second nextAlignment() call must come back empty + cerr << " Test " << tests++ << " (redundant alignment elimination 1)..."; + sc.nCeilConst = 1.0f; + sc.nCeilLinear = 0.0f; + sc.rfGapConst = 25; + sc.rdGapLinear = 15; + sc.rfGapConst = 25; // NOTE(review): same duplicated rfGapConst assignment as in the st_ override test above + sc.rfGapLinear = 15; + doTestCase2( + al, + // 1 2 3 4 + // 01234567890123456789012345678901234567890123456 + "AGGCTATGCCTCTGACGCGATATCGGCGCCCACTTCAGAGCTAACCG", + "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", +
"TTTTTTTTAGGCTATGCCTCTGACGCGATATCGGCGCCCACTTCAGAGCTAACCGTTTTTTT", + // 01234567890123456789012345678901234567890123456789012345678901 + // 1 2 3 4 5 6 + 8, // offset + sc, // scoring scheme + -25.0f, // const coeff for cost ceiling + -5.0f, // linear coeff for cost ceiling + res, // result + false, // ns are in inclusive + true, // nfilter - NOTE: FILTER ON + 0); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(8, res.alres.refoff()); + assert_eq(47, res.alres.refExtent()); + assert_eq(0, res.alres.score().gaps()); + assert_eq(0, res.alres.score().score()); + assert_eq(0, res.alres.score().ns()); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + cerr << "PASSED" << endl; + res.reset(); + +} + +/** + * Do a set of unit tests for local alignment: resets the scoring globals, builds a Scoring object and runs exact and mismatch cases at three reference offsets. + */ +static void doLocalTests() { + bonusMatchType = DEFAULT_MATCH_BONUS_TYPE; + bonusMatch = DEFAULT_MATCH_BONUS_LOCAL; + penMmcType = DEFAULT_MM_PENALTY_TYPE; + penMmc = DEFAULT_MM_PENALTY; + penSnp = DEFAULT_SNP_PENALTY; + penNType = DEFAULT_N_PENALTY_TYPE; + penN = DEFAULT_N_PENALTY; + nPairCat = DEFAULT_N_CAT_PAIR; + penRdExConst = DEFAULT_READ_GAP_CONST; + penRfExConst = DEFAULT_REF_GAP_CONST; + penRdExLinear = DEFAULT_READ_GAP_LINEAR; + penRfExLinear = DEFAULT_REF_GAP_LINEAR; + costMinConst = DEFAULT_MIN_CONST_LOCAL; + costMinLinear = DEFAULT_MIN_LINEAR_LOCAL; + costFloorConst = DEFAULT_FLOOR_CONST_LOCAL; + costFloorLinear = DEFAULT_FLOOR_LINEAR_LOCAL; + nCeilConst = 1.0f; // constant factor in N ceil w/r/t read len + nCeilLinear = 0.1f; // coeff of linear term in N ceil w/r/t read len + multiseedMms = DEFAULT_SEEDMMS; + multiseedLen = DEFAULT_SEEDLEN; + // Set up penalties + Scoring sc( + 10, // presumably the match bonus (setMatchBonus(10) restores it later) - TODO confirm + penMmcType, // how to penalize mismatches + 30, // constant if mm penalty is a constant + penSnp, // penalty for decoded SNP + costMinConst, // presumably constant term of the minimum-score function w/r/t read length - TODO confirm + costMinLinear, // presumably linear coeff of the minimum-score function w/r/t read length - TODO confirm +
costFloorConst, // presumably constant term of the score-floor function w/r/t read length - TODO confirm + costFloorLinear, // presumably linear coeff of the score-floor function w/r/t read length - TODO confirm + nCeilConst, // constant factor in N ceiling w/r/t read length + nCeilLinear, // coeff of linear term in N ceiling w/r/t read length + penNType, // how to penalize Ns in the read + penN, // constant if N penalty is a constant + nPairCat, // true -> concatenate mates before N filtering + 25, // constant coeff for cost of gap in read + 25, // constant coeff for cost of gap in ref + 15, // linear coeff for cost of gap in read + 15, // linear coeff for cost of gap in ref + 1, // # rows at top/bot can only be entered diagonally + -1, // min row idx to backtrace from; -1 = no limit + false // sort results first by row then by score? + ); + SwResult res; + + // + // Basic nucleotide-space tests + // + cerr << "Running local tests..." << endl; + int tests = 1; + bool nIncl = false; + bool nfilter = false; + + SwAligner al; + RandomSource rnd(73); + for(int i = 0; i < 3; i++) { + cerr << " Test " << tests++ << " (short nuc space, offset " + << (i*4) << ", exact)..."; + sc.rdGapConst = 40; + sc.rfGapConst = 40; + doTestCase2( + al, + "ACGT", // read + "IIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + 0.0f, // const coeff for cost ceiling + 8.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(4, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 40); + assert_eq(res.alres.score().ns(), 0); + assert(res.alres.ned().empty()); + assert(res.alres.aed().empty()); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + res.reset(); + cerr << "PASSED" << endl; + + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + // A C G T A C G T A C G T
A C G T + // 0 C + // 1 C x + // 2 G x + // 3 T x + + cerr << " Test " << tests++ << " (short nuc space, offset " + << (i*4) << ", 1mm)..."; + sc.rdGapConst = 40; + sc.rfGapConst = 40; + doTestCase2( + al, + "CCGT", // read; first base mismatches, so the expected hit is the 3-base suffix + "IIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + 0.0f, // const coeff for cost ceiling + 7.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4+1, res.alres.refoff()); + assert_eq(3, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 30); + assert_eq(res.alres.score().ns(), 0); + assert(res.alres.ned().empty()); + assert(res.alres.aed().empty()); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (short nuc space, offset " + << (i*4) << ", 1mm)..."; + sc.rdGapConst = 40; + sc.rfGapConst = 40; + doTestCase2( + al, + "ACGA", // read; last base mismatches, so the expected hit is the 3-base prefix + "IIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + 0.0f, // const coeff for cost ceiling + 7.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(3, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 30); + assert_eq(res.alres.score().ns(), 0); + assert(res.alres.ned().empty()); + assert(res.alres.aed().empty()); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + res.reset(); + cerr << "PASSED" << endl; + + if(i == 0) { + cerr << " Test " << tests++ << " (short nuc space, offset " + << (i*4) << ", 1mm, match bonus=20)..."; +
sc.rdGapConst = 40; + sc.rfGapConst = 40; + sc.setMatchBonus(20); // raise the match bonus for this case only; set back to 10 below + doTestCase2( + al, + "TTGT", // read + "IIII", // qual + "TTGA", // ref in + i*4, // off + sc, // scoring scheme + 25.0f, // const coeff for cost ceiling + 0.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(3, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 60); + assert_eq(res.alres.score().ns(), 0); + assert(res.alres.ned().empty()); + assert(res.alres.aed().empty()); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + res.reset(); + sc.setMatchBonus(10); // back to 10 for the remaining cases + cerr << "PASSED" << endl; + } + + cerr << " Test " << tests++ << " (nuc space, offset " + << (i*4) << ", exact)..."; + sc.rdGapConst = 40; + sc.rfGapConst = 40; + doTestCase2( + al, + "ACGTACGT", // read + "IIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + 0.0f, // const coeff for cost ceiling + 8.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 80); + assert_eq(res.alres.score().ns(), 0); + assert(res.alres.ned().empty()); + assert(res.alres.aed().empty()); + res.reset(); + al.nextAlignment(res, rnd); + assert(res.empty()); + assert(al.done()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (long nuc space, offset " + << (i*8) << ", exact)..."; + sc.rdGapConst = 40; + sc.rfGapConst = 40; + doTestCase2( + al, + "ACGTACGTACGTACGTACGTA", // read + "IIIIIIIIIIIIIIIIIIIII", // qual +
"ACGTACGTACGTACGTACGTACGTACGTACGTACGTA", // ref in + // ACGTACGTACGTACGTACGT + // ACGTACGTACGTACGTACGT + // ACGTACGTACGTACGTACGT + i*8, // off + sc, // scoring scheme + 0.0f, // const coeff for cost ceiling + 8.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*8, res.alres.refoff()); + assert_eq(21, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 210); + assert_eq(res.alres.score().ns(), 0); + assert(res.alres.ned().empty()); + assert(res.alres.aed().empty()); + res.reset(); + al.nextAlignment(res, rnd); + //assert(res.empty()); + //assert(al.done()); + res.reset(); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) + << ", 1mm allowed by minsc)..."; + doTestCase2( + al, + "ACGTTCGT", // read; one mid-read mismatch (score 40, presumably 7 matches * 10 - 30) + "IIIIIIII", // qual + "ACGTACGTACGTACGT", // ref in + i*4, // off + sc, // scoring scheme + 0.0f, // const coeff for cost ceiling + 5.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*4, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 40); + assert_eq(res.alres.score().ns(), 0); + res.reset(); + al.nextAlignment(res, rnd); + //assert(res.empty()); + //assert(al.done()); + cerr << "PASSED" << endl; + + cerr << " Test " << tests++ << " (long nuc space, offset " + << (i*8) << ", 6mm allowed by minsc)..."; + sc.rdGapConst = 50; + sc.rfGapConst = 50; + sc.rdGapLinear = 45; + sc.rfGapLinear = 45; + doTestCase2( + al, + "ACGTACGATGCATCGTACGTA", // read + "IIIIIIIIIIIIIIIIIIIII", // qual + "ACGTACGTACGTACGTACGTACGTACGTACGTACGTA", // ref in + // ACGTACGTACGTACGTACGT + //
ACGTACGTACGTACGTACGT + // ACGTACGTACGTACGTACGT + i*8, // off + sc, // scoring scheme + 0.0f, // const coeff for cost ceiling + 1.0f, // linear coeff for cost ceiling + res, // result + nIncl, // Ns inclusive (not mismatches) + nfilter); // filter Ns + assert(!al.done()); + al.nextAlignment(res, rnd); + assert(!res.empty()); + assert_eq(i*8 + 13, res.alres.refoff()); + assert_eq(8, res.alres.refExtent()); + assert_eq(res.alres.score().gaps(), 0); + assert_eq(res.alres.score().score(), 80); + assert_eq(res.alres.score().ns(), 0); + assert(res.alres.ned().empty()); + assert(res.alres.aed().empty()); + res.reset(); + al.nextAlignment(res, rnd); + res.reset(); + cerr << "PASSED" << endl; + } +} + +int main(int argc, char **argv) { // standalone driver (MAIN_ALIGNER_SW builds): parses options, runs the unit tests on ARG_TESTS, otherwise aligns the read/qual/ref/offset given on the command line + int option_index = 0; + int next_option; + unsigned seed = 0; + gGapBarrier = 1; + gSnpPhred = 30; + bool nsInclusive = false; + bool nfilter = false; + bonusMatchType = DEFAULT_MATCH_BONUS_TYPE; + bonusMatch = DEFAULT_MATCH_BONUS; + penMmcType = DEFAULT_MM_PENALTY_TYPE; + penMmc = DEFAULT_MM_PENALTY; + penSnp = DEFAULT_SNP_PENALTY; + penNType = DEFAULT_N_PENALTY_TYPE; + penN = DEFAULT_N_PENALTY; + penRdExConst = DEFAULT_READ_GAP_CONST; + penRfExConst = DEFAULT_REF_GAP_CONST; + penRdExLinear = DEFAULT_READ_GAP_LINEAR; + penRfExLinear = DEFAULT_REF_GAP_LINEAR; + costMinConst = DEFAULT_MIN_CONST; + costMinLinear = DEFAULT_MIN_LINEAR; + costFloorConst = DEFAULT_FLOOR_CONST; + costFloorLinear = DEFAULT_FLOOR_LINEAR; + nCeilConst = 1.0f; // constant factor in N ceiling w/r/t read length + nCeilLinear = 1.0f; // coeff of linear term in N ceiling w/r/t read length + nCatPair = false; + multiseedMms = DEFAULT_SEEDMMS; + multiseedLen = DEFAULT_SEEDLEN; + multiseedIvalType = DEFAULT_IVAL; + multiseedIvalA = DEFAULT_IVAL_A; + multiseedIvalB = DEFAULT_IVAL_B; + mhits = 1; + do { + next_option = getopt_long(argc, argv, short_opts, long_opts, &option_index); + switch (next_option) { + case 's': gSnpPhred = parse(optarg); break; + case 'r': seed =
parse(optarg); break; + case ARG_TESTS: { + doTests(); + cout << "PASSED end-to-ends" << endl; + doLocalTests(); + cout << "PASSED locals" << endl; + return 0; + } + case 'A': { + bool localAlign = false; + bool noisyHpolymer = false; + bool ignoreQuals = false; + SeedAlignmentPolicy::parseString( + optarg, + localAlign, + noisyHpolymer, + ignoreQuals, + bonusMatchType, + bonusMatch, + penMmcType, + penMmc, + penNType, + penN, + penRdExConst, + penRfExConst, + penRdExLinear, + penRfExLinear, + costMinConst, + costMinLinear, + costFloorConst, + costFloorLinear, + nCeilConst, + nCeilLinear, + nCatPair, + multiseedMms, + multiseedLen, + multiseedIvalType, + multiseedIvalA, + multiseedIvalB, + posmin); + break; + } + case -1: break; // end of options; the loop condition below then exits + default: { + cerr << "Unknown option: " << (char)next_option << endl; + printUsage(cerr); + exit(1); + } + } + } while(next_option != -1); + srand(seed); + if(argc - optind < 4) { + cerr << "Need at least 4 arguments" << endl; + printUsage(cerr); + exit(1); + } + BTDnaString read; + BTString ref, qual; + // Get read + read.installChars(argv[optind]); + // Get qualities + qual.install(argv[optind+1]); + assert_eq(read.length(), qual.length()); + // Get reference + ref.install(argv[optind+2]); + // Get reference offset + size_t off = parse(argv[optind+3]); + // Set up penalties + Scoring sc( + false, // local alignment? + false, // bad homopolymer?
+ bonusMatchType, + bonusMatch, + penMmcType, // how to penalize mismatches + penMmc, // constant if mm penalty is a constant + costMinConst, + costMinLinear, + costFloorConst, + costFloorLinear, + nCeilConst, // N ceiling constant coefficient + nCeilLinear, // N ceiling linear coefficient + penNType, // how to penalize Ns in the read + penN, // constant if N penalty is a constant + nCatPair, // true -> concatenate mates before N filtering + penRdExConst, // constant cost of extending a gap in the read + penRfExConst, // constant cost of extending a gap in the reference + penRdExLinear, // coeff of linear term for cost of gap extension in read + penRfExLinear // coeff of linear term for cost of gap extension in ref + ); + // Calculate the penalty ceiling for the read + TAlScore minsc = Scoring::linearFunc( + read.length(), + costMinConst, + costMinLinear); + TAlScore floorsc = Scoring::linearFunc( + read.length(), + costFloorConst, + costFloorLinear); // NOTE(review): floorsc is computed but not passed to doTestCase below + SwResult res; + SwAligner al; + doTestCase( + al, + read, + qual, + ref, + off, + NULL, + sc, + minsc, + res, + nsInclusive, + nfilter, + seed); +} +#endif /*MAIN_ALIGNER_SW*/ diff --git a/aligner_sw.h b/aligner_sw.h new file mode 100644 index 0000000..add5c87 --- /dev/null +++ b/aligner_sw.h @@ -0,0 +1,648 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
+ */ + +/* + * aligner_sw.h + * + * Classes and routines for solving dynamic programming problems in aid of read + * alignment. Goals include the ability to handle: + * + * - Both read alignment, where the query must align end-to-end, and local + * alignment, where we seek a high-scoring alignment that need not involve + * the entire query. + * - Situations where: (a) we've found a seed hit and are trying to extend it + * into a larger hit, (b) we've found an alignment for one mate of a pair and + * are trying to find a nearby alignment for the other mate, (c) we're + * aligning against an entire reference sequence. + * - Caller-specified indicators for what columns of the dynamic programming + * matrix we are allowed to start in or end in. + * + * TODO: + * + * - A slicker way to filter out alignments that violate a ceiling placed on + * the number of Ns permitted in the reference portion of the alignment. + * Right now we accomplish this by masking out ending columns that correspond + * to *ungapped* alignments with too many Ns. This results in false + * positives and false negatives for gapped alignments. The margin of error + * (# of Ns by which we might miscount) is bounded by the number of gaps. 
+ */ + +/** + * |-maxgaps-| + * ***********oooooooooooooooooooooo - + * ***********ooooooooooooooooooooo | + * ***********oooooooooooooooooooo | + * ***********ooooooooooooooooooo | + * ***********oooooooooooooooooo | + * ***********ooooooooooooooooo read len + * ***********oooooooooooooooo | + * ***********ooooooooooooooo | + * ***********oooooooooooooo | + * ***********ooooooooooooo | + * ***********oooooooooooo - + * |-maxgaps-| + * |-readlen-| + * |-------skip--------| + */ + +#ifndef ALIGNER_SW_H_ +#define ALIGNER_SW_H_ + +#define INLINE_CUPS + +#include // NOTE(review): the header names of the bare #include lines appear to have been lost - restore before use +#include +#include +#include "threading.h" +#include +#include "aligner_sw_common.h" +#include "aligner_sw_nuc.h" +#include "ds.h" +#include "aligner_seed.h" +#include "reference.h" +#include "random_source.h" +#include "mem_ids.h" +#include "aligner_result.h" +#include "mask.h" +#include "dp_framer.h" +#include "aligner_swsse.h" +#include "aligner_bt.h" + +#define QUAL2(d, f) sc_->mm((int)(*rd_)[rdi_ + d], \ + (int) rf_ [rfi_ + f], \ + (int)(*qu_)[rdi_ + d] - 33) +#define QUAL(d) sc_->mm((int)(*rd_)[rdi_ + d], \ + (int)(*qu_)[rdi_ + d] - 33) +#define N_SNP_PEN(c) (((int)rf_[rfi_ + c] > 15) ? sc_->n(30) : sc_->penSnp) + +/** + * SwAligner + * ========= + * + * Encapsulates facilities for alignment using dynamic programming. Handles + * alignment of nucleotide reads against known reference nucleotides. + * + * The class is stateful. First the user must call init() to initialize the + * object with details regarding the dynamic programming problem to be solved. + * Next, the user calls align() to fill the dynamic programming matrix and + * calculate summaries describing the solutions. Finally the user calls + * nextAlignment(...), perhaps repeatedly, to populate the SwResult object with + * the next result. Results are dispensed in best-to-worst, left-to-right + * order.
+ * + * The class expects the read string, quality string, and reference string + * provided by the caller live at least until the user is finished aligning and + * obtaining alignments from this object. + * + * There is a design tradeoff between hiding/exposing details of the genome and + * its strands to the SwAligner. In a sense, a better design is to hide + * details such as the id of the reference sequence aligned to, or whether + * we're aligning the read in its original forward orientation or its reverse + * complement. But this means that any alignment results returned by SwAligner + * have to be extended to include those details before they're useful to the + * caller. We opt for messy but expedient - the reference id and orientation + * of the read are given to SwAligner, remembered, and used to populate + * SwResults. + * + * LOCAL VS GLOBAL + * + * The dynamic programming aligner supports both local and global alignment, + * and one option in between. To implement global alignment, the aligner (a) + * allows negative scores (i.e. doesn't necessarily clamp them up to 0), (b) + * checks in rows other than the last row for acceptable solutions, and (c) + * optionally adds a bonus to the score for matches. + * + * For global alignment, we: + * + * (a) Allow negative scores + * (b) Check only in the last row + * (c) Either add a bonus for matches or not (doesn't matter) + * + * For local alignment, we: + * + * (a) Clamp scores to 0 + * (b) Check in any row for a sufficiently high score + * (c) Add a bonus for matches + * + * An in-between solution is to allow alignments to be curtailed on the + * right-hand side if a better score can be achieved thereby, but not on the + * left. For this, we: + * + * (a) Allow negative scores + * (b) Check in any row for a sufficiently high score + * (c) Either add a bonus for matches or not (doesn't matter) + * + * REDUNDANT ALIGNMENTS + * + * When are two alignments distinct and when are they redundant (not distinct)? 
+ * At one extreme, we might say the best alignment from any given dynamic + * programming problem is redundant with all other alignments from that + * problem. At the other extreme, we might say that any two alignments with + * distinct starting points and edits are distinct. The former is probably too + * conservative for mate-finding DP problems. The latter is certainly too + * permissive, since two alignments that differ only in how gaps are arranged + * should not be considered distinct. + * + * Some in-between solutions are: + * + * (a) If two alignments share an end point on either end, they are redundant. + * Otherwise, they are distinct. + * (b) If two alignments share *both* end points, they are redundant. + * (c) If two alignments share any cells in the DP table, they are redundant. + * (d) Two alignments are redundant if either end is within N positions of each other + * (e) Like (d) but both instead of either + * (f, g) Like d, e, but where N is tied to maxgaps somehow + * + * Why not (a)? One reason is that it's possible for two alignments to have + * different start & end positions but share many cells. Consider alignments 1 + * and 2 below; their end-points are labeled. + * + * 1 2 + * \ \ + * -\ + * \ + * \ + * \ + * -\ + * \ \ + * 1 2 + * + * 1 and 2 are distinct according to (a) but they share many cells in common. + * + * Why not (f, g)?
It fixes the problem with (a) above by forcing the + * alignments to be spread so far that they can't possibly share diagonal cells + * in common. + */ +class SwAligner { + + typedef std::pair SizeTPair; // NOTE(review): template arguments appear to have been lost here (presumably a pair of size_t) - confirm + + // States that the aligner can be in + enum { + STATE_UNINIT, // init() hasn't been called yet + STATE_INITED, // init() has been called, but not align() + STATE_ALIGNED, // align() has been called + }; + + const static size_t ALPHA_SIZE = 5; + +public: + + explicit SwAligner() : // starts in STATE_UNINIT with neither a read nor a reference installed + sseU8fw_(DP_CAT), + sseU8rc_(DP_CAT), + sseI16fw_(DP_CAT), + sseI16rc_(DP_CAT), + state_(STATE_UNINIT), + initedRead_(false), + readSse16_(false), + initedRef_(false), + rfwbuf_(DP_CAT), + btnstack_(DP_CAT), + btcells_(DP_CAT), + btdiag_(), + btncand_(DP_CAT), + btncanddone_(DP_CAT), + btncanddoneSucc_(0), + btncanddoneFail_(0), + cper_(), + cperMinlen_(), + cperPerPow2_(), + cperEf_(), + cperTri_(), + colstop_(0), + lastsolcol_(0), + cural_(0) + ASSERT_ONLY(, cand_tmp_(DP_CAT)) + { } + + /** + * Prepare the dynamic programming driver with a new read and a new scoring + * scheme. + */ + void initRead( + const BTDnaString& rdfw, // read sequence for fw read + const BTDnaString& rdrc, // read sequence for rc read + const BTString& qufw, // read qualities for fw read + const BTString& qurc, // read qualities for rc read + size_t rdi, // offset of first read char to align + size_t rdf, // offset of last read char to align + const Scoring& sc); // scoring scheme + + /** + * Initialize with a new alignment problem.
+ */ + void initRef( + bool fw, // whether the forward or revcomp read is aligning + TRefId refidx, // id of reference aligned against + const DPRect& rect, // DP rectangle + char *rf, // reference sequence + size_t rfi, // offset of first reference char to align to + size_t rff, // offset of last reference char to align to + TRefOff reflen, // length of reference sequence + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum score + bool enable8, // use 8-bit SSE if possible? + size_t cminlen, // minimum length for using checkpointing scheme + size_t cpow2, // interval b/t checkpointed diags; 1 << this + bool doTri, // triangular mini-fills? + bool extend); // true iff this is a seed extension + + /** + * Given a read, an alignment orientation, a range of characters in a + * reference sequence, and a bit-encoded version of the reference, + * execute the corresponding dynamic programming problem. + * + * Here we expect that the caller has already narrowed down the relevant + * portion of the reference (e.g. using a seed hit) and all we do is + * banded dynamic programming in the vicinity of that portion. This is not + * the function to call if we are trying to solve the whole alignment + * problem with dynamic programming (that is TODO). + * + * NOTE(review): this overload is declared void; the earlier claim that it returns true when an alignment is found does not match the signature. + */ + void initRef( + bool fw, // whether the forward or revcomp read is aligned + TRefId refidx, // reference aligned against + const DPRect& rect, // DP rectangle + const BitPairReference& refs, // Reference strings + TRefOff reflen, // length of reference sequence + const Scoring& sc, // scoring scheme + TAlScore minsc, // minimum alignment score + bool enable8, // use 8-bit SSE if possible? + size_t cminlen, // minimum length for using checkpointing scheme + size_t cpow2, // interval b/t checkpointed diags; 1 << this + bool doTri, // triangular mini-fills?
+ bool extend, // true iff this is a seed extension + size_t upto, // count the number of Ns up to this offset + size_t& nsUpto); // output: the number of Ns up to 'upto' + + /** + * Given a read, an alignment orientation, a range of characters in a + * referece sequence, and a bit-encoded version of the reference, set up + * and execute the corresponding ungapped alignment problem. There can + * only be one solution. + * + * The caller has already narrowed down the relevant portion of the + * reference using, e.g., the location of a seed hit, or the range of + * possible fragment lengths if we're searching for the opposite mate in a + * pair. + */ + int ungappedAlign( + const BTDnaString& rd, // read sequence (could be RC) + const BTString& qu, // qual sequence (could be rev) + const Coord& coord, // coordinate aligned to + const BitPairReference& refs, // Reference strings + size_t reflen, // length of reference sequence + const Scoring& sc, // scoring scheme + bool ohang, // allow overhang? + TAlScore minsc, // minimum score + SwResult& res); // put alignment result here + + /** + * Align read 'rd' to reference using read & reference information given + * last time init() was called. Uses dynamic programming. + */ + bool align(RandomSource& rnd, TAlScore& best); + + /** + * Populate the given SwResult with information about the "next best" + * alignment if there is one. If there isn't one, false is returned. Note + * that false might be returned even though a call to done() would have + * returned false. + */ + bool nextAlignment( + SwResult& res, + TAlScore minsc, + RandomSource& rnd); + + /** + * Print out an alignment result as an ASCII DP table. + */ + void printResultStacked( + const SwResult& res, + std::ostream& os) + { + res.alres.printStacked(*rd_, os); + } + + /** + * Return true iff there are no more solution cells to backtace from. 
+     * Note that this may return false in situations where there are actually
+     * no more solutions, but that hasn't been discovered yet.
+     */
+    bool done() const {
+        // Both a read and a reference must have been installed first.
+        assert(initedRead() && initedRef());
+        // Done once the next-alignment index has walked past the last
+        // backtrace candidate.
+        return cural_ == btncand_.size();
+    }
+
+    /**
+     * Return true iff this SwAligner has been initialized with a reference to
+     * align against (i.e. initedRef_ is set).
+     */
+    inline bool initedRef() const { return initedRef_; }
+
+    /**
+     * Return true iff this SwAligner has been initialized with a read to align
+     * (i.e. initedRead_ is set).
+     */
+    inline bool initedRead() const { return initedRead_; }
+
+    /**
+     * Reset, signaling that we're done with this dynamic programming problem
+     * and won't be asking for any more alignments.  Clears both the
+     * "read installed" and "reference installed" flags; nothing else is
+     * touched here.
+     */
+    inline void reset() { initedRef_ = initedRead_ = false; }
+
+#ifndef NDEBUG
+    /**
+     * Check that aligner is internally consistent.  Debug builds only.
+     * Always returns true; inconsistencies trip an assert instead.
+     */
+    bool repOk() const {
+        assert_gt(dpRows(), 0);
+        // Check btncand_: every candidate must be well-formed and must
+        // score at least the minimum score.
+        for(size_t i = 0; i < btncand_.size(); i++) {
+            assert(btncand_[i].repOk());
+            assert_geq(btncand_[i].score, minsc_);
+        }
+        return true;
+    }
+#endif
+
+    /**
+     * Return the number of alignments given out so far by nextAlignment().
+     */
+    size_t numAlignmentsReported() const { return cural_; }
+
+    /**
+     * Merge tallies in the counters related to filling the DP table.
+     *
+     * Adds this aligner's four SSE metric sets and three backtrace-filter
+     * counters into the caller-supplied accumulators.  This aligner's own
+     * counters are left unchanged.
+     */
+    void merge(
+        SSEMetrics& sseU8ExtendMet,
+        SSEMetrics& sseU8MateMet,
+        SSEMetrics& sseI16ExtendMet,
+        SSEMetrics& sseI16MateMet,
+        uint64_t&   nbtfiltst,
+        uint64_t&   nbtfiltsc,
+        uint64_t&   nbtfiltdo)
+    {
+        sseU8ExtendMet.merge(sseU8ExtendMet_);
+        sseU8MateMet.merge(sseU8MateMet_);
+        sseI16ExtendMet.merge(sseI16ExtendMet_);
+        sseI16MateMet.merge(sseI16MateMet_);
+        nbtfiltst += nbtfiltst_;
+        nbtfiltsc += nbtfiltsc_;
+        nbtfiltdo += nbtfiltdo_;
+    }
+
+    /**
+     * Reset all the counters related to filling in the DP table to 0.
+ */ + void resetCounters() { + sseU8ExtendMet_.reset(); + sseU8MateMet_.reset(); + sseI16ExtendMet_.reset(); + sseI16MateMet_.reset(); + nbtfiltst_ = nbtfiltsc_ = nbtfiltdo_ = 0; + } + + /** + * Return the size of the DP problem. + */ + size_t size() const { + return dpRows() * (rff_ - rfi_); + } + +protected: + + /** + * Return the number of rows that will be in the dynamic programming table. + */ + inline size_t dpRows() const { + assert(initedRead_); + return rdf_ - rdi_; + } + + /** + * Align nucleotides from read 'rd' to the reference string 'rf' using + * vector instructions. Return the score of the best alignment found, or + * the minimum integer if an alignment could not be found. Flag is set to + * 0 if an alignment is found, -1 if no valid alignment is found, or -2 if + * the score saturated at any point during alignment. + */ + TAlScore alignNucleotidesEnd2EndSseU8( // unsigned 8-bit elements + int& flag, bool debug); + TAlScore alignNucleotidesLocalSseU8( // unsigned 8-bit elements + int& flag, bool debug); + TAlScore alignNucleotidesEnd2EndSseI16( // signed 16-bit elements + int& flag, bool debug); + TAlScore alignNucleotidesLocalSseI16( // signed 16-bit elements + int& flag, bool debug); + + /** + * Aligns by filling a dynamic programming matrix with the SSE-accelerated, + * banded DP approach of Farrar. As it goes, it determines which cells we + * might backtrace from and tallies the best (highest-scoring) N backtrace + * candidate cells per diagonal. Also returns the alignment score of the best + * alignment in the matrix. + * + * This routine does *not* maintain a matrix holding the entire matrix worth of + * scores, nor does it maintain any other dense O(mn) data structure, as this + * would quickly exhaust memory for queries longer than about 10,000 kb. + * Instead, in the fill stage it maintains two columns worth of scores at a + * time (current/previous, or right/left) - these take O(m) space. 
When + * finished with the current column, it determines which cells from the + * previous column, if any, are candidates we might backtrace from to find a + * full alignment. A candidate cell has a score that rises above the threshold + * and isn't improved upon by a match in the next column. The best N + * candidates per diagonal are stored in a O(m + n) data structure. + */ + TAlScore alignGatherEE8( // unsigned 8-bit elements + int& flag, bool debug); + TAlScore alignGatherLoc8( // unsigned 8-bit elements + int& flag, bool debug); + TAlScore alignGatherEE16( // signed 16-bit elements + int& flag, bool debug); + TAlScore alignGatherLoc16( // signed 16-bit elements + int& flag, bool debug); + + /** + * Build query profile look up tables for the read. The query profile look + * up table is organized as a 1D array indexed by [i][j] where i is the + * reference character in the current DP column (0=A, 1=C, etc), and j is + * the segment of the query we're currently working on. + */ + void buildQueryProfileEnd2EndSseU8(bool fw); + void buildQueryProfileLocalSseU8(bool fw); + + /** + * Build query profile look up tables for the read. The query profile look + * up table is organized as a 1D array indexed by [i][j] where i is the + * reference character in the current DP column (0=A, 1=C, etc), and j is + * the segment of the query we're currently working on. 
+ */ + void buildQueryProfileEnd2EndSseI16(bool fw); + void buildQueryProfileLocalSseI16(bool fw); + + bool gatherCellsNucleotidesLocalSseU8(TAlScore best); + bool gatherCellsNucleotidesEnd2EndSseU8(TAlScore best); + + bool gatherCellsNucleotidesLocalSseI16(TAlScore best); + bool gatherCellsNucleotidesEnd2EndSseI16(TAlScore best); + + bool backtraceNucleotidesLocalSseU8( + TAlScore escore, // in: expected score + SwResult& res, // out: store results (edits and scores) here + size_t& off, // out: store diagonal projection of origin + size_t& nbts, // out: # backtracks + size_t row, // start in this rectangle row + size_t col, // start in this rectangle column + RandomSource& rand); // random gen, to choose among equal paths + + bool backtraceNucleotidesLocalSseI16( + TAlScore escore, // in: expected score + SwResult& res, // out: store results (edits and scores) here + size_t& off, // out: store diagonal projection of origin + size_t& nbts, // out: # backtracks + size_t row, // start in this rectangle row + size_t col, // start in this rectangle column + RandomSource& rand); // random gen, to choose among equal paths + + bool backtraceNucleotidesEnd2EndSseU8( + TAlScore escore, // in: expected score + SwResult& res, // out: store results (edits and scores) here + size_t& off, // out: store diagonal projection of origin + size_t& nbts, // out: # backtracks + size_t row, // start in this rectangle row + size_t col, // start in this rectangle column + RandomSource& rand); // random gen, to choose among equal paths + + bool backtraceNucleotidesEnd2EndSseI16( + TAlScore escore, // in: expected score + SwResult& res, // out: store results (edits and scores) here + size_t& off, // out: store diagonal projection of origin + size_t& nbts, // out: # backtracks + size_t row, // start in this rectangle row + size_t col, // start in this rectangle column + RandomSource& rand); // random gen, to choose among equal paths + + bool backtrace( + TAlScore escore, // in: expected score 
+ bool fill, // in: use mini-fill? + bool usecp, // in: use checkpoints? + SwResult& res, // out: store results (edits and scores) here + size_t& off, // out: store diagonal projection of origin + size_t row, // start in this rectangle row + size_t col, // start in this rectangle column + size_t maxiter,// max # extensions to try + size_t& niter, // # extensions tried + RandomSource& rnd) // random gen, to choose among equal paths + { + bter_.initBt( + escore, // in: alignment score + row, // in: start in this row + col, // in: start in this column + fill, // in: use mini-fill? + usecp, // in: use checkpoints? + cperTri_, // in: triangle-shaped mini-fills? + rnd); // in: random gen, to choose among equal paths + assert(bter_.inited()); + size_t nrej = 0; + if(bter_.emptySolution()) { + return false; + } else { + return bter_.nextAlignment(maxiter, res, off, nrej, niter, rnd); + } + } + + const BTDnaString *rd_; // read sequence + const BTString *qu_; // read qualities + const BTDnaString *rdfw_; // read sequence for fw read + const BTDnaString *rdrc_; // read sequence for rc read + const BTString *qufw_; // read qualities for fw read + const BTString *qurc_; // read qualities for rc read + TReadOff rdi_; // offset of first read char to align + TReadOff rdf_; // offset of last read char to align + bool fw_; // true iff read sequence is original fw read + TRefId refidx_; // id of reference aligned against + TRefOff reflen_; // length of entire reference sequence + const DPRect* rect_; // DP rectangle + char *rf_; // reference sequence + TRefOff rfi_; // offset of first ref char to align to + TRefOff rff_; // offset of last ref char to align to (excl) + size_t rdgap_; // max # gaps in read + size_t rfgap_; // max # gaps in reference + bool enable8_;// enable 8-bit sse + bool extend_; // true iff this is a seed-extend problem + const Scoring *sc_; // penalties for edit types + TAlScore minsc_; // penalty ceiling for valid alignments + int nceil_; // max # Ns allowed in 
ref portion of aln + + bool sse8succ_; // whether 8-bit worked + bool sse16succ_; // whether 16-bit worked + SSEData sseU8fw_; // buf for fw query, 8-bit score + SSEData sseU8rc_; // buf for rc query, 8-bit score + SSEData sseI16fw_; // buf for fw query, 16-bit score + SSEData sseI16rc_; // buf for rc query, 16-bit score + bool sseU8fwBuilt_; // built fw query profile, 8-bit score + bool sseU8rcBuilt_; // built rc query profile, 8-bit score + bool sseI16fwBuilt_; // built fw query profile, 16-bit score + bool sseI16rcBuilt_; // built rc query profile, 16-bit score + + SSEMetrics sseU8ExtendMet_; + SSEMetrics sseU8MateMet_; + SSEMetrics sseI16ExtendMet_; + SSEMetrics sseI16MateMet_; + + int state_; // state + bool initedRead_; // true iff initialized with initRead + bool readSse16_; // true -> sse16 from now on for read + bool initedRef_; // true iff initialized with initRef + EList rfwbuf_; // buffer for wordized ref stretches + + EList btnstack_; // backtrace stack for nucleotides + EList btcells_; // cells involved in current backtrace + + NBest btdiag_; // per-diagonal backtrace candidates + EList btncand_; // cells we might backtrace from + EList btncanddone_; // candidates that we investigated + size_t btncanddoneSucc_; // # investigated and succeeded + size_t btncanddoneFail_; // # investigated and failed + + BtBranchTracer bter_; // backtracer + + Checkpointer cper_; // structure for saving checkpoint cells + size_t cperMinlen_; // minimum length for using checkpointer + size_t cperPerPow2_; // checkpoint every 1 << perpow2 diags (& next) + bool cperEf_; // store E and F in addition to H? + bool cperTri_; // checkpoint for triangular mini-fills? 
+ + size_t colstop_; // bailed on DP loop after this many cols + size_t lastsolcol_; // last DP col with valid cell + size_t cural_; // index of next alignment to be given + + uint64_t nbtfiltst_; // # candidates filtered b/c starting cell was seen + uint64_t nbtfiltsc_; // # candidates filtered b/c score uninteresting + uint64_t nbtfiltdo_; // # candidates filtered b/c dominated by other cell + + ASSERT_ONLY(SStringExpandable tmp_destU32_); + ASSERT_ONLY(BTDnaString tmp_editstr_, tmp_refstr_); + ASSERT_ONLY(EList cand_tmp_); +}; + +#endif /*ALIGNER_SW_H_*/ diff --git a/aligner_sw_common.h b/aligner_sw_common.h new file mode 100644 index 0000000..639a3c6 --- /dev/null +++ b/aligner_sw_common.h @@ -0,0 +1,305 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ALIGNER_SW_COMMON_H_ +#define ALIGNER_SW_COMMON_H_ + +#include "aligner_result.h" + +/** + * Encapsulates the result of a dynamic programming alignment, including + * colorspace alignments. In our case, the result is a combination of: + * + * 1. All the nucleotide edits + * 2. All the "edits" where an ambiguous reference char is resolved to + * an unambiguous char. + * 3. All the color edits (if applicable) + * 4. All the color miscalls (if applicable). This is a subset of 3. + * 5. The score of the best alginment + * 6. 
The score of the second-best alignment + * + * Having scores for the best and second-best alignments gives us an + * idea of where gaps may make reassembly beneficial. + */ +struct SwResult { + + SwResult() : + alres(), + sws(0), + swcups(0), + swrows(0), + swskiprows(0), + swskip(0), + swsucc(0), + swfail(0), + swbts(0) + { } + + /** + * Clear all contents. + */ + void reset() { + sws = swcups = swrows = swskiprows = swskip = swsucc = + swfail = swbts = 0; + alres.reset(); + } + + /** + * Reverse all edit lists. + */ + void reverse() { + alres.reverseEdits(); + } + + /** + * Return true iff no result has been installed. + */ + bool empty() const { + return alres.empty(); + } + +#ifndef NDEBUG + /** + * Check that result is internally consistent. + */ + bool repOk() const { + assert(alres.repOk()); + return true; + } + + /** + * Check that result is internally consistent w/r/t read. + */ + bool repOk(const Read& rd) const { + assert(alres.repOk(rd)); + return true; + } +#endif + + AlnRes alres; + uint64_t sws; // # DP problems solved + uint64_t swcups; // # DP cell updates + uint64_t swrows; // # DP row updates + uint64_t swskiprows; // # skipped DP row updates (b/c no valid alignments can go thru row) + uint64_t swskip; // # DP problems skipped by sse filter + uint64_t swsucc; // # DP problems resulting in alignment + uint64_t swfail; // # DP problems not resulting in alignment + uint64_t swbts; // # DP backtrace steps + + int nup; // upstream decoded nucleotide; for colorspace reads + int ndn; // downstream decoded nucleotide; for colorspace reads +}; + +/** + * Encapsulates counters that measure how much work has been done by + * the dynamic programming driver and aligner. 
+ */ +struct SwMetrics { + + SwMetrics() : mutex_m() { + reset(); + } + + void reset() { + sws = swcups = swrows = swskiprows = swskip = swsucc = swfail = swbts = + sws10 = sws5 = sws3 = + rshit = ungapsucc = ungapfail = ungapnodec = 0; + exatts = exranges = exrows = exsucc = exooms = 0; + mm1atts = mm1ranges = mm1rows = mm1succ = mm1ooms = 0; + sdatts = sdranges = sdrows = sdsucc = sdooms = 0; + } + + void init( + uint64_t sws_, + uint64_t sws10_, + uint64_t sws5_, + uint64_t sws3_, + uint64_t swcups_, + uint64_t swrows_, + uint64_t swskiprows_, + uint64_t swskip_, + uint64_t swsucc_, + uint64_t swfail_, + uint64_t swbts_, + uint64_t rshit_, + uint64_t ungapsucc_, + uint64_t ungapfail_, + uint64_t ungapnodec_, + uint64_t exatts_, + uint64_t exranges_, + uint64_t exrows_, + uint64_t exsucc_, + uint64_t exooms_, + uint64_t mm1atts_, + uint64_t mm1ranges_, + uint64_t mm1rows_, + uint64_t mm1succ_, + uint64_t mm1ooms_, + uint64_t sdatts_, + uint64_t sdranges_, + uint64_t sdrows_, + uint64_t sdsucc_, + uint64_t sdooms_) + { + sws = sws_; + sws10 = sws10_; + sws5 = sws5_; + sws3 = sws3_; + swcups = swcups_; + swrows = swrows_; + swskiprows = swskiprows_; + swskip = swskip_; + swsucc = swsucc_; + swfail = swfail_; + swbts = swbts_; + ungapsucc = ungapsucc_; + ungapfail = ungapfail_; + ungapnodec = ungapnodec_; + + // Exact end-to-end attempts + exatts = exatts_; + exranges = exranges_; + exrows = exrows_; + exsucc = exsucc_; + exooms = exooms_; + + // 1-mismatch end-to-end attempts + mm1atts = mm1atts_; + mm1ranges = mm1ranges_; + mm1rows = mm1rows_; + mm1succ = mm1succ_; + mm1ooms = mm1ooms_; + + // Seed attempts + sdatts = sdatts_; + sdranges = sdranges_; + sdrows = sdrows_; + sdsucc = sdsucc_; + sdooms = sdooms_; + } + + /** + * Merge (add) the counters in the given SwResult object into this + * SwMetrics object. 
+ */ + void update(const SwResult& r) { + sws += r.sws; + swcups += r.swcups; + swrows += r.swrows; + swskiprows += r.swskiprows; + swskip += r.swskip; + swsucc += r.swsucc; + swfail += r.swfail; + swbts += r.swbts; + } + + /** + * Merge (add) the counters in the given SwMetrics object into this + * object. This is the only safe way to update a SwMetrics shared + * by multiple threads. + */ + void merge(const SwMetrics& r, bool getLock = false) { + ThreadSafe ts(&mutex_m, getLock); + sws += r.sws; + sws10 += r.sws10; + sws5 += r.sws5; + sws3 += r.sws3; + swcups += r.swcups; + swrows += r.swrows; + swskiprows += r.swskiprows; + swskip += r.swskip; + swsucc += r.swsucc; + swfail += r.swfail; + swbts += r.swbts; + rshit += r.rshit; + ungapsucc += r.ungapsucc; + ungapfail += r.ungapfail; + ungapnodec += r.ungapnodec; + exatts += r.exatts; + exranges += r.exranges; + exrows += r.exrows; + exsucc += r.exsucc; + exooms += r.exooms; + mm1atts += r.mm1atts; + mm1ranges += r.mm1ranges; + mm1rows += r.mm1rows; + mm1succ += r.mm1succ; + mm1ooms += r.mm1ooms; + sdatts += r.sdatts; + sdranges += r.sdranges; + sdrows += r.sdrows; + sdsucc += r.sdsucc; + sdooms += r.sdooms; + } + + void tallyGappedDp(size_t readGaps, size_t refGaps) { + size_t mx = max(readGaps, refGaps); + if(mx < 10) sws10++; + if(mx < 5) sws5++; + if(mx < 3) sws3++; + } + + uint64_t sws; // # DP problems solved + uint64_t sws10; // # DP problems solved where max gaps < 10 + uint64_t sws5; // # DP problems solved where max gaps < 5 + uint64_t sws3; // # DP problems solved where max gaps < 3 + uint64_t swcups; // # DP cell updates + uint64_t swrows; // # DP row updates + uint64_t swskiprows; // # skipped DP rows (b/c no valid alns go thru row) + uint64_t swskip; // # DP problems skipped by sse filter + uint64_t swsucc; // # DP problems resulting in alignment + uint64_t swfail; // # DP problems not resulting in alignment + uint64_t swbts; // # DP backtrace steps + uint64_t rshit; // # DP problems avoided b/c seed 
hit was redundant + uint64_t ungapsucc; // # DP problems avoided b/c seed hit was redundant + uint64_t ungapfail; // # DP problems avoided b/c seed hit was redundant + uint64_t ungapnodec; // # DP problems avoided b/c seed hit was redundant + + uint64_t exatts; // total # attempts at exact-hit end-to-end aln + uint64_t exranges; // total # ranges returned by exact-hit queries + uint64_t exrows; // total # rows returned by exact-hit queries + uint64_t exsucc; // exact-hit yielded non-empty result + uint64_t exooms; // exact-hit offset memory exhausted + + uint64_t mm1atts; // total # attempts at 1mm end-to-end aln + uint64_t mm1ranges; // total # ranges returned by 1mm-hit queries + uint64_t mm1rows; // total # rows returned by 1mm-hit queries + uint64_t mm1succ; // 1mm-hit yielded non-empty result + uint64_t mm1ooms; // 1mm-hit offset memory exhausted + + uint64_t sdatts; // total # attempts to find seed alignments + uint64_t sdranges; // total # seed-alignment ranges found + uint64_t sdrows; // total # seed-alignment rows found + uint64_t sdsucc; // # times seed alignment yielded >= 1 hit + uint64_t sdooms; // # times an OOM occurred during seed alignment + + MUTEX_T mutex_m; +}; + +// The various ways that one might backtrack from a later cell (either oall, +// rdgap or rfgap) to an earlier cell +enum { + SW_BT_OALL_DIAG, // from oall cell to oall cell + SW_BT_OALL_REF_OPEN, // from oall cell to oall cell + SW_BT_OALL_READ_OPEN, // from oall cell to oall cell + SW_BT_RDGAP_EXTEND, // from rdgap cell to rdgap cell + SW_BT_RFGAP_EXTEND // from rfgap cell to rfgap cell +}; + +#endif /*def ALIGNER_SW_COMMON_H_*/ diff --git a/aligner_sw_driver.cpp b/aligner_sw_driver.cpp new file mode 100644 index 0000000..d0be5f2 --- /dev/null +++ b/aligner_sw_driver.cpp @@ -0,0 +1,20 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. + */ + + diff --git a/aligner_sw_driver.h b/aligner_sw_driver.h new file mode 100644 index 0000000..075f614 --- /dev/null +++ b/aligner_sw_driver.h @@ -0,0 +1,2938 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. + */ + +/* + * aligner_sw_driver.h + * + * REDUNDANT SEED HITS + * + * We say that two seed hits are redundant if they trigger identical + * seed-extend dynamic programming problems. Put another way, they both lie on + * the same diagonal of the overall read/reference dynamic programming matrix. + * Detecting redundant seed hits is simple when the seed hits are ungapped. 
We + * do this after offset resolution but before the offset is converted to genome + * coordinates (see uses of the seenDiags1_/seenDiags2_ fields for examples). + * + * REDUNDANT ALIGNMENTS + * + * In an unpaired context, we say that two alignments are redundant if they + * share any cells in the global DP table. Roughly speaking, this is like + * saying that two alignments are redundant if any read character aligns to the + * same reference character (same reference sequence, same strand, same offset) + * in both alignments. + * + * In a paired-end context, we say that two paired-end alignments are redundant + * if the mate #1s are redundant and the mate #2s are redundant. + * + * How do we enforce this? In the unpaired context, this is relatively simple: + * the cells from each alignment are checked against a set containing all cells + * from all previous alignments. Given a new alignment, for each cell in the + * new alignment we check whether it is in the set. If there is any overlap, + * the new alignment is rejected as redundant. Otherwise, the new alignment is + * accepted and its cells are added to the set. + * + * Enforcement in a paired context is a little trickier. Consider the + * following approaches: + * + * 1. Skip anchors that are redundant with any previous anchor or opposite + * alignment. This is sufficient to ensure no two concordant alignments + * found are redundant. + * + * 2. Same as scheme 1, but with a "transitive closure" scheme for finding all + * concordant pairs in the vicinity of an anchor. Consider the AB/AC + * scenario from the previous paragraph. If B is the anchor alignment, we + * will find AB but not AC. But under this scheme, once we find AB we then + * let B be a new anchor and immediately look for its opposites. Likewise, + * if we find any opposite, we make them anchors and continue searching. We + * don't stop searching until every opposite is used as an anchor. + * + * 3. 
Skip anchors that are redundant with any previous anchor alignment (but + * allow anchors that are redundant with previous opposite alignments). + * This isn't sufficient to avoid redundant concordant alignments. To avoid + * redundant concordants, we need an additional procedure that checks each + * new concordant alignment one-by-one against a list of previous concordant + * alignments to see if it is redundant. + * + * We take approach 1. + */ + +#ifndef ALIGNER_SW_DRIVER_H_ +#define ALIGNER_SW_DRIVER_H_ + +#include +// -- BTL remove -- +#include +#include +// -- -- +#include +#include "ds.h" +#include "aligner_seed.h" +#include "aligner_sw.h" +#include "aligner_cache.h" +#include "reference.h" +#include "group_walk.h" +#include "gfm.h" +#include "mem_ids.h" +#include "aln_sink.h" +#include "pe.h" +#include "ival_list.h" +#include "simple_func.h" +#include "random_util.h" +#include "dp_framer.h" + +using namespace std; + +template +struct SeedPos { + + SeedPos() : fw(false), offidx(0), rdoff(0), seedlen(0) { } + + SeedPos( + bool fw_, + index_t offidx_, + index_t rdoff_, + index_t seedlen_) + { + init(fw_, offidx_, rdoff_, seedlen_); + } + + void init( + bool fw_, + index_t offidx_, + index_t rdoff_, + index_t seedlen_) + { + fw = fw_; + offidx = offidx_; + rdoff = rdoff_; + seedlen = seedlen_; + } + + bool operator<(const SeedPos& o) const { + if(offidx < o.offidx) return true; + if(offidx > o.offidx) return false; + if(rdoff < o.rdoff) return true; + if(rdoff > o.rdoff) return false; + if(seedlen < o.seedlen) return true; + if(seedlen > o.seedlen) return false; + if(fw && !o.fw) return true; + if(!fw && o.fw) return false; + return false; + } + + bool operator>(const SeedPos& o) const { + if(offidx < o.offidx) return false; + if(offidx > o.offidx) return true; + if(rdoff < o.rdoff) return false; + if(rdoff > o.rdoff) return true; + if(seedlen < o.seedlen) return false; + if(seedlen > o.seedlen) return true; + if(fw && !o.fw) return false; + if(!fw && o.fw) 
return true; + return false; + } + + bool operator==(const SeedPos& o) const { + return fw == o.fw && offidx == o.offidx && + rdoff == o.rdoff && seedlen == o.seedlen; + } + + bool fw; + index_t offidx; + index_t rdoff; + index_t seedlen; +}; + +/** + * An SATuple along with the associated seed position. + */ +template +struct SATupleAndPos { + + SATuple sat; // result for this seed hit + SeedPos pos; // seed position that yielded the range this was taken from + index_t origSz; // size of range this was taken from + index_t nlex; // # position we can extend seed hit to left w/o edit + index_t nrex; // # position we can extend seed hit to right w/o edit + + bool operator<(const SATupleAndPos& o) const { + if(sat < o.sat) return true; + if(sat > o.sat) return false; + return pos < o.pos; + } + + bool operator==(const SATupleAndPos& o) const { + return sat == o.sat && pos == o.pos; + } +}; + +/** + * Encapsulates the weighted random sampling scheme we want to use to pick + * which seed hit range to sample a row from. + */ +template +class RowSampler { + +public: + + RowSampler(int cat = 0) : elim_(cat), masses_(cat) { + mass_ = 0.0f; + } + + /** + * Initialze sampler with respect to a range of elements in a list of + * SATupleAndPos's. 
+ */ + void init( + const EList, 16>& salist, + index_t sai, + index_t saf, + bool lensq, // whether to square the numerator, which = extended length + bool szsq) // whether to square denominator, which = + { + assert_gt(saf, sai); + elim_.resize(saf - sai); + elim_.fill(false); + // Initialize mass + mass_ = 0.0f; + masses_.resize(saf - sai); + for(index_t i = sai; i < saf; i++) { + index_t len = salist[i].nlex + salist[i].nrex + 1; // + salist[i].sat.key.len; + double num = (double)len; + if(lensq) { + num *= num; + } + double denom = (double)salist[i].sat.size(); + if(szsq) { + denom *= denom; + } + masses_[i - sai] = num / denom; + mass_ += masses_[i - sai]; + } + } + + /** + * Caller is indicating that the bin at index i is exhausted and we should + * exclude it from our sampling from now on. + */ + void finishedRange(index_t i) { + assert_lt(i, masses_.size()); + elim_[i] = true; + mass_ -= masses_[i]; + } + + /** + * Sample randomly from the mass. + */ + size_t next(RandomSource& rnd) { + // Throw the dart + double rd = rnd.nextFloat() * mass_; + double mass_sofar = 0.0f; + size_t sz = masses_.size(); + size_t last_unelim = std::numeric_limits::max(); + for(size_t i = 0; i < sz; i++) { + if(!elim_[i]) { + last_unelim = i; + mass_sofar += masses_[i]; + if(rd < mass_sofar) { + // This is the one we hit + return i; + } + } + } + assert_neq(std::numeric_limits::max(), last_unelim); + return last_unelim; + } + +protected: + double mass_; // total probability mass to throw darts at + EList elim_; // whether the range is eliminated + EList masses_; // mass of each range +}; + +/** + * Return values from extendSeeds and extendSeedsPaired. 
+ */ +enum { + // All end-to-end and seed hits were examined + // The policy does not need us to look any further + EXTEND_EXHAUSTED_CANDIDATES = 1, + EXTEND_POLICY_FULFILLED, + // We stopped because we reached a point where the only remaining + // alignments of interest have perfect scores, but we already investigated + // perfect alignments + EXTEND_PERFECT_SCORE, + // We stopped because we ran up against a limit on how much work we should + // do for one set of seed ranges, e.g. the limit on number of consecutive + // unproductive DP extensions + EXTEND_EXCEEDED_SOFT_LIMIT, + // We stopped because we ran up against a limit on how much work we should + // do for overall before giving up on a mate + EXTEND_EXCEEDED_HARD_LIMIT +}; + +/** + * Data structure encapsulating a range that's been extended out in two + * directions. + */ +struct ExtendRange { + + void init(size_t off_, size_t len_, size_t sz_) { + off = off_; len = len_; sz = sz_; + } + + size_t off; // offset of extended region + size_t len; // length between extremes of extended region + size_t sz; // # of elements in SA range +}; + +template +class SwDriver { + + typedef PList TSAList; + +public: + + SwDriver(size_t bytes) : + satups_(DP_CAT), + gws_(DP_CAT), + seenDiags1_(DP_CAT), + seenDiags2_(DP_CAT), + redAnchor_(DP_CAT), + redMate1_(DP_CAT), + redMate2_(DP_CAT), + pool_(bytes, CACHE_PAGE_SZ, DP_CAT), + salistEe_(DP_CAT), + gwstate_(GW_CAT) { } + + /** + * Given a collection of SeedHits for a single read, extend seed alignments + * into full alignments. Where possible, try to avoid redundant offset + * lookups and dynamic programming problems. Optionally report alignments + * to a AlnSinkWrap object as they are discovered. + * + * If 'reportImmediately' is true, returns true iff a call to + * mhs->report() returned true (indicating that the reporting + * policy is satisfied and we can stop). Otherwise, returns false. 
+ */ + int extendSeeds( + Read& rd, // read to align + bool mate1, // true iff rd is mate #1 + SeedResults& sh, // seed hits to extend into full alignments + const GFM& gfmFw, // BWT + const GFM* gfmBw, // BWT' + const BitPairReference& ref, // Reference strings + SwAligner& swa, // dynamic programming aligner + const Scoring& sc, // scoring scheme + int seedmms, // # mismatches allowed in seed + int seedlen, // length of seed + int seedival, // interval between seeds + TAlScore& minsc, // minimum score for anchor + int nceil, // maximum # Ns permitted in ref portion + size_t maxhalf, // maximum width on one side of DP table + bool doUngapped, // do ungapped alignment + size_t maxIters, // stop after this many seed-extend loop iters + size_t maxUg, // max # ungapped extends + size_t maxDp, // max # DPs + size_t maxUgStreak, // stop after streak of this many ungap fails + size_t maxDpStreak, // stop after streak of this many dp fails + bool doExtend, // do seed extension + bool enable8, // use 8-bit SSE where possible + size_t cminlen, // use checkpointer if read longer than this + size_t cpow2, // interval between diagonals to checkpoint + bool doTri, // triangular mini-fills + int tighten, // -M score tightening mode + AlignmentCacheIface& ca, // alignment cache for seed hits + RandomSource& rnd, // pseudo-random source + WalkMetrics& wlm, // group walk left metrics + SwMetrics& swmSeed, // DP metrics for seed-extend + PerReadMetrics& prm, // per-read metrics + AlnSinkWrap* mhs, // HitSink for multiseed-style aligner + bool reportImmediately, // whether to report hits immediately to mhs + bool& exhaustive); + + /** + * Given a collection of SeedHits for a read pair, extend seed + * alignments into full alignments and then look for the opposite + * mate using dynamic programming. Where possible, try to avoid + * redundant offset lookups. Optionally report alignments to a + * AlnSinkWrap object as they are discovered. 
+ * + * If 'reportImmediately' is true, returns true iff a call to + * mhs->report() returned true (indicating that the reporting + * policy is satisfied and we can stop). Otherwise, returns false. + */ + int extendSeedsPaired( + Read& rd, // mate to align as anchor + Read& ord, // mate to align as opposite + bool anchor1, // true iff anchor mate is mate1 + bool oppFilt, // true iff opposite mate was filtered out + SeedResults& sh, // seed hits for anchor + const GFM& gfmFw, // BWT + const GFM* gfmBw, // BWT' + const BitPairReference& ref, // Reference strings + SwAligner& swa, // dyn programming aligner for anchor + SwAligner& swao, // dyn programming aligner for opposite + const Scoring& sc, // scoring scheme + const PairedEndPolicy& pepol,// paired-end policy + int seedmms, // # mismatches allowed in seed + int seedlen, // length of seed + int seedival, // interval between seeds + TAlScore& minsc, // minimum score for anchor + TAlScore& ominsc, // minimum score for opposite + int nceil, // max # Ns permitted in ref for anchor + int onceil, // max # Ns permitted in ref for opposite + bool nofw, // don't align forward read + bool norc, // don't align revcomp read + size_t maxhalf, // maximum width on one side of DP table + bool doUngapped, // do ungapped alignment + size_t maxIters, // stop after this many seed-extend loop iters + size_t maxUg, // max # ungapped extends + size_t maxDp, // max # DPs + size_t maxEeStreak, // stop after streak of this many end-to-end fails + size_t maxUgStreak, // stop after streak of this many ungap fails + size_t maxDpStreak, // stop after streak of this many dp fails + size_t maxMateStreak, // stop seed range after N mate-find fails + bool doExtend, // do seed extension + bool enable8, // use 8-bit SSE where possible + size_t cminlen, // use checkpointer if read longer than this + size_t cpow2, // interval between diagonals to checkpoint + bool doTri, // triangular mini-fills + int tighten, // -M score tightening mode + 
AlignmentCacheIface& cs, // alignment cache for seed hits + RandomSource& rnd, // pseudo-random source + WalkMetrics& wlm, // group walk left metrics + SwMetrics& swmSeed, // DP metrics for seed-extend + SwMetrics& swmMate, // DP metrics for mate finidng + PerReadMetrics& prm, // per-read metrics for anchor + AlnSinkWrap* msink, // AlnSink wrapper for multiseed-style aligner + bool swMateImmediately, // whether to look for mate immediately + bool reportImmediately, // whether to report hits immediately to msink + bool discord, // look for discordant alignments? + bool mixed, // look for unpaired as well as paired alns? + bool& exhaustive); + + /** + * Prepare for a new read. + */ + void nextRead(bool paired, size_t mate1len, size_t mate2len) { + redAnchor_.reset(); + seenDiags1_.reset(); + seenDiags2_.reset(); + seedExRangeFw_[0].clear(); // mate 1 fw + seedExRangeFw_[1].clear(); // mate 2 fw + seedExRangeRc_[0].clear(); // mate 1 rc + seedExRangeRc_[1].clear(); // mate 2 rc + size_t maxlen = mate1len; + if(paired) { + redMate1_.reset(); + redMate1_.init(mate1len); + redMate2_.reset(); + redMate2_.init(mate2len); + if(mate2len > maxlen) { + maxlen = mate2len; + } + } + redAnchor_.init(maxlen); + } + +protected: + + bool eeSaTups( + const Read& rd, // read + SeedResults& sh, // seed hits to extend into full alignments + const GFM& gfm, // BWT + const BitPairReference& ref, // Reference strings + RandomSource& rnd, // pseudo-random generator + WalkMetrics& wlm, // group walk left metrics + SwMetrics& swmSeed, // metrics for seed extensions + index_t& nelt_out, // out: # elements total + index_t maxelts, // max # elts to report + bool all); // report all hits? 
+ + void extend( + const Read& rd, // read + const GFM& gfmFw, // Forward Bowtie index + const GFM* gfmBw, // Backward Bowtie index + index_t topf, // top in fw index + index_t botf, // bot in fw index + index_t topb, // top in bw index + index_t botb, // bot in bw index + bool fw, // seed orientation + index_t off, // seed offset from 5' end + index_t len, // seed length + PerReadMetrics& prm, // per-read metrics + index_t& nlex, // # positions we can extend to left w/o edit + index_t& nrex); // # positions we can extend to right w/o edit + + void prioritizeSATups( + const Read& rd, // read + SeedResults& sh, // seed hits to extend into full alignments + const GFM& gfmFw, // BWT + const GFM* gfmBw, // BWT' + const BitPairReference& ref, // Reference strings + int seedmms, // # seed mismatches allowed + index_t maxelt, // max elts we'll consider + bool doExtend, // extend out seeds + bool lensq, // square extended length + bool szsq, // square SA range size + index_t nsm, // if range as <= nsm elts, it's "small" + AlignmentCacheIface& ca, // alignment cache for seed hits + RandomSource& rnd, // pseudo-random generator + WalkMetrics& wlm, // group walk left metrics + PerReadMetrics& prm, // per-read metrics + index_t& nelt_out, // out: # elements total + bool all); // report all hits? 
+ + Random1toN rand_; // random number generators + EList rands_; // random number generators + EList rands2_; // random number generators + EList, 16> eehits_; // holds end-to-end hits + EList, 16> satpos_; // holds SATuple, SeedPos pairs + EList, 16> satpos2_; // holds SATuple, SeedPos pairs + EList, 16> satups_; // holds SATuples to explore elements from + EList > gws_; // list of GroupWalks; no particular order + EList mateStreaks_; // mate-find fail streaks + RowSampler rowsamp_; // row sampler + + // Ranges that we've extended through when extending seed hits + EList seedExRangeFw_[2]; + EList seedExRangeRc_[2]; + + // Data structures encapsulating the diagonals that have already been used + // to seed alignment for mate 1 and mate 2. + EIvalMergeListBinned seenDiags1_; + EIvalMergeListBinned seenDiags2_; + + // For weeding out redundant alignments + RedundantAlns redAnchor_; // database of cells used for anchor alignments + RedundantAlns redMate1_; // database of cells used for mate 1 alignments + RedundantAlns redMate2_; // database of cells used for mate 2 alignments + + // For holding results for anchor (res_) and opposite (ores_) mates + SwResult resGap_; // temp holder for alignment result + SwResult oresGap_; // temp holder for alignment result, opp mate + SwResult resUngap_; // temp holder for ungapped alignment result + SwResult oresUngap_; // temp holder for ungap. aln. opp mate + SwResult resEe_; // temp holder for ungapped alignment result + SwResult oresEe_; // temp holder for ungap. aln. 
opp mate + + Pool pool_; // memory pages for salistExact_ + TSAList salistEe_; // PList for offsets for end-to-end hits + GroupWalkState gwstate_; // some per-thread state shared by all GroupWalks + + // For AlnRes::matchesRef: + ASSERT_ONLY(SStringExpandable raw_refbuf_); + ASSERT_ONLY(SStringExpandable raw_destU32_); + ASSERT_ONLY(EList raw_matches_); + ASSERT_ONLY(BTDnaString tmp_rf_); + ASSERT_ONLY(BTDnaString tmp_rdseq_); + ASSERT_ONLY(BTString tmp_qseq_); + ASSERT_ONLY(EList tmp_reflens_); + ASSERT_ONLY(EList tmp_refoffs_); +}; + +#define TIMER_START() \ +struct timeval tv_i, tv_f; \ +struct timezone tz_i, tz_f; \ +size_t total_usecs; \ +gettimeofday(&tv_i, &tz_i) + +#define IF_TIMER_END() \ +gettimeofday(&tv_f, &tz_f); \ +total_usecs = \ +(tv_f.tv_sec - tv_i.tv_sec) * 1000000 + (tv_f.tv_usec - tv_i.tv_usec); \ +if(total_usecs > 300000) + +/* + * aligner_sw_driver.cpp + * + * Routines that drive the alignment process given a collection of seed hits. + * This is generally done in a few stages: extendSeeds visits the set of + * seed-hit BW elements in some order; for each element visited it resolves its + * reference offset; once the reference offset is known, bounds for a dynamic + * programming subproblem are established; if these bounds are distinct from + * the bounds we've already tried, we solve the dynamic programming subproblem + * and report the hit; if the AlnSinkWrap indicates that we can stop, we + * return, otherwise we continue on to the next BW element. + */ + + +/** + * Given end-to-end alignment results stored in the SeedResults structure, set + * up all of our state for resolving and keeping track of reference offsets for + * hits. Order the list of ranges to examine such that all exact end-to-end + * alignments are examined before any 1mm end-to-end alignments. + * + * Note: there might be a lot of hits and a lot of wide ranges to look for + * here. We use 'maxelt'. 
+ */ +template +bool SwDriver::eeSaTups( + const Read& rd, // read + SeedResults& sh, // seed hits to extend into full alignments + const GFM& gfm, // BWT + const BitPairReference& ref, // Reference strings + RandomSource& rnd, // pseudo-random generator + WalkMetrics& wlm, // group walk left metrics + SwMetrics& swmSeed, // metrics for seed extensions + index_t& nelt_out, // out: # elements total + index_t maxelt, // max elts we'll consider + bool all) // report all hits? +{ + assert_eq(0, nelt_out); + gws_.clear(); + rands_.clear(); + satpos_.clear(); + eehits_.clear(); + // First, count up the total number of satpos_, rands_, eehits_, and gws_ + // we're going to tuse + index_t nobj = 0; + if(!sh.exactFwEEHit().empty()) nobj++; + if(!sh.exactRcEEHit().empty()) nobj++; + nobj += sh.mm1EEHits().size(); + nobj = min(nobj, maxelt); + gws_.ensure(nobj); + rands_.ensure(nobj); + satpos_.ensure(nobj); + eehits_.ensure(nobj); + index_t tot = sh.exactFwEEHit().size() + sh.exactRcEEHit().size(); + bool succ = false; + bool firstEe = true; + bool done = false; + if(tot > 0) { + bool fwFirst = true; + // Pick fw / rc to go first in a weighted random fashion +#ifdef BOWTIE_64BIT_INDEX + index_t rn64 = rnd.nextU64(); + index_t rn = rn64 % (uint64_t)tot; +#else + index_t rn32 = rnd.nextU32(); + index_t rn = rn32 % (uint32_t)tot; +#endif + if(rn >= sh.exactFwEEHit().size()) { + fwFirst = false; + } + for(int fwi = 0; fwi < 2 && !done; fwi++) { + bool fw = ((fwi == 0) == fwFirst); + EEHit hit = fw ? 
sh.exactFwEEHit() : sh.exactRcEEHit(); + if(hit.empty()) { + continue; + } + assert(hit.fw == fw); + if(hit.bot > hit.top) { + // Possibly adjust bot and width if we would have exceeded maxelt + index_t tops[2] = { hit.top, 0 }; + index_t bots[2] = { hit.bot, 0 }; + index_t width = hit.bot - hit.top; + if(nelt_out + width > maxelt) { + index_t trim = (index_t)((nelt_out + width) - maxelt); +#ifdef BOWTIE_64BIT_INDEX + index_t rn = rnd.nextU64() % width; +#else + index_t rn = rnd.nextU32() % width; +#endif + index_t newwidth = width - trim; + if(hit.top + rn + newwidth > hit.bot) { + // Two pieces + tops[0] = hit.top + rn; + bots[0] = hit.bot; + tops[1] = hit.top; + bots[1] = hit.top + newwidth - (bots[0] - tops[0]); + } else { + // One piece + tops[0] = hit.top + rn; + bots[0] = tops[0] + newwidth; + } + assert_leq(bots[0], hit.bot); + assert_leq(bots[1], hit.bot); + assert_geq(bots[0], tops[0]); + assert_geq(bots[1], tops[1]); + assert_eq(newwidth, (bots[0] - tops[0]) + (bots[1] - tops[1])); + } + for(int i = 0; i < 2 && !done; i++) { + if(bots[i] <= tops[i]) break; + index_t width = bots[i] - tops[i]; + index_t top = tops[i]; + // Clear list where resolved offsets are stored + swmSeed.exranges++; + swmSeed.exrows += width; + if(!succ) { + swmSeed.exsucc++; + succ = true; + } + if(firstEe) { + salistEe_.clear(); + pool_.clear(); + firstEe = false; + } + // We have to be careful not to allocate excessive amounts of memory here + TSlice o(salistEe_, (index_t)salistEe_.size(), width); + for(index_t i = 0; i < width; i++) { + if(!salistEe_.add(pool_, (index_t)OFF_MASK)) { + swmSeed.exooms++; + return false; + } + } + assert(!done); + eehits_.push_back(hit); + satpos_.expand(); + satpos_.back().sat.init(SAKey(), top, (index_t)OFF_MASK, o); + satpos_.back().sat.key.seq = MAX_U64; + satpos_.back().sat.key.len = (index_t)rd.length(); + satpos_.back().pos.init(fw, 0, 0, (index_t)rd.length()); + satpos_.back().origSz = width; + rands_.expand(); + rands_.back().init(width, 
all); + gws_.expand(); + SARangeWithOffs sa; + sa.topf = satpos_.back().sat.topf; + sa.len = satpos_.back().sat.key.len; + sa.offs = satpos_.back().sat.offs; + gws_.back().init( + gfm, // forward Bowtie index + ref, // reference sequences + sa, // SATuple + rnd, // pseudo-random generator + wlm); // metrics + assert(gws_.back().repOk(sa)); + nelt_out += width; + if(nelt_out >= maxelt) { + done = true; + } + } + } + } + } + succ = false; + if(!done && !sh.mm1EEHits().empty()) { + sh.sort1mmEe(rnd); + index_t sz = sh.mm1EEHits().size(); + for(index_t i = 0; i < sz && !done; i++) { + EEHit hit = sh.mm1EEHits()[i]; + assert(hit.repOk(rd)); + assert(!hit.empty()); + // Possibly adjust bot and width if we would have exceeded maxelt + index_t tops[2] = { hit.top, 0 }; + index_t bots[2] = { hit.bot, 0 }; + index_t width = hit.bot - hit.top; + if(nelt_out + width > maxelt) { + index_t trim = (index_t)((nelt_out + width) - maxelt); +#ifdef BOWTIE_64BIT_INDEX + index_t rn = rnd.nextU64() % width; +#else + index_t rn = rnd.nextU32() % width; +#endif + index_t newwidth = width - trim; + if(hit.top + rn + newwidth > hit.bot) { + // Two pieces + tops[0] = hit.top + rn; + bots[0] = hit.bot; + tops[1] = hit.top; + bots[1] = hit.top + newwidth - (bots[0] - tops[0]); + } else { + // One piece + tops[0] = hit.top + rn; + bots[0] = tops[0] + newwidth; + } + assert_leq(bots[0], hit.bot); + assert_leq(bots[1], hit.bot); + assert_geq(bots[0], tops[0]); + assert_geq(bots[1], tops[1]); + assert_eq(newwidth, (bots[0] - tops[0]) + (bots[1] - tops[1])); + } + for(int i = 0; i < 2 && !done; i++) { + if(bots[i] <= tops[i]) break; + index_t width = bots[i] - tops[i]; + index_t top = tops[i]; + // Clear list where resolved offsets are stored + swmSeed.mm1ranges++; + swmSeed.mm1rows += width; + if(!succ) { + swmSeed.mm1succ++; + succ = true; + } + if(firstEe) { + salistEe_.clear(); + pool_.clear(); + firstEe = false; + } + TSlice o(salistEe_, (index_t)salistEe_.size(), width); + for(size_t i = 0; i 
< width; i++) { + if(!salistEe_.add(pool_, (index_t)OFF_MASK)) { + swmSeed.mm1ooms++; + return false; + } + } + eehits_.push_back(hit); + satpos_.expand(); + satpos_.back().sat.init(SAKey(), top, (index_t)OFF_MASK, o); + satpos_.back().sat.key.seq = MAX_U64; + satpos_.back().sat.key.len = (index_t)rd.length(); + satpos_.back().pos.init(hit.fw, 0, 0, (index_t)rd.length()); + satpos_.back().origSz = width; + rands_.expand(); + rands_.back().init(width, all); + gws_.expand(); + SARangeWithOffs sa; + sa.topf = satpos_.back().sat.topf; + sa.len = satpos_.back().sat.key.len; + sa.offs = satpos_.back().sat.offs; + gws_.back().init( + gfm, // forward Bowtie index + ref, // reference sequences + sa, // SATuple + rnd, // pseudo-random generator + wlm); // metrics + assert(gws_.back().repOk(sa)); + nelt_out += width; + if(nelt_out >= maxelt) { + done = true; + } + } + } + } + return true; +} + +/** + * Extend a seed hit out on either side. Requires that we know the seed hit's + * offset into the read and orientation. Also requires that we know top/bot + * for the seed hit in both the forward and (if we want to extend to the right) + * reverse index. + */ +template +void SwDriver::extend( + const Read& rd, // read + const GFM& gfmFw, // Forward Bowtie index + const GFM* gfmBw, // Backward Bowtie index + index_t topf, // top in fw index + index_t botf, // bot in fw index + index_t topb, // top in bw index + index_t botb, // bot in bw index + bool fw, // seed orientation + index_t off, // seed offset from 5' end + index_t len, // seed length + PerReadMetrics& prm, // per-read metrics + index_t& nlex, // # positions we can extend to left w/o edit + index_t& nrex) // # positions we can extend to right w/o edit +{ + index_t t[4], b[4]; + index_t tp[4], bp[4]; + SideLocus tloc, bloc; + index_t rdlen = (index_t)rd.length(); + index_t lim = fw ? 
off : rdlen - len - off; + // We're about to add onto the beginning, so reverse it +#ifndef NDEBUG + if(false) { + // TODO: This will sometimes fail even when the extension is legitimate + // This is because contains() comes in from one extreme or the other, + // whereas we started from the inside and worked outwards. This + // affects which Ns are OK and which are not OK. + + // Have to do both because whether we can get through an N depends on + // which direction we're coming in + bool fwContains = gfmFw.contains(tmp_rdseq_); + tmp_rdseq_.reverse(); + bool bwContains = gfmBw != NULL && gfmBw->contains(tmp_rdseq_); + tmp_rdseq_.reverse(); + assert(fwContains || bwContains); + } +#endif + ASSERT_ONLY(tmp_rdseq_.reverse()); + if(lim > 0) { + const GFM *gfm = &gfmFw; + assert(gfm != NULL); + // Extend left using forward index + const BTDnaString& seq = fw ? rd.patFw : rd.patRc; + // See what we get by extending + index_t top = topf, bot = botf; + t[0] = t[1] = t[2] = t[3] = 0; + b[0] = b[1] = b[2] = b[3] = 0; + tp[0] = tp[1] = tp[2] = tp[3] = topb; + bp[0] = bp[1] = bp[2] = bp[3] = botb; + SideLocus tloc, bloc; + INIT_LOCS(top, bot, tloc, bloc, *gfm); + for(index_t ii = 0; ii < lim; ii++) { + // Starting to left of seed (mapBiLFEx(tloc, bloc, t, b, tp, bp); + SANITY_CHECK_4TUP(t, b, tp, bp); + int nonz = -1; + bool abort = false; + size_t origSz = bot - top; + for(int j = 0; j < 4; j++) { + if(b[j] > t[j]) { + if(nonz >= 0) { + abort = true; + break; + } + nonz = j; + top = t[j]; bot = b[j]; + } + } + assert_leq(bot - top, origSz); + if(abort || (nonz != rdc && rdc <= 3) || bot - top < origSz) { + break; + } + } else { + assert_eq(bot, top+1); + prm.nSdFmops++; + int c = gfm->mapLF1(top, tloc); + if(c != rdc && rdc <= 3) { + break; + } + bot = top + 1; + } + ASSERT_ONLY(tmp_rdseq_.append(rdc)); + if(++nlex == 255) { + break; + } + INIT_LOCS(top, bot, tloc, bloc, *gfm); + } + } + // We're about to add onto the end, so re-reverse + ASSERT_ONLY(tmp_rdseq_.reverse()); + 
lim = fw ? rdlen - len - off : off; + if(lim > 0 && gfmBw != NULL) { + const GFM *gfm = gfmBw; + assert(gfm != NULL); + // Extend right using backward index + const BTDnaString& seq = fw ? rd.patFw : rd.patRc; + // See what we get by extending + index_t top = topb, bot = botb; + t[0] = t[1] = t[2] = t[3] = 0; + b[0] = b[1] = b[2] = b[3] = 0; + tp[0] = tp[1] = tp[2] = tp[3] = topf; + bp[0] = bp[1] = bp[2] = bp[3] = botf; + INIT_LOCS(top, bot, tloc, bloc, *gfm); + for(index_t ii = 0; ii < lim; ii++) { + // Starting to right of seed (mapBiLFEx(tloc, bloc, t, b, tp, bp); + SANITY_CHECK_4TUP(t, b, tp, bp); + int nonz = -1; + bool abort = false; + size_t origSz = bot - top; + for(int j = 0; j < 4; j++) { + if(b[j] > t[j]) { + if(nonz >= 0) { + abort = true; + break; + } + nonz = j; + top = t[j]; bot = b[j]; + } + } + assert_leq(bot - top, origSz); + if(abort || (nonz != rdc && rdc <= 3) || bot - top < origSz) { + break; + } + } else { + assert_eq(bot, top+1); + prm.nSdFmops++; + int c = gfm->mapLF1(top, tloc); + if(c != rdc && rdc <= 3) { + break; + } + bot = top + 1; + } + ASSERT_ONLY(tmp_rdseq_.append(rdc)); + if(++nrex == 255) { + break; + } + INIT_LOCS(top, bot, tloc, bloc, *gfm); + } + } +#ifndef NDEBUG + if(false) { + // TODO: This will sometimes fail even when the extension is legitimate + // This is because contains() comes in from one extreme or the other, + // whereas we started from the inside and worked outwards. This + // affects which Ns are OK and which are not OK. 
+ + // Have to do both because whether we can get through an N depends on + // which direction we're coming in + bool fwContains = gfmFw.contains(tmp_rdseq_); + tmp_rdseq_.reverse(); + bool bwContains = gfmBw != NULL && gfmBw->contains(tmp_rdseq_); + tmp_rdseq_.reverse(); + assert(fwContains || bwContains); + } +#endif + assert_lt(nlex, rdlen); + assert_lt(nrex, rdlen); + return; +} + +/** + * Given seed results, set up all of our state for resolving and keeping + * track of reference offsets for hits. + */ +template +void SwDriver::prioritizeSATups( + const Read& read, // read + SeedResults& sh, // seed hits to extend into full alignments + const GFM& gfmFw, // BWT + const GFM* gfmBw, // BWT + const BitPairReference& ref, // Reference strings + int seedmms, // # mismatches allowed in seed + index_t maxelt, // max elts we'll consider + bool doExtend, // do extension of seed hits? + bool lensq, // square length in weight calculation + bool szsq, // square range size in weight calculation + index_t nsm, // if range as <= nsm elts, it's "small" + AlignmentCacheIface& ca, // alignment cache for seed hits + RandomSource& rnd, // pseudo-random generator + WalkMetrics& wlm, // group walk left metrics + PerReadMetrics& prm, // per-read metrics + index_t& nelt_out, // out: # elements total + bool all) // report all hits? +{ + const index_t nonz = sh.nonzeroOffsets(); // non-zero positions + const int matei = (read.mate <= 1 ? 0 : 1); + satups_.clear(); + gws_.clear(); + rands_.clear(); + rands2_.clear(); + satpos_.clear(); + satpos2_.clear(); + index_t nrange = 0, nelt = 0, nsmall = 0, nsmall_elts = 0; + bool keepWhole = false; + EList, 16>& satpos = keepWhole ? 
satpos_ : satpos2_; + for(index_t i = 0; i < nonz; i++) { + bool fw = true; + index_t offidx = 0, rdoff = 0, seedlen = 0; + QVal qv = sh.hitsByRank(i, offidx, rdoff, fw, seedlen); + assert(qv.valid()); + assert(!qv.empty()); + assert(qv.repOk(ca.current())); + ca.queryQval(qv, satups_, nrange, nelt); + for(size_t j = 0; j < satups_.size(); j++) { + const index_t sz = satups_[j].size(); + // Check whether this hit occurs inside the extended boundaries of + // another hit we already processed for this read. + if(seedmms == 0) { + // See if we're covered by a previous extended seed hit + EList& range = + fw ? seedExRangeFw_[matei] : seedExRangeRc_[matei]; + bool skip = false; + for(index_t k = 0; k < range.size(); k++) { + index_t p5 = range[k].off; + index_t len = range[k].len; + if(p5 <= rdoff && p5 + len >= (rdoff + seedlen)) { + if(sz <= range[k].sz) { + skip = true; + break; + } + } + } + if(skip) { + assert_gt(nrange, 0); + nrange--; + assert_geq(nelt, sz); + nelt -= sz; + continue; // Skip this seed + } + } + satpos.expand(); + satpos.back().sat = satups_[j]; + satpos.back().origSz = sz; + satpos.back().pos.init(fw, offidx, rdoff, seedlen); + if(sz <= nsm) { + nsmall++; + nsmall_elts += sz; + } + satpos.back().nlex = satpos.back().nrex = 0; +#ifndef NDEBUG + tmp_rdseq_.clear(); + uint64_t key = satpos.back().sat.key.seq; + for(size_t k = 0; k < seedlen; k++) { + int c = (int)(key & 3); + tmp_rdseq_.append(c); + key >>= 2; + } + tmp_rdseq_.reverse(); +#endif + index_t nlex = 0, nrex = 0; + if(doExtend) { + extend( + read, + gfmFw, + gfmBw, + satpos.back().sat.topf, + (index_t)(satpos.back().sat.topf + sz), + satpos.back().sat.topb, + (index_t)(satpos.back().sat.topb + sz), + fw, + rdoff, + seedlen, + prm, + nlex, + nrex); + } + satpos.back().nlex = nlex; + satpos.back().nrex = nrex; + if(seedmms == 0 && (nlex > 0 || nrex > 0)) { + assert_geq(rdoff, (fw ? nlex : nrex)); + index_t p5 = rdoff - (fw ? nlex : nrex); + EList& range = + fw ? 
seedExRangeFw_[matei] : seedExRangeRc_[matei]; + range.expand(); + range.back().off = p5; + range.back().len = seedlen + nlex + nrex; + range.back().sz = sz; + } + } + satups_.clear(); + } + assert_leq(nsmall, nrange); + nelt_out = nelt; // return the total number of elements + assert_eq(nrange, satpos.size()); + satpos.sort(); + if(keepWhole) { + gws_.ensure(nrange); + rands_.ensure(nrange); + for(index_t i = 0; i < nrange; i++) { + gws_.expand(); + SARangeWithOffs sa; + sa.topf = satpos_.back().sat.topf; + sa.len = satpos_.back().sat.key.len; + sa.offs = satpos_.back().sat.offs; + gws_.back().init( + gfmFw, // forward Bowtie index + ref, // reference sequences + sa, // SA tuples: ref hit, salist range + rnd, // pseudo-random generator + wlm); // metrics + assert(gws_.back().initialized()); + rands_.expand(); + rands_.back().init(satpos_[i].sat.size(), all); + } + return; + } + // Resize satups_ list so that ranges having elements that we might + // possibly explore are present + satpos_.ensure(min(maxelt, nelt)); + gws_.ensure(min(maxelt, nelt)); + rands_.ensure(min(maxelt, nelt)); + rands2_.ensure(min(maxelt, nelt)); + size_t nlarge_elts = nelt - nsmall_elts; + if(maxelt < nelt) { + size_t diff = nelt - maxelt; + if(diff >= nlarge_elts) { + nlarge_elts = 0; + } else { + nlarge_elts -= diff; + } + } + index_t nelt_added = 0; + // Now we have a collection of ranges in satpos2_. Now we want to decide + // how we explore elements from them. The basic idea is that: for very + // small guys, where "very small" means that the size of the range is less + // than or equal to the parameter 'nsz', we explore them in their entirety + // right away. For the rest, we want to select in a way that is (a) + // random, and (b) weighted toward examining elements from the smaller + // ranges more frequently (and first). + // + // 1. 
do the smalls + for(index_t j = 0; j < nsmall && nelt_added < maxelt; j++) { + satpos_.expand(); + satpos_.back() = satpos2_[j]; + gws_.expand(); + SARangeWithOffs sa; + sa.topf = satpos_.back().sat.topf; + sa.len = satpos_.back().sat.key.len; + sa.offs = satpos_.back().sat.offs; + gws_.back().init( + gfmFw, // forward Bowtie index + ref, // reference sequences + sa, // SA tuples: ref hit, salist range + rnd, // pseudo-random generator + wlm); // metrics + assert(gws_.back().initialized()); + rands_.expand(); + rands_.back().init(satpos_.back().sat.size(), all); + nelt_added += satpos_.back().sat.size(); +#ifndef NDEBUG + for(size_t k = 0; k < satpos_.size()-1; k++) { + assert(!(satpos_[k] == satpos_.back())); + } +#endif + } + if(nelt_added >= maxelt || nsmall == satpos2_.size()) { + nelt_out = nelt_added; + return; + } + // 2. do the non-smalls + // Initialize the row sampler + rowsamp_.init(satpos2_, nsmall, satpos2_.size(), lensq, szsq); + // Initialize the random choosers + rands2_.resize(satpos2_.size()); + for(index_t j = 0; j < satpos2_.size(); j++) { + rands2_[j].reset(); + } + while(nelt_added < maxelt && nelt_added < nelt) { + // Pick a non-small range to sample from + index_t ri = rowsamp_.next(rnd) + nsmall; + assert_geq(ri, nsmall); + assert_lt(ri, satpos2_.size()); + // Initialize random element chooser for that range + if(!rands2_[ri].inited()) { + rands2_[ri].init(satpos2_[ri].sat.size(), all); + assert(!rands2_[ri].done()); + } + assert(!rands2_[ri].done()); + // Choose an element from the range + uint32_t r = rands2_[ri].next(rnd); + if(rands2_[ri].done()) { + // Tell the row sampler this range is done + rowsamp_.finishedRange(ri - nsmall); + } + // Add the element to the satpos_ list + SATuple sat; + TSlice o; + o.init(satpos2_[ri].sat.offs, r, r+1); + sat.init(satpos2_[ri].sat.key, (index_t)(satpos2_[ri].sat.topf + r), (index_t)OFF_MASK, o); + satpos_.expand(); + satpos_.back().sat = sat; + satpos_.back().origSz = satpos2_[ri].origSz; + 
satpos_.back().pos = satpos2_[ri].pos; + // Initialize GroupWalk object + gws_.expand(); + SARangeWithOffs sa; + sa.topf = sat.topf; + sa.len = sat.key.len; + sa.offs = sat.offs; + gws_.back().init( + gfmFw, // forward Bowtie index + ref, // reference sequences + sa, // SA tuples: ref hit, salist range + rnd, // pseudo-random generator + wlm); // metrics + assert(gws_.back().initialized()); + // Initialize random selector + rands_.expand(); + rands_.back().init(1, all); + nelt_added++; + } + nelt_out = nelt_added; + return; +} + +enum { + FOUND_NONE = 0, + FOUND_EE, + FOUND_UNGAPPED, +}; + +/** + * Given a collection of SeedHits for a single read, extend seed alignments + * into full alignments. Where possible, try to avoid redundant offset lookups + * and dynamic programming wherever possible. Optionally report alignments to + * a AlnSinkWrap object as they are discovered. + * + * If 'reportImmediately' is true, returns true iff a call to msink->report() + * returned true (indicating that the reporting policy is satisfied and we can + * stop). Otherwise, returns false. 
+ */ +template +int SwDriver::extendSeeds( + Read& rd, // read to align + bool mate1, // true iff rd is mate #1 + SeedResults& sh, // seed hits to extend into full alignments + const GFM& gfmFw, // BWT + const GFM* gfmBw, // BWT' + const BitPairReference& ref, // Reference strings + SwAligner& swa, // dynamic programming aligner + const Scoring& sc, // scoring scheme + int seedmms, // # mismatches allowed in seed + int seedlen, // length of seed + int seedival, // interval between seeds + TAlScore& minsc, // minimum score for anchor; in/out: raised when -M tightening applies + int nceil, // maximum # Ns permitted in reference portion + size_t maxhalf, // max width in either direction for DP tables + bool doUngapped, // do ungapped alignment + size_t maxIters, // stop after this many seed-extend loop iters + size_t maxUg, // stop after this many ungaps + size_t maxDp, // stop after this many dps + size_t maxUgStreak, // stop after streak of this many ungap fails + size_t maxDpStreak, // stop after streak of this many dp fails + bool doExtend, // do seed extension + bool enable8, // use 8-bit SSE where possible + size_t cminlen, // use checkpointer if read longer than this + size_t cpow2, // interval between diagonals to checkpoint + bool doTri, // triangular mini-fills? 
+ int tighten, // -M score tightening mode + AlignmentCacheIface& ca, // alignment cache for seed hits + RandomSource& rnd, // pseudo-random source + WalkMetrics& wlm, // group walk left metrics + SwMetrics& swmSeed, // DP metrics for seed-extend + PerReadMetrics& prm, // per-read metrics + AlnSinkWrap* msink, // AlnSink wrapper for multiseed-style aligner; must be non-NULL (dereferenced unconditionally below) + bool reportImmediately, // whether to report hits immediately to msink + bool& exhaustive) // documented as: set to true iff we searched all seeds exhaustively; NOTE: never assigned in this function +{ + bool all = msink->allHits(); + // typedef std::pair UPair; + + assert(!reportImmediately || msink != NULL); + assert(!reportImmediately || !msink->maxed()); + + assert_geq(nceil, 0); + assert_leq((size_t)nceil, rd.length()); + + // Cache the read length and the scoring scheme's perfect score for that + // length; the read/reference gap limits are derived per seed hit below + const index_t rdlen = (index_t)rd.length(); + TAlScore perfectScore = sc.perfectScore(rdlen); + + DynProgFramer dpframe(!gReportOverhangs); + swa.reset(); + + // Initialize a set of GroupWalks, one for each seed. Also, initialize the + // accompanying lists of reference seed hits (satups*) + const index_t nsm = 5; + const index_t nonz = sh.nonzeroOffsets(); // non-zero positions + index_t eeHits = sh.numE2eHits(); + bool eeMode = eeHits > 0; + bool firstEe = true; + bool firstExtend = true; + + // Reset all the counters related to streaks + prm.nEeFail = 0; + prm.nUgFail = 0; + prm.nDpFail = 0; + + index_t nelt = 0, neltLeft = 0; + index_t rows = rdlen; + index_t eltsDone = 0; + // cerr << "===" << endl; + while(true) { + if(eeMode) { + if(firstEe) { + firstEe = false; + eeMode = eeSaTups( + rd, // read + sh, // seed hits to extend into full alignments + gfmFw, // BWT + ref, // Reference strings + rnd, // pseudo-random generator + wlm, // group walk left metrics + swmSeed, // seed-extend metrics + nelt, // out: # elements total + maxIters, // max # to report + all); // report all hits? 
+ assert_eq(gws_.size(), rands_.size()); + assert_eq(gws_.size(), satpos_.size()); + } else { + eeMode = false; + } + } + if(!eeMode) { + if(nonz == 0) { + return EXTEND_EXHAUSTED_CANDIDATES; // No seed hits! Bail. + } + if(minsc == perfectScore) { + return EXTEND_PERFECT_SCORE; // Already found all perfect hits! + } + if(firstExtend) { + nelt = 0; + prioritizeSATups( + rd, // read + sh, // seed hits to extend into full alignments + gfmFw, // BWT + gfmBw, // BWT' + ref, // Reference strings + seedmms, // # seed mismatches allowed + maxIters, // max rows to consider per position + doExtend, // extend out seeds + true, // square extended length + true, // square SA range size + nsm, // smallness threshold + ca, // alignment cache for seed hits + rnd, // pseudo-random generator + wlm, // group walk left metrics + prm, // per-read metrics + nelt, // out: # elements total + all); // report all hits? + assert_eq(gws_.size(), rands_.size()); + assert_eq(gws_.size(), satpos_.size()); + neltLeft = nelt; + firstExtend = false; + } + if(neltLeft == 0) { + // Finished examining gapped candidates + break; + } + } + for(size_t i = 0; i < gws_.size(); i++) { + if(eeMode && eehits_[i].score < minsc) { + return EXTEND_PERFECT_SCORE; + } + bool is_small = satpos_[i].sat.size() < nsm; + bool fw = satpos_[i].pos.fw; + index_t rdoff = satpos_[i].pos.rdoff; + index_t seedhitlen = satpos_[i].pos.seedlen; + if(!fw) { + // 'rdoff' and 'offidx' are with respect to the 5' end of + // the read. Here we convert rdoff to be with respect to + // the upstream (3') end of the read. + rdoff = (index_t)(rdlen - rdoff - seedhitlen); + } + bool first = true; + // If the range is small, investigate all elements now. If the + // range is large, just investigate one and move on - we might come + // back to this range later. 
+ index_t riter = 0; + while(!rands_[i].done() && (first || is_small || eeMode)) { + assert(!gws_[i].done()); + riter++; + if(minsc == perfectScore) { + if(!eeMode || eehits_[i].score < perfectScore) { + return EXTEND_PERFECT_SCORE; + } + } else if(eeMode && eehits_[i].score < minsc) { + break; + } + if(prm.nExDps >= maxDp || prm.nMateDps >= maxDp) { + return EXTEND_EXCEEDED_HARD_LIMIT; + } + if(prm.nExUgs >= maxUg || prm.nMateUgs >= maxUg) { + return EXTEND_EXCEEDED_HARD_LIMIT; + } + if(prm.nExIters >= maxIters) { + return EXTEND_EXCEEDED_HARD_LIMIT; + } + prm.nExIters++; + first = false; + // Resolve next element offset + WalkResult wr; + uint32_t elt = rands_[i].next(rnd); + //cerr << "elt=" << elt << endl; + SARangeWithOffs sa; + sa.topf = satpos_[i].sat.topf; + sa.len = satpos_[i].sat.key.len; + sa.offs = satpos_[i].sat.offs; + gws_[i].advanceElement((index_t)elt, gfmFw, ref, sa, gwstate_, wr, wlm, prm); + eltsDone++; + if(!eeMode) { + assert_gt(neltLeft, 0); + neltLeft--; + } + assert_neq((index_t)OFF_MASK, wr.toff); + index_t tidx = 0, toff = 0, tlen = 0; + bool straddled = false; + gfmFw.joinedToTextOff( + wr.elt.len, + wr.toff, + tidx, + toff, + tlen, + eeMode, // reject straddlers? + straddled); // did it straddle? 
+ if(tidx == (index_t)OFF_MASK) { + // The seed hit straddled a reference boundary so the seed hit + // isn't valid + continue; + } +#ifndef NDEBUG + if(!eeMode && !straddled) { // Check that seed hit matches reference + uint64_t key = satpos_[i].sat.key.seq; + for(index_t k = 0; k < wr.elt.len; k++) { + int c = ref.getBase(tidx, toff + wr.elt.len - k - 1); + assert_leq(c, 3); + int ck = (int)(key & 3); + key >>= 2; + assert_eq(c, ck); + } + } +#endif + // Find offset of alignment's upstream base assuming net gaps=0 + // between beginning of read and beginning of seed hit + int64_t refoff = (int64_t)toff - rdoff; + // Coordinate of the seed hit w/r/t the pasted reference string + Coord refcoord(tidx, refoff, fw); + if(seenDiags1_.locusPresent(refcoord)) { + // Already handled alignments seeded on this diagonal + prm.nRedundants++; + swmSeed.rshit++; + continue; + } + // Now that we have a seed hit, there are many issues to solve + // before we have a completely framed dynamic programming problem. + // They include: + // + // 1. Setting reference offsets on either side of the seed hit, + // accounting for where the seed occurs in the read + // 2. Adjusting the width of the banded dynamic programming problem + // and adjusting reference bounds to allow for gaps in the + // alignment + // 3. Accounting for the edges of the reference, which can impact + // the width of the DP problem and reference bounds. + // 4. Perhaps filtering the problem down to a smaller problem based + // on what DPs we've already solved for this read + // + // We do #1 here, since it is simple and we have all the seed-hit + // information here. #2 and #3 are handled in the DynProgFramer. 
+ int readGaps = 0, refGaps = 0; + bool ungapped = false; + if(!eeMode) { + readGaps = sc.maxReadGaps(minsc, rdlen); + refGaps = sc.maxRefGaps(minsc, rdlen); + ungapped = (readGaps == 0 && refGaps == 0); + } + int state = FOUND_NONE; + bool found = false; + if(eeMode) { + resEe_.reset(); + resEe_.alres.reset(); + const EEHit& h = eehits_[i]; + assert_leq(h.score, perfectScore); + resEe_.alres.setScore(AlnScore(h.score, h.ns(), 0)); + resEe_.alres.setShape( + refcoord.ref(), // ref id + refcoord.off(), // 0-based ref offset + tlen, // length of reference + fw, // aligned to Watson? + rdlen, // read length + true, // pretrim soft? + 0, // pretrim 5' end + 0, // pretrim 3' end + true, // alignment trim soft? + 0, // alignment trim 5' end + 0); // alignment trim 3' end + resEe_.alres.setRefNs(h.refns()); + if(h.mms() > 0) { + assert_eq(1, h.mms()); + assert_lt(h.e1.pos, rd.length()); + resEe_.alres.ned().push_back(h.e1); + } + assert(resEe_.repOk(rd)); + state = FOUND_EE; + found = true; + Interval refival(refcoord, 1); + seenDiags1_.add(refival); + } else if(doUngapped && ungapped) { + resUngap_.reset(); + int al = swa.ungappedAlign( + fw ? rd.patFw : rd.patRc, + fw ? 
rd.qual : rd.qualRev, + refcoord, + ref, + tlen, + sc, + gReportOverhangs, + minsc, + resUngap_); + Interval refival(refcoord, 1); + seenDiags1_.add(refival); + prm.nExUgs++; + if(al == 0) { + prm.nExUgFails++; + prm.nUgFail++; + if(prm.nUgFail >= maxUgStreak) { + return EXTEND_EXCEEDED_SOFT_LIMIT; + } + swmSeed.ungapfail++; + continue; + } else if(al == -1) { + prm.nExUgFails++; + prm.nUgFail++; // count this as failure + if(prm.nUgFail >= maxUgStreak) { + return EXTEND_EXCEEDED_SOFT_LIMIT; + } + swmSeed.ungapnodec++; + } else { + prm.nExUgSuccs++; + prm.nUgLastSucc = prm.nExUgs-1; + if(prm.nUgFail > prm.nUgFailStreak) { + prm.nUgFailStreak = prm.nUgFail; + } + prm.nUgFail = 0; + found = true; + state = FOUND_UNGAPPED; + swmSeed.ungapsucc++; + } + } + int64_t pastedRefoff = (int64_t)wr.toff - rdoff; + DPRect rect; + if(state == FOUND_NONE) { + found = dpframe.frameSeedExtensionRect( + refoff, // ref offset implied by seed hit assuming no gaps + rows, // length of read sequence used in DP table + tlen, // length of reference + readGaps, // max # of read gaps permitted in this read's alignment + refGaps, // max # of ref gaps permitted in this read's alignment + (size_t)nceil, // # Ns permitted + maxhalf, // max width in either direction + rect); // DP rectangle + assert(rect.repOk()); + // Add the seed diagonal at least + seenDiags1_.add(Interval(refcoord, 1)); + if(!found) { + continue; + } + } + int64_t leftShift = refoff - rect.refl; + size_t nwindow = 0; + if(toff >= rect.refl) { + nwindow = (size_t)(toff - rect.refl); + } + // NOTE: We might be taking off more than we should because the + // pasted string omits non-A/C/G/T characters, but we included them + // when calculating leftShift. We'll account for this later. 
+ pastedRefoff -= leftShift; + size_t nsInLeftShift = 0; + if(state == FOUND_NONE) { + if(!swa.initedRead()) { + // Initialize the aligner with a new read + swa.initRead( + rd.patFw, // fw version of query + rd.patRc, // rc version of query + rd.qual, // fw version of qualities + rd.qualRev,// rc version of qualities + 0, // off of first char in 'rd' to consider + rdlen, // off of last char (excl) in 'rd' to consider + sc); // scoring scheme + } + swa.initRef( + fw, // whether to align forward or revcomp read + tidx, // reference aligned against + rect, // DP rectangle + ref, // Reference strings + tlen, // length of reference sequence + sc, // scoring scheme + minsc, // minimum score permitted + enable8, // use 8-bit SSE if possible? + cminlen, // minimum length for using checkpointing scheme + cpow2, // interval b/t checkpointed diags; 1 << this + doTri, // triangular mini-fills? + true, // this is a seed extension - not finding a mate + nwindow, + nsInLeftShift); + // Because of how we framed the problem, we can say that we've + // exhaustively scored the seed diagonal as well as maxgaps + // diagonals on either side + Interval refival(tidx, 0, fw, 0); + rect.initIval(refival); + seenDiags1_.add(refival); + // Now fill the dynamic programming matrix and return true iff + // there is at least one valid alignment + TAlScore bestCell = std::numeric_limits::min(); + found = swa.align(rnd, bestCell); + swmSeed.tallyGappedDp(readGaps, refGaps); + prm.nExDps++; + if(!found) { + prm.nExDpFails++; + prm.nDpFail++; + if(prm.nDpFail >= maxDpStreak) { + return EXTEND_EXCEEDED_SOFT_LIMIT; + } + // NOTE(review): this always updates the mate #1 field, whatever + // the value of 'mate1' - confirm that is intended + if(bestCell > std::numeric_limits::min() && bestCell > prm.bestLtMinscMate1) { + prm.bestLtMinscMate1 = bestCell; + } + continue; // Look for more anchor alignments + } else { + prm.nExDpSuccs++; + prm.nDpLastSucc = prm.nExDps-1; + if(prm.nDpFail > prm.nDpFailStreak) { + prm.nDpFailStreak = prm.nDpFail; + } + prm.nDpFail = 0; + } + } + bool firstInner = true; + while(true) { + 
assert(found); + SwResult *res = NULL; + if(state == FOUND_EE) { + if(!firstInner) { + break; + } + res = &resEe_; + } else if(state == FOUND_UNGAPPED) { + if(!firstInner) { + break; + } + res = &resUngap_; + } else { + resGap_.reset(); + assert(resGap_.empty()); + if(swa.done()) { + break; + } + swa.nextAlignment(resGap_, minsc, rnd); + found = !resGap_.empty(); + if(!found) { + break; + } + res = &resGap_; + } + assert(res != NULL); + firstInner = false; + assert(res->alres.matchesRef( + rd, + ref, + tmp_rf_, + tmp_rdseq_, + tmp_qseq_, + raw_refbuf_, + raw_destU32_, + raw_matches_, + tmp_reflens_, + tmp_refoffs_)); + Interval refival(tidx, 0, fw, tlen); + assert_gt(res->alres.refExtent(), 0); + if(gReportOverhangs && + !refival.containsIgnoreOrient(res->alres.refival())) + { + res->alres.clipOutside(true, 0, tlen); + if(res->alres.refExtent() == 0) { + continue; + } + } + assert(gReportOverhangs || + refival.containsIgnoreOrient(res->alres.refival())); + // Did the alignment fall entirely outside the reference? + if(!refival.overlapsIgnoreOrient(res->alres.refival())) { + continue; + } + // Is this alignment redundant with one we've seen previously? + if(redAnchor_.overlap(res->alres)) { + // Redundant with an alignment we found already + continue; + } + redAnchor_.add(res->alres); + // Annotate the AlnRes object with some key parameters + // that were used to obtain the alignment. 
+ res->alres.setParams( + seedmms, // # mismatches allowed in seed + seedlen, // length of seed + seedival, // interval between seeds + minsc); // minimum score for valid alignment + + if(reportImmediately) { + assert(msink != NULL); + assert(res->repOk()); + // Check that alignment accurately reflects the + // reference characters aligned to + assert(res->alres.matchesRef( + rd, + ref, + tmp_rf_, + tmp_rdseq_, + tmp_qseq_, + raw_refbuf_, + raw_destU32_, + raw_matches_, + tmp_reflens_, + tmp_refoffs_)); + // Report an unpaired alignment + assert(!msink->maxed()); + if(msink->report( + 0, + mate1 ? &res->alres : NULL, + mate1 ? NULL : &res->alres)) + { + // Short-circuited because a limit, e.g. -k, -m or + // -M, was exceeded + return EXTEND_POLICY_FULFILLED; + } + // Score tightening: raise minsc to the best unpaired score + // (mode 1), the second-best (mode 2), or the second-best plus + // 3/4 of the best/second-best gap (any other mode) + if(tighten > 0 && + msink->Mmode() && + msink->hasSecondBestUnp1()) + { + if(tighten == 1) { + if(msink->bestUnp1() >= minsc) { + minsc = msink->bestUnp1(); + if(minsc < perfectScore && + msink->bestUnp1() == msink->secondBestUnp1()) + { + minsc++; + } + } + } else if(tighten == 2) { + if(msink->secondBestUnp1() >= minsc) { + minsc = msink->secondBestUnp1(); + if(minsc < perfectScore) { + minsc++; + } + } + } else { + TAlScore diff = msink->bestUnp1() - msink->secondBestUnp1(); + TAlScore bot = msink->secondBestUnp1() + ((diff*3)/4); + if(bot >= minsc) { + minsc = bot; + if(minsc < perfectScore) { + minsc++; + } + } + } + assert_leq(minsc, perfectScore); + } + } + } + + // At this point we know that we aren't bailing, and will + // continue to resolve seed hits. + + } // while(!rands_[i].done() && ...) + } + } + // Ran out of candidate seed hits to extend + return EXTEND_EXHAUSTED_CANDIDATES; +} + +/** + * Given a collection of SeedHits for both mates in a read pair, extend seed + * alignments into full alignments and then look for the opposite mate using + * dynamic programming. Where possible, try to avoid redundant offset lookups. 
+ * Optionally report alignments to an AlnSinkWrap object as they are discovered. + * + * If 'reportImmediately' is true, returns true iff a call to + * msink->report() returned true (indicating that the reporting + * policy is satisfied and we can stop). Otherwise, returns false. + * + * REDUNDANT SEED HITS + * + * See notes at top of aligner_sw_driver.h. + * + * REDUNDANT ALIGNMENTS + * + * See notes at top of aligner_sw_driver.h. + * + * MIXING PAIRED AND UNPAIRED ALIGNMENTS + * + * There are distinct paired-end alignment modes for the cases where (a) the + * user does or does not want to see unpaired alignments for individual mates + * when there are no reportable paired-end alignments involving both mates, and + * (b) the user does or does not want to see discordant paired-end alignments. + * The modes have implications for this function and for the AlnSinkWrap, since + * they affect when we're "done." Also, whether the user has asked us to report + * discordant alignments affects whether and how much searching for unpaired + * alignments we must do (i.e. if there are no paired-end alignments, we must + * at least do -m 1 for both mates). + * + * Mode 1: Just concordant paired-end. Print only concordant paired-end + * alignments. As soon as any limits (-k/-m/-M) are reached, stop. + * + * Mode 2: Concordant and discordant paired-end. If -k/-m/-M limits are + * reached for paired-end alignments, stop. Otherwise, if no paired-end + * alignments are found, align both mates in an unpaired -m 1 fashion. If + * there is exactly one unpaired alignment for each mate, report the + * combination as a discordant alignment. + * + * Mode 3: Concordant paired-end if possible, otherwise unpaired. If -k/-M + * limit is reached for paired-end alignments, stop. If -m limit is reached for + * paired-end alignments or no paired-end alignments are found, align both + * mates in an unpaired fashion. 
All the same settings governing validity and + * reportability in paired-end mode apply here too (-k/-m/-M/etc). + * + * Mode 4: Concordant or discordant paired-end if possible, otherwise unpaired. + * If -k/-M limit is reached for paired-end alignments, stop. If -m limit is + * reached for paired-end alignments or no paired-end alignments are found, + * align both mates in an unpaired fashion. If the -m limit was reached, there + * is no need to search for a discordant alignment, and unpaired alignment can + * proceed as in Mode 3. If no paired-end alignments were found, then unpaired + * alignment proceeds as in Mode 3 but with this caveat: alignment must be at + * least as thorough as dictated by -m 1 up until the point where + * + * Print paired-end alignments when there are reportable paired-end + * alignments, otherwise report reportable unpaired alignments. If -k limit is + * reached for paired-end alignments, stop. If -m/-M limit is reached for + * paired-end alignments, stop searching for paired-end alignments and look + * only for unpaired alignments. If searching only for unpaired alignments, + * respect -k/-m/-M limits separately for both mates. + * + * The return value from the AlnSinkWrap's report member function must be + * specific enough to distinguish between: + * + * 1. Stop searching for paired-end alignments + * 2. Stop searching for unpaired alignments for mate #1 + * 3. Stop searching for unpaired alignments for mate #2 + * 4. Stop searching for any alignments + * + * Note that in Mode 2, options affecting validity and reportability of + * alignments apply . E.g. 
if -m 1 is specified + * + * WORKFLOW + * + * Our general approach to finding paired and unpaired alignments here + * is as follows: + * + * - For mate in mate1, mate2: + * - For each seed hit in mate: + * - Try to extend it into a full alignment; if we can't, continue + * to the next seed hit + * - Look for alignment for opposite mate; if we can't find one, + * - + * - + * + */ +template +int SwDriver::extendSeedsPaired( + Read& rd, // mate to align as anchor + Read& ord, // mate to align as opposite + bool anchor1, // true iff anchor mate is mate1 + bool oppFilt, // true iff opposite mate was filtered out + SeedResults& sh, // seed hits for anchor + const GFM& gfmFw, // BWT + const GFM* gfmBw, // BWT' + const BitPairReference& ref, // Reference strings + SwAligner& swa, // dynamic programming aligner for anchor + SwAligner& oswa, // dynamic programming aligner for opposite + const Scoring& sc, // scoring scheme + const PairedEndPolicy& pepol,// paired-end policy + int seedmms, // # mismatches allowed in seed + int seedlen, // length of seed + int seedival, // interval between seeds + TAlScore& minsc, // minimum score for valid anchor aln + TAlScore& ominsc, // minimum score for valid opposite aln + int nceil, // max # Ns permitted in ref for anchor + int onceil, // max # Ns permitted in ref for opposite + bool nofw, // don't align forward read + bool norc, // don't align revcomp read + size_t maxhalf, // max width in either direction for DP tables + bool doUngapped, // do ungapped alignment + size_t maxIters, // stop after this many seed-extend loop iters + size_t maxUg, // stop after this many ungaps + size_t maxDp, // stop after this many dps + size_t maxEeStreak, // stop after streak of this many end-to-end fails + size_t maxUgStreak, // stop after streak of this many ungap fails + size_t maxDpStreak, // stop after streak of this many dp fails + size_t maxMateStreak, // stop seed range after N mate-find fails + bool doExtend, // do seed extension + bool 
enable8, // use 8-bit SSE where possible + size_t cminlen, // use checkpointer if read longer than this + size_t cpow2, // interval between diagonals to checkpoint + bool doTri, // triangular mini-fills? + int tighten, // -M score tightening mode + AlignmentCacheIface& ca, // alignment cache for seed hits + RandomSource& rnd, // pseudo-random source + WalkMetrics& wlm, // group walk left metrics + SwMetrics& swmSeed, // DP metrics for seed-extend + SwMetrics& swmMate, // DP metrics for mate finidng + PerReadMetrics& prm, // per-read metrics + AlnSinkWrap* msink, // AlnSink wrapper for multiseed-style aligner + bool swMateImmediately, // whether to look for mate immediately + bool reportImmediately, // whether to report hits immediately to msink + bool discord, // look for discordant alignments? + bool mixed, // look for unpaired as well as paired alns? + bool& exhaustive) +{ + bool all = msink->allHits(); + // typedef std::pair U32Pair; + + assert(!reportImmediately || msink != NULL); + assert(!reportImmediately || !msink->maxed()); + assert(!msink->state().doneWithMate(anchor1)); + + assert_geq(nceil, 0); + assert_geq(onceil, 0); + assert_leq((size_t)nceil, rd.length()); + assert_leq((size_t)onceil, ord.length()); + + const index_t rdlen = rd.length(); + const index_t ordlen = ord.length(); + const TAlScore perfectScore = sc.perfectScore(rdlen); + const TAlScore operfectScore = sc.perfectScore(ordlen); + + assert_leq(minsc, perfectScore); + assert(oppFilt || ominsc <= operfectScore); + + TAlScore bestPairScore = perfectScore + operfectScore; + if(tighten > 0 && msink->Mmode() && msink->hasSecondBestPair()) { + // Paired-end alignments should have at least this score from now + TAlScore ps; + if(tighten == 1) { + ps = msink->bestPair(); + } else if(tighten == 2) { + ps = msink->secondBestPair(); + } else { + TAlScore diff = msink->bestPair() - msink->secondBestPair(); + ps = msink->secondBestPair() + (diff * 3)/4; + } + if(tighten == 1 && ps < bestPairScore && + 
msink->bestPair() == msink->secondBestPair()) + { + ps++; + } + if(tighten >= 2 && ps < bestPairScore) { + ps++; + } + // Anchor mate must have score at least 'ps' minus the best possible + // score for the opposite mate. + TAlScore nc = ps - operfectScore; + if(nc > minsc) { + minsc = nc; + } + assert_leq(minsc, perfectScore); + } + + DynProgFramer dpframe(!gReportOverhangs); + swa.reset(); + oswa.reset(); + + // Initialize a set of GroupWalks, one for each seed. Also, intialize the + // accompanying lists of reference seed hits (satups*) + const index_t nsm = 5; + const index_t nonz = sh.nonzeroOffsets(); // non-zero positions + index_t eeHits = sh.numE2eHits(); + bool eeMode = eeHits > 0; + bool firstEe = true; + bool firstExtend = true; + + // Reset all the counters related to streaks + prm.nEeFail = 0; + prm.nUgFail = 0; + prm.nDpFail = 0; + + index_t nelt = 0, neltLeft = 0; + const index_t rows = rdlen; + const index_t orows = ordlen; + index_t eltsDone = 0; + while(true) { + if(eeMode) { + if(firstEe) { + firstEe = false; + eeMode = eeSaTups( + rd, // read + sh, // seed hits to extend into full alignments + gfmFw, // BWT + ref, // Reference strings + rnd, // pseudo-random generator + wlm, // group walk left metrics + swmSeed, // seed-extend metrics + nelt, // out: # elements total + maxIters, // max elts to report + all); // report all hits + assert_eq(gws_.size(), rands_.size()); + assert_eq(gws_.size(), satpos_.size()); + neltLeft = nelt; + // Initialize list that contains the mate-finding failure + // streak for each range + mateStreaks_.resize(gws_.size()); + mateStreaks_.fill(0); + } else { + eeMode = false; + } + } + if(!eeMode) { + if(nonz == 0) { + // No seed hits! Bail. + return EXTEND_EXHAUSTED_CANDIDATES; + } + if(msink->Mmode() && minsc == perfectScore) { + // Already found all perfect hits! 
+ return EXTEND_PERFECT_SCORE; + } + if(firstExtend) { + nelt = 0; + prioritizeSATups( + rd, // read + sh, // seed hits to extend into full alignments + gfmFw, // BWT + gfmBw, // BWT' + ref, // Reference strings + seedmms, // # seed mismatches allowed + maxIters, // max rows to consider per position + doExtend, // extend out seeds + true, // square extended length + true, // square SA range size + nsm, // smallness threshold + ca, // alignment cache for seed hits + rnd, // pseudo-random generator + wlm, // group walk left metrics + prm, // per-read metrics + nelt, // out: # elements total + all); // report all hits? + assert_eq(gws_.size(), rands_.size()); + assert_eq(gws_.size(), satpos_.size()); + neltLeft = nelt; + firstExtend = false; + mateStreaks_.resize(gws_.size()); + mateStreaks_.fill(0); + } + if(neltLeft == 0) { + // Finished examining gapped candidates + break; + } + } + for(index_t i = 0; i < gws_.size(); i++) { + if(eeMode && eehits_[i].score < minsc) { + return EXTEND_PERFECT_SCORE; + } + bool is_small = satpos_[i].sat.size() < nsm; + bool fw = satpos_[i].pos.fw; + index_t rdoff = satpos_[i].pos.rdoff; + index_t seedhitlen = satpos_[i].pos.seedlen; + if(!fw) { + // 'rdoff' and 'offidx' are with respect to the 5' end of + // the read. Here we convert rdoff to be with respect to + // the upstream (3') end of ther read. + rdoff = (index_t)(rdlen - rdoff - seedhitlen); + } + bool first = true; + // If the range is small, investigate all elements now. If the + // range is large, just investigate one and move on - we might come + // back to this range later. 
+ while(!rands_[i].done() && (first || is_small || eeMode)) { + if(minsc == perfectScore) { + if(!eeMode || eehits_[i].score < perfectScore) { + return EXTEND_PERFECT_SCORE; + } + } else if(eeMode && eehits_[i].score < minsc) { + break; + } + if(prm.nExDps >= maxDp || prm.nMateDps >= maxDp) { + return EXTEND_EXCEEDED_HARD_LIMIT; + } + if(prm.nExUgs >= maxUg || prm.nMateUgs >= maxUg) { + return EXTEND_EXCEEDED_HARD_LIMIT; + } + if(prm.nExIters >= maxIters) { + return EXTEND_EXCEEDED_HARD_LIMIT; + } + if(eeMode && prm.nEeFail >= maxEeStreak) { + return EXTEND_EXCEEDED_SOFT_LIMIT; + } + if(!eeMode && prm.nDpFail >= maxDpStreak) { + return EXTEND_EXCEEDED_SOFT_LIMIT; + } + if(!eeMode && prm.nUgFail >= maxUgStreak) { + return EXTEND_EXCEEDED_SOFT_LIMIT; + } + if(mateStreaks_[i] >= maxMateStreak) { + // Don't try this seed range anymore + rands_[i].setDone(); + assert(rands_[i].done()); + break; + } + prm.nExIters++; + first = false; + assert(!gws_[i].done()); + // Resolve next element offset + WalkResult wr; + uint32_t elt = rands_[i].next(rnd); + SARangeWithOffs sa; + sa.topf = satpos_[i].sat.topf; + sa.len = satpos_[i].sat.key.len; + sa.offs = satpos_[i].sat.offs; + gws_[i].advanceElement((index_t)elt, gfmFw, ref, sa, gwstate_, wr, wlm, prm); + eltsDone++; + assert_gt(neltLeft, 0); + neltLeft--; + assert_neq((index_t)OFF_MASK, wr.toff); + index_t tidx = 0, toff = 0, tlen = 0; + bool straddled = false; + gfmFw.joinedToTextOff( + wr.elt.len, + wr.toff, + tidx, + toff, + tlen, + eeMode, // reject straddlers? + straddled); // straddled? 
+ if(tidx == (index_t)OFF_MASK) { + // The seed hit straddled a reference boundary so the seed hit + // isn't valid + continue; + } +#ifndef NDEBUG + if(!eeMode && !straddled) { // Check that seed hit matches reference + uint64_t key = satpos_[i].sat.key.seq; + for(index_t k = 0; k < wr.elt.len; k++) { + int c = ref.getBase(tidx, toff + wr.elt.len - k - 1); + assert_leq(c, 3); + int ck = (int)(key & 3); + key >>= 2; + assert_eq(c, ck); + } + } +#endif + // Find offset of alignment's upstream base assuming net gaps=0 + // between beginning of read and beginning of seed hit + int64_t refoff = (int64_t)toff - rdoff; + EIvalMergeListBinned& seenDiags = anchor1 ? seenDiags1_ : seenDiags2_; + // Coordinate of the seed hit w/r/t the pasted reference string + Coord refcoord(tidx, refoff, fw); + if(seenDiags.locusPresent(refcoord)) { + // Already handled alignments seeded on this diagonal + prm.nRedundants++; + swmSeed.rshit++; + continue; + } + // Now that we have a seed hit, there are many issues to solve + // before we have a completely framed dynamic programming problem. + // They include: + // + // 1. Setting reference offsets on either side of the seed hit, + // accounting for where the seed occurs in the read + // 2. Adjusting the width of the banded dynamic programming problem + // and adjusting reference bounds to allow for gaps in the + // alignment + // 3. Accounting for the edges of the reference, which can impact + // the width of the DP problem and reference bounds. + // 4. Perhaps filtering the problem down to a smaller problem based + // on what DPs we've already solved for this read + // + // We do #1 here, since it is simple and we have all the seed-hit + // information here. #2 and #3 are handled in the DynProgFramer. 
+ int readGaps = 0, refGaps = 0; + bool ungapped = false; + if(!eeMode) { + readGaps = sc.maxReadGaps(minsc, rdlen); + refGaps = sc.maxRefGaps(minsc, rdlen); + ungapped = (readGaps == 0 && refGaps == 0); + } + int state = FOUND_NONE; + bool found = false; + // In unpaired mode, a seed extension is successful if it + // results in a full alignment that meets the minimum score + // threshold. In paired-end mode, a seed extension is + // successful if it results in a *full paired-end* alignment + // that meets the minimum score threshold. + if(eeMode) { + resEe_.reset(); + resEe_.alres.reset(); + const EEHit& h = eehits_[i]; + assert_leq(h.score, perfectScore); + resEe_.alres.setScore(AlnScore(h.score, h.ns(), 0)); + resEe_.alres.setShape( + refcoord.ref(), // ref id + refcoord.off(), // 0-based ref offset + tlen, // reference length + fw, // aligned to Watson? + rdlen, // read length + true, // pretrim soft? + 0, // pretrim 5' end + 0, // pretrim 3' end + true, // alignment trim soft? + 0, // alignment trim 5' end + 0); // alignment trim 3' end + resEe_.alres.setRefNs(h.refns()); + if(h.mms() > 0) { + assert_eq(1, h.mms()); + assert_lt(h.e1.pos, rd.length()); + resEe_.alres.ned().push_back(h.e1); + } + assert(resEe_.repOk(rd)); + state = FOUND_EE; + found = true; + Interval refival(refcoord, 1); + seenDiags.add(refival); + prm.nExEes++; + prm.nEeFail++; // say it's failed until proven successful + prm.nExEeFails++; + } else if(doUngapped && ungapped) { + resUngap_.reset(); + int al = swa.ungappedAlign( + fw ? rd.patFw : rd.patRc, + fw ? 
rd.qual : rd.qualRev, + refcoord, + ref, + tlen, + sc, + gReportOverhangs, + minsc, // minimum + resUngap_); + Interval refival(refcoord, 1); + seenDiags.add(refival); + prm.nExUgs++; + prm.nUgFail++; // say it's failed until proven successful + prm.nExUgFails++; + if(al == 0) { + swmSeed.ungapfail++; + continue; + } else if(al == -1) { + swmSeed.ungapnodec++; + } else { + found = true; + state = FOUND_UNGAPPED; + swmSeed.ungapsucc++; + } + } + int64_t pastedRefoff = (int64_t)wr.toff - rdoff; + DPRect rect; + if(state == FOUND_NONE) { + found = dpframe.frameSeedExtensionRect( + refoff, // ref offset implied by seed hit assuming no gaps + rows, // length of read sequence used in DP table + tlen, // length of reference + readGaps, // max # of read gaps permitted in opp mate alignment + refGaps, // max # of ref gaps permitted in opp mate alignment + (size_t)nceil, // # Ns permitted + maxhalf, // max width in either direction + rect); // DP rectangle + assert(rect.repOk()); + // Add the seed diagonal at least + seenDiags.add(Interval(refcoord, 1)); + if(!found) { + continue; + } + } + int64_t leftShift = refoff - rect.refl; + size_t nwindow = 0; + if(toff >= rect.refl) { + nwindow = (size_t)(toff - rect.refl); + } + // NOTE: We might be taking off more than we should because the + // pasted string omits non-A/C/G/T characters, but we included them + // when calculating leftShift. We'll account for this later. 
+ pastedRefoff -= leftShift; + size_t nsInLeftShift = 0; + if(state == FOUND_NONE) { + if(!swa.initedRead()) { + // Initialize the aligner with a new read + swa.initRead( + rd.patFw, // fw version of query + rd.patRc, // rc version of query + rd.qual, // fw version of qualities + rd.qualRev,// rc version of qualities + 0, // off of first char in 'rd' to consider + rdlen, // off of last char (excl) in 'rd' to consider + sc); // scoring scheme + } + swa.initRef( + fw, // whether to align forward or revcomp read + tidx, // reference aligned against + rect, // DP rectangle + ref, // Reference strings + tlen, // length of reference sequence + sc, // scoring scheme + minsc, // minimum score permitted + enable8, // use 8-bit SSE if possible? + cminlen, // minimum length for using checkpointing scheme + cpow2, // interval b/t checkpointed diags; 1 << this + doTri, // triangular mini-fills? + true, // this is a seed extension - not finding a mate + nwindow, + nsInLeftShift); + // Because of how we framed the problem, we can say that we've + // exhaustively scored the seed diagonal as well as maxgaps + // diagonals on either side + Interval refival(tidx, 0, fw, 0); + rect.initIval(refival); + seenDiags.add(refival); + // Now fill the dynamic programming matrix and return true iff + // there is at least one valid alignment + TAlScore bestCell = std::numeric_limits::min(); + found = swa.align(rnd, bestCell); + swmSeed.tallyGappedDp(readGaps, refGaps); + prm.nExDps++; + prm.nDpFail++; // failed until proven successful + prm.nExDpFails++; // failed until proven successful + if(!found) { + TAlScore bestLast = anchor1 ? 
prm.bestLtMinscMate1 : prm.bestLtMinscMate2; + if(bestCell > std::numeric_limits::min() && bestCell > bestLast) { + if(anchor1) { + prm.bestLtMinscMate1 = bestCell; + } else { + prm.bestLtMinscMate2 = bestCell; + } + } + continue; // Look for more anchor alignments + } + } + bool firstInner = true; + bool foundConcordant = false; + while(true) { + assert(found); + SwResult *res = NULL; + if(state == FOUND_EE) { + if(!firstInner) { + break; + } + res = &resEe_; + assert(res->repOk(rd)); + } else if(state == FOUND_UNGAPPED) { + if(!firstInner) { + break; + } + res = &resUngap_; + assert(res->repOk(rd)); + } else { + resGap_.reset(); + assert(resGap_.empty()); + if(swa.done()) { + break; + } + swa.nextAlignment(resGap_, minsc, rnd); + found = !resGap_.empty(); + if(!found) { + break; + } + res = &resGap_; + assert(res->repOk(rd)); + } + // TODO: If we're just taking anchor alignments out of the + // same rectangle, aren't we getting very similar + // rectangles for the opposite mate each time? Seems like + // we could save some work by detecting this. + assert(res != NULL); + firstInner = false; + assert(res->alres.matchesRef( + rd, + ref, + tmp_rf_, + tmp_rdseq_, + tmp_qseq_, + raw_refbuf_, + raw_destU32_, + raw_matches_, + tmp_reflens_, + tmp_refoffs_)); + Interval refival(tidx, 0, fw, tlen); + assert_gt(res->alres.refExtent(), 0); + if(gReportOverhangs && + !refival.containsIgnoreOrient(res->alres.refival())) + { + res->alres.clipOutside(true, 0, tlen); + if(res->alres.refExtent() == 0) { + continue; + } + } + assert(gReportOverhangs || + refival.containsIgnoreOrient(res->alres.refival())); + // Did the alignment fall entirely outside the reference? + if(!refival.overlapsIgnoreOrient(res->alres.refival())) { + continue; + } + // Is this alignment redundant with one we've seen previously? 
+ if(redAnchor_.overlap(res->alres)) { + continue; + } + redAnchor_.add(res->alres); + // Annotate the AlnRes object with some key parameters + // that were used to obtain the alignment. + res->alres.setParams( + seedmms, // # mismatches allowed in seed + seedlen, // length of seed + seedival, // interval between seeds + minsc); // minimum score for valid alignment + bool foundMate = false; + TRefOff off = res->alres.refoff(); + if( msink->state().doneWithMate(!anchor1) && + !msink->state().doneWithMate( anchor1)) + { + // We're done with the opposite mate but not with the + // anchor mate; don't try to mate up the anchor. + swMateImmediately = false; + } + if(found && swMateImmediately) { + assert(!msink->state().doneWithMate(!anchor1)); + bool oleft = false, ofw = false; + int64_t oll = 0, olr = 0, orl = 0, orr = 0; + assert(!msink->state().done()); + foundMate = !oppFilt; + TAlScore ominsc_cur = ominsc; + //bool oungapped = false; + int oreadGaps = 0, orefGaps = 0; + //int oungappedAlign = -1; // defer + if(foundMate) { + // Adjust ominsc given the alignment score of the + // anchor mate + ominsc_cur = ominsc; + if(tighten > 0 && msink->Mmode() && msink->hasSecondBestPair()) { + // Paired-end alignments should have at least this score from now + TAlScore ps; + if(tighten == 1) { + ps = msink->bestPair(); + } else if(tighten == 2) { + ps = msink->secondBestPair(); + } else { + TAlScore diff = msink->bestPair() - msink->secondBestPair(); + ps = msink->secondBestPair() + (diff * 3)/4; + } + if(tighten == 1 && ps < bestPairScore && + msink->bestPair() == msink->secondBestPair()) + { + ps++; + } + if(tighten >= 2 && ps < bestPairScore) { + ps++; + } + // Anchor mate must have score at least 'ps' minus the best possible + // score for the opposite mate. 
+ TAlScore nc = ps - res->alres.score().score(); + if(nc > ominsc_cur) { + ominsc_cur = nc; + assert_leq(ominsc_cur, operfectScore); + } + } + oreadGaps = sc.maxReadGaps(ominsc_cur, ordlen); + orefGaps = sc.maxRefGaps (ominsc_cur, ordlen); + //oungapped = (oreadGaps == 0 && orefGaps == 0); + // TODO: Something lighter-weight than DP to scan + // for other mate?? + //if(oungapped) { + // oresUngap_.reset(); + // oungappedAlign = oswa.ungappedAlign( + // ofw ? ord.patFw : ord.patRc, + // ofw ? ord.qual : ord.qualRev, + // orefcoord, + // ref, + // otlen, + // sc, + // gReportOverhangs, + // ominsc_cur, + // 0, + // oresUngap_); + //} + foundMate = pepol.otherMate( + anchor1, // anchor mate is mate #1? + fw, // anchor aligned to Watson? + off, // offset of anchor mate + orows + oreadGaps, // max # columns spanned by alignment + tlen, // reference length + anchor1 ? rd.length() : ord.length(), // mate 1 len + anchor1 ? ord.length() : rd.length(), // mate 2 len + oleft, // out: look left for opposite mate? 
+ oll, + olr, + orl, + orr, + ofw); + } + DPRect orect; + if(foundMate) { + foundMate = dpframe.frameFindMateRect( + !oleft, // true iff anchor alignment is to the left + oll, // leftmost Watson off for LHS of opp aln + olr, // rightmost Watson off for LHS of opp aln + orl, // leftmost Watson off for RHS of opp aln + orr, // rightmost Watson off for RHS of opp aln + orows, // length of opposite mate + tlen, // length of reference sequence aligned to + oreadGaps, // max # of read gaps in opp mate aln + orefGaps, // max # of ref gaps in opp mate aln + (size_t)onceil, // max # Ns on opp mate + maxhalf, // max width in either direction + orect); // DP rectangle + assert(!foundMate || orect.refr >= orect.refl); + } + if(foundMate) { + oresGap_.reset(); + assert(oresGap_.empty()); + if(!oswa.initedRead()) { + oswa.initRead( + ord.patFw, // read to align + ord.patRc, // qualities + ord.qual, // read to align + ord.qualRev,// qualities + 0, // off of first char to consider + ordlen, // off of last char (ex) to consider + sc); // scoring scheme + } + // Given the boundaries defined by refi and reff, initilize + // the SwAligner with the dynamic programming problem that + // aligns the read to this reference stretch. + size_t onsInLeftShift = 0; + assert_geq(orect.refr, orect.refl); + oswa.initRef( + ofw, // align forward or revcomp read? + tidx, // reference aligned against + orect, // DP rectangle + ref, // Reference strings + tlen, // length of reference sequence + sc, // scoring scheme + ominsc_cur,// min score for valid alignments + enable8, // use 8-bit SSE if possible? + cminlen, // minimum length for using checkpointing scheme + cpow2, // interval b/t checkpointed diags; 1 << this + doTri, // triangular mini-fills? + false, // this is finding a mate - not seed ext + 0, // nwindow? + onsInLeftShift); + // TODO: Can't we add some diagonals to the + // opposite mate's seenDiags when we fill in the + // opposite mate's DP? Or can we? 
We might want + // to use this again as an anchor - will that still + // happen? Also, isn't there a problem with + // consistency of the minimum score? Minimum score + // here depends in part on the score of the anchor + // alignment here, but it won't when the current + // opposite becomes the anchor. + + // Because of how we framed the problem, we can say + // that we've exhaustively explored the "core" + // diagonals + //Interval orefival(tidx, 0, ofw, 0); + //orect.initIval(orefival); + //oseenDiags.add(orefival); + + // Now fill the dynamic programming matrix, return true + // iff there is at least one valid alignment + TAlScore bestCell = std::numeric_limits::min(); + foundMate = oswa.align(rnd, bestCell); + prm.nMateDps++; + swmMate.tallyGappedDp(oreadGaps, orefGaps); + if(!foundMate) { + TAlScore bestLast = anchor1 ? prm.bestLtMinscMate2 : prm.bestLtMinscMate1; + if(bestCell > std::numeric_limits::min() && bestCell > bestLast) { + if(anchor1) { + prm.bestLtMinscMate2 = bestCell; + } else { + prm.bestLtMinscMate1 = bestCell; + } + } + } + } + bool didAnchor = false; + do { + oresGap_.reset(); + assert(oresGap_.empty()); + if(foundMate && oswa.done()) { + foundMate = false; + } else if(foundMate) { + oswa.nextAlignment(oresGap_, ominsc_cur, rnd); + foundMate = !oresGap_.empty(); + assert(!foundMate || oresGap_.alres.matchesRef( + ord, + ref, + tmp_rf_, + tmp_rdseq_, + tmp_qseq_, + raw_refbuf_, + raw_destU32_, + raw_matches_, + tmp_reflens_, + tmp_refoffs_)); + } + if(foundMate) { + // Redundant with one we've seen previously? + if(!redAnchor_.overlap(oresGap_.alres)) { + redAnchor_.add(oresGap_.alres); + } + assert_eq(ofw, oresGap_.alres.fw()); + // Annotate the AlnRes object with some key parameters + // that were used to obtain the alignment. 
+ oresGap_.alres.setParams( + seedmms, // # mismatches allowed in seed + seedlen, // length of seed + seedival, // interval between seeds + ominsc); // minimum score for valid alignment + assert_gt(oresGap_.alres.refExtent(), 0); + if(gReportOverhangs && + !refival.containsIgnoreOrient(oresGap_.alres.refival())) + { + oresGap_.alres.clipOutside(true, 0, tlen); + foundMate = oresGap_.alres.refExtent() > 0; + } + if(foundMate && + ((!gReportOverhangs && + !refival.containsIgnoreOrient(oresGap_.alres.refival())) || + !refival.overlapsIgnoreOrient(oresGap_.alres.refival()))) + { + foundMate = false; + } + } + ASSERT_ONLY(TRefId refid); + TRefOff off1, off2; + size_t len1, len2; + bool fw1, fw2; + int pairCl = PE_ALS_DISCORD; + if(foundMate) { + ASSERT_ONLY(refid =) res->alres.refid(); + assert_eq(refid, oresGap_.alres.refid()); + off1 = anchor1 ? off : oresGap_.alres.refoff(); + off2 = anchor1 ? oresGap_.alres.refoff() : off; + len1 = anchor1 ? + res->alres.refExtent() : oresGap_.alres.refExtent(); + len2 = anchor1 ? + oresGap_.alres.refExtent() : res->alres.refExtent(); + fw1 = anchor1 ? res->alres.fw() : oresGap_.alres.fw(); + fw2 = anchor1 ? oresGap_.alres.fw() : res->alres.fw(); + // Check that final mate alignments are consistent with + // paired-end fragment constraints + pairCl = pepol.peClassifyPair( + off1, + len1, + fw1, + off2, + len2, + fw2); + // Instead of trying + //foundMate = pairCl != PE_ALS_DISCORD; + } + if(msink->state().doneConcordant()) { + foundMate = false; + } + if(reportImmediately) { + if(foundMate) { + // Report pair to the AlnSinkWrap + assert(!msink->state().doneConcordant()); + assert(msink != NULL); + assert(res->repOk()); + assert(oresGap_.repOk()); + // Report an unpaired alignment + assert(!msink->maxed()); + assert(!msink->state().done()); + bool doneUnpaired = false; + //if(mixed || discord) { + // Report alignment for mate #1 as an + // unpaired alignment. 
+ if(!anchor1 || !didAnchor) { + if(anchor1) { + didAnchor = true; + } + const AlnRes& r1 = anchor1 ? + res->alres : oresGap_.alres; + if(!redMate1_.overlap(r1)) { + redMate1_.add(r1); + if(msink->report(0, &r1, NULL)) { + doneUnpaired = true; // Short-circuited + } + } + } + // Report alignment for mate #2 as an + // unpaired alignment. + if(anchor1 || !didAnchor) { + if(!anchor1) { + didAnchor = true; + } + const AlnRes& r2 = anchor1 ? + oresGap_.alres : res->alres; + if(!redMate2_.overlap(r2)) { + redMate2_.add(r2); + if(msink->report(0, NULL, &r2)) { + doneUnpaired = true; // Short-circuited + } + } + } + //} // if(mixed || discord) + bool donePaired = false; + if(pairCl != PE_ALS_DISCORD) { + foundConcordant = true; + if(msink->report( + 0, + anchor1 ? &res->alres : &oresGap_.alres, + anchor1 ? &oresGap_.alres : &res->alres)) + { + // Short-circuited because a limit, e.g. + // -k, -m or -M, was exceeded + donePaired = true; + } else { + if(tighten > 0 && msink->Mmode() && msink->hasSecondBestPair()) { + // Paired-end alignments should have at least this score from now + TAlScore ps; + if(tighten == 1) { + ps = msink->bestPair(); + } else if(tighten == 2) { + ps = msink->secondBestPair(); + } else { + TAlScore diff = msink->bestPair() - msink->secondBestPair(); + ps = msink->secondBestPair() + (diff * 3)/4; + } + if(tighten == 1 && ps < bestPairScore && + msink->bestPair() == msink->secondBestPair()) + { + ps++; + } + if(tighten >= 2 && ps < bestPairScore) { + ps++; + } + // Anchor mate must have score at least 'ps' minus the best possible + // score for the opposite mate. 
+ TAlScore nc = ps - operfectScore; + if(nc > minsc) { + minsc = nc; + assert_leq(minsc, perfectScore); + if(minsc > res->alres.score().score()) { + // We're done with this anchor + break; + } + } + assert_leq(minsc, perfectScore); + } + } + } // if(pairCl != PE_ALS_DISCORD) + if(donePaired || doneUnpaired) { + return EXTEND_POLICY_FULFILLED; + } + if(msink->state().doneWithMate(anchor1)) { + // We're now done with the mate that we're + // currently using as our anchor. We're not + // with the read overall. + return EXTEND_POLICY_FULFILLED; + } + } else if((mixed || discord) && !didAnchor) { + didAnchor = true; + // Report unpaired hit for anchor + assert(msink != NULL); + assert(res->repOk()); + // Check that alignment accurately reflects the + // reference characters aligned to + assert(res->alres.matchesRef( + rd, + ref, + tmp_rf_, + tmp_rdseq_, + tmp_qseq_, + raw_refbuf_, + raw_destU32_, + raw_matches_, + tmp_reflens_, + tmp_refoffs_)); + // Report an unpaired alignment + assert(!msink->maxed()); + assert(!msink->state().done()); + // Report alignment for mate #1 as an + // unpaired alignment. + if(!msink->state().doneUnpaired(anchor1)) { + const AlnRes& r = res->alres; + RedundantAlns& red = anchor1 ? redMate1_ : redMate2_; + const AlnRes* r1 = anchor1 ? &res->alres : NULL; + const AlnRes* r2 = anchor1 ? 
NULL : &res->alres; + if(!red.overlap(r)) { + red.add(r); + if(msink->report(0, r1, r2)) { + return EXTEND_POLICY_FULFILLED; // Short-circuited + } + } + } + if(msink->state().doneWithMate(anchor1)) { + // Done with mate, but not read overall + return EXTEND_POLICY_FULFILLED; + } + } + } + } while(!oresGap_.empty()); + } // if(found && swMateImmediately) + else if(found) { + assert(!msink->state().doneWithMate(anchor1)); + // We found an anchor alignment but did not attempt to find + // an alignment for the opposite mate (probably because + // we're done with it) + if(reportImmediately && (mixed || discord)) { + // Report unpaired hit for anchor + assert(msink != NULL); + assert(res->repOk()); + // Check that alignment accurately reflects the + // reference characters aligned to + assert(res->alres.matchesRef( + rd, + ref, + tmp_rf_, + tmp_rdseq_, + tmp_qseq_, + raw_refbuf_, + raw_destU32_, + raw_matches_, + tmp_reflens_, + tmp_refoffs_)); + // Report an unpaired alignment + assert(!msink->maxed()); + assert(!msink->state().done()); + // Report alignment for mate #1 as an + // unpaired alignment. + if(!msink->state().doneUnpaired(anchor1)) { + const AlnRes& r = res->alres; + RedundantAlns& red = anchor1 ? redMate1_ : redMate2_; + const AlnRes* r1 = anchor1 ? &res->alres : NULL; + const AlnRes* r2 = anchor1 ? NULL : &res->alres; + if(!red.overlap(r)) { + red.add(r); + if(msink->report(0, r1, r2)) { + return EXTEND_POLICY_FULFILLED; // Short-circuited + } + } + } + if(msink->state().doneWithMate(anchor1)) { + // Done with mate, but not read overall + return EXTEND_POLICY_FULFILLED; + } + } + } + } // while(true) + + if(foundConcordant) { + prm.nMateDpSuccs++; + mateStreaks_[i] = 0; + // Register this as a success. Now we need to + // make the streak variables reflect the + // success. 
+ if(state == FOUND_UNGAPPED) { + assert_gt(prm.nUgFail, 0); + assert_gt(prm.nExUgFails, 0); + prm.nExUgFails--; + prm.nExUgSuccs++; + prm.nUgLastSucc = prm.nExUgs-1; + if(prm.nUgFail > prm.nUgFailStreak) { + prm.nUgFailStreak = prm.nUgFail; + } + prm.nUgFail = 0; + } else if(state == FOUND_EE) { + assert_gt(prm.nEeFail, 0); + assert_gt(prm.nExEeFails, 0); + prm.nExEeFails--; + prm.nExEeSuccs++; + prm.nEeLastSucc = prm.nExEes-1; + if(prm.nEeFail > prm.nEeFailStreak) { + prm.nEeFailStreak = prm.nEeFail; + } + prm.nEeFail = 0; + } else { + assert_gt(prm.nDpFail, 0); + assert_gt(prm.nExDpFails, 0); + prm.nExDpFails--; + prm.nExDpSuccs++; + prm.nDpLastSucc = prm.nExDps-1; + if(prm.nDpFail > prm.nDpFailStreak) { + prm.nDpFailStreak = prm.nDpFail; + } + prm.nDpFail = 0; + } + } else { + prm.nMateDpFails++; + mateStreaks_[i]++; + } + // At this point we know that we aren't bailing, and will continue to resolve seed hits. + + } // while(!gw.done()) + } // for(size_t i = 0; i < gws_.size(); i++) + } + return EXTEND_EXHAUSTED_CANDIDATES; +} + +#endif /*ALIGNER_SW_DRIVER_H_*/ diff --git a/aligner_sw_nuc.h b/aligner_sw_nuc.h new file mode 100644 index 0000000..6bec1de --- /dev/null +++ b/aligner_sw_nuc.h @@ -0,0 +1,262 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */
+
+#ifndef ALIGNER_SW_NUC_H_
+#define ALIGNER_SW_NUC_H_
+
+#include  // NOTE(review): header name lost when this dump was flattened (angle-bracket text stripped) - restore from upstream
+#include "aligner_sw_common.h"
+#include "aligner_result.h"
+
+/**
+ * Encapsulates a backtrace stack frame. Includes enough information that we
+ * can "pop" back up to this frame and choose to make a different backtracking
+ * decision. The information included is:
+ *
+ * 1. The mask at the decision point. When we first move through the mask and
+ *    when we backtrack to it, we're careful to mask out the bit corresponding
+ *    to the path we're taking. When we move through it after removing the
+ *    last bit from the mask, we're careful to pop it from the stack.
+ * 2. The sizes of the edit lists. When we backtrack, we resize the lists back
+ *    down to these sizes to get rid of any edits introduced since the branch
+ *    point.
+ */
+struct DpNucFrame {
+
+    /**
+     * Initialize a new DpNucFrame stack frame.
+     */
+    void init(
+        size_t nedsz_,
+        size_t aedsz_,
+        size_t celsz_,
+        size_t row_,
+        size_t col_,
+        size_t gaps_,
+        size_t readGaps_,
+        size_t refGaps_,
+        AlnScore score_,
+        int ct_)
+    {
+        nedsz = nedsz_;
+        aedsz = aedsz_;
+        celsz = celsz_;
+        row = row_;
+        col = col_;
+        gaps = gaps_;
+        readGaps = readGaps_;
+        refGaps = refGaps_;
+        score = score_;
+        ct = ct_;
+    }
+
+    size_t nedsz; // size of the nucleotide edit list at branch (before
+                  // adding the branch edit)
+    size_t aedsz; // size of ambiguous nucleotide edit list at branch
+    size_t celsz; // size of cell-traversed list at branch
+    size_t row; // row of cell where branch occurred
+    size_t col; // column of cell where branch occurred
+    size_t gaps; // number of gaps before branch occurred
+    size_t readGaps; // number of read gaps before branch occurred
+    size_t refGaps; // number of ref gaps before branch occurred
+    AlnScore score; // score where branch occurred
+    int ct; // table type (oall, rdgap or rfgap)
+};
+
+enum {
+    BT_CAND_FATE_SUCCEEDED = 1,
+    BT_CAND_FATE_FAILED,
+    BT_CAND_FATE_FILT_START, // skipped b/c starting cell already explored
+    BT_CAND_FATE_FILT_DOMINATED, // skipped b/c it was dominated
+    BT_CAND_FATE_FILT_SCORE // skipped b/c score not interesting anymore
+};
+
+/**
+ * Encapsulates a cell that we might want to backtrace from.
+ */
+struct DpBtCandidate {
+
+    DpBtCandidate() { reset(); }
+
+    DpBtCandidate(size_t row_, size_t col_, TAlScore score_) {
+        init(row_, col_, score_);
+    }
+
+    void reset() { init(0, 0, 0); }
+
+    void init(size_t row_, size_t col_, TAlScore score_) {
+        row = row_;
+        col = col_;
+        score = score_;
+        // 0 = invalid; this should be set later according to what happens
+        // before / during the backtrace
+        fate = 0;
+    }
+
+    /**
+     * Return true iff this candidate is (heuristically) dominated by the given
+     * candidate. As implemented this is a pure proximity test: the answer is
+     * true whenever the two cells are at most SQ (= 40) rows and at most SQ
+     * columns apart, in either direction. Scores are not compared, so the
+     * relation is symmetric in the two candidates.
+     */
+    inline bool dominatedBy(const DpBtCandidate& o) {
+        const size_t SQ = 40;
+        size_t rowhi = row;
+        size_t rowlo = o.row;
+        if(rowhi < rowlo) swap(rowhi, rowlo);
+        size_t colhi = col;
+        size_t collo = o.col;
+        if(colhi < collo) swap(colhi, collo);
+        return (colhi - collo) <= SQ &&
+               (rowhi - rowlo) <= SQ;
+    }
+
+    /**
+     * Return true if this candidate is "greater than" (should be considered
+     * later than) the given candidate.
+     */
+    bool operator>(const DpBtCandidate& o) const {
+        if(score < o.score) return true;
+        if(score > o.score) return false;
+        if(row < o.row ) return true;
+        if(row > o.row ) return false;
+        if(col < o.col ) return true;
+        if(col > o.col ) return false;
+        return false;
+    }
+
+    /**
+     * Return true if this candidate is "less than" (should be considered
+     * sooner than) the given candidate.
+     */
+    bool operator<(const DpBtCandidate& o) const {
+        if(score > o.score) return true;
+        if(score < o.score) return false;
+        if(row > o.row ) return true;
+        if(row < o.row ) return false;
+        if(col > o.col ) return true;
+        if(col < o.col ) return false;
+        return false;
+    }
+
+    /**
+     * Return true if this candidate equals the given candidate.
+     */
+    bool operator==(const DpBtCandidate& o) const {
+        return row == o.row &&
+               col == o.col &&
+               score == o.score;
+    }
+    bool operator>=(const DpBtCandidate& o) const { return !((*this) < o); }
+    bool operator<=(const DpBtCandidate& o) const { return !((*this) > o); }
+
+#ifndef NDEBUG
+    /**
+     * Check internal consistency.
+     */
+    bool repOk() const {
+        assert(VALID_SCORE(score));
+        return true;
+    }
+#endif
+
+    size_t row; // cell row
+    size_t col; // cell column w/r/t LHS of rectangle
+    TAlScore score; // score of alignment
+    int fate; // flag indicating whether we succeeded, failed, skipped
+};
+
+template <typename T>
+class NBest {
+
+public:
+
+    NBest() { nelt_ = nbest_ = n_ = 0; }
+
+    bool inited() const { return nelt_ > 0; }
+
+    void init(size_t nelt, size_t nbest) {
+        nelt_ = nelt;
+        nbest_ = nbest;
+        elts_.resize(nelt * nbest);
+        ncur_.resize(nelt);
+        ncur_.fill(0);
+        n_ = 0;
+    }
+
+    /**
+     * Add a new result to bin 'elt'. Where it gets prioritized in the list of
+     * results in that bin depends on the result of operator>.
+     */
+    bool add(size_t elt, const T& o) {
+        assert_lt(elt, nelt_);
+        const size_t ncur = ncur_[elt];
+        assert_leq(ncur, nbest_);
+        n_++;
+        for(size_t i = 0; i < nbest_ && i <= ncur; i++) {
+            if(o > elts_[nbest_ * elt + i] || i >= ncur) {
+                // Insert it here
+                // Move everyone from here on down by one slot
+                for(int j = (int)ncur; j > (int)i; j--) {
+                    if(j < (int)nbest_) {
+                        elts_[nbest_ * elt + j] = elts_[nbest_ * elt + j - 1];
+                    }
+                }
+                elts_[nbest_ * elt + i] = o;
+                if(ncur < nbest_) {
+                    ncur_[elt]++;
+                }
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Return true iff there are no solutions.
+     */
+    bool empty() const {
+        return n_ == 0;
+    }
+
+    /**
+     * Dump all the items in our payload into the given EList.
+     */
+    template <typename TList>
+    void dump(TList& l) const {
+        if(empty()) return;
+        for(size_t i = 0; i < nelt_; i++) {
+            assert_leq(ncur_[i], nbest_);
+            for(size_t j = 0; j < ncur_[i]; j++) {
+                l.push_back(elts_[i * nbest_ + j]);
+            }
+        }
+    }
+
+protected:
+
+    size_t nelt_;
+    size_t nbest_;
+    EList<T> elts_;
+    EList<size_t> ncur_;
+    size_t n_; // total # add() calls (bumped even when add() returns false)
+};
+
+#endif /*def ALIGNER_SW_NUC_H_*/
diff --git a/aligner_swsse.cpp b/aligner_swsse.cpp
new file mode 100644
index 0000000..d4f7d78
--- /dev/null
+++ b/aligner_swsse.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2011, Ben Langmead
+ *
+ * This file is part of Bowtie 2.
+ *
+ * Bowtie 2 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Bowtie 2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include  // NOTE(review): header name lost when this dump was flattened (angle-bracket text stripped) - restore from upstream
+#include "aligner_sw_common.h"
+#include "aligner_swsse.h"
+
+/**
+ * Given a number of rows (nrow), a number of columns (ncol), and the
+ * number of words to fit inside a single __m128i vector, initialize the
+ * matrix buffer to accommodate the needed configuration of vectors.
+ */
+void SSEMatrix::init(
+    size_t nrow,
+    size_t ncol,
+    size_t wperv)
+{
+    nrow_ = nrow;
+    ncol_ = ncol;
+    wperv_ = wperv;
+    nvecPerCol_ = (nrow + (wperv-1)) / wperv;
+    // The +1 is so that we don't have to special-case the final column;
+    // instead, we just write off the end of the useful part of the table
+    // with pvEStore.
+    try {
+        matbuf_.resizeNoCopy((ncol+1) * nvecPerCell_ * nvecPerCol_);
+    } catch(exception&) {
+        cerr << "Tried to allocate DP matrix with " << (ncol+1)
+             << " columns, " << nvecPerCol_
+             << " vectors per column, and and " << nvecPerCell_
+             << " vectors per cell" << endl;
+        throw; // rethrow the in-flight exception; 'throw e;' would copy it as a plain 'exception' and lose its real type
+    }
+    assert(wperv_ == 8 || wperv_ == 16);
+    vecshift_ = (wperv_ == 8) ? 3 : 4;
+    nvecrow_ = (nrow + (wperv_-1)) >> vecshift_;
+    nveccol_ = ncol;
+    colstride_ = nvecPerCol_ * nvecPerCell_;
+    rowstride_ = nvecPerCell_;
+    inited_ = true;
+}
+
+/**
+ * Initialize the matrix of masks and backtracking flags.
+ */
+void SSEMatrix::initMasks() {
+    assert_gt(nrow_, 0);
+    assert_gt(ncol_, 0);
+    masks_.resize(nrow_);
+    reset_.resizeNoCopy(nrow_);
+    reset_.fill(false);
+}
+
+/**
+ * Given a row, col and matrix (i.e. E, F or H), return the corresponding
+ * element.
+ */
+int SSEMatrix::eltSlow(size_t row, size_t col, size_t mat) const {
+    assert_lt(row, nrow_);
+    assert_lt(col, ncol_);
+    assert_leq(mat, 3);
+    // Move to beginning of column/row
+    size_t rowelt = row / nvecrow_;
+    size_t rowvec = row % nvecrow_;
+    size_t eltvec = (col * colstride_) + (rowvec * rowstride_) + mat;
+    if(wperv_ == 16) {
+        return (int)((uint8_t*)(matbuf_.ptr() + eltvec))[rowelt];
+    } else {
+        assert_eq(8, wperv_);
+        return (int)((int16_t*)(matbuf_.ptr() + eltvec))[rowelt];
+    }
+}
diff --git a/aligner_swsse.h b/aligner_swsse.h
new file mode 100644
index 0000000..70d5c6d
--- /dev/null
+++ b/aligner_swsse.h
@@ -0,0 +1,500 @@
+/*
+ * Copyright 2011, Ben Langmead
+ *
+ * This file is part of Bowtie 2.
+ *
+ * Bowtie 2 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Bowtie 2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef ALIGNER_SWSSE_H_
+#define ALIGNER_SWSSE_H_
+
+#include "ds.h"
+#include "mem_ids.h"
+#include "random_source.h"
+#include "scoring.h"
+#include "mask.h"
+#include "sse_util.h"
+#include  // NOTE(review): header name lost when this dump was flattened (angle-bracket text stripped) - restore from upstream
+
+
+struct SSEMetrics {
+
+    SSEMetrics() : mutex_m() { reset(); } // every counter starts at zero
+
+    void clear() { reset(); } // synonym for reset()
+    void reset() { // zero all counters; mutex_m is not touched
+        dp = dpsat = dpfail = dpsucc = 0;
+        col = cell = inner = fixup = 0;
+        gathsol = bt = btfail = btsucc = btcell = 0;
+        corerej = nrej = 0;
+    }
+
+    void merge(const SSEMetrics& o, bool getLock = false) { // add each of o's counters to ours
+        ThreadSafe ts(&mutex_m, getLock); // guard built from our own mutex and the caller's getLock flag
+        dp      += o.dp;
+        dpsat   += o.dpsat;
+        dpfail  += o.dpfail;
+        dpsucc  += o.dpsucc;
+        col     += o.col;
+        cell    += o.cell;
+        inner   += o.inner;
+        fixup   += o.fixup;
+        gathsol += o.gathsol;
+        bt      += o.bt;
+        btfail  += o.btfail;
+        btsucc  += o.btsucc;
+        btcell  += o.btcell;
+        corerej += o.corerej;
+        nrej    += o.nrej;
+    }
+
+    uint64_t dp;      // DPs tried
+    uint64_t dpsat;   // DPs saturated
+    uint64_t dpfail;  // DPs failed
+    uint64_t dpsucc;  // DPs succeeded
+    uint64_t col;     // DP columns
+    uint64_t cell;    // DP cells
+    uint64_t inner;   // DP inner loop iters
+    uint64_t fixup;   // DP fixup loop iters
+    uint64_t gathsol; // DP gather solution cells found
+    uint64_t bt;      // DP backtraces
+    uint64_t btfail;  // DP backtraces failed
+    uint64_t btsucc;  // DP backtraces succeeded
+    uint64_t btcell;  // DP backtrace cells traversed
+    uint64_t corerej; // DP backtrace core rejections
+    uint64_t nrej;    // DP backtrace N rejections
+    MUTEX_T mutex_m;  // handed to ThreadSafe in merge()
+};
+
+/**
+ * Encapsulates matrix information calculated by the SSE aligner.
+ * + * Matrix memory is laid out as follows: + * + * - Elements (individual cell scores) are packed into __m128i vectors + * - Vectors are packed into quartets, quartet elements correspond to: a vector + * from E, one from F, one from H, and one that's "reserved" + * - Quartets are packed into columns, where the number of quartets is + * determined by the number of query characters divided by the number of + * elements per vector + * + * Regarding the "reserved" element of the vector quartet: we use it for two + * things. First, we use the first column of reserved vectors to stage the + * initial column of H vectors. Second, we use the "reserved" vectors during + * the backtrace procedure to store information about (a) which cells have been + * traversed, (b) whether the cell is "terminal" (in local mode), etc. + */ +struct SSEMatrix { + + // Each matrix element is a quartet of vectors. These constants are used + // to identify members of the quartet. + const static size_t E = 0; + const static size_t F = 1; + const static size_t H = 2; + const static size_t TMP = 3; + + SSEMatrix(int cat = 0) : nvecPerCell_(4), matbuf_(cat) { } + + /** + * Return a pointer to the matrix buffer. + */ + inline __m128i *ptr() { + assert(inited_); + return matbuf_.ptr(); + } + + /** + * Return a pointer to the E vector at the given row and column. Note: + * here row refers to rows of vectors, not rows of elements. + */ + inline __m128i* evec(size_t row, size_t col) { + assert_lt(row, nvecrow_); + assert_lt(col, nveccol_); + size_t elt = row * rowstride() + col * colstride() + E; + assert_lt(elt, matbuf_.size()); + return ptr() + elt; + } + + /** + * Like evec, but it's allowed to ask for a pointer to one column after the + * final one. 
+ */ + inline __m128i* evecUnsafe(size_t row, size_t col) { + assert_lt(row, nvecrow_); + assert_leq(col, nveccol_); + size_t elt = row * rowstride() + col * colstride() + E; + assert_lt(elt, matbuf_.size()); + return ptr() + elt; + } + + /** + * Return a pointer to the F vector at the given row and column. Note: + * here row refers to rows of vectors, not rows of elements. + */ + inline __m128i* fvec(size_t row, size_t col) { + assert_lt(row, nvecrow_); + assert_lt(col, nveccol_); + size_t elt = row * rowstride() + col * colstride() + F; + assert_lt(elt, matbuf_.size()); + return ptr() + elt; + } + + /** + * Return a pointer to the H vector at the given row and column. Note: + * here row refers to rows of vectors, not rows of elements. + */ + inline __m128i* hvec(size_t row, size_t col) { + assert_lt(row, nvecrow_); + assert_lt(col, nveccol_); + size_t elt = row * rowstride() + col * colstride() + H; + assert_lt(elt, matbuf_.size()); + return ptr() + elt; + } + + /** + * Return a pointer to the TMP vector at the given row and column. Note: + * here row refers to rows of vectors, not rows of elements. + */ + inline __m128i* tmpvec(size_t row, size_t col) { + assert_lt(row, nvecrow_); + assert_lt(col, nveccol_); + size_t elt = row * rowstride() + col * colstride() + TMP; + assert_lt(elt, matbuf_.size()); + return ptr() + elt; + } + + /** + * Like tmpvec, but it's allowed to ask for a pointer to one column after + * the final one. + */ + inline __m128i* tmpvecUnsafe(size_t row, size_t col) { + assert_lt(row, nvecrow_); + assert_leq(col, nveccol_); + size_t elt = row * rowstride() + col * colstride() + TMP; + assert_lt(elt, matbuf_.size()); + return ptr() + elt; + } + + /** + * Given a number of rows (nrow), a number of columns (ncol), and the + * number of words to fit inside a single __m128i vector, initialize the + * matrix buffer to accomodate the needed configuration of vectors. 
+ */ + void init( + size_t nrow, + size_t ncol, + size_t wperv); + + /** + * Return the number of __m128i's you need to skip over to get from one + * cell to the cell one column over from it. + */ + inline size_t colstride() const { return colstride_; } + + /** + * Return the number of __m128i's you need to skip over to get from one + * cell to the cell one row down from it. + */ + inline size_t rowstride() const { return rowstride_; } + + /** + * Given a row, col and matrix (i.e. E, F or H), return the corresponding + * element. + */ + int eltSlow(size_t row, size_t col, size_t mat) const; + + /** + * Given a row, col and matrix (i.e. E, F or H), return the corresponding + * element. + */ + inline int elt(size_t row, size_t col, size_t mat) const { + assert(inited_); + assert_lt(row, nrow_); + assert_lt(col, ncol_); + assert_lt(mat, 3); + // Move to beginning of column/row + size_t rowelt = row / nvecrow_; + size_t rowvec = row % nvecrow_; + size_t eltvec = (col * colstride_) + (rowvec * rowstride_) + mat; + assert_lt(eltvec, matbuf_.size()); + if(wperv_ == 16) { + return (int)((uint8_t*)(matbuf_.ptr() + eltvec))[rowelt]; + } else { + assert_eq(8, wperv_); + return (int)((int16_t*)(matbuf_.ptr() + eltvec))[rowelt]; + } + } + + /** + * Return the element in the E matrix at element row, col. + */ + inline int eelt(size_t row, size_t col) const { + return elt(row, col, E); + } + + /** + * Return the element in the F matrix at element row, col. + */ + inline int felt(size_t row, size_t col) const { + return elt(row, col, F); + } + + /** + * Return the element in the H matrix at element row, col. + */ + inline int helt(size_t row, size_t col) const { + return elt(row, col, H); + } + + /** + * Return true iff the given cell has its reportedThru bit set. + */ + inline bool reportedThrough( + size_t row, // current row + size_t col) const // current column + { + return (masks_[row][col] & (1 << 0)) != 0; + } + + /** + * Set the given cell's reportedThru bit. 
+	 */
+	inline void setReportedThrough(
+		size_t row, // current row
+		size_t col) // current column
+	{
+		// Bit 0 of the per-cell mask word is the reportedThru flag
+		masks_[row][col] |= (1 << 0);
+	}
+
+	/**
+	 * Return true iff the H mask has been set with a previous call to hMaskSet.
+	 * The "has been set" flag is bit 1 of the cell's mask word.
+	 */
+	bool isHMaskSet(
+		size_t row, // current row
+		size_t col) const; // current column
+
+	/**
+	 * Set the given cell's H mask.  This is the mask of remaining legal ways to
+	 * backtrack from the H cell at this coordinate.  It's 5 bits long and has
+	 * offset=2 into the 16-bit field.
+	 *
+	 * @param row  current row
+	 * @param col  current column
+	 * @param mask new mask value; must be < 32
+	 */
+	void hMaskSet(
+		size_t row, // current row
+		size_t col, // current column
+		int mask);
+
+	/**
+	 * Return true iff the E mask has been set with a previous call to eMaskSet.
+	 * The "has been set" flag is bit 7 of the cell's mask word.
+	 */
+	bool isEMaskSet(
+		size_t row, // current row
+		size_t col) const; // current column
+
+	/**
+	 * Set the given cell's E mask.  This is the mask of remaining legal ways to
+	 * backtrack from the E cell at this coordinate.  It's 2 bits long and has
+	 * offset=8 into the 16-bit field.
+	 *
+	 * @param row  current row
+	 * @param col  current column
+	 * @param mask new mask value; must be < 4
+	 */
+	void eMaskSet(
+		size_t row, // current row
+		size_t col, // current column
+		int mask);
+
+	/**
+	 * Return true iff the F mask has been set with a previous call to fMaskSet.
+	 * The "has been set" flag is bit 10 of the cell's mask word.
+	 */
+	bool isFMaskSet(
+		size_t row, // current row
+		size_t col) const; // current column
+
+	/**
+	 * Set the given cell's F mask.  This is the mask of remaining legal ways to
+	 * backtrack from the F cell at this coordinate.  It's 2 bits long and has
+	 * offset=11 into the 16-bit field.
+	 *
+	 * @param row  current row
+	 * @param col  current column
+	 * @param mask new mask value; must be < 4
+	 */
+	void fMaskSet(
+		size_t row, // current row
+		size_t col, // current column
+		int mask);
+
+	/**
+	 * Analyze a cell in the SSE-filled dynamic programming matrix.  Determine &
+	 * memorize ways that we can backtrack from the cell.  If there is at least one
+	 * way to backtrack, select one at random and return the selection.
+	 *
+	 * There are a few subtleties to keep in mind regarding which cells can be at
+	 * the end of a backtrace.  First of all: cells from which we can backtrack
+	 * should not be at the end of a backtrace.
+	 * But we have to distinguish between
+	 * cells whose masks eventually become 0 (we shouldn't end at those), from
+	 * those whose masks were 0 all along (we can end at those).
+	 */
+	void analyzeCell(
+		size_t row, // current row
+		size_t col, // current column
+		size_t ct, // current cell type: E/F/H
+		int refc,
+		int readc,
+		int readq,
+		const Scoring& sc, // scoring scheme
+		int64_t offsetsc, // offset to add to each score
+		RandomSource& rand, // rand gen for choosing among equal options
+		bool& empty, // out: =true iff no way to backtrace
+		int& cur, // out: =type of transition
+		bool& branch, // out: =true iff we chose among >1 options
+		bool& canMoveThru, // out: =true iff ...
+		bool& reportedThru); // out: =true iff ...
+
+	/**
+	 * Initialize the matrix of masks and backtracking flags.
+	 */
+	void initMasks();
+
+	/**
+	 * Return the number of rows in the dynamic programming matrix.
+	 */
+	size_t nrow() const {
+		return nrow_;
+	}
+
+	/**
+	 * Return the number of columns in the dynamic programming matrix.
+	 */
+	size_t ncol() const {
+		return ncol_;
+	}
+
+	/**
+	 * Prepare a row so we can use it to store masks: size the row's mask
+	 * list to ncol_ entries, zero it, and record the row as reset.  Asserts
+	 * that the row has not been reset before.
+	 */
+	void resetRow(size_t i) {
+		assert(!reset_[i]);
+		// resizeNoCopy presumably drops the old contents; the row is zeroed
+		// right after, so nothing stale survives either way
+		masks_[i].resizeNoCopy(ncol_);
+		masks_[i].fillZero();
+		reset_[i] = true;
+	}
+
+	bool inited_; // initialized?
+ size_t nrow_; // # rows + size_t ncol_; // # columns + size_t nvecrow_; // # vector rows (<= nrow_) + size_t nveccol_; // # vector columns (<= ncol_) + size_t wperv_; // # words per vector + size_t vecshift_; // # bits to shift to divide by words per vec + size_t nvecPerCol_; // # vectors per column + size_t nvecPerCell_; // # vectors per matrix cell (4) + size_t colstride_; // # vectors b/t adjacent cells in same row + size_t rowstride_; // # vectors b/t adjacent cells in same col + EList_m128i matbuf_; // buffer for holding vectors + ELList masks_; // buffer for masks/backtracking flags + EList reset_; // true iff row in masks_ has been reset +}; + +/** + * All the data associated with the query profile and other data needed for SSE + * alignment of a query. + */ +struct SSEData { + SSEData(int cat = 0) : profbuf_(cat), mat_(cat) { } + EList_m128i profbuf_; // buffer for query profile & temp vecs + EList_m128i vecbuf_; // buffer for 2 column vectors (not using mat_) + size_t qprofStride_; // stride for query profile + size_t gbarStride_; // gap barrier for query profile + SSEMatrix mat_; // SSE matrix for holding all E, F, H vectors + size_t maxPen_; // biggest penalty of all + size_t maxBonus_; // biggest bonus of all + size_t lastIter_; // which 128-bit striped word has final row? + size_t lastWord_; // which word within 128-word has final row? + int bias_; // all scores shifted up by this for unsigned +}; + +/** + * Return true iff the H mask has been set with a previous call to hMaskSet. + */ +inline bool SSEMatrix::isHMaskSet( + size_t row, // current row + size_t col) const // current column +{ + return (masks_[row][col] & (1 << 1)) != 0; +} + +/** + * Set the given cell's H mask. This is the mask of remaining legal ways to + * backtrack from the H cell at this coordinate. It's 5 bits long and has + * offset=2 into the 16-bit field. 
+ */ +inline void SSEMatrix::hMaskSet( + size_t row, // current row + size_t col, // current column + int mask) +{ + assert_lt(mask, 32); + masks_[row][col] &= ~(31 << 1); + masks_[row][col] |= (1 << 1 | mask << 2); +} + +/** + * Return true iff the E mask has been set with a previous call to eMaskSet. + */ +inline bool SSEMatrix::isEMaskSet( + size_t row, // current row + size_t col) const // current column +{ + return (masks_[row][col] & (1 << 7)) != 0; +} + +/** + * Set the given cell's E mask. This is the mask of remaining legal ways to + * backtrack from the E cell at this coordinate. It's 2 bits long and has + * offset=8 into the 16-bit field. + */ +inline void SSEMatrix::eMaskSet( + size_t row, // current row + size_t col, // current column + int mask) +{ + assert_lt(mask, 4); + masks_[row][col] &= ~(7 << 7); + masks_[row][col] |= (1 << 7 | mask << 8); +} + +/** + * Return true iff the F mask has been set with a previous call to fMaskSet. + */ +inline bool SSEMatrix::isFMaskSet( + size_t row, // current row + size_t col) const // current column +{ + return (masks_[row][col] & (1 << 10)) != 0; +} + +/** + * Set the given cell's F mask. This is the mask of remaining legal ways to + * backtrack from the F cell at this coordinate. It's 2 bits long and has + * offset=11 into the 16-bit field. + */ +inline void SSEMatrix::fMaskSet( + size_t row, // current row + size_t col, // current column + int mask) +{ + assert_lt(mask, 4); + masks_[row][col] &= ~(7 << 10); + masks_[row][col] |= (1 << 10 | mask << 11); +} + +#define ROWSTRIDE_2COL 4 +#define ROWSTRIDE 4 + +#endif /*ndef ALIGNER_SWSSE_H_*/ diff --git a/aligner_swsse_ee_i16.cpp b/aligner_swsse_ee_i16.cpp new file mode 100644 index 0000000..4a28646 --- /dev/null +++ b/aligner_swsse_ee_i16.cpp @@ -0,0 +1,1911 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ *
+ * Bowtie 2 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Bowtie 2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * aligner_sw_sse.cpp
+ *
+ * Versions of key alignment functions that use vector instructions to
+ * accelerate dynamic programming.  Based chiefly on the striped Smith-Waterman
+ * paper and implementation by Michael Farrar.  See:
+ *
+ * Farrar M. Striped Smith-Waterman speeds database searches six times over
+ * other SIMD implementations.  Bioinformatics. 2007 Jan 15;23(2):156-61.
+ * http://sites.google.com/site/farrarmichael/smith-waterman
+ *
+ * While the paper describes an implementation of Smith-Waterman, we extend it
+ * to do end-to-end read alignment as well as local alignment.  The change
+ * required for this is minor: we simply let vmax be the maximum element in the
+ * score domain rather than the minimum.
+ *
+ * The vectorized dynamic programming implementation lacks some features, which
+ * makes it hard to adapt to solving the entire dynamic-programming alignment
+ * problem.  For instance:
+ *
+ * - It doesn't respect gap barriers on either end of the read
+ * - It just gives a maximum; not enough information to backtrace without
+ *   redoing some alignment
+ * - It's a little difficult to handle st_ and en_, especially st_.
+ * - The query profile mechanism makes handling of ambiguous reference bases a + * little tricky (16 cols in query profile lookup table instead of 5) + * + * Given the drawbacks, it is tempting to use SSE dynamic programming as a + * filter rather than as an aligner per se. Here are a few ideas for how it + * can be extended to handle more of the alignment problem: + * + * - Save calculated scores to a big array as we go. We return to this array + * to find and backtrace from good solutions. + */ + +#include +#include "aligner_sw.h" + +static const size_t NBYTES_PER_REG = 16; +static const size_t NWORDS_PER_REG = 8; +static const size_t NBITS_PER_WORD = 16; +static const size_t NBYTES_PER_WORD = 2; + +// In 16-bit end-to-end mode, we have the option of using signed saturated +// arithmetic. Because we have signed arithmetic, there's no need to add/subtract +// bias when building an applying the query profile. The lowest value we can +// use is 0x8000, and the greatest is 0x7fff. + +typedef int16_t TCScore; + +/** + * Build query profile look up tables for the read. The query profile look + * up table is organized as a 1D array indexed by [i][j] where i is the + * reference character in the current DP column (0=A, 1=C, etc), and j is + * the segment of the query we're currently working on. + */ +void SwAligner::buildQueryProfileEnd2EndSseI16(bool fw) { + bool& done = fw ? sseI16fwBuilt_ : sseI16rcBuilt_; + if(done) { + return; + } + done = true; + const BTDnaString* rd = fw ? rdfw_ : rdrc_; + const BTString* qu = fw ? qufw_ : qurc_; + // daehwan - allows to align a portion of a read, not the whole + // const size_t len = rd->length(); + const size_t len = dpRows(); + const size_t seglen = (len + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; + // How many __m128i's are needed + size_t n128s = + 64 + // slack bytes, for alignment? + (seglen * ALPHA_SIZE) // query profile data + * 2; // & gap barrier data + assert_gt(n128s, 0); + SSEData& d = fw ? 
sseI16fw_ : sseI16rc_; + d.profbuf_.resizeNoCopy(n128s); + assert(!d.profbuf_.empty()); + d.maxPen_ = d.maxBonus_ = 0; + d.lastIter_ = d.lastWord_ = 0; + d.qprofStride_ = d.gbarStride_ = 2; + d.bias_ = 0; // no bias when words are signed + // For each reference character A, C, G, T, N ... + for(size_t refc = 0; refc < ALPHA_SIZE; refc++) { + // For each segment ... + for(size_t i = 0; i < seglen; i++) { + size_t j = i; + int16_t *qprofWords = + reinterpret_cast(d.profbuf_.ptr() + (refc * seglen * 2) + (i * 2)); + int16_t *gbarWords = + reinterpret_cast(d.profbuf_.ptr() + (refc * seglen * 2) + (i * 2) + 1); + // For each sub-word (byte) ... + for(size_t k = 0; k < NWORDS_PER_REG; k++) { + int sc = 0; + *gbarWords = 0; + if(j < len) { + int readc = (*rd)[j]; + int readq = (*qu)[j]; + sc = sc_->score(readc, (int)(1 << refc), readq - 33); + size_t j_from_end = len - j - 1; + if(j < (size_t)sc_->gapbar || + j_from_end < (size_t)sc_->gapbar) + { + // Inside the gap barrier + *gbarWords = 0x8000; // add this twice + } + } + if(refc == 0 && j == len-1) { + // Remember which 128-bit word and which smaller word has + // the final row + d.lastIter_ = i; + d.lastWord_ = k; + } + if(sc < 0) { + if((size_t)(-sc) > d.maxPen_) { + d.maxPen_ = (size_t)(-sc); + } + } else { + if((size_t)sc > d.maxBonus_) { + d.maxBonus_ = (size_t)sc; + } + } + *qprofWords = (int16_t)sc; + gbarWords++; + qprofWords++; + j += seglen; // update offset into query + } + } + } +} + +#ifndef NDEBUG +/** + * Return true iff the cell has sane E/F/H values w/r/t its predecessors. 
+ */ +static bool cellOkEnd2EndI16( + SSEData& d, + size_t row, + size_t col, + int refc, + int readc, + int readq, + const Scoring& sc) // scoring scheme +{ + TCScore floorsc = 0x8000; + TCScore ceilsc = MAX_I64; + TAlScore offsetsc = -0x7fff; + TAlScore sc_h_cur = (TAlScore)d.mat_.helt(row, col); + TAlScore sc_e_cur = (TAlScore)d.mat_.eelt(row, col); + TAlScore sc_f_cur = (TAlScore)d.mat_.felt(row, col); + if(sc_h_cur > floorsc) { + sc_h_cur += offsetsc; + } + if(sc_e_cur > floorsc) { + sc_e_cur += offsetsc; + } + if(sc_f_cur > floorsc) { + sc_f_cur += offsetsc; + } + bool gapsAllowed = true; + size_t rowFromEnd = d.mat_.nrow() - row - 1; + if(row < (size_t)sc.gapbar || rowFromEnd < (size_t)sc.gapbar) { + gapsAllowed = false; + } + bool e_left_trans = false, h_left_trans = false; + bool f_up_trans = false, h_up_trans = false; + bool h_diag_trans = false; + if(gapsAllowed) { + TAlScore sc_h_left = floorsc; + TAlScore sc_e_left = floorsc; + TAlScore sc_h_up = floorsc; + TAlScore sc_f_up = floorsc; + if(col > 0 && sc_e_cur > floorsc && sc_e_cur <= ceilsc) { + sc_h_left = d.mat_.helt(row, col-1) + offsetsc; + sc_e_left = d.mat_.eelt(row, col-1) + offsetsc; + e_left_trans = (sc_e_left > floorsc && sc_e_cur == sc_e_left - sc.readGapExtend()); + h_left_trans = (sc_h_left > floorsc && sc_e_cur == sc_h_left - sc.readGapOpen()); + assert(e_left_trans || h_left_trans); + } + if(row > 0 && sc_f_cur > floorsc && sc_f_cur <= ceilsc) { + sc_h_up = d.mat_.helt(row-1, col) + offsetsc; + sc_f_up = d.mat_.felt(row-1, col) + offsetsc; + f_up_trans = (sc_f_up > floorsc && sc_f_cur == sc_f_up - sc.refGapExtend()); + h_up_trans = (sc_h_up > floorsc && sc_f_cur == sc_h_up - sc.refGapOpen()); + assert(f_up_trans || h_up_trans); + } + } else { + assert_geq(floorsc, sc_e_cur); + assert_geq(floorsc, sc_f_cur); + } + if(col > 0 && row > 0 && sc_h_cur > floorsc && sc_h_cur <= ceilsc) { + TAlScore sc_h_upleft = d.mat_.helt(row-1, col-1) + offsetsc; + TAlScore sc_diag = sc.score(readc, 
(int)refc, readq - 33); + h_diag_trans = sc_h_cur == sc_h_upleft + sc_diag; + } + assert( + sc_h_cur <= floorsc || + e_left_trans || + h_left_trans || + f_up_trans || + h_up_trans || + h_diag_trans || + sc_h_cur > ceilsc || + row == 0 || + col == 0); + return true; +} +#endif /*ndef NDEBUG*/ + +#ifdef NDEBUG + +#define assert_all_eq0(x) +#define assert_all_gt(x, y) +#define assert_all_gt_lo(x) +#define assert_all_lt(x, y) +#define assert_all_lt_hi(x) + +#else + +#define assert_all_eq0(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_xor_si128(z, z); \ + tmp = _mm_cmpeq_epi16(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_gt(x, y) { \ + __m128i tmp = _mm_cmpgt_epi16(x, y); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_gt_lo(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_xor_si128(z, z); \ + tmp = _mm_cmpgt_epi16(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_lt(x, y) { \ + __m128i tmp = _mm_cmplt_epi16(x, y); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_leq(x, y) { \ + __m128i tmp = _mm_cmpgt_epi16(x, y); \ + assert_eq(0x0000, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_lt_hi(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_cmpeq_epi16(z, z); \ + z = _mm_srli_epi16(z, 1); \ + tmp = _mm_cmplt_epi16(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} +#endif + +/** + * Aligns by filling a dynamic programming matrix with the SSE-accelerated, + * banded DP approach of Farrar. As it goes, it determines which cells we + * might backtrace from and tallies the best (highest-scoring) N backtrace + * candidate cells per diagonal. Also returns the alignment score of the best + * alignment in the matrix. 
+ *
+ * This routine does *not* maintain a matrix holding the entire matrix worth of
+ * scores, nor does it maintain any other dense O(mn) data structure, as this
+ * would quickly exhaust memory for queries longer than about 10,000 kb.
+ * Instead, in the fill stage it maintains two columns worth of scores at a
+ * time (current/previous, or right/left) - these take O(m) space.  When
+ * finished with the current column, it determines which cells from the
+ * previous column, if any, are candidates we might backtrace from to find a
+ * full alignment.  A candidate cell has a score that rises above the threshold
+ * and isn't improved upon by a match in the next column.  The best N
+ * candidates per diagonal are stored in a O(m + n) data structure.
+ *
+ * flag (out) is set to 0 on success, to -1 when the best final-row score is
+ * below minsc_, and to -2 when the best final-row word equals MIN_I16
+ * ("saturated").  The return value is the best final-row score
+ * (lrmax - 0x7fff), or MIN_I64 in the not-found and saturated cases.
+ *
+ * When debug is true, metrics are not updated and btdiag_ / btncand_ are
+ * left untouched.
+ */
+TAlScore SwAligner::alignGatherEE16(int& flag, bool debug) {
+	assert_leq(rdf_, rd_->length());
+	assert_leq(rdf_, qu_->length());
+	assert_lt(rfi_, rff_);
+	assert_lt(rdi_, rdf_);
+	assert_eq(rd_->length(), qu_->length());
+	assert_geq(sc_->gapbar, 1);
+	assert(repOk());
+#ifndef NDEBUG
+	for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) {
+		assert_range(0, 16, (int)rf_[i]);
+	}
+#endif
+
+	SSEData& d = fw_ ? sseI16fw_ : sseI16rc_;
+	SSEMetrics& met = extend_ ? sseI16ExtendMet_ : sseI16MateMet_;
+	if(!debug) met.dp++;
+	buildQueryProfileEnd2EndSseI16(fw_);
+	assert(!d.profbuf_.empty());
+
+	// End-to-end scoring: no positive bonuses are expected in the profile
+	assert_eq(0, d.maxBonus_);
+	size_t iter =
+		(dpRows() + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; // iter = segLen
+
+	// Now set up the score vectors.  We just need two columns worth, which
+	// we'll call "left" and "right".
+	// Each column takes 4 vectors per segment (the literal 4 matches
+	// ROWSTRIDE_2COL, which is what the loops below step by).
+	d.vecbuf_.resize(4 * 2 * iter);
+	d.vecbuf_.zero();
+	__m128i *vbuf_l = d.vecbuf_.ptr();
+	__m128i *vbuf_r = d.vecbuf_.ptr() + (4 * iter);
+
+	// This is the data structure that holds candidate cells per diagonal.
+	const size_t ndiags = rff_ - rfi_ + dpRows() - 1;
+	if(!debug) {
+		btdiag_.init(ndiags, 2);
+	}
+
+	// Data structure that holds checkpointed anti-diagonals
+	TAlScore perfectScore = sc_->perfectScore(dpRows());
+	bool checkpoint = true;
+	bool cpdebug = false;
+#ifndef NDEBUG
+	cpdebug = dpRows() < 1000;
+#endif
+	cper_.init(
+		dpRows(), // # rows
+		rff_ - rfi_, // # columns
+		cperPerPow2_, // checkpoint every 1 << perpow2 diags (& next)
+		perfectScore, // perfect score (for sanity checks)
+		false, // matrix cells have 8-bit scores?
+		cperTri_, // triangular mini-fills?
+		false, // alignment is local?
+		cpdebug); // save all cells for debugging?
+
+	// Many thanks to Michael Farrar for releasing his striped Smith-Waterman
+	// implementation:
+	//
+	// http://sites.google.com/site/farrarmichael/smith-waterman
+	//
+	// Much of the implementation below is adapted from Michael's code.
+
+	// Set all elts to reference gap open penalty
+	__m128i rfgapo = _mm_setzero_si128();
+	__m128i rfgape = _mm_setzero_si128();
+	__m128i rdgapo = _mm_setzero_si128();
+	__m128i rdgape = _mm_setzero_si128();
+	__m128i vlo = _mm_setzero_si128();
+	__m128i vhi = _mm_setzero_si128();
+	__m128i vhilsw = _mm_setzero_si128();
+	__m128i vlolsw = _mm_setzero_si128();
+	__m128i ve = _mm_setzero_si128();
+	__m128i vf = _mm_setzero_si128();
+	__m128i vh = _mm_setzero_si128();
+	__m128i vhd = _mm_setzero_si128();
+	__m128i vhdtmp = _mm_setzero_si128();
+	__m128i vtmp = _mm_setzero_si128();
+
+	assert_gt(sc_->refGapOpen(), 0);
+	assert_leq(sc_->refGapOpen(), MAX_I16);
+	// Insert into word 0, then broadcast word 0 to all eight words
+	rfgapo = _mm_insert_epi16(rfgapo, sc_->refGapOpen(), 0);
+	rfgapo = _mm_shufflelo_epi16(rfgapo, 0);
+	rfgapo = _mm_shuffle_epi32(rfgapo, 0);
+
+	// Set all elts to reference gap extension penalty
+	assert_gt(sc_->refGapExtend(), 0);
+	assert_leq(sc_->refGapExtend(), MAX_I16);
+	assert_leq(sc_->refGapExtend(), sc_->refGapOpen());
+	rfgape = _mm_insert_epi16(rfgape, sc_->refGapExtend(), 0);
+	rfgape = _mm_shufflelo_epi16(rfgape, 0);
+	rfgape = _mm_shuffle_epi32(rfgape, 0);
+
+	// Set all elts to read gap open penalty
+	assert_gt(sc_->readGapOpen(), 0);
+	assert_leq(sc_->readGapOpen(), MAX_I16);
+	rdgapo = _mm_insert_epi16(rdgapo, sc_->readGapOpen(), 0);
+	rdgapo = _mm_shufflelo_epi16(rdgapo, 0);
+	rdgapo = _mm_shuffle_epi32(rdgapo, 0);
+
+	// Set all elts to read gap extension penalty
+	assert_gt(sc_->readGapExtend(), 0);
+	assert_leq(sc_->readGapExtend(), MAX_I16);
+	assert_leq(sc_->readGapExtend(), sc_->readGapOpen());
+	rdgape = _mm_insert_epi16(rdgape, sc_->readGapExtend(), 0);
+	rdgape = _mm_shufflelo_epi16(rdgape, 0);
+	rdgape = _mm_shuffle_epi32(rdgape, 0);
+
+	// Set all elts to 0x8000 (min value for signed 16-bit)
+	vlo = _mm_cmpeq_epi16(vlo, vlo); // all elts = 0xffff
+	vlo = _mm_slli_epi16(vlo, NBITS_PER_WORD-1); // all elts = 0x8000
+
+	// Set all elts to 0x7fff (max value for signed 16-bit)
+	vhi = _mm_cmpeq_epi16(vhi, vhi); // all elts = 0xffff
+	vhi = _mm_srli_epi16(vhi, 1); // all elts = 0x7fff
+
+	// vlolsw: topmost (least sig) word set to 0x8000, all other words=0
+	vlolsw = _mm_shuffle_epi32(vlo, 0);
+	vlolsw = _mm_srli_si128(vlolsw, NBYTES_PER_REG - NBYTES_PER_WORD);
+
+	// vhilsw: topmost (least sig) word set to 0x7fff, all other words=0
+	vhilsw = _mm_shuffle_epi32(vhi, 0);
+	vhilsw = _mm_srli_si128(vhilsw, NBYTES_PER_REG - NBYTES_PER_WORD);
+
+	// Points to a long vector of __m128i where each element is a block of
+	// contiguous cells in the E, F or H matrix.  If the index % 3 == 0, then
+	// the block of cells is from the E matrix.  If index % 3 == 1, they're
+	// from the F matrix.  If index % 3 == 2, then they're from the H matrix.
+	// Blocks of cells are organized in the same interleaved manner as they are
+	// calculated by the Farrar algorithm.
+	// NOTE(review): the "% 3" wording above looks stale -- the buffers below
+	// are stepped by ROWSTRIDE_2COL vectors with E, F and H at offsets 0, 1
+	// and 2, and pvScore points into the query profile.
+	const __m128i *pvScore; // points into the query profile
+
+	const size_t colstride = ROWSTRIDE_2COL * iter;
+
+	// Initialize the H and E vectors in the first matrix column
+	__m128i *pvELeft = vbuf_l + 0; __m128i *pvERight = vbuf_r + 0;
+	/* __m128i *pvFLeft = vbuf_l + 1; */ __m128i *pvFRight = vbuf_r + 1;
+	__m128i *pvHLeft = vbuf_l + 2; __m128i *pvHRight = vbuf_r + 2;
+
+	// Maximum score in final row
+	bool found = false;
+	TCScore lrmax = MIN_I16;
+
+	for(size_t i = 0; i < iter; i++) {
+		_mm_store_si128(pvERight, vlo); pvERight += ROWSTRIDE_2COL;
+		// Could initialize Hs to high or low.  If high, cells in the lower
+		// triangle will have somewhat more legitimate scores, but still won't
+		// be exhaustively scored.
+		_mm_store_si128(pvHRight, vlo); pvHRight += ROWSTRIDE_2COL;
+	}
+
+	assert_gt(sc_->gapbar, 0);
+	size_t nfixup = 0;
+
+	// Fill in the table as usual but instead of using the same gap-penalty
+	// vector for each iteration of the inner loop, load words out of a
+	// pre-calculated gap vector parallel to the query profile.  The pre-
+	// calculated gap vectors enforce the gap barrier constraint by making it
+	// infinitely costly to introduce a gap in barrier rows.
+	//
+	// AND use a separate loop to fill in the first row of the table, enforcing
+	// the st_ constraints in the process.  This is awkward because it
+	// separates the processing of the first row from the others and might make
+	// it difficult to use the first-row results in the next row, but it might
+	// be the simplest and least disruptive way to deal with the st_ constraint.
+
+	for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) {
+		// Swap left and right; vbuf_l is the vector on the left, which we
+		// generally load from, and vbuf_r is the vector on the right, which we
+		// generally store to.
+		swap(vbuf_l, vbuf_r);
+		pvELeft = vbuf_l + 0; pvERight = vbuf_r + 0;
+		/* pvFLeft = vbuf_l + 1; */ pvFRight = vbuf_r + 1;
+		pvHLeft = vbuf_l + 2; pvHRight = vbuf_r + 2;
+
+		// Fetch the appropriate query profile.  Note that elements of rf_ must
+		// be numbers, not masks.
+		const int refc = (int)rf_[i];
+
+		// Fetch the appropriate query profile
+		size_t off = (size_t)firsts5[refc] * iter * 2;
+		pvScore = d.profbuf_.ptr() + off; // even elts = query profile, odd = gap barrier
+
+		// Set all cells to low value
+		vf = _mm_cmpeq_epi16(vf, vf);
+		vf = _mm_slli_epi16(vf, NBITS_PER_WORD-1);
+		vf = _mm_or_si128(vf, vlolsw);
+
+		// Load H vector from the final row of the previous column
+		vh = _mm_load_si128(pvHLeft + colstride - ROWSTRIDE_2COL);
+		// Shift 2 bytes down so that topmost (least sig) cell gets 0
+		vh = _mm_slli_si128(vh, NBYTES_PER_WORD);
+		// Fill topmost (least sig) cell with high value
+		vh = _mm_or_si128(vh, vhilsw);
+
+		// For each character in the reference text:
+		size_t j;
+		for(j = 0; j < iter; j++) {
+			// Load cells from E, calculated previously
+			ve = _mm_load_si128(pvELeft);
+			vhd = _mm_load_si128(pvHLeft);
+			assert_all_lt(ve, vhi);
+			pvELeft += ROWSTRIDE_2COL;
+
+			// Store cells in F, calculated previously
+			// The veto word is added twice (saturating) so that barrier rows
+			// end up at the floor value
+			vf = _mm_adds_epi16(vf, pvScore[1]); // veto some ref gap extensions
+			vf = _mm_adds_epi16(vf, pvScore[1]); // veto some ref gap extensions
+			_mm_store_si128(pvFRight, vf);
+			pvFRight += ROWSTRIDE_2COL;
+
+			// Factor in query profile (matches and mismatches)
+			vh = _mm_adds_epi16(vh, pvScore[0]);
+
+			// Update H, factoring in E and F
+			vh = _mm_max_epi16(vh, vf);
+
+			// Update vE value
+			vhdtmp = vhd;
+			vhd = _mm_subs_epi16(vhd, rdgapo);
+			vhd = _mm_adds_epi16(vhd, pvScore[1]); // veto some read gap opens
+			vhd = _mm_adds_epi16(vhd, pvScore[1]); // veto some read gap opens
+			ve = _mm_subs_epi16(ve, rdgape);
+			ve = _mm_max_epi16(ve, vhd);
+			vh = _mm_max_epi16(vh, ve);
+
+			// Save the new vH values
+			_mm_store_si128(pvHRight, vh);
+			pvHRight += ROWSTRIDE_2COL;
+			vtmp = vh;
+			assert_all_lt(ve, vhi);
+
+			// Load the next h value
+			vh = vhdtmp;
+			pvHLeft += ROWSTRIDE_2COL;
+
+			// Save E values
+			_mm_store_si128(pvERight, ve);
+			pvERight += ROWSTRIDE_2COL;
+
+			// Update vf value
+			vtmp = _mm_subs_epi16(vtmp, rfgapo);
+			vf = _mm_subs_epi16(vf, rfgape);
+			assert_all_lt(vf, vhi);
+			vf = _mm_max_epi16(vf, vtmp);
+
+			pvScore += 2; // move on to next query profile / gap veto
+		}
+		// pvHStore, pvELoad, pvEStore have all rolled over to the next column
+		pvFRight -= colstride; // reset to start of column
+		vtmp = _mm_load_si128(pvFRight);
+
+		pvHRight -= colstride; // reset to start of column
+		vh = _mm_load_si128(pvHRight);
+
+		pvScore = d.profbuf_.ptr() + off + 1; // reset veto vector
+
+		// vf from last row gets shifted down by one to overlay the first row
+		// rfgape has already been subtracted from it.
+		vf = _mm_slli_si128(vf, NBYTES_PER_WORD);
+		vf = _mm_or_si128(vf, vlolsw);
+
+		vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions
+		vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions
+		vf = _mm_max_epi16(vtmp, vf);
+		vtmp = _mm_cmpgt_epi16(vf, vtmp);
+		int cmp = _mm_movemask_epi8(vtmp);
+
+		// If any element of vtmp is greater than H - gap-open...
+		// (fixup loop: keep propagating F down the column until no stored F
+		// word is improved any more; nfixup counts the iterations)
+		j = 0;
+		while(cmp != 0x0000) {
+			// Store this vf
+			_mm_store_si128(pvFRight, vf);
+			pvFRight += ROWSTRIDE_2COL;
+
+			// Update vh w/r/t new vf
+			vh = _mm_max_epi16(vh, vf);
+
+			// Save vH values
+			_mm_store_si128(pvHRight, vh);
+			pvHRight += ROWSTRIDE_2COL;
+
+			pvScore += 2;
+
+			assert_lt(j, iter);
+			if(++j == iter) {
+				// Wrapped past the last segment: go back to the top of the
+				// column and shift vf down by one word
+				pvFRight -= colstride;
+				vtmp = _mm_load_si128(pvFRight); // load next vf ASAP
+				pvHRight -= colstride;
+				vh = _mm_load_si128(pvHRight); // load next vh ASAP
+				pvScore = d.profbuf_.ptr() + off + 1;
+				j = 0;
+				vf = _mm_slli_si128(vf, NBYTES_PER_WORD);
+				vf = _mm_or_si128(vf, vlolsw);
+			} else {
+				vtmp = _mm_load_si128(pvFRight); // load next vf ASAP
+				vh = _mm_load_si128(pvHRight); // load next vh ASAP
+			}
+
+			// Update F with another gap extension
+			vf = _mm_subs_epi16(vf, rfgape);
+			vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions
+			vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions
+			vf = _mm_max_epi16(vtmp, vf);
+			vtmp = _mm_cmpgt_epi16(vf, vtmp);
+			cmp = _mm_movemask_epi8(vtmp);
+			nfixup++;
+		}
+
+
+		// Check in the last row for the maximum so far
+		// NOTE(review): this pointer named vtmp shadows the __m128i vtmp
+		// declared at function scope for the rest of the loop body.
+		__m128i *vtmp = vbuf_r + 2 /* H */ + (d.lastIter_ * ROWSTRIDE_2COL);
+		// Note: we may not want to extract from the final row
+		TCScore lr = ((TCScore*)(vtmp))[d.lastWord_];
+		found = true;
+		if(lr > lrmax) {
+			lrmax = lr;
+		}
+
+		// Now we'd like to know whether the bottommost element of the right
+		// column is a candidate we might backtrace from.  First question is:
+		// did it exceed the minimum score threshold?
+		TAlScore score = (TAlScore)(lr - 0x7fff);
+		if(lr == MIN_I16) {
+			score = MIN_I64;
+		}
+		if(!debug && score >= minsc_) {
+			DpBtCandidate cand(dpRows() - 1, i - rfi_, score);
+			btdiag_.add(i - rfi_, cand);
+		}
+
+		// Save some elements to checkpoints
+		if(checkpoint) {
+
+			__m128i *pvE = vbuf_r + 0;
+			__m128i *pvF = vbuf_r + 1;
+			__m128i *pvH = vbuf_r + 2;
+			size_t coli = i - rfi_;
+			if(coli < cper_.locol_) cper_.locol_ = coli;
+			if(coli > cper_.hicol_) cper_.hicol_ = coli;
+
+			if(cperTri_) {
+				size_t rc_mod = coli & cper_.lomask_;
+				assert_lt(rc_mod, cper_.per_);
+				int64_t row = -(int64_t)rc_mod-1;
+				int64_t row_mod = row;
+				int64_t row_div = 0;
+				size_t idx = coli >> cper_.perpow2_;
+				size_t idxrow = idx * cper_.nrow_;
+				assert_eq(4, ROWSTRIDE_2COL);
+				bool done = false;
+				while(true) {
+					row += (cper_.per_ - 2);
+					row_mod += (cper_.per_ - 2);
+					// NOTE(review): this j shadows the size_t j of the
+					// enclosing column loop
+					for(size_t j = 0; j < 2; j++) {
+						row++;
+						row_mod++;
+						if(row >= 0 && (size_t)row < cper_.nrow_) {
+							// Update row divided by iter_ and mod iter_
+							while(row_mod >= (int64_t)iter) {
+								row_mod -= (int64_t)iter;
+								row_div++;
+							}
+							size_t delt = idxrow + row;
+							// << 5: each segment step spans 4 vectors of 8
+							// 16-bit words = 32 words
+							size_t vecoff = (row_mod << 5) + row_div;
+							assert_lt(row_div, 8);
+							int16_t h_sc = ((int16_t*)pvH)[vecoff];
+							int16_t e_sc = ((int16_t*)pvE)[vecoff];
+							int16_t f_sc = ((int16_t*)pvF)[vecoff];
+							// NOTE(review): these are int16_t, so subtracting
+							// 0x7fff wraps around for stored values below -1
+							if(h_sc != MIN_I16) h_sc -= 0x7fff;
+							if(e_sc != MIN_I16) e_sc -= 0x7fff;
+							if(f_sc != MIN_I16) f_sc -= 0x7fff;
+							assert_leq(h_sc, cper_.perf_);
+							assert_leq(e_sc, cper_.perf_);
+							assert_leq(f_sc, cper_.perf_);
+							CpQuad *qdiags = ((j == 0) ? cper_.qdiag1s_.ptr() : cper_.qdiag2s_.ptr());
+							qdiags[delt].sc[0] = h_sc;
+							qdiags[delt].sc[1] = e_sc;
+							qdiags[delt].sc[2] = f_sc;
+						} // if(row >= 0 && row < nrow_)
+						else if(row >= 0 && (size_t)row >= cper_.nrow_) {
+							done = true;
+							break;
+						}
+					} // end of loop over anti-diags
+					if(done) {
+						break;
+					}
+					idx++;
+					idxrow += cper_.nrow_;
+				}
+			} else {
+				// If this is the first column, take this opportunity to
+				// pre-calculate the coordinates of the elements we're going to
+				// checkpoint.
+				if(coli == 0) {
+					size_t cpi = cper_.per_-1;
+					size_t cpimod = cper_.per_-1;
+					size_t cpidiv = 0;
+					cper_.commitMap_.clear();
+					while(cpi < cper_.nrow_) {
+						while(cpimod >= iter) {
+							cpimod -= iter;
+							cpidiv++;
+						}
+						size_t vecoff = (cpimod << 5) + cpidiv;
+						cper_.commitMap_.push_back(vecoff);
+						cpi += cper_.per_;
+						cpimod += cper_.per_;
+					}
+				}
+				// Save all the rows
+				size_t rowoff = 0;
+				size_t sz = cper_.commitMap_.size();
+				// NOTE(review): this i shadows the column index i of the
+				// outer loop; the body uses coli for the column
+				for(size_t i = 0; i < sz; i++, rowoff += cper_.ncol_) {
+					size_t vecoff = cper_.commitMap_[i];
+					int16_t h_sc = ((int16_t*)pvH)[vecoff];
+					int16_t e_sc = ((int16_t*)pvE)[vecoff];
+					int16_t f_sc = ((int16_t*)pvF)[vecoff];
+					if(h_sc != MIN_I16) h_sc -= 0x7fff;
+					if(e_sc != MIN_I16) e_sc -= 0x7fff;
+					if(f_sc != MIN_I16) f_sc -= 0x7fff;
+					assert_leq(h_sc, cper_.perf_);
+					assert_leq(e_sc, cper_.perf_);
+					assert_leq(f_sc, cper_.perf_);
+					CpQuad& dst = cper_.qrows_[rowoff + coli];
+					dst.sc[0] = h_sc;
+					dst.sc[1] = e_sc;
+					dst.sc[2] = f_sc;
+				}
+				// Is this a column we'd like to checkpoint?
+				if((coli & cper_.lomask_) == cper_.lomask_) {
+					// Save the column using memcpys
+					assert_gt(coli, 0);
+					size_t wordspercol = cper_.niter_ * ROWSTRIDE_2COL;
+					size_t coloff = (coli >> cper_.perpow2_) * wordspercol;
+					__m128i *dst = cper_.qcols_.ptr() + coloff;
+					memcpy(dst, vbuf_r, sizeof(__m128i) * wordspercol);
+				}
+			}
+			if(cper_.debug_) {
+				// Save the column using memcpys
+				size_t wordspercol = cper_.niter_ * ROWSTRIDE_2COL;
+				size_t coloff = coli * wordspercol;
+				__m128i *dst = cper_.qcolsD_.ptr() + coloff;
+				memcpy(dst, vbuf_r, sizeof(__m128i) * wordspercol);
+			}
+		}
+	}
+
+	// Update metrics
+	if(!debug) {
+		size_t ninner = (rff_ - rfi_) * iter;
+		met.col += (rff_ - rfi_); // DP columns
+		met.cell += (ninner * NWORDS_PER_REG); // DP cells
+		met.inner += ninner; // DP inner loop iters
+		met.fixup += nfixup; // DP fixup loop iters
+	}
+
+	flag = 0;
+
+	// Did we find a solution?
+	TAlScore score = MIN_I64;
+	if(!found) {
+		flag = -1; // no
+		if(!debug) met.dpfail++;
+		return MIN_I64;
+	} else {
+		score = (TAlScore)(lrmax - 0x7fff);
+		if(score < minsc_) {
+			flag = -1; // no
+			if(!debug) met.dpfail++;
+			return score;
+		}
+	}
+
+	// Could we have saturated?
+	// NOTE(review): lrmax == MIN_I16 gives score == MIN_I16 - 0x7fff above,
+	// so this branch is reached only if minsc_ is at or below that value.
+	if(lrmax == MIN_I16) {
+		flag = -2; // yes
+		if(!debug) met.dpsat++;
+		return MIN_I64;
+	}
+
+	// Now take all the backtrace candidates in the btdiag_ structure and
+	// dump them into the btncand_ array.  They'll be sorted later.
+	if(!debug) {
+		btdiag_.dump(btncand_);
+		assert(!btncand_.empty());
+	}
+
+	// Return largest score
+	if(!debug) met.dpsucc++;
+	return score;
+}
+
+/**
+ * Solve the current alignment problem using SSE instructions that operate on 8
+ * signed 16-bit values packed into a single 128-bit register.
/**
 * Solve the current alignment problem using SSE instructions that operate on 8
 * signed 16-bit values packed into a single 128-bit register.
 *
 * Fills the striped E/F/H matrices in d.mat_ one reference column at a time
 * and tracks the largest H value seen at (d.lastIter_, d.lastWord_) across
 * all columns.
 *
 * @param flag  out: 0 if a score >= minsc_ was found; -1 if no column was
 *              processed or the best score is below minsc_; -2 if the best
 *              stored value equals MIN_I16 (treated as saturation).
 * @param debug in: when true, none of the SSEMetrics counters are updated.
 * @return the best score (stored value minus the 0x7fff bias); MIN_I64 when
 *         no column was processed or on saturation.  When the best score is
 *         below minsc_, that score is still returned (with flag == -1).
 */
TAlScore SwAligner::alignNucleotidesEnd2EndSseI16(int& flag, bool debug) {
	assert_leq(rdf_, rd_->length());
	assert_leq(rdf_, qu_->length());
	assert_lt(rfi_, rff_);
	assert_lt(rdi_, rdf_);
	assert_eq(rd_->length(), qu_->length());
	assert_geq(sc_->gapbar, 1);
	assert(repOk());
#ifndef NDEBUG
	for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) {
		assert_range(0, 16, (int)rf_[i]);
	}
#endif

	// Pick the per-strand SSE state and the metrics bucket for this call
	SSEData& d = fw_ ? sseI16fw_ : sseI16rc_;
	SSEMetrics& met = extend_ ? sseI16ExtendMet_ : sseI16MateMet_;
	if(!debug) met.dp++;
	buildQueryProfileEnd2EndSseI16(fw_);
	assert(!d.profbuf_.empty());

	assert_eq(0, d.maxBonus_);
	// Number of vectors per column: ceil(dpRows() / NWORDS_PER_REG)
	size_t iter =
		(dpRows() + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; // iter = segLen

	// Many thanks to Michael Farrar for releasing his striped Smith-Waterman
	// implementation:
	//
	//  http://sites.google.com/site/farrarmichael/smith-waterman
	//
	// Much of the implementation below is adapted from Michael's code.

	// Zero-initialize every working register; the gap-penalty and
	// low/high constant registers are filled in just below.
	__m128i rfgapo   = _mm_setzero_si128();
	__m128i rfgape   = _mm_setzero_si128();
	__m128i rdgapo   = _mm_setzero_si128();
	__m128i rdgape   = _mm_setzero_si128();
	__m128i vlo      = _mm_setzero_si128();
	__m128i vhi      = _mm_setzero_si128();
	__m128i vhilsw   = _mm_setzero_si128();
	__m128i vlolsw   = _mm_setzero_si128();
	__m128i ve       = _mm_setzero_si128();
	__m128i vf       = _mm_setzero_si128();
	__m128i vh       = _mm_setzero_si128();
#if 0
	__m128i vhd      = _mm_setzero_si128();
	__m128i vhdtmp   = _mm_setzero_si128();
#endif
	__m128i vtmp     = _mm_setzero_si128();

	// Set all elts to reference gap open penalty
	assert_gt(sc_->refGapOpen(), 0);
	assert_leq(sc_->refGapOpen(), MAX_I16);
	rfgapo = _mm_insert_epi16(rfgapo, sc_->refGapOpen(), 0);
	rfgapo = _mm_shufflelo_epi16(rfgapo, 0);
	rfgapo = _mm_shuffle_epi32(rfgapo, 0);

	// Set all elts to reference gap extension penalty
	assert_gt(sc_->refGapExtend(), 0);
	assert_leq(sc_->refGapExtend(), MAX_I16);
	assert_leq(sc_->refGapExtend(), sc_->refGapOpen());
	rfgape = _mm_insert_epi16(rfgape, sc_->refGapExtend(), 0);
	rfgape = _mm_shufflelo_epi16(rfgape, 0);
	rfgape = _mm_shuffle_epi32(rfgape, 0);

	// Set all elts to read gap open penalty
	assert_gt(sc_->readGapOpen(), 0);
	assert_leq(sc_->readGapOpen(), MAX_I16);
	rdgapo = _mm_insert_epi16(rdgapo, sc_->readGapOpen(), 0);
	rdgapo = _mm_shufflelo_epi16(rdgapo, 0);
	rdgapo = _mm_shuffle_epi32(rdgapo, 0);

	// Set all elts to read gap extension penalty
	assert_gt(sc_->readGapExtend(), 0);
	assert_leq(sc_->readGapExtend(), MAX_I16);
	assert_leq(sc_->readGapExtend(), sc_->readGapOpen());
	rdgape = _mm_insert_epi16(rdgape, sc_->readGapExtend(), 0);
	rdgape = _mm_shufflelo_epi16(rdgape, 0);
	rdgape = _mm_shuffle_epi32(rdgape, 0);

	// Set all elts to 0x8000 (min value for signed 16-bit)
	vlo = _mm_cmpeq_epi16(vlo, vlo);             // all elts = 0xffff
	vlo = _mm_slli_epi16(vlo, NBITS_PER_WORD-1); // all elts = 0x8000

	// Set all elts to 0x7fff (max value for signed 16-bit)
	vhi = _mm_cmpeq_epi16(vhi, vhi);             // all elts = 0xffff
	vhi = _mm_srli_epi16(vhi, 1);                // all elts = 0x7fff

	// vlolsw: topmost (least sig) word set to 0x8000, all other words=0
	vlolsw = _mm_shuffle_epi32(vlo, 0);
	vlolsw = _mm_srli_si128(vlolsw, NBYTES_PER_REG - NBYTES_PER_WORD);

	// vhilsw: topmost (least sig) word set to 0x7fff, all other words=0
	vhilsw = _mm_shuffle_epi32(vhi, 0);
	vhilsw = _mm_srli_si128(vhilsw, NBYTES_PER_REG - NBYTES_PER_WORD);

	// Points to a long vector of __m128i where each element is a block of
	// contiguous cells in the E, F or H matrix.  If the index % 3 == 0, then
	// the block of cells is from the E matrix.  If index % 3 == 1, they're
	// from the F matrix.  If index % 3 == 2, then they're from the H matrix.
	// Blocks of cells are organized in the same interleaved manner as they are
	// calculated by the Farrar algorithm.
	const __m128i *pvScore; // points into the query profile

	d.mat_.init(dpRows(), rff_ - rfi_, NWORDS_PER_REG);
	const size_t colstride = d.mat_.colstride();
	assert_eq(ROWSTRIDE, colstride / iter);

	// Initialize the H and E vectors in the first matrix column
	__m128i *pvHTmp = d.mat_.tmpvec(0, 0);
	__m128i *pvETmp = d.mat_.evec(0, 0);

	// Maximum score in final row
	bool found = false;
	TCScore lrmax = MIN_I16;

	for(size_t i = 0; i < iter; i++) {
		_mm_store_si128(pvETmp, vlo);
		// Could initialize Hs to high or low.  If high, cells in the lower
		// triangle will have somewhat more legitimate scores, but still won't
		// be exhaustively scored.
		_mm_store_si128(pvHTmp, vlo);
		pvETmp += ROWSTRIDE;
		pvHTmp += ROWSTRIDE;
	}
	// These are swapped just before the innermost loop.
	// Note that E is loaded from column 0 but stored into column 1: the E
	// values computed while processing a column are written to the next one.
	__m128i *pvHStore = d.mat_.hvec(0, 0);
	__m128i *pvHLoad  = d.mat_.tmpvec(0, 0);
	__m128i *pvELoad  = d.mat_.evec(0, 0);
	__m128i *pvEStore = d.mat_.evecUnsafe(0, 1);
	__m128i *pvFStore = d.mat_.fvec(0, 0);
	__m128i *pvFTmp   = NULL;

	assert_gt(sc_->gapbar, 0);
	size_t nfixup = 0; // iterations of the F fixup loop, reported in met.fixup

	// Fill in the table as usual but instead of using the same gap-penalty
	// vector for each iteration of the inner loop, load words out of a
	// pre-calculated gap vector parallel to the query profile.  The pre-
	// calculated gap vectors enforce the gap barrier constraint by making it
	// infinitely costly to introduce a gap in barrier rows.
	//
	// AND use a separate loop to fill in the first row of the table, enforcing
	// the st_ constraints in the process.  This is awkward because it
	// separates the processing of the first row from the others and might make
	// it difficult to use the first-row results in the next row, but it might
	// be the simplest and least disruptive way to deal with the st_ constraint.

	colstop_ = rff_ - 1;
	lastsolcol_ = 0;

	// One iteration per reference position (one DP column)
	for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) {
		assert(pvFStore == d.mat_.fvec(0, i - rfi_));
		assert(pvHStore == d.mat_.hvec(0, i - rfi_));

		// Fetch the appropriate query profile.  Note that elements of rf_ must
		// be numbers, not masks.
		const int refc = (int)rf_[i];
		size_t off = (size_t)firsts5[refc] * iter * 2;
		pvScore = d.profbuf_.ptr() + off; // even elts = query profile, odd = gap barrier

		// Set all cells to low value
		vf = _mm_cmpeq_epi16(vf, vf);
		vf = _mm_slli_epi16(vf, NBITS_PER_WORD-1);
		vf = _mm_or_si128(vf, vlolsw);

		// Load H vector from the final row of the previous column
		vh = _mm_load_si128(pvHLoad + colstride - ROWSTRIDE);
		// Shift 2 bytes down so that topmost (least sig) cell gets 0
		vh = _mm_slli_si128(vh, NBYTES_PER_WORD);
		// Fill topmost (least sig) cell with high value
		vh = _mm_or_si128(vh, vhilsw);

		// For each of the 'iter' vectors making up this column:
		size_t j;
		for(j = 0; j < iter; j++) {
			// Load cells from E, calculated previously
			ve = _mm_load_si128(pvELoad);
#if 0
			vhd = _mm_load_si128(pvHLoad);
#endif
			assert_all_lt(ve, vhi);
			pvELoad += ROWSTRIDE;

			// Store cells in F, calculated previously
			vf = _mm_adds_epi16(vf, pvScore[1]); // veto some ref gap extensions
			vf = _mm_adds_epi16(vf, pvScore[1]); // veto some ref gap extensions
			_mm_store_si128(pvFStore, vf);
			pvFStore += ROWSTRIDE;

			// Factor in query profile (matches and mismatches)
			vh = _mm_adds_epi16(vh, pvScore[0]);

			// Update H, factoring in E and F
			vh = _mm_max_epi16(vh, ve);
			vh = _mm_max_epi16(vh, vf);

			// Save the new vH values
			_mm_store_si128(pvHStore, vh);
			pvHStore += ROWSTRIDE;

			// Update vE value
			vtmp = vh; // keep the un-penalized H for the F update below
#if 0
			vhdtmp = vhd;
			vhd = _mm_subs_epi16(vhd, rdgapo);
			vhd = _mm_adds_epi16(vhd, pvScore[1]); // veto some read gap opens
			vhd = _mm_adds_epi16(vhd, pvScore[1]); // veto some read gap opens
			ve = _mm_subs_epi16(ve, rdgape);
			ve = _mm_max_epi16(ve, vhd);
#else
			vh = _mm_subs_epi16(vh, rdgapo);
			vh = _mm_adds_epi16(vh, pvScore[1]); // veto some read gap opens
			vh = _mm_adds_epi16(vh, pvScore[1]); // veto some read gap opens
			ve = _mm_subs_epi16(ve, rdgape);
			ve = _mm_max_epi16(ve, vh);
#endif
			assert_all_lt(ve, vhi);

			// Load the next h value
#if 0
			vh = vhdtmp;
#else
			vh = _mm_load_si128(pvHLoad);
#endif
			pvHLoad += ROWSTRIDE;

			// Save E values
			_mm_store_si128(pvEStore, ve);
			pvEStore += ROWSTRIDE;

			// Update vf value
			vtmp = _mm_subs_epi16(vtmp, rfgapo);
			vf = _mm_subs_epi16(vf, rfgape);
			assert_all_lt(vf, vhi);
			vf = _mm_max_epi16(vf, vtmp);

			pvScore += 2; // move on to next query profile / gap veto
		}
		// pvHStore, pvELoad, pvEStore have all rolled over to the next column
		pvFTmp = pvFStore;     // remember the start of the next column's F
		pvFStore -= colstride; // reset to start of column
		vtmp = _mm_load_si128(pvFStore);

		pvHStore -= colstride; // reset to start of column
		vh = _mm_load_si128(pvHStore);

#if 0
#else
		pvEStore -= colstride; // reset to start of column
		ve = _mm_load_si128(pvEStore);
#endif

		pvHLoad = pvHStore;    // new pvHLoad = pvHStore
		pvScore = d.profbuf_.ptr() + off + 1; // reset veto vector

		// vf from last row gets shifted down by one to overlay the first row
		// rfgape has already been subtracted from it.
		vf = _mm_slli_si128(vf, NBYTES_PER_WORD);
		vf = _mm_or_si128(vf, vlolsw);

		vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions
		vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions
		vf = _mm_max_epi16(vtmp, vf);
		vtmp = _mm_cmpgt_epi16(vf, vtmp);
		int cmp = _mm_movemask_epi8(vtmp);

		// If any element of vtmp is greater than H - gap-open...
		// (fixup loop: keeps revisiting the column's vectors, wrapping around
		// as needed, until no F value improves any more)
		j = 0;
		while(cmp != 0x0000) {
			// Store this vf
			_mm_store_si128(pvFStore, vf);
			pvFStore += ROWSTRIDE;

			// Update vh w/r/t new vf
			vh = _mm_max_epi16(vh, vf);

			// Save vH values
			_mm_store_si128(pvHStore, vh);
			pvHStore += ROWSTRIDE;

			// Update E in case it can be improved using our new vh
#if 0
#else
			vh = _mm_subs_epi16(vh, rdgapo);
			vh = _mm_adds_epi16(vh, *pvScore); // veto some read gap opens
			vh = _mm_adds_epi16(vh, *pvScore); // veto some read gap opens
			ve = _mm_max_epi16(ve, vh);
			_mm_store_si128(pvEStore, ve);
			pvEStore += ROWSTRIDE;
#endif
			pvScore += 2;

			assert_lt(j, iter);
			if(++j == iter) {
				// Reached the end of the column: wrap back to its first
				// vector and shift vf down one lane, as above
				pvFStore -= colstride;
				vtmp = _mm_load_si128(pvFStore);   // load next vf ASAP
				pvHStore -= colstride;
				vh = _mm_load_si128(pvHStore);     // load next vh ASAP
#if 0
#else
				pvEStore -= colstride;
				ve = _mm_load_si128(pvEStore);     // load next ve ASAP
#endif
				pvScore = d.profbuf_.ptr() + off + 1;
				j = 0;
				vf = _mm_slli_si128(vf, NBYTES_PER_WORD);
				vf = _mm_or_si128(vf, vlolsw);
			} else {
				vtmp = _mm_load_si128(pvFStore);   // load next vf ASAP
				vh = _mm_load_si128(pvHStore);     // load next vh ASAP
#if 0
#else
				ve = _mm_load_si128(pvEStore);     // load next ve ASAP
#endif
			}

			// Update F with another gap extension
			vf = _mm_subs_epi16(vf, rfgape);
			vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions
			vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions
			vf = _mm_max_epi16(vtmp, vf);
			vtmp = _mm_cmpgt_epi16(vf, vtmp);
			cmp = _mm_movemask_epi8(vtmp);
			nfixup++;
		}

#ifndef NDEBUG
		if((rand() & 15) == 0) {
			// This is a work-intensive sanity check; each time we finish filling
			// a column, we check that each H, E, and F is sensible.
			for(size_t k = 0; k < dpRows(); k++) {
				assert(cellOkEnd2EndI16(
					d,
					k,                   // row
					i - rfi_,            // col
					refc,                // reference mask
					(int)(*rd_)[rdi_+k], // read char
					(int)(*qu_)[rdi_+k], // read quality
					*sc_));              // scoring scheme
			}
		}
#endif

		// NOTE(review): this pointer named vtmp shadows the __m128i register
		// variable vtmp declared at function scope for the rest of this loop
		// body; nothing after this point in the body uses the register.
		__m128i *vtmp = d.mat_.hvec(d.lastIter_, i-rfi_);
		// Note: we may not want to extract from the final row
		TCScore lr = ((TCScore*)(vtmp))[d.lastWord_];
		found = true;
		if(lr > lrmax) {
			lrmax = lr;
		}

		// pvELoad and pvHLoad are already where they need to be

		// Adjust the load and store vectors here.
		pvHStore = pvHLoad + colstride;
		pvEStore = pvELoad + colstride;
		pvFStore = pvFTmp;
	}

	// Update metrics
	if(!debug) {
		size_t ninner = (rff_ - rfi_) * iter;
		met.col   += (rff_ - rfi_);             // DP columns
		met.cell  += (ninner * NWORDS_PER_REG); // DP cells
		met.inner += ninner;                    // DP inner loop iters
		met.fixup += nfixup;                    // DP fixup loop iters
	}

	flag = 0;

	// Did we find a solution?
	TAlScore score = MIN_I64;
	if(!found) {
		flag = -1; // no
		if(!debug) met.dpfail++;
		return MIN_I64;
	} else {
		// Stored values carry a 0x7fff bias; remove it to get the real score
		score = (TAlScore)(lrmax - 0x7fff);
		if(score < minsc_) {
			flag = -1; // no
			if(!debug) met.dpfail++;
			return score;
		}
	}

	// Could we have saturated?
	// NOTE(review): this is only reached when score >= minsc_, and with
	// lrmax == MIN_I16 the score is MIN_I16 - 0x7fff; confirm this branch is
	// reachable for the minsc_ values callers use.
	if(lrmax == MIN_I16) {
		flag = -2; // yes
		if(!debug) met.dpsat++;
		return MIN_I64;
	}

	// Return largest score
	if(!debug) met.dpsucc++;
	return score;
}

/**
 * Given a filled-in DP table, populate the btncand_ list with candidate cells
 * that might be at the ends of valid alignments.  No need to do this unless
 * the maximum score returned by the align*() func is >= the minimum.
 *
 * Only cells that are exhaustively scored are candidates.
/**
 * Given a filled-in DP table, populate the btncand_ list with candidate cells
 * that might be at the ends of valid alignments.  No need to do this unless
 * the maximum score returned by the align*() func is >= the minimum.
 *
 * Only cells that are exhaustively scored are candidates.  Those are the
 * cells inside the shape made of o's in this:
 *
 *  |-maxgaps-|
 *  *********************************    -
 *   ********************************    |
 *    *******************************    |
 *     ******************************    |
 *      *****************************    |
 *       **************************** read len
 *        ***************************    |
 *         **************************    |
 *          *************************    |
 *           ************************    |
 *            ***********oooooooooooo    -
 *            |-maxgaps-|
 *  |-readlen-|
 *  |-------skip--------|
 *
 * And it's possible for the shape to be truncated on the left and right sides.
 *
 * As implemented, one H value per column is examined (the one at
 * d.lastIter_ / d.lastWord_) and a candidate (nrow-1, column, score) is
 * recorded for every column whose score is >= minsc_.
 *
 * @param best in: best score found by the fill step; only read by asserts.
 * @return true iff at least one candidate was added to btncand_.
 */
bool SwAligner::gatherCellsNucleotidesEnd2EndSseI16(TAlScore best) {
	// What's the minimum number of rows that can possibly be spanned by an
	// alignment that meets the minimum score requirement?
	// NOTE(review): nothing below computes this; looks like a leftover comment.
	assert(sse16succ_);
	const size_t ncol = rff_ - rfi_;
	const size_t nrow = dpRows();
	assert_gt(nrow, 0);
	btncand_.clear();
	btncanddone_.clear();
	SSEData& d = fw_ ? sseI16fw_ : sseI16rc_;
	SSEMetrics& met = extend_ ? sseI16ExtendMet_ : sseI16MateMet_;
	assert(!d.profbuf_.empty());
	const size_t colstride = d.mat_.colstride();
	ASSERT_ONLY(bool sawbest = false);
	// Walk the same H vector position across every column
	__m128i *pvH = d.mat_.hvec(d.lastIter_, 0);
	for(size_t j = 0; j < ncol; j++) {
		// Remove the 0x7fff bias applied to stored scores
		TAlScore sc = (TAlScore)(((TCScore*)pvH)[d.lastWord_] - 0x7fff);
		assert_leq(sc, best);
		ASSERT_ONLY(sawbest = (sawbest || sc == best));
		if(sc >= minsc_) {
			// Yes, this is legit
			met.gathsol++;
			btncand_.expand();
			btncand_.back().init(nrow-1, j, sc);
		}
		pvH += colstride;
	}
	assert(sawbest);
	if(!btncand_.empty()) {
		// Backtrace masks are only needed if there is something to backtrace
		d.mat_.initMasks();
	}
	return !btncand_.empty();
}

/*
 * Helpers for walking the striped matrix during backtrace.  They expect a
 * local named 'd' (the SSEData in use) to be in scope.  As set up by
 * NEW_ROW_COL, DP row r lives in vector (r % nvecrow_) of its column, at
 * element (r / nvecrow_) of that vector.
 *
 * MOVE_VEC_PTR_UP steps one row up: normally the previous vector of the same
 * column; when already at vector 0 it wraps to the column's last vector and
 * decrements the element index.  Note that for row 0 this decrements an
 * unsigned rowelt below zero; the backtrace only reads through the "up"
 * pointers inside its row > 0 branch.
 */
#define MOVE_VEC_PTR_UP(vec, rowvec, rowelt) { \
	if(rowvec == 0) { \
		rowvec += d.mat_.nvecrow_; \
		vec += d.mat_.colstride_; \
		rowelt--; \
	} \
	rowvec--; \
	vec -= ROWSTRIDE; \
}

/* Step one column to the left; row bookkeeping is unchanged. */
#define MOVE_VEC_PTR_LEFT(vec, rowvec, rowelt) { vec -= d.mat_.colstride_; }

/* Step one row up, then one column left. */
#define MOVE_VEC_PTR_UPLEFT(vec, rowvec, rowelt) { \
	MOVE_VEC_PTR_UP(vec, rowvec, rowelt); \
	MOVE_VEC_PTR_LEFT(vec, rowvec, rowelt); \
}

/*
 * The MOVE_ALL_* macros apply the same step to all four cursors kept by the
 * backtrace: current cell, left neighbor, upper neighbor, upper-left neighbor.
 */
#define MOVE_ALL_LEFT() { \
	MOVE_VEC_PTR_LEFT(cur_vec, rowvec, rowelt); \
	MOVE_VEC_PTR_LEFT(left_vec, left_rowvec, left_rowelt); \
	MOVE_VEC_PTR_LEFT(up_vec, up_rowvec, up_rowelt); \
	MOVE_VEC_PTR_LEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \
}

#define MOVE_ALL_UP() { \
	MOVE_VEC_PTR_UP(cur_vec, rowvec, rowelt); \
	MOVE_VEC_PTR_UP(left_vec, left_rowvec, left_rowelt); \
	MOVE_VEC_PTR_UP(up_vec, up_rowvec, up_rowelt); \
	MOVE_VEC_PTR_UP(upleft_vec, upleft_rowvec, upleft_rowelt); \
}

#define MOVE_ALL_UPLEFT() { \
	MOVE_VEC_PTR_UPLEFT(cur_vec, rowvec, rowelt); \
	MOVE_VEC_PTR_UPLEFT(left_vec, left_rowvec, left_rowelt); \
	MOVE_VEC_PTR_UPLEFT(up_vec, up_rowvec, up_rowelt); \
	MOVE_VEC_PTR_UPLEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \
}

/*
 * (Re)position all four cursors for the cell at (row, col): compute the
 * current cell's vector pointer and element index from scratch, then derive
 * the left, up and upleft cursors from it.
 */
#define NEW_ROW_COL(row, col) { \
	rowelt = row / d.mat_.nvecrow_; \
	rowvec = row % d.mat_.nvecrow_; \
	eltvec = (col * d.mat_.colstride_) + (rowvec * ROWSTRIDE); \
	cur_vec = d.mat_.matbuf_.ptr() + eltvec; \
	left_vec = cur_vec; \
	left_rowelt = rowelt; \
	left_rowvec = rowvec; \
	MOVE_VEC_PTR_LEFT(left_vec, left_rowvec, left_rowelt); \
	up_vec = cur_vec; \
	up_rowelt = rowelt; \
	up_rowvec = rowvec; \
	MOVE_VEC_PTR_UP(up_vec, up_rowvec, up_rowelt); \
	upleft_vec = up_vec; \
	upleft_rowelt = up_rowelt; \
	upleft_rowvec = up_rowvec; \
	MOVE_VEC_PTR_LEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \
}

/**
 * Given the dynamic programming table and a cell, trace backwards from the
 * cell and install the edits and score/penalty in the appropriate fields
 * of res.  The RandomSource is used to break ties among equally good ways
 * of tracing back.
 *
 * NOTE(review): with the '#if 1' branches below compiled in, ties are broken
 * by a fixed priority and rnd is only referenced from the disabled '#else'
 * branches.
 *
 * Whenever we enter a cell, we check whether the read/ref coordinates of
 * that cell correspond to a cell we traversed constructing a previous
 * alignment.  If so, we backtrack to the last decision point, mask out the
 * path that led to the previously observed cell, and continue along a
 * different path; or, if there are no more paths to try, we give up.
 *
 * If an alignment is found, 'off' is set to the alignment's upstream-most
 * reference character's offset into the chromosome and true is returned.
 * Otherwise, false is returned.
 *
 * False is returned (after res.reset()) when no untried branch point is left,
 * when the traced path touches no core diagonal of rect_, or when the
 * alignment contains more than nceil_ Ns.
 */
bool SwAligner::backtraceNucleotidesEnd2EndSseI16(
	TAlScore       escore, // in: expected score
	SwResult&      res,    // out: store results (edits and scores) here
	size_t&        off,    // out: store diagonal projection of origin
	size_t&        nbts,   // out: # backtracks
	size_t         row,    // start in this row
	size_t         col,    // start in this column
	RandomSource&  rnd)    // random gen, to choose among equal paths
{
	assert_lt(row, dpRows());
	assert_lt(col, (size_t)(rff_ - rfi_));
	SSEData& d = fw_ ? sseI16fw_ : sseI16rc_;
	SSEMetrics& met = extend_ ? sseI16ExtendMet_ : sseI16MateMet_;
	met.bt++;
	assert(!d.profbuf_.empty());
	assert_lt(row, rd_->length());
	btnstack_.clear(); // empty the backtrack stack
	btcells_.clear();  // empty the cells-so-far list
	AlnScore score; score.score_ = 0;
	score.gaps_ = score.ns_ = 0;
	size_t origCol = col;
	size_t gaps = 0, readGaps = 0, refGaps = 0;
	res.alres.reset();
	// NOTE(review): EList is presumably a template and its element-type
	// argument appears to be missing here (Edit objects are pushed onto ned
	// below); confirm against the original file.
	EList& ned = res.alres.ned();
	assert(ned.empty());
	assert_gt(dpRows(), row);
	size_t trimEnd = dpRows() - row - 1;
	size_t trimBeg = 0;
	size_t ct = SSEMatrix::H; // cell type
	// Row and col in terms of where they fall in the SSE vector matrix
	size_t rowelt, rowvec, eltvec;
	size_t left_rowelt, up_rowelt, upleft_rowelt;
	size_t left_rowvec, up_rowvec, upleft_rowvec;
	__m128i *cur_vec, *left_vec, *up_vec, *upleft_vec;
	NEW_ROW_COL(row, col);
	// The loop is left via 'break' (alignment start reached) or 'return false'
	while((int)row >= 0) {
		met.btcell++;
		nbts++;
		int readc = (*rd_)[rdi_ + row];
		int refm  = (int)rf_[rfi_ + col];
		// NOTE(review): readc is indexed with rdi_ + row but readq with row
		// alone; confirm whether the rdi_ offset is intentionally omitted.
		int readq = (*qu_)[row];
		assert_leq(col, origCol);
		// Get score in this cell
		bool empty = false, reportedThru, canMoveThru, branch = false;
		int cur = SSEMatrix::H;
		if(!d.mat_.reset_[row]) {
			d.mat_.resetRow(row);
		}
		reportedThru = d.mat_.reportedThrough(row, col);
		canMoveThru = true;
		if(reportedThru) {
			canMoveThru = false;
		} else {
			empty = false;
			if(row > 0) {
				assert_gt(row, 0);
				size_t rowFromEnd = d.mat_.nrow() - row - 1;
				// Gaps are disallowed within gapbar rows of either read end
				bool gapsAllowed = true;
				if(row < (size_t)sc_->gapbar ||
				   rowFromEnd < (size_t)sc_->gapbar)
				{
					gapsAllowed = false;
				}
				const TAlScore floorsc = MIN_I64;
				const int offsetsc = -0x7fff; // removes the stored-score bias
				// Move to beginning of column/row
				if(ct == SSEMatrix::E) { // AKA rdgap
					assert_gt(col, 0);
					TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::E))[rowelt] + offsetsc;
					assert(gapsAllowed);
					// Currently in the E matrix; incoming transition must come from the
					// left.  It's either a gap open from the H matrix or a gap extend from
					// the E matrix.
					// TODO: save and restore origMask as well as mask
					int origMask = 0, mask = 0;
					// Get H score of cell to the left
					TAlScore sc_h_left = ((TCScore*)(left_vec + SSEMatrix::H))[left_rowelt] + offsetsc;
					if(sc_h_left > floorsc && sc_h_left - sc_->readGapOpen() == sc_cur) {
						mask |= (1 << 0);
					}
					// Get E score of cell to the left
					TAlScore sc_e_left = ((TCScore*)(left_vec + SSEMatrix::E))[left_rowelt] + offsetsc;
					if(sc_e_left > floorsc && sc_e_left - sc_->readGapExtend() == sc_cur) {
						mask |= (1 << 1);
					}
					origMask = mask;
					assert(origMask > 0 || sc_cur <= sc_->match());
					// A mask saved on an earlier visit overrides the fresh one
					if(d.mat_.isEMaskSet(row, col)) {
						mask = (d.mat_.masks_[row][col] >> 8) & 3;
					}
					if(mask == 3) {
#if 1
						// Pick H -> E cell
						cur = SW_BT_OALL_READ_OPEN;
						d.mat_.eMaskSet(row, col, 2); // might choose E later
#else
						if(rnd.nextU2()) {
							// Pick H -> E cell
							cur = SW_BT_OALL_READ_OPEN;
							d.mat_.eMaskSet(row, col, 2); // might choose E later
						} else {
							// Pick E -> E cell
							cur = SW_BT_RDGAP_EXTEND;
							d.mat_.eMaskSet(row, col, 1); // might choose H later
						}
#endif
						branch = true;
					} else if(mask == 2) {
						// I chose the E cell
						cur = SW_BT_RDGAP_EXTEND;
						d.mat_.eMaskSet(row, col, 0); // done
					} else if(mask == 1) {
						// I chose the H cell
						cur = SW_BT_OALL_READ_OPEN;
						d.mat_.eMaskSet(row, col, 0); // done
					} else {
						empty = true;
						// It's empty, so the only question left is whether we should be
						// allowed to terminate in this cell.  If it's got a valid score
						// then we *shouldn't* be allowed to terminate here because that
						// means it's part of a larger alignment that was already reported.
						canMoveThru = (origMask == 0);
					}
					assert(!empty || !canMoveThru);
				} else if(ct == SSEMatrix::F) { // AKA rfgap
					assert_gt(row, 0);
					assert(gapsAllowed);
					TAlScore sc_h_up = ((TCScore*)(up_vec + SSEMatrix::H))[up_rowelt] + offsetsc;
					TAlScore sc_f_up = ((TCScore*)(up_vec + SSEMatrix::F))[up_rowelt] + offsetsc;
					TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::F))[rowelt] + offsetsc;
					// Currently in the F matrix; incoming transition must come from above.
					// It's either a gap open from the H matrix or a gap extend from the F
					// matrix.
					// TODO: save and restore origMask as well as mask
					int origMask = 0, mask = 0;
					// Get H score of cell above
					if(sc_h_up > floorsc && sc_h_up - sc_->refGapOpen() == sc_cur) {
						mask |= (1 << 0);
					}
					// Get F score of cell above
					if(sc_f_up > floorsc && sc_f_up - sc_->refGapExtend() == sc_cur) {
						mask |= (1 << 1);
					}
					origMask = mask;
					assert(origMask > 0 || sc_cur <= sc_->match());
					// A mask saved on an earlier visit overrides the fresh one
					if(d.mat_.isFMaskSet(row, col)) {
						mask = (d.mat_.masks_[row][col] >> 11) & 3;
					}
					if(mask == 3) {
#if 1
						// I chose the H cell
						cur = SW_BT_OALL_REF_OPEN;
						d.mat_.fMaskSet(row, col, 2); // might choose F later
#else
						if(rnd.nextU2()) {
							// I chose the H cell
							cur = SW_BT_OALL_REF_OPEN;
							d.mat_.fMaskSet(row, col, 2); // might choose F later
						} else {
							// I chose the F cell
							cur = SW_BT_RFGAP_EXTEND;
							d.mat_.fMaskSet(row, col, 1); // might choose H later
						}
#endif
						branch = true;
					} else if(mask == 2) {
						// I chose the F cell
						cur = SW_BT_RFGAP_EXTEND;
						d.mat_.fMaskSet(row, col, 0); // done
					} else if(mask == 1) {
						// I chose the H cell
						cur = SW_BT_OALL_REF_OPEN;
						d.mat_.fMaskSet(row, col, 0); // done
					} else {
						empty = true;
						// It's empty, so the only question left is whether we should be
						// allowed to terminate in this cell.  If it's got a valid score
						// then we *shouldn't* be allowed to terminate here because that
						// means it's part of a larger alignment that was already reported.
						canMoveThru = (origMask == 0);
					}
					assert(!empty || !canMoveThru);
				} else {
					assert_eq(SSEMatrix::H, ct);
					TAlScore sc_cur      = ((TCScore*)(cur_vec + SSEMatrix::H))[rowelt] + offsetsc;
					TAlScore sc_f_up     = ((TCScore*)(up_vec  + SSEMatrix::F))[up_rowelt] + offsetsc;
					TAlScore sc_h_up     = ((TCScore*)(up_vec  + SSEMatrix::H))[up_rowelt] + offsetsc;
					// In column 0 there is no left neighbor; use floorsc so
					// none of the "from the left" transitions can match
					TAlScore sc_h_left   = col > 0 ? (((TCScore*)(left_vec   + SSEMatrix::H))[left_rowelt] + offsetsc) : floorsc;
					TAlScore sc_e_left   = col > 0 ? (((TCScore*)(left_vec   + SSEMatrix::E))[left_rowelt] + offsetsc) : floorsc;
					TAlScore sc_h_upleft = col > 0 ? (((TCScore*)(upleft_vec + SSEMatrix::H))[upleft_rowelt] + offsetsc) : floorsc;
					TAlScore sc_diag     = sc_->score(readc, refm, readq - 33);
					// TODO: save and restore origMask as well as mask
					// Mask bits: 0 = H up, 1 = H left, 2 = F up, 3 = E left,
					// 4 = H diagonal
					int origMask = 0, mask = 0;
					if(gapsAllowed) {
						if(sc_h_up     > floorsc && sc_cur == sc_h_up   - sc_->refGapOpen()) {
							mask |= (1 << 0);
						}
						if(sc_h_left   > floorsc && sc_cur == sc_h_left - sc_->readGapOpen()) {
							mask |= (1 << 1);
						}
						if(sc_f_up     > floorsc && sc_cur == sc_f_up   - sc_->refGapExtend()) {
							mask |= (1 << 2);
						}
						if(sc_e_left   > floorsc && sc_cur == sc_e_left - sc_->readGapExtend()) {
							mask |= (1 << 3);
						}
					}
					if(sc_h_upleft > floorsc && sc_cur == sc_h_upleft + sc_diag) {
						mask |= (1 << 4);
					}
					origMask = mask;
					assert(origMask > 0 || sc_cur <= sc_->match());
					// A mask saved on an earlier visit overrides the fresh one
					if(d.mat_.isHMaskSet(row, col)) {
						mask = (d.mat_.masks_[row][col] >> 2) & 31;
					}
					assert(gapsAllowed || mask == (1 << 4) || mask == 0);
					int opts = alts5[mask];
					int select = -1;
					if(opts == 1) {
						select = firsts5[mask];
						assert_geq(mask, 0);
						d.mat_.hMaskSet(row, col, 0);
					} else if(opts > 1) {
#if 1
						// Fixed priority among the remaining options
						if(       (mask & 16) != 0) {
							select = 4; // H diag
						} else if((mask & 1) != 0) {
							select = 0; // H up
						} else if((mask & 4) != 0) {
							select = 2; // F up
						} else if((mask & 2) != 0) {
							select = 1; // H left
						} else if((mask & 8) != 0) {
							select = 3; // E left
						}
#else
						select = randFromMask(rnd, mask);
#endif
						assert_geq(mask, 0);
						// Remember the options not taken for later backtracking
						mask &= ~(1 << select);
						assert(gapsAllowed || mask == (1 << 4) || mask == 0);
						d.mat_.hMaskSet(row, col, mask);
						branch = true;
					} else { /* No way to backtrack! */ }
					if(select != -1) {
						if(select == 4) {
							cur = SW_BT_OALL_DIAG;
						} else if(select == 0) {
							cur = SW_BT_OALL_REF_OPEN;
						} else if(select == 1) {
							cur = SW_BT_OALL_READ_OPEN;
						} else if(select == 2) {
							cur = SW_BT_RFGAP_EXTEND;
						} else {
							// NOTE(review): no trailing semicolon on this
							// assert; whether that is valid depends on how
							// assert_eq expands - confirm.
							assert_eq(3, select)
							cur = SW_BT_RDGAP_EXTEND;
						}
					} else {
						empty = true;
						// It's empty, so the only question left is whether we should be
						// allowed to terminate in this cell.  If it's got a valid score
						// then we *shouldn't* be allowed to terminate here because that
						// means it's part of a larger alignment that was already reported.
						canMoveThru = (origMask == 0);
					}
				}
				assert(!empty || !canMoveThru || ct == SSEMatrix::H);
			}
		}
		d.mat_.setReportedThrough(row, col);
		assert_eq(gaps, Edit::numGaps(ned));
		assert_leq(gaps, rdgap_ + rfgap_);
		// Cell was involved in a previously-reported alignment?
		if(!canMoveThru) {
			if(!btnstack_.empty()) {
				// Remove all the cells from list back to and including the
				// cell where the branch occurred
				btcells_.resize(btnstack_.back().celsz);
				// Pop record off the top of the stack
				ned.resize(btnstack_.back().nedsz);
				//aed.resize(btnstack_.back().aedsz);
				row      = btnstack_.back().row;
				col      = btnstack_.back().col;
				gaps     = btnstack_.back().gaps;
				readGaps = btnstack_.back().readGaps;
				refGaps  = btnstack_.back().refGaps;
				score    = btnstack_.back().score;
				ct       = btnstack_.back().ct;
				btnstack_.pop_back();
				assert(!sc_->monotone || score.score() >= escore);
				NEW_ROW_COL(row, col);
				continue;
			} else {
				// No branch points to revisit; just give up
				res.reset();
				met.btfail++; // DP backtraces failed
				return false;
			}
		}
		assert(!reportedThru);
		assert(!sc_->monotone || score.score() >= minsc_);
		if(empty || row == 0) {
			assert_eq(SSEMatrix::H, ct);
			btcells_.expand();
			btcells_.back().first = row;
			btcells_.back().second = col;
			// This cell is at the end of a legitimate alignment
			trimBeg = row;
			assert_eq(btcells_.size(), dpRows() - trimBeg - trimEnd + readGaps);
			break;
		}
		if(branch) {
			// Add a frame to the backtrack stack
			btnstack_.expand();
			btnstack_.back().init(
				ned.size(),
				0,               // aed.size()
				btcells_.size(),
				row,
				col,
				gaps,
				readGaps,
				refGaps,
				score,
				(int)ct);
		}
		btcells_.expand();
		btcells_.back().first = row;
		btcells_.back().second = col;
		switch(cur) {
			// Move up and to the left.  If the reference nucleotide in the
			// source row mismatches the read nucleotide, penalize
			// it and add a nucleotide mismatch.
			case SW_BT_OALL_DIAG: {
				assert_gt(row, 0); assert_gt(col, 0);
				// Check for color mismatch
				// NOTE(review): the read is indexed with row here but with
				// rdi_ + row at the top of the loop and after it; confirm.
				int readC = (*rd_)[row];
				int refNmask = (int)rf_[rfi_+col];
				assert_gt(refNmask, 0);
				int m = matchesEx(readC, refNmask);
				ct = SSEMatrix::H;
				if(m != 1) {
					Edit e(
						(int)row,
						mask2dna[refNmask],
						"ACGTN"[readC],
						EDIT_TYPE_MM);
					assert(e.repOk());
					assert(ned.empty() || ned.back().pos >= row);
					ned.push_back(e);
					int pen = QUAL2(row, col);
					score.score_ -= pen;
					assert(!sc_->monotone || score.score() >= escore);
				} else {
					// Reward a match
					int64_t bonus = sc_->match(30);
					score.score_ += bonus;
					assert(!sc_->monotone || score.score() >= escore);
				}
				if(m == -1) {
					score.ns_++;
				}
				row--; col--;
				MOVE_ALL_UPLEFT();
				assert(VALID_AL_SCORE(score));
				break;
			}
			// Move up.  Add an edit encoding the ref gap (gap open; the
			// next cell is looked up in H).
			case SW_BT_OALL_REF_OPEN:
			{
				assert_gt(row, 0);
				Edit e(
					(int)row,
					'-',
					"ACGTN"[(int)(*rd_)[row]],
					EDIT_TYPE_REF_GAP);
				assert(e.repOk());
				assert(ned.empty() || ned.back().pos >= row);
				ned.push_back(e);
				assert_geq(row, (size_t)sc_->gapbar);
				assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1);
				row--;
				ct = SSEMatrix::H;
				int pen = sc_->refGapOpen();
				score.score_ -= pen;
				assert(!sc_->monotone || score.score() >= minsc_);
				gaps++; refGaps++;
				assert_eq(gaps, Edit::numGaps(ned));
				assert_leq(gaps, rdgap_ + rfgap_);
				MOVE_ALL_UP();
				break;
			}
			// Move up.  Add an edit encoding the ref gap (gap extension;
			// the next cell is looked up in F).
			case SW_BT_RFGAP_EXTEND:
			{
				assert_gt(row, 1);
				Edit e(
					(int)row,
					'-',
					"ACGTN"[(int)(*rd_)[row]],
					EDIT_TYPE_REF_GAP);
				assert(e.repOk());
				assert(ned.empty() || ned.back().pos >= row);
				ned.push_back(e);
				assert_geq(row, (size_t)sc_->gapbar);
				assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1);
				row--;
				ct = SSEMatrix::F;
				int pen = sc_->refGapExtend();
				score.score_ -= pen;
				assert(!sc_->monotone || score.score() >= minsc_);
				gaps++; refGaps++;
				assert_eq(gaps, Edit::numGaps(ned));
				assert_leq(gaps, rdgap_ + rfgap_);
				MOVE_ALL_UP();
				break;
			}
			// Move left.  Add an edit encoding the read gap (gap open; the
			// next cell is looked up in H).
			case SW_BT_OALL_READ_OPEN:
			{
				assert_gt(col, 0);
				Edit e(
					(int)row+1,
					mask2dna[(int)rf_[rfi_+col]],
					'-',
					EDIT_TYPE_READ_GAP);
				assert(e.repOk());
				assert(ned.empty() || ned.back().pos >= row);
				ned.push_back(e);
				assert_geq(row, (size_t)sc_->gapbar);
				assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1);
				col--;
				ct = SSEMatrix::H;
				int pen = sc_->readGapOpen();
				score.score_ -= pen;
				assert(!sc_->monotone || score.score() >= minsc_);
				gaps++; readGaps++;
				assert_eq(gaps, Edit::numGaps(ned));
				assert_leq(gaps, rdgap_ + rfgap_);
				MOVE_ALL_LEFT();
				break;
			}
			// Move left.  Add an edit encoding the read gap (gap extension;
			// the next cell is looked up in E).
			case SW_BT_RDGAP_EXTEND:
			{
				assert_gt(col, 1);
				Edit e(
					(int)row+1,
					mask2dna[(int)rf_[rfi_+col]],
					'-',
					EDIT_TYPE_READ_GAP);
				assert(e.repOk());
				assert(ned.empty() || ned.back().pos >= row);
				ned.push_back(e);
				assert_geq(row, (size_t)sc_->gapbar);
				assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1);
				col--;
				ct = SSEMatrix::E;
				int pen = sc_->readGapExtend();
				score.score_ -= pen;
				assert(!sc_->monotone || score.score() >= minsc_);
				gaps++; readGaps++;
				assert_eq(gaps, Edit::numGaps(ned));
				assert_leq(gaps, rdgap_ + rfgap_);
				MOVE_ALL_LEFT();
				break;
			}
			default: throw 1;
		}
	} // while((int)row >= 0)
	// End-to-end alignment: nothing may be trimmed from either read end
	assert_eq(0, trimBeg);
	assert_eq(0, trimEnd);
	assert_geq(col, 0);
	assert_eq(SSEMatrix::H, ct);
	// The number of cells in the backtrace should equal the number of read
	// bases after trimming plus the number of gaps
	assert_eq(btcells_.size(), dpRows() - trimBeg - trimEnd + readGaps);
	// Check whether we went through a core diagonal; in debug builds also
	// check that each cell visited before the first hit was marked reported
	bool overlappedCoreDiag = false;
	for(size_t i = 0; i < btcells_.size(); i++) {
		size_t rw = btcells_[i].first;
		size_t cl = btcells_[i].second;
		// Calculate the diagonal within the *trimmed* rectangle, i.e. the
		// rectangle we dealt with in align, gather and backtrack.
		int64_t diagi = cl - rw;
		// Now adjust to the diagonal within the *untrimmed* rectangle by
		// adding on the amount trimmed from the left.
		diagi += rect_->triml;
		if(diagi >= 0) {
			size_t diag = (size_t)diagi;
			if(diag >= rect_->corel && diag <= rect_->corer) {
				overlappedCoreDiag = true;
				break;
			}
		}
		assert(d.mat_.reportedThrough(rw, cl));
	}
	if(!overlappedCoreDiag) {
		// Must overlap a core diagonal.  Otherwise, we run the risk of
		// reporting an alignment that overlaps (and trumps) a higher-scoring
		// alignment that lies partially outside the dynamic programming
		// rectangle.
		res.reset();
		met.corerej++;
		return false;
	}
	// Score the cell the loop stopped on; it was added to btcells_ but no
	// edit or score contribution was recorded for it inside the loop
	int readC = (*rd_)[rdi_+row];      // get last char in read
	int refNmask = (int)rf_[rfi_+col]; // get last ref char ref involved in aln
	assert_gt(refNmask, 0);
	int m = matchesEx(readC, refNmask);
	if(m != 1) {
		Edit e((int)row, mask2dna[refNmask], "ACGTN"[readC], EDIT_TYPE_MM);
		assert(e.repOk());
		assert(ned.empty() || ned.back().pos >= row);
		ned.push_back(e);
		score.score_ -= QUAL2(row, col);
		assert_geq(score.score(), minsc_);
	} else {
		score.score_ += sc_->match(30);
	}
	if(m == -1) {
		score.ns_++;
	}
	if(score.ns_ > nceil_) {
		// Alignment has too many Ns in it!
		res.reset();
		met.nrej++;
		return false;
	}
	// Edits were collected from the read's end toward its start
	res.reverse();
	assert(Edit::repOk(ned, (*rd_)));
	assert_eq(score.score(), escore);
	assert_leq(gaps, rdgap_ + rfgap_);
	off = col;
	assert_lt(col + (size_t)rfi_, (size_t)rff_);
	score.gaps_ = gaps;
	res.alres.setScore(score);
	res.alres.setShape(
		refidx_,                  // ref id
		off + rfi_ + rect_->refl, // 0-based ref offset
		reflen_,                  // reference length
		fw_,                      // aligned to Watson?
		rdf_ - rdi_,              // read length
		0,                        // read ID
		true,                     // pretrim soft?
		0,                        // pretrim 5' end
		0,                        // pretrim 3' end
		true,                     // alignment trim soft?
		fw_ ? trimBeg : trimEnd,  // alignment trim 5' end
		fw_ ? trimEnd : trimBeg); // alignment trim 3' end
	// Count reference positions spanned by the alignment whose value is > 15
	size_t refns = 0;
	for(size_t i = col; i <= origCol; i++) {
		if((int)rf_[rfi_+i] > 15) {
			refns++;
		}
	}
	res.alres.setRefNs(refns);
	assert(Edit::repOk(ned, (*rd_), true, trimBeg, trimEnd));
	assert(res.repOk());
#ifndef NDEBUG
	// Debug-only cross-check: applying the edits to the read must reproduce
	// the reference substring covered by the alignment
	size_t gapsCheck = 0;
	for(size_t i = 0; i < ned.size(); i++) {
		if(ned[i].isGap()) gapsCheck++;
	}
	assert_eq(gaps, gapsCheck);
	BTDnaString refstr;
	for(size_t i = col; i <= origCol; i++) {
		refstr.append(firsts5[(int)rf_[rfi_+i]]);
	}
	BTDnaString editstr;
	// daehwan
	// Edit::toRef((*rd_), ned, editstr, true, trimBeg, trimEnd);
	Edit::toRef((*rd_), ned, editstr, true, trimBeg + rdi_, trimEnd + (rd_->length() - rdf_));
	if(refstr != editstr) {
		cerr << "Decoded nucleotides and edits don't match reference:" << endl;
		cerr << " score: " << score.score()
		     << " (" << gaps << " gaps)" << endl;
		cerr << " edits: ";
		Edit::print(cerr, ned);
		cerr << endl;
		cerr << " decoded nucs: " << (*rd_) << endl;
		cerr << " edited nucs: " << editstr << endl;
		cerr << " reference nucs: " << refstr << endl;
		assert(0);
	}
#endif
	met.btsucc++; // DP backtraces succeeded
	return true;
}
diff --git a/aligner_swsse_ee_u8.cpp b/aligner_swsse_ee_u8.cpp new file mode 100644 index 0000000..8b1fc18 --- /dev/null +++ b/aligner_swsse_ee_u8.cpp @@
-0,0 +1,1902 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * aligner_sw_sse.cpp + * + * Versions of key alignment functions that use vector instructions to + * accelerate dynamic programming. Based chiefly on the striped Smith-Waterman + * paper and implementation by Michael Farrar. See: + * + * Farrar M. Striped Smith-Waterman speeds database searches six times over + * other SIMD implementations. Bioinformatics. 2007 Jan 15;23(2):156-61. + * http://sites.google.com/site/farrarmichael/smith-waterman + * + * While the paper describes an implementation of Smith-Waterman, we extend it + * to do end-to-end read alignment as well as local alignment. The change + * required for this is minor: we simply let vmax be the maximum element in the + * score domain rather than the minimum. + * + * The vectorized dynamic programming implementation lacks some features, which + * makes it hard to adapt to solving the entire dynamic-programming alignment + * problem. For instance: + * + * - It doesn't respect gap barriers on either end of the read + * - It just gives a maximum; not enough information to backtrace without + * redoing some alignment + * - It's a little difficult to handle st_ and en_, especially st_.
+ * - The query profile mechanism makes handling of ambiguous reference bases a + * little tricky (16 cols in query profile lookup table instead of 5) + * + * Given the drawbacks, it is tempting to use SSE dynamic programming as a + * filter rather than as an aligner per se. Here are a few ideas for how it + * can be extended to handle more of the alignment problem: + * + * - Save calculated scores to a big array as we go. We return to this array + * to find and backtrace from good solutions. + */ + +#include <limits> +#include "aligner_sw.h" + +static const size_t NBYTES_PER_REG = 16; +static const size_t NWORDS_PER_REG = 16; +// static const size_t NBITS_PER_WORD = 8; +static const size_t NBYTES_PER_WORD = 1; + +// In end-to-end mode, we start high (255) and go low (0). Factoring in +// a query profile involves unsigned saturating subtraction, so all the +// query profile elements should be expressed as a positive penalty rather +// than a negative score. + +typedef uint8_t TCScore; + +/** + * Build query profile look up tables for the read. The query profile look + * up table is organized as a 1D array indexed by [i][j] where i is the + * reference character in the current DP column (0=A, 1=C, etc), and j is + * the segment of the query we're currently working on. + * + * For reference character i and segment j, vector number 2 * (i * seglen + j) + * of profbuf_ holds the penalties (negated scores, 0..255) and the vector + * right after it holds the gap-barrier mask: 0xff for read rows within + * sc_->gapbar of either end of the read, 0 otherwise. Rows are striped + * across the bytes of a segment: byte k of segment j describes read row + * j + k * seglen. Padding rows past the end of the read get penalty 0 and + * mask 0. + * + * Also records the largest penalty seen (maxPen_) and which segment/byte + * holds the final row (lastIter_/lastWord_). The tables are built once per + * value of fw; later calls return immediately while the matching "built" + * flag (sseU8fwBuilt_/sseU8rcBuilt_) is set. + * + * fw selects the read/quality pair to profile: rdfw_/qufw_ when true, + * rdrc_/qurc_ when false. + */ +void SwAligner::buildQueryProfileEnd2EndSseU8(bool fw) { + bool& done = fw ? sseU8fwBuilt_ : sseU8rcBuilt_; + if(done) { + return; + } + done = true; + const BTDnaString* rd = fw ? rdfw_ : rdrc_; + const BTString* qu = fw ? qufw_ : qurc_; + // daehwan - allows to align a portion of a read, not the whole. + // const size_t len = rd->length(); + const size_t len = dpRows(); + const size_t seglen = (len + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; + // How many __m128i's are needed + size_t n128s = + 64 + // slack bytes, for alignment? + (seglen * ALPHA_SIZE) // query profile data + * 2; // & gap barrier data + assert_gt(n128s, 0); + SSEData& d = fw ?
sseU8fw_ : sseU8rc_; + d.profbuf_.resizeNoCopy(n128s); + assert(!d.profbuf_.empty()); + d.maxPen_ = d.maxBonus_ = 0; + d.lastIter_ = d.lastWord_ = 0; + d.qprofStride_ = d.gbarStride_ = 2; + d.bias_ = 0; // no bias needed for end-to-end alignment; just use subtraction + // For each reference character A, C, G, T, N ... + for(size_t refc = 0; refc < ALPHA_SIZE; refc++) { + // For each segment ... + for(size_t i = 0; i < seglen; i++) { + size_t j = i; + // Byte-wise views of the penalty vector and of the gap-barrier + // vector that immediately follows it + uint8_t *qprofWords = + reinterpret_cast<uint8_t*>(d.profbuf_.ptr() + (refc * seglen * 2) + (i * 2)); + uint8_t *gbarWords = + reinterpret_cast<uint8_t*>(d.profbuf_.ptr() + (refc * seglen * 2) + (i * 2) + 1); + // For each sub-word (byte) ... + for(size_t k = 0; k < NWORDS_PER_REG; k++) { + int sc = 0; + *gbarWords = 0; + if(j < len) { + // NOTE(review): rows are indexed from 0 here although len is + // dpRows(); this presumably assumes the aligned portion starts + // at read offset 0 - confirm against callers that set rdi_ + int readc = (*rd)[j]; + int readq = (*qu)[j]; + sc = sc_->score(readc, (int)(1 << refc), readq - 33); + // Make score positive, to fit in an unsigned + sc = -sc; + assert_range(0, 255, sc); + size_t j_from_end = len - j - 1; + if(j < (size_t)sc_->gapbar || + j_from_end < (size_t)sc_->gapbar) + { + // Inside the gap barrier + *gbarWords = 0xff; + } + } + if(refc == 0 && j == len-1) { + // Remember which 128-bit word and which smaller word has + // the final row + d.lastIter_ = i; + d.lastWord_ = k; + } + if((size_t)sc > d.maxPen_) { + d.maxPen_ = (size_t)sc; + } + *qprofWords = (uint8_t)sc; + gbarWords++; + qprofWords++; + j += seglen; // update offset into query + } + } + } +} + +#ifndef NDEBUG +/** + * Return true iff the cell has sane E/F/H values w/r/t its predecessors.
+ */ +static bool cellOkEnd2EndU8( + SSEData& d, + size_t row, + size_t col, + int refc, + int readc, + int readq, + const Scoring& sc) // scoring scheme +{ + TCScore floorsc = 0; + TAlScore ceilsc = MAX_I64; + TAlScore offsetsc = -0xff; + TAlScore sc_h_cur = (TAlScore)d.mat_.helt(row, col); + TAlScore sc_e_cur = (TAlScore)d.mat_.eelt(row, col); + TAlScore sc_f_cur = (TAlScore)d.mat_.felt(row, col); + if(sc_h_cur > floorsc) { + sc_h_cur += offsetsc; + } + if(sc_e_cur > floorsc) { + sc_e_cur += offsetsc; + } + if(sc_f_cur > floorsc) { + sc_f_cur += offsetsc; + } + bool gapsAllowed = true; + size_t rowFromEnd = d.mat_.nrow() - row - 1; + if(row < (size_t)sc.gapbar || rowFromEnd < (size_t)sc.gapbar) { + gapsAllowed = false; + } + bool e_left_trans = false, h_left_trans = false; + bool f_up_trans = false, h_up_trans = false; + bool h_diag_trans = false; + if(gapsAllowed) { + TAlScore sc_h_left = floorsc; + TAlScore sc_e_left = floorsc; + TAlScore sc_h_up = floorsc; + TAlScore sc_f_up = floorsc; + if(col > 0 && sc_e_cur > floorsc && sc_e_cur <= ceilsc) { + sc_h_left = d.mat_.helt(row, col-1) + offsetsc; + sc_e_left = d.mat_.eelt(row, col-1) + offsetsc; + e_left_trans = (sc_e_left > floorsc && sc_e_cur == sc_e_left - sc.readGapExtend()); + h_left_trans = (sc_h_left > floorsc && sc_e_cur == sc_h_left - sc.readGapOpen()); + assert(e_left_trans || h_left_trans); + // Check that we couldn't have got a better E score + assert_geq(sc_e_cur, sc_e_left - sc.readGapExtend()); + assert_geq(sc_e_cur, sc_h_left - sc.readGapOpen()); + } + if(row > 0 && sc_f_cur > floorsc && sc_f_cur <= ceilsc) { + sc_h_up = d.mat_.helt(row-1, col) + offsetsc; + sc_f_up = d.mat_.felt(row-1, col) + offsetsc; + f_up_trans = (sc_f_up > floorsc && sc_f_cur == sc_f_up - sc.refGapExtend()); + h_up_trans = (sc_h_up > floorsc && sc_f_cur == sc_h_up - sc.refGapOpen()); + assert(f_up_trans || h_up_trans); + // Check that we couldn't have got a better F score + assert_geq(sc_f_cur, sc_f_up - sc.refGapExtend()); 
+ assert_geq(sc_f_cur, sc_h_up - sc.refGapOpen()); + } + } else { + assert_geq(floorsc, sc_e_cur); + assert_geq(floorsc, sc_f_cur); + } + if(col > 0 && row > 0 && sc_h_cur > floorsc && sc_h_cur <= ceilsc) { + TAlScore sc_h_upleft = d.mat_.helt(row-1, col-1) + offsetsc; + TAlScore sc_diag = sc.score(readc, (int)refc, readq - 33); + h_diag_trans = sc_h_cur == sc_h_upleft + sc_diag; + } + assert( + sc_h_cur <= floorsc || + e_left_trans || + h_left_trans || + f_up_trans || + h_up_trans || + h_diag_trans || + sc_h_cur > ceilsc || + row == 0 || + col == 0); + return true; +} +#endif /*ndef NDEBUG*/ + +#ifdef NDEBUG + +#define assert_all_eq0(x) +#define assert_all_gt(x, y) +#define assert_all_gt_lo(x) +#define assert_all_lt(x, y) +#define assert_all_lt_hi(x) + +#else + +#define assert_all_eq0(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_xor_si128(z, z); \ + tmp = _mm_cmpeq_epi16(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_gt(x, y) { \ + __m128i tmp = _mm_cmpgt_epu8(x, y); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_gt_lo(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_xor_si128(z, z); \ + tmp = _mm_cmpgt_epu8(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_lt(x, y) { \ + __m128i z = _mm_setzero_si128(); \ + z = _mm_xor_si128(z, z); \ + __m128i tmp = _mm_subs_epu8(y, x); \ + tmp = _mm_cmpeq_epi16(tmp, z); \ + assert_eq(0x0000, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_lt_hi(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_cmpeq_epu8(z, z); \ + z = _mm_srli_epu8(z, 1); \ + tmp = _mm_cmplt_epu8(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} +#endif + +/** + * Aligns by filling a dynamic programming matrix with the SSE-accelerated, + * banded DP approach of Farrar. 
As it goes, it determines which cells we + * might backtrace from and tallies the best (highest-scoring) N backtrace + * candidate cells per diagonal. Also returns the alignment score of the best + * alignment in the matrix. + * + * This routine does *not* maintain a matrix holding the entire matrix worth of + * scores, nor does it maintain any other dense O(mn) data structure, as this + * would quickly exhaust memory for queries longer than about 10,000 kb. + * Instead, in the fill stage it maintains two columns worth of scores at a + * time (current/previous, or right/left) - these take O(m) space. When + * finished with the current column, it determines which cells from the + * previous column, if any, are candidates we might backtrace from to find a + * full alignment. A candidate cell has a score that rises above the threshold + * and isn't improved upon by a match in the next column. The best N + * candidates per diagonal are stored in a O(m + n) data structure. + * + * In this end-to-end 8-bit variant, as written below, the cell examined for + * each column is the final-row H cell of the column just filled; it is added + * to btdiag_ when its score is >= minsc_ and debug is false. Scores are the + * raw 8-bit cell value minus 0xff. + * + * flag is set to 0 on success, to -1 when the best final-row score is below + * minsc_ (or no column was processed), and to -2 when that score passes + * minsc_ but the best final-row cell equals MIN_U8 (possible saturation). + * When debug is true, metrics are not updated and no backtrace candidates + * are collected. + * + * Returns the best final-row score, also when flag is -1 because of minsc_; + * returns MIN_I64 when flag is -2 or when no column was processed. + */ +TAlScore SwAligner::alignGatherEE8(int& flag, bool debug) { + assert_leq(rdf_, rd_->length()); + assert_leq(rdf_, qu_->length()); + assert_lt(rfi_, rff_); + assert_lt(rdi_, rdf_); + assert_eq(rd_->length(), qu_->length()); + assert_geq(sc_->gapbar, 1); + assert(repOk()); +#ifndef NDEBUG + for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) { + assert_range(0, 16, (int)rf_[i]); + } +#endif + + SSEData& d = fw_ ? sseU8fw_ : sseU8rc_; + SSEMetrics& met = extend_ ? sseU8ExtendMet_ : sseU8MateMet_; + if(!debug) met.dp++; + buildQueryProfileEnd2EndSseU8(fw_); + assert(!d.profbuf_.empty()); + + assert_eq(0, d.maxBonus_); + size_t iter = + (dpRows() + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; // iter = segLen + + int dup; + + // Now set up the score vectors. We just need two columns worth, which + // we'll call "left" and "right".
+ d.vecbuf_.resize(4 * 2 * iter); + d.vecbuf_.zero(); + __m128i *vbuf_l = d.vecbuf_.ptr(); + __m128i *vbuf_r = d.vecbuf_.ptr() + (4 * iter); + + // This is the data structure that holds candidate cells per diagonal. + const size_t ndiags = rff_ - rfi_ + dpRows() - 1; + if(!debug) { + btdiag_.init(ndiags, 2); + } + + // Data structure that holds checkpointed anti-diagonals + TAlScore perfectScore = sc_->perfectScore(dpRows()); + bool checkpoint = true; + bool cpdebug = false; +#ifndef NDEBUG + cpdebug = dpRows() < 1000; +#endif + cper_.init( + dpRows(), // # rows + rff_ - rfi_, // # columns + cperPerPow2_, // checkpoint every 1 << perpow2 diags (& next) + perfectScore, // perfect score (for sanity checks) + true, // matrix cells have 8-bit scores? + cperTri_, // triangular mini-fills? + false, // alignment is local? + cpdebug); // save all cells for debugging? + + // Many thanks to Michael Farrar for releasing his striped Smith-Waterman + // implementation: + // + // http://sites.google.com/site/farrarmichael/smith-waterman + // + // Much of the implementation below is adapted from Michael's code.
+ + // Set all elts to reference gap open penalty + __m128i rfgapo = _mm_setzero_si128(); + __m128i rfgape = _mm_setzero_si128(); + __m128i rdgapo = _mm_setzero_si128(); + __m128i rdgape = _mm_setzero_si128(); + __m128i vlo = _mm_setzero_si128(); + __m128i vhi = _mm_setzero_si128(); + __m128i ve = _mm_setzero_si128(); + __m128i vf = _mm_setzero_si128(); + __m128i vh = _mm_setzero_si128(); + __m128i vhd = _mm_setzero_si128(); + __m128i vhdtmp = _mm_setzero_si128(); + __m128i vtmp = _mm_setzero_si128(); + __m128i vzero = _mm_setzero_si128(); + __m128i vhilsw = _mm_setzero_si128(); + + assert_gt(sc_->refGapOpen(), 0); + assert_leq(sc_->refGapOpen(), MAX_U8); + // dup carries the 8-bit penalty in both bytes of a 16-bit word, so the + // word-wise broadcast below fills all 16 byte lanes + dup = (sc_->refGapOpen() << 8) | (sc_->refGapOpen() & 0x00ff); + rfgapo = _mm_insert_epi16(rfgapo, dup, 0); + rfgapo = _mm_shufflelo_epi16(rfgapo, 0); + rfgapo = _mm_shuffle_epi32(rfgapo, 0); + + // Set all elts to reference gap extension penalty + assert_gt(sc_->refGapExtend(), 0); + assert_leq(sc_->refGapExtend(), MAX_U8); + assert_leq(sc_->refGapExtend(), sc_->refGapOpen()); + dup = (sc_->refGapExtend() << 8) | (sc_->refGapExtend() & 0x00ff); + rfgape = _mm_insert_epi16(rfgape, dup, 0); + rfgape = _mm_shufflelo_epi16(rfgape, 0); + rfgape = _mm_shuffle_epi32(rfgape, 0); + + // Set all elts to read gap open penalty + assert_gt(sc_->readGapOpen(), 0); + assert_leq(sc_->readGapOpen(), MAX_U8); + dup = (sc_->readGapOpen() << 8) | (sc_->readGapOpen() & 0x00ff); + rdgapo = _mm_insert_epi16(rdgapo, dup, 0); + rdgapo = _mm_shufflelo_epi16(rdgapo, 0); + rdgapo = _mm_shuffle_epi32(rdgapo, 0); + + // Set all elts to read gap extension penalty + assert_gt(sc_->readGapExtend(), 0); + assert_leq(sc_->readGapExtend(), MAX_U8); + assert_leq(sc_->readGapExtend(), sc_->readGapOpen()); + dup = (sc_->readGapExtend() << 8) | (sc_->readGapExtend() & 0x00ff); + rdgape = _mm_insert_epi16(rdgape, dup, 0); + rdgape = _mm_shufflelo_epi16(rdgape, 0); + rdgape = _mm_shuffle_epi32(rdgape, 0); + + vhi = _mm_cmpeq_epi16(vhi, vhi); // all
elts = 0xffff + vlo = _mm_xor_si128(vlo, vlo); // all elts = 0 + + // vhilsw: topmost (least sig) byte set to 0xff, all other bytes=0 + vhilsw = _mm_shuffle_epi32(vhi, 0); + vhilsw = _mm_srli_si128(vhilsw, NBYTES_PER_REG - NBYTES_PER_WORD); + + // NOTE(review): the index % 3 description below does not match the + // pointers set up further down, which step by ROWSTRIDE_2COL with E at + // offset 0, F at offset 1 and H at offset 2. + // + // Points to a long vector of __m128i where each element is a block of + // contiguous cells in the E, F or H matrix. If the index % 3 == 0, then + // the block of cells is from the E matrix. If index % 3 == 1, they're + // from the F matrix. If index % 3 == 2, then they're from the H matrix. + // Blocks of cells are organized in the same interleaved manner as they are + // calculated by the Farrar algorithm. + const __m128i *pvScore; // points into the query profile + + const size_t colstride = ROWSTRIDE_2COL * iter; + + // Initialize the H and E vectors in the first matrix column + __m128i *pvELeft = vbuf_l + 0; __m128i *pvERight = vbuf_r + 0; + /* __m128i *pvFLeft = vbuf_l + 1; */ __m128i *pvFRight = vbuf_r + 1; + __m128i *pvHLeft = vbuf_l + 2; __m128i *pvHRight = vbuf_r + 2; + + // Maximum score in final row + bool found = false; + TCScore lrmax = MIN_U8; + + for(size_t i = 0; i < iter; i++) { + _mm_store_si128(pvERight, vlo); pvERight += ROWSTRIDE_2COL; + // Could initialize Hs to high or low. If high, cells in the lower + // triangle will have somewhat more legitimate scores, but still won't + // be exhaustively scored. + _mm_store_si128(pvHRight, vlo); pvHRight += ROWSTRIDE_2COL; + } + + assert_gt(sc_->gapbar, 0); + size_t nfixup = 0; + + // Fill in the table as usual but instead of using the same gap-penalty + // vector for each iteration of the inner loop, load words out of a + // pre-calculated gap vector parallel to the query profile. The pre- + // calculated gap vectors enforce the gap barrier constraint by making it + // infinitely costly to introduce a gap in barrier rows. + // + // AND use a separate loop to fill in the first row of the table, enforcing + // the st_ constraints in the process.
This is awkward because it + // separates the processing of the first row from the others and might make + // it difficult to use the first-row results in the next row, but it might + // be the simplest and least disruptive way to deal with the st_ constraint. + + for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) { + // Swap left and right; vbuf_l is the vector on the left, which we + // generally load from, and vbuf_r is the vector on the right, which we + // generally store to. + swap(vbuf_l, vbuf_r); + pvELeft = vbuf_l + 0; pvERight = vbuf_r + 0; + /* pvFLeft = vbuf_l + 1; */ pvFRight = vbuf_r + 1; + pvHLeft = vbuf_l + 2; pvHRight = vbuf_r + 2; + + // Fetch the reference character for this column. + // NOTE(review): the value is mapped through firsts5 below and the + // asserts above bound it to 0..16, which suggests rf_ holds masks + // here rather than plain numbers - confirm. + const int refc = (int)rf_[i]; + + // Fetch the appropriate query profile + size_t off = (size_t)firsts5[refc] * iter * 2; + pvScore = d.profbuf_.ptr() + off; // even elts = query profile, odd = gap barrier + + // Set all cells to low value + vf = _mm_xor_si128(vf, vf); + + // Load H vector from the final row of the previous column + vh = _mm_load_si128(pvHLeft + colstride - ROWSTRIDE_2COL); + // Shift by one cell (NBYTES_PER_WORD byte) so that topmost (least sig) cell gets 0 + vh = _mm_slli_si128(vh, NBYTES_PER_WORD); + // Fill topmost (least sig) cell with high value + vh = _mm_or_si128(vh, vhilsw); + + // For each segment (vector) of the current column: + size_t j; + for(j = 0; j < iter; j++) { + // Load cells from E, calculated previously + ve = _mm_load_si128(pvELeft); + vhd = _mm_load_si128(pvHLeft); + assert_all_lt(ve, vhi); + pvELeft += ROWSTRIDE_2COL; + + // Store cells in F, calculated previously + vf = _mm_subs_epu8(vf, pvScore[1]); // veto some ref gap extensions + _mm_store_si128(pvFRight, vf); + pvFRight += ROWSTRIDE_2COL; + + // Factor in query profile (matches and mismatches) + vh = _mm_subs_epu8(vh, pvScore[0]); + + // Update H, factoring in E and F + vh = _mm_max_epu8(vh, vf); + + // Update vE value + vhdtmp = vhd; + vhd =
_mm_subs_epu8(vhd, rdgapo); + vhd = _mm_subs_epu8(vhd, pvScore[1]); // veto some read gap opens + ve = _mm_subs_epu8(ve, rdgape); + ve = _mm_max_epu8(ve, vhd); + vh = _mm_max_epu8(vh, ve); + + // Save the new vH values + _mm_store_si128(pvHRight, vh); + pvHRight += ROWSTRIDE_2COL; + vtmp = vh; + assert_all_lt(ve, vhi); + + // Load the next h value + vh = vhdtmp; + pvHLeft += ROWSTRIDE_2COL; + + // Save E values + _mm_store_si128(pvERight, ve); + pvERight += ROWSTRIDE_2COL; + + // Update vf value + vtmp = _mm_subs_epu8(vtmp, rfgapo); + + vf = _mm_subs_epu8(vf, rfgape); + assert_all_lt(vf, vhi); + vf = _mm_max_epu8(vf, vtmp); + + pvScore += 2; // move on to next query profile / gap veto + } + // pvHStore, pvELoad, pvEStore have all rolled over to the next column + pvFRight -= colstride; // reset to start of column + vtmp = _mm_load_si128(pvFRight); + + pvHRight -= colstride; // reset to start of column + vh = _mm_load_si128(pvHRight); + + pvScore = d.profbuf_.ptr() + off + 1; // reset veto vector + + // vf from last row gets shifted down by one to overlay the first row + // rfgape has already been subtracted from it. + vf = _mm_slli_si128(vf, NBYTES_PER_WORD); + + vf = _mm_subs_epu8(vf, *pvScore); // veto some ref gap extensions + vf = _mm_max_epu8(vtmp, vf); + vtmp = _mm_subs_epu8(vf, vtmp); + vtmp = _mm_cmpeq_epi8(vtmp, vzero); + int cmp = _mm_movemask_epi8(vtmp); + + // If any element of vtmp is greater than H - gap-open...
+ j = 0; + // Fixup loop: runs while some lane of the propagated vf still exceeds + // the F value already stored for that row (cmp has a 0 bit for it) + while(cmp != 0xffff) { + // Store this vf + _mm_store_si128(pvFRight, vf); + pvFRight += ROWSTRIDE_2COL; + + // Update vh w/r/t new vf + vh = _mm_max_epu8(vh, vf); + + // Save vH values + _mm_store_si128(pvHRight, vh); + pvHRight += ROWSTRIDE_2COL; + + pvScore += 2; + + assert_lt(j, iter); + if(++j == iter) { + pvFRight -= colstride; + vtmp = _mm_load_si128(pvFRight); // load next vf ASAP + pvHRight -= colstride; + vh = _mm_load_si128(pvHRight); // load next vh ASAP + pvScore = d.profbuf_.ptr() + off + 1; + j = 0; + vf = _mm_slli_si128(vf, NBYTES_PER_WORD); + } else { + vtmp = _mm_load_si128(pvFRight); // load next vf ASAP + vh = _mm_load_si128(pvHRight); // load next vh ASAP + } + + // Update F with another gap extension + vf = _mm_subs_epu8(vf, rfgape); + vf = _mm_subs_epu8(vf, *pvScore); // veto some ref gap extensions + vf = _mm_max_epu8(vtmp, vf); + vtmp = _mm_subs_epu8(vf, vtmp); + vtmp = _mm_cmpeq_epi8(vtmp, vzero); + cmp = _mm_movemask_epi8(vtmp); + nfixup++; + } + + // Check in the last row for the maximum so far + // NOTE: this pointer shadows the __m128i vtmp declared at function scope + __m128i *vtmp = vbuf_r + 2 /* H */ + (d.lastIter_ * ROWSTRIDE_2COL); + // Note: we may not want to extract from the final row + TCScore lr = ((TCScore*)(vtmp))[d.lastWord_]; + found = true; + if(lr > lrmax) { + lrmax = lr; + } + + // Now we'd like to know whether the bottommost element of the right + // column is a candidate we might backtrace from. First question is: + // did it exceed the minimum score threshold?
+ TAlScore score = (TAlScore)(lr - 0xff); + if(lr == MIN_U8) { + score = MIN_I64; + } + if(!debug && score >= minsc_) { + DpBtCandidate cand(dpRows() - 1, i - rfi_, score); + btdiag_.add(i - rfi_, cand); + } + + // Save some elements to checkpoints + if(checkpoint) { + + __m128i *pvE = vbuf_r + 0; + __m128i *pvF = vbuf_r + 1; + __m128i *pvH = vbuf_r + 2; + size_t coli = i - rfi_; + if(coli < cper_.locol_) cper_.locol_ = coli; + if(coli > cper_.hicol_) cper_.hicol_ = coli; + + if(cperTri_) { + size_t rc_mod = coli & cper_.lomask_; + assert_lt(rc_mod, cper_.per_); + int64_t row = -(int64_t)rc_mod-1; + int64_t row_mod = row; + int64_t row_div = 0; + size_t idx = coli >> cper_.perpow2_; + size_t idxrow = idx * cper_.nrow_; + assert_eq(4, ROWSTRIDE_2COL); + bool done = false; + while(true) { + row += (cper_.per_ - 2); + row_mod += (cper_.per_ - 2); + for(size_t j = 0; j < 2; j++) { + row++; + row_mod++; + if(row >= 0 && (size_t)row < cper_.nrow_) { + // Update row divided by iter_ and mod iter_ + while(row_mod >= (int64_t)iter) { + row_mod -= (int64_t)iter; + row_div++; + } + size_t delt = idxrow + row; + // Byte offset of the cell: segments are ROWSTRIDE_2COL (4) + // 16-byte vectors apart, i.e. 64 bytes, hence << 6; row_div + // is the byte lane within the vector + size_t vecoff = (row_mod << 6) + row_div; + assert_lt(row_div, 16); + int16_t h_sc = ((uint8_t*)pvH)[vecoff]; + int16_t e_sc = ((uint8_t*)pvE)[vecoff]; + int16_t f_sc = ((uint8_t*)pvF)[vecoff]; + if(h_sc == 0) h_sc = MIN_I16; + else h_sc -= 0xff; + if(e_sc == 0) e_sc = MIN_I16; + else e_sc -= 0xff; + if(f_sc == 0) f_sc = MIN_I16; + else f_sc -= 0xff; + assert_leq(h_sc, cper_.perf_); + assert_leq(e_sc, cper_.perf_); + assert_leq(f_sc, cper_.perf_); + CpQuad *qdiags = ((j == 0) ?
cper_.qdiag1s_.ptr() : cper_.qdiag2s_.ptr()); + qdiags[delt].sc[0] = h_sc; + qdiags[delt].sc[1] = e_sc; + qdiags[delt].sc[2] = f_sc; + } // if(row >= 0 && row < nrow_) + else if(row >= 0 && (size_t)row >= cper_.nrow_) { + done = true; + break; + } + } // end of loop over anti-diags + if(done) { + break; + } + idx++; + idxrow += cper_.nrow_; + } + } else { + // If this is the first column, take this opportunity to + // pre-calculate the coordinates of the elements we're going to + // checkpoint. + if(coli == 0) { + size_t cpi = cper_.per_-1; + size_t cpimod = cper_.per_-1; + size_t cpidiv = 0; + cper_.commitMap_.clear(); + while(cpi < cper_.nrow_) { + while(cpimod >= iter) { + cpimod -= iter; + cpidiv++; + } + size_t vecoff = (cpimod << 6) + cpidiv; + cper_.commitMap_.push_back(vecoff); + cpi += cper_.per_; + cpimod += cper_.per_; + } + } + // Save all the rows + size_t rowoff = 0; + size_t sz = cper_.commitMap_.size(); + for(size_t i = 0; i < sz; i++, rowoff += cper_.ncol_) { + size_t vecoff = cper_.commitMap_[i]; + int16_t h_sc = ((uint8_t*)pvH)[vecoff]; + //int16_t e_sc = ((uint8_t*)pvE)[vecoff]; + int16_t f_sc = ((uint8_t*)pvF)[vecoff]; + if(h_sc == 0) h_sc = MIN_I16; + else h_sc -= 0xff; + //if(e_sc == 0) e_sc = MIN_I16; + //else e_sc -= 0xff; + if(f_sc == 0) f_sc = MIN_I16; + else f_sc -= 0xff; + assert_leq(h_sc, cper_.perf_); + //assert_leq(e_sc, cper_.perf_); + assert_leq(f_sc, cper_.perf_); + CpQuad& dst = cper_.qrows_[rowoff + coli]; + dst.sc[0] = h_sc; + //dst.sc[1] = e_sc; + dst.sc[2] = f_sc; + } + // Is this a column we'd like to checkpoint?
+ if((coli & cper_.lomask_) == cper_.lomask_) { + // Save the column using memcpys + assert_gt(coli, 0); + size_t wordspercol = cper_.niter_ * ROWSTRIDE_2COL; + size_t coloff = (coli >> cper_.perpow2_) * wordspercol; + __m128i *dst = cper_.qcols_.ptr() + coloff; + memcpy(dst, vbuf_r, sizeof(__m128i) * wordspercol); + } + } + if(cper_.debug_) { + // Save the column using memcpys + size_t wordspercol = cper_.niter_ * ROWSTRIDE_2COL; + size_t coloff = coli * wordspercol; + __m128i *dst = cper_.qcolsD_.ptr() + coloff; + memcpy(dst, vbuf_r, sizeof(__m128i) * wordspercol); + } + } + } + + // Update metrics + if(!debug) { + size_t ninner = (rff_ - rfi_) * iter; + met.col += (rff_ - rfi_); // DP columns + met.cell += (ninner * NWORDS_PER_REG); // DP cells + met.inner += ninner; // DP inner loop iters + met.fixup += nfixup; // DP fixup loop iters + } + + flag = 0; + + // Did we find a solution? + TAlScore score = MIN_I64; + if(!found) { + flag = -1; // no + if(!debug) met.dpfail++; + return MIN_I64; + } else { + score = (TAlScore)(lrmax - 0xff); + if(score < minsc_) { + flag = -1; // no + if(!debug) met.dpfail++; + return score; + } + } + + // Could we have saturated? + if(lrmax == MIN_U8) { + flag = -2; // yes + if(!debug) met.dpsat++; + return MIN_I64; + } + + // Now take all the backtrace candidates in the btdiag_ structure and + // dump them into the btncand_ array. They'll be sorted later. + if(!debug) { + btdiag_.dump(btncand_); + assert(!btncand_.empty()); + } + + // Return largest score + if(!debug) met.dpsucc++; + return score; +} + +/** + * Solve the current alignment problem using SSE instructions that operate on 16 + * unsigned 8-bit values packed into a single 128-bit register.
+ */ +TAlScore SwAligner::alignNucleotidesEnd2EndSseU8(int& flag, bool debug) { + assert_leq(rdf_, rd_->length()); + assert_leq(rdf_, qu_->length()); + assert_lt(rfi_, rff_); + assert_lt(rdi_, rdf_); + assert_eq(rd_->length(), qu_->length()); + assert_geq(sc_->gapbar, 1); + assert(repOk()); +#ifndef NDEBUG + for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) { + assert_range(0, 16, (int)rf_[i]); + } +#endif + + SSEData& d = fw_ ? sseU8fw_ : sseU8rc_; + SSEMetrics& met = extend_ ? sseU8ExtendMet_ : sseU8MateMet_; + if(!debug) met.dp++; + buildQueryProfileEnd2EndSseU8(fw_); + assert(!d.profbuf_.empty()); + + assert_eq(0, d.maxBonus_); + size_t iter = + (dpRows() + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; // iter = segLen + + int dup; + + // Many thanks to Michael Farrar for releasing his striped Smith-Waterman + // implementation: + // + // http://sites.google.com/site/farrarmichael/smith-waterman + // + // Much of the implmentation below is adapted from Michael's code. + + // Set all elts to reference gap open penalty + __m128i rfgapo = _mm_setzero_si128(); + __m128i rfgape = _mm_setzero_si128(); + __m128i rdgapo = _mm_setzero_si128(); + __m128i rdgape = _mm_setzero_si128(); + __m128i vlo = _mm_setzero_si128(); + __m128i vhi = _mm_setzero_si128(); + __m128i ve = _mm_setzero_si128(); + __m128i vf = _mm_setzero_si128(); + __m128i vh = _mm_setzero_si128(); +#if 0 + __m128i vhd = _mm_setzero_si128(); + __m128i vhdtmp = _mm_setzero_si128(); +#endif + __m128i vtmp = _mm_setzero_si128(); + __m128i vzero = _mm_setzero_si128(); + __m128i vhilsw = _mm_setzero_si128(); + + assert_gt(sc_->refGapOpen(), 0); + assert_leq(sc_->refGapOpen(), MAX_U8); + dup = (sc_->refGapOpen() << 8) | (sc_->refGapOpen() & 0x00ff); + rfgapo = _mm_insert_epi16(rfgapo, dup, 0); + rfgapo = _mm_shufflelo_epi16(rfgapo, 0); + rfgapo = _mm_shuffle_epi32(rfgapo, 0); + + // Set all elts to reference gap extension penalty + assert_gt(sc_->refGapExtend(), 0); + assert_leq(sc_->refGapExtend(), MAX_U8); + 
assert_leq(sc_->refGapExtend(), sc_->refGapOpen()); + dup = (sc_->refGapExtend() << 8) | (sc_->refGapExtend() & 0x00ff); + rfgape = _mm_insert_epi16(rfgape, dup, 0); + rfgape = _mm_shufflelo_epi16(rfgape, 0); + rfgape = _mm_shuffle_epi32(rfgape, 0); + + // Set all elts to read gap open penalty + assert_gt(sc_->readGapOpen(), 0); + assert_leq(sc_->readGapOpen(), MAX_U8); + dup = (sc_->readGapOpen() << 8) | (sc_->readGapOpen() & 0x00ff); + rdgapo = _mm_insert_epi16(rdgapo, dup, 0); + rdgapo = _mm_shufflelo_epi16(rdgapo, 0); + rdgapo = _mm_shuffle_epi32(rdgapo, 0); + + // Set all elts to read gap extension penalty + assert_gt(sc_->readGapExtend(), 0); + assert_leq(sc_->readGapExtend(), MAX_U8); + assert_leq(sc_->readGapExtend(), sc_->readGapOpen()); + dup = (sc_->readGapExtend() << 8) | (sc_->readGapExtend() & 0x00ff); + rdgape = _mm_insert_epi16(rdgape, dup, 0); + rdgape = _mm_shufflelo_epi16(rdgape, 0); + rdgape = _mm_shuffle_epi32(rdgape, 0); + + vhi = _mm_cmpeq_epi16(vhi, vhi); // all elts = 0xffff + vlo = _mm_xor_si128(vlo, vlo); // all elts = 0 + + // vhilsw: topmost (least sig) word set to 0x7fff, all other words=0 + vhilsw = _mm_shuffle_epi32(vhi, 0); + vhilsw = _mm_srli_si128(vhilsw, NBYTES_PER_REG - NBYTES_PER_WORD); + + // Points to a long vector of __m128i where each element is a block of + // contiguous cells in the E, F or H matrix. If the index % 3 == 0, then + // the block of cells is from the E matrix. If index % 3 == 1, they're + // from the F matrix. If index % 3 == 2, then they're from the H matrix. + // Blocks of cells are organized in the same interleaved manner as they are + // calculated by the Farrar algorithm. 
+ const __m128i *pvScore; // points into the query profile + + d.mat_.init(dpRows(), rff_ - rfi_, NWORDS_PER_REG); + const size_t colstride = d.mat_.colstride(); + //const size_t rowstride = d.mat_.rowstride(); + assert_eq(ROWSTRIDE, colstride / iter); + + // Initialize the H and E vectors in the first matrix column + __m128i *pvHTmp = d.mat_.tmpvec(0, 0); + __m128i *pvETmp = d.mat_.evec(0, 0); + + // Maximum score in final row + bool found = false; + TCScore lrmax = MIN_U8; + + for(size_t i = 0; i < iter; i++) { + _mm_store_si128(pvETmp, vlo); + _mm_store_si128(pvHTmp, vlo); // start high in end-to-end mode + pvETmp += ROWSTRIDE; + pvHTmp += ROWSTRIDE; + } + // These are swapped just before the innermost loop + __m128i *pvHStore = d.mat_.hvec(0, 0); + __m128i *pvHLoad = d.mat_.tmpvec(0, 0); + __m128i *pvELoad = d.mat_.evec(0, 0); + __m128i *pvEStore = d.mat_.evecUnsafe(0, 1); + __m128i *pvFStore = d.mat_.fvec(0, 0); + __m128i *pvFTmp = NULL; + + assert_gt(sc_->gapbar, 0); + size_t nfixup = 0; + + // Fill in the table as usual but instead of using the same gap-penalty + // vector for each iteration of the inner loop, load words out of a + // pre-calculated gap vector parallel to the query profile. The pre- + // calculated gap vectors enforce the gap barrier constraint by making it + // infinitely costly to introduce a gap in barrier rows. + // + // AND use a separate loop to fill in the first row of the table, enforcing + // the st_ constraints in the process. This is awkward because it + // separates the processing of the first row from the others and might make + // it difficult to use the first-row results in the next row, but it might + // be the simplest and least disruptive way to deal with the st_ constraint. + + colstop_ = rff_ - 1; + lastsolcol_ = 0; + + for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) { + assert(pvFStore == d.mat_.fvec(0, i - rfi_)); + assert(pvHStore == d.mat_.hvec(0, i - rfi_)); + + // Fetch the appropriate query profile. 
Note that elements of rf_ must + // be numbers, not masks. + const int refc = (int)rf_[i]; + size_t off = (size_t)firsts5[refc] * iter * 2; + pvScore = d.profbuf_.ptr() + off; // even elts = query profile, odd = gap barrier + + // Set all cells to low value + vf = _mm_xor_si128(vf, vf); + + // Load H vector from the final row of the previous column + vh = _mm_load_si128(pvHLoad + colstride - ROWSTRIDE); + // Shift 2 bytes down so that topmost (least sig) cell gets 0 + vh = _mm_slli_si128(vh, NBYTES_PER_WORD); + // Fill topmost (least sig) cell with high value + vh = _mm_or_si128(vh, vhilsw); + + // For each character in the reference text: + size_t j; + for(j = 0; j < iter; j++) { + // Load cells from E, calculated previously + ve = _mm_load_si128(pvELoad); +#if 0 + vhd = _mm_load_si128(pvHLoad); +#endif + assert_all_lt(ve, vhi); + pvELoad += ROWSTRIDE; + + // Store cells in F, calculated previously + vf = _mm_subs_epu8(vf, pvScore[1]); // veto some ref gap extensions + _mm_store_si128(pvFStore, vf); + pvFStore += ROWSTRIDE; + + // Factor in query profile (matches and mismatches) + vh = _mm_subs_epu8(vh, pvScore[0]); + + // Update H, factoring in E and F + vh = _mm_max_epu8(vh, ve); + vh = _mm_max_epu8(vh, vf); + + // Save the new vH values + _mm_store_si128(pvHStore, vh); + pvHStore += ROWSTRIDE; + + // Update vE value + vtmp = vh; +#if 0 + vhdtmp = vhd; + vhd = _mm_subs_epu8(vhd, rdgapo); + vhd = _mm_subs_epu8(vhd, pvScore[1]); // veto some read gap opens + ve = _mm_subs_epu8(ve, rdgape); + ve = _mm_max_epu8(ve, vhd); +#else + vh = _mm_subs_epu8(vh, rdgapo); + vh = _mm_subs_epu8(vh, pvScore[1]); // veto some read gap opens + ve = _mm_subs_epu8(ve, rdgape); + ve = _mm_max_epu8(ve, vh); +#endif + assert_all_lt(ve, vhi); + + // Load the next h value +#if 0 + vh = vhdtmp; +#else + vh = _mm_load_si128(pvHLoad); +#endif + pvHLoad += ROWSTRIDE; + + // Save E values + _mm_store_si128(pvEStore, ve); + pvEStore += ROWSTRIDE; + + // Update vf value + vtmp = 
_mm_subs_epu8(vtmp, rfgapo); + vf = _mm_subs_epu8(vf, rfgape); + assert_all_lt(vf, vhi); + vf = _mm_max_epu8(vf, vtmp); + + pvScore += 2; // move on to next query profile / gap veto + } + // pvHStore, pvELoad, pvEStore have all rolled over to the next column + pvFTmp = pvFStore; + pvFStore -= colstride; // reset to start of column + vtmp = _mm_load_si128(pvFStore); + + pvHStore -= colstride; // reset to start of column + vh = _mm_load_si128(pvHStore); + +#if 0 +#else + pvEStore -= colstride; // reset to start of column + ve = _mm_load_si128(pvEStore); +#endif + + pvHLoad = pvHStore; // new pvHLoad = pvHStore + pvScore = d.profbuf_.ptr() + off + 1; // reset veto vector + + // vf from last row gets shifted down by one to overlay the first row + // rfgape has already been subtracted from it. + vf = _mm_slli_si128(vf, NBYTES_PER_WORD); + + vf = _mm_subs_epu8(vf, *pvScore); // veto some ref gap extensions + vf = _mm_max_epu8(vtmp, vf); + vtmp = _mm_subs_epu8(vf, vtmp); + vtmp = _mm_cmpeq_epi8(vtmp, vzero); + int cmp = _mm_movemask_epi8(vtmp); + + // If any element of vtmp is greater than H - gap-open... 
+ j = 0; + while(cmp != 0xffff) { + // Store this vf + _mm_store_si128(pvFStore, vf); + pvFStore += ROWSTRIDE; + + // Update vh w/r/t new vf + vh = _mm_max_epu8(vh, vf); + + // Save vH values + _mm_store_si128(pvHStore, vh); + pvHStore += ROWSTRIDE; + + // Update E in case it can be improved using our new vh +#if 0 +#else + vh = _mm_subs_epu8(vh, rdgapo); + vh = _mm_subs_epu8(vh, *pvScore); // veto some read gap opens + ve = _mm_max_epu8(ve, vh); + _mm_store_si128(pvEStore, ve); + pvEStore += ROWSTRIDE; +#endif + pvScore += 2; + + assert_lt(j, iter); + if(++j == iter) { + pvFStore -= colstride; + vtmp = _mm_load_si128(pvFStore); // load next vf ASAP + pvHStore -= colstride; + vh = _mm_load_si128(pvHStore); // load next vh ASAP +#if 0 +#else + pvEStore -= colstride; + ve = _mm_load_si128(pvEStore); // load next ve ASAP +#endif + pvScore = d.profbuf_.ptr() + off + 1; + j = 0; + vf = _mm_slli_si128(vf, NBYTES_PER_WORD); + } else { + vtmp = _mm_load_si128(pvFStore); // load next vf ASAP + vh = _mm_load_si128(pvHStore); // load next vh ASAP +#if 0 +#else + ve = _mm_load_si128(pvEStore); // load next vh ASAP +#endif + } + + // Update F with another gap extension + vf = _mm_subs_epu8(vf, rfgape); + vf = _mm_subs_epu8(vf, *pvScore); // veto some ref gap extensions + vf = _mm_max_epu8(vtmp, vf); + vtmp = _mm_subs_epu8(vf, vtmp); + vtmp = _mm_cmpeq_epi8(vtmp, vzero); + cmp = _mm_movemask_epi8(vtmp); + nfixup++; + } + +#ifndef NDEBUG + if(true && (rand() & 15) == 0) { + // This is a work-intensive sanity check; each time we finish filling + // a column, we check that each H, E, and F is sensible. 
+ for(size_t k = 0; k < dpRows(); k++) { + assert(cellOkEnd2EndU8( + d, + k, // row + i - rfi_, // col + refc, // reference mask + (int)(*rd_)[rdi_+k], // read char + (int)(*qu_)[rdi_+k], // read quality + *sc_)); // scoring scheme + } + } +#endif + + __m128i *vtmp = d.mat_.hvec(d.lastIter_, i-rfi_); + // Note: we may not want to extract from the final row + TCScore lr = ((TCScore*)(vtmp))[d.lastWord_]; + found = true; + if(lr > lrmax) { + lrmax = lr; + } + + // pvELoad and pvHLoad are already where they need to be + + // Adjust the load and store vectors here. + pvHStore = pvHLoad + colstride; + pvEStore = pvELoad + colstride; + pvFStore = pvFTmp; + } + + // Update metrics + if(!debug) { + size_t ninner = (rff_ - rfi_) * iter; + met.col += (rff_ - rfi_); // DP columns + met.cell += (ninner * NWORDS_PER_REG); // DP cells + met.inner += ninner; // DP inner loop iters + met.fixup += nfixup; // DP fixup loop iters + } + + flag = 0; + + // Did we find a solution? + TAlScore score = MIN_I64; + if(!found) { + flag = -1; // no + if(!debug) met.dpfail++; + return MIN_I64; + } else { + score = (TAlScore)(lrmax - 0xff); + if(score < minsc_) { + flag = -1; // no + if(!debug) met.dpfail++; + return score; + } + } + + // Could we have saturated? + if(lrmax == MIN_U8) { + flag = -2; // yes + if(!debug) met.dpsat++; + return MIN_I64; + } + + // Return largest score + if(!debug) met.dpsucc++; + return score; +} + +/** + * Given a filled-in DP table, populate the btncand_ list with candidate cells + * that might be at the ends of valid alignments. No need to do this unless + * the maximum score returned by the align*() func is >= the minimum. + * + * Only cells that are exhaustively scored are candidates. 
Those are the + * cells inside the shape made of o's in this: + * + * |-maxgaps-| + * ********************************* - + * ******************************** | + * ******************************* | + * ****************************** | + * ***************************** | + * **************************** read len + * *************************** | + * ************************** | + * ************************* | + * ************************ | + * ***********oooooooooooo - + * |-maxgaps-| + * |-readlen-| + * |-------skip--------| + * + * And it's possible for the shape to be truncated on the left and right sides. + * + * + */ +bool SwAligner::gatherCellsNucleotidesEnd2EndSseU8(TAlScore best) { + // What's the minimum number of rows that can possibly be spanned by an + // alignment that meets the minimum score requirement? + assert(sse8succ_); + const size_t ncol = rff_ - rfi_; + const size_t nrow = dpRows(); + assert_gt(nrow, 0); + btncand_.clear(); + btncanddone_.clear(); + SSEData& d = fw_ ? sseU8fw_ : sseU8rc_; + SSEMetrics& met = extend_ ? 
sseU8ExtendMet_ : sseU8MateMet_; + assert(!d.profbuf_.empty()); + const size_t colstride = d.mat_.colstride(); + ASSERT_ONLY(bool sawbest = false); + __m128i *pvH = d.mat_.hvec(d.lastIter_, 0); + for(size_t j = 0; j < ncol; j++) { + TAlScore sc = (TAlScore)(((TCScore*)pvH)[d.lastWord_] - 0xff); + assert_leq(sc, best); + ASSERT_ONLY(sawbest = (sawbest || sc == best)); + if(sc >= minsc_) { + // Yes, this is legit + met.gathsol++; + btncand_.expand(); + btncand_.back().init(nrow-1, j, sc); + } + pvH += colstride; + } + assert(sawbest); + if(!btncand_.empty()) { + d.mat_.initMasks(); + } + return !btncand_.empty(); +} + +#define MOVE_VEC_PTR_UP(vec, rowvec, rowelt) { \ + if(rowvec == 0) { \ + rowvec += d.mat_.nvecrow_; \ + vec += d.mat_.colstride_; \ + rowelt--; \ + } \ + rowvec--; \ + vec -= ROWSTRIDE; \ +} + +#define MOVE_VEC_PTR_LEFT(vec, rowvec, rowelt) { vec -= d.mat_.colstride_; } + +#define MOVE_VEC_PTR_UPLEFT(vec, rowvec, rowelt) { \ + MOVE_VEC_PTR_UP(vec, rowvec, rowelt); \ + MOVE_VEC_PTR_LEFT(vec, rowvec, rowelt); \ +} + +#define MOVE_ALL_LEFT() { \ + MOVE_VEC_PTR_LEFT(cur_vec, rowvec, rowelt); \ + MOVE_VEC_PTR_LEFT(left_vec, left_rowvec, left_rowelt); \ + MOVE_VEC_PTR_LEFT(up_vec, up_rowvec, up_rowelt); \ + MOVE_VEC_PTR_LEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \ +} + +#define MOVE_ALL_UP() { \ + MOVE_VEC_PTR_UP(cur_vec, rowvec, rowelt); \ + MOVE_VEC_PTR_UP(left_vec, left_rowvec, left_rowelt); \ + MOVE_VEC_PTR_UP(up_vec, up_rowvec, up_rowelt); \ + MOVE_VEC_PTR_UP(upleft_vec, upleft_rowvec, upleft_rowelt); \ +} + +#define MOVE_ALL_UPLEFT() { \ + MOVE_VEC_PTR_UPLEFT(cur_vec, rowvec, rowelt); \ + MOVE_VEC_PTR_UPLEFT(left_vec, left_rowvec, left_rowelt); \ + MOVE_VEC_PTR_UPLEFT(up_vec, up_rowvec, up_rowelt); \ + MOVE_VEC_PTR_UPLEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \ +} + +#define NEW_ROW_COL(row, col) { \ + rowelt = row / d.mat_.nvecrow_; \ + rowvec = row % d.mat_.nvecrow_; \ + eltvec = (col * d.mat_.colstride_) + (rowvec * ROWSTRIDE); \ + cur_vec 
= d.mat_.matbuf_.ptr() + eltvec; \ + left_vec = cur_vec; \ + left_rowelt = rowelt; \ + left_rowvec = rowvec; \ + MOVE_VEC_PTR_LEFT(left_vec, left_rowvec, left_rowelt); \ + up_vec = cur_vec; \ + up_rowelt = rowelt; \ + up_rowvec = rowvec; \ + MOVE_VEC_PTR_UP(up_vec, up_rowvec, up_rowelt); \ + upleft_vec = up_vec; \ + upleft_rowelt = up_rowelt; \ + upleft_rowvec = up_rowvec; \ + MOVE_VEC_PTR_LEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \ +} + +/** + * Given the dynamic programming table and a cell, trace backwards from the + * cell and install the edits and score/penalty in the appropriate fields + * of res. The RandomSource is used to break ties among equally good ways + * of tracing back. + * + * Whenever we enter a cell, we check whether the read/ref coordinates of + * that cell correspond to a cell we traversed constructing a previous + * alignment. If so, we backtrack to the last decision point, mask out the + * path that led to the previously observed cell, and continue along a + * different path; or, if there are no more paths to try, we give up. + * + * If an alignment is found, 'off' is set to the alignment's upstream-most + * reference character's offset into the chromosome and true is returned. + * Otherwise, false is returned. + */ +bool SwAligner::backtraceNucleotidesEnd2EndSseU8( + TAlScore escore, // in: expected score + SwResult& res, // out: store results (edits and scores) here + size_t& off, // out: store diagonal projection of origin + size_t& nbts, // out: # backtracks + size_t row, // start in this row + size_t col, // start in this column + RandomSource& rnd) // random gen, to choose among equal paths +{ + assert_lt(row, dpRows()); + assert_lt(col, (size_t)(rff_ - rfi_)); + SSEData& d = fw_ ? sseU8fw_ : sseU8rc_; + SSEMetrics& met = extend_ ? 
sseU8ExtendMet_ : sseU8MateMet_; + met.bt++; + assert(!d.profbuf_.empty()); + assert_lt(row, rd_->length()); + btnstack_.clear(); // empty the backtrack stack + btcells_.clear(); // empty the cells-so-far list + AlnScore score; score.score_ = 0; + score.gaps_ = score.ns_ = 0; + size_t origCol = col; + size_t gaps = 0, readGaps = 0, refGaps = 0; + res.alres.reset(); + EList& ned = res.alres.ned(); + assert(ned.empty()); + assert_gt(dpRows(), row); + size_t trimEnd = dpRows() - row - 1; + size_t trimBeg = 0; + size_t ct = SSEMatrix::H; // cell type + // Row and col in terms of where they fall in the SSE vector matrix + size_t rowelt, rowvec, eltvec; + size_t left_rowelt, up_rowelt, upleft_rowelt; + size_t left_rowvec, up_rowvec, upleft_rowvec; + __m128i *cur_vec, *left_vec, *up_vec, *upleft_vec; + NEW_ROW_COL(row, col); + while((int)row >= 0) { + met.btcell++; + nbts++; + int readc = (*rd_)[rdi_ + row]; + int refm = (int)rf_[rfi_ + col]; + int readq = (*qu_)[row]; + assert_leq(col, origCol); + // Get score in this cell + bool empty = false, reportedThru, canMoveThru, branch = false; + int cur = SSEMatrix::H; + if(!d.mat_.reset_[row]) { + d.mat_.resetRow(row); + } + reportedThru = d.mat_.reportedThrough(row, col); + canMoveThru = true; + if(reportedThru) { + canMoveThru = false; + } else { + empty = false; + if(row > 0) { + assert_gt(row, 0); + size_t rowFromEnd = d.mat_.nrow() - row - 1; + bool gapsAllowed = true; + if(row < (size_t)sc_->gapbar || + rowFromEnd < (size_t)sc_->gapbar) + { + gapsAllowed = false; + } + const TAlScore floorsc = MIN_I64; + const int offsetsc = -0xff; + // Move to beginning of column/row + if(ct == SSEMatrix::E) { // AKA rdgap + assert_gt(col, 0); + TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::E))[rowelt] + offsetsc; + assert(gapsAllowed); + // Currently in the E matrix; incoming transition must come from the + // left. It's either a gap open from the H matrix or a gap extend from + // the E matrix. 
+ // TODO: save and restore origMask as well as mask + int origMask = 0, mask = 0; + // Get H score of cell to the left + TAlScore sc_h_left = ((TCScore*)(left_vec + SSEMatrix::H))[left_rowelt] + offsetsc; + if(sc_h_left > floorsc && sc_h_left - sc_->readGapOpen() == sc_cur) { + mask |= (1 << 0); + } + // Get E score of cell to the left + TAlScore sc_e_left = ((TCScore*)(left_vec + SSEMatrix::E))[left_rowelt] + offsetsc; + if(sc_e_left > floorsc && sc_e_left - sc_->readGapExtend() == sc_cur) { + mask |= (1 << 1); + } + origMask = mask; + assert(origMask > 0 || sc_cur <= sc_->match()); + if(d.mat_.isEMaskSet(row, col)) { + mask = (d.mat_.masks_[row][col] >> 8) & 3; + } + if(mask == 3) { +#if 1 + // Pick H -> E cell + cur = SW_BT_OALL_READ_OPEN; + d.mat_.eMaskSet(row, col, 2); // might choose E later +#else + if(rnd.nextU2()) { + // Pick H -> E cell + cur = SW_BT_OALL_READ_OPEN; + d.mat_.eMaskSet(row, col, 2); // might choose E later + } else { + // Pick E -> E cell + cur = SW_BT_RDGAP_EXTEND; + d.mat_.eMaskSet(row, col, 1); // might choose H later + } +#endif + branch = true; + } else if(mask == 2) { + // I chose the E cell + cur = SW_BT_RDGAP_EXTEND; + d.mat_.eMaskSet(row, col, 0); // done + } else if(mask == 1) { + // I chose the H cell + cur = SW_BT_OALL_READ_OPEN; + d.mat_.eMaskSet(row, col, 0); // done + } else { + empty = true; + // It's empty, so the only question left is whether we should be + // allowed in terimnate in this cell. If it's got a valid score + // then we *shouldn't* be allowed to terminate here because that + // means it's part of a larger alignment that was already reported. 
+ canMoveThru = (origMask == 0); + } + assert(!empty || !canMoveThru); + } else if(ct == SSEMatrix::F) { // AKA rfgap + assert_gt(row, 0); + assert(gapsAllowed); + TAlScore sc_h_up = ((TCScore*)(up_vec + SSEMatrix::H))[up_rowelt] + offsetsc; + TAlScore sc_f_up = ((TCScore*)(up_vec + SSEMatrix::F))[up_rowelt] + offsetsc; + TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::F))[rowelt] + offsetsc; + // Currently in the F matrix; incoming transition must come from above. + // It's either a gap open from the H matrix or a gap extend from the F + // matrix. + // TODO: save and restore origMask as well as mask + int origMask = 0, mask = 0; + // Get H score of cell above + if(sc_h_up > floorsc && sc_h_up - sc_->refGapOpen() == sc_cur) { + mask |= (1 << 0); + } + // Get F score of cell above + if(sc_f_up > floorsc && sc_f_up - sc_->refGapExtend() == sc_cur) { + mask |= (1 << 1); + } + origMask = mask; + assert(origMask > 0 || sc_cur <= sc_->match()); + if(d.mat_.isFMaskSet(row, col)) { + mask = (d.mat_.masks_[row][col] >> 11) & 3; + } + if(mask == 3) { +#if 1 + // I chose the H cell + cur = SW_BT_OALL_REF_OPEN; + d.mat_.fMaskSet(row, col, 2); // might choose E later +#else + if(rnd.nextU2()) { + // I chose the H cell + cur = SW_BT_OALL_REF_OPEN; + d.mat_.fMaskSet(row, col, 2); // might choose E later + } else { + // I chose the F cell + cur = SW_BT_RFGAP_EXTEND; + d.mat_.fMaskSet(row, col, 1); // might choose E later + } +#endif + branch = true; + } else if(mask == 2) { + // I chose the F cell + cur = SW_BT_RFGAP_EXTEND; + d.mat_.fMaskSet(row, col, 0); // done + } else if(mask == 1) { + // I chose the H cell + cur = SW_BT_OALL_REF_OPEN; + d.mat_.fMaskSet(row, col, 0); // done + } else { + empty = true; + // It's empty, so the only question left is whether we should be + // allowed in terimnate in this cell. If it's got a valid score + // then we *shouldn't* be allowed to terminate here because that + // means it's part of a larger alignment that was already reported. 
+ canMoveThru = (origMask == 0); + } + assert(!empty || !canMoveThru); + } else { + assert_eq(SSEMatrix::H, ct); + TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::H))[rowelt] + offsetsc; + TAlScore sc_f_up = ((TCScore*)(up_vec + SSEMatrix::F))[up_rowelt] + offsetsc; + TAlScore sc_h_up = ((TCScore*)(up_vec + SSEMatrix::H))[up_rowelt] + offsetsc; + TAlScore sc_h_left = col > 0 ? (((TCScore*)(left_vec + SSEMatrix::H))[left_rowelt] + offsetsc) : floorsc; + TAlScore sc_e_left = col > 0 ? (((TCScore*)(left_vec + SSEMatrix::E))[left_rowelt] + offsetsc) : floorsc; + TAlScore sc_h_upleft = col > 0 ? (((TCScore*)(upleft_vec + SSEMatrix::H))[upleft_rowelt] + offsetsc) : floorsc; + TAlScore sc_diag = sc_->score(readc, refm, readq - 33); + // TODO: save and restore origMask as well as mask + int origMask = 0, mask = 0; + if(gapsAllowed) { + if(sc_h_up > floorsc && sc_cur == sc_h_up - sc_->refGapOpen()) { + mask |= (1 << 0); + } + if(sc_h_left > floorsc && sc_cur == sc_h_left - sc_->readGapOpen()) { + mask |= (1 << 1); + } + if(sc_f_up > floorsc && sc_cur == sc_f_up - sc_->refGapExtend()) { + mask |= (1 << 2); + } + if(sc_e_left > floorsc && sc_cur == sc_e_left - sc_->readGapExtend()) { + mask |= (1 << 3); + } + } + if(sc_h_upleft > floorsc && sc_cur == sc_h_upleft + sc_diag) { + mask |= (1 << 4); + } + origMask = mask; + assert(origMask > 0 || sc_cur <= sc_->match()); + if(d.mat_.isHMaskSet(row, col)) { + mask = (d.mat_.masks_[row][col] >> 2) & 31; + } + assert(gapsAllowed || mask == (1 << 4) || mask == 0); + int opts = alts5[mask]; + int select = -1; + if(opts == 1) { + select = firsts5[mask]; + assert_geq(mask, 0); + d.mat_.hMaskSet(row, col, 0); + } else if(opts > 1) { +#if 1 + if( (mask & 16) != 0) { + select = 4; // H diag + } else if((mask & 1) != 0) { + select = 0; // H up + } else if((mask & 4) != 0) { + select = 2; // F up + } else if((mask & 2) != 0) { + select = 1; // H left + } else if((mask & 8) != 0) { + select = 3; // E left + } +#else + select = 
randFromMask(rnd, mask); +#endif + assert_geq(mask, 0); + mask &= ~(1 << select); + assert(gapsAllowed || mask == (1 << 4) || mask == 0); + d.mat_.hMaskSet(row, col, mask); + branch = true; + } else { /* No way to backtrack! */ } + if(select != -1) { + if(select == 4) { + cur = SW_BT_OALL_DIAG; + } else if(select == 0) { + cur = SW_BT_OALL_REF_OPEN; + } else if(select == 1) { + cur = SW_BT_OALL_READ_OPEN; + } else if(select == 2) { + cur = SW_BT_RFGAP_EXTEND; + } else { + assert_eq(3, select) + cur = SW_BT_RDGAP_EXTEND; + } + } else { + empty = true; + // It's empty, so the only question left is whether we should be + // allowed in terimnate in this cell. If it's got a valid score + // then we *shouldn't* be allowed to terminate here because that + // means it's part of a larger alignment that was already reported. + canMoveThru = (origMask == 0); + } + } + assert(!empty || !canMoveThru || ct == SSEMatrix::H); + } + } + //cerr << "reportedThrough rejected (" << row << ", " << col << ")" << endl; + d.mat_.setReportedThrough(row, col); + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + // Cell was involved in a previously-reported alignment? 
+ if(!canMoveThru) { + if(!btnstack_.empty()) { + // Remove all the cells from list back to and including the + // cell where the branch occurred + btcells_.resize(btnstack_.back().celsz); + // Pop record off the top of the stack + ned.resize(btnstack_.back().nedsz); + //aed.resize(btnstack_.back().aedsz); + row = btnstack_.back().row; + col = btnstack_.back().col; + gaps = btnstack_.back().gaps; + readGaps = btnstack_.back().readGaps; + refGaps = btnstack_.back().refGaps; + score = btnstack_.back().score; + ct = btnstack_.back().ct; + btnstack_.pop_back(); + assert(!sc_->monotone || score.score() >= escore); + NEW_ROW_COL(row, col); + continue; + } else { + // No branch points to revisit; just give up + res.reset(); + met.btfail++; // DP backtraces failed + return false; + } + } + assert(!reportedThru); + assert(!sc_->monotone || score.score() >= minsc_); + if(empty || row == 0) { + assert_eq(SSEMatrix::H, ct); + btcells_.expand(); + btcells_.back().first = row; + btcells_.back().second = col; + // This cell is at the end of a legitimate alignment + trimBeg = row; + assert_eq(0, trimBeg); + assert_eq(btcells_.size(), dpRows() - trimBeg - trimEnd + readGaps); + break; + } + if(branch) { + // Add a frame to the backtrack stack + btnstack_.expand(); + btnstack_.back().init( + ned.size(), + 0, // aed.size() + btcells_.size(), + row, + col, + gaps, + readGaps, + refGaps, + score, + (int)ct); + } + btcells_.expand(); + btcells_.back().first = row; + btcells_.back().second = col; + switch(cur) { + // Move up and to the left. If the reference nucleotide in the + // source row mismatches the read nucleotide, penalize + // it and add a nucleotide mismatch. 
+ case SW_BT_OALL_DIAG: { + assert_gt(row, 0); assert_gt(col, 0); + // Check for color mismatch + int readC = (*rd_)[row]; + int refNmask = (int)rf_[rfi_+col]; + assert_gt(refNmask, 0); + int m = matchesEx(readC, refNmask); + ct = SSEMatrix::H; + if(m != 1) { + Edit e( + (int)row, + mask2dna[refNmask], + "ACGTN"[readC], + EDIT_TYPE_MM); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + int pen = QUAL2(row, col); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= escore); + } else { + // Reward a match + int64_t bonus = sc_->match(30); + score.score_ += bonus; + assert(!sc_->monotone || score.score() >= escore); + } + if(m == -1) { + score.ns_++; + } + row--; col--; + MOVE_ALL_UPLEFT(); + assert(VALID_AL_SCORE(score)); + break; + } + // Move up. Add an edit encoding the ref gap. + case SW_BT_OALL_REF_OPEN: + { + assert_gt(row, 0); + Edit e( + (int)row, + '-', + "ACGTN"[(int)(*rd_)[row]], + EDIT_TYPE_REF_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + row--; + ct = SSEMatrix::H; + int pen = sc_->refGapOpen(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; refGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_UP(); + break; + } + // Move up. Add an edit encoding the ref gap. 
+ case SW_BT_RFGAP_EXTEND: + { + assert_gt(row, 1); + Edit e( + (int)row, + '-', + "ACGTN"[(int)(*rd_)[row]], + EDIT_TYPE_REF_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + row--; + ct = SSEMatrix::F; + int pen = sc_->refGapExtend(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; refGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_UP(); + break; + } + case SW_BT_OALL_READ_OPEN: + { + assert_gt(col, 0); + Edit e( + (int)row+1, + mask2dna[(int)rf_[rfi_+col]], + '-', + EDIT_TYPE_READ_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + col--; + ct = SSEMatrix::H; + int pen = sc_->readGapOpen(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; readGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_LEFT(); + break; + } + case SW_BT_RDGAP_EXTEND: + { + assert_gt(col, 1); + Edit e( + (int)row+1, + mask2dna[(int)rf_[rfi_+col]], + '-', + EDIT_TYPE_READ_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + col--; + ct = SSEMatrix::E; + int pen = sc_->readGapExtend(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; readGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_LEFT(); + break; + } + default: throw 1; + } + } // while((int)row > 0) + assert_eq(0, trimBeg); + assert_eq(0, trimEnd); + assert_geq(col, 0); + assert_eq(SSEMatrix::H, ct); + // The number of cells in the backtracs should equal the number of read + // bases 
after trimming plus the number of gaps + assert_eq(btcells_.size(), dpRows() - trimBeg - trimEnd + readGaps); + // Check whether we went through a core diagonal and set 'reported' flag on + // each cell + bool overlappedCoreDiag = false; + for(size_t i = 0; i < btcells_.size(); i++) { + size_t rw = btcells_[i].first; + size_t cl = btcells_[i].second; + // Calculate the diagonal within the *trimmed* rectangle, i.e. the + // rectangle we dealt with in align, gather and backtrack. + int64_t diagi = cl - rw; + // Now adjust to the diagonal within the *untrimmed* rectangle by + // adding on the amount trimmed from the left. + diagi += rect_->triml; + if(diagi >= 0) { + size_t diag = (size_t)diagi; + if(diag >= rect_->corel && diag <= rect_->corer) { + overlappedCoreDiag = true; + break; + } + } +#ifndef NDEBUG + //assert(!d.mat_.reportedThrough(rw, cl)); + //d.mat_.setReportedThrough(rw, cl); + assert(d.mat_.reportedThrough(rw, cl)); +#endif + } + if(!overlappedCoreDiag) { + // Must overlap a core diagonal. Otherwise, we run the risk of + // reporting an alignment that overlaps (and trumps) a higher-scoring + // alignment that lies partially outside the dynamic programming + // rectangle. + res.reset(); + met.corerej++; + return false; + } + int readC = (*rd_)[rdi_+row]; // get last char in read + int refNmask = (int)rf_[rfi_+col]; // get last ref char ref involved in aln + assert_gt(refNmask, 0); + int m = matchesEx(readC, refNmask); + if(m != 1) { + Edit e((int)row, mask2dna[refNmask], "ACGTN"[readC], EDIT_TYPE_MM); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + score.score_ -= QUAL2(row, col); + assert_geq(score.score(), minsc_); + } else { + score.score_ += sc_->match(30); + } + if(m == -1) { + score.ns_++; + } + if(score.ns_ > nceil_) { + // Alignment has too many Ns in it! 
+ res.reset(); + met.nrej++; + return false; + } + res.reverse(); + assert(Edit::repOk(ned, (*rd_))); + assert_eq(score.score(), escore); + assert_leq(gaps, rdgap_ + rfgap_); + off = col; + assert_lt(col + (size_t)rfi_, (size_t)rff_); + score.gaps_ = gaps; + res.alres.setScore(score); + res.alres.setShape( + refidx_, // ref id + off + rfi_ + rect_->refl, // 0-based ref offset + reflen_, // length of entire reference + fw_, // aligned to Watson? + rdf_ - rdi_, // read length + 0, // read ID + true, // pretrim soft? + 0, // pretrim 5' end + 0, // pretrim 3' end + true, // alignment trim soft? + fw_ ? trimBeg : trimEnd, // alignment trim 5' end + fw_ ? trimEnd : trimBeg); // alignment trim 3' end + size_t refns = 0; + for(size_t i = col; i <= origCol; i++) { + if((int)rf_[rfi_+i] > 15) { + refns++; + } + } + res.alres.setRefNs(refns); + assert(Edit::repOk(ned, (*rd_), true, trimBeg, trimEnd)); + assert(res.repOk()); +#ifndef NDEBUG + size_t gapsCheck = 0; + for(size_t i = 0; i < ned.size(); i++) { + if(ned[i].isGap()) gapsCheck++; + } + assert_eq(gaps, gapsCheck); + BTDnaString refstr; + for(size_t i = col; i <= origCol; i++) { + refstr.append(firsts5[(int)rf_[rfi_+i]]); + } + BTDnaString editstr; + // daehwan + // Edit::toRef((*rd_), ned, editstr, true, trimBeg, trimEnd); + Edit::toRef((*rd_), ned, editstr, true, trimBeg + rdi_, trimEnd + (rd_->length() - rdf_)); + if(refstr != editstr) { + cerr << "Decoded nucleotides and edits don't match reference:" << endl; + cerr << " score: " << score.score() + << " (" << gaps << " gaps)" << endl; + cerr << " edits: "; + Edit::print(cerr, ned); + cerr << endl; + cerr << " decoded nucs: " << (*rd_) << endl; + cerr << " edited nucs: " << editstr << endl; + cerr << " reference nucs: " << refstr << endl; + assert(0); + } +#endif + met.btsucc++; // DP backtraces succeeded + return true; +} diff --git a/aligner_swsse_loc_i16.cpp b/aligner_swsse_loc_i16.cpp new file mode 100644 index 0000000..e4e8fac --- /dev/null +++ 
b/aligner_swsse_loc_i16.cpp @@ -0,0 +1,2272 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see http://www.gnu.org/licenses/. + */ + +/** + * aligner_swsse_loc_i16.cpp + * + * Versions of key alignment functions that use vector instructions to + * accelerate dynamic programming. Based chiefly on the striped Smith-Waterman + * paper and implementation by Michael Farrar. See: + * + * Farrar M. Striped Smith-Waterman speeds database searches six times over + * other SIMD implementations. Bioinformatics. 2007 Jan 15;23(2):156-61. + * http://sites.google.com/site/farrarmichael/smith-waterman + * + * While the paper describes an implementation of Smith-Waterman, we extend it + * to do end-to-end read alignment as well as local alignment. The change + * required for this is minor: we simply let vmax be the maximum element in the + * score domain rather than the minimum. + * + * The vectorized dynamic programming implementation lacks some features, which + * makes it hard to adapt to solving the entire dynamic-programming alignment + * problem. For instance: + * + * - It doesn't respect gap barriers on either end of the read + * - It just gives a maximum; not enough information to backtrace without + * redoing some alignment + * - It's a little difficult to handle st_ and en_, especially st_.
+ * - The query profile mechanism makes handling of ambiguous reference bases a + * little tricky (16 cols in query profile lookup table instead of 5) + * + * Given the drawbacks, it is tempting to use SSE dynamic programming as a + * filter rather than as an aligner per se. Here are a few ideas for how it + * can be extended to handle more of the alignment problem: + * + * - Save calculated scores to a big array as we go. We return to this array + * to find and backtrace from good solutions. + */ + +#include +#include "aligner_sw.h" + +static const size_t NBYTES_PER_REG = 16; +static const size_t NWORDS_PER_REG = 8; +static const size_t NBITS_PER_WORD = 16; +static const size_t NBYTES_PER_WORD = 2; + +// In 16-bit local mode, we have the option of using signed saturated +// arithmetic. Because we have signed arithmetic, there's no need to +// add/subtract bias when building an applying the query profile. The lowest +// value we can use is 0x8000, greatest is 0x7fff. + +typedef int16_t TCScore; + +/** + * Build query profile look up tables for the read. The query profile look + * up table is organized as a 1D array indexed by [i][j] where i is the + * reference character in the current DP column (0=A, 1=C, etc), and j is + * the segment of the query we're currently working on. + */ +void SwAligner::buildQueryProfileLocalSseI16(bool fw) { + bool& done = fw ? sseI16fwBuilt_ : sseI16rcBuilt_; + if(done) { + return; + } + done = true; + const BTDnaString* rd = fw ? rdfw_ : rdrc_; + const BTString* qu = fw ? qufw_ : qurc_; + const size_t len = rd->length(); + const size_t seglen = (len + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; + // How many __m128i's are needed + size_t n128s = + 64 + // slack bytes, for alignment? + (seglen * ALPHA_SIZE) // query profile data + * 2; // & gap barrier data + assert_gt(n128s, 0); + SSEData& d = fw ? 
sseI16fw_ : sseI16rc_; + d.profbuf_.resizeNoCopy(n128s); + assert(!d.profbuf_.empty()); + d.maxPen_ = d.maxBonus_ = 0; + d.lastIter_ = d.lastWord_ = 0; + d.qprofStride_ = d.gbarStride_ = 2; + d.bias_ = 0; // no bias when words are signed + // For each reference character A, C, G, T, N ... + for(size_t refc = 0; refc < ALPHA_SIZE; refc++) { + // For each segment ... + for(size_t i = 0; i < seglen; i++) { + size_t j = i; + int16_t *qprofWords = + reinterpret_cast(d.profbuf_.ptr() + (refc * seglen * 2) + (i * 2)); + int16_t *gbarWords = + reinterpret_cast(d.profbuf_.ptr() + (refc * seglen * 2) + (i * 2) + 1); + // For each sub-word (byte) ... + for(size_t k = 0; k < NWORDS_PER_REG; k++) { + int sc = 0; + *gbarWords = 0; + if(j < len) { + int readc = (*rd)[j]; + int readq = (*qu)[j]; + sc = sc_->score(readc, (int)(1 << refc), readq - 33); + size_t j_from_end = len - j - 1; + if(j < (size_t)sc_->gapbar || + j_from_end < (size_t)sc_->gapbar) + { + // Inside the gap barrier + *gbarWords = 0x8000; // add this twice + } + } + if(refc == 0 && j == len-1) { + // Remember which 128-bit word and which smaller word has + // the final row + d.lastIter_ = i; + d.lastWord_ = k; + } + if(sc < 0) { + if((size_t)(-sc) > d.maxPen_) { + d.maxPen_ = (size_t)(-sc); + } + } else { + if((size_t)sc > d.maxBonus_) { + d.maxBonus_ = (size_t)sc; + } + } + *qprofWords = (int16_t)sc; + gbarWords++; + qprofWords++; + j += seglen; // update offset into query + } + } + } +} + +#ifndef NDEBUG +/** + * Return true iff the cell has sane E/F/H values w/r/t its predecessors. 
+ */ +static bool cellOkLocalI16( + SSEData& d, + size_t row, + size_t col, + int refc, + int readc, + int readq, + const Scoring& sc) // scoring scheme +{ + TCScore floorsc = MIN_I16; + TCScore ceilsc = MIN_I16-1; + TAlScore offsetsc = 0x8000; + TAlScore sc_h_cur = (TAlScore)d.mat_.helt(row, col); + TAlScore sc_e_cur = (TAlScore)d.mat_.eelt(row, col); + TAlScore sc_f_cur = (TAlScore)d.mat_.felt(row, col); + if(sc_h_cur > floorsc) { + sc_h_cur += offsetsc; + } + if(sc_e_cur > floorsc) { + sc_e_cur += offsetsc; + } + if(sc_f_cur > floorsc) { + sc_f_cur += offsetsc; + } + bool gapsAllowed = true; + size_t rowFromEnd = d.mat_.nrow() - row - 1; + if(row < (size_t)sc.gapbar || rowFromEnd < (size_t)sc.gapbar) { + gapsAllowed = false; + } + bool e_left_trans = false, h_left_trans = false; + bool f_up_trans = false, h_up_trans = false; + bool h_diag_trans = false; + if(gapsAllowed) { + TAlScore sc_h_left = floorsc; + TAlScore sc_e_left = floorsc; + TAlScore sc_h_up = floorsc; + TAlScore sc_f_up = floorsc; + if(col > 0 && sc_e_cur > floorsc && sc_e_cur <= ceilsc) { + sc_h_left = d.mat_.helt(row, col-1) + offsetsc; + sc_e_left = d.mat_.eelt(row, col-1) + offsetsc; + e_left_trans = (sc_e_left > floorsc && sc_e_cur == sc_e_left - sc.readGapExtend()); + h_left_trans = (sc_h_left > floorsc && sc_e_cur == sc_h_left - sc.readGapOpen()); + assert(e_left_trans || h_left_trans); + } + if(row > 0 && sc_f_cur > floorsc && sc_f_cur <= ceilsc) { + sc_h_up = d.mat_.helt(row-1, col) + offsetsc; + sc_f_up = d.mat_.felt(row-1, col) + offsetsc; + f_up_trans = (sc_f_up > floorsc && sc_f_cur == sc_f_up - sc.refGapExtend()); + h_up_trans = (sc_h_up > floorsc && sc_f_cur == sc_h_up - sc.refGapOpen()); + assert(f_up_trans || h_up_trans); + } + } else { + assert_geq(floorsc, sc_e_cur); + assert_geq(floorsc, sc_f_cur); + } + if(col > 0 && row > 0 && sc_h_cur > floorsc && sc_h_cur <= ceilsc) { + TAlScore sc_h_upleft = d.mat_.helt(row-1, col-1) + offsetsc; + TAlScore sc_diag = sc.score(readc, 
(int)refc, readq - 33); + h_diag_trans = sc_h_cur == sc_h_upleft + sc_diag; + } + assert( + sc_h_cur <= floorsc || + e_left_trans || + h_left_trans || + f_up_trans || + h_up_trans || + h_diag_trans || + sc_h_cur > ceilsc || + row == 0 || + col == 0); + return true; +} +#endif /*ndef NDEBUG*/ + +#ifdef NDEBUG + +#define assert_all_eq0(x) +#define assert_all_gt(x, y) +#define assert_all_gt_lo(x) +#define assert_all_lt(x, y) +#define assert_all_lt_hi(x) + +#else + +#define assert_all_eq0(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_xor_si128(z, z); \ + tmp = _mm_cmpeq_epi16(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_gt(x, y) { \ + __m128i tmp = _mm_cmpgt_epi16(x, y); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_gt_lo(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_xor_si128(z, z); \ + tmp = _mm_cmpgt_epi16(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_lt(x, y) { \ + __m128i tmp = _mm_cmplt_epi16(x, y); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_leq(x, y) { \ + __m128i tmp = _mm_cmpgt_epi16(x, y); \ + assert_eq(0x0000, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_lt_hi(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_cmpeq_epi16(z, z); \ + z = _mm_srli_epi16(z, 1); \ + tmp = _mm_cmplt_epi16(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} +#endif + +/** + * Aligns by filling a dynamic programming matrix with the SSE-accelerated, + * banded DP approach of Farrar. As it goes, it determines which cells we + * might backtrace from and tallies the best (highest-scoring) N backtrace + * candidate cells per diagonal. Also returns the alignment score of the best + * alignment in the matrix. 
+ * + * This routine does *not* maintain a matrix holding the entire matrix worth of + * scores, nor does it maintain any other dense O(mn) data structure, as this + * would quickly exhaust memory for queries longer than about 10,000 kb. + * Instead, in the fill stage it maintains two columns worth of scores at a + * time (current/previous, or right/left) - these take O(m) space. When + * finished with the current column, it determines which cells from the + * previous column, if any, are candidates we might backtrace from to find a + * full alignment. A candidate cell has a score that rises above the threshold + * and isn't improved upon by a match in the next column. The best N + * candidates per diagonal are stored in a O(m + n) data structure. + */ +TAlScore SwAligner::alignGatherLoc16(int& flag, bool debug) { + assert_leq(rdf_, rd_->length()); + assert_leq(rdf_, qu_->length()); + assert_lt(rfi_, rff_); + assert_lt(rdi_, rdf_); + assert_eq(rd_->length(), qu_->length()); + assert_geq(sc_->gapbar, 1); + assert_gt(minsc_, 0); + assert_leq(minsc_, MAX_I16); + assert(repOk()); +#ifndef NDEBUG + for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) { + assert_range(0, 16, (int)rf_[i]); + } +#endif + + SSEData& d = fw_ ? sseI16fw_ : sseI16rc_; + SSEMetrics& met = extend_ ? sseI16ExtendMet_ : sseI16MateMet_; + if(!debug) met.dp++; + buildQueryProfileLocalSseI16(fw_); + assert(!d.profbuf_.empty()); + + assert_gt(d.maxBonus_, 0); + size_t iter = + (dpRows() + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; // iter = segLen + + // Now set up the score vectors. We just need two columns worth, which + // we'll call "left" and "right". + d.vecbuf_.resize(ROWSTRIDE_2COL * iter * 2); + d.vecbuf_.zero(); + __m128i *vbuf_l = d.vecbuf_.ptr(); + __m128i *vbuf_r = d.vecbuf_.ptr() + (ROWSTRIDE_2COL * iter); + + // This is the data structure that holds candidate cells per diagonal. 
+ const size_t ndiags = rff_ - rfi_ + dpRows() - 1; + if(!debug) { + btdiag_.init(ndiags, 2); + } + + // Data structure that holds checkpointed anti-diagonals + TAlScore perfectScore = sc_->perfectScore(dpRows()); + bool checkpoint = true; + bool cpdebug = false; +#ifndef NDEBUG + cpdebug = dpRows() < 1000; +#endif + cper_.init( + dpRows(), // # rows + rff_ - rfi_, // # columns + cperPerPow2_, // checkpoint every 1 << perpow2 diags (& next) + perfectScore, // perfect score (for sanity checks) + false, // matrix cells have 8-bit scores? + cperTri_, // triangular mini-fills? + true, // alignment is local? + cpdebug); // save all cells for debugging? + + // Many thanks to Michael Farrar for releasing his striped Smith-Waterman + // implementation: + // + // http://sites.google.com/site/farrarmichael/smith-waterman + // + // Much of the implmentation below is adapted from Michael's code. + + // Set all elts to reference gap open penalty + __m128i rfgapo = _mm_setzero_si128(); + __m128i rfgape = _mm_setzero_si128(); + __m128i rdgapo = _mm_setzero_si128(); + __m128i rdgape = _mm_setzero_si128(); + __m128i vlo = _mm_setzero_si128(); + __m128i vhi = _mm_setzero_si128(); + __m128i vlolsw = _mm_setzero_si128(); + __m128i vmax = _mm_setzero_si128(); + __m128i vcolmax = _mm_setzero_si128(); + __m128i vmaxtmp = _mm_setzero_si128(); + __m128i ve = _mm_setzero_si128(); + __m128i vf = _mm_setzero_si128(); + __m128i vh = _mm_setzero_si128(); + __m128i vhd = _mm_setzero_si128(); + __m128i vhdtmp = _mm_setzero_si128(); + __m128i vtmp = _mm_setzero_si128(); + __m128i vzero = _mm_setzero_si128(); + __m128i vminsc = _mm_setzero_si128(); + + assert_gt(sc_->refGapOpen(), 0); + assert_leq(sc_->refGapOpen(), MAX_I16); + rfgapo = _mm_insert_epi16(rfgapo, sc_->refGapOpen(), 0); + rfgapo = _mm_shufflelo_epi16(rfgapo, 0); + rfgapo = _mm_shuffle_epi32(rfgapo, 0); + + // Set all elts to reference gap extension penalty + assert_gt(sc_->refGapExtend(), 0); + assert_leq(sc_->refGapExtend(), 
MAX_I16); + assert_leq(sc_->refGapExtend(), sc_->refGapOpen()); + rfgape = _mm_insert_epi16(rfgape, sc_->refGapExtend(), 0); + rfgape = _mm_shufflelo_epi16(rfgape, 0); + rfgape = _mm_shuffle_epi32(rfgape, 0); + + // Set all elts to read gap open penalty + assert_gt(sc_->readGapOpen(), 0); + assert_leq(sc_->readGapOpen(), MAX_I16); + rdgapo = _mm_insert_epi16(rdgapo, sc_->readGapOpen(), 0); + rdgapo = _mm_shufflelo_epi16(rdgapo, 0); + rdgapo = _mm_shuffle_epi32(rdgapo, 0); + + // Set all elts to read gap extension penalty + assert_gt(sc_->readGapExtend(), 0); + assert_leq(sc_->readGapExtend(), MAX_I16); + assert_leq(sc_->readGapExtend(), sc_->readGapOpen()); + rdgape = _mm_insert_epi16(rdgape, sc_->readGapExtend(), 0); + rdgape = _mm_shufflelo_epi16(rdgape, 0); + rdgape = _mm_shuffle_epi32(rdgape, 0); + + // Set all elts to minimum score threshold. Actually, to 1 less than the + // threshold so we can use gt instead of geq. + vminsc = _mm_insert_epi16(vminsc, (int)minsc_-1, 0); + vminsc = _mm_shufflelo_epi16(vminsc, 0); + vminsc = _mm_shuffle_epi32(vminsc, 0); + + // Set all elts to 0x8000 (min value for signed 16-bit) + vlo = _mm_cmpeq_epi16(vlo, vlo); // all elts = 0xffff + vlo = _mm_slli_epi16(vlo, NBITS_PER_WORD-1); // all elts = 0x8000 + + // Set all elts to 0x7fff (max value for signed 16-bit) + vhi = _mm_cmpeq_epi16(vhi, vhi); // all elts = 0xffff + vhi = _mm_srli_epi16(vhi, 1); // all elts = 0x7fff + + // Set all elts to 0x8000 (min value for signed 16-bit) + vmax = vlo; + + // vlolsw: topmost (least sig) word set to 0x8000, all other words=0 + vlolsw = _mm_shuffle_epi32(vlo, 0); + vlolsw = _mm_srli_si128(vlolsw, NBYTES_PER_REG - NBYTES_PER_WORD); + + // Points to a long vector of __m128i where each element is a block of + // contiguous cells in the E, F or H matrix. If the index % 3 == 0, then + // the block of cells is from the E matrix. If index % 3 == 1, they're + // from the F matrix. If index % 3 == 2, then they're from the H matrix. 
+ // Blocks of cells are organized in the same interleaved manner as they are + // calculated by the Farrar algorithm. + const __m128i *pvScore; // points into the query profile + + const size_t colstride = ROWSTRIDE_2COL * iter; + + // Initialize the H and E vectors in the first matrix column + __m128i *pvELeft = vbuf_l + 0; __m128i *pvERight = vbuf_r + 0; + //__m128i *pvFLeft = vbuf_l + 1; + __m128i *pvFRight = vbuf_r + 1; + __m128i *pvHLeft = vbuf_l + 2; __m128i *pvHRight = vbuf_r + 2; + + for(size_t i = 0; i < iter; i++) { + // start low in local mode + _mm_store_si128(pvERight, vlo); pvERight += ROWSTRIDE_2COL; + _mm_store_si128(pvHRight, vlo); pvHRight += ROWSTRIDE_2COL; + // Note: right and left are going to be swapped as soon as we enter + // the outer loop below + } + + assert_gt(sc_->gapbar, 0); + size_t nfixup = 0; + TAlScore matchsc = sc_->match(30); + TAlScore leftmax = MIN_I64; + + // Fill in the table as usual but instead of using the same gap-penalty + // vector for each iteration of the inner loop, load words out of a + // pre-calculated gap vector parallel to the query profile. The pre- + // calculated gap vectors enforce the gap barrier constraint by making it + // infinitely costly to introduce a gap in barrier rows. + // + // AND use a separate loop to fill in the first row of the table, enforcing + // the st_ constraints in the process. This is awkward because it + // separates the processing of the first row from the others and might make + // it difficult to use the first-row results in the next row, but it might + // be the simplest and least disruptive way to deal with the st_ constraint. + + size_t off = MAX_SIZE_T, lastoff; + bool bailed = false; + for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) { + // Swap left and right; vbuf_l is the vector on the left, which we + // generally load from, and vbuf_r is the vector on the right, which we + // generally store to. 
+ swap(vbuf_l, vbuf_r); + pvELeft = vbuf_l + 0; pvERight = vbuf_r + 0; + /* pvFLeft = vbuf_l + 1; */ pvFRight = vbuf_r + 1; + pvHLeft = vbuf_l + 2; pvHRight = vbuf_r + 2; + + // Fetch this column's reference mask + const int refm = (int)rf_[i]; + + // Fetch the appropriate query profile + lastoff = off; + off = (size_t)firsts5[refm] * iter * 2; + pvScore = d.profbuf_.ptr() + off; // even elts = query profile, odd = gap barrier + + // Load H vector from the final row of the previous column. + // ??? perhaps we should calculate the next iter's F instead of the + // current iter's? The way we currently do it, seems like it will + // almost always require at least one fixup loop iter (to recalculate + // this topmost F). + vh = _mm_load_si128(pvHLeft + colstride - ROWSTRIDE_2COL); + + // Set all F cells to low value + vf = _mm_cmpeq_epi16(vf, vf); + vf = _mm_slli_epi16(vf, NBITS_PER_WORD-1); + vf = _mm_or_si128(vf, vlolsw); + // vf now contains the vertical contribution + + // Store cells in F, calculated previously + // No need to veto ref gap extensions, they're all 0x8000s + _mm_store_si128(pvFRight, vf); + pvFRight += ROWSTRIDE_2COL; + + // Shift down so that topmost (least sig) cell gets 0 + vh = _mm_slli_si128(vh, NBYTES_PER_WORD); + // Fill topmost (least sig) cell with low value + vh = _mm_or_si128(vh, vlolsw); + + // We pull out one loop iteration to make it easier to veto values in the top row + + // Load cells from E, calculated previously + ve = _mm_load_si128(pvELeft); + vhd = _mm_load_si128(pvHLeft); + assert_all_lt(ve, vhi); + pvELeft += ROWSTRIDE_2COL; + // ve now contains the horizontal contribution + + // Factor in query profile (matches and mismatches) + vh = _mm_adds_epi16(vh, pvScore[0]); + // vh now contains the diagonal contribution + + // Update vE value + vhdtmp = vhd; + vhd = _mm_subs_epi16(vhd, rdgapo); + vhd = _mm_adds_epi16(vhd, pvScore[1]); // veto some read gap opens + vhd = _mm_adds_epi16(vhd, pvScore[1]); // veto some read gap opens + 
ve = _mm_subs_epi16(ve, rdgape); + ve = _mm_max_epi16(ve, vhd); + + // Update H, factoring in E and F + vh = _mm_max_epi16(vh, ve); + // F won't change anything! + + vf = vh; + + // Update highest score so far + vcolmax = vh; + + // Save the new vH values + _mm_store_si128(pvHRight, vh); + + assert_all_lt(ve, vhi); + + vh = vhdtmp; + + assert_all_lt(ve, vhi); + pvHRight += ROWSTRIDE_2COL; + pvHLeft += ROWSTRIDE_2COL; + + // Save E values + _mm_store_si128(pvERight, ve); + pvERight += ROWSTRIDE_2COL; + + // Update vf value + vf = _mm_subs_epi16(vf, rfgapo); + assert_all_lt(vf, vhi); + + pvScore += 2; // move on to next query profile + + // For each character in the reference text: + size_t j; + for(j = 1; j < iter; j++) { + // Load cells from E, calculated previously + ve = _mm_load_si128(pvELeft); + vhd = _mm_load_si128(pvHLeft); + assert_all_lt(ve, vhi); + pvELeft += ROWSTRIDE_2COL; + + // Store cells in F, calculated previously + vf = _mm_adds_epi16(vf, pvScore[1]); // veto some ref gap extensions + vf = _mm_adds_epi16(vf, pvScore[1]); // veto some ref gap extensions + _mm_store_si128(pvFRight, vf); + pvFRight += ROWSTRIDE_2COL; + + // Factor in query profile (matches and mismatches) + vh = _mm_adds_epi16(vh, pvScore[0]); + vh = _mm_max_epi16(vh, vf); + + // Update vE value + vhdtmp = vhd; + vhd = _mm_subs_epi16(vhd, rdgapo); + vhd = _mm_adds_epi16(vhd, pvScore[1]); // veto some read gap opens + vhd = _mm_adds_epi16(vhd, pvScore[1]); // veto some read gap opens + ve = _mm_subs_epi16(ve, rdgape); + ve = _mm_max_epi16(ve, vhd); + + vh = _mm_max_epi16(vh, ve); + vtmp = vh; + + // Update highest score encountered this far + vcolmax = _mm_max_epi16(vcolmax, vh); + + // Save the new vH values + _mm_store_si128(pvHRight, vh); + + vh = vhdtmp; + + assert_all_lt(ve, vhi); + pvHRight += ROWSTRIDE_2COL; + pvHLeft += ROWSTRIDE_2COL; + + // Save E values + _mm_store_si128(pvERight, ve); + pvERight += ROWSTRIDE_2COL; + + // Update vf value + vtmp = _mm_subs_epi16(vtmp, 
rfgapo); + vf = _mm_subs_epi16(vf, rfgape); + assert_all_lt(vf, vhi); + vf = _mm_max_epi16(vf, vtmp); + + pvScore += 2; // move on to next query profile / gap veto + } + // pvHStore, pvELoad, pvEStore have all rolled over to the next column + pvFRight -= colstride; // reset to start of column + vtmp = _mm_load_si128(pvFRight); + + pvHRight -= colstride; // reset to start of column + vh = _mm_load_si128(pvHRight); + + pvScore = d.profbuf_.ptr() + off + 1; // reset veto vector + + // vf from last row gets shifted down by one to overlay the first row + // rfgape has already been subtracted from it. + vf = _mm_slli_si128(vf, NBYTES_PER_WORD); + vf = _mm_or_si128(vf, vlolsw); + + vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions + vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions + vf = _mm_max_epi16(vtmp, vf); + vtmp = _mm_cmpgt_epi16(vf, vtmp); + int cmp = _mm_movemask_epi8(vtmp); + + // If any element of vtmp is greater than H - gap-open... + j = 0; + while(cmp != 0x0000) { + // Store this vf + _mm_store_si128(pvFRight, vf); + pvFRight += ROWSTRIDE_2COL; + + // Update vh w/r/t new vf + vh = _mm_max_epi16(vh, vf); + + // Save vH values + _mm_store_si128(pvHRight, vh); + pvHRight += ROWSTRIDE_2COL; + + // Update highest score encountered so far. 
+ vcolmax = _mm_max_epi16(vcolmax, vh); + + pvScore += 2; + + assert_lt(j, iter); + if(++j == iter) { + pvFRight -= colstride; + vtmp = _mm_load_si128(pvFRight); // load next vf ASAP + pvHRight -= colstride; + vh = _mm_load_si128(pvHRight); // load next vh ASAP + pvScore = d.profbuf_.ptr() + off + 1; + j = 0; + vf = _mm_slli_si128(vf, NBYTES_PER_WORD); + vf = _mm_or_si128(vf, vlolsw); + } else { + vtmp = _mm_load_si128(pvFRight); // load next vf ASAP + vh = _mm_load_si128(pvHRight); // load next vh ASAP + } + + // Update F with another gap extension + vf = _mm_subs_epi16(vf, rfgape); + vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions + vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions + vf = _mm_max_epi16(vtmp, vf); + vtmp = _mm_cmpgt_epi16(vf, vtmp); + cmp = _mm_movemask_epi8(vtmp); + nfixup++; + } + + // Now we'd like to know exactly which cells in the left column are + // candidates we might backtrace from. First question is: did *any* + // elements in the column exceed the minimum score threshold? + if(!debug && leftmax >= minsc_) { + // Yes. Next question is: which cells are candidates? We have to + // allow matches in the right column to override matches above and + // to the left in the left column. + assert_gt(i - rfi_, 0); + pvHLeft = vbuf_l + 2; + assert_lt(lastoff, MAX_SIZE_T); + pvScore = d.profbuf_.ptr() + lastoff; // even elts = query profile, odd = gap barrier + for(size_t k = 0; k < iter; k++) { + vh = _mm_load_si128(pvHLeft); + vtmp = _mm_cmpgt_epi16(pvScore[0], vzero); + int cmp = _mm_movemask_epi8(vtmp); + if(cmp != 0) { + // At least one candidate in this mask. Now iterate + // through vm/vh to evaluate individual cells. 
+ for(size_t m = 0; m < NWORDS_PER_REG; m++) { + size_t row = k + m * iter; + if(row >= dpRows()) { + break; + } + TAlScore sc = (TAlScore)(((TCScore *)&vh)[m] + 0x8000); + if(sc >= minsc_) { + if(((TCScore *)&vtmp)[m] != 0) { + // Add to data structure holding all candidates + size_t col = i - rfi_ - 1; // -1 b/c prev col + size_t frombot = dpRows() - row - 1; + DpBtCandidate cand(row, col, sc); + btdiag_.add(frombot + col, cand); + } + } + } + } + pvHLeft += ROWSTRIDE_2COL; + pvScore += 2; + } + } + + // Save some elements to checkpoints + if(checkpoint) { + + __m128i *pvE = vbuf_r + 0; + __m128i *pvF = vbuf_r + 1; + __m128i *pvH = vbuf_r + 2; + size_t coli = i - rfi_; + if(coli < cper_.locol_) cper_.locol_ = coli; + if(coli > cper_.hicol_) cper_.hicol_ = coli; + + if(cperTri_) { + size_t rc_mod = coli & cper_.lomask_; + assert_lt(rc_mod, cper_.per_); + int64_t row = -(int64_t)rc_mod-1; + int64_t row_mod = row; + int64_t row_div = 0; + size_t idx = coli >> cper_.perpow2_; + size_t idxrow = idx * cper_.nrow_; + assert_eq(4, ROWSTRIDE_2COL); + bool done = false; + while(true) { + row += (cper_.per_ - 2); + row_mod += (cper_.per_ - 2); + for(size_t j = 0; j < 2; j++) { + row++; + row_mod++; + if(row >= 0 && (size_t)row < cper_.nrow_) { + // Update row divided by iter_ and mod iter_ + while(row_mod >= (int64_t)iter) { + row_mod -= (int64_t)iter; + row_div++; + } + size_t delt = idxrow + row; + size_t vecoff = (row_mod << 5) + row_div; + assert_lt(row_div, 8); + int16_t h_sc = ((int16_t*)pvH)[vecoff]; + int16_t e_sc = ((int16_t*)pvE)[vecoff]; + int16_t f_sc = ((int16_t*)pvF)[vecoff]; + h_sc += 0x8000; assert_geq(h_sc, 0); + e_sc += 0x8000; assert_geq(e_sc, 0); + f_sc += 0x8000; assert_geq(f_sc, 0); + assert_leq(h_sc, cper_.perf_); + assert_leq(e_sc, cper_.perf_); + assert_leq(f_sc, cper_.perf_); + CpQuad *qdiags = ((j == 0) ? 
cper_.qdiag1s_.ptr() : cper_.qdiag2s_.ptr()); + qdiags[delt].sc[0] = h_sc; + qdiags[delt].sc[1] = e_sc; + qdiags[delt].sc[2] = f_sc; + } // if(row >= 0 && row < nrow_) + else if(row >= 0 && (size_t)row >= cper_.nrow_) { + done = true; + break; + } + } // end of loop over anti-diags + if(done) { + break; + } + idx++; + idxrow += cper_.nrow_; + } + } else { + // If this is the first column, take this opportunity to + // pre-calculate the coordinates of the elements we're going to + // checkpoint. + if(coli == 0) { + size_t cpi = cper_.per_-1; + size_t cpimod = cper_.per_-1; + size_t cpidiv = 0; + cper_.commitMap_.clear(); + while(cpi < cper_.nrow_) { + while(cpimod >= iter) { + cpimod -= iter; + cpidiv++; + } + size_t vecoff = (cpimod << 5) + cpidiv; + cper_.commitMap_.push_back(vecoff); + cpi += cper_.per_; + cpimod += cper_.per_; + } + } + // Save all the rows + size_t rowoff = 0; + size_t sz = cper_.commitMap_.size(); + for(size_t i = 0; i < sz; i++, rowoff += cper_.ncol_) { + size_t vecoff = cper_.commitMap_[i]; + int16_t h_sc = ((int16_t*)pvH)[vecoff]; + //int16_t e_sc = ((int16_t*)pvE)[vecoff]; + int16_t f_sc = ((int16_t*)pvF)[vecoff]; + h_sc += 0x8000; assert_geq(h_sc, 0); + //e_sc += 0x8000; assert_geq(e_sc, 0); + f_sc += 0x8000; assert_geq(f_sc, 0); + assert_leq(h_sc, cper_.perf_); + //assert_leq(e_sc, cper_.perf_); + assert_leq(f_sc, cper_.perf_); + CpQuad& dst = cper_.qrows_[rowoff + coli]; + dst.sc[0] = h_sc; + //dst.sc[1] = e_sc; + dst.sc[2] = f_sc; + } + // Is this a column we'd like to checkpoint? 
+ if((coli & cper_.lomask_) == cper_.lomask_) { + // Save the column using memcpys + assert_gt(coli, 0); + size_t wordspercol = cper_.niter_ * ROWSTRIDE_2COL; + size_t coloff = (coli >> cper_.perpow2_) * wordspercol; + __m128i *dst = cper_.qcols_.ptr() + coloff; + memcpy(dst, vbuf_r, sizeof(__m128i) * wordspercol); + } + } + if(cper_.debug_) { + // Save the column using memcpys + size_t wordspercol = cper_.niter_ * ROWSTRIDE_2COL; + size_t coloff = coli * wordspercol; + __m128i *dst = cper_.qcolsD_.ptr() + coloff; + memcpy(dst, vbuf_r, sizeof(__m128i) * wordspercol); + } + } + + vmax = _mm_max_epi16(vmax, vcolmax); + { + // Get single largest score in this column + vmaxtmp = vcolmax; + vtmp = _mm_srli_si128(vmaxtmp, 8); + vmaxtmp = _mm_max_epi16(vmaxtmp, vtmp); + vtmp = _mm_srli_si128(vmaxtmp, 4); + vmaxtmp = _mm_max_epi16(vmaxtmp, vtmp); + vtmp = _mm_srli_si128(vmaxtmp, 2); + vmaxtmp = _mm_max_epi16(vmaxtmp, vtmp); + int16_t ret = _mm_extract_epi16(vmaxtmp, 0); + TAlScore score = (TAlScore)(ret + 0x8000); + if(ret == MIN_I16) { + score = MIN_I64; + } + + if(score < minsc_) { + size_t ncolleft = rff_ - i - 1; + if(max(score, 0) + (TAlScore)ncolleft * matchsc < minsc_) { + // Bail! There can't possibly be a valid alignment that + // passes through this column. + bailed = true; + break; + } + } + + leftmax = score; + } + } + + lastoff = off; + + // Now we'd like to know exactly which cells in the *rightmost* column are + // candidates we might backtrace from. Did *any* elements exceed the + // minimum score threshold? + if(!debug && !bailed && leftmax >= minsc_) { + // Yes. Next question is: which cells are candidates? We have to + // allow matches in the right column to override matches above and + // to the left in the left column. 
+ pvHLeft = vbuf_r + 2; + assert_lt(lastoff, MAX_SIZE_T); + pvScore = d.profbuf_.ptr() + lastoff; // even elts = query profile, odd = gap barrier + for(size_t k = 0; k < iter; k++) { + vh = _mm_load_si128(pvHLeft); + vtmp = _mm_cmpgt_epi16(pvScore[0], vzero); + int cmp = _mm_movemask_epi8(vtmp); + if(cmp != 0) { + // At least one candidate in this mask. Now iterate + // through vm/vh to evaluate individual cells. + for(size_t m = 0; m < NWORDS_PER_REG; m++) { + size_t row = k + m * iter; + if(row >= dpRows()) { + break; + } + TAlScore sc = (TAlScore)(((TCScore *)&vh)[m] + 0x8000); + if(sc >= minsc_) { + if(((TCScore *)&vtmp)[m] != 0) { + // Add to data structure holding all candidates + size_t col = rff_ - rfi_ - 1; // -1 b/c prev col + size_t frombot = dpRows() - row - 1; + DpBtCandidate cand(row, col, sc); + btdiag_.add(frombot + col, cand); + } + } + } + } + pvHLeft += ROWSTRIDE_2COL; + pvScore += 2; + } + } + + // Find largest score in vmax + vtmp = _mm_srli_si128(vmax, 8); + vmax = _mm_max_epi16(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 4); + vmax = _mm_max_epi16(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 2); + vmax = _mm_max_epi16(vmax, vtmp); + int16_t ret = _mm_extract_epi16(vmax, 0); + + // Update metrics + if(!debug) { + size_t ninner = (rff_ - rfi_) * iter; + met.col += (rff_ - rfi_); // DP columns + met.cell += (ninner * NWORDS_PER_REG); // DP cells + met.inner += ninner; // DP inner loop iters + met.fixup += nfixup; // DP fixup loop iters + } + + flag = 0; + + // Did we find a solution? + TAlScore score = MIN_I64; + if(ret == MIN_I16) { + flag = -1; // no + if(!debug) met.dpfail++; + return MIN_I64; + } else { + score = (TAlScore)(ret + 0x8000); + if(score < minsc_) { + flag = -1; // no + if(!debug) met.dpfail++; + return score; + } + } + + // Could we have saturated? 
+ if(ret == MAX_I16) { + flag = -2; // yes + if(!debug) met.dpsat++; + return MIN_I64; + } + + // Now take all the backtrace candidates in the btdaig_ structure and + // dump them into the btncand_ array. They'll be sorted later. + if(!debug) { + btdiag_.dump(btncand_); + assert(!btncand_.empty()); + } + + // Return largest score + if(!debug) met.dpsucc++; + return score; +} + +/** + * Solve the current alignment problem using SSE instructions that operate on 8 + * signed 16-bit values packed into a single 128-bit register. + */ +TAlScore SwAligner::alignNucleotidesLocalSseI16(int& flag, bool debug) { + assert_leq(rdf_, rd_->length()); + assert_leq(rdf_, qu_->length()); + assert_lt(rfi_, rff_); + assert_lt(rdi_, rdf_); + assert_eq(rd_->length(), qu_->length()); + assert_geq(sc_->gapbar, 1); + assert(repOk()); +#ifndef NDEBUG + for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) { + assert_range(0, 16, (int)rf_[i]); + } +#endif + + SSEData& d = fw_ ? sseI16fw_ : sseI16rc_; + SSEMetrics& met = extend_ ? sseI16ExtendMet_ : sseI16MateMet_; + if(!debug) met.dp++; + buildQueryProfileLocalSseI16(fw_); + assert(!d.profbuf_.empty()); + + assert_gt(d.maxBonus_, 0); + size_t iter = + (dpRows() + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; // iter = segLen + + // Many thanks to Michael Farrar for releasing his striped Smith-Waterman + // implementation: + // + // http://sites.google.com/site/farrarmichael/smith-waterman + // + // Much of the implmentation below is adapted from Michael's code. 
+ + // Set all elts to reference gap open penalty + __m128i rfgapo = _mm_setzero_si128(); + __m128i rfgape = _mm_setzero_si128(); + __m128i rdgapo = _mm_setzero_si128(); + __m128i rdgape = _mm_setzero_si128(); + __m128i vlo = _mm_setzero_si128(); + __m128i vhi = _mm_setzero_si128(); + __m128i vlolsw = _mm_setzero_si128(); + __m128i vmax = _mm_setzero_si128(); + __m128i vcolmax = _mm_setzero_si128(); + __m128i vmaxtmp = _mm_setzero_si128(); + __m128i ve = _mm_setzero_si128(); + __m128i vf = _mm_setzero_si128(); + __m128i vh = _mm_setzero_si128(); + __m128i vtmp = _mm_setzero_si128(); + + assert_gt(sc_->refGapOpen(), 0); + assert_leq(sc_->refGapOpen(), MAX_I16); + rfgapo = _mm_insert_epi16(rfgapo, sc_->refGapOpen(), 0); + rfgapo = _mm_shufflelo_epi16(rfgapo, 0); + rfgapo = _mm_shuffle_epi32(rfgapo, 0); + + // Set all elts to reference gap extension penalty + assert_gt(sc_->refGapExtend(), 0); + assert_leq(sc_->refGapExtend(), MAX_I16); + assert_leq(sc_->refGapExtend(), sc_->refGapOpen()); + rfgape = _mm_insert_epi16(rfgape, sc_->refGapExtend(), 0); + rfgape = _mm_shufflelo_epi16(rfgape, 0); + rfgape = _mm_shuffle_epi32(rfgape, 0); + + // Set all elts to read gap open penalty + assert_gt(sc_->readGapOpen(), 0); + assert_leq(sc_->readGapOpen(), MAX_I16); + rdgapo = _mm_insert_epi16(rdgapo, sc_->readGapOpen(), 0); + rdgapo = _mm_shufflelo_epi16(rdgapo, 0); + rdgapo = _mm_shuffle_epi32(rdgapo, 0); + + // Set all elts to read gap extension penalty + assert_gt(sc_->readGapExtend(), 0); + assert_leq(sc_->readGapExtend(), MAX_I16); + assert_leq(sc_->readGapExtend(), sc_->readGapOpen()); + rdgape = _mm_insert_epi16(rdgape, sc_->readGapExtend(), 0); + rdgape = _mm_shufflelo_epi16(rdgape, 0); + rdgape = _mm_shuffle_epi32(rdgape, 0); + + // Set all elts to 0x8000 (min value for signed 16-bit) + vlo = _mm_cmpeq_epi16(vlo, vlo); // all elts = 0xffff + vlo = _mm_slli_epi16(vlo, NBITS_PER_WORD-1); // all elts = 0x8000 + + // Set all elts to 0x7fff (max value for signed 16-bit) + vhi 
= _mm_cmpeq_epi16(vhi, vhi); // all elts = 0xffff + vhi = _mm_srli_epi16(vhi, 1); // all elts = 0x7fff + + // Set all elts to 0x8000 (min value for signed 16-bit) + vmax = vlo; + + // vlolsw: topmost (least sig) word set to 0x8000, all other words=0 + vlolsw = _mm_shuffle_epi32(vlo, 0); + vlolsw = _mm_srli_si128(vlolsw, NBYTES_PER_REG - NBYTES_PER_WORD); + + // Points to a long vector of __m128i where each element is a block of + // contiguous cells in the E, F or H matrix. If the index % 3 == 0, then + // the block of cells is from the E matrix. If index % 3 == 1, they're + // from the F matrix. If index % 3 == 2, then they're from the H matrix. + // Blocks of cells are organized in the same interleaved manner as they are + // calculated by the Farrar algorithm. + const __m128i *pvScore; // points into the query profile + + d.mat_.init(dpRows(), rff_ - rfi_, NWORDS_PER_REG); + const size_t colstride = d.mat_.colstride(); + //const size_t rowstride = d.mat_.rowstride(); + assert_eq(ROWSTRIDE, colstride / iter); + + // Initialize the H and E vectors in the first matrix column + __m128i *pvHTmp = d.mat_.tmpvec(0, 0); + __m128i *pvETmp = d.mat_.evec(0, 0); + + for(size_t i = 0; i < iter; i++) { + _mm_store_si128(pvETmp, vlo); + _mm_store_si128(pvHTmp, vlo); // start low in local mode + pvETmp += ROWSTRIDE; + pvHTmp += ROWSTRIDE; + } + // These are swapped just before the innermost loop + __m128i *pvHStore = d.mat_.hvec(0, 0); + __m128i *pvHLoad = d.mat_.tmpvec(0, 0); + __m128i *pvELoad = d.mat_.evec(0, 0); + __m128i *pvEStore = d.mat_.evecUnsafe(0, 1); + __m128i *pvFStore = d.mat_.fvec(0, 0); + __m128i *pvFTmp = NULL; + + assert_gt(sc_->gapbar, 0); + size_t nfixup = 0; + TAlScore matchsc = sc_->match(30); + + // Fill in the table as usual but instead of using the same gap-penalty + // vector for each iteration of the inner loop, load words out of a + // pre-calculated gap vector parallel to the query profile. 
The pre- + // calculated gap vectors enforce the gap barrier constraint by making it + // infinitely costly to introduce a gap in barrier rows. + // + // AND use a separate loop to fill in the first row of the table, enforcing + // the st_ constraints in the process. This is awkward because it + // separates the processing of the first row from the others and might make + // it difficult to use the first-row results in the next row, but it might + // be the simplest and least disruptive way to deal with the st_ constraint. + + colstop_ = rff_ - rfi_; + lastsolcol_ = 0; + for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) { + assert(pvFStore == d.mat_.fvec(0, i - rfi_)); + assert(pvHStore == d.mat_.hvec(0, i - rfi_)); + + // Fetch this column's reference mask + const int refm = (int)rf_[i]; + + // Fetch the appropriate query profile + size_t off = (size_t)firsts5[refm] * iter * 2; + pvScore = d.profbuf_.ptr() + off; // even elts = query profile, odd = gap barrier + + // Load H vector from the final row of the previous column + vh = _mm_load_si128(pvHLoad + colstride - ROWSTRIDE); + + // Set all F cells to low value + vf = _mm_cmpeq_epi16(vf, vf); + vf = _mm_slli_epi16(vf, NBITS_PER_WORD-1); + vf = _mm_or_si128(vf, vlolsw); + // vf now contains the vertical contribution + + // Store cells in F, calculated previously + // No need to veto ref gap extensions, they're all 0x8000s + _mm_store_si128(pvFStore, vf); + pvFStore += ROWSTRIDE; + + // Shift down so that topmost (least sig) cell gets 0 + vh = _mm_slli_si128(vh, NBYTES_PER_WORD); + // Fill topmost (least sig) cell with low value + vh = _mm_or_si128(vh, vlolsw); + + // We pull out one loop iteration to make it easier to veto values in the top row + + // Load cells from E, calculated previously + ve = _mm_load_si128(pvELoad); + assert_all_lt(ve, vhi); + pvELoad += ROWSTRIDE; + // ve now contains the horizontal contribution + + // Factor in query profile (matches and mismatches) + vh = _mm_adds_epi16(vh, 
pvScore[0]); + // vh now contains the diagonal contribution + + // Update H, factoring in E and F + vtmp = _mm_max_epi16(vh, ve); + // F won't change anything! + + vh = vtmp; + + // Update highest score so far + vcolmax = vlo; + vcolmax = _mm_max_epi16(vcolmax, vh); + + // Save the new vH values + _mm_store_si128(pvHStore, vh); + pvHStore += ROWSTRIDE; + + // Update vE value + vf = vh; + vh = _mm_subs_epi16(vh, rdgapo); + vh = _mm_adds_epi16(vh, pvScore[1]); // veto some read gap opens + vh = _mm_adds_epi16(vh, pvScore[1]); // veto some read gap opens + ve = _mm_subs_epi16(ve, rdgape); + ve = _mm_max_epi16(ve, vh); + assert_all_lt(ve, vhi); + + // Load the next h value + vh = _mm_load_si128(pvHLoad); + pvHLoad += ROWSTRIDE; + + // Save E values + _mm_store_si128(pvEStore, ve); + pvEStore += ROWSTRIDE; + + // Update vf value + vf = _mm_subs_epi16(vf, rfgapo); + assert_all_lt(vf, vhi); + + pvScore += 2; // move on to next query profile + + // For each character in the reference text: + size_t j; + for(j = 1; j < iter; j++) { + // Load cells from E, calculated previously + ve = _mm_load_si128(pvELoad); + assert_all_lt(ve, vhi); + pvELoad += ROWSTRIDE; + + // Store cells in F, calculated previously + vf = _mm_adds_epi16(vf, pvScore[1]); // veto some ref gap extensions + vf = _mm_adds_epi16(vf, pvScore[1]); // veto some ref gap extensions + _mm_store_si128(pvFStore, vf); + pvFStore += ROWSTRIDE; + + // Factor in query profile (matches and mismatches) + vh = _mm_adds_epi16(vh, pvScore[0]); + + // Update H, factoring in E and F + vh = _mm_max_epi16(vh, ve); + vh = _mm_max_epi16(vh, vf); + + // Update highest score encountered this far + vcolmax = _mm_max_epi16(vcolmax, vh); + + // Save the new vH values + _mm_store_si128(pvHStore, vh); + pvHStore += ROWSTRIDE; + + // Update vE value + vtmp = vh; + vh = _mm_subs_epi16(vh, rdgapo); + vh = _mm_adds_epi16(vh, pvScore[1]); // veto some read gap opens + vh = _mm_adds_epi16(vh, pvScore[1]); // veto some read gap opens + ve = 
_mm_subs_epi16(ve, rdgape); + ve = _mm_max_epi16(ve, vh); + assert_all_lt(ve, vhi); + + // Load the next h value + vh = _mm_load_si128(pvHLoad); + pvHLoad += ROWSTRIDE; + + // Save E values + _mm_store_si128(pvEStore, ve); + pvEStore += ROWSTRIDE; + + // Update vf value + vtmp = _mm_subs_epi16(vtmp, rfgapo); + vf = _mm_subs_epi16(vf, rfgape); + assert_all_lt(vf, vhi); + vf = _mm_max_epi16(vf, vtmp); + + pvScore += 2; // move on to next query profile / gap veto + } + // pvHStore, pvELoad, pvEStore have all rolled over to the next column + pvFTmp = pvFStore; + pvFStore -= colstride; // reset to start of column + vtmp = _mm_load_si128(pvFStore); + + pvHStore -= colstride; // reset to start of column + vh = _mm_load_si128(pvHStore); + + pvEStore -= colstride; // reset to start of column + ve = _mm_load_si128(pvEStore); + + pvHLoad = pvHStore; // new pvHLoad = pvHStore + pvScore = d.profbuf_.ptr() + off + 1; // reset veto vector + + // vf from last row gets shifted down by one to overlay the first row + // rfgape has already been subtracted from it. + vf = _mm_slli_si128(vf, NBYTES_PER_WORD); + vf = _mm_or_si128(vf, vlolsw); + + vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions + vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions + vf = _mm_max_epi16(vtmp, vf); + vtmp = _mm_cmpgt_epi16(vf, vtmp); + int cmp = _mm_movemask_epi8(vtmp); + + // If any element of vtmp is greater than H - gap-open... 
+ j = 0; + while(cmp != 0x0000) { + // Store this vf + _mm_store_si128(pvFStore, vf); + pvFStore += ROWSTRIDE; + + // Update vh w/r/t new vf + vh = _mm_max_epi16(vh, vf); + + // Save vH values + _mm_store_si128(pvHStore, vh); + pvHStore += ROWSTRIDE; + + // Update highest score encountered this far + vcolmax = _mm_max_epi16(vcolmax, vh); + + // Update E in case it can be improved using our new vh + vh = _mm_subs_epi16(vh, rdgapo); + vh = _mm_adds_epi16(vh, *pvScore); // veto some read gap opens + vh = _mm_adds_epi16(vh, *pvScore); // veto some read gap opens + ve = _mm_max_epi16(ve, vh); + _mm_store_si128(pvEStore, ve); + pvEStore += ROWSTRIDE; + pvScore += 2; + + assert_lt(j, iter); + if(++j == iter) { + pvFStore -= colstride; + vtmp = _mm_load_si128(pvFStore); // load next vf ASAP + pvHStore -= colstride; + vh = _mm_load_si128(pvHStore); // load next vh ASAP + pvEStore -= colstride; + ve = _mm_load_si128(pvEStore); // load next ve ASAP + pvScore = d.profbuf_.ptr() + off + 1; + j = 0; + vf = _mm_slli_si128(vf, NBYTES_PER_WORD); + vf = _mm_or_si128(vf, vlolsw); + } else { + vtmp = _mm_load_si128(pvFStore); // load next vf ASAP + vh = _mm_load_si128(pvHStore); // load next vh ASAP + ve = _mm_load_si128(pvEStore); // load next vh ASAP + } + + // Update F with another gap extension + vf = _mm_subs_epi16(vf, rfgape); + vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions + vf = _mm_adds_epi16(vf, *pvScore); // veto some ref gap extensions + vf = _mm_max_epi16(vtmp, vf); + vtmp = _mm_cmpgt_epi16(vf, vtmp); + cmp = _mm_movemask_epi8(vtmp); + nfixup++; + } + +#ifndef NDEBUG + if((rand() & 15) == 0) { + // This is a work-intensive sanity check; each time we finish filling + // a column, we check that each H, E, and F is sensible. 
+ for(size_t k = 0; k < dpRows(); k++) { + assert(cellOkLocalI16( + d, + k, // row + i - rfi_, // col + refm, // reference mask + (int)(*rd_)[rdi_+k], // read char + (int)(*qu_)[rdi_+k], // read quality + *sc_)); // scoring scheme + } + } +#endif + + // Store column maximum vector in first element of tmp + vmax = _mm_max_epi16(vmax, vcolmax); + _mm_store_si128(d.mat_.tmpvec(0, i - rfi_), vcolmax); + + { + // Get single largest score in this column + vmaxtmp = vcolmax; + vtmp = _mm_srli_si128(vmaxtmp, 8); + vmaxtmp = _mm_max_epi16(vmaxtmp, vtmp); + vtmp = _mm_srli_si128(vmaxtmp, 4); + vmaxtmp = _mm_max_epi16(vmaxtmp, vtmp); + vtmp = _mm_srli_si128(vmaxtmp, 2); + vmaxtmp = _mm_max_epi16(vmaxtmp, vtmp); + int16_t ret = _mm_extract_epi16(vmaxtmp, 0); + TAlScore score = (TAlScore)(ret + 0x8000); + + if(score < minsc_) { + size_t ncolleft = rff_ - i - 1; + if(score + (TAlScore)ncolleft * matchsc < minsc_) { + // Bail! We're guaranteed not to see a valid alignment in + // the rest of the matrix + colstop_ = (i+1) - rfi_; + break; + } + } else { + lastsolcol_ = i - rfi_; + } + } + + // pvELoad and pvHLoad are already where they need to be + + // Adjust the load and store vectors here. + pvHStore = pvHLoad + colstride; + pvEStore = pvELoad + colstride; + pvFStore = pvFTmp; + } + + // Find largest score in vmax + vtmp = _mm_srli_si128(vmax, 8); + vmax = _mm_max_epi16(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 4); + vmax = _mm_max_epi16(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 2); + vmax = _mm_max_epi16(vmax, vtmp); + int16_t ret = _mm_extract_epi16(vmax, 0); + + // Update metrics + if(!debug) { + size_t ninner = (rff_ - rfi_) * iter; + met.col += (rff_ - rfi_); // DP columns + met.cell += (ninner * NWORDS_PER_REG); // DP cells + met.inner += ninner; // DP inner loop iters + met.fixup += nfixup; // DP fixup loop iters + } + + flag = 0; + + // Did we find a solution? 
+ TAlScore score = MIN_I64; + if(ret == MIN_I16) { + flag = -1; // no + if(!debug) met.dpfail++; + return MIN_I64; + } else { + score = (TAlScore)(ret + 0x8000); + if(score < minsc_) { + flag = -1; // no + if(!debug) met.dpfail++; + return score; + } + } + + // Could we have saturated? + if(ret == MAX_I16) { + flag = -2; // yes + if(!debug) met.dpsat++; + return MIN_I64; + } + + // Return largest score + if(!debug) met.dpsucc++; + return score; +} + +/** + * Given a filled-in DP table, populate the btncand_ list with candidate cells + * that might be at the ends of valid alignments. No need to do this unless + * the maximum score returned by the align*() func is >= the minimum. + * + * We needn't consider cells that have no chance of reaching any of the core + * diagonals. These are the cells that are more than 'maxgaps' cells away from + * a core diagonal. + * + * We need to be careful to consider that the rectangle might be truncated on + * one or both ends. + * + * The seed extend case looks like this: + * + * |Rectangle| 0: seed diagonal + * **OO0oo---- o: "RHS gap" diagonals + * -**OO0oo--- O: "LHS gap" diagonals + * --**OO0oo-- *: "LHS extra" diagonals + * ---**OO0oo- -: cells that can't possibly be involved in a valid + * ----**OO0oo alignment that overlaps one of the core diagonals + * + * The anchor-to-left case looks like this: + * + * |Anchor| | ---- Rectangle ---- | + * o---------OO0000000000000oo------ 0: mate diagonal (also core diags!) 
+ * -o---------OO0000000000000oo----- o: "RHS gap" diagonals + * --o---------OO0000000000000oo---- O: "LHS gap" diagonals + * ---oo--------OO0000000000000oo--- *: "LHS extra" diagonals + * -----o--------OO0000000000000oo-- -: cells that can't possibly be + * ------o--------OO0000000000000oo- involved in a valid alignment that + * -------o--------OO0000000000000oo overlaps one of the core diagonals + * XXXXXXXXXXXXX + * | RHS Range | + * ^ ^ + * rl rr + * + * The anchor-to-right case looks like this: + * + * ll lr + * v v + * | LHS Range | + * XXXXXXXXXXXXX |Anchor| + * OO0000000000000oo--------o-------- 0: mate diagonal (also core diags!) + * -OO0000000000000oo--------o------- o: "RHS gap" diagonals + * --OO0000000000000oo--------o------ O: "LHS gap" diagonals + * ---OO0000000000000oo--------oo---- *: "LHS extra" diagonals + * ----OO0000000000000oo---------o--- -: cells that can't possibly be + * -----OO0000000000000oo---------o-- involved in a valid alignment that + * ------OO0000000000000oo---------o- overlaps one of the core diagonals + * | ---- Rectangle ---- | + */ +bool SwAligner::gatherCellsNucleotidesLocalSseI16(TAlScore best) { + // What's the minimum number of rows that can possibly be spanned by an + // alignment that meets the minimum score requirement? + assert(sse16succ_); + size_t bonus = (size_t)sc_->match(30); + const size_t ncol = lastsolcol_ + 1; + const size_t nrow = dpRows(); + assert_gt(nrow, 0); + btncand_.clear(); + btncanddone_.clear(); + SSEData& d = fw_ ? sseI16fw_ : sseI16rc_; + SSEMetrics& met = extend_ ? 
sseI16ExtendMet_ : sseI16MateMet_; + assert(!d.profbuf_.empty()); + //const size_t rowstride = d.mat_.rowstride(); + //const size_t colstride = d.mat_.colstride(); + size_t iter = (dpRows() + (NWORDS_PER_REG - 1)) / NWORDS_PER_REG; + assert_gt(iter, 0); + assert_geq(minsc_, 0); + assert_gt(bonus, 0); + size_t minrow = (size_t)(((minsc_ + bonus - 1) / bonus) - 1); + for(size_t j = 0; j < ncol; j++) { + // Establish the range of rows where a backtrace from the cell in this + // row/col is close enough to one of the core diagonals that it could + // conceivably count + size_t nrow_lo = MIN_SIZE_T; + size_t nrow_hi = nrow; + // First, check if there is a cell in this column with a score + // above the score threshold + __m128i vmax = *d.mat_.tmpvec(0, j); + __m128i vtmp = _mm_srli_si128(vmax, 8); + vmax = _mm_max_epi16(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 4); + vmax = _mm_max_epi16(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 2); + vmax = _mm_max_epi16(vmax, vtmp); + TAlScore score = (TAlScore)((int16_t)_mm_extract_epi16(vmax, 0) + 0x8000); + assert_geq(score, 0); +#ifndef NDEBUG + { + // Start in upper vector row and move down + TAlScore max = 0; + vmax = *d.mat_.tmpvec(0, j); + __m128i *pvH = d.mat_.hvec(0, j); + for(size_t i = 0; i < iter; i++) { + for(size_t k = 0; k < NWORDS_PER_REG; k++) { + TAlScore sc = (TAlScore)(((TCScore*)pvH)[k] + 0x8000); + TAlScore scm = (TAlScore)(((TCScore*)&vmax)[k] + 0x8000); + assert_leq(sc, scm); + if(sc > max) { + max = sc; + } + } + pvH += ROWSTRIDE; + } + assert_eq(max, score); + } +#endif + if(score < minsc_) { + // Scores in column aren't good enough + continue; + } + // Get pointer to first cell in column to examine: + __m128i *pvHorig = d.mat_.hvec(0, j); + __m128i *pvH = pvHorig; + // Get pointer to the vector in the following column that corresponds + // to the cells diagonally down and to the right from the cells in pvH + __m128i *pvHSucc = (j < ncol-1) ? 
d.mat_.hvec(0, j+1) : NULL; + // Start in upper vector row and move down + for(size_t i = 0; i < iter; i++) { + if(pvHSucc != NULL) { + pvHSucc += ROWSTRIDE; + if(i == iter-1) { + pvHSucc = d.mat_.hvec(0, j+1); + } + } + // Which elements of this vector are exhaustively scored? + size_t rdoff = i; + for(size_t k = 0; k < NWORDS_PER_REG; k++) { + // Is this row, col one that we can potential backtrace from? + // I.e. are we close enough to a core diagonal? + if(rdoff >= nrow_lo && rdoff < nrow_hi) { + // This cell has been exhaustively scored + if(rdoff >= minrow) { + // ... and it could potentially score high enough + TAlScore sc = (TAlScore)(((TCScore*)pvH)[k] + 0x8000); + assert_leq(sc, best); + if(sc >= minsc_) { + // This is a potential solution + bool matchSucc = false; + int readc = (*rd_)[rdoff]; + int refc = rf_[j + rfi_]; + bool match = ((refc & (1 << readc)) != 0); + if(rdoff < dpRows()-1) { + int readcSucc = (*rd_)[rdoff+1]; + int refcSucc = rf_[j + rfi_ + 1]; + assert_range(0, 16, refcSucc); + matchSucc = ((refcSucc & (1 << readcSucc)) != 0); + } + if(match && !matchSucc) { + // Yes, this is legit + met.gathsol++; + btncand_.expand(); + btncand_.back().init(rdoff, j, sc); + } + } + } + } else { + // Already saw every element in the vector that's been + // exhaustively scored + break; + } + rdoff += iter; + } + pvH += ROWSTRIDE; + } + } + if(!btncand_.empty()) { + d.mat_.initMasks(); + } + return !btncand_.empty(); +} + +#define MOVE_VEC_PTR_UP(vec, rowvec, rowelt) { \ + if(rowvec == 0) { \ + rowvec += d.mat_.nvecrow_; \ + vec += d.mat_.colstride_; \ + rowelt--; \ + } \ + rowvec--; \ + vec -= ROWSTRIDE; \ +} + +#define MOVE_VEC_PTR_LEFT(vec, rowvec, rowelt) { vec -= d.mat_.colstride_; } + +#define MOVE_VEC_PTR_UPLEFT(vec, rowvec, rowelt) { \ + MOVE_VEC_PTR_UP(vec, rowvec, rowelt); \ + MOVE_VEC_PTR_LEFT(vec, rowvec, rowelt); \ +} + +#define MOVE_ALL_LEFT() { \ + MOVE_VEC_PTR_LEFT(cur_vec, rowvec, rowelt); \ + MOVE_VEC_PTR_LEFT(left_vec, left_rowvec, 
left_rowelt); \ + MOVE_VEC_PTR_LEFT(up_vec, up_rowvec, up_rowelt); \ + MOVE_VEC_PTR_LEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \ +} + +#define MOVE_ALL_UP() { \ + MOVE_VEC_PTR_UP(cur_vec, rowvec, rowelt); \ + MOVE_VEC_PTR_UP(left_vec, left_rowvec, left_rowelt); \ + MOVE_VEC_PTR_UP(up_vec, up_rowvec, up_rowelt); \ + MOVE_VEC_PTR_UP(upleft_vec, upleft_rowvec, upleft_rowelt); \ +} + +#define MOVE_ALL_UPLEFT() { \ + MOVE_VEC_PTR_UPLEFT(cur_vec, rowvec, rowelt); \ + MOVE_VEC_PTR_UPLEFT(left_vec, left_rowvec, left_rowelt); \ + MOVE_VEC_PTR_UPLEFT(up_vec, up_rowvec, up_rowelt); \ + MOVE_VEC_PTR_UPLEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \ +} + +#define NEW_ROW_COL(row, col) { \ + rowelt = row / d.mat_.nvecrow_; \ + rowvec = row % d.mat_.nvecrow_; \ + eltvec = (col * d.mat_.colstride_) + (rowvec * ROWSTRIDE); \ + cur_vec = d.mat_.matbuf_.ptr() + eltvec; \ + left_vec = cur_vec; \ + left_rowelt = rowelt; \ + left_rowvec = rowvec; \ + MOVE_VEC_PTR_LEFT(left_vec, left_rowvec, left_rowelt); \ + up_vec = cur_vec; \ + up_rowelt = rowelt; \ + up_rowvec = rowvec; \ + MOVE_VEC_PTR_UP(up_vec, up_rowvec, up_rowelt); \ + upleft_vec = up_vec; \ + upleft_rowelt = up_rowelt; \ + upleft_rowvec = up_rowvec; \ + MOVE_VEC_PTR_LEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \ +} + +/** + * Given the dynamic programming table and a cell, trace backwards from the + * cell and install the edits and score/penalty in the appropriate fields of + * res. The RandomSource is used to break ties among equally good ways of + * tracing back. + * + * Whenever we enter a cell, we check if its read/ref coordinates correspond to + * a cell we traversed constructing a previous alignment. If so, we backtrack + * to the last decision point, mask out the path that led to the previously + * observed cell, and continue along a different path. If there are no more + * paths to try, we stop. 
+ * + * If an alignment is found, 'off' is set to the alignment's upstream-most + * reference character's offset and true is returned. Otherwise, false is + * returned. + * + * In local alignment mode, this method is liable to be slow, especially for + * long reads. This is chiefly because if there is one valid solution + * (especially if it is pretty high scoring), then many, many paths shooting + * off that solution's path will also have valid solutions. + */ +bool SwAligner::backtraceNucleotidesLocalSseI16( + TAlScore escore, // in: expected score + SwResult& res, // out: store results (edits and scores) here + size_t& off, // out: store diagonal projection of origin + size_t& nbts, // out: # backtracks + size_t row, // start in this row + size_t col, // start in this column + RandomSource& rnd) // random gen, to choose among equal paths +{ + assert_lt(row, dpRows()); + assert_lt(col, (size_t)(rff_ - rfi_)); + SSEData& d = fw_ ? sseI16fw_ : sseI16rc_; + SSEMetrics& met = extend_ ? sseI16ExtendMet_ : sseI16MateMet_; + met.bt++; + assert(!d.profbuf_.empty()); + assert_lt(row, rd_->length()); + btnstack_.clear(); // empty the backtrack stack + btcells_.clear(); // empty the cells-so-far list + AlnScore score; + score.score_ = score.gaps_ = score.ns_ = 0; + size_t origCol = col; + size_t gaps = 0, readGaps = 0, refGaps = 0; + res.alres.reset(); + EList& ned = res.alres.ned(); + assert(ned.empty()); + assert_gt(dpRows(), row); + size_t trimEnd = dpRows() - row - 1; + size_t trimBeg = 0; + size_t ct = SSEMatrix::H; // cell type + // Row and col in terms of where they fall in the SSE vector matrix + size_t rowelt, rowvec, eltvec; + size_t left_rowelt, up_rowelt, upleft_rowelt; + size_t left_rowvec, up_rowvec, upleft_rowvec; + __m128i *cur_vec, *left_vec, *up_vec, *upleft_vec; + const size_t gbar = sc_->gapbar; + NEW_ROW_COL(row, col); + // If 'backEliminate' is true, then every time we visit a cell, we remove + // edges into the cell. 
We do this to avoid some of the thrashing around + // that occurs when there are lots of valid candidates in the same DP + // problem. + //const bool backEliminate = true; + while((int)row >= 0) { + // TODO: As soon as we enter a cell, set it as being reported through, + // *and* mark all cells that point into this cell as being reported + // through. This will save us from having to consider quite so many + // candidates. + + met.btcell++; + nbts++; + int readc = (*rd_)[rdi_ + row]; + int refm = (int)rf_[rfi_ + col]; + int readq = (*qu_)[row]; + assert_leq(col, origCol); + // Get score in this cell + bool empty = false, reportedThru, canMoveThru, branch = false; + int cur = SSEMatrix::H; + if(!d.mat_.reset_[row]) { + d.mat_.resetRow(row); + } + reportedThru = d.mat_.reportedThrough(row, col); + canMoveThru = true; + if(reportedThru) { + canMoveThru = false; + } else { + empty = false; + if(row > 0) { + size_t rowFromEnd = d.mat_.nrow() - row - 1; + bool gapsAllowed = !(row < gbar || rowFromEnd < gbar); + const int floorsc = 0; + const int offsetsc = 0x8000; + // Move to beginning of column/row + if(ct == SSEMatrix::E) { // AKA rdgap + assert_gt(col, 0); + TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::E))[rowelt] + offsetsc; + assert(gapsAllowed); + // Currently in the E matrix; incoming transition must come from the + // left. It's either a gap open from the H matrix or a gap extend from + // the E matrix. 
+ // TODO: save and restore origMask as well as mask + int origMask = 0, mask = 0; + // Get H score of cell to the left + TAlScore sc_h_left = ((TCScore*)(left_vec + SSEMatrix::H))[left_rowelt] + offsetsc; + if(sc_h_left > floorsc && sc_h_left - sc_->readGapOpen() == sc_cur) { + mask |= (1 << 0); // horiz H -> E move possible + } + // Get E score of cell to the left + TAlScore sc_e_left = ((TCScore*)(left_vec + SSEMatrix::E))[left_rowelt] + offsetsc; + if(sc_e_left > floorsc && sc_e_left - sc_->readGapExtend() == sc_cur) { + mask |= (1 << 1); // horiz E -> E move possible + } + origMask = mask; + assert(origMask > 0 || sc_cur <= sc_->match()); + if(d.mat_.isEMaskSet(row, col)) { + mask = (d.mat_.masks_[row][col] >> 8) & 3; + } + if(mask == 3) { + // Horiz H -> E or horiz E -> E moves possible +#if 1 + // Pick H -> E cell + cur = SW_BT_OALL_READ_OPEN; + d.mat_.eMaskSet(row, col, 2); // might choose E later +#else + if(rnd.nextU2()) { + // Pick H -> E cell + cur = SW_BT_OALL_READ_OPEN; + d.mat_.eMaskSet(row, col, 2); // might choose E later + } else { + // Pick E -> E cell + cur = SW_BT_RDGAP_EXTEND; + d.mat_.eMaskSet(row, col, 1); // might choose H later + } +#endif + branch = true; + } else if(mask == 2) { + // Only horiz E -> E move possible, pick it + cur = SW_BT_RDGAP_EXTEND; + d.mat_.eMaskSet(row, col, 0); // done + } else if(mask == 1) { + // I chose the H cell + cur = SW_BT_OALL_READ_OPEN; + d.mat_.eMaskSet(row, col, 0); // done + } else { + empty = true; + // It's empty, so the only question left is whether we should be + // allowed in terimnate in this cell. If it's got a valid score + // then we *shouldn't* be allowed to terminate here because that + // means it's part of a larger alignment that was already reported. + canMoveThru = (origMask == 0); + } + if(!branch) { + // Is this where we can eliminate some incoming paths as well? 
+ } + assert(!empty || !canMoveThru); + } else if(ct == SSEMatrix::F) { // AKA rfgap + assert_gt(row, 0); + assert(gapsAllowed); + TAlScore sc_h_up = ((TCScore*)(up_vec + SSEMatrix::H))[up_rowelt] + offsetsc; + TAlScore sc_f_up = ((TCScore*)(up_vec + SSEMatrix::F))[up_rowelt] + offsetsc; + TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::F))[rowelt] + offsetsc; + // Currently in the F matrix; incoming transition must come from above. + // It's either a gap open from the H matrix or a gap extend from the F + // matrix. + // TODO: save and restore origMask as well as mask + int origMask = 0, mask = 0; + // Get H score of cell above + if(sc_h_up > floorsc && sc_h_up - sc_->refGapOpen() == sc_cur) { + mask |= (1 << 0); + } + // Get F score of cell above + if(sc_f_up > floorsc && sc_f_up - sc_->refGapExtend() == sc_cur) { + mask |= (1 << 1); + } + origMask = mask; + assert(origMask > 0 || sc_cur <= sc_->match()); + if(d.mat_.isFMaskSet(row, col)) { + mask = (d.mat_.masks_[row][col] >> 11) & 3; + } + if(mask == 3) { +#if 1 + // I chose the H cell + cur = SW_BT_OALL_REF_OPEN; + d.mat_.fMaskSet(row, col, 2); // might choose E later +#else + if(rnd.nextU2()) { + // I chose the H cell + cur = SW_BT_OALL_REF_OPEN; + d.mat_.fMaskSet(row, col, 2); // might choose E later + } else { + // I chose the F cell + cur = SW_BT_RFGAP_EXTEND; + d.mat_.fMaskSet(row, col, 1); // might choose E later + } +#endif + branch = true; + } else if(mask == 2) { + // I chose the F cell + cur = SW_BT_RFGAP_EXTEND; + d.mat_.fMaskSet(row, col, 0); // done + } else if(mask == 1) { + // I chose the H cell + cur = SW_BT_OALL_REF_OPEN; + d.mat_.fMaskSet(row, col, 0); // done + } else { + empty = true; + // It's empty, so the only question left is whether we should be + // allowed in terimnate in this cell. If it's got a valid score + // then we *shouldn't* be allowed to terminate here because that + // means it's part of a larger alignment that was already reported. 
+ canMoveThru = (origMask == 0); + } + assert(!empty || !canMoveThru); + } else { + assert_eq(SSEMatrix::H, ct); + TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::H))[rowelt] + offsetsc; + TAlScore sc_f_up = ((TCScore*)(up_vec + SSEMatrix::F))[up_rowelt] + offsetsc; + TAlScore sc_h_up = ((TCScore*)(up_vec + SSEMatrix::H))[up_rowelt] + offsetsc; + TAlScore sc_h_left = col > 0 ? (((TCScore*)(left_vec + SSEMatrix::H))[left_rowelt] + offsetsc) : floorsc; + TAlScore sc_e_left = col > 0 ? (((TCScore*)(left_vec + SSEMatrix::E))[left_rowelt] + offsetsc) : floorsc; + TAlScore sc_h_upleft = col > 0 ? (((TCScore*)(upleft_vec + SSEMatrix::H))[upleft_rowelt] + offsetsc) : floorsc; + TAlScore sc_diag = sc_->score(readc, refm, readq - 33); + // TODO: save and restore origMask as well as mask + int origMask = 0, mask = 0; + if(gapsAllowed) { + if(sc_h_up > floorsc && sc_cur == sc_h_up - sc_->refGapOpen()) { + mask |= (1 << 0); + } + if(sc_h_left > floorsc && sc_cur == sc_h_left - sc_->readGapOpen()) { + mask |= (1 << 1); + } + if(sc_f_up > floorsc && sc_cur == sc_f_up - sc_->refGapExtend()) { + mask |= (1 << 2); + } + if(sc_e_left > floorsc && sc_cur == sc_e_left - sc_->readGapExtend()) { + mask |= (1 << 3); + } + } + if(sc_h_upleft > floorsc && sc_cur == sc_h_upleft + sc_diag) { + mask |= (1 << 4); // diagonal is + } + origMask = mask; + assert(origMask > 0 || sc_cur <= sc_->match()); + if(d.mat_.isHMaskSet(row, col)) { + mask = (d.mat_.masks_[row][col] >> 2) & 31; + } + assert(gapsAllowed || mask == (1 << 4) || mask == 0); + int opts = alts5[mask]; + int select = -1; + if(opts == 1) { + select = firsts5[mask]; + assert_geq(mask, 0); + d.mat_.hMaskSet(row, col, 0); + } else if(opts > 1) { +#if 1 + if( (mask & 16) != 0) { + select = 4; // H diag + } else if((mask & 1) != 0) { + select = 0; // H up + } else if((mask & 4) != 0) { + select = 2; // F up + } else if((mask & 2) != 0) { + select = 1; // H left + } else if((mask & 8) != 0) { + select = 3; // E left + } +#else + select 
= randFromMask(rnd, mask); +#endif + assert_geq(mask, 0); + mask &= ~(1 << select); + assert(gapsAllowed || mask == (1 << 4) || mask == 0); + d.mat_.hMaskSet(row, col, mask); + branch = true; + } else { /* No way to backtrack! */ } + if(select != -1) { + if(select == 4) { + cur = SW_BT_OALL_DIAG; + } else if(select == 0) { + cur = SW_BT_OALL_REF_OPEN; + } else if(select == 1) { + cur = SW_BT_OALL_READ_OPEN; + } else if(select == 2) { + cur = SW_BT_RFGAP_EXTEND; + } else { + assert_eq(3, select) + cur = SW_BT_RDGAP_EXTEND; + } + } else { + empty = true; + // It's empty, so the only question left is whether we should be + // allowed in terimnate in this cell. If it's got a valid score + // then we *shouldn't* be allowed to terminate here because that + // means it's part of a larger alignment that was already reported. + canMoveThru = (origMask == 0); + } + } + assert(!empty || !canMoveThru || ct == SSEMatrix::H); + } // if(row > 0) + } // else clause of if(reportedThru) + if(!reportedThru) { + d.mat_.setReportedThrough(row, col); + } + assert(d.mat_.reportedThrough(row, col)); + //if(backEliminate && row < d.mat_.nrow()-1) { + // // Possibly pick off neighbors below and to the right if the + // // neighbor's only way of backtracking is through this cell. + //} + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + // Cell was involved in a previously-reported alignment? 
+ if(!canMoveThru) { + if(!btnstack_.empty()) { + // Remove all the cells from list back to and including the + // cell where the branch occurred + btcells_.resize(btnstack_.back().celsz); + // Pop record off the top of the stack + ned.resize(btnstack_.back().nedsz); + //aed.resize(btnstack_.back().aedsz); + row = btnstack_.back().row; + col = btnstack_.back().col; + gaps = btnstack_.back().gaps; + readGaps = btnstack_.back().readGaps; + refGaps = btnstack_.back().refGaps; + score = btnstack_.back().score; + ct = btnstack_.back().ct; + btnstack_.pop_back(); + assert(!sc_->monotone || score.score() >= escore); + NEW_ROW_COL(row, col); + continue; + } else { + // No branch points to revisit; just give up + res.reset(); + met.btfail++; // DP backtraces failed + return false; + } + } + assert(!reportedThru); + assert(!sc_->monotone || score.score() >= minsc_); + if(empty || row == 0) { + assert_eq(SSEMatrix::H, ct); + btcells_.expand(); + btcells_.back().first = row; + btcells_.back().second = col; + // This cell is at the end of a legitimate alignment + trimBeg = row; + assert_eq(btcells_.size(), dpRows() - trimBeg - trimEnd + readGaps); + break; + } + if(branch) { + // Add a frame to the backtrack stack + btnstack_.expand(); + btnstack_.back().init( + ned.size(), + 0, // aed.size() + btcells_.size(), + row, + col, + gaps, + readGaps, + refGaps, + score, + (int)ct); + } + btcells_.expand(); + btcells_.back().first = row; + btcells_.back().second = col; + switch(cur) { + // Move up and to the left. If the reference nucleotide in the + // source row mismatches the read nucleotide, penalize + // it and add a nucleotide mismatch. 
+ case SW_BT_OALL_DIAG: { + assert_gt(row, 0); assert_gt(col, 0); + int readC = (*rd_)[row]; + int refNmask = (int)rf_[rfi_+col]; + assert_gt(refNmask, 0); + int m = matchesEx(readC, refNmask); + ct = SSEMatrix::H; + if(m != 1) { + Edit e( + (int)row, + mask2dna[refNmask], + "ACGTN"[readC], + EDIT_TYPE_MM); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + int pen = QUAL2(row, col); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= escore); + } else { + // Reward a match + int64_t bonus = sc_->match(30); + score.score_ += bonus; + assert(!sc_->monotone || score.score() >= escore); + } + if(m == -1) { + score.ns_++; + } + row--; col--; + MOVE_ALL_UPLEFT(); + assert(VALID_AL_SCORE(score)); + break; + } + // Move up. Add an edit encoding the ref gap. + case SW_BT_OALL_REF_OPEN: + { + assert_gt(row, 0); + Edit e( + (int)row, + '-', + "ACGTN"[(int)(*rd_)[row]], + EDIT_TYPE_REF_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + row--; + ct = SSEMatrix::H; + int pen = sc_->refGapOpen(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; refGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_UP(); + break; + } + // Move up. Add an edit encoding the ref gap. 
+ case SW_BT_RFGAP_EXTEND: + { + assert_gt(row, 1); + Edit e( + (int)row, + '-', + "ACGTN"[(int)(*rd_)[row]], + EDIT_TYPE_REF_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + row--; + ct = SSEMatrix::F; + int pen = sc_->refGapExtend(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; refGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_UP(); + break; + } + case SW_BT_OALL_READ_OPEN: + { + assert_gt(col, 0); + Edit e( + (int)row+1, + mask2dna[(int)rf_[rfi_+col]], + '-', + EDIT_TYPE_READ_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + col--; + ct = SSEMatrix::H; + int pen = sc_->readGapOpen(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; readGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_LEFT(); + break; + } + case SW_BT_RDGAP_EXTEND: + { + assert_gt(col, 1); + Edit e( + (int)row+1, + mask2dna[(int)rf_[rfi_+col]], + '-', + EDIT_TYPE_READ_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + col--; + ct = SSEMatrix::E; + int pen = sc_->readGapExtend(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; readGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_LEFT(); + break; + } + default: throw 1; + } + } // while((int)row > 0) + assert_geq(col, 0); + assert_eq(SSEMatrix::H, ct); + // The number of cells in the backtracs should equal the number of read + // bases after trimming plus the number of gaps + 
assert_eq(btcells_.size(), dpRows() - trimBeg - trimEnd + readGaps); + // Check whether we went through a core diagonal and set 'reported' flag on + // each cell + bool overlappedCoreDiag = false; + for(size_t i = 0; i < btcells_.size(); i++) { + size_t rw = btcells_[i].first; + size_t cl = btcells_[i].second; + // Calculate the diagonal within the *trimmed* rectangle, i.e. the + // rectangle we dealt with in align, gather and backtrack. + int64_t diagi = cl - rw; + // Now adjust to the diagonal within the *untrimmed* rectangle by + // adding on the amount trimmed from the left. + diagi += rect_->triml; + if(diagi >= 0) { + size_t diag = (size_t)diagi; + if(diag >= rect_->corel && diag <= rect_->corer) { + overlappedCoreDiag = true; + break; + } + } +#ifndef NDEBUG + //assert(!d.mat_.reportedThrough(rw, cl)); + //d.mat_.setReportedThrough(rw, cl); + assert(d.mat_.reportedThrough(rw, cl)); +#endif + } + if(!overlappedCoreDiag) { + // Must overlap a core diagonal. Otherwise, we run the risk of + // reporting an alignment that overlaps (and trumps) a higher-scoring + // alignment that lies partially outside the dynamic programming + // rectangle. + res.reset(); + met.corerej++; + return false; + } + int readC = (*rd_)[rdi_+row]; // get last char in read + int refNmask = (int)rf_[rfi_+col]; // get last ref char ref involved in aln + assert_gt(refNmask, 0); + int m = matchesEx(readC, refNmask); + if(m != 1) { + Edit e((int)row, mask2dna[refNmask], "ACGTN"[readC], EDIT_TYPE_MM); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + score.score_ -= QUAL2(row, col); + assert_geq(score.score(), minsc_); + } else { + score.score_ += sc_->match(30); + } + if(m == -1) { + score.ns_++; + } + if(score.ns_ > nceil_) { + // Alignment has too many Ns in it! 
+ res.reset(); + met.nrej++; + return false; + } + res.reverse(); + assert(Edit::repOk(ned, (*rd_))); + assert_eq(score.score(), escore); + assert_leq(gaps, rdgap_ + rfgap_); + off = col; + assert_lt(col + (size_t)rfi_, (size_t)rff_); + score.gaps_ = gaps; + res.alres.setScore(score); + res.alres.setShape( + refidx_, // ref id + off + rfi_ + rect_->refl, // 0-based ref offset + reflen_, // reference length + fw_, // aligned to Watson? + rdf_ - rdi_, // read length + 0, // read ID + true, // pretrim soft? + 0, // pretrim 5' end + 0, // pretrim 3' end + true, // alignment trim soft? + fw_ ? trimBeg : trimEnd, // alignment trim 5' end + fw_ ? trimEnd : trimBeg); // alignment trim 3' end + size_t refns = 0; + for(size_t i = col; i <= origCol; i++) { + if((int)rf_[rfi_+i] > 15) { + refns++; + } + } + res.alres.setRefNs(refns); + assert(Edit::repOk(ned, (*rd_), true, trimBeg, trimEnd)); + assert(res.repOk()); +#ifndef NDEBUG + size_t gapsCheck = 0; + for(size_t i = 0; i < ned.size(); i++) { + if(ned[i].isGap()) gapsCheck++; + } + assert_eq(gaps, gapsCheck); + BTDnaString refstr; + for(size_t i = col; i <= origCol; i++) { + refstr.append(firsts5[(int)rf_[rfi_+i]]); + } + BTDnaString editstr; + Edit::toRef((*rd_), ned, editstr, true, trimBeg, trimEnd); + if(refstr != editstr) { + cerr << "Decoded nucleotides and edits don't match reference:" << endl; + cerr << " score: " << score.score() + << " (" << gaps << " gaps)" << endl; + cerr << " edits: "; + Edit::print(cerr, ned); + cerr << endl; + cerr << " decoded nucs: " << (*rd_) << endl; + cerr << " edited nucs: " << editstr << endl; + cerr << " reference nucs: " << refstr << endl; + assert(0); + } +#endif + met.btsucc++; // DP backtraces succeeded + return true; +} diff --git a/aligner_swsse_loc_u8.cpp b/aligner_swsse_loc_u8.cpp new file mode 100644 index 0000000..673df2d --- /dev/null +++ b/aligner_swsse_loc_u8.cpp @@ -0,0 +1,2266 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ *
+ * Bowtie 2 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Bowtie 2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * aligner_sw_sse.cpp
+ *
+ * Versions of key alignment functions that use vector instructions to
+ * accelerate dynamic programming. Based chiefly on the striped Smith-Waterman
+ * paper and implementation by Michael Farrar. See:
+ *
+ * Farrar M. Striped Smith-Waterman speeds database searches six times over
+ * other SIMD implementations. Bioinformatics. 2007 Jan 15;23(2):156-61.
+ * http://sites.google.com/site/farrarmichael/smith-waterman
+ *
+ * While the paper describes an implementation of Smith-Waterman, we extend it
+ * to do end-to-end read alignment as well as local alignment. The change
+ * required for this is minor: we simply let vmax be the maximum element in the
+ * score domain rather than the minimum.
+ *
+ * The vectorized dynamic programming implementation lacks some features that
+ * make it hard to adapt to solving the entire dynamic-programming alignment
+ * problem. For instance:
+ *
+ * - It doesn't respect gap barriers on either end of the read
+ * - It just gives a maximum; not enough information to backtrace without
+ *   redoing some alignment
+ * - It's a little difficult to handle st_ and en_, especially st_.
+ * - The query profile mechanism makes handling of ambiguous reference bases a + * little tricky (16 cols in query profile lookup table instead of 5) + * + * Given the drawbacks, it is tempting to use SSE dynamic programming as a + * filter rather than as an aligner per se. Here are a few ideas for how it + * can be extended to handle more of the alignment problem: + * + * - Save calculated scores to a big array as we go. We return to this array + * to find and backtrace from good solutions. + */ + +#include +#include "aligner_sw.h" + +// static const size_t NBYTES_PER_REG = 16; +static const size_t NWORDS_PER_REG = 16; +// static const size_t NBITS_PER_WORD = 8; +static const size_t NBYTES_PER_WORD = 1; + +// In local mode, we start low (0) and go high (255). Factoring in a query +// profile involves unsigned saturating addition. All query profile elements +// should be expressed as a positive number; this is done by adding -min +// where min is the smallest (negative) score in the query profile. + +typedef uint8_t TCScore; + +/** + * Build query profile look up tables for the read. The query profile look + * up table is organized as a 1D array indexed by [i][j] where i is the + * reference character in the current DP column (0=A, 1=C, etc), and j is + * the segment of the query we're currently working on. + */ +void SwAligner::buildQueryProfileLocalSseU8(bool fw) { + bool& done = fw ? sseU8fwBuilt_ : sseU8rcBuilt_; + if(done) { + return; + } + done = true; + const BTDnaString* rd = fw ? rdfw_ : rdrc_; + const BTString* qu = fw ? qufw_ : qurc_; + const size_t len = rd->length(); + const size_t seglen = (len + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; + // How many __m128i's are needed + size_t n128s = + 64 + // slack bytes, for alignment? + (seglen * ALPHA_SIZE) // query profile data + * 2; // & gap barrier data + assert_gt(n128s, 0); + SSEData& d = fw ? 
sseU8fw_ : sseU8rc_; + d.profbuf_.resizeNoCopy(n128s); + assert(!d.profbuf_.empty()); + d.maxPen_ = d.maxBonus_ = 0; + d.lastIter_ = d.lastWord_ = 0; + d.qprofStride_ = d.gbarStride_ = 2; + d.bias_ = 0; + // Calculate bias + for(size_t refc = 0; refc < ALPHA_SIZE; refc++) { + for(size_t i = 0; i < len; i++) { + int readc = (*rd)[i]; + int readq = (*qu)[i]; + int sc = sc_->score(readc, (int)(1 << refc), readq - 33); + if(sc < 0 && sc < d.bias_) { + d.bias_ = sc; + } + } + } + assert_leq(d.bias_, 0); + d.bias_ = -d.bias_; + // For each reference character A, C, G, T, N ... + for(size_t refc = 0; refc < ALPHA_SIZE; refc++) { + // For each segment ... + for(size_t i = 0; i < seglen; i++) { + size_t j = i; + uint8_t *qprofWords = + reinterpret_cast(d.profbuf_.ptr() + (refc * seglen * 2) + (i * 2)); + uint8_t *gbarWords = + reinterpret_cast(d.profbuf_.ptr() + (refc * seglen * 2) + (i * 2) + 1); + // For each sub-word (byte) ... + for(size_t k = 0; k < NWORDS_PER_REG; k++) { + int sc = 0; + *gbarWords = 0; + if(j < len) { + int readc = (*rd)[j]; + int readq = (*qu)[j]; + sc = sc_->score(readc, (int)(1 << refc), readq - 33); + assert_range(0, 255, sc + d.bias_); + size_t j_from_end = len - j - 1; + if(j < (size_t)sc_->gapbar || + j_from_end < (size_t)sc_->gapbar) + { + // Inside the gap barrier + *gbarWords = 0xff; + } + } + if(refc == 0 && j == len-1) { + // Remember which 128-bit word and which smaller word has + // the final row + d.lastIter_ = i; + d.lastWord_ = k; + } + if(sc < 0) { + if((size_t)(-sc) > d.maxPen_) { + d.maxPen_ = (size_t)(-sc); + } + } else { + if((size_t)sc > d.maxBonus_) { + d.maxBonus_ = (size_t)sc; + } + } + *qprofWords = (uint8_t)(sc + d.bias_); + gbarWords++; + qprofWords++; + j += seglen; // update offset into query + } + } + } +} + +#ifndef NDEBUG +/** + * Return true iff the cell has sane E/F/H values w/r/t its predecessors. 
+ */ +static bool cellOkLocalU8( + SSEData& d, + size_t row, + size_t col, + int refc, + int readc, + int readq, + const Scoring& sc) // scoring scheme +{ + TCScore floorsc = 0; + TCScore ceilsc = 255 - d.bias_ - 1; + TAlScore offsetsc = 0; + TAlScore sc_h_cur = (TAlScore)d.mat_.helt(row, col); + TAlScore sc_e_cur = (TAlScore)d.mat_.eelt(row, col); + TAlScore sc_f_cur = (TAlScore)d.mat_.felt(row, col); + if(sc_h_cur > floorsc) { + sc_h_cur += offsetsc; + } + if(sc_e_cur > floorsc) { + sc_e_cur += offsetsc; + } + if(sc_f_cur > floorsc) { + sc_f_cur += offsetsc; + } + bool gapsAllowed = true; + size_t rowFromEnd = d.mat_.nrow() - row - 1; + if(row < (size_t)sc.gapbar || rowFromEnd < (size_t)sc.gapbar) { + gapsAllowed = false; + } + bool e_left_trans = false, h_left_trans = false; + bool f_up_trans = false, h_up_trans = false; + bool h_diag_trans = false; + if(gapsAllowed) { + TAlScore sc_h_left = floorsc; + TAlScore sc_e_left = floorsc; + TAlScore sc_h_up = floorsc; + TAlScore sc_f_up = floorsc; + if(col > 0 && sc_e_cur > floorsc && sc_e_cur <= ceilsc) { + sc_h_left = d.mat_.helt(row, col-1) + offsetsc; + sc_e_left = d.mat_.eelt(row, col-1) + offsetsc; + e_left_trans = (sc_e_left > floorsc && sc_e_cur == sc_e_left - sc.readGapExtend()); + h_left_trans = (sc_h_left > floorsc && sc_e_cur == sc_h_left - sc.readGapOpen()); + assert(e_left_trans || h_left_trans); + } + if(row > 0 && sc_f_cur > floorsc && sc_f_cur <= ceilsc) { + sc_h_up = d.mat_.helt(row-1, col) + offsetsc; + sc_f_up = d.mat_.felt(row-1, col) + offsetsc; + f_up_trans = (sc_f_up > floorsc && sc_f_cur == sc_f_up - sc.refGapExtend()); + h_up_trans = (sc_h_up > floorsc && sc_f_cur == sc_h_up - sc.refGapOpen()); + assert(f_up_trans || h_up_trans); + } + } else { + assert_geq(floorsc, sc_e_cur); + assert_geq(floorsc, sc_f_cur); + } + if(col > 0 && row > 0 && sc_h_cur > floorsc && sc_h_cur <= ceilsc) { + TAlScore sc_h_upleft = d.mat_.helt(row-1, col-1) + offsetsc; + TAlScore sc_diag = sc.score(readc, (int)refc, 
readq - 33); + h_diag_trans = sc_h_cur == sc_h_upleft + sc_diag; + } + assert( + sc_h_cur <= floorsc || + e_left_trans || + h_left_trans || + f_up_trans || + h_up_trans || + h_diag_trans || + sc_h_cur > ceilsc || + row == 0 || + col == 0); + return true; +} +#endif /*ndef NDEBUG*/ + +#ifdef NDEBUG + +#define assert_all_eq0(x) +#define assert_all_gt(x, y) +#define assert_all_gt_lo(x) +#define assert_all_lt(x, y) +#define assert_all_lt_hi(x) + +#else + +#define assert_all_eq0(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_xor_si128(z, z); \ + tmp = _mm_cmpeq_epi16(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_gt(x, y) { \ + __m128i tmp = _mm_cmpgt_epu8(x, y); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_gt_lo(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_xor_si128(z, z); \ + tmp = _mm_cmpgt_epu8(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_lt(x, y) { \ + __m128i z = _mm_setzero_si128(); \ + z = _mm_xor_si128(z, z); \ + __m128i tmp = _mm_subs_epu8(y, x); \ + tmp = _mm_cmpeq_epi16(tmp, z); \ + assert_eq(0x0000, _mm_movemask_epi8(tmp)); \ +} + +#define assert_all_lt_hi(x) { \ + __m128i z = _mm_setzero_si128(); \ + __m128i tmp = _mm_setzero_si128(); \ + z = _mm_cmpeq_epu8(z, z); \ + z = _mm_srli_epu8(z, 1); \ + tmp = _mm_cmplt_epu8(x, z); \ + assert_eq(0xffff, _mm_movemask_epi8(tmp)); \ +} +#endif + +/** + * Aligns by filling a dynamic programming matrix with the SSE-accelerated, + * banded DP approach of Farrar. As it goes, it determines which cells we + * might backtrace from and tallies the best (highest-scoring) N backtrace + * candidate cells per diagonal. Also returns the alignment score of the best + * alignment in the matrix. 
+ * + * This routine does *not* maintain a matrix holding the entire matrix worth of + * scores, nor does it maintain any other dense O(mn) data structure, as this + * would quickly exhaust memory for queries longer than about 10,000 kb. + * Instead, in the fill stage it maintains two columns worth of scores at a + * time (current/previous, or right/left) - these take O(m) space. When + * finished with the current column, it determines which cells from the + * previous column, if any, are candidates we might backtrace from to find a + * full alignment. A candidate cell has a score that rises above the threshold + * and isn't improved upon by a match in the next column. The best N + * candidates per diagonal are stored in a O(m + n) data structure. + */ +TAlScore SwAligner::alignGatherLoc8(int& flag, bool debug) { + assert_leq(rdf_, rd_->length()); + assert_leq(rdf_, qu_->length()); + assert_lt(rfi_, rff_); + assert_lt(rdi_, rdf_); + assert_eq(rd_->length(), qu_->length()); + assert_geq(sc_->gapbar, 1); + assert_gt(minsc_, 0); + assert(repOk()); +#ifndef NDEBUG + for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) { + assert_range(0, 16, (int)rf_[i]); + } +#endif + + SSEData& d = fw_ ? sseU8fw_ : sseU8rc_; + SSEMetrics& met = extend_ ? sseU8ExtendMet_ : sseU8MateMet_; + if(!debug) met.dp++; + buildQueryProfileLocalSseU8(fw_); + assert(!d.profbuf_.empty()); + assert_gt(d.bias_, 0); + assert_lt(d.bias_, 127); + + assert_gt(d.maxBonus_, 0); + size_t iter = + (dpRows() + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; // iter = segLen + + // Now set up the score vectors. We just need two columns worth, which + // we'll call "left" and "right". + d.vecbuf_.resize(ROWSTRIDE_2COL * iter * 2); + d.vecbuf_.zero(); + __m128i *vbuf_l = d.vecbuf_.ptr(); + __m128i *vbuf_r = d.vecbuf_.ptr() + (ROWSTRIDE_2COL * iter); + + // This is the data structure that holds candidate cells per diagonal. 
+ const size_t ndiags = rff_ - rfi_ + dpRows() - 1; + if(!debug) { + btdiag_.init(ndiags, 2); + } + + // Data structure that holds checkpointed anti-diagonals + TAlScore perfectScore = sc_->perfectScore(dpRows()); + bool checkpoint = true; + bool cpdebug = false; +#ifndef NDEBUG + cpdebug = dpRows() < 1000; +#endif + cper_.init( + dpRows(), // # rows + rff_ - rfi_, // # columns + cperPerPow2_, // checkpoint every 1 << perpow2 diags (& next) + perfectScore, // perfect score (for sanity checks) + true, // matrix cells have 8-bit scores? + cperTri_, // triangular mini-fills? + true, // alignment is local? + cpdebug); // save all cells for debugging? + + // Many thanks to Michael Farrar for releasing his striped Smith-Waterman + // implementation: + // + // http://sites.google.com/site/farrarmichael/smith-waterman + // + // Much of the implmentation below is adapted from Michael's code. + + // Set all elts to reference gap open penalty + __m128i rfgapo = _mm_setzero_si128(); + __m128i rfgape = _mm_setzero_si128(); + __m128i rdgapo = _mm_setzero_si128(); + __m128i rdgape = _mm_setzero_si128(); + __m128i vlo = _mm_setzero_si128(); + __m128i vhi = _mm_setzero_si128(); + __m128i vmax = _mm_setzero_si128(); + __m128i vcolmax = _mm_setzero_si128(); + __m128i vmaxtmp = _mm_setzero_si128(); + __m128i ve = _mm_setzero_si128(); + __m128i vf = _mm_setzero_si128(); + __m128i vh = _mm_setzero_si128(); + __m128i vhd = _mm_setzero_si128(); + __m128i vhdtmp = _mm_setzero_si128(); + __m128i vtmp = _mm_setzero_si128(); + __m128i vzero = _mm_setzero_si128(); + __m128i vbias = _mm_setzero_si128(); + __m128i vbiasm1 = _mm_setzero_si128(); + __m128i vminsc = _mm_setzero_si128(); + + int dup; + + assert_gt(sc_->refGapOpen(), 0); + assert_leq(sc_->refGapOpen(), MAX_U8); + dup = (sc_->refGapOpen() << 8) | (sc_->refGapOpen() & 0x00ff); + rfgapo = _mm_insert_epi16(rfgapo, dup, 0); + rfgapo = _mm_shufflelo_epi16(rfgapo, 0); + rfgapo = _mm_shuffle_epi32(rfgapo, 0); + + // Set all elts to reference 
gap extension penalty + assert_gt(sc_->refGapExtend(), 0); + assert_leq(sc_->refGapExtend(), MAX_U8); + assert_leq(sc_->refGapExtend(), sc_->refGapOpen()); + dup = (sc_->refGapExtend() << 8) | (sc_->refGapExtend() & 0x00ff); + rfgape = _mm_insert_epi16(rfgape, dup, 0); + rfgape = _mm_shufflelo_epi16(rfgape, 0); + rfgape = _mm_shuffle_epi32(rfgape, 0); + + // Set all elts to read gap open penalty + assert_gt(sc_->readGapOpen(), 0); + assert_leq(sc_->readGapOpen(), MAX_U8); + dup = (sc_->readGapOpen() << 8) | (sc_->readGapOpen() & 0x00ff); + rdgapo = _mm_insert_epi16(rdgapo, dup, 0); + rdgapo = _mm_shufflelo_epi16(rdgapo, 0); + rdgapo = _mm_shuffle_epi32(rdgapo, 0); + + // Set all elts to read gap extension penalty + assert_gt(sc_->readGapExtend(), 0); + assert_leq(sc_->readGapExtend(), MAX_U8); + assert_leq(sc_->readGapExtend(), sc_->readGapOpen()); + dup = (sc_->readGapExtend() << 8) | (sc_->readGapExtend() & 0x00ff); + rdgape = _mm_insert_epi16(rdgape, dup, 0); + rdgape = _mm_shufflelo_epi16(rdgape, 0); + rdgape = _mm_shuffle_epi32(rdgape, 0); + + // Set all elts to minimum score threshold. Actually, to 1 less than the + // threshold so we can use gt instead of geq. 
+ dup = (((int)minsc_ - 1) << 8) | (((int)minsc_ - 1) & 0x00ff); + vminsc = _mm_insert_epi16(vminsc, dup, 0); + vminsc = _mm_shufflelo_epi16(vminsc, 0); + vminsc = _mm_shuffle_epi32(vminsc, 0); + + dup = ((d.bias_ - 1) << 8) | ((d.bias_ - 1) & 0x00ff); + vbiasm1 = _mm_insert_epi16(vbiasm1, dup, 0); + vbiasm1 = _mm_shufflelo_epi16(vbiasm1, 0); + vbiasm1 = _mm_shuffle_epi32(vbiasm1, 0); + vhi = _mm_cmpeq_epi16(vhi, vhi); // all elts = 0xffff + vlo = _mm_xor_si128(vlo, vlo); // all elts = 0 + vmax = vlo; + + // Make a vector of bias offsets + dup = (d.bias_ << 8) | (d.bias_ & 0x00ff); + vbias = _mm_insert_epi16(vbias, dup, 0); + vbias = _mm_shufflelo_epi16(vbias, 0); + vbias = _mm_shuffle_epi32(vbias, 0); + + // Points to a long vector of __m128i where each element is a block of + // contiguous cells in the E, F or H matrix. If the index % 3 == 0, then + // the block of cells is from the E matrix. If index % 3 == 1, they're + // from the F matrix. If index % 3 == 2, then they're from the H matrix. + // Blocks of cells are organized in the same interleaved manner as they are + // calculated by the Farrar algorithm. 
+ const __m128i *pvScore; // points into the query profile + + const size_t colstride = ROWSTRIDE_2COL * iter; + + // Initialize the H and E vectors in the first matrix column + __m128i *pvELeft = vbuf_l + 0; __m128i *pvERight = vbuf_r + 0; + /* __m128i *pvFLeft = vbuf_l + 1; */ __m128i *pvFRight = vbuf_r + 1; + __m128i *pvHLeft = vbuf_l + 2; __m128i *pvHRight = vbuf_r + 2; + + for(size_t i = 0; i < iter; i++) { + // start low in local mode + _mm_store_si128(pvERight, vlo); pvERight += ROWSTRIDE_2COL; + _mm_store_si128(pvHRight, vlo); pvHRight += ROWSTRIDE_2COL; + } + + assert_gt(sc_->gapbar, 0); + size_t nfixup = 0; + TAlScore matchsc = sc_->match(30); + TAlScore leftmax = MIN_I64; + + // Fill in the table as usual but instead of using the same gap-penalty + // vector for each iteration of the inner loop, load words out of a + // pre-calculated gap vector parallel to the query profile. The pre- + // calculated gap vectors enforce the gap barrier constraint by making it + // infinitely costly to introduce a gap in barrier rows. + // + // AND use a separate loop to fill in the first row of the table, enforcing + // the st_ constraints in the process. This is awkward because it + // separates the processing of the first row from the others and might make + // it difficult to use the first-row results in the next row, but it might + // be the simplest and least disruptive way to deal with the st_ constraint. + + size_t off = MAX_SIZE_T, lastoff; + bool bailed = false; + for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) { + // Swap left and right; vbuf_l is the vector on the left, which we + // generally load from, and vbuf_r is the vector on the right, which we + // generally store to. 
+ swap(vbuf_l, vbuf_r); + pvELeft = vbuf_l + 0; pvERight = vbuf_r + 0; + /* pvFLeft = vbuf_l + 1; */ pvFRight = vbuf_r + 1; + pvHLeft = vbuf_l + 2; pvHRight = vbuf_r + 2; + + // Fetch this column's reference mask + const int refm = (int)rf_[i]; + + // Fetch the appropriate query profile + lastoff = off; + off = (size_t)firsts5[refm] * iter * 2; + pvScore = d.profbuf_.ptr() + off; // even elts = query profile, odd = gap barrier + + // Load H vector from the final row of the previous column. + // ??? perhaps we should calculate the next iter's F instead of the + // current iter's? The way we currently do it, seems like it will + // almost always require at least one fixup loop iter (to recalculate + // this topmost F). + vh = _mm_load_si128(pvHLeft + colstride - ROWSTRIDE_2COL); + + // Set all cells to low value + vf = _mm_xor_si128(vf, vf); + // vf now contains the vertical contribution + + // Store cells in F, calculated previously + // No need to veto ref gap extensions, they're all 0x00s + _mm_store_si128(pvFRight, vf); + pvFRight += ROWSTRIDE_2COL; + + // Shift down so that topmost (least sig) cell gets 0 + vh = _mm_slli_si128(vh, NBYTES_PER_WORD); + + // We pull out one loop iteration to make it easier to veto values in the top row + + // Load cells from E, calculated previously + ve = _mm_load_si128(pvELeft); + vhd = _mm_load_si128(pvHLeft); + assert_all_lt(ve, vhi); + pvELeft += ROWSTRIDE_2COL; + // ve now contains the horizontal contribution + + // Factor in query profile (matches and mismatches) + vh = _mm_adds_epu8(vh, pvScore[0]); + vh = _mm_subs_epu8(vh, vbias); + // vh now contains the diagonal contribution + + vhdtmp = vhd; + vhd = _mm_subs_epu8(vhd, rdgapo); + vhd = _mm_subs_epu8(vhd, pvScore[1]); // veto some read gap opens + ve = _mm_subs_epu8(ve, rdgape); + ve = _mm_max_epu8(ve, vhd); + + vh = _mm_max_epu8(vh, ve); + vf = vh; + + // Update highest score so far + vcolmax = vh; + + // Save the new vH values + _mm_store_si128(pvHRight, vh); + + vh = 
vhdtmp; + assert_all_lt(ve, vhi); + pvHRight += ROWSTRIDE_2COL; + pvHLeft += ROWSTRIDE_2COL; + + // Save E values + _mm_store_si128(pvERight, ve); + pvERight += ROWSTRIDE_2COL; + + // Update vf value + vf = _mm_subs_epu8(vf, rfgapo); + assert_all_lt(vf, vhi); + + pvScore += 2; // move on to next query profile + + // For each character in the reference text: + size_t j; + for(j = 1; j < iter; j++) { + // Load cells from E, calculated previously + ve = _mm_load_si128(pvELeft); + vhd = _mm_load_si128(pvHLeft); + assert_all_lt(ve, vhi); + pvELeft += ROWSTRIDE_2COL; + + // Store cells in F, calculated previously + vf = _mm_subs_epu8(vf, pvScore[1]); // veto some ref gap extensions + _mm_store_si128(pvFRight, vf); + pvFRight += ROWSTRIDE_2COL; + + // Factor in query profile (matches and mismatches) + vh = _mm_adds_epu8(vh, pvScore[0]); + vh = _mm_subs_epu8(vh, vbias); + + // Update H, factoring in E and F + vh = _mm_max_epu8(vh, vf); + + vhdtmp = vhd; + vhd = _mm_subs_epu8(vhd, rdgapo); + vhd = _mm_subs_epu8(vhd, pvScore[1]); // veto some read gap opens + ve = _mm_subs_epu8(ve, rdgape); + ve = _mm_max_epu8(ve, vhd); + + vh = _mm_max_epu8(vh, ve); + vtmp = vh; + + // Update highest score encountered this far + vcolmax = _mm_max_epu8(vcolmax, vh); + + // Save the new vH values + _mm_store_si128(pvHRight, vh); + + vh = vhdtmp; + + assert_all_lt(ve, vhi); + pvHRight += ROWSTRIDE_2COL; + pvHLeft += ROWSTRIDE_2COL; + + // Save E values + _mm_store_si128(pvERight, ve); + pvERight += ROWSTRIDE_2COL; + + // Update vf value + vtmp = _mm_subs_epu8(vtmp, rfgapo); + vf = _mm_subs_epu8(vf, rfgape); + assert_all_lt(vf, vhi); + vf = _mm_max_epu8(vf, vtmp); + + pvScore += 2; // move on to next query profile / gap veto + } + // pvHStore, pvELoad, pvEStore have all rolled over to the next column + pvFRight -= colstride; // reset to start of column + vtmp = _mm_load_si128(pvFRight); + + pvHRight -= colstride; // reset to start of column + vh = _mm_load_si128(pvHRight); + + pvScore = 
d.profbuf_.ptr() + off + 1; // reset veto vector + + // vf from last row gets shifted down by one to overlay the first row + // rfgape has already been subtracted from it. + vf = _mm_slli_si128(vf, NBYTES_PER_WORD); + + vf = _mm_subs_epu8(vf, *pvScore); // veto some ref gap extensions + vf = _mm_max_epu8(vtmp, vf); + // TODO: We're testing whether F changed. Can't we just assume that F + // did change and instead check whether H changed? Might save us from + // entering the fixup loop. + vtmp = _mm_subs_epu8(vf, vtmp); + vtmp = _mm_cmpeq_epi8(vtmp, vzero); + int cmp = _mm_movemask_epi8(vtmp); + + // If any element of vtmp is greater than H - gap-open... + j = 0; + while(cmp != 0xffff) { + // Store this vf + _mm_store_si128(pvFRight, vf); + pvFRight += ROWSTRIDE_2COL; + + // Update vh w/r/t new vf + vh = _mm_max_epu8(vh, vf); + + // Save vH values + _mm_store_si128(pvHRight, vh); + pvHRight += ROWSTRIDE_2COL; + + // Update highest score encountered so far. + vcolmax = _mm_max_epu8(vcolmax, vh); + + pvScore += 2; + + assert_lt(j, iter); + if(++j == iter) { + pvFRight -= colstride; + vtmp = _mm_load_si128(pvFRight); // load next vf ASAP + pvHRight -= colstride; + vh = _mm_load_si128(pvHRight); // load next vh ASAP + pvScore = d.profbuf_.ptr() + off + 1; + j = 0; + vf = _mm_slli_si128(vf, NBYTES_PER_WORD); + } else { + vtmp = _mm_load_si128(pvFRight); // load next vf ASAP + vh = _mm_load_si128(pvHRight); // load next vh ASAP + } + + // Update F with another gap extension + vf = _mm_subs_epu8(vf, rfgape); + vf = _mm_subs_epu8(vf, *pvScore); // veto some ref gap extensions + vf = _mm_max_epu8(vtmp, vf); + vtmp = _mm_subs_epu8(vf, vtmp); + vtmp = _mm_cmpeq_epi8(vtmp, vzero); + cmp = _mm_movemask_epi8(vtmp); + nfixup++; + } + + // Now we'd like to know exactly which cells in the left column are + // candidates we might backtrace from. First question is: did *any* + // elements in the column exceed the minimum score threshold? + if(!debug && leftmax >= minsc_) { + // Yes. 
Next question is: which cells are candidates? We have to + // allow matches in the right column to override matches above and + // to the left in the left column. + assert_gt(i - rfi_, 0); + pvHLeft = vbuf_l + 2; + assert_lt(lastoff, MAX_SIZE_T); + pvScore = d.profbuf_.ptr() + lastoff; // even elts = query profile, odd = gap barrier + for(size_t k = 0; k < iter; k++) { + vh = _mm_load_si128(pvHLeft); + vtmp = _mm_cmpgt_epi8(pvScore[0], vbiasm1); + int cmp = _mm_movemask_epi8(vtmp); + if(cmp != 0xffff) { + // At least one candidate in this mask. Now iterate + // through vm/vh to evaluate individual cells. + for(size_t m = 0; m < NWORDS_PER_REG; m++) { + size_t row = k + m * iter; + if(row >= dpRows()) { + break; + } + if(((TCScore *)&vtmp)[m] > 0 && ((TCScore *)&vh)[m] >= minsc_) { + TCScore sc = ((TCScore *)&vh)[m]; + assert_geq(sc, minsc_); + // Add to data structure holding all candidates + size_t col = i - rfi_ - 1; // -1 b/c prev col + size_t frombot = dpRows() - row - 1; + DpBtCandidate cand(row, col, sc); + btdiag_.add(frombot + col, cand); + } + } + } + pvHLeft += ROWSTRIDE_2COL; + pvScore += 2; + } + } + + // Save some elements to checkpoints + if(checkpoint) { + + __m128i *pvE = vbuf_r + 0; + __m128i *pvF = vbuf_r + 1; + __m128i *pvH = vbuf_r + 2; + size_t coli = i - rfi_; + if(coli < cper_.locol_) cper_.locol_ = coli; + if(coli > cper_.hicol_) cper_.hicol_ = coli; + if(cperTri_) { + // Checkpoint for triangular mini-fills + size_t rc_mod = coli & cper_.lomask_; + assert_lt(rc_mod, cper_.per_); + int64_t row = -(int64_t)rc_mod-1; + int64_t row_mod = row; + int64_t row_div = 0; + size_t idx = coli >> cper_.perpow2_; + size_t idxrow = idx * cper_.nrow_; + assert_eq(4, ROWSTRIDE_2COL); + bool done = false; + while(true) { + row += (cper_.per_ - 2); + row_mod += (cper_.per_ - 2); + for(size_t j = 0; j < 2; j++) { + row++; + row_mod++; + if(row >= 0 && (size_t)row < cper_.nrow_) { + // Update row divided by iter_ and mod iter_ + while(row_mod >= (int64_t)iter) 
{ + row_mod -= (int64_t)iter; + row_div++; + } + size_t delt = idxrow + row; + size_t vecoff = (row_mod << 6) + row_div; + assert_lt(row_div, 16); + int16_t h_sc = ((uint8_t*)pvH)[vecoff]; + int16_t e_sc = ((uint8_t*)pvE)[vecoff]; + int16_t f_sc = ((uint8_t*)pvF)[vecoff]; + assert_leq(h_sc, cper_.perf_); + assert_leq(e_sc, cper_.perf_); + assert_leq(f_sc, cper_.perf_); + CpQuad *qdiags = ((j == 0) ? cper_.qdiag1s_.ptr() : cper_.qdiag2s_.ptr()); + qdiags[delt].sc[0] = h_sc; + qdiags[delt].sc[1] = e_sc; + qdiags[delt].sc[2] = f_sc; + } // if(row >= 0 && row < nrow_) + else if(row >= 0 && (size_t)row >= cper_.nrow_) { + done = true; + break; + } + } // for(size_t j = 0; j < 2; j++) + if(done) { + break; + } + idx++; + idxrow += cper_.nrow_; + } // while(true) + } else { + // Checkpoint for square mini-fills + + // If this is the first column, take this opportunity to + // pre-calculate the coordinates of the elements we're going to + // checkpoint. + if(coli == 0) { + size_t cpi = cper_.per_-1; + size_t cpimod = cper_.per_-1; + size_t cpidiv = 0; + cper_.commitMap_.clear(); + while(cpi < cper_.nrow_) { + while(cpimod >= iter) { + cpimod -= iter; + cpidiv++; + } + size_t vecoff = (cpimod << 6) + cpidiv; + cper_.commitMap_.push_back(vecoff); + cpi += cper_.per_; + cpimod += cper_.per_; + } + } + // Save all the rows + size_t rowoff = 0; + size_t sz = cper_.commitMap_.size(); + for(size_t i = 0; i < sz; i++, rowoff += cper_.ncol_) { + size_t vecoff = cper_.commitMap_[i]; + int16_t h_sc = ((uint8_t*)pvH)[vecoff]; + //int16_t e_sc = ((uint8_t*)pvE)[vecoff]; + int16_t f_sc = ((uint8_t*)pvF)[vecoff]; + assert_leq(h_sc, cper_.perf_); + //assert_leq(e_sc, cper_.perf_); + assert_leq(f_sc, cper_.perf_); + CpQuad& dst = cper_.qrows_[rowoff + coli]; + dst.sc[0] = h_sc; + //dst.sc[1] = e_sc; + dst.sc[2] = f_sc; + } + // Is this a column we'd like to checkpoint? 
+ if((coli & cper_.lomask_) == cper_.lomask_) { + // Save the column using memcpys + assert_gt(coli, 0); + size_t wordspercol = cper_.niter_ * ROWSTRIDE_2COL; + size_t coloff = (coli >> cper_.perpow2_) * wordspercol; + __m128i *dst = cper_.qcols_.ptr() + coloff; + memcpy(dst, vbuf_r, sizeof(__m128i) * wordspercol); + } + } + if(cper_.debug_) { + // Save the column using memcpys + size_t wordspercol = cper_.niter_ * ROWSTRIDE_2COL; + size_t coloff = coli * wordspercol; + __m128i *dst = cper_.qcolsD_.ptr() + coloff; + memcpy(dst, vbuf_r, sizeof(__m128i) * wordspercol); + } + } + + // Store column maximum vector in first element of tmp + vmax = _mm_max_epu8(vmax, vcolmax); + + { + // Get single largest score in this column + vmaxtmp = vcolmax; + vtmp = _mm_srli_si128(vmaxtmp, 8); + vmaxtmp = _mm_max_epu8(vmaxtmp, vtmp); + vtmp = _mm_srli_si128(vmaxtmp, 4); + vmaxtmp = _mm_max_epu8(vmaxtmp, vtmp); + vtmp = _mm_srli_si128(vmaxtmp, 2); + vmaxtmp = _mm_max_epu8(vmaxtmp, vtmp); + vtmp = _mm_srli_si128(vmaxtmp, 1); + vmaxtmp = _mm_max_epu8(vmaxtmp, vtmp); + int score = _mm_extract_epi16(vmaxtmp, 0); + score = score & 0x00ff; + + // Could we have saturated? + if(score + d.bias_ >= 255) { + flag = -2; // yes + if(!debug) met.dpsat++; + return MIN_I64; + } + + if(score < minsc_) { + size_t ncolleft = rff_ - i - 1; + if(score + (TAlScore)ncolleft * matchsc < minsc_) { + // Bail! There can't possibly be a valid alignment that + // passes through this column. + bailed = true; + break; + } + } + + leftmax = score; + } + } + + lastoff = off; + + // Now we'd like to know exactly which cells in the *rightmost* column are + // candidates we might backtrace from. Did *any* elements exceed the + // minimum score threshold? + if(!debug && !bailed && leftmax >= minsc_) { + // Yes. Next question is: which cells are candidates? We have to + // allow matches in the right column to override matches above and + // to the left in the left column. 
+ pvHLeft = vbuf_r + 2; + assert_lt(lastoff, MAX_SIZE_T); + pvScore = d.profbuf_.ptr() + lastoff; // even elts = query profile, odd = gap barrier + for(size_t k = 0; k < iter; k++) { + vh = _mm_load_si128(pvHLeft); + vtmp = _mm_cmpgt_epi8(pvScore[0], vbiasm1); + int cmp = _mm_movemask_epi8(vtmp); + if(cmp != 0xffff) { + // At least one candidate in this mask. Now iterate + // through vm/vh to evaluate individual cells. + for(size_t m = 0; m < NWORDS_PER_REG; m++) { + size_t row = k + m * iter; + if(row >= dpRows()) { + break; + } + if(((TCScore *)&vtmp)[m] > 0 && ((TCScore *)&vh)[m] >= minsc_) { + TCScore sc = ((TCScore *)&vh)[m]; + assert_geq(sc, minsc_); + // Add to data structure holding all candidates + size_t col = rff_ - rfi_ - 1; // -1 b/c prev col + size_t frombot = dpRows() - row - 1; + DpBtCandidate cand(row, col, sc); + btdiag_.add(frombot + col, cand); + } + } + } + pvHLeft += ROWSTRIDE_2COL; + pvScore += 2; + } + } + + // Find largest score in vmax + vtmp = _mm_srli_si128(vmax, 8); + vmax = _mm_max_epu8(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 4); + vmax = _mm_max_epu8(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 2); + vmax = _mm_max_epu8(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 1); + vmax = _mm_max_epu8(vmax, vtmp); + + // Update metrics + if(!debug) { + size_t ninner = (rff_ - rfi_) * iter; + met.col += (rff_ - rfi_); // DP columns + met.cell += (ninner * NWORDS_PER_REG); // DP cells + met.inner += ninner; // DP inner loop iters + met.fixup += nfixup; // DP fixup loop iters + } + + int score = _mm_extract_epi16(vmax, 0); + score = score & 0x00ff; + + flag = 0; + + // Could we have saturated? + if(score + d.bias_ >= 255) { + flag = -2; // yes + if(!debug) met.dpsat++; + return MIN_I64; + } + + // Did we find a solution? + if(score == MIN_U8 || score < minsc_) { + flag = -1; // no + if(!debug) met.dpfail++; + return (TAlScore)score; + } + + // Now take all the backtrace candidates in the btdaig_ structure and + // dump them into the btncand_ array. 
/**
 * Solve the current alignment problem using SSE instructions that operate on 16
 * unsigned 8-bit values packed into a single 128-bit register.
 *
 * The routine builds the query profile, (re)initializes the SSE matrix d.mat_
 * for dpRows() x (rff_ - rfi_) cells and fills it one reference column at a
 * time.  For every column it also stores the vector of per-lane column maxima
 * in d.mat_.tmpvec(0, col).
 *
 * Members written: colstop_ (number of columns actually filled; smaller than
 * rff_ - rfi_ only if we bailed early) and lastsolcol_ (last column whose best
 * score was >= minsc_).
 *
 * @param flag   out: 0 if a score >= minsc_ was found, -1 if no cell reached
 *               minsc_ (or the best score is MIN_U8), -2 if an 8-bit score
 *               (plus d.bias_) could have saturated.
 * @param debug  when true, none of the SSEMetrics counters are updated.
 * @return       MIN_I64 on saturation; otherwise the largest score seen in
 *               the filled part of the table (also returned when flag == -1).
 */
TAlScore SwAligner::alignNucleotidesLocalSseU8(int& flag, bool debug) {
	assert_leq(rdf_, rd_->length());
	assert_leq(rdf_, qu_->length());
	assert_lt(rfi_, rff_);
	assert_lt(rdi_, rdf_);
	assert_eq(rd_->length(), qu_->length());
	assert_geq(sc_->gapbar, 1);
	assert(repOk());
#ifndef NDEBUG
	// Every reference character in the rectangle must be a valid mask
	for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) {
		assert_range(0, 16, (int)rf_[i]);
	}
#endif

	// Pick the per-strand SSE state and the metrics bucket for this problem
	SSEData& d = fw_ ? sseU8fw_ : sseU8rc_;
	SSEMetrics& met = extend_ ? sseU8ExtendMet_ : sseU8MateMet_;
	if(!debug) met.dp++;
	buildQueryProfileLocalSseU8(fw_);
	assert(!d.profbuf_.empty());
	assert_geq(d.bias_, 0);

	assert_gt(d.maxBonus_, 0);
	// Number of vector rows per column: ceil(dpRows() / NWORDS_PER_REG)
	size_t iter =
		(dpRows() + (NWORDS_PER_REG-1)) / NWORDS_PER_REG; // iter = segLen

	// Scratch 16-bit word holding a byte value duplicated into both bytes
	int dup;

	// Many thanks to Michael Farrar for releasing his striped Smith-Waterman
	// implementation:
	//
	//  http://sites.google.com/site/farrarmichael/smith-waterman
	//
	// Much of the implementation below is adapted from Michael's code.

	// Working registers; all start out zeroed
	__m128i rfgapo   = _mm_setzero_si128();
	__m128i rfgape   = _mm_setzero_si128();
	__m128i rdgapo   = _mm_setzero_si128();
	__m128i rdgape   = _mm_setzero_si128();
	__m128i vlo      = _mm_setzero_si128();
	__m128i vhi      = _mm_setzero_si128();
	__m128i vmax     = _mm_setzero_si128();
	__m128i vcolmax  = _mm_setzero_si128();
	__m128i vmaxtmp  = _mm_setzero_si128();
	__m128i ve       = _mm_setzero_si128();
	__m128i vf       = _mm_setzero_si128();
	__m128i vh       = _mm_setzero_si128();
	__m128i vtmp     = _mm_setzero_si128();
	__m128i vzero    = _mm_setzero_si128();
	__m128i vbias    = _mm_setzero_si128();

	// Set all elts to reference gap open penalty.  The byte is duplicated
	// into a 16-bit word, inserted into lane 0 and then broadcast to the
	// whole register with the two shuffles.
	assert_gt(sc_->refGapOpen(), 0);
	assert_leq(sc_->refGapOpen(), MAX_U8);
	dup = (sc_->refGapOpen() << 8) | (sc_->refGapOpen() & 0x00ff);
	rfgapo = _mm_insert_epi16(rfgapo, dup, 0);
	rfgapo = _mm_shufflelo_epi16(rfgapo, 0);
	rfgapo = _mm_shuffle_epi32(rfgapo, 0);

	// Set all elts to reference gap extension penalty
	assert_gt(sc_->refGapExtend(), 0);
	assert_leq(sc_->refGapExtend(), MAX_U8);
	assert_leq(sc_->refGapExtend(), sc_->refGapOpen());
	dup = (sc_->refGapExtend() << 8) | (sc_->refGapExtend() & 0x00ff);
	rfgape = _mm_insert_epi16(rfgape, dup, 0);
	rfgape = _mm_shufflelo_epi16(rfgape, 0);
	rfgape = _mm_shuffle_epi32(rfgape, 0);

	// Set all elts to read gap open penalty
	assert_gt(sc_->readGapOpen(), 0);
	assert_leq(sc_->readGapOpen(), MAX_U8);
	dup = (sc_->readGapOpen() << 8) | (sc_->readGapOpen() & 0x00ff);
	rdgapo = _mm_insert_epi16(rdgapo, dup, 0);
	rdgapo = _mm_shufflelo_epi16(rdgapo, 0);
	rdgapo = _mm_shuffle_epi32(rdgapo, 0);

	// Set all elts to read gap extension penalty
	assert_gt(sc_->readGapExtend(), 0);
	assert_leq(sc_->readGapExtend(), MAX_U8);
	assert_leq(sc_->readGapExtend(), sc_->readGapOpen());
	dup = (sc_->readGapExtend() << 8) | (sc_->readGapExtend() & 0x00ff);
	rdgape = _mm_insert_epi16(rdgape, dup, 0);
	rdgape = _mm_shufflelo_epi16(rdgape, 0);
	rdgape = _mm_shuffle_epi32(rdgape, 0);

	vhi = _mm_cmpeq_epi16(vhi, vhi); // all elts = 0xffff
	vlo = _mm_xor_si128(vlo, vlo);   // all elts = 0
	vmax = vlo;

	// Make a vector of bias offsets
	dup = (d.bias_ << 8) | (d.bias_ & 0x00ff);
	vbias = _mm_insert_epi16(vbias, dup, 0);
	vbias = _mm_shufflelo_epi16(vbias, 0);
	vbias = _mm_shuffle_epi32(vbias, 0);

	// Points to a long vector of __m128i where each element is a block of
	// contiguous cells in the E, F or H matrix.  If the index % 3 == 0, then
	// the block of cells is from the E matrix.  If index % 3 == 1, they're
	// from the F matrix.  If index % 3 == 2, then they're from the H matrix.
	// Blocks of cells are organized in the same interleaved manner as they are
	// calculated by the Farrar algorithm.
	// NOTE(review): the paragraph above reads like a description of the DP
	// matrix layout; the pointer declared here walks the query profile.
	const __m128i *pvScore; // points into the query profile

	d.mat_.init(dpRows(), rff_ - rfi_, NWORDS_PER_REG);
	const size_t colstride = d.mat_.colstride();
	//const size_t rowstride = d.mat_.rowstride();
	assert_eq(ROWSTRIDE, colstride / iter);

	// Initialize the H and E vectors in the first matrix column
	__m128i *pvHTmp = d.mat_.tmpvec(0, 0);
	__m128i *pvETmp = d.mat_.evec(0, 0);

	for(size_t i = 0; i < iter; i++) {
		_mm_store_si128(pvETmp, vlo);
		_mm_store_si128(pvHTmp, vlo); // start low in local mode
		pvETmp += ROWSTRIDE;
		pvHTmp += ROWSTRIDE;
	}
	// These are swapped just before the innermost loop.  For the first
	// column the "previous H" is read from the zeroed tmp vectors and the
	// "previous E" from the zeroed E vectors of column 0.
	__m128i *pvHStore = d.mat_.hvec(0, 0);
	__m128i *pvHLoad  = d.mat_.tmpvec(0, 0);
	__m128i *pvELoad  = d.mat_.evec(0, 0);
	__m128i *pvEStore = d.mat_.evecUnsafe(0, 1);
	__m128i *pvFStore = d.mat_.fvec(0, 0);
	__m128i *pvFTmp   = NULL;

	assert_gt(sc_->gapbar, 0);
	size_t nfixup = 0;                 // # iterations of the lazy-F fixup loop
	TAlScore matchsc = sc_->match(30); // bonus used for the early-bail bound

	// Fill in the table as usual but instead of using the same gap-penalty
	// vector for each iteration of the inner loop, load words out of a
	// pre-calculated gap vector parallel to the query profile.  The pre-
	// calculated gap vectors enforce the gap barrier constraint by making it
	// infinitely costly to introduce a gap in barrier rows.
	//
	// AND use a separate loop to fill in the first row of the table, enforcing
	// the st_ constraints in the process.  This is awkward because it
	// separates the processing of the first row from the others and might make
	// it difficult to use the first-row results in the next row, but it might
	// be the simplest and least disruptive way to deal with the st_ constraint.

	colstop_ = rff_ - rfi_;
	lastsolcol_ = 0;
	for(size_t i = (size_t)rfi_; i < (size_t)rff_; i++) {
		assert(pvFStore == d.mat_.fvec(0, i - rfi_));
		assert(pvHStore == d.mat_.hvec(0, i - rfi_));

		// Fetch this column's reference mask
		const int refm = (int)rf_[i];

		// Fetch the appropriate query profile
		size_t off = (size_t)firsts5[refm] * iter * 2;
		pvScore = d.profbuf_.ptr() + off; // even elts = query profile, odd = gap barrier

		// Load H vector from the final row of the previous column
		vh = _mm_load_si128(pvHLoad + colstride - ROWSTRIDE);

		// Set all cells to low value
		vf = _mm_xor_si128(vf, vf);

		// Store cells in F, calculated previously
		// No need to veto ref gap extensions, they're all 0x00s
		_mm_store_si128(pvFStore, vf);
		pvFStore += ROWSTRIDE;

		// Shift down so that topmost (least sig) cell gets 0
		vh = _mm_slli_si128(vh, NBYTES_PER_WORD);

		// We pull out one loop iteration to make it easier to veto values in the top row

		// Load cells from E, calculated previously
		ve = _mm_load_si128(pvELoad);
		assert_all_lt(ve, vhi);
		pvELoad += ROWSTRIDE;

		// Factor in query profile (matches and mismatches).  Profile values
		// are stored with the bias added, so the bias is subtracted again
		// with unsigned saturation (the result never drops below 0).
		vh = _mm_adds_epu8(vh, pvScore[0]);
		vh = _mm_subs_epu8(vh, vbias);

		// Update H, factoring in E and F
		vh = _mm_max_epu8(vh, ve);
		vh = _mm_max_epu8(vh, vf);

		// Update highest score so far (reset for this column first)
		vcolmax = _mm_xor_si128(vcolmax, vcolmax);
		vcolmax = _mm_max_epu8(vcolmax, vh);

		// Save the new vH values
		_mm_store_si128(pvHStore, vh);
		pvHStore += ROWSTRIDE;

		// Update vE value
		vf = vh;
		vh = _mm_subs_epu8(vh, rdgapo);
		vh = _mm_subs_epu8(vh, pvScore[1]); // veto some read gap opens
		ve = _mm_subs_epu8(ve, rdgape);
		ve = _mm_max_epu8(ve, vh);
		assert_all_lt(ve, vhi);

		// Load the next h value
		vh = _mm_load_si128(pvHLoad);
		pvHLoad += ROWSTRIDE;

		// Save E values
		_mm_store_si128(pvEStore, ve);
		pvEStore += ROWSTRIDE;

		// Update vf value
		vf = _mm_subs_epu8(vf, rfgapo);
		assert_all_lt(vf, vhi);

		pvScore += 2; // move on to next query profile

		// Remaining vector rows of this column (vector row 0 was handled
		// above)
		size_t j;
		for(j = 1; j < iter; j++) {
			// Load cells from E, calculated previously
			ve = _mm_load_si128(pvELoad);
			assert_all_lt(ve, vhi);
			pvELoad += ROWSTRIDE;

			// Store cells in F, calculated previously
			vf = _mm_subs_epu8(vf, pvScore[1]); // veto some ref gap extensions
			_mm_store_si128(pvFStore, vf);
			pvFStore += ROWSTRIDE;

			// Factor in query profile (matches and mismatches)
			vh = _mm_adds_epu8(vh, pvScore[0]);
			vh = _mm_subs_epu8(vh, vbias);

			// Update H, factoring in E and F
			vh = _mm_max_epu8(vh, ve);
			vh = _mm_max_epu8(vh, vf);

			// Update highest score encountered this far
			vcolmax = _mm_max_epu8(vcolmax, vh);

			// Save the new vH values
			_mm_store_si128(pvHStore, vh);
			pvHStore += ROWSTRIDE;

			// Update vE value
			vtmp = vh;
			vh = _mm_subs_epu8(vh, rdgapo);
			vh = _mm_subs_epu8(vh, pvScore[1]); // veto some read gap opens
			ve = _mm_subs_epu8(ve, rdgape);
			ve = _mm_max_epu8(ve, vh);
			assert_all_lt(ve, vhi);

			// Load the next h value
			vh = _mm_load_si128(pvHLoad);
			pvHLoad += ROWSTRIDE;

			// Save E values
			_mm_store_si128(pvEStore, ve);
			pvEStore += ROWSTRIDE;

			// Update vf value: best of opening a gap from H (vtmp) and
			// extending the existing F
			vtmp = _mm_subs_epu8(vtmp, rfgapo);
			vf = _mm_subs_epu8(vf, rfgape);
			assert_all_lt(vf, vhi);
			vf = _mm_max_epu8(vf, vtmp);

			pvScore += 2; // move on to next query profile / gap veto
		}
		// pvHStore, pvELoad, pvEStore have all rolled over to the next column
		pvFTmp = pvFStore;     // remember start of next column's F vectors
		pvFStore -= colstride; // reset to start of column
		vtmp = _mm_load_si128(pvFStore);

		pvHStore -= colstride; // reset to start of column
		vh = _mm_load_si128(pvHStore);

		pvEStore -= colstride; // reset to start of column
		ve = _mm_load_si128(pvEStore);

		pvHLoad = pvHStore;    // new pvHLoad = pvHStore
		pvScore = d.profbuf_.ptr() + off + 1; // reset veto vector

		// vf from last row gets shifted down by one to overlay the first row
		// rfgape has already been subtracted from it.
		vf = _mm_slli_si128(vf, NBYTES_PER_WORD);

		vf = _mm_subs_epu8(vf, *pvScore); // veto some ref gap extensions
		vf = _mm_max_epu8(vtmp, vf);
		// cmp gets one bit per byte lane; a bit is set when the carried-in F
		// did not exceed the F already stored for that lane
		vtmp = _mm_subs_epu8(vf, vtmp);
		vtmp = _mm_cmpeq_epi8(vtmp, vzero);
		int cmp = _mm_movemask_epi8(vtmp);

		// Lazy-F fixup: keep sweeping down the column (wrapping around as
		// needed) for as long as at least one lane's F was improved
		j = 0;
		while(cmp != 0xffff) {
			// Store this vf
			_mm_store_si128(pvFStore, vf);
			pvFStore += ROWSTRIDE;

			// Update vh w/r/t new vf
			vh = _mm_max_epu8(vh, vf);

			// Save vH values
			_mm_store_si128(pvHStore, vh);
			pvHStore += ROWSTRIDE;

			// Update highest score encountered this far
			vcolmax = _mm_max_epu8(vcolmax, vh);

			// Update E in case it can be improved using our new vh
			vh = _mm_subs_epu8(vh, rdgapo);
			vh = _mm_subs_epu8(vh, *pvScore); // veto some read gap opens
			ve = _mm_max_epu8(ve, vh);
			_mm_store_si128(pvEStore, ve);
			pvEStore += ROWSTRIDE;
			pvScore += 2;

			assert_lt(j, iter);
			if(++j == iter) {
				// Ran off the bottom of the column: wrap to the top and
				// shift F down by one cell
				pvFStore -= colstride;
				vtmp = _mm_load_si128(pvFStore);   // load next vf ASAP
				pvHStore -= colstride;
				vh = _mm_load_si128(pvHStore);     // load next vh ASAP
				pvEStore -= colstride;
				ve = _mm_load_si128(pvEStore);     // load next ve ASAP
				pvScore = d.profbuf_.ptr() + off + 1;
				j = 0;
				vf = _mm_slli_si128(vf, NBYTES_PER_WORD);
			} else {
				vtmp = _mm_load_si128(pvFStore);   // load next vf ASAP
				vh = _mm_load_si128(pvHStore);     // load next vh ASAP
				ve = _mm_load_si128(pvEStore);     // load next ve ASAP
			}

			// Update F with another gap extension
			vf = _mm_subs_epu8(vf, rfgape);
			vf = _mm_subs_epu8(vf, *pvScore); // veto some ref gap extensions
			vf = _mm_max_epu8(vtmp, vf);
			vtmp = _mm_subs_epu8(vf, vtmp);
			vtmp = _mm_cmpeq_epi8(vtmp, vzero);
			cmp = _mm_movemask_epi8(vtmp);
			nfixup++;
		}

#ifndef NDEBUG
		if((rand() & 15) == 0) {
			// This is a work-intensive sanity check; each time we finish filling
			// a column, we check that each H, E, and F is sensible.
			for(size_t k = 0; k < dpRows(); k++) {
				assert(cellOkLocalU8(
					d,
					k,                   // row
					i - rfi_,            // col
					refm,                // reference mask
					(int)(*rd_)[rdi_+k], // read char
					(int)(*qu_)[rdi_+k], // read quality
					*sc_));              // scoring scheme
			}
		}
#endif

		// Fold this column's maxima into the running maximum and save the
		// column maximum vector in the first tmp vector of this column
		vmax = _mm_max_epu8(vmax, vcolmax);
		_mm_store_si128(d.mat_.tmpvec(0, i - rfi_), vcolmax);

		{
			// Get single largest score in this column (horizontal max: after
			// the four shift/max steps byte 0 holds the maximum of all 16)
			vmaxtmp = vcolmax;
			vtmp = _mm_srli_si128(vmaxtmp, 8);
			vmaxtmp = _mm_max_epu8(vmaxtmp, vtmp);
			vtmp = _mm_srli_si128(vmaxtmp, 4);
			vmaxtmp = _mm_max_epu8(vmaxtmp, vtmp);
			vtmp = _mm_srli_si128(vmaxtmp, 2);
			vmaxtmp = _mm_max_epu8(vmaxtmp, vtmp);
			vtmp = _mm_srli_si128(vmaxtmp, 1);
			vmaxtmp = _mm_max_epu8(vmaxtmp, vtmp);
			int score = _mm_extract_epi16(vmaxtmp, 0);
			score = score & 0x00ff;

			// Could we have saturated?
			if(score + d.bias_ >= 255) {
				flag = -2; // yes
				if(!debug) met.dpsat++;
				return MIN_I64;
			}

			if(score < minsc_) {
				size_t ncolleft = rff_ - i - 1;
				if(score + (TAlScore)ncolleft * matchsc < minsc_) {
					// Bail!  We're guaranteed not to see a valid alignment in
					// the rest of the matrix
					colstop_ = (i+1) - rfi_;
					break;
				}
			} else {
				// This column contains at least one cell >= minsc_
				lastsolcol_ = i - rfi_;
			}
		}

		// pvELoad and pvHLoad are already where they need to be

		// Adjust the load and store vectors here.
		pvHStore = pvHLoad + colstride;
		pvEStore = pvELoad + colstride;
		pvFStore = pvFTmp;
	}

	// Find largest score in vmax
	vtmp = _mm_srli_si128(vmax, 8);
	vmax = _mm_max_epu8(vmax, vtmp);
	vtmp = _mm_srli_si128(vmax, 4);
	vmax = _mm_max_epu8(vmax, vtmp);
	vtmp = _mm_srli_si128(vmax, 2);
	vmax = _mm_max_epu8(vmax, vtmp);
	vtmp = _mm_srli_si128(vmax, 1);
	vmax = _mm_max_epu8(vmax, vtmp);

	// Update metrics.  Column/cell counts are computed from the full
	// rectangle width (rff_ - rfi_), whether or not we bailed early.
	if(!debug) {
		size_t ninner = (rff_ - rfi_) * iter;
		met.col   += (rff_ - rfi_);             // DP columns
		met.cell  += (ninner * NWORDS_PER_REG); // DP cells
		met.inner += ninner;                    // DP inner loop iters
		met.fixup += nfixup;                    // DP fixup loop iters
	}

	int score = _mm_extract_epi16(vmax, 0);
	score = score & 0x00ff;

	flag = 0;

	// Could we have saturated?
	if(score + d.bias_ >= 255) {
		flag = -2; // yes
		if(!debug) met.dpsat++;
		return MIN_I64;
	}

	// Did we find a solution?
	if(score == MIN_U8 || score < minsc_) {
		flag = -1; // no
		if(!debug) met.dpfail++;
		return (TAlScore)score;
	}

	// Return largest score
	if(!debug) met.dpsucc++;
	return (TAlScore)score;
}
+ * + * The seed extend case looks like this: + * + * |Rectangle| 0: seed diagonal + * **OO0oo---- o: "RHS gap" diagonals + * -**OO0oo--- O: "LHS gap" diagonals + * --**OO0oo-- *: "LHS extra" diagonals + * ---**OO0oo- -: cells that can't possibly be involved in a valid + * ----**OO0oo alignment that overlaps one of the core diagonals + * + * The anchor-to-left case looks like this: + * + * |Anchor| | ---- Rectangle ---- | + * o---------OO0000000000000oo------ 0: mate diagonal (also core diags!) + * -o---------OO0000000000000oo----- o: "RHS gap" diagonals + * --o---------OO0000000000000oo---- O: "LHS gap" diagonals + * ---oo--------OO0000000000000oo--- *: "LHS extra" diagonals + * -----o--------OO0000000000000oo-- -: cells that can't possibly be + * ------o--------OO0000000000000oo- involved in a valid alignment that + * -------o--------OO0000000000000oo overlaps one of the core diagonals + * XXXXXXXXXXXXX + * | RHS Range | + * ^ ^ + * rl rr + * + * The anchor-to-right case looks like this: + * + * ll lr + * v v + * | LHS Range | + * XXXXXXXXXXXXX |Anchor| + * OO0000000000000oo--------o-------- 0: mate diagonal (also core diags!) + * -OO0000000000000oo--------o------- o: "RHS gap" diagonals + * --OO0000000000000oo--------o------ O: "LHS gap" diagonals + * ---OO0000000000000oo--------oo---- *: "LHS extra" diagonals + * ----OO0000000000000oo---------o--- -: cells that can't possibly be + * -----OO0000000000000oo---------o-- involved in a valid alignment that + * ------OO0000000000000oo---------o- overlaps one of the core diagonals + * | ---- Rectangle ---- | + */ +bool SwAligner::gatherCellsNucleotidesLocalSseU8(TAlScore best) { + // What's the minimum number of rows that can possibly be spanned by an + // alignment that meets the minimum score requirement? 
+ assert(sse8succ_); + size_t bonus = (size_t)sc_->match(30); + const size_t ncol = lastsolcol_ + 1; + const size_t nrow = dpRows(); + assert_gt(nrow, 0); + btncand_.clear(); + btncanddone_.clear(); + SSEData& d = fw_ ? sseU8fw_ : sseU8rc_; + SSEMetrics& met = extend_ ? sseU8ExtendMet_ : sseU8MateMet_; + assert(!d.profbuf_.empty()); + //const size_t rowstride = d.mat_.rowstride(); + //const size_t colstride = d.mat_.colstride(); + size_t iter = (dpRows() + (NWORDS_PER_REG - 1)) / NWORDS_PER_REG; + assert_gt(iter, 0); + assert_geq(minsc_, 0); + assert_gt(bonus, 0); + size_t minrow = (size_t)(((minsc_ + bonus - 1) / bonus) - 1); + for(size_t j = 0; j < ncol; j++) { + // Establish the range of rows where a backtrace from the cell in this + // row/col is close enough to one of the core diagonals that it could + // conceivably count + size_t nrow_lo = MIN_SIZE_T; + size_t nrow_hi = nrow; + // First, check if there is a cell in this column with a score + // above the score threshold + __m128i vmax = *d.mat_.tmpvec(0, j); + __m128i vtmp = _mm_srli_si128(vmax, 8); + vmax = _mm_max_epu8(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 4); + vmax = _mm_max_epu8(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 2); + vmax = _mm_max_epu8(vmax, vtmp); + vtmp = _mm_srli_si128(vmax, 1); + vmax = _mm_max_epu8(vmax, vtmp); + int score = _mm_extract_epi16(vmax, 0); + score = score & 0x00ff; +#ifndef NDEBUG + { + // Start in upper vector row and move down + TAlScore max = 0; + __m128i *pvH = d.mat_.hvec(0, j); + for(size_t i = 0; i < iter; i++) { + for(size_t k = 0; k < NWORDS_PER_REG; k++) { + TAlScore sc = (TAlScore)((TCScore*)pvH)[k]; + if(sc > max) { + max = sc; + } + } + pvH += ROWSTRIDE; + } + assert_eq(max, score); + } +#endif + if((TAlScore)score < minsc_) { + // Scores in column aren't good enough + continue; + } + // Get pointer to first cell in column to examine: + __m128i *pvHorig = d.mat_.hvec(0, j); + __m128i *pvH = pvHorig; + // Get pointer to the vector in the following column 
that corresponds + // to the cells diagonally down and to the right from the cells in pvH + __m128i *pvHSucc = (j < ncol-1) ? d.mat_.hvec(0, j+1) : NULL; + // Start in upper vector row and move down + for(size_t i = 0; i < iter; i++) { + if(pvHSucc != NULL) { + pvHSucc += ROWSTRIDE; + if(i == iter-1) { + pvHSucc = d.mat_.hvec(0, j+1); + } + } + // Which elements of this vector are exhaustively scored? + size_t rdoff = i; + for(size_t k = 0; k < NWORDS_PER_REG; k++) { + // Is this row, col one that we can potential backtrace from? + // I.e. are we close enough to a core diagonal? + if(rdoff >= nrow_lo && rdoff < nrow_hi) { + // This cell has been exhaustively scored + if(rdoff >= minrow) { + // ... and it could potentially score high enough + TAlScore sc = (TAlScore)((TCScore*)pvH)[k]; + assert_leq(sc, best); + if(sc >= minsc_) { + // This is a potential solution + bool matchSucc = false; + int readc = (*rd_)[rdoff]; + int refc = rf_[j + rfi_]; + bool match = ((refc & (1 << readc)) != 0); + if(rdoff < dpRows()-1) { + int readcSucc = (*rd_)[rdoff+1]; + int refcSucc = rf_[j + rfi_ + 1]; + assert_range(0, 16, refcSucc); + matchSucc = ((refcSucc & (1 << readcSucc)) != 0); + } + if(match && !matchSucc) { + // Yes, this is legit + met.gathsol++; + btncand_.expand(); + btncand_.back().init(rdoff, j, sc); + } + } + } + } else { + // Already saw every element in the vector that's been + // exhaustively scored + break; + } + rdoff += iter; + } + pvH += ROWSTRIDE; + } + } + if(!btncand_.empty()) { + d.mat_.initMasks(); + } + return !btncand_.empty(); +} + +#define MOVE_VEC_PTR_UP(vec, rowvec, rowelt) { \ + if(rowvec == 0) { \ + rowvec += d.mat_.nvecrow_; \ + vec += d.mat_.colstride_; \ + rowelt--; \ + } \ + rowvec--; \ + vec -= ROWSTRIDE; \ +} + +#define MOVE_VEC_PTR_LEFT(vec, rowvec, rowelt) { vec -= d.mat_.colstride_; } + +#define MOVE_VEC_PTR_UPLEFT(vec, rowvec, rowelt) { \ + MOVE_VEC_PTR_UP(vec, rowvec, rowelt); \ + MOVE_VEC_PTR_LEFT(vec, rowvec, rowelt); \ +} + +#define 
// Helper macros for walking the striped SSE matrix during backtrace.  All of
// them expand to a braced statement block, refer to a variable named 'd'
// (whose mat_ member supplies nvecrow_, colstride_ and matbuf_) and use their
// arguments more than once, so pass plain lvalues only.

// Step (vec, rowvec, rowelt) to the cell one row up.  Normally this just
// decrements rowvec and moves vec back by ROWSTRIDE.  When rowvec is already
// 0 the position first wraps: rowvec is advanced by nvecrow_, vec by
// colstride_, and rowelt is decremented.
// NOTE(review): when rowvec and rowelt are both 0, rowelt-- goes below zero;
// presumably callers never dereference the "up" position in that case --
// confirm.
#define MOVE_VEC_PTR_UP(vec, rowvec, rowelt) { \
	if(rowvec == 0) { \
		rowvec += d.mat_.nvecrow_; \
		vec += d.mat_.colstride_; \
		rowelt--; \
	} \
	rowvec--; \
	vec -= ROWSTRIDE; \
}

// Step vec to the same row in the previous column (rowvec and rowelt are
// accepted for symmetry but left untouched)
#define MOVE_VEC_PTR_LEFT(vec, rowvec, rowelt) { vec -= d.mat_.colstride_; }

// Step one row up, then one column left
#define MOVE_VEC_PTR_UPLEFT(vec, rowvec, rowelt) { \
	MOVE_VEC_PTR_UP(vec, rowvec, rowelt); \
	MOVE_VEC_PTR_LEFT(vec, rowvec, rowelt); \
}

// Move the current cell and its left/up/upleft neighbors one column left.
// Expects cur_vec, left_vec, up_vec, upleft_vec and the matching *rowvec /
// *rowelt variables to be in scope.
#define MOVE_ALL_LEFT() { \
	MOVE_VEC_PTR_LEFT(cur_vec, rowvec, rowelt); \
	MOVE_VEC_PTR_LEFT(left_vec, left_rowvec, left_rowelt); \
	MOVE_VEC_PTR_LEFT(up_vec, up_rowvec, up_rowelt); \
	MOVE_VEC_PTR_LEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \
}

// Move the current cell and its left/up/upleft neighbors one row up
#define MOVE_ALL_UP() { \
	MOVE_VEC_PTR_UP(cur_vec, rowvec, rowelt); \
	MOVE_VEC_PTR_UP(left_vec, left_rowvec, left_rowelt); \
	MOVE_VEC_PTR_UP(up_vec, up_rowvec, up_rowelt); \
	MOVE_VEC_PTR_UP(upleft_vec, upleft_rowvec, upleft_rowelt); \
}

// Move the current cell and its left/up/upleft neighbors one row up and one
// column left
#define MOVE_ALL_UPLEFT() { \
	MOVE_VEC_PTR_UPLEFT(cur_vec, rowvec, rowelt); \
	MOVE_VEC_PTR_UPLEFT(left_vec, left_rowvec, left_rowelt); \
	MOVE_VEC_PTR_UPLEFT(up_vec, up_rowvec, up_rowelt); \
	MOVE_VEC_PTR_UPLEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \
}

// Position all four cursors for the cell at (row, col): rowelt/rowvec are the
// quotient/remainder of row by nvecrow_, eltvec is the offset of the cell's
// vector within matbuf_, and the left/up/upleft cursors are derived from the
// current one with the single-step macros above.
#define NEW_ROW_COL(row, col) { \
	rowelt = row / d.mat_.nvecrow_; \
	rowvec = row % d.mat_.nvecrow_; \
	eltvec = (col * d.mat_.colstride_) + (rowvec * ROWSTRIDE); \
	cur_vec = d.mat_.matbuf_.ptr() + eltvec; \
	left_vec = cur_vec; \
	left_rowelt = rowelt; \
	left_rowvec = rowvec; \
	MOVE_VEC_PTR_LEFT(left_vec, left_rowvec, left_rowelt); \
	up_vec = cur_vec; \
	up_rowelt = rowelt; \
	up_rowvec = rowvec; \
	MOVE_VEC_PTR_UP(up_vec, up_rowvec, up_rowelt); \
	upleft_vec = up_vec; \
	upleft_rowelt = up_rowelt; \
	upleft_rowvec = up_rowvec; \
	MOVE_VEC_PTR_LEFT(upleft_vec, upleft_rowvec, upleft_rowelt); \
}
If so, + * we backtrack to the last decision point, mask out the path that led to the + * previously observed cell, and continue along a different path; or, if there + * are no more paths to try, we give up. + * + * An alignment found is subject to a filtering step designed to remove + * alignments that could spuriously trump a better alignment falling partially + * outside the rectangle. + * + * 1 + * 67890123456 0: seed diagonal + * **OO0oo---- o: right-hand "gap" diagonals: band of 'maxgap' diags + * -**OO0oo--- O: left-hand "gap" diagonals: band of 'maxgap' diags + * --**OO0oo-- *: "extra" diagonals: additional band of 'maxgap' diags + * ---**OO0oo- +: cells not in any of the above + * ----**OO0oo + * |-| + * Gotta touch one of these diags + * + * Basically, the filtering step removes alignments that do not at some point + * touch a cell labeled '0' or 'O' in the diagram above. + * + */ +bool SwAligner::backtraceNucleotidesLocalSseU8( + TAlScore escore, // in: expected score + SwResult& res, // out: store results (edits and scores) here + size_t& off, // out: store diagonal projection of origin + size_t& nbts, // out: # backtracks + size_t row, // start in this row + size_t col, // start in this column + RandomSource& rnd) // random gen, to choose among equal paths +{ + assert_lt(row, dpRows()); + assert_lt(col, (size_t)(rff_ - rfi_)); + SSEData& d = fw_ ? sseU8fw_ : sseU8rc_; + SSEMetrics& met = extend_ ? 
sseU8ExtendMet_ : sseU8MateMet_; + met.bt++; + assert(!d.profbuf_.empty()); + assert_lt(row, rd_->length()); + btnstack_.clear(); // empty the backtrack stack + btcells_.clear(); // empty the cells-so-far list + AlnScore score; score.score_ = 0; + score.gaps_ = score.ns_ = 0; + size_t origCol = col; + size_t gaps = 0, readGaps = 0, refGaps = 0; + res.alres.reset(); + EList& ned = res.alres.ned(); + assert(ned.empty()); + assert_gt(dpRows(), row); + size_t trimEnd = dpRows() - row - 1; + size_t trimBeg = 0; + size_t ct = SSEMatrix::H; // cell type + // Row and col in terms of where they fall in the SSE vector matrix + size_t rowelt, rowvec, eltvec; + size_t left_rowelt, up_rowelt, upleft_rowelt; + size_t left_rowvec, up_rowvec, upleft_rowvec; + __m128i *cur_vec, *left_vec, *up_vec, *upleft_vec; + NEW_ROW_COL(row, col); + while((int)row >= 0) { + met.btcell++; + nbts++; + int readc = (*rd_)[rdi_ + row]; + int refm = (int)rf_[rfi_ + col]; + int readq = (*qu_)[row]; + assert_leq(col, origCol); + // Get score in this cell + bool empty = false, reportedThru, canMoveThru, branch = false; + int cur = SSEMatrix::H; + if(!d.mat_.reset_[row]) { + d.mat_.resetRow(row); + } + reportedThru = d.mat_.reportedThrough(row, col); + canMoveThru = true; + if(reportedThru) { + canMoveThru = false; + } else { + empty = false; + if(row > 0) { + assert_gt(row, 0); + size_t rowFromEnd = d.mat_.nrow() - row - 1; + bool gapsAllowed = true; + if(row < (size_t)sc_->gapbar || + rowFromEnd < (size_t)sc_->gapbar) + { + gapsAllowed = false; + } + const int floorsc = 0; + const int offsetsc = 0; + // Move to beginning of column/row + if(ct == SSEMatrix::E) { // AKA rdgap + assert_gt(col, 0); + TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::E))[rowelt] + offsetsc; + assert(gapsAllowed); + // Currently in the E matrix; incoming transition must come from the + // left. It's either a gap open from the H matrix or a gap extend from + // the E matrix. 
+ // TODO: save and restore origMask as well as mask + int origMask = 0, mask = 0; + // Get H score of cell to the left + TAlScore sc_h_left = ((TCScore*)(left_vec + SSEMatrix::H))[left_rowelt] + offsetsc; + if(sc_h_left > 0 && sc_h_left - sc_->readGapOpen() == sc_cur) { + mask |= (1 << 0); + } + // Get E score of cell to the left + TAlScore sc_e_left = ((TCScore*)(left_vec + SSEMatrix::E))[left_rowelt] + offsetsc; + if(sc_e_left > 0 && sc_e_left - sc_->readGapExtend() == sc_cur) { + mask |= (1 << 1); + } + origMask = mask; + assert(origMask > 0 || sc_cur <= sc_->match()); + if(d.mat_.isEMaskSet(row, col)) { + mask = (d.mat_.masks_[row][col] >> 8) & 3; + } + if(mask == 3) { +#if 1 + // Pick H -> E cell + cur = SW_BT_OALL_READ_OPEN; + d.mat_.eMaskSet(row, col, 2); // might choose E later +#else + if(rnd.nextU2()) { + // Pick H -> E cell + cur = SW_BT_OALL_READ_OPEN; + d.mat_.eMaskSet(row, col, 2); // might choose E later + } else { + // Pick E -> E cell + cur = SW_BT_RDGAP_EXTEND; + d.mat_.eMaskSet(row, col, 1); // might choose H later + } +#endif + branch = true; + } else if(mask == 2) { + // I chose the E cell + cur = SW_BT_RDGAP_EXTEND; + d.mat_.eMaskSet(row, col, 0); // done + } else if(mask == 1) { + // I chose the H cell + cur = SW_BT_OALL_READ_OPEN; + d.mat_.eMaskSet(row, col, 0); // done + } else { + empty = true; + // It's empty, so the only question left is whether we should be + // allowed in terimnate in this cell. If it's got a valid score + // then we *shouldn't* be allowed to terminate here because that + // means it's part of a larger alignment that was already reported. 
+ canMoveThru = (origMask == 0); + } + assert(!empty || !canMoveThru); + } else if(ct == SSEMatrix::F) { // AKA rfgap + assert_gt(row, 0); + assert(gapsAllowed); + TAlScore sc_h_up = ((TCScore*)(up_vec + SSEMatrix::H))[up_rowelt] + offsetsc; + TAlScore sc_f_up = ((TCScore*)(up_vec + SSEMatrix::F))[up_rowelt] + offsetsc; + TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::F))[rowelt] + offsetsc; + // Currently in the F matrix; incoming transition must come from above. + // It's either a gap open from the H matrix or a gap extend from the F + // matrix. + // TODO: save and restore origMask as well as mask + int origMask = 0, mask = 0; + // Get H score of cell above + if(sc_h_up > floorsc && sc_h_up - sc_->refGapOpen() == sc_cur) { + mask |= (1 << 0); + } + // Get F score of cell above + if(sc_f_up > floorsc && sc_f_up - sc_->refGapExtend() == sc_cur) { + mask |= (1 << 1); + } + origMask = mask; + assert(origMask > 0 || sc_cur <= sc_->match()); + if(d.mat_.isFMaskSet(row, col)) { + mask = (d.mat_.masks_[row][col] >> 11) & 3; + } + if(mask == 3) { +#if 1 + // I chose the H cell + cur = SW_BT_OALL_REF_OPEN; + d.mat_.fMaskSet(row, col, 2); // might choose E later +#else + if(rnd.nextU2()) { + // I chose the H cell + cur = SW_BT_OALL_REF_OPEN; + d.mat_.fMaskSet(row, col, 2); // might choose E later + } else { + // I chose the F cell + cur = SW_BT_RFGAP_EXTEND; + d.mat_.fMaskSet(row, col, 1); // might choose E later + } +#endif + branch = true; + } else if(mask == 2) { + // I chose the F cell + cur = SW_BT_RFGAP_EXTEND; + d.mat_.fMaskSet(row, col, 0); // done + } else if(mask == 1) { + // I chose the H cell + cur = SW_BT_OALL_REF_OPEN; + d.mat_.fMaskSet(row, col, 0); // done + } else { + empty = true; + // It's empty, so the only question left is whether we should be + // allowed in terimnate in this cell. If it's got a valid score + // then we *shouldn't* be allowed to terminate here because that + // means it's part of a larger alignment that was already reported. 
+ canMoveThru = (origMask == 0); + } + assert(!empty || !canMoveThru); + } else { + assert_eq(SSEMatrix::H, ct); + TAlScore sc_cur = ((TCScore*)(cur_vec + SSEMatrix::H))[rowelt] + offsetsc; + TAlScore sc_f_up = ((TCScore*)(up_vec + SSEMatrix::F))[up_rowelt] + offsetsc; + TAlScore sc_h_up = ((TCScore*)(up_vec + SSEMatrix::H))[up_rowelt] + offsetsc; + TAlScore sc_h_left = col > 0 ? (((TCScore*)(left_vec + SSEMatrix::H))[left_rowelt] + offsetsc) : floorsc; + TAlScore sc_e_left = col > 0 ? (((TCScore*)(left_vec + SSEMatrix::E))[left_rowelt] + offsetsc) : floorsc; + TAlScore sc_h_upleft = col > 0 ? (((TCScore*)(upleft_vec + SSEMatrix::H))[upleft_rowelt] + offsetsc) : floorsc; + TAlScore sc_diag = sc_->score(readc, refm, readq - 33); + // TODO: save and restore origMask as well as mask + int origMask = 0, mask = 0; + if(gapsAllowed) { + if(sc_h_up > floorsc && sc_cur == sc_h_up - sc_->refGapOpen()) { + mask |= (1 << 0); + } + if(sc_h_left > floorsc && sc_cur == sc_h_left - sc_->readGapOpen()) { + mask |= (1 << 1); + } + if(sc_f_up > floorsc && sc_cur == sc_f_up - sc_->refGapExtend()) { + mask |= (1 << 2); + } + if(sc_e_left > floorsc && sc_cur == sc_e_left - sc_->readGapExtend()) { + mask |= (1 << 3); + } + } + if(sc_h_upleft > floorsc && sc_cur == sc_h_upleft + sc_diag) { + mask |= (1 << 4); + } + origMask = mask; + assert(origMask > 0 || sc_cur <= sc_->match()); + if(d.mat_.isHMaskSet(row, col)) { + mask = (d.mat_.masks_[row][col] >> 2) & 31; + } + assert(gapsAllowed || mask == (1 << 4) || mask == 0); + int opts = alts5[mask]; + int select = -1; + if(opts == 1) { + select = firsts5[mask]; + assert_geq(mask, 0); + d.mat_.hMaskSet(row, col, 0); + } else if(opts > 1) { +#if 1 + if( (mask & 16) != 0) { + select = 4; // H diag + } else if((mask & 1) != 0) { + select = 0; // H up + } else if((mask & 4) != 0) { + select = 2; // F up + } else if((mask & 2) != 0) { + select = 1; // H left + } else if((mask & 8) != 0) { + select = 3; // E left + } +#else + select = 
randFromMask(rnd, mask); +#endif + assert_geq(mask, 0); + mask &= ~(1 << select); + assert(gapsAllowed || mask == (1 << 4) || mask == 0); + d.mat_.hMaskSet(row, col, mask); + branch = true; + } else { /* No way to backtrack! */ } + if(select != -1) { + if(select == 4) { + cur = SW_BT_OALL_DIAG; + } else if(select == 0) { + cur = SW_BT_OALL_REF_OPEN; + } else if(select == 1) { + cur = SW_BT_OALL_READ_OPEN; + } else if(select == 2) { + cur = SW_BT_RFGAP_EXTEND; + } else { + assert_eq(3, select) + cur = SW_BT_RDGAP_EXTEND; + } + } else { + empty = true; + // It's empty, so the only question left is whether we should be + // allowed in terimnate in this cell. If it's got a valid score + // then we *shouldn't* be allowed to terminate here because that + // means it's part of a larger alignment that was already reported. + canMoveThru = (origMask == 0); + } + } + assert(!empty || !canMoveThru || ct == SSEMatrix::H); + } + } + d.mat_.setReportedThrough(row, col); + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + // Cell was involved in a previously-reported alignment? 
+ if(!canMoveThru) { + if(!btnstack_.empty()) { + // Remove all the cells from list back to and including the + // cell where the branch occurred + btcells_.resize(btnstack_.back().celsz); + // Pop record off the top of the stack + ned.resize(btnstack_.back().nedsz); + //aed.resize(btnstack_.back().aedsz); + row = btnstack_.back().row; + col = btnstack_.back().col; + gaps = btnstack_.back().gaps; + readGaps = btnstack_.back().readGaps; + refGaps = btnstack_.back().refGaps; + score = btnstack_.back().score; + ct = btnstack_.back().ct; + btnstack_.pop_back(); + assert(!sc_->monotone || score.score() >= escore); + NEW_ROW_COL(row, col); + continue; + } else { + // No branch points to revisit; just give up + res.reset(); + met.btfail++; // DP backtraces failed + return false; + } + } + assert(!reportedThru); + assert(!sc_->monotone || score.score() >= minsc_); + if(empty || row == 0) { + assert_eq(SSEMatrix::H, ct); + btcells_.expand(); + btcells_.back().first = row; + btcells_.back().second = col; + // This cell is at the end of a legitimate alignment + trimBeg = row; + assert_eq(btcells_.size(), dpRows() - trimBeg - trimEnd + readGaps); + break; + } + if(branch) { + // Add a frame to the backtrack stack + btnstack_.expand(); + btnstack_.back().init( + ned.size(), + 0, // aed.size() + btcells_.size(), + row, + col, + gaps, + readGaps, + refGaps, + score, + (int)ct); + } + btcells_.expand(); + btcells_.back().first = row; + btcells_.back().second = col; + switch(cur) { + // Move up and to the left. If the reference nucleotide in the + // source row mismatches the read nucleotide, penalize + // it and add a nucleotide mismatch. 
+ case SW_BT_OALL_DIAG: { + assert_gt(row, 0); assert_gt(col, 0); + // Check for color mismatch + int readC = (*rd_)[row]; + int refNmask = (int)rf_[rfi_+col]; + assert_gt(refNmask, 0); + int m = matchesEx(readC, refNmask); + ct = SSEMatrix::H; + if(m != 1) { + Edit e( + (int)row, + mask2dna[refNmask], + "ACGTN"[readC], + EDIT_TYPE_MM); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + int pen = QUAL2(row, col); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= escore); + } else { + // Reward a match + int64_t bonus = sc_->match(30); + score.score_ += bonus; + assert(!sc_->monotone || score.score() >= escore); + } + if(m == -1) { + score.ns_++; + } + row--; col--; + MOVE_ALL_UPLEFT(); + assert(VALID_AL_SCORE(score)); + break; + } + // Move up. Add an edit encoding the ref gap. + case SW_BT_OALL_REF_OPEN: + { + assert_gt(row, 0); + Edit e( + (int)row, + '-', + "ACGTN"[(int)(*rd_)[row]], + EDIT_TYPE_REF_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + row--; + ct = SSEMatrix::H; + int pen = sc_->refGapOpen(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; refGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_UP(); + break; + } + // Move up. Add an edit encoding the ref gap. 
+ case SW_BT_RFGAP_EXTEND: + { + assert_gt(row, 1); + Edit e( + (int)row, + '-', + "ACGTN"[(int)(*rd_)[row]], + EDIT_TYPE_REF_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + row--; + ct = SSEMatrix::F; + int pen = sc_->refGapExtend(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; refGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_UP(); + break; + } + case SW_BT_OALL_READ_OPEN: + { + assert_gt(col, 0); + Edit e( + (int)row+1, + mask2dna[(int)rf_[rfi_+col]], + '-', + EDIT_TYPE_READ_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + col--; + ct = SSEMatrix::H; + int pen = sc_->readGapOpen(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; readGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_LEFT(); + break; + } + case SW_BT_RDGAP_EXTEND: + { + assert_gt(col, 1); + Edit e( + (int)row+1, + mask2dna[(int)rf_[rfi_+col]], + '-', + EDIT_TYPE_READ_GAP); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + assert_geq(row, (size_t)sc_->gapbar); + assert_geq((int)(rdf_-rdi_-row-1), sc_->gapbar-1); + col--; + ct = SSEMatrix::E; + int pen = sc_->readGapExtend(); + score.score_ -= pen; + assert(!sc_->monotone || score.score() >= minsc_); + gaps++; readGaps++; + assert_eq(gaps, Edit::numGaps(ned)); + assert_leq(gaps, rdgap_ + rfgap_); + MOVE_ALL_LEFT(); + break; + } + default: throw 1; + } + } // while((int)row > 0) + assert_geq(col, 0); + assert_eq(SSEMatrix::H, ct); + // The number of cells in the backtracs should equal the number of read + // bases after trimming plus the number of gaps + 
assert_eq(btcells_.size(), dpRows() - trimBeg - trimEnd + readGaps); + // Check whether we went through a core diagonal and set 'reported' flag on + // each cell + bool overlappedCoreDiag = false; + for(size_t i = 0; i < btcells_.size(); i++) { + size_t rw = btcells_[i].first; + size_t cl = btcells_[i].second; + // Calculate the diagonal within the *trimmed* rectangle, i.e. the + // rectangle we dealt with in align, gather and backtrack. + int64_t diagi = cl - rw; + // Now adjust to the diagonal within the *untrimmed* rectangle by + // adding on the amount trimmed from the left. + diagi += rect_->triml; + if(diagi >= 0) { + size_t diag = (size_t)diagi; + if(diag >= rect_->corel && diag <= rect_->corer) { + overlappedCoreDiag = true; + break; + } + } +#ifndef NDEBUG + //assert(!d.mat_.reportedThrough(rw, cl)); + //d.mat_.setReportedThrough(rw, cl); + assert(d.mat_.reportedThrough(rw, cl)); +#endif + } + if(!overlappedCoreDiag) { + // Must overlap a core diagonal. Otherwise, we run the risk of + // reporting an alignment that overlaps (and trumps) a higher-scoring + // alignment that lies partially outside the dynamic programming + // rectangle. + res.reset(); + met.corerej++; + return false; + } + int readC = (*rd_)[rdi_+row]; // get last char in read + int refNmask = (int)rf_[rfi_+col]; // get last ref char ref involved in aln + assert_gt(refNmask, 0); + int m = matchesEx(readC, refNmask); + if(m != 1) { + Edit e((int)row, mask2dna[refNmask], "ACGTN"[readC], EDIT_TYPE_MM); + assert(e.repOk()); + assert(ned.empty() || ned.back().pos >= row); + ned.push_back(e); + score.score_ -= QUAL2(row, col); + assert_geq(score.score(), minsc_); + } else { + score.score_ += sc_->match(30); + } + if(m == -1) { + score.ns_++; + } + if(score.ns_ > nceil_) { + // Alignment has too many Ns in it! 
+ res.reset(); + met.nrej++; + return false; + } + res.reverse(); + assert(Edit::repOk(ned, (*rd_))); + assert_eq(score.score(), escore); + assert_leq(gaps, rdgap_ + rfgap_); + off = col; + assert_lt(col + (size_t)rfi_, (size_t)rff_); + score.gaps_ = gaps; + res.alres.setScore(score); + res.alres.setShape( + refidx_, // ref id + off + rfi_ + rect_->refl, // 0-based ref offset + reflen_, // reference length + fw_, // aligned to Watson? + rdf_ - rdi_, // read length + 0, // read ID + true, // pretrim soft? + 0, // pretrim 5' end + 0, // pretrim 3' end + true, // alignment trim soft? + fw_ ? trimBeg : trimEnd, // alignment trim 5' end + fw_ ? trimEnd : trimBeg); // alignment trim 3' end + size_t refns = 0; + for(size_t i = col; i <= origCol; i++) { + if((int)rf_[rfi_+i] > 15) { + refns++; + } + } + res.alres.setRefNs(refns); + assert(Edit::repOk(ned, (*rd_), true, trimBeg, trimEnd)); + assert(res.repOk()); +#ifndef NDEBUG + size_t gapsCheck = 0; + for(size_t i = 0; i < ned.size(); i++) { + if(ned[i].isGap()) gapsCheck++; + } + assert_eq(gaps, gapsCheck); + BTDnaString refstr; + for(size_t i = col; i <= origCol; i++) { + refstr.append(firsts5[(int)rf_[rfi_+i]]); + } + BTDnaString editstr; + Edit::toRef((*rd_), ned, editstr, true, trimBeg, trimEnd); + if(refstr != editstr) { + cerr << "Decoded nucleotides and edits don't match reference:" << endl; + cerr << " score: " << score.score() + << " (" << gaps << " gaps)" << endl; + cerr << " edits: "; + Edit::print(cerr, ned); + cerr << endl; + cerr << " decoded nucs: " << (*rd_) << endl; + cerr << " edited nucs: " << editstr << endl; + cerr << " reference nucs: " << refstr << endl; + assert(0); + } +#endif + met.btsucc++; // DP backtraces succeeded + return true; +} diff --git a/alignment_3n.cpp b/alignment_3n.cpp new file mode 100644 index 0000000..68b1202 --- /dev/null +++ b/alignment_3n.cpp @@ -0,0 +1,193 @@ +/* + * Copyright 2020, Yun (Leo) Zhang + * + * This file is part of HISAT-3N. 
+ *
+ * HISAT-3N is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HISAT-3N is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HISAT-3N. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "alignment_3n.h"
+#include "aln_sink.h"
+
+
+/**
+ * Decide whether two mate alignments form a concordant pair.
+ *
+ * A pair is rejected when both mates lie on the same strand, or when their
+ * anchors are more than maxPairDistance apart. The anchor of a forward mate is
+ * its location; the anchor of a reverse mate is location + readLength - 1.
+ * Equal anchors are accepted; otherwise the pair is accepted only when the
+ * forward mate is the one with the smaller anchor.
+ */
+bool Alignment::isConcordant(long long int location1, bool &forward1, long long int readLength1, long long int location2, bool &forward2, long long int readLength2) {
+    // Mates on the same strand are never concordant.
+    if (forward1 == forward2) {
+        return false;
+    }
+    // A reverse-strand mate is anchored at its last covered position.
+    const long long int anchor1 = forward1 ? location1 : location1 + readLength1 - 1;
+    const long long int anchor2 = forward2 ? location2 : location2 + readLength2 - 1;
+    // Anchors that are too far apart are rejected outright.
+    if (abs(anchor1-anchor2) > maxPairDistance) {
+        return false;
+    }
+    if (anchor1 == anchor2) {
+        return true;
+    }
+    // Otherwise the forward mate has to be the left-most one.
+    const bool firstIsLeft = anchor1 < anchor2;
+    return firstIsLeft ? (forward1 && !forward2) : (!forward1 && forward2);
+}
+
+/**
+ * this is the basic function to calculate DNA pair score.
+ * if the distance between 2 alignments is more than penaltyFreeDistance_DNA, we reduce the score by the distance/100.
+ * if two alignment is concordant we add concordantScoreBounce to make sure to select the concordant pair as best pair.
+ *
+ * location0/location1, AS0/AS1, forward0/forward1 and readLength0/readLength1 describe the two mates.
+ * concordant is an output flag: it is always written, and is false when the pair is rejected for distance.
+ * returns numeric_limits<int>::min() when the two locations are more than maxPairDistance apart.
+ */
+int Alignment::calculatePairScore_DNA (long long int &location0, int& AS0, bool& forward0, long long int readLength0, long long int &location1, int &AS1, bool &forward1, long long int readLength1, bool& concordant) {
+    int score = ASPenalty*AS0 + ASPenalty*AS1;
+    // Keep the distance in 64 bits; narrowing the difference of two long long positions to int could wrap.
+    long long int distance = location0 - location1;
+    if (distance < 0) { distance = -distance; }
+    if (distance > maxPairDistance) {
+        // Never hand an unset flag back to the caller.
+        concordant = false;
+        return numeric_limits<int>::min();
+    }
+    // distance <= maxPairDistance from here on, so the penalty fits in an int.
+    if (distance > penaltyFreeDistance_DNA) { score -= static_cast<int>(distance/distancePenaltyFraction_DNA); }
+    concordant = isConcordant(location0, forward0, readLength0, location1, forward1, readLength1);
+    if (concordant) { score += concordantScoreBounce; }
+    return score;
+}
+
+/**
+ * this is the basic function to calculate RNA pair score.
+ * the base score is -ASPenalty times the mismatch counts (XM0, XM1) of the two mates.
+ * if the distance between 2 alignments is more than penaltyFreeDistance_RNA, we reduce the score by
+ * distance/distancePenaltyFraction_RNA.
+ * if the two alignments are concordant we add concordantScoreBounce so that a concordant pair is selected as best pair.
+ *
+ * concordant is an output flag: it is always written, and is false when the pair is rejected for distance.
+ * returns numeric_limits<int>::min() when the two locations are more than maxPairDistance apart.
+ */
+int Alignment::calculatePairScore_RNA (long long int &location0, int& XM0, bool& forward0, long long int readLength0, long long int &location1, int &XM1, bool &forward1, long long int readLength1, bool& concordant) {
+    int score = -ASPenalty*XM0 + -ASPenalty*XM1;
+    // Keep the distance in 64 bits; narrowing the difference of two long long positions to int could wrap.
+    long long int distance = location0 - location1;
+    if (distance < 0) { distance = -distance; }
+    if (distance > maxPairDistance) {
+        // Never hand an unset flag back to the caller.
+        concordant = false;
+        return numeric_limits<int>::min();
+    }
+    // distance <= maxPairDistance from here on, so the penalty fits in an int.
+    if (distance > penaltyFreeDistance_RNA) { score -= static_cast<int>(distance/distancePenaltyFraction_RNA); }
+    concordant = isConcordant(location0, forward0, readLength0, location1, forward1, readLength1);
+    if (concordant) { score += concordantScoreBounce; }
+    return score;
+}
+
+/**
+ * calculate the pairScore for a pair of alignment result. Output pair Score and number of pair.
+ * Do not update their pairScore.
+ *
+ * nPair is set to 0 when both alignments belong to the same pair segment, when both are unmapped,
+ * or when either one is a repeat alignment; it is set to 1 otherwise.
+ * when both alignments are mapped non-repeat alignments, the concordant flag of both is updated.
+ */
+int Alignment::calculatePairScore(Alignment *inputAlignment, int &nPair) {
+    int pairScore = numeric_limits<int>::min();
+    nPair = 0;
+    if (pairSegment == inputAlignment->pairSegment){
+        // both results come from the same pair segment: lowest score, no pair.
+        pairScore = numeric_limits<int>::min();
+    } else if (!mapped && !inputAlignment->mapped) {
+        // both unmapped.
+        pairScore = numeric_limits<int>::min()/2 - 1;
+    } else if (!mapped || !inputAlignment->mapped) {
+        // exactly one of the segments is unmapped.
+        pairScore = numeric_limits<int>::min()/2;
+        nPair = 1;
+    } else if ((!repeat && !inputAlignment->repeat)){
+        // both mapped and neither is a repeat alignment.
+        // Start from false: the score helpers can return before deciding concordance
+        // (mates too far apart), and the flag is applied to both alignments below.
+        bool pairConcordant = false;
+        if (DNA) {
+            pairScore = calculatePairScore_DNA(location,
+                                               AS,
+                                               forward,
+                                               readSequence.length(),
+                                               inputAlignment->location,
+                                               inputAlignment->AS,
+                                               inputAlignment->forward,
+                                               inputAlignment->readSequence.length(),
+                                               pairConcordant);
+        } else {
+            pairScore = calculatePairScore_RNA(location,
+                                               XM,
+                                               forward,
+                                               readSequence.length(),
+                                               inputAlignment->location,
+                                               inputAlignment->XM,
+                                               inputAlignment->forward,
+                                               inputAlignment->readSequence.length(),
+                                               pairConcordant);
+        }
+        setConcordant(pairConcordant);
+        inputAlignment->setConcordant(pairConcordant);
+        nPair = 1;
+    }
+    return pairScore;
+}
+
+/**
+ * update the unpaired-read counters of met from the number of best single alignments.
+ */
+void Alignments::reportStats_single(ReportingMetrics& met) {
+    const int nAlignment = alignmentPositions.nBestSingle;
+    if (nAlignment == 0) {
+        met.nunp_0++;
+        return;
+    }
+    met.nunp_uni++;
+    if (nAlignment == 1) { met.nunp_uni1++; }
+    else { met.nunp_uni2++; }
+}
+
+/**
+ * update the paired-read counters of met from the best pair found.
+ */
+void Alignments::reportStats_paired(ReportingMetrics& met) {
+    const bool singleBestPair = (alignmentPositions.nBestPair == 1);
+    if (alignmentPositions.concordantExist) {
+        assert(alignmentPositions.nBestPair > 0);
+        met.nconcord_uni++;
+        if (singleBestPair) { met.nconcord_uni1++; }
+        else { met.nconcord_uni2++; }
+        return;
+    }
+    met.nconcord_0++;
+    if (alignmentPositions.nBestPair == 0) {
+        met.nunp_0_0 += 2;
+        return;
+    }
+    // numeric_limits<int>::min()/2 is the score calculatePairScore gives a pair with exactly one mapped mate.
+    if (alignmentPositions.bestPairScore == numeric_limits<int>::min()/2) {
+        // one mate is unmapped, one mate is mapped
+        met.nunp_0_0++;
+        met.nunp_0_uni++;
+        if (singleBestPair) { met.nunp_0_uni1++; }
+        else { met.nunp_0_uni2++; }
+        return;
+    }
+    // both mates are mapped
+    if (singleBestPair) {
+        met.ndiscord++;
+    } else {
+        met.nunp_0_uni += 2;
+        met.nunp_0_uni2 += 2;
+    }
+}
+
diff --git a/alignment_3n.h b/alignment_3n.h
new file mode 100644
index 0000000..deefede
--- /dev/null
+++ b/alignment_3n.h
@@ -0,0 +1,1214 @@
+/*
+ * Copyright 2020, Yun (Leo) Zhang
+ *
+ * This file is part of HISAT-3N.
+ *
+ * HISAT-3N is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HISAT-3N is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HISAT-3N. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef HISAT2_ALIGNMENT_3N_H +#define HISAT2_ALIGNMENT_3N_H + +#include +#include +#include +#include +#include +#include "sstring.h" +#include "util.h" +#include "hisat2lib/ht2.h" +#include "read.h" +#include "outq.h" +#include "reference.h" +#include +#include +#include "position_3n.h" +#include "utility_3n.h" +#include "simple_func.h" + + +extern char usrInput_convertedFrom; +extern char usrInput_convertedTo; +extern char usrInput_convertedFromComplement; +extern char usrInput_convertedToComplement; +extern SimpleFunc scoreMin; // minimum valid score as function of read len +extern int penMmcMax; // max mm penalty + +extern char hs3N_convertedFrom; +extern char hs3N_convertedTo; +extern char hs3N_convertedFromComplement; +extern char hs3N_convertedToComplement; + +extern vector repeatHandles; +extern struct ht2_index_getrefnames_result *refNameMap; +extern int repeatLimit; +extern bool uniqueOutputOnly; +extern int directional3NMapping; + +using namespace std; + +struct ReportingMetrics; + +/** + * the data structure to store all information of one alignment result. + */ +class Alignment { +public: + // basic information + BTString readName; + int flag; + BTString chromosomeName; + int chromosomeIndex; // the chromosome index to use getStretch() function + long long int location; + BTString MAPQ; + BTString cigarString; + vector cigarSegments; + int cigarLength; // this is the length that read cover genome. + BTString pairToChromosome; + long long int pairToLocation; + long long int pairingDistance; + BTString readSequence; + BTString readQuality; + //tags + int AS; // alignment score + int NH; // number of alignment + int XM; // number of mismatch + int NM; // edit distance + int YS; // mate's AS + BTString MD; + BTString YT; //"UU" for single-end. "CP" for concordant alignment, "DP" for disconcordant alignment, "UP" for else. + // special tags in HISAT-3N + int Yf; // number of conversion. + int Zf; // number of unconverted base. 
+ char YZ; // this tag shows alignment strand: check makeYZ function for the classification rule + // + for REF strand (conversionCount[0] is equal or smaller than conversionCount[1]), + // - for REF-RC strand (conversionCount[1] is smaller) + // unChanged tags + BTString unChangedTags; + BTString passThroughLine; // this is controlled by print_xr_ in SamConfig + + // for pairScore calculation + static const int maxPairDistance = 500000; + static const int penaltyFreeDistance_DNA = 1000; + static const int penaltyFreeDistance_RNA = 100000; + static const int distancePenaltyFraction_DNA = 100; + static const int distancePenaltyFraction_RNA = 1000; + static const int ASPenalty = 100; + static const int concordantScoreBounce = 500000; + + // intermediate variable + bool outputted = false; // whether the alignment is outputted. + bool DNA = false; + int cycle_3N; // indicate which cycle_3N make this alignment result. 0 or 3 for repeatHandles[0], else repeatHandles[1] + bool paired; + bool forward; + bool mapped; + bool concordant; + int64_t MinimumScore; + int pairSegment; // 0 for first segment, 1 for second segment. + struct ht2_repeat_expand_result *repeatResult = nullptr; + int pairScore; // to identify the better pair + bool mateMapped; // to adjust the YT tag + bool repeat; + bool pairToRepeat; + RepeatMappingPositions repeatPositions; // to store the expanded repeat information + int conversionCount[2] = {0}; // there are two type of conversion could happen, save the number of conversion separately. + int unConversionCount[2] = {0}; // save the unconverted base count. 
+    string intToBase = "ACGTN";
+
+    /**
+     * reset the alignment to its empty state so the object can be reused.
+     * frees repeatResult if one is still attached.
+     */
+    void initialize() {
+        readName.clear();
+        flag = -1;
+        chromosomeName.clear();
+        chromosomeIndex = -1;
+        location = 0;
+        MAPQ.clear();
+        cigarString.clear();
+        cigarSegments.clear();
+        cigarLength = 0;
+        pairToChromosome.clear();
+        pairToLocation = 0;
+        pairingDistance = 0;
+        readSequence.clear();
+        readQuality.clear();
+
+        AS = numeric_limits<int>::min();
+        NH = 0;
+        XM = 0;
+        NM = 0;
+        YS = 0;
+        MD.clear();
+        YT.clear();
+        Yf = 0;
+        Zf = 0;
+        unChangedTags.clear();
+
+        outputted = false;
+        DNA = false;
+        cycle_3N = -1;
+        paired = false;
+        forward = false;
+        mapped = false;
+        concordant = false;
+        pairSegment = 0;
+        if (repeatResult != nullptr) {
+            free(repeatResult);
+            repeatResult = nullptr;
+        }
+        pairScore = numeric_limits<int>::min();
+        mateMapped = false;
+
+        repeat = false;
+        pairToRepeat = false;
+        repeatPositions.initialize();
+        conversionCount[0] = 0;
+        conversionCount[1] = 0;
+        unConversionCount[0] = 0;
+        unConversionCount[1] = 0;
+        passThroughLine.clear();
+    }
+
+    Alignment() {
+        initialize();
+    }
+
+    // NOTE(review): repeatResult is freed here; a copied Alignment would free the same
+    // pointer twice - confirm the class is never copied while repeatResult is set.
+    ~Alignment() {
+        if (repeatResult != nullptr) free(repeatResult);
+    }
+
+    /**
+     * change YS tag for output: YS takes the AS of the given mate alignment.
+     */
+    void setYS (Alignment* input) {
+        YS = input->AS;
+    }
+
+    /**
+     * change YS tag for output: YS takes the AS of the given repeat mapping position.
+     */
+    void setYS(RepeatMappingPosition* input) {
+        YS = input->AS;
+    }
+
+    /**
+     * change concordant status and flag (bit 2 of flag mirrors the status).
+     */
+    void setConcordant(bool concordant_) {
+        concordant = concordant_;
+        if (concordant) {
+            flag |= 2;
+        } else {
+            flag &= ~((int)2);
+        }
+    }
+
+    /**
+     * change mateMapped status and flag.
+     * the mate counts as mapped when *mateLocation is non-zero; bit 8 of flag is cleared
+     * for a mapped mate and set for an unmapped one. a null pointer leaves everything unchanged.
+     */
+    void setMateMappingFlag(long long int *mateLocation) {
+        if (mateLocation == nullptr) { return; }
+        mateMapped = *mateLocation != 0;
+        if (mateMapped) {
+            flag &= ~((int)8);
+        } else {
+            flag |= 8;
+        }
+    }
+
+    /**
+     * change YT tag base on mateMapped and concordant information.
+     */
+    void setYT() {
+        // single-end read
+        if (!paired) {
+            YT = "UU";
+            return;
+        }
+        // paired read whose mate is not mapped
+        if (!mateMapped) {
+            YT = "UP";
+            return;
+        }
+        // both mates mapped: concordant or not
+        if (concordant) {
+            YT = "CP";
+        } else {
+            YT = "DP";
+        }
+    }
+
+    /**
+     * set NH to the number of alignments and derive MAPQ from it.
+     * does nothing for an unmapped read; MAPQ is left untouched when nAlignment is 0.
+     */
+    void updateNH(int nAlignment) {
+        if (!mapped) { return; }
+        NH = nAlignment;
+        if (nAlignment == 0) { return; }
+        if (nAlignment == 1) {
+            MAPQ = "60";
+        } else {
+            MAPQ = "1";
+        }
+    }
+
+    /**
+     * decode the fields stored in flag and mark the read as secondary alignment (bit 256).
+     * also refreshes MinimumScore from scoreMin and the read length.
+     */
+    void extractFlagInfo() {
+        paired = (flag & 1) != 0;
+        forward = (flag & 16) == 0;
+        // every read is turned into a secondary alignment
+        flag |= 256;
+        mapped = (flag & 4) == 0;
+        // bit 128 marks the second segment; otherwise it is the first segment or a single read.
+        pairSegment = (flag & 128) ? 1 : 0;
+        concordant = (flag & 2) != 0;
+        if (!mapped) {
+            repeat = false;
+        }
+        MinimumScore = scoreMin.f(readSequence.length());
+    }
+
+    /**
+     * calculate the pairScore for a pair of alignment result. Output pair Score and number of pair.
+     * Do not update their pairScore.
+     */
+    int calculatePairScore(Alignment *inputAlignment, int &nPair);
+
+    /**
+     * make YZ tag.
+     * if directional3NMapping is 2:
+     *      if the (pairSegment is 0) && (forward) => YZ = REF-RC (-)
+     *      if the (pairSegment is 0) && (reverse) => YZ = REF (+)
+     *      if the (pairSegment is 1) && (forward) => YZ = REF (+)
+     *      if the (pairSegment is 1) && (reverse) => YZ = REF-RC (-)
+     * else if (directional3NMapping is 1) or (no conversion) or (conversion type 0 == conversion type 1):
+     *      if the (pairSegment is 0) && (forward) => YZ = REF (+)
+     *      if the (pairSegment is 0) && (reverse) => YZ = REF-RC (-)
+     *      if the (pairSegment is 1) && (forward) => YZ = REF-RC (-)
+     *      if the (pairSegment is 1) && (reverse) => YZ = REF (+)
+     * else if conversion type 0 has the larger count, the read is mapped to REF (+).
+     * else (conversion type 1 has the larger count) the read is mapped to REF-RC (-).
+     */
+    void makeYZ(char &YZ_string) {
+        // directional3NMapping == 2: the strand follows pair segment and read orientation only.
+        if (directional3NMapping == 2){
+            if (pairSegment == 0 && forward) {
+                YZ_string = '-';
+            } else if (pairSegment == 0 && !forward) {
+                YZ_string = '+';
+            } else if (pairSegment == 1 && forward) {
+                YZ_string = '+';
+            } else if (pairSegment == 1 && !forward) {
+                YZ_string = '-';
+            }
+        // directional3NMapping == 1, or the conversion counts cannot decide (both zero or equal):
+        // same rule with the opposite signs.
+        } else if (directional3NMapping == 1 || (conversionCount[0] == 0 && conversionCount[1] == 0) || conversionCount[0] == conversionCount[1]){
+            if (pairSegment == 0 && forward) {
+                YZ_string = '+';
+            } else if (pairSegment == 0 && !forward) {
+                YZ_string = '-';
+            } else if (pairSegment == 1 && forward) {
+                YZ_string = '-';
+            } else if (pairSegment == 1 && !forward) {
+                YZ_string = '+';
+            }
+        // the counts differ here, so '+' is chosen when conversionCount[0] is the larger one.
+        } else if (conversionCount[0] >= conversionCount[1]) {
+            YZ_string = '+';
+        } else {
+            YZ_string = '-';
+        }
+    }
+
+    /**
+     * make Yf tag, Yf tag is to count the number of conversion in the string.
+     * use YZ tag to decide which type of conversion is legal conversion:
+     * '+' returns conversionCount[0], '-' returns conversionCount[1].
+     * any other YZTag fails the assert (and yields -1 when asserts are compiled out).
+     */
+    int makeYf(char YZTag) {
+        int outYf = -1;
+        if (YZTag == '+'){
+            outYf = conversionCount[0];
+        } else if (YZTag == '-'){
+            outYf = conversionCount[1];
+        }
+        assert(outYf >= 0);
+        return outYf;
+    }
+
+    /**
+     * make Zf tag, Zf tag is to count the number of un-converted base in the string.
+     * use YZ tag to decide which type of conversion is legal conversion:
+     * '+' returns unConversionCount[0], '-' returns unConversionCount[1].
+     * any other YZTag fails the assert (and yields -1 when asserts are compiled out).
+     */
+    int makeZf(char YZTag) {
+        int outZf = -1;
+        if (YZTag == '+'){
+            outZf = unConversionCount[0];
+        } else if (YZTag == '-'){
+            outZf = unConversionCount[1];
+        }
+        assert (outZf >= 0);
+        return outZf;
+    }
+
+    /**
+     * expand the repeat mapping location and construct MD for each location.
+     * return true if there is any mapping location pass the filter, else, false.
+     * an unmapped read returns true without doing anything.
+     * the accepted locations are collected in repeatPositions.
+     */
+    bool constructRepeatMD(BitPairReference* bitReference, MappingPositions &alignmentPositions) {
+
+        if (!mapped) {
+            return true;
+        }
+
+        // expand the repeat locations
+        // cycle_3N 0 or 3 selects repeatHandles[0], any other value repeatHandles[1].
+        // NOTE(review): err is never inspected and repeatResult is dereferenced right below -
+        // confirm ht2_repeat_expand cannot fail or leave repeatResult null here.
+        ht2_error_t err = ht2_repeat_expand((cycle_3N == 0 || cycle_3N == 3) ? repeatHandles[0] : repeatHandles[1],
+                                            chromosomeName.toZBuf(),
+                                            location - 1,
+                                            readSequence.length(),
+                                            &repeatResult);
+
+        BTString chromosomeRepeat;
+        long long int locationRepeat;
+        for (int i = 0; i < repeatResult->count; i++) {
+            struct ht2_position *pos = &repeatResult->positions[i];
+            chromosomeRepeat = refNameMap->names[pos->chr_id];
+            // keep only the part of the reference name before the first space.
+            for (int j = 0; j < chromosomeRepeat.length(); j++) {
+                if (chromosomeRepeat[j] == ' ') {
+                    chromosomeRepeat.trimEnd(chromosomeRepeat.length() - j);
+                    break;
+                }
+            }
+            // location is passed to ht2_repeat_expand as location - 1; add 1 back to the returned position.
+            locationRepeat = (pos->pos) + 1;
+            bool genomeForward = pos->direction == 0;
+            if (!genomeForward) { continue; } // if the repeat mapping direction is different to the designed direction, ignore it.
+
+            // if the mapping location is already exist, continue.
+            if (alignmentPositions.positionExist(chromosomeRepeat, locationRepeat, pairSegment)){
+                continue;
+            }
+
+            // get reference sequence
+            // NOTE(review): reinterpret_cast and max appear without template arguments below (and
+            // SStringExpandable probably too); they look lost in transcription - restore them from
+            // the original header before building.
+            ASSERT_ONLY(SStringExpandable destU32);
+            SStringExpandable raw_refbuf;
+            raw_refbuf.resize(cigarLength + 16);
+            raw_refbuf.clear();
+            int off = bitReference->getStretch(
+                    reinterpret_cast(raw_refbuf.wbuf()),
+                    (size_t)pos->chr_id,
+                    (size_t)max(locationRepeat-1, 0),
+                    (size_t)cigarLength ASSERT_ONLY(, destU32));
+            char* refSeq = raw_refbuf.wbuf() + off;
+            // translate the fetched base codes into letters through intToBase.
+            BTString refSequence;
+            refSequence.resize(cigarLength);
+            for (int j = 0; j < cigarLength; j++) {
+                refSequence.set(intToBase[*(refSeq + j)], j);
+            }
+
+            // check whether the refSequence is exist. if do, directly append the repeat.
+            int repeatPositionsIndex;
+            if (repeatPositions.sequenceExist(refSequence, repeatPositionsIndex)) {
+                repeatPositions.append(chromosomeRepeat, locationRepeat, repeatPositionsIndex);
+                continue;
+            }
+
+            BTString newMD;
+            int newMismatch = 0;
+            char repeatYZ;
+            int repeatYf;
+            int repeatZf;
+            if (!constructRepeatMD(refSequence, newMD, newMismatch, repeatYf, repeatZf, repeatYZ)) {
+                continue;
+            }
+
+            // every additional mismatch costs penMmcMax; drop the location if the score falls below MinimumScore.
+            int newXM = XM + newMismatch;
+            int newNM = NM + newMismatch;
+            int newAS = AS - penMmcMax * newMismatch;
+            if (newAS < MinimumScore)
+            {
+                continue;
+            }
+            repeatPositions.append(locationRepeat, chromosomeRepeat, refSequence,newAS, newMD, newXM, newNM, repeatYf, repeatZf, repeatYZ);
+
+            // if there are too many mappingPosition exist return.
+            if (repeatPositions.size() >= repeatLimit || alignmentPositions.size() > repeatLimit) {
+                return true;
+            }
+        }
+        if (repeatPositions.size() == 0) {
+            return false;
+        } else {
+            return true;
+        }
+    }
+
+    /**
+     * for each repeat mapping position, construct its MD
+     * return true if the mapping result does not have a lot of mismatch, else return false.
+     *
+     * refSeq holds the reference bases the CIGAR is walked against.
+     * newMD_String receives the MD text (it is appended to, so pass an empty string).
+     * newMismatch receives the number of mismatches beyond XM (never negative).
+     * repeatYZ, repeatYf and repeatZf receive the YZ, Yf and Zf tags of this position.
+     * conversionCount and unConversionCount are recomputed from scratch.
+     * as written, this overload always returns true.
+     */
+    bool constructRepeatMD(BTString &refSeq, BTString &newMD_String, int &newMismatch, int& repeatYf, int& repeatZf, char &repeatYZ) {
+        char buf[1024];
+
+        conversionCount[0] = 0;
+        conversionCount[1] = 0;
+        unConversionCount[0] = 0;
+        unConversionCount[1] = 0;
+
+        int readPos = 0;
+        long long int refPos = 0;
+        int count = 0; // length of the current run of matching bases
+        int newXM = 0;
+
+        char cigarSymbol;
+        int cigarLen;
+        for (int i = 0; i < cigarSegments.size(); i++) {
+            cigarSymbol = cigarSegments[i].getLabel();
+            cigarLen = cigarSegments[i].getLen();
+
+            if (cigarSymbol == 'S') {
+                readPos += cigarLen;
+            } else if (cigarSymbol == 'N') {
+                refPos += cigarLen;
+            } else if (cigarSymbol == 'M') {
+                for (int j = 0; j < cigarLen; j++) {
+                    char readChar = readSequence[readPos];
+                    char refChar = refSeq[refPos];
+                    if (readChar == refChar) {
+                        if (refChar == usrInput_convertedFrom)
+                        {
+                            unConversionCount[0]++;
+                        }
+                        else if (refChar == usrInput_convertedFromComplement)
+                        {
+                            unConversionCount[1]++;
+                        }
+                        count++;
+                    } else {// mismatch
+                        // output matched count
+                        if (count != 0) {
+                            itoa10(count, buf);
+                            newMD_String.append(buf);
+                            count = 0;
+                        }
+                        // output mismatch; two adjacent letters are separated by a 0
+                        if (!newMD_String.empty() && isalpha(newMD_String[newMD_String.length()-1])) {
+                            newMD_String.append('0');
+                        }
+                        if ((readChar == usrInput_convertedTo) && (refChar == usrInput_convertedFrom)) {
+                            conversionCount[0]++;
+                        } else if ((readChar == usrInput_convertedToComplement) && (refChar == usrInput_convertedFromComplement)) {
+                            conversionCount[1]++;
+                        } else {
+                            // real mismatch
+                            newXM++;
+                        }
+                        newMD_String.append(refChar);
+                    }
+                    readPos++;
+                    refPos++;
+                }
+            } else if (cigarSymbol == 'I') {
+                readPos += cigarLen;
+            } else if (cigarSymbol == 'D') {
+                // flush the pending match run first (as constructMD does); otherwise the matches
+                // before the deletion would be added to the run that follows it.
+                if (count != 0) {
+                    itoa10(count, buf);
+                    newMD_String.append(buf);
+                    count = 0;
+                }
+                newMD_String.append('^');
+                for (int j = 0; j < cigarLen; j++) {
+                    newMD_String.append(refSeq[refPos]);
+                    refPos++;
+                }
+            }
+        }
+
+        if (count != 0) {
+            itoa10(count, buf);
+            newMD_String.append(buf);
+        }
+        // MD has to start and end with a number
+        if (isalpha(newMD_String[0])) { newMD_String.insert('0', 0); }
+        if (isalpha(newMD_String[newMD_String.length()-1])) { newMD_String.append('0'); }
+
+        makeYZ(repeatYZ);
+        int badConversion = 0;
+        // identify the bad conversion number based on repeatYZ;
+        if (repeatYZ == '+') {
+            badConversion = conversionCount[1];
+        } else {
+            badConversion = conversionCount[0];
+        }
+
+        repeatYf = makeYf(repeatYZ);
+        // makeZf selects by strand tag, so it must receive repeatYZ (not the still-unset repeatZf).
+        repeatZf = makeZf(repeatYZ);
+
+        // conversions of the wrong type count as mismatches
+        newXM += badConversion;
+        newMismatch = newXM - XM;
+
+        if (newMismatch < 0){
+            newMismatch = 0;
+        }
+
+        return true;
+    }
+
+    /**
+     * for each non-repeat mapping position, construct its MD
+     * also sets YZ, Yf and Zf, and adds the extra mismatches found to NM and XM while lowering AS
+     * by penMmcMax for each of them.
+     * return false if AS drops below MinimumScore, else return true (an unmapped read returns true untouched).
+     * when pairToRepeat is set, the position is also appended to repeatPositions.
+     */
+    bool constructMD(BitPairReference* bitReference) {
+        if (!mapped) {
+            return true;
+        }
+        char buf[1024];
+        MD.clear();
+
+        // NOTE(review): reinterpret_cast and max appear without template arguments below (and
+        // SStringExpandable probably too); they look lost in transcription - restore them from
+        // the original header before building.
+        ASSERT_ONLY(SStringExpandable destU32);
+        SStringExpandable raw_refbuf;
+        raw_refbuf.resize(cigarLength + 16);
+        raw_refbuf.clear();
+        int off = bitReference->getStretch(
+                reinterpret_cast(raw_refbuf.wbuf()),
+                (size_t)chromosomeIndex,
+                (size_t)max(location-1, 0),
+                (size_t)cigarLength ASSERT_ONLY(, destU32));
+        char* refSeq = raw_refbuf.wbuf() + off;
+
+        int readPos = 0;
+        long long int refPos = 0;
+        int count = 0; // length of the current run of matching bases
+        int newXM = 0;
+
+        char cigarSymbol;
+        int cigarLen;
+        for (int i = 0; i < cigarSegments.size(); i++) {
+            cigarSymbol = cigarSegments[i].getLabel();
+            cigarLen = cigarSegments[i].getLen();
+            if (cigarSymbol == 'S') {
+                readPos += cigarLen;
+            } else if (cigarSymbol == 'N') {
+                refPos += cigarLen;
+            } else if (cigarSymbol == 'M') {
+                for (int j = 0; j < cigarLen; j++) {
+                    char readChar = readSequence[readPos];
+                    // the fetched reference holds base codes; intToBase turns them into letters
+                    char refChar = intToBase[*(refSeq + refPos)];
+                    if (readChar == refChar) {
+                        if (refChar == usrInput_convertedFrom)
+                        {
+                            unConversionCount[0]++;
+                        }
+                        else if (refChar == usrInput_convertedFromComplement)
+                        {
+                            unConversionCount[1]++;
+                        }
+                        count++;
+                    } else {// mismatch
+                        // output matched count
+                        if (count != 0) {
+                            itoa10(count, buf);
+                            MD.append(buf);
+                            count = 0;
+                        }
+                        // output mismatch; two adjacent letters are separated by a 0
+                        if (!MD.empty() && isalpha(MD[MD.length()-1])) {
+                            MD.append('0');
+                        }
+
+                        if ((readChar == usrInput_convertedTo) && (refChar == usrInput_convertedFrom)) {
+                            conversionCount[0]++;
+                        } else if ((readChar == usrInput_convertedToComplement) && (refChar == usrInput_convertedFromComplement)) {
+                            conversionCount[1]++;
+                        } else {
+                            // real mismatch
+                            newXM++;
+                        }
+                        MD.append(refChar);
+                    }
+                    readPos++;
+                    refPos++;
+                }
+            } else if (cigarSymbol == 'I') {
+                readPos += cigarLen;
+            } else if (cigarSymbol == 'D') {
+                // flush the pending match run before the deletion
+                if (count != 0) {
+                    itoa10(count, buf);
+                    MD.append(buf);
+                    count = 0;
+                }
+                MD.append('^');
+                for (int j = 0; j < cigarLen; j++) {
+                    MD.append(intToBase[*(refSeq + refPos)]);
+                    refPos++;
+                }
+            }
+        }
+
+        if (count != 0) {
+            itoa10(count, buf);
+            MD.append(buf);
+        }
+        // MD has to start and end with a number
+        if (isalpha(MD[0])) { MD.insert('0', 0); }
+        if (isalpha(MD[MD.length()-1])) { MD.append('0'); }
+
+        makeYZ(YZ);
+        int badConversion = 0;
+        // identify the bad conversion number based on YZ tag;
+        if (YZ == '+') {
+            badConversion = conversionCount[1];
+        } else {
+            badConversion = conversionCount[0];
+        }
+        Yf = makeYf(YZ);
+        Zf = makeZf(YZ);
+
+        // conversions of the wrong type count as mismatches; keep only what exceeds the current XM
+        newXM += badConversion;
+        newXM -= XM;
+
+        if (newXM < 0){
+            newXM = 0;
+        }
+
+
+        NM += newXM;
+        XM += newXM;
+        AS = AS - penMmcMax * newXM;
+        if (AS < MinimumScore)
+        {
+            return false;
+        }
+        BTString tmp;
+        if (pairToRepeat) {
+            repeatPositions.append(location, chromosomeName, tmp, AS, MD, XM, NM, Yf, Zf, YZ);
+        }
+        return true;
+    }
+
+    /**
+     * output the tags for non-repeat alignment.
+ */ + void outputTags(BTString& o) { + char buf[1024]; + if (mapped) { + o.append('\t'); + // AS + assert(AS <= 0); + o.append("AS:i:"); + itoa10(AS, buf); + o.append(buf); + o.append('\t'); + // NH + assert(NH > 0); + o.append("NH:i:"); + itoa10(NH, buf); + o.append(buf); + o.append('\t'); + // XM + assert(XM >= 0); + o.append("XM:i:"); + itoa10(XM, buf); + o.append(buf); + o.append('\t'); + // NM + assert(NM >= 0); + o.append("NM:i:"); + itoa10(NM, buf); + o.append(buf); + o.append('\t'); + // MD + assert(!MD.empty()); + o.append("MD:Z:"); + o.append(MD.toZBuf()); + o.append('\t'); + // YS + if (paired && mateMapped) { + o.append("YS:i:"); + itoa10(YS, buf); + o.append(buf); + o.append('\t'); + } + // YZ + o.append("YZ:A:"); + o.append(YZ); + o.append('\t'); + // Yf + o.append("Yf:i:"); + itoa10(Yf, buf); + o.append(buf); + o.append('\t'); + //Zf + o.append("Zf:i:"); + itoa10(Zf, buf); + o.append(buf); + } + // unchanged Tags + if (!unChangedTags.empty()) { + o.append('\t'); + o.append(unChangedTags.toZBuf()); + } + o.append(passThroughLine.toZBuf()); + } + + /** + * output the tags for repeat alignment. + */ + void outputTags(BTString& o, RepeatMappingPosition* repeatInfo){ + // this function is for repeat alignment output. 
+ char buf[1024]; + o.append('\t'); + // AS + assert(AS <= 0); + o.append("AS:i:"); + itoa10(repeatInfo->AS, buf); + o.append(buf); + o.append('\t'); + // NH + assert(NH > 0); + o.append("NH:i:"); + itoa10(NH, buf); + o.append(buf); + o.append('\t'); + // XM + assert(XM >= 0); + o.append("XM:i:"); + itoa10(repeatInfo->XM, buf); + o.append(buf); + o.append('\t'); + // NM + assert(NM >= 0); + o.append("NM:i:"); + itoa10(repeatInfo->NM, buf); + o.append(buf); + o.append('\t'); + // MD + assert(!MD.empty()); + o.append("MD:Z:"); + o.append(repeatInfo->MD.toZBuf()); + o.append('\t'); + // YS + if (paired) { + o.append("YS:i:"); + itoa10(YS, buf); + o.append(buf); + o.append('\t'); + } + //YT + o.append("YT:Z:"); + o.append(YT.toZBuf()); + o.append('\t'); + // YS + if (paired && mateMapped) { + o.append("YS:i:"); + itoa10(YS, buf); + o.append(buf); + o.append('\t'); + } + // YZ + o.append("YZ:A:"); + o.append(repeatInfo->YZ); + o.append('\t'); + // Yf + o.append("Yf:i:"); + itoa10(repeatInfo->Yf, buf); + o.append(buf); + o.append('\t'); + // Zf + o.append("Zf:i:"); + itoa10(repeatInfo->Zf, buf); + o.append(buf); + + // unchanged Tags + if (!unChangedTags.empty()) { + o.append('\t'); + o.append(unChangedTags.toZBuf()); + } + o.append(passThroughLine.toZBuf()); + } + + /** + * output alignment. this function is for both repeat and non-repeat alignment. 
+ */ + void outputAlignment (BTString& o, RepeatMappingPosition* repeatInfo, long long int* oppoLocation, bool& primaryAlignment) { + + BTString* outputChromosome; + long long int* outputLocation; + + if (repeatInfo == NULL) { + if (outputted) { return; } + outputted = true; + outputChromosome = &chromosomeName; + outputLocation = &location; + } else { + if (repeatInfo->outputted) { return; } + repeatInfo->outputted = true; + outputChromosome = &repeatInfo->repeatChromosome; + outputLocation = &repeatInfo->repeatLocation; + } + + //setMateMappingFlag(oppoLocation); + setYT(); + + char buf[1024]; + // readName + o.append(readName.toZBuf()); + o.append('\t'); + // flag, if it is primary alignment, -256 + assert(flag >=0); + itoa10(flag-primaryAlignment*256, buf); + o.append(buf); + o.append('\t'); + // chromosome + assert(!outputChromosome->empty()); + o.append(outputChromosome->toZBuf()); + o.append('\t'); + // location + assert(*outputLocation >= 0); + itoa10(*outputLocation, buf); + o.append(buf); + o.append('\t'); + + //MAPQ + o.append(MAPQ.toZBuf()); + o.append('\t'); + // cigar + o.append(cigarString.toZBuf()); + o.append('\t'); + // pair to chromosome + if (paired && *oppoLocation!=0) { + o.append("="); + o.append('\t'); + } else { + o.append("*"); + o.append('\t'); + } + // pair to location + if (paired) { + itoa10(*oppoLocation, buf); + o.append(buf); + o.append('\t'); + } else { + o.append('0'); + o.append('\t'); + } + // pairing distance + if (paired) { + itoa10(*oppoLocation - *outputLocation, buf); + o.append(buf); + o.append('\t'); + } else { + o.append('0'); + o.append('\t'); + } + // read sequence + o.append(readSequence.toZBuf()); + o.append('\t'); + // read quality + o.append(readQuality.toZBuf()); + + // make sure there is no '\t' at the beginning of unChangedTags + while (!unChangedTags.empty() && unChangedTags[0] == '\t') { + unChangedTags.remove(0); + } + + // tags + if (repeatInfo == NULL) { + outputTags(o); + o.append('\n'); + } else { + 
outputTags(o, repeatInfo->flagInfoIndex == -1?repeatInfo:&repeatPositions.positions[repeatInfo->flagInfoIndex]); + o.append('\n'); + } + } + + /** + * return true if two location is concordant. + * return false, if there are not concordant or too far (>maxPairDistance). + */ + static bool isConcordant(long long int location1, bool &forward1, long long int readLength1, long long int location2, bool &forward2, long long int readLength2); + + /** + * this is the basic function to calculate DNA pair score. + * if the distance between 2 alignments is more than penaltyFreeDistance_DNA, we reduce the score by the distance/100. + * if two alignment is concordant we add concordantScoreBounce to make sure to select the concordant pair as best pair. + */ + static int calculatePairScore_DNA (long long int &location0, int& AS0, bool& forward0, long long int readLength0, long long int &location1, int &AS1, bool &forward1, long long int readLength1, bool& concordant); + + /** + * this is the basic function to calculate RNA pair score. + * if the distance between 2 alignments is more than penaltyFreeDistance_RNA, we reduce the score by the distance/1000. + * if two alignment is concordant we add concordantScoreBounce to make sure to select the concordant pair as best pair. + */ + static int calculatePairScore_RNA (long long int &location0, int& XM0, bool& forward0, long long int readLength0, long long int &location1, int &XM1, bool &forward1, long long int readLength1, bool& concordant); +}; + +/** + * the data structure to store, process, and output all Alignment + */ +class Alignments { +public: + vector alignments; // pool to store current alignment result. + vector freeAlignments; // free pointer pool for new alignment result. after output a alignment, return the pointer back to this pool. 
+ + TReadId previousReadID; + MappingPositions alignmentPositions; // the pool to save all alignment position + + BTString readName[2]; // the read name could be different for segment 1 and segment 2. + BTDnaString readSequence[2]; // save the read sequence for output. + BTString qualityScore[2]; // save the quality score for output. + + bool paired; + const int repeatPoolLimit = 20; // this is the maximum number of repeat alignment we allowed. + bool multipleAligned; // check whether we have multiple alignment, it is work unique mode. + + const int maxPairScore = 500000; // maximum pair score, if pairScore == maxPairScore, both math are perfect match and the pairDistance is small. + + BitPairReference* bitReference; // bit pair reference sequence + bool DNA; + int nRepeatAlignment; // count number of repeat alignment we received, for short sequence we could receive a lot of repeat alignment result. + + BTString passThroughLines[2]; + + void initialize() { + alignmentPositions.initialize(); + paired = false; + multipleAligned = false; + nRepeatAlignment = 0; + + for (int i = 0; i < 2; i++) { + readName[i].clear(); + readSequence[i].clear(); + qualityScore[i].clear(); + passThroughLines[i].clear(); + } + for (int i = 0; i < alignments.size(); i++) { + alignments[i]->initialize(); + freeAlignments.push_back(alignments[i]); + } + alignments.clear(); + } + + Alignments(BitPairReference* ref, bool inputDNA): bitReference(ref), DNA(inputDNA) { + initialize(); + } + + ~Alignments() { + while (!freeAlignments.empty()) { + delete freeAlignments.back(); + freeAlignments.pop_back(); + } + for (int i = 0; i < alignments.size(); i++) { + delete alignments[i]; + } + } + + /** + * get sequence for rd. if it already exist, ignore it. + */ + void getSequence(const Read& rd) { + int pairSegment = rd.mate == 0? 
rd.mate : rd.mate-1; + if (readName[pairSegment].empty()) { readName[pairSegment] = rd.name; } + if (readSequence[pairSegment].empty()) { readSequence[pairSegment] = rd.originalFw; } + if (qualityScore[pairSegment].empty()) { qualityScore[pairSegment] = rd.qual; } + } + + /** + * return true if we want to receive more new alignment. + */ + bool acceptNewAlignment() { + if (uniqueOutputOnly && multipleAligned || + alignmentPositions.nBestSingle >= repeatLimit || + nRepeatAlignment > repeatPoolLimit || + alignmentPositions.nBestPair >= repeatLimit) { + return false; + } + return true; + } + + /** + * return the alignment back to freeAlignment pool. + */ + void returnToFreeAlignments (Alignment*& currentAlignment) { + currentAlignment->initialize(); + freeAlignments.push_back(currentAlignment); + } + + /** + * get a Alignment pointer from freeAlignments, if freeAlignment is empty, make a new Alignment. + */ + void getFreeAlignmentPointer(Alignment*& newAlignment) { + if (!freeAlignments.empty()) { + newAlignment = freeAlignments.back(); + freeAlignments.pop_back(); + } else { + newAlignment = new Alignment(); + } + } + + /** + * receive alignment information from AlnSink3NSam::appendMate() and append it to alignment pool. + */ + void append(Alignment *newAlignment) { + + newAlignment->extractFlagInfo(); + paired = newAlignment->paired; + newAlignment->DNA = DNA; + if (passThroughLines[newAlignment->pairSegment].empty()) { + passThroughLines[newAlignment->pairSegment] = newAlignment->passThroughLine; + } + + // check if the alignment is already exist. if exist, ignore it. + if (!alignmentPositions.append(newAlignment)) { + alignments.push_back(newAlignment); + return; + } + + // construct MD tag and check if the alignment has too many mismatch, if do, ignore it. 
+ if (newAlignment->repeat) { + if (!newAlignment->constructRepeatMD(bitReference, alignmentPositions)) { + alignmentPositions.badAligned(); + alignments.push_back(newAlignment); + return; + } + nRepeatAlignment++; // for each repeat alignment, record it. + } else { + // check mismatch, update tags + if (!newAlignment->constructMD(bitReference)) { + alignmentPositions.badAligned(); + alignments.push_back(newAlignment); + return; + } + } + + // update pair score or AS, for output using. + // if the new alignment has lower paring score or AS than bestPairScore or bestAS, ignore it. + if (paired) { + if (!alignmentPositions.updatePairScore()) { + alignments.push_back(newAlignment); + return; + } + if (alignmentPositions.bestPairScore == maxPairScore && alignmentPositions.nBestPair > 1) { + multipleAligned = true; + } + } else { + if (!alignmentPositions.updateAS()) { + alignments.push_back(newAlignment); + return; + } + if (alignmentPositions.bestAS == 0 && alignmentPositions.nBestSingle > 1) { + multipleAligned = true; + } + } + alignments.push_back(newAlignment); + } + + /** + * if there is no alignment, output unAlignment result. + * this function is important when hisat2 give mapped result, but it does not pass my filter (has too many mismatch). + */ + void outputUnAlignmentRead(BTString& o) { + if (paired) { + for (int i = 0; i < 2; i++) { + assert(!readName[i].empty()); + uint ReadNameLength = readName[i].length(); + if (readName[i].length() > 255) + { + ReadNameLength = 255; + } + for (int j = 0; j < ReadNameLength; j++) + { + if(isspace(readName[i][j])) { + break; + } + o.append(readName[i][j]); + } + o.append("\t"); + + string flag = (i == 0) ? 
"77" : "141"; + o.append(flag.c_str()); + + o.append("\t*\t0\t0\t*\t*\t0\t0\t"); + o.append(readSequence[i].toZBuf()); + o.append("\t"); + o.append(qualityScore[i].toZBuf()); + o.append("\tYT:Z:UP"); + o.append(passThroughLines[i].toZBuf()); + o.append('\n'); + } + } else { + assert(!readName[0].empty()); + string ReadName = ""; + uint ReadNameLength = readName[0].length(); + if (readName[0].length() > 255) + { + ReadNameLength = 255; + } + for (int j = 0; j < ReadNameLength; j++) + { + if(isspace(readName[0][j])) { + break; + } + o.append(readName[0][j]); + } + o.append("\t4\t*\t0\t0\t*\t*\t0\t0\t"); + o.append(readSequence[0].toZBuf()); + o.append("\t"); + o.append(qualityScore[0].toZBuf()); + o.append("\tYT:Z:UU"); + o.append(passThroughLines[0].toZBuf()); + o.append('\n'); + } + } + + /** + * report alignment statistics for single-end alignment + */ + void reportStats_single(ReportingMetrics& met); + + + /** + * report alignment statistics for paired-end alignment + */ + void reportStats_paired(ReportingMetrics& met); + + /** + * output single-end alignment reuslts + */ + void output_single(BTString& o, + ReportingMetrics& met) { + + reportStats_single(met); + + // output + if (uniqueOutputOnly && (alignmentPositions.nBestSingle != 1 || multipleAligned)) { + // do not output anything + } else if (alignments.empty() || alignmentPositions.nBestSingle == 0) { + // make a unalignment result and output it. + outputUnAlignmentRead(o); + } else { + // output + alignmentPositions.outputSingle(o); + } + } + + /** + * output paired-end alignment reuslts + */ + void output_paired(BTString& o, + ReportingMetrics& met) { + + reportStats_paired(met); + + if ((uniqueOutputOnly && (alignmentPositions.nBestPair != 1 || multipleAligned))) { + // do not report anything + } else if (alignments.empty() || + alignmentPositions.nBestPair == 0 || + alignmentPositions.bestPairScore == numeric_limits::min()) { + // make a unalignment result and output it. 
+ outputUnAlignmentRead(o); + } else { + // output + alignmentPositions.outputPair(o); + } + } + /** + * output function will be redirected to output_single or output_paired + */ + void output(ReportingMetrics& met, + BTString& o) { + + if (paired) { + output_paired(o, met); + } else { + output_single(o,met); + } + initialize(); + } +}; + +#endif //HISAT2_ALIGNMENT_3N_H diff --git a/alignment_3n_table.h b/alignment_3n_table.h new file mode 100644 index 0000000..83715a7 --- /dev/null +++ b/alignment_3n_table.h @@ -0,0 +1,287 @@ +/* + * Copyright 2020, Yun (Leo) Zhang + * + * This file is part of HISAT-3N. + * + * HISAT-3N is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT-3N is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT-3N. If not, see . 
+ */ + +#ifndef ALIGNMENT_3N_TABLE_H +#define ALIGNMENT_3N_TABLE_H + +#include +#include "utility_3n_table.h" + +extern bool uniqueOnly; +extern bool multipleOnly; +extern char convertFrom; +extern char convertTo; +extern char convertFromComplement; +extern char convertToComplement; + +using namespace std; + +/** + * the class to store information from one SAM line + */ +class Alignment { +public: + string chromosome; + long long int location; + long long int mateLocation; + int flag; + bool mapped; + char strand; + string sequence; + string quality; + bool unique; + string mapQ; + int NH; + vector bases; + CIGAR cigarString; + MD_tag MD; + unsigned long long readNameID; + int sequenceCoveredLength; // the sum of number is cigarString; + bool overlap; // if the segment could overlap with the mate segment. + bool paired; + + void initialize() { + chromosome.clear(); + location = -1; + mateLocation = -1; + flag = -1; + mapped = false; + MD.initialize(); + cigarString.initialize(); + sequence.clear(); + quality.clear(); + unique = false; + mapQ.clear(); + NH = -1; + bases.clear(); + readNameID = 0; + sequenceCoveredLength = 0; + overlap = false; + paired = false; + } + + /** + * for start position in input Line, check if it contain the target information. + */ + bool startWith(string* inputLine, int startPosition, string tag){ + for (int i = 0; i < tag.size(); i++){ + if (inputLine->at(startPosition+i) != tag[i]){ + return false; + } + } + return true; + } + + /** + * generate a hash value for readName + */ + void getNameHash(string& readName) { + readNameID = 0; + int a = 63689; + for (size_t i = 0; i < readName.size(); i++) { + readNameID = (readNameID * a) + (int)readName[i]; + } + } + + /** + * extract the information from SAM line to Alignment. 
+ */ + void parseInfo(string* line) { + int startPosition = 0; + int endPosition = 0; + int count = 0; + + while ((endPosition = line->find("\t", startPosition)) != string::npos) { + if (count == 0) { + string readName = line->substr(startPosition, endPosition - startPosition); + getNameHash(readName); + } else if (count == 1) { + flag = stoi(line->substr(startPosition, endPosition - startPosition)); + mapped = (flag & 4) == 0; + paired = (flag & 1) != 0; + } else if (count == 2) { + chromosome = line->substr(startPosition, endPosition - startPosition); + } else if (count == 3) { + location = stoll(line->substr(startPosition, endPosition - startPosition)); + } else if (count == 4) { + mapQ = line->substr(startPosition, endPosition - startPosition); + if (mapQ == "1") { + unique = false; + } else { + unique = true; + } + } else if (count == 5) { + cigarString.loadString(line->substr(startPosition, endPosition - startPosition)); + } else if (count == 7) { + mateLocation = stoll(line->substr(startPosition, endPosition - startPosition)); + } else if (count == 9) { + sequence = line->substr(startPosition, endPosition - startPosition); + } else if (count == 10) { + quality = line->substr(startPosition, endPosition - startPosition); + } else if (count > 10) { + if (startWith(line, startPosition, "MD")) { + MD.loadString(line->substr(startPosition + 5, endPosition - startPosition - 5)); + } else if (startWith(line, startPosition, "NM")) { + NH = stoi(line->substr(startPosition + 5, endPosition - startPosition - 5)); + } else if (startWith(line, startPosition, "YZ")) { + strand = line->at(endPosition-1); + } + } + startPosition = endPosition + 1; + count++; + } + if (startWith(line, startPosition, "MD")) { + MD.loadString(line->substr(startPosition + 5, endPosition - startPosition - 5)); + } else if (startWith(line, startPosition, "NM")) { + NH = stoi(line->substr(startPosition + 5, endPosition - startPosition - 5)); + } else if (startWith(line, startPosition, "YZ")) { + 
strand = line->at(endPosition-1); + } + } + + /** + * change the overlap = true, if the read is not uniquely mapped or the read segment is overlap to it's mate. + */ + void checkOverlap() { + if (!unique) { + overlap = true; + } else { + if (paired && (location + sequenceCoveredLength >= mateLocation)) { + overlap = true; + } else { + overlap = false; + } + } + } + + + /** + * parse the sam line to alignment information + */ + void parse(string* line) { + initialize(); + parseInfo(line); + if ((uniqueOnly && !unique) || (multipleOnly && unique)) { + return; + } + appendBase(); + } + + /** + * scan all base in read sequence label them if they are qualified. + */ + void appendBase() { + if (!mapped || sequenceCoveredLength > 500000) { // if the read's intron longer than 500,000 ignore this read + return; + } + + bases.reserve(sequence.size()); + for (int i = 0; i < sequence.size(); i++) { + bases.emplace_back(i); + } + int pos = adjustPos(); + + string match; + while (MD.getNextSegment(match)) { + if (isdigit(match.front())) { // the first char of match is digit this is match + int len = stoi(match); + for (int i = 0; i < len; i++) { + while (bases[pos].remove) { + pos++; + } + if ((strand == '+' && sequence[pos] == convertFrom) || + (strand == '-' && sequence[pos] == convertFromComplement)) { + bases[pos].setQual(quality[pos], false); + } else { + bases[pos].remove = true; + } + pos ++; + } + } else if (isalpha(match.front())) { // this is mismatch or conversion + char refBase = match.front(); + // for + strand, it should have C->T change + // for - strand, it should have G->A change + while (bases[pos].remove) { + pos++; + } + + if ((strand == '+' && refBase == convertFrom && sequence[pos] == convertTo) || + (strand == '-' && refBase == convertFromComplement && sequence[pos] == convertToComplement)){ + bases[pos].setQual(quality[pos], true); + } else { + bases[pos].remove = true; + } + pos ++; + } else { // deletion. do nothing. 
+ + } + } + } + + /** + * adjust the reference position in bases + */ + int adjustPos() { + + int readPos = 0; + int returnPos = 0; + int seqLength = sequence.size(); + + char cigarSymbol; + int cigarLen; + sequenceCoveredLength = 0; + + while (cigarString.getNextSegment(cigarLen, cigarSymbol)) { + sequenceCoveredLength += cigarLen; + if (cigarSymbol == 'S') { + if (readPos == 0) { // soft clip is at the begin of the read + returnPos = cigarLen; + for (int i = cigarLen; i < seqLength; i++) { + bases[i].refPos -= cigarLen; + } + } else { // soft clip is at the end of the read + // do nothing + } + readPos += cigarLen; + } else if (cigarSymbol == 'N') { + for (int i = readPos; i < seqLength; i++) { + bases[i].refPos += cigarLen; + } + } else if (cigarSymbol == 'M') { + for (int i = readPos; i < readPos+cigarLen; i++) { + bases[i].remove = false; + } + readPos += cigarLen; + } else if (cigarSymbol == 'I') { + for (int i = readPos + cigarLen; i < seqLength; i++) { + bases[i].refPos -= cigarLen; + } + readPos += cigarLen; + } else if (cigarSymbol == 'D') { + for (int i = readPos; i < seqLength; i++) { + bases[i].refPos += cigarLen; + } + } + } + return returnPos; + } + +}; + +#endif //ALIGNMENT_3N_TABLE_H diff --git a/aln_sink.cpp b/aln_sink.cpp new file mode 100644 index 0000000..b0d4244 --- /dev/null +++ b/aln_sink.cpp @@ -0,0 +1,785 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +#include +#include "aln_sink.h" +#include "aligner_seed.h" +#include "util.h" + +using namespace std; + + +/** + * Initialize state machine with a new read. The state we start in depends + * on whether it's paired-end or unpaired. + */ +void ReportingState::nextRead(bool paired) { + paired_ = paired; + if(paired) { + state_ = CONCORDANT_PAIRS; + doneConcord_ = false; + doneDiscord_ = p_.discord ? false : true; + doneUnpair1_ = p_.mixed ? false : true; + doneUnpair2_ = p_.mixed ? false : true; + exitConcord_ = ReportingState::EXIT_DID_NOT_EXIT; + exitDiscord_ = p_.discord ? + ReportingState::EXIT_DID_NOT_EXIT : + ReportingState::EXIT_DID_NOT_ENTER; + exitUnpair1_ = p_.mixed ? + ReportingState::EXIT_DID_NOT_EXIT : + ReportingState::EXIT_DID_NOT_ENTER; + exitUnpair2_ = p_.mixed ? + ReportingState::EXIT_DID_NOT_EXIT : + ReportingState::EXIT_DID_NOT_ENTER; + } else { + // Unpaired + state_ = UNPAIRED; + doneConcord_ = true; + doneDiscord_ = true; + doneUnpair1_ = false; + doneUnpair2_ = true; + exitConcord_ = ReportingState::EXIT_DID_NOT_ENTER; // not relevant + exitDiscord_ = ReportingState::EXIT_DID_NOT_ENTER; // not relevant + exitUnpair1_ = ReportingState::EXIT_DID_NOT_EXIT; + exitUnpair2_ = ReportingState::EXIT_DID_NOT_ENTER; // not relevant + } + doneUnpair_ = doneUnpair1_ && doneUnpair2_; + done_ = false; + nconcord_ = ndiscord_ = nunpair1_ = nunpair2_ = 0; + nunpairRepeat1_ = nunpairRepeat2_ = 0; + concordBest_ = getMinScore(); +} + +/** + * Caller uses this member function to indicate that one additional + * concordant alignment has been found. 
+ */ +bool ReportingState::foundConcordant(TAlScore score) { + assert(paired_); + assert_geq(state_, ReportingState::CONCORDANT_PAIRS); + assert(!doneConcord_); + + if(score > concordBest_) { + concordBest_ = score; + nconcord_ = 0; + } + nconcord_++; + + // DK CONCORDANT - debugging purpuses + // areDone(nconcord_, doneConcord_, exitConcord_); + + // No need to search for discordant alignments if there are one or more + // concordant alignments. + doneDiscord_ = true; + exitDiscord_ = ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED; + if(doneConcord_) { + // If we're finished looking for concordant alignments, do we have to + // continue on to search for unpaired alignments? Only if our exit + // from the concordant stage is EXIT_SHORT_CIRCUIT_M. If it's + // EXIT_SHORT_CIRCUIT_k or EXIT_WITH_ALIGNMENTS, we can skip unpaired. + assert_neq(ReportingState::EXIT_NO_ALIGNMENTS, exitConcord_); + if(exitConcord_ != ReportingState::EXIT_SHORT_CIRCUIT_M) { + if(!doneUnpair1_) { + doneUnpair1_ = true; + exitUnpair1_ = ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED; + } + if(!doneUnpair2_) { + doneUnpair2_ = true; + exitUnpair2_ = ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED; + } + } + } + updateDone(); + return done(); +} + +/** + * Caller uses this member function to indicate that one additional unpaired + * mate alignment has been found for the specified mate. + */ +bool ReportingState::foundUnpaired(bool mate1, bool repeat) { + assert_gt(state_, ReportingState::NO_READ); + // Note: it's not right to assert !doneUnpair1_/!doneUnpair2_ here. + // Even if we're done with finding + if(mate1) { + nunpair1_++; + if(repeat) { + nunpairRepeat1_++; + } + // Did we just finish with this mate? 
+ if(!doneUnpair1_) { + areDone(nunpair1_, doneUnpair1_, exitUnpair1_); + if(doneUnpair1_) { + doneUnpair_ = doneUnpair1_ && doneUnpair2_; + updateDone(); + } + } + if(nunpair1_ > 1) { + doneDiscord_ = true; + exitDiscord_ = ReportingState::EXIT_NO_ALIGNMENTS; + } + } else { + nunpair2_++; + if(repeat) { + nunpairRepeat2_++; + } + // Did we just finish with this mate? + if(!doneUnpair2_) { + areDone(nunpair2_, doneUnpair2_, exitUnpair2_); + if(doneUnpair2_) { + doneUnpair_ = doneUnpair1_ && doneUnpair2_; + updateDone(); + } + } + if(nunpair2_ > 1) { + doneDiscord_ = true; + exitDiscord_ = ReportingState::EXIT_NO_ALIGNMENTS; + } + } + return done(); +} + +/** + * Called to indicate that the aligner has finished searching for + * alignments. This gives us a chance to finalize our state. + * + * TODO: Keep track of short-circuiting information. + */ +void ReportingState::finish() { + if(!doneConcord_) { + doneConcord_ = true; + exitConcord_ = + ((nconcord_ > 0) ? + ReportingState::EXIT_WITH_ALIGNMENTS : + ReportingState::EXIT_NO_ALIGNMENTS); + } + assert_gt(exitConcord_, EXIT_DID_NOT_EXIT); + if(!doneUnpair1_) { + doneUnpair1_ = true; + exitUnpair1_ = + ((nunpair1_ > 0) ? + ReportingState::EXIT_WITH_ALIGNMENTS : + ReportingState::EXIT_NO_ALIGNMENTS); + } + assert_gt(exitUnpair1_, EXIT_DID_NOT_EXIT); + if(!doneUnpair2_) { + doneUnpair2_ = true; + exitUnpair2_ = + ((nunpair2_ > 0) ? + ReportingState::EXIT_WITH_ALIGNMENTS : + ReportingState::EXIT_NO_ALIGNMENTS); + } + assert_gt(exitUnpair2_, EXIT_DID_NOT_EXIT); + if(!doneDiscord_) { + // Check if the unpaired alignments should be converted to a single + // discordant paired-end alignment. + assert_eq(0, ndiscord_); + if(nconcord_ == 0 && nunpair1_ == 1 && nunpair2_ == 1) { + convertUnpairedToDiscordant(); + } + doneDiscord_ = true; + exitDiscord_ = + ((ndiscord_ > 0) ? 
+ ReportingState::EXIT_WITH_ALIGNMENTS : + ReportingState::EXIT_NO_ALIGNMENTS); + } + assert(!paired_ || exitDiscord_ > ReportingState::EXIT_DID_NOT_EXIT); + doneUnpair_ = done_ = true; + assert(done()); +} + +/** + * Populate given counters with the number of various kinds of alignments + * to report for this read. Concordant alignments are preferable to (and + * mutually exclusive with) discordant alignments, and paired-end + * alignments are preferable to unpaired alignments. + * + * The caller also needs some additional information for the case where a + * pair or unpaired read aligns repetitively. If the read is paired-end + * and the paired-end has repetitive concordant alignments, that should be + * reported, and 'pairMax' is set to true to indicate this. If the read is + * paired-end, does not have any conordant alignments, but does have + * repetitive alignments for one or both mates, then that should be + * reported, and 'unpair1Max' and 'unpair2Max' are set accordingly. + * + * Note that it's possible in the case of a paired-end read for the read to + * have repetitive concordant alignments, but for one mate to have a unique + * unpaired alignment. 
+ */ +void ReportingState::getReport( + uint64_t& nconcordAln, // # concordant alignments to report + uint64_t& ndiscordAln, // # discordant alignments to report + uint64_t& nunpair1Aln, // # unpaired alignments for mate #1 to report + uint64_t& nunpair2Aln, // # unpaired alignments for mate #2 to report + uint64_t& nunpairRepeat1Aln, // # unpaired alignments for mate #1 to report + uint64_t& nunpairRepeat2Aln, // # unpaired alignments for mate #2 to report + bool& pairMax, // repetitive concordant alignments + bool& unpair1Max, // repetitive alignments for mate #1 + bool& unpair2Max) // repetitive alignments for mate #2 + const +{ + nconcordAln = ndiscordAln = nunpair1Aln = nunpair2Aln = 0; + nunpairRepeat1Aln = nunpairRepeat2Aln = 0; + pairMax = unpair1Max = unpair2Max = false; + assert_gt(p_.khits, 0); + assert_gt(p_.mhits, 0); + if(paired_) { + // Do we have 1 or more concordant alignments to report? + if(exitConcord_ == ReportingState::EXIT_SHORT_CIRCUIT_k) { + // k at random + assert_geq(nconcord_, (uint64_t)p_.khits); + nconcordAln = p_.khits; + return; + } else if(exitConcord_ == ReportingState::EXIT_SHORT_CIRCUIT_M) { + assert(p_.msample); + assert_gt(nconcord_, 0); + pairMax = true; // repetitive concordant alignments + if(p_.mixed) { + unpair1Max = nunpair1_ > (uint64_t)p_.mhits; + unpair2Max = nunpair2_ > (uint64_t)p_.mhits; + } + // Not sure if this is OK + nconcordAln = 1; // 1 at random + return; + } else if(exitConcord_ == ReportingState::EXIT_WITH_ALIGNMENTS) { + assert_gt(nconcord_, 0); + // <= k at random + nconcordAln = min(p_.khits, nconcord_); + } + assert(!p_.mhitsSet() || nconcord_ <= (uint64_t)p_.mhits+1); + + // Do we have a discordant alignment to report? 
+ if(exitDiscord_ == ReportingState::EXIT_WITH_ALIGNMENTS) { + // Report discordant + assert(p_.discord); + ndiscordAln = 1; + return; + } + } + + assert_neq(ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED, exitUnpair1_); + assert_neq(ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED, exitUnpair2_); + + if((paired_ && !p_.mixed) || nunpair1_ + nunpair2_ == 0) { + // Unpaired alignments either not reportable or nonexistent + return; + } + + // Do we have 1 or more alignments for mate #1 to report? + if(exitUnpair1_ == ReportingState::EXIT_SHORT_CIRCUIT_k) { + // k at random + assert_geq(nunpair1_, (uint64_t)p_.khits); + nunpair1Aln = p_.khits; + } else if(exitUnpair1_ == ReportingState::EXIT_SHORT_CIRCUIT_M) { + assert(p_.msample); + assert_gt(nunpair1_, 0); + unpair1Max = true; // repetitive alignments for mate #1 + nunpair1Aln = 1; // 1 at random + } else if(exitUnpair1_ == ReportingState::EXIT_WITH_ALIGNMENTS) { + assert_gt(nunpair1_, 0); + // <= k at random + nunpair1Aln = min(nunpair1_, (uint64_t)p_.khits); + } + assert(!p_.mhitsSet() || paired_ || nunpair1_ <= (uint64_t)p_.mhits+1); + if(p_.repeat) nunpairRepeat1Aln = nunpairRepeat1_; + + // Do we have 1 or more alignments for mate #2 to report? + if(exitUnpair2_ == ReportingState::EXIT_SHORT_CIRCUIT_k) { + // k at random + nunpair2Aln = p_.khits; + } else if(exitUnpair2_ == ReportingState::EXIT_SHORT_CIRCUIT_M) { + assert(p_.msample); + assert_gt(nunpair2_, 0); + unpair2Max = true; // repetitive alignments for mate #2 + nunpair2Aln = 1; // 1 at random + } else if(exitUnpair2_ == ReportingState::EXIT_WITH_ALIGNMENTS) { + assert_gt(nunpair2_, 0); + // <= k at random + nunpair2Aln = min(nunpair2_, (uint64_t)p_.khits); + } + assert(!p_.mhitsSet() || paired_ || nunpair2_ <= (uint64_t)p_.mhits+1); + if(p_.repeat) nunpairRepeat2Aln = nunpairRepeat2_; +} + +/** + * Given the number of alignments in a category, check whether we + * short-circuited out of the category. 
Set the done and exit arguments to + * indicate whether and how we short-circuited. + */ +inline void ReportingState::areDone( + uint64_t cnt, // # alignments in category + bool& done, // out: whether we short-circuited out of category + int& exit) const // out: if done, how we short-circuited (-k? -m? etc) +{ + assert(!done); + // Have we exceeded the -k limit? + assert_gt(p_.khits, 0); + assert_gt(p_.mhits, 0); + if(cnt >= (uint64_t)p_.khits && !p_.mhitsSet()) { + done = true; + exit = ReportingState::EXIT_SHORT_CIRCUIT_k; + } + // Have we exceeded the -m or -M limit? + else if(p_.mhitsSet() && cnt > (uint64_t)p_.mhits) { + done = true; + assert(p_.msample); + exit = ReportingState::EXIT_SHORT_CIRCUIT_M; + } +} + +#ifdef ALN_SINK_MAIN + +#include + +bool testDones( + const ReportingState& st, + bool done1, + bool done2, + bool done3, + bool done4, + bool done5, + bool done6) +{ + assert(st.doneConcordant() == done1); + assert(st.doneDiscordant() == done2); + assert(st.doneUnpaired(true) == done3); + assert(st.doneUnpaired(false) == done4); + assert(st.doneUnpaired() == done5); + assert(st.done() == done6); + assert(st.repOk()); + return true; +} + +int main(void) { + cerr << "Case 1 (simple unpaired 1) ... 
"; + { + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + ReportingParams rp( + 2, // khits + 0, // mhits + 0, // pengap + false, // msample + false, // discord + false); // mixed + ReportingState st(rp); + st.nextRead(false); // unpaired read + assert(testDones(st, true, true, false, true, false, false)); + st.foundUnpaired(true); + assert(testDones(st, true, true, false, true, false, false)); + st.foundUnpaired(true); + assert(testDones(st, true, true, true, true, true, true)); + st.finish(); + assert(testDones(st, true, true, true, true, true, true)); + assert_eq(0, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(2, st.numUnpaired1()); + assert_eq(0, st.numUnpaired2()); + assert(st.repOk()); + st.getReport(nconcord, ndiscord, nunpair1, nunpair2, + pairMax, unpair1Max, unpair2Max); + assert_eq(0, nconcord); + assert_eq(0, ndiscord); + assert_eq(2, nunpair1); + assert_eq(0, nunpair2); + assert(!pairMax); + assert(!unpair1Max); + assert(!unpair2Max); + } + cerr << "PASSED" << endl; + + cerr << "Case 2 (simple unpaired 1) ... 
"; + { + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + ReportingParams rp( + 2, // khits + 3, // mhits + 0, // pengap + false, // msample + false, // discord + false); // mixed + ReportingState st(rp); + st.nextRead(false); // unpaired read + assert(testDones(st, true, true, false, true, false, false)); + st.foundUnpaired(true); + assert(testDones(st, true, true, false, true, false, false)); + st.foundUnpaired(true); + assert(testDones(st, true, true, false, true, false, false)); + st.foundUnpaired(true); + assert(testDones(st, true, true, false, true, false, false)); + st.foundUnpaired(true); + assert(testDones(st, true, true, true, true, true, true)); + assert_eq(0, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(4, st.numUnpaired1()); + assert_eq(0, st.numUnpaired2()); + st.finish(); + assert(testDones(st, true, true, true, true, true, true)); + assert_eq(0, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(4, st.numUnpaired1()); + assert_eq(0, st.numUnpaired2()); + assert(st.repOk()); + st.getReport(nconcord, ndiscord, nunpair1, nunpair2, + pairMax, unpair1Max, unpair2Max); + assert_eq(0, nconcord); + assert_eq(0, ndiscord); + assert_eq(0, nunpair1); + assert_eq(0, nunpair2); + assert(!pairMax); + assert(unpair1Max); + assert(!unpair2Max); + } + cerr << "PASSED" << endl; + + cerr << "Case 3 (simple paired 1) ... 
"; + { + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + ReportingParams rp( + 2, // khits + 3, // mhits + 0, // pengap + false, // msample + false, // discord + false); // mixed + ReportingState st(rp); + st.nextRead(true); // unpaired read + assert(testDones(st, false, true, true, true, true, false)); + st.foundUnpaired(true); + assert(testDones(st, false, true, true, true, true, false)); + st.foundUnpaired(true); + assert(testDones(st, false, true, true, true, true, false)); + st.foundUnpaired(true); + assert(testDones(st, false, true, true, true, true, false)); + st.foundUnpaired(true); + assert(testDones(st, false, true, true, true, true, false)); + st.foundUnpaired(false); + assert(testDones(st, false, true, true, true, true, false)); + st.foundUnpaired(false); + assert(testDones(st, false, true, true, true, true, false)); + st.foundUnpaired(false); + assert(testDones(st, false, true, true, true, true, false)); + st.foundUnpaired(false); + assert(testDones(st, false, true, true, true, true, false)); + st.foundConcordant(); + assert(testDones(st, false, true, true, true, true, false)); + st.foundConcordant(); + assert(testDones(st, false, true, true, true, true, false)); + st.foundConcordant(); + assert(testDones(st, false, true, true, true, true, false)); + st.foundConcordant(); + assert(testDones(st, true, true, true, true, true, true)); + assert_eq(4, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(4, st.numUnpaired1()); + assert_eq(4, st.numUnpaired2()); + st.finish(); + assert(testDones(st, true, true, true, true, true, true)); + assert_eq(4, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(4, st.numUnpaired1()); + assert_eq(4, st.numUnpaired2()); + assert(st.repOk()); + st.getReport(nconcord, ndiscord, nunpair1, nunpair2, + pairMax, unpair1Max, unpair2Max); + assert_eq(0, nconcord); + assert_eq(0, ndiscord); + assert_eq(0, 
nunpair1); + assert_eq(0, nunpair2); + assert(pairMax); + assert(!unpair1Max); // because !mixed + assert(!unpair2Max); // because !mixed + } + cerr << "PASSED" << endl; + + cerr << "Case 4 (simple paired 2) ... "; + { + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + ReportingParams rp( + 2, // khits + 3, // mhits + 0, // pengap + false, // msample + true, // discord + true); // mixed + ReportingState st(rp); + st.nextRead(true); // unpaired read + assert(testDones(st, false, false, false, false, false, false)); + st.foundUnpaired(true); + assert(testDones(st, false, false, false, false, false, false)); + st.foundUnpaired(true); + assert(testDones(st, false, true, false, false, false, false)); + st.foundUnpaired(true); + assert(testDones(st, false, true, false, false, false, false)); + st.foundUnpaired(true); + assert(testDones(st, false, true, true, false, false, false)); + st.foundUnpaired(false); + assert(testDones(st, false, true, true, false, false, false)); + st.foundUnpaired(false); + assert(testDones(st, false, true, true, false, false, false)); + st.foundUnpaired(false); + assert(testDones(st, false, true, true, false, false, false)); + st.foundUnpaired(false); + assert(testDones(st, false, true, true, true, true, false)); + st.foundConcordant(); + assert(testDones(st, false, true, true, true, true, false)); + st.foundConcordant(); + assert(testDones(st, false, true, true, true, true, false)); + st.foundConcordant(); + assert(testDones(st, false, true, true, true, true, false)); + st.foundConcordant(); + assert(testDones(st, true, true, true, true, true, true)); + assert_eq(4, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(4, st.numUnpaired1()); + assert_eq(4, st.numUnpaired2()); + st.finish(); + assert(testDones(st, true, true, true, true, true, true)); + assert_eq(4, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(4, 
st.numUnpaired1()); + assert_eq(4, st.numUnpaired2()); + assert(st.repOk()); + st.getReport(nconcord, ndiscord, nunpair1, nunpair2, + pairMax, unpair1Max, unpair2Max); + assert_eq(0, nconcord); + assert_eq(0, ndiscord); + assert_eq(0, nunpair1); + assert_eq(0, nunpair2); + assert(pairMax); + assert(unpair1Max); + assert(unpair2Max); + } + cerr << "PASSED" << endl; + + cerr << "Case 5 (potential discordant after concordant) ... "; + { + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + ReportingParams rp( + 2, // khits + 3, // mhits + 0, // pengap + false, // msample + true, // discord + true); // mixed + ReportingState st(rp); + st.nextRead(true); + assert(testDones(st, false, false, false, false, false, false)); + st.foundUnpaired(true); + st.foundUnpaired(false); + st.foundConcordant(); + assert(testDones(st, false, true, false, false, false, false)); + st.finish(); + assert(testDones(st, true, true, true, true, true, true)); + assert_eq(1, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(1, st.numUnpaired1()); + assert_eq(1, st.numUnpaired2()); + assert(st.repOk()); + st.getReport(nconcord, ndiscord, nunpair1, nunpair2, + pairMax, unpair1Max, unpair2Max); + assert_eq(1, nconcord); + assert_eq(0, ndiscord); + assert_eq(0, nunpair1); + assert_eq(0, nunpair2); + assert(!pairMax); + assert(!unpair1Max); + assert(!unpair2Max); + } + cerr << "PASSED" << endl; + + cerr << "Case 6 (true discordant) ... 
"; + { + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + ReportingParams rp( + 2, // khits + 3, // mhits + 0, // pengap + false, // msample + true, // discord + true); // mixed + ReportingState st(rp); + st.nextRead(true); + assert(testDones(st, false, false, false, false, false, false)); + st.foundUnpaired(true); + st.foundUnpaired(false); + assert(testDones(st, false, false, false, false, false, false)); + st.finish(); + assert(testDones(st, true, true, true, true, true, true)); + assert_eq(0, st.numConcordant()); + assert_eq(1, st.numDiscordant()); + assert_eq(0, st.numUnpaired1()); + assert_eq(0, st.numUnpaired2()); + assert(st.repOk()); + st.getReport(nconcord, ndiscord, nunpair1, nunpair2, + pairMax, unpair1Max, unpair2Max); + assert_eq(0, nconcord); + assert_eq(1, ndiscord); + assert_eq(0, nunpair1); + assert_eq(0, nunpair2); + assert(!pairMax); + assert(!unpair1Max); + assert(!unpair2Max); + } + cerr << "PASSED" << endl; + + cerr << "Case 7 (unaligned pair & uniquely aligned mate, mixed-mode) ... 
"; + { + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + ReportingParams rp( + 1, // khits + 1, // mhits + 0, // pengap + false, // msample + true, // discord + true); // mixed + ReportingState st(rp); + st.nextRead(true); // unpaired read + // assert(st.doneConcordant() == done1); + // assert(st.doneDiscordant() == done2); + // assert(st.doneUnpaired(true) == done3); + // assert(st.doneUnpaired(false) == done4); + // assert(st.doneUnpaired() == done5); + // assert(st.done() == done6); + st.foundUnpaired(true); + assert(testDones(st, false, false, false, false, false, false)); + st.foundUnpaired(true); + assert(testDones(st, false, true, true, false, false, false)); + assert_eq(0, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(2, st.numUnpaired1()); + assert_eq(0, st.numUnpaired2()); + st.finish(); + st.getReport(nconcord, ndiscord, nunpair1, nunpair2, + pairMax, unpair1Max, unpair2Max); + assert_eq(0, nconcord); + assert_eq(0, ndiscord); + assert_eq(0, nunpair1); + assert_eq(0, nunpair2); + assert(!pairMax); + assert(unpair1Max); + assert(!unpair2Max); + } + cerr << "PASSED" << endl; + + cerr << "Case 8 (unaligned pair & uniquely aligned mate, NOT mixed-mode) ... 
"; + { + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + ReportingParams rp( + 1, // khits + 1, // mhits + 0, // pengap + false, // msample + true, // discord + false); // mixed + ReportingState st(rp); + st.nextRead(true); // unpaired read + // assert(st.doneConcordant() == done1); + // assert(st.doneDiscordant() == done2); + // assert(st.doneUnpaired(true) == done3); + // assert(st.doneUnpaired(false) == done4); + // assert(st.doneUnpaired() == done5); + // assert(st.done() == done6); + st.foundUnpaired(true); + assert(testDones(st, false, false, true, true, true, false)); + st.foundUnpaired(true); + assert(testDones(st, false, true, true, true, true, false)); + assert_eq(0, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(2, st.numUnpaired1()); + assert_eq(0, st.numUnpaired2()); + st.finish(); + st.getReport(nconcord, ndiscord, nunpair1, nunpair2, + pairMax, unpair1Max, unpair2Max); + assert_eq(0, nconcord); + assert_eq(0, ndiscord); + assert_eq(0, nunpair1); + assert_eq(0, nunpair2); + assert(!pairMax); + assert(!unpair1Max); // not really relevant + assert(!unpair2Max); // not really relevant + } + cerr << "PASSED" << endl; + + cerr << "Case 9 (repetitive pair, only one mate repetitive) ... 
"; + { + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + ReportingParams rp( + 1, // khits + 1, // mhits + 0, // pengap + true, // msample + true, // discord + true); // mixed + ReportingState st(rp); + st.nextRead(true); // unpaired read + // assert(st.doneConcordant() == done1); + // assert(st.doneDiscordant() == done2); + // assert(st.doneUnpaired(true) == done3); + // assert(st.doneUnpaired(false) == done4); + // assert(st.doneUnpaired() == done5); + // assert(st.done() == done6); + st.foundConcordant(); + assert(st.repOk()); + st.foundUnpaired(true); + assert(st.repOk()); + st.foundUnpaired(false); + assert(st.repOk()); + assert(testDones(st, false, true, false, false, false, false)); + assert(st.repOk()); + st.foundConcordant(); + assert(st.repOk()); + st.foundUnpaired(true); + assert(st.repOk()); + assert(testDones(st, true, true, true, false, false, false)); + assert_eq(2, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(2, st.numUnpaired1()); + assert_eq(1, st.numUnpaired2()); + st.foundUnpaired(false); + assert(st.repOk()); + assert(testDones(st, true, true, true, true, true, true)); + assert_eq(2, st.numConcordant()); + assert_eq(0, st.numDiscordant()); + assert_eq(2, st.numUnpaired1()); + assert_eq(2, st.numUnpaired2()); + st.finish(); + st.getReport(nconcord, ndiscord, nunpair1, nunpair2, + pairMax, unpair1Max, unpair2Max); + assert_eq(1, nconcord); + assert_eq(0, ndiscord); + assert_eq(0, nunpair1); + assert_eq(0, nunpair2); + assert(pairMax); + assert(unpair1Max); // not really relevant + assert(unpair2Max); // not really relevant + } + cerr << "PASSED" << endl; +} + +#endif /*def ALN_SINK_MAIN*/ diff --git a/aln_sink.h b/aln_sink.h new file mode 100644 index 0000000..cf1ac50 --- /dev/null +++ b/aln_sink.h @@ -0,0 +1,4384 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ * This file is edited by Yun (Leo) Zhang for HISAT-3N. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ALN_SINK_H_ +#define ALN_SINK_H_ + +#include +#include "read.h" +#include "unique.h" +#include "sam.h" +#include "ds.h" +#include "simple_func.h" +#include "outq.h" +#include +#include "alt.h" +#include "splice_site.h" + +static const TAlScore getMinScore() { + return std::numeric_limits::min() / 2; +} + +// Forward decl +template +class SeedResults; + +enum { + OUTPUT_SAM = 1 +}; + +/** + * Metrics summarizing the work done by the reporter and summarizing + * the number of reads that align, that fail to align, and that align + * non-uniquely. 
+ */ +struct ReportingMetrics { + + ReportingMetrics():mutex_m() { + reset(); + } + + void reset() { + init(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + } + + void init( + uint64_t nread_, + uint64_t npaired_, + uint64_t nunpaired_, + uint64_t nconcord_uni_, + uint64_t nconcord_uni1_, + uint64_t nconcord_uni2_, + uint64_t nconcord_rep_, + uint64_t nconcord_0_, + uint64_t ndiscord_, + uint64_t nunp_0_uni_, + uint64_t nunp_0_uni1_, + uint64_t nunp_0_uni2_, + uint64_t nunp_0_rep_, + uint64_t nunp_0_0_, + uint64_t nunp_rep_uni_, + uint64_t nunp_rep_uni1_, + uint64_t nunp_rep_uni2_, + uint64_t nunp_rep_rep_, + uint64_t nunp_rep_0_, + uint64_t nunp_uni_, + uint64_t nunp_uni1_, + uint64_t nunp_uni2_, + uint64_t nunp_rep_, + uint64_t nunp_0_, + uint64_t sum_best1_, + uint64_t sum_best2_, + uint64_t sum_best_) + { + nread = nread_; + + npaired = npaired_; + nunpaired = nunpaired_; + + nconcord_uni = nconcord_uni_; + nconcord_uni1 = nconcord_uni1_; + nconcord_uni2 = nconcord_uni2_; + nconcord_rep = nconcord_rep_; + nconcord_0 = nconcord_0_; + + ndiscord = ndiscord_; + + nunp_0_uni = nunp_0_uni_; + nunp_0_uni1 = nunp_0_uni1_; + nunp_0_uni2 = nunp_0_uni2_; + nunp_0_rep = nunp_0_rep_; + nunp_0_0 = nunp_0_0_; + + nunp_rep_uni = nunp_rep_uni_; + nunp_rep_uni1 = nunp_rep_uni1_; + nunp_rep_uni2 = nunp_rep_uni2_; + nunp_rep_rep = nunp_rep_rep_; + nunp_rep_0 = nunp_rep_0_; + + nunp_uni = nunp_uni_; + nunp_uni1 = nunp_uni1_; + nunp_uni2 = nunp_uni2_; + nunp_rep = nunp_rep_; + nunp_0 = nunp_0_; + + sum_best1 = sum_best1_; + sum_best2 = sum_best2_; + sum_best = sum_best_; + } + + /** + * Merge (add) the counters in the given ReportingMetrics object + * into this object. This is the only safe way to update a + * ReportingMetrics shared by multiple threads. 
+ */ + void merge(const ReportingMetrics& met, bool getLock = false) { + ThreadSafe ts(&mutex_m, getLock); + nread += met.nread; + + npaired += met.npaired; + nunpaired += met.nunpaired; + + nconcord_uni += met.nconcord_uni; + nconcord_uni1 += met.nconcord_uni1; + nconcord_uni2 += met.nconcord_uni2; + nconcord_rep += met.nconcord_rep; + nconcord_0 += met.nconcord_0; + + ndiscord += met.ndiscord; + + nunp_0_uni += met.nunp_0_uni; + nunp_0_uni1 += met.nunp_0_uni1; + nunp_0_uni2 += met.nunp_0_uni2; + nunp_0_rep += met.nunp_0_rep; + nunp_0_0 += met.nunp_0_0; + + nunp_rep_uni += met.nunp_rep_uni; + nunp_rep_uni1 += met.nunp_rep_uni1; + nunp_rep_uni2 += met.nunp_rep_uni2; + nunp_rep_rep += met.nunp_rep_rep; + nunp_rep_0 += met.nunp_rep_0; + + nunp_uni += met.nunp_uni; + nunp_uni1 += met.nunp_uni1; + nunp_uni2 += met.nunp_uni2; + nunp_rep += met.nunp_rep; + nunp_0 += met.nunp_0; + + sum_best1 += met.sum_best1; + sum_best2 += met.sum_best2; + sum_best += met.sum_best; + } + + uint64_t nread; // # reads + uint64_t npaired; // # pairs + uint64_t nunpaired; // # unpaired reads + + // Paired + + // Concordant + uint64_t nconcord_uni; // # pairs with unique concordant alns + uint64_t nconcord_uni1; // # pairs with exactly 1 concordant alns + uint64_t nconcord_uni2; // # pairs with >1 concordant aln, still unique + uint64_t nconcord_rep; // # pairs with repetitive concordant alns + uint64_t nconcord_0; // # pairs with 0 concordant alns + // Discordant + uint64_t ndiscord; // # pairs with 1 discordant aln + + // Unpaired from failed pairs + uint64_t nunp_0_uni; // # unique from nconcord_0_ - ndiscord_ + uint64_t nunp_0_uni1; // # pairs with exactly 1 concordant alns + uint64_t nunp_0_uni2; // # pairs with >1 concordant aln, still unique + uint64_t nunp_0_rep; // # repetitive from + uint64_t nunp_0_0; // # with 0 alignments + + // Unpaired from repetitive pairs + uint64_t nunp_rep_uni; // # pairs with unique concordant alns + uint64_t nunp_rep_uni1; // # pairs with exactly 1 
concordant alns + uint64_t nunp_rep_uni2; // # pairs with >1 concordant aln, still unique + uint64_t nunp_rep_rep; // # pairs with repetitive concordant alns + uint64_t nunp_rep_0; // # pairs with 0 concordant alns + + // Unpaired + + uint64_t nunp_uni; // # unique from nconcord_0_ - ndiscord_ + uint64_t nunp_uni1; // # pairs with exactly 1 concordant alns + uint64_t nunp_uni2; // # pairs with >1 concordant aln, still unique + uint64_t nunp_rep; // # repetitive from + uint64_t nunp_0; // # with 0 alignments + + + uint64_t sum_best1; // Sum of all the best alignment scores + uint64_t sum_best2; // Sum of all the second-best alignment scores + uint64_t sum_best; // Sum of all the best and second-best + + MUTEX_T mutex_m; +}; + +// Type for expression numbers of hits +typedef int64_t THitInt; + +/** + * Parameters affecting reporting of alignments, specifically -k & -a, + * -m & -M. + */ +struct ReportingParams { + + explicit ReportingParams( + THitInt khits_, + THitInt kseeds_, + THitInt mhits_, + THitInt pengap_, + bool msample_, + bool discord_, + bool mixed_, + bool secondary_, + bool localAlign_, + int bowtie2_dp_, + bool sensitive_, + bool repeat_) + + { + init( + khits_, + kseeds_, + mhits_, + pengap_, + msample_, + discord_, + mixed_, + secondary_, + localAlign_, + bowtie2_dp_, + sensitive_, + repeat_); + } + + void init( + THitInt khits_, + THitInt kseeds_, + THitInt mhits_, + THitInt pengap_, + bool msample_, + bool discord_, + bool mixed_, + bool secondary_, + bool localAlign_, + int bowtie2_dp_, + bool sensitive_, + bool repeat_) + { + khits = khits_; // -k (or high if -a) + kseeds = kseeds_; + mhits = ((mhits_ == 0) ? std::numeric_limits::max() : mhits_); + pengap = pengap_; + msample = msample_; + discord = discord_; + mixed = mixed_; + secondary = secondary_; + localAlign = localAlign_; + bowtie2_dp = bowtie2_dp_; + sensitive = sensitive_; + repeat = repeat_; + } + +#ifndef NDEBUG + /** + * Check that reporting parameters are internally consistent. 
+ */ + bool repOk() const { + assert_geq(khits, 1); + assert_geq(mhits, 1); + return true; + } +#endif + + /** + * Return true iff a -m or -M limit was set by the user. + */ + inline bool mhitsSet() const { + return mhits < std::numeric_limits::max(); + } + + /** + * Return a multiplier that indicates how many alignments we might look for + * (max). We can use this to boost parameters like ROWM and POSF + * appropriately. + */ + inline THitInt mult() const { + if(mhitsSet()) { + return mhits+1; + } + return khits; + } + + /** + * Given ROWM, POSF thresholds, boost them according to mult(). + */ + void boostThreshold(SimpleFunc& func) { + THitInt mul = mult(); + assert_gt(mul, 0); + if(mul == std::numeric_limits::max()) { + func.setMin(std::numeric_limits::max()); + } else if(mul > 1) { + func.mult(mul); + } + } + + /** + * Return true iff we are reporting all hits. + */ + bool allHits() const { + return khits == std::numeric_limits::max(); + } + + // Number of alignments to report + THitInt khits; + + // Number of seeds allowed to extend + THitInt kseeds; + + // Read is non-unique if mhits-1 next-best alignments are within + // pengap of the best alignment + THitInt mhits, pengap; + + // true if -M is specified, meaning that if the -M ceiling is + // exceeded, we should report 'khits' alignments chosen at random + // from those found + bool msample; + + // true iff we should seek and report discordant paired-end alignments for + // paired-end reads. + bool discord; + + // true iff we should seek and report unpaired mate alignments when there + // are paired-end alignments for a paired-end read, or if the number of + // paired-end alignments exceeds the -m ceiling. 
+ bool mixed; + + // true iff we allow secondary alignments to be output (secondary alignments + // have lower scores) + bool secondary; + + // true iff we allow local alignment (not implemented yet) + bool localAlign; + + // true iff we allow dynamic alignment + int bowtie2_dp; + + // true iff we allow sensitive alignment + bool sensitive; + + // true iff we output alignments to repeat sequences + bool repeat; +}; + +/** + * A state machine keeping track of the number and type of alignments found so + * far. Its purpose is to inform the caller as to what stage the alignment is + * in and what categories of alignment are still of interest. This information + * should allow the caller to short-circuit some alignment work. Another + * purpose is to tell the AlnSinkWrap how many and what type of alignment to + * report. + * + * TODO: This class does not keep accurate information about what + * short-circuiting took place. If a read is identical to a previous read, + * there should be a way to query this object to determine what work, if any, + * has to be re-done for the new read. + */ +class ReportingState { + +public: + + enum { + NO_READ = 1, // haven't got a read yet + CONCORDANT_PAIRS, // looking for concordant pairs + DISCORDANT_PAIRS, // looking for discordant pairs + UNPAIRED, // looking for unpaired + DONE // finished looking + }; + + // Flags for different ways we can finish out a category of potential + // alignments. 
+ + enum { + EXIT_DID_NOT_EXIT = 1, // haven't finished + EXIT_DID_NOT_ENTER, // never tried search + EXIT_SHORT_CIRCUIT_k, // -k exceeded + EXIT_SHORT_CIRCUIT_M, // -M exceeded + EXIT_SHORT_CIRCUIT_TRUMPED, // made irrelevant + EXIT_CONVERTED_TO_DISCORDANT, // unpair became discord + EXIT_NO_ALIGNMENTS, // none found + EXIT_WITH_ALIGNMENTS // some found + }; + + ReportingState(const ReportingParams& p) : p_(p) { reset(); } + + ReportingState operator=(ReportingState& copySource) { + state_ = copySource.state_; + paired_ = copySource.paired_;; + nconcord_ = copySource.nconcord_; + ndiscord_ = copySource.ndiscord_; + nunpair1_ = copySource.nunpair1_; + nunpair2_ = copySource.nunpair2_; + nunpairRepeat1_ = copySource.nunpairRepeat1_; + nunpairRepeat2_ = copySource.nunpairRepeat2_; + doneConcord_ = copySource.doneConcord_; + doneDiscord_ = copySource.doneDiscord_; + doneUnpair_ = copySource.doneUnpair_; + doneUnpair1_ = copySource.doneUnpair1_; + doneUnpair2_ = copySource.doneUnpair2_; + exitConcord_ = copySource.exitConcord_; + exitDiscord_ = copySource.exitDiscord_; + exitUnpair1_ = copySource.exitUnpair1_; + exitUnpair2_ = copySource.exitUnpair2_; + concordBest_ = copySource.concordBest_; + done_ = copySource.done_; + } + /** + * Set all state to uninitialized defaults. 
+ */ + void reset() { + state_ = ReportingState::NO_READ; + paired_ = false; + nconcord_ = 0; + ndiscord_ = 0; + nunpair1_ = 0; + nunpair2_ = 0; + nunpairRepeat1_ = 0; + nunpairRepeat2_ = 0; + doneConcord_ = false; + doneDiscord_ = false; + doneUnpair_ = false; + doneUnpair1_ = false; + doneUnpair2_ = false; + exitConcord_ = ReportingState::EXIT_DID_NOT_ENTER; + exitDiscord_ = ReportingState::EXIT_DID_NOT_ENTER; + exitUnpair1_ = ReportingState::EXIT_DID_NOT_ENTER; + exitUnpair2_ = ReportingState::EXIT_DID_NOT_ENTER; + concordBest_ = getMinScore(); + done_ = false; + } + + /** + * Return true iff this ReportingState has been initialized with a call to + * nextRead() since the last time reset() was called. + */ + bool inited() const { return state_ != ReportingState::NO_READ; } + + /** + * Initialize state machine with a new read. The state we start in depends + * on whether it's paired-end or unpaired. + */ + void nextRead(bool paired); + + /** + * Caller uses this member function to indicate that one additional + * concordant alignment has been found. + */ + bool foundConcordant(TAlScore score); + + /** + * Caller uses this member function to indicate that one additional + * unpaired alignment has been found. + */ + bool foundUnpaired(bool mate1, bool repeat = false); + + /** + * Called to indicate that the aligner has finished searching for + * alignments. This gives us a chance to finalize our state. + * + * TODO: Keep track of short-circuiting information. + */ + void finish(); + + /** + * Populate given counters with the number of various kinds of alignments + * to report for this read. Concordant alignments are preferable to (and + * mutually exclusive with) discordant alignments, and paired-end + * alignments are preferable to unpaired alignments. + * + * The caller also needs some additional information for the case where a + * pair or unpaired read aligns repetitively. 
If the read is paired-end + * and the paired-end has repetitive concordant alignments, that should be + * reported, and 'pairMax' is set to true to indicate this. If the read is + * paired-end, does not have any concordant alignments, but does have + * repetitive alignments for one or both mates, then that should be + * reported, and 'unpair1Max' and 'unpair2Max' are set accordingly. + * + * Note that it's possible in the case of a paired-end read for the read to + * have repetitive concordant alignments, but for one mate to have a unique + * unpaired alignment. + */ + void getReport( + uint64_t& nconcordAln, // # concordant alignments to report + uint64_t& ndiscordAln, // # discordant alignments to report + uint64_t& nunpair1Aln, // # unpaired alignments for mate #1 to report + uint64_t& nunpair2Aln, // # unpaired alignments for mate #2 to report + uint64_t& nunpairRepeat1Aln, // # unpaired alignments for mate #1 to report + uint64_t& nunpairRepeat2Aln, // # unpaired alignments for mate #2 to report + bool& pairMax, // repetitive concordant alignments + bool& unpair1Max, // repetitive alignments for mate #1 + bool& unpair2Max) // repetitive alignments for mate #2 + const; + + /** + * Return an integer representing the alignment state we're in. + */ + inline int state() const { return state_; } + + /** + * If true, there's no need to solve any more dynamic programming problems + * for finding opposite mates. + */ + inline bool doneConcordant() const { return doneConcord_; } + + /** + * If true, there's no need to seek any more discordant alignment. + */ + inline bool doneDiscordant() const { return doneDiscord_; } + + /** + * If true, there's no need to seek any more unpaired alignments for the + * specified mate. Note: this doesn't necessarily mean we can stop looking + * for alignments for the mate, since this might be necessary for finding + * concordant and discordant alignments. + */ + inline bool doneUnpaired(bool mate1) const { + return mate1 ? 
doneUnpair1_ : doneUnpair2_; + } + + /** + * If false, no further consideration of the given mate is necessary. It's + * not needed for *any* class of alignment: concordant, discordant or + * unpaired. + */ + inline bool doneWithMate(bool mate1) const { + bool doneUnpair = mate1 ? doneUnpair1_ : doneUnpair2_; + uint64_t nun = mate1 ? nunpair1_ : nunpair2_; + if(!doneUnpair || !doneConcord_) { + return false; // still needed for future concordant/unpaired alns + } + if(!doneDiscord_ && nun == 0) { + return false; // still needed for future discordant alignments + } + return true; // done + } + + /** + * Return true iff there's no need to seek any more unpaired alignments. + */ + inline bool doneUnpaired() const { return doneUnpair_; } + + /** + * Return true iff all alignment stages have been exited. + */ + inline bool done() const { return done_; } + + inline uint64_t numConcordant() const { return nconcord_; } + inline uint64_t numDiscordant() const { return ndiscord_; } + inline uint64_t numUnpaired1() const { return nunpair1_; } + inline uint64_t numUnpaired2() const { return nunpair2_; } + inline uint64_t numUnpairedRepeat1() const { return nunpairRepeat1_; } + inline uint64_t numUnpairedRepeat2() const { return nunpairRepeat2_; } + + inline int exitConcordant() const { return exitConcord_; } + inline int exitDiscordant() const { return exitDiscord_; } + inline int exitUnpaired1() const { return exitUnpair1_; } + inline int exitUnpaired2() const { return exitUnpair2_; } + + inline int64_t concordBest() const { return concordBest_; } + + void addNumUnpaired1() { + nunpair1_++; + } + void addNumUnpaired2() { + nunpair2_++; + } + +#ifndef NDEBUG + /** + * Check that ReportingState is internally consistent. 
     * Debug builds only; always returns true so it can be wrapped in an
     * assert() -- a violated invariant fires one of the asserts below.
     */
    bool repOk() const {
        // If discordant reporting is off, the discordant category must
        // already be marked done.
        assert(p_.discord || doneDiscord_);
        // If mixed mode is off, a paired read must be done with unpaired alns.
        assert(p_.mixed   || !paired_ || doneUnpair_);
        // doneUnpair_ must be set whenever both per-mate flags are set.
        assert(doneUnpair_ || !doneUnpair1_ || !doneUnpair2_);
        if(p_.mhitsSet()) {
            // With a -m/-M ceiling, no counter may pass ceiling + 1.
            assert_leq(numConcordant(), (uint64_t)p_.mhits+1);
            assert_leq(numDiscordant(), (uint64_t)p_.mhits+1);
            assert(paired_ || numUnpaired1() <= (uint64_t)p_.mhits+1);
            assert(paired_ || numUnpaired2() <= (uint64_t)p_.mhits+1);
        }
        // If we are finished with both mates, done() must be set.
        assert(done() || !doneWithMate(true) || !doneWithMate(false));
        return true;
    }
#endif

    /**
     * Return ReportingParams object governing this ReportingState.
     */
    const ReportingParams& params() const {
        return p_;
    }

protected:

    /**
     * Update state to reflect situation after converting two unique unpaired
     * alignments, one for mate 1 and one for mate 2, into a single discordant
     * alignment.
     *
     * Precondition (asserted): exactly one unpaired alignment per mate and no
     * discordant alignment yet.
     */
    void convertUnpairedToDiscordant() {
        assert_eq(1, numUnpaired1());
        assert_eq(1, numUnpaired2());
        assert_eq(0, numDiscordant());
        exitUnpair1_ = exitUnpair2_ = ReportingState::EXIT_CONVERTED_TO_DISCORDANT;
        // The two unpaired alignments are now accounted for as one discordant
        // alignment; the repeat counters are cleared along with them.
        nunpair1_ = nunpair2_ = 0;
        nunpairRepeat1_ = nunpairRepeat2_ = 0;
        ndiscord_ = 1;
        assert_eq(1, numDiscordant());
    }

    /**
     * Given the number of alignments in a category, check whether we
     * short-circuited out of the category.  Set the done and exit arguments to
     * indicate whether and how we short-circuited.
     */
    inline void areDone(
        uint64_t cnt,     // # alignments in category
        bool& done,       // out: whether we short-circuited out of category
        int& exit) const; // out: if done, how we short-circuited (-k? -m? etc)

    /**
     * Update done_ field to reflect whether we're totally done now.
     * Also recomputes doneUnpair_ from the two per-mate flags.
     */
    inline void updateDone() {
        doneUnpair_ = doneUnpair1_ && doneUnpair2_;
        done_ = doneUnpair_ && doneDiscord_ && doneConcord_;
    }

    const ReportingParams& p_;  // reporting parameters
    int state_;          // state we're currently in
    bool paired_;        // true iff read we're currently handling is paired
    uint64_t nconcord_;  // # concordants found so far
    uint64_t ndiscord_;  // # discordants found so far
    uint64_t nunpair1_;  // # unpaired alignments found so far for mate 1
    uint64_t nunpair2_;  // # unpaired alignments found so far for mate 2
    uint64_t nunpairRepeat1_;  // # unpaired repeat alignments found so far for mate 1
    uint64_t nunpairRepeat2_;  // # unpaired repeat alignments found so far for mate 2
    bool doneConcord_;   // true iff we're no longer interested in concordants
    bool doneDiscord_;   // true iff we're no longer interested in discordants
    bool doneUnpair_;    // no longer interested in unpaired alns
    bool doneUnpair1_;   // no longer interested in unpaired alns for mate 1
    bool doneUnpair2_;   // no longer interested in unpaired alns for mate 2
    int exitConcord_;    // flag indicating how we exited concordant state
    int exitDiscord_;    // flag indicating how we exited discordant state
    int exitUnpair1_;    // flag indicating how we exited unpaired 1 state
    int exitUnpair2_;    // flag indicating how we exited unpaired 2 state
    TAlScore concordBest_; // reset() sets this to getMinScore(); presumably the best
                           // concordant score seen so far -- TODO confirm in foundConcordant()
    bool done_;          // done with all alignments
};

/**
 * Global hit sink for hits from the MultiSeed aligner.  Encapsulates
 * all aspects of the MultiSeed aligner hitsink that are global to all
 * threads.  This includes aspects relating to:
 *
 * (a) synchronized access to the output stream
 * (b) the policy to be enforced by the per-thread wrapper
 *
 * TODO: Implement splitting up of alignments into separate files
 * according to genomic coordinate.
+ */ +template +class AlnSink { + + typedef EList StrList; + +public: + + explicit AlnSink( + OutputQueue& oq, + const StrList& refnames, + const StrList& repnames, + bool quiet, + ALTDB* altdb = NULL, + SpliceSiteDB* ssdb = NULL) : + oq_(oq), + refnames_(refnames), + repnames_(repnames), + quiet_(quiet), + altdb_(altdb), + spliceSiteDB_(ssdb) + { } + + /** + * Destroy HitSinkobject; + */ + virtual ~AlnSink() { } + + /** + * Called when the AlnSink is wrapped by a new AlnSinkWrap. This helps us + * keep track of whether the main lock or any of the per-stream locks will + * be contended by multiple threads. + */ + void addWrapper() { numWrappers_++; } + + // for HISAT-3N + virtual void output(int threadId0, ReportingMetrics& met, BTString& o) { + + } + + /** + * Append a single hit to the given output stream. If + * synchronization is required, append() assumes the caller has + * already grabbed the appropriate lock. + */ + virtual void append( + BTString& o, + StackedAln& staln, + size_t threadId, + const Read *rd1, + const Read *rd2, + const TReadId rdid, + AlnRes *rs1, + AlnRes *rs2, + const AlnSetSumm& summ, + const SeedAlSumm& ssm1, + const SeedAlSumm& ssm2, + const AlnFlags* flags1, + const AlnFlags* flags2, + const PerReadMetrics& prm, + const Mapq& mapq, + const Scoring& sc, + bool report2) = 0; + + /*virtual void append( + ReportingMetrics& met, + BTString& o, + StackedAln& staln, + size_t threadId, + const Read *rd1, + const Read *rd2, + const TReadId rdid, + AlnRes *rs1, + AlnRes *rs2, + const AlnSetSumm& summ, + const SeedAlSumm& ssm1, + const SeedAlSumm& ssm2, + const AlnFlags* flags1, + const AlnFlags* flags2, + const PerReadMetrics& prm, + const Mapq& mapq, + const Scoring& sc, + bool report2) = 0;*/ + + + /** + * Report a given batch of hits for the given read or read pair. + * Should be called just once per read pair. Assumes all the + * alignments are paired, split between rs1 and rs2. 
     *
     * The caller hasn't decided which alignments get reported as primary
     * or secondary; that's up to the routine.  Because the caller might
     * want to know this, we use the pri1 and pri2 out arguments to
     * convey this.
     *
     * NOTE(review): this signature has no pri1/pri2 arguments.  In the code
     * below the primary/secondary decision is expressed by setting the
     * primary bit on local copies of *flags1 / *flags2 before each append().
     * 'maxed' and 'getLock' are not referenced in this body.
     */
    virtual void reportHits(
        BTString&             o,        // write to this buffer
        StackedAln&           staln,    // StackedAln to write stacked alignment
        size_t                threadId, // which thread am I?
        const Read           *rd1,      // mate #1
        const Read           *rd2,      // mate #2
        const TReadId         rdid,     // read ID
        const EList&          select1,  // random subset of rd1s
        const EList*          select2,  // random subset of rd2s
        EList                *rs1,      // alignments for mate #1
        EList                *rs2,      // alignments for mate #2
        bool                  maxed,    // true iff -m/-M exceeded
        const AlnSetSumm&     summ,     // summary
        const SeedAlSumm&     ssm1,     // seed alignment summ
        const SeedAlSumm&     ssm2,     // seed alignment summ
        const AlnFlags*       flags1,   // flags for mate #1
        const AlnFlags*       flags2,   // flags for mate #2
        const PerReadMetrics& prm,      // per-read metrics
        const Mapq&           mapq,     // MAPQ generator
        const Scoring&        sc,       // scoring scheme
        bool                  getLock = true) // true iff lock held by caller
    {
        // There are a few scenarios:
        // 1. Read is unpaired, in which case rd2 is NULL
        // 2. Read is paired-end and we're reporting concordant alignments
        // 3. Read is paired-end and we're reporting discordant alignments
        // 4. Read is paired-end and we're reporting unpaired alignments for
        //    both mates
        // 5. Read is paired-end and we're reporting an unpaired alignments for
        //    just one mate or the other
        assert(rd1 != NULL || rd2 != NULL);
        assert(rs1 != NULL || rs2 != NULL);
        // Work on local copies of the flags so the primary bit can be flipped
        // without touching the caller's objects; flags1/flags2 are re-pointed
        // at the copies.
        AlnFlags flagscp1, flagscp2;
        if(flags1 != NULL) {
            flagscp1 = *flags1;
            flags1 = &flagscp1;
            flagscp1.setPrimary(true);
        }
        if(flags2 != NULL) {
            flagscp2 = *flags2;
            flags2 = &flagscp2;
            flagscp2.setPrimary(true);
        }
        if(select2 != NULL) {
            // Handle case 5
            // (separate selections for the two mates; both reads and both
            // flag objects must be present)
            assert(rd1 != NULL); assert(flags1 != NULL);
            assert(rd2 != NULL); assert(flags2 != NULL);
            assert_gt(select1.size(), 0);
            assert_gt(select2->size(), 0);
            // The first selected alignment of each mate is the primary one;
            // it is emitted once with both mates reported (report2 = true).
            AlnRes* r1pri = ((rs1 != NULL) ? &rs1->get(select1[0]) : NULL);
            AlnRes* r2pri = ((rs2 != NULL) ? &rs2->get((*select2)[0]) : NULL);
            append(o, staln, threadId, rd1, rd2, rdid, r1pri, r2pri, summ,
                   ssm1, ssm2, flags1, flags2, prm, mapq, sc, true);
            // Everything after the first record is secondary.
            flagscp1.setPrimary(false);
            flagscp2.setPrimary(false);
            // Remaining mate-1 alignments, each shown against mate 2's primary.
            for(size_t i = 1; i < select1.size(); i++) {
                AlnRes* r1 = ((rs1 != NULL) ? &rs1->get(select1[i]) : NULL);
                append(o, staln, threadId, rd1, rd2, rdid, r1, r2pri, summ,
                       ssm1, ssm2, flags1, flags2, prm, mapq, sc, false);
            }
            // Remaining mate-2 alignments; the arguments are swapped so that
            // mate 2 occupies the first slot and mate 1's primary the second.
            for(size_t i = 1; i < select2->size(); i++) {
                AlnRes* r2 = ((rs2 != NULL) ? &rs2->get((*select2)[i]) : NULL);
                append(o, staln, threadId, rd2, rd1, rdid, r2, r1pri, summ,
                       ssm2, ssm1, flags2, flags1, prm, mapq, sc, false);
            }
        } else {
            // Handle cases 1-4
            // rs1 and rs2 (when both present) are indexed by the same
            // select1 entry, i.e. they are treated as parallel lists.
            for(size_t i = 0; i < select1.size(); i++) {
                AlnRes* r1 = ((rs1 != NULL) ? &rs1->get(select1[i]) : NULL);
                AlnRes* r2 = ((rs2 != NULL) ? &rs2->get(select1[i]) : NULL);
                append(o, staln, threadId, rd1, rd2, rdid, r1, r2, summ,
                       ssm1, ssm2, flags1, flags2, prm, mapq, sc, true);
                // Only the first record keeps the primary bit.
                if(flags1 != NULL) {
                    flagscp1.setPrimary(false);
                }
                if(flags2 != NULL) {
                    flagscp2.setPrimary(false);
                }
            }
        }
    }

    /*virtual void report3NHits(
        ReportingMetrics&     met,      // reporting metrics
        BTString&             o,        // write to this buffer
        StackedAln&           staln,    // StackedAln to write stacked alignment
        size_t                threadId, // which thread am I?
        const Read           *rd1,      // mate #1
        const Read           *rd2,      // mate #2
        const TReadId         rdid,     // read ID
        const EList&          select1,  // random subset of rd1s
        const EList*          select2,  // random subset of rd2s
        EList                *rs1,      // alignments for mate #1
        EList                *rs2,      // alignments for mate #2
        bool                  maxed,    // true iff -m/-M exceeded
        const AlnSetSumm&     summ,     // summary
        const SeedAlSumm&     ssm1,     // seed alignment summ
        const SeedAlSumm&     ssm2,     // seed alignment summ
        const AlnFlags*       flags1,   // flags for mate #1
        const AlnFlags*       flags2,   // flags for mate #2
        const PerReadMetrics& prm,      // per-read metrics
        const Mapq&           mapq,     // MAPQ generator
        const Scoring&        sc,       // scoring scheme
        bool                  getLock = true) // true iff lock held by caller
    {
        // There are a few scenarios:
        // 1. Read is unpaired, in which case rd2 is NULL
        // 2. Read is paired-end and we're reporting concordant alignments
        // 3. Read is paired-end and we're reporting discordant alignments
        // 4. Read is paired-end and we're reporting unpaired alignments for
        //    both mates
        // 5.
Read is paired-end and we're reporting an unpaired alignments for + // just one mate or the other + assert(rd1 != NULL || rd2 != NULL); + assert(rs1 != NULL || rs2 != NULL); + AlnFlags flagscp1, flagscp2; + if(flags1 != NULL) { + flagscp1 = *flags1; + flags1 = &flagscp1; + flagscp1.setPrimary(true); + } + if(flags2 != NULL) { + flagscp2 = *flags2; + flags2 = &flagscp2; + flagscp2.setPrimary(true); + } + if(select2 != NULL) { + // Handle case 5 + assert(rd1 != NULL); assert(flags1 != NULL); + assert(rd2 != NULL); assert(flags2 != NULL); + assert_gt(select1.size(), 0); + assert_gt(select2->size(), 0); + AlnRes* r1pri = ((rs1 != NULL) ? &rs1->get(select1[0]) : NULL); + AlnRes* r2pri = ((rs2 != NULL) ? &rs2->get((*select2)[0]) : NULL); + append(met, o, staln, threadId, rd1, rd2, rdid, r1pri, r2pri, summ, + ssm1, ssm2, flags1, flags2, prm, mapq, sc, true); + flagscp1.setPrimary(false); + flagscp2.setPrimary(false); + for(size_t i = 1; i < select1.size(); i++) { + AlnRes* r1 = ((rs1 != NULL) ? &rs1->get(select1[i]) : NULL); + append(met, o, staln, threadId, rd1, rd2, rdid, r1, r2pri, summ, + ssm1, ssm2, flags1, flags2, prm, mapq, sc, false); + } + for(size_t i = 1; i < select2->size(); i++) { + AlnRes* r2 = ((rs2 != NULL) ? &rs2->get((*select2)[i]) : NULL); + append(met, o, staln, threadId, rd2, rd1, rdid, r2, r1pri, summ, + ssm2, ssm1, flags2, flags1, prm, mapq, sc, false); + } + } else { + // Handle cases 1-4 + for(size_t i = 0; i < select1.size(); i++) { + AlnRes* r1 = ((rs1 != NULL) ? &rs1->get(select1[i]) : NULL); + AlnRes* r2 = ((rs2 != NULL) ? &rs2->get(select1[i]) : NULL); + append(met, o, staln, threadId, rd1, rd2, rdid, r1, r2, summ, + ssm1, ssm2, flags1, flags2, prm, mapq, sc, true); + if(flags1 != NULL) { + flagscp1.setPrimary(false); + } + if(flags2 != NULL) { + flagscp2.setPrimary(false); + } + } + } + }*/ + + /** + * Report an unaligned read. Typically we do nothing, but we might + * want to print a placeholder when output is chained. 
     *
     * This implementation forwards to append() with NULL for both alignment
     * results.  'getLock' is not referenced in this body.
     */
    virtual void reportUnaligned(
        BTString&             o,        // write to this string
        StackedAln&           staln,    // StackedAln to write stacked alignment
        size_t                threadId, // which thread am I?
        const Read           *rd1,      // mate #1
        const Read           *rd2,      // mate #2
        const TReadId         rdid,     // read ID
        const AlnSetSumm&     summ,     // summary
        const SeedAlSumm&     ssm1,     // seed alignment summary
        const SeedAlSumm&     ssm2,     // seed alignment summary
        const AlnFlags*       flags1,   // flags for mate #1
        const AlnFlags*       flags2,   // flags for mate #2
        const PerReadMetrics& prm,      // per-read metrics
        const Mapq&           mapq,     // MAPQ calculator
        const Scoring&        sc,       // scoring scheme
        bool                  report2,  // report alns for both mates?
        bool                  getLock = true) // true iff lock held by caller
    {
        append(o, staln, threadId, rd1, rd2, rdid, NULL, NULL, summ,
               ssm1, ssm2, flags1, flags2, prm, mapq, sc, report2);
    }


    /**
     * Print summary of how many reads aligned, failed to align and aligned
     * repetitively.  Write it to stderr.  Optionally write Hadoop counter
     * updates.
     *
     * NOTE(review): the destination is the 'out' stream parameter; whether
     * callers always pass stderr is not visible here.
     */
    void printAlSumm(
        ostream& out,
        const ReportingMetrics& met,
        size_t repThresh,   // threshold for uniqueness, or max if no thresh
        bool discord,       // looked for discordant alignments
        bool mixed,         // looked for unpaired alignments where paired failed?
        bool newSummary,    // alignment summary in a new style
        bool hadoopOut);    // output Hadoop counters?

    /**
     * Called when all alignments are complete.  It is assumed that no
     * synchronization is necessary.
     *
     * Prints the alignment summary for the accumulated metrics (met_) to
     * 'out' unless the sink was constructed with quiet = true.  The
     * remaining arguments are passed through to printAlSumm().
     */
    void finish(
        ostream& out,
        size_t repThresh,
        bool discord,
        bool mixed,
        bool newSummary,
        bool hadoopOut)
    {
        // Print the final summary.  No stream is closed here, despite what
        // this comment used to say.
        if(!quiet_) {
            printAlSumm(
                out,
                met_,
                repThresh,
                discord,
                mixed,
                newSummary,
                hadoopOut);
        }
    }

#ifndef NDEBUG
    /**
     * Check that hit sink is internally consistent.
     * Currently checks nothing and always returns true.
     */
    bool repOk() const { return true; }
#endif

    //
    // Related to reporting seed hits
    //

    /**
     * Given a Read and associated, filled-in SeedResults objects,
     * print a record summarizing the seed hits.
     */
    void reportSeedSummary(
        BTString&          o,
        const Read&        rd,
        TReadId            rdid,
        size_t             threadId,
        const SeedResults& rs,
        bool               getLock = true);

    /**
     * Given a Read, print an empty record (all 0s).
     */
    void reportEmptySeedSummary(
        BTString&          o,
        const Read&        rd,
        TReadId            rdid,
        size_t             threadId,
        bool               getLock = true);

    /**
     * Append a batch of unresolved seed alignment results (i.e. seed
     * alignments where all we know is the reference sequence aligned
     * to and its SA range, not where it falls in the reference
     * sequence) to the given output stream in Bowtie's seed-alignment
     * verbose-mode format.
     *
     * The size_t arguments come in three groups of four (overall, Fw, Rc):
     * seeds tried, nonzero, ranges and elts.
     */
    virtual void appendSeedSummary(
        BTString&     o,
        const Read&   rd,
        const TReadId rdid,
        size_t        seedsTried,
        size_t        nonzero,
        size_t        ranges,
        size_t        elts,
        size_t        seedsTriedFw,
        size_t        nonzeroFw,
        size_t        rangesFw,
        size_t        eltsFw,
        size_t        seedsTriedRc,
        size_t        nonzeroRc,
        size_t        rangesRc,
        size_t        eltsRc);

    /**
     * Merge given metrics in with ours by summing all individual metrics.
     * 'getLock' is forwarded unchanged to ReportingMetrics::merge().
     */
    void mergeMetrics(const ReportingMetrics& met, bool getLock = true) {
        met_.merge(met, getLock);
    }

    /**
     * Return mutable reference to the shared OutputQueue.
     */
    OutputQueue& outq() {
        return oq_;
    }

protected:

    OutputQueue&       oq_;           // output queue
    int                numWrappers_;  // # threads owning a wrapper for this HitSink
    const StrList&     refnames_;     // reference names
    const StrList&     repnames_;     // repeat names
    bool               quiet_;        // true -> don't print alignment stats at the end
    ReportingMetrics   met_;          // global repository of reporting metrics
    ALTDB*             altdb_;        // ALT database passed to the constructor (may be NULL)
    SpliceSiteDB*      spliceSiteDB_; // splice-site database passed to the constructor (may be NULL)
};

/**
 * Per-thread hit sink "wrapper" for the MultiSeed aligner.
Encapsulates + * aspects of the MultiSeed aligner hit sink that are per-thread. This + * includes aspects relating to: + * + * (a) Enforcement of the reporting policy + * (b) Tallying of results + * (c) Storing of results for the previous read in case this allows us to + * short-circuit some work for the next read (i.e. if it's identical) + * + * PHASED ALIGNMENT ASSUMPTION + * + * We make some assumptions about how alignment proceeds when we try to + * short-circuit work for identical reads. Specifically, we assume that for + * each read the aligner proceeds in a series of stages (or perhaps just one + * stage). In each stage, the aligner either: + * + * (a) Finds no alignments, or + * (b) Finds some alignments and short circuits out of the stage with some + * random reporting involved (e.g. in -k and/or -M modes), or + * (c) Finds all of the alignments in the stage + * + * In the event of (a), the aligner proceeds to the next stage and keeps + * trying; we can skip the stage entirely for the next read if it's identical. + * In the event of (b), or (c), the aligner stops and does not proceed to + * further stages. In the event of (b1), if the next read is identical we + * would like to tell the aligner to start again at the beginning of the stage + * that was short-circuited. + * + * In any event, the rs1_/rs2_/rs1u_/rs2u_ fields contain the alignments found + * in the last alignment stage attempted. + * + * HANDLING REPORTING LIMITS + * + * The user can specify reporting limits, like -k (specifies number of + * alignments to report out of those found) and -M (specifies a ceiling s.t. if + * there are more alignments than the ceiling, read is called repetitive and + * best found is reported). Enforcing these limits is straightforward for + * unpaired alignments: if a new alignment causes us to exceed the -M ceiling, + * we can stop looking. + * + * The case where both paired-end and unpaired alignments are possible is + * trickier. 
Once we have a number of unpaired alignments that exceeds the + * ceiling, we can stop looking *for unpaired alignments* - but we can't + * necessarily stop looking for paired-end alignments, since there may yet be + * more to find. However, if the input read is not a pair, then we can stop at + * this point. If the input read is a pair and we have a number of paired + * alignments that exceeds the -M ceiling, we can stop looking. + * + * CONCORDANT & DISCORDANT, PAIRED & UNPAIRED + * + * A note on paired-end alignment: Clearly, if an input read is + * paired-end and we find either concordant or discordant paired-end + * alignments for the read, then we would like to tally and report + * those alignments as such (and not as groups of 2 unpaired + * alignments). And if we fail to find any paired-end alignments, but + * we do find some unpaired alignments for one mate or the other, then + * we should clearly tally and report those alignments as unpaired + * alignments (if the user so desires). + * + * The situation is murkier when there are no paired-end alignments, + * but there are unpaired alignments for *both* mates. In this case, + * we might want to pick out zero or more pairs of mates and classify + * those pairs as discordant paired-end alignments. And we might want + * to classify the remaining alignments as unpaired. But how do we + * pick which pairs if any to call discordant? + * + * Because the most obvious use for discordant pairs is for identifying + * large-scale variation, like rearrangements or large indels, we would + * usually like to be conservative about what we call a discordant + * alignment. If there's a good chance that one or the other of the + * two mates has a good alignment to another place on the genome, this + * compromises the evidence for the large-scale variant.
 * For this
 * reason, Bowtie 2's policy is: if there are no paired-end alignments
 * and there is *exactly one alignment each* for both mates, then the
 * two alignments are paired and treated as a discordant paired-end
 * alignment.  Otherwise, all alignments are treated as unpaired
 * alignments.
 *
 * When both paired and unpaired alignments are discovered by the
 * aligner, only the paired alignments are reported by default.  This
 * is sensible considering relative likelihoods: if a good paired-end
 * alignment is found, it is much more likely that the placement of
 * the two mates implied by that paired alignment is correct than any
 * placement implied by an unpaired alignment.
 *
 * NOTE(review): the template parameter list after 'template' below (and the
 * template arguments of AlnSink, EList, SeedResults and numeric_limits)
 * appear to have been lost when this text was extracted; restore them from
 * the original header.
 */
template
class AlnSinkWrap {
public:

    /**
     * Wrap the global sink 'g' for one thread.  All best / second-best
     * scores start at getMinScore(), all spliced counters at 0, and the
     * wrapper starts out uninitialized (init_ == false) with no read
     * attached.
     *
     * Note that rp_ is a copy of 'rp', while the ReportingState st_ is
     * constructed from the caller's 'rp' object itself.
     */
    AlnSinkWrap(
        AlnSink& g,                // AlnSink being wrapped
        const ReportingParams& rp, // Parameters governing reporting
        Mapq& mapq,                // Mapq calculator
        size_t threadId,           // Thread ID
        bool secondary = false,    // Secondary alignments
        const SpliceSiteDB* ssdb = NULL, // splice sites
        uint64_t threads_rids_mindist = 0) : // synchronization
        g_(g),
        rp_(rp),
        threadid_(threadId),
        mapq_(mapq),
        secondary_(secondary),
        ssdb_(ssdb),
        threads_rids_mindist_(threads_rids_mindist),
        init_(false),
        maxed1_(false),       // read is pair and we maxed out mate 1 unp alns
        maxed2_(false),       // read is pair and we maxed out mate 2 unp alns
        maxedOverall_(false), // alignments found so far exceed -m/-M ceiling
        bestPair_(getMinScore()),
        best2Pair_(getMinScore()),
        bestUnp1_(getMinScore()),
        best2Unp1_(getMinScore()),
        bestUnp2_(getMinScore()),
        best2Unp2_(getMinScore()),
        bestUnpRepeat1_(getMinScore()),
        best2UnpRepeat1_(getMinScore()),
        bestUnpRepeat2_(getMinScore()),
        best2UnpRepeat2_(getMinScore()),
        bestSplicedPair_(0),
        best2SplicedPair_(0),
        bestSplicedUnp1_(0),
        best2SplicedUnp1_(0),
        bestSplicedUnp2_(0),
        best2SplicedUnp2_(0),
        rd1_(NULL),    // mate 1
        rd2_(NULL),    // mate 2
        rdid_(std::numeric_limits::max()), // read id
        rs1_(),        // mate 1 alignments for paired-end alignments
        rs2_(),        // mate 2 alignments for paired-end alignments
        rs1u_(),       // mate 1 unpaired alignments
        rs2u_(),       // mate 2 unpaired alignments
        select1_(),    // for selecting random subsets for mate 1
        select2_(),    // for selecting random subsets for mate 2
        st_(rp)        // reporting state - what's left to do?
    {
        assert(rp_.repOk());
    }

    // Clear the "initialized with a read" flag, so inited() returns false
    // until init_ is set again.
    void resetInit_(){
        init_ = false;
    };


    /**
     * Initialize the wrapper with a new read pair and return an
     * integer >= -1 indicating which stage the aligner should start
     * at.  If -1 is returned, the aligner can skip the read entirely.
     * Checks if the new read pair is identical to the
     * previous pair.  If it is, then we return the id of the first
     * stage to run.
     */
    int nextRead(
        // One of the other of rd1, rd2 will = NULL if read is unpaired
        const Read* rd1,      // new mate #1
        const Read* rd2,      // new mate #2
        TReadId rdid,         // read ID for new pair
        bool qualitiesMatter);// aln policy distinguishes b/t quals?

    /**
     * Inform global, shared AlnSink object that we're finished with
     * this read.  The global AlnSink is responsible for updating
     * counters, creating the output record, and delivering the record
     * to the appropriate output stream.
     */
    virtual void finishRead(
        const SeedResults *sr1, // seed alignment results for mate 1
        const SeedResults *sr2, // seed alignment results for mate 2
        bool               exhaust1,    // mate 1 exhausted?
        bool               exhaust2,    // mate 2 exhausted?
        bool               nfilt1,      // mate 1 N-filtered?
        bool               nfilt2,      // mate 2 N-filtered?
        bool               scfilt1,     // mate 1 score-filtered?
        bool               scfilt2,     // mate 2 score-filtered?
        bool               lenfilt1,    // mate 1 length-filtered?
        bool               lenfilt2,    // mate 2 length-filtered?
        bool               qcfilt1,     // mate 1 qc-filtered?
        bool               qcfilt2,     // mate 2 qc-filtered?
        bool               sortByScore, // prioritize alignments by score
        RandomSource&      rnd,         // pseudo-random generator
        ReportingMetrics&  met,         // reporting metrics
        const PerReadMetrics& prm,      // per-read metrics
        const Scoring& sc,              // scoring scheme
        bool suppressSeedSummary = true,
        bool suppressAlignments = false,
        bool templateLenAdjustment = true);

    /*void finish3NRead(
        const SeedResults *sr1, // seed alignment results for mate 1
        const SeedResults *sr2, // seed alignment results for mate 2
        bool               exhaust1,    // mate 1 exhausted?
        bool               exhaust2,    // mate 2 exhausted?
        bool               nfilt1,      // mate 1 N-filtered?
        bool               nfilt2,      // mate 2 N-filtered?
        bool               scfilt1,     // mate 1 score-filtered?
        bool               scfilt2,     // mate 2 score-filtered?
        bool               lenfilt1,    // mate 1 length-filtered?
        bool               lenfilt2,    // mate 2 length-filtered?
        bool               qcfilt1,     // mate 1 qc-filtered?
        bool               qcfilt2,     // mate 2 qc-filtered?
        bool               sortByScore, // prioritize alignments by score
        RandomSource&      rnd,         // pseudo-random generator
        ReportingMetrics&  met,         // reporting metrics
        const PerReadMetrics& prm,      // per-read metrics
        const Scoring& sc,              // scoring scheme
        bool suppressSeedSummary = true,
        bool suppressAlignments = false,
        bool templateLenAdjustment = true);*/


    /**
     * Called by the aligner when a new unpaired or paired alignment is
     * discovered in the given stage.  This function checks whether the
     * addition of this alignment causes the reporting policy to be
     * violated (by meeting or exceeding the limits set by -k, -m, -M),
     * in which case true is returned immediately and the aligner is
     * short circuited.  Otherwise, the alignment is tallied and false
     * is returned.
     */
    bool report(
        int stage,
        const AlnRes* rs1,
        const AlnRes* rs2,
        bool alignMate = false);

#ifndef NDEBUG
    /**
     * Check that hit sink wrapper is internally consistent.
+ */ + bool repOk() const { + assert_eq(rs2_.size(), rs1_.size()); + if(rp_.mhitsSet()) { + assert_gt(rp_.mhits, 0); + assert_leq((int)rs1_.size(), rp_.mhits+1); + assert_leq((int)rs2_.size(), rp_.mhits+1); + assert(readIsPair() || (int)rs1u_.size() <= rp_.mhits+1); + assert(readIsPair() || (int)rs2u_.size() <= rp_.mhits+1); + } + if(init_) { + assert(rd1_ != NULL); + assert_neq(std::numeric_limits::max(), rdid_); + } + //assert_eq(st_.numConcordant() + st_.numDiscordant(), rs1_.size()); + //assert_eq(st_.numUnpaired1(), rs1u_.size()); + //assert_eq(st_.numUnpaired2(), rs2u_.size()); + assert(st_.repOk()); + return true; + } +#endif + + /** + * Return true iff no alignments have been reported to this wrapper + * since the last call to nextRead(). + */ + bool empty() const { + return rs1_.empty() && rs1u_.empty() && rs2u_.empty(); + } + + /** + * Return true iff we have already encountered a number of alignments that + * exceeds the -m/-M ceiling. TODO: how does this distinguish between + * pairs and mates? + */ + bool maxed() const { + return maxedOverall_; + } + + /** + * Return true if the current read is paired. + */ + bool readIsPair() const { + return rd1_ != NULL && rd2_ != NULL; + } + + /** + * Return true iff nextRead() has been called since the last time + * finishRead() was called. + */ + bool inited() const { return init_; } + + /** + * Return a const ref to the ReportingState object associated with the + * AlnSinkWrap. + */ + const ReportingState& state() const { return st_; } + + const ReportingParams& reportingParams() { return rp_;} + + /** + * Return true iff we're in -M mode. + */ + bool Mmode() const { + return rp_.mhitsSet(); + } + + /** + * Return true iff the policy is to report all hits. + */ + bool allHits() const { + return rp_.allHits(); + } + + /** + * Return true iff at least two alignments have been reported so far for an + * unpaired read or mate 1. 
     * Implemented as "the second-best score has moved off its initial
     * getMinScore() value".
     */
    bool hasSecondBestUnp1() const {
        return best2Unp1_ != getMinScore();
    }

    /**
     * Return true iff at least two alignments have been reported so far for
     * mate 2.
     */
    bool hasSecondBestUnp2() const {
        return best2Unp2_ != getMinScore();
    }

    /**
     * Return true iff at least two paired-end alignments have been reported so
     * far.
     */
    bool hasSecondBestPair() const {
        return best2Pair_ != getMinScore();
    }

    /**
     * Get best score observed so far for an unpaired read or mate 1.
     */
    TAlScore bestUnp1() const {
        return bestUnp1_;
    }

    /**
     * Get second-best score observed so far for an unpaired read or mate 1.
     */
    TAlScore secondBestUnp1() const {
        return best2Unp1_;
    }

    /**
     * Get best score observed so far for mate 2.
     */
    TAlScore bestUnp2() const {
        return bestUnp2_;
    }

    /**
     * Get second-best score observed so far for mate 2.
     */
    TAlScore secondBestUnp2() const {
        return best2Unp2_;
    }

    /**
     * Get best score observed so far among repeat alignments for an unpaired
     * read or mate 1.
     */
    TAlScore bestUnpRepeat1() const {
        return bestUnpRepeat1_;
    }

    /**
     * Get second-best score observed so far among repeat alignments for an
     * unpaired read or mate 1.
     */
    TAlScore secondBestUnpRepeat1() const {
        return best2UnpRepeat1_;
    }

    /**
     * Get best score observed so far among repeat alignments for mate 2.
     */
    TAlScore bestUnpRepeat2() const {
        return bestUnpRepeat2_;
    }

    /**
     * Get second-best score observed so far among repeat alignments for
     * mate 2.
     */
    TAlScore secondBestUnpRepeat2() const {
        return best2UnpRepeat2_;
    }

    /**
     * Get best score observed so far for paired-end read.
     */
    TAlScore bestPair() const {
        return bestPair_;
    }

    /**
     * Get second-best score observed so far for paired-end read.
     */
    TAlScore secondBestPair() const {
        return best2Pair_;
    }

    // Accessors for the best / second-best "spliced" values kept for pairs
    // and for each mate; all six are set to 0 by the constructor.
    // NOTE(review): their exact meaning is set where they are updated, which
    // is not in this part of the file.
    index_t bestSplicedPair() const {
        return bestSplicedPair_;
    }

    index_t best2SplicedPair() const {
        return best2SplicedPair_;
    }

    index_t bestSplicedUnp1() const {
        return bestSplicedUnp1_;
    }

    index_t best2SplicedUnp1() const {
        return best2SplicedUnp1_;
    }

    index_t bestSplicedUnp2() const {
        return bestSplicedUnp2_;
    }

    index_t best2SplicedUnp2() const {
        return best2SplicedUnp2_;
    }

    // Return the 'secondary' flag given at construction (allow for secondary
    // alignments).
    bool secondary() const {
        return secondary_;
    }

    /**
     * Out-parameter accessors: point the caller's pointer(s) at the internal
     * result lists (unpaired mate 1, unpaired mate 2, or the two parallel
     * paired-end lists).  The lists remain owned by this wrapper.
     */
    void getUnp1(const EList*& rs) const { rs = &rs1u_; }
    void getUnp2(const EList*& rs) const { rs = &rs2u_; }
    void getPair(const EList*& rs1, const EList*& rs2) const { rs1 = &rs1_; rs2 = &rs2_; }

    // Number of alignments currently stored in each list.
    index_t numUnp1() const { return rs1u_.size(); }
    index_t numUnp2() const { return rs2u_.size(); }
    index_t numPair() const { assert_eq(rs1_.size(), rs2_.size()); return rs1_.size(); }

    /**
     * Count the unpaired alignments that share the best score for one mate
     * (rdi == 0 selects mate 1's list, anything else mate 2's).
     *
     * Returns (numGenome, numRepeat): how many of the top-scoring alignments
     * are non-repeat vs. repeat alignments.  Both are 0 for an empty list.
     */
    pair numBestUnp(index_t rdi) const {
        index_t numGenome = 0, numRepeat = 0;
        TAlScore maxScore = getMinScore();
        const EList& rs = (rdi == 0 ? rs1u_ : rs2u_);
        for(size_t i = 0; i < rs.size(); i++) {
            TAlScore curScore = rs[i].score().score();
            // New best score: discard the counts gathered for the old one.
            if(curScore > maxScore) {
                numGenome = numRepeat = 0;
                maxScore = curScore;
            }

            // Count this alignment if it ties the (possibly just updated) best.
            if(curScore >= maxScore) {
                if(rs[i].repeat()) {
                    numRepeat++;
                } else {
                    numGenome++;
                }
            }
        }

        return pair(numGenome, numRepeat);
    }

    /**
     * Same as numBestUnp() but for paired-end alignments: a pair's score is
     * the sum of its two mates' scores, and a pair counts as a repeat pair
     * when its mates are repeat alignments (asserted to be both or neither).
     */
    pair numBestPair() const {
        index_t numGenome = 0, numRepeat = 0;
        TAlScore maxScore = getMinScore();
        assert_eq(rs1_.size(), rs2_.size());
        for(size_t i = 0; i < rs1_.size(); i++) {
            TAlScore curScore = rs1_[i].score().score() + rs2_[i].score().score();
            // New best score: discard the counts gathered for the old one.
            if(curScore > maxScore) {
                numGenome = numRepeat = 0;
                maxScore = curScore;
            }

            if(curScore >= maxScore) {
                if(rs1_[i].repeat() || rs2_[i].repeat()) {
                    assert(rs1_[i].repeat() && rs2_[i].repeat());
                    numRepeat++;
                } else {
                    numGenome++;
                }
            }
        }

        return pair(numGenome, numRepeat);
    }

protected:

    /**
     * Return true iff the read in rd1/rd2 matches the last read handled, which
     * should still be in rd1_/rd2_.
     */
    bool sameRead(
        const Read* rd1,
        const Read* rd2,
        bool qualitiesMatter);

    /**
     * If there is a configuration of unpaired alignments that fits our
     * criteria for there being one or more discordant alignments, then
     * shift the discordant alignments over to the rs1_/rs2_ lists, clear the
     * rs1u_/rs2u_ lists and return true.  Otherwise, return false.
     */
    bool prepareDiscordants();

    /**
     * Given that rs is already populated with alignments, consider the
     * alignment policy and make random selections where necessary.  E.g. if we
     * found 10 alignments and the policy is -k 2 -m 20, select 2 alignments at
     * random.  We "select" an alignment by setting the parallel entry in the
     * 'select' list to true.
+ */ + size_t selectAlnsToReport( + const EList& rs, // alignments to select from + uint64_t num, // number of alignments to select + EList& select, // list to put results in + RandomSource& rnd) + const; + + /** + * rs1 (possibly together with rs2 if reads are paired) are populated with + * alignments. Here we prioritize them according to alignment score, and + * some randomness to break ties. Priorities are returned in the 'select' + * list. + */ + size_t selectByScore( + const EList* rs1, // alignments to select from (mate 1) + const EList* rs2, // alignments to select from (mate 2, or NULL) + uint64_t num, // number of alignments to select + EList& select, // prioritized list to put results in + RandomSource& rnd) + const; + + AlnSink& g_; // global alignment sink + ReportingParams rp_; // reporting parameters: khits, mhits etc + size_t threadid_; // thread ID + Mapq& mapq_; // mapq calculator + bool secondary_; // allow for secondary alignments + const SpliceSiteDB* ssdb_; // splice sites + uint64_t threads_rids_mindist_; // synchronization + bool init_; // whether we're initialized w/ read pair + bool maxed1_; // true iff # unpaired mate-1 alns reported so far exceeded -m/-M + bool maxed2_; // true iff # unpaired mate-2 alns reported so far exceeded -m/-M + bool maxedOverall_; // true iff # paired-end alns reported so far exceeded -m/-M + TAlScore bestPair_; // greatest score so far for paired-end + TAlScore best2Pair_; // second-greatest score so far for paired-end + TAlScore bestUnp1_; // greatest score so far for unpaired/mate1 + TAlScore best2Unp1_; // second-greatest score so far for unpaired/mate1 + TAlScore bestUnp2_; // greatest score so far for mate 2 + TAlScore best2Unp2_; // second-greatest score so far for mate 2 + TAlScore bestUnpRepeat1_; // greatest score so far for repeat unpaired/mate1 + TAlScore best2UnpRepeat1_; // second-greatest score so far for repeat unpaired/mate1 + TAlScore bestUnpRepeat2_; // greatest score so far for repeat mate 2 + 
TAlScore best2UnpRepeat2_; // second-greatest score so far for repeat mate 2 + index_t bestSplicedPair_; + index_t best2SplicedPair_; + index_t bestSplicedUnp1_; + index_t best2SplicedUnp1_; + index_t bestSplicedUnp2_; + index_t best2SplicedUnp2_; + const Read* rd1_; // mate #1 + const Read* rd2_; // mate #2 + TReadId rdid_; // read ID (potentially used for ordering) + EList rs1_; // paired alignments for mate #1 + EList rs2_; // paired alignments for mate #2 + EList rs1u_; // unpaired alignments for mate #1 + EList rs2u_; // unpaired alignments for mate #2 + EList select1_; // parallel to rs1_/rs2_ - which to report + EList select2_; // parallel to rs1_/rs2_ - which to report + ReportingState st_; // reporting state - what's left to do? + + EList > selectBuf_; + BTString obuf_; + StackedAln staln_; + + EList spliceSites_; + +}; + + +template +class AlnSinkWrap3N : public AlnSinkWrap { + using AlnSinkWrap::obuf_; + using AlnSinkWrap::g_; + using AlnSinkWrap::rdid_; + using AlnSinkWrap::threadid_; + using AlnSinkWrap::rd1_; + using AlnSinkWrap::rd2_; + using AlnSinkWrap::rs1_; + using AlnSinkWrap::rs2_; + using AlnSinkWrap::rs1u_; + using AlnSinkWrap::rs2u_; + using AlnSinkWrap::st_; + using AlnSinkWrap::readIsPair; + using AlnSinkWrap::select1_; + using AlnSinkWrap::select2_; + using AlnSinkWrap::spliceSites_; + using AlnSinkWrap::ssdb_; + using AlnSinkWrap::threads_rids_mindist_; + using AlnSinkWrap::staln_; + using AlnSinkWrap::mapq_; + using AlnSinkWrap::init_; + using AlnSinkWrap::prepareDiscordants; + using AlnSinkWrap::rp_; + using AlnSinkWrap::selectByScore; + using AlnSinkWrap::selectAlnsToReport; + + int lastMappingCycle; + +public: + AlnSinkWrap3N( + AlnSink& g, // AlnSink being wrapped + const ReportingParams& rp, // Parameters governing reporting + Mapq& mapq, // Mapq calculator + size_t threadId, // Thread ID + bool mappingCycles[4], // mapping cycles + bool secondary = false, // Secondary alignments + const SpliceSiteDB* ssdb = NULL, // splice sites + 
uint64_t threads_rids_mindist = 0) : + AlnSinkWrap( + g, + rp, + mapq, + threadId, + secondary, + ssdb, + threads_rids_mindist) + { + for (int i = 3; i >= 0; i--) + { + if (mappingCycles[i]) + { + lastMappingCycle = i; + break; + } + } + } + + void finishRead( + const SeedResults *sr1, // seed alignment results for mate 1 + const SeedResults *sr2, // seed alignment results for mate 2 + bool exhaust1, // mate 1 exhausted? + bool exhaust2, // mate 2 exhausted? + bool nfilt1, // mate 1 N-filtered? + bool nfilt2, // mate 2 N-filtered? + bool scfilt1, // mate 1 score-filtered? + bool scfilt2, // mate 2 score-filtered? + bool lenfilt1, // mate 1 length-filtered? + bool lenfilt2, // mate 2 length-filtered? + bool qcfilt1, // mate 1 qc-filtered? + bool qcfilt2, // mate 2 qc-filtered? + bool sortByScore, // prioritize alignments by score + RandomSource& rnd, // pseudo-random generator + ReportingMetrics& met, // reporting metrics + const PerReadMetrics& prm, // per-read metrics + const Scoring& sc, // scoring scheme + bool suppressSeedSummary, // = true + bool suppressAlignments, // = false + bool templateLenAdjustment) // = true + { + obuf_.clear(); + OutputQueueMark qqm(g_.outq(), obuf_, rdid_, threadid_); + assert(init_); + if(!suppressSeedSummary) { + if(sr1 != NULL) { + assert(rd1_ != NULL); + // Mate exists and has non-empty SeedResults + g_.reportSeedSummary(obuf_, *rd1_, rdid_, threadid_, *sr1, true); + } else if(rd1_ != NULL) { + // Mate exists but has NULL SeedResults + g_.reportEmptySeedSummary(obuf_, *rd1_, rdid_, true); + } + if(sr2 != NULL) { + assert(rd2_ != NULL); + // Mate exists and has non-empty SeedResults + g_.reportSeedSummary(obuf_, *rd2_, rdid_, threadid_, *sr2, true); + } else if(rd2_ != NULL) { + // Mate exists but has NULL SeedResults + g_.reportEmptySeedSummary(obuf_, *rd2_, rdid_, true); + } + } + if(!suppressAlignments) { + // Ask the ReportingState what to report + st_.finish(); + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 
0; + uint64_t nunpairRepeat1 = 0, nunpairRepeat2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + st_.getReport( + nconcord, + ndiscord, + nunpair1, + nunpair2, + nunpairRepeat1, + nunpairRepeat2, + pairMax, + unpair1Max, + unpair2Max); + assert_leq(nconcord, rs1_.size()); + assert_leq(nunpair1, rs1u_.size()); + assert_leq(nunpair2, rs2u_.size()); + assert_leq(ndiscord, 1); + assert_gt(rp_.khits, 0); + assert_gt(rp_.mhits, 0); + assert(!pairMax || rs1_.size() >= (uint64_t)rp_.mhits); + assert(!unpair1Max || rs1u_.size() >= (uint64_t)rp_.mhits); + assert(!unpair2Max || rs2u_.size() >= (uint64_t)rp_.mhits); + + // Report concordant paired-end alignments if possible + if(nconcord > 0) { + AlnSetSumm concordSumm( + rd1_, rd2_, &rs1_, &rs2_, &rs1u_, &rs2u_, + exhaust1, exhaust2, -1, -1, false); + + // Possibly select a random subset + size_t off; + if(sortByScore) { + // Sort by score then pick from low to high + off = selectByScore(&rs1_, &rs2_, nconcord, select1_, rnd); + } else { + // Select subset randomly + off = selectAlnsToReport(rs1_, nconcord, select1_, rnd); + } + + concordSumm.numAlnsPaired(select1_.size()); + + assert_lt(off, rs1_.size()); + const AlnRes *rs1 = &rs1_[off]; + const AlnRes *rs2 = &rs2_[off]; + AlnFlags flags1( + ALN_FLAG_PAIR_CONCORD_MATE1, + st_.params().mhitsSet(), + unpair1Max, + pairMax, + nfilt1, + scfilt1, + lenfilt1, + qcfilt1, + st_.params().mixed, + true, // primary + true, // opp aligned + rs2->fw()); // opp fw + AlnFlags flags2( + ALN_FLAG_PAIR_CONCORD_MATE2, + st_.params().mhitsSet(), + unpair2Max, + pairMax, + nfilt2, + scfilt2, + lenfilt2, + qcfilt2, + st_.params().mixed, + false, // primary + true, // opp aligned + rs1->fw()); // opp fw + // Issue: we only set the flags once, but some of the flags might + // vary from pair to pair among the pairs we're reporting. For + // instance, whether a given mate aligns to the forward strand. 
+ SeedAlSumm ssm1, ssm2; + if(sr1 != NULL && sr2 != NULL) { + sr1->toSeedAlSumm(ssm1); + sr2->toSeedAlSumm(ssm2); + } + for(size_t i = 0; i < rs1_.size(); i++) { + spliceSites_.clear(); + if(templateLenAdjustment) { + rs1_[i].setMateParams(ALN_RES_TYPE_MATE1, &rs2_[i], flags1, ssdb_, threads_rids_mindist_, &spliceSites_); + rs2_[i].setMateParams(ALN_RES_TYPE_MATE2, &rs1_[i], flags2, ssdb_, threads_rids_mindist_, &spliceSites_); + } else { + rs1_[i].setMateParams(ALN_RES_TYPE_MATE1, &rs2_[i], flags1); + rs2_[i].setMateParams(ALN_RES_TYPE_MATE2, &rs1_[i], flags2); + } + assert_eq(abs(rs1_[i].fragmentLength()), abs(rs2_[i].fragmentLength())); + } + assert(!select1_.empty()); + g_.reportHits( + obuf_, + staln_, + threadid_, + rd1_, + rd2_, + rdid_, + select1_, + NULL, + &rs1_, + &rs2_, + pairMax, + concordSumm, + ssm1, + ssm2, + &flags1, + &flags2, + prm, + mapq_, + sc); + + init_ = false; + //g_.outq().finishRead(obuf_, rdid_, threadid_); + if (rd1_->threeN_cycle == lastMappingCycle) { + g_.output(threadid_-1, met, obuf_); + } + return; + } + // Report concordant paired-end alignments if possible + else if(ndiscord > 0) { + ASSERT_ONLY(bool ret =) prepareDiscordants(); + assert(ret); + assert_eq(1, rs1_.size()); + assert_eq(1, rs2_.size()); + AlnSetSumm discordSumm( + rd1_, rd2_, &rs1_, &rs2_, &rs1u_, &rs2u_, + exhaust1, exhaust2, -1, -1, false); + const AlnRes *rs1 = &rs1_[0]; + const AlnRes *rs2 = &rs2_[0]; + AlnFlags flags1( + ALN_FLAG_PAIR_DISCORD_MATE1, + st_.params().mhitsSet(), + false, + pairMax, + nfilt1, + scfilt1, + lenfilt1, + qcfilt1, + st_.params().mixed, + true, // primary + true, // opp aligned + rs2->fw()); // opp fw + AlnFlags flags2( + ALN_FLAG_PAIR_DISCORD_MATE2, + st_.params().mhitsSet(), + false, + pairMax, + nfilt2, + scfilt2, + lenfilt2, + qcfilt2, + st_.params().mixed, + false, // primary + true, // opp aligned + rs1->fw()); // opp fw + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL) sr1->toSeedAlSumm(ssm1); + if(sr2 != NULL) sr2->toSeedAlSumm(ssm2); 
+ for(size_t i = 0; i < rs1_.size(); i++) { + rs1_[i].setMateParams(ALN_RES_TYPE_MATE1, &rs2_[i], flags1); + rs2_[i].setMateParams(ALN_RES_TYPE_MATE2, &rs1_[i], flags2); + assert(rs1_[i].isFraglenSet() == rs2_[i].isFraglenSet()); + assert(!rs1_[i].isFraglenSet() || abs(rs1_[i].fragmentLength()) == abs(rs2_[i].fragmentLength())); + } + ASSERT_ONLY(size_t off); + if(sortByScore) { + // Sort by score then pick from low to high + ASSERT_ONLY(off =) selectByScore(&rs1_, &rs2_, ndiscord, select1_, rnd); + } else { + // Select subset randomly + ASSERT_ONLY(off =) selectAlnsToReport(rs1_, ndiscord, select1_, rnd); + } + assert_eq(0, off); + assert(!select1_.empty()); + g_.reportHits( + obuf_, + staln_, + threadid_, + rd1_, + rd2_, + rdid_, + select1_, + NULL, + &rs1_, + &rs2_, + pairMax, + discordSumm, + ssm1, + ssm2, + &flags1, + &flags2, + prm, + mapq_, + sc); + + init_ = false; + //g_.outq().finishRead(obuf_, rdid_, threadid_); + if (rd1_->threeN_cycle == lastMappingCycle) { + g_.output(threadid_-1, met, obuf_); + } + return; + } + // If we're at this point, at least one mate failed to align. + // BTL: That's not true. It could be that there are no concordant + // alignments but both mates have unpaired alignments, with one of + // the mates having more than one. + //assert(nunpair1 == 0 || nunpair2 == 0); + assert(!pairMax); + + const AlnRes *repRs1 = NULL, *repRs2 = NULL; + AlnSetSumm summ1, summ2; + AlnFlags flags1, flags2; + TRefId refid = -1; TRefOff refoff = -1; bool repeat = false; + bool rep1 = rd1_ != NULL && nunpair1 > 0; + bool rep2 = rd2_ != NULL && nunpair2 > 0; + + // This is the preliminary if statement for mate 1 - here we're + // gathering some preliminary information, making it possible to call + // g_.reportHits(...) 
with information about both mates potentially + if(rep1) { + // Mate 1 aligned at least once + if(rep2) { + summ1.init( + rd1_, rd2_, NULL, NULL, &rs1u_, &rs2u_, + exhaust1, exhaust2, -1, -1, false); + } else { + summ1.init( + rd1_, NULL, NULL, NULL, &rs1u_, NULL, + exhaust1, exhaust2, -1, -1, false); + } + size_t off; + if(sortByScore) { + // Sort by score then pick from low to high + off = selectByScore(&rs1u_, NULL, nunpair1, select1_, rnd); + } else { + // Select subset randomly + off = selectAlnsToReport(rs1u_, nunpair1, select1_, rnd); + } + summ1.numAlns1(select1_.size()); + summ2.numAlns1(select1_.size()); + repRs1 = &rs1u_[off]; + } else if(rd1_ != NULL) { + // Mate 1 failed to align - don't do anything yet. First we want + // to collect information on mate 2 in case that factors into the + // summary + assert(!unpair1Max); + } + + if(rep2) { + if(rep1) { + summ2.init( + rd1_, rd2_, NULL, NULL, &rs1u_, &rs2u_, + exhaust1, exhaust2, -1, -1, false); + } else { + summ2.init( + NULL, rd2_, NULL, NULL, NULL, &rs2u_, + exhaust1, exhaust2, -1, -1, false); + } + size_t off; + if(sortByScore) { + // Sort by score then pick from low to high + off = selectByScore(&rs2u_, NULL, nunpair2, select2_, rnd); + } else { + // Select subset randomly + off = selectAlnsToReport(rs2u_, nunpair2, select2_, rnd); + } + repRs2 = &rs2u_[off]; + summ1.numAlns2(select2_.size()); + summ2.numAlns2(select2_.size()); + } else if(rd2_ != NULL) { + // Mate 2 failed to align - don't do anything yet. First we want + // to collect information on mate 1 in case that factors into the + // summary + assert(!unpair2Max); + } + + // Now set up flags + if(rep1) { + // Initialize flags. Note: We want to have information about how + // the other mate aligned (if it did) at this point + flags1.init( + readIsPair() ? 
+ ALN_FLAG_PAIR_UNPAIRED_MATE1 : + ALN_FLAG_PAIR_UNPAIRED, + st_.params().mhitsSet(), + unpair1Max, + pairMax, + nfilt1, + scfilt1, + lenfilt1, + qcfilt1, + st_.params().mixed, + true, // primary + repRs2 != NULL, // opp aligned + repRs2 == NULL || repRs2->fw()); // opp fw + for(size_t i = 0; i < rs1u_.size(); i++) { + rs1u_[i].setMateParams(ALN_RES_TYPE_UNPAIRED_MATE1, NULL, flags1); + } + } + if(rep2) { + // Initialize flags. Note: We want to have information about how + // the other mate aligned (if it did) at this point + flags2.init( + readIsPair() ? + ALN_FLAG_PAIR_UNPAIRED_MATE2 : + ALN_FLAG_PAIR_UNPAIRED, + st_.params().mhitsSet(), + unpair2Max, + pairMax, + nfilt2, + scfilt2, + lenfilt2, + qcfilt2, + st_.params().mixed, + true, // primary + repRs1 != NULL, // opp aligned + repRs1 == NULL || repRs1->fw()); // opp fw + for(size_t i = 0; i < rs2u_.size(); i++) { + rs2u_[i].setMateParams(ALN_RES_TYPE_UNPAIRED_MATE2, NULL, flags2); + } + } + + // Now report mate 1 + if(rep1) { + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL) sr1->toSeedAlSumm(ssm1); + if(sr2 != NULL) sr2->toSeedAlSumm(ssm2); + assert(!select1_.empty()); + g_.reportHits( + obuf_, + staln_, + threadid_, + rd1_, + repRs2 != NULL ? rd2_ : NULL, + rdid_, + select1_, + repRs2 != NULL ? &select2_ : NULL, + &rs1u_, + repRs2 != NULL ? &rs2u_ : NULL, + unpair1Max, + summ1, + ssm1, + ssm2, + &flags1, + repRs2 != NULL ? &flags2 : NULL, + prm, + mapq_, + sc); + assert_lt(select1_[0], rs1u_.size()); + refid = rs1u_[select1_[0]].refid(); + refoff = rs1u_[select1_[0]].refoff(); + repeat = rs1u_[select1_[0]].repeat(); + } + + // Now report mate 2 + if(rep2 && !rep1) { + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL) sr1->toSeedAlSumm(ssm1); + if(sr2 != NULL) sr2->toSeedAlSumm(ssm2); + assert(!select2_.empty()); + g_.reportHits( + obuf_, + staln_, + threadid_, + rd2_, + repRs1 != NULL ? rd1_ : NULL, + rdid_, + select2_, + repRs1 != NULL ? &select1_ : NULL, + &rs2u_, + repRs1 != NULL ? 
&rs1u_ : NULL, + unpair2Max, + summ2, + ssm1, + ssm2, + &flags2, + repRs1 != NULL ? &flags1 : NULL, + prm, + mapq_, + sc); + assert_lt(select2_[0], rs2u_.size()); + refid = rs2u_[select2_[0]].refid(); + refoff = rs2u_[select2_[0]].refoff(); + repeat = rs2u_[select2_[0]].repeat(); + } + + if(rd1_ != NULL && nunpair1 == 0) { + if(nunpair2 > 0) { + assert_neq(-1, refid); + summ1.init( + rd1_, NULL, NULL, NULL, NULL, NULL, + exhaust1, exhaust2, refid, refoff, repeat); + } else { + summ1.init( + rd1_, NULL, NULL, NULL, NULL, NULL, + exhaust1, exhaust2, -1, -1, false); + } + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL) sr1->toSeedAlSumm(ssm1); + if(sr2 != NULL) sr2->toSeedAlSumm(ssm2); + flags1.init( + readIsPair() ? + ALN_FLAG_PAIR_UNPAIRED_MATE1 : + ALN_FLAG_PAIR_UNPAIRED, + st_.params().mhitsSet(), + false, + false, + nfilt1, + scfilt1, + lenfilt1, + qcfilt1, + st_.params().mixed, + true, // primary + repRs2 != NULL, // opp aligned + (repRs2 != NULL) ? repRs2->fw() : false); // opp fw + g_.reportUnaligned( + obuf_, // string to write output to + staln_, + threadid_, + rd1_, // read 1 + NULL, // read 2 + rdid_, // read id + summ1, // summ + ssm1, // + ssm2, + &flags1, // flags 1 + NULL, // flags 2 + prm, // per-read metrics + mapq_, // MAPQ calculator + sc, // scoring scheme + true); // get lock? + } + if(rd2_ != NULL && nunpair2 == 0) { + if(nunpair1 > 0) { + assert_neq(-1, refid); + summ2.init( + NULL, rd2_, NULL, NULL, NULL, NULL, + exhaust1, exhaust2, refid, refoff, repeat); + } else { + summ2.init( + NULL, rd2_, NULL, NULL, NULL, NULL, + exhaust1, exhaust2, -1, -1, false); + } + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL) sr1->toSeedAlSumm(ssm1); + if(sr2 != NULL) sr2->toSeedAlSumm(ssm2); + flags2.init( + readIsPair() ? + ALN_FLAG_PAIR_UNPAIRED_MATE2 : + ALN_FLAG_PAIR_UNPAIRED, + st_.params().mhitsSet(), + false, + false, + nfilt2, + scfilt2, + lenfilt2, + qcfilt2, + st_.params().mixed, + true, // primary + repRs1 != NULL, // opp aligned + (repRs1 != NULL) ? 
repRs1->fw() : false); // opp fw + g_.reportUnaligned( + obuf_, // string to write output to + staln_, + threadid_, + rd2_, // read 1 + NULL, // read 2 + rdid_, // read id + summ2, // summ + ssm1, + ssm2, + &flags2, // flags 1 + NULL, // flags 2 + prm, // per-read metrics + mapq_, // MAPQ calculator + sc, // scoring scheme + true); // get lock? + } + } // if(suppress alignments) + init_ = false; + if (rd1_->threeN_cycle == lastMappingCycle) { + g_.output(threadid_-1, met, obuf_); + } + return; + } +}; + +/** + * An AlnSink concrete subclass for printing SAM alignments. The user might + * want to customize SAM output in various ways. We encapsulate all these + * customizations, and some of the key printing routines, in the SamConfig + * class in sam.h/sam.cpp. + */ +template +class AlnSinkSam : public AlnSink { + + typedef EList StrList; + + +public: + + AlnSinkSam( + OutputQueue& oq, // output queue + const SamConfig& samc, // settings & routines for SAM output + const StrList& refnames, // reference names + const StrList& repnames, // repeat names + bool quiet, // don't print alignment summary at end + ALTDB* altdb = NULL, + SpliceSiteDB* ssdb = NULL) : + AlnSink( + oq, + refnames, + repnames, + quiet, + altdb, + ssdb), + samc_(samc) + { } + + virtual ~AlnSinkSam() { } + + /** + * Append a single alignment result, which might be paired or + * unpaired, to the given output stream in Bowtie's verbose-mode + * format. If the alignment is paired-end, print mate1's alignment + * then mate2's alignment. + */ + virtual void append( + BTString& o, // write output to this string + StackedAln& staln, // StackedAln to write stacked alignment + size_t threadId, // which thread am I? 
+ const Read* rd1, // mate #1 + const Read* rd2, // mate #2 + const TReadId rdid, // read ID + AlnRes* rs1, // alignments for mate #1 + AlnRes* rs2, // alignments for mate #2 + const AlnSetSumm& summ, // summary + const SeedAlSumm& ssm1, // seed alignment summary + const SeedAlSumm& ssm2, // seed alignment summary + const AlnFlags* flags1, // flags for mate #1 + const AlnFlags* flags2, // flags for mate #2 + const PerReadMetrics& prm, // per-read metrics + const Mapq& mapq, // MAPQ calculator + const Scoring& sc, // scoring scheme + bool report2) // report alns for both mates + { + assert(rd1 != NULL || rd2 != NULL); + if(rd1 != NULL) { + assert(flags1 != NULL); + appendMate(o, staln, *rd1, rd2, rdid, rs1, rs2, summ, ssm1, ssm2, + *flags1, prm, mapq, sc); + if(rs1 != NULL && rs1->spliced() && this->spliceSiteDB_ != NULL) { + this->spliceSiteDB_->addSpliceSite(*rd1, *rs1); + } + } + if(rd2 != NULL && report2) { + assert(flags2 != NULL); + appendMate(o, staln, *rd2, rd1, rdid, rs2, rs1, summ, ssm2, ssm1, + *flags2, prm, mapq, sc); + if(rs2 != NULL && rs2->spliced() && this->spliceSiteDB_ != NULL) { + this->spliceSiteDB_->addSpliceSite(*rd2, *rs2); + } + } + } + + +protected: + + /** + * Append a single per-mate alignment result to the given output + * stream. If the alignment is part of a pair, information about + * the opposite mate and its alignment are given in rdo/rso. 
+ */ + virtual void appendMate( + BTString& o, + StackedAln& staln, + const Read& rd, + const Read* rdo, + const TReadId rdid, + AlnRes* rs, + AlnRes* rso, + const AlnSetSumm& summ, + const SeedAlSumm& ssm, + const SeedAlSumm& ssmo, + const AlnFlags& flags, + const PerReadMetrics& prm, // per-read metrics + const Mapq& mapq, // MAPQ calculator + const Scoring& sc); // scoring scheme + + const SamConfig& samc_; // settings & routines for SAM output + BTDnaString dseq_; // buffer for decoded read sequence + BTString dqual_; // buffer for decoded quality sequence +}; + +/** + * This is the class similar to AlnSinkSAM. + * AlnSink3NSam will put all the alignment result information into the class Alignment in "alignment_3N.h". + * This class should only be used for HISAT-3N (3N mode for HISAT2). + */ + +template +class AlnSink3NSam : public AlnSink { + +public: + const SamConfig& samc_; // settings & routines for SAM output + BTDnaString dseq_; // buffer for decoded read sequence + BTString dqual_; // buffer for decoded quality sequence + + //using AlnSinkSam::samc_; + typedef EList StrList; + using AlnSink::oq_; + + //int nThreads; + vector alignmentsEachThreads; + + AlnSink3NSam( + OutputQueue& oq, // output queue + const SamConfig& samc, // settings & routines for SAM output + const StrList& refnames, // reference names + const StrList& repnames, // repeat names + bool quiet, // don't print alignment summary at end + int nthreads, + BitPairReference* ref, + bool DNA, + ALTDB* altdb = NULL, + SpliceSiteDB* ssdb = NULL) : + AlnSink( + oq, + refnames, + repnames, + quiet, + altdb, + ssdb), + samc_(samc) + { + for (int i = 0; i < nthreads; i++) { + Alignments* newAlignments = new Alignments(ref, DNA); + alignmentsEachThreads.push_back(newAlignments); + } + } + + ~AlnSink3NSam() { + for (int i = 0; i < alignmentsEachThreads.size(); i++) { + delete alignmentsEachThreads[i]; + } + }; + + virtual void reportUnaligned( + BTString& o, // write to this string + StackedAln& 
staln, // StackedAln to write stacked alignment + size_t threadId, // which thread am I? + const Read *rd1, // mate #1 + const Read *rd2, // mate #2 + const TReadId rdid, // read ID + const AlnSetSumm& summ, // summary + const SeedAlSumm& ssm1, // seed alignment summary + const SeedAlSumm& ssm2, // seed alignment summary + const AlnFlags* flags1, // flags for mate #1 + const AlnFlags* flags2, // flags for mate #2 + const PerReadMetrics& prm, // per-read metrics + const Mapq& mapq, // MAPQ calculator + const Scoring& sc, // scoring scheme + bool report2, // report alns for both mates? + bool getLock = true) + { + append(o, staln, threadId, rd1, rd2, rdid, NULL, NULL, summ, + ssm1, ssm2, flags1, flags2, prm, mapq, sc, report2); + } + + /** + * output the rest of alignment information in alignmentsEachThreads[threadId0]. + * this function will be used after we receive new alignment result (with different rdid to previous one). + */ + virtual void output(int threadId0, ReportingMetrics& met, BTString& o) { + if (alignmentsEachThreads[threadId0]->readName->empty()) { + return; + } + met.nread++; + if (alignmentsEachThreads[threadId0]->paired) { + met.npaired++; + } else { + met.nunpaired++; + } + alignmentsEachThreads[threadId0]->output(met, o); + } + + /** + * Append a single alignment result, this function is for HSIAT-3N. + */ + void append( + BTString& o, // write output to this string + StackedAln& staln, // StackedAln to write stacked alignment + size_t threadId, // which thread am I? 
+ const Read* rd1, // mate #1 + const Read* rd2, // mate #2 + const TReadId rdid, // read ID + AlnRes* rs1, // alignments for mate #1 + AlnRes* rs2, // alignments for mate #2 + const AlnSetSumm& summ, // summary + const SeedAlSumm& ssm1, // seed alignment summary + const SeedAlSumm& ssm2, // seed alignment summary + const AlnFlags* flags1, // flags for mate #1 + const AlnFlags* flags2, // flags for mate #2 + const PerReadMetrics& prm, // per-read metrics + const Mapq& mapq, // MAPQ calculator + const Scoring& sc, // scoring scheme + bool report2) // report alns for both mates + { + // this function is for HLA alignment result report. + size_t threadId0 = threadId-1; + + assert(rd1 != NULL || rd2 != NULL); + if(rd1 != NULL) { + assert(flags1 != NULL); + appendMate(staln, *rd1, rd2, rdid, rs1, rs2, summ, ssm1, ssm2, + *flags1, prm, mapq, sc, threadId0); + if(rs1 != NULL && rs1->spliced() && this->spliceSiteDB_ != NULL) { + this->spliceSiteDB_->addSpliceSite(*rd1, *rs1); + } + } + if(rd2 != NULL && report2) { + assert(flags2 != NULL); + appendMate(staln, *rd2, rd1, rdid, rs2, rs1, summ, ssm2, ssm1, + *flags2, prm, mapq, sc, threadId0); + if(rs2 != NULL && rs2->spliced() && this->spliceSiteDB_ != NULL) { + this->spliceSiteDB_->addSpliceSite(*rd2, *rs2); + } + } + } + + /** + * Append a single per-mate alignment result to the Alignment class. + * This function is for HISAT-3N. + */ + virtual void appendMate( + //Alignment* newAlignment, + StackedAln& staln, // store stacked alignment struct here + const Read& rd, + const Read* rdo, + const TReadId rdid, + AlnRes* rs, + AlnRes* rso, + const AlnSetSumm& summ, + const SeedAlSumm& ssm, + const SeedAlSumm& ssmo, + const AlnFlags& flags, + const PerReadMetrics& prm, + const Mapq& mapqCalc, + const Scoring& sc, + size_t threadId0) // which thread am I? + { + // check whether we want to recieve this alignment reuslt. 
+ if(rs == NULL && samc_.omitUnalignedReads() || !alignmentsEachThreads[threadId0]->acceptNewAlignment()) { + return; + } + // get the Alignment pointer and append information into it. + Alignment* newAlignment; + alignmentsEachThreads[threadId0]->getFreeAlignmentPointer(newAlignment); + alignmentsEachThreads[threadId0]->getSequence(rd); + newAlignment->cycle_3N = rd.threeN_cycle; + + char buf[1024]; + char mapqInps[1024]; + if(rs != NULL) { + staln.reset(); + rs->initStacked(rd, staln); + staln.leftAlign(false /* not past MMs */); + } + int offAdj = 0; + // QNAME + samc_.printReadName(newAlignment->readName, rd.name, flags.partOfPair()); + // FLAG + int fl = 0; + if(flags.partOfPair()) { + fl |= SAM_FLAG_PAIRED; + if(flags.alignedConcordant()) { + fl |= SAM_FLAG_MAPPED_PAIRED; + } + if(!flags.mateAligned()) { + // Other fragment is unmapped + fl |= SAM_FLAG_MATE_UNMAPPED; + } + fl |= (flags.readMate1() ? + SAM_FLAG_FIRST_IN_PAIR : SAM_FLAG_SECOND_IN_PAIR); + if(flags.mateAligned() && rso != NULL) { + if(!rso->fw()) { + fl |= SAM_FLAG_MATE_STRAND; + } + } + } + if(!flags.isPrimary()) { + fl |= SAM_FLAG_NOT_PRIMARY; + } + if(rs != NULL && !rs->fw()) { + fl |= SAM_FLAG_QUERY_STRAND; + } + if(rs == NULL) { + // Failed to align + fl |= SAM_FLAG_UNMAPPED; + } + newAlignment->flag = fl; + // RNAME + if(rs != NULL) { + samc_.printRefNameFromIndex(newAlignment->chromosomeName, (size_t)rs->refid(), rs->repeat()); + newAlignment->chromosomeIndex = rs->refid(); + if (rs->repeat()) { + newAlignment->repeat = true; + } + } else { + if(summ.orefid() != -1) { + // Opposite mate aligned but this one didn't - print the opposite + // mate's RNAME and POS as is customary + assert(flags.partOfPair()); + samc_.printRefNameFromIndex(newAlignment->chromosomeName, (size_t)summ.orefid(), summ.repeat()); + if (newAlignment->repeat) { + newAlignment->pairToRepeat = true; + } + } else { + // No alignment + newAlignment->chromosomeName = "*"; + } + } + // POS + // Note: POS is *after* soft 
clipping. I.e. POS points to the + // upstream-most character *involved in the clipped alignment*. + if(rs != NULL) { + newAlignment->location = rs->refoff()+1+offAdj; + } else { + if(summ.orefid() != -1) { + // Opposite mate aligned but this one didn't - print the opposite + // mate's RNAME and POS as is customary + assert(flags.partOfPair()); + newAlignment->location = summ.orefoff()+1+offAdj; + } else { + // No alignment + newAlignment->location = 0; + } + } + // MAPQ + mapqInps[0] = '\0'; + if(rs != NULL) { + itoa10(mapqCalc.mapq( + summ, flags, rd.mate < 2, rd.length(), + rdo == NULL ? 0 : rdo->length(), mapqInps), buf); + newAlignment->MAPQ = buf; + } else { + // No alignment + newAlignment->MAPQ = "0"; + } + // CIGAR + if(rs != NULL) { + staln.buildCigar(false); + staln.writeCigar(newAlignment, NULL); + //newAlignment->getCigarSegement(staln); + } else { + // No alignment + newAlignment->cigarString = "*"; + } + if (rso != NULL) { + if (rso->repeat()) { + newAlignment->pairToRepeat = true; + } + } + // RNEXT + if(rs != NULL && flags.partOfPair()) { + if(rso != NULL && (rs->refid() != rso->refid() || rs->repeat() != rso->repeat())) { + samc_.printRefNameFromIndex(newAlignment->pairToChromosome, (size_t)rso->refid(), rso->repeat()); + } else { + newAlignment->pairToChromosome = "="; + } + } else if(summ.orefid() != -1) { + // The convention if this mate fails to align but the other doesn't is + // to copy the mate's details into here + newAlignment->pairToChromosome = "="; + } else { + newAlignment->pairToChromosome = "*"; + } + // PNEXT + if(rs != NULL && flags.partOfPair()) { + if(rso != NULL) { + newAlignment->pairToLocation = rso->refoff()+1; + } else { + // The convenstion is that if this mate aligns but the opposite + // doesn't, we print this mate's offset here + newAlignment->pairToLocation = rs->refoff()+1; + } + } else if(summ.orefid() != -1) { + // The convention if this mate fails to align but the other doesn't is + // to copy the mate's details 
into here + newAlignment->pairToLocation = summ.orefoff()+1; + } else { + newAlignment->pairToLocation = 0; + } + // ISIZE + if(rs != NULL && rs->isFraglenSet()) { + newAlignment->pairingDistance = rs->fragmentLength(); + } else { + // No fragment + newAlignment->pairingDistance = 0; + } + // SEQ + if(!flags.isPrimary() && samc_.omitSecondarySeqQual()) { + newAlignment->readSequence = "*"; + } else { + // Print the read + if(rd.patFw.length() == 0) { + newAlignment->readSequence = "*"; + } else { + if(rs == NULL || rs->fw()) { + newAlignment->readSequence = rd.originalFw.toZBuf(); + } else { + newAlignment->readSequence = rd.originalRc.toZBuf(); + } + } + } + // QUAL + if(!flags.isPrimary() && samc_.omitSecondarySeqQual()) { + newAlignment->readQuality = "*"; + } else { + // Print the quals + if(rd.qual.length() == 0) { + newAlignment->readQuality = "*"; + } else { + if(rs == NULL || rs->fw()) { + newAlignment->readQuality = rd.qual.toZBuf(); + } else { + newAlignment->readQuality = rd.qualRev.toZBuf(); + } + } + } + + // Optional fields + // + if(rs != NULL) { + samc_.printAlignedOptFlags( + newAlignment, + true, // first opt flag printed is first overall? + rd, // read + *rs, // individual alignment result + staln, // stacked alignment + flags, // alignment flags + summ, // summary of alignments for this read + ssm, // seed alignment summary + prm, // per-read metrics + sc, // scoring scheme + mapqInps, // inputs to MAPQ calculation + this->altdb_); + } else { + samc_.printEmptyOptFlags( + newAlignment, + true, // first opt flag printed is first overall? 
+ rd, // read + flags, // alignment flags + summ, // summary of alignments for this read + ssm, // seed alignment summary + prm, // per-read metrics + sc); // scoring scheme + } + alignmentsEachThreads[threadId0]->append(newAlignment); + } +}; + + +static inline std::ostream& printPct( + std::ostream& os, + uint64_t num, + uint64_t denom) +{ + double pct = 0.0f; + if(denom != 0) { pct = 100.0 * (double)num / (double)denom; } + os << fixed << setprecision(2) << pct << '%'; + return os; +} + +/** + * Print a friendly summary of: + * + * 1. How many reads were aligned and had one or more alignments + * reported + * 2. How many reads exceeded the -m or -M ceiling and therefore had + * their alignments suppressed or sampled + * 3. How many reads failed to align entirely + * + * Optionally print a series of Hadoop streaming-style counter updates + * with similar information. + */ +template +void AlnSink::printAlSumm( + ostream& out, + const ReportingMetrics& met, + size_t repThresh, // threshold for uniqueness, or max if no thresh + bool discord, // looked for discordant alignments + bool mixed, // looked for unpaired alignments where paired failed? + bool newSummary, // alignment summary in a new style + bool hadoopOut) // output Hadoop counters? 
+{ + // NOTE: there's a filtering step at the very beginning, so everything + // being reported here is post filtering + + bool canRep = repThresh != MAX_SIZE_T; + if(hadoopOut) { + out << "reporter:counter:HISAT2,Reads processed," << met.nread << endl; + } + uint64_t totread = met.nread; + uint64_t totpair = met.npaired; + uint64_t totunpair = met.nunpaired; + uint64_t tot_al_cand = totunpair + totpair*2; + uint64_t tot_al = (met.nconcord_uni + met.nconcord_rep) * 2 + (met.ndiscord) * 2 + met.nunp_0_uni + met.nunp_0_rep + met.nunp_uni + met.nunp_rep; + assert_leq(tot_al, tot_al_cand); + if(newSummary) { + out << "HISAT2 summary stats:" << endl; + if(totpair > 0) { + uint64_t ncondiscord_0 = met.nconcord_0 - met.ndiscord; + out << "\tTotal pairs: " << totpair << endl; + out << "\t\tAligned concordantly or discordantly 0 time: " << ncondiscord_0 << " ("; printPct(out, ncondiscord_0, met.npaired); out << ")" << endl; + out << "\t\tAligned concordantly 1 time: " << met.nconcord_uni1 << " ("; printPct(out, met.nconcord_uni1, met.npaired); out << ")" << endl; + out << "\t\tAligned concordantly >1 times: " << met.nconcord_uni2 << " ("; printPct(out, met.nconcord_uni2, met.npaired); out << ")" << endl; + out << "\t\tAligned discordantly 1 time: " << met.ndiscord << " ("; printPct(out, met.ndiscord, met.npaired); out << ")" << endl; + + out << "\tTotal unpaired reads: " << ncondiscord_0 * 2 << endl; + out << "\t\tAligned 0 time: " << met.nunp_0_0 << " ("; printPct(out, met.nunp_0_0, ncondiscord_0 * 2); out << ")" << endl; + out << "\t\tAligned 1 time: " << met.nunp_0_uni1 << " ("; printPct(out, met.nunp_0_uni1, ncondiscord_0 * 2); out << ")" << endl; + out << "\t\tAligned >1 times: " << met.nunp_0_uni2 << " ("; printPct(out, met.nunp_0_uni2, ncondiscord_0 * 2); out << ")" << endl; + } else { + out << "\tTotal reads: " << totread << endl; + out << "\t\tAligned 0 time: " << met.nunp_0 << " ("; printPct(out, met.nunp_0, met.nunpaired); out << ")" << endl; + out << 
"\t\tAligned 1 time: " << met.nunp_uni1 << " ("; printPct(out, met.nunp_uni1, met.nunpaired); out << ")" << endl; + out << "\t\tAligned >1 times: " << met.nunp_uni2 << " ("; printPct(out, met.nunp_uni2, met.nunpaired); out << ")" << endl; + } + out << "\tOverall alignment rate: "; printPct(out, tot_al, tot_al_cand); out << endl; + + } else { + if(totread > 0) { + out << "" << totread << " reads; of these:" << endl; + } else { + assert_eq(0, met.npaired); + assert_eq(0, met.nunpaired); + out << "" << totread << " reads" << endl; + } + if(totpair > 0) { + // Paired output + out << " " << totpair << " ("; + printPct(out, totpair, totread); + out << ") were paired; of these:" << endl; + + // Concordants + out << " " << met.nconcord_0 << " ("; + printPct(out, met.nconcord_0, met.npaired); + out << ") aligned concordantly 0 times" << endl; + if(canRep) { + // Print the number that aligned concordantly exactly once + assert_eq(met.nconcord_uni, met.nconcord_uni1+met.nconcord_uni2); + out << " " << met.nconcord_uni1 << " ("; + printPct(out, met.nconcord_uni1, met.npaired); + out << ") aligned concordantly exactly 1 time" << endl; + + // Print the number that aligned concordantly more than once but + // fewer times than the limit + + out << " " << met.nconcord_uni2+met.nconcord_rep << " ("; + printPct(out, met.nconcord_uni2+met.nconcord_rep, met.npaired); + out << ") aligned concordantly >1 times" << endl; + } else { + // Print the number that aligned concordantly exactly once + assert_eq(met.nconcord_uni, met.nconcord_uni1+met.nconcord_uni2); + out << " " << met.nconcord_uni1 << " ("; + printPct(out, met.nconcord_uni1, met.npaired); + out << ") aligned concordantly exactly 1 time" << endl; + + // Print the number that aligned concordantly more than once + out << " " << met.nconcord_uni2 << " ("; + printPct(out, met.nconcord_uni2, met.npaired); + out << ") aligned concordantly >1 times" << endl; + } + if(discord) { + // TODO: what about discoardant and on separate 
chromosomes? + + // Bring out the unaligned pair total so we can subtract discordants + out << " ----" << endl; + out << " " << met.nconcord_0 + << " pairs aligned concordantly 0 times; of these:" << endl; + // Discordants + out << " " << met.ndiscord << " ("; + printPct(out, met.ndiscord, met.nconcord_0); + out << ") aligned discordantly 1 time" << endl; + } + uint64_t ncondiscord_0 = met.nconcord_0 - met.ndiscord; + if(mixed) { + // Bring out the unaligned pair total so we can subtract discordants + out << " ----" << endl; + out << " " << ncondiscord_0 + << " pairs aligned 0 times concordantly or discordantly; of these:" << endl; + out << " " << (ncondiscord_0 * 2) << " mates make up the pairs; of these:" << endl; + out << " " << met.nunp_0_0 << " " << "("; + printPct(out, met.nunp_0_0, ncondiscord_0 * 2); + out << ") aligned 0 times" << endl; + if(canRep) { + // Print the number that aligned exactly once + assert_eq(met.nunp_0_uni, met.nunp_0_uni1+met.nunp_0_uni2); + out << " " << met.nunp_0_uni1 << " ("; + printPct(out, met.nunp_0_uni1, ncondiscord_0 * 2); + out << ") aligned exactly 1 time" << endl; + + // Print the number that aligned more than once but fewer times + // than the limit + out << " " << met.nunp_0_uni2+met.nunp_0_rep << " ("; + printPct(out, met.nunp_0_uni2+met.nunp_0_rep, ncondiscord_0 * 2); + out << ") aligned >1 times" << endl; + } else { + // Print the number that aligned exactly once + assert_eq(met.nunp_0_uni, met.nunp_0_uni1+met.nunp_0_uni2); + out << " " << met.nunp_0_uni1 << " ("; + printPct(out, met.nunp_0_uni1, ncondiscord_0 * 2); + out << ") aligned exactly 1 time" << endl; + + // Print the number that aligned more than once but fewer times + // than the limit + out << " " << met.nunp_0_uni2 << " ("; + printPct(out, met.nunp_0_uni2, ncondiscord_0 * 2); + out << ") aligned >1 times" << endl; + } + } + } + if(totunpair > 0) { + // Unpaired output + out << " " << totunpair << " ("; + printPct(out, totunpair, totread); + out << ") were 
unpaired; of these:" << endl; + + out << " " << met.nunp_0 << " ("; + printPct(out, met.nunp_0, met.nunpaired); + out << ") aligned 0 times" << endl; + if(hadoopOut) { + // The counter is labelled "reads with 0 alignments", so emit nunp_0 + // (the same figure printed just above), not the total number of + // unpaired reads. + out << "reporter:counter:HISAT 2,Unpaired reads with 0 alignments," + << met.nunp_0 << endl; + } + + if(canRep) { + // Print the number that aligned exactly once + assert_eq(met.nunp_uni, met.nunp_uni1+met.nunp_uni2); + out << " " << met.nunp_uni1 << " ("; + printPct(out, met.nunp_uni1, met.nunpaired); + out << ") aligned exactly 1 time" << endl; + + // Print the number that aligned more than once but fewer times + // than the limit + out << " " << met.nunp_uni2+met.nunp_rep << " ("; + printPct(out, met.nunp_uni2+met.nunp_rep, met.nunpaired); + out << ") aligned >1 times" << endl; + } else { + // Print the number that aligned exactly once + assert_eq(met.nunp_uni, met.nunp_uni1+met.nunp_uni2); + out << " " << met.nunp_uni1 << " ("; + printPct(out, met.nunp_uni1, met.nunpaired); + out << ") aligned exactly 1 time" << endl; + + // Print the number that aligned more than once + out << " " << met.nunp_uni2 << " ("; + printPct(out, met.nunp_uni2, met.nunpaired); + out << ") aligned >1 times" << endl; + } + } + + printPct(out, tot_al, tot_al_cand); + out << " overall alignment rate" << endl; + } +} + +/** + * Return true iff the read in rd1/rd2 matches the last read handled, which + * should still be in rd1_/rd2_. + */ +template +bool AlnSinkWrap::sameRead( + // One or the other of rd1, rd2 will = NULL if read is unpaired + const Read* rd1, // new mate #1 + const Read* rd2, // new mate #2 + bool qualitiesMatter) // aln policy distinguishes b/t quals? +{ + bool same = false; + if(rd1_ != NULL || rd2_ != NULL) { + // This is not the first time the sink was initialized with + // a read. 
Check if new read/pair is identical to previous + // read/pair + if((rd1_ == NULL) == (rd1 == NULL) && + (rd2_ == NULL) == (rd2 == NULL)) + { + bool m1same = (rd1 == NULL && rd1_ == NULL); + if(!m1same) { + assert(rd1 != NULL); + assert(rd1_ != NULL); + m1same = Read::same( + rd1->patFw, // new seq + rd1->qual, // new quals + rd1_->patFw, // old seq + rd1_->qual, // old quals + qualitiesMatter); + } + if(m1same) { + bool m2same = (rd2 == NULL && rd2_ == NULL); + if(!m2same) { + m2same = Read::same( + rd2->patFw, // new seq + rd2->qual, // new quals + rd2_->patFw, // old seq + rd2_->qual, // old quals + qualitiesMatter); + } + same = m2same; + } + } + } + return same; +} + +/** + * Initialize the wrapper with a new read or read pair: remember the mate + * pointers and the read id, reset the maxed-out flags, the best-score + * trackers and the result lists, and reset the reporting state. + * + * Returns the stage the aligner should start at. The body below always + * returns 0 (the first stage): it does not compare the new read against the + * previous one and does not consult qualitiesMatter. + */ +template +int AlnSinkWrap::nextRead( + // One or the other of rd1, rd2 will = NULL if read is unpaired + const Read* rd1, // new mate #1 + const Read* rd2, // new mate #2 + TReadId rdid, // read ID for new pair + bool qualitiesMatter) // aln policy distinguishes b/t quals? 
+{ + assert(!init_); + assert(rd1 != NULL || rd2 != NULL); + init_ = true; + // Keep copy of new read, so that we can compare it with the + // next one + if(rd1 != NULL) { + rd1_ = rd1; + } else rd1_ = NULL; + if(rd2 != NULL) { + rd2_ = rd2; + } else rd2_ = NULL; + rdid_ = rdid; + // Caller must now align the read + maxed1_ = false; + maxed2_ = false; + maxedOverall_ = false; + bestPair_ = best2Pair_ = + bestUnp1_ = best2Unp1_ = + bestUnp2_ = best2Unp2_ = std::numeric_limits::min(); + bestUnpRepeat1_ = best2UnpRepeat1_ = + bestUnpRepeat2_ = best2UnpRepeat2_ = std::numeric_limits::min(); + bestSplicedPair_ = best2SplicedPair_ = + bestSplicedUnp1_ = best2SplicedUnp1_ = + bestSplicedUnp2_ = best2SplicedUnp2_ = 0; + rs1_.clear(); // clear out paired-end alignments + rs2_.clear(); // clear out paired-end alignments + rs1u_.clear(); // clear out unpaired alignments for mate #1 + rs2u_.clear(); // clear out unpaired alignments for mate #2 + st_.nextRead(readIsPair()); // reset state + assert(empty()); + assert(!maxed()); + // Start from the first stage + return 0; +} + +/** + * Inform global, shared AlnSink object that we're finished with this read. + * The global AlnSink is responsible for updating counters, creating the output + * record, and delivering the record to the appropriate output stream. + * + * What gets reported for a paired-end alignment? + * + * 1. If there are reportable concordant alignments, report those and stop + * 2. If there are reportable discordant alignments, report those and stop + * 3. If unpaired alignments can be reported: + * 3a. Report + # + * Update metrics. Only ambiguity is: what if a pair aligns repetitively and + * one of its mates aligns uniquely? 
+ * + * uint64_t al; // # mates w/ >= 1 reported alignment + * uint64_t unal; // # mates w/ 0 alignments + * uint64_t max; // # mates withheld for exceeding -M/-m ceiling + * uint64_t al_concord; // # pairs w/ >= 1 concordant alignment + * uint64_t al_discord; // # pairs w/ >= 1 discordant alignment + * uint64_t max_concord; // # pairs maxed out + * uint64_t unal_pair; // # pairs where neither mate aligned + */ +template +void AlnSinkWrap::finishRead( + const SeedResults *sr1, // seed alignment results for mate 1 + const SeedResults *sr2, // seed alignment results for mate 2 + bool exhaust1, // mate 1 exhausted? + bool exhaust2, // mate 2 exhausted? + bool nfilt1, // mate 1 N-filtered? + bool nfilt2, // mate 2 N-filtered? + bool scfilt1, // mate 1 score-filtered? + bool scfilt2, // mate 2 score-filtered? + bool lenfilt1, // mate 1 length-filtered? + bool lenfilt2, // mate 2 length-filtered? + bool qcfilt1, // mate 1 qc-filtered? + bool qcfilt2, // mate 2 qc-filtered? + bool sortByScore, // prioritize alignments by score + RandomSource& rnd, // pseudo-random generator + ReportingMetrics& met, // reporting metrics + const PerReadMetrics& prm, // per-read metrics + const Scoring& sc, // scoring scheme + bool suppressSeedSummary, // = true + bool suppressAlignments, // = false + bool templateLenAdjustment) // = true +{ + obuf_.clear(); + OutputQueueMark qqm(g_.outq(), obuf_, rdid_, threadid_); + assert(init_); + if(!suppressSeedSummary) { + if(sr1 != NULL) { + assert(rd1_ != NULL); + // Mate exists and has non-empty SeedResults + g_.reportSeedSummary(obuf_, *rd1_, rdid_, threadid_, *sr1, true); + } else if(rd1_ != NULL) { + // Mate exists but has NULL SeedResults + g_.reportEmptySeedSummary(obuf_, *rd1_, rdid_, true); + } + if(sr2 != NULL) { + assert(rd2_ != NULL); + // Mate exists and has non-empty SeedResults + g_.reportSeedSummary(obuf_, *rd2_, rdid_, threadid_, *sr2, true); + } else if(rd2_ != NULL) { + // Mate exists but has NULL SeedResults + 
g_.reportEmptySeedSummary(obuf_, *rd2_, rdid_, true); + } + } + if(!suppressAlignments) { + // Ask the ReportingState what to report + st_.finish(); + uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0; + uint64_t nunpairRepeat1 = 0, nunpairRepeat2 = 0; + bool pairMax = false, unpair1Max = false, unpair2Max = false; + st_.getReport( + nconcord, + ndiscord, + nunpair1, + nunpair2, + nunpairRepeat1, + nunpairRepeat2, + pairMax, + unpair1Max, + unpair2Max); + assert_leq(nconcord, rs1_.size()); + assert_leq(nunpair1, rs1u_.size()); + assert_leq(nunpair2, rs2u_.size()); + assert_leq(ndiscord, 1); + assert_gt(rp_.khits, 0); + assert_gt(rp_.mhits, 0); + assert(!pairMax || rs1_.size() >= (uint64_t)rp_.mhits); + assert(!unpair1Max || rs1u_.size() >= (uint64_t)rp_.mhits); + assert(!unpair2Max || rs2u_.size() >= (uint64_t)rp_.mhits); + met.nread++; + if(readIsPair()) { + met.npaired++; + } else { + met.nunpaired++; + } + // Report concordant paired-end alignments if possible + if(nconcord > 0) { + AlnSetSumm concordSumm( + rd1_, rd2_, &rs1_, &rs2_, &rs1u_, &rs2u_, + exhaust1, exhaust2, -1, -1, false); + + // Possibly select a random subset + size_t off; + if(sortByScore) { + // Sort by score then pick from low to high + off = selectByScore(&rs1_, &rs2_, nconcord, select1_, rnd); + } else { + // Select subset randomly + off = selectAlnsToReport(rs1_, nconcord, select1_, rnd); + } + + concordSumm.numAlnsPaired(select1_.size()); + + assert_lt(off, rs1_.size()); + const AlnRes *rs1 = &rs1_[off]; + const AlnRes *rs2 = &rs2_[off]; + AlnFlags flags1( + ALN_FLAG_PAIR_CONCORD_MATE1, + st_.params().mhitsSet(), + unpair1Max, + pairMax, + nfilt1, + scfilt1, + lenfilt1, + qcfilt1, + st_.params().mixed, + true, // primary + true, // opp aligned + rs2->fw()); // opp fw + AlnFlags flags2( + ALN_FLAG_PAIR_CONCORD_MATE2, + st_.params().mhitsSet(), + unpair2Max, + pairMax, + nfilt2, + scfilt2, + lenfilt2, + qcfilt2, + st_.params().mixed, + false, // primary + true, // opp aligned + 
rs1->fw()); // opp fw + // Issue: we only set the flags once, but some of the flags might + // vary from pair to pair among the pairs we're reporting. For + // instance, whether a given mate aligns to the forward strand. + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL && sr2 != NULL) { + sr1->toSeedAlSumm(ssm1); + sr2->toSeedAlSumm(ssm2); + } + for(size_t i = 0; i < rs1_.size(); i++) { + spliceSites_.clear(); + if(templateLenAdjustment) { + rs1_[i].setMateParams(ALN_RES_TYPE_MATE1, &rs2_[i], flags1, ssdb_, threads_rids_mindist_, &spliceSites_); + rs2_[i].setMateParams(ALN_RES_TYPE_MATE2, &rs1_[i], flags2, ssdb_, threads_rids_mindist_, &spliceSites_); + } else { + rs1_[i].setMateParams(ALN_RES_TYPE_MATE1, &rs2_[i], flags1); + rs2_[i].setMateParams(ALN_RES_TYPE_MATE2, &rs1_[i], flags2); + } + assert_eq(abs(rs1_[i].fragmentLength()), abs(rs2_[i].fragmentLength())); + } + assert(!select1_.empty()); + g_.reportHits( + obuf_, + staln_, + threadid_, + rd1_, + rd2_, + rdid_, + select1_, + NULL, + &rs1_, + &rs2_, + pairMax, + concordSumm, + ssm1, + ssm2, + &flags1, + &flags2, + prm, + mapq_, + sc); + if(pairMax) { + met.nconcord_rep++; + } else { + met.nconcord_uni++; + assert(!rs1_.empty()); + if(select1_.size() == 1) { + met.nconcord_uni1++; + } else { + met.nconcord_uni2++; + } + } + init_ = false; + //g_.outq().finishRead(obuf_, rdid_, threadid_); + return; + } + // No concordant alignment to report: report the discordant + // paired-end alignment if there is one + else if(ndiscord > 0) { + ASSERT_ONLY(bool ret =) prepareDiscordants(); + assert(ret); + assert_eq(1, rs1_.size()); + assert_eq(1, rs2_.size()); + AlnSetSumm discordSumm( + rd1_, rd2_, &rs1_, &rs2_, &rs1u_, &rs2u_, + exhaust1, exhaust2, -1, -1, false); + const AlnRes *rs1 = &rs1_[0]; + const AlnRes *rs2 = &rs2_[0]; + AlnFlags flags1( + ALN_FLAG_PAIR_DISCORD_MATE1, + st_.params().mhitsSet(), + false, + pairMax, + nfilt1, + scfilt1, + lenfilt1, + qcfilt1, + st_.params().mixed, + true, // primary + true, // opp aligned + rs2->fw()); // opp fw + AlnFlags flags2( + 
ALN_FLAG_PAIR_DISCORD_MATE2, + st_.params().mhitsSet(), + false, + pairMax, + nfilt2, + scfilt2, + lenfilt2, + qcfilt2, + st_.params().mixed, + false, // primary + true, // opp aligned + rs1->fw()); // opp fw + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL) sr1->toSeedAlSumm(ssm1); + if(sr2 != NULL) sr2->toSeedAlSumm(ssm2); + for(size_t i = 0; i < rs1_.size(); i++) { + rs1_[i].setMateParams(ALN_RES_TYPE_MATE1, &rs2_[i], flags1); + rs2_[i].setMateParams(ALN_RES_TYPE_MATE2, &rs1_[i], flags2); + assert(rs1_[i].isFraglenSet() == rs2_[i].isFraglenSet()); + assert(!rs1_[i].isFraglenSet() || abs(rs1_[i].fragmentLength()) == abs(rs2_[i].fragmentLength())); + } + ASSERT_ONLY(size_t off); + if(sortByScore) { + // Sort by score then pick from low to high + ASSERT_ONLY(off =) selectByScore(&rs1_, &rs2_, ndiscord, select1_, rnd); + } else { + // Select subset randomly + ASSERT_ONLY(off =) selectAlnsToReport(rs1_, ndiscord, select1_, rnd); + } + assert_eq(0, off); + assert(!select1_.empty()); + g_.reportHits( + obuf_, + staln_, + threadid_, + rd1_, + rd2_, + rdid_, + select1_, + NULL, + &rs1_, + &rs2_, + pairMax, + discordSumm, + ssm1, + ssm2, + &flags1, + &flags2, + prm, + mapq_, + sc); + met.nconcord_0++; + met.ndiscord++; + init_ = false; + //g_.outq().finishRead(obuf_, rdid_, threadid_); + return; + } + // If we're at this point, at least one mate failed to align. + // BTL: That's not true. It could be that there are no concordant + // alignments but both mates have unpaired alignments, with one of + // the mates having more than one. 
+ //assert(nunpair1 == 0 || nunpair2 == 0); + assert(!pairMax); + + const AlnRes *repRs1 = NULL, *repRs2 = NULL; + AlnSetSumm summ1, summ2; + AlnFlags flags1, flags2; + TRefId refid = -1; TRefOff refoff = -1; bool repeat = false; + bool rep1 = rd1_ != NULL && nunpair1 > 0; + bool rep2 = rd2_ != NULL && nunpair2 > 0; + + // This is the preliminary if statement for mate 1 - here we're + // gathering some preliminary information, making it possible to call + // g_.reportHits(...) with information about both mates potentially + if(rep1) { + // Mate 1 aligned at least once + if(rep2) { + summ1.init( + rd1_, rd2_, NULL, NULL, &rs1u_, &rs2u_, + exhaust1, exhaust2, -1, -1, false); + } else { + summ1.init( + rd1_, NULL, NULL, NULL, &rs1u_, NULL, + exhaust1, exhaust2, -1, -1, false); + } + size_t off; + if(sortByScore) { + // Sort by score then pick from low to high + off = selectByScore(&rs1u_, NULL, nunpair1, select1_, rnd); + } else { + // Select subset randomly + off = selectAlnsToReport(rs1u_, nunpair1, select1_, rnd); + } + summ1.numAlns1(select1_.size()); + summ2.numAlns1(select1_.size()); + repRs1 = &rs1u_[off]; + } else if(rd1_ != NULL) { + // Mate 1 failed to align - don't do anything yet. First we want + // to collect information on mate 2 in case that factors into the + // summary + assert(!unpair1Max); + } + + if(rep2) { + if(rep1) { + summ2.init( + rd1_, rd2_, NULL, NULL, &rs1u_, &rs2u_, + exhaust1, exhaust2, -1, -1, false); + } else { + summ2.init( + NULL, rd2_, NULL, NULL, NULL, &rs2u_, + exhaust1, exhaust2, -1, -1, false); + } + size_t off; + if(sortByScore) { + // Sort by score then pick from low to high + off = selectByScore(&rs2u_, NULL, nunpair2, select2_, rnd); + } else { + // Select subset randomly + off = selectAlnsToReport(rs2u_, nunpair2, select2_, rnd); + } + repRs2 = &rs2u_[off]; + summ1.numAlns2(select2_.size()); + summ2.numAlns2(select2_.size()); + } else if(rd2_ != NULL) { + // Mate 2 failed to align - don't do anything yet. 
First we want + // to collect information on mate 1 in case that factors into the + // summary + assert(!unpair2Max); + } + + // Update counters given that one mate didn't align + if(readIsPair()) { + met.nconcord_0++; + } + if(rd1_ != NULL) { + if(nunpair1 > 0) { + // Update counters + if(readIsPair()) { + if(unpair1Max) met.nunp_0_rep++; + else { + met.nunp_0_uni++; + assert(!rs1u_.empty()); + if(select1_.size() == 1) { + met.nunp_0_uni1++; + } else { + met.nunp_0_uni2++; + } + } + } else { + if(unpair1Max) met.nunp_rep++; + else { + met.nunp_uni++; + assert(!rs1u_.empty()); + if(select1_.size() == 1) { + met.nunp_uni1++; + } else { + met.nunp_uni2++; + } + } + } + } else if(unpair1Max) { + // Update counters + if(readIsPair()) met.nunp_0_rep++; + else met.nunp_rep++; + } else { + // Update counters + if(readIsPair()) met.nunp_0_0++; + else met.nunp_0++; + } + } + if(rd2_ != NULL) { + if(nunpair2 > 0) { + // Update counters + if(readIsPair()) { + if(unpair2Max) met.nunp_0_rep++; + else { + assert(!rs2u_.empty()); + met.nunp_0_uni++; + if(select2_.size() == 1) { + met.nunp_0_uni1++; + } else { + met.nunp_0_uni2++; + } + } + } else { + if(unpair2Max) met.nunp_rep++; + else { + assert(!rs2u_.empty()); + met.nunp_uni++; + if(select2_.size() == 1) { + met.nunp_uni1++; + } else { + met.nunp_uni2++; + } + } + } + } else if(unpair2Max) { + // Update counters + if(readIsPair()) met.nunp_0_rep++; + else met.nunp_rep++; + } else { + // Update counters + if(readIsPair()) met.nunp_0_0++; + else met.nunp_0++; + } + } + + // Now set up flags + if(rep1) { + // Initialize flags. Note: We want to have information about how + // the other mate aligned (if it did) at this point + flags1.init( + readIsPair() ? 
+ ALN_FLAG_PAIR_UNPAIRED_MATE1 : + ALN_FLAG_PAIR_UNPAIRED, + st_.params().mhitsSet(), + unpair1Max, + pairMax, + nfilt1, + scfilt1, + lenfilt1, + qcfilt1, + st_.params().mixed, + true, // primary + repRs2 != NULL, // opp aligned + repRs2 == NULL || repRs2->fw()); // opp fw + for(size_t i = 0; i < rs1u_.size(); i++) { + rs1u_[i].setMateParams(ALN_RES_TYPE_UNPAIRED_MATE1, NULL, flags1); + } + } + if(rep2) { + // Initialize flags. Note: We want to have information about how + // the other mate aligned (if it did) at this point + flags2.init( + readIsPair() ? + ALN_FLAG_PAIR_UNPAIRED_MATE2 : + ALN_FLAG_PAIR_UNPAIRED, + st_.params().mhitsSet(), + unpair2Max, + pairMax, + nfilt2, + scfilt2, + lenfilt2, + qcfilt2, + st_.params().mixed, + true, // primary + repRs1 != NULL, // opp aligned + repRs1 == NULL || repRs1->fw()); // opp fw + for(size_t i = 0; i < rs2u_.size(); i++) { + rs2u_[i].setMateParams(ALN_RES_TYPE_UNPAIRED_MATE2, NULL, flags2); + } + } + + // Now report mate 1 + if(rep1) { + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL) sr1->toSeedAlSumm(ssm1); + if(sr2 != NULL) sr2->toSeedAlSumm(ssm2); + assert(!select1_.empty()); + g_.reportHits( + obuf_, + staln_, + threadid_, + rd1_, + repRs2 != NULL ? rd2_ : NULL, + rdid_, + select1_, + repRs2 != NULL ? &select2_ : NULL, + &rs1u_, + repRs2 != NULL ? &rs2u_ : NULL, + unpair1Max, + summ1, + ssm1, + ssm2, + &flags1, + repRs2 != NULL ? &flags2 : NULL, + prm, + mapq_, + sc); + assert_lt(select1_[0], rs1u_.size()); + refid = rs1u_[select1_[0]].refid(); + refoff = rs1u_[select1_[0]].refoff(); + repeat = rs1u_[select1_[0]].repeat(); + } + + // Now report mate 2 + if(rep2 && !rep1) { + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL) sr1->toSeedAlSumm(ssm1); + if(sr2 != NULL) sr2->toSeedAlSumm(ssm2); + assert(!select2_.empty()); + g_.reportHits( + obuf_, + staln_, + threadid_, + rd2_, + repRs1 != NULL ? rd1_ : NULL, + rdid_, + select2_, + repRs1 != NULL ? &select1_ : NULL, + &rs2u_, + repRs1 != NULL ? 
&rs1u_ : NULL, + unpair2Max, + summ2, + ssm1, + ssm2, + &flags2, + repRs1 != NULL ? &flags1 : NULL, + prm, + mapq_, + sc); + assert_lt(select2_[0], rs2u_.size()); + refid = rs2u_[select2_[0]].refid(); + refoff = rs2u_[select2_[0]].refoff(); + repeat = rs2u_[select2_[0]].repeat(); + } + + if(rd1_ != NULL && nunpair1 == 0) { + if(nunpair2 > 0) { + assert_neq(-1, refid); + summ1.init( + rd1_, NULL, NULL, NULL, NULL, NULL, + exhaust1, exhaust2, refid, refoff, repeat); + } else { + summ1.init( + rd1_, NULL, NULL, NULL, NULL, NULL, + exhaust1, exhaust2, -1, -1, false); + } + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL) sr1->toSeedAlSumm(ssm1); + if(sr2 != NULL) sr2->toSeedAlSumm(ssm2); + flags1.init( + readIsPair() ? + ALN_FLAG_PAIR_UNPAIRED_MATE1 : + ALN_FLAG_PAIR_UNPAIRED, + st_.params().mhitsSet(), + false, + false, + nfilt1, + scfilt1, + lenfilt1, + qcfilt1, + st_.params().mixed, + true, // primary + repRs2 != NULL, // opp aligned + (repRs2 != NULL) ? repRs2->fw() : false); // opp fw + g_.reportUnaligned( + obuf_, // string to write output to + staln_, + threadid_, + rd1_, // read 1 + NULL, // read 2 + rdid_, // read id + summ1, // summ + ssm1, // + ssm2, + &flags1, // flags 1 + NULL, // flags 2 + prm, // per-read metrics + mapq_, // MAPQ calculator + sc, // scoring scheme + true); // get lock? + } + if(rd2_ != NULL && nunpair2 == 0) { + if(nunpair1 > 0) { + assert_neq(-1, refid); + summ2.init( + NULL, rd2_, NULL, NULL, NULL, NULL, + exhaust1, exhaust2, refid, refoff, repeat); + } else { + summ2.init( + NULL, rd2_, NULL, NULL, NULL, NULL, + exhaust1, exhaust2, -1, -1, false); + } + SeedAlSumm ssm1, ssm2; + if(sr1 != NULL) sr1->toSeedAlSumm(ssm1); + if(sr2 != NULL) sr2->toSeedAlSumm(ssm2); + flags2.init( + readIsPair() ? + ALN_FLAG_PAIR_UNPAIRED_MATE2 : + ALN_FLAG_PAIR_UNPAIRED, + st_.params().mhitsSet(), + false, + false, + nfilt2, + scfilt2, + lenfilt2, + qcfilt2, + st_.params().mixed, + true, // primary + repRs1 != NULL, // opp aligned + (repRs1 != NULL) ? 
repRs1->fw() : false); // opp fw + g_.reportUnaligned( + obuf_, // string to write output to + staln_, + threadid_, + rd2_, // read 1 + NULL, // read 2 + rdid_, // read id + summ2, // summ + ssm1, + ssm2, + &flags2, // flags 1 + NULL, // flags 2 + prm, // per-read metrics + mapq_, // MAPQ calculator + sc, // scoring scheme + true); // get lock? + } + } // if(suppress alignments) + init_ = false; + return; +} + + +/** + * Called by the aligner when a new unpaired or paired alignment is + * discovered in the given stage. This function checks whether the + * addition of this alignment causes the reporting policy to be + * violated (by meeting or exceeding the limits set by -k, -m, -M), + * in which case true is returned immediately and the aligner is + * short circuited. Otherwise, the alignment is tallied and false + * is returned. + */ +template +bool AlnSinkWrap::report( + int stage, + const AlnRes* rs1, + const AlnRes* rs2, + bool alignMate) +{ + assert(init_); + assert(rs1 != NULL || rs2 != NULL); + assert(rs1 == NULL || !rs1->empty()); + assert(rs2 == NULL || !rs2->empty()); + assert(rs1 == NULL || rs1->repOk()); + assert(rs2 == NULL || rs2->repOk()); + bool paired = (rs1 != NULL && rs2 != NULL); + bool one = (rs1 != NULL); + const AlnRes* rsa = one ? rs1 : rs2; + const AlnRes* rsb = one ? 
rs2 : rs1; + + // Tally overall alignment score + TAlScore score = rsa->score().score(); + if(rsb != NULL) score += rsb->score().score(); + index_t num_spliced = (index_t)rsa->num_spliced(); + if(rsb != NULL) num_spliced += (index_t)rsb->num_spliced(); + + if(paired) { + assert(readIsPair()); + st_.foundConcordant(score); + rs1_.push_back(*rs1); + rs2_.push_back(*rs2); + } else { + st_.foundUnpaired(one, rsa->repeat()); + if(one) { + rs1u_.push_back(*rs1); + } else { + rs2u_.push_back(*rs2); + } + } + + // Update best and second-best score so far, keeping the spliced count + // recorded next to each score in step with it + if(paired) { + if(score > bestPair_) { + best2Pair_ = bestPair_; + bestPair_ = score; + best2SplicedPair_ = bestSplicedPair_; + bestSplicedPair_ = num_spliced; + } else if(score > best2Pair_) { + best2Pair_ = score; + best2SplicedPair_ = num_spliced; + } + } else { + if(one) { + if(score > bestUnp1_) { + best2Unp1_ = bestUnp1_; + bestUnp1_ = score; + best2SplicedUnp1_ = bestSplicedUnp1_; + bestSplicedUnp1_ = num_spliced; + } else if(score > best2Unp1_) { + best2Unp1_ = score; + best2SplicedUnp1_ = num_spliced; + } + if(rs1->repeat()) { + if(score > bestUnpRepeat1_) { + best2UnpRepeat1_ = bestUnpRepeat1_; + bestUnpRepeat1_ = score; + } else if(score > best2UnpRepeat1_) { + best2UnpRepeat1_ = score; + } + } + } else { + if(score > bestUnp2_) { + best2Unp2_ = bestUnp2_; + bestUnp2_ = score; + best2SplicedUnp2_ = bestSplicedUnp2_; + bestSplicedUnp2_ = num_spliced; + } else if(score > best2Unp2_) { + best2Unp2_ = score; + // New runner-up for mate 2: its spliced count goes in the mate-2 + // field, leaving the mate-1 runner-up untouched + best2SplicedUnp2_ = num_spliced; + } + if(rs2->repeat()) { + if(score > bestUnpRepeat2_) { + best2UnpRepeat2_ = bestUnpRepeat2_; + bestUnpRepeat2_ = score; + } else if(score > best2UnpRepeat2_) { + best2UnpRepeat2_ = score; + } + } + } + } + return st_.done(); +} + +/** + * If there is a configuration of unpaired alignments that fits our + * criteria for there being one or more discordant alignments, then + * copy the discordant alignments over to the rs1_/rs2_ lists (the + * rs1u_/rs2u_ lists are left as they are) and return true. 
Otherwise, return false. + */ +template +bool AlnSinkWrap::prepareDiscordants() { + if(rs1u_.size() == 1 && rs2u_.size() == 1) { + assert(rs1_.empty()); + assert(rs2_.empty()); + rs1_.push_back(rs1u_[0]); + rs2_.push_back(rs2u_[0]); + return true; + } + return false; +} + +/** + * rs1 (possibly together with rs2 if reads are paired) are populated with + * alignments. Here we prioritize them according to alignment score, and + * some randomness to break ties. Priorities are returned in the 'select' + * list. + */ +template +size_t AlnSinkWrap::selectByScore( + const EList* rs1, // alignments to select from (mate 1) + const EList* rs2, // alignments to select from (mate 2, or NULL) + uint64_t num, // number of alignments to select + EList& select, // prioritized list to put results in + RandomSource& rnd) +const +{ + assert(init_); + assert(repOk()); + assert_gt(num, 0); + assert(rs1 != NULL); + size_t sz = rs1->size(); // sz = # alignments found + assert_leq(num, sz); + if(sz < num) { + num = sz; + } + // num = # to select + if(sz < 1) { + return 0; + } + select.resize((size_t)num); + // Use 'selectBuf_' as a temporary list for sorting purposes + EList >& buf = + const_cast >& >(selectBuf_); + buf.resize(sz); + // Sort by score. If reads are pairs, sort by sum of mate scores. 
+ for(size_t i = 0; i < sz; i++) { + buf[i].first = (*rs1)[i].score().hisat2_score(); + if(rs2 != NULL) { + buf[i].first += (*rs2)[i].score().hisat2_score(); + } + buf[i].second = i; // original offset + } + buf.sort(); buf.reverse(); // sort in descending order by score + + // Randomize streaks of alignments that are equal by score. 'streak' is + // the length of the current run of equal scores (0 when there is no run). + size_t streak = 0; + for(size_t i = 1; i < buf.size(); i++) { + if(buf[i].first == buf[i-1].first) { + if(streak == 0) { streak = 1; } + streak++; + } else { + if(streak > 1) { + assert_geq(i, streak); + buf.shufflePortion(i-streak, streak, rnd); + } + streak = 0; + } + } + // Shuffle a run that extends to the end of the list + if(streak > 1) { + buf.shufflePortion(buf.size() - streak, streak, rnd); + } + + // Rebuild 'select' from the sorted list: keep the first 'num' entries, and + // past that point keep adding entries until the first one whose mate-1 + // alignment is not flagged repeat(). + select.clear(); + for(size_t i = 0; i < buf.size(); i++) { + index_t add = buf[i].second; + if(i >= num && !(*rs1)[add].repeat()) { + assert(rs2 == NULL || !(*rs2)[add].repeat()); + break; + } + select.push_back(add); + } + + // When secondary_ is off, cut 'select' at the first score drop so that + // only the entries tied with the best score remain. + if(!secondary_) { + assert_geq(buf.size(), select.size()); + for(size_t i = 0; i + 1 < select.size(); i++) { + if(buf[i].first != buf[i+1].first) { + select.resize(i+1); + break; + } + } + } + + // Returns index of the representative alignment, but in 'select' also + // returns the indexes of the next best selected alignments in order by + // score. + return selectBuf_[0].second; +} + +/** + * Given that rs is already populated with alignments, consider the + * alignment policy and make random selections where necessary. E.g. if we + * found 10 alignments and the policy is -k 2 -m 20, select 2 alignments at + * random. We "select" an alignment by storing its offset into rs in the + * 'select' list. + * + * Return the "representative" alignment. This is simply the first one + * selected. That will also be what SAM calls the "primary" alignment. 
+ */ +template +size_t AlnSinkWrap::selectAlnsToReport( + const EList& rs, // alignments to select from + uint64_t num, // number of alignments to select (clamped to rs.size()) + EList& select, // receives the offsets of the selected alignments + RandomSource& rnd) +const +{ + assert(init_); + assert(repOk()); + assert_gt(num, 0); + size_t sz = rs.size(); + if(sz < num) { + num = sz; + } + if(sz < 1) { + return 0; + } + select.resize((size_t)num); + if(sz == 1) { + assert_eq(1, num); + select[0] = 0; + return 0; + } + // Select a random offset into the list of alignments + uint32_t off = rnd.nextU32() % (uint32_t)sz; + uint32_t offOrig = off; + // Now take num consecutive elements starting at that offset, wrapping around to 0 if + // necessary. Leave the rest. + for(size_t i = 0; i < num; i++) { + select[i] = off; + off++; + if(off == sz) { + off = 0; + } + } + return offOrig; +} + +#define NOT_SUPPRESSED !suppress_[field++] +#define BEGIN_FIELD { \ +if(firstfield) firstfield = false; \ +else o.append('\t'); \ +} +#define WRITE_TAB { \ +if(firstfield) firstfield = false; \ +else o.append('\t'); \ +} +#define WRITE_NUM(o, x) { \ +itoa10(x, buf); \ +o.append(buf); \ +} + +/** + * Append a seed summary line for the given read to the string 'o'.
+ */ +template +void AlnSink::reportSeedSummary( + BTString& o, + const Read& rd, + TReadId rdid, + size_t threadId, + const SeedResults& rs, + bool getLock) +{ + appendSeedSummary( + o, // string to write to + rd, // read + rdid, // read id + rs.numOffs()*2, // # seeds tried (fw + rc) + rs.nonzeroOffsets(), // # seeds with non-empty results + rs.numRanges(), // # ranges for all seed hits + rs.numElts(), // # elements for all seed hits + rs.numOffs(), // # seeds tried from fw read + rs.nonzeroOffsetsFw(), // # seeds with non-empty results from fw read + rs.numRangesFw(), // # ranges for seed hits from fw read + rs.numEltsFw(), // # elements for seed hits from fw read + rs.numOffs(), // # seeds tried from rc read + rs.nonzeroOffsetsRc(), // # seeds with non-empty results from rc read + rs.numRangesRc(), // # ranges for seed hits from rc read + rs.numEltsRc()); // # elements for seed hits from rc read +} + +/** + * Append an empty (all-zero) seed summary line for the given read to the string 'o'. + */ +template +void AlnSink::reportEmptySeedSummary( + BTString& o, + const Read& rd, + TReadId rdid, + size_t threadId, + bool getLock) +{ + appendSeedSummary( + o, // string to append to + rd, // read + rdid, // read id + 0, // # seeds tried + 0, // # seeds with non-empty results + 0, // # ranges for all seed hits + 0, // # elements for all seed hits + 0, // # seeds tried from fw read + 0, // # seeds with non-empty results from fw read + 0, // # ranges for seed hits from fw read + 0, // # elements for seed hits from fw read + 0, // # seeds tried from rc read + 0, // # seeds with non-empty results from rc read + 0, // # ranges for seed hits from rc read + 0); // # elements for seed hits from rc read +} + +/** + * Append the given string to 's'. If chopws = true, append only up to and not + * including the first space or tab. Useful for printing reference + * names.
+ */ +template +static inline void printUptoWs( + BTString& s, + const T& str, + bool chopws) +{ + size_t len = str.length(); + for(size_t i = 0; i < len; i++) { + if(!chopws || (str[i] != ' ' && str[i] != '\t')) { + s.append(str[i]); + } else { + break; + } + } +} + +/** + * Append a batch of unresolved seed alignment summary results (i.e. + * seed alignments where all we know is the reference sequence aligned + * to and its SA range, not where it falls in the reference + * sequence) to the given output string in Bowtie's seed-summary + * verbose-mode format. + * + * The seed summary format is: + * + * - One line per read + * - A typical line consists of a set of tab-delimited fields: + * + * 1. Read name + * 2. Total number of seeds extracted from the read + * 3. Total number of seeds that aligned to the reference at + * least once (always <= field 2) + * 4. Total number of distinct BW ranges found in all seed hits + * (always >= field 3) + * 5. Total number of distinct BW elements found in all seed + * hits (always >= field 4) + * 6-9.: Like 2-5. but just for seeds extracted from the + * forward representation of the read + * 10-13.: Like 2-5. but just for seeds extracted from the + * reverse-complement representation of the read + * + * Note that fields 6 and 10 should add to field 2, 7 and 11 + * should add to 3, etc. + * + * - Lines for reads that are filtered out for any reason (e.g. too + * many Ns) have columns 2 through 13 set to 0.
+ */ +template +void AlnSink::appendSeedSummary( + BTString& o, + const Read& rd, + const TReadId rdid, + size_t seedsTried, + size_t nonzero, + size_t ranges, + size_t elts, + size_t seedsTriedFw, + size_t nonzeroFw, + size_t rangesFw, + size_t eltsFw, + size_t seedsTriedRc, + size_t nonzeroRc, + size_t rangesRc, + size_t eltsRc) +{ + char buf[1024]; + bool firstfield = true; + // + // Read name (cut at the first space or tab) + // + BEGIN_FIELD; + printUptoWs(o, rd.name, true); + + // + // Total number of seeds tried + // + BEGIN_FIELD; + WRITE_NUM(o, seedsTried); + + // + // Total number of seeds tried where at least one range was found. + // + BEGIN_FIELD; + WRITE_NUM(o, nonzero); + + // + // Total number of ranges found + // + BEGIN_FIELD; + WRITE_NUM(o, ranges); + + // + // Total number of elements found + // + BEGIN_FIELD; + WRITE_NUM(o, elts); + + // + // The same four numbers, but only for seeds extracted from the + // forward read representation. + // + BEGIN_FIELD; + WRITE_NUM(o, seedsTriedFw); + + BEGIN_FIELD; + WRITE_NUM(o, nonzeroFw); + + BEGIN_FIELD; + WRITE_NUM(o, rangesFw); + + BEGIN_FIELD; + WRITE_NUM(o, eltsFw); + + // + // The same four numbers, but only for seeds extracted from the + // reverse complement read representation. + // + BEGIN_FIELD; + WRITE_NUM(o, seedsTriedRc); + + BEGIN_FIELD; + WRITE_NUM(o, nonzeroRc); + + BEGIN_FIELD; + WRITE_NUM(o, rangesRc); + + BEGIN_FIELD; + WRITE_NUM(o, eltsRc); + + o.append('\n'); +} + +/** + * Append one record for a single read/mate to the string 'o' in SAM format: + * the fields QNAME through QUAL followed by the optional fields.
+ */ +template +void AlnSinkSam::appendMate( + BTString& o, // append to this string + StackedAln& staln, // store stacked alignment struct here + const Read& rd, + const Read* rdo, + const TReadId rdid, + AlnRes* rs, + AlnRes* rso, + const AlnSetSumm& summ, + const SeedAlSumm& ssm, + const SeedAlSumm& ssmo, + const AlnFlags& flags, + const PerReadMetrics& prm, + const Mapq& mapqCalc, + const Scoring& sc) +{ + if(rs == NULL && samc_.omitUnalignedReads()) { + return; + } + char buf[1024]; + char mapqInps[1024]; + if(rs != NULL) { + staln.reset(); + rs->initStacked(rd, staln); + staln.leftAlign(false /* not past MMs */); + } + int offAdj = 0; + // QNAME + samc_.printReadName(o, rd.name, flags.partOfPair()); + o.append('\t'); + // FLAG (bitwise OR of the SAM_FLAG_* bits set below) + int fl = 0; + if(flags.partOfPair()) { + fl |= SAM_FLAG_PAIRED; + if(flags.alignedConcordant()) { + fl |= SAM_FLAG_MAPPED_PAIRED; + } + if(!flags.mateAligned()) { + // Other fragment is unmapped + fl |= SAM_FLAG_MATE_UNMAPPED; + } + fl |= (flags.readMate1() ? + SAM_FLAG_FIRST_IN_PAIR : SAM_FLAG_SECOND_IN_PAIR); + if(flags.mateAligned() && rso != NULL) { + if(!rso->fw()) { + fl |= SAM_FLAG_MATE_STRAND; + } + } + } + if(!flags.isPrimary()) { + fl |= SAM_FLAG_NOT_PRIMARY; + } + if(rs != NULL && !rs->fw()) { + fl |= SAM_FLAG_QUERY_STRAND; + } + if(rs == NULL) { + // Failed to align + fl |= SAM_FLAG_UNMAPPED; + } + itoa10(fl, buf); + o.append(buf); + o.append('\t'); + // RNAME + if(rs != NULL) { + samc_.printRefNameFromIndex(o, (size_t)rs->refid(), rs->repeat()); + o.append('\t'); + } else { + if(summ.orefid() != -1) { + // Opposite mate aligned but this one didn't - print the opposite + // mate's RNAME and POS as is customary + assert(flags.partOfPair()); + samc_.printRefNameFromIndex(o, (size_t)summ.orefid(), summ.repeat()); + } else { + // No alignment + o.append('*'); + } + o.append('\t'); + } + // POS + // Note: POS is *after* soft clipping. I.e. POS points to the + // upstream-most character *involved in the clipped alignment*.
+ if(rs != NULL) { + itoa10(rs->refoff()+1+offAdj, buf); + o.append(buf); + o.append('\t'); + } else { + if(summ.orefid() != -1) { + // Opposite mate aligned but this one didn't - print the opposite + // mate's RNAME and POS as is customary + assert(flags.partOfPair()); + itoa10(summ.orefoff()+1+offAdj, buf); + o.append(buf); + } else { + // No alignment + o.append('0'); + } + o.append('\t'); + } + // MAPQ + mapqInps[0] = '\0'; + if(rs != NULL) { + itoa10(mapqCalc.mapq( + summ, flags, rd.mate < 2, rd.length(), + rdo == NULL ? 0 : rdo->length(), mapqInps), buf); + o.append(buf); + o.append('\t'); + } else { + // No alignment + o.append("0\t"); + } + // CIGAR + if(rs != NULL) { + staln.buildCigar(false); + staln.writeCigar(&o, NULL); + o.append('\t'); + } else { + // No alignment + o.append("*\t"); + } + // RNEXT + if(rs != NULL && flags.partOfPair()) { + if(rso != NULL && (rs->refid() != rso->refid() || rs->repeat() != rso->repeat())) { + samc_.printRefNameFromIndex(o, (size_t)rso->refid(), rso->repeat()); + o.append('\t'); + } else { + o.append("=\t"); + } + } else if(summ.orefid() != -1) { + // The convention if this mate fails to align but the other doesn't is + // to copy the mate's details into here + o.append("=\t"); + } else { + o.append("*\t"); + } + // PNEXT + if(rs != NULL && flags.partOfPair()) { + if(rso != NULL) { + itoa10(rso->refoff()+1, buf); + o.append(buf); + o.append('\t'); + } else { + // The convention is that if this mate aligns but the opposite + // doesn't, we print this mate's offset here + itoa10(rs->refoff()+1, buf); + o.append(buf); + o.append('\t'); + } + } else if(summ.orefid() != -1) { + // The convention if this mate fails to align but the other doesn't is + // to copy the mate's details into here + itoa10(summ.orefoff()+1, buf); + o.append(buf); + o.append('\t'); + } else { + o.append("0\t"); + } + // ISIZE (fragment length; 0 when it has not been set) + if(rs != NULL && rs->isFraglenSet()) { + itoa10(rs->fragmentLength(), buf); + o.append(buf); + o.append('\t'); + } else { +
// No fragment + o.append("0\t"); + } + // SEQ + if(!flags.isPrimary() && samc_.omitSecondarySeqQual()) { + o.append('*'); + } else { + // Print the read (reverse complement if the alignment is not forward) + if(rd.patFw.length() == 0) { + o.append('*'); + } else { + if(rs == NULL || rs->fw()) { + o.append(rd.patFw.toZBuf()); + } else { + o.append(rd.patRc.toZBuf()); + } + } + } + o.append('\t'); + // QUAL + if(!flags.isPrimary() && samc_.omitSecondarySeqQual()) { + o.append('*'); + } else { + // Print the quals (reversed if the alignment is not forward) + if(rd.qual.length() == 0) { + o.append('*'); + } else { + if(rs == NULL || rs->fw()) { + o.append(rd.qual.toZBuf()); + } else { + o.append(rd.qualRev.toZBuf()); + } + } + } + o.append('\t'); + // + // Optional fields + // + if(rs != NULL) { + samc_.printAlignedOptFlags( + o, // output buffer + true, // first opt flag printed is first overall? + rd, // read + *rs, // individual alignment result + staln, // stacked alignment + flags, // alignment flags + summ, // summary of alignments for this read + ssm, // seed alignment summary + prm, // per-read metrics + sc, // scoring scheme + mapqInps, // inputs to MAPQ calculation + this->altdb_); + } else { + samc_.printEmptyOptFlags( + o, // output buffer + true, // first opt flag printed is first overall? + rd, // read + flags, // alignment flags + summ, // summary of alignments for this read + ssm, // seed alignment summary + prm, // per-read metrics + sc); // scoring scheme + } + o.append('\n'); +} + +#endif /*ndef ALN_SINK_H_*/ diff --git a/alphabet.cpp b/alphabet.cpp new file mode 100644 index 0000000..cb63651 --- /dev/null +++ b/alphabet.cpp @@ -0,0 +1,536 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +#include +#include +#include "alphabet.h" + +using namespace std; + +/** + * Mapping from ASCII characters to DNA categories: + * + * 0 = invalid - error + * 1 = DNA + * 2 = IUPAC (ambiguous DNA) + * 3 = not an error, but unmatchable; alignments containing this + * character are invalid + */ +uint8_t asc2dnacat[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, + /* - */ + /* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 64 */ 0, 1, 2, 1, 2, 0, 0, 1, 2, 0, 0, 2, 0, 2, 2, 0, + /* A B C D G H K M N */ + /* 80 */ 0, 0, 2, 2, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, + /* R S T V W X Y */ + /* 96 */ 0, 1, 2, 1, 2, 0, 0, 1, 2, 0, 0, 2, 0, 2, 2, 0, + /* a b c d g h k m n */ + /* 112 */ 0, 0, 2, 2, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, + /* r s t v w x y */ + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// 5-bit pop count +int mask2popcnt[] = { + 0, 1, 1, 2, 1, 2, 2, 3, + 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5 +}; + +/** + * Mapping from masks to ASCII characters for ambiguous nucleotides. 
+ */ +char mask2dna[] = { + '?', // 0 + 'A', // 1 + 'C', // 2 + 'M', // 3 + 'G', // 4 + 'R', // 5 + 'S', // 6 + 'V', // 7 + 'T', // 8 + 'W', // 9 + 'Y', // 10 + 'H', // 11 + 'K', // 12 + 'D', // 13 + 'B', // 14 + 'N', // 15 (inclusive N) + 'N' // 16 (exclusive N) +}; + +/** + * Mapping from ASCII characters for ambiguous nucleotides into masks: + */ +uint8_t asc2dnamask[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 64 */ 0, 1,14, 2,13, 0, 0, 4,11, 0, 0,12, 0, 3,15, 0, + /* A B C D G H K M N */ + /* 80 */ 0, 0, 5, 6, 8, 0, 7, 9, 0,10, 0, 0, 0, 0, 0, 0, + /* R S T V W Y */ + /* 96 */ 0, 1,14, 2,13, 0, 0, 4,11, 0, 0,12, 0, 3,15, 0, + /* a b c d g h k m n */ + /* 112 */ 0, 0, 5, 6, 8, 0, 7, 9, 0,10, 0, 0, 0, 0, 0, 0, + /* r s t v w y */ + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/** + * Convert a pair of DNA masks to a color mask + * + * + */ +uint8_t dnamasks2colormask[16][16] = { + /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 */ + /* 0 */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + /* 1 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + /* 2 */ { 0, 2, 1, 3, 8, 10, 9, 11, 4, 6, 5, 7, 12, 14, 13, 15 }, + /* 3 */ { 0, 3, 3, 3, 12, 15, 15, 15, 12, 15, 15, 15, 12, 15, 15, 15 }, + /* 4 */ { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }, + /* 5 */ { 0, 5, 10, 15, 5, 5, 15, 15, 10, 15, 10, 15, 15, 15, 
15, 15 }, + /* 6 */ { 0, 6, 9, 15, 9, 15, 9, 15, 6, 6, 15, 15, 15, 15, 15, 15 }, + /* 7 */ { 0, 7, 11, 15, 13, 15, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15 }, + /* 8 */ { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }, + /* 9 */ { 0, 9, 6, 15, 6, 15, 6, 15, 9, 9, 15, 15, 15, 15, 15, 15 }, + /* 10 */ { 0, 10, 5, 15, 10, 10, 15, 15, 5, 15, 5, 15, 15, 15, 15, 15 }, + /* 11 */ { 0, 11, 7, 15, 14, 15, 15, 15, 13, 15, 15, 15, 15, 15, 15, 15 }, + /* 12 */ { 0, 12, 12, 12, 3, 15, 15, 15, 3, 15, 15, 15, 3, 15, 15, 15 }, + /* 13 */ { 0, 13, 14, 15, 7, 15, 15, 15, 11, 15, 15, 15, 15, 15, 15, 15 }, + /* 14 */ { 0, 14, 13, 15, 11, 15, 15, 15, 7, 15, 15, 15, 15, 15, 15, 15 }, + /* 15 */ { 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 } +}; + +/** + * Mapping from ASCII nucleotide characters (including IUPAC codes) to their complement characters: + */ +char asc2dnacomp[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'-', 0, 0, + /* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 64 */ 0,'T','V','G','H', 0, 0,'C','D', 0, 0,'M', 0,'K','N', 0, + /* A B C D G H K M N */ + /* 80 */ 0, 0,'Y','S','A', 0,'B','W', 0,'R', 0, 0, 0, 0, 0, 0, + /* R S T V W Y */ + /* 96 */ 0,'T','V','G','H', 0, 0,'C','D', 0, 0,'M', 0,'K','N', 0, + /* a b c d g h k m n */ + /* 112 */ 0, 0,'Y','S','A', 0,'B','W', 0,'R', 0, 0, 0, 0, 0, 0, + /* r s t v w y */ + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/** + * Mapping from ASCII color characters ('0'-'4', '.' and '-')
to ASCII DNA characters: + */ +char col2dna[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'-','N', 0, + /* - . */ + /* 48 */'A','C','G','T','N', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0 1 2 3 4 */ + /* 64 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 96 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 112 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/** + * Mapping from ASCII DNA characters (A, C, G, T, N in either case, and '-') to ASCII color characters: + */ +char dna2col[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'-', 0, 0, + /* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 64 */ 0,'0', 0,'1', 0, 0, 0,'2', 0, 0, 0, 0, 0, 0,'4', 0, + /* A C G N */ + /* 80 */ 0, 0, 0, 0,'3', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* T */ + /* 96 */ 0,'0', 0,'1', 0, 0, 0,'2', 0, 0, 0, 0, 0, 0,'4', 0, + /* a c g n */ + /* 112 */ 0, 0, 0, 0,'3', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* t */ + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/** + * Mapping from ASCII DNA/IUPAC characters to color strings; '|' separates the alternatives of an ambiguous code, "?" marks characters with no mapping: + */ +const char* dna2colstr[] = { + /* 0 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", + /* 16 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", + /* 32 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "-", "?", "?", + /* 48 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", + /* 64 */ "?", "0","1|2|3","1","0|2|3","?", "?", "2","0|1|3","?", "?", "2|3", "?", "0|1", ".", "?", + /* A B C D G H K M N */ + /* 80 */ "?", "?", "0|2","1|2", "3", "?","0|1|2","0|3","?", "1|3", "?", "?", "?", "?", "?", "?", + /* R S T V W Y */ + /* 96 */ "?", "0","1|2|3","1","0|2|3","?", "?", "2","0|1|3","?", "?", "2|3", "?", "0|1", ".", "?", + /* a b c d g h k m n */ + /* 112 */ "?", "?", "0|2","1|2", "3", "?","0|1|2","0|3","?", "1|3", "?", "?", "?", "?", "?", "?", + /* r s t v w y */ + /* 128 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", + /* 144 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", + /* 160 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", + /* 176 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", + /* 192 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", + /* 208 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", + /* 224 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", + /* 240 */ "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
+}; + +/** + * Mapping from ASCII characters to color categories: + * + * 0 = invalid - error + * 1 = valid color + * 2 = IUPAC (ambiguous DNA) - there is no such thing for colors to my + * knowledge + * 3 = not an error, but unmatchable; alignments containing this + * character are invalid + */ +uint8_t asc2colcat[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, + /* - . */ + /* 48 */ 1, 1, 1, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0 1 2 3 4 */ + /* 64 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 96 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 112 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/** + * Set the category for all IUPAC codes. By default they're in + * category 2 (IUPAC), but sometimes we'd like to put them in category + * 3 (unmatchable), for example. 
+ */ +void setIupacsCat(uint8_t cat) { + assert(cat < 4); // asc2dnacat categories are 0..3 + asc2dnacat[(int)'B'] = asc2dnacat[(int)'b'] = + asc2dnacat[(int)'D'] = asc2dnacat[(int)'d'] = + asc2dnacat[(int)'H'] = asc2dnacat[(int)'h'] = + asc2dnacat[(int)'K'] = asc2dnacat[(int)'k'] = + asc2dnacat[(int)'M'] = asc2dnacat[(int)'m'] = + asc2dnacat[(int)'N'] = asc2dnacat[(int)'n'] = + asc2dnacat[(int)'R'] = asc2dnacat[(int)'r'] = + asc2dnacat[(int)'S'] = asc2dnacat[(int)'s'] = + asc2dnacat[(int)'V'] = asc2dnacat[(int)'v'] = + asc2dnacat[(int)'W'] = asc2dnacat[(int)'w'] = + asc2dnacat[(int)'X'] = asc2dnacat[(int)'x'] = + asc2dnacat[(int)'Y'] = asc2dnacat[(int)'y'] = cat; // one chained assignment: both cases of every code get 'cat'; A, C, G, T are untouched +} + +/// For converting from ASCII to the Dna5 code where A=0, C=1, G=2, +/// T=3, N=4 (either case; every other character maps to 0) + + +uint8_t asc2dna[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 64 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* A C G N */ + /* 80 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* T */ + /* 96 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* a c g n */ + /* 112 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* t */ + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +uint8_t asc2dna_3N[2][256] = { + { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 48 */ 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 64 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* A C G N */ + /* 80 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* T */ + /* 96 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* a c g n */ + /* 112 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* t */ + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }, + { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 64 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* A C G N */ + /* 80 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* T */ + /* 96 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* a c g n */ + /* 112 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* t */ + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + } +}; + +// this is only used in BASE_CHANGE case +uint8_t asc2dna_1[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 64 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* A C G N */ + /* 80 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* T */ + /* 96 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* a c g n */ + /* 112 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* t */ + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +uint8_t asc2dna_2[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 64 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* A C G N */ + /* 80 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* T */ + /* 96 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* a c g n */ + /* 112 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* t */ + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/// Convert an ascii char representing a base or a color 
to a 2-bit +/// code: 0=A,0; 1=C,1; 2=G,2; 3=T,3; 4=N,. +uint8_t asc2dnaOrCol[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, + /* - . */ + /* 48 */ 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0 1 2 3 */ + /* 64 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* A C G N */ + /* 80 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* T */ + /* 96 */ 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0, + /* a c g n */ + /* 112 */ 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* t */ + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/// For converting from an ASCII color character to its numeric code: '0'-'3' map to 0-3, +/// '-' and '.' map to 4 (every other character maps to 0) +uint8_t asc2col[] = { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, + /* - .
*/ + /* 48 */ 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0 1 2 3 */ + /* 64 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 96 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 112 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/** + * Convert a nucleotide and a color to the paired nucleotide. Indexed + * first by nucleotide then by color. Note that this is exactly the + * same as the dinuc2color array. + */ +uint8_t nuccol2nuc[5][5] = { + /* B G O R . */ + /* A */ {0, 1, 2, 3, 4}, + /* C */ {1, 0, 3, 2, 4}, + /* G */ {2, 3, 0, 1, 4}, + /* T */ {3, 2, 1, 0, 4}, + /* N */ {4, 4, 4, 4, 4} +}; + +/** + * Convert a pair of nucleotides to a color. + */ +uint8_t dinuc2color[5][5] = { + /* A */ {0, 1, 2, 3, 4}, + /* C */ {1, 0, 3, 2, 4}, + /* G */ {2, 3, 0, 1, 4}, + /* T */ {3, 2, 1, 0, 4}, + /* N */ {4, 4, 4, 4, 4} +}; + +/// Convert bit encoded DNA char to its complement +int dnacomp[5] = { + 3, 2, 1, 0, 4 +}; + +const char *iupacs = "!ACMGRSVTWYHKDBN!acmgrsvtwyhkdbn"; + +char mask2iupac[16] = { + -1, + 'A', // 0001 + 'C', // 0010 + 'M', // 0011 + 'G', // 0100 + 'R', // 0101 + 'S', // 0110 + 'V', // 0111 + 'T', // 1000 + 'W', // 1001 + 'Y', // 1010 + 'H', // 1011 + 'K', // 1100 + 'D', // 1101 + 'B', // 1110 + 'N', // 1111 +}; + +int maskcomp[16] = { + 0, // 0000 (!) -> 0000 (!) 
+ 8, // 0001 (A) -> 1000 (T) + 4, // 0010 (C) -> 0100 (G) + 12, // 0011 (M) -> 1100 (K) + 2, // 0100 (G) -> 0010 (C) + 10, // 0101 (R) -> 1010 (Y) + 6, // 0110 (S) -> 0110 (S) + 14, // 0111 (V) -> 1110 (B) + 1, // 1000 (T) -> 0001 (A) + 9, // 1001 (W) -> 1001 (W) + 5, // 1010 (Y) -> 0101 (R) + 13, // 1011 (H) -> 1101 (D) + 3, // 1100 (K) -> 0011 (M) + 11, // 1101 (D) -> 1011 (H) + 7, // 1110 (B) -> 0111 (V) + 15, // 1111 (N) -> 1111 (N) +}; + diff --git a/alphabet.h b/alphabet.h new file mode 100644 index 0000000..340942e --- /dev/null +++ b/alphabet.h @@ -0,0 +1,199 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ALPHABETS_H_ +#define ALPHABETS_H_ + +#include +#include +#include +#include +#include "assert_helpers.h" + +using namespace std; + +/// Convert an ascii char to a DNA category. 
Categories are: +/// 0 -> invalid +/// 1 -> unambiguous a, c, g or t +/// 2 -> ambiguous +/// 3 -> unmatchable +extern uint8_t asc2dnacat[]; +/// Convert masks to ambiguous nucleotides +extern char mask2dna[]; +/// Convert ambiguous ASCII nuceleotide to mask +extern uint8_t asc2dnamask[]; +/// Convert mask to # of alternative in the mask +extern int mask2popcnt[]; +/// Convert an ascii char to a 2-bit base: 0=A, 1=C, 2=G, 3=T, 4=N +extern uint8_t asc2dna[]; +/// Convert an ascii char representing a base or a color to a 2-bit +/// code: 0=A,0; 1=C,1; 2=G,2; 3=T,3; 4=N,. +extern uint8_t asc2dnaOrCol[]; +/// Convert a pair of DNA masks to a color mask +extern uint8_t dnamasks2colormask[16][16]; + +/// Convert an ascii char to a color category. Categories are: +/// 0 -> invalid +/// 1 -> unambiguous 0, 1, 2 or 3 +/// 2 -> ambiguous (not applicable for colors) +/// 3 -> unmatchable +extern uint8_t asc2colcat[]; +/// Convert an ascii char to a 2-bit base: 0=A, 1=C, 2=G, 3=T, 4=N +extern uint8_t asc2col[]; +/// Convert an ascii char to its DNA complement, including IUPACs +extern char asc2dnacomp[]; + +/// Convert a pair of 2-bit (and 4=N) encoded DNA bases to a color +extern uint8_t dinuc2color[5][5]; +/// Convert a 2-bit nucleotide (and 4=N) and a color to the +/// corresponding 2-bit nucleotide +extern uint8_t nuccol2nuc[5][5]; +/// Convert a 4-bit mask into an IUPAC code +extern char mask2iupac[16]; + +/// Convert an ascii color to an ascii dna char +extern char col2dna[]; +/// Convert an ascii dna to a color char +extern char dna2col[]; +/// Convert an ascii dna to a color char +extern const char* dna2colstr[]; + +/// Convert bit encoded DNA char to its complement +extern int dnacomp[5]; + +/// String of all DNA and IUPAC characters +extern const char *iupacs; + +/// Map from masks to their reverse-complement masks +extern int maskcomp[16]; + +/** + * Return true iff c is a Dna character. 
+ */ +static inline bool isDna(char c) { + return asc2dnacat[(int)c] > 0; +} + +/** + * Return true iff c is a color character. + */ +static inline bool isColor(char c) { + return asc2colcat[(int)c] > 0; +} + +/** + * Return true iff c is an ambiguous Dna character. + */ +static inline bool isAmbigNuc(char c) { + return asc2dnacat[(int)c] == 2; +} + +/** + * Return true iff c is an ambiguous color character. + */ +static inline bool isAmbigColor(char c) { + return asc2colcat[(int)c] == 2; +} + +/** + * Return true iff c is an ambiguous character. + */ +static inline bool isAmbig(char c, bool color) { + return (color ? asc2colcat[(int)c] : asc2dnacat[(int)c]) == 2; +} + +/** + * Return true iff c is an unambiguous DNA character. + */ +static inline bool isUnambigNuc(char c) { + return asc2dnacat[(int)c] == 1; +} + +/** + * Return the DNA complement of the given ASCII char. + */ +static inline char comp(char c) { + switch(c) { + case 'a': return 't'; + case 'A': return 'T'; + case 'c': return 'g'; + case 'C': return 'G'; + case 'g': return 'c'; + case 'G': return 'C'; + case 't': return 'a'; + case 'T': return 'A'; + default: return c; + } +} + +/** + * Return the reverse complement of a bit-encoded nucleotide. + */ +static inline int compDna(int c) { + assert_leq(c, 4); + return dnacomp[c]; +} + +/** + * Return true iff c is an unambiguous Dna character. + */ +static inline bool isUnambigDna(char c) { + return asc2dnacat[(int)c] == 1; +} + +/** + * Return true iff c is an unambiguous color character (0,1,2,3). + */ +static inline bool isUnambigColor(char c) { + return asc2colcat[(int)c] == 1; +} + +/// Convert a pair of 2-bit (and 4=N) encoded DNA bases to a color +extern uint8_t dinuc2color[5][5]; + +/** + * Decode a not-necessarily-ambiguous nucleotide. 
+ */ +static inline void decodeNuc(char c , int& num, int *alts) { + switch(c) { + case 'A': alts[0] = 0; num = 1; break; + case 'C': alts[0] = 1; num = 1; break; + case 'G': alts[0] = 2; num = 1; break; + case 'T': alts[0] = 3; num = 1; break; + case 'M': alts[0] = 0; alts[1] = 1; num = 2; break; + case 'R': alts[0] = 0; alts[1] = 2; num = 2; break; + case 'W': alts[0] = 0; alts[1] = 3; num = 2; break; + case 'S': alts[0] = 1; alts[1] = 2; num = 2; break; + case 'Y': alts[0] = 1; alts[1] = 3; num = 2; break; + case 'K': alts[0] = 2; alts[1] = 3; num = 2; break; + case 'V': alts[0] = 0; alts[1] = 1; alts[2] = 2; num = 3; break; + case 'H': alts[0] = 0; alts[1] = 1; alts[2] = 3; num = 3; break; + case 'D': alts[0] = 0; alts[1] = 2; alts[2] = 3; num = 3; break; + case 'B': alts[0] = 1; alts[1] = 2; alts[2] = 3; num = 3; break; + case 'N': alts[0] = 0; alts[1] = 1; alts[2] = 2; alts[3] = 3; num = 4; break; + default: { + std::cerr << "Bad IUPAC code: " << c << ", (int: " << (int)c << ")" << std::endl; + throw std::runtime_error(""); + } + } +} + +extern void setIupacsCat(uint8_t cat); + +#endif /*ALPHABETS_H_*/ diff --git a/alt.h b/alt.h new file mode 100644 index 0000000..fa820a7 --- /dev/null +++ b/alt.h @@ -0,0 +1,294 @@ +/* + * Copyright 2015, Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . 
+ */ + +#ifndef ALT_H_ +#define ALT_H_ + +#include +#include +#include +#include "assert_helpers.h" +#include "word_io.h" +#include "mem_ids.h" + +using namespace std; + +enum ALT_TYPE { + ALT_NONE = 0, + ALT_SNP_SGL, // single nucleotide substitution + ALT_SNP_INS, // small insertion wrt reference genome + ALT_SNP_DEL, // small deletion wrt reference genome + ALT_SNP_ALT, // alternative sequence (to be implemented ...) + ALT_SPLICESITE, + ALT_EXON +}; + +template +struct ALT { + ALT() { + reset(); + } + + void reset() { + type = ALT_NONE; + pos = len = 0; + seq = 0; + } + + ALT_TYPE type; + + union { + index_t pos; + index_t left; + }; + + union { + index_t len; + index_t right; + }; + + union { + uint64_t seq; // used to store 32 bp, but it can be used to store a pointer to EList + struct { + union { + bool fw; + bool reversed; + }; + bool excluded; + }; + }; + +public: + // in order to support a sequence longer than 32 bp + + bool snp() const { return type == ALT_SNP_SGL || type == ALT_SNP_DEL || type == ALT_SNP_INS; } + bool splicesite() const { return type == ALT_SPLICESITE; } + bool mismatch() const { return type == ALT_SNP_SGL; } + bool gap() const { return type == ALT_SNP_DEL || type == ALT_SNP_INS || type == ALT_SPLICESITE; } + bool deletion() const { return type == ALT_SNP_DEL; } + bool insertion() const { return type == ALT_SNP_INS; } + bool exon() const { return type == ALT_EXON; } + + bool operator< (const ALT& o) const { + if(pos != o.pos) return pos < o.pos; + if(type != o.type) { + if(type == ALT_NONE || o.type == ALT_NONE) { + return type == ALT_NONE; + } + if(type == ALT_SNP_INS) return true; + else if(o.type == ALT_SNP_INS) return false; + return type < o.type; + } + if(len != o.len) return len < o.len; + if(seq != o.seq) return seq < o.seq; + return false; + } + + bool compatibleWith(const ALT& o) const { + if(pos == o.pos) return false; + + // sort the two SNPs + const ALT& a = (pos < o.pos ? *this : o); + const ALT& b = (pos < o.pos ? 
o : *this); + + if(a.snp()) { + if(a.type == ALT_SNP_DEL || a.type == ALT_SNP_INS) { + if(b.pos <= a.pos + a.len) { + return false; + } + } + } else if(a.splicesite()) { + if(b.pos <= a.right + 2) { + return false; + } + } else { + assert(false); + } + + return true; + } + + bool isSame(const ALT& o) const { + if(type != o.type) + return false; + if(type == ALT_SNP_SGL) { + return pos == o.pos && seq == o.seq; + } else if(type == ALT_SNP_DEL || type == ALT_SNP_INS || type == ALT_SPLICESITE) { + if(type == ALT_SNP_INS) { + if(seq != o.seq) + return false; + } + if(reversed == o.reversed) { + return pos == o.pos && len == o.len; + } else { + if(reversed) { + return pos - len + 1 == o.pos && len == o.len; + } else { + return pos == o.pos - o.len + 1 && len == o.len; + } + } + } else { + assert(false); + } + return true; + } + +#ifndef NDEBUG + bool repOk() const { + if(type == ALT_SNP_SGL) { + if(len != 1) { + assert(false); + return false; + } + + if(seq > 3) { + assert(false); + return false; + } + } else if(type == ALT_SNP_DEL) { + if(len <= 0) { + assert(false); + return false; + } + if(seq != 0) { + assert(false); + return false; + } + } else if(type == ALT_SNP_INS) { + if(len <= 0) { + assert(false); + return false; + } + } else if(type == ALT_SPLICESITE) { + assert_lt(left, right); + assert_leq(fw, 1); + }else { + assert(false); + return false; + } + return true; + } +#endif + + bool write(ofstream& f_out, bool bigEndian) const { + writeIndex(f_out, pos, bigEndian); + writeU32(f_out, type, bigEndian); + writeIndex(f_out, len, bigEndian); + writeIndex(f_out, seq, bigEndian); + return true; + } + + bool read(ifstream& f_in, bool bigEndian) { + pos = readIndex(f_in, bigEndian); + type = (ALT_TYPE)readU32(f_in, bigEndian); + assert_neq(type, ALT_SNP_ALT); + len = readIndex(f_in, bigEndian); + seq = readIndex(f_in, bigEndian); + return true; + } +}; + + +template +struct Haplotype { + Haplotype() { + reset(); + } + + void reset() { + left = right = 0; + 
alts.clear(); + } + + index_t left; + index_t right; + EList alts; + + bool operator< (const Haplotype& o) const { + if(left != o.left) return left < o.left; + if(right != o.right) return right < o.right; + return false; + } + + bool write(ofstream& f_out, bool bigEndian) const { + writeIndex(f_out, left, bigEndian); + writeIndex(f_out, right, bigEndian); + writeIndex(f_out, alts.size(), bigEndian); + for(index_t i = 0; i < alts.size(); i++) { + writeIndex(f_out, alts[i], bigEndian); + } + return true; + } + + bool read(ifstream& f_in, bool bigEndian) { + left = readIndex(f_in, bigEndian); + right = readIndex(f_in, bigEndian); + assert_leq(left, right); + index_t num_alts = readIndex(f_in, bigEndian); + alts.resizeExact(num_alts); alts.clear(); + for(index_t i = 0; i < num_alts; i++) { + alts.push_back(readIndex(f_in, bigEndian)); + } + return true; + } +}; + + +template +class ALTDB { +public: + ALTDB() : + _snp(false), + _ss(false), + _exon(false) + {} + + virtual ~ALTDB() {} + + bool hasSNPs() const { return _snp; } + bool hasSpliceSites() const { return _ss; } + bool hasExons() const { return _exon; } + + void setSNPs(bool snp) { _snp = snp; } + void setSpliceSites(bool ss) { _ss = ss; } + void setExons(bool exon) { _exon = exon; } + + EList >& alts() { return _alts; } + EList& altnames() { return _altnames; } + EList >& haplotypes() { return _haplotypes; } + EList& haplotype_maxrights() { return _haplotype_maxrights; } + + const EList >& alts() const { return _alts; } + const EList& altnames() const { return _altnames; } + const EList >& haplotypes() const { return _haplotypes; } + const EList& haplotype_maxrights() const { return _haplotype_maxrights; } + +private: + bool _snp; + bool _ss; + bool _exon; + + EList > _alts; + EList _altnames; + EList > _haplotypes; + EList _haplotype_maxrights; +}; + + +#endif /*ifndef ALT_H_*/ diff --git a/assert_helpers.h b/assert_helpers.h new file mode 100644 index 0000000..688181a --- /dev/null +++ b/assert_helpers.h @@ 
-0,0 +1,279 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ASSERT_HELPERS_H_ +#define ASSERT_HELPERS_H_ + +#include +#include +#include +#include + +/** + * Assertion for release-enabled assertions + */ +class ReleaseAssertException : public std::runtime_error { +public: + ReleaseAssertException(const std::string& msg = "") : std::runtime_error(msg) {} +}; + +/** + * Macros for release-enabled assertions, and helper macros to make + * all assertion error messages more helpful. + */ +#ifndef NDEBUG +#define ASSERT_ONLY(...) __VA_ARGS__ +#else +#define ASSERT_ONLY(...) 
+#endif + +#define rt_assert(b) \ + if(!(b)) { \ + std::cout << "rt_assert at " << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(); \ + } +#define rt_assert_msg(b,msg) \ + if(!(b)) { \ + std::cout << msg << " at " << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(msg); \ + } + +#define rt_assert_eq(ex,ac) \ + if(!((ex) == (ac))) { \ + std::cout << "rt_assert_eq: expected (" << (ex) << ", 0x" << std::hex << (ex) << std::dec << ") got (" << (ac) << ", 0x" << std::hex << (ac) << std::dec << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(); \ + } +#define rt_assert_eq_msg(ex,ac,msg) \ + if(!((ex) == (ac))) { \ + std::cout << "rt_assert_eq: " << msg << ": (" << (ex) << ", 0x" << std::hex << (ex) << std::dec << ") got (" << (ac) << ", 0x" << std::hex << (ac) << std::dec << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(msg); \ + } + +#ifndef NDEBUG +#define assert_eq(ex,ac) \ + if(!((ex) == (ac))) { \ + std::cout << "assert_eq: expected (" << (ex) << ", 0x" << std::hex << (ex) << std::dec << ") got (" << (ac) << ", 0x" << std::hex << (ac) << std::dec << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#define assert_eq_msg(ex,ac,msg) \ + if(!((ex) == (ac))) { \ + std::cout << "assert_eq: " << msg << ": (" << (ex) << ", 0x" << std::hex << (ex) << std::dec << ") got (" << (ac) << ", 0x" << std::hex << (ac) << std::dec << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#else +#define assert_eq(ex,ac) +#define assert_eq_msg(ex,ac,msg) +#endif + +#define rt_assert_neq(ex,ac) \ + if(!((ex) != (ac))) { \ + std::cout << "rt_assert_neq: expected not (" << (ex) << ", 0x" << std::hex << (ex) << std::dec << ") got (" << (ac) << ", 0x" << std::hex << (ac) << std::dec << ")" << std::endl; \ + std::cout << 
__FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(); \ + } +#define rt_assert_neq_msg(ex,ac,msg) \ + if(!((ex) != (ac))) { \ + std::cout << "rt_assert_neq: " << msg << ": (" << (ex) << ", 0x" << std::hex << (ex) << std::dec << ") got (" << (ac) << ", 0x" << std::hex << (ac) << std::dec << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(msg); \ + } + +#ifndef NDEBUG +#define assert_neq(ex,ac) \ + if(!((ex) != (ac))) { \ + std::cout << "assert_neq: expected not (" << (ex) << ", 0x" << std::hex << (ex) << std::dec << ") got (" << (ac) << ", 0x" << std::hex << (ac) << std::dec << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#define assert_neq_msg(ex,ac,msg) \ + if(!((ex) != (ac))) { \ + std::cout << "assert_neq: " << msg << ": (" << (ex) << ", 0x" << std::hex << (ex) << std::dec << ") got (" << (ac) << ", 0x" << std::hex << (ac) << std::dec << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#else +#define assert_neq(ex,ac) +#define assert_neq_msg(ex,ac,msg) +#endif + +#define rt_assert_gt(a,b) \ + if(!((a) > (b))) { \ + std::cout << "rt_assert_gt: expected (" << (a) << ") > (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(); \ + } +#define rt_assert_gt_msg(a,b,msg) \ + if(!((a) > (b))) { \ + std::cout << "rt_assert_gt: " << msg << ": (" << (a) << ") > (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(msg); \ + } + +#ifndef NDEBUG +#define assert_gt(a,b) \ + if(!((a) > (b))) { \ + std::cout << "assert_gt: expected (" << (a) << ") > (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#define assert_gt_msg(a,b,msg) \ + if(!((a) > (b))) { \ + std::cout << "assert_gt: " << msg << ": 
(" << (a) << ") > (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#else +#define assert_gt(a,b) +#define assert_gt_msg(a,b,msg) +#endif + +#define rt_assert_geq(a,b) \ + if(!((a) >= (b))) { \ + std::cout << "rt_assert_geq: expected (" << (a) << ") >= (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(); \ + } +#define rt_assert_geq_msg(a,b,msg) \ + if(!((a) >= (b))) { \ + std::cout << "rt_assert_geq: " << msg << ": (" << (a) << ") >= (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(msg); \ + } + +#ifndef NDEBUG +#define assert_geq(a,b) \ + if(!((a) >= (b))) { \ + std::cout << "assert_geq: expected (" << (a) << ") >= (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#define assert_geq_msg(a,b,msg) \ + if(!((a) >= (b))) { \ + std::cout << "assert_geq: " << msg << ": (" << (a) << ") >= (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#else +#define assert_geq(a,b) +#define assert_geq_msg(a,b,msg) +#endif + +#define rt_assert_lt(a,b) \ + if(!(a < b)) { \ + std::cout << "rt_assert_lt: expected (" << a << ") < (" << b << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(); \ + } +#define rt_assert_lt_msg(a,b,msg) \ + if(!(a < b)) { \ + std::cout << "rt_assert_lt: " << msg << ": (" << a << ") < (" << b << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(msg); \ + } + +#ifndef NDEBUG +#define assert_lt(a,b) \ + if(!(a < b)) { \ + std::cout << "assert_lt: expected (" << a << ") < (" << b << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#define assert_lt_msg(a,b,msg) \ + 
if(!(a < b)) { \ + std::cout << "assert_lt: " << msg << ": (" << a << ") < (" << b << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#else +#define assert_lt(a,b) +#define assert_lt_msg(a,b,msg) +#endif + +#define rt_assert_leq(a,b) \ + if(!((a) <= (b))) { \ + std::cout << "rt_assert_leq: expected (" << (a) << ") <= (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(); \ + } +#define rt_assert_leq_msg(a,b,msg) \ + if(!((a) <= (b))) { \ + std::cout << "rt_assert_leq: " << msg << ": (" << (a) << ") <= (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + throw ReleaseAssertException(msg); \ + } + +#ifndef NDEBUG +#define assert_leq(a,b) \ + if(!((a) <= (b))) { \ + std::cout << "assert_leq: expected (" << (a) << ") <= (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#define assert_leq_msg(a,b,msg) \ + if(!((a) <= (b))) { \ + std::cout << "assert_leq: " << msg << ": (" << (a) << ") <= (" << (b) << ")" << std::endl; \ + std::cout << __FILE__ << ":" << __LINE__ << std::endl; \ + assert(0); \ + } +#else +#define assert_leq(a,b) +#define assert_leq_msg(a,b,msg) +#endif + +#ifndef NDEBUG +#define assert_in(c, s) assert_in2(c, s, __FILE__, __LINE__) +static inline void assert_in2(char c, const char *str, const char *file, int line) { + const char *s = str; + while(*s != '\0') { + if(c == *s) return; + s++; + } + std::cout << "assert_in: (" << c << ") not in (" << str << ")" << std::endl; + std::cout << file << ":" << line << std::endl; + assert(0); +} +#else +#define assert_in(c, s) +#endif + +#ifndef NDEBUG +#define assert_range(b, e, v) assert_range_helper(b, e, v, __FILE__, __LINE__) +template +inline static void assert_range_helper(const T& begin, + const T& end, + const T& val, + const char *file, + int line) +{ + if(val < begin || val > end) { + 
std::cout << "assert_range: (" << val << ") not in [" + << begin << ", " << end << "]" << std::endl; + std::cout << file << ":" << line << std::endl; + assert(0); + } +} +#else +#define assert_range(b, e, v) +#endif + +#endif /*ASSERT_HELPERS_H_*/ diff --git a/banded.cpp b/banded.cpp new file mode 100644 index 0000000..e88f7ca --- /dev/null +++ b/banded.cpp @@ -0,0 +1,27 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +#include "banded.h" + +#ifdef MAIN_BANDED +int main(void) { + +} +#endif diff --git a/banded.h b/banded.h new file mode 100644 index 0000000..37978bc --- /dev/null +++ b/banded.h @@ -0,0 +1,52 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#ifndef BANDED_H_ +#define BANDED_H_ + +#include "sse_util.h" + +/** + * Use SSE instructions to quickly find stretches with lots of matches, then + * resolve alignments. + */ +class BandedSseAligner { + +public: + + void init( + int *q, // query, maskized + size_t qi, // query start + size_t qf, // query end + int *r, // reference, maskized + size_t ri, // reference start + size_t rf) // reference end + { + + } + + void nextAlignment() { + } + +protected: + + EList_m128i mat_; +}; + +#endif diff --git a/binary_sa_search.h b/binary_sa_search.h new file mode 100644 index 0000000..4bb6eb7 --- /dev/null +++ b/binary_sa_search.h @@ -0,0 +1,102 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef BINARY_SA_SEARCH_H_ +#define BINARY_SA_SEARCH_H_ + +#include +#include +#include +#include "alphabet.h" +#include "assert_helpers.h" +#include "ds.h" +#include "btypes.h" + +/** + * Do a binary search using the suffix of 'host' beginning at offset + * 'qry' as the query and 'sa' as an already-lexicographically-sorted + * list of suffixes of host. 'sa' may be all suffixes of host or just + * a subset. Returns the index in sa of the smallest suffix of host + * that is larger than qry, or length(sa) if all suffixes of host are + * less than qry. 
+ * + * We use the Manber and Myers optimization of maintaining a pair of + * counters for the longest lcp observed so far on the left- and right- + * hand sides and using the min of the two as a way of skipping over + * characters at the beginning of a new round. + * + * Returns maximum value if the query suffix matches an element of sa. + */ +template inline +TIndexOffU binarySASearch( + const TStr& host, + TIndexOffU qry, + const EList& sa) +{ + TIndexOffU lLcp = 0, rLcp = 0; // greatest observed LCPs on left and right + TIndexOffU l = 0, r = (TIndexOffU)sa.size()+1; // binary-search window + TIndexOffU hostLen = (TIndexOffU)host.length(); + while(true) { + assert_gt(r, l); + TIndexOffU m = (l+r) >> 1; + if(m == l) { + // Binary-search window has closed: we have an answer + if(m > 0 && sa[m-1] == qry) { + return std::numeric_limits::max(); // qry matches + } + assert_leq(m, sa.size()); + return m; // Return index of right-hand suffix + } + assert_gt(m, 0); + TIndexOffU suf = sa[m-1]; + if(suf == qry) { + return std::numeric_limits::max(); // query matches an elt of sa + } + TIndexOffU lcp = min(lLcp, rLcp); +#ifndef NDEBUG + if(sstr_suf_upto_neq(host, qry, host, suf, lcp)) { + assert(0); + } +#endif + // Keep advancing lcp, but stop when query mismatches host or + // when the counter falls off either the query or the suffix + while(suf+lcp < hostLen && qry+lcp < hostLen && host[suf+lcp] == host[qry+lcp]) { + lcp++; + } + // Fell off the end of either the query or the sa elt? 
+ bool fell = (suf+lcp == hostLen || qry+lcp == hostLen); + if((fell && qry+lcp == hostLen) || (!fell && host[suf+lcp] < host[qry+lcp])) { + // Query is greater than sa elt + l = m; // update left bound + lLcp = max(lLcp, lcp); // update left lcp + } + else if((fell && suf+lcp == hostLen) || (!fell && host[suf+lcp] > host[qry+lcp])) { + // Query is less than sa elt + r = m; // update right bound + rLcp = max(rLcp, lcp); // update right lcp + } else { + assert(false); // Must be one or the other! + } + } + // Shouldn't get here + assert(false); + return std::numeric_limits::max(); +} + +#endif /*BINARY_SA_SEARCH_H_*/ diff --git a/bit_packed_array.cpp b/bit_packed_array.cpp new file mode 100644 index 0000000..c1be08f --- /dev/null +++ b/bit_packed_array.cpp @@ -0,0 +1,315 @@ +/* +* Copyright 2018, Chanhee Park and Daehwan Kim +* +* This file is part of HISAT 2. +* +* HISAT 2 is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* HISAT 2 is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with HISAT 2. If not, see . 
+*/ + +#include +#include +#include +#include "timer.h" +#include "aligner_sw.h" +#include "aligner_result.h" +#include "scoring.h" +#include "sstring.h" + +#include "bit_packed_array.h" + +TIndexOffU BitPackedArray::get(size_t index) const +{ + assert_lt(index, cur_); + + pair addr = indexToAddress(index); + uint64_t *block = blocks_[addr.first]; + pair pos = columnToPosition(addr.second); + TIndexOffU val = getItem(block, pos.first, pos.second); + + return val; +} + + +#define write_fp(x) fp.write((const char *)&(x), sizeof((x))) + +void BitPackedArray::writeFile(ofstream &fp) +{ + size_t sz = 0; + + write_fp(item_bit_size_); + write_fp(elm_bit_size_); + write_fp(items_per_block_bit_); + write_fp(items_per_block_bit_mask_); + write_fp(items_per_block_); + + write_fp(cur_); + write_fp(sz_); + + write_fp(block_size_); + + // number of blocks + sz = blocks_.size(); + write_fp(sz); + for(size_t i = 0; i < sz; i++) { + fp.write((const char *)blocks_[i], block_size_); + } +} + +void BitPackedArray::writeFile(const char *filename) +{ + ofstream fp(filename, std::ofstream::binary); + writeFile(fp); + fp.close(); +} + +void BitPackedArray::writeFile(const string &filename) +{ + writeFile(filename.c_str()); +} + + +#define read_fp(x) fp.read((char *)&(x), sizeof((x))) + +void BitPackedArray::readFile(ifstream &fp) +{ + size_t val_sz = 0; + + read_fp(val_sz); + init_by_log2(val_sz); + //rt_assert_eq(val_sz, item_bit_size_); + + read_fp(val_sz); + rt_assert_eq(val_sz, elm_bit_size_); + + read_fp(val_sz); + rt_assert_eq(val_sz, items_per_block_bit_); + + read_fp(val_sz); + rt_assert_eq(val_sz, items_per_block_bit_mask_); + + read_fp(val_sz); + rt_assert_eq(val_sz, items_per_block_); + + // skip cur_ + size_t prev_cnt = 0; + read_fp(prev_cnt); + cur_ = 0; + + // skip sz_ + size_t prev_sz = 0; + read_fp(prev_sz); + sz_ = 0; + + // block_size_ + read_fp(val_sz); + rt_assert_eq(val_sz, block_size_); + + // alloc blocks + allocItems(prev_cnt); + rt_assert_eq(prev_sz, sz_); + + // 
number of blocks + read_fp(val_sz); + rt_assert_eq(val_sz, blocks_.size()); + for(size_t i = 0; i < blocks_.size(); i++) { + fp.read((char *)blocks_[i], block_size_); + } + cur_ = prev_cnt; +} + +void BitPackedArray::readFile(const char *filename) +{ + ifstream fp(filename, std::ifstream::binary); + readFile(fp); + fp.close(); +} + +void BitPackedArray::readFile(const string &filename) +{ + readFile(filename.c_str()); +} + +void BitPackedArray::put(size_t index, TIndexOffU val) +{ + assert_lt(index, cur_); + + pair addr = indexToAddress(index); + uint64_t *block = blocks_[addr.first]; + pair pos = columnToPosition(addr.second); + + setItem(block, pos.first, pos.second, val); +} + +void BitPackedArray::pushBack(TIndexOffU val) +{ + if(cur_ == sz_) { + allocItems(items_per_block_); + } + + put(cur_++, val); + + assert_leq(cur_, sz_); +} + +TIndexOffU BitPackedArray::getItem(uint64_t *block, size_t idx, size_t offset) const +{ + size_t remains = item_bit_size_; + + TIndexOffU val = 0; + + while(remains > 0) { + size_t bits = min(elm_bit_size_ - offset, remains); + uint64_t mask = bitToMask(bits); + + // get value from block + TIndexOffU t = (block[idx] >> offset) & mask; + val = val | (t << (item_bit_size_ - remains)); + + remains -= bits; + offset = 0; + idx++; + } + + return val; +} + +void BitPackedArray::setItem(uint64_t *block, size_t idx, size_t offset, TIndexOffU val) +{ + size_t remains = item_bit_size_; + + while(remains > 0) { + size_t bits = min(elm_bit_size_ - offset, remains); + uint64_t mask = bitToMask(bits); + uint64_t dest_mask = mask << offset; + + // get 'bits' lsb from val + uint64_t t = val & mask; + val >>= bits; + + // save 't' to block[idx] + t <<= offset; + block[idx] &= ~(dest_mask); // clear + block[idx] |= t; + + idx++; + remains -= bits; + offset = 0; + } +} + +pair BitPackedArray::indexToAddress(size_t index) const +{ + pair addr; + + addr.first = index >> items_per_block_bit_; + addr.second = index & items_per_block_bit_mask_; + + return 
addr; +} + +pair BitPackedArray::columnToPosition(size_t col) const { + pair pos; + + pos.first = (col * item_bit_size_) / elm_bit_size_; + pos.second = (col * item_bit_size_) % elm_bit_size_; + return pos; +} + +void BitPackedArray::expand(size_t count) +{ + if((cur_ + count) > sz_) { + allocItems(count); + } + + cur_ += count; + + assert_leq(cur_, sz_); +} + +void BitPackedArray::allocSize(size_t sz) // sz: number of 64-bit words to make room for +{ + size_t num_block = (sz * sizeof(uint64_t) + block_size_ - 1) / block_size_; + + for(size_t i = 0; i < num_block; i++) { + uint64_t *ptr = new uint64_t[block_size_ / sizeof(uint64_t)](); // block_size_ is in bytes (a multiple of 8, see init_by_log2); zero-fill so setItem() never reads indeterminate bits + blocks_.push_back(ptr); + sz_ += items_per_block_; + } +} + +void BitPackedArray::allocItems(size_t count) +{ + size_t sz = (count * item_bit_size_ + elm_bit_size_ - 1) / elm_bit_size_; + allocSize(sz); +} + +void BitPackedArray::init_by_log2(size_t ceil_log2) +{ + item_bit_size_ = ceil_log2; + + elm_bit_size_ = sizeof(uint64_t) * 8; + + items_per_block_bit_ = 20; // 1M + items_per_block_ = 1ULL << (items_per_block_bit_); + items_per_block_bit_mask_ = items_per_block_ - 1; + + block_size_ = (items_per_block_ * item_bit_size_ + elm_bit_size_ - 1) / elm_bit_size_ * sizeof(uint64_t); + + cur_ = 0; + sz_ = 0; +} + +void BitPackedArray::init(size_t max_value) +{ + init_by_log2((size_t)ceil(log2(max_value))); +} + +void BitPackedArray::dump() const +{ + cerr << "item_bit_size_: " << item_bit_size_ << endl; + cerr << "block_size_: " << block_size_ << endl; + cerr << "items_per_block_: " << items_per_block_ << endl; + cerr << "cur_: " << cur_ << endl; + cerr << "sz_: " << sz_ << endl; + cerr << "number of blocks: " << blocks_.size() << endl; +} + +size_t BitPackedArray::getMemUsage() const +{ + size_t tot = blocks_.size() * block_size_; + tot += blocks_.totalCapacityBytes(); + return tot; +} + +BitPackedArray::~BitPackedArray() +{ + for(size_t i = 0; i < blocks_.size(); i++) { + uint64_t *ptr = blocks_[i]; + delete [] ptr; + } +} + +void BitPackedArray::reset() +{ + cur_ = 0; + sz_ = 0; + + for(size_t i = 
0; i < blocks_.size(); i++) { + uint64_t *ptr = blocks_[i]; + delete [] ptr; + } + + blocks_.clear(); +} + diff --git a/bit_packed_array.h b/bit_packed_array.h new file mode 100644 index 0000000..8350428 --- /dev/null +++ b/bit_packed_array.h @@ -0,0 +1,105 @@ +/* +* Copyright 2018, Chanhee Park and Daehwan Kim +* +* This file is part of HISAT 2. +* +* HISAT 2 is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* HISAT 2 is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with HISAT 2. If not, see . +*/ + +#ifndef __HISAT2_BIT_PACKED_ARRAY_H +#define __HISAT2_BIT_PACKED_ARRAY_H + +#include +#include +#include +#include +#include "assert_helpers.h" +#include "word_io.h" +#include "mem_ids.h" +#include "ds.h" + +using namespace std; + +class BitPackedArray { +public: + BitPackedArray () {} + ~BitPackedArray(); + + /** + * Return true iff there are no items + * @return + */ + inline bool empty() const { return cur_ == 0; } + inline size_t size() const { return cur_; } + + TIndexOffU get(size_t idx) const; + + inline TIndexOffU operator[](size_t i) const { return get(i); } + void pushBack(TIndexOffU val); + + void init(size_t max_value); + void reset(); + + void writeFile(const char *filename); + void writeFile(const string& filename); + void writeFile(ofstream &fp); + + void readFile(const char *filename); + void readFile(const string& filename); + void readFile(ifstream &fp); + + void dump() const; + + size_t getMemUsage() const; + +private: + void init_by_log2(size_t ceil_log2); + + void put(size_t index, TIndexOffU val); + 
inline uint64_t bitToMask(size_t bit) const + { + return (uint64_t) ((1ULL << bit) - 1); + } + + TIndexOffU getItem(uint64_t *block, size_t idx, size_t offset) const; + void setItem(uint64_t *block, size_t idx, size_t offset, TIndexOffU val); + + pair indexToAddress(size_t index) const; + pair columnToPosition(size_t col) const; + + + void expand(size_t count = 1); + void allocSize(size_t sz); + void allocItems(size_t count); + + +private: + size_t item_bit_size_; // item bit size(e.g. 33bit) + + size_t elm_bit_size_; // 64bit + size_t items_per_block_bit_; + size_t items_per_block_bit_mask_; + size_t items_per_block_; // number of items in block + + size_t cur_; // current item count + size_t sz_; // maximum item count + + size_t block_size_; // block size in byte + + // List of packed array + EList blocks_; +}; + + +#endif //__HISAT2_BIT_PACKED_ARRAY_H diff --git a/bitpack.h b/bitpack.h new file mode 100644 index 0000000..1504265 --- /dev/null +++ b/bitpack.h @@ -0,0 +1,80 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#ifndef BITPACK_H_ +#define BITPACK_H_ + +#include +#include "assert_helpers.h" + +/** + * Routines for marshalling 2-bit values into and out of 8-bit or + * 32-bit hosts + */ + +static inline void pack_2b_in_8b(const int two, uint8_t& eight, const int off) { + assert_lt(two, 4); + assert_lt(off, 4); + eight |= (two << (off*2)); +} + +static inline int unpack_2b_from_8b(const uint8_t eight, const int off) { + assert_lt(off, 4); + return ((eight >> (off*2)) & 0x3); +} + +static inline void pack_2b_in_32b(const int two, uint32_t& thirty2, const int off) { + assert_lt(two, 4); + assert_lt(off, 16); + thirty2 |= (two << (off*2)); +} + +static inline int unpack_2b_from_32b(const uint32_t thirty2, const int off) { + assert_lt(off, 16); + return ((thirty2 >> (off*2)) & 0x3); +} + +/** + * Routines for marshalling 1-bit values into and out of 8-bit or + * 32-bit hosts + */ + +static inline void pack_1b_in_8b(const int one, uint8_t& eight, const int off) { + assert_lt(one, 2); + assert_lt(off, 8); + eight |= (one << off); +} + +static inline int unpack_1b_from_8b(const uint8_t eight, const int off) { + assert_lt(off, 8); // off is a bit position within the 8-bit host (same bound as pack_1b_in_8b) + return ((eight >> off) & 0x1); +} + +static inline void pack_1b_in_32b(const int one, uint32_t& thirty2, const int off) { + assert_lt(one, 2); + assert_lt(off, 32); + thirty2 |= (one << off); +} + +static inline int unpack_1b_from_32b(const uint32_t thirty2, const int off) { + assert_lt(off, 32); + return ((thirty2 >> off) & 0x1); +} + +#endif /*BITPACK_H_*/ diff --git a/blockwise_sa.h b/blockwise_sa.h new file mode 100644 index 0000000..22d8749 --- /dev/null +++ b/blockwise_sa.h @@ -0,0 +1,1113 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef BLOCKWISE_SA_H_ +#define BLOCKWISE_SA_H_ + +#include +#include +#include +#include +#include +#include "assert_helpers.h" +#include "diff_sample.h" +#include "multikey_qsort.h" +#include "random_source.h" +#include "binary_sa_search.h" +#include "zbox.h" +#include "alphabet.h" +#include "timer.h" +#include "ds.h" +#include "mem_ids.h" +#include "word_io.h" + +using namespace std; + +// Helpers for printing verbose messages + +#ifndef VMSG_NL +#define VMSG_NL(...) \ +if(this->verbose()) { \ + stringstream tmp; \ + tmp << __VA_ARGS__ << endl; \ + this->verbose(tmp.str()); \ +} +#endif + +#ifndef VMSG +#define VMSG(...) \ +if(this->verbose()) { \ + stringstream tmp; \ + tmp << __VA_ARGS__; \ + this->verbose(tmp.str()); \ +} +#endif + +/** + * Abstract parent class for blockwise suffix-array building schemes. + */ +template +class BlockwiseSA { +public: + BlockwiseSA(const TStr& __text, + TIndexOffU __bucketSz, + bool __sanityCheck = false, + bool __passMemExc = false, + bool __verbose = false, + ostream& __logger = cout) : + _text(__text), + _bucketSz(max(__bucketSz, 2u)), + _sanityCheck(__sanityCheck), + _passMemExc(__passMemExc), + _verbose(__verbose), + _itrBucket(EBWTB_CAT), + _itrBucketPos(OFF_MASK), + _itrPushedBackSuffix(OFF_MASK), + _logger(__logger) + { } + + virtual ~BlockwiseSA() { } + + /** + * Get the next suffix; compute the next bucket if necessary. + */ + virtual TIndexOffU nextSuffix() = 0; + + /** + * Return true iff the next call to nextSuffix will succeed. 
+ */ + bool hasMoreSuffixes() { + if(_itrPushedBackSuffix != OFF_MASK) return true; + try { + _itrPushedBackSuffix = nextSuffix(); + } catch(out_of_range& e) { + assert_eq(OFF_MASK, _itrPushedBackSuffix); + return false; + } + return true; + } + + /** + * Reset the suffix iterator so that the next call to nextSuffix() + * returns the lexicographically-first suffix. + */ + void resetSuffixItr() { + _itrBucket.clear(); + _itrBucketPos = OFF_MASK; + _itrPushedBackSuffix = OFF_MASK; + reset(); + assert(suffixItrIsReset()); + } + + /** + * Returns true iff the next call to nextSuffix() returns the + * lexicographically-first suffix. + */ + bool suffixItrIsReset() { + return _itrBucket.size() == 0 && + _itrBucketPos == OFF_MASK && + _itrPushedBackSuffix == OFF_MASK && + isReset(); + } + + const TStr& text() const { return _text; } + TIndexOffU bucketSz() const { return _bucketSz; } + bool sanityCheck() const { return _sanityCheck; } + bool verbose() const { return _verbose; } + ostream& log() const { return _logger; } + size_t size() const { return _text.length()+1; } + +protected: + /// Reset back to the first block + virtual void reset() = 0; + /// Return true iff reset to the first block + virtual bool isReset() = 0; + + /** + * Grab the next block of sorted suffixes. The block is guaranteed + * to have at most _bucketSz elements. 
+ */ + virtual void nextBlock(int cur_block, int tid = 0) = 0; + /// Return true iff more blocks are available + virtual bool hasMoreBlocks() const = 0; + /// Optionally output a verbose message + void verbose(const string& s) const { + if(this->verbose()) { + this->log() << s.c_str(); + this->log().flush(); + } + } + + const TStr& _text; /// original string + const TIndexOffU _bucketSz; /// target maximum bucket size + const bool _sanityCheck; /// whether to perform sanity checks + const bool _passMemExc; /// true -> pass on memory exceptions + const bool _verbose; /// be talkative + EList _itrBucket; /// current bucket + TIndexOffU _itrBucketPos;/// offset into current bucket + TIndexOffU _itrPushedBackSuffix; /// temporary slot for lookahead + ostream& _logger; /// write log messages here +}; + +/** + * Abstract parent class for a blockwise suffix array builder that + * always doles out blocks in lexicographical order. + */ +template +class InorderBlockwiseSA : public BlockwiseSA { +public: + InorderBlockwiseSA(const TStr& __text, + TIndexOffU __bucketSz, + bool __sanityCheck = false, + bool __passMemExc = false, + bool __verbose = false, + ostream& __logger = cout) : + BlockwiseSA(__text, __bucketSz, __sanityCheck, __passMemExc, __verbose, __logger) + { } +}; + +/** + * Build the SA a block at a time according to the scheme outlined in + * Karkkainen's "Fast BWT" paper. 
+ */ +template +class KarkkainenBlockwiseSA : public InorderBlockwiseSA { +public: + typedef DifferenceCoverSample TDC; + + KarkkainenBlockwiseSA(const TStr& __text, + TIndexOffU __bucketSz, + int __nthreads, + uint32_t __dcV, + uint32_t __seed = 0, + bool __sanityCheck = false, + bool __passMemExc = false, + bool __verbose = false, + string base_fname = "", + ostream& __logger = cout) : + InorderBlockwiseSA(__text, __bucketSz, __sanityCheck, __passMemExc, __verbose, __logger), + _sampleSuffs(EBWTB_CAT), _nthreads(__nthreads), _itrBucketIdx(0), _cur(0), _dcV(__dcV), _dc(EBWTB_CAT), _built(false), _base_fname(base_fname), _bigEndian(currentlyBigEndian()) + { _randomSrc.init(__seed); reset(); } + + ~KarkkainenBlockwiseSA() + { + if(_threads.size() > 0) { + for (size_t tid = 0; tid < _threads.size(); tid++) { + _threads[tid]->join(); + delete _threads[tid]; + } + } + } + + + /** + * Allocate an amount of memory that simulates the peak memory + * usage of the DifferenceCoverSample with the given text and v. + * Throws bad_alloc if it's not going to fit in memory. Returns + * the approximate number of bytes the Cover takes at all times. + */ + static size_t simulateAllocs(const TStr& text, TIndexOffU bucketSz) { + size_t len = text.length(); + // _sampleSuffs and _itrBucket are in memory at the peak + size_t bsz = bucketSz; + size_t sssz = len / max(bucketSz-1, 1); + AutoArray tmp(bsz + sssz + (1024 * 1024 /*out of caution*/), EBWT_CAT); + return bsz; + } + + static void nextBlock_Worker(void *vp) { + pair param = *(pair*)vp; + KarkkainenBlockwiseSA* sa = param.first; + int tid = param.second; + while(true) { + size_t cur = 0; + { + ThreadSafe ts(&sa->_mutex, sa->_nthreads > 1); + cur = sa->_cur; + if(cur > sa->_sampleSuffs.size()) break; + sa->_cur++; + } + sa->nextBlock((int)cur, tid); + // Write suffixes into a file + std::ostringstream number; number << cur; + const string fname = sa->_base_fname + "." 
+ number.str() + ".sa"; + ofstream sa_file(fname.c_str(), ios::binary); + if(!sa_file.good()) { + cerr << "Could not open file for writing a bucket: \"" << fname << "\"" << endl; + throw 1; + } + const EList& bucket = sa->_itrBuckets[tid]; + writeIndex(sa_file, (TIndexOffU)bucket.size(), sa->_bigEndian); + for(size_t i = 0; i < bucket.size(); i++) { + writeIndex(sa_file, bucket[i], sa->_bigEndian); + } + sa_file.close(); + sa->_itrBuckets[tid].clear(); + sa->_done[cur] = true; + } + } + + /** + * Get the next suffix; compute the next bucket if necessary. + */ + virtual TIndexOffU nextSuffix() { + // Launch threads if not + if(this->_nthreads > 1) { + if(_threads.size() == 0) { + _done.resize(_sampleSuffs.size() + 1); + _done.fill(false); + _itrBuckets.resize(this->_nthreads); + for(int tid = 0; tid < this->_nthreads; tid++) { + _tparams.expand(); + _tparams.back().first = this; + _tparams.back().second = tid; + _threads.push_back(new tthread::thread(nextBlock_Worker, (void*)&_tparams.back())); + } + assert_eq(_threads.size(), (size_t)this->_nthreads); + } + } + if(this->_itrPushedBackSuffix != OFF_MASK) { + TIndexOffU tmp = this->_itrPushedBackSuffix; + this->_itrPushedBackSuffix = OFF_MASK; + return tmp; + } + while(this->_itrBucketPos >= this->_itrBucket.size() || + this->_itrBucket.size() == 0) + { + if(!hasMoreBlocks()) { + throw out_of_range("No more suffixes"); + } + if(this->_nthreads == 1) { + nextBlock((int)_cur); + _cur++; + } else { + while(!_done[this->_itrBucketIdx]) { +#if defined(_TTHREAD_WIN32_) + Sleep(1); +#elif defined(_TTHREAD_POSIX_) + const static timespec ts = {0, 1000000}; // 1 millisecond + nanosleep(&ts, NULL); +#endif + } + // Read suffixes from a file + std::ostringstream number; number << this->_itrBucketIdx; + const string fname = _base_fname + "." 
+ number.str() + ".sa"; + ifstream sa_file(fname.c_str(), ios::binary); + if(!sa_file.good()) { + cerr << "Could not open file for reading a bucket: \"" << fname << "\"" << endl; + throw 1; + } + size_t numSAs = readIndex(sa_file, _bigEndian); + this->_itrBucket.resizeExact(numSAs); + for(size_t i = 0; i < numSAs; i++) { + this->_itrBucket[i] = readIndex(sa_file, _bigEndian); + } + sa_file.close(); + std::remove(fname.c_str()); + } + this->_itrBucketIdx++; + this->_itrBucketPos = 0; + } + return this->_itrBucket[this->_itrBucketPos++]; + } + + /// Defined in blockwise_sa.cpp + virtual void nextBlock(int cur_block, int tid = 0); + + /// Defined in blockwise_sa.cpp + virtual void qsort(EList& bucket); + + /// Return true iff more blocks are available + virtual bool hasMoreBlocks() const { + return this->_itrBucketIdx <= _sampleSuffs.size(); + } + + /// Return the difference-cover period + uint32_t dcV() const { return _dcV; } + +protected: + + /** + * Initialize the state of the blockwise suffix sort. If the + * difference cover sample and the sample set have not yet been + * built, build them. Then reset the block cursor to point to + * the first block. + */ + virtual void reset() { + if(!_built) { + build(); + } + assert(_built); + _cur = 0; + } + + /// Return true iff we're about to dole out the first bucket + virtual bool isReset() { + return _cur == 0; + } + +private: + + /** + * Calculate the difference-cover sample and sample suffixes. 
+ */ + void build() { + // Calculate difference-cover sample + assert(_dc.get() == NULL); + if(_dcV != 0) { + _dc.init(new TDC(this->text(), _dcV, this->verbose(), this->sanityCheck())); + _dc.get()->build(this->_nthreads); + } + // Calculate sample suffixes + if(this->bucketSz() <= this->text().length()) { + VMSG_NL("Building samples"); + buildSamples(); + } else { + VMSG_NL("Skipping building samples since text length " << + this->text().length() << " is less than bucket size: " << + this->bucketSz()); + } + _built = true; + } + + /** + * Calculate the lcp between two suffixes using the difference + * cover as a tie-breaker. If the tie-breaker is employed, then + * the calculated lcp may be an underestimate. + * + * Defined in blockwise_sa.cpp + */ + inline bool tieBreakingLcp(TIndexOffU aOff, + TIndexOffU bOff, + TIndexOffU& lcp, + bool& lcpIsSoft); + + /** + * Compare two suffixes using the difference-cover sample. + */ + inline bool suffixCmp(TIndexOffU cmp, + TIndexOffU i, + int64_t& j, + int64_t& k, + bool& kSoft, + const EList& z); + + void buildSamples(); + + EList _sampleSuffs; /// sample suffixes + int _nthreads; /// # of threads + TIndexOffU _itrBucketIdx; + TIndexOffU _cur; /// offset to 1st elt of next block + const uint32_t _dcV; /// difference-cover periodicity + PtrWrap _dc; /// queryable difference-cover data + bool _built; /// whether samples/DC have been built + RandomSource _randomSrc; /// source of pseudo-randoms + + MUTEX_T _mutex; /// synchronization of output message + string _base_fname; /// base file name for storing SA blocks + bool _bigEndian; /// bigEndian? + EList _threads; /// thread list + EList > _tparams; + ELList _itrBuckets; /// buckets + EList _done; /// is a block processed? +}; + +/** + * Qsort the set of suffixes whose offsets are in 'bucket'. 
+ */ +template +inline void KarkkainenBlockwiseSA::qsort(EList& bucket) { + const TStr& t = this->text(); + TIndexOffU *s = bucket.ptr(); + size_t slen = bucket.size(); + TIndexOffU len = (TIndexOffU)t.length(); + if(_dc.get() != NULL) { + // Use the difference cover as a tie-breaker if we have it + VMSG_NL(" (Using difference cover)"); + // Extract the 'host' array because it's faster to work + // with than the EList<> container + const uint8_t *host = (const uint8_t *)t.buf(); + assert(_dc.get() != NULL); + mkeyQSortSufDcU8(t, host, len, s, slen, *_dc.get(), 4, + this->verbose(), this->sanityCheck()); + } else { + VMSG_NL(" (Not using difference cover)"); + // We don't have a difference cover - just do a normal + // suffix sort + mkeyQSortSuf(t, s, slen, 4, + this->verbose(), this->sanityCheck()); + } +} + +/** + * Qsort the set of suffixes whose offsets are in 'bucket'. This + * specialization for packed strings does not attempt to extract and + * operate directly on the host string; the fact that the string is + * packed means that the array cannot be sorted directly. + */ +template<> +inline void KarkkainenBlockwiseSA::qsort( + EList& bucket) +{ + const S2bDnaString& t = this->text(); + TIndexOffU *s = bucket.ptr(); + size_t slen = bucket.size(); + size_t len = t.length(); + if(_dc.get() != NULL) { + // Use the difference cover as a tie-breaker if we have it + VMSG_NL(" (Using difference cover)"); + // Can't use the text's 'host' array because the backing + // store for the packed string is not one-char-per-elt. 
+ mkeyQSortSufDcU8(t, t, len, s, slen, *_dc.get(), 4, + this->verbose(), this->sanityCheck()); + } else { + VMSG_NL(" (Not using difference cover)"); + // We don't have a difference cover - just do a normal + // suffix sort + mkeyQSortSuf(t, s, slen, 4, + this->verbose(), this->sanityCheck()); + } +} + +template +struct BinarySortingParam { + const TStr* t; + const EList* sampleSuffs; + EList bucketSzs; + EList bucketReps; + size_t begin; + size_t end; +}; + +template +static void BinarySorting_worker(void *vp) +{ + BinarySortingParam* param = (BinarySortingParam*)vp; + const TStr& t = *(param->t); + size_t len = t.length(); + const EList& sampleSuffs = *(param->sampleSuffs); + EList& bucketSzs = param->bucketSzs; + EList& bucketReps = param->bucketReps; + ASSERT_ONLY(size_t numBuckets = bucketSzs.size()); + size_t begin = param->begin; + size_t end = param->end; + // Iterate through every suffix in the text, determine which + // bucket it falls into by doing a binary search across the + // sorted list of samples, and increment a counter associated + // with that bucket. Also, keep one representative for each + // bucket so that we can split it later. We loop in ten + // stretches so that we can print out a helpful progress + // message. (This step can take a long time.) + for(TIndexOffU i = (TIndexOffU)begin; i < end && i < len; i++) { + TIndexOffU r = binarySASearch(t, i, sampleSuffs); + if(r == std::numeric_limits::max()) continue; // r was one of the samples + assert_lt(r, numBuckets); + bucketSzs[r]++; + assert_lt(bucketSzs[r], len); + if(bucketReps[r] == OFF_MASK || (i & 100) == 0) { + bucketReps[r] = i; // clobbers previous one, but that's OK + } + } +} + +/** + * Select a set of bucket-delineating sample suffixes such that no + * bucket is greater than the requested upper limit. Some care is + * taken to make each bucket's size close to the limit without + * going over. 
+ */ +template +void KarkkainenBlockwiseSA::buildSamples() { + const TStr& t = this->text(); + TIndexOffU bsz = this->bucketSz()-1; // subtract 1 to leave room for sample + size_t len = this->text().length(); + // Prepare _sampleSuffs array + _sampleSuffs.clear(); + TIndexOffU numSamples = (TIndexOffU)((len/bsz)+1)<<1; // ~len/bsz x 2 + assert_gt(numSamples, 0); + VMSG_NL("Reserving space for " << numSamples << " sample suffixes"); + if(this->_passMemExc) { + _sampleSuffs.resizeExact(numSamples); + // Randomly generate samples. Allow duplicates for now. + VMSG_NL("Generating random suffixes"); + for(size_t i = 0; i < numSamples; i++) { +#ifdef BOWTIE_64BIT_INDEX + _sampleSuffs[i] = (TIndexOffU)(_randomSrc.nextU64() % len); +#else + _sampleSuffs[i] = (TIndexOffU)(_randomSrc.nextU32() % len); +#endif + } + } else { + try { + _sampleSuffs.resizeExact(numSamples); + // Randomly generate samples. Allow duplicates for now. + VMSG_NL("Generating random suffixes"); + for(size_t i = 0; i < numSamples; i++) { +#ifdef BOWTIE_64BIT_INDEX + _sampleSuffs[i] = (TIndexOffU)(_randomSrc.nextU64() % len); +#else + _sampleSuffs[i] = (TIndexOffU)(_randomSrc.nextU32() % len); +#endif + } + } catch(bad_alloc &e) { + if(this->_passMemExc) { + throw e; // rethrow immediately + } else { + cerr << "Could not allocate sample suffix container of " << (numSamples * OFF_SIZE) << " bytes." 
<< endl + << "Please try using a smaller number of blocks by specifying a larger --bmax or" << endl + << "a smaller --bmaxdivn" << endl; + throw 1; + } + } + } + // Remove duplicates; very important to do this before the call to + // mkeyQSortSuf so that it doesn't try to calculate lexicographical + // relationships between very long, identical strings, which takes + // an extremely long time in general, and causes the stack to grow + // linearly with the size of the input + { + Timer timer(cout, "QSorting sample offsets, eliminating duplicates time: ", this->verbose()); + VMSG_NL("QSorting " << _sampleSuffs.size() << " sample offsets, eliminating duplicates"); + _sampleSuffs.sort(); + size_t sslen = _sampleSuffs.size(); + for(size_t i = 0; i < sslen-1; i++) { + if(_sampleSuffs[i] == _sampleSuffs[i+1]) { + _sampleSuffs.erase(i--); + sslen--; + } + } + } + // Multikey quicksort the samples + { + Timer timer(cout, " Multikey QSorting samples time: ", this->verbose()); + VMSG_NL("Multikey QSorting " << _sampleSuffs.size() << " samples"); + this->qsort(_sampleSuffs); + } + // Calculate bucket sizes + VMSG_NL("Calculating bucket sizes"); + int limit = 5; + // Iterate until all buckets are less than + while(--limit >= 0) { + TIndexOffU numBuckets = (TIndexOffU)_sampleSuffs.size()+1; + AutoArray threads(this->_nthreads); + EList > tparams; + for(int tid = 0; tid < this->_nthreads; tid++) { + // Calculate bucket sizes by doing a binary search for each + // suffix and noting where it lands + tparams.expand(); + try { + // Allocate and initialize containers for holding bucket + // sizes and representatives. 
+ tparams.back().bucketSzs.resizeExact(numBuckets); + tparams.back().bucketReps.resizeExact(numBuckets); + tparams.back().bucketSzs.fillZero(); + tparams.back().bucketReps.fill(OFF_MASK); + } catch(bad_alloc &e) { + if(this->_passMemExc) { + throw e; // rethrow immediately + } else { + cerr << "Could not allocate sizes, representatives (" << ((numBuckets*8)>>10) << " KB) for blocks." << endl + << "Please try using a smaller number of blocks by specifying a larger --bmax or a" << endl + << "smaller --bmaxdivn." << endl; + throw 1; + } + } + tparams.back().t = &t; + tparams.back().sampleSuffs = &_sampleSuffs; + tparams.back().begin = (tid == 0 ? 0 : len / this->_nthreads * tid); + tparams.back().end = (tid + 1 == this->_nthreads ? len : len / this->_nthreads * (tid + 1)); + if(this->_nthreads == 1) { + BinarySorting_worker((void*)&tparams.back()); + } else { + threads[tid] = new tthread::thread(BinarySorting_worker, (void*)&tparams.back()); + } + } + + if(this->_nthreads > 1) { + for (int tid = 0; tid < this->_nthreads; tid++) { + threads[tid]->join(); + delete threads[tid]; // thread objects are allocated with new on every iteration; free them once joined + } + } + + EList& bucketSzs = tparams[0].bucketSzs; + EList& bucketReps = tparams[0].bucketReps; + for(int tid = 1; tid < this->_nthreads; tid++) { + for(size_t j = 0; j < numBuckets; j++) { + bucketSzs[j] += tparams[tid].bucketSzs[j]; + if(bucketReps[j] == OFF_MASK) { + bucketReps[j] = tparams[tid].bucketReps[j]; + } + } + } + // Check for large buckets and mergeable pairs of small buckets + // and split/merge as necessary + TIndexOff added = 0; + TIndexOff merged = 0; + assert_eq(bucketSzs.size(), numBuckets); + assert_eq(bucketReps.size(), numBuckets); + { + Timer timer(cout, " Splitting and merging time: ", this->verbose()); + VMSG_NL("Splitting and merging"); + for(TIndexOffU i = 0; i < numBuckets; i++) { + TIndexOffU mergedSz = bsz + 1; + assert(bucketSzs[(size_t)i] == 0 || bucketReps[(size_t)i] != OFF_MASK); + if(i < numBuckets-1) { + mergedSz = bucketSzs[(size_t)i] + bucketSzs[(size_t)i+1] + 1; + } + // Merge? 
+ if(mergedSz <= bsz) { + bucketSzs[(size_t)i+1] += (bucketSzs[(size_t)i]+1); + // The following may look strange, but it's necessary + // to ensure that the merged bucket has a representative + bucketReps[(size_t)i+1] = _sampleSuffs[(size_t)i+added]; + _sampleSuffs.erase((size_t)i+added); + bucketSzs.erase((size_t)i); + bucketReps.erase((size_t)i); + i--; // might go to -1 but ++ will overflow back to 0 + numBuckets--; + merged++; + assert_eq(numBuckets, _sampleSuffs.size()+1-added); + assert_eq(numBuckets, bucketSzs.size()); + } + // Split? + else if(bucketSzs[(size_t)i] > bsz) { + // Add an additional sample from the bucketReps[] + // set accumulated in the binarySASearch loop; this + // effectively splits the bucket + _sampleSuffs.insert(bucketReps[(size_t)i], (TIndexOffU)(i + (added++))); + } + } + } + if(added == 0) { + //if(this->verbose()) { + // cout << "Final bucket sizes:" << endl; + // cout << " (begin): " << bucketSzs[0] << " (" << (int)(bsz - bucketSzs[0]) << ")" << endl; + // for(uint32_t i = 1; i < numBuckets; i++) { + // cout << " " << bucketSzs[i] << " (" << (int)(bsz - bucketSzs[i]) << ")" << endl; + // } + //} + break; + } + // Otherwise, continue until no more buckets need to be + // split + VMSG_NL("Split " << added << ", merged " << merged << "; iterating..."); + } + // Do *not* force a do-over + // if(limit == 0) { + // VMSG_NL("Iterated too many times; trying again..."); + // buildSamples(); + // } + VMSG_NL("Avg bucket size: " << ((double)(len-_sampleSuffs.size()) / (_sampleSuffs.size()+1)) << " (target: " << bsz << ")"); +} + +/** + * Do a simple LCP calculation on two strings. 
+ */ +template inline +static TIndexOffU suffixLcp(const T& t, TIndexOffU aOff, TIndexOffU bOff) { + TIndexOffU c = 0; + size_t len = t.length(); + assert_leq(aOff, len); + assert_leq(bOff, len); + while(aOff + c < len && bOff + c < len && t[aOff + c] == t[bOff + c]) c++; + return c; +} + +/** + * Calculate the lcp between two suffixes using the difference + * cover as a tie-breaker. If the tie-breaker is employed, then + * the calculated lcp may be an underestimate. If the tie-breaker is + * employed, lcpIsSoft will be set to true (otherwise, false). + */ +template inline +bool KarkkainenBlockwiseSA::tieBreakingLcp(TIndexOffU aOff, + TIndexOffU bOff, + TIndexOffU& lcp, + bool& lcpIsSoft) +{ + const TStr& t = this->text(); + TIndexOffU c = 0; + TIndexOffU tlen = (TIndexOffU)t.length(); + assert_leq(aOff, tlen); + assert_leq(bOff, tlen); + assert(_dc.get() != NULL); + uint32_t dcDist = _dc.get()->tieBreakOff(aOff, bOff); + lcpIsSoft = false; // hard until proven soft + while(c < dcDist && // we haven't hit the tie breaker + c < tlen-aOff && // we haven't fallen off of LHS suffix + c < tlen-bOff && // we haven't fallen off of RHS suffix + t[aOff+c] == t[bOff+c]) // we haven't hit a mismatch + c++; + lcp = c; + if(c == tlen-aOff) { + // Fell off LHS (a), a is greater + return false; + } else if(c == tlen-bOff) { + // Fell off RHS (b), b is greater + return true; + } else if(c == dcDist) { + // Hit a tie-breaker element + lcpIsSoft = true; + assert_neq(dcDist, 0xffffffff); + return _dc.get()->breakTie(aOff+c, bOff+c) < 0; + } else { + assert_neq(t[aOff+c], t[bOff+c]); + return t[aOff+c] < t[bOff+c]; + } +} + +/** + * Lookup a suffix LCP in the given z array; if the element is not + * filled in then calculate it from scratch. 
+ */ +template +static TIndexOffU lookupSuffixZ( + const T& t, + TIndexOffU zOff, + TIndexOffU off, + const EList& z) +{ + if(zOff < z.size()) { + TIndexOffU ret = z[zOff]; + assert_eq(ret, suffixLcp(t, off + zOff, off)); + return ret; + } + assert_leq(off + zOff, t.length()); + return suffixLcp(t, off + zOff, off); +} + +/** + * Compare the suffix of the text starting at i with the suffix starting + * at cmp. j, k and kSoft carry the state of the previous match between + * calls and are updated in place; z is the table consulted through + * lookupSuffixZ for the suffix at cmp. + * + * true -> i < cmp + * false -> i > cmp + */ +template inline +bool KarkkainenBlockwiseSA::suffixCmp( + TIndexOffU cmp, + TIndexOffU i, + int64_t& j, + int64_t& k, + bool& kSoft, + const EList& z) +{ + const TStr& t = this->text(); + TIndexOffU len = (TIndexOffU)t.length(); + // i is not covered by any previous match + TIndexOffU l; + if((int64_t)i > k) { + k = i; // so that i + lHi == kHi + l = 0; // erase any previous l + kSoft = false; + // To be extended + } + // i is covered by a previous match + else /* i <= k */ { + assert_gt((int64_t)i, j); + TIndexOffU zIdx = (TIndexOffU)(i-j); + assert_leq(zIdx, len-cmp); + if(zIdx < _dcV || _dc.get() == NULL) { + // Go as far as the Z-box says + l = lookupSuffixZ(t, zIdx, cmp, z); + if(i + l > len) { + l = len-i; + } + assert_leq(i + l, len); + // Possibly to be extended + } else { + // But we're past the point of no-more-Z-boxes + bool ret = tieBreakingLcp(i, cmp, l, kSoft); + // Sanity-check tie-breaker + if(this->sanityCheck()) { + if(ret) assert(sstr_suf_lt(t, i, t, cmp, false)); + else assert(sstr_suf_gt(t, i, t, cmp, false)); + } + j = i; + k = i + l; + if(this->sanityCheck()) { + if(kSoft) { assert_leq(l, suffixLcp(t, i, cmp)); } + else { assert_eq (l, suffixLcp(t, i, cmp)); } + } + return ret; + } + } + + // Z box extends exactly as far as previous match (or there + // is neither a Z box nor a previous match) + if((int64_t)(i + l) == k) { + // Extend + while(l < len-cmp && k < (int64_t)len && t[(size_t)(cmp+l)] == t[(size_t)k]) { + k++; l++; + } + j = i; // update furthest-extending LHS + kSoft = false; + assert_eq(l, suffixLcp(t, i, cmp)); + } + // Z box extends further than previous match
+ else if((int64_t)(i + l) > k) { + l = (TIndexOffU)(k - i); // point to just after previous match + j = i; // update furthest-extending LHS + if(kSoft) { + while(l < len-cmp && k < (int64_t)len && t[(size_t)(cmp+l)] == t[(size_t)k]) { + k++; l++; + } + kSoft = false; + assert_eq(l, suffixLcp(t, i, cmp)); + } else assert_eq(l, suffixLcp(t, i, cmp)); + } + + // Check that calculated lcp matches actual lcp + if(this->sanityCheck()) { + if(!kSoft) { + // l should exactly match lcp + assert_eq(l, suffixLcp(t, i, cmp)); + } else { + // l is an underestimate of LCP + assert_leq(l, suffixLcp(t, i, cmp)); + } + } + assert_leq(l+i, len); + assert_leq(l, len-cmp); + + // i and cmp should not be the same suffix + assert(l != len-cmp || i+l != len); + + // Now we're ready to do a comparison on the next char + if(l+i != len && ( + l == len-cmp || // departure from paper algorithm: + // falling off pattern implies + // pattern is *greater* in our case + t[i + l] < t[cmp + l])) + { + // Case 2: Text suffix is less than upper sample suffix +#ifndef NDEBUG + if(this->sanityCheck()) { + assert(sstr_suf_lt(t, i, t, cmp, false)); + } +#endif + return true; // suffix at i is less than suffix at cmp + } + else { + // Case 3: Text suffix is greater than upper sample suffix +#ifndef NDEBUG + if(this->sanityCheck()) { + assert(sstr_suf_gt(t, i, t, cmp, false)); + } +#endif + return false; // suffix at i is greater than suffix at cmp + } +} + +/** + * Retrieve the next block. This is the most performance-critical part + * of the blockwise suffix sorting process. + * + * Fills the bucket for cur_block (this->_itrBuckets[tid] when more than one + * thread is used, this->_itrBucket otherwise), sorts it, and appends either + * the upper sample suffix or, for the final block, the offset len. + */ +template +void KarkkainenBlockwiseSA::nextBlock(int cur_block, int tid) { +#ifndef NDEBUG + if(this->_nthreads > 1) { + assert_lt(tid, this->_itrBuckets.size()); + } +#endif + EList& bucket = (this->_nthreads > 1 ?
this->_itrBuckets[tid] : this->_itrBucket); + { + ThreadSafe ts(&_mutex, this->_nthreads > 1); + VMSG_NL("Getting block " << (cur_block+1) << " of " << _sampleSuffs.size()+1); + } + assert(_built); + assert_gt(_dcV, 3); + assert_leq(cur_block, _sampleSuffs.size()); + const TStr& t = this->text(); + TIndexOffU len = (TIndexOffU)t.length(); + // Set up the bucket + bucket.clear(); + TIndexOffU lo = OFF_MASK, hi = OFF_MASK; + if(_sampleSuffs.size() == 0) { + // Special case: if _sampleSuffs is empty, then multikey-quicksort + // everything + { + ThreadSafe ts(&_mutex, this->_nthreads > 1); + VMSG_NL(" No samples; assembling all-inclusive block"); + } + assert_eq(0, cur_block); + try { + if(bucket.capacity() < this->bucketSz()) { + bucket.reserveExact(len+1); + } + bucket.resize(len); + for(TIndexOffU i = 0; i < len; i++) { + bucket[i] = i; + } + } catch(bad_alloc &e) { + if(this->_passMemExc) { + throw e; // rethrow immediately + } else { + // NOTE(review): the byte counts in the allocation-failure messages of + // this function multiply by a literal 4, while the push_back failure + // message multiplies by OFF_SIZE -- confirm the literal is intended + cerr << "Could not allocate a master suffix-array block of " << ((len+1) * 4) << " bytes" << endl + << "Please try using a larger number of blocks by specifying a smaller --bmax or" << endl + << "a larger --bmaxdivn" << endl; + throw 1; + } + } + } else { + try { + { + ThreadSafe ts(&_mutex, this->_nthreads > 1); + VMSG_NL(" Reserving size (" << this->bucketSz() << ") for bucket " << (cur_block+1)); + } + // BTL: Add a +100 fudge factor; there seem to be instances + // where a bucket ends up having one more elt than bucketSz() + // NOTE(review): bucket.clear() ran above, so size() is presumably 0 here + // and this reserve is attempted on every call; the no-samples branch + // tests capacity() instead -- confirm which was intended + if(bucket.size() < this->bucketSz()+100) { + bucket.reserveExact(this->bucketSz()+100); + } + } catch(bad_alloc &e) { + if(this->_passMemExc) { + throw e; // rethrow immediately + } else { + cerr << "Could not allocate a suffix-array block of " << ((this->bucketSz()+1) * 4) << " bytes" << endl; + cerr << "Please try using a larger number of blocks by specifying a smaller --bmax or" << endl + << "a larger --bmaxdivn" << endl; + throw 1; + } + } + // Select upper and lower bounds from _sampleSuffs[] and + //
calculate the Z array up to the difference-cover periodicity + // for both. Be careful about first/last buckets. + EList zLo(EBWTB_CAT), zHi(EBWTB_CAT); + assert_geq(cur_block, 0); + assert_leq((size_t)cur_block, _sampleSuffs.size()); + bool first = (cur_block == 0); + bool last = ((size_t)cur_block == _sampleSuffs.size()); + try { + // Timer timer(cout, " Calculating Z arrays time: ", this->verbose()); + { + ThreadSafe ts(&_mutex, this->_nthreads > 1); + VMSG_NL(" Calculating Z arrays for bucket " << (cur_block+1)); + } + if(!last) { + // Not the last bucket + assert_lt(cur_block, _sampleSuffs.size()); + hi = _sampleSuffs[cur_block]; + zHi.resizeExact(_dcV); + zHi.fillZero(); + assert_eq(zHi[0], 0); + calcZ(t, hi, zHi, this->verbose(), this->sanityCheck()); + } + if(!first) { + // Not the first bucket + assert_gt(cur_block, 0); + assert_leq(cur_block, _sampleSuffs.size()); + lo = _sampleSuffs[cur_block-1]; + zLo.resizeExact(_dcV); + zLo.fillZero(); + assert_gt(_dcV, 3); + assert_eq(zLo[0], 0); + calcZ(t, lo, zLo, this->verbose(), this->sanityCheck()); + } + } catch(bad_alloc &e) { + if(this->_passMemExc) { + throw e; // rethrow immediately + } else { + cerr << "Could not allocate a z-array of " << (_dcV * 4) << " bytes" << endl; + cerr << "Please try using a larger number of blocks by specifying a smaller --bmax or" << endl + << "a larger --bmaxdivn" << endl; + throw 1; + } + } + + // This is the most critical loop in the algorithm; this is where + // we iterate over all suffixes in the text and pick out those that + // fall into the current bucket.
+ // + // This loop is based on the SMALLERSUFFIXES function outlined on + // p7 of the "Fast BWT" paper + // + int64_t kHi = -1, kLo = -1; + int64_t jHi = -1, jLo = -1; + bool kHiSoft = false, kLoSoft = false; + assert_eq(0, bucket.size()); + { + // Timer timer(cout, " Block accumulator loop time: ", this->verbose()); + { + ThreadSafe ts(&_mutex, this->_nthreads > 1); + VMSG_NL(" Entering block accumulator loop for bucket " << (cur_block+1) << ":"); + } + TIndexOffU lenDiv10 = (len + 9) / 10; + for(TIndexOffU iten = 0, ten = 0; iten < len; iten += lenDiv10, ten++) { + TIndexOffU itenNext = iten + lenDiv10; + { + ThreadSafe ts(&_mutex, this->_nthreads > 1); + if(ten > 0) VMSG_NL(" bucket " << (cur_block+1) << ": " << (ten * 10) << "%"); + } + for(TIndexOffU i = iten; i < itenNext && i < len; i++) { + assert_lt(jLo, (int64_t)i); assert_lt(jHi, (int64_t)i); + // Advance the upper-bound comparison by one character + if(i == hi || i == lo) continue; // equal to one of the bookends + if(hi != OFF_MASK && !suffixCmp(hi, i, jHi, kHi, kHiSoft, zHi)) { + continue; // not in the bucket + } + if(lo != OFF_MASK && suffixCmp(lo, i, jLo, kLo, kLoSoft, zLo)) { + continue; // not in the bucket + } + // In the bucket!
- add it + assert_lt(i, len); + try { + bucket.push_back(i); + } catch(bad_alloc &e) { + cerr << "Could not append element to block of " << ((bucket.size()) * OFF_SIZE) << " bytes" << endl; + if(this->_passMemExc) { + throw e; // rethrow immediately + } else { + cerr << "Please try using a larger number of blocks by specifying a smaller --bmax or" << endl + << "a larger --bmaxdivn" << endl; + throw 1; + } + } + // Not necessarily true; we allow overflowing buckets + // since we can't guarantee that a good set of sample + // suffixes can be found in a reasonable amount of time + //assert_lt(bucket.size(), this->bucketSz()); + } + } // end loop over all suffixes of t + { + ThreadSafe ts(&_mutex, this->_nthreads > 1); + VMSG_NL(" bucket " << (cur_block+1) << ": 100%"); + } + } + } // end else clause of if(_sampleSuffs.size() == 0) + // Sort the bucket + if(bucket.size() > 0) { + Timer timer(cout, " Sorting block time: ", this->verbose()); + { + ThreadSafe ts(&_mutex, this->_nthreads > 1); + VMSG_NL(" Sorting block of length " << bucket.size() << " for bucket " << (cur_block+1)); + } + this->qsort(bucket); + } + if(hi != OFF_MASK) { + // Not the final bucket; throw in the sample on the RHS + bucket.push_back(hi); + } else { + // Final bucket; throw in $ suffix + bucket.push_back(len); + } + { + ThreadSafe ts(&_mutex, this->_nthreads > 1); + VMSG_NL("Returning block of " << bucket.size() << " for bucket " << (cur_block+1)); + } +} + +#endif /*BLOCKWISE_SA_H_*/ diff --git a/bp_aligner.h b/bp_aligner.h new file mode 100644 index 0000000..15b6836 --- /dev/null +++ b/bp_aligner.h @@ -0,0 +1,1237 @@ +/* + * Copyright 2014, Daehwan Kim + * + * This file is part of HISAT. + * + * HISAT is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * HISAT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT. If not, see . + */ + +#ifndef BP_ALIGNER_H_ +#define BP_ALIGNER_H_ + +#include "hi_aligner.h" + +/** + * With hierarchical indexing, SplicedAligner provides several alignment strategies + * which enable effective alignment of RNA-seq reads. + * NOTE(review): the text above refers to SplicedAligner, but the class declared + * here is BP_Aligner (derived from HI_Aligner) -- confirm the description applies. + */ +template +class BP_Aligner : public HI_Aligner { + +public: + /** + * Initialize with index. + * Keeps refnames and mutex in _refnames and _mutex, clears _done, and passes + * the remaining arguments on to HI_Aligner. + */ + BP_Aligner( + const Ebwt& ebwt, + const EList& refnames, + MUTEX_T* mutex, + uint64_t threads_rids_mindist = 0, + bool no_spliced_alignment = false) : + HI_Aligner(ebwt, + threads_rids_mindist, + no_spliced_alignment), + _refnames(refnames), + _mutex(mutex), + _done(false) + { + } + + ~BP_Aligner() { + } + + /** + * Aligns a read or a pair + * This function is called per read or pair. + * Clears _done, then calls align() after each successful nextBWT() call, + * stopping when nextBWT() returns false or _done is set. + * Always returns EXTEND_POLICY_FULFILLED. + */ + virtual + int go( + const Scoring& sc, + const Ebwt& ebwtFw, + const Ebwt& ebwtBw, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink) + { + _done = false; + index_t rdi; + bool fw; + while(this->nextBWT(sc, ebwtFw, ebwtBw, ref, rdi, fw, wlm, prm, him, rnd, sink)) { + // given the partial alignment, try to extend it to full alignments + this->align(sc, ebwtFw, ebwtBw, ref, swa, ssdb, rdi, fw, wlm, prm, swm, him, rnd, sink); + if(_done) break; + } + + return EXTEND_POLICY_FULFILLED; + } + + /** + * Given a partial alignment of a read, try to further extend + * the alignment bidirectionally using a combination of + * local search, extension, and global search + */ + virtual + void hybridSearch( + const Scoring& sc, + const Ebwt& ebwtFw, + const Ebwt& ebwtBw, + const
BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + bool fw, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink); + + /** + * Given a partial alignment of a read, try to further extend + * the alignment bidirectionally using a combination of + * local search, extension, and global search + */ + virtual + void hybridSearch_recur( + const Scoring& sc, + const Ebwt& ebwtFw, + const Ebwt& ebwtBw, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + const GenomeHit& hit, + index_t hitoff, + index_t hitlen, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink, + index_t dep = 0); + +private: + EList _refnames; /* names printed via _refnames[hit.ref()] */ + MUTEX_T* _mutex; /* guards the cout output in hybridSearch_recur */ + bool _done; /* when set, go() and hybridSearch() stop early */ +}; + +/** + * Extend every hit in _genomeHits directly against the reference, then hand + * the hits one at a time to hybridSearch_recur, which tries to further extend + * the alignment bidirectionally using a combination of + * local search, extension, and global search. Stops early once _done is set. + */ +template +void BP_Aligner::hybridSearch( + const Scoring& sc, + const Ebwt& ebwtFw, + const Ebwt& ebwtBw, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + bool fw, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink) +{ + assert_lt(rdi, 2); + assert(this->_rds[rdi] != NULL); + him.localatts++; + + // before further alignment using local search, extend the partial alignments directly + // by comparing with the corresponding genomic sequences + // this extension is performed without any mismatches allowed + for(index_t hi = 0; hi < this->_genomeHits.size(); hi++) { + GenomeHit& genomeHit = this->_genomeHits[hi]; + index_t leftext = (index_t)OFF_MASK, rightext = (index_t)OFF_MASK; + genomeHit.extend(*(this->_rds[rdi]), ref, ssdb, swa, swm, prm, sc, this->_minsc[rdi], rnd, this->_minK_local, leftext, rightext); + } + + // for the candidate alignments,
examine the longest (best) one first + this->_genomeHits_done.resize(this->_genomeHits.size()); + this->_genomeHits_done.fill(false); + for(size_t hi = 0; hi < this->_genomeHits.size(); hi++) { + /* hj starts at the first hit that has not been processed yet */ + index_t hj = 0; + for(; hj < this->_genomeHits.size(); hj++) { + if(!this->_genomeHits_done[hj]) break; + } + if(hj >= this->_genomeHits.size()) break; + /* among the remaining unprocessed hits prefer the larger hitcount(), then the larger len() */ + for(index_t hk = hj + 1; hk < this->_genomeHits.size(); hk++) { + if(this->_genomeHits_done[hk]) continue; + GenomeHit& genomeHit_j = this->_genomeHits[hj]; + GenomeHit& genomeHit_k = this->_genomeHits[hk]; + if(genomeHit_k.hitcount() > genomeHit_j.hitcount() || + (genomeHit_k.hitcount() == genomeHit_j.hitcount() && genomeHit_k.len() > genomeHit_j.len())) { + hj = hk; + } + } + + // given a candidate partial alignment, extend it bidirectionally + him.anchoratts++; + GenomeHit& genomeHit = this->_genomeHits[hj]; + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + genomeHit, + genomeHit.rdoff(), + genomeHit.len(), + wlm, + prm, + swm, + him, + rnd, + sink); + this->_genomeHits_done[hj] = true; + if(_done) return; + } +} + + +/** + * Given a partial alignment of a read, try to further extend + * the alignment bidirectionally using a combination of + * local search, extension, and global search + */ +template +void BP_Aligner::hybridSearch_recur( + const Scoring& sc, + const Ebwt& ebwtFw, + const Ebwt& ebwtBw, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + const GenomeHit& hit, + index_t hitoff, + index_t hitlen, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink, + index_t dep) +{ + if(_done) return; + him.localsearchrecur++; + assert_lt(rdi, 2); + assert(this->_rds[rdi] != NULL); + const Read& rd = *(this->_rds[rdi]); + index_t rdlen = rd.length(); + if(hit.score() < this->_minsc[rdi]) return; + + // if it's already examined, just return + if(hitoff == hit.rdoff() - hit.trim5() && hitlen
== hit.len() + hit.trim5() + hit.trim3()) { + if(this->isSearched(hit, rdi)) return; + this->addSearched(hit, rdi); + } + + // for effective use of memory allocation and deallocation + if(this->_coords.size() <= dep) { + this->_coords.expand(); + assert_leq(this->_local_genomeHits.size(), dep); + this->_local_genomeHits.expand(); + assert_leq(this->_spliceSites.size(), dep); + this->_spliceSites.expand(); + } + EList& coords = this->_coords[dep]; + EList >& local_genomeHits = this->_local_genomeHits[dep]; + EList& spliceSites = this->_spliceSites[dep]; + + // daehwan - for debugging purposes +#if 0 + cout << rd.name << "\t" + << (hit.fw() ? "+" : "-") << "\t" + << hitoff << "\t" + << hitoff + hitlen << "\t" + << "( " << hit.rdoff() << "\t" + << hit.rdoff() + hit.len() << " )" << "\t" + << hit.refoff() << "\t" + << hit.getRightOff() << "\t" + << hit.score() << "\t" + << "dep: " << dep << "\t"; + Edit::print(cout, hit.edits()); + cout << endl; +#endif + + assert_leq(hitoff + hitlen, rdlen); + // if this is a full alignment, report it + if(hitoff == 0 && hitlen == rdlen) { + if(!this->redundant(sink, rdi, hit)) { + // this->reportHit(sc, ebwtFw, ref, sink, rdi, hit); + return; + } + } else if(hitoff > 0 && (hitoff + hitlen == rdlen || hitoff + hitoff < rdlen - hitlen)) { + // extend the partial alignment in the left direction + index_t fragoff = 0, fraglen = 0, left = 0; + hit.getLeft(fragoff, fraglen, left); + const index_t minMatchLen = this->_minK_local; + // make use of a list of known or novel splice sites to further align the read + if(fraglen >= minMatchLen && left >= minMatchLen && !this->_no_spliced_alignment) { + spliceSites.clear(); + ssdb.getLeftSpliceSites(hit.ref(), left + minMatchLen, minMatchLen * 2, spliceSites); + for(size_t si = 0; si < spliceSites.size(); si++) { + const SpliceSite& ss = spliceSites[si]; + if(!ss._fromfile && ss._readid + this->_thread_rids_mindist > rd.rdid) continue; + if(left + fraglen - 1 < ss.right()) continue; + index_t 
frag2off = ss.left() - (ss.right() - left); + if(frag2off + 1 < hitoff) continue; + GenomeHit tempHit; + tempHit.init(hit.fw(), + 0, + hitoff, + 0, // trim5 + 0, // trim3 + hit.ref(), + frag2off + 1 - hitoff, + this->_sharedVars); + if(!tempHit.compatibleWith(hit, this->_no_spliced_alignment)) continue; + int64_t minsc = this->_minsc[rdi]; + bool combined = tempHit.combineWith(hit, rd, ref, ssdb, swa, swm, sc, minsc, rnd, this->_minK_local, 1, 1, false); + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + if(combined && tempHit.score() >= minsc) { + assert_eq(tempHit.trim5(), 0); + assert_leq(tempHit.rdoff() + tempHit.len() + tempHit.trim3(), rdlen); + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff(), + tempHit.len() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } + } + } + + // choose a local index based on the genomic location of the partial alignment + const HierEbwt* hierEbwtFw = (const HierEbwt*)(&ebwtFw); + const LocalEbwt* localEbwtFw = hierEbwtFw->getLocalEbwt(hit.ref(), hit.refoff()); + assert_leq(localEbwtFw->_localOffset, hit.refoff()); + bool success = false, first = true; + index_t count = 0; + // consider at most two local indexes + index_t max_count = 2; + int64_t prev_score = hit.score(); + local_genomeHits.clear(); + while(!success && count++ < max_count) { + if(him.localindexatts >= this->max_localindexatts) return; + if(first) { + first = false; + } else { + localEbwtFw = hierEbwtFw->prevLocalEbwt(localEbwtFw); + if(localEbwtFw == NULL || localEbwtFw->empty()) break; + } + // local index search + index_t extlen = 0; + local_index_t top = (local_index_t)OFF_MASK, bot = (local_index_t)OFF_MASK; + index_t extoff = hitoff - 1; + if(extoff > 0) extoff -= 1; + if(extoff < minAnchorLen) { + extoff = minAnchorLen; + } + index_t nelt = (index_t)OFF_MASK; + index_t max_nelt = std::max(5, extlen); + bool no_extension = false; + 
bool uniqueStop; + // daehwan - for debugging purposes + // index_t minUniqueLen = this->_minK_local; + index_t minUniqueLen = (index_t)OFF_MASK; + for(; extoff < rdlen; extoff++) { + extlen = 0; + // daehwan - for debugging purposes + // uniqueStop = true; + uniqueStop = false; + him.localindexatts++; + nelt = this->localEbwtSearch( + localEbwtFw, // BWT index + NULL, // BWT index + rd, // read to align + sc, // scoring scheme + hit.fw(), + false, // searchfw, + extoff, + extlen, + top, + bot, + rnd, + uniqueStop, + minUniqueLen); + if(extoff + 1 - extlen >= hitoff) { + no_extension = true; + break; + } + if(nelt <= max_nelt) break; + } + assert_leq(top, bot); + assert_eq(nelt, (index_t)(bot - top)); + assert_leq(extlen, extoff + 1); + if(nelt > 0 && + nelt <= max_nelt && + extlen >= minAnchorLen && + !no_extension) { + assert_leq(nelt, max_nelt); + coords.clear(); + bool straddled = false; + // get genomic locations for this local search + this->getGenomeCoords_local( + *localEbwtFw, + ref, + rnd, + top, + bot, + hit.fw(), + extoff + 1 - extlen, + extlen, + coords, + wlm, + prm, + him, + true, // reject straddled? 
+ straddled); + assert_leq(coords.size(), nelt); + coords.sort(); + for(int ri = coords.size() - 1; ri >= 0; ri--) { + const Coord& coord = coords[ri]; + GenomeHit tempHit; + tempHit.init(coord.orient(), + extoff + 1 - extlen, + extlen, + 0, // trim5 + 0, // trim3 + coord.ref(), + coord.off(), + this->_sharedVars); + + // daehwan - for debugging purposes + if(coord.ref() == hit.ref() && + coord.off() > hit.refoff() && + coord.off() < hit.refoff() + 64000) { + index_t leftext = (index_t)OFF_MASK, rightext = (index_t)0; + index_t mm = 1; + tempHit.extend( + rd, + ref, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + this->_minK_local, + leftext, + rightext, + mm); + +#if 0 + cout << endl; + cout << rd.rdid << "\t" << rd.name << endl; + cout << "\ttype: " << 1 << endl; + cout << "\t" << hit.ref() << endl; + cout << "\t\ttempHit " << (tempHit.fw() ? "+" : "-") << "\t" << tempHit.refoff() << "\t" << tempHit.rdoff() << "\t" << tempHit.len() << "\t" << tempHit.score() << endl; + cout << "\t\tanchHit " << (hit.fw() ? "+" : "-") << "\t" << hit.refoff() << "\t" << hit.rdoff() << "\t" << hit.len() << "\t" << hit.score() << endl; + + spliceSites.clear(); + ssdb.getRightSpliceSites(hit.ref(), hit.refoff() - minMatchLen, minMatchLen * 2, spliceSites); + for(size_t si = 0; si < spliceSites.size(); si++) { + const SpliceSite& ss = spliceSites[si]; + if(ss.right() > tempHit.refoff()) { + index_t dist = ss.right() - 1 - (tempHit.refoff() + tempHit.len() - 1); + cout << rd.rdid << "s\t\t\t" << ss.left() + 1 << "\t" << ss.right() - 1 << "\t" << (ss.fw() ? 
"+" : "-") << "\t" << dist << endl; + } + } + + spliceSites.clear(); + ssdb.getLeftSpliceSites(tempHit.ref(), tempHit.getRightOff() + minMatchLen, minMatchLen * 2, spliceSites); + for(size_t si = 0; si < spliceSites.size(); si++) { + const SpliceSite& ss = spliceSites[si]; + if(ss.left() < hit.refoff()) { + index_t dist = hit.refoff() - (ss.left() + 1); + cout << rd.rdid << "s\t\t\t" << ss.left() + 1 << "\t" << ss.right() - 1 << "\t" << (ss.fw() ? "+" : "-") << "\t" << dist << endl; + } + } + + if(tempHit.rdoff() + tempHit.len() + 1 >= hit.rdoff()) return; +#else + assert_lt(hit.ref(), _refnames.size()); + { + ThreadSafe t(const_cast(_mutex), true); + cout << rd.name << "\t" + << _refnames[hit.ref()] << "\t" + << (tempHit.fw() ? "+" : "-") << "\t" << tempHit.refoff() << "\t" << tempHit.rdoff() << "\t" << tempHit.len() << "\t" << tempHit.score() << "\t" + << (hit.fw() ? "+" : "-") << "\t" << hit.refoff() << "\t" << hit.rdoff() << "\t" << hit.len() << "\t" << hit.score() + << endl; + } + _done = true; + return; +#endif + } + + // check if the partial alignment is compatible with the new alignment using the local index + if(!tempHit.compatibleWith(hit, this->_no_spliced_alignment)) { + if(count == 1) continue; + else break; + } + if(uniqueStop) { + assert_eq(coords.size(), 1); + index_t leftext = (index_t)OFF_MASK, rightext = (index_t)0; + tempHit.extend(rd, ref, ssdb, swa, swm, prm, sc, this->_minsc[rdi], rnd, this->_minK_local, leftext, rightext); + } + // combine the partial alignment and the new alignment + int64_t minsc = this->_minsc[rdi]; + bool combined = tempHit.combineWith(hit, rd, ref, ssdb, swa, swm, sc, minsc, rnd, this->_minK_local); + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + if(combined && tempHit.score() >= minsc) { + assert_eq(tempHit.trim5(), 0); + assert_leq(tempHit.rdoff() + tempHit.len() + tempHit.trim3(), rdlen); + if(tempHit.score() >= prev_score - sc.mmpMax) { + // extend the new partial 
alignment recursively + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff(), + tempHit.len() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } else { + local_genomeHits.push_back(tempHit); + } + } + } + } + int64_t minsc = (rdi == 0 ? sink.bestUnp1() : sink.bestUnp2()); + if(minsc >= prev_score - sc.mmpMax) success = true; + if(!success && (count == max_count || hierEbwtFw->prevLocalEbwt(localEbwtFw) == NULL)) { + for(index_t ti = 0; ti < local_genomeHits.size(); ti++) { + GenomeHit& tempHit = local_genomeHits[ti]; + int64_t minsc = this->_minsc[rdi]; + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + if(tempHit.score() >= minsc) { + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff(), + tempHit.len() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } + } + } + } // while(!success && count++ < 2) + + if(!success) { + if(hitoff > this->_minK) { + index_t extlen = 0; + index_t top = (index_t)OFF_MASK, bot = (index_t)OFF_MASK; + index_t extoff = hitoff - 1; + bool uniqueStop = true; + // perform global search for long introns + index_t nelt = this->globalEbwtSearch( + ebwtFw, // BWT index + rd, // read to align + sc, // scoring scheme + hit.fw(), + extoff, + extlen, + top, + bot, + rnd, + uniqueStop); + if(nelt <= 5 && extlen >= this->_minK) { + coords.clear(); + bool straddled = false; + this->getGenomeCoords( + ebwtFw, + ref, + rnd, + top, + bot, + hit.fw(), + bot - top, + extoff + 1 - extlen, + extlen, + coords, + wlm, + prm, + him, + true, // reject straddled? 
+ straddled); + assert_leq(coords.size(), nelt); + coords.sort(); + for(int ri = coords.size() - 1; ri >= 0; ri--) { + const Coord& coord = coords[ri]; + GenomeHit tempHit; + tempHit.init(coord.orient(), + extoff + 1 - extlen, + extlen, + 0, // trim5 + 0, // trim3 + coord.ref(), + coord.off(), + this->_sharedVars); + if(!tempHit.compatibleWith(hit, this->_no_spliced_alignment)) continue; + if(uniqueStop) { + assert_eq(coords.size(), 1); + index_t leftext = (index_t)OFF_MASK, rightext = (index_t)0; + tempHit.extend(rd, ref, ssdb, swa, swm, prm, sc, this->_minsc[rdi], rnd, this->_minK_local, leftext, rightext); + } + int64_t minsc = this->_minsc[rdi]; + bool combined = tempHit.combineWith(hit, rd, ref, ssdb, swa, swm, sc, minsc, rnd, this->_minK_local); + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + if(combined && tempHit.score() >= minsc) { + assert_eq(tempHit.trim5(), 0); + assert_leq(tempHit.rdoff() + tempHit.len() + tempHit.trim3(), rdlen); + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff(), + tempHit.len() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } + } + } + } + GenomeHit tempHit = hit; + if(tempHit.rdoff() <= 5) { + index_t trim5 = tempHit.rdoff(); + tempHit.trim5(trim5); + assert_leq(tempHit.len() + tempHit.trim5() + tempHit.trim3(), rdlen); + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + tempHit, + 0, + tempHit.len() + tempHit.trim5() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + return; + } + // extend the partial alignment directly comparing with the corresponding genomic sequence + // with mismatches or a gap allowed + int64_t minsc = this->_minsc[rdi]; + assert_geq(tempHit.score(), minsc); + index_t mm = (tempHit.score() - minsc) / sc.mmpMax; + index_t leftext = (index_t)OFF_MASK, rightext = (index_t)0; + index_t num_mismatch_allowed = 1; + if(hitoff <= 
this->_minK_local) { + num_mismatch_allowed = min(tempHit.rdoff(), mm); + } + him.localextatts++; + tempHit.extend(rd, ref, ssdb, swa, swm, prm, sc, this->_minsc[rdi], rnd, this->_minK_local, leftext, rightext, num_mismatch_allowed); + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + if(tempHit.score() >= minsc && leftext >= min(this->_minK_local, hit.rdoff())) { + assert_eq(tempHit.trim5(), 0); + assert_leq(tempHit.rdoff() + tempHit.len() + tempHit.trim3(), rdlen); + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff(), + tempHit.len() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } else if(hitoff > this->_minK_local) { + // skip some bases of a read + index_t jumplen = hitoff > this->_minK ? this->_minK : this->_minK_local; + assert_leq(hitoff, hit.rdoff()); + int64_t expected_score = hit.score() - (hit.rdoff() - hitoff) / jumplen * sc.mmpMax - sc.mmpMax; + if(expected_score >= minsc) { + assert_lt(hitlen + jumplen, rdlen); + assert_eq(hit.trim5(), 0); + assert_leq(hitoff + hitlen, rdlen); + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + hit, + hitoff - jumplen, + hitlen + jumplen, + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } + } + } + } else { + // extend the partial alignment in the right direction + assert_lt(hitoff + hitlen, rdlen); + index_t fragoff = 0, fraglen = 0, right = 0; + hit.getRight(fragoff, fraglen, right); + const index_t minMatchLen = this->_minK_local; + // make use of a list of known or novel splice sites to further align the read + if(fraglen >= minMatchLen && !this->_no_spliced_alignment) { + spliceSites.clear(); + assert_gt(fraglen, 0); + ssdb.getRightSpliceSites(hit.ref(), right + fraglen - minMatchLen, minMatchLen * 2, spliceSites); + for(size_t si = 0; si < spliceSites.size(); si++) { + const SpliceSite& ss = spliceSites[si]; + if(!ss._fromfile && ss._readid + 
this->_thread_rids_mindist > rd.rdid) continue; + if(right > ss.left()) continue; + index_t frag2off = ss.right() - ss.left() + right + fraglen - 1; + GenomeHit tempHit; + tempHit.init(hit.fw(), + fragoff + fraglen, + rdlen - fragoff - fraglen, + 0, // trim5 + 0, // trim3 + hit.ref(), + frag2off, + this->_sharedVars); + if(!hit.compatibleWith(tempHit, this->_no_spliced_alignment)) continue; + GenomeHit combinedHit = hit; + int64_t minsc = this->_minsc[rdi]; + bool combined = combinedHit.combineWith(tempHit, rd, ref, ssdb, swa, swm, sc, minsc, rnd, this->_minK_local, 1, 1, false); + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + if(combined && combinedHit.score() >= minsc) { + assert_leq(combinedHit.trim5(), combinedHit.rdoff()); + assert_eq(combinedHit.rdoff() + combinedHit.len(), rdlen); + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + combinedHit, + combinedHit.rdoff() - combinedHit.trim5(), + combinedHit.len() + combinedHit.trim5(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } + } + } + + // choose a local index based on the genomic location of the partial alignment + const HierEbwt* hierEbwtFw = (const HierEbwt*)(&ebwtFw); + const LocalEbwt* localEbwtFw = hierEbwtFw->getLocalEbwt(hit.ref(), hit.refoff()); + bool success = false, first = true; + index_t count = 0; + index_t max_count = 2; + int64_t prev_score = hit.score(); + local_genomeHits.clear(); + while(!success && count++ < max_count) { + if(him.localindexatts >= this->max_localindexatts) return; + if(first) { + first = false; + } else { + localEbwtFw = hierEbwtFw->nextLocalEbwt(localEbwtFw); + if(localEbwtFw == NULL || localEbwtFw->empty()) break; + } + // local index search + index_t extlen = 0; + local_index_t top = (local_index_t)OFF_MASK, bot = (local_index_t)OFF_MASK; + // daehwan - for debugging purposes + // index_t extoff = hitoff + hitlen + this->_minK_local; + index_t extoff = hitoff + hitlen + 
this->_minK_local * 3; + if(extoff + 1 < rdlen) extoff += 1; + if(extoff >= rdlen) { + extoff = rdlen - 1; + } + index_t nelt = (index_t)OFF_MASK; + index_t max_nelt = std::max(5, extlen); + bool no_extension = false; + bool uniqueStop; + // daehwan - for debugging purposes + // index_t minUniqueLen = this->_minK_local; + index_t minUniqueLen = (index_t)OFF_MASK; + index_t maxHitLen = max(extoff + 1 - hitoff - hitlen, this->_minK_local); + for(; maxHitLen < extoff + 1 && extoff < rdlen;) { + extlen = 0; + uniqueStop = false; + him.localindexatts++; + nelt = this->localEbwtSearch( + localEbwtFw, // BWT index + NULL, // BWT index + rd, // read to align + sc, // scoring scheme + hit.fw(), + false, // searchfw, + extoff, + extlen, + top, + bot, + rnd, + uniqueStop, + minUniqueLen, + maxHitLen); + if(extoff < hitoff + hitlen) { + no_extension = true; + break; + } + if(nelt <= max_nelt) break; + if(extoff + 1 < rdlen) extoff++; + else { + if(extlen < maxHitLen) break; + else maxHitLen++; + } + } + assert_leq(top, bot); + assert_eq(nelt, (index_t)(bot - top)); + assert_leq(extlen, extoff + 1); + assert_leq(extoff, rdlen); + if(nelt > 0 && + nelt <= max_nelt && + extlen >= minAnchorLen && + !no_extension) { + assert_leq(nelt, max_nelt); + coords.clear(); + bool straddled = false; + // get genomic locations for this local search + this->getGenomeCoords_local( + *localEbwtFw, + ref, + rnd, + top, + bot, + hit.fw(), + extoff + 1 - extlen, + extlen, + coords, + wlm, + prm, + him, + true, // reject straddled? 
+ straddled); + assert_leq(coords.size(), nelt); + coords.sort(); + for(index_t ri = 0; ri < coords.size(); ri++) { + const Coord& coord = coords[ri]; + GenomeHit tempHit; + tempHit.init(coord.orient(), + extoff + 1 - extlen, + extlen, + 0, // trim5 + 0, // trim3 + coord.ref(), + coord.off(), + this->_sharedVars); + + // daehwan - for debugging purposes + if(coord.ref() == hit.ref() && + coord.off() < hit.refoff() && + coord.off() + 64000 > hit.refoff()) { + index_t leftext = (index_t)0, rightext = (index_t)OFF_MASK; + index_t mm = 1; + tempHit.extend( + rd, + ref, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + this->_minK_local, + leftext, + rightext, + mm); + +#if 0 + cout << endl; + cout << rd.rdid << "\t" << rd.name << endl; + cout << "\ttype: " << 2 << endl; + cout << "\t" << hit.ref() << endl; + cout << "\t\ttempHit " << (tempHit.fw() ? "+" : "-") << "\t" << tempHit.refoff() << "\t" << tempHit.rdoff() << "\t" << tempHit.len() << "\t" << tempHit.score() << endl; + cout << "\t\tanchHit " << (hit.fw() ? "+" : "-") << "\t" << hit.refoff() << "\t" << hit.rdoff() << "\t" << hit.len() << "\t" << hit.score() << endl; + + spliceSites.clear(); + ssdb.getRightSpliceSites(tempHit.ref(), tempHit.refoff() - minMatchLen, minMatchLen * 2, spliceSites); + for(size_t si = 0; si < spliceSites.size(); si++) { + const SpliceSite& ss = spliceSites[si]; + if(ss.right() > hit.getRightOff()) { + index_t dist = ss.right() - 1 - hit.getRightOff(); + cout << rd.rdid << "s\t\t\t" << ss.left() + 1 << "\t" << ss.right() - 1 << "\t" << (ss.fw() ? 
"+" : "-") << "\t" << dist << endl; + } + } + + spliceSites.clear(); + ssdb.getLeftSpliceSites(hit.ref(), hit.getRightOff() + minMatchLen, minMatchLen * 2, spliceSites); + for(size_t si = 0; si < spliceSites.size(); si++) { + const SpliceSite& ss = spliceSites[si]; + if(ss.left() < tempHit.refoff()) { + index_t dist = tempHit.refoff() - (ss.left() + 1); + cout << rd.rdid << "s\t\t\t" << ss.left() + 1 << "\t" << ss.right() - 1 << "\t" << (ss.fw() ? "+" : "-") << "\t" << dist << endl; + } + } + + if(hit.rdoff() + hit.len() - 1 <= tempHit.rdoff()) return; +#else + assert_lt(hit.ref(), _refnames.size()); + { + ThreadSafe t(const_cast(_mutex), true); + cout << rd.name << "\t" + << _refnames[hit.ref()] << "\t" + << (hit.fw() ? "+" : "-") << "\t" << hit.refoff() << "\t" << hit.rdoff() << "\t" << hit.len() << "\t" << hit.score() << "\t" + << (tempHit.fw() ? "+" : "-") << "\t" << tempHit.refoff() << "\t" << tempHit.rdoff() << "\t" << tempHit.len() << "\t" << tempHit.score() + << endl; + } + _done = true; + return; + +#endif + } + + // check if the partial alignment is compatible with the new alignment using the local index + if(!hit.compatibleWith(tempHit, this->_no_spliced_alignment)) { + if(count == 1) continue; + else break; + } + index_t leftext = (index_t)0, rightext = (index_t)OFF_MASK; + tempHit.extend(rd, ref, ssdb, swa, swm, prm, sc, this->_minsc[rdi], rnd, this->_minK_local, leftext, rightext); + GenomeHit combinedHit = hit; + int64_t minsc = this->_minsc[rdi]; + // combine the partial alignment and the new alignment + bool combined = combinedHit.combineWith(tempHit, rd, ref, ssdb, swa, swm, sc, minsc, rnd, this->_minK_local); + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + if(combined && combinedHit.score() >= minsc) { + assert_leq(combinedHit.trim5(), combinedHit.rdoff()); + if(combinedHit.score() >= prev_score - sc.mmpMax) { + // extend the new partial alignment recursively + hybridSearch_recur( + sc, + ebwtFw, 
+ ebwtBw, + ref, + swa, + ssdb, + rdi, + combinedHit, + combinedHit.rdoff() - combinedHit.trim5(), + combinedHit.len() + combinedHit.trim5(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } else { + local_genomeHits.push_back(combinedHit); + } + } + } + } + int64_t minsc = (rdi == 0 ? sink.bestUnp1() : sink.bestUnp2()); + if(minsc >= prev_score - sc.mmpMax) success = true; + if(!success && (count == max_count || hierEbwtFw->nextLocalEbwt(localEbwtFw) == NULL) ) { + for(index_t ti = 0; ti < local_genomeHits.size(); ti++) { + GenomeHit& tempHit = local_genomeHits[ti]; + int64_t minsc = this->_minsc[rdi]; + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + if(tempHit.score() >= minsc) { + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff() - tempHit.trim5(), + tempHit.len() + tempHit.trim5(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } + } + } + } // while(!success && count++ < 2) + + if(!success) { + // perform global search for long introns + if(hitoff + hitlen + this->_minK + 1 < rdlen) { + index_t extlen = 0; + index_t top = (index_t)OFF_MASK, bot = (index_t)OFF_MASK; + index_t extoff = hitoff + hitlen + this->_minK + 1; + bool uniqueStop = true; + index_t nelt = this->globalEbwtSearch( + ebwtFw, // BWT index + rd, // read to align + sc, // scoring scheme + hit.fw(), + extoff, + extlen, + top, + bot, + rnd, + uniqueStop); + if(nelt <= 5 && extlen >= this->_minK) { + coords.clear(); + bool straddled = false; + this->getGenomeCoords( + ebwtFw, + ref, + rnd, + top, + bot, + hit.fw(), + bot - top, + extoff + 1 - extlen, + extlen, + coords, + wlm, + prm, + him, + true, // reject straddled + straddled); + assert_leq(coords.size(), nelt); + coords.sort(); + for(index_t ri = 0; ri < coords.size(); ri++) { + const Coord& coord = coords[ri]; + GenomeHit tempHit; + tempHit.init(coord.orient(), + extoff + 1 - extlen, + extlen, + 0, // trim5 + 0, // trim3 
+ coord.ref(), + coord.off(), + this->_sharedVars); + if(!hit.compatibleWith(tempHit, this->_no_spliced_alignment)) continue; + index_t leftext = (index_t)0, rightext = (index_t)OFF_MASK; + tempHit.extend(rd, ref, ssdb, swa, swm, prm, sc, this->_minsc[rdi], rnd, this->_minK_local, leftext, rightext); + GenomeHit combinedHit = hit; + int64_t minsc = this->_minsc[rdi]; + bool combined = combinedHit.combineWith(tempHit, rd, ref, ssdb, swa, swm, sc, minsc, rnd, this->_minK_local); + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + if(combined && combinedHit.score() >= minsc) { + assert_leq(combinedHit.trim5(), combinedHit.rdoff()); + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + combinedHit, + combinedHit.rdoff() - combinedHit.trim5(), + combinedHit.len() + combinedHit.trim5(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } + } + } + } + GenomeHit tempHit = hit; + assert(tempHit.trim5() == 0 || hitoff == 0); + if(rdlen - hitoff - tempHit.len() - tempHit.trim5() <= 5) { + index_t trim3 = rdlen - hitoff - tempHit.len() - tempHit.trim5(); + tempHit.trim3(trim3); + assert_leq(tempHit.trim5(), tempHit.rdoff()); + assert_leq(tempHit.len() + tempHit.trim5() + tempHit.trim3(), rdlen); + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff() - tempHit.trim5(), + tempHit.len() + tempHit.trim5() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + return; + } + // extend the partial alignment directly comparing with the corresponding genomic sequence + // with mismatches or a gap allowed + int64_t minsc = this->_minsc[rdi]; + assert_geq(tempHit.score(), minsc); + index_t leftext = (index_t)0, rightext = (index_t)OFF_MASK; + index_t mm = (tempHit.score() - minsc) / sc.mmpMax; + index_t num_mismatch_allowed = 1; + if(rdlen - hitoff - hitlen <= this->_minK_local) { + num_mismatch_allowed = min(rdlen - tempHit.rdoff() - 
tempHit.len(), mm); + } + him.localextatts++; + tempHit.extend(rd, ref, ssdb, swa, swm, prm, sc, this->_minsc[rdi], rnd, this->_minK_local, leftext, rightext, num_mismatch_allowed); + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + if(tempHit.score() >= minsc && rightext >= min(this->_minK_local, rdlen - hit.len() - hit.rdoff())) { + assert_eq(tempHit.trim3(), 0); + assert_leq(tempHit.trim5(), tempHit.rdoff()); + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff() - tempHit.trim5(), + tempHit.len() + tempHit.trim5(), + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } else if(hitoff + hitlen + this->_minK_local < rdlen) { + // skip some bases of a read + index_t jumplen = hitoff + hitlen + this->_minK < rdlen ? this->_minK : this->_minK_local; + assert_lt(hitoff + hitlen + jumplen, rdlen); + assert_leq(hit.len(), hitlen); + int64_t expected_score = hit.score() - (hitlen - hit.len()) / jumplen * sc.mmpMax - sc.mmpMax; + if(expected_score >= minsc) { + assert_eq(hit.trim3(), 0); + hybridSearch_recur( + sc, + ebwtFw, + ebwtBw, + ref, + swa, + ssdb, + rdi, + hit, + hitoff, + hitlen + jumplen, + wlm, + prm, + swm, + him, + rnd, + sink, + dep + 1); + } + } + } + } +} + +#endif /*BP_ALIGNER_H_*/ diff --git a/btypes.h b/btypes.h new file mode 100644 index 0000000..d82ed44 --- /dev/null +++ b/btypes.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + + +#ifndef BOWTIE_INDEX_TYPES_H +#define BOWTIE_INDEX_TYPES_H + +#ifdef BOWTIE_64BIT_INDEX +#define OFF_MASK 0xffffffffffffffff +#define OFF_LEN_MASK 0xc000000000000000 +#define LS_SIZE 0x100000000000000 +#define OFF_SIZE 8 +#define INDEX_MAX 0xffffffffffffffff + +typedef uint64_t TIndexOffU; +typedef int64_t TIndexOff; + +#else +#define OFF_MASK 0xffffffff +#define OFF_LEN_MASK 0xc0000000 +#define LS_SIZE 0x10000000 +#define OFF_SIZE 4 +#define INDEX_MAX 0xffffffff + +typedef uint32_t TIndexOffU; +typedef int TIndexOff; + +#endif /* BOWTIE_64BIT_INDEX */ + +extern const std::string gfm_ext; + +#endif /* BOWTIE_INDEX_TYPES_H */ diff --git a/ccnt_lut.cpp b/ccnt_lut.cpp new file mode 100644 index 0000000..bcac83c --- /dev/null +++ b/ccnt_lut.cpp @@ -0,0 +1,80 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
#include <stdint.h>

/* Generated by gen_lookup_tables.pl */

// Lookup tables populated by initializeCntLut() and initializeCntBit().
// cCntLUT_4[by][c][str]     == countCnt(by, c, str)
// cCntLUT_4_rev[by][c][str] == countCnt_rev(by, c, str)
// cCntBIT[b][str]           == countBit(b, str)
uint8_t cCntLUT_4[4][4][256];
uint8_t cCntLUT_4_rev[4][4][256];
uint8_t cCntBIT[8][256];

/**
 * Count how many of the lowest 'by' 2-bit fields of 'str' are equal to
 * 'c'.  A 'by' of 0 is treated as 4, i.e. the whole byte is examined.
 */
int countCnt(int by, int c, uint8_t str) {
    int count = 0;
    if(by == 0) by = 4;
    while(by-- > 0) {
        // Peel off the least significant 2-bit field
        int c2 = str & 3;
        str >>= 2;
        if(c == c2) count++;
    }

    return count;
}

/**
 * Like countCnt(), but examines the highest 'by' 2-bit fields of 'str',
 * starting from the most significant one.  A 'by' of 0 is treated as 4.
 */
int countCnt_rev(int by, int c, uint8_t str) {
    int count = 0;
    if(by == 0) by = 4;
    while(by-- > 0) {
        // Peel off the most significant 2-bit field; the left shift
        // discards it because str is only 8 bits wide
        int c2 = (str >> 6) & 3;
        str <<= 2;
        if(c == c2) count++;
    }

    return count;
}

/**
 * Fill cCntLUT_4 and cCntLUT_4_rev for every combination of field
 * count (0-3), character (0-3) and byte value (0-255).
 */
void initializeCntLut() {
    for(int by = 0; by < 4; by++) {
        for(int c = 0; c < 4; c++) {
            for(int str = 0; str < 256; str++) {
                cCntLUT_4[by][c][str] = countCnt(by, c, str);
                cCntLUT_4_rev[by][c][str] = countCnt_rev(by, c, str);
            }
        }
    }
}

/**
 * Count the set bits among the lowest 'b' bits of 'str'.  A 'b' of 0
 * is treated as 8, i.e. the whole byte is examined.
 */
int countBit(int b, uint8_t str) {
    int count = 0;
    if(b == 0) b = 8;
    while(b-- > 0) {
        if(str & 0x1) count++;
        str >>= 1;
    }

    return count;
}

/**
 * Fill cCntBIT for every bit count (0-7) and byte value (0-255).
 */
void initializeCntBit() {
    for(int b = 0; b < 8; b++) {
        for(int str = 0; str < 256; str++) {
            cCntBIT[b][str] = countBit(b, str);
        }
    }
}
+ */ + +#include "diff_sample.h" + +struct sampleEntry clDCs[16]; +bool clDCs_calced = false; /// have clDCs been calculated? + +/** + * Entries 4-57 are transcribed from page 6 of Luk and Wong's paper + * "Two New Quorum Based Algorithms for Distributed Mutual Exclusion", + * which is also used and cited in the Burkhardt and Karkkainen's + * papers on difference covers for sorting. These samples are optimal + * according to Luk and Wong. + * + * All other entries are generated via the exhaustive algorithm in + * calcExhaustiveDC(). + * + * The 0 is stored at the end of the sample as an end-of-list marker, + * but 0 is also an element of each. + * + * Note that every difference cover has a 0 and a 1. Intuitively, + * any optimal difference cover sample can be oriented (i.e. rotated) + * such that it includes 0 and 1 as elements. + * + * All samples in this list have been verified to be complete covers. + * + * A value of 0xffffffff in the first column indicates that there is no + * sample for that value of v. We do not keep samples for values of v + * less than 3, since they are trivial (and the caller probably didn't + * mean to ask for it). 
+ */ +uint32_t dc0to64[65][10] = { + {0xffffffff}, // 0 + {0xffffffff}, // 1 + {0xffffffff}, // 2 + {1, 0}, // 3 + {1, 2, 0}, // 4 + {1, 2, 0}, // 5 + {1, 3, 0}, // 6 + {1, 3, 0}, // 7 + {1, 2, 4, 0}, // 8 + {1, 2, 4, 0}, // 9 + {1, 2, 5, 0}, // 10 + {1, 2, 5, 0}, // 11 + {1, 3, 7, 0}, // 12 + {1, 3, 9, 0}, // 13 + {1, 2, 3, 7, 0}, // 14 + {1, 2, 3, 7, 0}, // 15 + {1, 2, 5, 8, 0}, // 16 + {1, 2, 4, 12, 0}, // 17 + {1, 2, 5, 11, 0}, // 18 + {1, 2, 6, 9, 0}, // 19 + {1, 2, 3, 6, 10, 0}, // 20 + {1, 4, 14, 16, 0}, // 21 + {1, 2, 3, 7, 11, 0}, // 22 + {1, 2, 3, 7, 11, 0}, // 23 + {1, 2, 3, 7, 15, 0}, // 24 + {1, 2, 3, 8, 12, 0}, // 25 + {1, 2, 5, 9, 15, 0}, // 26 + {1, 2, 5, 13, 22, 0}, // 27 + {1, 4, 15, 20, 22, 0}, // 28 + {1, 2, 3, 4, 9, 14, 0}, // 29 + {1, 2, 3, 4, 9, 19, 0}, // 30 + {1, 3, 8, 12, 18, 0}, // 31 + {1, 2, 3, 7, 11, 19, 0}, // 32 + {1, 2, 3, 6, 16, 27, 0}, // 33 + {1, 2, 3, 7, 12, 20, 0}, // 34 + {1, 2, 3, 8, 12, 21, 0}, // 35 + {1, 2, 5, 12, 14, 20, 0}, // 36 + {1, 2, 4, 10, 15, 22, 0}, // 37 + {1, 2, 3, 4, 8, 14, 23, 0}, // 38 + {1, 2, 4, 13, 18, 33, 0}, // 39 + {1, 2, 3, 4, 9, 14, 24, 0}, // 40 + {1, 2, 3, 4, 9, 15, 25, 0}, // 41 + {1, 2, 3, 4, 9, 15, 25, 0}, // 42 + {1, 2, 3, 4, 10, 15, 26, 0}, // 43 + {1, 2, 3, 6, 16, 27, 38, 0}, // 44 + {1, 2, 3, 5, 12, 18, 26, 0}, // 45 + {1, 2, 3, 6, 18, 25, 38, 0}, // 46 + {1, 2, 3, 5, 16, 22, 40, 0}, // 47 + {1, 2, 5, 9, 20, 26, 36, 0}, // 48 + {1, 2, 5, 24, 33, 36, 44, 0}, // 49 + {1, 3, 8, 17, 28, 32, 38, 0}, // 50 + {1, 2, 5, 11, 18, 30, 38, 0}, // 51 + {1, 2, 3, 4, 6, 14, 21, 30, 0}, // 52 + {1, 2, 3, 4, 7, 21, 29, 44, 0}, // 53 + {1, 2, 3, 4, 9, 15, 21, 31, 0}, // 54 + {1, 2, 3, 4, 6, 19, 26, 47, 0}, // 55 + {1, 2, 3, 4, 11, 16, 33, 39, 0}, // 56 + {1, 3, 13, 32, 36, 43, 52, 0}, // 57 + + // Generated by calcExhaustiveDC() + {1, 2, 3, 7, 21, 33, 37, 50, 0}, // 58 + {1, 2, 3, 6, 13, 21, 35, 44, 0}, // 59 + {1, 2, 4, 9, 15, 25, 30, 42, 0}, // 60 + {1, 2, 3, 7, 15, 25, 36, 45, 0}, // 61 + {1, 2, 4, 10, 32, 
39, 46, 51, 0}, // 62 + {1, 2, 6, 8, 20, 38, 41, 54, 0}, // 63 + {1, 2, 5, 14, 16, 34, 42, 59, 0} // 64 +}; diff --git a/diff_sample.h b/diff_sample.h new file mode 100644 index 0000000..dda8e09 --- /dev/null +++ b/diff_sample.h @@ -0,0 +1,1000 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef DIFF_SAMPLE_H_ +#define DIFF_SAMPLE_H_ + +#include +#include +#include "assert_helpers.h" +#include "multikey_qsort.h" +#include "timer.h" +#include "ds.h" +#include "mem_ids.h" +#include "ls.h" +#include "btypes.h" + +using namespace std; + +#ifndef VMSG_NL +#define VMSG_NL(...) \ +if(this->verbose()) { \ + stringstream tmp; \ + tmp << __VA_ARGS__ << endl; \ + this->verbose(tmp.str()); \ +} +#endif + +#ifndef VMSG +#define VMSG(...) \ +if(this->verbose()) { \ + stringstream tmp; \ + tmp << __VA_ARGS__; \ + this->verbose(tmp.str()); \ +} +#endif + +/** + * Routines for calculating, sanity-checking, and dispensing difference + * cover samples to clients. + */ + +/** + * + */ +struct sampleEntry { + uint32_t maxV; + uint32_t numSamples; + uint32_t samples[128]; +}; + +/// Array of Colbourn and Ling calculated difference covers up to +/// r = 16 (maxV = 5953) +extern struct sampleEntry clDCs[16]; +extern bool clDCs_calced; /// have clDCs been calculated? 
+ +/** + * Check that the given difference cover 'ds' actually covers all + * differences for a periodicity of v. + */ +template +static bool dcRepOk(T v, EList& ds) { + // diffs[] records all the differences observed + AutoArray covered(v, EBWT_CAT); + for(T i = 1; i < v; i++) { + covered[i] = false; + } + for(T di = T(); di < ds.size(); di++) { + for(T dj = di+1; dj < ds.size(); dj++) { + assert_lt(ds[di], ds[dj]); + T d1 = (ds[dj] - ds[di]); + T d2 = (ds[di] + v - ds[dj]); + assert_lt(d1, v); + assert_lt(d2, v); + covered[d1] = true; + covered[d2] = true; + } + } + bool ok = true; + for(T i = 1; i < v; i++) { + if(covered[i] == false) { + ok = false; + break; + } + } + return ok; +} + +/** + * Return true iff each element of ts (with length 'limit') is greater + * than the last. + */ +template +static bool increasing(T* ts, size_t limit) { + for(size_t i = 0; i < limit-1; i++) { + if(ts[i+1] <= ts[i]) return false; + } + return true; +} + +/** + * Return true iff the given difference cover covers difference 'diff' + * mod 'v'. 
+ */ +template +static inline bool hasDifference(T *ds, T d, T v, T diff) { + // diffs[] records all the differences observed + for(T di = T(); di < d; di++) { + for(T dj = di+1; dj < d; dj++) { + assert_lt(ds[di], ds[dj]); + T d1 = (ds[dj] - ds[di]); + T d2 = (ds[di] + v - ds[dj]); + assert_lt(d1, v); + assert_lt(d2, v); + if(d1 == diff || d2 == diff) return true; + } + } + return false; +} + +/** + * Exhaustively calculate optimal difference cover samples for v = 4, + * 8, 16, 32, 64, 128, 256 and store results in p2DCs[] + */ +template +void calcExhaustiveDC(T i, bool verbose = false, bool sanityCheck = false) { + T v = i; + AutoArray diffs(v, EBWT_CAT); + // v is the target period + T ld = (T)ceil(sqrt(v)); + // ud is the upper bound on |D| + T ud = v / 2; + // for all possible |D|s + bool ok = true; + T *ds = NULL; + T d; + for(d = ld; d <= ud+1; d++) { + // for all possible |D| samples + AutoArray ds(d, EBWT_CAT); + for(T j = 0; j < d; j++) { + ds[j] = j; + } + assert(increasing(ds, d)); + while(true) { + // reset diffs[] + for(T t = 1; t < v; t++) { + diffs[t] = false; + } + T diffCnt = 0; + // diffs[] records all the differences observed + for(T di = 0; di < d; di++) { + for(T dj = di+1; dj < d; dj++) { + assert_lt(ds[di], ds[dj]); + T d1 = (ds[dj] - ds[di]); + T d2 = (ds[di] + v - ds[dj]); + assert_lt(d1, v); + assert_lt(d2, v); + assert_gt(d1, 0); + assert_gt(d2, 0); + if(!diffs[d1]) { diffCnt++; diffs[d1] = true; } + if(!diffs[d2]) { diffCnt++; diffs[d2] = true; } + } + } + // Do we observe all possible differences (except 0) + ok = diffCnt == v-1; + if(ok) { + // Yes, all differences are covered + break; + } else { + // Advance ds + // (Following is commented out because it turns out + // it's slow) + // Find a missing difference + //uint32_t missing = 0xffffffff; + //for(uint32_t t = 1; t < v; t++) { + // if(diffs[t] == false) { + // missing = diffs[t]; + // break; + // } + //} + //assert_neq(missing, 0xffffffff); + assert(increasing(ds, d)); + bool 
advanced = false; + bool keepGoing = false; + do { + keepGoing = false; + for(T bd = d-1; bd > 1; bd--) { + T dif = (d-1)-bd; + if(ds[bd] < v-1-dif) { + ds[bd]++; + assert_neq(0, ds[bd]); + // Reset subsequent ones + for(T bdi = bd+1; bdi < d; bdi++) { + assert_eq(0, ds[bdi]); + ds[bdi] = ds[bdi-1]+1; + assert_gt(ds[bdi], ds[bdi-1]); + } + assert(increasing(ds, d)); + // (Following is commented out because + // it turns out it's slow) + // See if the new DC has the missing value + //if(!hasDifference(ds, d, v, missing)) { + // keepGoing = true; + // break; + //} + advanced = true; + break; + } else { + ds[bd] = 0; + // keep going + } + } + } while(keepGoing); + // No solution for this |D| + if(!advanced) break; + assert(increasing(ds, d)); + } + } // next sample assignment + if(ok) { + break; + } + } // next |D| + assert(ok); + cout << "Did exhaustive v=" << v << " |D|=" << d << endl; + cout << " "; + for(T i = 0; i < d; i++) { + cout << ds[i]; + if(i < d-1) cout << ","; + } + cout << endl; +} + +/** + * Routune for calculating the elements of clDCs up to r = 16 using the + * technique of Colbourn and Ling. 
+ * + * See http://citeseer.ist.psu.edu/211575.html + */ +template +void calcColbournAndLingDCs(bool verbose = false, bool sanityCheck = false) { + for(T r = 0; r < 16; r++) { + T maxv = 24*r*r + 36*r + 13; // Corollary 2.3 + T numsamp = 6*r + 4; + clDCs[r].maxV = maxv; + clDCs[r].numSamples = numsamp; + memset(clDCs[r].samples, 0, 4 * 128); + T i; + // clDCs[r].samples[0] = 0; + // Fill in the 1^r part of the B series + for(i = 1; i < r+1; i++) { + clDCs[r].samples[i] = clDCs[r].samples[i-1] + 1; + } + // Fill in the (r + 1)^1 part + clDCs[r].samples[r+1] = clDCs[r].samples[r] + r + 1; + // Fill in the (2r + 1)^r part + for(i = r+2; i < r+2+r; i++) { + clDCs[r].samples[i] = clDCs[r].samples[i-1] + 2*r + 1; + } + // Fill in the (4r + 3)^(2r + 1) part + for(i = r+2+r; i < r+2+r+2*r+1; i++) { + clDCs[r].samples[i] = clDCs[r].samples[i-1] + 4*r + 3; + } + // Fill in the (2r + 2)^(r + 1) part + for(i = r+2+r+2*r+1; i < r+2+r+2*r+1+r+1; i++) { + clDCs[r].samples[i] = clDCs[r].samples[i-1] + 2*r + 2; + } + // Fill in the last 1^r part + for(i = r+2+r+2*r+1+r+1; i < r+2+r+2*r+1+r+1+r; i++) { + clDCs[r].samples[i] = clDCs[r].samples[i-1] + 1; + } + assert_eq(i, numsamp); + assert_lt(i, 128); + if(sanityCheck) { + // diffs[] records all the differences observed + AutoArray diffs(maxv, EBWT_CAT); + for(T i = 0; i < numsamp; i++) { + for(T j = i+1; j < numsamp; j++) { + T d1 = (clDCs[r].samples[j] - clDCs[r].samples[i]); + T d2 = (clDCs[r].samples[i] + maxv - clDCs[r].samples[j]); + assert_lt(d1, maxv); + assert_lt(d2, maxv); + diffs[d1] = true; + diffs[d2] = true; + } + } + // Should have observed all possible differences (except 0) + for(T i = 1; i < maxv; i++) { + if(diffs[i] == false) cout << r << ", " << i << endl; + assert(diffs[i] == true); + } + } + } + clDCs_calced = true; +} + +/** + * A precalculated list of difference covers. + */ +extern uint32_t dc0to64[65][10]; + +/** + * Get a difference cover for the requested periodicity v. 
+ */ +template +static EList getDiffCover( + T v, + bool verbose = false, + bool sanityCheck = false) +{ + assert_gt(v, 2); + EList ret; + ret.clear(); + // Can we look it up in our hardcoded array? + if(v <= 64 && dc0to64[v][0] == 0xffffffff) { + if(verbose) cout << "v in hardcoded area, but hardcoded entry was all-fs" << endl; + return ret; + } else if(v <= 64) { + ret.push_back(0); + for(size_t i = 0; i < 10; i++) { + if(dc0to64[v][i] == 0) break; + ret.push_back(dc0to64[v][i]); + } + if(sanityCheck) assert(dcRepOk(v, ret)); + return ret; + } + + // Can we look it up in our calcColbournAndLingDCs array? + if(!clDCs_calced) { + calcColbournAndLingDCs(verbose, sanityCheck); + assert(clDCs_calced); + } + for(size_t i = 0; i < 16; i++) { + if(v <= clDCs[i].maxV) { + for(size_t j = 0; j < clDCs[i].numSamples; j++) { + T s = clDCs[i].samples[j]; + if(s >= v) { + s %= v; + for(size_t k = 0; k < ret.size(); k++) { + if(s == ret[k]) break; + if(s < ret[k]) { + ret.insert(s, k); + break; + } + } + } else { + ret.push_back(s % v); + } + } + if(sanityCheck) assert(dcRepOk(v, ret)); + return ret; + } + } + cerr << "Error: Could not find a difference cover sample for v=" << v << endl; + throw 1; +} + +/** + * Calculate and return a delta map based on the given difference cover + * and periodicity v. 
+ */ +template +static EList getDeltaMap(T v, const EList& dc) { + // Declare anchor-map-related items + EList amap; + size_t amapEnts = 1; + amap.resizeExact((size_t)v); + amap.fill(0xffffffff); + amap[0] = 0; + // Print out difference cover (and optionally calculate + // anchor map) + for(size_t i = 0; i < dc.size(); i++) { + for(size_t j = i+1; j < dc.size(); j++) { + assert_gt(dc[j], dc[i]); + T diffLeft = dc[j] - dc[i]; + T diffRight = dc[i] + v - dc[j]; + assert_lt(diffLeft, v); + assert_lt(diffRight, v); + if(amap[diffLeft] == 0xffffffff) { + amap[diffLeft] = dc[i]; + amapEnts++; + } + if(amap[diffRight] == 0xffffffff) { + amap[diffRight] = dc[j]; + amapEnts++; + } + } + } + return amap; +} + +/** + * Return population count (count of all bits set to 1) of i. + */ +template +static unsigned int popCount(T i) { + unsigned int cnt = 0; + for(size_t j = 0; j < sizeof(T)*8; j++) { + if(i & 1) cnt++; + i >>= 1; + } + return cnt; +} + +/** + * Calculate log-base-2 of i + */ +template +static unsigned int myLog2(T i) { + assert_eq(1, popCount(i)); // must be power of 2 + for(size_t j = 0; j < sizeof(T)*8; j++) { + if(i & 1) return (int)j; + i >>= 1; + } + assert(false); + return 0xffffffff; +} + +/** + * + */ +template +class DifferenceCoverSample { +public: + + DifferenceCoverSample(const TStr& __text, + uint32_t __v, + bool __verbose = false, + bool __sanity = false, + ostream& __logger = cout) : + _text(__text), + _v(__v), + _verbose(__verbose), + _sanity(__sanity), + _ds(getDiffCover(_v, _verbose, _sanity)), + _dmap(getDeltaMap(_v, _ds)), + _d((uint32_t)_ds.size()), + _doffs(), + _isaPrime(), + _dInv(), + _log2v(myLog2(_v)), + _vmask(OFF_MASK << _log2v), + _logger(__logger) + { + assert_gt(_d, 0); + assert_eq(1, popCount(_v)); // must be power of 2 + // Build map from d's to idx's + _dInv.resizeExact((size_t)v()); + _dInv.fill(0xffffffff); + uint32_t lim = (uint32_t)_ds.size(); + for(uint32_t i = 0; i < lim; i++) { + _dInv[_ds[i]] = i; + } + } + + /** + * 
Allocate an amount of memory that simulates the peak memory + * usage of the DifferenceCoverSample with the given text and v. + * Throws bad_alloc if it's not going to fit in memory. Returns + * the approximate number of bytes the Cover takes at all times. + */ + static size_t simulateAllocs(const TStr& text, uint32_t v) { + EList ds(getDiffCover(v, false /*verbose*/, false /*sanity*/)); + size_t len = text.length(); + size_t sPrimeSz = (len / v) * ds.size(); + // sPrime, sPrimeOrder, _isaPrime all exist in memory at + // once and that's the peak + AutoArray aa(sPrimeSz * 3 + (1024 * 1024 /*out of caution*/), EBWT_CAT); + return sPrimeSz * 4; // sPrime array + } + + uint32_t v() const { return _v; } + uint32_t log2v() const { return _log2v; } + uint32_t vmask() const { return _vmask; } + uint32_t modv(TIndexOffU i) const { return (uint32_t)(i & ~_vmask); } + TIndexOffU divv(TIndexOffU i) const { return i >> _log2v; } + uint32_t d() const { return _d; } + bool verbose() const { return _verbose; } + bool sanityCheck() const { return _sanity; } + const TStr& text() const { return _text; } + const EList& ds() const { return _ds; } + const EList& dmap() const { return _dmap; } + ostream& log() const { return _logger; } + + void build(int nthreads); + uint32_t tieBreakOff(TIndexOffU i, TIndexOffU j) const; + int64_t breakTie(TIndexOffU i, TIndexOffU j) const; + bool isCovered(TIndexOffU i) const; + TIndexOffU rank(TIndexOffU i) const; + + /** + * Print out the suffix array such that every sample offset has its + * rank filled in and every non-sample offset is shown as '-'. 
+ */ + void print(ostream& out) { + for(size_t i = 0; i < _text.length(); i++) { + if(isCovered(i)) { + out << rank(i); + } else { + out << "-"; + } + if(i < _text.length()-1) { + out << ","; + } + } + out << endl; + } + +private: + + void doBuiltSanityCheck() const; + void buildSPrime(EList& sPrime, size_t padding); + + bool built() const { + return _isaPrime.size() > 0; + } + + void verbose(const string& s) const { + if(this->verbose()) { + this->log() << s.c_str(); + this->log().flush(); + } + } + + const TStr& _text; // text to sample + uint32_t _v; // periodicity of sample + bool _verbose; // + bool _sanity; // + EList _ds; // samples: idx -> d + EList _dmap; // delta map + uint32_t _d; // |D| - size of sample + EList _doffs; // offsets into sPrime/isaPrime for each d idx + EList _isaPrime; // ISA' array + EList _dInv; // Map from d -> idx + uint32_t _log2v; + TIndexOffU _vmask; + ostream& _logger; +}; + +/** + * Sanity-check the difference cover by first inverting _isaPrime then + * checking that each successive suffix really is less than the next. 
+ */ +template +void DifferenceCoverSample::doBuiltSanityCheck() const { + uint32_t v = this->v(); + assert(built()); + VMSG_NL(" Doing sanity check"); + TIndexOffU added = 0; + EList sorted; + sorted.resizeExact(_isaPrime.size()); + sorted.fill(OFF_MASK); + for(size_t di = 0; di < this->d(); di++) { + uint32_t d = _ds[di]; + size_t i = 0; + for(size_t doi = _doffs[di]; doi < _doffs[di+1]; doi++, i++) { + assert_eq(OFF_MASK, sorted[_isaPrime[doi]]); + // Maps the offset of the suffix to its rank + sorted[_isaPrime[doi]] = (TIndexOffU)(v*i + d); + added++; + } + } + assert_eq(added, _isaPrime.size()); +#ifndef NDEBUG + for(size_t i = 0; i < sorted.size()-1; i++) { + assert(sstr_suf_lt(this->text(), sorted[i], this->text(), sorted[i+1], false)); + } +#endif +} + +/** + * Build the s' array by sampling suffixes (suffix offsets, actually) + * from t according to the difference-cover sample and pack them into + * an array of machine words in the order dictated by the "mu" mapping + * described in Burkhardt. + * + * Also builds _doffs map. + */ +template +void DifferenceCoverSample::buildSPrime( + EList& sPrime, + size_t padding) +{ + const TStr& t = this->text(); + const EList& ds = this->ds(); + TIndexOffU tlen = (TIndexOffU)t.length(); + uint32_t v = this->v(); + uint32_t d = this->d(); + assert_gt(v, 2); + assert_lt(d, v); + // Record where each d section should begin in sPrime + TIndexOffU tlenDivV = this->divv(tlen); + uint32_t tlenModV = this->modv(tlen); + TIndexOffU sPrimeSz = 0; + assert(_doffs.empty()); + _doffs.resizeExact((size_t)d+1); + for(uint32_t di = 0; di < d; di++) { + // mu mapping + TIndexOffU sz = tlenDivV + ((ds[di] <= tlenModV) ? 
1 : 0); + assert_geq(sz, 0); + _doffs[di] = sPrimeSz; + sPrimeSz += sz; + } + _doffs[d] = sPrimeSz; +#ifndef NDEBUG + if(tlenDivV > 0) { + for(size_t i = 0; i < d; i++) { + assert_gt(_doffs[i+1], _doffs[i]); + TIndexOffU diff = _doffs[i+1] - _doffs[i]; + assert(diff == tlenDivV || diff == tlenDivV+1); + } + } +#endif + assert_eq(_doffs.size(), d+1); + // Size sPrime appropriately + sPrime.resizeExact((size_t)sPrimeSz + padding); + sPrime.fill(OFF_MASK); + // Slot suffixes from text into sPrime according to the mu + // mapping; where the mapping would leave a blank, insert a 0 + TIndexOffU added = 0; + TIndexOffU i = 0; + for(uint64_t ti = 0; ti <= tlen; ti += v) { + for(uint32_t di = 0; di < d; di++) { + TIndexOffU tti = (TIndexOffU)ti + (TIndexOffU)ds[di]; + if(tti > tlen) break; + TIndexOffU spi = _doffs[di] + i; + assert_lt(spi, _doffs[di+1]); + assert_leq(tti, tlen); + assert_lt(spi, sPrimeSz); + assert_eq(OFF_MASK, sPrime[spi]); + sPrime[spi] = tti; added++; + } + i++; + } + assert_eq(added, sPrimeSz); +} + +/** + * Return true iff suffixes with offsets suf1 and suf2 out of host + * string 'host' are identical up to depth 'v'. 
+ */ +template +static inline bool suffixSameUpTo( + const TStr& host, + TIndexOffU suf1, + TIndexOffU suf2, + TIndexOffU v) +{ + for(TIndexOffU i = 0; i < v; i++) { + bool endSuf1 = suf1+i >= host.length(); + bool endSuf2 = suf2+i >= host.length(); + if((endSuf1 && !endSuf2) || (!endSuf1 && endSuf2)) return false; + if(endSuf1 && endSuf2) return true; + if(host[suf1+i] != host[suf2+i]) return false; + } + return true; +} + +template +struct VSortingParam { + DifferenceCoverSample* dcs; + TIndexOffU* sPrimeArr; + size_t sPrimeSz; + TIndexOffU* sPrimeOrderArr; + size_t depth; + const EList* boundaries; + size_t* cur; + MUTEX_T* mutex; +}; + +template +static void VSorting_worker(void *vp) +{ + VSortingParam* param = (VSortingParam*)vp; + DifferenceCoverSample* dcs = param->dcs; + const TStr& host = dcs->text(); + const size_t hlen = host.length(); + uint32_t v = dcs->v(); + while(true) { + size_t cur = 0; + { + ThreadSafe ts(param->mutex, true); + cur = *(param->cur); + (*param->cur)++; + } + if(cur >= param->boundaries->size()) return; + size_t begin = (cur == 0 ? 0 : (*param->boundaries)[cur-1]); + size_t end = (*param->boundaries)[cur]; + assert_leq(begin, end); + if(end - begin <= 1) continue; + mkeyQSortSuf2( + host, + hlen, + param->sPrimeArr, + param->sPrimeSz, + param->sPrimeOrderArr, + 4, + begin, + end, + param->depth, + v); + } +} + +/** + * Calculates a ranking of all suffixes in the sample and stores them, + * packed according to the mu mapping, in _isaPrime. + */ +template +void DifferenceCoverSample::build(int nthreads) { + // Local names for relevant types + VMSG_NL("Building DifferenceCoverSample"); + // Local names for relevant data + const TStr& t = this->text(); + uint32_t v = this->v(); + assert_gt(v, 2); + // Build s' + EList sPrime; + // Need to allocate 2 extra elements at the end of the sPrime and _isaPrime + // arrays. 
One element that's less than all others, and another that acts + // as needed padding for the Larsson-Sadakane sorting code. + size_t padding = 1; + VMSG_NL(" Building sPrime"); + buildSPrime(sPrime, padding); + size_t sPrimeSz = sPrime.size() - padding; + assert_gt(sPrime.size(), padding); + assert_leq(sPrime.size(), t.length() + padding + 1); + TIndexOffU nextRank = 0; + { + VMSG_NL(" Building sPrimeOrder"); + EList sPrimeOrder; + sPrimeOrder.resizeExact(sPrimeSz); + for(TIndexOffU i = 0; i < sPrimeSz; i++) { + sPrimeOrder[i] = i; + } + // sPrime now holds suffix-offsets for DC samples. + { + Timer timer(cout, " V-Sorting samples time: ", this->verbose()); + VMSG_NL(" V-Sorting samples"); + // Extract backing-store array from sPrime and sPrimeOrder; + // the mkeyQSortSuf2 routine works on the array for maximum + // efficiency + TIndexOffU *sPrimeArr = (TIndexOffU*)sPrime.ptr(); + assert_eq(sPrimeArr[0], sPrime[0]); + assert_eq(sPrimeArr[sPrimeSz-1], sPrime[sPrimeSz-1]); + TIndexOffU *sPrimeOrderArr = (TIndexOffU*)sPrimeOrder.ptr(); + assert_eq(sPrimeOrderArr[0], sPrimeOrder[0]); + assert_eq(sPrimeOrderArr[sPrimeSz-1], sPrimeOrder[sPrimeSz-1]); + // Sort sample suffixes up to the vth character using a + // multikey quicksort. Sort time is proportional to the + // number of samples times v. It isn't quadratic. + // sPrimeOrder is passed in as a swapping partner for + // sPrimeArr, i.e., every time the multikey qsort swaps + // elements in sPrime, it swaps the same elements in + // sPrimeOrder too. This allows us to easily reconstruct + // what the sort did. 
+ if(nthreads == 1) { + mkeyQSortSuf2(t, sPrimeArr, sPrimeSz, sPrimeOrderArr, 4, + this->verbose(), this->sanityCheck(), v); + } else { + int query_depth = 0; + int tmp_nthreads = nthreads; + while(tmp_nthreads > 0) { + query_depth++; + tmp_nthreads >>= 1; + } + EList boundaries; // bucket boundaries for parallelization + TIndexOffU *sOrig = NULL; + if(this->sanityCheck()) { + sOrig = new TIndexOffU[sPrimeSz]; + memcpy(sOrig, sPrimeArr, OFF_SIZE * sPrimeSz); + } + mkeyQSortSuf2(t, sPrimeArr, sPrimeSz, sPrimeOrderArr, 4, + this->verbose(), false, query_depth, &boundaries); + if(boundaries.size() > 0) { + AutoArray threads(nthreads); + EList > tparams; + size_t cur = 0; + MUTEX_T mutex; + for(int tid = 0; tid < nthreads; tid++) { + // Calculate bucket sizes by doing a binary search for each + // suffix and noting where it lands + tparams.expand(); + tparams.back().dcs = this; + tparams.back().sPrimeArr = sPrimeArr; + tparams.back().sPrimeSz = sPrimeSz; + tparams.back().sPrimeOrderArr = sPrimeOrderArr; + tparams.back().depth = query_depth; + tparams.back().boundaries = &boundaries; + tparams.back().cur = &cur; + tparams.back().mutex = &mutex; + threads[tid] = new tthread::thread(VSorting_worker, (void*)&tparams.back()); + } + for (int tid = 0; tid < nthreads; tid++) { + threads[tid]->join(); + } + } + if(this->sanityCheck()) { + sanityCheckOrderedSufs(t, t.length(), sPrimeArr, sPrimeSz, v); + for(size_t i = 0; i < sPrimeSz; i++) { + assert_eq(sPrimeArr[i], sOrig[sPrimeOrderArr[i]]); + } + delete[] sOrig; + } + } + // Make sure sPrime and sPrimeOrder are consistent with + // their respective backing-store arrays + assert_eq(sPrimeArr[0], sPrime[0]); + assert_eq(sPrimeArr[sPrimeSz-1], sPrime[sPrimeSz-1]); + assert_eq(sPrimeOrderArr[0], sPrimeOrder[0]); + assert_eq(sPrimeOrderArr[sPrimeSz-1], sPrimeOrder[sPrimeSz-1]); + } + // Now assign the ranking implied by the sorted sPrime/sPrimeOrder + // arrays back into sPrime. 
+ VMSG_NL(" Allocating rank array"); + _isaPrime.resizeExact(sPrime.size()); + ASSERT_ONLY(_isaPrime.fill(OFF_MASK)); + assert_gt(_isaPrime.size(), 0); + { + Timer timer(cout, " Ranking v-sort output time: ", this->verbose()); + VMSG_NL(" Ranking v-sort output"); + for(size_t i = 0; i < sPrimeSz-1; i++) { + // Place the appropriate ranking + _isaPrime[sPrimeOrder[i]] = nextRank; + // If sPrime[i] and sPrime[i+1] are identical up to v, then we + // should give the next suffix the same rank + if(!suffixSameUpTo(t, sPrime[i], sPrime[i+1], v)) nextRank++; + } + _isaPrime[sPrimeOrder[sPrimeSz-1]] = nextRank; // finish off +#ifndef NDEBUG + for(size_t i = 0; i < sPrimeSz; i++) { + assert_neq(OFF_MASK, _isaPrime[i]); + assert_lt(_isaPrime[i], sPrimeSz); + } +#endif + } + // sPrimeOrder is destroyed + // All the information we need is now in _isaPrime + } + _isaPrime[_isaPrime.size()-1] = (TIndexOffU)sPrimeSz; + sPrime[sPrime.size()-1] = (TIndexOffU)sPrimeSz; + // _isaPrime[_isaPrime.size()-1] and sPrime[sPrime.size()-1] are just + // spacer for the Larsson-Sadakane routine to use + { + Timer timer(cout, " Invoking Larsson-Sadakane on ranks time: ", this->verbose()); + VMSG_NL(" Invoking Larsson-Sadakane on ranks"); + if(sPrime.size() >= LS_SIZE) { + cerr << "Error; sPrime array has so many elements that it can't be converted to a signed array without overflow." 
<< endl; + throw 1; + } + LarssonSadakane ls; + ls.suffixsort( + (TIndexOff*)_isaPrime.ptr(), + (TIndexOff*)sPrime.ptr(), + (TIndexOff)sPrimeSz, + (TIndexOff)sPrime.size(), + 0); + } + // chop off final character of _isaPrime + _isaPrime.resizeExact(sPrimeSz); + for(size_t i = 0; i < _isaPrime.size(); i++) { + _isaPrime[i]--; + } +#ifndef NDEBUG + for(size_t i = 0; i < sPrimeSz-1; i++) { + assert_lt(_isaPrime[i], sPrimeSz); + assert(i == 0 || _isaPrime[i] != _isaPrime[i-1]); + } +#endif + VMSG_NL(" Sanity-checking and returning"); + if(this->sanityCheck()) doBuiltSanityCheck(); +} + +/** + * Return true iff index i within the text is covered by the difference + * cover sample. Allow i to be off the end of the text; simplifies + * logic elsewhere. + */ +template +bool DifferenceCoverSample::isCovered(TIndexOffU i) const { + assert(built()); + uint32_t modi = this->modv(i); + assert_lt(modi, _dInv.size()); + return _dInv[modi] != 0xffffffff; +} + +/** + * Given a text offset that's covered, return its lexicographical rank + * among the sample suffixes. + */ +template +TIndexOffU DifferenceCoverSample::rank(TIndexOffU i) const { + assert(built()); + assert_lt(i, this->text().length()); + uint32_t imodv = this->modv(i); + assert_neq(0xffffffff, _dInv[imodv]); // must be in the sample + TIndexOffU ioff = this->divv(i); + assert_lt(ioff, _doffs[_dInv[imodv]+1] - _doffs[_dInv[imodv]]); + TIndexOffU isaIIdx = _doffs[_dInv[imodv]] + ioff; + assert_lt(isaIIdx, _isaPrime.size()); + TIndexOffU isaPrimeI = _isaPrime[isaIIdx]; + assert_leq(isaPrimeI, _isaPrime.size()); + return isaPrimeI; +} + +/** + * Return: < 0 if suffix i is lexicographically less than suffix j; > 0 + * if suffix i is lexicographically greater. The result is the difference + * of the two sample ranks (rank of i minus rank of j). Both i and j must + * be distinct offsets that are in the sample.
+ */ +template +int64_t DifferenceCoverSample::breakTie(TIndexOffU i, TIndexOffU j) const { + assert(built()); + assert_neq(i, j); + assert_lt(i, this->text().length()); + assert_lt(j, this->text().length()); + uint32_t imodv = this->modv(i); + uint32_t jmodv = this->modv(j); + assert_neq(0xffffffff, _dInv[imodv]); // must be in the sample + assert_neq(0xffffffff, _dInv[jmodv]); // must be in the sample + uint32_t dimodv = _dInv[imodv]; + uint32_t djmodv = _dInv[jmodv]; + TIndexOffU ioff = this->divv(i); + TIndexOffU joff = this->divv(j); + assert_lt(dimodv+1, _doffs.size()); + assert_lt(djmodv+1, _doffs.size()); + // assert_lt: expected (32024) < (0) + assert_lt(ioff, _doffs[dimodv+1] - _doffs[dimodv]); + assert_lt(joff, _doffs[djmodv+1] - _doffs[djmodv]); + TIndexOffU isaIIdx = _doffs[dimodv] + ioff; + TIndexOffU isaJIdx = _doffs[djmodv] + joff; + assert_lt(isaIIdx, _isaPrime.size()); + assert_lt(isaJIdx, _isaPrime.size()); + assert_neq(isaIIdx, isaJIdx); // i and j must map to different _isaPrime slots + TIndexOffU isaPrimeI = _isaPrime[isaIIdx]; + TIndexOffU isaPrimeJ = _isaPrime[isaJIdx]; + assert_neq(isaPrimeI, isaPrimeJ); // ranks must be unique + assert_leq(isaPrimeI, _isaPrime.size()); + assert_leq(isaPrimeJ, _isaPrime.size()); + return (int64_t)isaPrimeI - (int64_t)isaPrimeJ; +} + +/** + * Given i, j, return the number of additional characters that need to + * be compared before the difference cover can break the tie.
+ */ +template +uint32_t DifferenceCoverSample::tieBreakOff(TIndexOffU i, TIndexOffU j) const { + const TStr& t = this->text(); + const EList& dmap = this->dmap(); + assert(built()); + // It's actually convenient to allow this, but we're permitted to + // return nonsense in that case + if(t[i] != t[j]) return 0xffffffff; + //assert_eq(t[i], t[j]); // if they're unequal, there's no tie to break + uint32_t v = this->v(); + assert_neq(i, j); + assert_lt(i, t.length()); + assert_lt(j, t.length()); + uint32_t imod = this->modv(i); + uint32_t jmod = this->modv(j); + uint32_t diffLeft = (jmod >= imod)? (jmod - imod) : (jmod + v - imod); + uint32_t diffRight = (imod >= jmod)? (imod - jmod) : (imod + v - jmod); + assert_lt(diffLeft, dmap.size()); + assert_lt(diffRight, dmap.size()); + uint32_t destLeft = dmap[diffLeft]; // offset (mod v) where i needs to be + uint32_t destRight = dmap[diffRight]; // offset (mod v) where j needs to be + assert(isCovered(destLeft)); + assert(isCovered(destLeft+diffLeft)); + assert(isCovered(destRight)); + assert(isCovered(destRight+diffRight)); + assert_lt(destLeft, v); + assert_lt(destRight, v); + uint32_t deltaLeft = (destLeft >= imod)? (destLeft - imod) : (destLeft + v - imod); + if(deltaLeft == v) deltaLeft = 0; + uint32_t deltaRight = (destRight >= jmod)? (destRight - jmod) : (destRight + v - jmod); + if(deltaRight == v) deltaRight = 0; + assert_lt(deltaLeft, v); + assert_lt(deltaRight, v); + assert(isCovered(i+deltaLeft)); + assert(isCovered(j+deltaLeft)); + assert(isCovered(i+deltaRight)); + assert(isCovered(j+deltaRight)); + return min(deltaLeft, deltaRight); +} + +#endif /*DIFF_SAMPLE_H_*/ diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 0000000..18ceadc --- /dev/null +++ b/docs/404.html @@ -0,0 +1,9 @@ +--- +layout: page +title: 404 Not Found +permalink: 404.html +hide: true +share: false +--- + +Sorry, the requested page wasn't found on the server.
diff --git a/docs/Gemfile b/docs/Gemfile new file mode 100644 index 0000000..f42cd81 --- /dev/null +++ b/docs/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' +gem 'github-pages' +gem 'jekyll-feed' +gem 'jemoji' diff --git a/docs/LICENSE b/docs/LICENSE new file mode 100644 index 0000000..08e721b --- /dev/null +++ b/docs/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Rohan Chandra + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..19db668 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,59 @@ +# jekyll-ttskch-theme + +A simple and customizable theme for Jekyll. + +> This theme was renamed from _jekyll-**qck**-theme_ to _jekyll-**tch**-theme_ at 2016.06.02. +> And renamed again from _jekyll-**tch**-theme_ to _jekyll-**ttskch**-theme_ at 2016.09.23. 
+ +## Screen shot + +![image](https://cloud.githubusercontent.com/assets/4360663/18776176/62611b38-81a2-11e6-875b-86a66aa8f15c.png) + +## Features + +* A lot of Markdown features (also GitHub Flavored Markdown) +* `:emoji:` ready :+1: +* Easy color-scheme customization +* Tags list page +* Monthly Archives page +* Search feature without any Jekyll plugins +* `` tag feature +* Anchor links for each headings +* Sticky side nav +* Responsive +* OGP ready +* Share buttons ready + +## Getting started + +1. [Fork me](https://github.com/ttskch/jekyll-ttskch-theme/fork) +2. Rename the repository from `jekyll-ttskch-theme` to `{username}.github.io` ([learn more](https://pages.github.com/)) +3. Modify `_config.yml` +4. Modify `_sass/base/_variables.scss` if you need to change colors or font sizes +5. Add new posts into `_posts/` :smiley: + +## Demo + +You can see live demo at below: + +* https://ttskch.github.io/jekyll-ttskch-theme/ + +## Thanks for using :wink: + +* http://ttskch.github.io +* http://sitaramshelke.github.io +* http://jffourmond.github.io +* http://vbflash8.github.io +* http://luqitao.github.io +* http://harusametime.github.io +* http://gitzxon.github.io +* http://hutsonlu.github.io +* http://k0-1.github.io +* http://anthonygore.github.io +* http://getjsdojo.github.io +* http://georgezhuo.github.io +* http://neontapir.github.io +* https://sasukeh.github.io +* https://blog.guilhermegarnier.com + +Please PR if you want to add your blog. diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 0000000..6b96528 --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1,130 @@ +# +# Basic settings. 
+# +url: http://DaehwanKimLab.github.io +baseurl: /hisat2 +title: HISAT2 +description: graph-based alignment of next generation sequencing reads to a population of genomes +avatar: /assets/img/ogp.png +# favicon: /favicon.ico +favicon: /assets/img/ogp.png +# language: ja +language: en + +# +# Icons +# +icons: + rss: true + email: + github: DaehwanKimLab + bitbucket: + twitter: + facebook: + google_plus: + tumblr: + behance: + dribbble: + flickr: + instagram: + linkedin: # full URL + pinterest: + reddit: + soundcloud: + stack_exchange: # full URL + steam: + wordpress: + youtube: + +# +# default for front matter +# +defaults: + - + scope: + path: "" + values: + category: "main" + + + +# +# Prettify url. +# +permalink: pretty + +# +# Scripts. +# +google_analytics: # e.g. UA-000000-01 +disqus: + +# +# Localizations. +# +str_next: Next +str_prev: Prev +str_read_more: Read more... +str_search: Search +str_recent_posts: Recent posts +str_show_all_posts: Show all posts + +# +# Recent posts. +# +recent_posts_num: 10 + +# +# Pagination. +# +paginate: 10 +paginate_path: page/:num + +# +# Social. +# +share_buttons: + twitter: true + facebook: false # needs ogp.fb.app_id + hatena: false +ogp: + image_url: //ttskch.github.io/jekyll-ttskch-theme/assets/img/ogp.png + fb: + admin: # facebook admin id + app_id: # facebook application id + +# +# Plugins. +# +gems: + - jekyll-paginate + - jekyll-feed + - jemoji + +# +# Styles: see "_sass/base/_variables.scss" +# + +# +# !! Danger zone !! 
+# + +include: ["_pages"] + +markdown: kramdown +kramdown: + input: GFM + syntax_highlighter: rouge + +excerpt_separator: + +sass: + sass_dir: _sass + style: :compressed # or :expanded + +exclude: + - Gemfile + - Gemfile.lock + - LICENSE + - README.md + - vendor diff --git a/docs/_data/collaborate.yml b/docs/_data/collaborate.yml new file mode 100644 index 0000000..2aef647 --- /dev/null +++ b/docs/_data/collaborate.yml @@ -0,0 +1,6 @@ +- name: Lyda Hill Department of Bioinformatics, The University of Texas Southwestern Medical Center + url: https://www.utsouthwestern.edu/departments/bioinformatics + logo: /assets/img/bioinformatics_utsw_logo.png +- name: Center for Computational Biology, Johns Hopkins University + url: http://ccb.jhu.edu + logo: /assets/img/ccb_jhu_logo_tmp.png \ No newline at end of file diff --git a/docs/_data/contributor.yml b/docs/_data/contributor.yml new file mode 100644 index 0000000..ef84e12 --- /dev/null +++ b/docs/_data/contributor.yml @@ -0,0 +1,10 @@ +- name: Chanhee Park + url: /chanhee.park/ +- name: Ben Langmead + url: http://www.langmead-lab.org/ +- name: Yun (Leo) Zhang + url: /leo.zhang/ +- name: Steven Salzberg + url: https://salzberg-lab.org/in-the-news/about-me/ +- name: Daehwan Kim + url: https://kim-lab.org/daehwan-kim-principal-investigator/ diff --git a/docs/_data/download-binary.yml b/docs/_data/download-binary.yml new file mode 100644 index 0000000..eaf5caf --- /dev/null +++ b/docs/_data/download-binary.yml @@ -0,0 +1,66 @@ +latest_version: 2.2.1,2.2.0,2.1.0 +release: + - version: 2.2.1 + date: 7/24/2020 + name: HISAT2 + artifacts: + Source: https://cloud.biohpc.swmed.edu/index.php/s/fE9QCsX3NH4QwBi/download + OSX_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/zMgEtnF6LjnjFrr/download + Linux_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/oTtGWbWjaxsQ2Ho/download + - version: 2.2.0 + date: 2/6/2020 + name: HISAT2 + artifacts: + Source: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-220-source/download +
OSX_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-220-OSX_x86_64/download + Linux_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-220-Linux_x86_64/download + - version: 2.1.0 + date: 6/8/2017 + name: HISAT2 + artifacts: + Source: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-210-source/download + OSX_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-210-OSX_x86_64/download + Linux_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-210-Linux_x86_64/download + Windows: http://www.di.fc.ul.pt/~afalcao/hisat2_windows.html + - version: 2.0.5 + date: 11/4/2016 + name: HISAT2 + artifacts: + Source: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-205-source/download + OSX_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-205-OSX_x86_64/download + Linux_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-205-Linux_x86_64/download + - version: 2.0.4 + date: 5/18/2016 + name: HISAT2 + artifacts: + Source: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-204-source/download + OSX_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-204-OSX_x86_64/download + Linux_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-204-Linux_x86_64/download + - version: 2.0.3-beta + date: 3/28/2016 + name: HISAT2 + artifacts: + Source: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-203-beta-source/download + OSX_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-203-beta-OSX_x86_64/download + Linux_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-203-beta-Linux_x86_64/download + - version: 2.0.2-beta + date: 3/17/2016 + name: HISAT2 + artifacts: + Source: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-202-beta-source/download + OSX_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-202-beta-OSX_x86_64/download + Linux_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-202-beta-Linux_x86_64/download + - version: 2.0.1-beta + date: 11/19/2015 + name: HISAT2 + artifacts: + Source: 
https://cloud.biohpc.swmed.edu/index.php/s/hisat2-201-beta-source/download + OSX_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-201-beta-OSX_x86_64/download + Linux_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-201-beta-Linux_x86_64/download + - version: 2.0.0-beta + date: 9/8/2015 + name: HISAT2 + artifacts: + Source: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-200-beta-source/download + OSX_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-200-beta-OSX_x86_64/download + Linux_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-200-beta-Linux_x86_64/download diff --git a/docs/_data/download-index.yml b/docs/_data/download-index.yml new file mode 100644 index 0000000..ad1f1dd --- /dev/null +++ b/docs/_data/download-index.yml @@ -0,0 +1,81 @@ +- organism: H. sapiens + data: + GRCh38: + genome: + url: https://genome-idx.s3.amazonaws.com/hisat/grch38_genome.tar.gz + genome_snp: + url: https://genome-idx.s3.amazonaws.com/hisat/grch38_snp.tar.gz + genome_tran: + url: https://genome-idx.s3.amazonaws.com/hisat/grch38_tran.tar.gz + genome_snp_tran: + url: https://genome-idx.s3.amazonaws.com/hisat/grch38_snptran.tar.gz + genome_rep(above 2.2.0): + url: https://genome-idx.s3.amazonaws.com/hisat/grch38_rep.tar.gz + genome_snp_rep(above 2.2.0): + url: https://genome-idx.s3.amazonaws.com/hisat/grch38_snprep.tar.gz + UCSC hg38: + genome: + url: https://genome-idx.s3.amazonaws.com/hisat/hg38_genome.tar.gz + genome_tran: + url: https://genome-idx.s3.amazonaws.com/hisat/hg38_tran.tar.gz + GRCh37: + genome: + url: https://genome-idx.s3.amazonaws.com/hisat/grch37_genome.tar.gz + genome_snp: + url: https://genome-idx.s3.amazonaws.com/hisat/grch37_snp.tar.gz + genome_tran: + url: https://genome-idx.s3.amazonaws.com/hisat/grch37_tran.tar.gz + genome_snp_tran: + url: https://genome-idx.s3.amazonaws.com/hisat/grch37_snptran.tar.gz + UCSC hg19: + genome: + url: https://genome-idx.s3.amazonaws.com/hisat/hg19_genome.tar.gz +- organism: M. 
musculus + data: + GRCm38: + genome: + url: https://cloud.biohpc.swmed.edu/index.php/s/grcm38/download + genome_snp: + url: https://cloud.biohpc.swmed.edu/index.php/s/grcm38_snp/download + genome_tran: + url: https://cloud.biohpc.swmed.edu/index.php/s/grcm38_tran/download + genome_snp_tran: + url: https://cloud.biohpc.swmed.edu/index.php/s/grcm38_snp_tran/download + UCSC mm10: + genome: + url: https://genome-idx.s3.amazonaws.com/hisat/mm10_genome.tar.gz +- organism: R. norvegicus + data: + UCSC rn6: + genome: + url: https://genome-idx.s3.amazonaws.com/hisat/rn6_genome.tar.gz +- organism: D. melanogaster + data: + BDGP6: + genome: + url: https://genome-idx.s3.amazonaws.com/hisat/bdgp6.tar.gz + genome_tran: + url: https://genome-idx.s3.amazonaws.com/hisat/bdgp6_tran.tar.gz + UCSC dm6: + genome: + url: https://genome-idx.s3.amazonaws.com/hisat/dm6.tar.gz +- organism: C. elegans + data: + WBcel235: + genome: + url: https://genome-idx.s3.amazonaws.com/hisat/wbcel235.tar.gz + genome_tran: + url: https://genome-idx.s3.amazonaws.com/hisat/wbcel235_tran.tar.gz + UCSC ce10: + genome: + url: https://cloud.biohpc.swmed.edu/index.php/s/bbynxoY2TPpRNQb/download +- organism: S. cerevisiae + data: + R64-1-1: + genome: + url: https://cloud.biohpc.swmed.edu/index.php/s/JRSoKHD5cHfpCFE/download + genome_tran: + url: https://cloud.biohpc.swmed.edu/index.php/s/akeiMrGGtt5KoJY/download + UCSC sacCer3: + genome: + url: https://cloud.biohpc.swmed.edu/index.php/s/Gsq4goLW4TDAz4E/download diff --git a/docs/_includes/article-footer.html b/docs/_includes/article-footer.html new file mode 100644 index 0000000..cc9607b --- /dev/null +++ b/docs/_includes/article-footer.html @@ -0,0 +1,5 @@ +
+ {% if site.share_buttons and include.share != false %} + {% include share-buttons.html page=include.page %} + {% endif %} +
diff --git a/docs/_includes/article-header.html b/docs/_includes/article-header.html new file mode 100644 index 0000000..d876fb0 --- /dev/null +++ b/docs/_includes/article-header.html @@ -0,0 +1,64 @@ +{% assign page = include.page %} + +
+
+

+ {% if include.link %} + {{ page.title }} + {% else %} + {{ page.title }} + {% endif %} +

+ +
    + {% assign tags_num = (page.tags | size) %} + {% if tags_num > 0 %} +
  • + {% endif %} + {% for tag in page.tags %} +
  • + #{{ tag }} +
  • + {% endfor %} +
+ +
+
    + {% if page.date %} +
  • + + {{ page.date | date: "%Y-%m-%d" }} +
  • + {% endif %} + + {% if page.author %} +
  • + + + {{ page.author }} + +
  • + {% if page.icons %} +
  • +
      + {% include icons.html icons=page.icons %} +
    +
  • + {% endif %} + {% endif %} +
+
+
+ + {% if site.share_buttons and include.share != false %} +
+ {% include share-buttons.html page=page %} +
+ {% endif %} + + {% if include.eye_catch != false and page.eye_catch %} +

+ +

+ {% endif %} +
diff --git a/docs/_includes/disqus.html b/docs/_includes/disqus.html new file mode 100644 index 0000000..097f452 --- /dev/null +++ b/docs/_includes/disqus.html @@ -0,0 +1,10 @@ +
+ + diff --git a/docs/_includes/fb-root.html b/docs/_includes/fb-root.html new file mode 100644 index 0000000..6375220 --- /dev/null +++ b/docs/_includes/fb-root.html @@ -0,0 +1,11 @@ + +{% if site.share_buttons.facebook %} +
+ +{% endif %} diff --git a/docs/_includes/google-analytics.html b/docs/_includes/google-analytics.html new file mode 100644 index 0000000..fa7f0e2 --- /dev/null +++ b/docs/_includes/google-analytics.html @@ -0,0 +1,12 @@ + +{% if site.google_analytics %} + +{% endif %} diff --git a/docs/_includes/icons.html b/docs/_includes/icons.html new file mode 100644 index 0000000..dbec897 --- /dev/null +++ b/docs/_includes/icons.html @@ -0,0 +1,161 @@ +{% assign icons = include.icons %} + +{% if icons.rss %} +
  • + + + +
  • +{% endif %} + +{% if icons.email %} +
  • + + + +
  • +{% endif %} + +{% if icons.github %} +
  • + + + +
  • +{% endif %} + +{% if icons.bitbucket %} +
  • + + + +
  • +{% endif %} + +{% if icons.twitter %} +
  • + + + +
  • +{% endif %} + +{% if icons.facebook %} +
  • + + + +
  • +{% endif %} + +{% if icons.google_plus %} +
  • + + + +
  • +{% endif %} + +{% if icons.tumblr %} +
  • + + + +
  • +{% endif %} + +{% if icons.behance %} +
  • + + + +
  • +{% endif %} + +{% if icons.dribbble %} +
  • + + + +
  • +{% endif %} + +{% if icons.flickr %} +
  • + + + +
  • +{% endif %} + +{% if icons.instagram %} +
  • + + + +
  • +{% endif %} + +{% if icons.linkedin %} +
  • + + + +
  • +{% endif %} + +{% if icons.pinterest %} +
  • + + + +
  • +{% endif %} + +{% if icons.reddit %} +
  • + + + +
  • +{% endif %} + +{% if icons.soundcloud %} +
  • + + + +
  • +{% endif %} + +{% if icons.stack_exchange %} +
  • + + + +
  • +{% endif %} + +{% if icons.steam %} +
  • + + + +
  • +{% endif %} + +{% if icons.wordpress %} +
  • + + + +
  • +{% endif %} + +{% if icons.youtube %} +
  • + + + +
  • +{% endif %} diff --git a/docs/_includes/page-url-resolver.html b/docs/_includes/page-url-resolver.html new file mode 100644 index 0000000..f89f26d --- /dev/null +++ b/docs/_includes/page-url-resolver.html @@ -0,0 +1,7 @@ +{% assign page = include.page %} + +{% if page.canonical %} +{% assign url = page.canonical | prepend: site.baseurl | prepend: site.url %} +{% else %} +{% assign url = page.url | replace: 'index.html', '' | prepend: site.baseurl | prepend: site.url %} +{% endif %} diff --git a/docs/_includes/paginator.html b/docs/_includes/paginator.html new file mode 100644 index 0000000..7e736f2 --- /dev/null +++ b/docs/_includes/paginator.html @@ -0,0 +1,29 @@ +{% if paginator.total_pages > 1 %} + +{% endif %} diff --git a/docs/_includes/share-buttons.html b/docs/_includes/share-buttons.html new file mode 100644 index 0000000..0bdd9de --- /dev/null +++ b/docs/_includes/share-buttons.html @@ -0,0 +1,22 @@ +{% include page-url-resolver.html page=include.page %} +{% assign title = include.page.title | append: ' | ' | append: site.title %} +
    +
    + {% if site.share_buttons.twitter %} +
    + + +
    + {% endif %} + {% if site.share_buttons.facebook %} +
    +
    +
    + {% endif %} + {% if site.share_buttons.hatena %} +
    + このエントリーをはてなブックマークに追加 +
    + {% endif %} +
    +
    diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html new file mode 100644 index 0000000..dbf5ab5 --- /dev/null +++ b/docs/_layouts/default.html @@ -0,0 +1,194 @@ + + + + {% capture title %}{% if page.title %}{{ page.title }} | {% endif %}{{ site.title }}{% endcapture %} + + {% include page-url-resolver.html page=page %} + + {% if page.excerpt %} + {% assign description = page.excerpt | strip_html | strip_newlines | truncate: 160 %} + {% else %} + {% assign description = site.description %} + {% endif %} + + + + + + {{ title }} + + + + + + + + {% if page.eye_catch %} + {% assign ogp_image_url = page.eye_catch %} + {% else %} + {% assign ogp_image_url = site.ogp.image_url %} + {% endif %} + + + + + + + + + + + + + + + + + + + + +
    +
    + {{ content }} +
    + + +
    + + + + + + + +{% if page.id %} + +{% endif %} + +{% if page.permalink == '/search/' %} + +{% endif %} + +{% include fb-root.html %} +{% include google-analytics.html %} + + + diff --git a/docs/_layouts/page.html b/docs/_layouts/page.html new file mode 100644 index 0000000..e718aa0 --- /dev/null +++ b/docs/_layouts/page.html @@ -0,0 +1,13 @@ +--- +layout: default +--- + +
    +
    + {% include article-header.html page=page link=false share=page.share %} +
    + {{ content }} +
    + {% include article-footer.html page=page share=page.share %} +
    +
    diff --git a/docs/_layouts/post.html b/docs/_layouts/post.html new file mode 100644 index 0000000..bccaf31 --- /dev/null +++ b/docs/_layouts/post.html @@ -0,0 +1,19 @@ +--- +layout: default +--- + +
    +
    + {% include article-header.html page=page link=false share=page.share %} +
    + {{ content }} +
    + {% include article-footer.html page=page share=page.share %} +
    +
    + +{% if site.disqus %} +
    + {% include disqus.html %} +
+{% endif %} diff --git a/docs/_pages/about.md b/docs/_pages/about.md new file mode 100644 index 0000000..0ab9f26 --- /dev/null +++ b/docs/_pages/about.md @@ -0,0 +1,9 @@ +--- +layout: page +title: About +permalink: /about/ +order: 2 +share: false +--- + +**HISAT2** is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome. Based on an extension of BWT for graphs ([Sirén et al. 2014](http://dl.acm.org/citation.cfm?id=2674828)), we designed and implemented a graph FM index (GFM), an original approach and its first implementation. In addition to using one global GFM index that represents a population of human genomes, HISAT2 uses a large set of small GFM indexes that collectively cover the whole genome. These small indexes (called local indexes), combined with several alignment strategies, enable rapid and accurate alignment of sequencing reads. This new indexing scheme is called a Hierarchical Graph FM index (HGFM). diff --git a/docs/_pages/archives-all.html b/docs/_pages/archives-all.html new file mode 100644 index 0000000..f3f0e5e --- /dev/null +++ b/docs/_pages/archives-all.html @@ -0,0 +1,20 @@ +--- +layout: page +title: All Posts +permalink: /archives/all/ +hide: true +share: false +--- + +
    +
    + + {% for post in site.posts %} +
    +
    + {% include article-header.html page=post link=true share=false eye_catch=false %} +
    +
    +
    + {% endfor %} +
    diff --git a/docs/_pages/archives.html b/docs/_pages/archives.html new file mode 100644 index 0000000..5ebb486 --- /dev/null +++ b/docs/_pages/archives.html @@ -0,0 +1,35 @@ +--- +layout: page +title: Archives +permalink: /archives/ +order: 3 +share: false +hide: true +--- + +{% for post in site.posts %} + {% unless post.next %} +

    {{ post.date | date: '%Y' }}

    +
      + {% else %} + {% assign year = post.date | date: '%Y' %} + {% assign next_year = post.next.date | date: '%Y' %} + {% if year != next_year %} +
    +

    {{ post.date | date: '%Y' }}

    +
      + {% endif %} + {% endunless %} + + {% assign month = post.date | date: '%m' %} + {% assign next_month = post.next.date | date: '%m' %} + {% if year != next_year or month != next_month %} +
    • {{ post.date | date: '%Y/%m' }}
    • + {% endif %} +{% endfor %} + +{% if site.posts %} +
    +{% endif %} + +{{ site.str_show_all_posts }} diff --git a/docs/_pages/contributors/chanheepark.md b/docs/_pages/contributors/chanheepark.md new file mode 100644 index 0000000..6201910 --- /dev/null +++ b/docs/_pages/contributors/chanheepark.md @@ -0,0 +1,12 @@ +--- +layout: page +title: Chanhee Park +permalink: /chanhee.park/ +order: 1 +share: false +category: contributor +--- + +Chanhee Park is a Scientific Software Engineer in the Kim Lab at UTSW responsible for maintaining and improving HISAT2. + +[Linkedin](https://www.linkedin.com/in/chanhee-park-97677297/) diff --git a/docs/_pages/contributors/yunleozhang.md b/docs/_pages/contributors/yunleozhang.md new file mode 100644 index 0000000..15d57d0 --- /dev/null +++ b/docs/_pages/contributors/yunleozhang.md @@ -0,0 +1,12 @@ +--- +layout: page +title: Yun (Leo) Zhang +permalink: /leo.zhang/ +order: 1 +share: false +category: contributor +--- + +Yun (Leo) is a biomedical engineering graduate student at UT Southwestern Medical Center. His main research includes developing advance alignment tools. + +[Linkedin](https://www.linkedin.com/in/zhang-yun-a9565891/) diff --git a/docs/_pages/download.md b/docs/_pages/download.md new file mode 100644 index 0000000..672e3bf --- /dev/null +++ b/docs/_pages/download.md @@ -0,0 +1,61 @@ +--- +layout: page +title: Download +permalink: /download/ +order: 5 +share: false +--- + +Please cite: +>Kim, D., Paggi, J.M., Park, C. _et al._ Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. _Nat Biotechnol_ **37**, 907–915 (2019). + +- TOC +{:toc} + +## Index +HISAT2 indexes are hosted on AWS (Amazon Web Services), thanks to the AWS Public Datasets program. Click this [link](https://registry.opendata.aws/jhu-indexes/) for more details. + +{% for item in site.data.download-index %} +### {{ item.organism }} + {% for data in item.data %} +
  • {{ data[0] }}
  • + +{% for genome in data[1] %} + + + +{% endfor %} +
    {{ genome[0] }} + {% for url in genome[1] %} + {{ url[1] }}
    + {% endfor %} +
    +{% endfor %} +{% endfor %} + + + genome: HISAT2 index for reference + genome_snp: HISAT2 Graph index for reference plus SNPs + genome_tran: HISAT2 Graph index for reference plus transcripts + genome_snp_tran: HISAT2 Graph index for reference plus SNPs and transcripts + + +## Binaries +{: binaries } + +{% assign targets = site.data.download-binary.latest_version | split: "," %} +{% for release in site.data.download-binary.release %} +{% assign version = release['version'] %} +{% if targets contains version or targets == null %} +{% assign name = release['name'] %} +### Version: {{name}} {{version}} + + +{% for artifact in release['artifacts'] %} +{% assign type = artifact[0] %} + +{% endfor %} +
    Release Date: {{release['date']}}
    {{type}}{{artifact[1]}}
    +{% endif %} +{% endfor %} + diff --git a/docs/_pages/hisat-3n.md b/docs/_pages/hisat-3n.md new file mode 100644 index 0000000..c0ebd65 --- /dev/null +++ b/docs/_pages/hisat-3n.md @@ -0,0 +1,225 @@ +--- +layout: page +title: HISAT-3N +permalink: /hisat-3n/ +order: 4 +share: false +--- + +HISAT-3N +============ + +Overview +----------------- +**HISAT-3N** (hierarchical indexing for spliced alignment of transcripts - 3 nucleotides) +is designed for nucleotide conversion sequencing technologies and implemented based on HISAT2. +There are two strategies for HISAT-3N to align nucleotide conversion sequencing reads: *standard mode* and *repeat mode*. +The standard mode aligns reads with the standard-3N index only, so it is fast and requires little memory (~9GB for human genome alignment). +The repeat mode aligns reads with both the standard-3N index and the repeat-3N index, then outputs 1,000 alignment results (the output number can be adjusted by `--repeat-limit`). +The repeat mode can align nucleotide conversion reads more accurately, +and it is only 10% slower than the standard mode, with slightly more memory usage (repeat mode uses about 10.5GB). + +HISAT-3N is developed based on [HISAT2], which is particularly optimized for RNA sequencing technology. +HISAT-3N can be used for any base-converted sequencing reads, including [BS-seq], [SLAM-seq], [TAB-seq], [oxBS-seq], [TAPS], [scBS-seq], and [scSLAM-seq]. 
+ +[HISAT2]:https://github.com/DaehwanKimLab/hisat2 +[BS-seq]: https://en.wikipedia.org/wiki/Bisulfite_sequencing +[SLAM-seq]: https://www.nature.com/articles/nmeth.4435 +[scBS-seq]: https://www.nature.com/articles/nmeth.3035 +[scSLAM-seq]: https://www.nature.com/articles/s41586-019-1369-y +[TAPS]: https://www.nature.com/articles/s41587-019-0041-2 +[TAB-seq]: https://doi.org/10.1016/j.cell.2012.04.027 +[oxBS-seq]: https://science.sciencemag.org/content/336/6083/934 + + +Getting started +============ +HISAT-3N requires a 64-bit computer running either Linux or Mac OS X and at least 16 GB of RAM. + +A few notes: + +1. The repeat 3N index building process requires 256 GB of RAM. +2. The standard 3N index building requires no more than 16 GB of RAM. +3. The alignment process with either standard or repeat index requires no more than 16 GB of RAM. +4. [SAMtools] is required to sort SAM file for hisat-3n-table. + +Install +------------ + + git clone https://github.com/DaehwanKimLab/hisat2.git + cd hisat2 + git checkout -b hisat-3n origin/hisat-3n + make + + +Make sure that you are in the `hisat-3n` branch. + + +Build a 3N index with `hisat-3n-build` +----------- +`hisat-3n-build` builds a 3N-index, which contains two hisat2 indexes, from a set of DNA sequences. For standard 3N-index, +each index contains 16 files with suffix `.3n.*.*.ht2`. +For repeat 3N-index, there are 16 more files in addition to the standard 3N-index, and they have the suffix +`.3n.*.rep.*.ht2`. +These files constitute the hisat-3n index and no other file is needed to align reads to the reference. + +* Example for standard HISAT-3N index building: +`hisat-3n-build genome.fa genome` + +* Example for repeat HISAT-3N index building (requires 256 GB of memory): +`hisat-3n-build --repeat-index genome.fa genome` + +It is optional to make the graph index and add SNP or splicing site information to the index, to increase the alignment accuracy. +For more detail, please check the [HISAT2 manual]. 
+ +[HISAT2 manual]:https://daehwankimlab.github.io/hisat2/manual/ + + # Standard HISAT-3N integrated index with SNP information + hisat-3n-build --exons genome.exon genome.fa genome + + # Standard HISAT-3N integrated index with splicing site information + hisat-3n-build --ss genome.ss genome.fa genome + + # Repeat HISAT-3N integrated index with SNP information + hisat-3n-build --repeat-index --exons genome.exon genome.fa genome + + # Repeat HISAT-3N integrated index with splicing site information + hisat-3n-build --repeat-index --ss genome.ss genome.fa genome + +Alignment with `hisat-3n` +------------ +After we build the HISAT-3N index, you are ready to use `hisat-3n` for alignment. +HISAT-3N uses the HISAT2 argument but has some extra arguments. Please check [HISAT2 manual] for more detail. + +For human genome reference, HISAT-3N requires about 9GB for alignment with standard 3N-index and 10.5 GB for repeat 3N-index. + +* `--base-change ` + Provide which base is converted in the sequencing process to another base. Please enter + 2 letters separated by ',' for this argument. The first letter(chr1) should be the converted base, the second letter(chr2) should be + the converted to base. For example, during slam-seq, some 'T' is converted to 'C', + please enter `--base-change T,C`. During bisulfite-seq, some 'C' is converted to 'T', please enter `--base-change C,T`. + If you want to align non-converted reads to the regular HISAT2 index, do not use this option. + +* `--index/-x ` + The index for HISAT-3N. The basename is the name of the index files up to but not including the suffix `.3n.*.*.ht2` / etc. + For example, you build your index with basename 'genome' by HISAT-3N-build, please enter `--index genome`. + +* `--repeat-limit ` + You can set up the number of alignment will be check for each repeat alignment. You may increase the number to let hisat-3n + output more, if a read has multiple mapping. 
We suggest the repeat limit number for paired-end reads alignment is no more + than 1,000,000. default: 1000. + +* `--unique-only` + Only output uniquely aligned reads. + +#### Examples: +* Single-end slam-seq reads (T to C conversion) alignment with standard 3N-index: +`hisat-3n --index genome -f -U read.fa -S alignment_result.sam --base-change T,C` + +* Paired-end bisulfite-seq reads (C to T conversion) alignment with repeat 3N-index: +`hisat-3n --index genome -f -1 read_1.fa -2 read_2.fa -S alignment_result.sam --base-change C,T` + +* Single-end TAPS reads (have C to T conversion) alignment with repeat 3N-index and only output uniquely aligned results: +`hisat-3n --index genome -q -U read.fq -S alignment_result.sam --base-change C,T --unique-only` + + + +#### Extra SAM tags generated by HISAT-3N: + +* `Yf:i:`: Number of conversions detected in the read. + +* `YZ:A:`: The value `+` or `-` indicates whether the read is mapped to REF-3N (`+`) or REF-RC-3N (`-`). + +Generate a 3N-conversion-table with `hisat-3n-table` +------------ +### Preparation + +To generate a 3N-conversion-table, users need to sort the SAM file generated by `hisat-3n`. +[SAMtools] is required for this sorting process. + +Use `samtools sort` to convert the SAM file to a sorted SAM file. + + samtools sort alignment_result.sam -o sorted_alignment_result.sam -O sam + +Generate 3N-conversion-table with `hisat-3n-table`: + +### Usage + hisat-3n-table [options]* --alignments <alignmentFile> --ref <refFile> --output-name <outputFile> --base-change <char1,char2> + +#### Main arguments +* `--alignments <alignmentFile>` + SORTED SAM file. Please enter `-` for standard input. + +* `--ref <refFile>` + The reference genome file (FASTA format) for generating HISAT-3N index. + +* `--output-name <outputFile>` + Filename to write 3N-conversion-table (tsv format) to. + +* `--base-change <char1,char2>` + The base-change rule. User should enter the exact same `--base-change` arguments in hisat-3n. + For example, please enter `--base-change C,T` for bisulfite sequencing reads. 
+ +#### Input options +* `-u/--unique-only` + Only count the unique aligned reads into 3N-conversion-table. + +* `-m/--multiple-only` + Only count the multiple aligned reads into 3N-conversion-table. + +* `-c/--CG-only` + Only count the CpG island in reference genome. This option is designed for bisulfite sequencing reads. + +* `-p/--threads ` + Launch `int` parallel threads (default: 1) for table building. + +* `-h/--help` + Print usage information and quit. + + +#### Examples: +* Generate 3N conversion table for bisulfite sequencing data: +`hisat-3n-table -p 16 --alignments sorted_alignment_result.sam --ref genome.fa --output-name output.tsv --base-change C,T` + +* Generate 3N-conversion-table for TAPS data and only count base in CpG island and uniquely aligned: +`hisat-3n-table -p 16 --alignments sorted_alignment_result.sam --ref genome.fa --output-name output.tsv --base-change C,T --CG-only --unique-only` + +* Generate 3N conversion table for bisulfite sequencing data from sorted BAM file: +`samtools view -h sorted_alignment_result.bam | hisat-3n-table --ref genome.fa --alignments - --output-name output.tsv --base-change C,T` + +* Generate 3N conversion table for bisulfite sequencing data from unsorted BAM file: + `samtools sort alignment_result.bam -O sam | hisat-3n-table --ref genome.fa --alignments - --output-name output.tsv --base-change C,T` + + +#### Note: +There are 7 columns in the 3N-conversion-table: + +1. `ref`: the chromosome name. +2. `pos`: 1-based position in ref. +3. `strand`: '+' for forward strand. '-' for reverse strand. +4. `convertedBaseQualities`: the qualities for converted base in read-level measurement. Length of this string is equal to +the number of converted Base in read-level measurement. +5. `convertedBaseCount`: number of distinct read positions where converted base in read-level measurements were found. +this number should equal to the length of convertedBaseQualities. +6. 
`unconvertedBaseQualities`: the qualities for unconverted base in read-level measurement. Length of this string is equal to +the number of unconverted Base in read-level measurement. +7. `unconvertedBaseCount`: number of distinct read positions where unconverted base in read-level measurements were found. +this number should equal to the length of unconvertedBaseQualities. + +##### Sample 3N-conversion-table: + ref pos strand convertedBaseQualities convertedBaseCount unconvertedBaseQualities unconvertedBaseCount + 1 11874 + FFFFFB5 locations and >100 locations, respectively. Attempting to report all alignments would likely consume a prohibitive amount of disk space. In order to address this issue, our repeat indexing and alignment approach directly aligns reads to repeat sequences, resulting in one repeat alignment per read. HISAT2 provides application programming interfaces (API) for C++, Python, and JAVA that rapidly retrieve genomic locations from repeat alignments for use in downstream analyses. +Other minor bug fixes are also included as follows: + +* Fixed occasional sign (+ or -) issues of template lengths in SAM file +* Fixed duplicate read alignments in SAM file +* Skip a splice site if exon's last base or first base is ambiguous (N) + + +### Index files are moved to a different location. 8/30/2019 + +Due to a high volume of index downloads, we have moved HISAT2 index files to a different location in order to provide faster download speed. If you use wget or curl to download index files, then you may need to use the following commands to get the correct file name. +* `wget --content-disposition` *download_link* +* `curl -OJ` *download_link* + + +### [The HISAT2 paper](https://www.nature.com/articles/s41587-019-0201-4) is out in *Nature Biotechnology*. 
8/2/2019 + + +### HISAT 2.1.0 release 6/8/2017 + +* This major version includes the first release of HISAT-genotype, which currently performs HLA typing, + DNA fingerprinting analysis, and CYP typing on whole genome sequencing (WGS) reads. + We plan to extend the system so that it can analyze not just a few genes, but a whole human genome. + Please refer to [the HISAT-genotype website](https://daehwankimlab.github.io/hisat-genotype) for more details. +* HISAT2 can be directly compiled and executed on Windows system using Visual Studio, thanks to [Nigel Dyer](http://www2.warwick.ac.uk/fac/sci/systemsbiology/staff/dyer/). +* Implemented `--new-summary` option to output a new style of alignment summary, which is easier to parse for programming purposes. +* Implemented `--summary-file` option to output alignment summary to a file in addition to the terminal (e.g. stderr). +* Fixed discrepancy in HISAT2’s alignment summary. +* Implemented `--no-templatelen-adjustment` option to disable automatic template length adjustment for RNA-seq reads. + + +### HISAT2 2.0.5 release 11/4/2016 +Version 2.0.5 is a minor release with the following changes. +* Due to a policy change (HTTP to HTTPS) in using SRA data (`--sra-option`), users are strongly encouraged to use this version. As of 11/9/2016, NCBI will begin a permanent redirect to HTTPS, which means the previous versions of HISAT2 no longer works with `--sra-acc` option soon. +* Implemented `-I` and `-X` options for specifying minimum and maximum fragment lengths. The options are valid only when used with `--no-spliced-alignment`, which is used for the alignment of DNA-seq reads. +* Fixed some cases where reads with SNPs on their 5' ends were not properly aligned. +* Implemented `--no-softclip` option to disable soft-clipping. +* Implemented `--max-seeds` to specify the maximum number of seeds that HISAT2 will try to extend to full-length alignments (see [the manual] for details). 
+ + +### [HISAT, StringTie and Ballgown protocol](http://www.nature.com/nprot/journal/v11/n9/full/nprot.2016.095.html) published at Nature Protocols 8/11/2016 + + +### HISAT2 2.0.4 Windows binary available [here](http://www.di.fc.ul.pt/~afalcao/hisat2_windows.html), thanks to [Andre Osorio Falcao](http://www.di.fc.ul.pt/~afalcao/) 5/24/2016 + + +### HISAT2 2.0.4 release 5/18/2016 +Version 2.0.4 is a minor release with the following changes. +* Improved template length estimation (the 9th column of the SAM format) of RNA-seq reads by taking introns into account. +* Introduced two options, `--remove-chrname` and `--add-chrname`, to remove "chr" from reference names or add "chr" to reference names in the alignment output, respectively (the 3rd column of the SAM format). +* Changed the maximum of mapping quality (the 5th column of the SAM format) from 255 to 60. Note that 255 is an undefined value according to the SAM manual and some programs would not work with this value (255) properly. +* Fixed NH (number of hits) in the alignment output. +* HISAT2 allows indels of any length pertaining to minimum alignment score (previously, the maximum length of indels was 3 bp). +* Fixed several cases where alignments went beyond reference sequences. +* Fixed reporting duplicate alignments. + + +### HISAT2 2.0.3-beta release 3/28/2016 +Version 2.0.3-beta is a minor release with the following changes. +* Fixed graph index building when using both SNPs and transcripts. As a result, genome_snp_tran indexes here on the HISAT2 website have been rebuilt. +* Included some missing files needed to follow the small test example (see [the manual] for details). + + +### HISAT2 2.0.2-beta release 3/17/2016 +**Note (3/19/2016):** this version is slightly updated to handle reporting splice sites with the correct chromosome names. +Version 2.0.2-beta is a major release with the following changes. +* Memory-mapped IO (`--mm` option) works now. 
+* Building a linear index can now be done using multiple threads. +* Changed the minimum score for alignment in keeping with read lengths, so it's now `--score-min L,0.0,-0.2`, meaning a minimum score of -20 for 100-bp reads and -30 for 150-bp reads. +* Fixed a bug that the same read was written into a file multiple times when `--un-conc` was used. +* Fixed another bug that caused reads to map beyond reference sequences. +* Introduced `--haplotype` option in the hisat2-build (index building), which is used with `--snp` option together to incorporate those SNP combinations present in the human population. This option also prevents graph construction from exploding due to exponential combinations of SNPs in small genomic regions. +* Provided a new python script to extract SNPs and haplotypes from VCF files, hisat2_extract_snps_haplotypes_VCF.py +* Changed several python script names as follows: + * *extract_splice_sites.py* to *hisat2_extract_splice_sites.py* + * *extract_exons.py* to *hisat2_extract_exons.py* + * *extract_snps.py* to *hisat2_extract_snps_haplotypes_UCSC.py* + + +### HISAT2 2.0.1-beta release 11/19/2015 +Version 2.0.1-beta is a maintenance release with the following changes. +* Fixed a bug that caused reads to map beyond reference sequences. +* Fixed a deadlock issue that happened very rarely. +* Fixed a bug that led to illegal memory access when reading SNP information. +* Fixed a system-specific bug related to popcount instruction. + + +### HISAT2 2.0.0-beta release 9/8/2015 - first release +We extended the BWT/FM index to incorporate genomic differences among individuals into the reference genome, while keeping memory requirements low enough to fit the entire index onto a desktop computer. Using this novel Hierarchical Graph FM index (HGFM) approach, we built a new alignment system, HISAT2, with an index that incorporates ~12.3M common SNPs from the dbSNP database. HISAT2 provides greater alignment accuracy for reads containing SNPs. 
+* HISAT2's index size for the human reference genome and 12.3 million common SNPs is 6.2GB (the memory footprint of HISAT2 is 6.7GB). The SNPs consist of 11 million single nucleotide polymorphisms, 728,000 deletions, and 555,000 insertions. The insertions and deletions used in this index are small (usually <20bp). +* HISAT2 comes with several index types: + * Hierarchical FM index (HFM) for a reference genome (index base: genome) + * Hierarchical Graph FM index (HGFM) for a reference genome plus SNPs (index base: genome_snp) + * Hierarchical Graph FM index (HGFM) for a reference genome plus transcripts (index base: genome_tran) + * Hierarchical Graph FM index (HGFM) for a reference genome plus SNPs and transcripts (index base: genome_snp_tran) +* HISAT2 is a successor to both [HISAT](http://ccb.jhu.edu/software/hisat) and [TopHat2](http://ccb.jhu.edu/software/tophat). We recommend that HISAT and TopHat2 users switch to HISAT2. + * HISAT2 can be considered an enhanced version of HISAT with many improvements and bug fixes. The alignment speed and memory requirements of HISAT2 are virtually the same as those of HISAT when using the HFM index (genome). + * When using graph-based indexes (HGFM), the runtime of HISAT2 is slightly slower than HISAT (30~80% additional CPU time). + * HISAT2 allows for mapping reads directly against transcripts, similar to that of TopHat2 (use genome_tran or genome_snp_tran). +* When reads contain SNPs, the SNP information is provided as an optional field in the SAM output of HISAT2 (e.g., **Zs:Z:1|S|rs3747203,97|S|rs16990981** - see [the manual] for details). This feature enables fast and sensitive genotyping in downstream analyses. Note that there is no alignment penalty for mismatches, insertions, and deletions if they correspond to known SNPs. +* HISAT2 provides options for transcript assemblers (e.g., StringTie and Cufflinks) to work better with the alignment from HISAT2 (see options such as `--dta` and `--dta-cufflinks`). 
+* Some slides about HISAT2 are found [here]({{ '/assets/data/HISAT2-first_release-Sept_8_2015.pdf' | prepend: site.baseurl }}) and we are preparing detailed documentation. +* We plan to incorporate a larger set of SNPs and structural variations (SV) into this index (e.g., long insertions/deletions, inversions, and translocations). + +[the manual]: {{ site.baseurl }}{% link _pages/manual.md %} + +### The HISAT2 source code is available in a [public GitHub repository](https://github.com/DaehwanKimLab/hisat2) (5/30/2015). + + diff --git a/docs/_pages/howto.md b/docs/_pages/howto.md new file mode 100644 index 0000000..3fc968d --- /dev/null +++ b/docs/_pages/howto.md @@ -0,0 +1,78 @@ +--- +layout: page +title: HowTo +permalink: /howto/ +order: 6 +share: false +--- + +## HOWTO +{: .no_toc} + +- TOC +{:toc} + +### Building indexes +Depending on your purpose, you have to download reference sequence, gene annotation and SNP files. +We also provide scripts to build indexes. [Download]({{ site.baseurl }}{% link _pages/download.md %}) + +#### Prepare data +1. Download reference +``` +$ wget ftp://ftp.ensembl.org/pub/release-84/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz +$ gzip -d Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz +$ mv Homo_sapiens.GRCh38.dna.primary_assembly.fa genome.fa +``` + +1. Download GTF and make exon, splicesite file. + If you want to build HFM index, you can skip this step. +``` +$ wget ftp://ftp.ensembl.org/pub/release-84/gtf/homo_sapiens/Homo_sapiens.GRCh38.84.gtf.gz +$ gzip -d Homo_sapiens.GRCh38.84.gtf.gz +$ mv Homo_sapiens.GRCh38.84.gtf genome.gtf +$ hisat2_extract_splice_sites.py genome.gtf > genome.ss +$ hisat2_extract_exons.py genome.gtf > genome.exon +``` + +1. Download SNP + If you want to build HFM index, you can skip this step. 
+``` +$ wget http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/snp144Common.txt.gz +$ gzip -d snp144Common.txt.gz +``` + + Convert chromosome names of UCSC Database to Ensembl Annotation +``` +$ awk 'BEGIN{OFS="\t"} {if($2 ~ /^chr/) {$2 = substr($2, 4)}; if($2 == "M") {$2 = "MT"} print}' snp144Common.txt > snp144Common.txt.ensembl +``` + + make SNPs and haplotype file +``` +$ hisat2_extract_snps_haplotypes_UCSC.py genome.fa snp144Common.txt.ensembl genome +``` + +#### Build HFM index +It takes about 20 minutes(depend on HW spec) to build index, and requires at least 6GB memory. +``` +$ hisat2-build -p 16 genome.fa genome +``` + +#### Build HGFM index with SNPs +``` +$ hisat2-build -p 16 --snp genome.snp --haplotype genome.haplotype genome.fa genome_snp +``` + +#### Build HGFM index with transcripts +It takes about 1 hour(depend on HW spec) to build index, and requires at least 160GB memory. +``` +$ hisat2-build -p 16 --exon genome.exon --ss genome.ss genome.fa genome_tran +``` + +#### Build HGFM index with SNPs and transcripts + +``` +$ hisat2-build -p 16 --snp genome.snp --haplotype genome.haplotype --exon genome.exon --ss genome.ss genome.fa genome_snp_tran +``` + + + diff --git a/docs/_pages/links.md b/docs/_pages/links.md new file mode 100644 index 0000000..94a5e8c --- /dev/null +++ b/docs/_pages/links.md @@ -0,0 +1,17 @@ +--- +layout: page +title: Links +permalink: /links/ +order: 7 +share: false +--- + +* KimLab - + * github - +* hisat-genotype - + * github for hisat-genotype - + +* Lyda Hill Department of Bioinformatics at UT Southwestern Medical Center - + +* Center for Computational Biology at Johns Hopkins University - + diff --git a/docs/_pages/manual.md b/docs/_pages/manual.md new file mode 100644 index 0000000..edb7c47 --- /dev/null +++ b/docs/_pages/manual.md @@ -0,0 +1,1545 @@ +--- +layout: page +title: Manual +permalink: /manual/ +order: 3 +share: false +--- + +Introduction +============ + +What is HISAT2? 
+----------------- + +HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads +(whole-genome, transcriptome, and exome sequencing data) against the general human population +(as well as against a single reference genome). Based on [GCSA] (an extension of [BWT] for a graph), we designed and implemented a graph FM index (GFM), +an original approach and its first implementation to the best of our knowledge. +In addition to using one global GFM index that represents the general population, +HISAT2 uses a large set of small GFM indexes that collectively cover the whole genome +(each index representing a genomic region of 56 Kbp, with 55,000 indexes needed to cover the human population). +These small indexes (called local indexes) combined with several alignment strategies enable effective alignment of sequencing reads. +This new indexing scheme is called Hierarchical Graph FM index (HGFM). +We have developed HISAT2 based on the [HISAT] and [Bowtie2] implementations. +HISAT2 outputs alignments in [SAM] format, enabling interoperation with a large number of other tools (e.g. [SAMtools], [GATK]) that use SAM. +HISAT2 is distributed under the [GPLv3 license], and it runs on the command line under +Linux, Mac OS X and Windows. 
+ +[HISAT2]: https://daehwankimlab.github.io/hisat2 +[HISAT]: http://ccb.jhu.edu/software/hisat +[Bowtie2]: http://bowtie-bio.sf.net/bowtie2 +[Bowtie]: http://bowtie-bio.sf.net +[Bowtie1]: http://bowtie-bio.sf.net +[GCSA]: http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6698337&tag=1 +[Burrows-Wheeler Transform]: http://en.wikipedia.org/wiki/Burrows-Wheeler_transform +[BWT]: http://en.wikipedia.org/wiki/Burrows-Wheeler_transform +[FM Index]: http://en.wikipedia.org/wiki/FM-index +[SAM]: http://samtools.sourceforge.net/SAM1.pdf +[SAMtools]: http://samtools.sourceforge.net +[GATK]: http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit +[TopHat2]: http://ccb.jhu.edu/software/tophat +[Cufflinks]: http://cufflinks.cbcb.umd.edu/ +[Crossbow]: http://bowtie-bio.sf.net/crossbow +[Myrna]: http://bowtie-bio.sf.net/myrna +[Bowtie paper]: http://genomebiology.com/2009/10/3/R25 +[GPLv3 license]: http://www.gnu.org/licenses/gpl-3.0.html + + +Obtaining HISAT2 +================== + +Download HISAT2 sources and binaries from the Releases sections on the right side. +Binaries are available for Intel architectures (`x86_64`) running Linux, and Mac OS X. + +Building from source +-------------------- + +Building HISAT2 from source requires a GNU-like environment with GCC, GNU Make +and other basics. It should be possible to build HISAT2 on most vanilla Linux +installations or on a Mac installation with [Xcode] installed. HISAT2 can +also be built on Windows using [Cygwin] or [MinGW] (MinGW recommended). For a +MinGW build the choice of what compiler is to be used is important since this +will determine if a 32 or 64 bit code can be successfully compiled using it. If +there is a need to generate both 32 and 64 bit on the same machine then a multilib +MinGW has to be properly installed. [MSYS], the [zlib] library, and depending on +architecture [pthreads] library are also required. 
We are recommending a 64 bit +build since it has some clear advantages in real life research problems. In order +to simplify the MinGW setup it might be worth investigating popular MinGW personal +builds since these are coming already prepared with most of the toolchains needed. + +First, download the [source package] from the Download section on the right side. +Unzip the file, change to the unzipped directory, and build the +HISAT2 tools by running GNU `make` (usually with the command `make`, but +sometimes with `gmake`) with no arguments. If building with MinGW, run `make` +from the MSYS environment. + +HISAT2 is using the multithreading software model in order to speed up +execution times on SMP architectures where this is possible. On POSIX +platforms (like linux, Mac OS, etc) it needs the pthread library. Although +it is possible to use pthread library on non-POSIX platform like Windows, due +to performance reasons HISAT2 will try to use Windows native multithreading +if possible. + +For the support of SRA data access in HISAT2, please download and install the [NCBI-NGS] toolkit. +When running `make`, specify additional variables as follow. +`make USE_SRA=1 NCBI_NGS_DIR=/path/to/NCBI-NGS-directory NCBI_VDB_DIR=/path/to/NCBI-NGS-directory`, +where `NCBI_NGS_DIR` and `NCBI_VDB_DIR` will be used in Makefile for `-I` and `-L` compilation options. +For example, `$(NCBI_NGS_DIR)/include` and `$(NCBI_NGS_DIR)/lib64` will be used. 
+ +[Cygwin]: http://www.cygwin.com/ +[MinGW]: http://www.mingw.org/ +[MSYS]: http://www.mingw.org/wiki/msys +[zlib]: http://cygwin.com/packages/mingw-zlib/ +[pthreads]: http://sourceware.org/pthreads-win32/ +[GnuWin32]: http://gnuwin32.sf.net/packages/coreutils.htm +[Download]: https://sourceforge.net/projects/bowtie-bio/files/bowtie2/ +[sourceforge site]: https://sourceforge.net/projects/bowtie-bio/files/bowtie2/ +[source package]: {{ site.baseurl }}{% link _pages/download.md %} +[Xcode]: http://developer.apple.com/xcode/ +[NCBI-NGS]: https://github.com/ncbi/ngs/wiki/Downloads + +Running HISAT2 +============= + +Adding to PATH +-------------- + +By adding your new HISAT2 directory to your [PATH environment variable], you +ensure that whenever you run `hisat2`, `hisat2-build` or `hisat2-inspect` +from the command line, you will get the version you just installed without +having to specify the entire path. This is recommended for most users. To do +this, follow your operating system's instructions for adding the directory to +your [PATH]. + +If you would like to install HISAT2 by copying the HISAT2 executable files +to an existing directory in your [PATH], make sure that you copy all the +executables, including `hisat2`, `hisat2-align-s`, `hisat2-align-l`, `hisat2-build`, `hisat2-build-s`, `hisat2-build-l`, `hisat2-inspect`, `hisat2-inspect-s` and +`hisat2-inspect-l`. + +[PATH environment variable]: http://en.wikipedia.org/wiki/PATH_(variable) +[PATH]: http://en.wikipedia.org/wiki/PATH_(variable) + +Reporting +--------- + +The reporting mode governs how many alignments HISAT2 looks for, and how to +report them. + +In general, when we say that a read has an alignment, we mean that it has a +[valid alignment]. When we say that a read has multiple alignments, we mean +that it has multiple alignments that are valid and distinct from one another. 
+ +[valid alignment]: #valid-alignments-meet-or-exceed-the-minimum-score-threshold + +By default, HISAT2 may soft-clip reads near their 5' and 3' ends. Users can control this behavior by setting different penalties for soft-clipping ([`--sp`]) or by disallowing soft-clipping ([`--no-softclip`]). + +### Distinct alignments map a read to different places + +Two alignments for the same individual read are "distinct" if they map the same +read to different places. Specifically, we say that two alignments are distinct +if there are no alignment positions where a particular read offset is aligned +opposite a particular reference offset in both alignments with the same +orientation. E.g. if the first alignment is in the forward orientation and +aligns the read character at read offset 10 to the reference character at +chromosome 3, offset 3,445,245, and the second alignment is also in the forward +orientation and also aligns the read character at read offset 10 to the +reference character at chromosome 3, offset 3,445,245, they are not distinct +alignments. + +Two alignments for the same pair are distinct if either the mate 1s in the two +paired-end alignments are distinct or the mate 2s in the two alignments are +distinct or both. + +### Default mode: search for one or more alignments, report each + +HISAT2 searches for up to N distinct, primary alignments for +each read, where N equals the integer specified with the `-k` parameter. +Primary alignments are alignments whose alignment score is equal to or higher than that of any other alignment. +It is possible that multiple distinct alignments have the same score. +That is, if `-k 2` is specified, HISAT2 will search for at most 2 distinct +alignments. The alignment score for a paired-end alignment equals the sum of the +alignment scores of the individual mates. Each reported read or pair alignment +beyond the first has the SAM 'secondary' bit (which equals 256) set in its FLAGS +field. See the [SAM specification] for details.
+ +HISAT2 does not "find" alignments in any specific order, so for reads that +have more than N distinct, valid alignments, HISAT2 does not guarantee that +the N alignments reported are the best possible in terms of alignment score. +Still, this mode can be effective and fast in situations where the user cares +more about whether a read aligns (or aligns a certain number of times) than +where exactly it originated. + + +[SAM specification]: http://samtools.sourceforge.net/SAM1.pdf + +Alignment summary +------------------ + +When HISAT2 finishes running, it prints messages summarizing what happened. +These messages are printed to the "standard error" ("stderr") filehandle. For +datasets consisting of unpaired reads, the summary might look like this: + + 20000 reads; of these: + 20000 (100.00%) were unpaired; of these: + 1247 (6.24%) aligned 0 times + 18739 (93.69%) aligned exactly 1 time + 14 (0.07%) aligned >1 times + 93.77% overall alignment rate + +For datasets consisting of pairs, the summary might look like this: + + 10000 reads; of these: + 10000 (100.00%) were paired; of these: + 650 (6.50%) aligned concordantly 0 times + 8823 (88.23%) aligned concordantly exactly 1 time + 527 (5.27%) aligned concordantly >1 times + ---- + 650 pairs aligned concordantly 0 times; of these: + 34 (5.23%) aligned discordantly 1 time + ---- + 616 pairs aligned 0 times concordantly or discordantly; of these: + 1232 mates make up the pairs; of these: + 660 (53.57%) aligned 0 times + 571 (46.35%) aligned exactly 1 time + 1 (0.08%) aligned >1 times + 96.70% overall alignment rate + +The indentation indicates how subtotals relate to totals. + +Wrapper +------- + +The `hisat2`, `hisat2-build` and `hisat2-inspect` executables are actually +wrapper scripts that call binary programs as appropriate. The wrappers shield +users from having to distinguish between "small" and "large" index formats, +discussed briefly in the following section. 
Also, the `hisat2` wrapper +provides some key functionality, like the ability to handle compressed inputs, +and the functionality for [`--un`], [`--al`] and related options. + +It is recommended that you always run the hisat2 wrappers and not run the +binaries directly. + +Small and large indexes +----------------------- + +`hisat2-build` can index reference genomes of any size. For genomes less than +about 4 billion nucleotides in length, `hisat2-build` builds a "small" index +using 32-bit numbers in various parts of the index. When the genome is longer, +`hisat2-build` builds a "large" index using 64-bit numbers. Small indexes are +stored in files with the `.ht2` extension, and large indexes are stored in +files with the `.ht2l` extension. The user need not worry about whether a +particular index is small or large; the wrapper scripts will automatically build +and use the appropriate index. + +Performance tuning +------------------ + +1. If your computer has multiple processors/cores, use `-p` + The [`-p`] option causes HISAT2 to launch a specified number of parallel + search threads. Each thread runs on a different processor/core and all + threads find alignments in parallel, increasing alignment throughput by + approximately a multiple of the number of threads (though in practice, + speedup is somewhat worse than linear). + +Command Line +------------ + +### Setting function options + +Some HISAT2 options specify a function rather than an individual number or +setting. In these cases the user specifies three parameters: (a) a function +type `F`, (b) a constant term `B`, and (c) a coefficient `A`. The available +function types are constant (`C`), linear (`L`), square-root (`S`), and natural +log (`G`). The parameters are specified as `F,B,A` - that is, the function type, +the constant term, and the coefficient are separated by commas with no +whitespace. The constant term and coefficient may be negative and/or +floating-point numbers. 
+ +For example, if the function specification is `L,-0.4,-0.6`, then the function +defined is: + + f(x) = -0.4 + -0.6 * x + +If the function specification is `G,1,5.4`, then the function defined is: + + f(x) = 1.0 + 5.4 * ln(x) + +See the documentation for the option in question to learn what the parameter `x` +is for. For example, in the case of the [`--score-min`] option, the function +`f(x)` sets the minimum alignment score necessary for an alignment to be +considered valid, and `x` is the read length. + +### Usage + + hisat2 [options]* -x <hisat2-idx> {-1 <m1> -2 <m2> | -U <r> | --sra-acc <SRA accession number>} [-S <hit>] + +### Main arguments + +* `-x <hisat2-idx>` + The basename of the index for the reference genome. The basename is the name of + any of the index files up to but not including the final `.1.ht2` / etc. + `hisat2` looks for the specified index first in the current directory, + then in the directory specified in the `HISAT2_INDEXES` environment variable. +{: #hisat2-options-x} +[`-x`]: #hisat2-options-x + +* `-1 <m1>` + Comma-separated list of files containing mate 1s (filename usually includes + `_1`), e.g. `-1 flyA_1.fq,flyB_1.fq`. Sequences specified with this option must + correspond file-for-file and read-for-read with those specified in `<m2>`. Reads + may be a mix of different lengths. If `-` is specified, `hisat2` will read the + mate 1s from the "standard in" or "stdin" filehandle. +{: #hisat2-options-1} +[`-1`]: #hisat2-options-1 + +* `-2 <m2>` + Comma-separated list of files containing mate 2s (filename usually includes + `_2`), e.g. `-2 flyA_2.fq,flyB_2.fq`. Sequences specified with this option must + correspond file-for-file and read-for-read with those specified in `<m1>`. Reads + may be a mix of different lengths. If `-` is specified, `hisat2` will read the + mate 2s from the "standard in" or "stdin" filehandle. +{: #hisat2-options-2} +[`-2`]: #hisat2-options-2 + +* `-U <r>` + Comma-separated list of files containing unpaired reads to be aligned, e.g. + `lane1.fq,lane2.fq,lane3.fq,lane4.fq`.
Reads may be a mix of different lengths. + If `-` is specified, `hisat2` gets the reads from the "standard in" or "stdin" + filehandle. +{: #hisat2-options-U} +[`-U`]: #hisat2-options-U + +* `--sra-acc <SRA accession number>` + Comma-separated list of SRA accession numbers, e.g. `--sra-acc SRR353653,SRR353654`. + Information about read types is available at http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?sp=runinfo&acc=sra-acc&retmode=xml, + where sra-acc is the SRA accession number. If users run HISAT2 on a computer cluster, it is recommended to disable SRA-related caching (see the instruction at [SRA-MANUAL]). +{: #hisat2-options-sra-acc} +[`--sra-acc`]: #hisat2-options-sra-acc +[SRA-MANUAL]: https://github.com/ncbi/sra-tools/wiki/Toolkit-Configuration + +* `-S <hit>` + File to write SAM alignments to. By default, alignments are written to the + "standard out" or "stdout" filehandle (i.e. the console). +{: #hisat2-options-S} +[`-S`]: #hisat2-options-S + +### Options + +#### Input options + +* `-q` + Reads (specified with `<m1>`, `<m2>`, `<r>`) are FASTQ files. FASTQ files + usually have extension `.fq` or `.fastq`. FASTQ is the default format. See + also: [`--solexa-quals`] and [`--int-quals`]. +{: #hisat2-options-q} +[`-q`]: #hisat2-options-q + +* `--qseq` + Reads (specified with `<m1>`, `<m2>`, `<r>`) are QSEQ files. QSEQ files usually + end in `_qseq.txt`. See also: [`--solexa-quals`] and [`--int-quals`]. +{: #hisat2-options-qseq} +[`--qseq`]: #hisat2-options-qseq + +* `-f` + Reads (specified with `<m1>`, `<m2>`, `<r>`) are FASTA files. FASTA files + usually have extension `.fa`, `.fasta`, `.mfa`, `.fna` or similar. FASTA files + do not have a way of specifying quality values, so when `-f` is set, the result + is as if `--ignore-quals` is also set. +{: #hisat2-options-f} +[`-f`]: #hisat2-options-f + +* `-r` + Reads (specified with `<m1>`, `<m2>`, `<r>`) are files with one input sequence + per line, without any other information (no read names, no qualities). When + `-r` is set, the result is as if `--ignore-quals` is also set.
+{: #hisat2-options-r} +[`-r`]: #hisat2-options-r + +* `-c` + The read sequences are given on the command line. I.e. `<m1>`, `<m2>` and + `<r>` are comma-separated lists of reads rather than lists of read files. + There is no way to specify read names or qualities, so `-c` also implies + `--ignore-quals`. +{: #hisat2-options-c} +[`-c`]: #hisat2-options-c + +* `-s/--skip <int>` + Skip (i.e. do not align) the first `<int>` reads or pairs in the input. +{: #hisat2-options-s} +[`-s`/`--skip`]: #hisat2-options-s +[`-s`]: #hisat2-options-s + +* `-u/--upto <int>` + Align the first `<int>` reads or read pairs from the input (after the + [`-s`/`--skip`] reads or pairs have been skipped), then stop. Default: no limit. +{: #hisat2-options-u} +[`-u`/`--qupto`]: #hisat2-options-u +[`-u`]: #hisat2-options-u + +* `-5/--trim5 <int>` + Trim `<int>` bases from 5' (left) end of each read before alignment (default: 0). +{: #hisat2-options-5} +[`-5`/`--trim5`]: #hisat2-options-5 +[`-5`]: #hisat2-options-5 + +* `-3/--trim3 <int>` + Trim `<int>` bases from 3' (right) end of each read before alignment (default: 0). +{: #hisat2-options-3} +[`-3`/`--trim3`]: #hisat2-options-3 +[`-3`]: #hisat2-options-3 + +* `--phred33` + Input qualities are ASCII chars equal to the [Phred quality] plus 33. This is + also called the "Phred+33" encoding, which is used by the very latest Illumina + pipelines. +{: #hisat2-options-phred33-quals} +[`--phred33`]: #hisat2-options-phred33-quals +[Phred quality]: http://en.wikipedia.org/wiki/Phred_quality_score + +* `--phred64` + Input qualities are ASCII chars equal to the [Phred quality] plus 64. This is + also called the "Phred+64" encoding. +{: #hisat2-options-phred64-quals} +[`--phred64`]: #hisat2-options-phred64-quals + +* `--solexa-quals` + Convert input qualities from [Solexa][Phred quality] (which can be negative) to + [Phred][Phred quality] (which can't). This scheme was used in older Illumina GA + Pipeline versions (prior to 1.3). Default: off.
+{: #hisat2-options-solexa-quals} +[`--solexa-quals`]: #hisat2-options-solexa-quals + +* `--int-quals` + Quality values are represented in the read input file as space-separated ASCII + integers, e.g., `40 40 30 40`..., rather than ASCII characters, e.g., `II?I`.... + Integers are treated as being on the [Phred quality] scale unless + [`--solexa-quals`] is also specified. Default: off. +{: #hisat2-options-int-quals} +[`--int-quals`]: #hisat2-options-int-quals + +#### Alignment options + +* `--n-ceil <func>` + Sets a function governing the maximum number of ambiguous characters (usually + `N`s and/or `.`s) allowed in a read as a function of read length. For instance, + specifying `L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, + where `x` is the read length. See also: [setting function options]. Reads + exceeding this ceiling are [filtered out]. Default: `L,0,0.15`. +{: #hisat2-options-n-ceil} +[`--n-ceil`]: #hisat2-options-n-ceil +[filtered out]: #filtering + +* `--ignore-quals` + When calculating a mismatch penalty, always consider the quality value at the + mismatched position to be the highest possible, regardless of the actual value. + I.e. input is treated as though all quality values are high. This is also the + default behavior when the input doesn't specify quality values (e.g. in [`-f`], + [`-r`], or [`-c`] modes). +{: #hisat2-options-ignore-quals} +[`--ignore-quals`]: #hisat2-options-ignore-quals + +* `--nofw/--norc` + If `--nofw` is specified, `hisat2` will not attempt to align unpaired reads to + the forward (Watson) reference strand. If `--norc` is specified, `hisat2` will + not attempt to align unpaired reads against the reverse-complement (Crick) + reference strand. In paired-end mode, `--nofw` and `--norc` pertain to the + fragments; i.e. specifying `--nofw` causes `hisat2` to explore only those + paired-end configurations corresponding to fragments from the reverse-complement + (Crick) strand. Default: both strands enabled.
+{: #hisat2-options-nofw} +[`--nofw`]: #hisat2-options-nofw + +#### Scoring options + +* `--mp MX,MN` + Sets the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers. A + number less than or equal to `MX` and greater than or equal to `MN` is + subtracted from the alignment score for each position where a read character + aligns to a reference character, the characters do not match, and neither is an + `N`. If [`--ignore-quals`] is specified, the number subtracted equals `MX`. + Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` + where Q is the Phred quality value. Default: `MX` = 6, `MN` = 2. +{: #hisat2-options-mp} +[`--mp`]: #hisat2-options-mp + +* `--sp MX,MN` + Sets the maximum (`MX`) and minimum (`MN`) penalties for soft-clipping per base, + both integers. A number less than or equal to `MX` and greater than or equal to `MN` is + subtracted from the alignment score for each soft-clipped position. + The number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` + where Q is the Phred quality value. Default: `MX` = 2, `MN` = 1. +{: #hisat2-options-sp} +[`--sp`]: #hisat2-options-sp + +* `--no-softclip` + Disallow soft-clipping. +{: #hisat2-options-no-softclip} +[`--no-softclip`]: #hisat2-options-no-softclip + +* `--np <int>` + Sets penalty for positions where the read, reference, or both, contain an + ambiguous character such as `N`. Default: 1. +{: #hisat2-options-np} +[`--np`]: #hisat2-options-np + +* `--rdg <int1>,<int2>` + Sets the read gap open (`<int1>`) and extend (`<int2>`) penalties. A read gap of + length N gets a penalty of `<int1>` + N * `<int2>`. Default: 5, 3. +{: #hisat2-options-rdg} +[`--rdg`]: #hisat2-options-rdg + +* `--rfg <int1>,<int2>` + Sets the reference gap open (`<int1>`) and extend (`<int2>`) penalties. A + reference gap of length N gets a penalty of `<int1>` + N * `<int2>`. Default: + 5, 3. +{: #hisat2-options-rfg} +[`--rfg`]: #hisat2-options-rfg + +* `--score-min <func>` + Sets a function governing the minimum alignment score needed for an alignment to + be considered "valid" (i.e.
good enough to report). This is a function of read + length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` + to `f(x) = 0 + -0.6 * x`, where `x` is the read length. See also: [setting function options]. The default is `L,0,-0.2`. +{: #hisat2-options-score-min} +[`--score-min`]: #hisat2-options-score-min + +#### Spliced alignment options + +* `--pen-cansplice ` + Sets the penalty for each pair of canonical splice sites (e.g. GT/AG). Default: 0. +{: #hisat2-options-pen-cansplice} +[`--pen-cansplice`]: #hisat2-options-pen-cansplice + +* `--pen-noncansplice ` + Sets the penalty for each pair of non-canonical splice sites (e.g. non-GT/AG). Default: 12. +{: #hisat2-options-pen-noncansplice} +[`--pen-noncansplice`]: #hisat2-options-pen-noncansplice + +* `--pen-canintronlen ` + Sets the penalty for long introns with canonical splice sites so that alignments with shorter introns are preferred + to those with longer ones. Default: G,-8,1 +{: #hisat2-options-pen-canintronlen} +[`--pen-canintronlen`]: #hisat2-options-pen-canintronlen + +* `--pen-noncanintronlen ` + Sets the penalty for long introns with noncanonical splice sites so that alignments with shorter introns are preferred + to those with longer ones. Default: G,-8,1 +{: #hisat2-options-pen-noncanintronlen} +[`--pen-noncanintronlen`]: #hisat2-options-pen-noncanintronlen + +* `--min-intronlen ` + Sets minimum intron length. Default: 20 +{: #hisat2-options-min-intronlen} +[`--min-intronlen`]: #hisat2-options-min-intronlen + +* `--max-intronlen ` + Sets maximum intron length. Default: 500000 +{: #hisat2-options-max-intronlen} +[`--max-intronlen`]: #hisat2-options-max-intronlen + +* `--known-splicesite-infile ` + With this mode, you can provide a list of known splice sites, which HISAT2 makes use of to align reads with small anchors. 
+ You can create such a list using `python hisat2_extract_splice_sites.py genes.gtf > splicesites.txt`, + where `hisat2_extract_splice_sites.py` is included in the HISAT2 package, `genes.gtf` is a gene annotation file, + and `splicesites.txt` is a list of splice sites with which you provide HISAT2 in this mode. + Note that it is better to use indexes built using annotated transcripts (such as genome_tran or genome_snp_tran), which work better + than using this option. Providing splice sites that are already included in the indexes has no effect. +{: #hisat2-options-known-splicesite-infile} +[`--known-splicesite-infile`]: #hisat2-options-known-splicesite-infile + +* `--novel-splicesite-outfile <path>` + In this mode, HISAT2 reports a list of splice sites in the file `<path>`: + > chromosome name `<tab>` genomic position of the flanking base on the left side of an intron `<tab>` genomic position of the flanking base on the right `<tab>` strand (+, -, and .) + + '.' indicates an unknown strand for non-canonical splice sites. +{: #hisat2-options-novel-splicesite-outfile} +[`--novel-splicesite-outfile`]: #hisat2-options-novel-splicesite-outfile + +* `--novel-splicesite-infile <path>` + With this mode, you can provide a list of novel splice sites that were generated from the above option "--novel-splicesite-outfile". +{: #hisat2-options-novel-splicesite-infile} +[`--novel-splicesite-infile`]: #hisat2-options-novel-splicesite-infile + +* `--no-temp-splicesite` + HISAT2, by default, makes use of splice sites found by earlier reads to align later reads in the same run, + in particular, reads with small anchors (<= 15 bp). + The option disables this default alignment strategy. +{: #hisat2-options-no-temp-splicesite} +[`--no-temp-splicesite`]: #hisat2-options-no-temp-splicesite + +* `--no-spliced-alignment` + Disable spliced alignment.
+{: #hisat2-options-no-spliced-alignment} +[`--no-spliced-alignment`]: #hisat2-options-no-spliced-alignment + +* `--rna-strandness ` + Specify strand-specific information: the default is unstranded. + For single-end reads, use F or R. + >'F' means a read corresponds to a transcript. + >'R' means a read corresponds to the reverse complemented counterpart of a transcript. + + For paired-end reads, use either FR or RF. + With this option being used, every read alignment will have an XS attribute tag: + >'+' means a read belongs to a transcript on '+' strand of genome. + >'-' means a read belongs to a transcript on '-' strand of genome. + + (TopHat has a similar option, --library-type option, where fr-firststrand corresponds to R and RF; fr-secondstrand corresponds to F and FR.) +{: #hisat2-options-rna-strandness} +[`--rna-strandness`]: #hisat2-options-rna-strandness + +* `--tmo/--transcriptome-mapping-only` + Report only those alignments within known transcripts. +{: #hisat2-options-tmo} +[`--tmo/--transcriptome-mapping-only`]: #hisat2-options-tmo + +* `--dta/--downstream-transcriptome-assembly` + Report alignments tailored for transcript assemblers including StringTie. + With this option, HISAT2 requires longer anchor lengths for de novo discovery of splice sites. + This leads to fewer alignments with short-anchors, + which helps transcript assemblers improve significantly in computation and memory usage. +{: #hisat2-options-dta} +[`--dta/--downstream-transcriptome-assembly`]: #hisat2-options-dta + +* `--dta-cufflinks` + Report alignments tailored specifically for Cufflinks. In addition to what HISAT2 does with the above option (--dta), + With this option, HISAT2 looks for novel splice sites with three signals (GT/AG, GC/AG, AT/AC), but all user-provided splice sites are used irrespective of their signals. + HISAT2 produces an optional field, XS:A:[+-], for every spliced alignment. 
+{: #hisat2-options-dta-cufflinks} +[`--dta-cufflinks`]: #hisat2-options-dta-cufflinks + +* `--avoid-pseudogene` + Try to avoid aligning reads to pseudogenes. Note this option is experimental and needs further investigation. +{: #hisat2-options-avoid-pseudogene} +[`--avoid-pseudogene`]: #hisat2-options-avoid-pseudogene + +* `--no-templatelen-adjustment` + Disables template length adjustment for RNA-seq reads. +{: #hisat2-options-no-templatelen-adjustment} +[`--no-templatelen-adjustment`]: #hisat2-options-no-templatelen-adjustment + +#### Reporting options + +* `-k <int>` + It searches for at most `<int>` distinct, primary alignments for each read. + Primary alignments mean alignments whose alignment score is equal to or higher than any other alignments. + The search terminates when it cannot find more distinct valid alignments, or when it + finds `<int>`, whichever happens first. The alignment score for a paired-end + alignment equals the sum of the alignment scores of the individual mates. Each + reported read or pair alignment beyond the first has the SAM 'secondary' bit + (which equals 256) set in its FLAGS field. For reads that have more than + `<int>` distinct, valid alignments, **hisat2** does not guarantee that the + `<int>` alignments reported are the best possible in terms of alignment score. Default: 5 (linear index) or 10 (graph index). +

    + Note: HISAT2 is not designed with large values for `-k` in mind, and when + aligning reads to long, repetitive genomes, large `-k` could make alignment much slower. +{: #hisat2-options-k} +[`-k`]: #hisat2-options-k + +* `--max-seeds <int>` + HISAT2, like other aligners, uses seed-and-extend approaches. HISAT2 tries to extend seeds to full-length alignments. In HISAT2, `--max-seeds` is used to control the maximum number of seeds that will be extended. For DNA-read alignment ([`--no-spliced-alignment`]), HISAT2 extends up to this many seeds and skips the rest of the seeds. For RNA-read alignment, HISAT2 skips extending seeds and reports no alignments if the number of seeds is larger than the number specified with the option, to be compatible with previous versions of HISAT2. Large values for `--max-seeds` may improve alignment sensitivity, but HISAT2 is not designed with large values for `--max-seeds` in mind, and when aligning reads to long, repetitive genomes, large `--max-seeds` could make alignment much slower. The default value is the maximum of 5 and the value that comes with `-k` times 2. +{: #hisat2-options-max-seeds} +[`--max-seeds`]: #hisat2-options-max-seeds + +* `-a/--all` + HISAT2 reports all alignments it can find. Using the option is equivalent to using both [`--max-seeds`] and [`-k`] with the maximum value that a 64-bit signed integer can represent (9,223,372,036,854,775,807). +{: #hisat2-options-a} +[`-a`/`--all`]: #hisat2-options-a +[`-a`]: #hisat2-options-a + + +* `--secondary` + Report secondary alignments. +{: #hisat2-options-secondary} +[`--secondary`]: #hisat2-options-secondary + +#### Paired-end options + +* `-I/--minins <int>` + The minimum fragment length for valid paired-end alignments. This option is valid only with [`--no-spliced-alignment`]. + E.g.
if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the + appropriate orientation with a 20-bp gap between them, that alignment is + considered valid (as long as [`-X`] is also satisfied). A 19-bp gap would not + be valid in that case. If trimming options [`-3`] or [`-5`] are also used, the + [`-I`] constraint is applied with respect to the untrimmed mates. +

    + The larger the difference between [`-I`] and [`-X`], the slower HISAT2 will + run. This is because larger differences between [`-I`] and [`-X`] require that + HISAT2 scan a larger window to determine if a concordant alignment exists. + For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very + efficient. +

    + Default: 0 (essentially imposing no minimum) +{: #hisat2-options-I} +[`-I`/`--minins`]: #hisat2-options-I +[`-I`]: #hisat2-options-I + +* `-X/--maxins ` + The maximum fragment length for valid paired-end alignments. This option is valid only with [`--no-spliced-alignment`]. + E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the + proper orientation with a 60-bp gap between them, that alignment is considered + valid (as long as [`-I`] is also satisfied). A 61-bp gap would not be valid in + that case. If trimming options [`-3`] or [`-5`] are also used, the `-X` + constraint is applied with respect to the untrimmed mates, not the trimmed + mates. +

    + The larger the difference between [`-I`] and [`-X`], the slower HISAT2 will + run. This is because larger differences between [`-I`] and [`-X`] require that + HISAT2 scan a larger window to determine if a concordant alignment exists. + For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very + efficient. +

    + Default: 500. +{: #hisat2-options-X} +[`-X`/`--maxins`]: #hisat2-options-X +[`-X`]: #hisat2-options-X + +* `--fr/--rf/--ff` + The upstream/downstream mate orientations for a valid paired-end alignment + against the forward reference strand. E.g., if `--fr` is specified and there is + a candidate paired-end alignment where mate 1 appears upstream of the reverse + complement of mate 2 and the fragment length constraints ([`-I`] and [`-X`]) are + met, that alignment is valid. Also, if mate 2 appears upstream of the reverse + complement of mate 1 and all other constraints are met, that too is valid. + `--rf` likewise requires that an upstream mate1 be reverse-complemented and a + downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 + and a downstream mate 2 to be forward-oriented. Default: `--fr` (appropriate + for Illumina's Paired-end Sequencing Assay). +{: #hisat2-options-fr} +[`--fr/--rf/--ff`]: #hisat2-options-fr +[`--fr`]: #hisat2-options-fr +[`--rf`]: #hisat2-options-fr +[`--ff`]: #hisat2-options-fr + +* `--no-mixed` + By default, when `hisat2` cannot find a concordant or discordant alignment for + a pair, it then tries to find alignments for the individual mates. This option + disables that behavior. +{: #hisat2-options-no-mixed} +[`--no-mixed`]: #hisat2-options-no-mixed + +* `--no-discordant` + By default, `hisat2` looks for discordant alignments if it cannot find any + concordant alignments. A discordant alignment is an alignment where both mates + align uniquely, but that does not satisfy the paired-end constraints + ([`--fr/--rf/--ff`], [`-I`], [`-X`]). This option disables that behavior. +{: #hisat2-options-no-discordant} +[`--no-discordant`]: #hisat2-options-no-discordant + +#### Output options + +* `-t/--time` + Print the wall-clock time required to load the index files and align the reads. + This is printed to the "standard error" ("stderr") filehandle. Default: off. 
+{: #hisat2-options-t} +[`-t`/`--time`]: #hisat2-options-t +[`-t`]: #hisat2-options-t + +* `--un `, `--un-gz `, `--un-bz2 ` + Write unpaired reads that fail to align to file at ``. These reads + correspond to the SAM records with the FLAGS `0x4` bit set and neither the + `0x40` nor `0x80` bits set. If `--un-gz` is specified, output will be gzip + compressed. If `--un-bz2` is specified, output will be bzip2 compressed. Reads + written in this way will appear exactly as they did in the input file, without + any modification (same sequence, same name, same quality string, same quality + encoding). Reads will not necessarily appear in the same order as they did in + the input. +{: #hisat2-options-un} +[`--un`]: #hisat2-options-un +[`--un-gz`]: #hisat2-options-un +[`--un-bz2`]: #hisat2-options-un + +* `--al `, `--al-gz `, `--al-bz2 ` + Write unpaired reads that align at least once to file at ``. These reads + correspond to the SAM records with the FLAGS `0x4`, `0x40`, and `0x80` bits + unset. If `--al-gz` is specified, output will be gzip compressed. If `--al-bz2` + is specified, output will be bzip2 compressed. Reads written in this way will + appear exactly as they did in the input file, without any modification (same + sequence, same name, same quality string, same quality encoding). Reads will + not necessarily appear in the same order as they did in the input. +{: #hisat2-options-al} +[`--al`]: #hisat2-options-al +[`--al-gz`]: #hisat2-options-al +[`--al-bz2`]: #hisat2-options-al + +* `--un-conc `, `--un-conc-gz `, `--un-conc-bz2 ` + Write paired-end reads that fail to align concordantly to file(s) at ``. + These reads correspond to the SAM records with the FLAGS `0x4` bit set and + either the `0x40` or `0x80` bit set (depending on whether it's mate #1 or #2). + `.1` and `.2` strings are added to the filename to distinguish which file + contains mate #1 and mate #2. 
If a percent symbol, `%`, is used in ``, + the percent symbol is replaced with `1` or `2` to make the per-mate filenames. + Otherwise, `.1` or `.2` are added before the final dot in `` to make the + per-mate filenames. Reads written in this way will appear exactly as they did + in the input files, without any modification (same sequence, same name, same + quality string, same quality encoding). Reads will not necessarily appear in + the same order as they did in the inputs. +{: #hisat2-options-un-conc} +[`--un-conc`]: #hisat2-options-un-conc +[`--un-conc-gz`]: #hisat2-options-un-conc +[`--un-conc-bz2`]: #hisat2-options-un-conc + +* `--al-conc `, `--al-conc-gz `, `--al-conc-bz2 ` + Write paired-end reads that align concordantly at least once to file(s) at + ``. These reads correspond to the SAM records with the FLAGS `0x4` bit + unset and either the `0x40` or `0x80` bit set (depending on whether it's mate #1 + or #2). `.1` and `.2` strings are added to the filename to distinguish which + file contains mate #1 and mate #2. If a percent symbol, `%`, is used in + ``, the percent symbol is replaced with `1` or `2` to make the per-mate + filenames. Otherwise, `.1` or `.2` are added before the final dot in `` to + make the per-mate filenames. Reads written in this way will appear exactly as + they did in the input files, without any modification (same sequence, same name, + same quality string, same quality encoding). Reads will not necessarily appear + in the same order as they did in the inputs. +{: #hisat2-options-al-conc} +[`--al-conc`]: #hisat2-options-al-conc +[`--al-conc-gz`]: #hisat2-options-al-conc +[`--al-conc-bz2`]: #hisat2-options-al-conc + +* `--quiet` + Print nothing besides alignments and serious errors. +{: #hisat2-options-quiet} +[`--quiet`]: #hisat2-options-quiet + +* `--summary-file` + Print alignment summary to this file. 
+{: #hisat2-options-summary-file} +[`--summary-file`]: #hisat2-options-summary-file + +* `--new-summary` + Print alignment summary in a new style, which is more machine-friendly. +{: #hisat2-options-new-summary} +[`--new-summary`]: #hisat2-options-new-summary + +* `--met-file ` + Write `hisat2` metrics to file ``. Having alignment metric can be useful + for debugging certain problems, especially performance issues. See also: + [`--met`]. Default: metrics disabled. +{: #hisat2-options-met-file} +[`--met-file`]: #hisat2-options-met-file + +* `--met-stderr` + Write `hisat2` metrics to the "standard error" ("stderr") filehandle. This is + not mutually exclusive with [`--met-file`]. Having alignment metric can be + useful for debugging certain problems, especially performance issues. See also: + [`--met`]. Default: metrics disabled. +{: #hisat2-options-met-stderr} +[`--met-stderr`]: #hisat2-options-met-stderr + +* `--met ` + Write a new `hisat2` metrics record every `` seconds. Only matters if + either [`--met-stderr`] or [`--met-file`] are specified. Default: 1. +{: #hisat2-options-met} +[`--met`]: #hisat2-options-met + +#### SAM options + +* `--no-unal` + Suppress SAM records for reads that failed to align. +{: #hisat2-options-no-unal} +[`--no-unal`]: #hisat2-options-no-unal + +* `--no-hd` + Suppress SAM header lines (starting with `@`). +{: #hisat2-options-no-hd} +[`--no-hd`]: #hisat2-options-no-hd + +* `--no-sq` + Suppress `@SQ` SAM header lines. +{: #hisat2-options-no-sq} +[`--no-sq`]: #hisat2-options-no-sq + +* `--rg-id ` + Set the read group ID to ``. This causes the SAM `@RG` header line to be + printed, with `` as the value associated with the `ID:` tag. It also + causes the `RG:Z:` extra field to be attached to each SAM output record, with + value set to ``. +{: #hisat2-options-rg-id} +[`--rg-id`]: #hisat2-options-rg-id + +* `--rg ` + Add `` (usually of the form `TAG:VAL`, e.g. `SM:Pool1`) as a field on the + `@RG` header line. 
Note: in order for the `@RG` line to appear, [`--rg-id`] + must also be specified. This is because the `ID` tag is required by the [SAM + Spec][SAM]. Specify `--rg` multiple times to set multiple fields. See the + [SAM Spec][SAM] for details about what fields are legal. +{: #hisat2-options-rg} +[`--rg`]: #hisat2-options-rg + +* `--remove-chrname` + Remove 'chr' from reference names in alignment (e.g., chr18 to 18) +{: #hisat2-remove-chrname} +[`--remove-chrname`]: #hisat2-remove-chrname + +* `--add-chrname` + Add 'chr' to reference names in alignment (e.g., 18 to chr18) +{: #hisat2-options-add-chrname} +[`--add-chrname`]: #hisat2-options-add-chrname + +* `--omit-sec-seq` + When printing secondary alignments, HISAT2 by default will write out the `SEQ` + and `QUAL` strings. Specifying this option causes HISAT2 to print an asterisk + in those fields instead. +{: #hisat2-options-omit-sec-seq} +[`--omit-sec-seq`]: #hisat2-options-omit-sec-seq + +#### Performance options + +* `-o/--offrate ` + Override the offrate of the index with ``. If `` is greater + than the offrate used to build the index, then some row markings are + discarded when the index is read into memory. This reduces the memory + footprint of the aligner but requires more time to calculate text + offsets. `` must be greater than the value used to build the + index. +{: #hisat2-options-o} +[`-o`/`--offrate`]: #hisat2-options-o +[`-o`]: #hisat2-options-o +[`--offrate`]: #hisat2-options-o + +* `-p/--threads NTHREADS` + Launch `NTHREADS` parallel search threads (default: 1). Threads will run on + separate processors/cores and synchronize when parsing reads and outputting + alignments. Searching for alignments is highly parallel, and speedup is close + to linear. Increasing `-p` increases HISAT2's memory footprint. E.g. when + aligning to a human genome index, increasing `-p` from 1 to 8 increases the + memory footprint by a few hundred megabytes. 
This option is only available if + HISAT2 is linked with the `pthreads` library. +{: #hisat2-options-p} +[`-p`/`--threads`]: #hisat2-options-p +[`-p`]: #hisat2-options-p + +* `--reorder` + Guarantees that output SAM records are printed in an order corresponding to the + order of the reads in the original input file, even when [`-p`] is set greater + than 1. Specifying `--reorder` and setting [`-p`] greater than 1 causes HISAT2 + to run somewhat slower and use somewhat more memory than if `--reorder` were + not specified. Has no effect if [`-p`] is set to 1, since output order will + naturally correspond to input order in that case. +{: #hisat2-options-reorder} +[`--reorder`]: #hisat2-options-reorder + +* `--mm` + Use memory-mapped I/O to load the index, rather than typical file I/O. + Memory-mapping allows many concurrent `hisat2` processes on the same computer to + share the same memory image of the index (i.e. you pay the memory overhead just + once). This facilitates memory-efficient parallelization of `hisat2` in + situations where using [`-p`] is not possible or not preferable. +{: #hisat2-options-mm} +[`--mm`]: #hisat2-options-mm + +#### Other options + +* `--qc-filter` + Filter out reads for which the QSEQ filter field is non-zero. Only has an + effect when read format is [`--qseq`]. Default: off. +{: #hisat2-options-qc-filter} +[`--qc-filter`]: #hisat2-options-qc-filter + +* `--seed ` + Use `` as the seed for pseudo-random number generator. Default: 0. +{: #hisat2-options-seed} +[`--seed`]: #hisat2-options-seed + +* `--non-deterministic` + Normally, HISAT2 re-initializes its pseudo-random generator for each read. It + seeds the generator with a number derived from (a) the read name, (b) the + nucleotide sequence, (c) the quality sequence, (d) the value of the [`--seed`] + option.
This means that if two reads are identical (same name, same + nucleotides, same qualities) HISAT2 will find and report the same alignment(s) + for both, even if there was ambiguity. When `--non-deterministic` is specified, + HISAT2 re-initializes its pseudo-random generator for each read using the + current time. This means that HISAT2 will not necessarily report the same + alignment for two identical reads. This is counter-intuitive for some users, + but might be more appropriate in situations where the input consists of many + identical reads. +{: #hisat2-options-non-deterministic} +[`--non-deterministic`]: #hisat2-options-non-deterministic + +* `--version` + Print version information and quit. +{: #hisat2-options-version} +[`--version`]: #hisat2-options-version + +* `-h/--help` + Print usage information and quit. +{: #hisat2-options-h} +[`-h`]: #hisat2-options-h + +SAM output +---------- + +Following is a brief description of the [SAM] format as output by `hisat2`. +For more details, see the [SAM format specification][SAM]. + +By default, `hisat2` prints a SAM header with `@HD`, `@SQ` and `@PG` lines. +When one or more [`--rg`] arguments are specified, `hisat2` will also print +an `@RG` line that includes all user-specified [`--rg`] tokens separated by +tabs. + +Each subsequent line describes an alignment or, if the read failed to align, a +read. Each line is a collection of at least 12 fields separated by tabs; from +left to right, the fields are: + + +1. Name of read that aligned. + Note that the [SAM specification] disallows whitespace in the read name. + If the read name contains any whitespace characters, HISAT2 will truncate + the name at the first whitespace character. This is similar to the + behavior of other tools. +2. Sum of all applicable flags. 
+ Flags relevant to HISAT2 are + * 1: The read is one of a pair + * 2: The alignment is one end of a proper paired-end alignment + * 4: The read has no reported alignments + * 8: The read is one of a pair and has no reported alignments + * 16: The alignment is to the reverse reference strand + * 32: The other mate in the paired-end alignment is aligned to the reverse reference strand + * 64: The read is mate 1 in a pair + * 128: The read is mate 2 in a pair + ^ + Thus, an unpaired read that aligns to the reverse reference strand + will have flag 16. A paired-end read that aligns and is the first + mate in the pair will have flag 83 (= 64 + 16 + 2 + 1). +3. Name of reference sequence where alignment occurs +4. 1-based offset into the forward reference strand where leftmost + character of the alignment occurs +5. Mapping quality. Mapping quality of HISAT2 +6. CIGAR string representation of alignment +7. Name of reference sequence where mate's alignment occurs. Set to `=` if the +mate's reference sequence is the same as this alignment's, or `*` if there is no +mate. +8. 1-based offset into the forward reference strand where leftmost character of +the mate's alignment occurs. Offset is 0 if there is no mate. +9. Inferred fragment length. Size is negative if the mate's alignment occurs +upstream of this alignment. Size is 0 if the mates did not align concordantly. +However, size is non-0 if the mates aligned discordantly to the same +chromosome. +10. Read sequence (reverse-complemented if aligned to the reverse strand) +11. ASCII-encoded read qualities (reverse-complemented if the read aligned to +the reverse strand). The encoded quality values are on the [Phred quality] +scale and the encoding is ASCII-offset by 33 (ASCII char `!`), similarly to a +[FASTQ] file. +12. Optional fields. Fields are tab-separated. 
`hisat2` outputs zero or more +of these optional fields for each alignment, depending on the type of the +alignment: + * {: #hisat2-opt-fields-as} `AS:i:` : Alignment score. Can be negative. Only present if SAM record is for + an aligned read. + * {: #hisat2-opt-fields-xs} `ZS:i:` : Alignment score for the best-scoring alignment found other than the + alignment reported. Can be negative. Only present if the SAM record is + for an aligned read and more than one alignment was found for the read. + Note that, when the read is part of a concordantly-aligned pair, this score + could be greater than [`AS:i`]. + * {: #hisat2-opt-fields-ys} `YS:i:` : Alignment score for opposite mate in the paired-end alignment. Only present + if the SAM record is for a read that aligned as part of a paired-end + alignment. + * {: #hisat2-opt-fields-xn} `XN:i:` : The number of ambiguous bases in the reference covering this alignment. + Only present if SAM record is for an aligned read. + * {: #hisat2-opt-fields-xm} `XM:i:` : The number of mismatches in the alignment. Only present if SAM record is + for an aligned read. + * {: #hisat2-opt-fields-xo} `XO:i:` : The number of gap opens, for both read and reference gaps, in the alignment. + Only present if SAM record is for an aligned read. + * {: #hisat2-opt-fields-xg} `XG:i:` : The number of gap extensions, for both read and reference gaps, in the + alignment. Only present if SAM record is for an aligned read. + * {: #hisat2-opt-fields-nm} `NM:i:` : The edit distance; that is, the minimal number of one-nucleotide edits + (substitutions, insertions and deletions) needed to transform the read + string into the reference string. Only present if SAM record is for an + aligned read. + * {: #hisat2-opt-fields-yf} `YF:Z:` : String indicating reason why the read was filtered out. See also: + [Filtering]. Only appears for reads that were filtered out. + * {: #hisat2-opt-fields-yt} `YT:Z:` : Value of `UU` indicates the read was not part of a pair. 
Value of `CP` + indicates the read was part of a pair and the pair aligned concordantly. + Value of `DP` indicates the read was part of a pair and the pair aligned + discordantly. Value of `UP` indicates the read was part of a pair but the + pair failed to align either concordantly or discordantly. + * {: #hisat2-opt-fields-md} `MD:Z:` : A string representation of the mismatched reference bases in the alignment. + See [SAM] format specification for details. Only present if SAM record is + for an aligned read. + * {: #hisat2-opt-fields-xs} `XS:A:` : Values of `+` and `-` indicate the read is mapped to transcripts on sense and anti-sense + strands, respectively. Spliced alignments need to have this field, which is required in Cufflinks and StringTie. + We can report this field for the canonical-splice site (GT/AG), but not for non-canonical splice sites. + You can direct HISAT2 not to output such alignments (involving non-canonical splice sites) using "--pen-noncansplice 1000000". + * {: #hisat2-opt-fields-nh} `NH:i:` : The number of mapped locations for the read or the pair. + * {: #hisat2-opt-fields-Zs} `Zs:Z:` : When the alignment of a read involves SNPs that are in the index, this option is used to indicate where exactly the read involves the SNPs. + This optional field is similar to the above MD:Z field. + For example, `Zs:Z:1|S|rs3747203,97|S|rs16990981` indicates the second base of the read corresponds to a known SNP (ID: rs3747203). + 97 bases after the third base (the base after the second one), the read at 100th base involves another known SNP (ID: rs16990981). + 'S' indicates a single nucleotide polymorphism. 'D' and 'I' indicate a deletion and an insertion, respectively.
+ +[SAM format specification]: http://samtools.sf.net/SAM1.pdf +[FASTQ]: http://en.wikipedia.org/wiki/FASTQ_format +[`-S`/`--sam`]: #hisat2-options-S +[`-m`]: #hisat2-options-m +[`AS:i`]: #hisat2-opt-fields-as +[`ZS:i`]: #hisat2-opt-fields-xs +[`YS:i`]: #hisat2-opt-fields-ys +[`XN:i`]: #hisat2-opt-fields-xn +[`XM:i`]: #hisat2-opt-fields-xm +[`XO:i`]: #hisat2-opt-fields-xo +[`XG:i`]: #hisat2-opt-fields-xg +[`NM:i`]: #hisat2-opt-fields-nm +[`YF:Z`]: #hisat2-opt-fields-yf +[`YT:Z`]: #hisat2-opt-fields-yt +[`MD:Z`]: #hisat2-opt-fields-md +[`XS:A`]: #hisat2-opt-fields-xs +[`NH:i`]: #hisat2-opt-fields-nh +[`Zs:Z`]: #hisat2-opt-fields-Zs + +The `hisat2-build` indexer +=========================== + +`hisat2-build` builds a HISAT2 index from a set of DNA sequences. +`hisat2-build` outputs a set of 8 files with suffixes `.1.ht2`, `.2.ht2`, +`.3.ht2`, `.4.ht2`, `.5.ht2`, `.6.ht2`, `.7.ht2`, and `.8.ht2`. In the case of a large +index these suffixes will have a `ht2l` termination. These files together +constitute the index: they are all that is needed to align reads to that +reference. The original sequence FASTA files are no longer used by HISAT2 +once the index is built. + +Use of Karkkainen's [blockwise algorithm] allows `hisat2-build` to trade off +between running time and memory usage. `hisat2-build` has three options +governing how it makes this trade: [`-p`/`--packed`], [`--bmax`]/[`--bmaxdivn`], +and [`--dcv`]. By default, `hisat2-build` will automatically search for the +settings that yield the best running time without exhausting memory. This +behavior can be disabled using the [`-a`/`--noauto`] option. + +The indexer provides options pertaining to the "shape" of the index, e.g. +[`--offrate`](#hisat2-build-options-o) governs the fraction of [Burrows-Wheeler] +rows that are "marked" (i.e., the density of the suffix-array sample; see the +original [FM Index] paper for details). All of these options are potentially +profitable trade-offs depending on the application.
They have been set to +defaults that are reasonable for most cases according to our experiments. See +[Performance tuning] for details. + +`hisat2-build` can generate either [small or large indexes](#small-and-large-indexes). The wrapper +will decide which based on the length of the input genome. If the reference +does not exceed 4 billion characters but a large index is preferred, the user +can specify [`--large-index`] to force `hisat2-build` to build a large index +instead. + +The HISAT2 index is based on the [FM Index] of Ferragina and Manzini, which in +turn is based on the [Burrows-Wheeler] transform. The algorithm used to build +the index is based on the [blockwise algorithm] of Karkkainen. + +[Blockwise algorithm]: http://portal.acm.org/citation.cfm?id=1314852 +[Burrows-Wheeler]: http://en.wikipedia.org/wiki/Burrows-Wheeler_transform +[Performance tuning]: #performance-tuning + +Command Line +------------ + +Usage: + + hisat2-build [options]* + +### Notes + If you use --snp, --ss, and/or --exon, hisat2-build will need about 200GB RAM for the human genome size as index building involves a graph construction. + Otherwise, you will be able to build an index on your desktop with 8GB RAM. + +### Main arguments + +* `` + A comma-separated list of FASTA files containing the reference sequences to be + aligned to, or, if [`-c`](#hisat2-build-options-c) is specified, the sequences + themselves. E.g., `` might be `chr1.fa,chr2.fa,chrX.fa,chrY.fa`, + or, if [`-c`](#hisat2-build-options-c) is specified, this might be + `GGTCATCCT,ACGGGTCGT,CCGTTCTATGCGGCTTA`. +{: #hisat2-build-options-ref} + +* `` + The basename of the index files to write. By default, `hisat2-build` writes + files named `NAME.1.ht2`, `NAME.2.ht2`, `NAME.3.ht2`, `NAME.4.ht2`, + `NAME.5.ht2`, `NAME.6.ht2`, `NAME.7.ht2`, and `NAME.8.ht2` where `NAME` is ``. 
+{: #hisat2-build-options-base} + +### Options + +* `-f` + The reference input files (specified as ``) are FASTA files + (usually having extension `.fa`, `.mfa`, `.fna` or similar). +{: #hisat2-build-options-f} + +* `-c` + The reference sequences are given on the command line. I.e. `` is + a comma-separated list of sequences rather than a list of FASTA files. +{: #hisat2-build-options-c} + +* `--large-index` + Force `hisat2-build` to build a [large index](#small-and-large-indexes), even if the reference is less + than ~ 4 billion nucleotides long. +{: #hisat2-build-options-large-index} +[`--large-index`]: #hisat2-build-options-large-index + +* `-a/--noauto` + Disable the default behavior whereby `hisat2-build` automatically selects + values for the [`--bmax`], [`--dcv`] and [`--packed`] parameters according to + available memory. Instead, user may specify values for those parameters. If + memory is exhausted during indexing, an error message will be printed; it is up + to the user to try new parameters. +{: #hisat2-build-options-a} +[`-a`/`--noauto`]: #hisat2-build-options-a + +* `--bmax ` + The maximum number of suffixes allowed in a block. Allowing more suffixes per + block makes indexing faster, but increases peak memory usage. Setting this + option overrides any previous setting for [`--bmax`], or [`--bmaxdivn`]. + Default (in terms of the [`--bmaxdivn`] parameter) is [`--bmaxdivn`] 4. This is + configured automatically by default; use [`-a`/`--noauto`] to configure manually. +{: #hisat2-build-options-bmax} +[`--bmax`]: #hisat2-build-options-bmax + +* `--bmaxdivn ` + The maximum number of suffixes allowed in a block, expressed as a fraction of + the length of the reference. Setting this option overrides any previous setting + for [`--bmax`], or [`--bmaxdivn`]. Default: [`--bmaxdivn`] 4. This is + configured automatically by default; use [`-a`/`--noauto`] to configure manually. 
+{: #hisat2-build-options-bmaxdivn} +[`--bmaxdivn`]: #hisat2-build-options-bmaxdivn + +* `--dcv ` + Use `` as the period for the difference-cover sample. A larger period + yields less memory overhead, but may make suffix sorting slower, especially if + repeats are present. Must be a power of 2 no greater than 4096. Default: 1024. + This is configured automatically by default; use [`-a`/`--noauto`] to configure + manually. +{: #hisat2-build-options-dcv} +[`--dcv`]: #hisat2-build-options-dcv + +* `--nodc` + Disable use of the difference-cover sample. Suffix sorting becomes + quadratic-time in the worst case (where the worst case is an extremely + repetitive reference). Default: off. +{: #hisat2-build-options-nodc} +[`--nodc`]: #hisat2-build-options-nodc + +* `-r/--noref` + Do not build the `NAME.3.ht2` and `NAME.4.ht2` portions of the index, which + contain a bitpacked version of the reference sequences and are used for + paired-end alignment. +{: #hisat2-build-options-r} + +* `-3/--justref` + Build only the `NAME.3.ht2` and `NAME.4.ht2` portions of the index, which + contain a bitpacked version of the reference sequences and are used for + paired-end alignment. +{: #hisat2-build-options-3} + +* `-o/--offrate ` + To map alignments back to positions on the reference sequences, it's necessary + to annotate ("mark") some or all of the [Burrows-Wheeler] rows with their + corresponding location on the genome. + [`-o`/`--offrate`](#hisat2-build-options-o) governs how many rows get marked: + the indexer will mark every 2^`` rows. Marking more rows makes + reference-position lookups faster, but requires more memory to hold the + annotations at runtime. The default is 4 (every 16th row is marked; for human + genome, annotations occupy about 680 megabytes). +{: #hisat2-build-options-o} + +* `-t/--ftabchars ` + The ftab is the lookup table used to calculate an initial [Burrows-Wheeler] + range with respect to the first `` characters of the query. 
A larger + `` yields a larger lookup table but faster query times. The ftab has size + 4^(``+1) bytes. The default setting is 10 (ftab is 4MB). +{: #hisat2-build-options-t} + +* `--localoffrate ` + This option governs how many rows get marked in a local index: + the indexer will mark every 2^`` rows. Marking more rows makes + reference-position lookups faster, but requires more memory to hold the + annotations at runtime. The default is 3 (every 8th row is marked, + this occupies about 16KB per local index). +{: #hisat2-build-options-localoffrate} + +* `--localftabchars ` + The local ftab is the lookup table in a local index. + The default setting is 6 (ftab is 8KB per local index). +{: #hisat2-build-options-localftabchars} + +* `-p ` + Launch `NTHREADS` parallel build threads (default: 1). +{: #hisat2-build-options-p} + +* `--snp ` + Provide a list of SNPs (in the HISAT2's own format) as follows (five columns). + + SNP ID snp type (single, deletion, or insertion) chromosome name zero-offset based genomic position of a SNP alternative base (single), the length of SNP (deletion), or insertion sequence (insertion) + + For example, + + rs58784443 single 13 18447947 T + + Use `hisat2_extract_snps_haplotypes_UCSC.py` (in the HISAT2 package) to extract SNPs and haplotypes from a dbSNP file (e.g. http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/snp144Common.txt.gz). + or `hisat2_extract_snps_haplotypes_VCF.py` to extract SNPs and haplotypes from a VCF file (e.g. ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions/ALL.chr22.phase3_shapeit2_mvncall_integrated_v3plus_nounphased.rsID.genotypes.GRCh38_dbSNP_no_SVs.vcf.gz). +{: #hisat2-build-options-snp} + +* `--haplotype ` + Provide a list of haplotypes (in the HISAT2's own format) as follows (five columns). 
+ + Haplotype ID chromosome name zero-offset based left coordinate of haplotype zero-offset based right coordinate of haplotype a comma separated list of SNP ids in the haplotype + + For example, + + ht35 13 18446877 18446945 rs12381094,rs12381056,rs192016659,rs538569910 + + See the above option, --snp, about how to extract haplotypes. This option is not required, but haplotype information can keep the index construction from exploding and reduce the index size substantially. +{: #hisat2-build-options-haplotype} + +* `--ss ` + Note this option should be used with the following [`--exon`](#hisat2-build-options-exon) option. + Provide a list of splice sites (in the HISAT2's own format) as follows (four columns). + + chromosome name zero-offset based genomic position of the flanking base on the left side of an intron zero-offset based genomic position of the flanking base on the right strand + + Use `hisat2_extract_splice_sites.py` (in the HISAT2 package) to extract splice sites from a GTF file. +{: #hisat2-build-options-ss} + +* `--exon ` + Note this option should be used with the above [`--ss`](#hisat2-build-options-ss) option. + Provide a list of exons (in the HISAT2's own format) as follows (three columns). + + chromosome name zero-offset based left genomic position of an exon zero-offset based right genomic position of an exon + + Use `hisat2_extract_exons.py` (in the HISAT2 package) to extract exons from a GTF file. +{: #hisat2-build-options-exon} + +* `--seed ` + Use `` as the seed for pseudo-random number generator. +{: #hisat2-build-options-seed} + +* `--cutoff ` + Index only the first `` bases of the reference sequences (cumulative across + sequences) and ignore the rest. +{: #hisat2-build-options-cutoff} + +* `-q/--quiet` + `hisat2-build` is verbose by default. With this option `hisat2-build` will + print only error messages. +{: #hisat2-build-options-q} + +* `-h/--help` + Print usage information and quit. 
+{: #hisat2-build-options-h} + +* `--version` + Print version information and quit. +{: #hisat2-build-options-version} + +The `hisat2-inspect` index inspector +===================================== + +`hisat2-inspect` extracts information from a HISAT2 index about what kind of +index it is and what reference sequences were used to build it. When run without +any options, the tool will output a FASTA file containing the sequences of the +original references (with all non-`A`/`C`/`G`/`T` characters converted to `N`s). + It can also be used to extract just the reference sequence names using the +[`-n`/`--names`] option or a more verbose summary using the [`-s`/`--summary`] +option. + +Command Line +------------ + +Usage: + + hisat2-inspect [options]* + +### Main arguments + +* `` + The basename of the index to be inspected. The basename is the name of any of the + index files but with the `.X.ht2` suffix omitted. + `hisat2-inspect` first looks in the current directory for the index files, then + in the directory specified in the `HISAT2_INDEXES` environment variable. +{: #hisat2-inspect-options-base} + +### Options + +* `-a/--across ` + When printing FASTA output, output a newline character every `` bases + (default: 60). +{: #hisat2-inspect-options-a} + +* `-n/--names` + Print reference sequence names, one per line, and quit. +{: #hisat2-inspect-options-n} +[`-n`/`--names`]: #hisat2-inspect-options-n + +* `-s/--summary` + Print a summary that includes information about index settings, as well as the + names and lengths of the input sequences. The summary has this format: + + Colorspace <0 or 1> + SA-Sample 1 in + FTab-Chars + Sequence-1 + Sequence-2 + ... + Sequence-N + + Fields are separated by tabs. Colorspace is always set to 0 for HISAT2. +{: #hisat2-inspect-options-s} +[`-s`/`--summary`]: #hisat2-inspect-options-s + +* `--snp` + Print SNPs, and quit. +{: #hisat2-inspect-options-snp} +[`--snp`]: #hisat2-inspect-options-snp + +* `--ss` + Print splice sites, and quit.
+{: #hisat2-inspect-options-ss} +[`--ss`]: #hisat2-inspect-options-ss + +* `--ss-all` + Print splice sites including those not in the global index, and quit. +{: #hisat2-inspect-options-ss-all} +[`--ss-all`]: #hisat2-inspect-options-ss-all + +* `--exon` + Print exons, and quit. +{: #hisat2-inspect-options-exon} +[`--exon`]: #hisat2-inspect-options-exon + +* `-v/--verbose` + Print verbose output (for debugging). + +* `--version` + Print version information and quit. + +* `-h/--help` + Print usage information and quit. + + +Getting started with HISAT2 +=================================================== + +HISAT2 comes with some example files to get you started. The example files +are not scientifically significant; these files will simply let you start running HISAT2 and +downstream tools right away. + +First follow the manual instructions to [obtain HISAT2]. Set the `HISAT2_HOME` +environment variable to point to the new HISAT2 directory containing the +`hisat2`, `hisat2-build` and `hisat2-inspect` binaries. This is important, +as the `HISAT2_HOME` variable is used in the commands below to refer to that +directory. + +[obtain HISAT2]: #obtaining-hisat2 + +Indexing a reference genome +--------------------------- + +To create an index for the genomic region (1 million bps from the human chromosome 22 between 20,000,000 and 20,999,999) +included with HISAT2, create a new temporary directory (it doesn't matter where), change into that directory, and run: + + $HISAT2_HOME/hisat2-build $HISAT2_HOME/example/reference/22_20-21M.fa --snp $HISAT2_HOME/example/reference/22_20-21M.snp 22_20-21M_snp + +The command should print many lines of output then quit. When the command +completes, the current directory will contain eight new files that all start with +`22_20-21M_snp` and end with `.1.ht2`, `.2.ht2`, `.3.ht2`, `.4.ht2`, `.5.ht2`, `.6.ht2`, +`.7.ht2`, and `.8.ht2`. These files constitute the index - you're done!
+ +You can use `hisat2-build` to create an index for a set of FASTA files obtained +from any source, including sites such as [UCSC], [NCBI], and [Ensembl]. When +indexing multiple FASTA files, specify all the files using commas to separate +file names. For more details on how to create an index with `hisat2-build`, +see the [manual section on index building]. You may also want to bypass this +process by obtaining a pre-built index. + +[UCSC]: http://genome.ucsc.edu/cgi-bin/hgGateway +[NCBI]: http://www.ncbi.nlm.nih.gov/sites/genome +[Ensembl]: http://www.ensembl.org/ +[manual section on index building]: #the-hisat2-build-indexer +[using a pre-built index]: #using-a-pre-built-index + +Aligning example reads +---------------------- + +Stay in the directory created in the previous step, which now contains the +`22_20-21M` index files. Next, run: + + $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -U $HISAT2_HOME/example/reads/reads_1.fa -S eg1.sam + +This runs the HISAT2 aligner, which aligns a set of unpaired reads to the +genome region using the index generated in the previous step. +The alignment results in SAM format are written to the file `eg1.sam`, and a +short alignment summary is written to the console. (Actually, the summary is +written to the `"standard error"` or `"stderr"` filehandle, which is typically +printed to the console.) 
+ +To see the first few lines of the SAM output, run: + + head eg1.sam + +You will see something like this: + + @HD VN:1.0 SO:unsorted + @SQ SN:22:20000001-21000000 LN:1000000 + @PG ID:hisat2 PN:hisat2 VN:2.0.0-beta + 1 0 22:20000001-21000000 397984 255 100M * 0 0 GCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU NH:i:1 + 2 16 22:20000001-21000000 398131 255 100M * 0 0 ATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:80A19 YT:Z:UU NH:i:1 Zs:Z:80|S|rs576159895 + 3 16 22:20000001-21000000 398222 255 100M * 0 0 TGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGTCAGAGCTGCAGTACTTGGCGATCTCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:16A83 YT:Z:UU NH:i:1 Zs:Z:16|S|rs2629364 + 4 16 22:20000001-21000000 398247 255 90M200N10M * 0 0 CAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGTCAGAGCTGCAGTACTTGGCGATCTCAAACCGCTGCACCAGGAAGTCGATCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU XS:A:- NH:i:1 + 5 16 22:20000001-21000000 398194 255 100M * 0 0 GGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:17A26A55 YT:Z:UU NH:i:1 Zs:Z:17|S|rs576159895,26|S|rs2629364 + 6 0 22:20000001-21000000 398069 255 100M * 0 0 
CAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU NH:i:1 + 7 0 22:20000001-21000000 397896 255 100M * 0 0 GTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:31G68 YT:Z:UU NH:i:1 Zs:Z:31|S|rs562662261 + 8 0 22:20000001-21000000 398150 255 100M * 0 0 AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:61A26A11 YT:Z:UU NH:i:1 Zs:Z:61|S|rs576159895,26|S|rs2629364 + 9 16 22:20000001-21000000 398329 255 8M200N92M * 0 0 ACCAGGAAGTCGATCCAGATGTAGTGGGGGGTCACTTCGGGGGGACAGGGTTTGGGTTGACTTGCTTCCGAGGCAGCCAGGGGGTCTGCTTCCTTTATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU XS:A:- NH:i:1 + 10 16 22:20000001-21000000 398184 255 100M * 0 0 CTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:27A26A45 YT:Z:UU NH:i:1 Zs:Z:27|S|rs576159895,26|S|rs2629364 + +The first few lines (beginning with `@`) are SAM header lines, and the rest of +the lines are SAM alignments, one line per read or mate. See the [HISAT2 +manual section on SAM output] and the [SAM specification] for details about how +to interpret the SAM file format. 
+ +[HISAT2 manual section on SAM output]: #sam-output + +Paired-end example +------------------ + +To align paired-end reads included with HISAT2, stay in the same directory and +run: + + $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -1 $HISAT2_HOME/example/reads/reads_1.fa -2 $HISAT2_HOME/example/reads/reads_2.fa -S eg2.sam + +This aligns a set of paired-end reads to the reference genome, with results +written to the file `eg2.sam`. + +Using SAMtools/BCFtools downstream +---------------------------------- + +[SAMtools] is a collection of tools for manipulating and analyzing SAM and BAM +alignment files. [BCFtools] is a collection of tools for calling variants and +manipulating VCF and BCF files, and it is typically distributed with [SAMtools]. +Using these tools together allows you to get from alignments in SAM format to +variant calls in VCF format. This example assumes that `samtools` and +`bcftools` are installed and that the directories containing these binaries are +in your [PATH environment variable]. + +Run the paired-end example: + + $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -1 $HISAT2_HOME/example/reads/reads_1.fa -2 $HISAT2_HOME/example/reads/reads_2.fa -S eg2.sam + +Use `samtools view` to convert the SAM file into a BAM file. BAM is the +binary format corresponding to the SAM text format. Run: + + samtools view -bS eg2.sam > eg2.bam + +Use `samtools sort` to convert the BAM file to a sorted BAM file. The following command requires samtools version 1.2 or higher. + + samtools sort eg2.bam -o eg2.sorted.bam + +We now have a sorted BAM file called `eg2.sorted.bam`. Sorted BAM is a useful +format because the alignments are (a) compressed, which is convenient for +long-term storage, and (b) sorted, which is convenient for variant discovery.
+To generate variant calls in VCF format, run: + + samtools mpileup -uf $HISAT2_HOME/example/reference/22_20-21M.fa eg2.sorted.bam | bcftools view -bvcg - > eg2.raw.bcf + +Then to view the variants, run: + + bcftools view eg2.raw.bcf + +See the official SAMtools guide to [Calling SNPs/INDELs with SAMtools/BCFtools] +for more details and variations on this process. + +[BCFtools]: http://samtools.sourceforge.net/mpileup.shtml +[Calling SNPs/INDELs with SAMtools/BCFtools]: http://samtools.sourceforge.net/mpileup.shtml diff --git a/docs/_pages/search.html b/docs/_pages/search.html new file mode 100644 index 0000000..d964f54 --- /dev/null +++ b/docs/_pages/search.html @@ -0,0 +1,26 @@ +--- +layout: page +title: Search Results +permalink: /search/ +hide: true +share: false +--- + + + +

    diff --git a/docs/_pages/tags.html b/docs/_pages/tags.html new file mode 100644 index 0000000..8329577 --- /dev/null +++ b/docs/_pages/tags.html @@ -0,0 +1,14 @@ +--- +layout: page +title: Tags +permalink: /tags/ +order: 2 +share: false +hide: true +--- + + diff --git a/docs/_posts/2000-01-01-kim.md b/docs/_posts/2000-01-01-kim.md new file mode 100644 index 0000000..fd77605 --- /dev/null +++ b/docs/_posts/2000-01-01-kim.md @@ -0,0 +1,13 @@ +--- +layout: post +title: Daehwan Kim +tags: daehwankim +eye_catch: https://avatars0.githubusercontent.com/u/28678667?s=460&v=4 +--- + +Daehwan Kim is an Assistant Professor at UT Southwestern and was the original designer who laid much of the groundwork for HISAT-genotype. + +[Webpage](https://kim-lab.org/daehwan-kim-principal-investigator/) + + + diff --git a/docs/_posts/2000-01-02-salzberg.md b/docs/_posts/2000-01-02-salzberg.md new file mode 100644 index 0000000..71f5117 --- /dev/null +++ b/docs/_posts/2000-01-02-salzberg.md @@ -0,0 +1,11 @@ +--- +layout: post +title: Steven Salzberg +tags: stevensalzberg +eye_catch: https://avatars0.githubusercontent.com/u/28678667?s=460&v=4 +--- + +Steven Salzberg is the Bloomberg Distinguished Professor of Biomedical Engineering, Computer Science, and Biostatistics at Johns Hopkins University, where he is also Director of the Center for Computational Biology. + +[Webpage](https://salzberg-lab.org/in-the-news/about-me/) + diff --git a/docs/_posts/2000-01-03-langmead.md b/docs/_posts/2000-01-03-langmead.md new file mode 100644 index 0000000..0099a97 --- /dev/null +++ b/docs/_posts/2000-01-03-langmead.md @@ -0,0 +1,13 @@ +--- +layout: post +title: Ben Langmead +tags: benlangmead +eye_catch: https://avatars0.githubusercontent.com/u/28678667?s=460&v=4 +--- + +Ben Langmead is an Associate Professor of Computer Science at Johns Hopkins University.
+ +[Webpage](http://www.langmead-lab.org/) + + + diff --git a/docs/_posts/2019-07-28-park.md b/docs/_posts/2019-07-28-park.md new file mode 100644 index 0000000..a5f6c02 --- /dev/null +++ b/docs/_posts/2019-07-28-park.md @@ -0,0 +1,10 @@ +--- +layout: post +title: Chanhee Park +tags: chanheepark +eye_catch: https://avatars0.githubusercontent.com/u/28678667?s=460&v=4 +--- + +Chanhee Park is a Scientific Software Engineer in the Kim Lab at UTSW responsible for maintaining and improving HISAT2, the core of HISAT-genotype. + +[Linkedin](https://www.linkedin.com/in/chanhee-park-97677297/) diff --git a/docs/_sass/_aside.scss b/docs/_sass/_aside.scss new file mode 100644 index 0000000..d82fea3 --- /dev/null +++ b/docs/_sass/_aside.scss @@ -0,0 +1,66 @@ +.site-aside { + font-size: 0.95em; + padding-top: $margin; + padding-bottom: $margin; + + @media screen and (max-width: $mobile-width) { + border-top: 3px solid $brand-color; + } + + h2 { + font-size: 1.2em; + line-height: 1.3; + margin: 1em 0; + } + + .block { + margin-bottom: $margin; + h2 { + margin: 0; + } + } + + #search { + box-sizing: border-box; + width: 100%; + } + + ol, ul { + margin: 1.5em 0; + list-style: none; + line-height: 1.2em; + li { + border-bottom: 1px solid $border-color; + &:first-child { + border-top: 1px solid $border-color; + } + a, span { + padding: 0.5em 0.3em; + display: block; + width: 100%; + } + &:last-child { + margin-bottom: 0; + } + } + } + + ul.icons { + @extend .clearfix; + list-style: none; + li { + border: none; + font-size: 1.8em; + margin-bottom: 0; + float: left; + a { + padding: 0; + } + } + } + + a.publication { + padding: 0; + display: inline; + } +} diff --git a/docs/_sass/_common.scss b/docs/_sass/_common.scss new file mode 100644 index 0000000..9d1ed21 --- /dev/null +++ b/docs/_sass/_common.scss @@ -0,0 +1,43 @@ +html { + background: $background-color; +} + +body { + color: $text-color; + font-family: $font-family; + font-size: $font-size; + word-wrap: break-word; +} + +a { + 
color: $link-color; + text-decoration: none; + &:hover { + opacity: .8; + text-decoration: underline; + } +} + +hr { + border: 0; + border-top: 1px solid $border-color; + border-bottom: 1px solid #fff; + margin: 1em 0; + &.with-margin { + margin: $margin 0; + } + &.with-no-margin { + margin: 0; + } +} + +input, select, textarea { + border-radius: 0.3em; + border: 1px solid $border-color; + display: inline-block; + padding: 0.5em 0.75em; +} + +iframe, img, embed, object, video { + max-width: 100%; +} diff --git a/docs/_sass/_content.scss b/docs/_sass/_content.scss new file mode 100644 index 0000000..b028d5d --- /dev/null +++ b/docs/_sass/_content.scss @@ -0,0 +1,203 @@ +.site-content { + h1, h2, h3, h4, h5, h6 { + line-height: 1.3; + margin: 1em 0; + .header-link { + margin-left: 0.2em; + color: $link-color; + opacity: 0; + } + &:hover .header-link { + opacity: 1; + } + } + + h1 { + font-size: 2.3em; + } + + h2 { + font-size: 1.9em; + } + + blockquote { + border-left: 5px solid #ddd; + color: $blockquote-color; + padding: 0.5em 1em; + p:first-child { + margin-top: 0; + } + p:last-child { + margin-bottom: 0; + } + } + + table { + width: 100%; + border-collapse: collapse; + td, th { + padding: 0.5em 1em; + border: 1px solid $border-color; + text-align: left; + } + } + + p, ol, ul, dl, table, blockquote, kbd, pre, samp { + margin: 1.5em 0; + } + + ul, ol { + padding-left: 1em; + ul, ol { + margin: 0; + } + } + + ul, ol { + &.inline { + @extend .clearfix; + list-style: none; + padding-left: 0; + li { + float: left; + margin-right: 1em; + &:last-child { + margin-right: 0; + } + } + } + } + + ul { + margin-left: 0.5em; + } + + dt { + font-weight: bold; + } + + dd { + margin-left: 2em; + } + + p, ol, ul, dl { + line-height: 1.5; + } + + ol, ul { + list-style-position: outside; + } + + code { + font-family: $code-font-family; + font-size: $code-font-size; + margin: 0 1px; + padding: 0 1px; + background-color: rgba(#ddd, .1); + } + + pre { + border: 1px solid #ccc; + 
background-color: rgba(#ddd, .1); + overflow: auto; + padding: 10px 15px; + line-height: 1em; + border-radius: 3px; + code { + margin: 0; + padding: 0; + word-wrap: normal; + white-space: pre; + border: none; + background: transparent; + } + } + + .article-wrapper { + @extend .clearfix; + margin: $margin 0; + } + + article { + width: 100%; + float: left; + header { + margin-bottom: $margin; + .panel { + padding: 1em 1.5em; + background-color: rgba($page-title-color, .2); + h1 { + margin: 0; + &, a { + color: $page-title-color; + } + } + ul.meta, ul.tags { + list-style: none; + margin: 0; + padding: 0; + li { + display: inline-block; + color: rgba($text-color, .5); + font-size: 0.95em; + margin-right: 0.5em; + &:last-child { + margin-right: 0; + } + } + } + ul.meta { + float: right; + } + ul.icons { + margin: 0; + padding: 0; + li { + margin-right: 0; + } + } + } + } + p:last-child { + margin-bottom: 0; + } + footer { + margin-top: $margin; + } + .footnotes { + font-size: 0.9em; + } + } + + .comments { + margin-bottom: $margin; + } + + .pagination { + margin: $margin 0; + padding: 0 10%; + text-align: center; + .btn:first-child { + margin-right: 1em; + } + } + + #search-results { + .article-wrapper { + margin: 1em 0; + } + article header { + margin-bottom: 0; + .panel { + padding: 0; + background-color: $background-color; + h1 { + font-size: 1.5em; + a.tag { + font-size: 0.7em; + } + } + } + } + } +} diff --git a/docs/_sass/_footer.scss b/docs/_sass/_footer.scss new file mode 100644 index 0000000..f0d18e0 --- /dev/null +++ b/docs/_sass/_footer.scss @@ -0,0 +1,7 @@ +.site-footer { + border-top: 3px solid $brand-color; + padding: $margin/2 0; + color: lighten($text-color, 30%); + text-align: center; + font-size: 0.9em; +} diff --git a/docs/_sass/_header.scss b/docs/_sass/_header.scss new file mode 100644 index 0000000..392366b --- /dev/null +++ b/docs/_sass/_header.scss @@ -0,0 +1,47 @@ +.site-header { + font-size: 2em; + padding: 0.7em 0; + background-color: 
$brand-color; + border-top: 3px solid rgba(#000, .5); + @media screen and (max-width: $mobile-width) { + text-align: center; + } + + .avatar { + height: 3em; + width: 3em; + border: 3px solid rgba(#fff, .7); + border-radius: 1.5em; + margin-right: 0.8em; + float: left; + @media screen and (max-width: $mobile-width) { + float: none; + margin-right: 0; + } + } + + h1 { + font-size: 1em; + line-height: 1em; + margin: 0; + .title { + display: inline-block; + color: $site-title-color; + font-weight: bold; + margin-top: 0.5em; + &.slim { + margin-top: 0; + } + @media screen and (max-width: $mobile-width) { + margin-top: 0; + } + } + .description { + margin: 0; + font-size: 0.6em; + line-height: 1em; + margin-top: 0.5em; + color: lighten($site-title-color, 20%); + } + } +} diff --git a/docs/_sass/base/_layout.scss b/docs/_sass/base/_layout.scss new file mode 100644 index 0000000..94767d3 --- /dev/null +++ b/docs/_sass/base/_layout.scss @@ -0,0 +1,62 @@ +$site-max-width: 1200px; +$mobile-width: 800px; +$side-padding: 10px; + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +.site-header { + width: 100%; + .inner { + width: $site-max-width; + margin: 0 auto; + @media screen and (max-width: $site-max-width) { + width: 100%; + padding: 0 $side-padding; + } + } +} + +.site-container { + @extend .clearfix; + width: $site-max-width; + margin: 0 auto; + @media screen and (max-width: $site-max-width) { + width: 100%; + padding: 0 $side-padding; + } + @media screen and (max-width: $mobile-width) { + padding: 0; + } +} + +.site-content { + width: 70%; + float: left; + @media screen and (max-width: $mobile-width) { + width: 100%; + padding: 0 $side-padding; + } +} + +.site-aside { + padding-left: 50px; + width: 30%; + float: left; + @media screen and (max-width: $mobile-width) { + width: 100%; + padding: 0 $side-padding; + } + .inner { + } +} + +.site-footer { + width: 100%; + .inner { + padding: 0 $side-padding; + } +} diff --git a/docs/_sass/base/_reset.scss 
b/docs/_sass/base/_reset.scss new file mode 100644 index 0000000..458eea1 --- /dev/null +++ b/docs/_sass/base/_reset.scss @@ -0,0 +1,427 @@ +/*! normalize.css v3.0.2 | MIT License | git.io/normalize */ + +/** + * 1. Set default font family to sans-serif. + * 2. Prevent iOS text size adjust after orientation change, without disabling + * user zoom. + */ + +html { + font-family: sans-serif; /* 1 */ + -ms-text-size-adjust: 100%; /* 2 */ + -webkit-text-size-adjust: 100%; /* 2 */ +} + +/** + * Remove default margin. + */ + +body { + margin: 0; +} + +/* HTML5 display definitions + ========================================================================== */ + +/** + * Correct `block` display not defined for any HTML5 element in IE 8/9. + * Correct `block` display not defined for `details` or `summary` in IE 10/11 + * and Firefox. + * Correct `block` display not defined for `main` in IE 11. + */ + +article, +aside, +details, +figcaption, +figure, +footer, +header, +hgroup, +main, +menu, +nav, +section, +summary { + display: block; +} + +/** + * 1. Correct `inline-block` display not defined in IE 8/9. + * 2. Normalize vertical alignment of `progress` in Chrome, Firefox, and Opera. + */ + +audio, +canvas, +progress, +video { + display: inline-block; /* 1 */ + vertical-align: baseline; /* 2 */ +} + +/** + * Prevent modern browsers from displaying `audio` without controls. + * Remove excess height in iOS 5 devices. + */ + +audio:not([controls]) { + display: none; + height: 0; +} + +/** + * Address `[hidden]` styling not present in IE 8/9/10. + * Hide the `template` element in IE 8/9/11, Safari, and Firefox < 22. + */ + +[hidden], +template { + display: none; +} + +/* Links + ========================================================================== */ + +/** + * Remove the gray background color from active links in IE 10. + */ + +a { + background-color: transparent; +} + +/** + * Improve readability when focused and also mouse hovered in all browsers. 
+ */ + +a:active, +a:hover { + outline: 0; +} + +/* Text-level semantics + ========================================================================== */ + +/** + * Address styling not present in IE 8/9/10/11, Safari, and Chrome. + */ + +abbr[title] { + border-bottom: 1px dotted; +} + +/** + * Address style set to `bolder` in Firefox 4+, Safari, and Chrome. + */ + +b, +strong { + font-weight: bold; +} + +/** + * Address styling not present in Safari and Chrome. + */ + +dfn { + font-style: italic; +} + +/** + * Address variable `h1` font-size and margin within `section` and `article` + * contexts in Firefox 4+, Safari, and Chrome. + */ + +h1 { + font-size: 2em; + margin: 0.67em 0; +} + +/** + * Address styling not present in IE 8/9. + */ + +mark { + background: #ff0; + color: #000; +} + +/** + * Address inconsistent and variable font size in all browsers. + */ + +small { + font-size: 80%; +} + +/** + * Prevent `sub` and `sup` affecting `line-height` in all browsers. + */ + +sub, +sup { + font-size: 75%; + line-height: 0; + position: relative; + vertical-align: baseline; +} + +sup { + top: -0.5em; +} + +sub { + bottom: -0.25em; +} + +/* Embedded content + ========================================================================== */ + +/** + * Remove border when inside `a` element in IE 8/9/10. + */ + +img { + border: 0; +} + +/** + * Correct overflow not hidden in IE 9/10/11. + */ + +svg:not(:root) { + overflow: hidden; +} + +/* Grouping content + ========================================================================== */ + +/** + * Address margin not present in IE 8/9 and Safari. + */ + +figure { + margin: 1em 40px; +} + +/** + * Address differences between Firefox and other browsers. + */ + +hr { + -moz-box-sizing: content-box; + box-sizing: content-box; + height: 0; +} + +/** + * Contain overflow in all browsers. + */ + +pre { + overflow: auto; +} + +/** + * Address odd `em`-unit font size rendering in all browsers. 
+ */ + +code, +kbd, +pre, +samp { + font-family: monospace, monospace; + font-size: 1em; +} + +/* Forms + ========================================================================== */ + +/** + * Known limitation: by default, Chrome and Safari on OS X allow very limited + * styling of `select`, unless a `border` property is set. + */ + +/** + * 1. Correct color not being inherited. + * Known issue: affects color of disabled elements. + * 2. Correct font properties not being inherited. + * 3. Address margins set differently in Firefox 4+, Safari, and Chrome. + */ + +button, +input, +optgroup, +select, +textarea { + color: inherit; /* 1 */ + font: inherit; /* 2 */ + margin: 0; /* 3 */ +} + +/** + * Address `overflow` set to `hidden` in IE 8/9/10/11. + */ + +button { + overflow: visible; +} + +/** + * Address inconsistent `text-transform` inheritance for `button` and `select`. + * All other form control elements do not inherit `text-transform` values. + * Correct `button` style inheritance in Firefox, IE 8/9/10/11, and Opera. + * Correct `select` style inheritance in Firefox. + */ + +button, +select { + text-transform: none; +} + +/** + * 1. Avoid the WebKit bug in Android 4.0.* where (2) destroys native `audio` + * and `video` controls. + * 2. Correct inability to style clickable `input` types in iOS. + * 3. Improve usability and consistency of cursor style between image-type + * `input` and others. + */ + +button, +html input[type="button"], /* 1 */ +input[type="reset"], +input[type="submit"] { + -webkit-appearance: button; /* 2 */ + cursor: pointer; /* 3 */ +} + +/** + * Re-set default cursor for disabled elements. + */ + +button[disabled], +html input[disabled] { + cursor: default; +} + +/** + * Remove inner padding and border in Firefox 4+. + */ + +button::-moz-focus-inner, +input::-moz-focus-inner { + border: 0; + padding: 0; +} + +/** + * Address Firefox 4+ setting `line-height` on `input` using `!important` in + * the UA stylesheet. 
+ */ + +input { + line-height: normal; +} + +/** + * It's recommended that you don't attempt to style these elements. + * Firefox's implementation doesn't respect box-sizing, padding, or width. + * + * 1. Address box sizing set to `content-box` in IE 8/9/10. + * 2. Remove excess padding in IE 8/9/10. + */ + +input[type="checkbox"], +input[type="radio"] { + box-sizing: border-box; /* 1 */ + padding: 0; /* 2 */ +} + +/** + * Fix the cursor style for Chrome's increment/decrement buttons. For certain + * `font-size` values of the `input`, it causes the cursor style of the + * decrement button to change from `default` to `text`. + */ + +input[type="number"]::-webkit-inner-spin-button, +input[type="number"]::-webkit-outer-spin-button { + height: auto; +} + +/** + * 1. Address `appearance` set to `searchfield` in Safari and Chrome. + * 2. Address `box-sizing` set to `border-box` in Safari and Chrome + * (include `-moz` to future-proof). + */ + +input[type="search"] { + -webkit-appearance: textfield; /* 1 */ + -moz-box-sizing: content-box; + -webkit-box-sizing: content-box; /* 2 */ + box-sizing: content-box; +} + +/** + * Remove inner padding and search cancel button in Safari and Chrome on OS X. + * Safari (but not Chrome) clips the cancel button when the search input has + * padding (and `textfield` appearance). + */ + +input[type="search"]::-webkit-search-cancel-button, +input[type="search"]::-webkit-search-decoration { + -webkit-appearance: none; +} + +/** + * Define consistent border, margin, and padding. + */ + +fieldset { + border: 1px solid #c0c0c0; + margin: 0 2px; + padding: 0.35em 0.625em 0.75em; +} + +/** + * 1. Correct `color` not being inherited in IE 8/9/10/11. + * 2. Remove padding so people aren't caught out if they zero out fieldsets. + */ + +legend { + border: 0; /* 1 */ + padding: 0; /* 2 */ +} + +/** + * Remove default vertical scrollbar in IE 8/9/10/11. 
+ */ + +textarea { + overflow: auto; +} + +/** + * Don't inherit the `font-weight` (applied by a rule above). + * NOTE: the default cannot safely be changed in Chrome and Safari on OS X. + */ + +optgroup { + font-weight: bold; +} + +/* Tables + ========================================================================== */ + +/** + * Remove most spacing between table cells. + */ + +table { + border-collapse: collapse; + border-spacing: 0; +} + +td, +th { + padding: 0; +} diff --git a/docs/_sass/base/_syntax.scss b/docs/_sass/base/_syntax.scss new file mode 100644 index 0000000..39cdb79 --- /dev/null +++ b/docs/_sass/base/_syntax.scss @@ -0,0 +1,63 @@ +// from https://github.com/mojombo/tpw/blob/master/css/syntax.css +// see also https://jekyllrb.com/docs/templates/#stylesheets-for-syntax-highlighting + +.highlight { background: #ffffff; } +.highlight .c { color: #999988; font-style: italic } /* Comment */ +.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ +.highlight .k { font-weight: bold } /* Keyword */ +.highlight .o { font-weight: bold } /* Operator */ +.highlight .cm { color: #999988; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #999999; font-weight: bold } /* Comment.Preproc */ +.highlight .c1 { color: #999988; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ +.highlight .gd .x { color: #000000; background-color: #ffaaaa } /* Generic.Deleted.Specific */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #aa0000 } /* Generic.Error */ +.highlight .gh { color: #999999 } /* Generic.Heading */ +.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ +.highlight .gi .x { color: #000000; background-color: #aaffaa } /* Generic.Inserted.Specific */ +.highlight .go { color: #888888 } /* 
Generic.Output */ +.highlight .gp { color: #555555 } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #aaaaaa } /* Generic.Subheading */ +.highlight .gt { color: #aa0000 } /* Generic.Traceback */ +.highlight .kc { font-weight: bold } /* Keyword.Constant */ +.highlight .kd { font-weight: bold } /* Keyword.Declaration */ +.highlight .kp { font-weight: bold } /* Keyword.Pseudo */ +.highlight .kr { font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #445588; font-weight: bold } /* Keyword.Type */ +.highlight .m { color: #009999 } /* Literal.Number */ +.highlight .s { color: #d14 } /* Literal.String */ +.highlight .na { color: #008080 } /* Name.Attribute */ +.highlight .nb { color: #0086B3 } /* Name.Builtin */ +.highlight .nc { color: #445588; font-weight: bold } /* Name.Class */ +.highlight .no { color: #008080 } /* Name.Constant */ +.highlight .ni { color: #800080 } /* Name.Entity */ +.highlight .ne { color: #990000; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #990000; font-weight: bold } /* Name.Function */ +.highlight .nn { color: #555555 } /* Name.Namespace */ +.highlight .nt { color: #000080 } /* Name.Tag */ +.highlight .nv { color: #008080 } /* Name.Variable */ +.highlight .ow { font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mf { color: #009999 } /* Literal.Number.Float */ +.highlight .mh { color: #009999 } /* Literal.Number.Hex */ +.highlight .mi { color: #009999 } /* Literal.Number.Integer */ +.highlight .mo { color: #009999 } /* Literal.Number.Oct */ +.highlight .sb { color: #d14 } /* Literal.String.Backtick */ +.highlight .sc { color: #d14 } /* Literal.String.Char */ +.highlight .sd { color: #d14 } /* Literal.String.Doc */ +.highlight .s2 { color: #d14 } /* Literal.String.Double */ +.highlight .se { color: #d14 } /* Literal.String.Escape */ +.highlight .sh { color: #d14 } /* Literal.String.Heredoc */ 
+.highlight .si { color: #d14 } /* Literal.String.Interpol */ +.highlight .sx { color: #d14 } /* Literal.String.Other */ +.highlight .sr { color: #009926 } /* Literal.String.Regex */ +.highlight .s1 { color: #d14 } /* Literal.String.Single */ +.highlight .ss { color: #990073 } /* Literal.String.Symbol */ +.highlight .bp { color: #999999 } /* Name.Builtin.Pseudo */ +.highlight .vc { color: #008080 } /* Name.Variable.Class */ +.highlight .vg { color: #008080 } /* Name.Variable.Global */ +.highlight .vi { color: #008080 } /* Name.Variable.Instance */ +.highlight .il { color: #009999 } /* Literal.Number.Integer.Long */ diff --git a/docs/_sass/base/_utilities.scss b/docs/_sass/base/_utilities.scss new file mode 100644 index 0000000..f7fde26 --- /dev/null +++ b/docs/_sass/base/_utilities.scss @@ -0,0 +1,39 @@ +// button. +.btn { + border-radius: 0.3em; + border: 1px solid; + display: inline-block; + padding: 0.5em 0.75em; +} +a.btn:hover { + background: $link-color; + color: $background-color; + text-decoration: none; +} + +// margin. +.margin { + margin: $margin 0 !important; +} +.margin-top { + margin-top: $margin !important; +} +.margin-bottom { + margin-bottom: $margin !important; +} + +// state. +.disabled { + opacity: 0.7; +} + +// clearfix. +.clearfix { + &:before, &:after { + content: " "; + display: table; + } + &:after { + clear: both; + } +} diff --git a/docs/_sass/base/_variables.scss b/docs/_sass/base/_variables.scss new file mode 100644 index 0000000..19eb550 --- /dev/null +++ b/docs/_sass/base/_variables.scss @@ -0,0 +1,24 @@ +// typography. +$font-family: 'Source Sans Pro', Helvetica, Arial, sans-serif; +$font-size: 16px; + +// source code typography. +$code-font-family: "Courier New", Courier, monospace, fixed; +$code-font-size: 0.9em; + +// vertical margin. +$margin: 50px; + +// brand color. +$brand-color: #003c6e; + +// other major colors. 
+$site-title-color: rgba(#fff, .9); +$page-title-color: $brand-color; +$background-color: #fff; +$border-color: rgba(#000, .25); + +// typography colors. +$text-color: #383838; +$link-color: lighten($brand-color, 15%); +$blockquote-color: #777; diff --git a/docs/assets/css/style.scss b/docs/assets/css/style.scss new file mode 100644 index 0000000..c681659 --- /dev/null +++ b/docs/assets/css/style.scss @@ -0,0 +1,13 @@ +--- +--- + +@import 'base/_reset'; +@import 'base/_syntax'; +@import 'base/_variables'; +@import 'base/_layout'; +@import 'base/_utilities'; +@import '_common'; +@import '_header'; +@import '_footer'; +@import '_aside'; +@import '_content'; diff --git a/docs/assets/data/HISAT2-first_release-Sept_8_2015.pdf b/docs/assets/data/HISAT2-first_release-Sept_8_2015.pdf new file mode 100644 index 0000000..d9ba746 Binary files /dev/null and b/docs/assets/data/HISAT2-first_release-Sept_8_2015.pdf differ diff --git a/docs/assets/img/bioinformatics_utsw_logo.png b/docs/assets/img/bioinformatics_utsw_logo.png new file mode 100644 index 0000000..65a0f96 Binary files /dev/null and b/docs/assets/img/bioinformatics_utsw_logo.png differ diff --git a/docs/assets/img/ccb_jhu_logo_tmp.png b/docs/assets/img/ccb_jhu_logo_tmp.png new file mode 100644 index 0000000..783a88b Binary files /dev/null and b/docs/assets/img/ccb_jhu_logo_tmp.png differ diff --git a/docs/assets/img/ogp.png b/docs/assets/img/ogp.png new file mode 100644 index 0000000..59e200a Binary files /dev/null and b/docs/assets/img/ogp.png differ diff --git a/docs/assets/js/header-link.js b/docs/assets/js/header-link.js new file mode 100644 index 0000000..8d500fe --- /dev/null +++ b/docs/assets/js/header-link.js @@ -0,0 +1,8 @@ +$(function () { + $("h1, h2, h3, h4, h5, h6").each(function () { + var id = $(this).attr("id"); + if (id) { + $(this).append($("").addClass("header-link").attr("href", "#" + id).html('')); + } + }); +}); diff --git a/docs/assets/js/script.js b/docs/assets/js/script.js new file mode 100644 
index 0000000..753da9b --- /dev/null +++ b/docs/assets/js/script.js @@ -0,0 +1,34 @@ +$(function () { + // focus on search input with '/' key. + $("body").on("keyup", function (e) { + e.stopPropagation(); + var slashKeys = [47, 111, 191]; + if (slashKeys.some(function (value) { return e.keyCode == value })) { + $("#search").focus(); + } + }); + + // add `target="_blank"` into all outer links. + var host = document.location.host; + $("a[href]").each(function() { + var re = new RegExp(host, "g"); + if ($(this).attr("href").match(/\/\//) && !$(this).attr("href").match(re)) { + $(this).attr("target", "_blank"); + } + }); + + // center and linkable all images. + var $images = $("article img:not(.emoji, .eye-catch)"); + $images.closest("p").css("text-align", "center"); + $images.each(function () { + var imgUrl = $(this).attr("src"); + var $a = $("").attr("href", imgUrl).attr("target", "_blank"); + $(this).wrap($a); + }); + + // stick aside. + var topSpacing = $(".site-aside").css("padding-top").replace(/px/, ""); + $(".site-aside .sticky").sticky({ + topSpacing: parseInt(topSpacing) + }); +}); diff --git a/docs/assets/js/search.js b/docs/assets/js/search.js new file mode 100644 index 0000000..47688e0 --- /dev/null +++ b/docs/assets/js/search.js @@ -0,0 +1,108 @@ +$(function () { + var query = getQuery(["q", "t", "a", "d"]); + + var targets; + switch (query.key) { + case "t": + targets = ["tags"]; + break; + case "a": + targets = ["author"]; + break; + case "d": + targets = ["date"]; + break; + case "q": + default: + targets = ["title", "tags", "author", "url", "date", "content"]; + break; + } + showPosts(query.words, targets); + + if (query.key == "q") { + $("#search").val(query.query).focus(); + } +}); + +function getQuery(keys) +{ + var query = ""; + var key = ""; + var words = []; + + keys.forEach(function (queryKey) { + var regex = RegExp("[?&]" + queryKey + "=([^&]+)", 'i'); + var matched; + if (matched = window.location.search.match(regex)) { + query = 
decodeURIComponent(matched[1]).replace(/( | |\+)+/g, ' '); + words = query.split(' '); + key = queryKey; + return false; // break; + } + return true; // continue; + }); + + return { query: query, key: key, words: words }; +} + +function showPosts(words, targets) +{ + var getJson = function () { + + var dfd = $.Deferred(); + $.ajax({ + url: baseurl + "/search.json", + dataType: "json", + timeout: 3000, // 3 sec + success: function (posts) { + var matchedPosts = []; + posts.forEach(function (post) { + + // concatenate target fields as a string. + var searchee = ""; + for (var i = 0; i < targets.length; i++) { + var target = post[targets[i]]; + var targetString = ""; + if (target instanceof Array) { + for (var j = 0; j < target.length; j++) { + targetString += target[j]; + } + } else if (typeof target == "object") { + for (key in target) { + targetString += target[key]; + } + } else { + targetString = target; + } + searchee += targetString; + } + + // matching. + var matched = true; + words.forEach(function (word) { + var regex = new RegExp(word, 'i'); + if (searchee.match(regex) == null) { + matched = false; + return false; // break; + } + return true; // continue; + }); + + if (matched) { + matchedPosts.push(post); + } + }); + + dfd.resolve(matchedPosts); + } + }); + + return dfd.promise(); + }; + + getJson().then(function (matchedPosts) { + matchedPosts.forEach(function (post) { + $("#search-results").find("#" + post.id).show(); + }); + }); +} diff --git a/docs/assets/lib/garand-sticky/jquery.sticky.js b/docs/assets/lib/garand-sticky/jquery.sticky.js new file mode 100644 index 0000000..7417c47 --- /dev/null +++ b/docs/assets/lib/garand-sticky/jquery.sticky.js @@ -0,0 +1,172 @@ +// Sticky Plugin v1.0.0 for jQuery +// ============= +// Author: Anthony Garand +// Improvements by German M. Bravo (Kronuz) and Ruud Kamphuis (ruudk) +// Improvements by Leonardo C. 
Daronco (daronco) +// Created: 2/14/2011 +// Date: 2/12/2012 +// Website: http://labs.anthonygarand.com/sticky +// Description: Makes an element on the page stick on the screen as you scroll +// It will only set the 'top' and 'position' of your element, you +// might need to adjust the width in some cases. + +(function($) { + var defaults = { + topSpacing: 0, + bottomSpacing: 0, + className: 'is-sticky', + wrapperClassName: 'sticky-wrapper', + center: false, + getWidthFrom: '', + responsiveWidth: false + }, + $window = $(window), + $document = $(document), + sticked = [], + windowHeight = $window.height(), + scroller = function() { + var scrollTop = $window.scrollTop(), + documentHeight = $document.height(), + dwh = documentHeight - windowHeight, + extra = (scrollTop > dwh) ? dwh - scrollTop : 0; + + for (var i = 0; i < sticked.length; i++) { + var s = sticked[i], + elementTop = s.stickyWrapper.offset().top, + etse = elementTop - s.topSpacing - extra; + + if (scrollTop <= etse) { + if (s.currentTop !== null) { + s.stickyElement + .css('width', '') + .css('position', '') + .css('top', ''); + s.stickyElement.trigger('sticky-end', [s]).parent().removeClass(s.className); + s.currentTop = null; + } + } + else { + var newTop = documentHeight - s.stickyElement.outerHeight() + - s.topSpacing - s.bottomSpacing - scrollTop - extra; + if (newTop < 0) { + newTop = newTop + s.topSpacing; + } else { + newTop = s.topSpacing; + } + if (s.currentTop != newTop) { + s.stickyElement + .css('width', s.stickyElement.width()) + .css('position', 'fixed') + .css('top', newTop); + + if (typeof s.getWidthFrom !== 'undefined') { + s.stickyElement.css('width', $(s.getWidthFrom).width()); + } + + s.stickyElement.trigger('sticky-start', [s]).parent().addClass(s.className); + s.currentTop = newTop; + } + } + } + }, + resizer = function() { + windowHeight = $window.height(); + + for (var i = 0; i < sticked.length; i++) { + var s = sticked[i]; + if (typeof s.getWidthFrom !== 'undefined' && 
s.responsiveWidth === true) { + s.stickyElement.css('width', $(s.getWidthFrom).width()); + } + } + }, + methods = { + init: function(options) { + var o = $.extend({}, defaults, options); + return this.each(function() { + var stickyElement = $(this); + + var stickyId = stickyElement.attr('id'); + var wrapperId = stickyId ? stickyId + '-' + defaults.wrapperClassName : defaults.wrapperClassName + var wrapper = $('
    ') + .attr('id', stickyId + '-sticky-wrapper') + .addClass(o.wrapperClassName); + stickyElement.wrapAll(wrapper); + + if (o.center) { + stickyElement.parent().css({width:stickyElement.outerWidth(),marginLeft:"auto",marginRight:"auto"}); + } + + if (stickyElement.css("float") == "right") { + stickyElement.css({"float":"none"}).parent().css({"float":"right"}); + } + + var stickyWrapper = stickyElement.parent(); + stickyWrapper.css('height', stickyElement.outerHeight()); + sticked.push({ + topSpacing: o.topSpacing, + bottomSpacing: o.bottomSpacing, + stickyElement: stickyElement, + currentTop: null, + stickyWrapper: stickyWrapper, + className: o.className, + getWidthFrom: o.getWidthFrom, + responsiveWidth: o.responsiveWidth + }); + }); + }, + update: scroller, + unstick: function(options) { + return this.each(function() { + var unstickyElement = $(this); + + var removeIdx = -1; + for (var i = 0; i < sticked.length; i++) + { + if (sticked[i].stickyElement.get(0) == unstickyElement.get(0)) + { + removeIdx = i; + } + } + if(removeIdx != -1) + { + sticked.splice(removeIdx,1); + unstickyElement.unwrap(); + unstickyElement.removeAttr('style'); + } + }); + } + }; + + // should be more efficient than using $window.scroll(scroller) and $window.resize(resizer): + if (window.addEventListener) { + window.addEventListener('scroll', scroller, false); + window.addEventListener('resize', resizer, false); + } else if (window.attachEvent) { + window.attachEvent('onscroll', scroller); + window.attachEvent('onresize', resizer); + } + + $.fn.sticky = function(method) { + if (methods[method]) { + return methods[method].apply(this, Array.prototype.slice.call(arguments, 1)); + } else if (typeof method === 'object' || !method ) { + return methods.init.apply( this, arguments ); + } else { + $.error('Method ' + method + ' does not exist on jQuery.sticky'); + } + }; + + $.fn.unstick = function(method) { + if (methods[method]) { + return methods[method].apply(this, 
Array.prototype.slice.call(arguments, 1)); + } else if (typeof method === 'object' || !method ) { + return methods.unstick.apply( this, arguments ); + } else { + $.error('Method ' + method + ' does not exist on jQuery.sticky'); + } + + }; + $(function() { + setTimeout(scroller, 0); + }); +})(jQuery); diff --git a/docs/favicon.ico b/docs/favicon.ico new file mode 100644 index 0000000..133f0ca Binary files /dev/null and b/docs/favicon.ico differ diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..a15e38d --- /dev/null +++ b/docs/index.html @@ -0,0 +1,14 @@ +--- +layout: default +--- + +{% assign pages = site.pages | where: "title", "Main" %} +
    +
    +
    + {{ pages[0].content }} +
    +
    +
    + + diff --git a/docs/search.json b/docs/search.json new file mode 100644 index 0000000..3070feb --- /dev/null +++ b/docs/search.json @@ -0,0 +1,16 @@ +--- +--- + +[ +{% for post in site.posts %} +{ +"id": {{ post.id | replace: '/', '-' | jsonify }}, +"title": {{ post.title | jsonify }}, +"author": {{ post.author | jsonify }}, +"tags": [{% for tag in post.tags%}{{ tag | jsonify }}{% unless forloop.last %}, {% endunless %}{% endfor %}], +"url": {{ post.url | jsonify }}, +"date": {{ post.date | date: '%Y-%m-%d' | jsonify }}, +"content": {{ post.content | strip_html | strip_newlines | jsonify }} +}{% unless forloop.last %}, {% endunless %} +{% endfor %} +] diff --git a/docs_jhu/README b/docs_jhu/README new file mode 100644 index 0000000..ed048dd --- /dev/null +++ b/docs_jhu/README @@ -0,0 +1,4 @@ +To populate this directory, change to the hisat2 directory and type +'make doc'. You must have pandoc installed: + + http://johnmacfarlane.net/pandoc/ diff --git a/docs_jhu/add.css b/docs_jhu/add.css new file mode 100644 index 0000000..849f79d --- /dev/null +++ b/docs_jhu/add.css @@ -0,0 +1,57 @@ +.pageStyle #leftside { + color: #666; +} + +.pageStyle #leftside a { + color: #0066B3; + text-decoration: none; +} + +.pageStyle #leftside h1 { + background: none; + margin: 0 0 10px; + padding: 10px 0; + font: bold 1.9em Arial,Verdana,sans-serif; +} + +.pageStyle #leftside h2 { + background: none; + margin: 0 0 10px; + padding: 10px 0; + font: bold 1.2em Arial,Verdana,sans-serif; +} +.pageStyle #leftside h3 { + background: none; + margin: 0 0 10px 5px; + padding: 10px 0; + font: 1.2em Arial,Verdana,sans-serif; +} + +.pageStyle #leftside table { + margin: 15px 0 0; +} + +.pageStyle #leftside td { + vertical-align: top; +} + +.pageStyle #leftside p { color:#444; } + + +.pageStyle #leftside td p { + margin-left:15px; +} + +.pageStyle #leftside h4 { + margin: 0px 15px 10px 10px; + padding: 10px 0px; + font: 1.1em Arial,Verdana,sans-serif; + background: none; +} + +.pageStyle 
#leftside ul { margin:0; padding-left:0; list-style-type: circle; } +.pageStyle #leftside #TOC ul { margin:0; padding-left:0; list-style-type: none; } +.pageStyle #leftside li { color:#444; margin-left:14px; } +.pageStyle #leftside #TOC li { margin-left:0; } +.pageStyle #leftside #TOC li li { margin-left:14px; } +.pageStyle #leftside p { padding: 0; margin:0 0 10px; } diff --git a/docs_jhu/faq.shtml b/docs_jhu/faq.shtml new file mode 100644 index 0000000..51ef428 --- /dev/null +++ b/docs_jhu/faq.shtml @@ -0,0 +1,45 @@ + + + + + +
    +
    +
    + +
    +
    +

    Frequently Asked Questions


    +
    +
      + + +
    +
    +
    +
    + + + + + + + + + diff --git a/docs_jhu/footer.inc.html b/docs_jhu/footer.inc.html new file mode 100644 index 0000000..8520a2d --- /dev/null +++ b/docs_jhu/footer.inc.html @@ -0,0 +1,7 @@ + diff --git a/docs_jhu/index.html b/docs_jhu/index.html new file mode 100644 index 0000000..23db360 --- /dev/null +++ b/docs_jhu/index.html @@ -0,0 +1,9 @@ + + + + HISAT2 (under construction) + + +

    + + diff --git a/docs_jhu/index.shtml b/docs_jhu/index.shtml new file mode 100644 index 0000000..07892a9 --- /dev/null +++ b/docs_jhu/index.shtml @@ -0,0 +1,154 @@ + + + + + +
    +
    + + + +
    +HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes (as well as to a single reference genome). Based on an extension of BWT for graphs [Sirén et al. 2014], we designed and implemented a graph FM index (GFM), an original approach and its first implementation to the best of our knowledge. In addition to using one global GFM index that represents a population of human genomes, HISAT2 uses a large set of small GFM indexes that collectively cover the whole genome (each index representing a genomic region of 56 Kbp, with 55,000 indexes needed to cover the human population). These small indexes (called local indexes), combined with several alignment strategies, enable rapid and accurate alignment of sequencing reads. This new indexing scheme is called a Hierarchical Graph FM index (HGFM). +
    +
    + Open Source Software +
    +
    +
    +
    + +
    +
    +

    HISAT2 2.1.0 release 6/8/2017

    +
    +
    + +

    HISAT2 2.0.5 release 11/4/2016

    + Version 2.0.5 is a minor release with the following changes. +
      +
    • Due to a policy change (HTTP to HTTPS) in using SRA data (`--sra-option`), users are strongly encouraged to use this version. As of 11/9/2016, NCBI will begin a permanent redirect to HTTPS, which means that previous versions of HISAT2 will soon no longer work with the `--sra-acc` option.
    • +
    • Implemented -I and -X options for specifying minimum and maximum fragment lengths. The options are valid only when used with --no-spliced-alignment, which is used for the alignment of DNA-seq reads.
    • +
    • Fixed some cases where reads with SNPs on their 5' ends were not properly aligned.
    • +
    • Implemented --no-softclip option to disable soft-clipping.
    • +
    • Implemented --max-seeds to specify the maximum number of seeds that HISAT2 will try to extend to full-length alignments (see the manual for details).
    • +
    +
    + +

    HISAT, StringTie and Ballgown protocol published at Nature Protocols 8/11/2016

    + +

    HISAT2 2.0.4 Windows binary available here, thanks to Andre Osorio Falcao 5/24/2016

    + +

    HISAT2 2.0.4 release 5/18/2016

    + Version 2.0.4 is a minor release with the following changes. +
      +
    • Improved template length estimation (the 9th column of the SAM format) of RNA-seq reads by taking introns into account.
    • +
    • Introduced two options, --remove-chrname and --add-chrname, to remove "chr" from reference names or add "chr" to reference names in the alignment output, respectively (the 3rd column of the SAM format).
    • +
    • Changed the maximum of mapping quality (the 5th column of the SAM format) from 255 to 60. Note that 255 is an undefined value according to the SAM manual and some programs would not work with this value (255) properly.
    • +
    • Fixed NH (number of hits) in the alignment output.
    • +
    • HISAT2 allows indels of any length pertaining to minimum alignment score (previously, the maximum length of indels was 3 bp).
    • +
    • Fixed several cases where alignments went beyond reference sequences.
    • +
    • Fixed reporting duplicate alignments.
    • +
    +
    + +

    HISAT2 2.0.3-beta release 3/28/2016

    + Version 2.0.3-beta is a minor release with the following changes. +
      +
    • Fixed graph index building when using both SNPs and transcripts. As a result, genome_snp_tran indexes here on the HISAT2 website have been rebuilt.
    • +
    • Included some missing files needed to follow the small test example (see the manual for details).
    • +
    +
    + +

    HISAT2 2.0.2-beta release 3/17/2016

    + Note (3/19/2016): this version is slightly updated to handle reporting splice sites with the correct chromosome names.
    + Version 2.0.2-beta is a major release with the following changes. +
      +
    • Memory-mapped IO (--mm option) works now.
    • +
    • Building a linear index can now be done using multiple threads.
    • +
    • Changed the minimum score for alignment in keeping with read lengths, so it's now --score-min L,0.0,-0.2, meaning a minimum score of -20 for 100-bp reads and -30 for 150-bp reads.
    • +
    • Fixed a bug that the same read was written into a file multiple times when --un-conc was used.
    • +
    • Fixed another bug that caused reads to map beyond reference sequences.
    • +
    • Introduced --haplotype option in the hisat2-build (index building), which is used with --snp option together to incorporate those SNP combinations present in the human population. This option also prevents graph construction from exploding due to exponential combinations of SNPs in small genomic regions.
    • +
    • Provided a new python script to extract SNPs and haplotypes from VCF files, hisat2_extract_snps_haplotypes_VCF.py
    • +
    • Changed several python script names as follows
    • +
        +
      1. extract_splice_sites.py to hisat2_extract_splice_sites.py
      2. +
      3. extract_exons.py to hisat2_extract_exons.py
      4. +
      5. extract_snps.py to hisat2_extract_snps_haplotypes_UCSC.py
      6. +
      +
    +
    + +

    HISAT2 2.0.1-beta release 11/19/2015

    + Version 2.0.1-beta is a maintenance release with the following changes. +
      +
    • Fixed a bug that caused reads to map beyond reference sequences.
    • +
    • Fixed a deadlock issue that happened very rarely.
    • +
    • Fixed a bug that led to illegal memory access when reading SNP information.
    • +
    • Fixed a system-specific bug related to popcount instruction.
    • +
    +
    +

    HISAT2 2.0.0-beta release 9/8/2015 - first release

    + We extended the BWT/FM index to incorporate genomic differences among individuals into the reference genome, while keeping memory requirements low enough to fit the entire index onto a desktop computer. Using this novel Hierarchical Graph FM index (HGFM) approach, we built a new alignment system, HISAT2, with an index that incorporates ~12.3M common SNPs from the dbSNP database. HISAT2 provides greater alignment accuracy for reads containing SNPs. +
      +
    • + HISAT2's index size for the human reference genome and 12.3 million common SNPs is 6.2GB (the memory footprint of HISAT2 is 6.7GB). The SNPs consist of 11 million single nucleotide polymorphisms, 728,000 deletions, and 555,000 insertions. The insertions and deletions used in this index are small (usually <20bp). +
    • +
    • + HISAT2 comes with several index types: +
        +
      1. Hierarchical FM index (HFM) for a reference genome (index base: genome)
      2. +
      3. Hierarchical Graph FM index (HGFM) for a reference genome plus SNPs (index base: genome_snp)
      4. +
      5. Hierarchical Graph FM index (HGFM) for a reference genome plus transcripts (index base: genome_tran)
      6. +
      7. Hierarchical Graph FM index (HGFM) for a reference genome plus SNPs and transcripts (index base: genome_snp_tran)
      8. +
      +
    • +
    • + HISAT2 is a successor to both HISAT and TopHat2. We recommend that HISAT and TopHat2 users switch to HISAT2. +
        +
      1. HISAT2 can be considered an enhanced version of HISAT with many improvements and bug fixes. The alignment speed and memory requirements of HISAT2 are virtually the same as those of HISAT when using the HFM index (genome).
      2. +
      3. When using graph-based indexes (HGFM), the runtime of HISAT2 is slightly slower than HISAT (30~80% additional CPU time). +
      4. HISAT2 allows for mapping reads directly against transcripts, similar to that of TopHat2 (use genome_tran or genome_snp_tran).
      5. +
      +
    • +
    • + When reads contain SNPs, the SNP information is provided as an optional field in the SAM output of HISAT2 (e.g., Zs:Z:1|S|rs3747203,97|S|rs16990981 - see the manual for details). This feature enables fast and sensitive genotyping in downstream analyses. Note that there is no alignment penalty for mismatches, insertions, and deletions if they correspond to known SNPs. +
    • +
    • + HISAT2 provides options for transcript assemblers (e.g., StringTie and Cufflinks) to work better with the alignment from HISAT2 (see options such as --dta and --dta-cufflinks). +
    • +
    • + Some slides about HISAT2 are found here and we are preparing detailed documentation. +
    • +
    • + We plan to incorporate a larger set of SNPs and structural variations (SV) into this index (e.g., long insertions/deletions, inversions, and translocations). +
    • +
    +
    +

    The HISAT2 source code is available in a public GitHub repository (5/30/2015).

    +
    +
    +
    + + + + + + + + + diff --git a/docs_jhu/indexes.txt b/docs_jhu/indexes.txt new file mode 100644 index 0000000..727288e --- /dev/null +++ b/docs_jhu/indexes.txt @@ -0,0 +1,5 @@ +Each of the HISAT2 indexes available here on the website comes with a shell script (e.g. make_grch38.sh) that provides instructions (or shell commands) for downloading a reference sequence, gene annotations, and SNPs, and building a HISAT2 index. You can use the script to build the same HISAT2 index we provide. + +HISAT2 indexes named genome_tran or genome_snp_tran use Ensembl gene annotations, which include many more transcripts than RefSeq annotations, due to the inclusion of annotations as predicted by software. + +The HISAT2 indexes for the human genome include only primary assembly. If you choose to include alternative sequences introduced in GRCh38 and build your own HISAT2 indexes, then please be aware that those alternative sequences are nearly identical to the primary assembly in GRCh38 and as a result some reads may map to many more locations compared to when using the primary assembly only. HISAT2 often skips those multi-mapped reads if the number of potential locations is more than the value specified by the -k option. You may want to use a higher value for the -k option to resolve the issue. diff --git a/docs_jhu/manual.inc.html b/docs_jhu/manual.inc.html new file mode 100644 index 0000000..13b48dc --- /dev/null +++ b/docs_jhu/manual.inc.html @@ -0,0 +1,1262 @@ + + + +

    Introduction

    +

    What is HISAT2?

    +

    HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (whole-genome, transcriptome, and exome sequencing data) against the general human population (as well as against a single reference genome). Based on GCSA (an extension of BWT for a graph), we designed and implemented a graph FM index (GFM), an original approach and its first implementation to the best of our knowledge. In addition to using one global GFM index that represents the general population, HISAT2 uses a large set of small GFM indexes that collectively cover the whole genome (each index representing a genomic region of 56 Kbp, with 55,000 indexes needed to cover the human population). These small indexes (called local indexes) combined with several alignment strategies enable effective alignment of sequencing reads. This new indexing scheme is called the Hierarchical Graph FM index (HGFM). We have developed HISAT2 based on the HISAT and Bowtie2 implementations. HISAT2 outputs alignments in SAM format, enabling interoperation with a large number of other tools (e.g. SAMtools, GATK) that use SAM. HISAT2 is distributed under the GPLv3 license, and it runs on the command line under Linux, Mac OS X and Windows.

    +

    Obtaining HISAT2

    +

    Download HISAT2 sources and binaries from the Releases sections on the right side. Binaries are available for Intel architectures (x86_64) running Linux, and Mac OS X.

    +

    Building from source

    +

    Building HISAT2 from source requires a GNU-like environment with GCC, GNU Make and other basics. It should be possible to build HISAT2 on most vanilla Linux installations or on a Mac installation with Xcode installed. HISAT2 can also be built on Windows using Cygwin or MinGW (MinGW recommended). For a MinGW build the choice of what compiler is to be used is important since this will determine if a 32 or 64 bit code can be successfully compiled using it. If there is a need to generate both 32 and 64 bit on the same machine then a multilib MinGW has to be properly installed. MSYS, the zlib library, and depending on architecture pthreads library are also required. We are recommending a 64 bit build since it has some clear advantages in real life research problems. In order to simplify the MinGW setup it might be worth investigating popular MinGW personal builds since these are coming already prepared with most of the toolchains needed.

    +

    First, download the source package from the Releases section on the right side. Unzip the file, change to the unzipped directory, and build the HISAT2 tools by running GNU make (usually with the command make, but sometimes with gmake) with no arguments. If building with MinGW, run make from the MSYS environment.

    +

    HISAT2 uses the multithreading software model in order to speed up execution times on SMP architectures where this is possible. On POSIX platforms (like Linux, Mac OS, etc.) it needs the pthread library. Although it is possible to use the pthread library on non-POSIX platforms like Windows, for performance reasons HISAT2 will try to use Windows native multithreading if possible.

    +

    For the support of SRA data access in HISAT2, please download and install the NCBI-NGS toolkit. When running make, specify additional variables as follows: make USE_SRA=1 NCBI_NGS_DIR=/path/to/NCBI-NGS-directory NCBI_VDB_DIR=/path/to/NCBI-NGS-directory, where NCBI_NGS_DIR and NCBI_VDB_DIR will be used in the Makefile for -I and -L compilation options. For example, $(NCBI_NGS_DIR)/include and $(NCBI_NGS_DIR)/lib64 will be used.

    +

    Running HISAT2

    +

    Adding to PATH

    +

    By adding your new HISAT2 directory to your PATH environment variable, you ensure that whenever you run hisat2, hisat2-build or hisat2-inspect from the command line, you will get the version you just installed without having to specify the entire path. This is recommended for most users. To do this, follow your operating system's instructions for adding the directory to your PATH.

    +

    If you would like to install HISAT2 by copying the HISAT2 executable files to an existing directory in your PATH, make sure that you copy all the executables, including hisat2, hisat2-align-s, hisat2-align-l, hisat2-build, hisat2-build-s, hisat2-build-l, hisat2-inspect, hisat2-inspect-s and hisat2-inspect-l.

    +

    Reporting

    +

    The reporting mode governs how many alignments HISAT2 looks for, and how to report them.

    +

    In general, when we say that a read has an alignment, we mean that it has a valid alignment. When we say that a read has multiple alignments, we mean that it has multiple alignments that are valid and distinct from one another.

    +

    By default, HISAT2 may soft-clip reads near their 5' and 3' ends. Users can control this behavior by setting different penalties for soft-clipping (--sp) or by disallowing soft-clipping ([--no-softclip]).

    +

    Distinct alignments map a read to different places

    +

    Two alignments for the same individual read are "distinct" if they map the same read to different places. Specifically, we say that two alignments are distinct if there are no alignment positions where a particular read offset is aligned opposite a particular reference offset in both alignments with the same orientation. E.g. if the first alignment is in the forward orientation and aligns the read character at read offset 10 to the reference character at chromosome 3, offset 3,445,245, and the second alignment is also in the forward orientation and also aligns the read character at read offset 10 to the reference character at chromosome 3, offset 3,445,245, they are not distinct alignments.

    +

    Two alignments for the same pair are distinct if either the mate 1s in the two paired-end alignments are distinct or the mate 2s in the two alignments are distinct or both.

    +

    Default mode: search for one or more alignments, report each

    +

    HISAT2 searches for up to N distinct, primary alignments for each read, where N equals the integer specified with the -k parameter. Primary alignments are alignments whose alignment score is equal to or higher than that of any other alignment. It is possible that multiple distinct alignments have the same score. That is, if -k 2 is specified, HISAT2 will search for at most 2 distinct alignments. The alignment score for a paired-end alignment equals the sum of the alignment scores of the individual mates. Each reported read or pair alignment beyond the first has the SAM 'secondary' bit (which equals 256) set in its FLAGS field. See the SAM specification for details.

    +

    HISAT2 does not "find" alignments in any specific order, so for reads that have more than N distinct, valid alignments, HISAT2 does not guarantee that the N alignments reported are the best possible in terms of alignment score. Still, this mode can be effective and fast in situations where the user cares more about whether a read aligns (or aligns a certain number of times) than where exactly it originated.

    +

    Alignment summary

    +

    When HISAT2 finishes running, it prints messages summarizing what happened. These messages are printed to the "standard error" ("stderr") filehandle. For datasets consisting of unpaired reads, the summary might look like this:

    +
    20000 reads; of these:
    +  20000 (100.00%) were unpaired; of these:
    +    1247 (6.24%) aligned 0 times
    +    18739 (93.69%) aligned exactly 1 time
    +    14 (0.07%) aligned >1 times
    +93.77% overall alignment rate
    +

    For datasets consisting of pairs, the summary might look like this:

    +
    10000 reads; of these:
    +  10000 (100.00%) were paired; of these:
    +    650 (6.50%) aligned concordantly 0 times
    +    8823 (88.23%) aligned concordantly exactly 1 time
    +    527 (5.27%) aligned concordantly >1 times
    +    ----
    +    650 pairs aligned concordantly 0 times; of these:
    +      34 (5.23%) aligned discordantly 1 time
    +    ----
    +    616 pairs aligned 0 times concordantly or discordantly; of these:
    +      1232 mates make up the pairs; of these:
    +        660 (53.57%) aligned 0 times
    +        571 (46.35%) aligned exactly 1 time
    +        1 (0.08%) aligned >1 times
    +96.70% overall alignment rate
    +

    The indentation indicates how subtotals relate to totals.

    +

    Wrapper

    +

    The hisat2, hisat2-build and hisat2-inspect executables are actually wrapper scripts that call binary programs as appropriate. The wrappers shield users from having to distinguish between "small" and "large" index formats, discussed briefly in the following section. Also, the hisat2 wrapper provides some key functionality, like the ability to handle compressed inputs, and the functionality for --un, --al and related options.

    +

    It is recommended that you always run the hisat2 wrappers and not run the binaries directly.

    +

    Small and large indexes

    +

    hisat2-build can index reference genomes of any size. For genomes less than about 4 billion nucleotides in length, hisat2-build builds a "small" index using 32-bit numbers in various parts of the index. When the genome is longer, hisat2-build builds a "large" index using 64-bit numbers. Small indexes are stored in files with the .ht2 extension, and large indexes are stored in files with the .ht2l extension. The user need not worry about whether a particular index is small or large; the wrapper scripts will automatically build and use the appropriate index.

    +

    Performance tuning

    +
      +
    1. If your computer has multiple processors/cores, use -p

      +

      The -p option causes HISAT2 to launch a specified number of parallel search threads. Each thread runs on a different processor/core and all threads find alignments in parallel, increasing alignment throughput by approximately a multiple of the number of threads (though in practice, speedup is somewhat worse than linear).

    2. +
    +

    Command Line

    +

    Setting function options

    +

    Some HISAT2 options specify a function rather than an individual number or setting. In these cases the user specifies three parameters: (a) a function type F, (b) a constant term B, and (c) a coefficient A. The available function types are constant (C), linear (L), square-root (S), and natural log (G). The parameters are specified as F,B,A - that is, the function type, the constant term, and the coefficient are separated by commas with no whitespace. The constant term and coefficient may be negative and/or floating-point numbers.

    +

    For example, if the function specification is L,-0.4,-0.6, then the function defined is:

    +
    f(x) = -0.4 + -0.6 * x
    +

    If the function specification is G,1,5.4, then the function defined is:

    +
    f(x) = 1.0 + 5.4 * ln(x)
    +

    See the documentation for the option in question to learn what the parameter x is for. For example, in the case of the --score-min option, the function f(x) sets the minimum alignment score necessary for an alignment to be considered valid, and x is the read length.

    +

    Usage

    +
    hisat2 [options]* -x <hisat2-idx> {-1 <m1> -2 <m2> | -U <r> | --sra-acc <SRA accession number>} [-S <hit>]
    +

    Main arguments

    +
    + +
    -x <hisat2-idx>
    +
    + +

    The basename of the index for the reference genome. The basename is the name of any of the index files up to but not including the final .1.ht2 / etc. hisat2 looks for the specified index first in the current directory, then in the directory specified in the HISAT2_INDEXES environment variable.

    +
    + +
    -1 <m1>
    +
    + +

    Comma-separated list of files containing mate 1s (filename usually includes _1), e.g. -1 flyA_1.fq,flyB_1.fq. Sequences specified with this option must correspond file-for-file and read-for-read with those specified in <m2>. Reads may be a mix of different lengths. If - is specified, hisat2 will read the mate 1s from the "standard in" or "stdin" filehandle.

    +
    + +
    -2 <m2>
    +
    + +

    Comma-separated list of files containing mate 2s (filename usually includes _2), e.g. -2 flyA_2.fq,flyB_2.fq. Sequences specified with this option must correspond file-for-file and read-for-read with those specified in <m1>. Reads may be a mix of different lengths. If - is specified, hisat2 will read the mate 2s from the "standard in" or "stdin" filehandle.

    +
    + +
    -U <r>
    +
    + +

    Comma-separated list of files containing unpaired reads to be aligned, e.g. lane1.fq,lane2.fq,lane3.fq,lane4.fq. Reads may be a mix of different lengths. If - is specified, hisat2 gets the reads from the "standard in" or "stdin" filehandle.

    +
    + +
    --sra-acc <SRA accession number>
    +
    + +

    Comma-separated list of SRA accession numbers, e.g. --sra-acc SRR353653,SRR353654. Information about read types is available at http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?sp=runinfo&acc=sra-acc&retmode=xml, where sra-acc is SRA accession number. If users run HISAT2 on a computer cluster, it is recommended to disable SRA-related caching (see the instruction at SRA-MANUAL).

    +
    + +
    -S <hit>
    +
    + +

    File to write SAM alignments to. By default, alignments are written to the "standard out" or "stdout" filehandle (i.e. the console).

    +
    + +

    Options

    +

    Input options

    + + + + + + + + + + + + +
    + +
    -q
    +
    + +

    Reads (specified with <m1>, <m2>, <s>) are FASTQ files. FASTQ files usually have extension .fq or .fastq. FASTQ is the default format. See also: --solexa-quals and --int-quals.

    +
    + +
    --qseq
    +
    + +

    Reads (specified with <m1>, <m2>, <s>) are QSEQ files. QSEQ files usually end in _qseq.txt. See also: --solexa-quals and --int-quals.

    +
    + +
    -f
    +
    + +

    Reads (specified with <m1>, <m2>, <s>) are FASTA files. FASTA files usually have extension .fa, .fasta, .mfa, .fna or similar. FASTA files do not have a way of specifying quality values, so when -f is set, the result is as if --ignore-quals is also set.

    +
    + +
    -r
    +
    + +

    Reads (specified with <m1>, <m2>, <s>) are files with one input sequence per line, without any other information (no read names, no qualities). When -r is set, the result is as if --ignore-quals is also set.

    +
    + +
    -c
    +
    + +

    The read sequences are given on command line. I.e. <m1>, <m2> and <singles> are comma-separated lists of reads rather than lists of read files. There is no way to specify read names or qualities, so -c also implies --ignore-quals.

    +
    + +
    -s/--skip <int>
    +
    + +

    Skip (i.e. do not align) the first <int> reads or pairs in the input.

    +
    + +
    -u/--qupto <int>
    +
    + +

    Align the first <int> reads or read pairs from the input (after the -s/--skip reads or pairs have been skipped), then stop. Default: no limit.

    +
    + +
    -5/--trim5 <int>
    +
    + +

    Trim <int> bases from 5' (left) end of each read before alignment (default: 0).

    +
    + +
    -3/--trim3 <int>
    +
    + +

    Trim <int> bases from 3' (right) end of each read before alignment (default: 0).

    +
    + +
    --phred33
    +
    + +

    Input qualities are ASCII chars equal to the Phred quality plus 33. This is also called the "Phred+33" encoding, which is used by the very latest Illumina pipelines.

    +
    + +
    --phred64
    +
    + +

    Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding.

    +
    + +
    --solexa-quals
    +
    + +

    Convert input qualities from Solexa (which can be negative) to Phred (which can't). This scheme was used in older Illumina GA Pipeline versions (prior to 1.3). Default: off.

    +
    + +
    --int-quals
    +
    + +

    Quality values are represented in the read input file as space-separated ASCII integers, e.g., 40 40 30 40..., rather than ASCII characters, e.g., II?I.... Integers are treated as being on the Phred quality scale unless --solexa-quals is also specified. Default: off.

    +
    + +

    Alignment options

    + + + + + + + +
    + +
    --n-ceil <func>
    +
    + +

    Sets a function governing the maximum number of ambiguous characters (usually Ns and/or .s) allowed in a read as a function of read length. For instance, specifying L,0,0.15 sets the N-ceiling function f to f(x) = 0 + 0.15 * x, where x is the read length. See also: [setting function options]. Reads exceeding this ceiling are filtered out. Default: L,0,0.15.

    +
    + +
    --ignore-quals
    +
    + +

    When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value. I.e. input is treated as though all quality values are high. This is also the default behavior when the input doesn't specify quality values (e.g. in -f, -r, or -c modes).

    +
    + +
    --nofw/--norc
    +
    + +

    If --nofw is specified, hisat2 will not attempt to align unpaired reads to the forward (Watson) reference strand. If --norc is specified, hisat2 will not attempt to align unpaired reads against the reverse-complement (Crick) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --nofw causes hisat2 to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand. Default: both strands enabled.

    +
    + +

    Scoring options

    + + + + + + + + + + +
    + +
    --mp MX,MN
    +
    + +

    Sets the maximum (MX) and minimum (MN) mismatch penalties, both integers. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an N. If --ignore-quals is specified, the number subtracted equals MX. Otherwise, the number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value. Default: MX = 6, MN = 2.

    +
    + +
    --sp MX,MN
    +
    + +

    Sets the maximum (MX) and minimum (MN) penalties for soft-clipping per base, both integers. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position. The number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value. Default: MX = 2, MN = 1.

    +
    + +
    --no-softclip
    +
    + +

    Disallow soft-clipping.

    +
    + +
    --np <int>
    +
    + +

    Sets penalty for positions where the read, reference, or both, contain an ambiguous character such as N. Default: 1.

    +
    + +
    --rdg <int1>,<int2>
    +
    + +

    Sets the read gap open (<int1>) and extend (<int2>) penalties. A read gap of length N gets a penalty of <int1> + N * <int2>. Default: 5, 3.

    +
    + +
    --rfg <int1>,<int2>
    +
    + +

    Sets the reference gap open (<int1>) and extend (<int2>) penalties. A reference gap of length N gets a penalty of <int1> + N * <int2>. Default: 5, 3.

    +
    + +
    --score-min <func>
    +
    + +

    Sets a function governing the minimum alignment score needed for an alignment to be considered "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying L,0,-0.6 sets the minimum-score function f to f(x) = 0 + -0.6 * x, where x is the read length. See also: [setting function options]. The default is L,0,-0.2.

    +
    + +

    Spliced alignment options

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    --pen-cansplice <int>
    +
    + +

    Sets the penalty for each pair of canonical splice sites (e.g. GT/AG). Default: 0.

    +
    + +
    --pen-noncansplice <int>
    +
    + +

    Sets the penalty for each pair of non-canonical splice sites (e.g. non-GT/AG). Default: 12.

    +
    + +
    --pen-canintronlen <func>
    +
    + +

    Sets the penalty for long introns with canonical splice sites so that alignments with shorter introns are preferred to those with longer ones. Default: G,-8,1

    +
    + +
    --pen-noncanintronlen <func>
    +
    + +

    Sets the penalty for long introns with noncanonical splice sites so that alignments with shorter introns are preferred to those with longer ones. Default: G,-8,1

    +
    + +
    --min-intronlen <int>
    +
    + +

    Sets minimum intron length. Default: 20

    +
    + +
    --max-intronlen <int>
    +
    + +

    Sets maximum intron length. Default: 500000

    +
    + +
    --known-splicesite-infile <path>
    +
    + +

    With this mode, you can provide a list of known splice sites, which HISAT2 makes use of to align reads with small anchors.
    You can create such a list using python hisat2_extract_splice_sites.py genes.gtf > splicesites.txt, where hisat2_extract_splice_sites.py is included in the HISAT2 package, genes.gtf is a gene annotation file, and splicesites.txt is a list of splice sites with which you provide HISAT2 in this mode. Note that it is better to use indexes built using annotated transcripts (such as genome_tran or genome_snp_tran), which works better than using this option. It has no effect to provide splice sites that are already included in the indexes.

    +
    + +
    --novel-splicesite-outfile <path>
    +
    + +

    In this mode, HISAT2 reports a list of splice sites in the file:
    chromosome name <tab> genomic position of the flanking base on the left side of an intron <tab> genomic position of the flanking base on the right <tab> strand (+, -, and .) '.' indicates an unknown strand for non-canonical splice sites.

    +
    + +
    --novel-splicesite-infile <path>
    +
    + +

    With this mode, you can provide a list of novel splice sites that were generated from the above option "--novel-splicesite-outfile".

    +
    + +
    --no-temp-splicesite
    +
    + +

    HISAT2, by default, makes use of splice sites found by earlier reads to align later reads in the same run, in particular, reads with small anchors (<= 15 bp).
    This option disables this default alignment strategy.

    +
    + +
    --no-spliced-alignment
    +
    + +

    Disable spliced alignment.

    +
    +
    --rna-strandness <string>
    +
    + +

    Specify strand-specific information: the default is unstranded.
    For single-end reads, use F or R. 'F' means a read corresponds to a transcript. 'R' means a read corresponds to the reverse complemented counterpart of a transcript. For paired-end reads, use either FR or RF.
    With this option being used, every read alignment will have an XS attribute tag: '+' means a read belongs to a transcript on '+' strand of genome. '-' means a read belongs to a transcript on '-' strand of genome.

    +(TopHat has a similar option, --library-type option, where fr-firststrand corresponds to R and RF; fr-secondstrand corresponds to F and FR.) +
    +
    --tmo/--transcriptome-mapping-only
    +
    + +

    Report only those alignments within known transcripts.

    +
    +
    --dta/--downstream-transcriptome-assembly
    +
    + +

    Report alignments tailored for transcript assemblers including StringTie. With this option, HISAT2 requires longer anchor lengths for de novo discovery of splice sites. This leads to fewer alignments with short-anchors, which helps transcript assemblers improve significantly in computation and memory usage.

    +
    + +

    Report alignments tailored specifically for Cufflinks. In addition to what HISAT2 does with the above option (--dta), with this option, HISAT2 looks for novel splice sites with three signals (GT/AG, GC/AG, AT/AC), but all user-provided splice sites are used irrespective of their signals. HISAT2 produces an optional field, XS:A:[+-], for every spliced alignment.

    +
    +
    --no-templatelen-adjustment
    +
    + +

    Disables template length adjustment for RNA-seq reads.

    +
    + +

    Reporting options

    + + + + + + + +
    + +
    -k <int>
    +
    + +

    It searches for at most <int> distinct, primary alignments for each read. Primary alignments are alignments whose alignment score is equal to or higher than that of any other alignment. The search terminates when it can't find more distinct valid alignments, or when it finds <int>, whichever happens first. The alignment score for a paired-end alignment equals the sum of the alignment scores of the individual mates. Each reported read or pair alignment beyond the first has the SAM 'secondary' bit (which equals 256) set in its FLAGS field. For reads that have more than <int> distinct, valid alignments, hisat2 does not guarantee that the <int> alignments reported are the best possible in terms of alignment score. Default: 5 (HFM) or 10 (HGFM)

    +

    Note: HISAT2 is not designed with large values for -k in mind, and when aligning reads to long, repetitive genomes large -k can be very, very slow.

    +
    + +
    --max-seeds <int>
    +
    + +

    HISAT2, like other aligners, uses seed-and-extend approaches. HISAT2 tries to extend seeds to full-length alignments. In HISAT2, --max-seeds is used to control the maximum number of seeds that will be extended. HISAT2 extends up to these many seeds and skips the rest of the seeds. Large values for --max-seeds may improve alignment sensitivity, but HISAT2 is not designed with large values for --max-seeds in mind, and when aligning reads to long, repetitive genomes large --max-seeds can be very, very slow. The default value is the maximum of 5 and the value that comes with -k.

    +
    + +
    --secondary
    +
    + +

    Report secondary alignments.

    +
    + +

    Paired-end options

    + + + + + + +
    + +
    -I/--minins <int>
    +
    + +

    The minimum fragment length for valid paired-end alignments. This option is valid only with --no-spliced-alignment. E.g. if -I 60 is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as -X is also satisfied). A 19-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -I constraint is applied with respect to the untrimmed mates.

    +

    The larger the difference between -I and -X, the slower HISAT2 will run. This is because larger differences between -I and -X require that HISAT2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very efficient.

    +

    Default: 0 (essentially imposing no minimum)

    +
    + +
    -X/--maxins <int>
    +
    + +

    The maximum fragment length for valid paired-end alignments. This option is valid only with --no-spliced-alignment. E.g. if -X 100 is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). A 61-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -X constraint is applied with respect to the untrimmed mates, not the trimmed mates.

    +

    The larger the difference between -I and -X, the slower HISAT2 will run. This is because larger differences between -I and -X require that HISAT2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very efficient.

    +

    Default: 500.

    +
    + +
    --fr/--rf/--ff
    +
    + +

    The upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand. E.g., if --fr is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (-I and -X) are met, that alignment is valid. Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. --rf likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. --ff requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented. Default: --fr (appropriate for Illumina's Paired-end Sequencing Assay).

    +
    + +
    --no-mixed
    +
    + +

    By default, when hisat2 cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates. This option disables that behavior.

    +
    + +
    --no-discordant
    +
    + +

    By default, hisat2 looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior.

    +
    + +

    Output options

    + + + + + + + + + + + + + + + + +
    + +
    -t/--time
    +
    + +

    Print the wall-clock time required to load the index files and align the reads. This is printed to the "standard error" ("stderr") filehandle. Default: off.

    +
    + +
    --un <path>
    +--un-gz <path>
    +--un-bz2 <path>
    +
    + +

    Write unpaired reads that fail to align to file at <path>. These reads correspond to the SAM records with the FLAGS 0x4 bit set and neither the 0x40 nor 0x80 bits set. If --un-gz is specified, output will be gzip compressed. If --un-bz2 is specified, output will be bzip2 compressed. Reads written in this way will appear exactly as they did in the input file, without any modification (same sequence, same name, same quality string, same quality encoding). Reads will not necessarily appear in the same order as they did in the input.

    +
    + +
    --al <path>
    +--al-gz <path>
    +--al-bz2 <path>
    +
    + +

    Write unpaired reads that align at least once to file at <path>. These reads correspond to the SAM records with the FLAGS 0x4, 0x40, and 0x80 bits unset. If --al-gz is specified, output will be gzip compressed. If --al-bz2 is specified, output will be bzip2 compressed. Reads written in this way will appear exactly as they did in the input file, without any modification (same sequence, same name, same quality string, same quality encoding). Reads will not necessarily appear in the same order as they did in the input.

    +
    + +
    --un-conc <path>
    +--un-conc-gz <path>
    +--un-conc-bz2 <path>
    +
    + +

    Write paired-end reads that fail to align concordantly to file(s) at <path>. These reads correspond to the SAM records with the FLAGS 0x4 bit set and either the 0x40 or 0x80 bit set (depending on whether it's mate #1 or #2). .1 and .2 strings are added to the filename to distinguish which file contains mate #1 and mate #2. If a percent symbol, %, is used in <path>, the percent symbol is replaced with 1 or 2 to make the per-mate filenames. Otherwise, .1 or .2 are added before the final dot in <path> to make the per-mate filenames. Reads written in this way will appear exactly as they did in the input files, without any modification (same sequence, same name, same quality string, same quality encoding). Reads will not necessarily appear in the same order as they did in the inputs.

    +
    + +
    --al-conc <path>
    +--al-conc-gz <path>
    +--al-conc-bz2 <path>
    +
    + +

    Write paired-end reads that align concordantly at least once to file(s) at <path>. These reads correspond to the SAM records with the FLAGS 0x4 bit unset and either the 0x40 or 0x80 bit set (depending on whether it's mate #1 or #2). .1 and .2 strings are added to the filename to distinguish which file contains mate #1 and mate #2. If a percent symbol, %, is used in <path>, the percent symbol is replaced with 1 or 2 to make the per-mate filenames. Otherwise, .1 or .2 are added before the final dot in <path> to make the per-mate filenames. Reads written in this way will appear exactly as they did in the input files, without any modification (same sequence, same name, same quality string, same quality encoding). Reads will not necessarily appear in the same order as they did in the inputs.

    +
    + +
    --quiet
    +
    + +

    Print nothing besides alignments and serious errors.

    +
    + +
    --summary-file
    +
    + +

    Print alignment summary to this file.

    +
    + +
    --new-summary
    +
    + +

    Print alignment summary in a new style, which is more machine-friendly.

    +
    + +
    --met-file <path>
    +
    + +

    Write hisat2 metrics to file <path>. Having alignment metrics can be useful for debugging certain problems, especially performance issues. See also: --met. Default: metrics disabled.

    +
    + +
    --met-stderr
    +
    + +

    Write hisat2 metrics to the "standard error" ("stderr") filehandle. This is not mutually exclusive with --met-file. Having alignment metrics can be useful for debugging certain problems, especially performance issues. See also: --met. Default: metrics disabled.

    +
    + +
    --met <int>
    +
    + +

    Write a new hisat2 metrics record every <int> seconds. Only matters if either --met-stderr or --met-file is specified. Default: 1.

    +
    + +

    SAM options

    + + + + + + + + + + + + +
    + +
    --no-unal
    +
    + +

    Suppress SAM records for reads that failed to align.

    +
    + +
    --no-hd
    +
    + +

    Suppress SAM header lines (starting with @).

    +
    + +
    --no-sq
    +
    + +

    Suppress @SQ SAM header lines.

    +
    + +
    --rg-id <text>
    +
    + +

    Set the read group ID to <text>. This causes the SAM @RG header line to be printed, with <text> as the value associated with the ID: tag. It also causes the RG:Z: extra field to be attached to each SAM output record, with value set to <text>.

    +
    + +
    --rg <text>
    +
    + +

    Add <text> (usually of the form TAG:VAL, e.g. SM:Pool1) as a field on the @RG header line. Note: in order for the @RG line to appear, --rg-id must also be specified. This is because the ID tag is required by the SAM Spec. Specify --rg multiple times to set multiple fields. See the SAM Spec for details about what fields are legal.

    +
    + +
    --remove-chrname
    +
    + +

    Remove 'chr' from reference names in alignment (e.g., chr18 to 18)

    +
    + +
    --add-chrname
    +
    + +

    Add 'chr' to reference names in alignment (e.g., 18 to chr18)

    +
    + +
    --omit-sec-seq
    +
    + +

    When printing secondary alignments, HISAT2 by default will write out the SEQ and QUAL strings. Specifying this option causes HISAT2 to print an asterisk in those fields instead.

    +
    + +

    Performance options

    + + + + + +
    + +
    -o/--offrate <int>
    +
    + +

    Override the offrate of the index with <int>. If <int> is greater than the offrate used to build the index, then some row markings are discarded when the index is read into memory. This reduces the memory footprint of the aligner but requires more time to calculate text offsets. <int> must be greater than the value used to build the index.

    +
    + +
    -p/--threads NTHREADS
    +
    + +

    Launch NTHREADS parallel search threads (default: 1). Threads will run on separate processors/cores and synchronize when parsing reads and outputting alignments. Searching for alignments is highly parallel, and speedup is close to linear. Increasing -p increases HISAT2's memory footprint. E.g. when aligning to a human genome index, increasing -p from 1 to 8 increases the memory footprint by a few hundred megabytes. This option is only available if hisat2 is linked with the pthreads library (i.e. if BOWTIE_PTHREADS=0 is not specified at build time).

    +
    + +
    --reorder
    +
    + +

    Guarantees that output SAM records are printed in an order corresponding to the order of the reads in the original input file, even when -p is set greater than 1. Specifying --reorder and setting -p greater than 1 causes HISAT2 to run somewhat slower and use somewhat more memory than if --reorder were not specified. Has no effect if -p is set to 1, since output order will naturally correspond to input order in that case.

    +
    + +
    --mm
    +
    + +

    Use memory-mapped I/O to load the index, rather than typical file I/O. Memory-mapping allows many concurrent hisat2 processes on the same computer to share the same memory image of the index (i.e. you pay the memory overhead just once). This facilitates memory-efficient parallelization of hisat2 in situations where using -p is not possible or not preferable.

    +
    + +

    Other options

    + + + + + +
    + +
    --qc-filter
    +
    + +

    Filter out reads for which the QSEQ filter field is non-zero. Only has an effect when read format is --qseq. Default: off.

    +
    + +
    --seed <int>
    +
    + +

    Use <int> as the seed for pseudo-random number generator. Default: 0.

    +
    + +
    --non-deterministic
    +
    + +

    Normally, HISAT2 re-initializes its pseudo-random generator for each read. It seeds the generator with a number derived from (a) the read name, (b) the nucleotide sequence, (c) the quality sequence, (d) the value of the --seed option. This means that if two reads are identical (same name, same nucleotides, same qualities) HISAT2 will find and report the same alignment(s) for both, even if there was ambiguity. When --non-deterministic is specified, HISAT2 re-initializes its pseudo-random generator for each read using the current time. This means that HISAT2 will not necessarily report the same alignment for two identical reads. This is counter-intuitive for some users, but might be more appropriate in situations where the input consists of many identical reads.

    +
    + +
    --version
    +
    + +

    Print version information and quit.

    +
    + +
    -h/--help
    +
    + +

    Print usage information and quit.

    +
    + +

    SAM output

    +

    Following is a brief description of the SAM format as output by hisat2. For more details, see the SAM format specification.

    +

    By default, hisat2 prints a SAM header with @HD, @SQ and @PG lines. When one or more --rg arguments are specified, hisat2 will also print an @RG line that includes all user-specified --rg tokens separated by tabs.

    +

    Each subsequent line describes an alignment or, if the read failed to align, a read. Each line is a collection of at least 12 fields separated by tabs; from left to right, the fields are:

    +
      +
    1. Name of read that aligned.

      +

      Note that the SAM specification disallows whitespace in the read name. If the read name contains any whitespace characters, HISAT2 will truncate the name at the first whitespace character. This is similar to the behavior of other tools.

    2. +
    3. Sum of all applicable flags. Flags relevant to HISAT2 are:

      +
      + +
      1
      +
      + +

      The read is one of a pair

      +
      + +
      2
      +
      + +

      The alignment is one end of a proper paired-end alignment

      +
      + +
      4
      +
      + +

      The read has no reported alignments

      +
      + +
      8
      +
      + +

      The read is one of a pair and has no reported alignments

      +
      + +
      16
      +
      + +

      The alignment is to the reverse reference strand

      +
      + +
      32
      +
      + +

      The other mate in the paired-end alignment is aligned to the reverse reference strand

      +
      + +
      64
      +
      + +

      The read is mate 1 in a pair

      +
      + +
      128
      +
      + +

      The read is mate 2 in a pair

      +
      + +

      Thus, an unpaired read that aligns to the reverse reference strand will have flag 16. A paired-end read that aligns and is the first mate in the pair will have flag 83 (= 64 + 16 + 2 + 1).

    4. +
    5. Name of reference sequence where alignment occurs

    6. +
    7. 1-based offset into the forward reference strand where leftmost character of the alignment occurs

    8. +
    9. Mapping quality

    10. +
    11. CIGAR string representation of alignment

    12. +
    13. Name of reference sequence where mate's alignment occurs. Set to = if the mate's reference sequence is the same as this alignment's, or * if there is no mate.

    14. +
    15. 1-based offset into the forward reference strand where leftmost character of the mate's alignment occurs. Offset is 0 if there is no mate.

    16. +
    17. Inferred fragment length. Size is negative if the mate's alignment occurs upstream of this alignment. Size is 0 if the mates did not align concordantly. However, size is non-0 if the mates aligned discordantly to the same chromosome.

    18. +
    19. Read sequence (reverse-complemented if aligned to the reverse strand)

    20. +
    21. ASCII-encoded read qualities (reversed if the read aligned to the reverse strand). The encoded quality values are on the Phred quality scale and the encoding is ASCII-offset by 33 (ASCII char !), similarly to a FASTQ file.

    22. +
    23. Optional fields. Fields are tab-separated. hisat2 outputs zero or more of these optional fields for each alignment, depending on the type of the alignment:

      + + + + + + + + + + + + + + + + + + + + + + + + +
      + +
      AS:i:<N>
      +
      + +

      Alignment score. Can be negative. Only present if SAM record is for an aligned read.

      +
      + +
      ZS:i:<N>
      +
      +

      Alignment score for the best-scoring alignment found other than the alignment reported. Can be negative. Only present if the SAM record is for an aligned read and more than one alignment was found for the read. Note that, when the read is part of a concordantly-aligned pair, this score could be greater than [AS:i].

      +
      + +
      YS:i:<N>
      +
      + +

      Alignment score for opposite mate in the paired-end alignment. Only present if the SAM record is for a read that aligned as part of a paired-end alignment.

      +
      + +
      XN:i:<N>
      +
      + +

      The number of ambiguous bases in the reference covering this alignment. Only present if SAM record is for an aligned read.

      +
      + +
      XM:i:<N>
      +
      + +

      The number of mismatches in the alignment. Only present if SAM record is for an aligned read.

      +
      + +
      XO:i:<N>
      +
      + +

      The number of gap opens, for both read and reference gaps, in the alignment. Only present if SAM record is for an aligned read.

      +
      + +
      XG:i:<N>
      +
      + +

      The number of gap extensions, for both read and reference gaps, in the alignment. Only present if SAM record is for an aligned read.

      +
      + +
      NM:i:<N>
      +
      + +

      The edit distance; that is, the minimal number of one-nucleotide edits (substitutions, insertions and deletions) needed to transform the read string into the reference string. Only present if SAM record is for an aligned read.

      +
      + +
      YF:Z:<S>
      +
      + +

      String indicating reason why the read was filtered out. See also: [Filtering]. Only appears for reads that were filtered out.

      +
      + +
      YT:Z:<S>
      +
      + +

      Value of UU indicates the read was not part of a pair. Value of CP indicates the read was part of a pair and the pair aligned concordantly. Value of DP indicates the read was part of a pair and the pair aligned discordantly. Value of UP indicates the read was part of a pair but the pair failed to align either concordantly or discordantly.

      +
      + +
      MD:Z:<S>
      +
      + +

      A string representation of the mismatched reference bases in the alignment. See SAM format specification for details. Only present if SAM record is for an aligned read.

      +
      + +
      XS:A:<A>
      +
      + +

      Values of + and - indicate the read is mapped to transcripts on sense and anti-sense strands, respectively. Spliced alignments need to have this field, which is required in Cufflinks and StringTie.
      We can report this field for the canonical-splice site (GT/AG), but not for non-canonical splice sites. You can direct HISAT2 not to output such alignments (involving non-canonical splice sites) using "--pen-noncansplice 1000000".

      +
      + +
      NH:i:<N>
      +
      + +

      The number of mapped locations for the read or the pair.

      +
      + +
      Zs:Z:<S>
      +
      + +When the alignment of a read involves SNPs that are in the index, this optional field indicates exactly where the read involves the SNPs. It is similar to the above MD:Z field. For example, Zs:Z:1|S|rs3747203,97|S|rs16990981 indicates that the second base of the read corresponds to a known SNP (ID: rs3747203), and that 97 bases after the third base (the base after the second one), at the 100th base, the read involves another known SNP (ID: rs16990981). 'S' indicates a single nucleotide polymorphism. 'D' and 'I' indicate a deletion and an insertion, respectively. +
      +
    24. +
    +

    The hisat2-build indexer

    +

    hisat2-build builds a HISAT2 index from a set of DNA sequences. hisat2-build outputs a set of 8 files with suffixes .1.ht2, .2.ht2, .3.ht2, .4.ht2, .5.ht2, .6.ht2, .7.ht2, and .8.ht2. In the case of a large index these suffixes will have a ht2l termination. These files together constitute the index: they are all that is needed to align reads to that reference. The original sequence FASTA files are no longer used by HISAT2 once the index is built.

    +

    Use of Karkkainen's blockwise algorithm allows hisat2-build to trade off between running time and memory usage. hisat2-build has three options governing how it makes this trade: [-p/--packed], --bmax/--bmaxdivn, and --dcv. By default, hisat2-build will automatically search for the settings that yield the best running time without exhausting memory. This behavior can be disabled using the -a/--noauto option.

    +

    The indexer provides options pertaining to the "shape" of the index, e.g. --offrate governs the fraction of Burrows-Wheeler rows that are "marked" (i.e., the density of the suffix-array sample; see the original FM Index paper for details). All of these options are potentially profitable trade-offs depending on the application. They have been set to defaults that are reasonable for most cases according to our experiments. See Performance tuning for details.

    +

    hisat2-build can generate either small or large indexes. The wrapper will decide which based on the length of the input genome. If the reference does not exceed 4 billion characters but a large index is preferred, the user can specify --large-index to force hisat2-build to build a large index instead.

    +

    The HISAT2 index is based on the FM Index of Ferragina and Manzini, which in turn is based on the Burrows-Wheeler transform. The algorithm used to build the index is based on the blockwise algorithm of Karkkainen.

    +

    Command Line

    +

    Usage:

    +
    hisat2-build [options]* <reference_in> <ht2_base>
    +

    Notes

    +
    If you use --snp, --ss, and/or --exon, hisat2-build will need about 200GB RAM for the human genome size as index building involves a graph construction. 
    +Otherwise, you will be able to build an index on your desktop with 8GB RAM.
    +

    Main arguments

    +
    + +
    <reference_in>
    +
    + +

    A comma-separated list of FASTA files containing the reference sequences to be aligned to, or, if -c is specified, the sequences themselves. E.g., <reference_in> might be chr1.fa,chr2.fa,chrX.fa,chrY.fa, or, if -c is specified, this might be GGTCATCCT,ACGGGTCGT,CCGTTCTATGCGGCTTA.

    +
    + +
    <ht2_base>
    +
    + +

    The basename of the index files to write. By default, hisat2-build writes files named NAME.1.ht2, NAME.2.ht2, NAME.3.ht2, NAME.4.ht2, NAME.5.ht2, NAME.6.ht2, NAME.7.ht2, and NAME.8.ht2 where NAME is <ht2_base>.

    +
    + +

    Options

    + + + + +
    + +
    -f
    +
    + +

    The reference input files (specified as <reference_in>) are FASTA files (usually having extension .fa, .mfa, .fna or similar).

    +
    + +
    -c
    +
    + +

    The reference sequences are given on the command line. I.e. <reference_in> is a comma-separated list of sequences rather than a list of FASTA files.

    +
    + +
    --large-index
    +
    + +

    Force hisat2-build to build a large index, even if the reference is less than ~ 4 billion nucleotides long.

    +
    + +
    -a/--noauto
    +
    + +

    Disable the default behavior whereby hisat2-build automatically selects values for the --bmax, --dcv and [--packed] parameters according to available memory. Instead, the user may specify values for those parameters. If memory is exhausted during indexing, an error message will be printed; it is up to the user to try new parameters.

    +
    + +
    --bmax <int>
    +
    + +

    The maximum number of suffixes allowed in a block. Allowing more suffixes per block makes indexing faster, but increases peak memory usage. Setting this option overrides any previous setting for --bmax, or --bmaxdivn. Default (in terms of the --bmaxdivn parameter) is --bmaxdivn 4. This is configured automatically by default; use -a/--noauto to configure manually.

    +
    + +
    --bmaxdivn <int>
    +
    + +

    The maximum number of suffixes allowed in a block, expressed as a fraction of the length of the reference. Setting this option overrides any previous setting for --bmax, or --bmaxdivn. Default: --bmaxdivn 4. This is configured automatically by default; use -a/--noauto to configure manually.

    +
    + +
    --dcv <int>
    +
    + +

    Use <int> as the period for the difference-cover sample. A larger period yields less memory overhead, but may make suffix sorting slower, especially if repeats are present. Must be a power of 2 no greater than 4096. Default: 1024. This is configured automatically by default; use -a/--noauto to configure manually.

    +
    + +
    --nodc
    +
    + +

    Disable use of the difference-cover sample. Suffix sorting becomes quadratic-time in the worst case (where the worst case is an extremely repetitive reference). Default: off.

    +
    + +
    -r/--noref
    +
    + +

    Do not build the NAME.3.ht2 and NAME.4.ht2 portions of the index, which contain a bitpacked version of the reference sequences and are used for paired-end alignment.

    +
    + +
    -3/--justref
    +
    + +

    Build only the NAME.3.ht2 and NAME.4.ht2 portions of the index, which contain a bitpacked version of the reference sequences and are used for paired-end alignment.

    +
    + +
    -o/--offrate <int>
    +
    + +

    To map alignments back to positions on the reference sequences, it's necessary to annotate ("mark") some or all of the Burrows-Wheeler rows with their corresponding location on the genome. -o/--offrate governs how many rows get marked: the indexer will mark every 2^<int> rows. Marking more rows makes reference-position lookups faster, but requires more memory to hold the annotations at runtime. The default is 4 (every 16th row is marked; for human genome, annotations occupy about 680 megabytes).

    +
    + +
    -t/--ftabchars <int>
    +
    + +

    The ftab is the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first <int> characters of the query. A larger <int> yields a larger lookup table but faster query times. The ftab has size 4^(<int>+1) bytes. The default setting is 10 (ftab is 4MB).

    +
    + +
    --localoffrate <int>
    +
    + +

    This option governs how many rows get marked in a local index: the indexer will mark every 2^<int> rows. Marking more rows makes reference-position lookups faster, but requires more memory to hold the annotations at runtime. The default is 3 (every 8th row is marked, this occupies about 16KB per local index).

    +
    + +
    --localftabchars <int>
    +
    + +

    The local ftab is the lookup table in a local index. The default setting is 6 (ftab is 8KB per local index).

    +
    + +
    -p <int>
    +
    + +

    Launch <int> parallel build threads (default: 1).

    +
    + +
    --snp <path>
    +
    + +

    Provide a list of SNPs (in HISAT2's own format) as follows (five columns).

    +

    SNP ID <tab> snp type (single, deletion, or insertion) <tab> chromosome name <tab> zero-offset based genomic position of a SNP <tab> alternative base (single), the length of SNP (deletion), or insertion sequence (insertion)

    +

    For example, rs58784443 single 13 18447947 T

    +

    Use hisat2_extract_snps_haplotypes_UCSC.py (in the HISAT2 package) to extract SNPs and haplotypes from a dbSNP file (e.g. http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/snp144Common.txt.gz), or hisat2_extract_snps_haplotypes_VCF.py to extract SNPs and haplotypes from a VCF file (e.g. ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions/ALL.chr22.phase3_shapeit2_mvncall_integrated_v3plus_nounphased.rsID.genotypes.GRCh38_dbSNP_no_SVs.vcf.gz).

    +
    + +
    --haplotype <path>
    +
    + +

    Provide a list of haplotypes (in HISAT2's own format) as follows (five columns).

    +

    Haplotype ID <tab> chromosome name <tab> zero-offset based left coordinate of haplotype <tab> zero-offset based right coordinate of haplotype <tab> a comma separated list of SNP ids in the haplotype

    +

    For example, ht35 13 18446877 18446945 rs12381094,rs12381056,rs192016659,rs538569910

    +

    See the above option, --snp, about how to extract haplotypes. This option is not required, but haplotype information can keep the index construction from exploding and reduce the index size substantially.

    +
    + +
    --ss <path>
    +
    + +

    Note this option should be used with the following --exon option. Provide a list of splice sites (in HISAT2's own format) as follows (four columns).

    +

    chromosome name <tab> zero-offset based genomic position of the flanking base on the left side of an intron <tab> zero-offset based genomic position of the flanking base on the right <tab> strand

    +

    Use hisat2_extract_splice_sites.py (in the HISAT2 package) to extract splice sites from a GTF file.

    +
    + +
    --exon <path>
    +
    + +

    Note this option should be used with the above --ss option. Provide a list of exons (in HISAT2's own format) as follows (three columns).

    +

    chromosome name <tab> zero-offset based left genomic position of an exon <tab> zero-offset based right genomic position of an exon

    +

    Use hisat2_extract_exons.py (in the HISAT2 package) to extract exons from a GTF file.

    +
    + +
    --seed <int>
    +
    + +

    Use <int> as the seed for the pseudo-random number generator.

    +
    + +
    --cutoff <int>
    +
    + +

    Index only the first <int> bases of the reference sequences (cumulative across sequences) and ignore the rest.

    +
    + +
    -q/--quiet
    +
    + +

    hisat2-build is verbose by default. With this option hisat2-build will print only error messages.

    +
    + +
    -h/--help
    +
    + +

    Print usage information and quit.

    +
    + +
    --version
    +
    + +

    Print version information and quit.

    +
    + +

    The hisat2-inspect index inspector

    +

    hisat2-inspect extracts information from a HISAT2 index about what kind of index it is and what reference sequences were used to build it. When run without any options, the tool will output a FASTA file containing the sequences of the original references (with all non-A/C/G/T characters converted to Ns). It can also be used to extract just the reference sequence names using the -n/--names option or a more verbose summary using the -s/--summary option.

    +

    Command Line

    +

    Usage:

    +
    hisat2-inspect [options]* <ht2_base>
    +

    Main arguments

    +
    + +
    <ht2_base>
    +
    + +

    The basename of the index to be inspected. The basename is the name of any of the index files but with the .X.ht2 suffix omitted. hisat2-inspect first looks in the current directory for the index files, then in the directory specified in the HISAT2_INDEXES environment variable.

    +
    + +

    Options

    +
    + +
    -a/--across <int>
    +
    + +

    When printing FASTA output, output a newline character every <int> bases (default: 60).

    +
    + +
    -n/--names
    +
    + +

    Print reference sequence names, one per line, and quit.

    +
    + +
    -s/--summary
    +
    + +

    Print a summary that includes information about index settings, as well as the names and lengths of the input sequences. The summary has this format:

    +
    Colorspace  <0 or 1>
    +SA-Sample   1 in <sample>
    +FTab-Chars  <chars>
    +Sequence-1  <name>  <len>
    +Sequence-2  <name>  <len>
    +...
    +Sequence-N  <name>  <len>
    +

    Fields are separated by tabs. Colorspace is always set to 0 for HISAT2.

    +
    + +
    --snp
    +
    + +

    Print SNPs, and quit.

    +
    + +
    --ss
    +
    + +

    Print splice sites, and quit.

    +
    + +
    --ss-all
    +
    + +

    Print splice sites including those not in the global index, and quit.

    +
    + +
    --exon
    +
    + +

    Print exons, and quit.

    +
    + +
    -v/--verbose
    +
    + +

    Print verbose output (for debugging).

    +
    + +
    --version
    +
    + +

    Print version information and quit.

    +
    + +
    -h/--help
    +
    + +

    Print usage information and quit.

    +
    + +

    Getting started with HISAT2

    +

    HISAT2 comes with some example files to get you started. The example files are not scientifically significant; these files will simply let you start running HISAT2 and downstream tools right away.

    +

    First follow the manual instructions to obtain HISAT2. Set the HISAT2_HOME environment variable to point to the new HISAT2 directory containing the hisat2, hisat2-build and hisat2-inspect binaries. This is important, as the HISAT2_HOME variable is used in the commands below to refer to that directory.

    +

    Indexing a reference genome

    +

    To create an index for the genomic region (1 million bps from the human chromosome 22 between 20,000,000 and 20,999,999) included with HISAT2, create a new temporary directory (it doesn't matter where), change into that directory, and run:

    +
    $HISAT2_HOME/hisat2-build $HISAT2_HOME/example/reference/22_20-21M.fa --snp $HISAT2_HOME/example/reference/22_20-21M.snp 22_20-21M_snp
    +

    The command should print many lines of output then quit. When the command completes, the current directory will contain eight new files that all start with 22_20-21M_snp and end with .1.ht2, .2.ht2, .3.ht2, .4.ht2, .5.ht2, .6.ht2, .7.ht2, and .8.ht2. These files constitute the index - you're done!

    +

    You can use hisat2-build to create an index for a set of FASTA files obtained from any source, including sites such as UCSC, NCBI, and Ensembl. When indexing multiple FASTA files, specify all the files using commas to separate file names. For more details on how to create an index with hisat2-build, see the manual section on index building. You may also want to bypass this process by obtaining a pre-built index.

    +

    Aligning example reads

    +

    Stay in the directory created in the previous step, which now contains the 22_20-21M_snp index files. Next, run:

    +
    $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -U $HISAT2_HOME/example/reads/reads_1.fa -S eg1.sam
    +

    This runs the HISAT2 aligner, which aligns a set of unpaired reads to the genome region using the index generated in the previous step. The alignment results in SAM format are written to the file eg1.sam, and a short alignment summary is written to the console. (Actually, the summary is written to the "standard error" or "stderr" filehandle, which is typically printed to the console.)

    +

    To see the first few lines of the SAM output, run:

    +
    head eg1.sam
    +

    You will see something like this:

    +
    @HD     VN:1.0  SO:unsorted
    +@SQ     SN:22:20000001-21000000 LN:1000000
    +@PG     ID:hisat2       PN:hisat2       VN:2.0.0-beta
    +1       0       22:20000001-21000000    397984  255     100M    *       0       0       GCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACT    IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII    AS:i:0  XN:i:0  XM:i:0  XO:i:0  XG:i:0  NM:i:0  MD:Z:100        YT:Z:UU NH:i:1
    +2       16      22:20000001-21000000    398131  255     100M    *       0       0       ATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCT    IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII    AS:i:0  XN:i:0  XM:i:0  XO:i:0  XG:i:0  NM:i:0  MD:Z:80A19      YT:Z:UU NH:i:1  Zs:Z:80|S|rs576159895
    +3       16      22:20000001-21000000    398222  255     100M    *       0       0       TGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGTCAGAGCTGCAGTACTTGGCGATCTCAAA    IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII    AS:i:0  XN:i:0  XM:i:0  XO:i:0  XG:i:0  NM:i:0  MD:Z:16A83      YT:Z:UU NH:i:1  Zs:Z:16|S|rs2629364
    +4       16      22:20000001-21000000    398247  255     90M200N10M      *       0       0       CAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGTCAGAGCTGCAGTACTTGGCGATCTCAAACCGCTGCACCAGGAAGTCGATCCAG    IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII    AS:i:0  XN:i:0  XM:i:0  XO:i:0  XG:i:0  NM:i:0  MD:Z:100        YT:Z:UU XS:A:-  NH:i:1
    +5       16      22:20000001-21000000    398194  255     100M    *       0       0       GGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCACTTGGT    IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII    AS:i:0  XN:i:0  XM:i:0  XO:i:0  XG:i:0  NM:i:0  MD:Z:17A26A55   YT:Z:UU NH:i:1  Zs:Z:17|S|rs576159895,26|S|rs2629364
    +6       0       22:20000001-21000000    398069  255     100M    *       0       0       CAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCA    IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII    AS:i:0  XN:i:0  XM:i:0  XO:i:0  XG:i:0  NM:i:0  MD:Z:100        YT:Z:UU NH:i:1
    +7       0       22:20000001-21000000    397896  255     100M    *       0       0       GTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGA    IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII    AS:i:0  XN:i:0  XM:i:0  XO:i:0  XG:i:0  NM:i:0  MD:Z:31G68      YT:Z:UU NH:i:1  Zs:Z:31|S|rs562662261
    +8       0       22:20000001-21000000    398150  255     100M    *       0       0       AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG    IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII    AS:i:0  XN:i:0  XM:i:0  XO:i:0  XG:i:0  NM:i:0  MD:Z:61A26A11   YT:Z:UU NH:i:1  Zs:Z:61|S|rs576159895,26|S|rs2629364
    +9       16      22:20000001-21000000    398329  255     8M200N92M       *       0       0       ACCAGGAAGTCGATCCAGATGTAGTGGGGGGTCACTTCGGGGGGACAGGGTTTGGGTTGACTTGCTTCCGAGGCAGCCAGGGGGTCTGCTTCCTTTATCT    IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII    AS:i:0  XN:i:0  XM:i:0  XO:i:0  XG:i:0  NM:i:0  MD:Z:100        YT:Z:UU XS:A:-  NH:i:1
    +10      16      22:20000001-21000000    398184  255     100M    *       0       0       CTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATC    IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII    AS:i:0  XN:i:0  XM:i:0  XO:i:0  XG:i:0  NM:i:0  MD:Z:27A26A45   YT:Z:UU NH:i:1  Zs:Z:27|S|rs576159895,26|S|rs2629364
    +

    The first few lines (beginning with @) are SAM header lines, and the rest of the lines are SAM alignments, one line per read or mate. See the HISAT2 manual section on SAM output and the SAM specification for details about how to interpret the SAM file format.

    +

    Paired-end example

    +

    To align paired-end reads included with HISAT2, stay in the same directory and run:

    +
    $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -1 $HISAT2_HOME/example/reads/reads_1.fa -2 $HISAT2_HOME/example/reads/reads_2.fa -S eg2.sam
    +

    This aligns a set of paired-end reads to the reference genome, with results written to the file eg2.sam.

    +

    Using SAMtools/BCFtools downstream

    +

    SAMtools is a collection of tools for manipulating and analyzing SAM and BAM alignment files. BCFtools is a collection of tools for calling variants and manipulating VCF and BCF files, and it is typically distributed with SAMtools. Using these tools together allows you to get from alignments in SAM format to variant calls in VCF format. This example assumes that samtools and bcftools are installed and that the directories containing these binaries are in your PATH environment variable.

    +

    Run the paired-end example:

    +
    $HISAT2_HOME/hisat2 -f -x $HISAT2_HOME/example/index/22_20-21M_snp -1 $HISAT2_HOME/example/reads/reads_1.fa -2 $HISAT2_HOME/example/reads/reads_2.fa -S eg2.sam
    +

    Use samtools view to convert the SAM file into a BAM file. BAM is the binary format corresponding to the SAM text format. Run:

    +
    samtools view -bS eg2.sam > eg2.bam
    +

    Use samtools sort to convert the BAM file to a sorted BAM file. The following command requires samtools version 1.2 or higher.

    +
    samtools sort eg2.bam -o eg2.sorted.bam
    +

    We now have a sorted BAM file called eg2.sorted.bam. Sorted BAM is a useful format because the alignments are (a) compressed, which is convenient for long-term storage, and (b) sorted, which is convenient for variant discovery. To generate variant calls in VCF format, run:

    +
    samtools mpileup -uf $HISAT2_HOME/example/reference/22_20-21M.fa eg2.sorted.bam | bcftools view -bvcg - > eg2.raw.bcf
    +

    Then to view the variants, run:

    +
    bcftools view eg2.raw.bcf
    +

    See the official SAMtools guide to Calling SNPs/INDELs with SAMtools/BCFtools for more details and variations on this process.

    diff --git a/docs_jhu/manual.shtml b/docs_jhu/manual.shtml new file mode 100644 index 0000000..73d5209 --- /dev/null +++ b/docs_jhu/manual.shtml @@ -0,0 +1,37 @@ + + + + + +
    +
    + +
    + + + + +
    + + +
    +

    Table of Contents

    + +
    +
    +
    + + + + + + + + + diff --git a/docs_jhu/sidebar.inc.shtml b/docs_jhu/sidebar.inc.shtml new file mode 100644 index 0000000..069393e --- /dev/null +++ b/docs_jhu/sidebar.inc.shtml @@ -0,0 +1,409 @@ +

    Site Map

    +
    + +
    + +

    News and Updates

    +
    +
      + + +
      New releases and related tools will be announced through the Bowtie + mailing list.
      +
    +
    + +

    Getting Help

    +
    +
      + + +
      + + Please use hisat2.genomics@gmail.com for + private communications only. Please do not email technical questions to HISAT2 contributors directly.
      +
    +
    + +

    Releases

    +
    + +Please cite:

    Kim D, Langmead B and Salzberg SL. HISAT: a fast spliced aligner with low memory requirements. Nature Methods 2015

    +
    + +

    Indexes (see note)

    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + H. sapiens, GRCh38 +
    + genome + + 3.9 GB +
    + genome_snp + + 4.6 GB +
    + genome_tran + + 4.1 GB +
    + genome_snp_tran + + 4.6 GB +
    + H. sapiens, UCSC hg38 +
    + genome + + 4.1 GB +
    + H. sapiens, UCSC hg38 and Refseq gene annotations referred to in the Nature Protocol paper +
    + genome_tran + + 4.2 GB +
    + H. sapiens, GRCh37 +
    + genome + + 3.9 GB +
    + genome_snp + + 4.5 GB +
    + genome_tran + + 4.0 GB +
    + genome_snp_tran + + 4.5 GB +
    + H. sapiens, UCSC hg19 +
    + genome + + 3.9 GB +
    + M. musculus, GRCm38 +
    + genome + + 3.7 GB +
    + genome_snp + + 4.0 GB +
    + genome_tran + + 3.7 GB +
    + genome_snp_tran + + 4.0 GB +
    + M. musculus, UCSC mm10 +
    + genome + + 3.5 GB +
    + R. norvegicus, UCSC rn6 +
    + genome + + 3.7 GB +
    + D. melanogaster, BDGP6 +
    + genome + + 0.2 GB +
    + genome_tran + + 0.2 GB +
    + D. melanogaster, UCSC dm6 +
    + genome + + 0.2 GB +
    + C. elegans, WBcel235 +
    + genome + + 0.1 GB +
    + genome_tran + + 0.1 GB +
    + C. elegans, UCSC ce10 +
    + genome + + 0.1 GB +
    + S. cerevisiae, R64-1-1 +
    + genome + + 0.01 GB +
    + genome_tran + + 0.01 GB +
    + S. cerevisiae, UCSC sacCer3 +
    + genome + + 0.01 GB +
    +
    + * genome: HFM index for reference +
    + * genome_snp: HGFM index for reference plus SNPs +
    + * genome_tran: HGFM index for reference plus transcripts +
    + * genome_snp_tran: HGFM index for reference plus SNPs and transcripts +
    + more indexes +
    +
    + +

    Related Tools

    +
    +
      +
    • HISAT-genotype: Next-generation analysis of human genomes
    • +
    • HISAT: Fast and sensitive spliced alignment
    • +
    • Bowtie2: Ultrafast read alignment
    • +
    • TopHat2: Spliced read mapper for RNA-Seq
    • +
    • Cufflinks: Isoform assembly and quantitation for RNA-Seq
    • +
    • StringTie: Transcript assembly and quantification for RNA-Seq
    • +
    +
    + +

    Publications

    + + +

    Contributors

    + + +

    Links

    + diff --git a/docs_jhu/strip_markdown.pl b/docs_jhu/strip_markdown.pl new file mode 100644 index 0000000..0ecc595 --- /dev/null +++ b/docs_jhu/strip_markdown.pl @@ -0,0 +1,45 @@ +#!/usr/bin/perl -w + +## +# strip_markdown.pl +# +# Used to convert MANUAL.markdown to MANUAL. Leaves all manual content, but +# strips away some of the clutter that makes it hard to read the markdown. +# + +use strict; +use warnings; + +my $lastBlank = 0; + +while(<>) { + # Skip comments + next if /^\s*/; + # Skip internal links + next if /\[.*\]: #/; + # Skip HTML + next if /^\s?\s?\s?<.*>\s*$/; + # Skip HTML + next if /^\s*\s*$/; + # Strip [`...`] + s/\[`/`/g; + s/`\]/`/g; + # Strip [#...] + #s/\[#[^\]]*\]//g; + # Strip (#...) + s/\(#[^\)]*\)//g; + # Turn hashes into spaces + #s/^####/ /; + #s/^###/ /; + if(/^\s*$/) { + next if $lastBlank; + $lastBlank = 1; + } else { + $lastBlank = 0; + } + print $_; +} diff --git a/docs_jhu/style.css b/docs_jhu/style.css new file mode 100644 index 0000000..b4014e1 --- /dev/null +++ b/docs_jhu/style.css @@ -0,0 +1,306 @@ +/* +Stylesheet for the free sNews15_1 template +from http://www.free-css-templates.com +*/ + +/* Reset all margins and paddings for browsers */ +* { + padding: 0; + margin: 0; +} + +body { + font: .8em Verdana, Arial, Sans-Serif; + line-height: 1.6em; + margin: 0; + /* background-image: url(../images/bg.jpg); */ + /* background-repeat: repeat */ +} + +#wrap { margin: 0 auto; width: 95% } + +/* TOP HEADER -------- */ +#top { + margin: 0 auto; + padding: 0; + background:#1E6BAC url(../images/ccbstrip.jpg) repeat-x top; + height: 141px; +} +#top h1 { padding: 10px 0 0 25px; color: #FFF; font-size: 240%; background: transparent;} +#top h2 { padding: 0px 0 0 25px; color: #bbb; font-size: 100%; background: transparent;} +#top .padding { padding-top: 5px; } +/* +#top .lefts { + background: transparent url(../images/topl.jpg) no-repeat left; + height: 81px; +} +#top .rights { + background: transparent url(../images/topr.jpg) no-repeat 
right; + float: right; + height: 81px; + width: 18px; +} +*/ +/* SEARCH BOX AND BUTTON ----------*/ +#search { float: right; padding: 10px 25px 0 0; } + +#search input.text { + border: 1px solid #eee; + display: inline; + margin-top: 5px; + width: 120px; + height: 12px; + font-size: 10px; + } + #search input.searchbutton { + border: 0; + background: transparent; + color: #FFF; + cursor: pointer; + font: bold 0.8em Arial, Arial, Sans-Serif + } + +#subheader { + clear: both; + border-top: 1px dotted #888; + border-bottom: 1px dotted #888; + background: #eaeaea; + color: #505050; + padding: 1em; + margin: 15px 0px 10px 0px; + +} +#subheader a { text-decoration: none; /* border-bottom: 1px dashed #0066B3; */ } + + +/* TOP MENU ---------- */ +#topmenu { margin: 0px 8px 0 8px; + padding: 0; + background: url(../images/menu.jpg) repeat-x top; + height: 30px; + +} +#topmenu .lefts { + background: url(../images/menul.jpg) no-repeat left; + height: 30px; + padding-left: 0px; +} +#topmenu .rights { + background: url(../images/menur.jpg) no-repeat right; + float: right; + height: 30px; + width: 8px; +} +#topmenu li a { + color: #FFF; + text-align: left; + padding-left: 10px; + padding-right: 15px; + text-decoration: none; + background: transparent; + font-weight: bold +} +#topmenu li { padding: 0px; + float: left; + margin: 0; + font-size: 11px; + line-height: 30px; + white-space: nowrap; + /* list-style-type: none; */ + width: auto; + background: url(../images/sep.gif) no-repeat top right + +} + +#main { background: #FFF; margin: 25px 0 15px 0; color: #666; } + +#main #rightside { + width: 300px; + float: right; + background: #FFF; + margin-right: 0px; + color: #555; + +} + +#main #rightside .box { + background: #efefef; + margin-bottom: 10px; + padding: 5px; + color: #555; +} + +#main #rightside h2 { + font: bold 1.0em Arial, Arial, Sans-Serif; + background: #CDCDCD url(../images/greyc.gif) no-repeat top right; + height: 18px; + padding: 3px; + color: #666; +} + +/* LEFT 
SIDE - ARTICLES AREA -------- */ +#leftside { + padding-left: 8px; + color: #555; + background: #FFF; + margin-right: 255px; + margin-left: 0px; + +} + +#manual { + margin-right: 305px; + margin-left: 0px; + width: auto; +} + +#leftside h1 { padding: 15px 0 10px 0 } +#leftside h2 { padding: 15px 0 10px 0; color: #555; text-indent: 17px; background: #FFF url(../images/head.gif) no-repeat left; } +#leftside h3 { padding: 15px 0 10px 0; font-size: 100%; margin-left: 5px; text-indent: 17px; background: #FFF url(../images/head.gif) no-repeat left; } +#leftside ul { margin-left: 24px; padding-left 24px; list-style-type: circle } +#leftside li { } +#leftside p { padding: 0px 0 10px 0 } + +#footer { + clear: both; + background: #FFF url(../images/footer.jpg) repeat-x; + height: 46px; + margin-left: 0px; + margin-right: 0px; + font-size: 75%; + color: #666; +} +#footer p { padding: 5px } +#footer .rside { float: right; display: inline; padding: 5px; text-align: right} + +#toc ol { list-style: roman } + +a { color: #0066B3; background: inherit; text-decoration: none } +h1 { font: bold 1.9em Arial, Arial, Sans-Serif } +h2 { font: bold 1.2em Arial, Arial, Sans-Serif; padding: 0; margin: 0 } +ul { padding: 0; margin: 0; list-style-type: none } +li { } +ol { margin-left: 24px; + padding-left 24px; + list-style: decimal } +/* blockquote { margin-left: 35px; font-family: "Courier New", Courier, monospace; } */ +blockquote { margin-left: 35px; font-family: "Courier New", Courier; } +tt { font-family: "Courier New", Courier, monospace; } +.date { border-top: 1px solid #e5e5e5; text-align: right; margin-bottom: 25px; margin-top: 5px;} +#main #leftside .date a, #main #rightside a { border: 0; text-decoration: none; } + +.comment .date { text-align: left; border: 0;} + + +#breadcrumbs { + float: left; + padding-left: 8px; + padding-top: 0px; + font: bold .8em Arial, Arial, Sans-Serif; + color: #666; + width: 100%; + height: 25px; + margin-top: 10px; + margin-bottom: 10px; + clear: 
both; +} + + + +#leftside #txt {width: 100%; height: 10em; padding: 3px 3px 3px 6px; margin-left:0em;} +#leftside textarea { border: 1px solid #bbb; width: 100%; } + + +/* SNEWS */ +#main #leftside fieldset { float: left; width: 100%; border: 1px solid #ccc; padding: 10px 8px; margin: 0 10px 8px 0; background: #FFF; color: #000; } +#main #leftside fieldset p { width: 100%; } +#main input { padding: 3px; margin: 0; border: 1px solid #bbb } +/*p { margin-top: 5px; }*/ +p { margin-top: 10px; } +/*input.search { border: 1px solid #ccc; padding: 4px; width: 160px; }*/ +.comment { background: #FFF; color: #808080; padding: 10px; margin: 0 0 10px 0; border-top: 1px solid #ccc; } +.commentsbox { background: #FFF; color: #808080; padding: 10px; margin: 0 0 10px 0; border-top: 1px solid #ccc; } + + +#box-table-a +{ + font-family: .8em Verdana, Arial, Sans-Serif; + /*font-size: 12px;*/ + margin: 45px; + width: 600px; + text-align: left; + border-collapse: collapse; +} +#box-table-a th +{ + font-size: 13px; + font-weight: normal; + padding: 8px; + background: #b9c9fe; + border-top: 4px solid #aabcfe; + border-bottom: 1px solid #fff; + color: #039; +} +#box-table-a td +{ + padding: 8px; + background: #e8edff; + border-bottom: 2px solid #fff; + color: #669; + border-top: 2px solid transparent; +} +#box-table-a tr:hover td +{ + background: #d0dafd; + color: #339; +} + + +#box-table-b +{ + font-family: .8em Verdana, Arial, Sans-Serif; + /*font-size: 12px;*/ + margin: 45px; + width: 480px; + text-align: center; + border-collapse: collapse; + border-top: 7px solid #9baff1; + border-bottom: 7px solid #9baff1; +} +#box-table-b th +{ + font-size: 13px; + font-weight: normal; + padding: 8px; + background: #e8edff; + border-right: 1px solid #9baff1; + border-left: 1px solid #9baff1; + color: #039; +} +#box-table-b td +{ + padding: 8px; + background: #e8edff; + border-right: 1px solid #aabcfe; + border-left: 1px solid #aabcfe; + color: #669; +} + +#manual h1 { margin: 0 15px 10px 15px; 
padding: 10px 0 10px 0; font: bold 1.9em Arial, Arial, Sans-Serif } +#manual h2 { margin: 0 15px 10px 15px; padding: 10px 0 10px 0; font: bold 1.2em Arial, Arial, Sans-Serif } +#manual h3 { margin: 0 15px 10px 20px; padding: 10px 0 10px 0; font: 1.2em Arial, Arial, Sans-Serif } +#manual h4 { margin: 0 15px 10px 25px; padding: 10px 0 10px 0; font: 1.1em Arial, Arial, Sans-Serif } +#manual p { margin: 0 15px 10px 15px; color: #444 } +#manual table { margin-top: 15px } +#manual ul { margin: 0 15px 10px 15px; padding: 0; margin: 0 } +#manual pre { margin: 0 15px 15px 25px } +#manual li { margin: 0 15px 1px 15px; color: #444 } +#manual ol { margin-left: 24px; padding-left 24px; list-style: decimal } +#manual td { vertical-align: top; } +#manual blockquote { margin-left: 35px; font-family: "Courier New", Courier; } +#manual tt { font: .8em; font-family: "Courier New", Courier; } +#manual code { font: .8em; font-family: "Courier New", Courier; } +#manual .date { border-top: 1px solid #e5e5e5; text-align: right; margin-bottom: 25px; margin-top: 5px;} +#manual .date a, #main #rightside a { border: 0; text-decoration: none; } +#manual .date a, #main #rightside a { border: 0; text-decoration: none; } +#manual td { vertical-align: top; } diff --git a/dp_framer.cpp b/dp_framer.cpp new file mode 100644 index 0000000..d7e359a --- /dev/null +++ b/dp_framer.cpp @@ -0,0 +1,910 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "dp_framer.h"

using namespace std;

/**
 * Set up variables that describe the shape of a dynamic programming matrix to
 * be filled in.  The matrix is built around the diagonal containing the seed
 * hit: the "seed diagonal".  The N diagonals to the right of the seed diagonal
 * are the "RHS gap" diagonals, where N is the maximum number of read or
 * reference gaps permitted (whichever is larger).  The N diagonals to the left
 * of the seed diagonal are the "LHS gap" diagonals.
 *
 * The way the rectangle is currently formulated, there are another N diagonals
 * to the left of the "LHS gap" diagonals called the "LHS extra diagonals".  It
 * might also be possible to split the "extra diagonals" into two subsets and
 * place them both to the left of the LHS gap diagonals and to the right of the
 * RHS gap diagonals.
 *
 * The purpose of arranging these groupings of diagonals is that a subset
 * of them, the "core diagonals", can now be considered "covered."  By
 * "covered" I mean that any alignment that overlaps a cell in any of the core
 * diagonals cannot possibly overlap another, higher-scoring alignment that
 * falls partially outside the rectangle.
 *
 * Say the read is 5 characters long, the maximum number of read or ref gaps is
 * 2, and the seed hit puts the main diagonal at offset 10 in the reference.
 * The larger rectangle explored looks like this:
 *
 *  off=10, maxgap=2
 *
 *  Ref      1
 *  off: 67890123456   0: seed diagonal
 *       **OO0oo++---- o: "RHS gap" diagonals
 *       -**OO0oo++--- O: "LHS gap" diagonals
 *       --**OO0oo++-- *: "LHS extra" diagonals
 *       ---**OO0oo++- +: "RHS extra" diagonals
 *       ----**OO0oo++ -: cells that can't possibly be involved in a valid
 *                        alignment that overlaps one of the core diagonals
 *
 * The "core diagonals" are marked with 0's, O's or o's.
+ * + * A caveat is that, for performance reasons, we place an upper limit on N - + * the maximum number of read or reference gaps. It is constrained to be no + * greater than 'maxgap'. This means that in some situations, we may report an + * alignment that spuriously trumps a better alignment that falls partially + * outside the rectangle. Also, we may fail to find a valid alignment with + * more than 'maxgap' gaps. + * + * Another issue is trimming: if the seed hit is sufficiently close to one or + * both ends of the reference sequence, and either (a) overhang is not + * permitted, or (b) the number of Ns permitted is less than the number of + * columns that overhang the reference, then we want to exclude the trimmed + * columns from the rectangle. + * + * We need to return enough information so that downstream routines can fully + * understand the shape of the rectangle, which diagonals are which (esp. which + * are the "core" diagonals, since we needn't examine any more seed hits from + * those columns in the future), and how the rectangle is trimmed. The + * information returned should be compatible with the sort of information + * returned by the routines that set up rectangles for mate finding. + */ +bool DynProgFramer::frameSeedExtensionRect( + int64_t off, // ref offset implied by seed hit assuming no gaps + size_t rdlen, // length of read sequence used in DP table (so len + // of +1 nucleotide sequence for colorspace reads) + int64_t reflen, // length of reference sequence aligned to + size_t maxrdgap, // max # of read gaps permitted in opp mate alignment + size_t maxrfgap, // max # of ref gaps permitted in opp mate alignment + int64_t maxns, // # Ns permitted + size_t maxhalf, // max width in either direction + DPRect& rect) // out: DP rectangle +{ + assert_gt(rdlen, 0); + assert_gt(reflen, 0); + // Set N, the maximum number of reference or read gaps permitted, whichever + // is larger. Also, enforce ceiling: can't be larger than 'maxhalf'. 
+ size_t maxgap = max(maxrdgap, maxrfgap); + maxgap = min(maxgap, maxhalf); + // Leave room for "LHS gap" and "LHS extra" diagonals + int64_t refl = off - 2 * maxgap; // inclusive + // Leave room for "RHS gap" and "RHS extra" diagonals + int64_t refr = off + (rdlen - 1) + 2 * maxgap; // inclusive + size_t triml = 0, trimr = 0; + // Check if we have to trim to fit the extents of the reference + if(trimToRef_) { + maxns = 0; // no leeway + } else if(maxns == (int64_t)rdlen) { + maxns--; + } + // Trim from RHS of rectangle + if(refr >= reflen + maxns) { + trimr = (size_t)(refr - (reflen + maxns - 1)); + } + // Trim from LHS of rectangle + if(refl < -maxns) { + triml = (size_t)(-refl) - (size_t)maxns; + } + rect.refl_pretrim = refl; + rect.refr_pretrim = refr; + rect.refl = refl + triml; + rect.refr = refr - trimr; + rect.triml = triml; + rect.trimr = trimr; + rect.maxgap = maxgap; + // Remember which diagonals are "core" as offsets from the LHS of the + // untrimmed rectangle + rect.corel = maxgap; + rect.corer = rect.corel + 2 * maxgap; // inclusive + assert(rect.repOk()); + return !rect.entirelyTrimmed(); +} + +/** + * Set up variables that describe the shape of a dynamic programming matrix to + * be filled in. The matrix is built around the diagonals that terminate in + * the range of columns where the RHS of the opposite mate must fall in order + * to satisfy the fragment-length constraint. These are the "mate" diagonals + * and they also happen to be the "core" diagonals in this case. + * + * The N diagonals to the right of the mate diagonals are the "RHS gap" + * diagonals, where N is the maximum number of read or reference gaps permitted + * (whichever is larger). The N diagonals to the left of the mate diagonals + * are the "LHS gap" diagonals. + * + * The purpose of arranging and these groupings of diagonals is that a subset + * of them, the "core diagonals", can now be considered "covered." 
By + * "covered" I mean that any alignment that overlaps a cell in any of the core + * diagonals cannot possibly overlap another, higher-scoring alignment that + * falls partially outside the rectangle. + * + * |Anchor| + * o---------OO0000000000000oo------ 0: mate diagonal (also core diags!) + * -o---------OO0000000000000oo----- o: "RHS gap" diagonals + * --o---------OO0000000000000oo---- O: "LHS gap" diagonals + * ---oo--------OO0000000000000oo--- *: "LHS extra" diagonals + * -----o--------OO0000000000000oo-- -: cells that can't possibly be + * ------o--------OO0000000000000oo- involved in a valid alignment that + * -------o--------OO0000000000000oo overlaps one of the core diagonals + * XXXXXXXXXXXXX + * | RHS Range | + * ^ ^ + * rl rr + * + * The "core diagonals" are marked with 0s. + * + * A caveat is that, for performance reasons, we place an upper limit on N - + * the maximum number of read or reference gaps. It is constrained to be no + * greater than 'maxgap'. This means that in some situations, we may report an + * alignment that spuriously trumps a better alignment that falls partially + * outside the rectangle. Also, we may fail to find a valid alignment with + * more than 'maxgap' gaps. + * + * Another issue is trimming: if the seed hit is sufficiently close to one or + * both ends of the reference sequence, and either (a) overhang is not + * permitted, or (b) the number of Ns permitted is less than the number of + * columns that overhang the reference, then we want to exclude the trimmed + * columns from the rectangle. 
+ */ +bool DynProgFramer::frameFindMateAnchorLeftRect( + int64_t ll, // leftmost Watson off for LHS of opp alignment + int64_t lr, // rightmost Watson off for LHS of opp alignment + int64_t rl, // leftmost Watson off for RHS of opp alignment + int64_t rr, // rightmost Watson off for RHS of opp alignment + size_t rdlen, // length of opposite mate + int64_t reflen, // length of reference sequence aligned to + size_t maxrdgap, // max # of read gaps permitted in opp mate alignment + size_t maxrfgap, // max # of ref gaps permitted in opp mate alignment + int64_t maxns, // max # ns permitted in the alignment + size_t maxhalf, // max width in either direction + DPRect& rect) // out: DP rectangle + const +{ + assert_geq(lr, ll); // LHS rightmost must be >= LHS leftmost + assert_geq(rr, rl); // RHS rightmost must be >= RHS leftmost + assert_geq(rr, lr); // RHS rightmost must be >= LHS rightmost + assert_geq(rl, ll); // RHS leftmost must be >= LHS leftmost + assert_gt(rdlen, 0); + assert_gt(reflen, 0); + size_t triml = 0, trimr = 0; + size_t maxgap = max(maxrdgap, maxrfgap); + maxgap = max(maxgap, maxhalf); + // Amount of padding we have to add to account for the fact that alignments + // ending between en_left/en_right might start in various columns in the + // first row + int64_t pad_left = maxgap; + int64_t pad_right = maxgap; + int64_t en_left = rl; + int64_t en_right = rr; + int64_t st_left = en_left - (rdlen-1); + ASSERT_ONLY(int64_t st_right = en_right - (rdlen-1)); + int64_t en_right_pad = en_right + pad_right; + ASSERT_ONLY(int64_t en_left_pad = en_left - pad_left); + ASSERT_ONLY(int64_t st_right_pad = st_right + pad_right); + int64_t st_left_pad = st_left - pad_left; + assert_leq(st_left, en_left); + assert_geq(en_right, st_right); + assert_leq(st_left_pad, en_left_pad); + assert_geq(en_right_pad, st_right_pad); + int64_t refl = st_left_pad; + int64_t refr = en_right_pad; + if(trimToRef_) { + maxns = 0; + } else if(maxns == (int64_t)rdlen) { + maxns--; + } + // 
Trim from the RHS of the rectangle? + if(refr >= reflen + maxns) { + trimr = (size_t)(refr - (reflen + maxns - 1)); + } + // Trim from the LHS of the rectangle? + if(refl < -maxns) { + triml = (size_t)(-refl) - (size_t)maxns; + } + size_t width = (size_t)(refr - refl + 1); + rect.refl_pretrim = refl; + rect.refr_pretrim = refr; + rect.refl = refl + triml; + rect.refr = refr - trimr; + rect.triml = triml; + rect.trimr = trimr; + rect.maxgap = maxgap; + rect.corel = maxgap; + rect.corer = width - maxgap - 1; // inclusive + assert(rect.repOk()); + return !rect.entirelyTrimmed(); +} + +/** + * Set up variables that describe the shape of a dynamic programming matrix to + * be filled in. The matrix is built around the diagonals that begin in the + * range of columns where the LHS of the opposite mate must fall in order to + * satisfy the fragment-length constraint. These are the "mate" diagonals and + * they also happen to be the "core" diagonals in this case. + * + * The N diagonals to the right of the mate diagonals are the "RHS gap" + * diagonals, where N is the maximum number of read or reference gaps permitted + * (whichever is larger). The N diagonals to the left of the mate diagonals + * are the "LHS gap" diagonals. + * + * The purpose of arranging and these groupings of diagonals is that a subset + * of them, the "core diagonals", can now be considered "covered." By + * "covered" I mean that any alignment that overlaps a cell in any of the core + * diagonals cannot possibly overlap another, higher-scoring alignment that + * falls partially outside the rectangle. + * + * ll lr + * v v + * | LHS Range | + * XXXXXXXXXXXXX |Anchor| + * OO0000000000000oo--------o-------- 0: mate diagonal (also core diags!) 
+ * -OO0000000000000oo--------o------- o: "RHS gap" diagonals + * --OO0000000000000oo--------o------ O: "LHS gap" diagonals + * ---OO0000000000000oo--------oo---- *: "LHS extra" diagonals + * ----OO0000000000000oo---------o--- -: cells that can't possibly be + * -----OO0000000000000oo---------o-- involved in a valid alignment that + * ------OO0000000000000oo---------o- overlaps one of the core diagonals + * + * The "core diagonals" are marked with 0s. + * + * A caveat is that, for performance reasons, we place an upper limit on N - + * the maximum number of read or reference gaps. It is constrained to be no + * greater than 'maxgap'. This means that in some situations, we may report an + * alignment that spuriously trumps a better alignment that falls partially + * outside the rectangle. Also, we may fail to find a valid alignment with + * more than 'maxgap' gaps. + * + * Another issue is trimming: if the seed hit is sufficiently close to one or + * both ends of the reference sequence, and either (a) overhang is not + * permitted, or (b) the number of Ns permitted is less than the number of + * columns that overhang the reference, then we want to exclude the trimmed + * columns from the rectangle. 
+ */ +bool DynProgFramer::frameFindMateAnchorRightRect( + int64_t ll, // leftmost Watson off for LHS of opp alignment + int64_t lr, // rightmost Watson off for LHS of opp alignment + int64_t rl, // leftmost Watson off for RHS of opp alignment + int64_t rr, // rightmost Watson off for RHS of opp alignment + size_t rdlen, // length of opposite mate + int64_t reflen, // length of reference sequence aligned to + size_t maxrdgap, // max # of read gaps permitted in opp mate alignment + size_t maxrfgap, // max # of ref gaps permitted in opp mate alignment + int64_t maxns, // max # ns permitted in the alignment + size_t maxhalf, // max width in either direction + DPRect& rect) // out: DP rectangle + const +{ + assert_geq(lr, ll); + assert_geq(rr, rl); + assert_geq(rr, lr); + assert_geq(rl, ll); + assert_gt(rdlen, 0); + assert_gt(reflen, 0); + size_t triml = 0, trimr = 0; + size_t maxgap = max(maxrdgap, maxrfgap); + maxgap = max(maxgap, maxhalf); + int64_t pad_left = maxgap; + int64_t pad_right = maxgap; + int64_t st_left = ll; + int64_t st_right = lr; + ASSERT_ONLY(int64_t en_left = st_left + (rdlen-1)); + int64_t en_right = st_right + (rdlen-1); + int64_t en_right_pad = en_right + pad_right; + ASSERT_ONLY(int64_t en_left_pad = en_left - pad_left); + ASSERT_ONLY(int64_t st_right_pad = st_right + pad_right); + int64_t st_left_pad = st_left - pad_left; + assert_leq(st_left, en_left); + assert_geq(en_right, st_right); + assert_leq(st_left_pad, en_left_pad); + assert_geq(en_right_pad, st_right_pad); + // We have enough info to deduce where the boundaries of our rectangle + // should be. Finalize the boundaries, ignoring reference trimming for now + int64_t refl = st_left_pad; + int64_t refr = en_right_pad; + if(trimToRef_) { + maxns = 0; + } else if(maxns == (int64_t)rdlen) { + maxns--; + } + // Trim from the RHS of the rectangle? + if(refr >= reflen + maxns) { + trimr = (size_t)(refr - (reflen + maxns - 1)); + } + // Trim from the LHS of the rectangle? 
+ if(refl < -maxns) { + triml = (size_t)(-refl) - (size_t)maxns; + } + size_t width = (size_t)(refr - refl + 1); + rect.refl_pretrim = refl; + rect.refr_pretrim = refr; + rect.refl = refl + triml; + rect.refr = refr - trimr; + rect.triml = triml; + rect.trimr = trimr; + rect.maxgap = maxgap; + rect.corel = maxgap; + rect.corer = width - maxgap - 1; // inclusive + assert(rect.repOk()); + return !rect.entirelyTrimmed(); +} + +#ifdef MAIN_DP_FRAMER + +#include + +static void testCaseFindMateAnchorLeft( + const char *testName, + bool trimToRef, + int64_t ll, + int64_t lr, + int64_t rl, + int64_t rr, + size_t rdlen, + size_t reflen, + size_t maxrdgap, + size_t maxrfgap, + size_t ex_width, + size_t ex_solwidth, + size_t ex_trimup, + size_t ex_trimdn, + int64_t ex_refl, + int64_t ex_refr, + const char *ex_st, // string of '0'/'1' chars + const char *ex_en) // string of '0'/'1' chars +{ + cerr << testName << "..."; + DynProgFramer fr(trimToRef); + size_t width, solwidth; + int64_t refl, refr; + EList st, en; + size_t trimup, trimdn; + size_t maxhalf = 500; + size_t maxgaps = 0; + fr.frameFindMateAnchorLeft( + ll, // leftmost Watson off for LHS of opp alignment + lr, // rightmost Watson off for LHS of opp alignment + rl, // leftmost Watson off for RHS of opp alignment + rr, // rightmost Watson off for RHS of opp alignment + rdlen, // length of opposite mate + reflen, // length of reference sequence aligned to + maxrdgap, // max # of read gaps permitted in opp mate alignment + maxrfgap, // max # of ref gaps permitted in opp mate alignment + maxns, // max # Ns permitted + maxhalf, // max width in either direction + width, // out: calculated width stored here + maxgaps, // out: max # gaps + trimup, // out: number of bases trimmed from upstream end + trimdn, // out: number of bases trimmed from downstream end + refl, // out: ref pos of upper LHS of parallelogram + refr, // out: ref pos of lower RHS of parallelogram + st, // out: legal starting columns stored here + en); // out: 
legal ending columns stored here + assert_eq(ex_width, width); + assert_eq(ex_solwidth, solwidth); + assert_eq(ex_trimup, trimup); + assert_eq(ex_trimdn, trimdn); + assert_eq(ex_refl, refl); + assert_eq(ex_refr, refr); + for(size_t i = 0; i < width; i++) { + assert_eq((ex_st[i] == '1'), st[i]); + assert_eq((ex_en[i] == '1'), en[i]); + } + cerr << "PASSED" << endl; +} + +static void testCaseFindMateAnchorRight( + const char *testName, + bool trimToRef, + int64_t ll, + int64_t lr, + int64_t rl, + int64_t rr, + size_t rdlen, + size_t reflen, + size_t maxrdgap, + size_t maxrfgap, + size_t ex_width, + size_t ex_solwidth, + size_t ex_trimup, + size_t ex_trimdn, + int64_t ex_refl, + int64_t ex_refr, + const char *ex_st, // string of '0'/'1' chars + const char *ex_en) // string of '0'/'1' chars +{ + cerr << testName << "..."; + DynProgFramer fr(trimToRef); + size_t width, solwidth; + size_t maxgaps; + int64_t refl, refr; + EList st, en; + size_t trimup, trimdn; + size_t maxhalf = 500; + fr.frameFindMateAnchorRight( + ll, // leftmost Watson off for LHS of opp alignment + lr, // rightmost Watson off for LHS of opp alignment + rl, // leftmost Watson off for RHS of opp alignment + rr, // rightmost Watson off for RHS of opp alignment + rdlen, // length of opposite mate + reflen, // length of reference sequence aligned to + maxrdgap, // max # of read gaps permitted in opp mate alignment + maxrfgap, // max # of ref gaps permitted in opp mate alignment + maxns, // max # Ns permitted + maxhalf, // max width in either direction + width, // out: calculated width stored here + maxgaps, // out: calcualted max # gaps + trimup, // out: number of bases trimmed from upstream end + trimdn, // out: number of bases trimmed from downstream end + refl, // out: ref pos of upper LHS of parallelogram + refr, // out: ref pos of lower RHS of parallelogram + st, // out: legal starting columns stored here + en); // out: legal ending columns stored here + assert_eq(ex_width, width); + 
assert_eq(ex_trimup, trimup); + assert_eq(ex_trimdn, trimdn); + assert_eq(ex_refl, refl); + assert_eq(ex_refr, refr); + for(size_t i = 0; i < width; i++) { + assert_eq((ex_st[i] == '1'), st[i]); + assert_eq((ex_en[i] == '1'), en[i]); + } + cerr << "PASSED" << endl; +} + +int main(void) { + + /////////////////////////// + // + // ANCHOR ON THE LEFT + // + /////////////////////////// + + // ------------- + // o o + // o o + // o o + // o o + // <<<------->>> + // 012345678901234567890 + // 0 1 2 + testCaseFindMateAnchorLeft( + "FindMateAnchorLeft1", + false, // trim to reference + 3, // left offset of upper parallelogram extent + 15, // right offset of upper parallelogram extent + 10, // left offset of lower parallelogram extent + 16, // right offset of lower parallelogram extent + 5, // length of opposite mate + 30, // length of reference sequence aligned to + 3, // max # of read gaps permitted in opp mate alignment + 3, // max # of ref gaps permitted in opp mate alignment + 13, // expected width + 0, // expected # bases trimmed from upstream end + 0, // expected # bases trimmed from downstream end + 3, // ref offset of upstream column + 19, // ref offset of downstream column + "1111111111111", // expected starting bools + "0001111111000"); // expected ending bools + + // ******* + // <<===----- + // o o + // o o + // o o + // o o + // <<=----->> + // ******* + // 012345678901234567890 + // 0 1 2 + testCaseFindMateAnchorLeft( + "FindMateAnchorLeft2", + false, // trim to reference + 9, // left offset of left upper parallelogram extent + 14, // right offset of left upper parallelogram extent + 10, // left offset of left lower parallelogram extent + 15, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 30, // length of reference sequence aligned to + 2, // max # of read gaps permitted in opp mate alignment + 2, // max # of ref gaps permitted in opp mate alignment + 7, // expected width + 3, // expected # bases trimmed from upstream 
end + 0, // expected # bases trimmed from downstream end + 7, // ref offset of upstream column + 17, // ref offset of downstream column + "0011111", // expected starting bools + "1111100"); // expected ending bools + + // ******* + // <<===--->> + // o o + // o o + // o o + // o o + // o o + // <<=----->> + // ******* + // 01234567890123456xxxx + // 0 1 2 + testCaseFindMateAnchorLeft( + "FindMateAnchorLeft3", + true, // trim to reference + 9, // left offset of left upper parallelogram extent + 14, // right offset of left upper parallelogram extent + 10, // left offset of left lower parallelogram extent + 15, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 17, // length of reference sequence aligned to + 2, // max # of read gaps permitted in opp mate alignment + 2, // max # of ref gaps permitted in opp mate alignment + 7, // expected width + 3, // expected # bases trimmed from upstream end + 0, // expected # bases trimmed from downstream end + 7, // ref offset of upstream column + 17, // ref offset of downstream column + "0011111", // expected starting bools + "1111100"); // expected ending bools + + // ****** + // <<===----- + // o o + // o o + // o o + // o o + // <<=----=>> + // ****** + // 012345678901234xxxxxx + // 0 1 2 + testCaseFindMateAnchorLeft( + "FindMateAnchorLeft4", + true, // trim to reference + 9, // left offset of left upper parallelogram extent + 14, // right offset of left upper parallelogram extent + 10, // left offset of left lower parallelogram extent + 15, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 15, // length of reference sequence aligned to + 2, // max # of read gaps permitted in opp mate alignment + 2, // max # of ref gaps permitted in opp mate alignment + 6, // expected width + 3, // expected # bases trimmed from upstream end + 1, // expected # bases trimmed from downstream end + 7, // ref offset of upstream column + 16, // ref offset of downstream column + 
"001111", // expected starting bools + "111100"); // expected ending bools + + // -1 0 2 + // xxxxxxxxxx012345678xx + // + // ******* + // <<===----- + // o o + // o o + // o o + // o o + // o o + // <<=----->> + // ******* + // + // xxxxxxxxxx012345678xx + // -1 0 2 + testCaseFindMateAnchorLeft( + "FindMateAnchorLeft5", + true, // trim to reference + 1, // left offset of left upper parallelogram extent + 7, // right offset of left upper parallelogram extent + 2, // left offset of left lower parallelogram extent + 7, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 9, // length of reference sequence aligned to + 2, // max # of read gaps permitted in opp mate alignment + 2, // max # of ref gaps permitted in opp mate alignment + 7, // expected width + 3, // expected # bases trimmed from upstream end + 0, // expected # bases trimmed from downstream end + -1, // ref offset of upstream column + 9, // ref offset of downstream column + "0011111", // expected starting bools + "1111100"); // expected ending bools + + // <<<<==-===>> + // o o + // o o + // o o + // o o + // <<<<------>> + // ****** + // 012345678901234567890 + // 0 1 2 + testCaseFindMateAnchorLeft( + "FindMateAnchorLeft6", + false, // trim to reference + 8, // left offset of left upper parallelogram extent + 8, // right offset of left upper parallelogram extent + 10, // left offset of left lower parallelogram extent + 15, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 30, // length of reference sequence aligned to + 4, // max # of read gaps permitted in opp mate alignment + 2, // max # of ref gaps permitted in opp mate alignment + 6, // expected width + 4, // expected # bases trimmed from upstream end + 2, // expected # bases trimmed from downstream end + 6, // ref offset of upstream column + 15, // ref offset of downstream column + "001000", // expected starting bools + "111111"); // expected ending bools + + 
/////////////////////////// + // + // ANCHOR ON THE RIGHT + // + /////////////////////////// + + // <<<------->>> + // o o + // o o + // o o + // o o + // <<<------->>> + // 012345678901234567890123456789 + // 0 1 2 + testCaseFindMateAnchorRight( + "FindMateAnchorRight1", + false, // trim to reference + 10, // left offset of left upper parallelogram extent + 16, // right offset of left upper parallelogram extent + 11, // left offset of left lower parallelogram extent + 23, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 30, // length of reference sequence aligned to + 3, // max # of read gaps permitted in opp mate alignment + 3, // max # of ref gaps permitted in opp mate alignment + 13, // expected width + 0, // expected # bases trimmed from upstream end + 0, // expected # bases trimmed from downstream end + 7, // ref offset of upstream column + 23, // ref offset of downstream column + "0001111111000", // expected starting bools + "1111111111111"); // expected ending bools + + // 0 1 2 + // 012345678901234567890 + // ******* + // <<------>> + // o o + // o o + // o o + // o o + // <<===--->> + // ******* + // 012345678901234567890 + // 0 1 2 + testCaseFindMateAnchorRight( + "FindMateAnchorRight2", + false, // trim to reference + 6, // left offset of left upper parallelogram extent + 11, // right offset of left upper parallelogram extent + 13, // left offset of left lower parallelogram extent + 18, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 30, // length of reference sequence aligned to + 2, // max # of read gaps permitted in opp mate alignment + 2, // max # of ref gaps permitted in opp mate alignment + 7, // expected width + 3, // expected # bases trimmed from upstream end + 0, // expected # bases trimmed from downstream end + 7, // ref offset of upstream column + 17, // ref offset of downstream column + "1111100", // expected starting bools + "0011111"); // expected ending bools + + // 
Reference trimming takes off the left_pad of the left mate + // + // ******* + // <<------>> + // o o + // o o + // o o + // o o + // o o + // <<===--->> + // ******* + // 0123456789012345678901234567890 + // -1 0 1 2 + testCaseFindMateAnchorRight( + "FindMateAnchorRight3", + true, // trim to reference + 0, // left offset of left upper parallelogram extent + 5, // right offset of left upper parallelogram extent + 7, // left offset of left lower parallelogram extent + 11, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 30, // length of reference sequence aligned to + 2, // max # of read gaps permitted in opp mate alignment + 2, // max # of ref gaps permitted in opp mate alignment + 7, // expected width + 3, // expected # bases trimmed from upstream end + 0, // expected # bases trimmed from downstream end + 1, // ref offset of upstream column + 11, // ref offset of downstream column + "1111100", // expected starting bools + "0011111"); // expected ending bools + + // Reference trimming takes off the leftmost 5 positions of the left mate, + // and takes 1 from the right mate + // + // ***** + // <<------>> + // o o + // o o + // o o + // o o + // o o + // <<===--->> + // ***** + // 0987654321012345678901234567890 + // -1 0 1 2 + testCaseFindMateAnchorRight( + "FindMateAnchorRight4", + true, // trim to reference + -3, // left offset of left upper parallelogram extent + 2, // right offset of left upper parallelogram extent + 4, // left offset of left lower parallelogram extent + 10, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 30, // length of reference sequence aligned to + 2, // max # of read gaps permitted in opp mate alignment + 2, // max # of ref gaps permitted in opp mate alignment + 5, // expected width + 5, // expected # bases trimmed from upstream end + 0, // expected # bases trimmed from downstream end + 0, // ref offset of upstream column + 8, // ref offset of downstream column + 
"11100", // expected starting bools + "11111"); // expected ending bools + + // Reference trimming takes off the leftmost 5 positions of the left mate, + // and takes 1 from the left of the right mate. Also, it takes 2 from the + // right of the right mate. + // + // *** + // <<------>> + // o o + // o o + // o o + // o o + // o o + // <<===--->> + // *** + // 0987654321012345678901234567890 + // -1 0 1 2 + testCaseFindMateAnchorRight( + "FindMateAnchorRight5", + true, // trim to reference + -3, // left offset of left upper parallelogram extent + 2, // right offset of left upper parallelogram extent + 4, // left offset of left lower parallelogram extent + 10, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 7, // length of reference sequence aligned to + 2, // max # of read gaps permitted in opp mate alignment + 2, // max # of ref gaps permitted in opp mate alignment + 3, // expected width + 5, // expected # bases trimmed from upstream end + 2, // expected # bases trimmed from downstream end + 0, // ref offset of upstream column + 6, // ref offset of downstream column + "111", // expected starting bools + "111"); // expected ending bools + + // ****** + // <<------>>>> + // o o + // o o + // o o + // o o + // <<====-=>>>> + // ****** + // 012345678901234567890 + // 0 1 2 + testCaseFindMateAnchorRight( + "FindMateAnchorRight6", + false, // trim to reference + 6, // left offset of left upper parallelogram extent + 11, // right offset of left upper parallelogram extent + 14, // left offset of left lower parallelogram extent + 14, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 30, // length of reference sequence aligned to + 4, // max # of read gaps permitted in opp mate alignment + 2, // max # of ref gaps permitted in opp mate alignment + 6, // expected width + 2, // expected # bases trimmed from upstream end + 4, // expected # bases trimmed from downstream end + 6, // ref offset of upstream 
column + 15, // ref offset of downstream column + "111111", // expected starting bools + "000010"); // expected ending bools + + // **** + // <<<<==---->> + // o o + // o o + // o o + // o o + // o o + // <<<<====-=>> + // **** + // 012345678901234567890 + // 0 1 2 + testCaseFindMateAnchorRight( + "FindMateAnchorRight7", + false, // trim to reference + 6, // left offset of left upper parallelogram extent + 11, // right offset of left upper parallelogram extent + 14, // left offset of left lower parallelogram extent + 14, // right offset of left lower parallelogram extent + 5, // length of opposite mate + 30, // length of reference sequence aligned to + 2, // max # of read gaps permitted in opp mate alignment + 4, // max # of ref gaps permitted in opp mate alignment + 4, // expected width + 6, // expected # bases trimmed from upstream end + 2, // expected # bases trimmed from downstream end + 8, // ref offset of upstream column + 15, // ref offset of downstream column + "1111", // expected starting bools + "0010"); // expected ending bools + + testCaseFindMateAnchorRight( + "FindMateAnchorRight8", + true, // trim to reference + -37, // left offset of left upper parallelogram extent + 13, // right offset of left upper parallelogram extent + -37, // left offset of left lower parallelogram extent + 52, // right offset of left lower parallelogram extent + 10, // length of opposite mate + 53, // length of reference sequence aligned to + 0, // max # of read gaps permitted in opp mate alignment + 0, // max # of ref gaps permitted in opp mate alignment + 14, // expected width + 37, // expected # bases trimmed from upstream end + 0, // expected # bases trimmed from downstream end + 0, // ref offset of upstream column + 22, // ref offset of downstream column + "11111111111111", // expected starting bools + "11111111111111");// expected ending bools +} + +#endif /*def MAIN_DP_FRAMER*/ diff --git a/dp_framer.h b/dp_framer.h new file mode 100644 index 0000000..4209f41 --- 
/dev/null +++ b/dp_framer.h @@ -0,0 +1,261 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. + */ + +/* + * dp_framer.h + * + * Classes and routines for framing dynamic programming problems. There are 2 + * basic types of dynamic programming problems solved in Bowtie 2: + * + * 1. Seed extension: we found a seed hit using Burrows-Wheeler techniques and + * now we would like to extend it into a full alignment by doing dynamic + * programming in the vicinity of the seed hit. + * + * 2. Mate finding: we found a full alignment for one mate in a pair and now we + * would like to extend it into a full alignment by doing dynamic + * programming in the area prescribed by the maximum and minimum fragment + * lengths. + * + * By "framing" the dynamic programming problem, we mean that all of the + * following DP inputs are calculated: + * + * 1. The width of the parallelogram/rectangle to explore. + * 2. The 0-based offset of the reference position associated with the leftmost + * diagonal/column in the parallelogram/rectangle to explore + * 3. An EList of length=width encoding which columns the alignment may + * start in + * 4. 
An EList of length=width encoding which columns the alignment may + * end in + */ + +#ifndef DP_FRAMER_H_ +#define DP_FRAMER_H_ + +#include +#include "ds.h" +#include "ref_coord.h" + +/** + * Describes a dynamic programming rectangle. + * + * Only knows about reference offsets, not reference sequences. + */ +struct DPRect { + + DPRect(int cat = 0) /*: st(cat), en(cat)*/ { + refl = refr = triml = trimr = corel = corer = 0; + } + + int64_t refl; // leftmost ref offset involved post trimming (incl) + int64_t refr; // rightmost ref offset involved post trimming (incl) + + int64_t refl_pretrim; // leftmost ref offset involved pre trimming (incl) + int64_t refr_pretrim; // rightmost ref offset involved pre trimming (incl) + + size_t triml; // positions trimmed from LHS + size_t trimr; // positions trimmed from RHS + + // If "core" diagonals are specified, then any alignment reported has to + // overlap one of the core diagonals. This is to avoid the situation where + // an alignment is reported that overlaps a better-scoring alignment that + // falls partially outside the rectangle. This is used in both seed + // extensions and in mate finding. Filtering based on the core diagonals + // should happen in the backtrace routine. I.e. it should simply never + // return an alignment that doesn't overlap a core diagonal, even if there + // is such an alignment and it's valid. + + size_t corel; // offset of column where leftmost "core" diagonal starts + size_t corer; // offset of column where rightmost "core" diagonal starts + // [corel, corer] is an inclusive range and offsets are with respect to the + // original, untrimmed rectangle. + + size_t maxgap; // max # gaps - width of the gap bands + + /** + * Return true iff the combined effect of triml and trimr is to trim away + * the entire rectangle. 
+ */ + bool entirelyTrimmed() const { + bool tr = refr < refl; + ASSERT_ONLY(size_t width = (size_t)(refr_pretrim - refl_pretrim + 1)); + assert(tr == (width <= triml + trimr)); + return tr; + } + +#ifndef NDEBUG + bool repOk() const { + assert_geq(corer, corel); + return true; + } +#endif + + /** + * Set the given interval to the range of diagonals that are "covered" by + * this dynamic programming problem. + */ + void initIval(Interval& iv) { + iv.setOff(refl_pretrim + (int64_t)corel); + iv.setLen(corer - corel + 1); + } +}; + +/** + * Encapsulates routines for calculating parameters for the various types of + * dynamic programming problems solved in Bowtie2. + */ +class DynProgFramer { + +public: + + DynProgFramer(bool trimToRef) : trimToRef_(trimToRef) { } + + /** + * Similar to frameSeedExtensionParallelogram but we're being somewhat more + * inclusive in order to ensure all characters aling the "width" in the last + * row are exhaustively scored. + */ + bool frameSeedExtensionRect( + int64_t off, // ref offset implied by seed hit assuming no gaps + size_t rdlen, // length of read sequence used in DP table (so len + // of +1 nucleotide sequence for colorspace reads) + int64_t reflen, // length of reference sequence aligned to + size_t maxrdgap, // max # of read gaps permitted in opp mate alignment + size_t maxrfgap, // max # of ref gaps permitted in opp mate alignment + int64_t maxns, // # Ns permitted + size_t maxhalf, // max width in either direction + DPRect& rect); // out: DP rectangle + + /** + * Given information about an anchor mate hit, and information deduced by + * PairedEndPolicy about where the opposite mate can begin and start given + * the fragment length range, return parameters for the dynamic programming + * problem to solve. 
+ */ + bool frameFindMateRect( + bool anchorLeft, // true iff anchor alignment is to the left + int64_t ll, // leftmost Watson off for LHS of opp alignment + int64_t lr, // rightmost Watson off for LHS of opp alignment + int64_t rl, // leftmost Watson off for RHS of opp alignment + int64_t rr, // rightmost Watson off for RHS of opp alignment + size_t rdlen, // length of opposite mate + int64_t reflen, // length of reference sequence aligned to + size_t maxrdgap, // max # of read gaps permitted in opp mate alignment + size_t maxrfgap, // max # of ref gaps permitted in opp mate alignment + int64_t maxns, // max # Ns permitted + size_t maxhalf, // max width in either direction + DPRect& rect) // out: DP rectangle + const + { + if(anchorLeft) { + return frameFindMateAnchorLeftRect( + ll, + lr, + rl, + rr, + rdlen, + reflen, + maxrdgap, + maxrfgap, + maxns, + maxhalf, + rect); + } else { + return frameFindMateAnchorRightRect( + ll, + lr, + rl, + rr, + rdlen, + reflen, + maxrdgap, + maxrfgap, + maxns, + maxhalf, + rect); + } + } + + /** + * Given information about an anchor mate hit, and information deduced by + * PairedEndPolicy about where the opposite mate can begin and start given + * the fragment length range, return parameters for the dynamic programming + * problem to solve. 
+ */ + bool frameFindMateAnchorLeftRect( + int64_t ll, // leftmost Watson off for LHS of opp alignment + int64_t lr, // rightmost Watson off for LHS of opp alignment + int64_t rl, // leftmost Watson off for RHS of opp alignment + int64_t rr, // rightmost Watson off for RHS of opp alignment + size_t rdlen, // length of opposite mate + int64_t reflen, // length of reference sequence aligned to + size_t maxrdgap, // max # of read gaps permitted in opp mate alignment + size_t maxrfgap, // max # of ref gaps permitted in opp mate alignment + int64_t maxns, // max # Ns permitted in alignment + size_t maxhalf, // max width in either direction + DPRect& rect) // out: DP rectangle + const; + + /** + * Given information about an anchor mate hit, and information deduced by + * PairedEndPolicy about where the opposite mate can begin and end given + * the fragment length range, return parameters for the dynamic programming + * problem to solve. + */ + bool frameFindMateAnchorRightRect( + int64_t ll, // leftmost Watson off for LHS of opp alignment + int64_t lr, // rightmost Watson off for LHS of opp alignment + int64_t rl, // leftmost Watson off for RHS of opp alignment + int64_t rr, // rightmost Watson off for RHS of opp alignment + size_t rdlen, // length of opposite mate + int64_t reflen, // length of reference sequence aligned to + size_t maxrdgap, // max # of read gaps permitted in opp mate alignment + size_t maxrfgap, // max # of ref gaps permitted in opp mate alignment + int64_t maxns, // max # Ns permitted in alignment + size_t maxhalf, // max width in either direction + DPRect& rect) // out: DP rectangle + const; + +protected: + + /** + * Record how far the reference window [refl, refr] overhangs the start + * (trimup) or the end (trimdn) of the reference. Returns nothing; refl and + * refr are left unclamped, and trimup/trimdn are set only on overhang. 
+ */ + void trimToRef( + size_t reflen, // in: length of reference sequence aligned to + int64_t& refl, // in/out: ref pos of upper LHS of parallelogram + int64_t& refr, // in/out: ref pos of lower RHS of parallelogram + size_t& trimup, // out: number of bases trimmed from upstream end + size_t& trimdn) // out: number of bases trimmed from downstream end + { + if(refl < 0) { + trimup = (size_t)(-refl); + //refl = 0; + } + if(refr >= (int64_t)reflen) { + trimdn = (size_t)(refr - reflen + 1); + //refr = (int64_t)reflen-1; + } + } + + bool trimToRef_; +}; + +#endif /*ndef DP_FRAMER_H_*/ diff --git a/ds.cpp b/ds.cpp new file mode 100644 index 0000000..35bdaac --- /dev/null +++ b/ds.cpp @@ -0,0 +1,155 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "ds.h" + +extern MemoryTally gMemTally; + +/** + * Tally a memory allocation of size amt bytes. + */ +void MemoryTally::add(int cat, uint64_t amt) { + ThreadSafe ts(&mutex_m); + tots_[cat] += amt; + tot_ += amt; + if(tots_[cat] > peaks_[cat]) { + peaks_[cat] = tots_[cat]; + } + if(tot_ > peak_) { + peak_ = tot_; + } +} + +/** + * Tally a memory free of size amt bytes. 
+ */ +void MemoryTally::del(int cat, uint64_t amt) { + ThreadSafe ts(&mutex_m); + assert_geq(tots_[cat], amt); + assert_geq(tot_, amt); + tots_[cat] -= amt; + tot_ -= amt; +} + +#ifdef MAIN_DS + +#include +#include "random_source.h" + +using namespace std; + +int main(void) { + cerr << "Test EHeap 1..."; + { + EHeap h; + h.insert(0.5f); // 1 + h.insert(0.6f); // 2 + h.insert(0.25f); // 3 + h.insert(0.75f); // 4 + h.insert(0.1f); // 5 + h.insert(0.9f); // 6 + h.insert(0.4f); // 7 + assert_eq(7, h.size()); + if(h.pop() != 0.1f) { + throw 1; + } + assert_eq(6, h.size()); + if(h.pop() != 0.25f) { + throw 1; + } + assert_eq(5, h.size()); + if(h.pop() != 0.4f) { + throw 1; + } + assert_eq(4, h.size()); + if(h.pop() != 0.5f) { + throw 1; + } + assert_eq(3, h.size()); + if(h.pop() != 0.6f) { + throw 1; + } + assert_eq(2, h.size()); + if(h.pop() != 0.75f) { + throw 1; + } + assert_eq(1, h.size()); + if(h.pop() != 0.9f) { + throw 1; + } + assert_eq(0, h.size()); + assert(h.empty()); + } + cerr << "PASSED" << endl; + + cerr << "Test EHeap 2..."; + { + EHeap h; + RandomSource rnd(12); + size_t lim = 2000; + while(h.size() < lim) { + h.insert(rnd.nextU32()); + } + size_t last = std::numeric_limits::max(); + bool first = true; + while(!h.empty()) { + size_t p = h.pop(); + assert(first || p >= last); + last = p; + first = false; + } + } + cerr << "PASSED" << endl; + + cerr << "Test EBitList 1..."; + { + EBitList<128> l; + assert_eq(0, l.size()); + assert_eq(std::numeric_limits::max(), l.max()); + + assert(!l.test(0)); + assert(!l.test(1)); + assert(!l.test(10)); + + for(int i = 0; i < 3; i++) { + l.set(10); + assert(!l.test(0)); + assert(!l.test(1)); + assert(!l.test(9)); + assert(l.test(10)); + assert(!l.test(11)); + } + + assert_eq(10, l.max()); + l.clear(); + assert(!l.test(10)); + assert_eq(std::numeric_limits::max(), l.max()); + + RandomSource rnd(12); + size_t lim = 2000; + for(size_t i = 0; i < lim; i++) { + uint32_t ri = rnd.nextU32() % 10000; + l.set(ri); + 
assert(l.test(ri)); + } + } + cerr << "PASSED" << endl; +} + +#endif /*def MAIN_DS*/ diff --git a/ds.h b/ds.h new file mode 100644 index 0000000..dce3054 --- /dev/null +++ b/ds.h @@ -0,0 +1,4397 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef DS_H_ +#define DS_H_ + +#include +#include +#include +#include +#include +#include +#include "assert_helpers.h" +#include "threading.h" +#include "random_source.h" +#include "btypes.h" +//added by Joe Paggi to implement EList.push_back_array +#include + +/** + * Tally how much memory is allocated, in total and per category. + */ +class MemoryTally { + +public: + + MemoryTally() : tot_(0), peak_(0) { + memset(tots_, 0, 256 * sizeof(uint64_t)); + memset(peaks_, 0, 256 * sizeof(uint64_t)); + } + + /** + * Tally a memory allocation of size amt bytes. + */ + void add(int cat, uint64_t amt); + + /** + * Tally a memory free of size amt bytes. + */ + void del(int cat, uint64_t amt); + + /** + * Return the total amount of memory allocated. + */ + uint64_t total() { return tot_; } + + /** + * Return the total amount of memory allocated in a particular + * category. + */ + uint64_t total(int cat) { return tots_[cat]; } + + /** + * Return the peak amount of memory allocated. + */ + uint64_t peak() { return peak_; } + + /** + * Return the peak amount of memory allocated in a particular + * category. 
+ */ + uint64_t peak(int cat) { return peaks_[cat]; } + +#ifndef NDEBUG + /** + * Check that memory tallies are internally consistent; + */ + bool repOk() const { + uint64_t tot = 0; + for(int i = 0; i < 256; i++) { + assert_leq(tots_[i], peaks_[i]); + tot += tots_[i]; + } + assert_eq(tot, tot_); + return true; + } +#endif + +protected: + + MUTEX_T mutex_m; + uint64_t tots_[256]; + uint64_t tot_; + uint64_t peaks_[256]; + uint64_t peak_; +}; + +extern MemoryTally gMemTally; + +/** + * A simple fixed-length array of type T, automatically freed in the + * destructor. + */ +template +class AutoArray { +public: + + AutoArray(size_t sz, int cat = 0) : cat_(cat) { + t_ = NULL; + t_ = new T[sz]; + gMemTally.add(cat_, sz); + memset(t_, 0, sz * sizeof(T)); + sz_ = sz; + } + + ~AutoArray() { + if(t_ != NULL) { + delete[] t_; + gMemTally.del(cat_, sz_); + } + } + + T& operator[](size_t sz) { + return t_[sz]; + } + + const T& operator[](size_t sz) const { + return t_[sz]; + } + + size_t size() const { return sz_; } + +private: + int cat_; + T *t_; + size_t sz_; +}; + +/** + * A wrapper for a non-array pointer that associates it with a memory + * category for tracking purposes and calls delete on it when the + * PtrWrap is destroyed. 
+ */ +template +class PtrWrap { +public: + + explicit PtrWrap( + T* p, + bool freeable = true, + int cat = 0) : + cat_(cat), + p_(NULL) + { + init(p, freeable); + } + + explicit PtrWrap(int cat = 0) : + cat_(cat), + p_(NULL) + { + reset(); + } + + void reset() { + free(); + init(NULL); + } + + ~PtrWrap() { free(); } + + void init(T* p, bool freeable = true) { + assert(p_ == NULL); + p_ = p; + freeable_ = freeable; + if(p != NULL && freeable_) { + gMemTally.add(cat_, sizeof(T)); + } + } + + void free() { + if(p_ != NULL) { + if(freeable_) { + delete p_; + gMemTally.del(cat_, sizeof(T)); + } + p_ = NULL; + } + } + + inline T* get() { return p_; } + inline const T* get() const { return p_; } + +private: + int cat_; + T *p_; + bool freeable_; +}; + +/** + * A wrapper for an array pointer that associates it with a memory + * category for tracking purposes and calls delete[] on it when the + * PtrWrap is destroyed. + */ +template +class APtrWrap { +public: + + explicit APtrWrap( + T* p, + size_t sz, + bool freeable = true, + int cat = 0) : + cat_(cat), + p_(NULL) + { + init(p, sz, freeable); + } + + explicit APtrWrap(int cat = 0) : + cat_(cat), + p_(NULL) + { + reset(); + } + + void reset() { + free(); + init(NULL, 0); + } + + ~APtrWrap() { free(); } + + void init(T* p, size_t sz, bool freeable = true) { + assert(p_ == NULL); + p_ = p; + sz_ = sz; + freeable_ = freeable; + if(p != NULL && freeable_) { + gMemTally.add(cat_, sizeof(T) * sz_); + } + } + + void free() { + if(p_ != NULL) { + if(freeable_) { + delete[] p_; + gMemTally.del(cat_, sizeof(T) * sz_); + } + p_ = NULL; + } + } + + inline T* get() { return p_; } + inline const T* get() const { return p_; } + +private: + int cat_; + T *p_; + bool freeable_; + size_t sz_; +}; + +/** + * An EList is an expandable list with these features: + * + * - Payload type is a template parameter T. + * - Initial size can be specified at construction time, otherwise + * default of 128 is used. 
+ * - When allocated initially or when expanding, the new[] operator is + * used, which in turn calls the default constructor for T. + * - All copies (e.g. assignment of a const T& to an EList element, + * or during expansion) use operator=. + * - When the EList is resized to a smaller size (or cleared, which + * is like resizing to size 0), the underlying container is not + * reshaped. Thus, ELists never release memory before + * destruction. + * + * And these requirements: + * + * - Payload type T must have a default constructor. + * + * For efficiency reasons, ELists should not be declared on the stack + * in often-called worker functions. Best practice is to declare + * ELists at a relatively stable layer of the stack (such that it + * rarely bounces in and out of scope) and let the worker function use + * it and *expand* it only as needed. The effect is that only + * relatively few allocations and copies will be incurred, and they'll + * occur toward the beginning of the computation before stabilizing at + * a "high water mark" for the remainder of the computation. + * + * A word about multidimensional lists. One way to achieve a + * multidimensional list is to nest ELists. This works, but it often + * involves a lot more calls to the default constructor and to + * operator=, especially when the outermost EList needs expanding, than + * some of the alternatives. One alternative is to use a more specialized + * container that still uses ELists but knows to use xfer instead of + * operator= when T=EList. + * + * The 'cat_' field encodes a category. This makes it possible to + * distinguish between object subgroups in the global memory tally. + * + * Memory allocation is lazy. Allocation is only triggered when the + * user calls push_back, expand, resize, or another function that + * increases the size of the list. This saves memory and also makes it + * easier to deal with nested ELists, since the default constructor + * doesn't set anything in stone. 
+ */ +template +class EList { + +public: + + /** + * Allocate initial default of S elements. + */ + explicit EList() : + cat_(0), allocCat_(-1), list_(NULL), sz_(S), cur_(0) { } + + /** + * Allocate initial default of S elements. + */ + explicit EList(int cat) : + cat_(cat), allocCat_(-1), list_(NULL), sz_(S), cur_(0) + { + assert_geq(cat, 0); + } + + /** + * Initially allocate given number of elements; should be > 0. + */ + explicit EList(size_t isz, int cat = 0) : + cat_(cat), allocCat_(-1), list_(NULL), sz_(isz), cur_(0) + { + assert_geq(cat, 0); + } + + /** + * Copy from another EList using operator=. + */ + EList(const EList& o) : + cat_(0), allocCat_(-1), list_(NULL), sz_(0), cur_(0) + { + *this = o; + } + + /** + * Copy from another EList using operator=. + */ + explicit EList(const EList& o, int cat) : + cat_(cat), allocCat_(-1), list_(NULL), sz_(0), cur_(0) + { + *this = o; + assert_geq(cat, 0); + } + + /** + * Destructor. + */ + ~EList() { free(); } + + /** + * Make this object into a copy of o by allocat + */ + EList& operator=(const EList& o) { + assert_eq(cat_, o.cat()); + if(o.cur_ == 0) { + // Nothing to copy + cur_ = 0; + return *this; + } + if(list_ == NULL) { + // cat_ should already be set + lazyInit(); + } + if(sz_ < o.cur_) expandNoCopy(o.cur_ + 1); + assert_geq(sz_, o.cur_); + cur_ = o.cur_; + for(size_t i = 0; i < cur_; i++) { + list_[i] = o.list_[i]; + } + return *this; + } + + /** + * Transfer the guts of another EList into this one without using + * operator=, etc. We have to set EList o's list_ field to NULL to + * avoid o's destructor from deleting list_ out from under us. + */ + void xfer(EList& o) { + // What does it mean to transfer to a different-category list? 
+ assert_eq(cat_, o.cat()); + // Can only transfer into an empty object + free(); + allocCat_ = cat_; + list_ = o.list_; + sz_ = o.sz_; + cur_ = o.cur_; + o.list_ = NULL; + o.sz_ = o.cur_ = 0; + o.allocCat_ = -1; + } + //Added by Joe + void swap(EList& o) { + assert_eq(cat_, o.cat()); + T* temp_l = list_; + size_t temp_sz = sz_; + size_t temp_cur = cur_; + list_ = o.list_; + sz_ = o.sz_; + cur_ = o.cur_; + o.list_ = temp_l; + o.sz_ = temp_sz; + o.cur_ = temp_cur; + } + + /** + * Return number of elements. + */ + inline size_t size() const { return cur_; } + + /** + * Return number of elements allocated. + */ + inline size_t capacity() const { return sz_; } + + /** + * Return the total size in bytes occupied by this list. + */ + size_t totalSizeBytes() const { + return 2 * sizeof(int) + + 2 * sizeof(size_t) + + cur_ * sizeof(T); + } + + /** + * Return the total capacity in bytes occupied by this list. + */ + size_t totalCapacityBytes() const { + return 2 * sizeof(int) + + 2 * sizeof(size_t) + + sz_ * sizeof(T); + } + + /** + * Ensure that there is sufficient capacity to expand to include + * 'thresh' more elements without having to expand. + */ + inline void ensure(size_t thresh) { + if(list_ == NULL) lazyInit(); + expandCopy(cur_ + thresh); + } + + /** + * Ensure that there is sufficient capacity to include 'newsz' elements. + * If there isn't enough capacity right now, expand capacity to exactly + * equal 'newsz'. + */ + inline void reserveExact(size_t newsz) { + if(list_ == NULL) lazyInitExact(newsz); + expandCopyExact(newsz); + } + + /** + * Return true iff there are no elements. + */ + inline bool empty() const { return cur_ == 0; } + + /** + * Return true iff list hasn't been initialized yet. + */ + inline bool null() const { return list_ == NULL; } + + /** + * Add an element to the back and immediately initialize it via + * operator=. 
+ */ + void push_back(const T& el) { + if(list_ == NULL) lazyInit(); + if(cur_ == sz_) expandCopy(sz_+1); + list_[cur_++] = el; + } + + void nullify() { + free(); + } + + + /** + * Add an element to the back. No initialization is done. + */ + void expand() { + if(list_ == NULL) lazyInit(); + if(cur_ == sz_) expandCopy(sz_+1); + cur_++; + } + + /** + * Assign v (via operator=) to every element in the range [begin, end). + */ + void fill(size_t begin, size_t end, const T& v) { + assert_leq(begin, end); + assert_leq(end, cur_); + for(size_t i = begin; i < end; i++) { + list_[i] = v; + } + } + + /** + * Assign v (via operator=) to every element currently in the list. + */ + void fill(const T& v) { + for(size_t i = 0; i < cur_; i++) { + list_[i] = v; + } + } + + /** + * Set all bits in specified range of elements in list array to 0. + */ + void fillZero(size_t begin, size_t end) { + assert_leq(begin, end); + memset(&list_[begin], 0, sizeof(T) * (end-begin)); + } + + /** + * Set all bits in the list array to 0. + */ + void fillZero() { + memset(list_, 0, sizeof(T) * cur_); + } + + /** + * If size is less than requested size, resize up to at least sz + * and set cur_ to requested sz. + */ + void resizeNoCopy(size_t sz) { + if(sz > 0 && list_ == NULL) lazyInit(); + if(sz <= cur_) { + cur_ = sz; + return; + } + if(sz_ < sz) expandNoCopy(sz); + cur_ = sz; + } + + /** + * If size is less than requested size, resize up to at least sz + * and set cur_ to requested sz. + */ + void resizeNoCopyExact(size_t sz) { + if(sz > 0 && list_ == NULL) lazyInit(); + if(sz <= cur_) { + cur_ = sz; + return; + } + if(sz_ < sz) expandNoCopyExact(sz); + cur_ = sz; + } + + /** + * If size is less than requested size, resize up to at least sz + * and set cur_ to requested sz. 
+ */ + void resize(size_t sz) { + if(sz > 0 && list_ == NULL) lazyInit(); + if(sz <= cur_) { + cur_ = sz; + return; + } + if(sz_ < sz) { + expandCopy(sz); + } + cur_ = sz; + } + + /** + * If size is less than requested size, resize up to exactly sz and set + * cur_ to requested sz. + */ + void resizeExact(size_t sz) { + if(sz > 0 && list_ == NULL) lazyInitExact(sz); + if(sz <= cur_) { + cur_ = sz; + return; + } + if(sz_ < sz) expandCopyExact(sz); + cur_ = sz; + } + + /** + * Erase element at offset idx. + */ + void erase(size_t idx) { + assert_lt(idx, cur_); + for(size_t i = idx; i < cur_-1; i++) { + list_[i] = list_[i+1]; + } + cur_--; + } + + /** + * Erase range of elements starting at offset idx and going for len. + */ + void erase(size_t idx, size_t len) { + assert_geq(len, 0); + if(len == 0) { + return; + } + assert_lt(idx, cur_); + for(size_t i = idx; i < cur_-len; i++) { + list_[i] = list_[i+len]; + } + cur_ -= len; + } + + /** + * Insert value 'el' at offset 'idx' + */ + void insert(const T& el, size_t idx) { + if(list_ == NULL) lazyInit(); + assert_leq(idx, cur_); + if(cur_ == sz_) expandCopy(sz_+1); + for(size_t i = cur_; i > idx; i--) { + list_[i] = list_[i-1]; + } + list_[idx] = el; + cur_++; + } + + /** + * Insert contents of list 'l' at offset 'idx' + */ + void insert(const EList& l, size_t idx) { + if(list_ == NULL) lazyInit(); + assert_lt(idx, cur_); + if(l.cur_ == 0) return; + if(cur_ + l.cur_ > sz_) expandCopy(cur_ + l.cur_); + for(size_t i = cur_ + l.cur_ - 1; i > idx + (l.cur_ - 1); i--) { + list_[i] = list_[i - l.cur_]; + } + for(size_t i = 0; i < l.cur_; i++) { + list_[i+idx] = l.list_[i]; + } + cur_ += l.cur_; + } + + /** + * push first l_size objects of T array l to top of stack + * added by Joe Paggi + */ + void push_back_array(const T* l, size_t l_size) { + if(list_ == NULL) lazyInit(); + if(l_size == 0) return; + if(cur_ + l_size > sz_) expandCopy(cur_ + l_size); + size_t bytes = l_size * sizeof(T); + std::memcpy(list_ + cur_, l, bytes); 
+ cur_ += l_size; + } + + /** + * Remove an element from the top of the stack. + */ + void pop_back() { + assert_gt(cur_, 0); + cur_--; + } + + /** + * Make the stack empty. + */ + void clear() { + cur_ = 0; // re-use stack memory + // Don't clear heap; re-use it + } + + /** + * Get the element on the top of the stack. + */ + inline T& back() { + assert_gt(cur_, 0); + return list_[cur_-1]; + } + + /** + * Reverse list elements. + */ + void reverse() { + if(cur_ > 1) { + size_t n = cur_ >> 1; + for(size_t i = 0; i < n; i++) { + T tmp = list_[i]; + list_[i] = list_[cur_ - i - 1]; + list_[cur_ - i - 1] = tmp; + } + } + } + + /** + * Get the element on the top of the stack, const version. + */ + inline const T& back() const { + assert_gt(cur_, 0); + return list_[cur_-1]; + } + + /** + * Get the frontmost element (bottom of stack). + */ + inline T& front() { + assert_gt(cur_, 0); + return list_[0]; + } + + /** + * Get the element on the bottom of the stack, const version. + */ + inline const T& front() const { return front(); } + + inline T* begin() { + assert_gt(cur_, 0); + return &list_[0]; + } + + inline const T* begin() const { return begin(); } + + inline T* end() { return begin() + cur_; } + inline const T* end() const { return begin() + cur_; } + + /** + * Return true iff this list and list o contain the same elements in the + * same order according to type T's operator==. + */ + bool operator==(const EList& o) const { + if(size() != o.size()) { + return false; + } + for(size_t i = 0; i < size(); i++) { + if(!(get(i) == o.get(i))) { + return false; + } + } + return true; + } + + /** + * Return true iff this list contains all of the elements in o according to + * type T's operator==. 
+ */ + bool isSuperset(const EList& o) const { + if(o.size() > size()) { + // This can't be a superset if the other set contains more elts + return false; + } + // For each element in o + for(size_t i = 0; i < o.size(); i++) { + bool inthis = false; + // Check if it's in this + for(size_t j = 0; j < size(); j++) { + if(o[i] == (*this)[j]) { + inthis = true; + break; + } + } + if(!inthis) { + return false; + } + } + return true; + } + + /** + * Return a reference to the ith element. + */ + inline T& operator[](size_t i) { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline const T& operator[](size_t i) const { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline T& get(size_t i) { + return operator[](i); + } + + /** + * Return a reference to the ith element. + */ + inline const T& get(size_t i) const { + return operator[](i); + } + + /** + * Return a reference to the ith element. This version is not + * inlined, which guarantees we can use it from the debugger. + */ + T& getSlow(size_t i) { + return operator[](i); + } + + /** + * Return a reference to the ith element. This version is not + * inlined, which guarantees we can use it from the debugger. + */ + const T& getSlow(size_t i) const { + return operator[](i); + } + + /** + * Sort some of the contents. + */ + void sortPortion(size_t begin, size_t num) { + assert_leq(begin+num, cur_); + if(num < 2) return; + std::sort(list_ + begin, list_ + begin + num); + } + + /** + * Shuffle a portion of the list. 
+ */ + void shufflePortion(size_t begin, size_t num, RandomSource& rnd) { + assert_leq(begin+num, cur_); + if(num < 2) return; + size_t left = num; + for(size_t i = begin; i < begin + num - 1; i++) { + uint32_t rndi = rnd.nextU32() % left; + if(rndi > 0) { + std::swap(list_[i], list_[i + rndi]); + } + left--; + } + } + + /** + * Sort contents + */ + void sort() { + sortPortion(0, cur_); + } + + /** + * Return true iff every element is < its successor. Only operator< is + * used. + */ + bool sorted() const { + for(size_t i = 1; i < cur_; i++) { + if(!(list_[i-1] < list_[i])) { + return false; + } + } + return true; + } + + /** + * Delete element at position 'idx'; slide subsequent chars up. + */ + void remove(size_t idx) { + assert_lt(idx, cur_); + assert_gt(cur_, 0); + for(size_t i = idx; i < cur_-1; i++) { + list_[i] = list_[i+1]; + } + cur_--; + } + + /** + * Return a pointer to the beginning of the buffer. + */ + T *ptr() { return list_; } + + /** + * Return a const pointer to the beginning of the buffer. + */ + const T *ptr() const { return list_; } + + /** + * Set the memory category for this object. + */ + void setCat(int cat) { + // What does it mean to set the category after the list_ is + // already allocated? + assert(null()); + assert_gt(cat, 0); cat_ = cat; + } + + /** + * Return memory category. + */ + int cat() const { return cat_; } + + /** + * Perform a binary search for the first element that is not less + * than 'el'. Return cur_ if all elements are less than el. + */ + size_t bsearchLoBound(const T& el) const { + size_t hi = cur_; + size_t lo = 0; + while(true) { + if(lo == hi) { + return lo; + } + size_t mid = lo + ((hi-lo)>>1); + assert_neq(mid, hi); + if(list_[mid] < el) { + if(lo == mid) { + return hi; + } + lo = mid; + } else { + hi = mid; + } + } + } + +private: + + /** + * Initialize memory for EList. 
+ */ + void lazyInit() { + assert(list_ == NULL); + list_ = alloc(sz_); + } + + /** + * Initialize exactly the prescribed number of elements for EList. + */ + void lazyInitExact(size_t sz) { + assert_gt(sz, 0); + assert(list_ == NULL); + sz_ = sz; + list_ = alloc(sz); + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + T *alloc(size_t sz) { + T* tmp = new T[sz]; + assert(tmp != NULL); + gMemTally.add(cat_, sz); + allocCat_ = cat_; + return tmp; + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + void free() { + if(list_ != NULL) { + assert_neq(-1, allocCat_); + assert_eq(allocCat_, cat_); + delete[] list_; + gMemTally.del(cat_, sz_); + list_ = NULL; + sz_ = cur_ = 0; + } + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. Size + * increases quadratically with number of expansions. Copy old contents + * into new buffer using operator=. + */ + void expandCopy(size_t thresh) { + if(thresh <= sz_) return; + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) newsz *= 2; + expandCopyExact(newsz); + } + + /** + * Expand the list_ buffer until it has exactly 'newsz' elements. Copy + * old contents into new buffer using operator=. + */ + void expandCopyExact(size_t newsz) { + if(newsz <= sz_) return; + T* tmp = alloc(newsz); + assert(tmp != NULL); + size_t cur = cur_; + if(list_ != NULL) { + for(size_t i = 0; i < cur_; i++) { + // Note: operator= is used + tmp[i] = list_[i]; + } + free(); + } + list_ = tmp; + sz_ = newsz; + cur_ = cur; + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. + * Size increases quadratically with number of expansions. Don't copy old + * contents into the new buffer. 
+ */ + void expandNoCopy(size_t thresh) { + assert(list_ != NULL); + if(thresh <= sz_) return; + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) newsz *= 2; + expandNoCopyExact(newsz); + } + + /** + * Expand the list_ buffer until it has exactly 'newsz' elements. Don't + * copy old contents into the new buffer. + */ + void expandNoCopyExact(size_t newsz) { + assert(list_ != NULL); + assert_gt(newsz, 0); + free(); + T* tmp = alloc(newsz); + assert(tmp != NULL); + list_ = tmp; + sz_ = newsz; + assert_gt(sz_, 0); + } + + int cat_; // memory category, for accounting purposes + int allocCat_; // category at time of allocation + T *list_; // list pointer, returned from new[] + size_t sz_; // capacity + size_t cur_; // occupancy (AKA size) +}; + +/** + * An ELList is an expandable list of lists with these features: + * + * - Payload type of the inner list is a template parameter T. + * - Initial size can be specified at construction time, otherwise + * default of 128 is used. + * - When allocated initially or when expanding, the new[] operator is + * used, which in turn calls the default constructor for EList. + * - Upon expansion, instead of copies, xfer is used. + * - When the ELList is resized to a smaller size (or cleared, + * which is like resizing to size 0), the underlying containing is + * not reshaped. Thus, ELListss never release memory before + * destruction. + * + * And these requirements: + * + * - Payload type T must have a default constructor. + * + */ +template +class ELList { + +public: + + /** + * Allocate initial default of 128 elements. + */ + explicit ELList(int cat = 0) : + cat_(cat), list_(NULL), sz_(S2), cur_(0) + { + assert_geq(cat, 0); + } + + /** + * Initially allocate given number of elements; should be > 0. + */ + explicit ELList(size_t isz, int cat = 0) : + cat_(cat), list_(NULL), sz_(isz), cur_(0) + { + assert_gt(isz, 0); + assert_geq(cat, 0); + } + + /** + * Copy from another ELList using operator=. 
+ */ + ELList(const ELList& o) : + cat_(0), list_(NULL), sz_(0), cur_(0) + { + *this = o; + } + + /** + * Copy from another ELList using operator=. + */ + explicit ELList(const ELList& o, int cat) : + cat_(cat), list_(NULL), sz_(0), cur_(0) + { + *this = o; + assert_geq(cat, 0); + } + + /** + * Destructor. + */ + ~ELList() { free(); } + + /** + * Make this object into a copy of o by allocating enough memory to + * fit the number of elements in o (note: the number of elements + * may be substantially less than the memory allocated in o) and + * using operator= to copy them over. + */ + ELList& operator=(const ELList& o) { + assert_eq(cat_, o.cat()); + if(list_ == NULL) { + lazyInit(); + } + if(o.cur_ == 0) { + cur_ = 0; + return *this; + } + if(sz_ < o.cur_) expandNoCopy(o.cur_ + 1); + assert_geq(sz_, o.cur_); + cur_ = o.cur_; + for(size_t i = 0; i < cur_; i++) { + // Note: using operator=, not xfer + assert_eq(list_[i].cat(), o.list_[i].cat()); + list_[i] = o.list_[i]; + } + return *this; + } + + /** + * Transfer the guts of another EList into this one without using + * operator=, etc. We have to set EList o's list_ field to NULL to + * avoid o's destructor from deleting list_ out from under us. + */ + void xfer(ELList& o) { + assert_eq(cat_, o.cat()); + list_ = o.list_; // list_ is an array of ELists + sz_ = o.sz_; + cur_ = o.cur_; + o.list_ = NULL; + o.sz_ = o.cur_ = 0; + } + + /** + * Return number of elements. + */ + inline size_t size() const { return cur_; } + + /** + * Return true iff there are no elements. + */ + inline bool empty() const { return cur_ == 0; } + + /** + * Return true iff list hasn't been initialized yet. + */ + inline bool null() const { return list_ == NULL; } + + /** + * Add an element to the back. No intialization is done. + */ + void expand() { + if(list_ == NULL) lazyInit(); + if(cur_ == sz_) expandCopy(sz_+1); + cur_++; + } + + /** + * If size is less than requested size, resize up to at least sz + * and set cur_ to requested sz. 
+ */ + void resize(size_t sz) { + if(sz > 0 && list_ == NULL) lazyInit(); + if(sz <= cur_) { + cur_ = sz; + return; + } + if(sz_ < sz) { + expandCopy(sz); + } + cur_ = sz; + } + + /** + * Make the stack empty. + */ + void clear() { + cur_ = 0; // re-use stack memory + // Don't clear heap; re-use it + } + + /** + * Get the element on the top of the stack. + */ + inline EList& back() { + assert_gt(cur_, 0); + return list_[cur_-1]; + } + + /** + * Get the element on the top of the stack, const version. + */ + inline const EList& back() const { + assert_gt(cur_, 0); + return list_[cur_-1]; + } + + /** + * Get the frontmost element (bottom of stack). + */ + inline EList& front() { + assert_gt(cur_, 0); + return list_[0]; + } + + /** + * Get the element on the bottom of the stack, const version. + */ + inline const EList& front() const { return front(); } + + /** + * Return a reference to the ith element. + */ + inline EList& operator[](size_t i) { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline const EList& operator[](size_t i) const { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline EList& get(size_t i) { + return operator[](i); + } + + /** + * Return a reference to the ith element. + */ + inline const EList& get(size_t i) const { + return operator[](i); + } + + /** + * Return a reference to the ith element. This version is not + * inlined, which guarantees we can use it from the debugger. + */ + EList& getSlow(size_t i) { + return operator[](i); + } + + /** + * Return a reference to the ith element. This version is not + * inlined, which guarantees we can use it from the debugger. + */ + const EList& getSlow(size_t i) const { + return operator[](i); + } + + /** + * Return a pointer to the beginning of the buffer. + */ + EList *ptr() { return list_; } + + /** + * Set the memory category for this object and all children. 
+ */ + void setCat(int cat) { + assert_gt(cat, 0); + cat_ = cat; + if(cat_ != 0) { + for(size_t i = 0; i < sz_; i++) { + assert(list_[i].null()); + list_[i].setCat(cat_); + } + } + } + + /** + * Return memory category. + */ + int cat() const { return cat_; } + +protected: + + /** + * Initialize memory for EList. + */ + void lazyInit() { + assert(list_ == NULL); + list_ = alloc(sz_); + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + EList *alloc(size_t sz) { + assert_gt(sz, 0); + EList *tmp = new EList[sz]; + gMemTally.add(cat_, sz); + if(cat_ != 0) { + for(size_t i = 0; i < sz; i++) { + assert(tmp[i].ptr() == NULL); + tmp[i].setCat(cat_); + } + } + return tmp; + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + void free() { + if(list_ != NULL) { + delete[] list_; + gMemTally.del(cat_, sz_); + list_ = NULL; + } + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. + * Expansions are quadratic. Copy old contents into new buffer + * using operator=. + */ + void expandCopy(size_t thresh) { + assert(list_ != NULL); + if(thresh <= sz_) return; + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) newsz *= 2; + EList* tmp = alloc(newsz); + if(list_ != NULL) { + for(size_t i = 0; i < cur_; i++) { + assert_eq(cat_, tmp[i].cat()); + tmp[i].xfer(list_[i]); + assert_eq(cat_, tmp[i].cat()); + } + free(); + } + list_ = tmp; + sz_ = newsz; + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. + * Expansions are quadratic. Don't copy old contents over. 
+ */ + void expandNoCopy(size_t thresh) { + assert(list_ != NULL); + if(thresh <= sz_) return; + free(); + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) newsz *= 2; + EList* tmp = alloc(newsz); + list_ = tmp; + sz_ = newsz; + assert_gt(sz_, 0); + } + + int cat_; // memory category, for accounting purposes + EList *list_; // list pointer, returned from new[] + size_t sz_; // capacity + size_t cur_; // occupancy (AKA size) + +}; + +/** + * An ELLList is an expandable list of expandable lists with these + * features: + * + * - Payload type of the innermost list is a template parameter T. + * - Initial size can be specified at construction time, otherwise + * default of 128 is used. + * - When allocated initially or when expanding, the new[] operator is + * used, which in turn calls the default constructor for ELList. + * - Upon expansion, instead of copies, xfer is used. + * - When the ELLList is resized to a smaller size (or cleared, + * which is like resizing to size 0), the underlying containing is + * not reshaped. Thus, ELLListss never release memory before + * destruction. + * + * And these requirements: + * + * - Payload type T must have a default constructor. + * + */ +template +class ELLList { + +public: + + /** + * Allocate initial default of 128 elements. + */ + explicit ELLList(int cat = 0) : + cat_(cat), list_(NULL), sz_(S3), cur_(0) + { + assert_geq(cat, 0); + } + + /** + * Initially allocate given number of elements; should be > 0. + */ + explicit ELLList(size_t isz, int cat = 0) : + cat_(cat), list_(NULL), sz_(isz), cur_(0) + { + assert_geq(cat, 0); + assert_gt(isz, 0); + } + + /** + * Copy from another ELLList using operator=. + */ + ELLList(const ELLList& o) : + cat_(0), list_(NULL), sz_(0), cur_(0) + { + *this = o; + } + + /** + * Copy from another ELLList using operator=. + */ + explicit ELLList(const ELLList& o, int cat) : + cat_(cat), list_(NULL), sz_(0), cur_(0) + { + *this = o; + assert_geq(cat, 0); + } + + /** + * Destructor. 
+ */ + ~ELLList() { free(); } + + /** + * Make this object into a copy of o by allocating enough memory to + * fit the number of elements in o (note: the number of elements + * may be substantially less than the memory allocated in o) and + * using operator= to copy them over. + */ + ELLList& operator=(const ELLList& o) { + assert_eq(cat_, o.cat()); + if(list_ == NULL) lazyInit(); + if(o.cur_ == 0) { + cur_ = 0; + return *this; + } + if(sz_ < o.cur_) expandNoCopy(o.cur_ + 1); + assert_geq(sz_, o.cur_); + cur_ = o.cur_; + for(size_t i = 0; i < cur_; i++) { + // Note: using operator=, not xfer + assert_eq(list_[i].cat(), o.list_[i].cat()); + list_[i] = o.list_[i]; + } + return *this; + } + + /** + * Transfer the guts of another EList into this one without using + * operator=, etc. We have to set EList o's list_ field to NULL to + * avoid o's destructor from deleting list_ out from under us. + */ + void xfer(ELLList& o) { + assert_eq(cat_, o.cat()); + list_ = o.list_; // list_ is an array of ELists + sz_ = o.sz_; + cur_ = o.cur_; + o.list_ = NULL; + o.sz_ = o.cur_ = 0; + } + + /** + * Return number of elements. + */ + inline size_t size() const { return cur_; } + + /** + * Return true iff there are no elements. + */ + inline bool empty() const { return cur_ == 0; } + + /** + * Return true iff list hasn't been initialized yet. + */ + inline bool null() const { return list_ == NULL; } + + /** + * Add an element to the back. No intialization is done. + */ + void expand() { + if(list_ == NULL) lazyInit(); + if(cur_ == sz_) expandCopy(sz_+1); + cur_++; + } + + /** + * If size is less than requested size, resize up to at least sz + * and set cur_ to requested sz. + */ + void resize(size_t sz) { + if(sz > 0 && list_ == NULL) lazyInit(); + if(sz <= cur_) { + cur_ = sz; + return; + } + if(sz_ < sz) expandCopy(sz); + cur_ = sz; + } + + /** + * Make the stack empty. 
+ */ + void clear() { + cur_ = 0; // re-use stack memory + // Don't clear heap; re-use it + } + + /** + * Get the element on the top of the stack. + */ + inline ELList& back() { + assert_gt(cur_, 0); + return list_[cur_-1]; + } + + /** + * Get the element on the top of the stack, const version. + */ + inline const ELList& back() const { + assert_gt(cur_, 0); + return list_[cur_-1]; + } + + /** + * Get the frontmost element (bottom of stack). + */ + inline ELList& front() { + assert_gt(cur_, 0); + return list_[0]; + } + + /** + * Get the element on the bottom of the stack, const version. + */ + inline const ELList& front() const { return front(); } + + /** + * Return a reference to the ith element. + */ + inline ELList& operator[](size_t i) { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline const ELList& operator[](size_t i) const { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline ELList& get(size_t i) { + return operator[](i); + } + + /** + * Return a reference to the ith element. + */ + inline const ELList& get(size_t i) const { + return operator[](i); + } + + /** + * Return a reference to the ith element. This version is not + * inlined, which guarantees we can use it from the debugger. + */ + ELList& getSlow(size_t i) { + return operator[](i); + } + + /** + * Return a reference to the ith element. This version is not + * inlined, which guarantees we can use it from the debugger. + */ + const ELList& getSlow(size_t i) const { + return operator[](i); + } + + /** + * Return a pointer to the beginning of the buffer. + */ + ELList *ptr() { return list_; } + + /** + * Set the memory category for this object and all children. + */ + void setCat(int cat) { + assert_gt(cat, 0); + cat_ = cat; + if(cat_ != 0) { + for(size_t i = 0; i < sz_; i++) { + assert(list_[i].null()); + list_[i].setCat(cat_); + } + } + } + + /** + * Return memory category. 
+ */ + int cat() const { return cat_; } + +protected: + + /** + * Initialize memory for EList. + */ + void lazyInit() { + assert(null()); + list_ = alloc(sz_); + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + ELList *alloc(size_t sz) { + assert_gt(sz, 0); + ELList *tmp = new ELList[sz]; + gMemTally.add(cat_, sz); + if(cat_ != 0) { + for(size_t i = 0; i < sz; i++) { + assert(tmp[i].ptr() == NULL); + tmp[i].setCat(cat_); + } + } + return tmp; + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + void free() { + if(list_ != NULL) { + delete[] list_; + gMemTally.del(cat_, sz_); + list_ = NULL; + } + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. + * Expansions are quadratic. Copy old contents into new buffer + * using operator=. + */ + void expandCopy(size_t thresh) { + assert(list_ != NULL); + if(thresh <= sz_) return; + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) newsz *= 2; + ELList* tmp = alloc(newsz); + if(list_ != NULL) { + for(size_t i = 0; i < cur_; i++) { + assert_eq(cat_, tmp[i].cat()); + tmp[i].xfer(list_[i]); + assert_eq(cat_, tmp[i].cat()); + } + free(); + } + list_ = tmp; + sz_ = newsz; + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. + * Expansions are quadratic. Don't copy old contents over. + */ + void expandNoCopy(size_t thresh) { + assert(list_ != NULL); + if(thresh <= sz_) return; + free(); + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) newsz *= 2; + ELList* tmp = alloc(newsz); + list_ = tmp; + sz_ = newsz; + assert_gt(sz_, 0); + } + + int cat_; // memory category, for accounting purposes + ELList *list_; // list pointer, returned from new[] + size_t sz_; // capacity + size_t cur_; // occupancy (AKA size) + +}; + +/** + * Expandable set using a heap-allocated sorted array. 
+ * + * Note that the copy constructor and operator= routines perform + * shallow copies (w/ memcpy). + */ +template +class ESet { +public: + + /** + * Allocate initial default of 128 elements. + */ + ESet(int cat = 0) : + cat_(cat), + list_(NULL), + sz_(0), + cur_(0) + { + if(sz_ > 0) { + list_ = alloc(sz_); + } + } + + /** + * Initially allocate given number of elements; should be > 0. + */ + ESet(size_t isz, int cat = 0) : + cat_(cat), + list_(NULL), + sz_(isz), + cur_(0) + { + assert_gt(isz, 0); + if(sz_ > 0) { + list_ = alloc(sz_); + } + } + + /** + * Copy from another ESet. + */ + ESet(const ESet& o, int cat = 0) : + cat_(cat), list_(NULL) + { + assert_eq(cat_, o.cat()); + *this = o; + } + + /** + * Destructor. + */ + ~ESet() { free(); } + + /** + * Copy contents of given ESet into this ESet. + */ + ESet& operator=(const ESet& o) { + assert_eq(cat_, o.cat()); + sz_ = o.sz_; + cur_ = o.cur_; + free(); + if(sz_ > 0) { + list_ = alloc(sz_); + memcpy(list_, o.list_, cur_ * sizeof(T)); + } else { + list_ = NULL; + } + return *this; + } + + /** + * Return number of elements. + */ + size_t size() const { return cur_; } + + /** + * Return the total size in bytes occupied by this set. + */ + size_t totalSizeBytes() const { + return sizeof(int) + cur_ * sizeof(T) + 2 * sizeof(size_t); + } + + /** + * Return the total capacity in bytes occupied by this set. + */ + size_t totalCapacityBytes() const { + return sizeof(int) + sz_ * sizeof(T) + 2 * sizeof(size_t); + } + + /** + * Return true iff there are no elements. + */ + bool empty() const { return cur_ == 0; } + + /** + * Return true iff list isn't initialized yet. + */ + bool null() const { return list_ == NULL; } + + /** + * Insert a new element into the set in sorted order. 
+ */ + bool insert(const T& el) { + size_t i = 0; + if(cur_ == 0) { + insert(el, 0); + return true; + } + if(cur_ < 16) { + // Linear scan + i = scanLoBound(el); + } else { + // Binary search + i = bsearchLoBound(el); + } + if(i < cur_ && list_[i] == el) return false; + insert(el, i); + return true; + } + + /** + * Return true iff this set contains 'el'. + */ + bool contains(const T& el) const { + if(cur_ == 0) { + return false; + } + else if(cur_ == 1) { + return el == list_[0]; + } + size_t i; + if(cur_ < 16) { + // Linear scan + i = scanLoBound(el); + } else { + // Binary search + i = bsearchLoBound(el); + } + return i != cur_ && list_[i] == el; + } + + /** + * Remove element from set. + */ + void remove(const T& el) { + size_t i; + if(cur_ < 16) { + // Linear scan + i = scanLoBound(el); + } else { + // Binary search + i = bsearchLoBound(el); + } + assert(i != cur_ && list_[i] == el); + erase(i); + } + + /** + * Get + */ + T& get(const T& el) { + size_t i; + if(cur_ < 16) { + // Linear scan + i = scanLoBound(el); + } else { + // Binary search + i = bsearchLoBound(el); + } + assert(i != cur_ && list_[i] == el); + return list_[i]; + } + + const T& get(const T& el) const { + size_t i; + if(cur_ < 16) { + // Linear scan + i = scanLoBound(el); + } else { + // Binary search + i = bsearchLoBound(el); + } + assert(i != cur_ && list_[i] == el); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline T& operator[](size_t i) { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline const T& operator[](size_t i) const { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * If size is less than requested size, resize up to at least sz + * and set cur_ to requested sz. + */ + void resize(size_t sz) { + if(sz <= cur_) return; + if(sz_ < sz) expandCopy(sz); + } + + /** + * Clear set without deallocating (or setting) anything. 
+ */ + void clear() { cur_ = 0; } + + /** + * Return memory category. + */ + int cat() const { return cat_; } + + /** + * Set the memory category for this object. + */ + void setCat(int cat) { + cat_ = cat; + } + + /** + * Transfer the guts of another EList into this one without using + * operator=, etc. We have to set EList o's list_ field to NULL to + * avoid o's destructor from deleting list_ out from under us. + */ + void xfer(ESet& o) { + // What does it mean to transfer to a different-category list? + assert_eq(cat_, o.cat()); + // Can only transfer into an empty object + free(); + list_ = o.list_; + sz_ = o.sz_; + cur_ = o.cur_; + o.list_ = NULL; + o.sz_ = o.cur_ = 0; + } + + /** + * Return a pointer to the beginning of the buffer. + */ + T *ptr() { return list_; } + + /** + * Return a const pointer to the beginning of the buffer. + */ + const T *ptr() const { return list_; } + +private: + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + T *alloc(size_t sz) { + assert_gt(sz, 0); + T *tmp = new T[sz]; + gMemTally.add(cat_, sz); + return tmp; + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + void free() { + if(list_ != NULL) { + delete[] list_; + gMemTally.del(cat_, sz_); + list_ = NULL; + } + } + + /** + * Simple linear scan that returns the index of the first element + * of list_ that is not less than el, or cur_ if all elements are + * less than el. + */ + size_t scanLoBound(const T& el) const { + for(size_t i = 0; i < cur_; i++) { + if(!(list_[i] < el)) { + // Shouldn't be equal + return i; + } + } + return cur_; + } + + /** + * Perform a binary search for the first element that is not less + * than 'el'. Return cur_ if all elements are less than el. 
+ */ + size_t bsearchLoBound(const T& el) const { + size_t hi = cur_; + size_t lo = 0; + while(true) { + if(lo == hi) { +#ifndef NDEBUG + if((rand() % 10) == 0) { + assert_eq(lo, scanLoBound(el)); + } +#endif + return lo; + } + size_t mid = lo + ((hi-lo)>>1); + assert_neq(mid, hi); + if(list_[mid] < el) { + if(lo == mid) { +#ifndef NDEBUG + if((rand() % 10) == 0) { + assert_eq(hi, scanLoBound(el)); + } +#endif + return hi; + } + lo = mid; + } else { + hi = mid; + } + } + } + + /** + * Return true if sorted, assert otherwise. + */ + bool sorted() const { + if(cur_ <= 1) return true; +#ifndef NDEBUG + if((rand() % 20) == 0) { + for(size_t i = 0; i < cur_-1; i++) { + assert(list_[i] < list_[i+1]); + } + } +#endif + return true; + } + + /** + * Insert value 'el' at offset 'idx'. It's OK to insert at cur_, + * which is equivalent to appending. + */ + void insert(const T& el, size_t idx) { + assert_leq(idx, cur_); + if(cur_ == sz_) { + expandCopy(sz_+1); + assert(sorted()); + } + for(size_t i = cur_; i > idx; i--) { + list_[i] = list_[i-1]; + } + list_[idx] = el; + cur_++; + assert(sorted()); + } + + /** + * Erase element at offset idx. + */ + void erase(size_t idx) { + assert_lt(idx, cur_); + for(size_t i = idx; i < cur_-1; i++) { + list_[i] = list_[i+1]; + } + cur_--; + assert(sorted()); + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. + * Expansions are quadratic. + */ + void expandCopy(size_t thresh) { + if(thresh <= sz_) return; + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) { + newsz *= 2; + } + T* tmp = alloc(newsz); + for(size_t i = 0; i < cur_; i++) { + tmp[i] = list_[i]; + } + free(); + list_ = tmp; + sz_ = newsz; + } + + int cat_; // memory category, for accounting purposes + T *list_; // list pointer, returned from new[] + size_t sz_; // capacity + size_t cur_; // occupancy (AKA size) +}; + +template +class ELSet { + +public: + + /** + * Allocate initial default of 128 elements. 
+ */ + explicit ELSet(int cat = 0) : + cat_(cat), list_(NULL), sz_(S), cur_(0) + { + assert_geq(cat, 0); + } + + /** + * Initially allocate given number of elements; should be > 0. + */ + explicit ELSet(size_t isz, int cat = 0) : + cat_(cat), list_(NULL), sz_(isz), cur_(0) + { + assert_gt(isz, 0); + assert_geq(cat, 0); + } + + /** + * Copy from another ELList using operator=. + */ + ELSet(const ELSet& o) : + cat_(0), list_(NULL), sz_(0), cur_(0) + { + *this = o; + } + + /** + * Copy from another ELList using operator=. + */ + explicit ELSet(const ELSet& o, int cat) : + cat_(cat), list_(NULL), sz_(0), cur_(0) + { + *this = o; + assert_geq(cat, 0); + } + + /** + * Destructor. + */ + ~ELSet() { free(); } + + /** + * Make this object into a copy of o by allocating enough memory to + * fit the number of elements in o (note: the number of elements + * may be substantially less than the memory allocated in o) and + * using operator= to copy them over. + */ + ELSet& operator=(const ELSet& o) { + assert_eq(cat_, o.cat()); + if(list_ == NULL) { + lazyInit(); + } + if(o.cur_ == 0) { + cur_ = 0; + return *this; + } + if(sz_ < o.cur_) expandNoCopy(o.cur_ + 1); + assert_geq(sz_, o.cur_); + cur_ = o.cur_; + for(size_t i = 0; i < cur_; i++) { + // Note: using operator=, not xfer + assert_eq(list_[i].cat(), o.list_[i].cat()); + list_[i] = o.list_[i]; + } + return *this; + } + + /** + * Transfer the guts of another ESet into this one without using + * operator=, etc. We have to set ESet o's list_ field to NULL to + * avoid o's destructor from deleting list_ out from under us. + */ + void xfer(ELSet& o) { + assert_eq(cat_, o.cat()); + list_ = o.list_; // list_ is an array of ESets + sz_ = o.sz_; + cur_ = o.cur_; + o.list_ = NULL; + o.sz_ = o.cur_ = 0; + } + + /** + * Return number of elements. + */ + inline size_t size() const { return cur_; } + + /** + * Return true iff there are no elements. 
+ */ + inline bool empty() const { return cur_ == 0; } + + /** + * Return true iff list hasn't been initialized yet. + */ + inline bool null() const { return list_ == NULL; } + + /** + * Add an element to the back. No intialization is done. + */ + void expand() { + if(list_ == NULL) lazyInit(); + if(cur_ == sz_) expandCopy(sz_+1); + cur_++; + } + + /** + * If size is less than requested size, resize up to at least sz + * and set cur_ to requested sz. + */ + void resize(size_t sz) { + if(sz > 0 && list_ == NULL) lazyInit(); + if(sz <= cur_) { + cur_ = sz; + return; + } + if(sz_ < sz) { + expandCopy(sz); + } + cur_ = sz; + } + + /** + * Make the stack empty. + */ + void clear() { + cur_ = 0; // re-use stack memory + // Don't clear heap; re-use it + } + + /** + * Get the element on the top of the stack. + */ + inline ESet& back() { + assert_gt(cur_, 0); + return list_[cur_-1]; + } + + /** + * Get the element on the top of the stack, const version. + */ + inline const ESet& back() const { + assert_gt(cur_, 0); + return list_[cur_-1]; + } + + /** + * Get the frontmost element (bottom of stack). + */ + inline ESet& front() { + assert_gt(cur_, 0); + return list_[0]; + } + + /** + * Get the element on the bottom of the stack, const version. + */ + inline const ESet& front() const { return front(); } + + /** + * Return a reference to the ith element. + */ + inline ESet& operator[](size_t i) { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline const ESet& operator[](size_t i) const { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline ESet& get(size_t i) { + return operator[](i); + } + + /** + * Return a reference to the ith element. + */ + inline const ESet& get(size_t i) const { + return operator[](i); + } + + /** + * Return a reference to the ith element. This version is not + * inlined, which guarantees we can use it from the debugger. 
+ */ + ESet& getSlow(size_t i) { + return operator[](i); + } + + /** + * Return a reference to the ith element. This version is not + * inlined, which guarantees we can use it from the debugger. + */ + const ESet& getSlow(size_t i) const { + return operator[](i); + } + + /** + * Return a pointer to the beginning of the buffer. + */ + ESet *ptr() { return list_; } + + /** + * Return a const pointer to the beginning of the buffer. + */ + const ESet *ptr() const { return list_; } + + /** + * Set the memory category for this object and all children. + */ + void setCat(int cat) { + assert_gt(cat, 0); + cat_ = cat; + if(cat_ != 0) { + for(size_t i = 0; i < sz_; i++) { + assert(list_[i].null()); + list_[i].setCat(cat_); + } + } + } + + /** + * Return memory category. + */ + int cat() const { return cat_; } + +protected: + + /** + * Initialize memory for ELSet. + */ + void lazyInit() { + assert(list_ == NULL); + list_ = alloc(sz_); + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + ESet *alloc(size_t sz) { + assert_gt(sz, 0); + ESet *tmp = new ESet[sz]; + gMemTally.add(cat_, sz); + if(cat_ != 0) { + for(size_t i = 0; i < sz; i++) { + assert(tmp[i].ptr() == NULL); + tmp[i].setCat(cat_); + } + } + return tmp; + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + void free() { + if(list_ != NULL) { + delete[] list_; + gMemTally.del(cat_, sz_); + list_ = NULL; + } + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. + * Expansions are quadratic. Copy old contents into new buffer + * using operator=. 
+ */ + void expandCopy(size_t thresh) { + assert(list_ != NULL); + if(thresh <= sz_) return; + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) newsz *= 2; + ESet* tmp = alloc(newsz); + if(list_ != NULL) { + for(size_t i = 0; i < cur_; i++) { + assert_eq(cat_, tmp[i].cat()); + tmp[i].xfer(list_[i]); + assert_eq(cat_, tmp[i].cat()); + } + free(); + } + list_ = tmp; + sz_ = newsz; + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. + * Expansions are quadratic. Don't copy old contents over. + */ + void expandNoCopy(size_t thresh) { + assert(list_ != NULL); + if(thresh <= sz_) return; + free(); + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) newsz *= 2; + ESet* tmp = alloc(newsz); + list_ = tmp; + sz_ = newsz; + assert_gt(sz_, 0); + } + + int cat_; // memory category, for accounting purposes + ESet *list_; // list pointer, returned from new[] + size_t sz_; // capacity + size_t cur_; // occupancy (AKA size) + +}; + +/** + * Expandable map using a heap-allocated sorted array. + * + * Note that the copy constructor and operator= routines perform + * shallow copies (w/ memcpy). + */ +template +class EMap { + +public: + + /** + * Allocate initial default of 128 elements. + */ + EMap(int cat = 0) : + cat_(cat), + list_(NULL), + sz_(128), + cur_(0) + { + list_ = alloc(sz_); + } + + /** + * Initially allocate given number of elements; should be > 0. + */ + EMap(size_t isz, int cat = 0) : + cat_(cat), + list_(NULL), + sz_(isz), + cur_(0) + { + assert_gt(isz, 0); + list_ = alloc(sz_); + } + + /** + * Copy from another ESet. + */ + EMap(const EMap& o) : list_(NULL) { + *this = o; + } + + /** + * Destructor. + */ + ~EMap() { free(); } + + /** + * Copy contents of given ESet into this ESet. + */ + EMap& operator=(const EMap& o) { + sz_ = o.sz_; + cur_ = o.cur_; + free(); + list_ = alloc(sz_); + memcpy(list_, o.list_, cur_ * sizeof(std::pair)); + return *this; + } + + /** + * Return number of elements. 
+ */ + size_t size() const { return cur_; } + + /** + * Return the total size in bytes occupied by this map. + */ + size_t totalSizeBytes() const { + return sizeof(int) + + 2 * sizeof(size_t) + + cur_ * sizeof(std::pair); + } + + /** + * Return the total capacity in bytes occupied by this map. + */ + size_t totalCapacityBytes() const { + return sizeof(int) + + 2 * sizeof(size_t) + + sz_ * sizeof(std::pair); + } + + /** + * Return true iff there are no elements. + */ + bool empty() const { return cur_ == 0; } + + /** + * Insert a new element into the set in sorted order. + */ + bool insert(const std::pair& el) { + size_t i = 0; + if(cur_ == 0) { + insert(el, 0); + return true; + } + if(cur_ < 16) { + // Linear scan + i = scanLoBound(el.first); + } else { + // Binary search + i = bsearchLoBound(el.first); + } + if(list_[i] == el) return false; // already there + insert(el, i); + return true; // not already there + } + + /** + * Return true iff this set contains 'el'. + */ + bool contains(const K& el) const { + if(cur_ == 0) return false; + else if(cur_ == 1) return el == list_[0].first; + size_t i; + if(cur_ < 16) { + // Linear scan + i = scanLoBound(el); + } else { + // Binary search + i = bsearchLoBound(el); + } + return i != cur_ && list_[i].first == el; + } + + /** + * Return true iff this set contains 'el'. + */ + bool containsEx(const K& el, size_t& i) const { + if(cur_ == 0) return false; + else if(cur_ == 1) { + i = 0; + return el == list_[0].first; + } + if(cur_ < 16) { + // Linear scan + i = scanLoBound(el); + } else { + // Binary search + i = bsearchLoBound(el); + } + return i != cur_ && list_[i].first == el; + } + + /** + * Remove element from set. 
+ */ + void remove(const K& el) { + size_t i; + if(cur_ < 16) { + // Linear scan + i = scanLoBound(el); + } else { + // Binary search + i = bsearchLoBound(el); + } + assert(i != cur_ && list_[i].first == el); + erase(i); + } + + /** + * If size is less than requested size, resize up to at least sz + * and set cur_ to requested sz. + */ + void resize(size_t sz) { + if(sz <= cur_) return; + if(sz_ < sz) expandCopy(sz); + } + + /** + * Get the ith key, value pair in the map. + */ + const std::pair& get(size_t i) const { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Get the ith key, value pair in the map. + */ + const std::pair& operator[](size_t i) const { + return get(i); + } + + /** + * Clear set without deallocating (or setting) anything. + */ + void clear() { cur_ = 0; } + +private: + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + std::pair *alloc(size_t sz) { + assert_gt(sz, 0); + std::pair *tmp = new std::pair[sz]; + gMemTally.add(cat_, sz); + return tmp; + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + void free() { + if(list_ != NULL) { + delete[] list_; + gMemTally.del(cat_, sz_); + list_ = NULL; + } + } + + /** + * Simple linear scan that returns the index of the first element + * of list_ that is not less than el, or cur_ if all elements are + * less than el. + */ + size_t scanLoBound(const K& el) const { + for(size_t i = 0; i < cur_; i++) { + if(!(list_[i].first < el)) { + // Shouldn't be equal + return i; + } + } + return cur_; + } + + /** + * Perform a binary search for the first element that is not less + * than 'el'. Return cur_ if all elements are less than el. 
+ */ + size_t bsearchLoBound(const K& el) const { + size_t hi = cur_; + size_t lo = 0; + while(true) { + if(lo == hi) { +#ifndef NDEBUG + if((rand() % 10) == 0) { + assert_eq(lo, scanLoBound(el)); + } +#endif + return lo; + } + size_t mid = lo + ((hi-lo)>>1); + assert_neq(mid, hi); + if(list_[mid].first < el) { + if(lo == mid) { +#ifndef NDEBUG + if((rand() % 10) == 0) { + assert_eq(hi, scanLoBound(el)); + } +#endif + return hi; + } + lo = mid; + } else { + hi = mid; + } + } + } + + /** + * Return true if sorted, assert otherwise. + */ + bool sorted() const { + if(cur_ <= 1) return true; +#ifndef NDEBUG + for(size_t i = 0; i < cur_-1; i++) { + assert(!(list_[i] == list_[i+1])); + assert(list_[i] < list_[i+1]); + } +#endif + return true; + } + + /** + * Insert value 'el' at offset 'idx'. It's OK to insert at cur_, + * which is equivalent to appending. + */ + void insert(const std::pair& el, size_t idx) { + assert_leq(idx, cur_); + if(cur_ == sz_) { + expandCopy(sz_+1); + } + for(size_t i = cur_; i > idx; i--) { + list_[i] = list_[i-1]; + } + list_[idx] = el; + assert(idx == cur_ || list_[idx] < list_[idx+1]); + cur_++; + assert(sorted()); + } + + /** + * Erase element at offset idx. + */ + void erase(size_t idx) { + assert_lt(idx, cur_); + for(size_t i = idx; i < cur_-1; i++) { + list_[i] = list_[i+1]; + } + cur_--; + assert(sorted()); + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. + * Expansions are quadratic. 
+ */ + void expandCopy(size_t thresh) { + if(thresh <= sz_) return; + size_t newsz = sz_ * 2; + while(newsz < thresh) newsz *= 2; + std::pair* tmp = alloc(newsz); + for(size_t i = 0; i < cur_; i++) { + tmp[i] = list_[i]; + } + free(); + list_ = tmp; + sz_ = newsz; + } + + int cat_; // memory category, for accounting purposes + std::pair *list_; // list pointer, returned from new[] + size_t sz_; // capacity + size_t cur_; // occupancy (AKA size) +}; + +/** + * A class that allows callers to create objects that are referred to by ID. + * Objects should not be referred to via pointers or references, since they + * are stored in an expandable buffer that might be resized and thereby moved + * to another address. + */ +template +class EFactory { + +public: + + explicit EFactory(size_t isz, int cat = 0) : l_(isz, cat) { } + + explicit EFactory(int cat = 0) : l_(cat) { } + + /** + * Clear the list. + */ + void clear() { + l_.clear(); + } + + /** + * Add one additional item to the list and return its ID. + */ + size_t alloc() { + l_.expand(); + return l_.size()-1; + } + + /** + * Return the number of items in the list. + */ + size_t size() const { + return l_.size(); + } + + /** + * Return the number of items in the factory. + */ + size_t totalSizeBytes() const { + return l_.totalSizeBytes(); + } + + /** + * Return the total capacity in bytes occupied by this factory. + */ + size_t totalCapacityBytes() const { + return l_.totalCapacityBytes(); + } + + /** + * Resize the list. + */ + void resize(size_t sz) { + l_.resize(sz); + } + + /** + * Return true iff the list is empty. + */ + bool empty() const { + return size() == 0; + } + + /** + * Shrink the list such that the topmost (most recently allocated) element + * is removed. 
+ */ + void pop() { + l_.resize(l_.size()-1); + } + + /** + * Return mutable list item at offset 'off' + */ + T& operator[](size_t off) { + return l_[off]; + } + + /** + * Return immutable list item at offset 'off' + */ + const T& operator[](size_t off) const { + return l_[off]; + } + +protected: + + EList l_; +}; + +/** + * An expandable bit vector based on EList + */ +template +class EBitList { + +public: + + explicit EBitList(size_t isz, int cat = 0) : l_(isz, cat) { reset(); } + + explicit EBitList(int cat = 0) : l_(cat) { reset(); } + + /** + * Reset to empty state. + */ + void clear() { + reset(); + } + + /** + * Reset to empty state. + */ + void reset() { + l_.clear(); + max_ = std::numeric_limits::max(); + } + + /** + * Set a bit. + */ + void set(size_t off) { + resize(off); + l_[off >> 3] |= (1 << (off & 7)); + if(off > max_ || max_ == std::numeric_limits::max()) { + max_ = off; + } + } + + /** + * Return mutable list item at offset 'off' + */ + bool test(size_t off) const { + if((size_t)(off >> 3) >= l_.size()) { + return false; + } + return (l_[off >> 3] & (1 << (off & 7))) != 0; + } + + /** + * Return size of the underlying byte array. + */ + size_t size() const { + return l_.size(); + } + + /** + * Resize to accomodate at least the given number of bits. + */ + void resize(size_t off) { + if((size_t)(off >> 3) >= l_.size()) { + size_t oldsz = l_.size(); + l_.resize((off >> 3) + 1); + for(size_t i = oldsz; i < l_.size(); i++) { + l_[i] = 0; + } + } + } + + /** + * Return max set bit. + */ + size_t max() const { + return max_; + } + +protected: + + EList l_; + size_t max_; +}; + +/** + * Implements a min-heap. + */ +template +class EHeap { +public: + + /** + * Add the element to the next available leaf position and percolate up. 
+ */ + void insert(T o) { + size_t pos = l_.size(); + l_.push_back(o); + while(pos > 0) { + size_t parent = (pos-1) >> 1; + if(l_[pos] < l_[parent]) { + T tmp(l_[pos]); + l_[pos] = l_[parent]; + l_[parent] = tmp; + pos = parent; + } else break; + } + assert(repOk()); + } + + /** + * Return the topmost element. + */ + T top() { + assert_gt(l_.size(), 0); + return l_[0]; + } + + /** + * Remove the topmost element. + */ + T pop() { + assert_gt(l_.size(), 0); + T ret = l_[0]; + l_[0] = l_[l_.size()-1]; + l_.resize(l_.size()-1); + size_t cur = 0; + while(true) { + size_t c1 = ((cur+1) << 1) - 1; + size_t c2 = c1 + 1; + if(c2 < l_.size()) { + if(l_[c1] < l_[cur] && l_[c1] <= l_[c2]) { + T tmp(l_[c1]); + l_[c1] = l_[cur]; + l_[cur] = tmp; + cur = c1; + } else if(l_[c2] < l_[cur]) { + T tmp(l_[c2]); + l_[c2] = l_[cur]; + l_[cur] = tmp; + cur = c2; + } else { + break; + } + } else if(c1 < l_.size()) { + if(l_[c1] < l_[cur]) { + T tmp(l_[c1]); + l_[c1] = l_[cur]; + l_[cur] = tmp; + cur = c1; + } else { + break; + } + } else { + break; + } + } + assert(repOk()); + return ret; + } + + /** + * Return number of elements in the heap. + */ + size_t size() const { + return l_.size(); + } + + /** + * Return the total size in bytes occupied by this heap. + */ + size_t totalSizeBytes() const { + return l_.totalSizeBytes(); + } + + /** + * Return the total capacity in bytes occupied by this heap. + */ + size_t totalCapacityBytes() const { + return l_.totalCapacityBytes(); + } + + /** + * Return true when heap is empty. + */ + bool empty() const { + return l_.empty(); + } + + /** + * Return element at offset i. + */ + const T& operator[](size_t i) const { + return l_[i]; + } + +#ifndef NDEBUG + /** + * Check that heap property holds. + */ + bool repOk() const { + if(empty()) return true; + return repOkNode(0); + } + + /** + * Check that heap property holds at and below this node. 
+ */ + bool repOkNode(size_t cur) const { + size_t c1 = ((cur+1) << 1) - 1; + size_t c2 = c1 + 1; + if(c1 < l_.size()) { + assert_leq(l_[cur], l_[c1]); + } + if(c2 < l_.size()) { + assert_leq(l_[cur], l_[c2]); + } + if(c2 < l_.size()) { + return repOkNode(c1) && repOkNode(c2); + } else if(c1 < l_.size()) { + return repOkNode(c1); + } + return true; + } +#endif + + /** + * Clear the heap so that it's empty. + */ + void clear() { + l_.clear(); + } + +protected: + + EList l_; +}; + +/** + * Dispenses pages of memory for all the lists in the cache, including + * the sequence-to-range map, the range list, the edits list, and the + * offsets list. All lists contend for the same pool of memory. + */ +class Pool { +public: + Pool( + uint64_t bytes, + uint32_t pagesz, + int cat = 0) : + cat_(cat), + cur_(0), + bytes_(bytes), + pagesz_(pagesz), + pages_(cat) + { + for(size_t i = 0; i < ((bytes+pagesz-1)/pagesz); i++) { + pages_.push_back(new uint8_t[pagesz]); + gMemTally.add(cat, pagesz); + assert(pages_.back() != NULL); + } + assert(repOk()); + } + + /** + * Free each page. + */ + ~Pool() { + for(size_t i = 0; i < pages_.size(); i++) { + assert(pages_[i] != NULL); + delete[] pages_[i]; + gMemTally.del(cat_, pagesz_); + } + } + + /** + * Allocate one page, or return NULL if no pages are left. + */ + uint8_t * alloc() { + assert(repOk()); + if(cur_ == pages_.size()) return NULL; + return pages_[cur_++]; + } + + bool full() { return cur_ == pages_.size(); } + + /** + * Clear the pool so that no pages are considered allocated. + */ + void clear() { + cur_ = 0; + assert(repOk()); + } + + /** + * Reset the Pool to be as though + */ + void free() { + // Currently a no-op because the only freeing method supported + // now is to clear the entire pool + } + +#ifndef NDEBUG + /** + * Check that pool is internally consistent. 
+ */ + bool repOk() const { + assert_leq(cur_, pages_.size()); + assert(!pages_.empty()); + assert_gt(bytes_, 0); + assert_gt(pagesz_, 0); + return true; + } +#endif + +public: + int cat_; // memory category, for accounting purposes + uint32_t cur_; // next page to hand out + const uint64_t bytes_; // total bytes in the pool + const uint32_t pagesz_; // size of a single page + EList pages_; // the pages themselves +}; + +/** + * An expandable list backed by a pool. + */ +template +class PList { + +#define PLIST_PER_PAGE (S / sizeof(T)) + +public: + /** + * Initialize the current-edit pointer to 0 and set the number of + * edits per memory page. + */ + PList(int cat = 0) : + cur_(0), + curPage_(0), + pages_(cat) { } + + /** + * Add 1 object to the list. + */ + bool add(Pool& p, const T& o) { + assert(repOk()); + if(!ensure(p, 1)) return false; + if(cur_ == PLIST_PER_PAGE) { + cur_ = 0; + curPage_++; + } + assert_lt(curPage_, pages_.size()); + assert(repOk()); + assert_lt(cur_, PLIST_PER_PAGE); + pages_[curPage_][cur_++] = o; + return true; + } + + /** + * Add a list of objects to the list. + */ + bool add(Pool& p, const EList& os) { + if(!ensure(p, os.size())) return false; + for(size_t i = 0; i < os.size(); i++) { + if(cur_ == PLIST_PER_PAGE) { + cur_ = 0; + curPage_++; + } + assert_lt(curPage_, pages_.size()); + assert(repOk()); + assert_lt(cur_, PLIST_PER_PAGE); + pages_[curPage_][cur_++] = os[i]; + } + return true; + } + + /** + * Add a list of objects to the list. + */ + bool copy( + Pool& p, + const PList& src, + size_t i, + size_t len) + { + if(!ensure(p, src.size())) return false; + for(size_t i = 0; i < src.size(); i++) { + if(cur_ == PLIST_PER_PAGE) { + cur_ = 0; + curPage_++; + } + assert_lt(curPage_, pages_.size()); + assert(repOk()); + assert_lt(cur_, PLIST_PER_PAGE); + pages_[curPage_][cur_++] = src[i]; + } + return true; + } + + /** + * Add 'num' objects, all equal to 'o' to the list. 
+ */ + bool addFill(Pool& p, size_t num, const T& o) { + if(!ensure(p, num)) return false; + for(size_t i = 0; i < num; i++) { + if(cur_ == PLIST_PER_PAGE) { + cur_ = 0; + curPage_++; + } + assert_lt(curPage_, pages_.size()); + assert(repOk()); + assert_lt(cur_, PLIST_PER_PAGE); + pages_[curPage_][cur_++] = o; + } + return true; + } + + /** + * Free all pages associated with the list. + */ + void clear() { + pages_.clear(); + cur_ = curPage_ = 0; + } + +#ifndef NDEBUG + /** + * Check that list is internally consistent. + */ + bool repOk() const { + assert(pages_.size() == 0 || curPage_ < pages_.size()); + assert_leq(cur_, PLIST_PER_PAGE); + return true; + } +#endif + + /** + * Return the number of elements in the list. + */ + size_t size() const { + return curPage_ * PLIST_PER_PAGE + cur_; + } + + /** + * Return true iff the PList has no elements. + */ + bool empty() const { + return size() == 0; + } + + /** + * Get the ith element added to the list. + */ + inline const T& getConst(size_t i) const { + assert_lt(i, size()); + size_t page = i / PLIST_PER_PAGE; + size_t elt = i % PLIST_PER_PAGE; + return pages_[page][elt]; + } + + /** + * Get the ith element added to the list. + */ + inline T& get(size_t i) { + assert_lt(i, size()); + size_t page = i / PLIST_PER_PAGE; + size_t elt = i % PLIST_PER_PAGE; + assert_lt(page, pages_.size()); + assert(page < pages_.size()-1 || elt < cur_); + return pages_[page][elt]; + } + + /** + * Get the most recently added element. + */ + inline T& back() { + size_t page = (size()-1) / PLIST_PER_PAGE; + size_t elt = (size()-1) % PLIST_PER_PAGE; + assert_lt(page, pages_.size()); + assert(page < pages_.size()-1 || elt < cur_); + return pages_[page][elt]; + } + + /** + * Get const version of the most recently added element. 
+ */ + inline const T& back() const { + size_t page = (size()-1) / PLIST_PER_PAGE; + size_t elt = (size()-1) % PLIST_PER_PAGE; + assert_lt(page, pages_.size()); + assert(page < pages_.size()-1 || elt < cur_); + return pages_[page][elt]; + } + + /** + * Get the element most recently added to the list. + */ + T& last() { + assert(!pages_.empty()); + assert_gt(PLIST_PER_PAGE, 0); + if(cur_ == 0) { + assert_gt(pages_.size(), 1); + return pages_[pages_.size()-2][PLIST_PER_PAGE-1]; + } else { + return pages_.back()[cur_-1]; + } + } + + /** + * Return true iff 'num' additional objects will fit in the pages + * allocated to the list. If more pages are needed, they are + * added if possible. + */ + bool ensure(Pool& p, size_t num) { + assert(repOk()); + if(num == 0) return true; + // Allocation of the first page + if(pages_.size() == 0) { + if(expand(p) == NULL) { + return false; + } + assert_eq(1, pages_.size()); + } + size_t cur = cur_; + size_t curPage = curPage_; + while(cur + num > PLIST_PER_PAGE) { + assert_lt(curPage, pages_.size()); + if(curPage == pages_.size()-1 && expand(p) == NULL) { + return false; + } + num -= (PLIST_PER_PAGE - cur); + cur = 0; + curPage++; + } + return true; + } + +protected: + + /** + * Expand our page supply by 1 + */ + T* expand(Pool& p) { + T* newpage = (T*)p.alloc(); + if(newpage == NULL) { + return NULL; + } + pages_.push_back(newpage); + return pages_.back(); + } + + size_t cur_; // current elt within page + size_t curPage_; // current page + EList pages_; // the pages +}; + +/** + * A slice of an EList. + */ +template +class EListSlice { + +public: + EListSlice() : + i_(0), + len_(0), + list_() + { } + + EListSlice( + EList& list, + size_t i, + size_t len) : + i_(i), + len_(len), + list_(&list) + { } + + /** + * Initialize from a piece of another PListSlice. 
+ */ + void init(const EListSlice& sl, size_t first, size_t last) { + assert_gt(last, first); + assert_leq(last - first, sl.len_); + i_ = sl.i_ + first; + len_ = last - first; + list_ = sl.list_; + } + + /** + * Reset state to be empty. + */ + void reset() { + i_ = len_ = 0; + list_ = NULL; + } + + /** + * Get the ith element of the slice. + */ + inline const T& get(size_t i) const { + assert(valid()); + assert_lt(i, len_); + return list_->get(i + i_); + } + + /** + * Get the ith element of the slice. + */ + inline T& get(size_t i) { + assert(valid()); + assert_lt(i, len_); + return list_->get(i + i_); + } + + /** + * Return a reference to the ith element. + */ + inline T& operator[](size_t i) { + assert(valid()); + assert_lt(i, len_); + return list_->get(i + i_); + } + + /** + * Return a reference to the ith element. + */ + inline const T& operator[](size_t i) const { + assert(valid()); + assert_lt(i, len_); + return list_->get(i + i_); + } + + /** + * Return true iff this slice is initialized. + */ + bool valid() const { + return len_ != 0; + } + + /** + * Return number of elements in the slice. + */ + size_t size() const { + return len_; + } + +#ifndef NDEBUG + /** + * Ensure that the PListSlice is internally consistent and + * consistent with the backing PList. + */ + bool repOk() const { + assert_leq(i_ + len_, list_->size()); + return true; + } +#endif + + /** + * Return true iff this slice refers to the same slice of the same + * list as the given slice. + */ + bool operator==(const EListSlice& sl) const { + return i_ == sl.i_ && len_ == sl.len_ && list_ == sl.list_; + } + + /** + * Return false iff this slice refers to the same slice of the same + * list as the given slice. + */ + bool operator!=(const EListSlice& sl) const { + return !(*this == sl); + } + + /** + * Set the length. This could leave things inconsistent (e.g. could + * include elements that fall off the end of list_). 
+ */ + void setLength(size_t nlen) { + len_ = (uint32_t)nlen; + } + +protected: + size_t i_; + size_t len_; + EList* list_; +}; + +/** + * A slice of a PList. + */ +template +class PListSlice { + +public: + PListSlice() : + i_(0), + len_(0), + list_() + { } + + PListSlice( + PList& list, + TIndexOffU i, + TIndexOffU len) : + i_(i), + len_(len), + list_(&list) + { } + + /** + * Initialize from a piece of another PListSlice. + */ + void init(const PListSlice& sl, size_t first, size_t last) { + assert_gt(last, first); + assert_leq(last - first, sl.len_); + i_ = (uint32_t)(sl.i_ + first); + len_ = (uint32_t)(last - first); + list_ = sl.list_; + } + + /** + * Reset state to be empty. + */ + void reset() { + i_ = len_ = 0; + list_ = NULL; + } + + /** + * Get the ith element of the slice. + */ + inline const T& get(size_t i) const { + assert(valid()); + assert_lt(i, len_); + return list_->get(i+i_); + } + + /** + * Get the ith element of the slice. + */ + inline T& get(size_t i) { + assert(valid()); + assert_lt(i, len_); + return list_->get(i+i_); + } + + /** + * Return a reference to the ith element. + */ + inline T& operator[](size_t i) { + assert(valid()); + assert_lt(i, len_); + return list_->get(i+i_); + } + + /** + * Return a reference to the ith element. + */ + inline const T& operator[](size_t i) const { + assert(valid()); + assert_lt(i, len_); + return list_->get(i+i_); + } + + /** + * Return true iff this slice is initialized. + */ + bool valid() const { + return len_ != 0; + } + + /** + * Return number of elements in the slice. + */ + size_t size() const { + return len_; + } + +#ifndef NDEBUG + /** + * Ensure that the PListSlice is internally consistent and + * consistent with the backing PList. + */ + bool repOk() const { + assert_leq(i_ + len_, list_->size()); + return true; + } +#endif + + /** + * Return true iff this slice refers to the same slice of the same + * list as the given slice. 
+ */ + bool operator==(const PListSlice& sl) const { + return i_ == sl.i_ && len_ == sl.len_ && list_ == sl.list_; + } + + /** + * Return false iff this slice refers to the same slice of the same + * list as the given slice. + */ + bool operator!=(const PListSlice& sl) const { + return !(*this == sl); + } + + /** + * Set the length. This could leave things inconsistent (e.g. could + * include elements that fall off the end of list_). + */ + void setLength(size_t nlen) { + len_ = (uint32_t)nlen; + } + +protected: + uint32_t i_; + uint32_t len_; + PList* list_; +}; + +/** + * A Red-Black tree node. Links to parent & left and right children. + * Key and Payload are of types K and P. Node total ordering is based + * on K's total ordering. K must implement <, == and > operators. + */ +template // K=key, P=payload +class RedBlackNode { + + typedef RedBlackNode TNode; + +public: + TNode *parent; // parent + TNode *left; // left child + TNode *right; // right child + bool red; // true -> red, false -> black + K key; // key, for ordering + P payload; // payload (i.e. value) + + /** + * Return the parent of this node's parent, or NULL if none exists. + */ + RedBlackNode *grandparent() { + return parent != NULL ? parent->parent : NULL; + } + + /** + * Return the sibling of this node's parent, or NULL if none exists. + */ + RedBlackNode *uncle() { + if(parent == NULL) return NULL; // no parent + if(parent->parent == NULL) return NULL; // parent has no siblings + return (parent->parent->left == parent) ? parent->parent->right : parent->parent->left; + } + + /** + * Return true iff this node is its parent's left child. + */ + bool isLeftChild() const { assert(parent != NULL); return parent->left == this; } + + /** + * Return true iff this node is its parent's right child. + */ + bool isRightChild() const { assert(parent != NULL); return parent->right == this; } + + /** + * Return true iff this node is its parent's right child. 
+ */ + void replaceChild(RedBlackNode* ol, RedBlackNode* nw) { + if(left == ol) { + left = nw; + } else { + assert(right == ol); + right = nw; + } + } + + /** + * Return the number of non-null children this node has. + */ + int numChildren() const { + return ((left != NULL) ? 1 : 0) + ((right != NULL) ? 1 : 0); + } + +#ifndef NDEBUG + /** + * Check that node is internally consistent. + */ + bool repOk() const { + if(parent != NULL) { + assert(parent->left == this || parent->right == this); + } + return true; + } +#endif + + /** + * True -> my key is less than than the given node's key. + */ + bool operator<(const TNode& o) const { return key < o.key; } + + /** + * True -> my key is greater than the given node's key. + */ + bool operator>(const TNode& o) const { return key > o.key; } + + /** + * True -> my key equals the given node's key. + */ + bool operator==(const TNode& o) const { return key == o.key; } + + /** + * True -> my key is less than the given key. + */ + bool operator<(const K& okey) const { return key < okey; } + + /** + * True -> my key is greater than the given key. + */ + bool operator>(const K& okey) const { return key > okey; } + + /** + * True -> my key is equal to the given key. + */ + bool operator==(const K& okey) const { return key == okey; } +}; + +/** + * A Red-Black tree that associates keys (of type K) with payloads (of + * type P). Red-Black trees are self-balancing and guarantee that the + * tree as always "balanced" to a factor of 2, i.e., the longest + * root-to-leaf path is never more than twice as long as the shortest + * root-to-leaf path. + */ +template // K=key, P=payload +class RedBlack { + + typedef RedBlackNode TNode; + +public: + /** + * Initialize the current-edit pointer to 0 and set the number of + * edits per memory page. + */ + RedBlack(uint32_t pageSz, int cat = 0) : + perPage_(pageSz/sizeof(TNode)), pages_(cat) { clear(); } + + /** + * Given a DNA string, find the red-black node corresponding to it, + * if one exists. 
+ */ + inline TNode* lookup(const K& key) const { + TNode* cur = root_; + while(cur != NULL) { + if((*cur) == key) return cur; + if((*cur) < key) { + cur = cur->right; + } else { + cur = cur->left; + } + } + return NULL; + } + + /** + * Add a new key as a node in the red-black tree. + */ + TNode* add( + Pool& p, // in: pool for memory pages + const K& key, // in: key to insert + bool* added) // if true, assert is thrown if key exists + { + // Look for key; if it's not there, get its parent + TNode* cur = root_; + assert(root_ == NULL || !root_->red); + TNode* parent = NULL; + bool leftChild = true; + while(cur != NULL) { + if((*cur) == key) { + // Found it; break out of loop with cur != NULL + break; + } + parent = cur; + if((*cur) < key) { + if((cur = cur->right) == NULL) { + // Fell off the bottom of the tree as the right + // child of parent 'lastCur' + leftChild = false; + } + } else { + if((cur = cur->left) == NULL) { + // Fell off the bottom of the tree as the left + // child of parent 'lastCur' + leftChild = true; + } + } + } + if(cur != NULL) { + // Found an entry; assert if we weren't supposed to + if(added != NULL) *added = false; + } else { + assert(root_ == NULL || !root_->red); + if(!addNode(p, cur)) { + // Exhausted memory + return NULL; + } + assert(cur != NULL); + assert(cur != root_); + assert(cur != parent); + // Initialize new node + cur->key = key; + cur->left = cur->right = NULL; + cur->red = true; // red until proven black + keys_++; + if(added != NULL) *added = true; + // Put it where we know it should go + addNode(cur, parent, leftChild); + } + return cur; // return the added or found node + } + +#ifndef NDEBUG + /** + * Check that list is internally consistent. + */ + bool repOk() const { + assert(curPage_ == 0 || curPage_ < pages_.size()); + assert_leq(cur_, perPage_); + assert(root_ == NULL || !root_->red); + return true; + } +#endif + + /** + * Clear all state. 
+ */ + void clear() { + cur_ = curPage_ = 0; + root_ = NULL; + keys_ = 0; + intenseRepOkCnt_ = 0; + pages_.clear(); + } + + /** + * Return number of keys added. + */ + size_t size() const { + return keys_; + } + + /** + * Return true iff there are no keys in the map. + */ + bool empty() const { + return keys_ == 0; + } + + /** + * Add another node and return a pointer to it in 'node'. A new + * page is allocated if necessary. If the allocation fails, false + * is returned and 'node' is set to NULL. + */ + bool addNode(Pool& p, TNode*& node) { + assert_leq(cur_, perPage_); + assert(repOk()); + // Allocation of the first page + if(pages_.size() == 0) { + if(addPage(p) == NULL) { + node = NULL; + return false; + } + assert_eq(1, pages_.size()); + } + if(cur_ == perPage_) { + // Current page is full; move to the next page, allocating + // it first if we are already on the last one + assert_lt(curPage_, pages_.size()); + if(curPage_ == pages_.size()-1 && addPage(p) == NULL) { + // Same contract as the first-page failure above + node = NULL; + return false; + } + cur_ = 0; + curPage_++; + } + assert_lt(cur_, perPage_); + assert_lt(curPage_, pages_.size()); + node = &pages_[curPage_][cur_]; + assert(node != NULL); + cur_++; + return true; + } + + const TNode* root() const { return root_; } + +protected: + +#ifndef NDEBUG + /** + * Check specifically that the red-black invariants are satisfied. + * The full check is only run once every 500 calls made with a + * non-NULL node; all other calls return true immediately. + */ + bool redBlackRepOk(TNode* n) { + if(n == NULL) return true; + if(++intenseRepOkCnt_ < 500) return true; + intenseRepOkCnt_ = 0; + int minNodes = -1; // min # nodes along any n->leaf path + int maxNodes = -1; // max # nodes along any n->leaf path + // The number of black nodes along paths from n to leaf + // (must be same for all paths) + int blackConst = -1; + size_t nodesTot = 0; + redBlackRepOk( + n, + 1, /* 1 node so far */ + n->red ?
0 : 1, /* black nodes so far */ + blackConst, + minNodes, + maxNodes, + nodesTot); + if(n == root_) { + assert_eq(nodesTot, keys_); + } + assert_gt(minNodes, 0); + assert_gt(maxNodes, 0); + assert_leq(maxNodes, 2*minNodes); + return true; + } + + /** + * Recursive helper that checks that the red-black invariants are + * satisfied in the subtree rooted at n. 'nodes' and 'black' are the + * number of nodes and of black nodes on the path down to and + * including n. 'blackConst' is -1 until the first NULL child is + * reached, then holds the black count every such path must match. + * 'minNodes'/'maxNodes' track the shortest/longest path seen (-1 + * until set) and 'nodesTot' counts every node visited. + */ + bool redBlackRepOk( + TNode* n, + int nodes, + int black, + int& blackConst, + int& minNodes, + int& maxNodes, + size_t& nodesTot) const + { + assert_gt(black, 0); + nodesTot++; // account for this node + if(n->left == NULL) { + if(blackConst == -1) blackConst = black; + assert_eq(black, blackConst); + if(nodes+1 > maxNodes) maxNodes = nodes+1; + if(nodes+1 < minNodes || minNodes == -1) minNodes = nodes+1; + } else { + if(n->red) assert(!n->left->red); // Red can't be child of a red + redBlackRepOk( + n->left, // next node + nodes + 1, // # nodes so far on path + black + (n->left->red ? 0 : 1), // # black so far on path + blackConst, // invariant # black nodes on root->leaf path + minNodes, // min root->leaf len so far + maxNodes, // max root->leaf len so far + nodesTot); // tot nodes so far + } + if(n->right == NULL) { + if(blackConst == -1) blackConst = black; + assert_eq(black, blackConst); + if(nodes+1 > maxNodes) maxNodes = nodes+1; + if(nodes+1 < minNodes || minNodes == -1) minNodes = nodes+1; + } else { + if(n->red) assert(!n->right->red); // Red can't be child of a red + redBlackRepOk( + n->right, // next node + nodes + 1, // # nodes so far on path + black + (n->right->red ? 0 : 1), // # black so far on path + blackConst, // invariant # black nodes on root->leaf path + minNodes, // min root->leaf len so far + maxNodes, // max root->leaf len so far + nodesTot); // tot nodes so far + } + return true; + } +#endif + + /** + * Rotate to the left such that n is replaced by its right child + * w/r/t n's current parent. n moves down to the left and loses + * its right child, while its former right child moves up and gains + * a left child.
+ */ + void leftRotate(TNode* n) { + TNode* r = n->right; + assert(n->repOk()); + assert(r->repOk()); + n->right = r->left; + if(n->right != NULL) { + n->right->parent = n; + assert(n->right->repOk()); + } + r->parent = n->parent; + n->parent = r; + r->left = n; + if(r->parent != NULL) { + r->parent->replaceChild(n, r); + } + if(root_ == n) root_ = r; + assert(!root_->red); + assert(n->repOk()); + assert(r->repOk()); + } + + /** + * Rotate to the right such that n is replaced by its left child + * w/r/t n's current parent. n moves down to the right and loses + * its left child, while its former left child moves up and gains a + * right child. + */ + void rightRotate(TNode* n) { + TNode* r = n->left; + assert(n->repOk()); + assert(r->repOk()); + n->left = r->right; + if(n->left != NULL) { + n->left->parent = n; + assert(n->left->repOk()); + } + r->parent = n->parent; + n->parent = r; + r->right = n; + if(r->parent != NULL) { + r->parent->replaceChild(n, r); + } + if(root_ == n) root_ = r; + assert(!root_->red); + assert(n->repOk()); + assert(r->repOk()); + } + + /** + * Add a node to the red-black tree, maintaining the red-black + * invariants. 
+ */ + void addNode(TNode* n, TNode* parent, bool leftChild) { + assert(n != NULL); + if(parent == NULL) { + // Case 1: inserted at root + root_ = n; + root_->red = false; // root must be black + n->parent = NULL; + assert(redBlackRepOk(root_)); + assert(n->repOk()); + } else { + assert(!root_->red); + // Add new node to tree + if(leftChild) { + assert(parent->left == NULL); + parent->left = n; + } else { + assert(parent->right == NULL); + parent->right = n; + } + n->parent = parent; + // Walk up from the new node, repairing red-red violations + while(true) { + parent = n->parent; + if(parent != NULL) assert(parent->repOk()); + if(parent == NULL && n->red) { + // Climbed to the root; the root must be black + n->red = false; + } + if(parent == NULL || !parent->red) { + // No red-red violation left + assert(redBlackRepOk(root_)); + break; + } + TNode* uncle = n->uncle(); + TNode* gparent = n->grandparent(); + assert(gparent != NULL); // if parent is red, grandparent must exist + bool uncleRed = (uncle != NULL ? uncle->red : false); + if(uncleRed) { + // Parent is red, uncle is red; recursive case + assert(uncle != NULL); + parent->red = uncle->red = false; + gparent->red = true; + n = gparent; + continue; + } else { + if(parent->isLeftChild()) { + // Parent is red, uncle is black, parent is + // left child + if(!n->isLeftChild()) { + n = parent; + leftRotate(n); + } + n = n->parent; + n->red = false; + n->parent->red = true; + rightRotate(n->parent); + assert(redBlackRepOk(n)); + assert(redBlackRepOk(root_)); + } else { + // Parent is red, uncle is black, parent is + // right child.
+ if(!n->isRightChild()) { + n = parent; + rightRotate(n); + } + n = n->parent; + n->red = false; + n->parent->red = true; + leftRotate(n->parent); + assert(redBlackRepOk(n)); + assert(redBlackRepOk(root_)); + } + } + break; + } + } + assert(redBlackRepOk(root_)); + } + + /** + * Expand our page supply by 1. Returns the newly allocated page, or + * NULL (leaving pages_ unchanged) if the pool's alloc() returned NULL. + */ + TNode* addPage(Pool& p) { + TNode *n = (TNode *)p.alloc(); + if(n != NULL) { + pages_.push_back(n); + } + return n; + } + + size_t keys_; // number of keys so far + size_t cur_; // current elt within page + size_t curPage_; // current page + const size_t perPage_; // # nodes fitting in a page + TNode* root_; // root node + EList pages_; // the pages + int intenseRepOkCnt_; // counter for the computationally intensive repOk function +}; + +/** + * Node type for assembling doubly-linked lists; each node carries a + * payload of type T. + */ +template +struct DoublyLinkedList { + + DoublyLinkedList() : payload(), prev(NULL), next(NULL) { } + + /** + * Append the payloads of the list to the provided EList: first this + * node and its successors (following 'next'), then this node's + * predecessors (following 'prev', so nearest predecessor first). + */ + void toList(EList& l) { + // Add this and all subsequent elements + DoublyLinkedList *cur = this; + while(cur != NULL) { + l.push_back(cur->payload); + cur = cur->next; + } + // Add all previous elements + cur = prev; + while(cur != NULL) { + l.push_back(cur->payload); + cur = cur->prev; + } + } + + T payload; + DoublyLinkedList *prev; + DoublyLinkedList *next; +}; +/** Two values compared lexicographically: by 'a', then by 'b'. */ +template +struct Pair { + T1 a; + T2 b; + + Pair() : a(), b() { } + + Pair( + const T1& a_, + const T2& b_) { a = a_; b = b_; } + + bool operator==(const Pair& o) const { + return a == o.a && b == o.b; + } + + bool operator<(const Pair& o) const { + if(a < o.a) return true; + if(a > o.a) return false; + if(b < o.b) return true; + return false; + } +}; +/** Three values compared lexicographically: by 'a', then 'b', then 'c'. */ +template +struct Triple { + T1 a; + T2 b; + T3 c; + + Triple() : a(), b(), c() { } + + Triple( + const T1& a_, + const T2& b_, + const T3& c_) { a = a_; b = b_; c = c_; } + + bool operator==(const Triple& o) const { + return a == o.a && b == o.b && c == o.c; + } + +
bool operator<(const Triple& o) const { + if(a < o.a) return true; + if(a > o.a) return false; + if(b < o.b) return true; + if(b > o.b) return false; + if(c < o.c) return true; + return false; + } +}; +/** Four values compared lexicographically: by 'a', then 'b', 'c' and 'd'. */ +template +struct Quad { + + Quad() : a(), b(), c(), d() { } + + Quad( + const T1& a_, + const T2& b_, + const T3& c_, + const T4& d_) { a = a_; b = b_; c = c_; d = d_; } + /** NOTE(review): takes four T1 values; if T1..T4 are all the same type this has the same signature as the constructor above -- confirm Quad is never instantiated that way. */ + Quad( + const T1& a_, + const T1& b_, + const T1& c_, + const T1& d_) + { + init(a_, b_, c_, d_); + } + /** Assign all four members from values of type T1. */ + void init( + const T1& a_, + const T1& b_, + const T1& c_, + const T1& d_) + { + a = a_; b = b_; c = c_; d = d_; + } + + bool operator==(const Quad& o) const { + return a == o.a && b == o.b && c == o.c && d == o.d; + } + + bool operator<(const Quad& o) const { + if(a < o.a) return true; + if(a > o.a) return false; + if(b < o.b) return true; + if(b > o.b) return false; + if(c < o.c) return true; + if(c > o.c) return false; + if(d < o.d) return true; + return false; + } + + T1 a; + T2 b; + T3 c; + T4 d; +}; + +/** + * Node of a singly-linked list (it only has a 'next' link) carrying a + * payload of type T. + */ +template +struct LinkedEListNode { + + LinkedEListNode() : payload(), next(NULL) { } + + T payload; + LinkedEListNode *next; +}; + +/** + * Free list of LinkedEListNode objects: new_node() hands out a node, + * allocating one when the list is empty, and delete_node() puts a node + * back on the list so that it can be handed out again.
+ */ +template +struct LinkedEList { + /** Start with an empty free list. */ + LinkedEList() : head(NULL) { + ASSERT_ONLY(num_allocated = 0); + ASSERT_ONLY(num_new_node = 0); + ASSERT_ONLY(num_delete_node = 0); + } + /** Delete every node currently on the free list; nodes handed out by new_node() and not given back via delete_node() are not touched here. */ + ~LinkedEList() { + ASSERT_ONLY(size_t num_deallocated = 0); + while(head != NULL) { + LinkedEListNode* next = head->next; + delete head; + ASSERT_ONLY(num_deallocated++); + head = next; + } + // daehwan - for debugging purposes + // assert_eq(num_allocated, num_deallocated); + } + /** Pop a node off the free list, allocating a fresh one first if the list is empty. The 'next' field of a recycled node is not reset and still refers to the remainder of the free list. */ + LinkedEListNode* new_node() { + ASSERT_ONLY(num_new_node++); + LinkedEListNode *result = NULL; + if(head == NULL) { + head = new LinkedEListNode(); + head-> next = NULL; + ASSERT_ONLY(num_allocated++); + } + assert(head != NULL); + result = head; + head = head->next; + assert(result != NULL); + return result; + } + /** Push 'node' back onto the free list for reuse (it is not deleted). Debug builds assert that it is not already on the list. */ + void delete_node(LinkedEListNode *node) { + ASSERT_ONLY(num_delete_node++); + assert(node != NULL); + // check if this is already deleted. +#ifndef NDEBUG + LinkedEListNode *temp = head; + while(temp != NULL) { + assert(temp != node); + temp = temp->next; + } +#endif + node->next = head; + head = node; + } + /** Top of the free list (NULL when the list is empty). */ + LinkedEListNode *head; + + ASSERT_ONLY(size_t num_allocated); + ASSERT_ONLY(size_t num_new_node); + ASSERT_ONLY(size_t num_delete_node); +}; + + +#endif /* DS_H_ */ diff --git a/edit.cpp b/edit.cpp new file mode 100644 index 0000000..1187bad --- /dev/null +++ b/edit.cpp @@ -0,0 +1,501 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. + */ + +#include +#include "edit.h" + +using namespace std; + +/** + * Print a single edit to a std::ostream. For every edit type except + * a splice the format is (pos):(ref chr)>(read chr), where 'pos' is an + * offset from the 5' end of the read, and the ref and read chrs are + * expressed w/r/t the Watson strand. A splice edit is printed as + * (pos):(splLen). + */ +ostream& operator<< (ostream& os, const Edit& e) { + if(e.type != EDIT_TYPE_SPL) { + os << e.pos << ":" << (char)e.chr << ">" << (char)e.qchr; + } else { + os << e.pos << ":" << e.splLen; + } + + return os; +} + +/** + * Print a list of edits to a std::ostream, separated by 'delim'. No + * delimiter is written after the last edit. + */ +void Edit::print(ostream& os, const EList& edits, char delim) { + for(size_t i = 0; i < edits.size(); i++) { + os << edits[i]; + if(i < edits.size()-1) os << delim; + } +} +/** Replace chr and qchr of every non-splice edit in the list with the value asc2dnacomp maps them to. */ +void Edit::complement(EList &edits) +{ + for(size_t i = 0; i < edits.size(); i++) { + Edit& e = edits[i]; + + if(e.type != EDIT_TYPE_SPL) { + assert_in(e.chr, "ACMGRSVTWYHKDBN-"); + assert_in(e.qchr, "ACGTN-"); + + e.chr = asc2dnacomp[e.chr]; + e.qchr = asc2dnacomp[e.qchr]; + } + } +} + +/** + * Flip all the edits.pos fields so that they're with respect to + * the other end of the read (of length 'sz').
+ */ +void Edit::invertPoss( + EList& edits, + size_t sz, + size_t ei, + size_t en, + bool sort) +{ + // Invert elements + size_t ii = 0; + for(size_t i = ei; i < ei + en/2; i++) { + Edit tmp = edits[i]; + edits[i] = edits[ei + en - ii - 1]; + edits[ei + en - ii - 1] = tmp; + ii++; + } + for(size_t i = ei; i < ei + en; i++) { + assert(edits[i].pos < sz || + (edits[i].isReadGap() && edits[i].pos == sz)); + // Adjust pos + if(edits[i].isReadGap() || edits[i].isSpliced()) { + edits[i].pos = (uint32_t)(sz - edits[i].pos); + } else { + edits[i].pos = (uint32_t)(sz - edits[i].pos - 1); + } + // Adjust pos2 + if(edits[i].isReadGap()) { + int64_t pos2diff = (int64_t)(uint64_t)edits[i].pos2 - (int64_t)((uint64_t)std::numeric_limits::max() >> 1); + int64_t pos2new = (int64_t)(uint64_t)edits[i].pos2 - 2*pos2diff; + assert(pos2diff == 0 || (uint32_t)pos2new != (std::numeric_limits::max() >> 1)); + edits[i].pos2 = (uint32_t)pos2new; + } + } + if(sort) { + // Edits might not necessarily be in same order after inversion + edits.sortPortion(ei, en); +#ifndef NDEBUG + for(size_t i = ei + 1; i < ei + en; i++) { + assert_geq(edits[i].pos, edits[i-1].pos); + } +#endif + } +} + +/** + * For now, we pretend that the alignment is in the forward orientation + * and that the Edits are listed from left- to right-hand side. + */ +void Edit::printQAlign( + std::ostream& os, + const BTDnaString& read, + const EList& edits) +{ + printQAlign(os, "", read, edits); +} + +/** + * For now, we pretend that the alignment is in the forward orientation + * and that the Edits are listed from left- to right-hand side. + */ +void Edit::printQAlignNoCheck( + std::ostream& os, + const BTDnaString& read, + const EList& edits) +{ + printQAlignNoCheck(os, "", read, edits); +} + +/** + * For now, we pretend that the alignment is in the forward orientation + * and that the Edits are listed from left- to right-hand side. 
+ */ +void Edit::printQAlign( + std::ostream& os, + const char *prefix, + const BTDnaString& read, + const EList& edits) +{ + size_t eidx = 0; + os << prefix; + // Print read + for(size_t i = 0; i < read.length(); i++) { + bool del = false, mm = false; + while(eidx < edits.size() && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + os << '-'; + } else if(edits[eidx].isRefGap()) { + del = true; + assert_eq((int)edits[eidx].qchr, read.toChar(i)); + os << read.toChar(i); + } else { + mm = true; + assert(edits[eidx].isMismatch()); + assert_eq((int)edits[eidx].qchr, read.toChar(i)); + os << (char)edits[eidx].qchr; + } + eidx++; + } + if(!del && !mm) os << read.toChar(i); + } + os << endl; + os << prefix; + eidx = 0; + // Print match bars + for(size_t i = 0; i < read.length(); i++) { + bool del = false, mm = false; + while(eidx < edits.size() && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + os << ' '; + } else if(edits[eidx].isRefGap()) { + del = true; + os << ' '; + } else { + mm = true; + assert(edits[eidx].isMismatch()); + os << ' '; + } + eidx++; + } + if(!del && !mm) os << '|'; + } + os << endl; + os << prefix; + eidx = 0; + // Print reference + for(size_t i = 0; i < read.length(); i++) { + bool del = false, mm = false; + while(eidx < edits.size() && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + os << (char)edits[eidx].chr; + } else if(edits[eidx].isRefGap()) { + del = true; + os << '-'; + } else { + mm = true; + assert(edits[eidx].isMismatch()); + os << (char)edits[eidx].chr; + } + eidx++; + } + if(!del && !mm) os << read.toChar(i); + } + os << endl; +} + +/** + * For now, we pretend that the alignment is in the forward orientation + * and that the Edits are listed from left- to right-hand side. 
+ */ +void Edit::printQAlignNoCheck( + std::ostream& os, + const char *prefix, + const BTDnaString& read, + const EList& edits) +{ + size_t eidx = 0; + os << prefix; + // Print read + for(size_t i = 0; i < read.length(); i++) { + bool del = false, mm = false; + while(eidx < edits.size() && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + os << '-'; + } else if(edits[eidx].isRefGap()) { + del = true; + os << read.toChar(i); + } else { + mm = true; + os << (char)edits[eidx].qchr; + } + eidx++; + } + if(!del && !mm) os << read.toChar(i); + } + os << endl; + os << prefix; + eidx = 0; + // Print match bars + for(size_t i = 0; i < read.length(); i++) { + bool del = false, mm = false; + while(eidx < edits.size() && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + os << ' '; + } else if(edits[eidx].isRefGap()) { + del = true; + os << ' '; + } else { + mm = true; + os << ' '; + } + eidx++; + } + if(!del && !mm) os << '|'; + } + os << endl; + os << prefix; + eidx = 0; + // Print reference + for(size_t i = 0; i < read.length(); i++) { + bool del = false, mm = false; + while(eidx < edits.size() && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + os << (char)edits[eidx].chr; + } else if(edits[eidx].isRefGap()) { + del = true; + os << '-'; + } else { + mm = true; + os << (char)edits[eidx].chr; + } + eidx++; + } + if(!del && !mm) os << read.toChar(i); + } + os << endl; +} + +/** + * Sort the edits in the provided list. + */ +void Edit::sort(EList& edits) { + edits.sort(); // simple! +} + +/** + * Given a read string and some edits, generate and append the corresponding + * reference string to 'ref'. If read aligned to the Watson strand, the caller + * should pass the original read sequence and original edits. If a read + * aligned to the Crick strand, the caller should pass the reverse complement + * of the read and a version of the edits list that has had Edit:invertPoss + * called on it to cause edits to be listed in 3'-to-5' order. 
+ */ +void Edit::toRef( + const BTDnaString& read, + const EList& edits, + BTDnaString& ref, + bool fw, + size_t trim5, + size_t trim3) +{ + // edits should be sorted + size_t eidx = 0; + // Build reference by walking the read and applying the edits + const size_t rdlen = read.length(); + size_t trimBeg = fw ? trim5 : trim3; + size_t trimEnd = fw ? trim3 : trim5; + assert(Edit::repOk(edits, read, fw, trim5, trim3)); + if(!fw) { + // 'edits' is re-oriented in place (const is cast away) and + // flipped back again before returning + invertPoss(const_cast&>(edits), read.length()-trimBeg-trimEnd, false); + } + for(size_t i = 0; i < rdlen; i++) { + ASSERT_ONLY(int c = read[i]); + assert_range(0, 4, c); + bool del = false, mm = false; + // Only positions outside the trimmed ends contribute to 'ref' + bool append = i >= trimBeg && rdlen - i - 1 >= trimEnd; + bool appendIns = i >= trimBeg && rdlen - i >= trimEnd; + while(eidx < edits.size() && edits[eidx].pos+trimBeg == i) { + if(edits[eidx].isReadGap()) { + // Inserted characters come before the position's + // character + if(appendIns) { + ref.appendChar((char)edits[eidx].chr); + } + } else if(edits[eidx].isRefGap()) { + assert_eq("ACGTN"[c], edits[eidx].qchr); + del = true; + } else if(edits[eidx].isMismatch()){ + mm = true; + assert(edits[eidx].qchr != edits[eidx].chr || edits[eidx].qchr == 'N'); + assert_eq("ACGTN"[c], edits[eidx].qchr); + if(append) { + ref.appendChar((char)edits[eidx].chr); + } + } + eidx++; + } + if(!del && !mm) { + // No edit consumed this read character; it matches the reference + if(append) { + ref.append(read[i]); + } + } + } + if(trimEnd == 0) { + // Read gaps positioned past the last read character + while(eidx < edits.size()) { + assert_gt(rdlen, edits[eidx].pos); + if(edits[eidx].isReadGap()) { + ref.appendChar((char)edits[eidx].chr); + } + eidx++; + } + } + if(!fw) { + // Restore the caller's original orientation + invertPoss(const_cast&>(edits), read.length()-trimBeg-trimEnd, false); + } +} + +#ifndef NDEBUG +/** + * Check that the edit is internally consistent.
+ */ +bool Edit::repOk() const { + assert(inited()); + // Ref and read characters cannot be the same unless they're both Ns + if(type != EDIT_TYPE_SPL) { + assert(qchr != chr || qchr == 'N'); + // Type must match characters + assert(isRefGap() || chr != '-'); + assert(isReadGap() || qchr != '-'); + assert(!isMismatch() || (qchr != '-' && chr != '-')); + } else { + assert_gt(splLen, 0); + } + return true; +} + +/** + * Given a list of edits and a DNA string representing the query + * sequence, check that the edits are consistent with respect to the + * query. When fw is false the list is temporarily re-oriented in + * place (const is cast away) and flipped back before returning. + */ +bool Edit::repOk( + const EList& edits, + const BTDnaString& s, + bool fw, + size_t trimBeg, + size_t trimEnd) +{ + if(!fw) { + invertPoss(const_cast&>(edits), s.length()-trimBeg-trimEnd, false); + swap(trimBeg, trimEnd); + } + // NOTE(review): the inner while loop leaves i on the first edit with + // a different pos and the for-increment then steps past it, so that + // edit appears to go unchecked -- confirm whether this is intended. + for(size_t i = 0; i < edits.size(); i++) { + const Edit& e = edits[i]; + size_t pos = e.pos; + if(i > 0) { + assert_geq(pos, edits[i-1].pos); + } + bool del = false, mm = false; + // Examine the run of edits that share this position + while(i < edits.size() && edits[i].pos == pos) { + const Edit& ee = edits[i]; + assert_lt(ee.pos, s.length()); + if(ee.type != EDIT_TYPE_SPL) { + if(ee.qchr != '-') { + assert(ee.isRefGap() || ee.isMismatch()); + //assert_eq((int)ee.qchr, s.toChar(ee.pos+trimBeg)); + } + } + if(ee.isMismatch()) { + assert(!mm); + mm = true; + assert(!del); + } else if(ee.isReadGap()) { + assert(!mm); + } else if(ee.isRefGap()) { + assert(!mm); + assert(!del); + del = true; + } else if(ee.isSpliced()) { + + } + i++; + } + } + if(!fw) { + invertPoss(const_cast&>(edits), s.length()-trimBeg-trimEnd, false); + } + return true; +} +#endif + +/** + * Merge second argument into the first. Assume both are sorted to + * begin with.
+ */ +void Edit::merge(EList& dst, const EList& src) { + size_t di = 0, si = 0; + // Stop as soon as either list is exhausted so that src[si] is never + // read out of range + while(di < dst.size() && si < src.size()) { + if(src[si].pos < dst[di].pos) { + // src edit sorts first; insert it ahead of dst[di] + dst.insert(src[si], di); + si++; di++; + } else if(src[si].pos == dst[di].pos) { + // There can be two inserts at a given position, but we + // can't merge them because there's no way to know their + // order + assert(src[si].isReadGap() != dst[di].isReadGap()); + if(src[si].isReadGap()) { + dst.insert(src[si], di); + si++; di++; + } else { + // Keep the dst edit first and look for the src edit's + // slot further along + di++; + } + } else { + // dst edit sorts first; always advance so the loop + // cannot spin forever + di++; + } + } + // Whatever is left in src sorts after everything in dst + while(si < src.size()) dst.push_back(src[si++]); +} + +/** + * Clip off some of the low-numbered positions. + */ +void Edit::clipLo(EList& ed, size_t len, size_t amt) { + size_t nrm = 0; + for(size_t i = 0; i < ed.size(); i++) { + assert_lt(ed[i].pos, len); + if(ed[i].pos < amt) { + nrm++; + } else { + // Shift everyone else up + ed[i].pos -= (uint32_t)amt; + } + } + ed.erase(0, nrm); +} + +/** + * Clip off some of the high-numbered positions. + */ +void Edit::clipHi(EList& ed, size_t len, size_t amt) { + assert_leq(amt, len); + size_t max = len - amt; + size_t nrm = 0; + for(size_t i = 0; i < ed.size(); i++) { + size_t ii = ed.size() - i - 1; + assert_lt(ed[ii].pos, len); + if(ed[ii].pos > max) { + nrm++; + } else if(ed[ii].pos == max && !ed[ii].isReadGap()) { + nrm++; + } else { + break; + } + } + ed.resize(ed.size() - nrm); +} diff --git a/edit.h b/edit.h new file mode 100644 index 0000000..c5ae2e8 --- /dev/null +++ b/edit.h @@ -0,0 +1,401 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef EDIT_H_ +#define EDIT_H_ + +#include +#include +#include +#include "assert_helpers.h" +#include "filebuf.h" +#include "sstring.h" +#include "ds.h" +#include "splice_site.h" + +/** + * Types of edits: read gap, reference gap, mismatch (substitution), + * SNP and splice. Values start at 1 and follow the order below. + */ +enum { + EDIT_TYPE_READ_GAP = 1, + EDIT_TYPE_REF_GAP, + EDIT_TYPE_MM, + EDIT_TYPE_SNP, + EDIT_TYPE_SPL, // splicing of pre-messenger RNAs into messenger RNAs +}; + +/** + * Encapsulates an edit between the read sequence and the reference sequence. + * We obey a few conventions when populating its fields. The fields are: + * + * uint8_t chr; // reference character involved (for subst and ins) + * uint8_t qchr; // read character involved (for subst and del) + * uint8_t type; // EDIT_TYPE_* value: 1 -> read gap, 2 -> ref gap, + * // 3 -> mismatch, 4 -> SNP, 5 -> splice + * uint32_t pos; // position w/r/t search root + * + * One convention is that pos is always an offset w/r/t the 5' end of the read. + * + * Another is that chr and qchr are expressed in terms of the nucleotides on + * the forward version of the read. So if we're aligning the reverse + * complement of the read, and an A in the reverse complement mismatches a C in + * the reference, chr should be G and qchr should be T.
+ */ +struct Edit { + + Edit() { reset(); } + + /** + * Construct an initialized non-splice Edit; see init(). + */ + Edit( + uint32_t po, + int ch, + int qc, + int ty, + bool chrs = true, + uint32_t snp = std::numeric_limits::max()) + { + // init() does not touch these, so give them a defined value + donor_seq = acceptor_seq = 0; + init(po, ch, qc, ty, chrs, snp); + } + + /** + * Construct an initialized splice Edit; see the splice init(). + */ + Edit( + uint32_t po, + int ch, + int qc, + int ty, + uint32_t sl, + uint8_t sdir, + bool knowns, + bool chrs = true) + { + // init() does not touch these, so give them a defined value + donor_seq = acceptor_seq = 0; + init(po, ch, qc, ty, sl, sdir, knowns, chrs); + } + + /** + * Reset Edit to uninitialized state. Every data member is given + * a defined value. + */ + void reset() { + pos = pos2 = std::numeric_limits::max(); + chr = qchr = type = 0; + splLen = 0; + splDir = SPL_UNKNOWN; + knownSpl = false; + donor_seq = acceptor_seq = 0; + snpID = std::numeric_limits::max(); + } + + /** + * Return true iff the Edit is initialized. + */ + bool inited() const { + return pos != std::numeric_limits::max(); + } + + /** + * Initialize a new Edit. If 'chrs' is false, 'ch' and 'qc' are + * codes in the range 0..4 that are translated to characters via + * "ACGTN"; otherwise they are taken to be characters already. All + * splice-related fields are set to their "no splice" values. + */ + void init( + uint32_t po, + int ch, + int qc, + int ty, + bool chrs = true, + uint32_t snp = std::numeric_limits::max()) + { + chr = ch; + qchr = qc; + type = ty; + splLen = 0; + splDir = SPL_UNKNOWN; + // Don't leave a stale or indeterminate value behind; the splice + // overload below overwrites this after calling us + knownSpl = false; + pos = po; + if(qc == '-') { + // Read gap + pos2 = std::numeric_limits::max() >> 1; + } else { + pos2 = std::numeric_limits::max(); + } + snpID = snp; + if(!chrs) { + assert_range(0, 4, (int)chr); + assert_range(0, 4, (int)qchr); + chr = "ACGTN"[chr]; + qchr = "ACGTN"[qchr]; + } +#ifndef NDEBUG + if(type != EDIT_TYPE_SPL) { + assert_in(chr, "ACMGRSVTWYHKDBN-"); + assert_in(qchr, "ACGTN-"); + assert(chr != qchr || chr == 'N'); + } +#endif + assert(inited()); + } + + /** + * Initialize a new splice Edit ('ty' must be EDIT_TYPE_SPL) with + * splice length 'sl', direction 'sdir' and known-splice flag + * 'knowns'. + */ + void init( + uint32_t po, + int ch, + int qc, + int ty, + uint32_t sl, + uint32_t sdir, + bool knowns, + bool chrs = true) + { + assert_eq(ty, EDIT_TYPE_SPL); + init(po, ch, qc, ty, chrs); + splLen = sl; + splDir = sdir; + knownSpl = knowns; + } + + /** + * Return true iff one part of the edit or the other has an 'N'. + */ + bool hasN() const { + assert(inited()); + return chr == 'N' || qchr == 'N'; + } + + /** + * Edit less-than overload.
+ */ + int operator< (const Edit &rhs) const { + assert(inited()); + if(pos < rhs.pos) return 1; + if(pos > rhs.pos) return 0; + if(pos2 < rhs.pos2) return 1; + if(pos2 > rhs.pos2) return 0; + if(type < rhs.type) return 1; + if(type > rhs.type) return 0; + if(chr < rhs.chr) return 1; + if(chr > rhs.chr) return 0; + return (qchr < rhs.qchr)? 1 : 0; + } + + /** + * Edit equals overload. + */ + int operator== (const Edit &rhs) const { + assert(inited()); + if(type != rhs.type) + return false; + if(pos != rhs.pos) + return false; + if(type != EDIT_TYPE_SPL) { + return chr == rhs.chr && qchr == rhs.qchr; + } else { + return pos2 == rhs.pos2 && + splLen == rhs.splLen && + splDir == rhs.splDir /* && + knownSpl == rhs.knownSpl */; + } + } + + /** + * Return true iff this Edit is an initialized insertion. + */ + bool isReadGap() const { + assert(inited()); + return type == EDIT_TYPE_READ_GAP; + } + + /** + * Return true iff this Edit is an initialized deletion. + */ + bool isRefGap() const { + assert(inited()); + return type == EDIT_TYPE_REF_GAP; + } + + /** + * Return true if this Edit is either an initialized deletion or an + * initialized insertion. + */ + bool isGap() const { + assert(inited()); + return (type == EDIT_TYPE_REF_GAP || type == EDIT_TYPE_READ_GAP); + } + + bool isSpliced() const { + assert(inited()); + return type == EDIT_TYPE_SPL; + } + + /** + * Return the number of gaps in the given edit list. + */ + static size_t numGaps(const EList& es) { + size_t gaps = 0; + for(size_t i = 0; i < es.size(); i++) { + if(es[i].isGap()) gaps++; + } + return gaps; + } + + /** + * Return true iff this Edit is an initialized mismatch. + */ + bool isMismatch() const { + assert(inited()); + return type == EDIT_TYPE_MM; + } + + /** + * Sort the edits in the provided list. + */ + static void sort(EList& edits); + + /** + * Flip all the edits.pos fields so that they're with respect to + * the other end of the read (of length 'sz'). 
+ */ + static void invertPoss( + EList& edits, + size_t sz, + size_t ei, + size_t en, + bool sort = false); + + /** + * Flip all the edits.pos fields so that they're with respect to + * the other end of the read (of length 'sz'). + */ + static void invertPoss(EList& edits, size_t sz, bool sort = false) { + invertPoss(edits, sz, 0, edits.size(), sort); + } + + static void complement(EList& edits); + + /** + * Clip off some of the low-numbered positions. + */ + static void clipLo(EList& edits, size_t len, size_t amt); + + /** + * Clip off some of the high-numbered positions. + */ + static void clipHi(EList& edits, size_t len, size_t amt); + + /** + * Given a read string and some edits, generate and append the + * corresponding reference string to 'ref'. + */ + static void toRef( + const BTDnaString& read, + const EList& edits, + BTDnaString& ref, + bool fw = true, + size_t trim5 = 0, + size_t trim3 = 0); + + /** + * Given a string and its edits with respect to some other string, + * print the alignment between the strings with the strings stacked + * vertically, with vertical bars denoting matches. + */ + static void printQAlign( + std::ostream& os, + const BTDnaString& read, + const EList& edits); + + /** + * Given a string and its edits with respect to some other string, + * print the alignment between the strings with the strings stacked + * vertically, with vertical bars denoting matches. Add 'prefix' + * before each line of output. + */ + static void printQAlign( + std::ostream& os, + const char *prefix, + const BTDnaString& read, + const EList& edits); + + /** + * Given a string and its edits with respect to some other string, + * print the alignment between the strings with the strings stacked + * vertically, with vertical bars denoting matches. 
+ */ + static void printQAlignNoCheck( + std::ostream& os, + const BTDnaString& read, + const EList& edits); + + /** + * Given a string and its edits with respect to some other string, + * print the alignment between the strings with the strings stacked + * vertically, with vertical bars denoting matches. Add 'prefix' + * before each line of output. + */ + static void printQAlignNoCheck( + std::ostream& os, + const char *prefix, + const BTDnaString& read, + const EList& edits); + +#ifndef NDEBUG + bool repOk() const; + + /** + * Given a list of edits and a DNA string representing the query + * sequence, check that the edits are consistent with respect to the + * query. + */ + static bool repOk( + const EList& edits, + const BTDnaString& s, + bool fw = true, + size_t trim5 = 0, + size_t trim3 = 0); +#endif + + uint8_t chr; // reference character involved (for subst and ins) + uint8_t qchr; // read character involved (for subst and del) + uint8_t type; // EDIT_TYPE_* value: 1 -> read gap, 2 -> ref gap, 3 -> mm, 4 -> SNP, 5 -> splice + uint32_t pos; // position w/r/t search root + uint32_t pos2; // Second int to take into account when sorting. Useful for + // sorting read gap edits that are all part of the same long + // gap. + + uint32_t splLen; // skip over the genome due to an intron + uint8_t splDir; // splice direction; SPL_UNKNOWN unless set by the splice init() + bool knownSpl; // 'knowns' flag passed to the splice init() + + int64_t donor_seq; + int64_t acceptor_seq; + + uint32_t snpID; // snp ID + + friend std::ostream& operator<< (std::ostream& os, const Edit& e); + + /** + * Print a list of Edits to the given output stream, separated by + * 'delim' (a tab by default). + */ + static void print( + std::ostream& os, + const EList& edits, + char delim = '\t'); + + /** + * Merge second argument into the first. Assume both are sorted to + * begin with.
/*
 * Copyright 2011, Ben Langmead
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef ENDIAN_SWAP_H
#define ENDIAN_SWAP_H

#include <stdint.h>
#include <inttypes.h>

/**
 * Return true iff the machine running this program is big-endian.
 *
 * The check looks at the lowest-addressed byte of a known 32-bit value.
 * Reading an object through an unsigned char pointer is always allowed,
 * whereas viewing a byte array through a uint32_t pointer is not (and
 * the byte array need not be suitably aligned).
 */
static inline bool currentlyBigEndian() {
	const uint32_t probe = 1;
	return *reinterpret_cast<const unsigned char*>(&probe) == 0;
}

/**
 * Return copy of uint16_t argument with byte order reversed.
 */
static inline uint16_t endianSwapU16(uint16_t u) {
	// u is promoted to int, so the shifted value always fits.
	return static_cast<uint16_t>(((u >> 8) & 0x00ffu) | ((u << 8) & 0xff00u));
}

/**
 * Return copy of uint32_t argument with byte order reversed.
 */
static inline uint32_t endianSwapU32(uint32_t u) {
	// Unsigned masks keep every intermediate value unsigned.
	return ((u >> 24) & 0x000000ffu) |
	       ((u >>  8) & 0x0000ff00u) |
	       ((u <<  8) & 0x00ff0000u) |
	       ((u << 24) & 0xff000000u);
}

/**
 * Return copy of uint64_t argument with byte order reversed.
 */
static inline uint64_t endianSwapU64(uint64_t u) {
	uint64_t tmp = 0;
	tmp |= ((u >> 56) & (0xffull <<  0));
	tmp |= ((u >> 40) & (0xffull <<  8));
	tmp |= ((u >> 24) & (0xffull << 16));
	tmp |= ((u >>  8) & (0xffull << 24));
	tmp |= ((u <<  8) & (0xffull << 32));
	tmp |= ((u << 24) & (0xffull << 40));
	tmp |= ((u << 40) & (0xffull << 48));
	tmp |= ((u << 56) & (0xffull << 56));
	return tmp;
}

/**
 * Return copy of index_t argument with byte order reversed.  The swap
 * width is chosen from sizeof(index_t): 8 bytes, 4 bytes, otherwise 2.
 */
template <typename index_t>
static inline index_t endianSwapIndex(index_t u) {
	if(sizeof(index_t) == 8) {
		return (index_t)endianSwapU64((uint64_t)u);
	} else if(sizeof(index_t) == 4) {
		return (index_t)endianSwapU32((uint32_t)u);
	} else {
		return (index_t)endianSwapU16((uint16_t)u);
	}
}

/**
 * Return copy of int16_t argument with byte order reversed.
 *
 * The swap is done on the unsigned bit pattern; shifting a negative
 * signed value left is undefined before C++20.
 */
static inline int16_t endianSwapI16(int16_t i) {
	return static_cast<int16_t>(endianSwapU16(static_cast<uint16_t>(i)));
}

/**
 * Convert uint16_t argument to the specified endianness.  It's assumed
 * that u currently has the endianness of the current machine.
 */
static inline uint16_t endianizeU16(uint16_t u, bool toBig) {
	if(toBig == currentlyBigEndian()) {
		return u;
	}
	return endianSwapU16(u);
}

/**
 * Convert int16_t argument to the specified endianness.  It's assumed
 * that i currently has the endianness of the current machine.
 */
static inline int16_t endianizeI16(int16_t i, bool toBig) {
	if(toBig == currentlyBigEndian()) {
		return i;
	}
	return endianSwapI16(i);
}

/**
 * Return copy of int32_t argument with byte order reversed.
 *
 * The swap is done on the unsigned bit pattern; shifting a signed value
 * into or past the sign bit is undefined before C++20.
 */
static inline int32_t endianSwapI32(int32_t i) {
	return static_cast<int32_t>(endianSwapU32(static_cast<uint32_t>(i)));
}

/**
 * Convert uint32_t argument to the specified endianness.  It's assumed
 * that u currently has the endianness of the current machine.
 */
static inline uint32_t endianizeU32(uint32_t u, bool toBig) {
	if(toBig == currentlyBigEndian()) {
		return u;
	}
	return endianSwapU32(u);
}

/**
 * Convert int32_t argument to the specified endianness.  It's assumed
 * that i currently has the endianness of the current machine.
 */
static inline int32_t endianizeI32(int32_t i, bool toBig) {
	if(toBig == currentlyBigEndian()) {
		return i;
	}
	return endianSwapI32(i);
}

/**
 * Convert index_t argument to the specified endianness.  It's assumed
 * that u currently has the endianness of the current machine.
 */
template <typename index_t>
index_t endianizeIndex(index_t u, bool toBig) {
	if(toBig == currentlyBigEndian()) {
		return u;
	}
	return endianSwapIndex(u);
}

#endif
def shuffle_reads(read_fname, random_list):
    """Write the FASTA records of read_fname to read_fname + ".shuffle" in a new order.

    read_fname:  FASTA file; every record starts with a ">" header line.
    random_list: a permutation of range(number_of_records).  Output record
                 k is input record random_list[k].

    Raises AssertionError when len(random_list) differs from the number of
    records (checked before the output file is created).
    """
    reads = []
    with open(read_fname) as read_file:
        for line in read_file:
            if line.startswith(">"):
                reads.append([])
            # Strip only the newline, so a final line without one is not truncated.
            reads[-1].append(line.rstrip("\n"))

    assert len(random_list) == len(reads)

    with open(read_fname + ".shuffle", "w") as read_file_out:
        for i in random_list:
            # Index with i itself.  The earlier reads[random_list[i]] applied
            # the permutation twice, which is not a uniform shuffle (with two
            # reads it always reproduced the input order).
            read_file_out.write("\n".join(reads[i]) + "\n")


def shuffle_pairs(read1_fname, read2_fname):
    """Shuffle two paired FASTA files with the same random permutation.

    The permutation length is taken from the number of ">" headers in
    read1_fname; both files are passed to shuffle_reads() with it, so
    mates stay at matching positions in the two ".shuffle" outputs.
    """
    with open(read1_fname) as read1_file:
        num_reads = sum(1 for line in read1_file if line.startswith(">"))

    random_list = list(range(num_reads))
    random.shuffle(random_list)

    shuffle_reads(read1_fname, random_list)
    shuffle_reads(read2_fname, random_list)
not _constant], + ] + + data_dir_base = "../../../data" + + def generate_reads(cmd): + print >> sys.stderr, cmd + os.system(cmd) + + random.seed(0) + print >> sys.stderr, "shuffle reads sim_1.fa and sim_2.fa" + shuffle_pairs("sim_1.fa", "sim_2.fa") + shuffle_reads_cmd = " mv sim_1.fa.shuffle sim_1.fa" + shuffle_reads_cmd += "; mv sim_2.fa.shuffle sim_2.fa" + os.system(shuffle_reads_cmd) + + + pid_list = [] + + for genome, numreads, rna, snp, mismatch, constant in datasets: + if rna: + molecule = "RNA" + else: + molecule = "DNA" + if numreads >= 1000000: + dirname = "%dM_%s" % (numreads / 1000000, molecule) + else: + dirname = "%dk_%s" % (numreads / 1000, molecule) + + if mismatch: + dirname += "_mismatch" + if snp: + dirname += "_snp" + if rna and constant: + dirname += "_constant" + dirname += "_reads" + dirname += ("_" + genome) + if os.path.exists(dirname): + continue + os.mkdir(dirname) + os.chdir(dirname) + genome_fname = data_dir_base + "/%s.fa" % (genome) + + if rna: + gtf_fname = data_dir_base + "/%s.gtf" % (genome) + else: + gtf_fname = "/dev/null" + + if snp: + snp_fname = data_dir_base + "/%s.snp" % (genome) + else: + snp_fname = "/dev/null" + + cmd_add = "" + if not rna: + cmd_add += "--dna " + if mismatch: + cmd_add += "--error-rate 0.2 " + if rna and constant: + cmd_add += "--expr-profile constant " + cmd = "../../../aligners/bin/hisat2_simulate_reads.py --sanity-check %s --num-fragment %d %s %s %s sim" % \ + (cmd_add, numreads, genome_fname, gtf_fname, snp_fname) + + """ + print >> sys.stderr, cmd + os.system(cmd) + + random.seed(0) + print >> sys.stderr, "shuffle reads sim_1.fa and sim_2.fa" + shuffle_pairs("sim_1.fa", "sim_2.fa") + shuffle_reads_cmd = " mv sim_1.fa.shuffle sim_1.fa" + shuffle_reads_cmd += "; mv sim_2.fa.shuffle sim_2.fa" + os.system(shuffle_reads_cmd) + """ + #generate_reads(cmd) + p = Process(target=generate_reads, args=(cmd,)) + p.start() + pid_list.append(p) + + os.chdir("..") + + os.chdir("..") + + # wait + for p in pid_list: + 
p.join() + + +if __name__ == "__main__": + parser = ArgumentParser( + description='Generate reads using simulate_reads.py in HISAT2') + args = parser.parse_args() + simulate_reads() diff --git a/evaluation/get_data.py b/evaluation/get_data.py new file mode 100644 index 0000000..3e49e95 --- /dev/null +++ b/evaluation/get_data.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +import sys, os +from argparse import ArgumentParser, FileType + +def get_data(small = False): + data_root = "http://www.ccb.jhu.edu/software/hisat2/downloads/evaluation" + + # Download the reference human genome, SNPs, and gene annotations + if not os.path.exists("data"): + os.mkdir("data") + os.chdir("data") + genome_files = ["genome.fa", "genome.fa.fai", "genome.gtf", "snpCommon.txt", "genome.snp", "genome.ss", "genome.exon"] + small_genome_files = ["22.fa", "22.fa.fai", "22.gtf", "22.snp", "22.ss", "22.exon", \ + "22_20-21M.fa", "22_20-21M.fa.fai", "22_20-21M.gtf", "22_20-21M.snp", "22_20-21M.ss", "22_20-21M.exon"] + files = [] + if not small: + files += genome_files + files += small_genome_files + for file in files: + if os.path.exists(file): + continue + wget_cmd = "wget %s/data/%s" % (data_root, file) + print >> sys.stderr, wget_cmd + os.system(wget_cmd) + os.chdir("..") + + # Download indexes + if not os.path.exists("indexes"): + os.mkdir("indexes") + os.chdir("indexes") + aligners = ["HISAT2", "HISAT", "Bowtie", "STAR", "GSNAP"] + for genome in ["genome", "22", "22_20-21M"]: + if small and genome == "genome": + continue + for aligner in aligners: + if genome == "genome": + aligner_dir = aligner + else: + aligner_dir = aligner + "_" + genome + if os.path.exists(aligner_dir): + continue + cmd = "wget %s/indexes/%s.tar.gz; tar xvzf %s.tar.gz; rm %s.tar.gz" % \ + (data_root, aligner_dir, aligner_dir, aligner_dir) + print >> sys.stderr, cmd + os.system(cmd) + os.chdir("..") + + # Download simulated and real reads + if not os.path.exists("reads"): + os.mkdir("reads") + os.chdir("reads") + for 
def get_aligners():
    """Download, build and install the third-party aligners used in the evaluation.

    Works inside ./aligners (created if missing): each program is fetched
    with wget, unpacked, built, and its executables are copied to
    ./aligners/bin.  A program whose unpacked directory already exists is
    skipped.  Finally, symlinks to the HISAT2 executables and scripts
    three directories up are created in ./aligners/bin.

    Changes the working directory while running and restores it to the
    starting directory before returning.  Shell command failures are not
    detected (os.system return codes are ignored, as before).
    """
    mac = (sys.platform == "darwin")
    if not os.path.exists("aligners"):
        os.mkdir("aligners")
    os.chdir("aligners")
    if not os.path.exists("bin"):
        os.mkdir("bin")
    programs = ["HISAT", "Bowtie2", "Bowtie", "TopHat2", "STAR", "GSNAP", "BWA", "StringTie", "Cufflinks", "minimap2"]
    for program in programs:
        if program == "HISAT":
            prog_dir = "hisat-0.1.6-beta"
            if os.path.exists(prog_dir):
                continue
            fname = prog_dir + "-source.zip"
            url = "http://www.ccb.jhu.edu/software/hisat/downloads"
            bins = "hisat-align-s hisat-build-s hisat-inspect-s"
            installs = bins + " hisat hisat-build hisat-inspect"
            cmd = "wget %s/%s; unzip %s; cd %s; make %s; cp %s ../bin; cd .." % \
                (url, fname, fname, prog_dir, bins, installs)
        elif program == "Bowtie2":
            prog_dir = "bowtie2-2.3.4.3"
            if os.path.exists(prog_dir):
                continue
            fname = prog_dir + "-source.zip"
            url = "http://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.3.4.3"
            bins = "bowtie2-align-s bowtie2-build-s bowtie2-inspect-s"
            installs = bins + " bowtie2 bowtie2-build bowtie2-inspect"
            cmd = "wget %s/%s; unzip %s; cd %s; make NO_TBB=1 %s; cp %s ../bin; cd .." % \
                (url, fname, fname, prog_dir, bins, installs)
        elif program == "Bowtie":
            prog_dir = "bowtie-1.1.2"
            if os.path.exists(prog_dir):
                continue
            fname = prog_dir + "-src.zip"
            url = "http://sourceforge.net/projects/bowtie-bio/files/bowtie/1.1.2"
            bins = "bowtie-align-s bowtie-build-s bowtie-inspect-s"
            installs = bins + " bowtie bowtie-build bowtie-inspect"
            cmd = "wget %s/%s; unzip %s; cd %s; make %s; cp %s ../bin; cd .." % \
                (url, fname, fname, prog_dir, bins, installs)
        elif program == "minimap2":
            prog_dir = "minimap2-2.12"
            if os.path.exists(prog_dir):
                continue
            fname = prog_dir + ".tar.bz2"
            # https://github.com/lh3/minimap2/releases/download/v2.12/minimap2-2.12.tar.bz2
            url = "https://github.com/lh3/minimap2/releases/download/v2.12"
            bins = "minimap2"
            cmd = "wget %s/%s; tar jxvf %s; cd %s; make ; cp %s ../bin; cd .." % \
                (url, fname, fname, prog_dir, bins)
        elif program == "TopHat2":
            # Pre-built binaries; only the platform-specific archive differs.
            if mac:
                prog_dir = "tophat-2.1.0.OSX_x86_64"
            else:
                prog_dir = "tophat-2.1.0.Linux_x86_64"
            if os.path.exists(prog_dir):
                continue
            fname = prog_dir + ".tar.gz"
            url = "http://ccb.jhu.edu/software/tophat/downloads"
            installs = "gtf_juncs juncs_db prep_reads segment_juncs tophat tophat_reports sra_to_solid samtools_0.1.18 map2gtf fix_map_ordering bam_merge long_spanning_reads sam_juncs gtf_to_fasta bam2fastx"
            cmd = "wget %s/%s; tar xvzf %s; cd %s; cp %s ../bin; cd .." % \
                (url, fname, fname, prog_dir, installs)
        elif program == "STAR":
            # The archive is named after the version and unpacks to STAR-<version>.
            prog_dir = "2.5.2b"
            if os.path.exists("STAR-" + prog_dir):
                continue
            fname = prog_dir + ".tar.gz"
            url = "https://github.com/alexdobin/STAR/archive"
            if mac:
                # Rewrite the CXX line of the Makefile to point at the MacPorts g++ before building.
                add_cmd = "awk '{if($1 ~ /^CXX/) {print \"CXX =/opt/local/bin/g++-mp-4.8\";} else {print;}}' Makefile > Makefile.tmp; mv Makefile.tmp Makefile"
                make_arg = "STARforMac"
                cmd = "wget %s/%s; tar xvzf %s; cd STAR-%s/source; %s; make; make %s; cp STAR ../../bin; cd ../.." % \
                    (url, fname, fname, prog_dir, add_cmd, make_arg)
            else:
                cmd = "wget %s/%s; tar xvzf %s; cd STAR-%s/source; make; cp STAR ../../bin; cd ../.." % \
                    (url, fname, fname, prog_dir)
        elif program == "GSNAP":
            # The archive name (gmap-gsnap-...) differs from the directory it unpacks to (gmap-...).
            prog_dir = "gmap-2015-07-23"
            dir2 = "gmap-gsnap-2015-07-23"
            if os.path.exists(prog_dir):
                continue
            fname = dir2 + ".tar.gz"
            url = "http://research-pub.gene.com/gmap/src"
            installs = "gmap gmapl get-genome gmapindex iit_store iit_get iit_dump gsnap gsnapl uniqscan uniqscanl snpindex cmetindex atoiindex sam_sort ../util/*"
            cmd = "wget %s/%s; tar xvzf %s; cd %s; ./configure; make; cd src; cp %s ../../bin; cd ../.." % \
                (url, fname, fname, prog_dir, installs)
        elif program == "BWA":
            prog_dir = "bwa-0.7.17"
            if os.path.exists(prog_dir):
                continue
            url = "http://sourceforge.net/projects/bio-bwa/files/%s.tar.bz2" % (prog_dir)
            installs = "bwa"
            cmd = "wget %s; tar xvjf %s.tar.bz2; cd %s; make; cp %s ../bin/; cd .." % (url, prog_dir, prog_dir, installs)
        elif program == "StringTie":
            prog_dir = "stringtie-1.0.4"
            # Same "already installed" guard as every other program; without
            # it StringTie was downloaded and rebuilt on every run.
            if os.path.exists(prog_dir):
                continue
            url = "http://ccb.jhu.edu/software/stringtie/dl"
            bins = "stringtie"
            cmd = "wget %s/%s.tar.gz; tar xvzf %s.tar.gz; cd %s; make release; cp %s ../bin; cd .." % \
                (url, prog_dir, prog_dir, prog_dir, bins)
        elif program == "Cufflinks":
            # No installation recipe yet; skip instead of running an empty shell command.
            continue
        elif program == "vg":
            # Not listed in 'programs' above, so this branch is currently not reached.
            version = "v1.13.0"
            prog_dir = program + "-" + version
            url = "https://github.com/vgteam/vg/releases/download/%s" % (version)
            cmd = "wget %s/%s.tar.gz; tar zxvf %s.tar.gz; cd %s" % (url, prog_dir, prog_dir, prog_dir)
            cmd += "; source ./source_me.sh; make; cp bin/%s ../bin; cd .." % (program)
        else:
            assert False
        sys.stderr.write(cmd + "\n")
        os.system(cmd)

    # Link the HISAT2 programs/scripts from the repository root into aligners/bin.
    files = ["hisat2", "hisat2-align-s", "hisat2-build", "hisat2-build-s", "hisat2-inspect", "hisat2-inspect-s", "extract_splice_sites.py", "hisat2_extract_snps_haplotypes_UCSC.py", "hisat2_simulate_reads.py"]
    os.chdir("bin")
    for link_name in files:
        if os.path.exists(link_name):
            continue
        os.system("ln -s ../../../%s %s" % (link_name, link_name))
    os.chdir("..")

    os.chdir("..")
def reverse_complement(seq):
    """Return the reverse complement of a nucleotide string.

    A/C/G/T are complemented with their case preserved; every other
    character (e.g. N) is copied unchanged.
    """
    complement = {'A': 'T', 'a': 't',
                  'C': 'G', 'c': 'g',
                  'G': 'C', 'g': 'c',
                  'T': 'A', 't': 'a'}
    # One join instead of repeated string prepends, which was quadratic in len(seq).
    return "".join([complement.get(nt, nt) for nt in reversed(seq)])


def read_genome(genome_filename):
    """Load a FASTA file into a dict mapping sequence name -> sequence string.

    The name is the first whitespace-delimited token of the ">" header.
    Records with an empty name or an empty sequence are dropped.  Writes
    "genome is loaded" to stderr when done.
    """
    chr_dic = {}
    chr_name, chunks = "", []

    with open(genome_filename, "r") as genome_file:
        for line in genome_file:
            if line.startswith(">"):
                sequence = "".join(chunks)
                if chr_name and sequence:
                    chr_dic[chr_name] = sequence
                # split() already discards the line terminator.
                chr_name = line[1:].split()[0]
                chunks = []
            else:
                # Strip only the line terminator: slicing off the last character
                # lost a base when the file did not end with a newline.
                chunks.append(line.rstrip("\r\n"))

    sequence = "".join(chunks)
    if chr_name and sequence:
        chr_dic[chr_name] = sequence

    sys.stderr.write("genome is loaded\n")

    return chr_dic


def read_snp(snp_filename):
    """Load a tab-separated SNP file into a dict: chromosome -> list of SNPs.

    Each kept line has five columns: snpID, type, chromosome, position, data.
    Blank lines, lines starting with '#', and lines without exactly five
    columns are skipped.  Every SNP is stored as [snpID, type, int(pos), data],
    where data is converted to int for "deletion" entries and kept as a
    string otherwise.  Writes "snp is loaded" to stderr when done.

    Raises AssertionError for an unknown type and ValueError for a
    non-numeric position (or deletion length).
    """
    snps = defaultdict(list)

    with open(snp_filename, 'r') as snp_file:
        for line in snp_file:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            try:
                snpID, snp_type, chrom, pos, data = line.split('\t')
            except ValueError:
                continue

            assert snp_type in ["single", "deletion", "insertion"]
            if snp_type == "deletion":
                data = int(data)
            snps[chrom].append([snpID, snp_type, int(pos), data])

    sys.stderr.write("snp is loaded\n")

    return snps
def to_junction_str(junction):
    """Format a junction [chr, left, right] as the string "chr-left-right"."""
    return "%s-%d-%d" % (junction[0], junction[1], junction[2])


def to_junction(junction_str):
    """Parse a "chr-left-right" string back into [chr, int(left), int(right)].

    The string is split from the right, so chromosome names that contain
    '-' themselves (e.g. "HLA-A") round-trip through to_junction_str().
    """
    chrom, left, right = junction_str.rsplit("-", 2)
    return [chrom, int(left), int(right)]


def junction_cmp(a, b):
    """Three-way comparison of two junctions [chr, left, right].

    Compares chromosome, then left, then right; returns -1, 0 or 1.
    """
    for i in range(3):
        if a[i] != b[i]:
            if a[i] < b[i]:
                return -1
            return 1
    return 0
def get_right(pos, cigars):
    """Return pos advanced by the total length of the M, D and N CIGAR operations."""
    return pos + sum(int(item[:-1])
                     for item in cigar_re.findall(cigars)
                     if item[-1] in "MDN")


def get_cigar_chars(cigars):
    """Return the CIGAR operation letters in order, lengths dropped (e.g. "MNM")."""
    return "".join(item[-1] for item in cigar_re.findall(cigars))


def get_cigar_chars_MN(cigars):
    """Return only the M and N operation letters, in order.

    Other operations are ignored, and a letter is not repeated when it
    equals the last letter kept (so "10M2I5M3N4M" gives "MNM").
    """
    kept = []
    for item in cigar_re.findall(cigars):
        op = item[-1]
        if op not in "MN":
            continue
        if kept and kept[-1] == op:
            continue
        kept.append(op)
    return "".join(kept)
def is_canonical_junction(chr_dic, junction):
    """Return True if the junction has a GT/AG splice signal on either strand.

    junction is [chr, left, right].  The donor is the two bases at
    chr_dic[chr][left:left+2] and the acceptor the two bases at
    chr_dic[chr][right-3:right-1]; for the opposite strand the reverse
    complement of the acceptor must be "GT" and that of the donor "AG".
    """
    chrom, left, right = junction
    chrom_seq = chr_dic[chrom]
    donor = chrom_seq[left:left+2]
    acceptor = chrom_seq[right-3:right-1]

    # Forward-strand signal.
    if donor == "GT" and acceptor == "AG":
        return True

    # Reverse-strand signal.
    return reverse_complement(acceptor) == "GT" and reverse_complement(donor) == "AG"
def is_junction_pair(chr_dic, gtf_junctions, chr, pos, cigar_str, mate_chr, mate_pos, mate_cigar_str):
    """Classify a read pair by the junctions of both mates.

    Returns (junctions, junction_pair, gtf_junction_pair):
      junctions         - the junction lists of the read and its mate, concatenated
      junction_pair     - True if that combined list is non-empty
      gtf_junction_pair - True if junction_pair holds and every mate that is
                          a junction read is also a GTF junction read
    """
    junctions, junction_read, gtf_junction_read = is_junction_read(chr_dic, gtf_junctions, chr, pos, cigar_str)
    mate_junctions, mate_junction_read, mate_gtf_junction_read = is_junction_read(chr_dic, gtf_junctions, mate_chr, mate_pos, mate_cigar_str)
    junctions += mate_junctions
    junction_pair = len(junctions) > 0
    if junction_pair:
        gtf_junction_pair = True
        if junction_read and not gtf_junction_read:
            gtf_junction_pair = False
        if mate_junction_read and not mate_gtf_junction_read:
            gtf_junction_pair = False
    else:
        gtf_junction_pair = False

    return junctions, junction_pair, gtf_junction_pair


def getSNPs(chr_snps, left, right):
    """Return the SNPs of one chromosome that lie inside the window [left, right).

    chr_snps: list of [snpID, type, pos, data] in ascending pos order; for
              type "deletion", data is the deleted length, so the SNP
              spans pos .. pos + data.
    A SNP is kept if pos >= left, its end is < right, and it starts after
    the end of the previously kept SNP.
    """
    # Lower bound: index of the first SNP with pos >= left.  The previous
    # search used 'high = mid - 1' and could stop one element early; an
    # earlier deletion reaching past 'right' then triggered the break
    # below and every SNP in the window was lost.
    low, high = 0, len(chr_snps)
    while low < high:
        mid = (low + high) // 2
        if chr_snps[mid][2] < left:
            low = mid + 1
        else:
            high = mid

    snps = []
    i = low
    while i < len(chr_snps):
        snp = chr_snps[i]
        i += 1
        snpID, snp_type, pos, data = snp
        pos2 = pos
        if snp_type == "deletion":
            pos2 += data
        # NOTE(review): this stops at the first SNP whose end reaches 'right',
        # so SNPs that start after a long in-window deletion are not
        # considered; kept as in the original - confirm that is intended.
        if pos2 >= right:
            break
        if pos >= left:
            if len(snps) > 0:
                _, prev_type, prev_pos, prev_data = snps[-1]
                assert prev_pos <= pos
                prev_pos2 = prev_pos
                if prev_type == "deletion":
                    prev_pos2 += prev_data
                # Skip SNPs overlapping the previously accepted one.
                if pos <= prev_pos2:
                    continue
            snps.append(snp)

    return snps


def check_snps(snps, check_type, ref_pos, read_seq):
    """Return True if snps contains a SNP of check_type at ref_pos matching the read.

    snps:       list of [snpID, type, pos, data]
    check_type: "single", "insertion" or "deletion"
    read_seq:   for "single" the read base (first character compared with the
                first character of data); for "insertion" the inserted
                sequence (compared whole); for "deletion" the deleted length
                (compared as int).
    """
    for snp in snps:
        snp_type, snp_pos, snp_data = snp[1:4]
        if snp_type != check_type or snp_pos != ref_pos:
            continue

        if snp_type == 'single':
            if snp_data[0] == read_seq[0]:
                return True
        elif snp_type == 'insertion':
            if snp_data == read_seq:
                return True
        elif snp_type == 'deletion':
            # snp_data and read_seq are both lengths of the deleted sequence.
            if int(snp_data) == int(read_seq):
                return True

    return False
== read_id: + read_seq = prev_read_seq + read_seq = read_seq.upper() + if read_seq == "" or read_seq == "*": + continue + + if flag & 0x04 != 0 or \ + chr == "*" or \ + cigar_str == "*": + """ + if flag & 0x80 != 0: + print >> unmapped_read_2_fq, "@%s\n%s\n+%s\n%s" % (read_id, read_seq, read_id, read_qual) + else: + print >> unmapped_read_1_fq, "@%s\n%s\n+%s\n%s" % (read_id, read_seq, read_id, read_qual) + """ + continue + + if mate_chr == '=': + mate_chr = chr + + if len(read_id) >= 3 and read_id[-2] == "/": + read_id = read_id[:-2] + + if read_id.find("seq.") == 0: + read_id = read_id[4:] + + if read_id != prev_read_id: + read_dic = {} + + HISAT2_XM, HISAT2_NM = 0, 0 + if hisat2: + for field in fields[11:]: + if field[:5] == "XM:i:": + HISAT2_XM = int(field[5:]) + elif field[:5] == "NM:i:": + HISAT2_NM = int(field[5:]) + + prev_read_id = read_id + prev_read_seq = read_seq + + if snps_dict != None and chr in snps_dict: + chr_snps = snps_dict[chr] + else: + chr_snps = [] + + snps = None + + XM, gap = 0, 0 + read_pos, right_pos = 0, pos - 1, + junction_read = False + + cigars = cigar_re.findall(cigar_str) + for i in range(len(cigars)): + cigar = cigars[i] + length = int(cigar[:-1]) + cigar_op = cigar[-1] + + if cigar_op == "S": + if i != 0 and i != len(cigars) - 1: + print >> sys.stderr, "S is located at %dth out of %d %s" % (i+1, len(cigars), cigar_str) + + if cigar_op in "MS": + ref_pos = right_pos + if cigar_op == "S" and i == 0: + ref_seq = chr_dic[chr][right_pos-length:right_pos] + ref_pos = right_pos - length + else: + ref_seq = chr_dic[chr][right_pos:right_pos+length] + + ref_seq = ref_seq.upper() + if length == len(ref_seq): + for j in range(length): + if ref_seq[j] != "N" and read_seq[read_pos+j] != ref_seq[j]: + if snps_dict == None: + XM += 1 + else: + if snps == None: + snps = getSNPs(chr_snps, pos - 1, pos + len(read_seq)) + + found_snp = check_snps(snps, 'single', ref_pos + j, read_seq[read_pos + j]) + if not found_snp: + XM += 1 + + if hisat2 and 
cigar_op == "S": + HISAT2_XM += 1 + HISAT2_NM += 1 + else: + XM += length + + if cigar_op in "I": + if snps == None: + snps = getSNPs(chr_snps, pos - 1, pos + len(read_seq)) + found_snp = check_snps(snps, 'insertion', right_pos, read_seq[read_pos:read_pos + length]) + if not found_snp: + gap += length + + if cigar_op in "D": + if snps == None: + snps = getSNPs(chr_snps, pos - 1, pos + len(read_seq)) + found_snp = check_snps(snps, 'deletion', right_pos, length) + if not found_snp: + gap += length + + if cigar_op in "MND": + right_pos += length + + if cigar_op in "MIS": + read_pos += length + + if cigar_op == "N": + junction_read = True + + NM = XM + gap + if hisat2: + XM, NM = HISAT2_XM, HISAT2_NM + if NM < MAX_EDIT: + print >> temp_read_file, "%s\t%d\t%s\t%s\t%s\tXM:i:%d\tNM:i:%d" % \ + (read_id, flag, chr, pos, cigar_str, XM, NM) + + found = False + me = "%s\t%s\t%d" % (read_id, chr, pos) + partner = "%s\t%s\t%d" % (read_id, mate_chr, mate_pos) + if partner in read_dic: + maps = read_dic[partner] + for map in maps: + if map[0] == me: + mate_flag, mate_cigar_str, mate_XM, mate_NM = map[1:] + if mate_pos > pos: + flag, chr, pos, cigar_str, XM, NM, mate_flag, mate_chr_str, mate_pos, mate_cigar_str, mate_XM, mate_NM = \ + mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM, flag, chr, pos, cigar_str, XM, NM + + print >> temp_pair_file, "%s\t%d\t%s\t%d\t%s\tXM:i:%d\tNM:i:%d\t%d\t%s\t%d\t%s\tXM:i:%d\tNM:i:%d" % \ + (read_id, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM, flag, chr, pos, cigar_str, XM, NM) + found = True + break + + if not found: + if not me in read_dic: + read_dic[me] = [] + + read_dic[me].append([partner, flag, cigar_str, XM, NM]) + + sam_file.close() + + temp_read_file.close() + temp_pair_file.close() + + unmapped_read_1_fq.close() + unmapped_read_2_fq.close() + + + sort = False + if vg: + sort = True + + if sort: + command = "sort %s | uniq > %s; rm %s" % (temp_read_filename, read_filename, temp_read_filename) + 
os.system(command) + + command = "sort %s | uniq > %s; rm %s" % (temp_pair_filename, pair_filename, temp_pair_filename) + os.system(command) + else: + command = "mv %s %s; mv %s %s" % (temp_read_filename, read_filename, temp_pair_filename, pair_filename) + os.system(command) + + +def remove_redundant_junctions(junctions): + temp_junctions = [] + for junction in junctions: + temp_junctions.append(to_junction(junction)) + junctions = sorted(list(temp_junctions), cmp=junction_cmp) + temp_junctions = [] + for can_junction in junctions: + if len(temp_junctions) <= 0: + temp_junctions.append(can_junction) + else: + chr, left, right = temp_junctions[-1] + chr2, left2, right2 = can_junction + if chr == chr2 and \ + abs(left - left2) == abs(right - right2) and \ + abs(left - left2) <= 10: + continue + temp_junctions.append(can_junction) + junctions = set() + for junction in temp_junctions: + junctions.add(to_junction_str(junction)) + + return junctions + + + +def read_stat(read_filename, gtf_junctions, chr_dic = None, debug = False): + read_stat = [[0, 0, 0] for i in range(MAX_EDIT)] + temp_junctions = [set() for i in range(MAX_EDIT)] + temp_gtf_junctions = [set() for i in range(MAX_EDIT)] + + alignment = [] + prev_read_id = "" + read_file = open(read_filename, "r") + for line in read_file: + read_id, flag, chr, pos, cigar_str, XM, NM = line[:-1].split() + flag, pos = int(flag), int(pos) + XM, NM = int(XM[5:]), int(NM[5:]) + + read_junctions, junction_read, gtf_junction_read = \ + is_junction_read(chr_dic, gtf_junctions, chr, pos, cigar_str) + + if junction_read: + for junction_str, is_gtf_junction in read_junctions: + if NM < len(temp_junctions): + temp_junctions[NM].add(junction_str) + + if is_gtf_junction: + temp_gtf_junctions[NM].add(junction_str) + + if read_id != prev_read_id: + if prev_read_id != "": + NM2, junction_read2, gtf_junction_read2 = alignment + if NM2 < len(read_stat): + read_stat[NM2][0] += 1 + + if junction_read2: + read_stat[NM2][1] += 1 + + if 
gtf_junction_read2: + read_stat[NM2][2] += 1 + + alignment = [] + + prev_read_id = read_id + + if not alignment: + alignment = [NM, junction_read, gtf_junction_read] + elif alignment[0] > NM or \ + (alignment[0] == NM and not alignment[2] and junction_read): + alignment = [NM, junction_read, gtf_junction_read] + + read_file.close() + + for i in range(len(read_stat)): + temp_junctions[i] = remove_redundant_junctions(temp_junctions[i]) + temp_gtf_junctions[i] = remove_redundant_junctions(temp_gtf_junctions[i]) + + for i in range(len(read_stat)): + read_stat[i].append(len(temp_junctions[i])) + read_stat[i].append(len(temp_gtf_junctions[i])) + + if alignment: + NM2, junction_read2, gtf_junction_read2 = alignment + if NM2 < len(read_stat): + read_stat[NM2][0] += 1 + + if junction_read2: + read_stat[NM2][1] += 1 + + if gtf_junction_read2: + read_stat[NM2][2] += 1 + + return read_stat + + +def cal_read_len(cigar_str): + length = 0 + leftmost_softclip = 0 + rightmost_softclip = 0 + cigars = cigar_re.findall(cigar_str) + + for i in range(len(cigars)): + cigar = cigars[i] + cigar_length = int(cigar[:-1]) + cigar_op = cigar[-1] + + if cigar_op in "MIS": + length += cigar_length + + if (i == 0) and (cigar_op == "S"): + leftmost_softclip = cigar_length + if (i == (len(cigars) - 1)) and (cigar_op == "S"): + rightmost_softclip = cigar_length + + return length, leftmost_softclip, rightmost_softclip + +def is_concordantly(read_id, flag, chr, pos, cigar_str, XM, NM, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM): + concord_length = 1000 + segment_length = sys.maxint + + pairs = {} + pairs[0] = [flag, chr, pos, cigar_str, XM, NM] + pairs[1] = [mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM] + + if chr != mate_chr: + return False, segment_length + if (flag & 0x10 == 0x10) or (mate_flag & 0x10 == 0): + return False, segment_length + + assert pos <= mate_pos + + left = pairs[0] + right = pairs[1] + + left_start = left[2] + left_len, _, _ = 
cal_read_len(left[3]) # cigar + + right_start = right[2] + right_len, _, right_soft = cal_read_len(right[3]) + + segment_length = (right_start + right_len) - left_start - right_soft + assert segment_length >= 0 + + if segment_length > concord_length: + return False, segment_length + + return True, segment_length + +def pair_stat(pair_filename, gtf_junctions, chr_dic): + # pair_stat = NM, junction_pair, gtf_junction, concordant_alignment] + pair_stat = [[0, 0, 0, 0] for i in range(MAX_EDIT)] + dis_pair_stat = [0 for i in range(MAX_EDIT)] + temp_junctions = [set() for i in range(MAX_EDIT)] + temp_gtf_junctions = [set() for i in range(MAX_EDIT)] + + alignment, dis_alignments = [], [] + prev_read_id = "" + con_file = open(pair_filename + ".con", "w") + discon_file = open(pair_filename + ".discon", "w") + pair_file = open(pair_filename, "r") + for line in pair_file: + read_id, flag, chr, pos, cigar_str, XM, NM, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM = line[:-1].split() + flag, pos, XM, NM, mate_flag, mate_pos, mate_XM, mate_NM = \ + int(flag), int(pos), int(XM[5:]), int(NM[5:]), int(mate_flag), int(mate_pos), int(mate_XM[5:]), int(mate_NM[5:]) + + pair_XM = XM + mate_XM + pair_NM = NM + mate_NM + + pair_junctions, junction_pair, gtf_junction_pair = \ + is_junction_pair(chr_dic, gtf_junctions, chr, pos, cigar_str, mate_chr, mate_pos, mate_cigar_str) + + # check concordantly + concord_align, segment_len = is_concordantly(read_id, flag, chr, pos, cigar_str, XM, NM, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM) + print >> (con_file if concord_align else discon_file), line.strip(), ('none', 'first')[(flag & 0x40 == 0x40)], ('none', 'last')[(mate_flag & 0x80 == 0x80)], segment_len + + if junction_pair: + for junction_str, is_gtf_junction in pair_junctions: + if pair_NM < len(temp_junctions): + temp_junctions[pair_NM].add(junction_str) + + if is_gtf_junction: + temp_gtf_junctions[pair_NM].add(junction_str) + + if read_id != 
prev_read_id: + if prev_read_id != "": + NM2, junction_read2, gtf_junction_read2, concord_align2 = alignment + if NM2 < len(pair_stat): + pair_stat[NM2][0] += 1 + + if junction_read2: + pair_stat[NM2][1] += 1 + if gtf_junction_read2: + pair_stat[NM2][2] += 1 + if concord_align2: + pair_stat[NM2][3] += 1 + + for NM2 in dis_alignments: + if NM2 < len(dis_pair_stat): + dis_pair_stat[NM2] += 1 + + alignment = [] + dis_alignment = [] + + prev_read_id = read_id + + if not alignment: + alignment = [pair_NM, junction_pair, gtf_junction_pair, concord_align] + elif alignment[0] > pair_NM or \ + (alignment[0] == pair_NM and not alignment[2] and junction_pair): + alignment = [pair_NM, junction_pair, gtf_junction_pair, concord_align] + + if mate_chr != chr or ((flag & 0x10) != 0 or (mate_flag & 0x10) == 0): + if len(dis_alignments) == 0: + dis_alignments = [pair_NM] + elif dis_alignments[0] > pair_NM: + dis_alignments = [pair_NM] + + pair_file.close() + con_file.close() + discon_file.close() + + # process last line + if alignment: + NM2, junction_read2, gtf_junction_read2, concord_align2 = alignment + if NM2 < len(pair_stat): + pair_stat[NM2][0] += 1 + + if junction_read2: + pair_stat[NM2][1] += 1 + if gtf_junction_read2: + pair_stat[NM2][2] += 1 + + if concord_align2: + pair_stat[NM2][3] += 1 + + assert len(dis_alignments) <= 1 + for NM2 in dis_alignments: + if NM2 < len(dis_pair_stat): + dis_pair_stat[NM2] += 1 + + for i in range(len(pair_stat)): + temp_junctions[i] = remove_redundant_junctions(temp_junctions[i]) + temp_gtf_junctions[i] = remove_redundant_junctions(temp_gtf_junctions[i]) + + for i in range(len(pair_stat)): + pair_stat[i].append(len(temp_junctions[i])) + pair_stat[i].append(len(temp_gtf_junctions[i])) + + return pair_stat, dis_pair_stat + + +def sql_execute(sql_db, sql_query): + sql_cmd = [ + "sqlite3", sql_db, + "-separator", "\t", + "%s;" % sql_query + ] + # print >> sys.stderr, sql_cmd + sql_process = subprocess.Popen(sql_cmd, stdout=subprocess.PIPE) + 
output = sql_process.communicate()[0][:-1] + return output + + +def create_sql_db(sql_db): + if os.path.exists(sql_db): + print >> sys.stderr, sql_db, "already exists!" + return + + columns = [ + ["id", "integer primary key autoincrement"], + ["reads", "text"], + ["genome", "text"], + ["end_type", "text"], + ["aligner", "text"], + ["version", "test"], + ["use_annotation", "text"], + ["edit_distance", "integer"], + ["mapped_reads", "integer"], + ["junction_reads", "integer"], + ["gtf_junction_reads", "integer"], + ["junctions", "integer"], + ["gtf_junctions", "integer"], + ["runtime", "real"], + ["host", "text"], + ["created", "text"], + ["cmd", "text"] + ] + + sql_create_table = "CREATE TABLE Mappings (" + for i in range(len(columns)): + name, type = columns[i] + if i != 0: + sql_create_table += ", " + sql_create_table += ("%s %s" % (name, type)) + sql_create_table += ");" + sql_execute(sql_db, sql_create_table) + + +def write_analysis_data(sql_db, database_name, paired): + if not os.path.exists(sql_db): + return + + if paired: + paired = "paired" + else: + paired = "single" + + aligners = [] + sql_aligners = "SELECT aligner FROM Mappings WHERE end_type = '%s' GROUP BY aligner" % (paired) + output = sql_execute(sql_db, sql_aligners) + aligners = output.split() + + database_fname = database_name + "_" + paired + ".analysis" + database_file = open(database_fname, "w") + + print >> database_file, "aligner\tuse_annotation\tend_type\tedit_distance\tmapped_reads\tjunction_reads\tgtf_junction_reads\tjunctions\tgtf_junctions\truntime" + for aligner in aligners: + for edit_distance in range(MAX_EDIT): + sql_row = "SELECT aligner, use_annotation, end_type, edit_distance, mapped_reads, junction_reads, gtf_junction_reads, junctions, gtf_junctions, runtime FROM Mappings" + sql_row += " WHERE reads = '%s' and aligner = '%s' and edit_distance = %d and end_type = '%s' ORDER BY created DESC LIMIT 1" % (database_name, aligner, edit_distance, paired) + output = sql_execute(sql_db, 
sql_row) + if output: + print >> database_file, output + + database_file.close() + + +def calculate_read_cost(single_end, + paired_end, + test_aligners, + fresh, + runtime_only, + verbose): + sql_db_name = "analysis.db" + if not os.path.exists(sql_db_name): + create_sql_db(sql_db_name) + + full_workdir = os.getcwd() + workdir = full_workdir.split("/")[-1] + + num_cpus = multiprocessing.cpu_count() + if num_cpus > 8: + num_threads = min(8, num_cpus) + desktop = False + else: + num_threads = min(3, num_cpus) + desktop = True + + verbose = False + sql_write = True + is_large_file = False + gz_file = False + if os.path.exists("1.fq.gz"): + gz_file = True + if os.path.getsize("1.fq.gz") > (1024 * 1024 * 1024): + is_large_file = True + + elif os.path.exists("1.fq"): + gz_file = False + if os.path.getsize("1.fq") > (2 * 1024 * 1024 * 1024): + is_large_file = True + + else: + assert(False) + + + aligners = [ + # ["hisat2", "", "", "", ""], + # ["hisat2", "", "", "", "--sensitive"], + # ["hisat2", "", "", "", "--very-sensitive"], + # ["hisat2", "", "", "", "-k 50 --score-min C,-50,0"], + # ["hisat2", "", "snp", "", ""], + # ["hisat2", "", "snp", "", "--sensitive"], + # ["hisat2", "", "snp", "", "-k 50"], + # ["hisat2", "", "", "205", ""], + # ["hisat2", "", "snp", "205", ""], + # ["hisat2", "", "snp_tran", "205", ""], + # ["hisat2", "", "tran", "", ""], + # ["hisat2", "x1", "snp", "", ""], + # ["hisat2", "x1", "", "", ""], + # ["hisat2", "x2", "", "", ""], + # ["hisat2", "", "tran", "", ""], + # ["hisat2", "", "snp_tran", "204", ""], + # ["hisat2", "", "snp_tran", "", ""], + # ["hisat2", "", "", "210", ""], + ["hisat2", "", "rep", "", ""], + # ["hisat2", "", "rep", "", "--read-lengths 101"], + # ["hisat2", "", "rep", "", "--sensitive"], + # ["hisat2", "", "rep-100-300", "", ""], + # ["hisat2", "", "rep-101-300", "", "--sensitive"], + # ["hisat2", "", "rep-101-300", "", "-k 10 --score-min C,-50,0"], + # ["hisat2", "", "rep-150-300", "", ""], + # ["tophat2", "", "", "", ""], 
+ # ["bowtie", "", "", "", ""], + ["bowtie2", "", "", "", ""], + # ["bowtie2", "", "", "", "-k 10"], + ["bwa", "mem", "", "", ""], + # ["bwa", "mem", "", "", "-a"], + # ["bwa", "sw", "", "", ""], + # ["star", "", "", "", ""], + # ["star", "x2", "", "", ""], + # ["vg", "", "", "", ""], + # ["vg", "", "", "", "-M 10"], + # ["vg", "", "snp", "", ""], + # ["vg", "", "snp", "", "-M 10"], + # ["minimap2", "", "", "", ""], + ] + + # sql_write = False + verbose = True + debug = False + + genome = "genome" + cwd = os.getcwd() + RNA = (cwd.find("RNA") != -1) + + chr_dic = read_genome("../../../data/" + genome + ".fa") + snp_dic = read_snp("../../../data/" + genome + ".snp") + gtf_junction_strs = extract_splice_sites("../../../data/" + genome + ".gtf") + gene = "no" + gtf_junctions = [] + for junction_str in gtf_junction_strs: + junction = to_junction(junction_str) + gtf_junctions.append(junction) + gtf_junctions = sorted(gtf_junctions, cmp=junction_cmp) + + print >> sys.stderr, "aligner\tuse_annotation\tend_type\tedit_distance\tmapped_reads\tjunction_reads\tgtf_junction_reads\tjunctions\tgtf_junctions\truntime" + + for paired in [False, True]: + if not paired and not single_end: + continue + if paired and not paired_end: + continue + + type_read1_fname = "1.fq" + if gz_file: + type_read1_fname += ".gz" + + if paired: + type_read2_fname = "2.fq" + if gz_file: + type_read2_fname += ".gz" + + else: + type_read2_fname = "" + + aligner_bin_base = "../../../../aligners/bin" + def get_aligner_version(aligner): + version = "" + if aligner == "hisat2" or \ + aligner == "hisat" or \ + aligner == "bowtie" or \ + aligner == "bowtie2": + if version: + cmd = ["%s/%s_%s/%s" % (aligner_bin_base, aligner, version, aligner)] + else: + cmd = ["%s/%s" % (aligner_bin_base, aligner)] + cmd += ["--version"] + cmd_process = subprocess.Popen(cmd, stdout=subprocess.PIPE) + version = cmd_process.communicate()[0][:-1].split("\n")[0] + version = version.split()[-1] + elif aligner == "tophat2": + cmd = 
["%s/tophat" % (aligner_bin_base)] + cmd += ["--version"] + cmd_process = subprocess.Popen(cmd, stdout=subprocess.PIPE) + version = cmd_process.communicate()[0][:-1].split()[-1] + elif aligner in ["star", "starx2"]: + version = "2.4.2a" + elif aligner == "gsnap": + cmd = ["%s/gsnap" % (aligner_bin_base)] + cmd_process = subprocess.Popen(cmd, stderr=subprocess.PIPE) + version = cmd_process.communicate()[1][:-1].split("\n")[0] + version = version.split()[2] + elif aligner == "bwa": + if version: + cmd = ["%s/bwa_%s/bwa" % (aligner_bin_base, version)] + else: + cmd = ["%s/bwa" % (aligner_bin_base)] + cmd_process = subprocess.Popen(cmd, stderr=subprocess.PIPE) + version = cmd_process.communicate()[1][:-1].split("\n")[2] + version = version.split()[1] + elif aligner == "vg": + cmd = ["%s/vg" % (aligner_bin_base)] + cmd_process = subprocess.Popen(cmd, stderr=subprocess.PIPE) + version = cmd_process.communicate()[1][:-1].split("\n")[0] + version = version.split()[5] + elif aligner == "minimap2": + cmd = ["%s/minimap2" % (aligner_bin_base)] + cmd += ["--version"] + cmd_process = subprocess.Popen(cmd, stdout=subprocess.PIPE) + version = cmd_process.communicate()[0][:-1].split("\n")[0] + + return version + + index_base = "../../../../indexes" + index_add = "" + if genome != "genome": + index_add = "_" + genome + def get_aligner_cmd(RNA, aligner, type, index_type, version, options, read1_fname, read2_fname, out_fname, cmd_idx = 0): + cmd = ["/usr/bin/time"] + if osx_mode: + cmd += ['-l'] + if aligner == "hisat2": + if version: + cmd += ["%s/hisat2_%s/hisat2" % (aligner_bin_base, version)] + else: + cmd += ["%s/hisat2" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + + # cmd += ["-k", "5"] + # cmd += ["--score-min", "C,-18"] + + # daehwan - for debugging purposes + # cmd += ["--score-min", "C,-50"] + # cmd += ["--pen-cansplice", "0"] + # cmd += ["--pen-noncansplice", "12"] + # cmd += ["--pen-intronlen", "G,-8,1"] + # cmd += ["--metrics", "1", + # 
"--metrics-file", "metrics.out"] + + if version == "204": + cmd += ["--sp", "2,1"] + + if not RNA: + cmd += ["--no-spliced-alignment"] + + if type in ["x1", "x2"]: + cmd += ["--no-temp-splicesite"] + + # DK - for debugging purposes + # cmd += ["--dta"] + """ + if index_type == "tran": + cmd += ["--no-anchorstop"] + cmd += ["-k", "100"] + """ + + if options != "": + cmd += options.split(' ') + + if type == "x2": + if cmd_idx == 0: + cmd += ["--novel-splicesite-outfile"] + else: + cmd += ["--novel-splicesite-infile"] + cmd += ["splicesites.txt"] + + # "--novel-splicesite-infile", + # "../splicesites.txt", + # "--rna-strandness", + # "FR", + if version: + index_cmd = "%s/HISAT2_%s%s/" % (index_base, version, index_add) + genome + else: + index_cmd = "%s/HISAT2%s/" % (index_base, index_add) + genome + if index_type: + index_cmd += ("_" + index_type) + cmd += [index_cmd] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += ["-U", read1_fname] + elif aligner == "hisat": + cmd += ["%s/hisat" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + # cmd += ["-k", "5"] + # cmd += ["--score-min", "C,-18"] + if version != "": + version = int(version) + else: + version = sys.maxint + + if not RNA: + cmd += ["--no-spliced-alignment"] + + if type in ["x1", "x2"] or not RNA: + cmd += ["--no-temp-splicesite"] + + """ + cmd += ["--rdg", "100,100", + "--rfg", "100,100"] + """ + + if type == "x2": + if cmd_idx == 0: + cmd += ["--novel-splicesite-outfile"] + else: + cmd += ["--novel-splicesite-infile"] + cmd += ["splicesites.txt"] + + # "--novel-splicesite-infile", + # "../splicesites.txt", + # "--rna-strandness", + # "FR", + cmd += ["%s/HISAT%s/" % (index_base, index_add) + genome] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += [read1_fname] + elif aligner == "tophat2": + cmd += ["%s/tophat" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + cmd += ["--read-edit-dist", 
"3"] + cmd += ["--no-sort-bam"] + cmd += ["--read-realign-edit-dist", "0"] + cmd += ["--keep-tmp", + "%s/HISAT%s/" % (index_base, index_add) + genome, + read1_fname] + if paired: + cmd += [read2_fname] + elif aligner == "star": + cmd += ["%s/STAR" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["--runThreadN", str(num_threads)] + if type == "x2" and cmd_idx == 1: + cmd += ["--genomeDir", "."] + else: + cmd += ["--genomeDir", "%s/STAR%s" % (index_base, index_add)] + if desktop: + cmd += ["--genomeLoad", "NoSharedMemory"] + else: + cmd += ["--genomeLoad", "LoadAndKeep"] + if type == "x2": + if cmd_idx == 1: + cmd += ["--alignSJDBoverhangMin", "1"] + cmd += ["--readFilesIn", + read1_fname] + if paired: + cmd += [read2_fname] + if paired: + cmd += ["--outFilterMismatchNmax", "6"] + else: + cmd += ["--outFilterMismatchNmax", "3"] + elif aligner == "bowtie": + cmd += ["%s/bowtie" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + cmd += ["--sam", + "-k", "10"] + cmd += ["-n", "3"] + if paired: + cmd += ["-X", "500"] + cmd += ["%s/Bowtie%s/" % (index_base, index_add) + genome] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += [read1_fname] + elif aligner == "bowtie2": + if version: + cmd += ["%s/bowtie2_%s/bowtie2" % (aligner_bin_base, version)] + else: + cmd += ["%s/bowtie2" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + #cmd += ["-k", "10"] + #cmd += ["--score-min", "C,-18"] + cmd += ["-X", "1000"] + + if options: + cmd += options.split(' ') + + if version: + cmd += ["-x %s/Bowtie2_%s%s/" % (index_base, version, index_add) + genome] + else: + cmd += ["-x %s/Bowtie2%s/" % (index_base, index_add) + genome] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += [read1_fname] + elif aligner == "gsnap": + cmd += ["%s/gsnap" % (aligner_bin_base), + "-A", + "sam"] + if num_threads > 1: + cmd += ["-t", str(num_threads)] + cmd += ["--max-mismatches=3", + 
"-D", "%s/GSNAP%s" % (index_base, index_add), + "-N", "1", + "-d", genome, + read1_fname] + if paired: + cmd += [read2_fname] + elif aligner == "bwa": + if version: + cmd += ["%s/bwa_%s/bwa" % (aligner_bin_base, version)] + else: + cmd += ["%s/bwa" % (aligner_bin_base)] + if type in ["mem", "aln"]: + cmd += [type] + elif type == "sw": + cmd += ["bwa" + type] + if num_threads > 1: + cmd += ["-t", str(num_threads)] + if options: + cmd += options.split(' ') + if version: + cmd += ["%s/BWA_%s%s/%s.fa" % (index_base, version, index_add, genome)] + else: + cmd += ["%s/BWA%s/%s.fa" % (index_base, index_add, genome)] + cmd += [read1_fname] + if paired: + cmd += [read2_fname] + elif aligner == "vg": + # vg map -d 22 -t 6 -M 10 -f ../sim-1.fa -f ../sim-2.fa --surject-to sam > result.sam + cmd += ["%s/vg" % (aligner_bin_base)] + cmd += ["map"] + cmd += ["-t", str(num_threads)] + cmd += ["--surject-to", "sam"] + index_cmd = "%s/VG%s/" % (index_base, index_add) + genome + if index_type: + index_cmd += ("_" + index_type) + + if options: + cmd += options.split(' ') + + cmd += ["-d", index_cmd] + + cmd += ["-f", read1_fname] + if paired: + cmd += ["-f", read2_fname] + + elif aligner == "minimap2": + # minimap2 -a -x sr 22.mmi sim_1.fa sim_2.fa > result.sam + cmd += ["%s/minimap2" % (aligner_bin_base)] + cmd += ["-a"] + cmd += ["-x", "sr"] + index_cmd = "%s/minimap2%s/" % (index_base, index_add) + genome + if index_type: + index_cmd += ("_" + index_type) + index_cmd += ".mmi" + cmd += [index_cmd] + cmd += [read1_fname] + if paired: + cmd += [read2_fname] + else: + assert False + + return cmd + + for aligner, type, index_type, version, options in aligners: + skip = False + if len(test_aligners) > 0: + skip = True + for test_aligner in test_aligners: + if aligner == test_aligner: + skip = False + if skip: + continue + + aligner_name = aligner + type + version + if (aligner == "hisat2" or aligner == "vg") and index_type != "": + aligner_name += ("_" + index_type) + + if options != "": 
+ option_name = options.replace(' ', '').replace('-', '').replace(',', '') + aligner_name = aligner_name + '_' + option_name + if paired: + aligner_dir = aligner_name + "_paired" + else: + aligner_dir = aligner_name + "_single" + + if fresh and os.path.exists(aligner_dir): + os.system("rm -rf %s" % aligner_dir) + + if not os.path.exists(aligner_dir): + os.mkdir(aligner_dir) + os.chdir(aligner_dir) + + out_fname = "accepted.sam" + aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname) + duration = 0.1 + mem_usage = '' + if not os.path.exists(out_fname): + if not os.path.exists("../one.fq") or not os.path.exists("../two.fq"): + if gz_file: + os.system("gzip -cd ../1.fq.gz | head -400 > ../one.fq") + os.system("gzip -cd ../2.fq.gz | head -400 > ../two.fq") + else: + os.system("head -400 ../1.fq > ../one.fq") + os.system("head -400 ../2.fq > ../two.fq") + + # dummy commands for caching index + loading_time = 0 + if aligner not in ["tophat2"]: + for i in range(3): + dummy_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../one.fq", "../two.fq", "/dev/null") + start_time = datetime.now() + if verbose: + print >> sys.stderr, start_time, "\t", " ".join(dummy_cmd) + if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa"]: + proc = subprocess.Popen(dummy_cmd, stdout=open("/dev/null", "w"), stderr=subprocess.PIPE) + else: + proc = subprocess.Popen(dummy_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc.communicate() + finish_time = datetime.now() + duration = finish_time - start_time + duration = duration.total_seconds() + if verbose: + print >> sys.stderr, finish_time, "duration:", duration + loading_time = duration + + # align all reads + if paired: + sweep_read_cmd = "cat ../%s ../%s > /dev/null" % (type_read1_fname, type_read2_fname) + else: + sweep_read_cmd = "cat ../%s > /dev/null" % (type_read1_fname) + print >> sys.stderr, 
datetime.now(), "\t", sweep_read_cmd + os.system(sweep_read_cmd) + + skip_alignment = False + if paired and aligner == "olego" and os.path.exists(out_fname + "1"): + skip_alignment = True + + if not skip_alignment: + aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname) + start_time = datetime.now() + if verbose: + print >> sys.stderr, start_time, "\t", " ".join(aligner_cmd) + if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa", "vg", "minimap2"]: + proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE) + else: + proc = subprocess.Popen(aligner_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + _, mem_usage = proc.communicate() + mem_usage = parse_mem_usage(mem_usage) + finish_time = datetime.now() + duration = finish_time - start_time + duration = duration.total_seconds() - loading_time + if duration < 0.1: + duration = 0.1 + if verbose: + print >> sys.stderr, finish_time, "duration:", duration + + if verbose: + print >> sys.stderr, finish_time, "Memory Usage: %dMB" % (int(mem_usage) / 1024) + + if debug and aligner == "hisat" and type == "x1": + os.system("cat metrics.out") + print >> sys.stderr, "\ttime: %.4f" % (duration) + # break + + if aligner == "star" and type in ["", "gtf"]: + os.system("mv Aligned.out.sam %s" % out_fname) + elif aligner in ["hisat2", "hisat"] and type == "x2": + aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1) + if verbose: + print >> sys.stderr, start_time, "\t", " ".join(aligner_cmd) + start_time = datetime.now() + proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE) + proc.communicate() + finish_time = datetime.now() + duration += (finish_time - start_time).total_seconds() + duration -= loading_time + if duration < 0.1: + duration = 0.1 + if verbose: + print 
>> sys.stderr, finish_time, "duration:", duration + elif aligner == "star" and type == "x2": + assert os.path.exists("SJ.out.tab") + os.system("awk 'BEGIN {OFS=\"\t\"; strChar[0]=\".\"; strChar[1]=\"+\"; strChar[2]=\"-\";} {if($5>0){print $1,$2,$3,strChar[$4]}}' SJ.out.tab > SJ.out.tab.Pass1.sjdb") + for file in os.listdir("."): + if file in ["SJ.out.tab.Pass1.sjdb", "genome.fa"]: + continue + os.remove(file) + star_index_cmd = "STAR --genomeDir ./ --runMode genomeGenerate --genomeFastaFiles ../../../../data/genome.fa --sjdbFileChrStartEnd SJ.out.tab.Pass1.sjdb --sjdbOverhang 100 --runThreadN %d" % (num_threads) + print >> sys.stderr, "\t", datetime.now(), star_index_cmd + os.system(star_index_cmd) + if verbose: + print >> sys.stderr, "\t", datetime.now(), " ".join(dummy_cmd) + proc = subprocess.Popen(dummy_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc.communicate() + if verbose: + print >> sys.stderr, "\t", datetime.now(), "finished" + aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1) + start_time = datetime.now() + if verbose: + print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd) + proc = subprocess.Popen(aligner_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc.communicate() + finish_time = datetime.now() + duration += (finish_time - start_time).total_seconds() + duration -= loading_time + if duration < 0.1: + duration = 0.1 + if verbose: + print >> sys.stderr, "\t", finish_time, "finished:", duration + os.system("mv Aligned.out.sam %s" % out_fname) + elif aligner == "tophat2": + os.system("samtools sort -n tophat_out/accepted_hits.bam accepted_hits; samtools view -h accepted_hits.bam > %s" % out_fname) + elif aligner == "vg": + index_name = '%s/VG%s/' % (index_base, index_add) + genome + if index_type: + index_name += ('_' + index_type) + + os.system("echo %s %s %f >> runtime" % (str(datetime.now()), aligner, duration)) + + if aligner 
in ["star", "tophat2", "gsnap"]: + os.system("tar cvzf %s.tar.gz %s &> /dev/null" % (out_fname, out_fname)) + + if runtime_only: + os.chdir("..") + continue + + suffix = aligner + read_sam, pair_sam = suffix + ".read.sam", suffix + ".pair.sam" + unmapped_read_1_fq, unmapped_read_2_fq = suffix + ".unmapped.1.fq", suffix + ".unmapped.2.fq" + if not os.path.exists(read_sam) or not os.path.exists(pair_sam): + if index_type == 'snp': + extract_reads_and_pairs(chr_dic, out_fname, read_sam, pair_sam, unmapped_read_1_fq, unmapped_read_2_fq, snp_dic) + else: + extract_reads_and_pairs(chr_dic, out_fname, read_sam, pair_sam, unmapped_read_1_fq, unmapped_read_2_fq) + + + + out = '' + if gz_file: + out = subprocess.check_output("gzip -cd ../%s | wc -l" % type_read1_fname, shell=True) + else: + out = subprocess.check_output("wc -l ../%s" % type_read1_fname, shell=True) + + numreads = int(out.split()[0]) / 4 + + done_filename = suffix + ".done" + if not os.path.exists(done_filename): + done_file = open(done_filename, "w") + if paired: + sum = [0, 0, 0, 0, 0, 0] # mappep_read, junction_read, gtf_junction_reads, concord_mapped_read, num_junctions, num_gtf_junctions + dis_sum = 0 + stat, dis_stat = pair_stat(pair_sam, gtf_junctions, chr_dic) + output = "" + for i in range(len(stat)): + for j in range(len(sum)): + sum[j] += stat[i][j] + + dis_sum += dis_stat[i] + mapped_reads, junction_reads, gtf_junction_reads, concord_mapped_read, num_junctions, num_gtf_junctions = sum + output += "%s\t%s\tpaired\t%d\t%d\t%.2f%%\t%d\t%d\t%d\t%d\t%f\t%d\t%d\t%.2f%%\n" % \ + (aligner_name, gene, i, mapped_reads, float(mapped_reads) * 100.0 / numreads, junction_reads, gtf_junction_reads, num_junctions, num_gtf_junctions, duration, (numreads / max(1.0, duration)), concord_mapped_read, float(concord_mapped_read) * 100.0 / numreads) + + if sql_write and os.path.exists("../" + sql_db_name): + sql_insert = "INSERT INTO \"Mappings\" VALUES(NULL, '%s', '%s', '%s', '%s', '%s', '%s', %d, %d, %d, %d, %d, %d, 
%f, '%s', datetime('now', 'localtime'), '%s');" % \ + (workdir, genome, "paired", aligner_name, get_aligner_version(aligner), "no", i, mapped_reads, junction_reads, gtf_junction_reads, num_junctions, num_gtf_junctions, duration, platform.node(), " ".join(aligner_cmd)) + sql_execute("../" + sql_db_name, sql_insert) + + + print >> sys.stderr, output, + print >> done_file, output + else: + sum = [0, 0, 0, 0, 0] + stat = read_stat(read_sam, gtf_junctions, chr_dic) + output = "" + for i in range(len(stat)): + for j in range(len(sum)): + sum[j] += stat[i][j] + + mapped_reads, junction_reads, gtf_junction_reads, num_junctions, num_gtf_junctions = sum + output += "%s\t%s\tsingle\t%d\t%d\t%.2f%%\t%d\t%d\t%d\t%d\t%f\t%d\n" % \ + (aligner_name, gene, i, mapped_reads, float(mapped_reads) * 100.0 / numreads, junction_reads, gtf_junction_reads, num_junctions, num_gtf_junctions, duration, (numreads / max(1.0, duration))) + + if sql_write and os.path.exists("../" + sql_db_name): + sql_insert = "INSERT INTO \"Mappings\" VALUES(NULL, '%s', '%s', '%s', '%s', '%s', '%s', %d, %d, %d, %d, %d, %d, %f, '%s', datetime('now', 'localtime'), '%s');" % \ + (workdir, genome, "single", aligner_name, get_aligner_version(aligner), "no", i, mapped_reads, junction_reads, gtf_junction_reads, num_junctions, num_gtf_junctions, duration, platform.node(), " ".join(aligner_cmd)) + sql_execute("../" + sql_db_name, sql_insert) + + print >> sys.stderr, output, + print >> done_file, output + + done_file.close() + + + os.chdir("..") + + if os.path.exists(sql_db_name): + write_analysis_data(sql_db_name, workdir, paired) + + + +if __name__ == "__main__": + parser = ArgumentParser( + description='test HISAT2, and compare HISAT2 with other popular aligners such as TopHat2, STAR, Bowtie1/2, GSNAP, BWA-mem, etc.') + parser.add_argument('--single-end', + dest='paired_end', + action='store_false', + help='run single-end only') + parser.add_argument('--paired-end', + dest='single_end', + action='store_false', + 
help='run paired_end only') + parser.add_argument('--aligner-list', + dest='aligner_list', + type=str, + default="", + help='comma-separated list of aligners (e.g. hisat2,bowtie2,bwa') + parser.add_argument('--fresh', + dest='fresh', + action='store_true', + help='delete existing alignment related directories (e.g. hisat2_single)') + parser.add_argument('--runtime-only', + dest='runtime_only', + action='store_true', + help='run programs without evaluation') + parser.add_argument('-v', '--verbose', + dest='verbose', + action='store_true', + help='also print some statistics to stderr') + + args = parser.parse_args() + + aligners = [] + for aligner in args.aligner_list.split(','): + if aligner == "": + continue + aligners.append(aligner) + + calculate_read_cost(args.single_end, + args.paired_end, + aligners, + args.fresh, + args.runtime_only, + args.verbose) + diff --git a/evaluation/real/init.py b/evaluation/real/init.py new file mode 100644 index 0000000..e8b8b57 --- /dev/null +++ b/evaluation/real/init.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python + +import sys, os, signal +import string, re + +signal.signal(signal.SIGPIPE, signal.SIG_DFL) +use_message = ''' +''' + +osx_mode = False +if sys.platform == 'darwin': + osx_mode = True + +def make_cat_cmd(gzmode, read_dir_base, read_dir, fq_name, num_read): + cmd = [] + if gzmode: + if osx_mode: + cmd += ["gzcat"] + else: + cmd += ["zcat"] + else: + cmd += ["cat"] + + cmd += ["../../%s%s/%s" % (read_dir_base, read_dir, fq_name)] + cmd += ["|", "head", "-n", "%d" % (num_read * 4)] + + if gzmode: + cmd += ["|", "gzip"] + + cmd += [">", fq_name] + return ' '.join(cmd) + + +def init(): + read_dir_base = "../reads/real/" + read_dirs = os.listdir(read_dir_base) + for read_dir in read_dirs: + if os.path.exists(read_dir): + continue + + gz_file = False + fq_1_name = '1.fq' + fq_2_name = '2.fq' + if os.path.exists(read_dir_base + read_dir + "/1.fq.gz") and \ + os.path.exists(read_dir_base + read_dir + "/2.fq.gz"): + gz_file = True + 
fq_1_name = '1.fq.gz' + fq_2_name = '2.fq.gz' + else: + if not os.path.exists(read_dir_base + read_dir + "/1.fq") or \ + not os.path.exists(read_dir_base + read_dir + "/1.fq"): + continue + + print >> sys.stderr, "Processing", read_dir, "..." + + os.mkdir(read_dir) + os.chdir(read_dir) + + RNA = (read_dir.find("RNA") != -1) + tests = [ + ["1M", 1000000], + #["5M", 5000000], + ["10M", 10000000], + #["20M", 20000000], + ["whole", 0], + ] + + for dir_name, num_reads in tests: + if os.path.exists(dir_name): + continue + + os.mkdir(dir_name) + os.chdir(dir_name) + + if dir_name == "whole": + ln_cmd = "ln -s ../../%s%s/%s ." % (read_dir_base, read_dir, fq_1_name) + print >> sys.stderr, ln_cmd + os.system(ln_cmd) + ln_cmd = "ln -s ../../%s%s/%s ." % (read_dir_base, read_dir, fq_2_name) + print >> sys.stderr, ln_cmd + os.system(ln_cmd) + else: + cmd = make_cat_cmd(gz_file, read_dir_base, read_dir, fq_1_name, num_reads) + print >> sys.stderr, cmd + os.system(cmd) + + cmd = make_cat_cmd(gz_file, read_dir_base, read_dir, fq_2_name, num_reads) + print >> sys.stderr, cmd + os.system(cmd) + + os.system("ln -s ../../calculate_read_cost.py .") + os.chdir("..") + + os.chdir("..") + + +if __name__ == "__main__": + init() diff --git a/evaluation/simulation/calculate_read_cost.py b/evaluation/simulation/calculate_read_cost.py new file mode 100644 index 0000000..4b7b726 --- /dev/null +++ b/evaluation/simulation/calculate_read_cost.py @@ -0,0 +1,2834 @@ +#!/usr/bin/env python + +import sys, os, subprocess +import multiprocessing +import string, re +import platform +from datetime import datetime, date, time +import copy +from argparse import ArgumentParser, FileType +from multiprocessing import Process +import bisect + +mp_mode = False +mp_num = 1 + +cigar_re = re.compile('\d+\w') + +osx_mode = False +if sys.platform == 'darwin': + osx_mode = True + +""" +""" +def parse_mem_usage(resource): + if osx_mode: + resource = resource.strip().split('\n') + for line in resource: + if 
line.find('maximum resident set size') != -1: + return int(line.split()[0]) / 1024 + else: + resource = resource.split(' ') + for line in resource: + idx = line.find('maxresident') + if idx != -1: + return line[:idx] + + return '0' + + +""" +""" +def reverse_complement(seq): + result = "" + for nt in seq: + base = nt + if nt == 'A': + base = 'T' + elif nt == 'a': + base = 't' + elif nt == 'C': + base = 'G' + elif nt == 'c': + base = 'g' + elif nt == 'G': + base = 'C' + elif nt == 'g': + base = 'c' + elif nt == 'T': + base = 'A' + elif nt == 't': + base = 'a' + + result = base + result + + return result + + +""" +RepeatDB +""" + +class RepeatAllele: + def __init__(self): + self.repeat_name = '' + self.allele_idx = 0 + self.repeat_pos = 0 + self.repeat_length = 0 + self.positions = [] + return + + def __repr__(self): + return '[' + ','.join([str(self.repeat_name), str(self.allele_idx), str(self.repeat_pos), str(self.repeat_length), str(len(self.positions))]) + ']' + + def add_position(self, chr, pos, strand): + self.positions.append([chr, pos, strand]) + + + def __lt__(self, other): + if self.repeat_pos < other.repeat_pos: + return True + elif self.repeat_pos == other.repeat_pos: + return self.repeat_length < other.repeat_length + else: + return False + +class Repeat: + def __init__(self): + self.repeat_name = '' + self.repeat_length = 0 + self.repeat_pos = 0 + self.allele = [] + return + + def add_allele(self, allele_idx, repeatAllele): + #self.allele[allele_idx] = repeatAllele + self.allele.append(repeatAllele) + + def allele_sort(self): + self.allele = sorted(self.allele) + +def cmp_repeatmap(a, b): + if a[0] < b[0]: + return -1 + elif a[0] == b[0]: + return 0 + else: + return 1 + +def read_len_cigar(cigar_str): + cigars = cigar_re.findall(cigar_str) + cigars = [[cigar[-1], int(cigar[:-1])] for cigar in cigars] + + read_len = 0 + for cigar in cigars: + cigar_op, length = cigar + if cigar_op in "MISH": + read_len += int(length) + + return read_len + +def 
read_repeatdb(repeat_filename): + repeat_db = {} + + if os.path.exists(repeat_filename): + + for line in open(repeat_filename, 'r'): + + if line[0] == '>': + line = line.strip()[1:] + name, rptRefName, pos, rep_len, _, _ = line.split()[:6] + pos = int(pos) + rep_len = int(rep_len) + rptName, allele_idx = name.split('*')[0:2] + allele_idx = int(allele_idx) + + repeatAllele = RepeatAllele() + repeatAllele.repeat_name = rptName + repeatAllele.allele_idx = allele_idx + repeatAllele.repeat_pos = pos + repeatAllele.repeat_length = rep_len + + if rptRefName not in repeat_db: + # new rptRefName + repeat_db[rptRefName] = {} + + if rptName not in repeat_db[rptRefName]: + # new rptName + assert allele_idx == 0 + repeat_db[rptRefName][rptName] = Repeat() + repeat_db[rptRefName][rptName].repeat_name = rptName + + repeat_db[rptRefName][rptName].add_allele(allele_idx, repeatAllele) + + else: + coords = line.split() + for coord in coords: + chr, pos, strand = coord.split(':') + pos = int(pos) + + repeat_db[rptRefName][rptName].allele[allele_idx].add_position(chr, pos, strand) + + else: + print >> sys.stderr, 'Cannot open file', repeat_filename + + + print >> sys.stderr, 'Build repeatMap' + repeat_map = {} + for rptRefName, repeats in repeat_db.items(): + #print 'Processing', rptRefName + repeat_pos_list = [] + + for repeatName, repeat in repeats.items(): + #print 'Common Allele:', repeatName, repeat.repeat_name + repeat_left = sys.maxint + repeat_right = 0 + + #for allele_id, repeatAllele in repeat.allele.items(): + for repeatAllele in repeat.allele: + left = repeatAllele.repeat_pos + right = left + repeatAllele.repeat_length + if left < repeat_left: + repeat_left = left + if right > repeat_right: + repeat_right = right + + repeat.repeat_pos = repeat_left + repeat.repeat_length = repeat_right - repeat_left + + #print repeat.allele + #repeat.allele_sort() + #print repeat.allele + + #print repeat_left, repeat_right + + repeat_pos_list.append((repeat_right, repeatName)) + + 
repeat_map[rptRefName] = sorted(repeat_pos_list, cmp=cmp_repeatmap) + #print repeat_map[rptRefName] + + return repeat_db, repeat_map + + +def find_leftmost_pos(rmap, left): + pos = bisect.bisect_left(rmap, (left, None)) + #print pos + + if pos == len(rmap): + return pos + + if rmap[pos][0] == left: + while pos < len(rmap): + if rmap[pos][0] != left: + break + pos += 1 + + return pos + +def repeat_to_genome_pos(repeat_db, repeat_map, rptRefName, pos, cigar_str = ''): + assert rptRefName in repeat_db + readlen = read_len_cigar(cigar_str) + #readlen = 101 + + # pos in sam-result. pos is 1-based + left = pos - 1 + right = left + readlen + + repeats = repeat_db[rptRefName] + rmap = repeat_map[rptRefName] + + #print len(rmap) + #print rmap + + i = find_leftmost_pos(rmap, left) + if i >= len(rmap): + print >> sys.stderr, 'Value Error' + return + + if right > rmap[i][0]: + print >> sys.stderr, 'Not repeat' + return + + repeat = repeats[rmap[i][1]] + + #print 'Allele Size:', len(repeat.allele) + #print repeat.allele + for allele in repeat.allele: + rpos = allele.repeat_pos + rlen = allele.repeat_length + + if (left >= rpos) and (right <= rpos + rlen): + offset = left - rpos + for genome_pos in allele.positions: + print genome_pos[0], genome_pos[1] + offset + 1, genome_pos[2], genome_pos[1] + +""" +""" +def read_genome(genome_filename): + chr_dic = {} + genome_file = open(genome_filename, "r") + + chr_name, sequence = "", "" + for line in genome_file: + if line[0] == ">": + if chr_name and sequence: + chr_dic[chr_name] = sequence + + chr_name = line[1:-1].split()[0] + sequence = "" + else: + sequence += line[:-1] + + if chr_name and sequence: + chr_dic[chr_name] = sequence + + genome_file.close() + + print >> sys.stderr, "genome is loaded" + + return chr_dic + + +""" +""" +def extract_splice_sites(gtf_fname): + trans = {} + + gtf_file = open(gtf_fname) + # Parse valid exon lines from the GTF file into a dict by transcript_id + for line in gtf_file: + line = line.strip() + if 
not line or line.startswith('#'): + continue + if '#' in line: + line = line.split('#')[0].strip() + + try: + chrom, source, feature, left, right, score, \ + strand, frame, values = line.split('\t') + except ValueError: + continue + left, right = int(left), int(right) + + if feature != 'exon' or left >= right: + continue + + values_dict = {} + for attr in values.split(';')[:-1]: + attr, _, val = attr.strip().partition(' ') + values_dict[attr] = val.strip('"') + + if 'gene_id' not in values_dict or \ + 'transcript_id' not in values_dict: + continue + + transcript_id = values_dict['transcript_id'] + if transcript_id not in trans: + trans[transcript_id] = [chrom, strand, [[left, right]]] + else: + trans[transcript_id][2].append([left, right]) + + gtf_file.close() + + # Sort exons and merge where separating introns are <=5 bps + for tran, [chrom, strand, exons] in trans.items(): + exons.sort() + tmp_exons = [exons[0]] + for i in range(1, len(exons)): + if exons[i][0] - tmp_exons[-1][1] <= 5: + tmp_exons[-1][1] = exons[i][1] + else: + tmp_exons.append(exons[i]) + trans[tran] = [chrom, strand, tmp_exons] + + # Calculate and print the unique junctions + junctions = set() + for chrom, strand, exons in trans.values(): + for i in range(1, len(exons)): + junctions.add(to_junction_str([chrom, exons[i-1][1], exons[i][0]])) + + return junctions + +""" +""" +def read_repeat_info(repeat_filename): + repeat_info, repeat_dic = {}, {} + repeat_pos = {} + if os.path.exists(repeat_filename): + for line in open(repeat_filename): + if line[0] == ">": + line = line.strip()[1:] + allele, rep, pos, rep_len, _, _ = line.split()[:6] + pos, rep_len = int(pos), int(rep_len) + common_allele = allele.split('*')[0] + + if rep not in repeat_info: + repeat_info[rep] = [] + repeat_dic[rep] = {} + repeat_pos[rep] = {} + + repeat_info[rep].append([allele, pos, rep_len]) + if allele not in repeat_dic[rep]: + repeat_dic[rep][allele] = [] + repeat_pos[rep][allele] = set() + else: + coords = line.split() + 
for coord in coords: + chr, pos, strand = coord.split(':') + pos = int(pos) + if pos in repeat_pos[rep][allele]: + continue + repeat_dic[rep][allele].append([chr, pos, strand]) + repeat_pos[rep][allele].add(pos) + + for rep, repeats in repeat_info.items(): + def my_cmp(a, b): + if a[1] < b[1]: + return -1 + elif a[1] == b[1]: + return a[2] - b[2] + else: + return 1 + repeat_info[rep] = sorted(repeat_info[rep], cmp=my_cmp) + + return repeat_info, repeat_dic + + +""" +""" +def find_repeat(repeat_info, pos): + if len(repeat_info) <= 0: + return -1 + + l, r = 0, len(repeat_info) + while l < r: + m = (l + r) / 2 + _, rep_pos, rep_len, _ = repeat_info[m] + if rep_pos <= pos and pos < rep_pos + rep_len: + return m + elif pos < rep_pos: + r = m + else: + l = m + 1 + + return -1 + +def reverse_cigar(cigar_str): + cigars = cigar_re.findall(cigar_str) + cigars = [[cigar[-1], int(cigar[:-1])] for cigar in cigars] + cigars[::-1] + + read_len = 0 + cigar_str = "" + for cigar in cigars: + cigar_op, length = cigar + cigar_str += ("%d%s" % (length, cigar_op)) + if cigar_op in "MISH": + read_len += int(length) + + return read_len, cigar_str + + + +def to_junction_str(junction): + return "%s-%d-%d" % (junction[0], junction[1], junction[2]) + + +def to_junction(junction_str): + fields = junction_str.split("-") + if len(fields) > 3: + chr, left, right = "-".join(fields[:-2]), fields[-2], fields[-1] + else: + assert len(fields) == 3 + chr, left, right = fields + + return [chr, int(left), int(right)] + + +""" +""" +def junction_cmp(a, b): + if a[0] != b[0]: + if a[0] < b[0]: + return -1 + else: + return 1 + + if a[1] != b[1]: + if a[1] < b[1]: + return -1 + else: + return 1 + + if a[2] != b[2]: + if a[2] < b[2]: + return -1 + else: + return 1 + + return 0 + + +""" +# chr and pos are assumed to be integers +""" +def get_junctions(chr, pos, cigar_str, min_anchor_len = 0, read_len = 100): + junctions = [] + right_pos = pos + cigars = cigar_re.findall(cigar_str) + cigars = 
[[int(cigars[i][:-1]), cigars[i][-1]] for i in range(len(cigars))] + + left_anchor_lens = [] + cur_left_anchor_len = 0 + for i in range(len(cigars)): + length, cigar_op = cigars[i] + if cigar_op in "MI": + cur_left_anchor_len += length + elif cigar_op == "N": + assert cur_left_anchor_len > 0 + left_anchor_lens.append(cur_left_anchor_len) + cur_left_anchor_len = 0 + + for i in range(len(cigars)): + length, cigar_op = cigars[i] + if cigar_op == "N": + left, right = right_pos - 1, right_pos + length + + if i > 0 and cigars[i-1][1] in "ID": + if cigars[i-1][1] == "I": + left += cigars[i-1][0] + else: + left -= cigars[i-1][0] + if i + 1 < len(cigars) and cigars[i+1][1] in "ID": + if cigars[i+1][1] == "I": + right -= cigars[i+1][0] + else: + right += cigars[i+1][0] + + junction_idx = len(junctions) + assert junction_idx < len(left_anchor_lens) + left_anchor_len = left_anchor_lens[junction_idx] + assert left_anchor_len > 0 and left_anchor_len < read_len + right_anchor_len = read_len - left_anchor_len + if left_anchor_len >= min_anchor_len and right_anchor_len >= min_anchor_len: + junctions.append([chr, left, right]) + + if cigar_op in "MND": + right_pos += length + + return junctions + +def get_right(pos, cigars): + right_pos = pos + cigars = cigar_re.findall(cigars) + for cigar in cigars: + length = int(cigar[:-1]) + cigar_op = cigar[-1] + if cigar_op in "MDN": + right_pos += length + + return right_pos + +def get_cigar_chars(cigars): + cigars = cigar_re.findall(cigars) + cigar_chars = "" + for cigar in cigars: + cigar_op = cigar[-1] + cigar_chars += cigar_op + + return cigar_chars + + +""" +""" +def get_cigar_chars_MN(cigars): + cigars = cigar_re.findall(cigars) + cigar_chars = "" + for cigar in cigars: + cigar_op = cigar[-1] + if cigar_op in "MN": + if cigar_chars == "" or cigar_chars[-1] != cigar_op: + cigar_chars += cigar_op + + return cigar_chars + + +""" +""" +def is_small_anchor_junction_read(cigars): + cigar_list = [] + for cigar in cigar_re.findall(cigars): + 
cigar_op = cigar[-1] + cigar_len = int(cigar[:-1]) + cigar_list.append([cigar_op, cigar_len]) + + if len(cigar_list) < 3: + return False + + if cigar_list[0][0] != 'M' or cigar_list[-1][0] != 'M': + return False + + if cigar_list[0][1] > 10 and cigar_list[-1][1] > 10: + return False + + if cigar_list[1][0] != 'N' or cigar_list[-2][0] != 'N': + return False + + return True + + +""" +""" +def is_canonical_junction(chr_dic, junction): + chr, left, right = junction + donor = chr_dic[chr][left:left+2] + acceptor = chr_dic[chr][right-3:right-1] + rev_donor = reverse_complement(acceptor) + rev_acceptor = reverse_complement(donor) + + if (donor == "GT" and acceptor == "AG") or \ + (rev_donor == "GT" and rev_acceptor == "AG"): + return True + + return False + + +""" +""" +def is_small_exon_junction_read(cigars, min_exon_len = 23): + cigars = cigar_re.findall(cigars) + for i in range(1, len(cigars) - 1): + cigar = cigars[i] + cigar_op = cigar[-1] + cigar_len = int(cigar[:-1]) + + prev_op = cigars[i-1][-1] + next_op = cigars[i+1][-1] + + if prev_op == 'N' and cigar_op == 'M' and next_op == 'N': + if cigar_len <= min_exon_len: + return True + + return False + + +""" +""" +""" +def repeat_to_genome_alignment(repeat_info, repeat_dic, rep, pos, cigar_str = ""): + assert rep in repeat_info + left = pos - 1 # convert 1-based offset to zero-based + + repeats = repeat_info[rep] + l, r = 0, len(repeats) + while l < r: + m = (l + r) / 2 + rep_allele, rpos, rlen = repeats[m] + if left >= rpos and left < rpos + rlen: + while m > 0: + rep_allele, rpos, rlen = repeats[m-1] + if left < rpos: + break + m -= 1 + break + if left < rpos: + r = m + else: + l = m + 1 + + alignments = [] + while m < len(repeats): + rep_allele, rpos, rlen = repeats[m] + if left >= rpos + rlen: + m += 1 + continue + + if left < rpos: + break + + assert rep_allele in repeat_dic[rep] + coords = repeat_dic[rep][rep_allele] + assert len(coords) > 0 + for coord in coords: + cchr, cpos, cstrand = coord + adj_left = left - 
rpos + if cstrand == '+': + rep_left = cpos + adj_left + rep_cigar_str = cigar_str + else: + if cigar_str: + read_len, rep_cigar_str = reverse_cigar(cigar_str) + else: + read_len, rep_cigar_str = 0, "" + rc_adj_left = rlen - adj_left - read_len; + rep_left = cpos + rc_adj_left + + alignments.append([cchr, rep_left + 1, rep_cigar_str]) + m += 1 + + return alignments +""" +def repeat_to_genome_alignment(repeat_db, repeat_map, rptRefName, pos, cigar_str = ''): + assert rptRefName in repeat_db + readlen = read_len_cigar(cigar_str) + #readlen = 101 + + # pos in sam-result. pos is 1-based + left = pos - 1 + right = left + readlen + + repeats = repeat_db[rptRefName] + rmap = repeat_map[rptRefName] + + #print len(rmap) + #print rmap + + alignments = [] + + i = find_leftmost_pos(rmap, left) + if i >= len(rmap): + print >> sys.stderr, 'Value Error' + return alignments + + if right > rmap[i][0]: + print >> sys.stderr, 'Not repeat' + return alignments + + repeat = repeats[rmap[i][1]] + + #print 'Allele Size:', len(repeat.allele) + #print repeat.allele + for allele in repeat.allele: + rpos = allele.repeat_pos + rlen = allele.repeat_length + + if (left >= rpos) and (right <= rpos + rlen): + offset = left - rpos + for coord in allele.positions: + cchr, cpos, cstrand = coord + if cstrand == '+': + rep_left = cpos + offset + rep_cigar_str = cigar_str + else: + if cigar_str: + rep_read_len, rep_cigar_str = reverse_cigar(cigar_str) + else: + rep_read_len, rep_cigar_str = 0, "" + + rc_offset = rlen - offset - rep_read_len + rep_left = cpos + rc_offset + + alignments.append([cchr, rep_left + 1, rep_cigar_str]) + #print genome_pos[0], genome_pos[1] + offset + 1, genome_pos[2], genome_pos[1] + + return alignments + + +""" +""" +def extract_single(infilename, + outfilename, + chr_dic, + aligner, + version, + repeat_db, + repeat_map, + debug_dic, + hash_idx): + infile = open(infilename) + if hash_idx == -1: + outfile = open(outfilename, "w") + else: + outfile = open(outfilename + "." 
+ str(hash_idx), "w") + + prev_read_id = "" + num_reads, num_aligned_reads, num_ualigned_reads = 0, 0, 0 + prev_NM, prev_NH, NH_real = 0, 0, 0 + + for line in infile: + if line[0] == '@': + continue + + cols = line[:-1].split() + read_id, flag, chr, pos, mapQ, cigar_str = cols[:6] + read_seq = cols[9] + if len(read_id) >= 3 and read_id[-2] == "/": + read_id = read_id[:-2] + + if read_id.find("seq.") == 0: + read_id = read_id[4:] + + if aligner == "gsnap": + chr = chr.replace("_", ":") + + if hash_idx != -1: + hashval = hash(read_id) + if hashval % mp_num != hash_idx: + continue + + if read_id != prev_read_id: + num_reads += 1 + + flag, pos, mapQ = int(flag), int(pos), int(mapQ) + if flag & 0x4 != 0 or \ + 'H' in cigar_str: + prev_read_id = read_id + continue + + NH, NM, XA = "", sys.maxint, [] + for i in range(11, len(cols)): + col = cols[i] + # "nM" from STAR + if col.startswith("NM") or col.startswith("nM"): + NM = int(col[5:]) + elif col.startswith("NH"): + NH = col + elif col.startswith("XA"): + XA = col[5:].split(';')[:-1] + if NH != "": + NH = int(NH[5:]) + if aligner == "hisat2": + if prev_read_id == read_id: + assert prev_NH == NH + if NH == 1 or mapQ == 60: + assert NH == 1 and mapQ == 60 + + if read_id != prev_read_id: + num_aligned_reads += 1 + if aligner == "hisat2" and \ + NH == 1: + num_ualigned_reads += 1 + else: + # In case of Bowtie2, only consier the best alignments + if aligner in ["bowtie2", "bwa", "vg"]: + if NM > prev_NM: + continue + + def adjust_alignment(chr, pos, cigar_str): + NM_real = 0 + read_pos, right_pos = 0, pos - 1 + cigars = cigar_re.findall(cigar_str) + cigars = [[cigar[-1], int(cigar[:-1])] for cigar in cigars] + for i in range(len(cigars)): + cigar_op, length = cigars[i] + if cigar_op == "S": + assert i == 0 or i == len(cigars) - 1 + if i == 0: + assert cigars[i+1][0] == "M" + ref_seq = chr_dic[chr][right_pos-length:right_pos] + else: + assert cigars[i-1][0] == "M" + ref_seq = chr_dic[chr][right_pos:right_pos+length] + + 
ref_seq = ref_seq.upper() + if length == len(ref_seq): + for j in range(length): + if ref_seq[j] != "N" and read_seq[read_pos+j] != ref_seq[j]: + NM_real += 1 + else: + NM_real += length + if cigar_op in "MND": + right_pos += length + if cigar_op in "MIS": + read_pos += length + + if cigars[0][0] == "S": + assert cigars[1][0] == "M" + pos -= cigars[0][1] + cigars[1][1] += cigars[0][1] + cigars = cigars[1:] + if cigars[-1][0] == "S": + assert cigars[-2][0] == "M" + cigars[-2][1] += cigars[-1][1] + cigars = cigars[:-1] + + cigar_str = "" + for cigar in cigars: + cigar_op, length = cigar + cigar_str += ("%d%s" % (length, cigar_op)) + + return pos, cigar_str, NM_real + + + alignments = [[chr, pos, cigar_str]] + if aligner == "bwa" and len(XA) > 0: + for alignment in XA: + alt_chr, alt_pos, alt_cigar_str, alt_NM = alignment.split(',') + alt_pos, alt_NM = abs(int(alt_pos)), int(alt_NM) + if alt_NM > NM: + break + alignments.append([alt_chr, alt_pos, alt_cigar_str]) + + # Convert repeat alignments to genome alignments + if aligner == "hisat2" and chr.startswith("rep") and len(repeat_map) > 0: + alignments = repeat_to_genome_alignment(repeat_db, repeat_map, chr, pos, cigar_str) + + for i, alignment in enumerate(alignments): + chr, pos, cigar_str = alignment + pos, cigar_str, NM_real = adjust_alignment(chr, pos, cigar_str) + p_str = "%s\t%s\t%d\t%s\tNM:i:%d" % (read_id, chr, pos, cigar_str, NM_real) + print >> outfile, p_str + + if aligner == "hisat2": + if prev_read_id != read_id: + if prev_read_id != "": + assert prev_NH == NH_real + NH_real = 1 + else: + NH_real += 1 + prev_NH = NH + prev_NM = NM + prev_read_id = read_id + + if aligner == "hisat2": + if prev_read_id != "": + assert prev_NH == NH_real + + outfile.close() + infile.close() + + # Sanity check for HISAT2's alignment summary + if aligner == "hisat2" and os.path.exists(infilename + ".summary") and (not mp_mode): + hisat2_reads, hisat2_0aligned_reads, hisat2_ualigned_reads, hisat2_maligned_reads = 0, 0, 0, 0 + 
for line in open(infilename + ".summary"): + line = line.strip() + if line.startswith("HISAT2 summary") or \ + line.startswith("Overall"): + continue + category, num = line.split(':') + num = num.strip() + num = int(num.split(' ')[0]) + if category.startswith("Total reads"): + hisat2_reads = num + elif category.startswith("Aligned 0 time"): + hisat2_0aligned_reads = num + elif category.startswith("Aligned 1 time"): + hisat2_ualigned_reads = num + else: + assert category.startswith("Aligned >1 time") + assert hisat2_reads == hisat2_0aligned_reads + hisat2_ualigned_reads + num + + hisat2_aligned_reads = hisat2_reads - hisat2_0aligned_reads + + assert hisat2_reads == num_reads + assert hisat2_aligned_reads == num_aligned_reads + assert hisat2_ualigned_reads == num_ualigned_reads + + +""" +""" +def extract_pair(infilename, + outfilename, + chr_dic, + RNA, + aligner, + version, + repeat_db, + repeat_map, + debug_dic, + hash_idx): + read_dic = {} + pair_reported = set() + + infile = open(infilename) + if hash_idx == -1: + outfile = open(outfilename, "w") + else: + outfile = open(outfilename + "." 
+ str(hash_idx), "w") + + num_pairs, num_conc_aligned_pairs, num_conc_ualigned_pairs, num_disc_aligned_pairs = 0, 0, 0, 0 + num_aligned_reads, num_ualigned_reads = 0, 0 + + prev_read_id, pair_list = "", set() + prev_NM = sys.maxint + prev_NH1, prev_NH2 = 0, 0 + NH1_real, NH2_real = 0, 0 + + for line in infile: + if line[0] == '@': + continue + + cols = line[:-1].split() + read_id, flag, chr1, pos1, mapQ, cigar1_str, chr2, pos2 = cols[:8] + read_seq = cols[9] + if len(read_id) >= 3 and read_id[-2] == "/": + read_id = read_id[:-2] + + if read_id.find("seq.") == 0: + read_id = read_id[4:] + + if hash_idx != -1: + hashval = hash(read_id) + if hashval % mp_num != hash_idx: + continue + + if aligner == "gsnap": + chr1 = chr1.replace("_", ":") + chr2 = chr2.replace("_", ":") + + if read_id != prev_read_id: + num_pairs += 1 + pair_list = set() + prev_NM = sys.maxint + + flag = int(flag) + canonical_pos1, canonical_pos2 = int(pos1), int(pos2) + left_read = (flag & 0x40 != 0) + pos1 = canonical_pos1 + mapQ = int(mapQ) + if flag & 0x4 != 0 or \ + 'H' in cigar1_str: + prev_read_id, is_prev_read_left = read_id, left_read + continue + + concordant = (flag & 0x2 != 0) + NH, NM1, YT, XA = sys.maxint, sys.maxint, "", [] + for i in range(11, len(cols)): + col = cols[i] + # "nM" from STAR + if col.startswith("NM") or col.startswith("nM"): + NM1 = int(col[5:]) + elif col.startswith("NH"): + NH = int(col[5:]) + elif col.startswith("YT"): + YT = col[5:] + elif col.startswith("XA"): + XA = col[5:].split(';')[:-1] + + if aligner == "hisat2": + if prev_read_id == read_id: + if left_read: + assert prev_NH1 == 0 or prev_NH1 == NH + else: + assert prev_NH2 == 0 or prev_NH2 == NH + if NH == 1 or mapQ == 60: + assert NH == 1 and mapQ == 60 + + unpaired = (flag & 0x8 != 0) or (YT in ["UU", "UP"]) + if unpaired: + if left_read not in pair_list: + pair_list.add(left_read) + num_aligned_reads += 1 + if aligner == "hisat2" and NH == 1: + num_ualigned_reads += 1 + assert mapQ == 60 + else: + if 
read_id != prev_read_id: + if concordant: + num_conc_aligned_pairs += 1 + if aligner == "hisat2" and NH == 1: + num_conc_ualigned_pairs += 1 + else: + if aligner == "hisat2": + assert YT == "DP" + num_disc_aligned_pairs += 1 + + if chr2 == '*': + continue + + def adjust_alignment(chr, pos, cigar_str): + NM_real = 0 + read_pos, right_pos = 0, pos - 1 + cigars = cigar_re.findall(cigar_str) + cigars = [[cigar[-1], int(cigar[:-1])] for cigar in cigars] + for i in range(len(cigars)): + cigar_op, length = cigars[i] + if cigar_op == "S": + assert i == 0 or i == len(cigars) - 1 + if i == 0: + assert cigars[i+1][0] == "M" + ref_seq = chr_dic[chr1][right_pos-length:right_pos] + else: + assert cigars[i-1][0] == "M" + ref_seq = chr_dic[chr1][right_pos:right_pos+length] + + ref_seq = ref_seq.upper() + if length == len(ref_seq): + for j in range(length): + if ref_seq[j] != "N" and read_seq[read_pos+j] != ref_seq[j]: + NM_real += 1 + else: + NM_real += length + if cigar_op in "MND": + right_pos += length + if cigar_op in "MIS": + read_pos += length + + if cigars[0][0] == "S": + assert cigars[1][0] == "M" + pos -= cigars[0][1] + cigars[1][1] += cigars[0][1] + cigars = cigars[1:] + if cigars[-1][0] == "S": + assert cigars[-2][0] == "M" + cigars[-2][1] += cigars[-1][1] + cigars = cigars[:-1] + + cigar_str = "" + for cigar in cigars: + cigar_op, length = cigar + cigar_str += ("%d%s" % (length, cigar_op)) + + return pos, cigar_str, NM_real + + alignments = [[chr1, pos1, cigar1_str]] + if aligner == "bwa" and len(XA) > 0: + for alignment in XA: + alt_chr, alt_pos, alt_cigar_str, alt_NM = alignment.split(',') + alt_pos, alt_NM = abs(int(alt_pos)), int(alt_NM) + if alt_NM > NM1: + break + alignments.append([alt_chr, alt_pos, alt_cigar_str]) + + # Convert repeat alignments to genome alignments + if aligner == "hisat2" and (chr1.startswith("rep") or chr2.startswith("rep")) and len(repeat_map) > 0: + if chr1.startswith("rep"): + alignments = repeat_to_genome_alignment(repeat_db, repeat_map, 
chr1, pos1, cigar1_str) + if chr2.startswith("rep") or (chr1.startswith("rep") and chr2 == "="): + chr_tmp = chr1 if chr2 == "=" else chr2 + alignments2 = repeat_to_genome_alignment(repeat_db, repeat_map, chr_tmp, int(pos2)) + else: + alignments2 = [[chr2, int(pos2)]] + + selected_alignments = [] + for alignment in alignments: + _chr1, _pos1 = alignment[:2] + add = False + for alignment2 in alignments2: + _chr2, _pos2 = alignment2[:2] + if _chr1 == _chr2 and abs(_pos1 - _pos2) <= 1000: + add = True + break + if add: + selected_alignments.append(alignment) + + alignments = selected_alignments + + for i, alignment in enumerate(alignments): + chr1, pos1, cigar1_str = alignment + pos1, cigar_str, NM_real = adjust_alignment(chr1, pos1, cigar1_str) + chr2 = chr1 + + if aligner == "bwa": + me = "%s\t%s" % (read_id, chr1) + partner = "%s\t%s" % (read_id, chr2) + else: + me = "%s\t%s\t%d" % (read_id, chr1, canonical_pos1) + partner = "%s\t%s\t%d" % (read_id, chr2, canonical_pos2) + if partner in read_dic: + maps = read_dic[partner] + + for map in maps: + if (map[0] == me) or len(maps) == 1: + cigar2_str, NM2, pos2 = map[1:4] + + if aligner == "bwa": + if abs(pos1 - pos2) >= 1000: + continue + + if aligner in ["bowtie2", "bwa"]: + if NM1 + NM2 > prev_NM: + continue + else: + prev_NM = NM1 + NM2 + + if chr1 != chr2: + continue + + # DK - debugging purposes + if RNA: + if aligner in ["bowtie2", "bwa"] and abs(pos1 - pos2) > 1000: + continue + else: + if abs(pos1 - pos2) > 1000: + continue + + if int(pos2) > int(pos1): + p_str = "%s\t%s\t%d\t%s\t%s\t%d\t%s\tNM:i:%d\tNM:i:%d" % \ + (read_id, chr1, pos1, cigar_str, chr2, pos2, cigar2_str, NM1, NM2) + else: + p_str = "%s\t%s\t%d\t%s\t%s\t%d\t%s\tNM:i:%d\tNM:i:%d" % \ + (read_id, chr2, pos2, cigar2_str, chr1, pos1, cigar_str, NM2, NM1) + + if p_str not in pair_reported: + pair_reported.add(p_str) + print >> outfile, p_str + + + + if not me in read_dic: + read_dic[me] = [] + + read_dic[me].append([partner, cigar_str, NM1, pos1]) + + 
if aligner == "hisat2": + if prev_read_id != read_id: + if prev_read_id != "": + assert prev_NH1 == NH1_real + assert prev_NH2 == NH2_real + prev_NH1, prev_NH2 = 0, 0 + if left_read: + NH1_real, NH2_real = 1, 0 + else: + NH1_real, NH2_real = 0, 1 + else: + if left_read: + NH1_real += 1 + else: + NH2_real += 1 + if left_read: + prev_NH1 = NH + else: + prev_NH2 = NH + prev_read_id = read_id + + if aligner == "hisat2": + if prev_read_id != "": + assert prev_NH1 == NH1_real + assert prev_NH2 == NH2_real + + outfile.close() + infile.close() + + # Sanity check for HISAT2's alignment summary + if aligner == "hisat2" and os.path.exists(infilename + ".summary") and (not mp_mode): + hisat2_pairs, hisat2_0aligned_pairs, hisat2_conc_ualigned_pairs, hisat2_conc_maligned_pairs, hisat2_disc_aligned_pairs = 0, 0, 0, 0, 0 + hisat2_reads, hisat2_0aligned_reads, hisat2_ualigned_reads, hisat2_maligned_reads = 0, 0, 0, 0 + + for line in open(infilename + ".summary"): + line = line.strip() + if line.startswith("HISAT2 summary") or \ + line.startswith("Overall"): + continue + category, num = line.split(':') + num = num.strip() + num = int(num.split(' ')[0]) + if category.startswith("Total pairs"): + hisat2_pairs = num + elif category.startswith("Aligned concordantly or discordantly 0 time"): + hisat2_0aligned_pairs = num + elif category.startswith("Aligned concordantly 1 time"): + hisat2_conc_ualigned_pairs = num + elif category.startswith("Aligned concordantly >1 time"): + hisat2_conc_maligned_pairs = num + elif category.startswith("Aligned discordantly"): + hisat2_disc_aligned_pairs = num + assert hisat2_pairs == hisat2_0aligned_pairs + hisat2_conc_ualigned_pairs + hisat2_conc_maligned_pairs + hisat2_disc_aligned_pairs + elif category.startswith("Total unpaired reads"): + hisat2_reads = num + assert hisat2_reads == hisat2_0aligned_pairs * 2 + elif category.startswith("Aligned 0 time"): + hisat2_0aligned_reads = num + elif category.startswith("Aligned 1 time"): + hisat2_ualigned_reads = 
def is_junction_read(junctions_dic, chr, pos, cigar_str):
    """Annotate every junction of one alignment with a membership flag.

    The junctions are obtained from get_junctions(chr, pos, cigar_str) and
    converted to their string form with to_junction_str().

    Returns a list of [junction_str, is_known] pairs, in the order the
    junctions were produced, where is_known is True when junction_str is
    contained in junctions_dic.
    """
    junction_keys = [to_junction_str(junction)
                     for junction in get_junctions(chr, pos, cigar_str)]
    return [[key, key in junctions_dic] for key in junction_keys]


def is_junction_pair(junctions_dic, chr, pos, cigar_str, mate_chr, mate_pos, mate_cigar_str):
    """Annotate the junctions of both mates of a pair.

    Returns the [junction_str, is_known] pairs of the first mate followed by
    those of the second mate (see is_junction_read).
    """
    first_mate = is_junction_read(junctions_dic, chr, pos, cigar_str)
    second_mate = is_junction_read(junctions_dic, mate_chr, mate_pos, mate_cigar_str)
    return first_mate + second_mate
def find_in_gtf_junctions(chr_dic, gtf_junctions, junction, relax_dist = 5):
    """Look up `junction` in the sorted list `gtf_junctions`, tolerating a small shift.

    Parameters:
      chr_dic       -- passed through to is_canonical_junction().
      gtf_junctions -- list of [chr, left, right] entries; must be ordered
                       consistently with junction_cmp(), because a binary
                       search is run over it.
      junction      -- the [chr, left, right] junction to look for.
      relax_dist    -- maximum shift (applied equally to both ends) that is
                       still accepted as the same junction.

    Returns the index of the matching entry in gtf_junctions, or -1.
    A shifted (non-exact) match is rejected with -1 when the candidate entry
    is reported as canonical by is_canonical_junction(); only an exact
    `left` match is accepted in that case.
    """
    def lower_bound(key):
        # Binary search: index of an entry comparing equal to `key`, otherwise
        # the insertion point that keeps the list ordered.
        lo, hi = 0, len(gtf_junctions)
        while lo < hi:
            # Floor division keeps `mid` an integer index under true division too.
            mid = (lo + hi) // 2
            order = junction_cmp(key, gtf_junctions[mid])
            if order == 0:
                return mid
            if order < 0:
                hi = mid
            else:
                lo = mid + 1
        return lo

    chr, left, right = junction

    # Start scanning at the left-most position a shifted match could occupy.
    i = lower_bound([chr, left - relax_dist, right - relax_dist])
    while i < len(gtf_junctions):
        chr2, left2, right2 = gtf_junctions[i]

        # NOTE(review): stopping on `right2 - right > relax_dist` assumes no
        # later entry can still match; whether that holds depends on the
        # ordering defined by junction_cmp -- confirm.
        if chr2 > chr or \
           left2 - left > relax_dist or \
           right2 - right > relax_dist:
            break

        # Accept only candidates whose two ends are shifted by the same amount.
        if abs(left - left2) <= relax_dist and left - left2 == right - right2:
            if is_canonical_junction(chr_dic, gtf_junctions[i]) and left != left2:
                return -1
            return i
        i += 1

    return -1


def singleInMaps(chr, pos, pos2, cigar, NM, maps):
    """Search `maps` for an alignment that shares a start or an end position.

    Each entry of `maps` is indexed as [chr, left, right, ...].  An entry
    matches when its chromosome equals `chr` and either its left position
    equals `pos` or its right position equals `pos2`.  `cigar` and `NM` are
    accepted for signature compatibility but are not consulted.

    Returns (True, index_of_first_match) or (False, -1).
    """
    for index, entry in enumerate(maps):
        if chr != entry[0]:
            continue
        if pos == entry[1] or pos2 == entry[2]:
            return True, index
    return False, -1
def compare_single_sam(RNA,
                       reference_sam,
                       query_sam,
                       mapped_fname,
                       chr_dic,
                       gtf_junctions,
                       gtf_junctions_set,
                       ex_gtf_junctions,
                       aligner):
    """Score single-end query alignments against a reference alignment file.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-collapsed copy of the source -- confirm against the original
    file before relying on nesting details.

    Parameters:
      RNA               -- truthy for RNA data; together with
                           aligner == "hisat-TLA" it selects the relaxed
                           singleInMaps() matching.
      reference_sam     -- path of the reference file; every non-'@' line must
                           split into exactly five fields:
                           read_name, chr, pos, cigar, NM-tag.
      query_sam         -- path of the aligner output in the same simplified
                           layout (four leading fields plus optional tags).
      mapped_fname      -- output path; names of correctly mapped reads are
                           written here, and names of reads whose match is the
                           first reference alignment go to mapped_fname + ".first".
      chr_dic           -- passed to find_in_gtf_junctions() / is_canonical_junction().
      gtf_junctions     -- junction list searched by find_in_gtf_junctions().
      gtf_junctions_set -- container tested for junction-string membership.
      ex_gtf_junctions  -- junction strings excluded from the false-junction count.
      aligner           -- aligner name.

    Returns a 13-tuple:
      (mapped, unique_mapped, first_mapped, unmapped, aligned, multi_aligned,
       snp_mapped, snp_unique_mapped, snp_first_mapped, snp_unmapped,
       len(temp_junctions), len(temp_gtf_junctions), mapping_point)
    """
    aligned, multi_aligned = 0, 0
    # db_dic: read name -> list of reference alignments [chr, pos, pos2, cigar, NM]
    # db_junction_dic: read name -> list of [junction_str, is_gtf_junction]
    db_dic, db_junction_dic = {}, {}
    mapped_file = open(mapped_fname, "w")
    first_mapped_file = open(mapped_fname + ".first", "w")
    file = open(reference_sam, "r")
    # junction_read_dic: junction_str -> reference lines supporting it (debugging aid)
    junction_read_dic = {}
    for line in file:
        if line.startswith('@'):
            continue

        read_name, chr, pos, cigar, NM = line[:-1].split()
        # NM[5:] drops the first five characters of the tag (presumably "NM:i:").
        pos, NM = int(pos), int(NM[5:])

        # Normalise read names: strip a leading "seq." and a trailing "/<char>".
        if read_name.find("seq.") == 0:
            read_name = read_name[4:]

        if len(read_name) > 2 and read_name[-2] == '/':
            read_name = read_name[:-2]

        # multi_aligned counts reference lines, aligned counts distinct reads.
        multi_aligned += 1
        if read_name not in db_dic:
            db_dic[read_name] = []
            aligned += 1

        pos2 = get_right(pos, cigar)
        db_dic[read_name].append([chr, pos, pos2, cigar, NM])

        read_junctions = is_junction_read(gtf_junctions_set, chr, pos, cigar)
        if len(read_junctions) > 0:
            if read_name not in db_junction_dic:
                db_junction_dic[read_name] = []

            for junction_str, is_gtf_junction in read_junctions:
                db_junction_dic[read_name].append([junction_str, is_gtf_junction])

                if junction_str not in junction_read_dic:
                    junction_read_dic[junction_str] = []
                junction_read_dic[junction_str].append(line[:-1])

    file.close()

    # temp_junctions: all junctions seen in the reference alignments;
    # temp_gtf_junctions: the subset that could be tied to a GTF junction.
    temp_junctions, temp_gtf_junctions = set(), set()
    for read_name, can_junctions in db_junction_dic.items():
        if len(can_junctions) <= 0:
            continue

        # DK - for debugging purposes
        # 1. select the best candidate among spliced alignments if multiple

        # Keeps only the candidates with the shortest intron; currently unused
        # because the call below is commented out.
        def pickup_junction(can_junctions):
            junctions = [can_junctions[0]]
            for i in range(1, len(can_junctions)):
                def get_intron_len(can_junction):
                    chr, left, right = to_junction(can_junction)
                    return right - left - 1

                intron, intron_cmp = get_intron_len(junctions[0][0]), get_intron_len(can_junctions[i][0])

                if intron > intron_cmp:
                    junctions = [can_junctions[i]]
                elif intron == intron_cmp:
                    junctions.append(can_junctions[i])

            return junctions

        # can_junctions = pickup_junction(can_junctions)

        for can_junction in can_junctions:
            found_junction_str = None
            junction_str, is_gtf_junction = can_junction
            if is_gtf_junction:
                found_junction_str = junction_str

            # Not an exact GTF junction: retry with the tolerant lookup.
            if not found_junction_str:
                junction = to_junction(junction_str)
                gtf_index = find_in_gtf_junctions(chr_dic, gtf_junctions, junction)

                if gtf_index >= 0:
                    is_gtf_junction = True
                    found_junction_str = to_junction_str(gtf_junctions[gtf_index])

            if found_junction_str:
                temp_gtf_junctions.add(found_junction_str)
                temp_junctions.add(found_junction_str)
            else:
                if junction_str not in temp_junctions:
                    # Placeholder statement: only disabled debugging code lives here.
                    None
                    # assert junction_str in junction_read_dic
                    # DK - for debugging purposes
                    """
                    if len(junction_read_dic[junction_str]) <= 2:
                        canonical = is_canonical_junction(chr_dic, to_junction(junction_str))
                        if not canonical:
                            print >> sys.stdout, read_name, junction_str, len(junction_read_dic[junction_str]), can_junctions
                            for line in junction_read_dic[junction_str]:
                                print >> sys.stdout, "\t", line
                    """
                temp_junctions.add(junction_str)


    # Collapse near-duplicate junctions: sort them, then drop a junction that
    # lies on the same chromosome as the previously kept one, is shifted by the
    # same amount (<= 10) at both ends, and is not a GTF junction.
    temp2_junctions = []
    for junction in temp_junctions:
        temp2_junctions.append(to_junction(junction))
    temp_junctions = sorted(list(temp2_junctions), cmp=junction_cmp)
    temp2_junctions = []
    for can_junction in temp_junctions:
        if len(temp2_junctions) <= 0:
            temp2_junctions.append(can_junction)
        else:
            chr, left, right = temp2_junctions[-1]
            chr2, left2, right2 = can_junction
            if chr == chr2 and \
               abs(left - left2) == abs(right - right2) and \
               abs(left - left2) <= 10 and \
               not to_junction_str(can_junction) in temp_gtf_junctions:
                continue
            temp2_junctions.append(can_junction)

    temp_junctions = set()
    for junction in temp2_junctions:
        temp_junctions.add(to_junction_str(junction))

    # Second pass: classify every query alignment.
    file = open(query_sam)
    mapped, unmapped, unique_mapped, first_mapped, mapping_point = 0, 0, 0, 0, 0.0
    snp_mapped, snp_unmapped, snp_unique_mapped, snp_first_mapped = 0, 0, 0, 0
    for line in file:
        if line.startswith('@'):
            continue
        fields = line[:-1].split()
        read_name, chr, pos, cigar = fields[:4]
        # Optional tags; each value is the text after the first five characters.
        trans_id, NM, Zs = None, None, None
        for field in fields[4:]:
            if field.startswith("TI"):
                trans_id = field[5:]
            elif field.startswith("NM"):
                NM = int(field[5:])
            elif field.startswith("Zs"):
                Zs = field[5:]
        # A read counts towards the snp_* tallies when it carries a Zs tag.
        snp_included = (Zs != None)

        pos = int(pos)
        pos2 = get_right(pos, cigar)
        if read_name not in db_dic:
            unmapped += 1
            if snp_included:
                snp_unmapped += 1
            continue

        maps = db_dic[read_name]
        found = False
        found_at_first = False

        if (aligner == "hisat-TLA" and RNA):
            # Relaxed match: same chromosome and a shared start or end position.
            found, index = singleInMaps(chr, pos, pos2, cigar, NM, maps)
            if found:
                if index == 0:
                    found_at_first = True

        else:
            # Strict match: the whole [chr, pos, pos2, cigar, NM] record must agree.
            if [chr, pos, pos2, cigar, NM] in maps:
                found = True
                if maps.index([chr, pos, pos2, cigar, NM]) == 0:
                    found_at_first = True

        # DK - debugging purposes
        # Permanently disabled by the leading `False`.
        if False and len(maps) > 0 and maps[0][-1] < NM:
            found = True

        # Fallback: same span and same get_cigar_chars() result; accepted only
        # when every junction of the reference alignment is found in the GTF
        # junction list.
        if not found:
            for idx, map in enumerate(maps):
                if chr == map[0] and \
                   pos == map[1] and \
                   pos2 == map[2] and \
                   get_cigar_chars(cigar) == get_cigar_chars(map[3]):

                    read_junctions = is_junction_read(gtf_junctions_set, map[0], map[1], map[3])
                    if True:
                        found_list = [False for i in range(len(read_junctions))]
                        for j in range(len(read_junctions)):
                            junction_str, is_gtf_junction = read_junctions[j]
                            junction = to_junction(junction_str)
                            gtf_index = find_in_gtf_junctions(chr_dic, gtf_junctions, junction)
                            if gtf_index >= 0:
                                found_list[j] = True
                        found = not (False in found_list)
                    else:
                        found = False

                    if found:
                        if idx == 0:
                            found_at_first = True
                        break

        if found:
            print >> mapped_file, read_name
            mapped += 1
            if snp_included:
                snp_mapped += 1
            if len(maps) == 1:
                unique_mapped += 1
                if snp_included:
                    snp_unique_mapped += 1
            if found_at_first:
                print >> first_mapped_file, read_name
                first_mapped += 1
                if snp_included:
                    snp_first_mapped += 1
            # Each correctly mapped read contributes 1 / (number of reference alignments).
            mapping_point += (1.0 / len(maps))
        else:
            unmapped += 1
            if snp_included:
                snp_unmapped += 1

    file.close()
    mapped_file.close()
    first_mapped_file.close()

    # DK - for debugging purposes
    # Count junctions that are neither GTF junctions nor listed in
    # ex_gtf_junctions, split by is_canonical_junction().
    false_can_junctions, false_noncan_junctions = 0, 0
    for junction_str in temp_junctions:
        if junction_str in temp_gtf_junctions:
            continue
        if junction_str in ex_gtf_junctions:
            continue
        if is_canonical_junction(chr_dic, to_junction(junction_str)):
            false_can_junctions += 1
        else:
            false_noncan_junctions += 1
    print >> sys.stderr, "\t\t\tfalse junctions: %d (canonical), %d (non-canonical)" % (false_can_junctions, false_noncan_junctions)

    return mapped, unique_mapped, first_mapped, unmapped, aligned, multi_aligned, \
        snp_mapped, snp_unique_mapped, snp_first_mapped, snp_unmapped, \
        len(temp_junctions), len(temp_gtf_junctions), mapping_point
def pairedInMaps(chr, pos, pos_right, cigar, pos2, pos2_right, cigar2, NM, NM2, maps):
    """Search `maps` for a paired alignment whose two mates both share an end.

    Each entry of `maps` is indexed as
    [chr, left1, right1, cigar1, left2, right2, ...].  An entry matches when
    its chromosome equals `chr`, mate 1 agrees on its left or right position,
    and mate 2 agrees on its left or right position.  `cigar`, `cigar2`, `NM`
    and `NM2` are accepted for signature compatibility but are not consulted.

    Returns (True, index_of_first_match) or (False, -1).
    """
    for index, entry in enumerate(maps):
        if chr == entry[0]:
            if (pos == entry[1] or pos_right == entry[2]) and \
               (pos2 == entry[4] or pos2_right == entry[5]):
                return True, index
    return False, -1
def compare_paired_sam(RNA,
                       reference_sam,
                       query_sam,
                       mapped_fname,
                       chr_dic,
                       gtf_junctions,
                       gtf_junctions_set,
                       ex_gtf_junctions,
                       aligner):
    """Score paired-end query alignments against a reference alignment file.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-collapsed copy of the source -- confirm against the original
    file before relying on nesting details.

    Parameters:
      RNA               -- truthy for RNA data; together with
                           aligner == "hisat-TLA" it selects the relaxed
                           pairedInMaps() matching.
      reference_sam     -- path of the reference file; every non-'@' line must
                           split into exactly nine fields: read_name, chr, pos,
                           cigar, chr2, pos2, cigar2, NM-tag, NM2-tag.
      query_sam         -- path of the aligner output in the same simplified
                           layout (seven leading fields plus optional tags).
      mapped_fname      -- output path for names of correctly mapped pairs;
                           mapped_fname + ".uniq" receives pairs with a single
                           reference alignment and mapped_fname + ".first"
                           pairs matching the first reference alignment.
      chr_dic           -- passed to find_in_gtf_junctions() / is_canonical_junction().
      gtf_junctions     -- junction list searched by find_in_gtf_junctions().
      gtf_junctions_set -- container tested for junction-string membership.
      ex_gtf_junctions  -- accepted for signature parity with
                           compare_single_sam; not consulted here.
      aligner           -- aligner name.

    Returns a 13-tuple:
      (mapped, unique_mapped, first_mapped, unmapped, aligned, multi_aligned,
       snp_mapped, snp_unique_mapped, snp_first_mapped, snp_unmapped,
       len(temp_junctions), len(temp_gtf_junctions), mapping_point)
    """
    aligned, multi_aligned = 0, 0
    # db_dic: read name -> list of reference pair records
    # db_junction_dic: read name -> list of [junction_str, is_gtf_junction]
    # junction_pair_dic: junction_str -> reference lines supporting it
    db_dic, db_junction_dic, junction_pair_dic = {}, {}, {}
    mapped_file = open(mapped_fname, "w")
    uniq_mapped_file = open(mapped_fname + '.uniq', "w")
    first_mapped_file = open(mapped_fname + '.first', "w")
    file = open(reference_sam, "r")
    for line in file:
        if line[0] == '@':
            continue
        read_name, chr, pos, cigar, chr2, pos2, cigar2, NM, NM2 = line[:-1].split()
        pos, pos2 = int(pos), int(pos2)
        # [5:] drops the first five characters of each tag (presumably "NM:i:").
        NM, NM2 = int(NM[5:]), int(NM2[5:])

        # Normalise read names: strip a leading "seq." and a trailing "/<char>".
        if read_name.find("seq.") == 0:
            read_name = read_name[4:]

        if len(read_name) > 2 and read_name[-2] == '/':
            read_name = read_name[:-2]

        # multi_aligned counts reference lines, aligned counts distinct pairs.
        multi_aligned += 1
        if read_name not in db_dic:
            db_dic[read_name] = []
            aligned += 1

        pos_right, pos2_right = get_right(pos, cigar), get_right(pos2, cigar2)
        db_dic[read_name].append([chr, pos, pos_right, cigar, pos2, pos2_right, cigar2, NM, NM2])

        pair_junctions = is_junction_pair(gtf_junctions_set, chr, pos, cigar, chr2, pos2, cigar2)
        if len(pair_junctions) > 0:
            if read_name not in db_junction_dic:
                db_junction_dic[read_name] = []

            for junction_str, is_gtf_junction in pair_junctions:
                db_junction_dic[read_name].append([junction_str, is_gtf_junction])

                # DK - for debugging purposes
                if junction_str not in junction_pair_dic:
                    junction_pair_dic[junction_str] = []
                junction_pair_dic[junction_str].append(line[:-1])

    file.close()

    # temp_junctions: junctions seen in the reference alignments;
    # temp_gtf_junctions: the subset that could be tied to a GTF junction.
    temp_junctions, temp_gtf_junctions = set(), set()
    for read_name, can_junctions in db_junction_dic.items():
        if len(can_junctions) <= 0:
            continue

        # DK - for debugging purposes
        # 1. select the best candidate among spliced alignments if multiple

        # Keeps only the candidates with the shortest intron; currently unused
        # because the call below is commented out.
        def pickup_junction(can_junctions):
            junctions = [can_junctions[0]]
            for i in range(1, len(can_junctions)):
                def get_intron_len(can_junction):
                    chr, left, right = to_junction(can_junction)
                    return right - left - 1

                intron, intron_cmp = get_intron_len(junctions[0][0]), get_intron_len(can_junctions[i][0])

                if intron > intron_cmp:
                    junctions = [can_junctions[i]]
                elif intron == intron_cmp:
                    junctions.append(can_junctions[i])

            return junctions

        # can_junctions = pickup_junction(can_junctions)

        for can_junction in can_junctions:
            found_junction_str = None
            junction_str, is_gtf_junction = can_junction

            # DK - for debugging purposes
            # Junctions supported by five or fewer reference lines are ignored.
            assert junction_str in junction_pair_dic
            if len(junction_pair_dic[junction_str]) <= 5:
                continue

            if is_gtf_junction:
                found_junction_str = junction_str

            # Not an exact GTF junction: retry with the tolerant lookup.
            if not found_junction_str:
                junction = to_junction(junction_str)
                gtf_index = find_in_gtf_junctions(chr_dic, gtf_junctions, junction)

                if gtf_index >= 0:
                    is_gtf_junction = True
                    found_junction_str = to_junction_str(gtf_junctions[gtf_index])

            if found_junction_str:
                temp_gtf_junctions.add(found_junction_str)
                temp_junctions.add(found_junction_str)
            else:
                temp_junctions.add(junction_str)

    # Collapse near-duplicate junctions: sort them, then drop a junction that
    # lies on the same chromosome as the previously kept one, is shifted by the
    # same amount (<= 10) at both ends, and is not a GTF junction.
    temp2_junctions = []
    for junction in temp_junctions:
        temp2_junctions.append(to_junction(junction))
    temp_junctions = sorted(list(temp2_junctions), cmp=junction_cmp)
    temp2_junctions = []
    for can_junction in temp_junctions:
        if len(temp2_junctions) <= 0:
            temp2_junctions.append(can_junction)
        else:
            chr, left, right = temp2_junctions[-1]
            chr2, left2, right2 = can_junction
            if chr == chr2 and \
               abs(left - left2) == abs(right - right2) and \
               abs(left - left2) <= 10 and \
               not to_junction_str(can_junction) in temp_gtf_junctions:
                continue

            temp2_junctions.append(can_junction)

    temp_junctions = set()
    for junction in temp2_junctions:
        temp_junctions.add(to_junction_str(junction))

    # Second pass: classify every query pair.
    file = open(query_sam)
    mapped, unique_mapped, first_mapped, unmapped, mapping_point = 0, 0, 0, 0, 0.0
    snp_mapped, snp_unique_mapped, snp_first_mapped, snp_unmapped = 0, 0, 0, 0
    for line in file:
        if line.startswith('@'):
            continue

        fields = line[:-1].split()
        read_name, chr, pos, cigar, chr2, pos2, cigar2 = fields[:7]
        # Optional tags.  The first NM/Zs tag belongs to mate 1, the second to
        # mate 2.  (The initialiser used to be misspelled `trains_id`, leaving
        # `trans_id` undefined for lines without a TI tag.)
        trans_id, NM, NM2, Zs, Zs2 = None, None, None, None, None
        for field in fields[7:]:
            if field.startswith("TI"):
                trans_id = field[5:]
            elif field.startswith("NM"):
                if NM is None:
                    NM = int(field[5:])
                else:
                    NM2 = int(field[5:])
            elif field.startswith("Zs"):
                if Zs is None:
                    Zs = field[5:]
                else:
                    Zs2 = field[5:]
        # A pair counts towards the snp_* tallies when either mate has a Zs tag.
        snp_included = (Zs is not None or Zs2 is not None)

        pos, pos2 = int(pos), int(pos2)
        pos_right, pos2_right = get_right(pos, cigar), get_right(pos2, cigar2)

        if read_name not in db_dic:
            unmapped += 1
            if snp_included:
                snp_unmapped += 1
            continue

        maps = db_dic[read_name]

        found = False
        found_at_first = False

        if (aligner == "hisat-TLA" and RNA):
            # Relaxed match: same chromosome and a shared end on both mates.
            found, index = pairedInMaps(chr, pos, pos_right, cigar, pos2, pos2_right, cigar2, NM, NM2, maps)
            if found:
                if index == 0:
                    found_at_first = True
        else:
            # Strict match: the whole nine-field record must agree.
            if [chr, pos, pos_right, cigar, pos2, pos2_right, cigar2, NM, NM2] in maps:
                found = True
                if maps.index([chr, pos, pos_right, cigar, pos2, pos2_right, cigar2, NM, NM2]) == 0:
                    found_at_first = True

        # DK - debugging purposes
        # Permanently disabled by the leading `False`.
        if False and len(maps) > 0 and maps[0][-1] + maps[0][-2] < NM + NM2:
            found = True

        # Fallback: same spans and same get_cigar_chars() results for both
        # mates; accepted only when every junction of the reference pair is
        # found in the GTF junction list.
        if not found:
            for idx, map in enumerate(maps):
                if chr == map[0] and \
                   pos == map[1] and \
                   pos_right == map[2] and \
                   get_cigar_chars(cigar) == get_cigar_chars(map[3]) and \
                   pos2 == map[4] and \
                   pos2_right == map[5] and \
                   get_cigar_chars(cigar2) == get_cigar_chars(map[6]):

                    pair_junctions = is_junction_pair(gtf_junctions_set, map[0], map[1], map[3], map[0], map[4], map[6])
                    if True:
                        found_list = [False for i in range(len(pair_junctions))]
                        for j in range(len(pair_junctions)):
                            junction_str, is_gtf_junction = pair_junctions[j]
                            junction = to_junction(junction_str)
                            gtf_index = find_in_gtf_junctions(chr_dic, gtf_junctions, junction)
                            if gtf_index >= 0:
                                found_list[j] = True
                        found = not (False in found_list)
                    else:
                        found = False

                    if found:
                        if idx == 0:
                            found_at_first = True
                        break

        if found:
            print >> mapped_file, read_name
            mapped += 1
            if snp_included:
                snp_mapped += 1
            if len(maps) == 1:
                unique_mapped += 1
                print >> uniq_mapped_file, read_name
                if snp_included:
                    snp_unique_mapped += 1
            if found_at_first:
                print >> first_mapped_file, read_name
                first_mapped += 1
                if snp_included:
                    snp_first_mapped += 1

            # Each correctly mapped pair contributes 1 / (number of reference alignments).
            mapping_point += (1.0 / len(maps))
        else:
            unmapped += 1
            if snp_included:
                snp_unmapped += 1

    file.close()
    mapped_file.close()
    uniq_mapped_file.close()
    first_mapped_file.close()

    # DK - for debugging purposes
    # Count non-GTF junctions, split by is_canonical_junction().
    # NOTE(review): compare_single_sam additionally skips junctions listed in
    # ex_gtf_junctions here; this function never did -- confirm whether the
    # difference is intended before aligning the two.
    false_can_junctions, false_noncan_junctions = 0, 0
    for junction_str in temp_junctions:
        if junction_str in temp_gtf_junctions:
            continue
        if is_canonical_junction(chr_dic, to_junction(junction_str)):
            false_can_junctions += 1
        else:
            false_noncan_junctions += 1
    print >> sys.stderr, "\t\t\tfalse junctions: %d (canonical), %d (non-canonical)" % (false_can_junctions, false_noncan_junctions)

    return mapped, unique_mapped, first_mapped, unmapped, aligned, multi_aligned, \
        snp_mapped, snp_unique_mapped, snp_first_mapped, snp_unmapped, \
        len(temp_junctions), len(temp_gtf_junctions), mapping_point
def extract_mapped_unmapped(read_fname, mapped_id_fname, mapped_fname, unmapped_fname, read2_fname = "", mapped2_fname = "", unmapped2_fname = ""):
    """Split a read file into mapped and unmapped reads by integer read id.

    Parameters:
      read_fname       -- input read file; a record starts on a line beginning
                          with "@" followed by an integer read id.
      mapped_id_fname  -- text file with one integer read id per line.
      mapped_fname     -- output file receiving records whose id is listed.
      unmapped_fname   -- output file receiving all other lines.
      read2_fname, mapped2_fname, unmapped2_fname
                       -- optional second-mate files, split with the same id
                          set; both output names are required (asserted) when
                          read2_fname is given.

    Lines that precede the first "@" header go to the unmapped file.
    Raises ValueError when an id cannot be parsed as an integer.
    """
    mapped_ids = set()
    with open(mapped_id_fname) as mapped_id_file:
        for line in mapped_id_file:
            # int() ignores surrounding whitespace, so the id is parsed
            # correctly whether or not the last line ends with a newline.
            if line.strip():
                mapped_ids.add(int(line))

    def write_reads(read_fname, mapped_fname, unmapped_fname):
        # Copy every line of read_fname to exactly one of the two outputs;
        # the destination changes only at "@" header lines.
        # Nested single-item `with` blocks keep this valid on Python 2.6.
        with open(mapped_fname, "w") as mapped_file:
            with open(unmapped_fname, "w") as unmapped_file:
                with open(read_fname) as read_file:
                    write = False
                    for line in read_file:
                        if line.startswith("@"):
                            write = int(line[1:]) in mapped_ids

                        # Strip only the newline so a final line without one
                        # does not lose its last character.
                        record = line.rstrip("\n") + "\n"
                        if write:
                            mapped_file.write(record)
                        else:
                            unmapped_file.write(record)

    write_reads(read_fname, mapped_fname, unmapped_fname)
    if read2_fname != "":
        assert mapped2_fname != ""
        assert unmapped2_fname != ""
        write_reads(read2_fname, mapped2_fname, unmapped2_fname)
def sql_execute(sql_db, sql_query):
    """Run one SQL statement through the `sqlite3` command-line program.

    A ";" is appended to sql_query, and "\t" is passed as the column
    separator.  Returns the captured standard output with its last character
    removed (presumably the trailing newline).
    """
    argv = ["sqlite3", sql_db, "-separator", "\t", "%s;" % sql_query]
    # print >> sys.stderr, argv
    child = subprocess.Popen(argv, stdout=subprocess.PIPE)
    captured = child.communicate()[0]
    return captured[:-1]


def create_sql_db(sql_db):
    """Create the ReadCosts table in the database file `sql_db`.

    Does nothing except print a notice to stderr when the file already exists.
    """
    if os.path.exists(sql_db):
        print >> sys.stderr, sql_db, "already exists!"
        return

    # (column name, SQLite column definition), in table order.
    schema = (
        ("id", "integer primary key autoincrement"),
        ("genome", "text"),
        ("head", "text"),
        ("end_type", "text"),
        ("type", "text"),
        ("aligner", "text"),
        ("version", "text"),
        ("num_reads", "integer"),
        ("mapped_reads", "integer"),
        ("unique_mapped_reads", "integer"),
        ("unmapped_reads", "integer"),
        ("mapping_point", "real"),
        ("snp_mapped_reads", "integer"),
        ("snp_unique_mapped_reads", "integer"),
        ("snp_unmapped_reads", "integer"),
        ("time", "real"),
        ("mem", "integer"),
        ("true_gtf_junctions", "integer"),
        ("temp_junctions", "integer"),
        ("temp_gtf_junctions", "integer"),
        ("host", "text"),
        ("created", "text"),
        ("cmd", "text"),
    )

    column_defs = [("%s %s" % (col_name, col_type)) for col_name, col_type in schema]
    sql_create_table = "CREATE TABLE ReadCosts (" + ", ".join(column_defs) + ");"
    sql_execute(sql_db, sql_create_table)
def write_analysis_data(sql_db, genome_name, database_name):
    """Export the newest ReadCosts row per aligner and read type to TSV files.

    Two files are written in the current directory,
    "<genome_name>_<database_name>_single.analysis" and
    "<genome_name>_<database_name>_paired.analysis".  Each starts with a
    tab-separated header followed by at most one row (the most recently
    created one) for every aligner / read-type combination stored in the
    database.  Nothing is written when sql_db does not exist.

    The header is generated from the same column list as the SELECT, so it
    always has one name per data column.  It previously omitted the three
    snp_* columns and was therefore three names short of every row.
    """
    if not os.path.exists(sql_db):
        return

    aligners = sql_execute(sql_db, "SELECT aligner FROM ReadCosts GROUP BY aligner").split()

    # Keep the canonical read-type order, restricted to types present in the table.
    can_read_types = ["all", "M", "2M_gt_15", "2M_8_15", "2M_1_7", "gt_2M"]
    stored_read_types = sql_execute(sql_db, "SELECT type FROM ReadCosts GROUP BY type").split()
    read_types = [read_type for read_type in can_read_types if read_type in stored_read_types]

    # Single source of truth for both the header row and the SELECT list.
    columns = [
        "end_type", "type", "aligner", "num_reads", "time", "mem",
        "mapped_reads", "unique_mapped_reads", "unmapped_reads", "mapping_point",
        "snp_mapped_reads", "snp_unique_mapped_reads", "snp_unmapped_reads",
        "true_gtf_junctions", "temp_junctions", "temp_gtf_junctions",
    ]
    sql_select = "SELECT " + ", ".join(columns) + " FROM ReadCosts"

    for paired in [False, True]:
        if paired:
            end_type = "paired"
        else:
            end_type = "single"
        database_fname = genome_name + "_" + database_name + "_" + end_type + ".analysis"

        database_file = open(database_fname, "w")
        try:
            database_file.write("\t".join(columns) + "\n")
            for aligner in aligners:
                for read_type in read_types:
                    sql_row = sql_select
                    sql_row += " WHERE genome = '%s' and head = '%s' and aligner = '%s' and type = '%s' and end_type = '%s' ORDER BY created DESC LIMIT 1" % (genome_name, database_name, aligner, read_type, end_type)
                    output = sql_execute(sql_db, sql_row)
                    # An empty result means no run was recorded for this combination.
                    if output:
                        database_file.write(output + "\n")
        finally:
            # Close the file even when a query fails part-way through.
            database_file.close()
"rep-100-300", "", ""], + # ["hisat2", "", "rep_mm", "", ""], + # ["hisat2", "", "", "", "--sensitive"], + # ["hisat2", "", "rep", "", "--sensitive"], + # ["hisat2", "", "", "", "--very-sensitive"], + # ["hisat2", "", "snp", "", ""], + # ["hisat2", "", "snp", "", "--sensitive"], + # ["hisat2", "", "snp_noht", "", ""], + # ["hisat2", "x2", "", "", ""], + # ["hisat2", "x1", "tran", "", ""], + # ["hisat2", "", "tran", "", ""], + # ["hisat2", "", "snp_tran", "", ""], + # ["hisat2", "x1", "snp_tran", "", ""], + # ["hisat2", "x1", "snp_tran_ercc", "", ""], + # ["tophat2", "gtfonly", "", "", ""], + # ["tophat2", "gtf", "", "", ""], + #["star", "", "", "", ""], + # ["star", "x2", "", "", ""], + # ["star", "gtf", "", "", ""], + # ["bowtie", "", "", "", ""], + #["bowtie2", "", "", "", ""], + # ["bowtie2", "", "", "", "-k 10"], + # ["bowtie2", "", "", "", "-k 1000 --extends 2000"], + # ["gsnap", "", "", "", ""], + # ["bwa", "mem", "", "", ""], + #["bwa", "mem", "", "", "-a"], + # ["hisat2", "", "snp", "", ""], + # ["hisat2", "", "tran", "", ""], + # ["hisat2", "", "snp_tran", "", ""], + # ["vg", "", "", "", ""], + # ["vg", "", "", "", "-M 10"], + # ["vg", "", "snp", "", ""], + # ["vg", "", "snp", "", "-M 10"], + # ["minimap2", "", "", "", ""], + ["hisat-TLA", "", "", "", ""], + ] + readtypes = ["all"] + verbose = True + debug = False + # sql_write = False + + cwd = os.getcwd() + if len(cwd.split("reads_")) > 1: + genome = cwd.split("reads_")[1].split("_")[0] + else: + genome = "genome" + RNA = (cwd.find("RNA") != -1) + + test_small = (genome != "genome") + + if runtime_only: + verbose = True + + chr_dic = read_genome("../../data/%s.fa" % genome) + gtf_junctions = extract_splice_sites("../../data/%s.gtf" % genome) + repeat_db, repeat_map = read_repeatdb("../../data/%s_rep.rep.info" % genome) + align_stat = [] + for paired in [False, True]: + if not paired and not single_end: + continue + if paired and not paired_end: + continue + for readtype in readtypes: + if paired: + 
base_fname = data_base + "_paired" + type_sam_fname = base_fname + "_" + readtype + ".sam" + type_read1_fname = base_fname + "_1_" + readtype + ".fa" + type_read2_fname = base_fname + "_2_" + readtype + ".fa" + type_junction_fname = base_fname + "_" + readtype + ".junc" + else: + base_fname = data_base + "_single" + type_sam_fname = base_fname + "_" + readtype + ".sam" + type_read1_fname = base_fname + "_" + readtype + ".fa" + type_read2_fname = "" + type_junction_fname = base_fname + "_" + readtype + ".junc" + + assert os.path.exists(type_sam_fname) and os.path.exists(type_junction_fname) + numreads = 0 + type_sam_file = open(type_sam_fname) + for line in type_sam_file: + numreads += 1 + type_sam_file.close() + if numreads <= 0: + continue + print >> sys.stderr, "%s\t%d" % (readtype, numreads) + + junctions, junctions_set = [], set() + type_junction_file = open(type_junction_fname) + for line in type_junction_file: + chr, left, right = line[:-1].split() + left, right = int(left), int(right) + junctions.append([chr, left, right]) + junctions_set.add(to_junction_str([chr, left, right])) + + type_junction_file.close() + + aligner_bin_base = "../../../aligners/bin" + def get_aligner_version(aligner, version): + version = "" + if aligner == "hisat2" or \ + aligner == "hisat" or \ + aligner == "bowtie" or \ + aligner == "bowtie2": + if version: + cmd = ["%s/%s_%s/%s" % (aligner_bin_base, aligner, version, aligner)] + else: + cmd = ["%s/%s" % (aligner_bin_base, aligner)] + cmd += ["--version"] + cmd_process = subprocess.Popen(cmd, stdout=subprocess.PIPE) + version = cmd_process.communicate()[0][:-1].split("\n")[0] + version = version.split()[-1] + elif aligner == "tophat2": + cmd = ["%s/tophat" % (aligner_bin_base)] + cmd += ["--version"] + cmd_process = subprocess.Popen(cmd, stdout=subprocess.PIPE) + version = cmd_process.communicate()[0][:-1].split()[-1] + elif aligner in ["star", "starx2"]: + version = "2.4.2a" + elif aligner == "gsnap": + cmd = ["%s/gsnap" % 
(aligner_bin_base)] + cmd_process = subprocess.Popen(cmd, stderr=subprocess.PIPE) + version = cmd_process.communicate()[1][:-1].split("\n")[0] + version = version.split()[2] + elif aligner == "bwa": + if version: + cmd = ["%s/bwa_%s/bwa" % (aligner_bin_base, version)] + else: + cmd = ["%s/bwa" % (aligner_bin_base)] + cmd_process = subprocess.Popen(cmd, stderr=subprocess.PIPE) + version = cmd_process.communicate()[1][:-1].split("\n")[2] + version = version.split()[1] + elif aligner == "vg": + cmd = ["%s/vg" % (aligner_bin_base)] + cmd_process = subprocess.Popen(cmd, stderr=subprocess.PIPE) + version = cmd_process.communicate()[1][:-1].split("\n")[0] + version = version.split()[5] + elif aligner == "minimap2": + cmd = ["%s/minimap2" % (aligner_bin_base)] + cmd += ["--version"] + cmd_process = subprocess.Popen(cmd, stdout=subprocess.PIPE) + version = cmd_process.communicate()[0][:-1].split("\n")[0] + + return version + + index_base = "../../../indexes" + index_add = "" + if genome != "genome": + index_add = "_" + genome + def get_aligner_cmd(RNA, aligner, type, index_type, version, options, read1_fname, read2_fname, out_fname, cmd_idx = 0): + cmd = ["/usr/bin/time"] + if osx_mode: + cmd += ['-l'] + if aligner == "hisat2": + if version: + cmd += ["%s/hisat2_%s/hisat2" % (aligner_bin_base, version)] + else: + cmd += ["%s/hisat2" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + cmd += ["-f"] + # cmd += ["-k", "100"] + # cmd += ["--max-seeds", "100"] + # cmd += ["--score-min", "C,-100"] + # cmd += ["--pen-cansplice", "0"] + # cmd += ["--pen-noncansplice", "12"] + # cmd += ["--pen-intronlen", "G,-8,1"] + # cmd += ["--metrics", "1", + # "--metrics-file", "metrics.out"] + + if not RNA: + cmd += ["--no-spliced-alignment"] + + if type in ["x1", "x2"]: + cmd += ["--no-temp-splicesite"] + + # cmd += ["--no-anchorstop"] + if version == "" or \ + (version != "" and int(version) >= 210): + cmd += ["--new-summary", + "--summary-file", out_fname + 
".summary"] + + if version == "" or int(version) >= 220: + cmd += ["--repeat"] + + # cmd += ["--dta"] + # cmd += ["--dta-cufflinks"] + + if options != "": + cmd += options.split(' ') + + if type == "x2": + if cmd_idx == 0: + cmd += ["--novel-splicesite-outfile"] + else: + cmd += ["--novel-splicesite-infile"] + cmd += ["splicesites.txt"] + + # "--novel-splicesite-infile", + # "../splicesites.txt", + # "--rna-strandness", + # "FR", + if version: + index_cmd = "%s/HISAT2_%s%s/" % (index_base, version, index_add) + genome + else: + index_cmd = "%s/HISAT2%s/" % (index_base, index_add) + genome + if index_type: + index_cmd += ("_" + index_type) + cmd += [index_cmd] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += [read1_fname] + + elif aligner == "hisat-TLA": + + if version: + cmd += ["%s/hisat2_%s/hisat2" % (aligner_bin_base, version)] + else: + cmd += ["%s/hisat2" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + cmd += ["-f"] + if not RNA: + cmd += ["--no-spliced-alignment"] + if type in ["x1", "x2"]: + cmd += ["--no-temp-splicesite"] + if version == "" or \ + (version != "" and int(version) >= 210): + cmd += ["--new-summary", + "--summary-file", out_fname + ".summary"] + if version == "" or int(version) >= 220: + cmd += ["--repeat"] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += ["-U", read1_fname] + + cmd += ["--TLA"] + cmd += ["--base-change"] + cmd += [baseChange] + cmd += ["--expand-repeat"] + + index1_cmd = "%s/HISAT2%s/" % (index_base, index_add) + genome + '_' + baseChange + if index_type: + index1_cmd += ("_" + index_type) + cmd += ["--index1"] + cmd += [index1_cmd] + + index2_cmd = "%s/HISAT2%s/" % (index_base, index_add) + genome + '_' + reverse_complement(baseChange[0]) + reverse_complement(baseChange[1]) + if index_type: + index2_cmd += ("_" + index_type) + cmd += ["--index2"] + cmd += [index2_cmd] + + cmd += ["--reference"] + cmd += ["../../../data/%s.fa" % 
genome] + + + + elif aligner == "hisat": + cmd += ["%s/hisat" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + cmd += ["-f"] + # cmd += ["-k", "5"] + # cmd += ["--score-min", "C,-18"] + if version != "": + version = int(version) + else: + version = sys.maxint + + if not RNA: + cmd += ["--no-spliced-alignment"] + + if type in ["x1", "x2"]: + cmd += ["--no-temp-splicesite"] + + """ + cmd += ["--rdg", "100,100", + "--rfg", "100,100"] + """ + + if type == "x2": + if cmd_idx == 0: + cmd += ["--novel-splicesite-outfile"] + else: + cmd += ["--novel-splicesite-infile"] + cmd += ["splicesites.txt"] + + # "--novel-splicesite-infile", + # "../splicesites.txt", + # "--rna-strandness", + # "FR", + cmd += ["%s/HISAT%s/" % (index_base, index_add) + genome] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += [read1_fname] + elif aligner == "tophat2": + cmd += ["%s/tophat" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + if type.find("gtf") != -1: + cmd += ["--transcriptome-index=%s/HISAT%s/gtf/%s" % (index_base, index_add, genome)] + if type == "gtfonly": + cmd += ["--transcriptome-only"] + cmd += ["--read-edit-dist", "3"] + cmd += ["--no-sort-bam"] + cmd += ["--read-realign-edit-dist", "0"] + cmd += ["--keep-tmp", + "%s/HISAT%s/" % (index_base, index_add) + genome, + read1_fname] + if paired: + cmd += [read2_fname] + elif aligner == "star": + cmd += ["%s/STAR" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["--runThreadN", str(num_threads)] + cmd += ["--genomeDir"] + if cmd_idx == 0: + if type == "gtf": + cmd += ["%s/STAR%s/gtf" % (index_base, index_add)] + else: + cmd += ["%s/STAR%s" % (index_base, index_add)] + else: + assert cmd_idx == 1 + cmd += ["."] + + if desktop: + cmd += ["--genomeLoad", "NoSharedMemory"] + else: + cmd += ["--genomeLoad", "LoadAndKeep"] + if type == "x2": + if cmd_idx == 1: + cmd += ["--alignSJDBoverhangMin", "1"] + cmd += ["--readFilesIn", + read1_fname] + if 
paired: + cmd += [read2_fname] + if paired: + cmd += ["--outFilterMismatchNmax", "6"] + else: + cmd += ["--outFilterMismatchNmax", "3"] + elif aligner == "bowtie": + cmd += ["%s/bowtie" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + cmd += ["-f", + "--sam", + "-k", "10"] + cmd += ["-n", "3"] + if paired: + cmd += ["-X", "500"] + cmd += ["%s/Bowtie%s/" % (index_base, index_add) + genome] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += [read1_fname] + elif aligner == "bowtie2": + if version: + cmd += ["%s/bowtie2_%s/bowtie2" % (aligner_bin_base, version)] + else: + cmd += ["%s/bowtie2" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + cmd += ["-f"] + if options != "": + cmd += options.split(' ') + if version: + cmd += ["-x %s/Bowtie2_%s%s/" % (index_base, version, index_add) + genome] + else: + cmd += ["-x %s/Bowtie2%s/" % (index_base, index_add) + genome] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += [read1_fname] + elif aligner == "gsnap": + cmd += ["%s/gsnap" % (aligner_bin_base), + "-A", + "sam"] + if num_threads > 1: + cmd += ["-t", str(num_threads)] + cmd += ["--max-mismatches=3", + "-D", "%s/GSNAP%s" % (index_base, index_add), + "-N", "1", + "-d", genome, + read1_fname] + if paired: + cmd += [read2_fname] + elif aligner == "bwa": + if version: + cmd += ["%s/bwa_%s/bwa" % (aligner_bin_base, version)] + else: + cmd += ["%s/bwa" % (aligner_bin_base)] + if type in ["mem", "aln"]: + cmd += [type] + elif type == "sw": + cmd += ["bwa" + type] + if num_threads > 1: + cmd += ["-t", str(num_threads)] + if options != "": + cmd += options.split(' ') + # if paired: + # cmd += ["-T", "60"] + if version: + cmd += ["%s/BWA_%s%s/%s.fa" % (index_base, version, index_add, genome)] + else: + cmd += ["%s/BWA%s/%s.fa" % (index_base, index_add, genome)] + cmd += [read1_fname] + if paired: + cmd += [read2_fname] + elif aligner == "vg": + # vg map -d 22 -t 6 
-M 10 -f ../sim-1.fa -f ../sim-2.fa > result.sam.gam + cmd += ["%s/vg" % (aligner_bin_base)] + cmd += ["map"] + cmd += ["-t", str(num_threads)] + cmd += ["--surject-to", "sam"] + index_cmd = "%s/VG%s/" % (index_base, index_add) + genome + if index_type: + index_cmd += ("_" + index_type) + + if options != "": + cmd += options.split(' ') + + cmd += ["-d", index_cmd] + + cmd += ["-f", read1_fname] + if paired: + cmd += ["-f", read2_fname] + elif aligner == "minimap2": + # minimap2 -a -x sr 22.mmi sim_1.fa sim_2.fa > result.sam + cmd += ["%s/minimap2" % (aligner_bin_base)] + cmd += ["-a"] + cmd += ["-x", "sr"] + index_cmd = "%s/minimap2%s/" % (index_base, index_add) + genome + if index_type: + index_cmd += ("_" + index_type) + index_cmd += ".mmi" + cmd += [index_cmd] + cmd += [read1_fname] + if paired: + cmd += [read2_fname] + else: + assert False + + return cmd + + for aligner, type, index_type, version, options in aligners: + skip = False + if len(test_aligners) > 0: + skip = True + for test_aligner in test_aligners: + if aligner == test_aligner: + skip = False + if skip: + continue + + aligner_name = aligner + type + if version != "": + aligner_name += ("_%s" % version) + if aligner == "hisat2" and index_type != "": + aligner_name += ("_" + index_type) + if aligner == "vg" and index_type != "": + aligner_name += ("_" + index_type) + + two_step = (aligner == "tophat2" or type == "x2" or (aligner in ["hisat2", "hisat"] and type == "")) + if RNA and readtype != "M": + if aligner in ["bowtie", "bowtie2", "bwa"]: + continue + if readtype != "all": + if two_step: + continue + if not RNA and readtype != "all": + continue + + print >> sys.stderr, "\t%s\t%s" % (aligner_name, str(datetime.now())) + if options != "": + option_name = options.replace(' ', '').replace('-', '').replace(',', '') + aligner_name = aligner_name + '_' + option_name + if paired: + aligner_dir = aligner_name + "_paired" + else: + aligner_dir = aligner_name + "_single" + + if fresh and 
os.path.exists(aligner_dir): + os.system("rm -rf %s" % aligner_dir) + + if not os.path.exists(aligner_dir): + os.mkdir(aligner_dir) + + os.chdir(aligner_dir) + + out_fname = base_fname + "_" + readtype + ".sam" + out_fname2 = out_fname + "2" + duration = -1.0 + mem_usage = '0' + if not os.path.exists(out_fname): + if not os.path.exists("../one.fa") or not os.path.exists("../two.fa"): + os.system("head -400 ../%s_1.fa > ../one.fa" % (data_base)) + os.system("head -400 ../%s_2.fa > ../two.fa" % (data_base)) + + if runtime_only: + out_fname = "/dev/null" + out_fname2 = "/dev/null" + + if not two_step: + align_stat.append([readtype, aligner_name]) + + # dummy commands for caching index and simulated reads + loading_time = 0 + if aligner != "tophat2": + for i in range(3): + dummy_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../one.fa", "../two.fa", "/dev/null") + start_time = datetime.now() + if verbose: + print >> sys.stderr, start_time, "\t", " ".join(dummy_cmd) + if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa"]: + proc = subprocess.Popen(dummy_cmd, stdout=open("/dev/null", "w"), stderr=subprocess.PIPE) + else: + proc = subprocess.Popen(dummy_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc.communicate() + finish_time = datetime.now() + duration = finish_time - start_time + duration = duration.total_seconds() + if verbose: + print >> sys.stderr, finish_time, "duration:", duration + loading_time = duration + + # Align all reads + aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname) + start_time = datetime.now() + if verbose: + print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd) + if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa", "vg", "minimap2", "hisat-TLA"]: + proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE) + else: + proc = 
subprocess.Popen(aligner_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + _, mem_usage = proc.communicate() + mem_usage = parse_mem_usage(mem_usage) + finish_time = datetime.now() + duration = finish_time - start_time + duration = duration.total_seconds() - loading_time + if duration < 0.1: + duration = 0.1 + if verbose: + print >> sys.stderr, "\t", finish_time, "finished:", duration + + if debug and aligner == "hisat2": + os.system("cat metrics.out") + print >> sys.stderr, "\ttime: %.4f" % (duration) + + if aligner == "star" and type in ["", "gtf"]: + os.system("mv Aligned.out.sam %s" % out_fname) + elif aligner in ["hisat2", "hisat"] and type == "x2": + aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1) + start_time = datetime.now() + if verbose: + print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd) + proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE) + proc.communicate() + finish_time = datetime.now() + duration += (finish_time - start_time).total_seconds() + duration -= loading_time + if duration < 0.1: + duration = 0.1 + if verbose: + print >> sys.stderr, "\t", finish_time, "finished:", duration + elif aligner == "star" and type == "x2": + assert os.path.exists("SJ.out.tab") + os.system("awk 'BEGIN {OFS=\"\t\"; strChar[0]=\".\"; strChar[1]=\"+\"; strChar[2]=\"-\";} {if($5>0){print $1,$2,$3,strChar[$4]}}' SJ.out.tab > SJ.out.tab.Pass1.sjdb") + for file in os.listdir("."): + if file in ["SJ.out.tab.Pass1.sjdb", "genome.fa"]: + continue + os.remove(file) + star_index_cmd = "%s/STAR --genomeDir ./ --runMode genomeGenerate --genomeFastaFiles ../../../data/%s.fa --sjdbFileChrStartEnd SJ.out.tab.Pass1.sjdb --sjdbOverhang 99 --runThreadN %d" % (aligner_bin_base, genome, num_threads) + if verbose: + print >> sys.stderr, "\t", datetime.now(), star_index_cmd + os.system(star_index_cmd) + if verbose: + print >> sys.stderr, 
"\t", datetime.now(), " ".join(dummy_cmd) + proc = subprocess.Popen(dummy_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc.communicate() + if verbose: + print >> sys.stderr, "\t", datetime.now(), "finished" + aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1) + start_time = datetime.now() + if verbose: + print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd) + proc = subprocess.Popen(aligner_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc.communicate() + finish_time = datetime.now() + duration += (finish_time - start_time).total_seconds() + duration -= loading_time + if duration < 0.1: + duration = 0.1 + if verbose: + print >> sys.stderr, "\t", finish_time, "finished:", duration + os.system("mv Aligned.out.sam %s" % out_fname) + elif aligner == "tophat2": + os.system("samtools sort -n tophat_out/accepted_hits.bam accepted_hits; samtools view -h accepted_hits.bam > %s" % out_fname) + elif aligner == "vg": + index_name = "%s/VG%s/" % (index_base, index_add) + genome + if index_type: + index_name += ("_" + index_type) + + if aligner in ["gsnap", "tophat2"]: + os.system("tar cvzf %s.tar.gz %s &> /dev/null" % (out_fname, out_fname)) + + if runtime_only: + print >> sys.stderr, "\t\t\tMemory Usage: %dMB" % (int(mem_usage) / 1024) + os.chdir("..") + continue + + if not os.path.exists(out_fname2): + debug_dic = {} + pid_list = [] + if paired: + if mp_mode: + for i in xrange(mp_num): + p = Process(target=extract_pair, args=(out_fname, out_fname2, chr_dic, RNA, aligner, version, repeat_db, repeat_map, debug_dic, i)) + pid_list.append(p) + p.start() + + for p in pid_list: + p.join() + + # merge + os.system("mv %s %s" % (out_fname2 + ".0", out_fname2)) + for i in xrange(1, mp_num): + os.system("cat %s >> %s" % (out_fname2 + "." + str(i), out_fname2)) + os.system("rm %s" % (out_fname2 + "." 
+ str(i))) + + else: + extract_pair(out_fname, out_fname2, chr_dic, RNA, aligner, version, repeat_db, repeat_map, debug_dic, -1) + + + else: + if mp_mode: + # Prepare queues + for i in xrange(mp_num): + p = Process(target=extract_single, args=(out_fname, out_fname2, chr_dic, aligner, version, repeat_db, repeat_map, debug_dic, i)) + pid_list.append(p) + p.start() + + # wait + for p in pid_list: + p.join() + + # merge + os.system("mv %s %s" % (out_fname2 + ".0", out_fname2)) + for i in xrange(1, mp_num): + os.system("cat %s >> %s" % (out_fname2 + "." + str(i), out_fname2)) + os.system("rm %s" % (out_fname2 + "." + str(i))) + + else: + extract_single(out_fname, out_fname2, chr_dic, aligner, version, repeat_db, repeat_map, debug_dic, -1) + + for readtype2 in readtypes: + if not two_step and readtype != readtype2: + continue + + type_sam_fname2 = base_fname + "_" + readtype2 + ".sam" + if os.path.exists(type_sam_fname2 + ".done"): + continue + + if paired: + type_read_fname2 = base_fname + "_1_" + readtype2 + ".fa" + else: + type_read_fname2 = base_fname + "_" + readtype2 + ".fa" + mapped_id_fname = base_fname + "_" + readtype2 + ".read_id" + if paired: + mapped, unique_mapped, first_mapped, unmapped, aligned, multi_aligned, \ + snp_mapped, snp_unique_mapped, snp_first_mapped, snp_unmapped, \ + temp_junctions, temp_gtf_junctions, mapping_point \ + = compare_paired_sam(RNA, out_fname2, "../" + type_sam_fname2, mapped_id_fname, chr_dic, junctions, junctions_set, gtf_junctions, aligner) + else: + mapped, unique_mapped, first_mapped, unmapped, aligned, multi_aligned, \ + snp_mapped, snp_unique_mapped, snp_first_mapped, snp_unmapped, \ + temp_junctions, temp_gtf_junctions, mapping_point \ + = compare_single_sam(RNA, out_fname2, "../" + type_sam_fname2, mapped_id_fname, chr_dic, junctions, junctions_set, gtf_junctions, aligner) + proc = subprocess.Popen(["wc", "-l", "../" + type_read_fname2], stdout=subprocess.PIPE) + out = proc.communicate()[0] + numreads = 
int(out.split()[0]) / 2 + assert mapped + unmapped == numreads + + if two_step: + print >> sys.stderr, "\t\t%s" % readtype2 + print >> sys.stderr, "\t\taligned: %d, multi aligned: %d" % (aligned, multi_aligned) + print >> sys.stderr, "\t\tcorrectly mapped: %d (%.2f%%) mapping_point: %.2f" % (mapped, float(mapped) * 100.0 / numreads, mapping_point * 100.0 / numreads) + print >> sys.stderr, "\t\tcorrectly mapped at first: %d (%.2f%%)" % (first_mapped, float(first_mapped) * 100.0 / numreads) + print >> sys.stderr, "\t\tuniquely and correctly mapped: %d (%.2f%%)" % (unique_mapped, float(unique_mapped) * 100.0 / numreads) + snp_numreads = snp_mapped + snp_unmapped + if snp_numreads > 0: + print >> sys.stderr, "\t\t\t\tSNP: reads: %d" % (snp_numreads) + print >> sys.stderr, "\t\t\t\tSNP: correctly mapped: %d (%.2f%%)" % (snp_mapped, float(snp_mapped) * 100.0 / snp_numreads) + print >> sys.stderr, "\t\t\t\tSNP: correctly mapped at first: %d (%.2f%%)" % (snp_first_mapped, float(snp_first_mapped) * 100.0 / snp_numreads) + print >> sys.stderr, "\t\t\t\tSNP: uniquely and correctly mapped: %d (%.2f%%)" % (snp_unique_mapped, float(snp_unique_mapped) * 100.0 / snp_numreads) + if readtype == readtype2: + print >> sys.stderr, "\t\t\t%d reads per sec (all)" % (numreads / max(1.0, duration)) + if RNA: + print >> sys.stderr, "\t\tjunc. sensitivity %d / %d (%.2f%%), junc. 
accuracy: %d / %d (%.2f%%)" % \ + (temp_gtf_junctions, len(junctions), float(temp_gtf_junctions) * 100.0 / max(1, len(junctions)), \ + temp_gtf_junctions, temp_junctions, float(temp_gtf_junctions) * 100.0 / max(1, temp_junctions)) + + print >> sys.stderr, "\t\t\tMemory Usage: %dMB" % (int(mem_usage) / 1024) + + if duration > 0.0: + if sql_write and os.path.exists("../" + sql_db_name): + if paired: + end_type = "paired" + else: + end_type = "single" + + mem_used = int(mem_usage) / 1024 + sql_insert = "INSERT INTO \"ReadCosts\" VALUES(NULL, '%s', '%s', '%s', '%s', '%s', '%s', %d, %d, %d, %d, %f, %f, %d, %d, %d, %d, %d, %d, %d, '%s', datetime('now', 'localtime'), '%s');" % \ + (genome, data_base, end_type, readtype2, aligner_name, get_aligner_version(aligner, version), numreads, mapped, unique_mapped, unmapped, mapping_point, snp_mapped, snp_unique_mapped, snp_unmapped, duration, mem_used, len(junctions), temp_junctions, temp_gtf_junctions, platform.node(), " ".join(aligner_cmd)) + sql_execute("../" + sql_db_name, sql_insert) + + if two_step: + align_stat.append([readtype2, aligner_name, numreads, duration, mem_used, mapped, unique_mapped, unmapped, mapping_point, snp_mapped, snp_unique_mapped, snp_unmapped, len(junctions), temp_junctions, temp_gtf_junctions]) + else: + align_stat[-1].extend([numreads, duration, mem_used, mapped, unique_mapped, unmapped, mapping_point, snp_mapped, snp_unique_mapped, snp_unmapped, len(junctions), temp_junctions, temp_gtf_junctions]) + + os.system("touch %s.done" % type_sam_fname2) + + os.chdir("..") + + print >> sys.stdout, "\t".join(["type", "aligner", "all", "all_time", "mem", "mapped", "unique_mapped", "unmapped", "mapping point", "snp_mapped", "snp_unique_mapped", "snp_unmapped", "true_gtf_junctions", "temp_junctions", "temp_gtf_junctions"]) + for line in align_stat: + outstr = "" + for item in line: + if outstr != "": + outstr += "\t" + outstr += str(item) + print >> sys.stdout, outstr + + if os.path.exists(sql_db_name): + 
write_analysis_data(sql_db_name, genome, data_base) + + + +""" +""" +if __name__ == "__main__": + parser = ArgumentParser( + description='test HISAT2, and compare HISAT2 with other popular aligners such as TopHat2, STAR, Bowtie1/2, GSNAP, BWA-mem, etc.') + parser.add_argument('--single-end', + dest='paired_end', + action='store_false', + help='run single-end only') + parser.add_argument('--paired-end', + dest='single_end', + action='store_false', + help='run paired_end only') + parser.add_argument('--aligner-list', + dest='aligner_list', + type=str, + default="", + help='comma-separated list of aligners (e.g. hisat2,bowtie2,bwa') + parser.add_argument('--fresh', + dest='fresh', + action='store_true', + help='delete existing alignment related directories (e.g. hisat2_single)') + parser.add_argument('--runtime-only', '--runtime', + dest='runtime_only', + action='store_true', + help='run programs without evaluation') + parser.add_argument('-p', '--multi-process', + dest='mp_num', + action='store', + type=int, + default=1, + help='Use multiple process mode') + parser.add_argument('--base-change', + dest='baseChange', + type=str, + default="", + help='base change for Hisat-TLA') + parser.add_argument('-v', '--verbose', + dest='verbose', + action='store_true', + help='also print some statistics to stderr') + + args = parser.parse_args() + + aligners = [] + for aligner in args.aligner_list.split(','): + if aligner == "": + continue + aligners.append(aligner) + + mp_num = args.mp_num + mp_mode = (mp_num > 1) + + calculate_read_cost(args.single_end, + args.paired_end, + aligners, + args.fresh, + args.runtime_only, + args.baseChange, + args.verbose) diff --git a/evaluation/simulation/init.py b/evaluation/simulation/init.py new file mode 100644 index 0000000..aa7b3f8 --- /dev/null +++ b/evaluation/simulation/init.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python + +import sys, os +import string, re + +use_message = ''' +''' + +def extract_pair(RNA): + read_dic = {} + pair_reported 
= set() + + out_file = open("sim_paired.sam", "w") + hits_file = open("sim.sam") + for line in hits_file: + if line.startswith('@'): + continue + + fields = line[:-1].split() + read_id, flag, chr1, pos1, mapQ, cigar1, chr2, pos2 = fields[:8] + + flag = int(flag) + assert flag & 0x4 == 0 + pos1, pos2 = int(pos1), int(pos2) + + TI, NM1, Zs1 = "", "", "" + for field in fields[11:]: + if field.startswith("TI"): + TI = "\t" + field + elif field.startswith("NM"): + NM1 = "\t" + field + elif field.startswith("Zs"): + Zs1 = "\t" + field + assert NM1 != "" + assert chr1 == chr2 + + me = "%s\t%s\t%d" % (read_id, chr1, pos1) + partner = "%s\t%s\t%d" % (read_id, chr2, pos2) + if partner in read_dic: + maps = read_dic[partner] + for map in maps: + if map[0] == me: + cigar2, NM2, Zs2 = map[1:4] + if int(pos2) > int(pos1): + p_str = "%s\t%s\t%d\t%s\t%s\t%d\t%s%s%s%s%s%s" % \ + (read_id, chr1, pos1, cigar1, chr2, pos2, cigar2, TI, NM1, NM2, Zs1, Zs2) + else: + p_str = "%s\t%s\t%d\t%s\t%s\t%d\t%s%s%s%s%s%s" % \ + (read_id, chr2, pos2, cigar2, chr1, pos1, cigar1, TI, NM2, NM1, Zs2, Zs1) + + if p_str not in pair_reported: + pair_reported.add(p_str) + print >> out_file, p_str + + if not me in read_dic: + read_dic[me] = [] + + read_dic[me].append([partner, cigar1, NM1, Zs1]) + + + hits_file.close() + out_file.close() + + +def init_reads(read_dir, RNA): + sim1_file = open("sim_1.sam", 'w') + sim2_file = open("sim_2.sam", 'w') + + for line in open("sim.sam"): + if line.startswith('@'): + continue + + fields = line[:-1].split('\t') + read_id, flag, chr, pos, _, cigar = fields[:6] + left_read = int(flag) < 128 + + NM, TI, Zs = "", "", "" + for field in fields: + if field.startswith("NM"): + NM = "\t" + field + elif field.startswith("TI"): + TI = "\t" + field + elif field.startswith("Zs"): + Zs = "\t" + field + + print >> (sim1_file if left_read else sim2_file), \ + "%s\t%s\t%s\t%s%s%s%s" % (read_id, chr, pos, cigar, TI, NM, Zs) + + sim1_file.close() + sim2_file.close() + extract_pair(RNA) 
def to_junction_str(junction):
    """Serialize a junction [chr, left, right] as the string "chr-left-right"."""
    chrom, left, right = junction[0], junction[1], junction[2]
    return "%s-%d-%d" % (chrom, left, right)


def to_junction(junction_str):
    """Parse a "chr-left-right" string into [chr, int(left), int(right)].

    Only the last two '-'-separated fields are treated as coordinates, so a
    chromosome name that itself contains '-' is kept in one piece.
    Fewer than three fields trips the assertion.
    """
    parts = junction_str.rsplit("-", 2)
    assert len(parts) == 3
    chrom, left, right = parts
    return [chrom, int(left), int(right)]


def junction_cmp(a, b):
    """Three-way comparison of two junctions by chr, then left, then right.

    Returns -1, 0 or 1 in the style of a classic cmp function.
    """
    for k in (0, 1, 2):
        if a[k] != b[k]:
            return -1 if a[k] < b[k] else 1
    return 0


def classify_reads(RNA):
    """Split the simulated reads into per-read-type files.

    For single-end and then paired-end data, and for every read type, the
    reads whose CIGAR-derived type matches (or all reads for type "all")
    are selected from sim_1.sam / sim_paired.sam.  DNA runs only use the
    types "all" and "M"; RNA runs add the spliced categories.
    """
    if RNA:
        readtypes = ["all", "M", "2M_gt_15", "2M_8_15", "2M_1_7", "gt_2M"]
    else:
        readtypes = ["all" ,"M"]

    # Rank of each read type; a pair is later given the higher-ranked type
    # of its two mates.
    readtype_order = {}
    for i, readtype in enumerate(readtypes):
        assert readtype not in readtype_order
        readtype_order[readtype] = i

    for paired in [False, True]:
        for readtype in readtypes:
            base_fname = "sim_paired" if paired else "sim_single"
            type_sam_fname = base_fname + "_" + readtype + ".sam"
            type_junction_fname = base_fname + "_" + readtype + ".junc"
            if paired:
                type_read1_fname = base_fname + "_1_" + readtype + ".fa"
                type_read2_fname = base_fname + "_2_" + readtype + ".fa"
            else:
                type_read1_fname = base_fname + "_" + readtype + ".fa"
                type_read2_fname = ""

            # Nothing to do when both outputs of a previous run are present.
            if os.path.exists(type_sam_fname) and \
               os.path.exists(type_junction_fname):
                continue

            read_ids = set()
            junctions = []
            type_sam_file = open(type_sam_fname, "w")
            sam_file = open("sim_paired.sam" if paired else "sim_1.sam")

            cigar_re = re.compile('\d+\w')

            def get_read_type(cigar):
                # Classify one CIGAR string as "M", one of the "2M_*" anchor
                # classes (exactly one intron), or "gt_2M" (anything else).
                tokens = cigar_re.findall(cigar)

                # Collapse the CIGAR to its alternation of M and N runs,
                # e.g. 50M1000N50M -> "MNM"; other operations are ignored.
                shape = ""
                for tok in tokens:
                    op = tok[-1]
                    if op in "MN" and not shape.endswith(op):
                        shape += op

                if shape == "M":
                    return shape
                if shape != "MNM":
                    return "gt_2M"

                def anchor_len(seq):
                    # Total length of the leading run of M/I operations.
                    total = 0
                    for tok in seq:
                        if tok[-1] not in "MI":
                            break
                        total += int(tok[:-1])
                    return total

                assert len(tokens) >= 3
                left_anchor = anchor_len(tokens)
                assert left_anchor > 0
                right_anchor = anchor_len(reversed(tokens))
                assert right_anchor > 0

                min_anchor = min(left_anchor, right_anchor)
                if min_anchor > 15:
                    return "2M_gt_15"
                if min_anchor >= 8:
                    return "2M_8_15"
                if min_anchor >= 1:
                    return "2M_1_7"
                assert False

            # pos must be an integer (the caller converts it); chr is passed
            # through unchanged into each reported junction.
            def get_junctions(chr, pos, cigar_str):
                ops = [(int(tok[:-1]), tok[-1]) for tok in cigar_re.findall(cigar_str)]
                found = []
                cursor = pos
                for idx, (length, op) in enumerate(ops):
                    if op == "N":
                        left = cursor - 1
                        right = cursor + length

                        # An insertion/deletion directly next to the intron
                        # shifts the corresponding junction boundary.
                        if idx > 0:
                            prev_len, prev_op = ops[idx - 1]
                            if prev_op == "I":
                                left += prev_len
                            elif prev_op == "D":
                                left -= prev_len
                        if idx + 1 < len(ops):
                            next_len, next_op = ops[idx + 1]
                            if next_op == "I":
                                right -= next_len
                            elif next_op == "D":
                                right += next_len

                        found.append([chr, left, right])

                    # Only M, N and D advance the position on the reference.
                    if op in "MND":
                        cursor += length

                return found

            for line in sam_file:
                fields = line[:-1].split()
                if paired:
                    read_id, chr, pos, cigar, chr2, pos2, cigar2 = fields[:7]
                else:
                    read_id, chr, pos, cigar = fields[:4]

                read_id = int(read_id)
                readtype2 = get_read_type(cigar)
                if paired:
                    readtype3 = get_read_type(cigar2)
                    assert readtype2 in readtype_order
                    # NOTE: the rest of this loop body (and of classify_reads)
                    # continues on the following line of the flattened dump.
assert readtype3 in readtype_order + if readtype_order[readtype2] < readtype_order[readtype3]: + readtype2 = readtype3 + + if readtype == "all" or readtype == readtype2: + read_ids.add(read_id) + print >> type_sam_file, line[:-1] + junctions += get_junctions(chr, int(pos), cigar) + if paired: + junctions += get_junctions(chr2, int(pos2), cigar2) + + sam_file.close() + type_sam_file.close() + + # make this set non-redundant + junction_set = set() + for junction in junctions: + junction_set.add(to_junction_str(junction)) + + junctions = [] + for junction_str in junction_set: + junctions.append(to_junction(junction_str)) + + # sort the list of junctions + junctions = sorted(junctions, cmp=junction_cmp) + + # write the junctions into a file + type_junction_file = open(type_junction_fname, "w") + for junction in junctions: + print >> type_junction_file, "%s\t%d\t%d" % (junction[0], junction[1], junction[2]) + type_junction_file.close() + + def write_reads(read_fname, type_read_fname): + type_read_file = open(type_read_fname, "w") + read_file = open(read_fname) + + write = False + for line in read_file: + if line[0] == ">": + read_id = int(line[1:-1]) + write = read_id in read_ids + + if write: + print >> type_read_file, line[:-1] + + read_file.close() + type_read_file.close() + + if paired: + write_reads("sim_1.fa", type_read1_fname) + write_reads("sim_2.fa", type_read2_fname) + else: + write_reads("sim_1.fa", type_read1_fname) + + +def init(): + read_dir_base = "../reads/simulation/" + read_dirs = os.listdir(read_dir_base) + for read_dir in read_dirs: + if os.path.exists(read_dir): + continue + + if not os.path.exists(read_dir_base + read_dir + "/sim.sam") or \ + not os.path.exists(read_dir_base + read_dir + "/sim_1.fa") or \ + not os.path.exists(read_dir_base + read_dir + "/sim_2.fa"): + continue + + print >> sys.stderr, "Processing", read_dir, "..." + + os.mkdir(read_dir) + os.chdir(read_dir) + os.system("ln -s ../%s%s/* ." 
% (read_dir_base, read_dir)) + os.system("ln -s sim_1.fa 1.fa") + os.system("ln -s sim_2.fa 2.fa") + + RNA = (read_dir.find("RNA") != -1) + init_reads(read_dir, RNA) + classify_reads(RNA) + os.system("ln -s ../calculate_read_cost.py .") + + os.chdir("..") + + +if __name__ == "__main__": + init() diff --git a/evaluation/tests/CODIS/README b/evaluation/tests/CODIS/README new file mode 100644 index 0000000..eac41f7 --- /dev/null +++ b/evaluation/tests/CODIS/README @@ -0,0 +1,21 @@ +To extract HLA alleles and variants + + ./hisatgenotype_extract_vars.py --base codis --inter-gap 30 --intra-gap 50 --min-var-freq 0.1 + ./hisatgenotype_extract_vars.py --base hla --locus-list A --inter-gap 30 --intra-gap 50 --min-var-freq 0.1 + + +To build genotype genome + + ./hisatgenotype_build_genome.py genome.fa genotype_genome -p 3 --verbose + + +For DNA fingerprinting + ./hisatgenotype_locus.py --verbose --base codis --locus-list D13S317 --fragment-len 200 --no-assembly --debug paired + + + +To run HISAT-genotype + ./hisatgenotype_locus.py --base hla --locus-list A --simulate-interval 10 --perbase-errorrate 0.3 --perbase-snprate 0.1 --num-editdist 2 --debug paired + + for i in `ls -l ILMN/NA128*1.fq.gz | awk '{print $NF}' | cut -d'/' -f2 | cut -d'.' 
-f1`; do echo $i; ./hisatgenotype_locus.py --locus-list A --aligner-list hisat2.graph --num-editdist 2 --reads ILMN/$i.extracted.1.fq.gz,ILMN/$i.extracted.2.fq.gz --assembly-base assembly_graph_$i 2> $i.tmp; done + diff --git a/evaluation/tests/CODIS/genome.fa b/evaluation/tests/CODIS/genome.fa new file mode 100644 index 0000000..56c0aad --- /dev/null +++ b/evaluation/tests/CODIS/genome.fa @@ -0,0 +1 @@ +../HLA_novel/genome.fa \ No newline at end of file diff --git a/evaluation/tests/CODIS/genome.fa.fai b/evaluation/tests/CODIS/genome.fa.fai new file mode 100644 index 0000000..b332dba --- /dev/null +++ b/evaluation/tests/CODIS/genome.fa.fai @@ -0,0 +1 @@ +../HLA_novel/genome.fa.fai \ No newline at end of file diff --git a/evaluation/tests/CODIS/grch38 b/evaluation/tests/CODIS/grch38 new file mode 100644 index 0000000..ebe18e1 --- /dev/null +++ b/evaluation/tests/CODIS/grch38 @@ -0,0 +1 @@ +../HLA_novel/grch38 \ No newline at end of file diff --git a/evaluation/tests/CODIS/hisatgenotype_convert_codis.py b/evaluation/tests/CODIS/hisatgenotype_convert_codis.py new file mode 100644 index 0000000..99ea92b --- /dev/null +++ b/evaluation/tests/CODIS/hisatgenotype_convert_codis.py @@ -0,0 +1 @@ +hisatgenotype_modules/hisatgenotype_convert_codis.py \ No newline at end of file diff --git a/evaluation/tests/CODIS/hisatgenotype_extract_codis_data.py b/evaluation/tests/CODIS/hisatgenotype_extract_codis_data.py new file mode 100644 index 0000000..b586a7b --- /dev/null +++ b/evaluation/tests/CODIS/hisatgenotype_extract_codis_data.py @@ -0,0 +1 @@ +hisatgenotype_modules/hisatgenotype_extract_codis_data.py \ No newline at end of file diff --git a/evaluation/tests/CODIS/hisatgenotype_extract_vars.py b/evaluation/tests/CODIS/hisatgenotype_extract_vars.py new file mode 100644 index 0000000..6d113b3 --- /dev/null +++ b/evaluation/tests/CODIS/hisatgenotype_extract_vars.py @@ -0,0 +1 @@ +../../../hisatgenotype_extract_vars.py \ No newline at end of file diff --git 
a/evaluation/tests/CODIS/hisatgenotype_locus.py b/evaluation/tests/CODIS/hisatgenotype_locus.py new file mode 100644 index 0000000..c6bcacb --- /dev/null +++ b/evaluation/tests/CODIS/hisatgenotype_locus.py @@ -0,0 +1 @@ +../../../hisatgenotype_locus.py \ No newline at end of file diff --git a/evaluation/tests/CODIS/hisatgenotype_modules b/evaluation/tests/CODIS/hisatgenotype_modules new file mode 100644 index 0000000..018d309 --- /dev/null +++ b/evaluation/tests/CODIS/hisatgenotype_modules @@ -0,0 +1 @@ +../../../hisatgenotype_modules \ No newline at end of file diff --git a/evaluation/tests/CYP/hisatgenotype_extract_vars.py b/evaluation/tests/CYP/hisatgenotype_extract_vars.py new file mode 100644 index 0000000..6d113b3 --- /dev/null +++ b/evaluation/tests/CYP/hisatgenotype_extract_vars.py @@ -0,0 +1 @@ +../../../hisatgenotype_extract_vars.py \ No newline at end of file diff --git a/evaluation/tests/CYP/hisatgenotype_locus.py b/evaluation/tests/CYP/hisatgenotype_locus.py new file mode 100644 index 0000000..c6bcacb --- /dev/null +++ b/evaluation/tests/CYP/hisatgenotype_locus.py @@ -0,0 +1 @@ +../../../hisatgenotype_locus.py \ No newline at end of file diff --git a/evaluation/tests/HLA_novel/ILMN_StrandSeq/README b/evaluation/tests/HLA_novel/ILMN_StrandSeq/README new file mode 100644 index 0000000..7121782 --- /dev/null +++ b/evaluation/tests/HLA_novel/ILMN_StrandSeq/README @@ -0,0 +1 @@ +../ILMN_StrandSeq_original/README \ No newline at end of file diff --git a/evaluation/tests/HLA_novel/ILMN_StrandSeq/SraRunInfo.txt b/evaluation/tests/HLA_novel/ILMN_StrandSeq/SraRunInfo.txt new file mode 100644 index 0000000..9b3f785 --- /dev/null +++ b/evaluation/tests/HLA_novel/ILMN_StrandSeq/SraRunInfo.txt @@ -0,0 +1 @@ +../ILMN_StrandSeq_original/SraRunInfo.txt \ No newline at end of file diff --git a/evaluation/tests/HLA_novel/ILMN_StrandSeq_original/README b/evaluation/tests/HLA_novel/ILMN_StrandSeq_original/README new file mode 100644 index 0000000..c2cc2b1 --- /dev/null +++ 
b/evaluation/tests/HLA_novel/ILMN_StrandSeq_original/README @@ -0,0 +1,8 @@ +Download SRA reads in the FASTQ format + for i in `awk '{if(NR != 1) print $4}' SraRunInfo.txt`; do fastq-dump --gzip $i; done + + +for i in `ls *.gz`; do echo $i; ../../../../hisat2 --no-hd -x ../hla.graph -U $i --max-altstried 64 2> /dev/null | grep "A\*B" >> temp; done + +scripts/get_haplotype_ILMN_StrandSeq.py ILMN_StrandSeq/SraRunInfo.txt ILMN_StrandSeq/temp + diff --git a/evaluation/tests/HLA_novel/ILMN_StrandSeq_original/SraRunInfo.txt b/evaluation/tests/HLA_novel/ILMN_StrandSeq_original/SraRunInfo.txt new file mode 100644 index 0000000..1b4a860 --- /dev/null +++ b/evaluation/tests/HLA_novel/ILMN_StrandSeq_original/SraRunInfo.txt @@ -0,0 +1,305 @@ +Run spots bases download_path SampleName Sex +ERR1429624 454580 23183580 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429624 NA12892 female +ERR1429625 1164337 59381187 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429625 NA12892 female +ERR1429626 446404 22766604 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429626 NA12892 female +ERR1429627 1178769 60117219 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429627 NA12892 female +ERR1429628 422539 21549489 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429628 NA12892 female +ERR1429629 1158071 59061621 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429629 NA12892 female +ERR1429630 1033423 52704573 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429630 NA12892 female +ERR1429631 661547 33738897 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429631 NA12892 female +ERR1429632 764557 38992407 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429632 NA12892 female +ERR1429633 654774 33393474 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429633 NA12892 female +ERR1429634 653700 33338700 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429634 NA12892 female +ERR1429635 387115 19742865 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429635 NA12892 female +ERR1429636 602942 
30750042 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429636 NA12892 female +ERR1429637 416442 21238542 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429637 NA12892 female +ERR1429638 361789 18451239 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429638 NA12892 female +ERR1429639 445019 22695969 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429639 NA12892 female +ERR1429640 655382 33424482 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429640 NA12892 female +ERR1429641 502098 25606998 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429641 NA12892 female +ERR1429642 442140 22549140 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429642 NA12892 female +ERR1429643 589718 30075618 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429643 NA12892 female +ERR1429644 336049 17138499 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429644 NA12892 female +ERR1429645 369398 18839298 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429645 NA12892 female +ERR1429646 575850 29368350 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429646 NA12892 female +ERR1429647 413988 21113388 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429647 NA12892 female +ERR1429648 687172 35045772 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429648 NA12892 female +ERR1429649 675486 34449786 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429649 NA12892 female +ERR1429650 683774 34872474 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429650 NA12892 female +ERR1429651 527860 26920860 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429651 NA12892 female +ERR1429652 363994 18563694 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429652 NA12892 female +ERR1429653 485829 24777279 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429653 NA12892 female +ERR1429654 404689 20639139 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429654 NA12892 female +ERR1429655 628956 32076756 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429655 NA12892 female +ERR1429656 443843 22635993 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429656 NA12892 female +ERR1429657 507127 25863477 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429657 NA12892 female +ERR1429658 548507 27973857 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429658 NA12892 female +ERR1429659 682499 34807449 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429659 NA12892 female +ERR1429660 414572 21143172 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429660 NA12892 female +ERR1429661 425025 21676275 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429661 NA12892 female +ERR1429662 371171 18929721 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429662 NA12892 female +ERR1429663 1092760 55730760 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429663 NA12892 female +ERR1429664 651261 33214311 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429664 NA12892 female +ERR1429665 643455 32816205 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429665 NA12892 female +ERR1429666 657058 33509958 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429666 NA12892 female +ERR1429667 527528 26903928 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429667 NA12892 female +ERR1429668 941433 48013083 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429668 NA12892 female +ERR1429669 635537 32412387 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429669 NA12892 female +ERR1429670 493285 25157535 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429670 NA12892 female +ERR1429671 1280258 65293158 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429671 NA12892 female +ERR1429672 1765271 90028821 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429672 NA12892 female +ERR1429673 936702 47771802 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429673 NA12892 female +ERR1429674 876200 44686200 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429674 NA12892 female +ERR1429675 1407029 71758479 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429675 NA12892 female +ERR1429676 1703380 86872380 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429676 NA12892 female +ERR1429677 1207785 61597035 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429677 NA12892 female +ERR1429678 1061830 54153330 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429678 NA12892 female +ERR1429679 1669149 85126599 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429679 NA12892 female +ERR1429680 1476448 75298848 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429680 NA12892 female +ERR1429681 1020002 52020102 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429681 NA12892 female +ERR1429682 1356679 69190629 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429682 NA12892 female +ERR1429683 915946 46713246 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429683 NA12892 female +ERR1429684 1045332 53311932 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429684 NA12892 female +ERR1429685 1525162 77783262 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429685 NA12892 female +ERR1429686 471194 24030894 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429686 NA12892 female +ERR1429687 1491856 76084656 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429687 NA12892 female +ERR1429688 824709 42060159 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429688 NA12892 female +ERR1429689 1112300 56727300 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429689 NA12892 female +ERR1429690 874610 44605110 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429690 NA12892 female +ERR1429691 697710 35583210 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429691 NA12892 female +ERR1429692 582636 29714436 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429692 NA12892 female +ERR1429693 761870 38855370 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429693 NA12892 female +ERR1429694 616775 31455525 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429694 NA12892 female +ERR1429695 680648 34713048 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429695 NA12892 female +ERR1429696 557253 28419903 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429696 NA12892 female +ERR1429697 456232 23267832 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429697 NA12892 female +ERR1429698 1494318 76210218 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429698 NA12892 female +ERR1429699 738330 37654830 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429699 NA12892 female +ERR1429700 1328177 67737027 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429700 NA12892 female +ERR1429701 741532 37818132 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429701 NA12892 female +ERR1429702 1061965 54160215 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429702 NA12892 female +ERR1429703 378981 19328031 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429703 NA12892 female +ERR1429704 1117620 56998620 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429704 NA12892 female +ERR1429705 1124119 57330069 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429705 NA12892 female +ERR1429706 1339802 68329902 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429706 NA12892 female +ERR1429707 2385699 121670649 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429707 NA12892 female +ERR1429708 833880 42527880 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429708 NA12892 female +ERR1429709 745854 38038554 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429709 NA12892 female +ERR1429710 753263 38416413 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429710 NA12892 female +ERR1429711 535518 27311418 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429711 NA12892 female +ERR1429712 1299967 66298317 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429712 NA12892 female +ERR1429713 899645 45881895 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429713 NA12892 female +ERR1429714 739975 37738725 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429714 NA12892 female +ERR1429715 1477561 75355611 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429715 NA12892 female +ERR1429716 1411260 71974260 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429716 NA12892 female +ERR1429717 1461893 74556543 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429717 NA12892 female +ERR1429718 1167396 59537196 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429718 NA12892 female +ERR1429719 455539 23232489 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429719 NA12892 female +ERR1429720 691972 35290572 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429720 NA12892 female +ERR1429721 1767814 90158514 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429721 NA12892 female +ERR1429722 1089325 55555575 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429722 NA12892 female +ERR1429723 1241483 63315633 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429723 NA12892 female +ERR1429524 714521 36440571 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429524 NA12892 female +ERR1429525 512294 26126994 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429525 NA12892 female +ERR1429526 600138 30607038 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429526 NA12892 female +ERR1429527 1026295 52341045 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429527 NA12892 female +ERR1429528 485027 24736377 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429528 NA12892 female +ERR1429529 610595 31140345 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429529 NA12892 female +ERR1429530 634235 32345985 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429530 NA12892 female +ERR1429531 588103 29993253 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429531 NA12892 female +ERR1429532 2161798 110251698 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429532 NA12892 female +ERR1429533 2008655 102441405 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429533 NA12892 female +ERR1429534 2299802 117289902 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429534 NA12892 female +ERR1429535 1667191 85026741 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429535 NA12892 female +ERR1429536 1515614 77296314 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429536 NA12892 female +ERR1429537 1800878 91844778 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429537 NA12892 female +ERR1429538 1883585 96062835 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429538 NA12892 female +ERR1429539 1390662 70923762 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429539 NA12892 female +ERR1429540 1636297 83451147 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429540 NA12892 female +ERR1429541 1547471 78921021 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429541 NA12892 female +ERR1429542 1306043 66608193 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429542 NA12892 female +ERR1429543 1545983 78845133 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429543 NA12892 female +ERR1429544 1346888 68691288 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429544 NA12892 female +ERR1429545 1672037 85273887 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429545 NA12892 female +ERR1429546 1182992 60332592 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429546 NA12892 female +ERR1429547 1110020 56611020 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429547 NA12892 female +ERR1429548 673901 34368951 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429548 NA12892 female +ERR1429549 418115 21323865 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429549 NA12892 female +ERR1429550 700872 35744472 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429550 NA12892 female +ERR1429551 839395 42809145 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429551 NA12892 female +ERR1429552 574017 29274867 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429552 NA12892 female +ERR1429553 1022930 52169430 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429553 NA12892 female +ERR1429554 770504 39295704 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429554 NA12892 female +ERR1429555 589137 30045987 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429555 NA12892 female +ERR1429556 621838 31713738 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429556 NA12892 female +ERR1429557 573305 29238555 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429557 NA12892 female +ERR1429558 711033 36262683 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429558 NA12892 female +ERR1429559 664808 33905208 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429559 NA12892 female +ERR1429560 531349 27098799 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429560 NA12892 female +ERR1429561 892253 45504903 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429561 NA12892 female +ERR1429562 687951 35085501 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429562 NA12892 female +ERR1429563 624558 31852458 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429563 NA12892 female +ERR1429564 781236 39843036 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429564 NA12892 female +ERR1429565 1372521 69998571 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429565 NA12892 female +ERR1429566 1218765 62157015 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429566 NA12892 female +ERR1429567 675274 34438974 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429567 NA12892 female +ERR1429568 560720 28596720 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429568 NA12892 female +ERR1429569 664298 33879198 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429569 NA12892 female +ERR1429570 572222 29183322 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429570 NA12892 female +ERR1429571 1350359 68868309 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429571 NA12892 female +ERR1429572 1309402 66779502 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429572 NA12892 female +ERR1429573 1088377 55507227 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429573 NA12892 female +ERR1429574 1047059 53400009 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429574 NA12892 female +ERR1429575 529587 27008937 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429575 NA12892 female +ERR1429576 1169107 59624457 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429576 NA12892 female +ERR1429577 699355 35667105 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429577 NA12892 female +ERR1429578 698973 35647623 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429578 NA12892 female +ERR1429579 567632 28949232 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429579 NA12892 female +ERR1429580 880930 44927430 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429580 NA12892 female +ERR1429581 682353 34800003 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429581 NA12892 female +ERR1429582 800198 40810098 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429582 NA12892 female +ERR1429583 564119 28770069 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429583 NA12892 female +ERR1429584 765306 39030606 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429584 NA12892 female +ERR1429585 690339 35207289 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429585 NA12892 female +ERR1429586 559619 28540569 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429586 NA12892 female +ERR1429587 922910 47068410 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429587 NA12892 female +ERR1429588 467525 23843775 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429588 NA12892 female +ERR1429589 638329 32554779 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429589 NA12892 female +ERR1429590 690814 35231514 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429590 NA12892 female +ERR1429591 477979 24376929 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429591 NA12892 female +ERR1429592 560344 28577544 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429592 NA12892 female +ERR1429593 784251 39996801 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429593 NA12892 female +ERR1429594 747178 38106078 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429594 NA12892 female +ERR1429595 827661 42210711 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429595 NA12892 female +ERR1429596 950714 48486414 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429596 NA12892 female +ERR1429597 5235515 267011265 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429597 NA12892 female +ERR1429598 939966 47938266 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429598 NA12892 female +ERR1429599 546226 27857526 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429599 NA12892 female +ERR1429600 463247 23625597 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429600 NA12892 female +ERR1429601 1024726 52261026 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429601 NA12892 female +ERR1429602 1465503 74740653 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429602 NA12892 female +ERR1429603 1174942 59922042 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429603 NA12892 female +ERR1429604 921930 47018430 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429604 NA12892 female +ERR1429605 1074963 54823113 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429605 NA12892 female +ERR1429606 970622 49501722 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429606 NA12892 female +ERR1429607 796770 40635270 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429607 NA12892 female +ERR1429608 884429 45105879 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429608 NA12892 female +ERR1429609 1034885 52779135 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429609 NA12892 female +ERR1429610 579267 29542617 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429610 NA12892 female +ERR1429611 497651 25380201 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429611 NA12892 female +ERR1429612 538377 27457227 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429612 NA12892 female +ERR1429613 1005381 51274431 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429613 NA12892 female +ERR1429614 987462 50360562 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429614 NA12892 female +ERR1429615 456782 23295882 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429615 NA12892 female +ERR1429616 674193 34383843 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429616 NA12892 female +ERR1429617 953023 48604173 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429617 NA12892 female +ERR1429618 617758 31505658 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429618 NA12892 female +ERR1429619 598055 30500805 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429619 NA12892 female +ERR1429620 597972 30496572 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429620 NA12892 female +ERR1429621 456116 23261916 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429621 NA12892 female +ERR1429622 997159 50855109 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429622 NA12892 female +ERR1429623 505516 25781316 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429623 NA12892 female +ERR1429043 605410 30875910 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429043 NA12878 female +ERR1429044 464531 23691081 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429044 NA12878 female +ERR1429045 548098 27952998 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429045 NA12878 female +ERR1429046 516069 26319519 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429046 NA12878 female +ERR1429047 494917 25240767 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429047 NA12878 female +ERR1429048 324250 16536750 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429048 NA12878 female +ERR1429049 377454 19250154 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429049 NA12878 female +ERR1429050 439003 22389153 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429050 NA12878 female +ERR1429051 527178 26886078 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429051 NA12878 female +ERR1429052 445298 22710198 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429052 NA12878 female +ERR1429053 466364 23784564 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429053 NA12878 female +ERR1429054 417270 21280770 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429054 NA12878 female +ERR1429055 408573 20837223 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429055 NA12878 female +ERR1429056 350813 17891463 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429056 NA12878 female +ERR1429057 532542 27159642 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429057 NA12878 female +ERR1429058 547294 27911994 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429058 NA12878 female +ERR1429059 289030 14740530 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429059 NA12878 female +ERR1429060 471157 24029007 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429060 NA12878 female +ERR1429061 493958 25191858 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429061 NA12878 female +ERR1429062 362477 18486327 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429062 NA12878 female +ERR1429063 267605 13647855 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429063 NA12878 female +ERR1429064 277873 14171523 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429064 NA12878 female +ERR1429065 384164 19592364 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429065 NA12878 female +ERR1429066 360900 18405900 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429066 NA12878 female +ERR1429067 470448 23992848 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429067 NA12878 female +ERR1429068 529681 27013731 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429068 NA12878 female +ERR1429069 352233 17963883 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429069 NA12878 female +ERR1429070 571067 29124417 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429070 NA12878 female +ERR1429071 504820 25745820 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429071 NA12878 female +ERR1429072 309267 15772617 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429072 NA12878 female +ERR1429073 425635 21707385 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429073 NA12878 female +ERR1429074 454845 23197095 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429074 NA12878 female +ERR1429075 202007 10302357 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429075 NA12878 female +ERR1429076 612703 31247853 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429076 NA12878 female +ERR1429077 590290 30104790 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429077 NA12878 female +ERR1429078 447433 22819083 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429078 NA12878 female +ERR1429079 338973 17287623 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429079 NA12878 female +ERR1429080 487364 24855564 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429080 NA12878 female +ERR1429081 545881 27839931 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429081 NA12878 female +ERR1429082 522177 26631027 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429082 NA12878 female +ERR1429083 475914 24271614 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429083 NA12878 female +ERR1429084 587003 29937153 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429084 NA12878 female +ERR1429085 587009 29937459 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429085 NA12878 female +ERR1429086 547645 27929895 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429086 NA12878 female +ERR1429087 459050 23411550 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429087 NA12878 female +ERR1429088 270489 13794939 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429088 NA12878 female +ERR1429089 547181 27906231 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429089 NA12878 female +ERR1429282 478706 24414006 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429282 NA12891 male +ERR1429283 1604643 81836793 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429283 NA12891 male +ERR1429284 1102261 56215311 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429284 NA12891 male +ERR1429285 1337037 68188887 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429285 NA12891 male +ERR1429286 845172 43103772 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429286 NA12891 male +ERR1429287 1255934 64052634 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429287 NA12891 male +ERR1429288 781311 39846861 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429288 NA12891 male +ERR1429289 784261 39997311 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429289 NA12891 male +ERR1429290 1464639 74696589 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429290 NA12891 male +ERR1429291 1111003 56661153 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429291 NA12891 male +ERR1429292 887204 45247404 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429292 NA12891 male +ERR1429293 873622 44554722 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429293 NA12891 male +ERR1429294 1517391 77386941 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429294 NA12891 male +ERR1429295 604847 30847197 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429295 NA12891 male +ERR1429296 1096055 55898805 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429296 NA12891 male +ERR1429297 1766588 90095988 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429297 NA12891 male +ERR1429298 1133125 57789375 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429298 NA12891 male +ERR1429299 880106 44885406 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429299 NA12891 male +ERR1429300 870455 44393205 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429300 NA12891 male +ERR1429301 655374 33424074 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429301 NA12891 male +ERR1429302 530794 27070494 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429302 NA12891 male +ERR1429303 1647372 84015972 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429303 NA12891 male +ERR1429304 1012403 51632553 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429304 NA12891 male +ERR1429305 1291017 65841867 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429305 NA12891 male +ERR1429306 1929309 98394759 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429306 NA12891 male +ERR1429307 1388340 70805340 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429307 NA12891 male +ERR1429308 952677 48586527 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429308 NA12891 male +ERR1429309 637678 32521578 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429309 NA12891 male +ERR1429310 1689586 86168886 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429310 NA12891 male +ERR1429311 1351050 68903550 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429311 NA12891 male +ERR1429312 1926500 98251500 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429312 NA12891 male +ERR1429313 1127035 57478785 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429313 NA12891 male +ERR1429314 902921 46048971 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429314 NA12891 male +ERR1429315 1343658 68526558 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429315 NA12891 male +ERR1429316 592152 30199752 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429316 NA12891 male +ERR1429317 799753 40787403 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429317 NA12891 male +ERR1429318 1604259 81817209 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429318 NA12891 male +ERR1429319 772620 39403620 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429319 NA12891 male +ERR1429320 948191 48357741 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429320 NA12891 male +ERR1429321 1149341 58616391 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429321 NA12891 male +ERR1429322 2440295 124455045 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429322 NA12891 male +ERR1429323 644880 32888880 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429323 NA12891 male +ERR1429324 1591841 81183891 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429324 NA12891 male +ERR1429325 746939 38093889 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429325 NA12891 male +ERR1429515 742596 37872396 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429515 NA12892 female +ERR1429516 441081 22495131 
https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429516 NA12892 female +ERR1429517 627119 31983069 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429517 NA12892 female +ERR1429518 963073 49116723 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429518 NA12892 female +ERR1429519 1062118 54168018 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429519 NA12892 female +ERR1429520 359069 18312519 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429520 NA12892 female +ERR1429521 780080 39784080 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429521 NA12892 female +ERR1429522 1223012 62373612 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429522 NA12892 female +ERR1429523 525746 26813046 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429523 NA12892 female +ERR1429039 459819 23450769 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429039 NA12878 female +ERR1429040 602524 30728724 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429040 NA12878 female +ERR1429041 337658 17220558 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429041 NA12878 female +ERR1429042 632741 32269791 https://sra-download.ncbi.nlm.nih.gov/srapub/ERR1429042 NA12878 female \ No newline at end of file diff --git a/evaluation/tests/HLA_novel/README b/evaluation/tests/HLA_novel/README new file mode 100644 index 0000000..e9e4b8b --- /dev/null +++ b/evaluation/tests/HLA_novel/README @@ -0,0 +1,20 @@ + hisatgenotype_locus.py --aligner-list hisat2.graph --base hla --locus-list A --num-editdist 2 --assembly -1 ILMN/NA12892.extracted.1.fq.gz -2 ILMN/NA12892.extracted.2.fq.gz + + for i in `ls -l CP_80/LP*1.fq.gz | awk '{print $NF}' | cut -d'/' -f2 | cut -d'.' 
-f1`; do echo $i; ./hisatgenotype_locus.py --locus-list A --num-editdist 2 --assembly --reads CP_80/$i.extracted.1.fq.gz,CP_80/$i.extracted.2.fq.gz --assembly-base assembly_graph_$i 2> $i.tmp; done + + hisatgenotype_locus_samples.py -p 3 -v --max-sample 400 --region-list hla.A --read-dir CP_400 --out-dir test_A > cp_400_A.txt + hisatgenotype_scripts/compare_HLA_Omixon.py cp_400_A.txt ../CAAPA/omixon_caapa_hla.txt + + hisatgenotype_locus.py --aligner-list hisat2.graph --locus-list A --num-editdist 2 --assembly -1 CP_80/LP6005106-DNA_H02.extracted.fq.1.gz -2 CP_80/LP6005106-DNA_H02.extracted.fq.2.gz --display-alleles A*23:17,A*02:02:01:01 + + hisatgenotype_extract_reads.py --base-fname genotype_genome --reference-type genome --read-dir ILMN_12062016 --out-dir ILMN + + hisat2 --no-hd -x genotype_genome -c CAGCTGGGATGTGGAGTGGTGTGAGGAGTGGCCACAGGGGAGCAGAGGAGGTGGCAGAAGCCGGAGGTAAAGGTGTCTTAAA + + +On MARCC - https://www.marcc.jhu.edu/getting-started/basic + +sbatch script-name +squeue -u dkim136@jhu.edu +scancel job-id +sinfo diff --git a/evaluation/tests/HLA_novel/hisatgenotype.py b/evaluation/tests/HLA_novel/hisatgenotype.py new file mode 100644 index 0000000..fad5906 --- /dev/null +++ b/evaluation/tests/HLA_novel/hisatgenotype.py @@ -0,0 +1 @@ +../../../hisatgenotype.py \ No newline at end of file diff --git a/evaluation/tests/HLA_novel/hisatgenotype_build_genome.py b/evaluation/tests/HLA_novel/hisatgenotype_build_genome.py new file mode 100644 index 0000000..9d3572a --- /dev/null +++ b/evaluation/tests/HLA_novel/hisatgenotype_build_genome.py @@ -0,0 +1 @@ +../../../hisatgenotype_build_genome.py \ No newline at end of file diff --git a/evaluation/tests/HLA_novel/hisatgenotype_extract_vars.py b/evaluation/tests/HLA_novel/hisatgenotype_extract_vars.py new file mode 100644 index 0000000..6d113b3 --- /dev/null +++ b/evaluation/tests/HLA_novel/hisatgenotype_extract_vars.py @@ -0,0 +1 @@ +../../../hisatgenotype_extract_vars.py \ No newline at end of file diff --git 
a/evaluation/tests/HLA_novel/hisatgenotype_locus.py b/evaluation/tests/HLA_novel/hisatgenotype_locus.py new file mode 100644 index 0000000..c6bcacb --- /dev/null +++ b/evaluation/tests/HLA_novel/hisatgenotype_locus.py @@ -0,0 +1 @@ +../../../hisatgenotype_locus.py \ No newline at end of file diff --git a/evaluation/tests/HLA_novel/hisatgenotype_locus_prev.py b/evaluation/tests/HLA_novel/hisatgenotype_locus_prev.py new file mode 100644 index 0000000..3daa365 --- /dev/null +++ b/evaluation/tests/HLA_novel/hisatgenotype_locus_prev.py @@ -0,0 +1,3210 @@ +#!/usr/bin/env python +# +# Copyright 2015, Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . 
+# + + +import sys, os, subprocess, re +import inspect, random +import math +from datetime import datetime, date, time +from argparse import ArgumentParser, FileType +from copy import deepcopy +from hisatgenotype_modules import typing_common, Gene_typing, assembly_graph + + + +""" +Align reads, and sort the alignments into a BAM file +""" +def align_reads(ex_path, + aligner, + simulation, + base_fname, + index_type, + read_fname, + fastq, + threads, + out_fname, + verbose): + if aligner == "hisat2": + hisat2 = os.path.join(ex_path, "hisat2") + # DK - debugging purpose + # aligner_cmd = [hisat2, "--mm"] + aligner_cmd = ["/Users/infphilo/work/hisat2/hisat2", "--mm"] + if not simulation: + aligner_cmd += ["--no-unal"] + DNA = True + if DNA: + aligner_cmd += ["--no-spliced-alignment"] # no spliced alignment + aligner_cmd += ["-X", "1000"] # max fragment length + if index_type == "linear": + aligner_cmd += ["-k", "10"] + else: + aligner_cmd += ["--max-altstried", "64"] + # DK - debugging purposes + aligner_cmd += ["--haplotype"] + if base_fname == "codis": + aligner_cmd += ["--enable-codis"] + aligner_cmd += ["-x", "%s.%s" % (base_fname, index_type)] + elif aligner == "bowtie2": + aligner_cmd = [aligner, + "--no-unal", + "-k", "10", + "-x", base_fname] + else: + assert False + assert len(read_fname) in [1,2] + aligner_cmd += ["-p", str(threads)] + if not fastq: + aligner_cmd += ["-f"] + if len(read_fname) == 1: + aligner_cmd += ["-U", read_fname[0]] + else: + aligner_cmd += ["-1", "%s" % read_fname[0], + "-2", "%s" % read_fname[1]] + if verbose >= 1: + print >> sys.stderr, ' '.join(aligner_cmd) + align_proc = subprocess.Popen(aligner_cmd, + stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + + sambam_cmd = ["samtools", + "view", + "-bS", + "-"] + sambam_proc = subprocess.Popen(sambam_cmd, + stdin=align_proc.stdout, + stdout=open(out_fname + ".unsorted", 'w'), + stderr=open("/dev/null", 'w')) + sambam_proc.communicate() + if index_type == "graph": + bamsort_cmd = 
["samtools", + "sort", + out_fname + ".unsorted", + "-o", out_fname] + bamsort_proc = subprocess.Popen(bamsort_cmd, + stderr=open("/dev/null", 'w')) + bamsort_proc.communicate() + + bamindex_cmd = ["samtools", + "index", + out_fname] + bamindex_proc = subprocess.Popen(bamindex_cmd, + stderr=open("/dev/null", 'w')) + bamindex_proc.communicate() + + os.system("rm %s" % (out_fname + ".unsorted")) + + +""" +""" +def normalize(prob): + total = sum(prob.values()) + for allele, mass in prob.items(): + prob[allele] = mass / total + + +""" +""" +def prob_diff(prob1, prob2): + diff = 0.0 + for allele in prob1.keys(): + if allele in prob2: + diff += abs(prob1[allele] - prob2[allele]) + else: + diff += prob1[allele] + return diff + + +""" +""" +def Gene_prob_cmp(a, b): + if a[1] != b[1]: + if a[1] < b[1]: + return 1 + else: + return -1 + assert a[0] != b[0] + if a[0] < b[0]: + return -1 + else: + return 1 + + +""" +""" +def single_abundance(Gene_cmpt, + Gene_length): + def normalize2(prob, length): + total = 0 + for allele, mass in prob.items(): + assert allele in length + total += (mass / length[allele]) + for allele, mass in prob.items(): + assert allele in length + prob[allele] = mass / length[allele] / total + + Gene_prob, Gene_prob_next = {}, {} + for cmpt, count in Gene_cmpt.items(): + alleles = cmpt.split('-') + for allele in alleles: + if allele not in Gene_prob: + Gene_prob[allele] = 0.0 + Gene_prob[allele] += (float(count) / len(alleles)) + + normalize(Gene_prob) + def next_prob(Gene_cmpt, Gene_prob, Gene_length): + Gene_prob_next = {} + for cmpt, count in Gene_cmpt.items(): + alleles = cmpt.split('-') + alleles_prob = 0.0 + for allele in alleles: + if allele not in Gene_prob: + continue + alleles_prob += Gene_prob[allele] + if alleles_prob <= 0.0: + continue + for allele in alleles: + if allele not in Gene_prob: + continue + if allele not in Gene_prob_next: + Gene_prob_next[allele] = 0.0 + Gene_prob_next[allele] += (float(count) * Gene_prob[allele] / alleles_prob) + 
normalize(Gene_prob_next) + return Gene_prob_next + + + fast_EM = True + diff, iter = 1.0, 0 + while diff > 0.0001 and iter < 1000: + Gene_prob_next = next_prob(Gene_cmpt, Gene_prob, Gene_length) + if fast_EM: + # Accelerated version of EM - SQUAREM iteration + # Varadhan, R. & Roland, C. Scand. J. Stat. 35, 335-353 (2008) + # Also, this algorithm is used in Sailfish - http://www.nature.com/nbt/journal/v32/n5/full/nbt.2862.html + Gene_prob_next2 = next_prob(Gene_cmpt, Gene_prob_next, Gene_length) + sum_squared_r, sum_squared_v = 0.0, 0.0 + p_r, p_v = {}, {} + for a in Gene_prob.keys(): + p_r[a] = Gene_prob_next[a] - Gene_prob[a] + sum_squared_r += (p_r[a] * p_r[a]) + p_v[a] = Gene_prob_next2[a] - Gene_prob_next[a] - p_r[a] + sum_squared_v += (p_v[a] * p_v[a]) + if sum_squared_v > 0.0: + gamma = -math.sqrt(sum_squared_r / sum_squared_v) + for a in Gene_prob.keys(): + Gene_prob_next2[a] = max(0.0, Gene_prob[a] - 2 * gamma * p_r[a] + gamma * gamma * p_v[a]); + Gene_prob_next = next_prob(Gene_cmpt, Gene_prob_next2, Gene_length) + + diff = prob_diff(Gene_prob, Gene_prob_next) + Gene_prob = Gene_prob_next + + # Accelerate convergence + if iter >= 10: + Gene_prob2 = {} + avg_prob = sum(Gene_prob.values()) / len(Gene_prob) + for allele, prob in Gene_prob.items(): + if prob >= 0.005 or prob > avg_prob: + Gene_prob2[allele] = prob + Gene_prob = Gene_prob2 + + # DK - debugging purposes + if iter % 10 == 0 and False: + print "iter", iter + for allele, prob in Gene_prob.items(): + if prob >= 0.01: + print >> sys.stderr, "\t", iter, allele, prob, str(datetime.now()) + + iter += 1 + + """ + for allele, prob in Gene_prob.items(): + allele_len = Gene_length[allele] + Gene_prob[allele] /= float(allele_len) + """ + + # normalize(Gene_prob) + normalize2(Gene_prob, Gene_length) + Gene_prob = [[allele, prob] for allele, prob in Gene_prob.items()] + Gene_prob = sorted(Gene_prob, cmp=Gene_prob_cmp) + return Gene_prob + + +""" +""" +def lower_bound(Var_list, pos): + low, high = 0, 
len(Var_list) + while low < high: + m = (low + high) / 2 + m_pos = Var_list[m][0] + if m_pos < pos: + low = m + 1 + elif m_pos > pos: + high = m + else: + assert m_pos == pos + while m > 0: + if Var_list[m-1][0] < pos: + break + m -= 1 + return m + return low + + +""" + var: ['single', 3300, 'G'] + exons: [[301, 373], [504, 822], [1084, 1417], [2019, 2301], [2404, 2520], [2965, 2997], [3140, 3187], [3357, 3361]] +""" +def var_in_exon(var, exons): + exonic = False + var_type, var_left, var_data = var + var_right = var_left + if var_type == "deletion": + var_right = var_left + int(var_data) - 1 + for exon_left, exon_right in exons: + if var_left >= exon_left and var_right <= exon_right: + return True + return False + + +""" +Report variant IDs whose var is within exonic regions +""" +def get_exonic_vars(Vars, exons): + vars = set() + for var_id, var in Vars.items(): + var_type, var_left, var_data = var + var_right = var_left + if var_type == "deletion": + var_right = var_left + int(var_data) - 1 + for exon_left, exon_right in exons: + if var_left >= exon_left and var_right <= exon_right: + vars.add(var_id) + + return vars + + +""" +Get representative alleles among those that share the same exonic sequences +""" +def get_rep_alleles(Links, exon_vars): + allele_vars = {} + for var, alleles in Links.items(): + if var not in exon_vars: + continue + for allele in alleles: + if allele not in allele_vars: + allele_vars[allele] = set() + allele_vars[allele].add(var) + + allele_groups = {} + for allele, vars in allele_vars.items(): + vars = '-'.join(vars) + if vars not in allele_groups: + allele_groups[vars] = [] + allele_groups[vars].append(allele) + + allele_reps = {} # allele representatives + allele_rep_groups = {} # allele groups by allele representatives + for allele_members in allele_groups.values(): + assert len(allele_members) > 0 + allele_rep = allele_members[0] + allele_rep_groups[allele_rep] = allele_members + for allele_member in allele_members: + assert 
allele_member not in allele_reps + allele_reps[allele_member] = allele_rep + + return allele_reps, allele_rep_groups + + +""" +Identify alternative alignments +""" +def get_alternatives(ref_seq, Vars, Var_list, verbose): + # Check deletions' alternatives + def get_alternatives_recur(ref_seq, + Vars, + Var_list, + Alts, + var_id, + del_len, + other_del_len, + left, + alt_list, + var_j, + latest_pos, + debug = False): + def add_alt(Alts, alt_list, var_id, j_id): + if j_id.isdigit(): + if var_id not in Alts: + Alts[var_id] = [["1"]] + else: + if Alts[var_id][-1][-1].isdigit(): + Alts[var_id][-1][-1] = str(int(Alts[var_id][-1][-1]) + 1) + else: + Alts[var_id][-1].append("1") + else: + if var_id not in Alts: + Alts[var_id] = [[j_id]] + else: + if Alts[var_id][-1][-1].isdigit(): + Alts[var_id][-1][-1] = j_id + else: + Alts[var_id][-1].append(j_id) + Alts[var_id][-1].append("0") + + if not j_id.isdigit(): + alt_list.append(j_id) + alts = '-'.join(alt_list) + if alts not in Alts: + Alts[alts] = [[var_id]] + else: + Alts[alts].append([var_id]) + + if del_len == other_del_len: + return + + var_type, var_pos, var_data = Vars[var_id] + if left: # Look in left direction + if var_j < 0: + return + j_pos, j_id = Var_list[var_j] + alt_del = [] + if var_id != j_id and j_pos < var_pos + del_len: + prev_latest_pos = latest_pos + # Check bases between SNPs + while latest_pos - max(0, del_len - other_del_len) > 0: + if ref_seq[latest_pos - 1] != ref_seq[latest_pos - 1 - del_len + other_del_len]: + break + latest_pos -= 1 + add_alt(Alts, alt_list, var_id, str(latest_pos)) + if latest_pos - 1 > j_pos: + return + j_type, _, j_data = Vars[j_id] + if j_type == "deletion": + j_del_len = int(j_data) + if j_type == "single" and j_pos == latest_pos - 1: + j_cmp_pos = j_pos - del_len + other_del_len + if debug: + print Vars[j_id] + print j_pos, ref_seq[j_pos] + print j_cmp_pos, ref_seq[j_cmp_pos] + if j_data == ref_seq[j_cmp_pos]: + add_alt(Alts, alt_list, var_id, j_id) + latest_pos = j_pos + 
elif j_type == "deletion" and j_pos + j_del_len - 1 == prev_latest_pos - 1: + alt_list2 = alt_list[:] + [j_id] + latest_pos2 = j_pos + alt_del = [alt_list2, latest_pos2] + + get_alternatives_recur(ref_seq, + Vars, + Var_list, + Alts, + var_id, + del_len, + other_del_len, + left, + alt_list, + var_j - 1, + latest_pos, + debug) + + if alt_del: + alt_list2, latest_pos2 = alt_del + if var_id not in Alts: + Alts[var_id] = [alt_list2[:]] + else: + Alts[var_id].append(alt_list2[:]) + alt_idx = len(Alts[var_id]) - 1 + get_alternatives_recur(ref_seq, + Vars, + Var_list, + Alts, + var_id, + del_len, + other_del_len + j_del_len, + left, + alt_list2, + var_j - 1, + latest_pos2, + debug) + # Remove this Deletion if not supported by additional bases? + assert alt_idx < len(Alts[var_id]) + if Alts[var_id][alt_idx][-1] == j_id: + Alts[var_id] = Alts[var_id][:alt_idx] + Alts[var_id][alt_idx+1:] + + else: # Look in right direction + if var_j >= len(Var_list): + return + j_pos, j_id = Var_list[var_j] + alt_del = [] + if var_id != j_id and j_pos >= var_pos: + # Check bases between SNPs + prev_latest_pos = latest_pos + while latest_pos + 1 + max(0, del_len - other_del_len) < len(ref_seq): + if ref_seq[latest_pos + 1] != ref_seq[latest_pos + 1 + del_len - other_del_len]: + break + + # DK - debugging purposes + if debug: + pos2_ = latest_pos + 1 + del_len - other_del_len + print "DK: latest_pos:", latest_pos + 1, pos2_ + print "DK: var_pos:", var_pos, "del_len:", del_len, "other_del_len:", other_del_len + print "DK:", ref_seq[latest_pos + 1], ref_seq[pos2_] + + latest_pos += 1 + add_alt(Alts, alt_list, var_id, str(latest_pos)) + + if latest_pos + 1 < j_pos: + return + + j_type, _, j_data = Vars[j_id] + if j_type == "single" and j_pos == latest_pos + 1: + j_cmp_pos = j_pos + del_len - other_del_len + if debug: + print Vars[j_id] + print j_pos, ref_seq[j_pos] + print j_cmp_pos, ref_seq[j_cmp_pos] + + if j_data == ref_seq[j_cmp_pos]: + add_alt(Alts, alt_list, var_id, j_id) + latest_pos = 
j_pos + elif j_type == "deletion" and j_pos == prev_latest_pos + 1: + j_del_len = int(j_data) + alt_list2 = alt_list[:] + [j_id] + latest_pos2 = j_pos + j_del_len - 1 + alt_del = [alt_list2, latest_pos2] + + get_alternatives_recur(ref_seq, + Vars, + Var_list, + Alts, + var_id, + del_len, + other_del_len, + left, + alt_list, + var_j + 1, + latest_pos, + debug) + + if alt_del: + alt_list2, latest_pos2 = alt_del + if var_id not in Alts: + Alts[var_id] = [alt_list2[:]] + else: + Alts[var_id].append(alt_list2[:]) + alt_idx = len(Alts[var_id]) - 1 + get_alternatives_recur(ref_seq, + Vars, + Var_list, + Alts, + var_id, + del_len, + other_del_len + j_del_len, + left, + alt_list2, + var_j + 1, + latest_pos2, + debug) + + # Remove this Deletion if not supported by additional bases? + assert alt_idx < len(Alts[var_id]) + if Alts[var_id][alt_idx][-1] == j_id: + Alts[var_id] = Alts[var_id][:alt_idx] + Alts[var_id][alt_idx+1:] + + # Check deletions' alternatives + Alts_left, Alts_right = {}, {} + for var_i, var_id in Var_list: + var_type, var_pos, var_data = var = Vars[var_id] + if var_type != "deletion" or var_pos == 0: + continue + del_len = int(var_data) + if var_pos + del_len >= len(ref_seq): + assert var_pos + del_len == len(ref_seq) + continue + debug = (var_id == "hv454a") + if debug: + print Vars[var_id] + + alt_list = [] + var_j = lower_bound(Var_list, var_pos + del_len - 1) + latest_pos = var_pos + del_len + if var_j < len(Var_list): + get_alternatives_recur(ref_seq, + Vars, + Var_list, + Alts_left, + var_id, + del_len, + 0, + True, # left + alt_list, + var_j, + latest_pos, + debug) + alt_list = [] + var_j = lower_bound(Var_list, var_pos) + latest_pos = var_pos - 1 + assert var_j >= 0 + get_alternatives_recur(ref_seq, + Vars, + Var_list, + Alts_right, + var_id, + del_len, + 0, + False, # right + alt_list, + var_j, + latest_pos, + debug) + + if debug: + print "DK :-)" + sys.exit(1) + + def assert_print_alts(Alts, dir): + def get_seq_pos(alt_list): + seq = "" + seq_left, 
seq_right = -1, -1 + for i in range(len(alt_list)): + alt = alt_list[i] + if alt.isdigit(): + assert i + 1 == len(alt_list) + if dir == "left": + if i == 0: + seq = alt + break + + alt = int(alt) + seq = ref_seq[seq_left-alt+1:seq_left+1] + seq + seq_left -= alt + else: + alt = int(alt) + seq += ref_seq[seq_right:seq_right+alt] + seq_right += alt + break + + var_type, var_pos, var_data = Vars[alt] + if dir == "left" and var_type == "deletion": + var_pos = var_pos + int(var_data) - 1 + + if i == 0: + if dir == "left": + seq_left, seq_right = var_pos, var_pos + else: + seq_left, seq_right = var_pos, var_pos + + if dir == "left": + assert seq_left >= var_pos + if i > 0: + seq = ref_seq[var_pos+1:seq_left+1] + seq + if var_type == "single": + seq = var_data + seq + seq_left = var_pos - 1 + elif var_type == "deletion": + seq_left = var_pos - int(var_data) + else: + assert var_type == "insertion" + seq = var_data + seq + else: + assert seq_right <= var_pos + if i > 0: + seq += ref_seq[seq_right:var_pos] + if var_type == "single": + seq += var_data + seq_right = var_pos + 1 + elif var_type == "deletion": + seq_right = var_pos + int(var_data) + else: + assert var_type == "insertion" + seq += var_data + + return seq, seq_left, seq_right + + for alt_list1, alt_list2 in Alts.items(): + if verbose >= 2: print >> sys.stderr, "\t", dir, ":", alt_list1, alt_list2 + out_str = "\t\t" + alt_list1 = alt_list1.split('-') + for i in range(len(alt_list1)): + alt = alt_list1[i] + var_type, var_pos, var_data = Vars[alt] + out_str += ("%s-%d-%s" % (var_type, var_pos, var_data)) + if i + 1 < len(alt_list1): + out_str += " " + + for i in range(len(alt_list2)): + alt_list3 = alt_list2[i] + out_str += "\t[" + for j in range(len(alt_list3)): + alt = alt_list3[j] + if alt.isdigit(): + out_str += alt + else: + var_type, var_pos, var_data = Vars[alt] + out_str += ("%s-%d-%s" % (var_type, var_pos, var_data)) + if j + 1 < len(alt_list3): + out_str += ", " + out_str += "]" + if verbose >= 2: print >> 
sys.stderr, out_str + + for i in range(len(alt_list2)): + alt_list3 = alt_list2[i] + seq1, seq1_left, seq1_right = get_seq_pos(alt_list1) + seq2, seq2_left, seq2_right = get_seq_pos(alt_list3) + if seq1.isdigit(): + assert not seq2.isdigit() + seq1_left, seq1_right = seq2_right - int(seq1), seq2_right + seq1 = ref_seq[seq1_left+1:seq1_right+1] + elif seq2.isdigit(): + seq2_left, seq2_right = seq1_right - int(seq2), seq1_right + seq2 = ref_seq[seq2_left+1:seq2_right+1] + + if dir == "left": + if seq1_right < seq2_right: + seq1 += ref_seq[seq1_right+1:seq2_right+1] + elif seq2_right < seq1_right: + seq2 += ref_seq[seq2_right+1:seq1_right+1] + else: + if seq1_left < seq2_left: + seq2 = ref_seq[seq1_left:seq2_left] + seq2 + elif seq2_left < seq1_left: + seq1 = ref_seq[seq2_left:seq1_left] + seq1 + seq1_len, seq2_len = len(seq1), len(seq2) + if seq1_len != seq2_len: + len_diff = abs(seq1_len - seq2_len) + if dir == "left": + if seq1_len < seq2_len: + seq1 = ref_seq[seq1_left-len_diff+1:seq1_left+1] + seq1 + else: + seq2 = ref_seq[seq2_left-len_diff+1:seq2_left+1] + seq2 + else: + if seq1_len < seq2_len: + seq1 += ref_seq[seq1_right:seq1_right+len_diff] + else: + seq2 += ref_seq[seq2_right:seq2_right+len_diff] + if verbose >= 3: + print >> sys.stderr, "\t\t", alt_list1, alt_list3 + print >> sys.stderr, "\t\t\t", seq1, seq1_left, seq1_right + print >> sys.stderr, "\t\t\t", seq2, seq2_left, seq2_right + assert seq1 == seq2 + + assert_print_alts(Alts_left, "left") + assert_print_alts(Alts_right, "right") + + return Alts_left, Alts_right + + +""" +Identify ambigious differences that may account for other alleles, + given a list of differences (cmp_list) between a read and a potential allele +""" +def identify_ambigious_diffs(Vars, + Alts_left, + Alts_right, + cmp_list, + verbose, + debug = False): + cmp_left, cmp_right = 0, len(cmp_list) - 1 + + i = 0 + while i < len(cmp_list): + cmp_i = cmp_list[i] + type, pos, length = cmp_i[:3] + # Check alternative alignments + if type 
in ["mismatch", "deletion"]: + var_id = cmp_i[3] + if var_id == "unknown": + i += 1 + continue + + # Left direction + id_str = var_id + total_del_len = length if type == "deletion" else 0 + for j in reversed(range(0, i)): + cmp_j = cmp_list[j] + j_type, j_pos, j_len = cmp_j[:3] + if j_type != "match": + if len(cmp_j) < 4: + continue + j_var_id = cmp_j[3] + id_str += ("-%s" % j_var_id) + if j_type == "deletion": + total_del_len += j_len + last_type, last_pos, last_len = cmp_list[0][:3] + assert last_type in ["match", "mismatch"] + left_pos = last_pos + total_del_len + if id_str in Alts_left: + orig_alts = id_str.split('-') + alts_list = Alts_left[id_str] + for alts in alts_list: + if alts[-1].isdigit(): + assert type == "deletion" + assert len(orig_alts) == 1 + alts_id_str = '-'.join(alts[:-1]) + alt_left_pos = pos + alt_total_del_len = 0 + for alt in alts[:-1]: + assert alt in Vars + alt_type, alt_pos, alt_data = Vars[alt] + alt_left_pos = alt_pos - 1 + if alt_type == "deletion": + alt_total_del_len += int(alt_data) + alt_left_pos = alt_left_pos + alt_total_del_len - int(alts[-1]) + 1 + else: + alts_id_str = '-'.join(alts) + assert alts_id_str in Alts_left + for back_alts in Alts_left[alts_id_str]: + back_id_str = '-'.join(back_alts) + if back_id_str.find(id_str) != 0: + continue + assert len(orig_alts) < len(back_alts) + assert back_alts[-1].isdigit() + alt_left_pos = pos + alt_total_del_len = 0 + for alt in back_alts[:len(orig_alts) + 1]: + if alt.isdigit(): + alt_left_pos = alt_left_pos - int(alt) + 1 + else: + assert alt in Vars + alt_type, alt_pos, alt_data = Vars[alt] + alt_left_pos = alt_pos - 1 + if alt_type == "deletion": + alt_total_del_len += int(alt_data) + alt_left_pos += alt_total_del_len + if left_pos >= alt_left_pos: + if verbose >= 2: + print "LEFT:", cmp_list + print "\t", type, "id_str:", id_str, "=>", alts_id_str, "=>", back_alts, "left_pos:", left_pos, "alt_left_pos:", alt_left_pos + cmp_left = i + 1 + break + + # DK - debugging purposes + if 
debug: + print "DK: var_id:", var_id + print "DK: cmp_list:", cmp_list + print "DK: cmp_right:", cmp_right + # sys.exit(1) + + # Right direction + if cmp_right + 1 == len(cmp_list): + id_str = var_id + total_del_len = length if type == "deletion" else 0 + for j in range(i + 1, len(cmp_list)): + cmp_j = cmp_list[j] + j_type, j_pos, j_len = cmp_j[:3] + if j_type != "match": + if len(cmp_j) < 4: + continue + j_var_id = cmp_j[3] + id_str += ("-%s" % j_var_id) + if j_type == "deletion": + total_del_len += j_len + last_type, last_pos, last_len = cmp_list[-1][:3] + assert last_type in ["match", "mismatch"] + right_pos = last_pos + last_len - 1 - total_del_len + + # DK - debugging purposes + if debug: + print "DK: id_str:", id_str + + if id_str in Alts_right: + orig_alts = id_str.split('-') + alts_list = Alts_right[id_str] + for alts in alts_list: + if alts[-1].isdigit(): + assert type == "deletion" + assert len(orig_alts) == 1 + alts_id_str = '-'.join(alts[:-1]) + alt_right_pos = pos + alt_total_del_len = 0 + for alt in alts[:-1]: + assert alt in Vars + alt_type, alt_pos, alt_data = Vars[alt] + alt_right_pos = alt_pos + if alt_type == "single": + alt_right_pos += 1 + else: + assert alt_type == "deletion" + alt_del_len = int(alt_data) + alt_right_pos += alt_del_len + alt_total_del_len += alt_del_len + alt_right_pos = alt_right_pos - alt_total_del_len + int(alts[-1]) - 1 + else: + alts_id_str = '-'.join(alts) + assert alts_id_str in Alts_right + for back_alts in Alts_right[alts_id_str]: + back_id_str = '-'.join(back_alts) + if back_id_str.find(id_str) != 0: + continue + assert len(orig_alts) < len(back_alts) + assert back_alts[-1].isdigit() + alt_right_pos = pos + alt_total_del_len = 0 + for alt in back_alts[:len(orig_alts) + 1]: + if alt.isdigit(): + alt_right_pos = alt_right_pos + int(alt) - 1 + else: + assert alt in Vars + alt_type, alt_pos, alt_data = Vars[alt] + alt_right_pos = alt_pos + if alt_type == "single": + alt_right_pos += 1 + else: + assert alt_type == 
"deletion" + alt_del_len = int(alt_data) + alt_right_pos += alt_del_len + alt_total_del_len += alt_del_len + alt_right_pos -= alt_total_del_len + + if right_pos <= alt_right_pos: + if verbose >= 2: + print "RIGHT:", cmp_list + print "\t", type, "id_str:", id_str, "=>", alts_id_str, "right_pos:", right_pos, "alt_right_pos:", alt_right_pos + cmp_right = i - 1 + break + i += 1 + + return cmp_left, cmp_right + + +""" +Example, + gene_name, allele_name (input): A, A*32:01:01 + allele (output): single-136-G-hv47,deletion-285-1-hv57, ... ,single-3473-T-hv1756,deletion-3495-1-hv1763,single-3613-C-hv1799 +""" +def get_allele(gene_name, allele_name, Vars, Var_list, Links): + allele_haplotype = [] + for _var_pos, _var_id in Var_list[gene_name]: + if allele_name in Links[_var_id]: + _var = Vars[gene_name][_var_id] + allele_haplotype.append("%s-%d-%s-%s" % (_var[0], _var[1], _var[2], _var_id)) + allele_haplotype = ','.join(allele_haplotype) + return allele_haplotype + + +""" +HISAT-genotype's mpileup +""" +def get_mpileup(alignview_cmd, + ref_seq, + base_locus, + vars, + allow_discordant): + ref_seq_len = len(ref_seq) + mpileup = [] + for i in range(ref_seq_len): + mpileup.append([[], {}]) + + proc = subprocess.Popen(alignview_cmd, + stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + + prev_pos = -1 + cigar_re = re.compile('\d+\w') + for line in proc.stdout: + line = line.strip() + cols = line.split() + read_id, flag, _, pos, _, cigar_str = cols[:6] + read_seq = cols[9] + flag, pos = int(flag), int(pos) + # Unalined? + if flag & 0x4 != 0: + continue + pos -= (base_locus + 1) + if pos < 0: + continue + + # Concordantly mapped? 
+ if flag & 0x2 != 0: + concordant = True + else: + concordant = False + + if not allow_discordant and not concordant: + continue + + read_pos, left_pos = 0, pos + right_pos = left_pos + cigars = cigar_re.findall(cigar_str) + cigars = [[cigar[-1], int(cigar[:-1])] for cigar in cigars] + for i in range(len(cigars)): + cigar_op, length = cigars[i] + if cigar_op in "MD": + for j in range(length): + if cigar_op == 'M': + read_nt = read_seq[read_pos + j] + else: + read_nt = 'D' + if read_nt not in mpileup[right_pos + j][1]: + mpileup[right_pos + j][1][read_nt] = 1 + else: + mpileup[right_pos + j][1][read_nt] += 1 + + if cigar_op in "MND": + right_pos += length + + if cigar_op in "MIS": + read_pos += length + + # Choose representative bases or 'D' + for i in range(len(mpileup)): + nt_dic = mpileup[i][1] + num_nt = sum(nt_dic.values()) + nt_set = [] + if num_nt >= 20: + for nt, count in nt_dic.items(): + if nt not in "ACGT": + continue + if count >= num_nt * 0.2 or count >= 7: + nt_set.append(nt) + mpileup[i][0] = nt_set + + # Sort variants + var_list = [[] for i in range(len(mpileup))] + for var_id, value in vars.items(): + var_type, var_pos, var_data = value + assert var_pos < len(var_list) + var_list[var_pos].append([var_id, var_type, var_data]) + + # Assign known or unknown variants + skip_i, prev_del_var_id = -1, "" + for i in range(len(mpileup)): + nt_dic = mpileup[i][1] + ref_nt = ref_seq[i] + new_nt_dic = {} + for nt, count in nt_dic.items(): + var_id = "" + if nt == 'D': + if i <= skip_i: + assert prev_del_var_id != "" + var_id = prev_del_var_id + else: + for var_id_, var_type, var_data in var_list[i]: + if var_type != "deletion": + continue + del_len = int(var_data) + del_exist = True + for j in range(i + 1, i + del_len): + assert j < len(mpileup) + nt_dic2 = mpileup[j][1] + if 'D' not in nt_dic2: + del_exist = False + break + if del_exist: + var_id = var_id_ + prev_del_var_id = var_id + skip_i = i + del_len - 1 + break + elif nt != 'N' and nt != ref_nt: + 
assert nt in "ACGT" + id = "unknown" + for var_id_, var_type, var_data in var_list[i]: + if var_type != "single": + continue + if nt == var_data: + var_id = var_id_ + break + new_nt_dic[nt] = [count, var_id] + + mpileup[i][1] = new_nt_dic + + return mpileup + + +""" +""" +def error_correct(ref_seq, + read_seq, + read_pos, + mpileup, + Vars, + Var_list, + cmp_list, + debug = False): + if debug: + print cmp_list + print read_seq + + i = 0 + while i < len(cmp_list): + type, left, length = cmp_list[i][:3] + assert length > 0 + if type == "match": + middle_cmp_list = [] + last_j = 0 + for j in range(length): + read_bp, ref_bp = read_seq[read_pos + j], ref_seq[left + j] + assert left + j < len(mpileup) + nt_set = mpileup[left + j][0] + if len(nt_set) > 0 and read_bp not in nt_set: + read_bp = 'N' if len(nt_set) > 1 else nt_set[0] + read_seq = read_seq[:read_pos + j] + read_bp + read_seq[read_pos + j + 1:] + assert read_bp != ref_bp + new_cmp = ["mismatch", left + j, 1, "unknown"] + if read_bp != 'N': + var_idx = lower_bound(Var_list, left + j) + while var_idx < len(Var_list): + var_pos, var_id = Var_list[var_idx] + if var_pos > left + j: + break + if var_pos == left + j: + var_type, _, var_data = Vars[var_id] + if var_type == "single" and read_bp == var_data: + new_cmp[3] = var_id + break + var_idx += 1 + if j > last_j: + middle_cmp_list.append(["match", left + last_j, j- last_j]) + middle_cmp_list.append(new_cmp) + last_j = j + 1 + if last_j < length: + middle_cmp_list.append(["match", left + last_j, length - last_j]) + + assert len(middle_cmp_list) > 0 + cmp_list = cmp_list[:i] + middle_cmp_list + cmp_list[i+1:] + i += (len(middle_cmp_list) - 1) + else: + assert type == "mismatch" + read_bp, ref_bp = read_seq[read_pos], ref_seq[left] + assert left < len(mpileup) + nt_set = mpileup[left][0] + + if debug: + print left, read_bp, ref_bp, mpileup[left] + + if len(nt_set) > 0 and read_bp not in nt_set: + read_bp = 'N' if len(nt_set) > 1 else nt_set[0] + read_seq = 
read_seq[:read_pos] + read_bp + read_seq[read_pos+1:] + if read_bp == 'N': + cmp_list[i][3] = "unknown" + elif read_bp == ref_bp: + cmp_list[i] = ["match", left, 1] + else: + cmp_list[i][3] = "unknown" + var_idx = lower_bound(Var_list, left) + while var_idx < len(Var_list): + var_pos, var_id = Var_list[var_idx] + if var_pos > left: + break + if var_pos == left: + var_type, _, var_data = Vars[var_id] + if var_type == "single" and read_bp == var_data: + cmp_list[i][3] = var_id + break + var_idx += 1 + + if debug: + print left, read_bp, ref_bp, mpileup[left] + print cmp_list[i] + + read_pos += length + i += 1 + + # Combine matches + i = 0 + while i < len(cmp_list): + type, left, length = cmp_list[i][:3] + if type == "match" and i + 1 < len(cmp_list): + type2, left2, length2 = cmp_list[i+1][:3] + if type2 == "match": + cmp_list[i] = [type, left, length + length2] + cmp_list = cmp_list[:i+1] + cmp_list[i+2:] + continue + i += 1 + + if debug: + print cmp_list + print read_seq + + return cmp_list, read_seq + + +""" +Use Stranded-seq reads to resolve assembly ambiguity +""" +def stranded_seq_alignment(genome_name, + sra_run_info, + ex_path, + ref_allele): + read_dir = sra_run_info.split('/')[:-1] + read_dir = '/'.join(read_dir) + runs = [] + for line in open(sra_run_info): + line = line.strip() + fields = line.split('\t') + genome_name_, run = fields[4], fields[0] + if genome_name == genome_name_: + runs.append(run) + + run_alignments = [] + for run in runs: + read_fname, out_fname = "%s/%s.extracted.fq.gz" % (read_dir, run), "%s/%s.bam" % (read_dir, run) + align_reads(ex_path, + "hisat2", + False, # simulation? + "hla", + "graph", + [read_fname], + True, # fastq? + 1, # number of threads + out_fname, + False) # verbose? 
+ + # Read alignments + alignview_cmd = ["samtools", + "view", + out_fname] + base_locus = 0 + alignview_cmd += [ref_allele] + alignview_proc = subprocess.Popen(alignview_cmd, + stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + + plus, minus = [], [] + # Cigar regular expression + cigar_re = re.compile('\d+\w') + pos_added = set() + for line in alignview_proc.stdout: + line = line.strip() + fields = line.split('\t') + read_id, flag, ref, pos, _, cigar_str = fields[:6] + flag = int(flag) + assert run == read_id.split('.')[0] + + if flag & 0x4 != 0: + continue + + Zs = "" + for i in range(11, len(fields)): + field = fields[i] + if field.startswith("Zs"): + Zs = field[5:] + + vars = [] + if Zs != "": + for var in Zs.split(','): + _, _, var = var.split('|') + vars.append(var) + + left_pos = int(pos) - 1 + if left_pos in pos_added: + continue + pos_added.add(left_pos) + right_pos = left_pos + cigars = cigar_re.findall(cigar_str) + cigars = [[cigar[-1], int(cigar[:-1])] for cigar in cigars] + for i in range(len(cigars)): + cigar_op, length = cigars[i] + if cigar_op in "MND": + right_pos += length + + entry = [left_pos, right_pos, vars] + if flag & 0x10 == 0: + plus.append(entry) + else: + minus.append(entry) + + if len(plus) > 0 and len(minus) > 0: + if len(plus) > 1 or len(minus) > 1: + run_alignments.append([run, plus, minus]) + + return run_alignments + + +""" +""" +def typing(ex_path, + simulation, + base_fname, + locus_list, + partial, + partial_alleles, + refGenes, + Genes, + Gene_names, + Gene_lengths, + refGene_loci, + Vars, + Var_list, + Links, + aligners, + num_editdist, + assembly, + output_base, + error_correction, + allow_discordant, + display_alleles, + stranded_seq, + fastq, + read_fname, + alignment_fname, + num_frag_list, + threads, + best_alleles, + verbose): + if simulation: + test_passed = {} + report_file = open(output_base + ".report", 'w') + for aligner, index_type in aligners: + for f_ in [sys.stderr, report_file]: + if index_type == 
def add_novel_var(gene_vars,
                  gene_var_list,
                  novel_var_count,
                  var_type,
                  var_pos,
                  var_data):
    """Register a novel variant and keep gene_var_list ordered.

    The variant is stored in gene_vars as "nv<novel_var_count>" and its
    [position, id] pair is inserted into gene_var_list.  Returns the new ID
    together with the incremented counter.
    """
    slot = lower_bound(gene_var_list, var_pos)
    total = len(gene_var_list)
    while slot < total:
        known_pos, known_id = gene_var_list[slot]
        if known_pos > var_pos:
            break
        if known_pos == var_pos:
            known_type, _, known_data = gene_vars[known_id]
            # The very same variant must not be registered twice
            assert known_type != var_type or known_data != var_data
            if known_type == var_type:
                # Same type at the same position: ordered by data
                goes_first = var_data < known_data
            else:
                # An insertion goes before any other type; a single goes
                # before a deletion
                goes_first = (var_type == "insertion" or
                              (var_type == "single" and known_type == "deletion"))
            if goes_first:
                break
        slot += 1

    new_id = "nv%d" % novel_var_count
    assert new_id not in gene_vars
    gene_vars[new_id] = [var_type, var_pos, var_data]
    gene_var_list.insert(slot, [var_pos, new_id])
    return new_id, novel_var_count + 1
def create_allele_node(allele_name):
    """Return the assembly-graph node for allele_name, building it on first use.

    The node sequence is the backbone (ref_seq) with the allele's variants
    applied: a "single" variant replaces one base, a "deletion" marks the
    deleted bases with 'D'.  Insertions are not applied.  Built nodes are
    cached in allele_nodes.
    """
    if allele_name in allele_nodes:
        return allele_nodes[allele_name]

    backbone_len = len(ref_seq)
    node_seq = list(ref_seq)            # sequence that node represents
    node_var = [""] * backbone_len      # how sequence is related to backbone
    for var_id in allele_vars.get(allele_name, []):
        assert var_id in gene_vars
        var_type, var_pos, var_data = gene_vars[var_id]
        assert 0 <= var_pos < backbone_len
        if var_type == "single":
            node_seq[var_pos] = var_data
            node_var[var_pos] = var_id
        elif var_type == "deletion":
            del_len = int(var_data)
            del_end = var_pos + del_len
            assert del_end <= backbone_len
            node_seq[var_pos:del_end] = ['D'] * del_len
            node_var[var_pos:del_end] = [var_id] * del_len
        else:
            # DK - to be implemented for insertions
            assert var_type == "insertion"

    node = assembly_graph.Node(allele_name,
                               0,
                               node_seq,
                               ' ' * len(node_seq),
                               node_var,
                               ref_seq,
                               gene_vars,
                               mpileup,
                               simulation)
    allele_nodes[allele_name] = node
    return node
+ if flag & 0x2 != 0: + concordant = True + else: + concordant = False + + NM, Zs, MD, NH = "", "", "", "" + for i in range(11, len(cols)): + col = cols[i] + if col.startswith("Zs"): + Zs = col[5:] + elif col.startswith("MD"): + MD = col[5:] + elif col.startswith("NM"): + NM = int(col[5:]) + elif col.startswith("NH"): + NH = int(col[5:]) + + if NM > num_editdist: + continue + + # Only consider unique alignment + if NH > 1: + continue + + # Concordantly aligned mate pairs + if not allow_discordant and not concordant: + continue + + # Left read? + if flag & 0x40 != 0: + if read_id in left_read_ids: + continue + left_read_ids.add(read_id) + if not simulation: + node_read_id += '|L' + else: # Right read? + assert flag & 0x80 != 0 + if read_id in right_read_ids: + continue + right_read_ids.add(read_id) + if not simulation: + node_read_id += '|R' + + if Zs: + Zs = Zs.split(',') + + assert MD != "" + MD_str_pos, MD_len = 0, 0 + Zs_pos, Zs_i = 0, 0 + for _i in range(len(Zs)): + Zs[_i] = Zs[_i].split('|') + Zs[_i][0] = int(Zs[_i][0]) + if Zs_i < len(Zs): + Zs_pos += Zs[Zs_i][0] + read_pos, left_pos = 0, pos + right_pos = left_pos + cigars = cigar_re.findall(cigar_str) + cigars = [[cigar[-1], int(cigar[:-1])] for cigar in cigars] + cmp_list = [] + + likely_misalignment = False + + # Extract variants w.r.t backbone from CIGAR string + softclip = [0, 0] + for i in range(len(cigars)): + cigar_op, length = cigars[i] + if cigar_op == 'M': + first = True + MD_len_used = 0 + cmp_list_i = len(cmp_list) + while True: + if not first or MD_len == 0: + if MD[MD_str_pos].isdigit(): + num = int(MD[MD_str_pos]) + MD_str_pos += 1 + while MD_str_pos < len(MD): + if MD[MD_str_pos].isdigit(): + num = num * 10 + int(MD[MD_str_pos]) + MD_str_pos += 1 + else: + break + MD_len += num + # Insertion or full match followed + if MD_len >= length: + MD_len -= length + if length > MD_len_used: + cmp_list.append(["match", right_pos + MD_len_used, length - MD_len_used]) + break + first = False + read_base 
= read_seq[read_pos + MD_len] + MD_ref_base = MD[MD_str_pos] + MD_str_pos += 1 + assert MD_ref_base in "ACGT" + if MD_len > MD_len_used: + cmp_list.append(["match", right_pos + MD_len_used, MD_len - MD_len_used]) + + _var_id = "unknown" + if read_pos + MD_len == Zs_pos and Zs_i < len(Zs): + assert Zs[Zs_i][1] == 'S' + _var_id = Zs[Zs_i][2] + Zs_i += 1 + Zs_pos += 1 + if Zs_i < len(Zs): + Zs_pos += Zs[Zs_i][0] + else: + # Search for a known (yet not indexed) variant or a novel variant + ref_pos = right_pos + MD_len + var_idx = lower_bound(gene_var_list, ref_pos) + while var_idx < len(gene_var_list): + var_pos, var_id = gene_var_list[var_idx] + if var_pos > ref_pos: + break + if var_pos == ref_pos: + var_type, _, var_data = gene_vars[var_id] + if var_type == "single" and var_data == read_base: + _var_id = var_id + break + var_idx += 1 + + cmp_list.append(["mismatch", right_pos + MD_len, 1, _var_id]) + MD_len_used = MD_len + 1 + MD_len += 1 + # Full match + if MD_len == length: + MD_len = 0 + break + + # Correction for sequencing errors and update for cmp_list + if error_correction: + assert cmp_list_i < len(cmp_list) + new_cmp_list, read_seq = error_correct(ref_seq, + read_seq, + read_pos, + mpileup, + gene_vars, + gene_var_list, + cmp_list[cmp_list_i:], + node_read_id == "#HSQ1008:176:D0UYCACXX:4:1304:19006:96208|R") + cmp_list = cmp_list[:cmp_list_i] + new_cmp_list + + elif cigar_op == 'I': + _var_id = "unknown" + if read_pos == Zs_pos and Zs_i < len(Zs): + assert Zs[Zs_i][1] == 'I' + _var_id = Zs[Zs_i][2] + Zs_i += 1 + if Zs_i < len(Zs): + Zs_pos += Zs[Zs_i][0] + else: + # Search for a known (yet not indexed) variant or a novel variant + var_idx = lower_bound(gene_var_list, right_pos) + while var_idx < len(gene_var_list): + var_pos, var_id = gene_var_list[var_idx] + if var_pos > right_pos: + break + if var_pos == right_pos: + var_type, _, var_data = gene_vars[var_id] + if var_type == "insertion" and len(var_data) == length: + _var_id = var_id + break + var_idx += 
1 + cmp_list.append(["insertion", right_pos, length, _var_id]) + if 'N' in read_seq[read_pos:read_pos+length]: + likely_misalignment = True + + elif cigar_op == 'D': + if MD[MD_str_pos] == '0': + MD_str_pos += 1 + assert MD[MD_str_pos] == '^' + MD_str_pos += 1 + while MD_str_pos < len(MD): + if not MD[MD_str_pos] in "ACGT": + break + MD_str_pos += 1 + _var_id = "unknown" + if read_pos == Zs_pos and \ + Zs_i < len(Zs) and \ + Zs[Zs_i][1] == 'D': + _var_id = Zs[Zs_i][2] + Zs_i += 1 + if Zs_i < len(Zs): + Zs_pos += Zs[Zs_i][0] + else: + # Search for a known (yet not indexed) variant or a novel variant + var_idx = lower_bound(gene_var_list, right_pos) + while var_idx < len(gene_var_list): + var_pos, var_id = gene_var_list[var_idx] + if var_pos > right_pos: + break + if var_pos == right_pos: + var_type, _, var_data = gene_vars[var_id] + if var_type == "deletion" and int(var_data) == length: + _var_id = var_id + break + var_idx += 1 + + cmp_list.append(["deletion", right_pos, length, _var_id]) + + # Check if this deletion is artificial alignment + assert right_pos < mpileup + del_count, nt_count = 0, 0 + for nt, value in mpileup[right_pos][1].items(): + count = value[0] + if nt == 'D': + del_count += count + else: + nt_count += count + # DK - debugging purposes + if del_count * 6 < nt_count: # and nt_count >= 15: + likely_misalignment = True + + elif cigar_op == 'S': + if i == 0: + softclip[0] = length + Zs_pos += length + else: + assert i + 1 == len(cigars) + softclip[1] = length + else: + assert cigar_op == 'N' + assert False + cmp_list.append(["intron", right_pos, length]) + + if cigar_op in "MND": + right_pos += length + + if cigar_op in "MIS": + read_pos += length + + # Remove softclip in cigar and modify read_seq and read_qual accordingly + if sum(softclip) > 0: + if softclip[0] > 0: + cigars = cigars[1:] + read_seq = read_seq[softclip[0]:] + read_qual = read_qual[softclip[0]:] + if softclip[1] > 0: + cigars = cigars[:-1] + read_seq = read_seq[:-softclip[1]] + 
def add_stat(Gene_cmpt, Gene_counts, Gene_count_per_read, include_alleles = None):
    """Record which alleles are best supported by one read.

    Gene_count_per_read maps allele -> score for the read.  Every allele
    whose score equals the maximum over ALL alleles (and that is in
    include_alleles, when that collection is non-empty) gets +1 in
    Gene_counts.  The sorted, '-'-joined names of those alleles form the
    read's compatibility key, which gets +1 in Gene_cmpt and is returned.

    Returns "" (and leaves Gene_cmpt untouched) when no allele qualifies,
    including when Gene_count_per_read is empty.

    The former hard-wired two-allele debugging branch was unreachable (its
    allele names were empty strings) and has been removed.
    """
    if not Gene_count_per_read:
        # max() of an empty sequence would raise ValueError
        return ""

    # The maximum is taken before filtering by include_alleles on purpose:
    # an included allele only counts if no excluded allele scores higher.
    max_count = max(Gene_count_per_read.values())
    best_alleles = []
    for allele, count in Gene_count_per_read.items():
        if count < max_count:
            continue
        if include_alleles and allele not in include_alleles:
            continue
        best_alleles.append(allele)
        Gene_counts[allele] = Gene_counts.get(allele, 0) + 1

    if not best_alleles:
        return ""

    cur_cmpt = '-'.join(sorted(best_alleles))
    Gene_cmpt[cur_cmpt] = Gene_cmpt.get(cur_cmpt, 0) + 1
    return cur_cmpt
prev_lines: + print "\t", prev_line + + prev_lines = [] + + Gene_count_per_read, Gene_gen_count_per_read = {}, {} + for Gene_name in Gene_names[gene]: + if Gene_name.find("BACKBONE") != -1: + continue + Gene_count_per_read[Gene_name] = 0 + Gene_gen_count_per_read[Gene_name] = 0 + + prev_lines.append(line) + + def add_count(count_per_read, var_id, add): + alleles = Links[var_id] + if verbose >= 2: + if add > 0 and not (set(alleles) & debug_allele_names): + print "Add:", add, debug_allele_names, "-", var_id + print "\t", line + print "\t", alleles + if add < 0 and set(alleles) & debug_allele_names: + print "Add:", add, debug_allele_names, "-", var_id + print "\t", line + + for allele in alleles: + count_per_read[allele] += add + + # Decide which allele(s) a read most likely came from + for var_id, data in gene_vars.items(): + if var_id == "unknown" or var_id.startswith("nv"): + continue + var_type, var_pos, var_data = data + if var_type != "deletion": + continue + if left_pos >= var_pos and right_pos <= var_pos + int(var_data): + if var_id in exon_vars: + add_count(Gene_count_per_read, var_id, -1) + add_count(Gene_gen_count_per_read, var_id, -1) + + # Node + read_node_pos, read_node_seq, read_node_qual, read_node_var = -1, [], [], [] + read_vars = [] + + # Positive and negative evidence + positive_vars, negative_vars = set(), set() + + # Sanity check - read length, cigar string, and MD string + ref_pos, read_pos, cmp_cigar_str, cmp_MD = left_pos, 0, "", "" + cigar_match_len, MD_match_len = 0, 0 + + cmp_list_left, cmp_list_right = identify_ambigious_diffs(gene_vars, + Alts_left, + Alts_right, + cmp_list, + verbose, + orig_read_id == "a45|L_441_89M8D11M_89|D|hv1,7|S|hv15") # debug? 
+ + # DK - debugging purposes + DK_debug = False + if orig_read_id == "a46|L_451_88M12D12M_88|D|hv2": + DK_debug = True + print line + print cmp_list + print "positive vars:", positive_vars + print "negative vars:", negative_vars + print "cmp_list[%d, %d]" % (cmp_list_left, cmp_list_right) + + # Deletions at 5' and 3' ends + for var_id, data in gene_vars.items(): + var_type, var_pos, var_data = data + if var_type != "deletion": + continue + if left_pos >= var_pos and right_pos <= var_pos + int(var_data): + negative_vars.add(var_id) + + cmp_i = 0 + while cmp_i < len(cmp_list): + cmp = cmp_list[cmp_i] + type, length = cmp[0], cmp[2] + # Disable the following sanity check due to error correction + # if num_editdist == 0 and type in ["mismatch", "deletion", "insertion"]: + # assert cmp[3] != "unknown" + + if type in ["match", "mismatch"]: + if read_node_pos < 0: + read_node_pos = ref_pos + + if type == "match": + read_node_seq += list(read_seq[read_pos:read_pos+length]) + read_node_qual += list(read_qual[read_pos:read_pos+length]) + read_node_var += ([''] * length) + + var_idx = lower_bound(gene_var_list, ref_pos) + while var_idx < len(gene_var_list): + var_pos, var_id = gene_var_list[var_idx] + if ref_pos + length <= var_pos: + break + if ref_pos <= var_pos: + var_type, _, var_data = gene_vars[var_id] + if var_type == "insertion": + if ref_pos < var_pos and ref_pos + length > var_pos + len(var_data): + negative_vars.add(var_id) + elif var_type == "deletion": + del_len = int(var_data) + if ref_pos < var_pos and ref_pos + length > var_pos + del_len: + if base_fname == "codis": + cmp_left, cmp_right = left_pos, right_pos + else: + cmp_left, cmp_right = cmp[1], cmp[1] + cmp[2] + + # Check if this might be one of the two tandem repeats (the same left coordinate) + test1_seq1 = ref_seq[cmp_left:cmp_right] + test1_seq2 = ref_seq[cmp_left:var_pos] + ref_seq[var_pos + del_len:cmp_right + del_len] + # Check if this happens due to small repeats (the same right coordinate - e.g. 
19 times of TTTC in DQA1*05:05:01:02) + cmp_left -= read_pos + cmp_right += (len(read_seq) - read_pos - cmp[2]) + test2_seq1 = ref_seq[cmp_left+int(var_data):cmp_right] + test2_seq2 = ref_seq[cmp_left:var_pos] + ref_seq[var_pos+int(var_data):cmp_right] + + if test1_seq1 != test1_seq2 and test2_seq1 != test2_seq2: + negative_vars.add(var_id) + else: + negative_vars.add(var_id) + var_idx += 1 + read_pos += length + ref_pos += length + cigar_match_len += length + MD_match_len += length + elif type == "mismatch": + var_id = cmp[3] + read_base, qual = read_seq[read_pos], read_qual[read_pos] + read_node_seq += [read_base] + read_node_qual += [qual] + read_node_var.append(var_id) + if var_id != "unknown": + if cmp_i >= cmp_list_left and cmp_i <= cmp_list_right: + positive_vars.add(var_id) + + if read_id == "HSQ1008:175:C0JVFACXX:6:2207:13481:60924" and False: + print "add positive var:", var_id + print "\tcmp_list:", cmp_list_left, cmp_list_right, cmp_list + + + cmp_MD += ("%d%s" % (MD_match_len, ref_seq[ref_pos])) + MD_match_len = 0 + cigar_match_len += 1 + read_pos += 1 + ref_pos += 1 + elif type == "insertion": + var_id = cmp[3] + ins_len = length + ins_seq = read_seq[read_pos:read_pos+ins_len] + if var_id != "unknown" or not var_id.startswith("nv"): + if cmp_i >= cmp_list_left and cmp_i <= cmp_list_right: + # Require at least 5bp match before and after a deletion + if read_pos >= 5 and read_pos + 5 <= len(read_seq): + positive_vars.add(var_id) + read_node_seq += ["I%s" % nt for nt in ins_seq] + read_node_qual += list(read_qual[read_pos:read_pos+ins_len]) + read_node_var += ([var_id] * ins_len) + if cigar_match_len > 0: + cmp_cigar_str += ("%dM" % cigar_match_len) + cigar_match_len = 0 + read_pos += length + cmp_cigar_str += ("%dI" % length) + elif type == "deletion": + var_id = cmp[3] + alt_match = False + del_len = length + read_node_seq += (['D'] * del_len) + read_node_qual += ([''] * del_len) + if var_id != "unknown" or not var_id.statswith("nv"): + if cmp_i >= 
cmp_list_left and cmp_i <= cmp_list_right: + # Require at least 5bp match before and after a deletion + if read_pos >= 5 and read_pos + 5 <= len(read_seq): + positive_vars.add(var_id) + + if len(read_node_seq) > len(read_node_var): + assert len(read_node_seq) == len(read_node_var) + del_len + read_node_var += ([var_id] * del_len) + + if cigar_match_len > 0: + cmp_cigar_str += ("%dM" % cigar_match_len) + cigar_match_len = 0 + cmp_MD += ("%d" % MD_match_len) + MD_match_len = 0 + cmp_cigar_str += ("%dD" % length) + cmp_MD += ("^%s" % ref_seq[ref_pos:ref_pos+length]) + ref_pos += length + else: + assert type == "intron" + if cigar_match_len > 0: + cmp_cigar_str += ("%dM" % cigar_match_len) + cigar_match_len = 0 + cmp_cigar_str += ("%dN" % length) + ref_pos += length + + cmp_i += 1 + + if cigar_match_len > 0: + cmp_cigar_str += ("%dM" % cigar_match_len) + cmp_MD += ("%d" % MD_match_len) + # Sanity check + if read_pos != len(read_seq) or \ + cmp_cigar_str != cigar_str: + # cmp_MD != MD: # Disabled due to error correction + print >> sys.stderr, "Error:", cigar_str, MD + print >> sys.stderr, "\tcomputed:", cmp_cigar_str, cmp_MD + print >> sys.stderr, "\tcmp list:", cmp_list + assert False + + # DK - debugging purposes + if DK_debug: + print "positive:", positive_vars + print "negative:", negative_vars + + # Node + if assembly: + read_nodes.append([node_read_id, + assembly_graph.Node(node_read_id, + read_node_pos, + read_node_seq, + read_node_qual, + read_node_var, + ref_seq, + gene_vars, + mpileup, + simulation)]) + + for positive_var in positive_vars: + if positive_var == "unknown" or positive_var.startswith("nv"): + continue + if positive_var in exon_vars: + add_count(Gene_count_per_read, positive_var, 1) + add_count(Gene_gen_count_per_read, positive_var, 1) + + if read_id == "HSQ1008:175:C0JVFACXX:6:2207:13481:60924" and False: + print "positive_vars:", positive_vars + print "negative_vars:", negative_vars + + + for negative_var in negative_vars: + if negative_var == 
def add_alleles(alleles):
    """Record one read's set of best-scoring alleles (linear index).

    Each allele in `alleles` gets +1 in Gene_counts, and the sorted,
    '-'-joined allele names get +1 in Gene_cmpt.  Both dictionaries come
    from the enclosing scope.

    Previously the count was added to `allele` from the enclosing loop,
    i.e. the allele of the alignment line being parsed when the previous
    read was flushed, rather than to the alleles passed in.
    """
    for allele in alleles:
        Gene_counts[allele] = Gene_counts.get(allele, 0) + 1

    cur_cmpt = '-'.join(sorted(alleles))
    Gene_cmpt[cur_cmpt] = Gene_cmpt.get(cur_cmpt, 0) + 1
- a[1] + assert a[0] != b[0] + if a[0] < b[0]: + return -1 + else: + return 1 + Gene_counts = sorted(Gene_counts, cmp=Gene_count_cmp) + for count_i in range(len(Gene_counts)): + count = Gene_counts[count_i] + if simulation: + found = False + for test_Gene_name in test_Gene_names: + if count[0] == test_Gene_name: + for f_ in [sys.stderr, report_file]: + print >> f_, "\t\t\t*** %d ranked %s (count: %d)" % (count_i + 1, test_Gene_name, count[1]) + found = True + """ + if count_i > 0 and Gene_counts[0][1] > count[1]: + print >> sys.stderr, "Warning: %s ranked first (count: %d)" % (Gene_counts[0][0], Gene_counts[0][1]) + assert False + else: + test_passed += 1 + """ + if count_i < 5 and not found: + for f_ in [sys.stderr, report_file]: + print >> f_, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]) + else: + for f_ in [sys.stderr, report_file]: + print >> f_, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]) + if count_i >= 9: + break + for f_ in [sys.stderr, report_file]: + print >> f_ + + # Calculate the abundance of representative alleles on exonic sequences + if base_fname == "hla": + # Incorporate non representative alleles (full length alleles) + Gene_prob = single_abundance(Gene_cmpt, Gene_lengths[gene]) + gen_alleles = set() + gen_prob_sum = 0.0 + for prob_i in range(len(Gene_prob)): + allele, prob = Gene_prob[prob_i][:2] + if prob_i >= 10 and prob < 0.03: + break + if allele in partial_alleles: + continue + + gen_prob_sum += prob + for allele2 in allele_rep_groups[allele]: + gen_alleles.add(allele2) + + if len(gen_alleles) > 0: + Gene_gen_cmpt2 = {} + for cmpt, value in Gene_gen_cmpt.items(): + cmpt2 = [] + for allele in cmpt.split('-'): + if allele in gen_alleles: + cmpt2.append(allele) + if len(cmpt2) == 0: + continue + cmpt2 = '-'.join(cmpt2) + if cmpt2 not in Gene_gen_cmpt2: + Gene_gen_cmpt2[cmpt2] = value + else: + Gene_gen_cmpt2[cmpt2] += value + Gene_gen_cmpt = Gene_gen_cmpt2 + Gene_gen_prob = single_abundance(Gene_gen_cmpt, 
def get_best_alleles(left, right, vars):
    """Pick the predicted allele(s) whose variants best agree with `vars`.

    For every node in predicted_allele_nodes (enclosing scope) the score is
    |shared variants| - |union of variants|, using the node's variant IDs
    between left and right.  Returns [[allele_name, score], ...] for all
    alleles that reach the highest score.
    """
    read_vars = set(vars)
    best_score = -sys.maxint
    best = []
    for allele_name, allele_node in predicted_allele_nodes.items():
        node_vars = set(allele_node.get_var_ids(left, right))
        score = len(read_vars & node_vars) - len(read_vars | node_vars)
        if score > best_score:
            best_score = score
            best = [[allele_name, best_score]]
        elif score == best_score:
            best.append([allele_name, best_score])
    return best
print run + print "\tplus:" + for left, right, vars in plus: + print "\t\t", left, right, vars, get_best_alleles(left, right, vars) + print "\tminus:" + for left, right, vars in minus: + print "\t\t", left, right, vars, get_best_alleles(left, right, vars) + + assert False + + + # DK - debugging purposes + # """ + + # Draw assembly graph + asm_graph.nodes = asm_graph.nodes2 + asm_graph.to_node, asm_graph.from_node = {}, {} + begin_y = asm_graph.draw(begin_y, "Assembly with known alleles") + + # """ + + # End drawing assembly graph + asm_graph.end_draw() + + # Compare two alleles + if simulation and len(test_Gene_names) == 2: + allele_name1, allele_name2 = test_Gene_names + print >> sys.stderr, allele_name1, "vs.", allele_name2 + asm_graph.print_node_comparison(asm_graph.true_allele_nodes) + + def compare_alleles(vars1, vars2, print_output = True): + skip = True + var_i, var_j = 0, 0 + exon_i = 0 + mismatches = 0 + while var_i < len(vars1) and var_j < len(vars2): + cmp_var_id, node_var_id = vars1[var_i], vars2[var_j] + cmp_var, node_var = gene_vars[cmp_var_id], gene_vars[node_var_id] + + min_pos = min(cmp_var[1], node_var[1]) + cmp_var_in_exon, node_var_in_exon = False, False + while exon_i < len(ref_exons): + exon_left, exon_right = ref_exons[exon_i] + if min_pos <= exon_right: + if cmp_var[1] >= exon_left and cmp_var[1] <= exon_right: + cmp_var_in_exon = True + else: + cmp_var_in_exon = False + if node_var[1] >= exon_left and node_var[1] <= exon_right: + node_var_in_exon = True + else: + node_var_in_exon = False + break + exon_i += 1 + + if cmp_var_id == node_var_id: + skip = False + if print_output: + if cmp_var_in_exon: + print >> sys.stderr, "\033[94mexon%d\033[00m" % (exon_i + 1), + print >> sys.stderr, cmp_var_id, cmp_var, "\t\t\t", mpileup[cmp_var[1]] + var_i += 1; var_j += 1 + continue + if cmp_var[1] <= node_var[1]: + if not skip: + if (var_i > 0 and var_i + 1 < len(vars1)) or cmp_var[0] != "deletion": + if print_output: + if cmp_var_in_exon: + for f_ in 
[sys.stderr, report_file]: + print >> f_, "\033[94mexon%d\033[00m" % (exon_i + 1), + for f_ in [sys.stderr, report_file]: + print >> f_, "***", cmp_var_id, cmp_var, "==", "\t\t\t", mpileup[cmp_var[1]] + mismatches += 1 + var_i += 1 + else: + if print_output: + if node_var_in_exon: + for f_ in [sys.stderr, report_file]: + print >> f_, "\033[94mexon%d\033[00m" % (exon_i + 1), + for f_ in [sys.stderr, report_file]: + print >> f_, "*** ==", node_var_id, node_var, "\t\t\t", mpileup[node_var[1]] + mismatches += 1 + var_j += 1 + + return mismatches + + tmp_nodes = asm_graph.nodes + print >> sys.stderr, "Number of tmp nodes:", len(tmp_nodes) + count = 0 + for id, node in tmp_nodes.items(): + count += 1 + if count > 10: + break + node_vars = node.get_var_ids() + node.print_info(); print >> sys.stderr + if node.id in asm_graph.to_node: + for id2, at in asm_graph.to_node[node.id]: + print >> sys.stderr, "\tat %d ==> %s" % (at, id2) + + if simulation: + cmp_Gene_names = test_Gene_names + else: + cmp_Gene_names = [allele_name for allele_name, _ in allele_node_order] + + alleles, cmp_vars, max_common = [], [], -sys.maxint + for cmp_Gene_name in cmp_Gene_names: + tmp_vars = allele_nodes[cmp_Gene_name].get_var_ids(node.left, node.right) + tmp_common = len(set(node_vars) & set(tmp_vars)) + tmp_common -= len(set(node_vars) | set(tmp_vars)) + if max_common < tmp_common: + max_common = tmp_common + alleles = [[cmp_Gene_name, tmp_vars]] + elif max_common == tmp_common: + alleles.append([cmp_Gene_name, tmp_vars]) + + for allele_name, cmp_vars in alleles: + for f_ in [sys.stderr, report_file]: + print >> f_, "vs.", allele_name + compare_alleles(cmp_vars, node_vars) + + print >> sys.stderr + print >> sys.stderr + + + # Identify alleles that perfectly or closesly match assembled alleles + for node_name, node in asm_graph.nodes.items(): + vars = set(node.get_var_ids()) + + max_allele_names, max_common = [], -sys.maxint + for allele_name, vars2 in allele_vars.items(): + vars2 = set(vars2) + 
tmp_common = len(vars & vars2) - len(vars | vars2) + if tmp_common > max_common: + max_common = tmp_common + max_allele_names = [allele_name] + elif tmp_common == max_common: + max_allele_names.append(allele_name) + + for f_ in [sys.stderr, report_file]: + print >> f_, "Genomic:", node_name + node_vars = node.get_var_ids() + min_mismatches = sys.maxint + for max_allele_name in max_allele_names: + cmp_vars = allele_vars[max_allele_name] + cmp_vars = sorted(cmp_vars, cmp=lambda a, b: int(a[2:]) - int(b[2:])) + print_output = False + tmp_mismatches = compare_alleles(cmp_vars, node_vars, print_output) + print >> f_, "\t\t%s:" % max_allele_name, max_common, tmp_mismatches + if tmp_mismatches < min_mismatches: + min_mismatches = tmp_mismatches + if min_mismatches > 0: + print >> f_, "Novel allele" + else: + print >> f_, "Known allele" + + """ + allele_exon_vars = {} + for allele_name, vars in allele_vars.items(): + allele_exon_vars[allele_name] = set(vars) & exon_vars + + for node_name, node in asm_graph.nodes.items(): + vars = [] + for left, right in ref_exons: + vars += node.get_var_ids(left, right) + vars = set(vars) & exon_vars + + max_allele_names, max_common = [], -sys.maxint + for allele_name, vars2 in allele_exon_vars.items(): + tmp_common = len(vars & vars2) - len(vars | vars2) + if tmp_common > max_common: + max_common = tmp_common + max_allele_names = [allele_name] + elif tmp_common == max_common: + max_allele_names.append(allele_name) + + for f_ in [sys.stderr, report_file]: + print >> f_, "Exonic:", node_name + for max_allele_name in max_allele_names: + print >> f_, "\t\t%s:" % max_allele_name, max_common + """ + + success = [False for i in range(len(test_Gene_names))] + found_list = [False for i in range(len(test_Gene_names))] + for prob_i in range(len(Gene_prob)): + prob = Gene_prob[prob_i] + found = False + _allele_rep = prob[0] + """ + if partial and exonic_only: + _fields = _allele_rep.split(':') + if len(_fields) == 4: + _allele_rep = 
"""
Read allele sequences from a FASTA file.

fname: path of the FASTA file to read
Genes: dictionary that is filled in place, Genes[gene][allele] = sequence,
       where gene is the part of the allele name in front of '*'

Sequence lines are appended to the most recent header's allele, so calling
this function several times with the same dictionary accumulates alleles
from several files.  Blank lines are ignored.  The same dictionary is
returned for convenience.
"""
def read_Gene_alleles(fname, Genes):
    Gene_name, Gene_gene = None, None
    # 'with' guarantees that the file is closed even if parsing fails
    with open(fname) as fasta_file:
        for line in fasta_file:
            if line.startswith(">"):
                # Header, e.g. ">A*01:01:01:01 3503 bp" -> allele "A*01:01:01:01", gene "A"
                Gene_name = line.strip().split()[0][1:]
                Gene_gene = Gene_name.split('*')[0]
                # Keep a sequence that is already stored under this allele name
                Genes.setdefault(Gene_gene, {}).setdefault(Gene_name, "")
            else:
                seq = line.strip()
                if seq == "":
                    continue
                if Gene_name is None:
                    raise ValueError("%s: sequence data found before the first FASTA header" % fname)
                Genes[Gene_gene][Gene_name] += seq
    return Genes


"""
Read variants from a tab-separated file whose lines consist of five fields:
variant ID, variant type, allele name, position, and data.

Returns (Vars, Var_list) where
  Vars[gene][var_id] = [var_type, pos, data]
  Var_list[gene]     = list of [pos, var_id] sorted in ascending order
and gene is the part of the allele name in front of '*'.
Blank lines are ignored.
"""
def read_Gene_vars(fname):
    Vars, Var_list = {}, {}
    with open(fname) as var_file:
        for line in var_file:
            line = line.strip()
            if line == "":
                continue
            var_id, var_type, allele, pos, data = line.split('\t')
            pos = int(pos)
            gene = allele.split('*')[0]
            if gene not in Vars:
                Vars[gene] = {}
                assert gene not in Var_list
                Var_list[gene] = []

            # Variant IDs are expected to be unique within a gene
            assert var_id not in Vars[gene]
            Vars[gene][var_id] = [var_type, pos, data]
            Var_list[gene].append([pos, var_id])

    for in_var_list in Var_list.values():
        in_var_list.sort()

    return Vars, Var_list


"""
Read the variant-to-allele links from a file whose lines consist of
a variant ID, a tab, and a whitespace-separated list of allele names.

Returns Links[var_id] = list of allele names.  Blank lines are ignored.
"""
def read_Gene_links(fname):
    Links = {}
    with open(fname) as link_file:
        for line in link_file:
            line = line.strip()
            if line == "":
                continue
            var_id, alleles = line.split('\t')
            # Each variant is expected to be listed only once
            assert var_id not in Links
            Links[var_id] = alleles.split()

    return Links


"""
Build an allele sequence by applying variants to a backbone sequence.

backbone_seq: backbone (reference) sequence
var_ids:      IDs of the variants to apply
Vars:         Vars[var_id] = [type, pos, data], where type is either "single"
              (data is the substituted base) or "deletion" (data is the
              number of deleted bases)

Deleted bases are first marked with '.' so that the positions of the
remaining variants stay valid, and the marks are removed at the very end.
"""
def construct_allele_seq(backbone_seq, var_ids, Vars):
    allele_seq = list(backbone_seq)
    for var_id in var_ids:
        assert var_id in Vars
        var_type, pos, data = Vars[var_id]
        assert pos < len(allele_seq)
        if var_type == "single":
            assert allele_seq[pos] != data
            allele_seq[pos] = data
        else:
            assert var_type == "deletion"
            del_len = int(data)
            assert pos + del_len <= len(allele_seq)
            for i in range(pos, pos + del_len):
                allele_seq[i] = '.'

    return ''.join(allele_seq).replace('.', '')
"genome.fa.fai"] + if not typing_common.check_files(HISAT2_fnames): + typing_common.download_genome_and_index() + + # Check if the pre-existing files (hla*) are compatible with the current parameter setting + if os.path.exists("%s.ref" % base_fname): + left = 0 + Gene_genes = [] + BACKBONE = False + for line in open("%s.ref" % base_fname): + Gene_name = line.strip().split()[0] + if Gene_name.find("BACKBONE") != -1: + BACKBONE = True + Gene_gene = Gene_name.split('*')[0] + Gene_genes.append(Gene_gene) + delete_hla_files = False + if not BACKBONE: + delete_hla_files = True + if len(locus_list) == 0: + locus_list = Gene_genes + if not set(locus_list).issubset(set(Gene_genes)): + delete_hla_files = True + if delete_hla_files: + os.system("rm %s*" % base_fname) + + # Extract HLA variants, backbone sequence, and other sequeces + Gene_fnames = [base_fname + "_backbone.fa", + base_fname + "_sequences.fa", + base_fname + ".ref", + base_fname + ".snp", + base_fname + ".index.snp", + base_fname + ".haplotype", + base_fname + ".link"] + + if verbose >= 1: + print >> sys.stderr, Gene_fnames + + if not typing_common.check_files(Gene_fnames): + extract_hla_script = os.path.join(ex_path, "hisatgenotype_extract_vars.py") + extract_cmd = [extract_hla_script] + if len(locus_list) > 0: + extract_cmd += ["--locus-list", ','.join(locus_list)] + + extract_cmd += ["--base", base_fname] + + if not partial: + extract_cmd += ["--no-partial"] + extract_cmd += ["--inter-gap", "30", + "--intra-gap", "50"] + + # DK - debugging purposes + extract_cmd += ["--min-var-freq", "0.1"] + + if base_fname == "codis": + extract_cmd += ["--leftshift"] + + # DK - debugging purposes + # extract_cmd += ["--ext-seq", "300"] + if verbose >= 1: + print >> sys.stderr, "\tRunning:", ' '.join(extract_cmd) + proc = subprocess.Popen(extract_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w')) + proc.communicate() + + if not typing_common.check_files(Gene_fnames): + print >> sys.stderr, "Error: 
hisatgenotype_extract_vars failed!" + sys.exit(1) + + for aligner, index_type in aligners: + if aligner == "hisat2": + # Build HISAT2 graph indexes based on the above information + if index_type == "graph": + Gene_hisat2_graph_index_fnames = ["%s.graph.%d.ht2" % (base_fname, i+1) for i in range(8)] + if not typing_common.check_files(Gene_hisat2_graph_index_fnames): + hisat2_build = os.path.join(ex_path, "hisat2-build") + build_cmd = [hisat2_build, + "-p", str(threads), + "--snp", "%s.index.snp" % base_fname, + "--haplotype", "%s.haplotype" % base_fname, + "%s_backbone.fa" % base_fname, + "%s.graph" % base_fname] + if verbose >= 1: + print >> sys.stderr, "\tRunning:", ' '.join(build_cmd) + proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w')) + proc.communicate() + if not typing_common.check_files(Gene_hisat2_graph_index_fnames): + print >> sys.stderr, "Error: indexing HLA failed! Perhaps, you may have forgotten to build hisat2 executables?" + sys.exit(1) + # Build HISAT2 linear indexes based on the above information + else: + assert index_type == "linear" + Gene_hisat2_linear_index_fnames = ["%s.linear.%d.ht2" % (base_fname, i+1) for i in range(8)] + if not typing_common.check_files(Gene_hisat2_linear_index_fnames): + hisat2_build = os.path.join(ex_path, "hisat2-build") + build_cmd = [hisat2_build, + "%s_backbone.fa,%s_sequences.fa" % (base_fname, base_fname), + "%s.linear" % base_fname] + proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w')) + proc.communicate() + if not typing_common.check_files(Gene_hisat2_linear_index_fnames): + print >> sys.stderr, "Error: indexing HLA failed!" 
+ sys.exit(1) + else: + assert aligner == "bowtie2" and index_type == "linear" + # Build Bowtie2 indexes based on the above information + Gene_bowtie2_index_fnames = ["%s.%d.bt2" % (base_fname, i+1) for i in range(4)] + Gene_bowtie2_index_fnames += ["%s.rev.%d.bt2" % (base_fname, i+1) for i in range(2)] + if not typing_common.check_files(Gene_bowtie2_index_fnames): + build_cmd = ["bowtie2-build", + "%s_backbone.fa,%s_sequences.fa" % (base_fname, base_fname), + base_fname] + proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w')) + proc.communicate() + if not typing_common.check_files(Gene_bowtie2_index_fnames): + print >> sys.stderr, "Error: indexing HLA failed!" + sys.exit(1) + + # Read partial alleles from hla.data (temporary) + partial_alleles = set() + for line in open("IMGTHLA/hla.dat"): + if not line.startswith("DE"): + continue + allele_name = line.split()[1][:-1] + if allele_name.startswith("HLA-"): + allele_name = allele_name[4:] + gene = allele_name.split('*')[0] + if line.find("partial") != -1: + partial_alleles.add(allele_name) + + # Read HLA alleles (names and sequences) + refGenes, refGene_loci = {}, {} + for line in open("%s.ref" % base_fname): + Gene_name, chr, left, right, length, exon_str, strand = line.strip().split() + Gene_gene = Gene_name.split('*')[0] + assert not Gene_gene in refGenes + refGenes[Gene_gene] = Gene_name + left, right = int(left), int(right) + exons = [] + for exon in exon_str.split(','): + exon_left, exon_right = exon.split('-') + exons.append([int(exon_left), int(exon_right)]) + refGene_loci[Gene_gene] = [Gene_name, chr, left, right, exons] + Genes = {} + if len(locus_list) == 0: + locus_list = refGene_loci.keys() + + read_Gene_alleles(base_fname + "_backbone.fa", Genes) + read_Gene_alleles(base_fname + "_sequences.fa", Genes) + + # HLA gene alleles + Gene_names = {} + for Gene_gene, data in Genes.items(): + Gene_names[Gene_gene] = list(data.keys()) + + # HLA gene allele lengths + Gene_lengths = {} + for Gene_gene, 
Gene_alleles in Genes.items(): + Gene_lengths[Gene_gene] = {} + for allele_name, seq in Gene_alleles.items(): + Gene_lengths[Gene_gene][allele_name] = len(seq) + + # Read HLA variants, and link information + Vars, Var_list = read_Gene_vars("%s.snp" % base_fname) + Links = read_Gene_links("%s.link" % base_fname) + # Test HLA typing + test_list = [] + if simulation: + basic_test, pair_test = True, False + if debug_instr: + if "basic_test" in debug_instr: + basic_test, pair_test = True, False + else: + basic_test, pair_test = False, True + + test_passed = {} + test_list = [] + genes = list(set(locus_list) & set(Gene_names.keys())) + if basic_test: + for gene in genes: + Gene_gene_alleles = Gene_names[gene] + for Gene_name in Gene_gene_alleles: + if Gene_name.find("BACKBONE") != -1: + continue + test_list.append([[Gene_name]]) + if pair_test: + test_size = 500 + allele_count = 2 + for test_i in range(test_size): + test_pairs = [] + for gene in genes: + Gene_gene_alleles = [] + + for allele in Gene_names[gene]: + if allele.find("BACKBONE") != -1: + continue + + if "full" in debug: + if allele in partial_alleles: + continue + + Gene_gene_alleles.append(allele) + nums = [i for i in range(len(Gene_gene_alleles))] + random.shuffle(nums) + test_pairs.append(sorted([Gene_gene_alleles[nums[i]] for i in range(allele_count)])) + test_list.append(test_pairs) + + if "test_list" in debug_instr: + test_list = [[debug_instr["test_list"].split('-')]] + + for test_i in range(len(test_list)): + if "test_id" in debug_instr: + test_ids = debug_instr["test_id"].split('-') + if str(test_i + 1) not in test_ids: + continue + + print >> sys.stderr, "Test %d" % (test_i + 1), str(datetime.now()) + test_locus_list = test_list[test_i] + num_frag_list = typing_common.simulate_reads(Genes, + base_fname, + test_locus_list, + Vars, + Links, + simulate_interval, + read_len, + fragment_len, + perbase_errorrate, + perbase_snprate, + skip_fragment_regions) + + assert len(num_frag_list) == 
len(test_locus_list) + for i_ in range(len(test_locus_list)): + test_Gene_names = test_locus_list[i_] + num_frag_list_i = num_frag_list[i_] + assert len(num_frag_list_i) == len(test_Gene_names) + for j_ in range(len(test_Gene_names)): + test_Gene_name = test_Gene_names[j_] + gene = test_Gene_name.split('*')[0] + test_Gene_seq = Genes[gene][test_Gene_name] + seq_type = "partial" if test_Gene_name in partial_alleles else "full" + print >> sys.stderr, "\t%s - %d bp (%s sequence, %d pairs)" % (test_Gene_name, len(test_Gene_seq), seq_type, num_frag_list_i[j_]) + + if "single-end" in debug_instr: + read_fname = ["%s_input_1.fa" % base_fname] + else: + read_fname = ["%s_input_1.fa" % base_fname, "%s_input_2.fa" % base_fname] + + fastq = False + tmp_test_passed = typing(ex_path, + simulation, + base_fname, + test_locus_list, + partial, + partial_alleles, + refGenes, + Genes, + Gene_names, + Gene_lengths, + refGene_loci, + Vars, + Var_list, + Links, + aligners, + num_editdist, + assembly, + output_base, + error_correction, + discordant, + display_alleles, + stranded_seq, + fastq, + read_fname, + alignment_fname, + num_frag_list, + threads, + best_alleles, + verbose) + + for aligner_type, passed in tmp_test_passed.items(): + if aligner_type in test_passed: + test_passed[aligner_type] += passed + else: + test_passed[aligner_type] = passed + + print >> sys.stderr, "\t\tPassed so far: %d/%d (%.2f%%)" % (test_passed[aligner_type], test_i + 1, (test_passed[aligner_type] * 100.0 / (test_i + 1))) + + + for aligner_type, passed in test_passed.items(): + print >> sys.stderr, "%s:\t%d/%d passed (%.2f%%)" % (aligner_type, passed, len(test_list), passed * 100.0 / len(test_list)) + + else: # With real reads or BAMs + print >> sys.stderr, "\t", ' '.join(locus_list) + fastq = True + typing(ex_path, + simulation, + base_fname, + locus_list, + partial, + partial_alleles, + refGenes, + Genes, + Gene_names, + Gene_lengths, + refGene_loci, + Vars, + Var_list, + Links, + aligners, + 
"""
Command-line entry point: parse and validate the options, then run
test_Gene_genotyping().  The names assigned here (locus_list, debug,
skip_fragment_regions, display_alleles, stranded_seq) are module-level
variables.
"""
if __name__ == '__main__':
    parser = ArgumentParser(
        description='test HLA genotyping')
    parser.add_argument("--base", "--base-fname",
                        dest="base_fname",
                        type=str,
                        default="hla",
                        help="base filename for backbone HLA sequence, HLA variants, and HLA linking info (default: hla)")
    parser.add_argument("--locus-list",
                        dest="locus_list",
                        type=str,
                        default="",
                        help="A comma-separated list of HLA genes (default: empty, all HLA genes in IMGT/HLA database)")
    parser.add_argument('--no-partial',
                        dest='partial',
                        action='store_false',
                        help='Exclude partial alleles (e.g. A_nuc.fasta); partial alleles are included by default')
    parser.add_argument("--aligner-list",
                        dest="aligners",
                        type=str,
                        default="hisat2.graph",
                        help="A comma-separated list of aligners such as hisat2.graph,hisat2.linear,bowtie2.linear (default: hisat2.graph)")
    parser.add_argument("--reads",
                        dest="read_fname",
                        type=str,
                        default="",
                        help="Fastq read file name")
    parser.add_argument("--alignment",
                        dest="alignment_fname",
                        type=str,
                        default="",
                        help="BAM file name")
    parser.add_argument("-p", "--threads",
                        dest="threads",
                        type=int,
                        default=1,
                        help="Number of threads")
    parser.add_argument("--simulate-interval",
                        dest="simulate_interval",
                        type=int,
                        default=10,
                        help="Reads simulated at every these base pairs (default: 10)")
    parser.add_argument("--read-len",
                        dest="read_len",
                        type=int,
                        default=100,
                        help="Length of simulated reads (default: 100)")
    parser.add_argument("--fragment-len",
                        dest="fragment_len",
                        type=int,
                        default=350,
                        help="Length of fragments (default: 350)")
    parser.add_argument("--best-alleles",
                        dest="best_alleles",
                        action='store_true',
                        help="Additionally report the two top-ranked alleles as SingleModel lines")
    parser.add_argument("--random-seed",
                        dest="random_seed",
                        type=int,
                        default=1,
                        help="A seeding number for randomness (default: 1)")
    parser.add_argument("--num-editdist",
                        dest="num_editdist",
                        type=int,
                        default=0,
                        help="Maximum number of mismatches per read alignment to be considered (default: 0)")
    parser.add_argument("--perbase-errorrate",
                        dest="perbase_errorrate",
                        type=float,
                        default=0.0,
                        help="Per basepair error rate in percentage when simulating reads (default: 0.0)")
    parser.add_argument("--perbase-snprate",
                        dest="perbase_snprate",
                        type=float,
                        default=0.0,
                        help="Per basepair SNP rate in percentage when simulating reads (default: 0.0)")
    parser.add_argument("--skip-fragment-regions",
                        dest="skip_fragment_regions",
                        type=str,
                        default="",
                        help="A comma-separated list of regions from which no reads originate, e.g., 500-600,1200-1400 (default: None).")
    parser.add_argument('-v', '--verbose',
                        dest='verbose',
                        action='store_true',
                        help='also print some statistics to stderr')
    parser.add_argument('--verbose-level',
                        dest='verbose_level',
                        type=int,
                        default=0,
                        help='also print some statistics to stderr (default: 0)')
    parser.add_argument("--debug",
                        dest="debug",
                        type=str,
                        default="",
                        help="e.g., test_id:10,read_id:10000,basic_test")
    parser.add_argument("--output-base", "--assembly-base",
                        dest="output_base",
                        type=str,
                        default="assembly_graph",
                        help="base file name (default: assembly_graph)")
    parser.add_argument("--no-assembly",
                        dest="assembly",
                        action="store_false",
                        help="Do not perform assembly (assembly is performed by default)")
    parser.add_argument("--no-error-correction",
                        dest="error_correction",
                        action="store_false",
                        help="Do not correct sequencing errors (errors are corrected by default)")
    parser.add_argument("--discordant",
                        dest="discordant",
                        action="store_true",
                        help="Allow discordantly mapped pairs or singletons")
    parser.add_argument("--display-alleles",
                        dest="display_alleles",
                        type=str,
                        default="",
                        help="A comma-separated list of alleles to display in HTML (default: empty)")
    parser.add_argument("--stranded-seq",
                        dest="stranded_seq",
                        type=str,
                        default="",
                        help="Stranded-seq data (e.g., NA12892,ILMN_StrandSeq/SraRunInfo.txt)")

    args = parser.parse_args()
    if args.locus_list == "":
        locus_list = []
    else:
        locus_list = args.locus_list.split(',')

    if args.aligners == "":
        print >> sys.stderr, "Error: --aligner-list must be non-empty."
        sys.exit(1)
    # Every entry must be <aligner>.<index type>; these are the only
    # combinations that test_Gene_genotyping knows how to build indexes for
    supported_aligners = [["hisat2", "graph"],
                          ["hisat2", "linear"],
                          ["bowtie2", "linear"]]
    args.aligners = [aligner.split('.') for aligner in args.aligners.split(',')]
    for aligner in args.aligners:
        if aligner not in supported_aligners:
            print >> sys.stderr, "Error: unsupported aligner in --aligner-list: %s" % '.'.join(aligner)
            sys.exit(1)

    if args.read_fname:
        args.read_fname = args.read_fname.split(',')
    else:
        args.read_fname = []
    if args.alignment_fname != "" and \
            not os.path.exists(args.alignment_fname):
        print >> sys.stderr, "Error: %s doesn't exist." % args.alignment_fname
        sys.exit(1)

    # -v is a shortcut for --verbose-level 1
    if args.verbose and args.verbose_level == 0:
        args.verbose_level = 1

    # --debug is a comma-separated list of either "key:value" or bare "key"
    debug = {}
    if args.debug != "":
        for item in args.debug.split(','):
            if ':' in item:
                key, value = item.split(':', 1)
                debug[key] = value
            else:
                debug[item] = 1

    if not args.partial:
        print >> sys.stderr, "Warning: --no-partial will be no longer supported!"

    if args.read_len * 2 > args.fragment_len:
        print >> sys.stderr, "Warning: fragment might be too short (%d)" % (args.fragment_len)

    # Regions must be given as left-right with left < right, in ascending
    # order and without overlaps
    skip_fragment_regions = []
    if args.skip_fragment_regions != "":
        prev_right = -1
        for region in args.skip_fragment_regions.split(','):
            try:
                left, right = region.split('-')
                left, right = int(left), int(right)
            except ValueError:
                print >> sys.stderr, "Error: --skip-fragment-regions has a malformed region: %s" % region
                sys.exit(1)
            if left >= right or prev_right >= left:
                print >> sys.stderr, "Error: --skip-fragment-regions must be sorted, non-overlapping left-right pairs: %s" % region
                sys.exit(1)
            prev_right = right
            skip_fragment_regions.append([left, right])

    if args.display_alleles == "":
        display_alleles = []
    else:
        display_alleles = args.display_alleles.split(',')

    if args.stranded_seq != "":
        stranded_seq = args.stranded_seq.split(',')
        if len(stranded_seq) != 2:
            print >> sys.stderr, "Error: --stranded-seq is incorrectly specified"
            sys.exit(1)
    else:
        stranded_seq = []

    random.seed(args.random_seed)
    test_Gene_genotyping(args.base_fname,
                         locus_list,
                         args.partial,
                         args.aligners,
                         args.read_fname,
                         args.alignment_fname,
                         args.threads,
                         args.simulate_interval,
                         args.read_len,
                         args.fragment_len,
                         args.best_alleles,
                         args.num_editdist,
                         args.perbase_errorrate,
                         args.perbase_snprate,
                         skip_fragment_regions,
                         args.assembly,
                         args.output_base,
                         args.error_correction,
                         args.discordant,
                         display_alleles,
                         stranded_seq,
                         args.verbose_level,
                         debug)
+#SBATCH --nodes=1 +#SBATCH --cpus-per-task=8 +#SBATCH --mem=80G +#SBATCH --partition=shared +#SBATCH --time=166:0:0 +#SBATCH --workdir=/home-1/dkim136@jhu.edu/infphilo/hisat2/evaluation/tests/HLA_novel + +/home-1/dkim136@jhu.edu/infphilo/hisat2/evaluation/tests/HLA_novel/scripts/extract_reads.py --base-fname genotype_genome --reference-type genome --read-dir /home-1/dkim136@jhu.edu/ssalzbe1/users/infphilo/ILMN_HiSeqX --out-dir ILMN_HiSeqX -p 8 + diff --git a/evaluation/tests/genotype_genome/hisatgenotype.py b/evaluation/tests/genotype_genome/hisatgenotype.py new file mode 100644 index 0000000..fad5906 --- /dev/null +++ b/evaluation/tests/genotype_genome/hisatgenotype.py @@ -0,0 +1 @@ +../../../hisatgenotype.py \ No newline at end of file diff --git a/evaluation/tests/genotype_genome/hisatgenotype_build_genome.py b/evaluation/tests/genotype_genome/hisatgenotype_build_genome.py new file mode 100644 index 0000000..9d3572a --- /dev/null +++ b/evaluation/tests/genotype_genome/hisatgenotype_build_genome.py @@ -0,0 +1 @@ +../../../hisatgenotype_build_genome.py \ No newline at end of file diff --git a/evaluation/tests/genotype_genome/hisatgenotype_extract_vars.py b/evaluation/tests/genotype_genome/hisatgenotype_extract_vars.py new file mode 100644 index 0000000..6d113b3 --- /dev/null +++ b/evaluation/tests/genotype_genome/hisatgenotype_extract_vars.py @@ -0,0 +1 @@ +../../../hisatgenotype_extract_vars.py \ No newline at end of file diff --git a/evaluation/tests/genotype_genome/hisatgenotype_locus.py b/evaluation/tests/genotype_genome/hisatgenotype_locus.py new file mode 100644 index 0000000..c6bcacb --- /dev/null +++ b/evaluation/tests/genotype_genome/hisatgenotype_locus.py @@ -0,0 +1 @@ +../../../hisatgenotype_locus.py \ No newline at end of file diff --git a/evaluation/tests/genotype_genome/hisatgenotype_modules b/evaluation/tests/genotype_genome/hisatgenotype_modules new file mode 100644 index 0000000..018d309 --- /dev/null +++ 
"""
Align reads, and sort the alignments into a BAM file

base_fname:  base name of the HISAT2 index (passed to hisat2 -x)
read_fnames: one file for single-end reads, or two files for paired-end reads
fastq:       reads are in FASTQ format if True, FASTA (hisat2 -f) otherwise
threads:     number of threads for hisat2 and samtools sort
verbose:     print the hisat2 command line to stderr if set

The sorted and indexed alignments are written to hla_output.bam in the
current directory; the intermediate hla_output_unsorted.bam is removed.
The exit codes of hisat2 and samtools are not checked.
"""
def align_reads(base_fname,
                read_fnames,
                fastq,
                threads,
                verbose):
    unsorted_bam_fname = "hla_output_unsorted.bam"
    sorted_bam_fname = "hla_output.bam"

    aligner_cmd = ["hisat2",
                   "--no-unal",
                   "-p", str(threads)]
    # aligner_cmd += ["--mm"]
    aligner_cmd += ["-x", "%s" % base_fname]

    assert len(read_fnames) > 0
    if not fastq:
        aligner_cmd += ["-f"]
    single = len(read_fnames) == 1
    if single:
        aligner_cmd += ["-U", read_fnames[0]]
    else:
        aligner_cmd += ["-1", read_fnames[0],
                        "-2", read_fnames[1]]

    print >> sys.stderr, "Aligning %s to %s ..." % (' '.join(read_fnames), base_fname)
    if verbose:
        print >> sys.stderr, "\t%s" % (' '.join(aligner_cmd))

    # One shared sink for the error output of all child processes; it is
    # closed when the block is left, as are the BAM output files below
    with open(os.devnull, 'w') as devnull:
        # hisat2 | samtools view -bS - > unsorted BAM
        align_proc = subprocess.Popen(aligner_cmd,
                                      stdout=subprocess.PIPE,
                                      stderr=devnull)
        sambam_cmd = ["samtools",
                      "view",
                      "-bS",
                      "-"]
        with open(unsorted_bam_fname, 'wb') as unsorted_bam_file:
            sambam_proc = subprocess.Popen(sambam_cmd,
                                           stdin=align_proc.stdout,
                                           stdout=unsorted_bam_file,
                                           stderr=devnull)
            # Close our copy of the pipe so that hisat2 receives SIGPIPE
            # if samtools exits early
            align_proc.stdout.close()
            sambam_proc.communicate()
        # Reap hisat2 so that no zombie process is left behind
        align_proc.wait()

        print >> sys.stderr, "Sorting %s ..." % unsorted_bam_fname
        bamsort_cmd = ["samtools",
                       "sort",
                       "--threads", str(threads),
                       unsorted_bam_fname]
        with open(sorted_bam_fname, 'wb') as sorted_bam_file:
            bamsort_proc = subprocess.Popen(bamsort_cmd,
                                            stdout=sorted_bam_file,
                                            stderr=devnull)
            bamsort_proc.communicate()

        print >> sys.stderr, "Indexing %s ..." % sorted_bam_fname
        bamindex_cmd = ["samtools",
                        "index",
                        sorted_bam_fname]
        bamindex_proc = subprocess.Popen(bamindex_cmd,
                                         stderr=devnull)
        bamindex_proc.communicate()

    os.remove(unsorted_bam_fname)
+ for fname in genotype_fnames: + print >> sys.stderr, "\t%s" % fname + sys.exit(1) + + # Align reads, and sort the alignments into a BAM file + align_reads(base_fname, + read_fnames, + fastq, + threads, + verbose) + + # Read HLA alleles (names and sequences) + genes, gene_loci, gene_seqs = {}, {}, {} + for line in open("%s.gene" % base_fname): + family, allele_name, chr, left, right = line.strip().split() + gene_name = "%s-%s" % (family, allele_name.split('*')[0]) + assert gene_name not in genes + genes[gene_name] = allele_name + left, right = int(left), int(right) + """ + exons = [] + for exon in exon_str.split(','): + exon_left, exon_right = exon.split('-') + exons.append([int(exon_left), int(exon_right)]) + """ + gene_loci[gene_name] = [allele_name, chr, left, right] + assert chr in chr_dic + chr_seq = chr_dic[chr] + assert left < right + assert right < len(chr_seq) + gene_seqs[gene_name] = chr_dic[chr][left:right+1] + + # Read link information + Links, var_genes, allele_vars = {}, {}, {} + for line in open("%s.link" % base_fname): + var_id, alleles = line.strip().split('\t') + alleles = alleles.split() + assert not var_id in Links + Links[var_id] = alleles + for allele in alleles: + if allele not in allele_vars: + allele_vars[allele] = set() + allele_vars[allele].add(var_id) + gene_name = "HLA-%s" % (allele.split('*')[0]) + var_genes[var_id] = gene_name + + # gene alleles + allele_names = {} + for gene_name in genes.keys(): + if gene_name not in allele_names: + allele_names[gene_name] = [] + gene_name2 = gene_name.split('-')[1] + for allele_name in allele_vars.keys(): + allele_name1 = allele_name.split('*')[0] + if gene_name2 == allele_name1: + allele_names[gene_name].append(allele_name) + + + # Read HLA variants, and link information + Vars, Var_list = {}, {} + for line in open("%s.snp" % base_fname): + var_id, var_type, chr, pos, data = line.strip().split('\t') + pos = int(pos) + + # daehwan - for debugging purposes + if var_id not in var_genes: + continue + 
+ assert var_id in var_genes + gene_name = var_genes[var_id] + if not gene_name in Vars: + Vars[gene_name] = {} + assert not gene_name in Var_list + Var_list[gene_name] = [] + + assert not var_id in Vars[gene_name] + Vars[gene_name][var_id] = [var_type, pos, data] + Var_list[gene_name].append([pos, var_id]) + + for gene_name, in_var_list in Var_list.items(): + Var_list[gene_name] = sorted(in_var_list) + def lower_bound(Var_list, pos): + low, high = 0, len(Var_list) + while low < high: + m = (low + high) / 2 + m_pos = Var_list[m][0] + if m_pos < pos: + low = m + 1 + elif m_pos > pos: + high = m + else: + assert m_pos == pos + while m > 0: + if Var_list[m-1][0] < pos: + break + m -= 1 + return m + return low + + + # HLA gene allele lengths + """ + HLA_lengths = {} + for HLA_gene, HLA_alleles in HLAs.items(): + HLA_lengths[HLA_gene] = {} + for allele_name, seq in HLA_alleles.items(): + HLA_lengths[HLA_gene][allele_name] = len(seq) + """ + + # Cigar regular expression + cigar_re = re.compile('\d+\w') + + test_list = [[sorted(genes.keys())]] + for test_i in range(len(test_list)): + test_HLA_list = test_list[test_i] + for test_HLA_names in test_HLA_list: + print >> sys.stderr, "\t%s" % (test_HLA_names) + for gene in test_HLA_names: + ref_allele = genes[gene] + ref_seq = gene_seqs[gene] + # ref_exons = refHLA_loci[gene][-1] + + # Read alignments + alignview_cmd = ["samtools", + "view"] + alignview_cmd += ["hla_output.bam"] + base_locus = 0 + _, chr, left, right = gene_loci[gene] + base_locus = left + alignview_cmd += ["%s:%d-%d" % (chr, left + 1, right + 1)] + + bamview_proc = subprocess.Popen(alignview_cmd, + stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + + sort_read_cmd = ["sort", "-k", "1", "-n"] + alignview_proc = subprocess.Popen(sort_read_cmd, + stdin=bamview_proc.stdout, + stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + + # Count alleles + HLA_counts, HLA_cmpt = {}, {} + coverage = [0 for i in range(len(ref_seq) + 1)] + num_reads, 
total_read_len = 0, 0 + prev_read_id = None + prev_exon = False + for line in alignview_proc.stdout: + cols = line.strip().split() + read_id, flag, chr, pos, mapQ, cigar_str = cols[:6] + origin_read_id = read_id + if read_id.find('|') != -1: + tmp_read_id = read_id.split('|')[0] + try: + read_id = int(tmp_read_id) + except ValueError: + None + + read_seq, qual = cols[9], cols[10] + num_reads += 1 + total_read_len += len(read_seq) + flag, pos = int(flag), int(pos) + pos -= 1 + if pos < 0: + continue + + if flag & 0x4 != 0: + continue + + NM, Zs, MD = "", "", "" + for i in range(11, len(cols)): + col = cols[i] + if col.startswith("Zs"): + Zs = col[5:] + elif col.startswith("MD"): + MD = col[5:] + elif col.startswith("NM"): + NM = int(col[5:]) + + if NM > num_mismatch: + continue + + # daehwan - for debugging purposes + debug = False + if read_id in ["2339"] and False: + debug = True + print "read_id: %s)" % read_id, pos, cigar_str, "NM:", NM, MD, Zs + print " ", read_seq + + vars = [] + if Zs: + vars = Zs.split(',') + + assert MD != "" + MD_str_pos, MD_len = 0, 0 + read_pos, left_pos = 0, pos + right_pos = left_pos + cigars = cigar_re.findall(cigar_str) + cigars = [[cigar[-1], int(cigar[:-1])] for cigar in cigars] + cmp_list = [] + for i in range(len(cigars)): + cigar_op, length = cigars[i] + if cigar_op == 'M': + first = True + MD_len_used = 0 + while True: + if not first or MD_len == 0: + if MD[MD_str_pos].isdigit(): + num = int(MD[MD_str_pos]) + MD_str_pos += 1 + while MD_str_pos < len(MD): + if MD[MD_str_pos].isdigit(): + num = num * 10 + int(MD[MD_str_pos]) + MD_str_pos += 1 + else: + break + MD_len += num + # Insertion or full match followed + if MD_len >= length: + MD_len -= length + cmp_list.append(["match", right_pos + MD_len_used, length - MD_len_used]) + break + first = False + read_base = read_seq[read_pos + MD_len] + MD_ref_base = MD[MD_str_pos] + MD_str_pos += 1 + assert MD_ref_base in "ACGT" + cmp_list.append(["match", right_pos + MD_len_used, MD_len - 
MD_len_used]) + cmp_list.append(["mismatch", right_pos + MD_len, 1]) + MD_len_used = MD_len + 1 + MD_len += 1 + # Full match + if MD_len == length: + MD_len = 0 + break + elif cigar_op == 'I': + cmp_list.append(["insertion", right_pos, length]) + elif cigar_op == 'D': + if MD[MD_str_pos] == '0': + MD_str_pos += 1 + assert MD[MD_str_pos] == '^' + MD_str_pos += 1 + while MD_str_pos < len(MD): + if not MD[MD_str_pos] in "ACGT": + break + MD_str_pos += 1 + cmp_list.append(["deletion", right_pos, length]) + elif cigar_op == 'S': + cmp_list.append(["soft", right_pos, length]) + else: + assert cigar_op == 'N' + cmp_list.append(["intron", right_pos, length]) + + if cigar_op in "MND": + right_pos += length + + if cigar_op in "MIS": + read_pos += length + + """ + exon = False + for exon in ref_exons: + exon_left, exon_right = exon + if right_pos <= exon_left or pos > exon_right: + continue + else: + exon = True + break + """ + + if left_pos < base_locus or \ + right_pos - base_locus > len(ref_seq): + continue + + def add_stat(HLA_cmpt, HLA_counts, HLA_count_per_read, exon = True): + max_count = max(HLA_count_per_read.values()) + cur_cmpt = set() + for allele, count in HLA_count_per_read.items(): + if count < max_count: + continue + """ + if allele in exclude_allele_list: + continue + """ + cur_cmpt.add(allele) + if not allele in HLA_counts: + HLA_counts[allele] = 1 + else: + HLA_counts[allele] += 1 + + if len(cur_cmpt) == 0: + return + + # daehwan - for debugging purposes + alleles = ["", ""] + # alleles = ["B*40:304", "B*40:02:01"] + allele1_found, allele2_found = False, False + for allele, count in HLA_count_per_read.items(): + if count < max_count: + continue + if allele == alleles[0]: + allele1_found = True + elif allele == alleles[1]: + allele2_found = True + if allele1_found != allele2_found: + print alleles[0], HLA_count_per_read[alleles[0]] + print alleles[1], HLA_count_per_read[alleles[1]] + if allele1_found: + print ("%s\tread_id %s - %d vs. 
%d]" % (alleles[0], prev_read_id, max_count, HLA_count_per_read[alleles[1]])) + else: + print ("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, HLA_count_per_read[alleles[0]])) + print read_seq + + cur_cmpt = sorted(list(cur_cmpt)) + cur_cmpt = '-'.join(cur_cmpt) + add = 1 + """ + if partial and not exon: + add *= 0.2 + """ + if not cur_cmpt in HLA_cmpt: + HLA_cmpt[cur_cmpt] = add + else: + HLA_cmpt[cur_cmpt] += add + + if read_id != prev_read_id: + if prev_read_id != None: + add_stat(HLA_cmpt, HLA_counts, HLA_count_per_read, prev_exon) + + HLA_count_per_read = {} + for HLA_name in allele_names[gene]: + if HLA_name.find("BACKBONE") != -1: + continue + HLA_count_per_read[HLA_name] = 0 + + def add_count(var_id, add): + assert var_id in Links + alleles = Links[var_id] + for allele in alleles: + if allele.find("BACKBONE") != -1: + continue + HLA_count_per_read[allele] += add + # daehwan - for debugging purposes + if debug: + if allele in ["DQA1*05:05:01:01", "DQA1*05:05:01:02"]: + print allele, add, var_id + + # Decide which allele(s) a read most likely came from + # also sanity check - read length, cigar string, and MD string + for var_id, data in Vars[gene].items(): + var_type, var_pos, var_data = data + if var_type != "deletion": + continue + if left_pos >= var_pos and right_pos <= var_pos + int(var_data): + add_count(var_id, -1) + ref_pos, read_pos, cmp_cigar_str, cmp_MD = left_pos, 0, "", "" + cigar_match_len, MD_match_len = 0, 0 + for cmp in cmp_list: + type = cmp[0] + length = cmp[2] + if type == "match": + var_idx = lower_bound(Var_list[gene], ref_pos) + while var_idx < len(Var_list[gene]): + var_pos, var_id = Var_list[gene][var_idx] + if ref_pos + length <= var_pos: + break + if ref_pos <= var_pos: + var_type, _, var_data = Vars[gene][var_id] + if var_type == "insertion": + if ref_pos < var_pos and ref_pos + length > var_pos + len(var_data): + add_count(var_id, -1) + # daehwan - for debugging purposes + if debug: + print cmp, var_id, 
Links[var_id] + elif var_type == "deletion": + del_len = int(var_data) + if ref_pos < var_pos and ref_pos + length > var_pos + del_len: + # daehwan - for debugging purposes + if debug: + print cmp, var_id, Links[var_id], -1, Vars[gene][var_id] + # Check if this might be one of the two tandem repeats (the same left coordinate) + cmp_left, cmp_right = cmp[1], cmp[1] + cmp[2] + test1_seq1 = ref_seq[cmp_left-base_locus:cmp_right-base_locus] + test1_seq2 = ref_seq[cmp_left-base_locus:var_pos-base_locus] + ref_seq[var_pos + del_len - base_locus:cmp_right + del_len - base_locus] + # Check if this happens due to small repeats (the same right coordinate - e.g. 19 times of TTTC in DQA1*05:05:01:02) + cmp_left -= read_pos + cmp_right += (len(read_seq) - read_pos - cmp[2]) + test2_seq1 = ref_seq[cmp_left+int(var_data)-base_locus:cmp_right-base_locus] + test2_seq2 = ref_seq[cmp_left-base_locus:var_pos-base_locus] + ref_seq[var_pos+int(var_data)-base_locus:cmp_right-base_locus] + if test1_seq1 != test1_seq2 and test2_seq1 != test2_seq2: + add_count(var_id, -1) + else: + if debug: + print cmp, var_id, Links[var_id], -1 + add_count(var_id, -1) + var_idx += 1 + + read_pos += length + ref_pos += length + cigar_match_len += length + MD_match_len += length + elif type == "mismatch": + read_base = read_seq[read_pos] + var_idx = lower_bound(Var_list[gene], ref_pos) + while var_idx < len(Var_list[gene]): + var_pos, var_id = Var_list[gene][var_idx] + if ref_pos < var_pos: + break + if ref_pos == var_pos: + var_type, _, var_data = Vars[gene][var_id] + if var_type == "single": + if var_data == read_base: + # daehwan - for debugging purposes + if debug: + print cmp, var_id, 1, var_data, read_base, Links[var_id] + + # daehwan - for debugging purposes + if False: + read_qual = ord(qual[read_pos]) + add_count(var_id, (read_qual - 60) / 60.0) + else: + add_count(var_id, 1) + # daehwan - check out if this routine is appropriate + # else: + # add_count(var_id, -1) + var_idx += 1 + cmp_MD += 
("%d%s" % (MD_match_len, ref_seq[ref_pos-base_locus])) + MD_match_len = 0 + cigar_match_len += 1 + read_pos += 1 + ref_pos += 1 + elif type == "insertion": + ins_seq = read_seq[read_pos:read_pos+length] + var_idx = lower_bound(Var_list[gene], ref_pos) + # daehwan - for debugging purposes + if debug: + print left_pos, cigar_str, MD, vars + print ref_pos, ins_seq, Var_list[gene][var_idx], Vars[gene][Var_list[gene][var_idx][1]] + # sys.exit(1) + while var_idx < len(Var_list[gene]): + var_pos, var_id = Var_list[gene][var_idx] + if ref_pos < var_pos: + break + if ref_pos == var_pos: + var_type, _, var_data = Vars[gene][var_id] + if var_type == "insertion": + if var_data == ins_seq: + # daehwan - for debugging purposes + if debug: + print cmp, var_id, 1, Links[var_id] + add_count(var_id, 1) + var_idx += 1 + + if cigar_match_len > 0: + cmp_cigar_str += ("%dM" % cigar_match_len) + cigar_match_len = 0 + read_pos += length + cmp_cigar_str += ("%dI" % length) + elif type == "deletion": + del_len = length + # Deletions can be shifted bidirectionally + temp_ref_pos = ref_pos + while temp_ref_pos > 0: + last_bp = ref_seq[temp_ref_pos + del_len - 1 - base_locus] + prev_bp = ref_seq[temp_ref_pos - 1 - base_locus] + if last_bp != prev_bp: + break + temp_ref_pos -= 1 + var_idx = lower_bound(Var_list[gene], temp_ref_pos) + while var_idx < len(Var_list[gene]): + var_pos, var_id = Var_list[gene][var_idx] + if temp_ref_pos < var_pos: + first_bp = ref_seq[temp_ref_pos - base_locus] + next_bp = ref_seq[temp_ref_pos + del_len - base_locus] + if first_bp == next_bp: + temp_ref_pos += 1 + continue + else: + break + if temp_ref_pos == var_pos: + var_type, _, var_data = Vars[gene][var_id] + if var_type == "deletion": + var_len = int(var_data) + if var_len == length: + if debug: + print cmp, var_id, 1, Links[var_id] + print ref_seq[var_pos - 10-base_locus:var_pos-base_locus], ref_seq[var_pos-base_locus:var_pos+int(var_data)-base_locus], 
ref_seq[var_pos+int(var_data)-base_locus:var_pos+int(var_data)+10-base_locus] + add_count(var_id, 1) + var_idx += 1 + + if cigar_match_len > 0: + cmp_cigar_str += ("%dM" % cigar_match_len) + cigar_match_len = 0 + cmp_MD += ("%d" % MD_match_len) + MD_match_len = 0 + cmp_cigar_str += ("%dD" % length) + cmp_MD += ("^%s" % ref_seq[ref_pos-base_locus:ref_pos+length-base_locus]) + ref_pos += length + elif type == "soft": + if cigar_match_len > 0: + cmp_cigar_str += ("%dM" % cigar_match_len) + cigar_match_len = 0 + read_pos += length + cmp_cigar_str += ("%dS" % length) + else: + assert type == "intron" + if cigar_match_len > 0: + cmp_cigar_str += ("%dM" % cigar_match_len) + cigar_match_len = 0 + cmp_cigar_str += ("%dN" % length) + ref_pos += length + if cigar_match_len > 0: + cmp_cigar_str += ("%dM" % cigar_match_len) + cmp_MD += ("%d" % MD_match_len) + if read_pos != len(read_seq) or \ + cmp_cigar_str != cigar_str or \ + cmp_MD != MD: + print >> sys.stderr, "Error:", cigar_str, MD + print >> sys.stderr, "\tcomputed:", cmp_cigar_str, cmp_MD + print >> sys.stderr, "\tcmp list:", cmp_list + assert False + + prev_read_id = read_id + # prev_exon = exon + + if num_reads <= 0: + continue + + if prev_read_id != None: + add_stat(HLA_cmpt, HLA_counts, HLA_count_per_read) + + HLA_counts = [[allele, count] for allele, count in HLA_counts.items()] + def HLA_count_cmp(a, b): + if a[1] != b[1]: + return b[1] - a[1] + assert a[0] != b[0] + if a[0] < b[0]: + return -1 + else: + return 1 + HLA_counts = sorted(HLA_counts, cmp=HLA_count_cmp) + for count_i in range(len(HLA_counts)): + count = HLA_counts[count_i] + print >> sys.stderr, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]) + if count_i >= 9: + break + print >> sys.stderr + + def normalize(prob): + total = sum(prob.values()) + for allele, mass in prob.items(): + prob[allele] = mass / total + + def normalize2(prob, length): + total = 0 + for allele, mass in prob.items(): + assert allele in length + total += (mass / 
length[allele]) + for allele, mass in prob.items(): + assert allele in length + prob[allele] = mass / length[allele] / total + + def prob_diff(prob1, prob2): + diff = 0.0 + for allele in prob1.keys(): + if allele in prob2: + diff += abs(prob1[allele] - prob2[allele]) + else: + diff += prob1[allele] + return diff + + def HLA_prob_cmp(a, b): + if a[1] != b[1]: + if a[1] < b[1]: + return 1 + else: + return -1 + assert a[0] != b[0] + if a[0] < b[0]: + return -1 + else: + return 1 + + HLA_prob, HLA_prob_next = {}, {} + for cmpt, count in HLA_cmpt.items(): + alleles = cmpt.split('-') + for allele in alleles: + if allele not in HLA_prob: + HLA_prob[allele] = 0.0 + HLA_prob[allele] += (float(count) / len(alleles)) + + """ + assert gene in HLA_lengths + HLA_length = HLA_lengths[gene] + """ + HLA_length = {} + + # normalize2(HLA_prob, HLA_length) + normalize(HLA_prob) + def next_prob(HLA_cmpt, HLA_prob, HLA_length): + HLA_prob_next = {} + for cmpt, count in HLA_cmpt.items(): + alleles = cmpt.split('-') + alleles_prob = 0.0 + for allele in alleles: + assert allele in HLA_prob + alleles_prob += HLA_prob[allele] + for allele in alleles: + if allele not in HLA_prob_next: + HLA_prob_next[allele] = 0.0 + HLA_prob_next[allele] += (float(count) * HLA_prob[allele] / alleles_prob) + # normalize2(HLA_prob_next, HLA_length) + normalize(HLA_prob_next) + return HLA_prob_next + + diff, iter = 1.0, 0 + while diff > 0.0001 and iter < 1000: + HLA_prob_next = next_prob(HLA_cmpt, HLA_prob, HLA_length) + diff = prob_diff(HLA_prob, HLA_prob_next) + HLA_prob = HLA_prob_next + iter += 1 + + """ + for allele, prob in HLA_prob.items(): + allele_len = len(HLAs[gene][allele]) + HLA_prob[allele] /= float(allele_len) + normalize(HLA_prob) + """ + HLA_prob = [[allele, prob] for allele, prob in HLA_prob.items()] + + HLA_prob = sorted(HLA_prob, cmp=HLA_prob_cmp) + success = [False for i in range(len(test_HLA_names))] + found_list = [False for i in range(len(test_HLA_names))] + for prob_i in 
range(len(HLA_prob)): + prob = HLA_prob[prob_i] + print >> sys.stderr, "\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, prob[0], prob[1] * 100.0) + if prob_i >= 9: + break + print >> sys.stderr + + """ + if len(test_HLA_names) == 2: + HLA_prob, HLA_prob_next = {}, {} + for cmpt, count in HLA_cmpt.items(): + alleles = cmpt.split('-') + for allele1 in alleles: + for allele2 in HLA_names[gene]: + if allele1 < allele2: + allele_pair = "%s-%s" % (allele1, allele2) + else: + allele_pair = "%s-%s" % (allele2, allele1) + if not allele_pair in HLA_prob: + HLA_prob[allele_pair] = 0.0 + HLA_prob[allele_pair] += (float(count) / len(alleles)) + + if len(HLA_prob) <= 0: + continue + + # Choose top allele pairs + def choose_top_alleles(HLA_prob): + HLA_prob_list = [[allele_pair, prob] for allele_pair, prob in HLA_prob.items()] + HLA_prob_list = sorted(HLA_prob_list, cmp=HLA_prob_cmp) + HLA_prob = {} + best_prob = HLA_prob_list[0][1] + for i in range(len(HLA_prob_list)): + allele_pair, prob = HLA_prob_list[i] + if prob * 2 <= best_prob: + break + HLA_prob[allele_pair] = prob + normalize(HLA_prob) + return HLA_prob + HLA_prob = choose_top_alleles(HLA_prob) + + def next_prob(HLA_cmpt, HLA_prob): + HLA_prob_next = {} + for cmpt, count in HLA_cmpt.items(): + alleles = cmpt.split('-') + prob = 0.0 + for allele in alleles: + for allele_pair in HLA_prob.keys(): + if allele in allele_pair: + prob += HLA_prob[allele_pair] + for allele in alleles: + for allele_pair in HLA_prob.keys(): + if not allele in allele_pair: + continue + if allele_pair not in HLA_prob_next: + HLA_prob_next[allele_pair] = 0.0 + HLA_prob_next[allele_pair] += (float(count) * HLA_prob[allele_pair] / prob) + normalize(HLA_prob_next) + return HLA_prob_next + + diff, iter = 1.0, 0 + while diff > 0.0001 and iter < 1000: + HLA_prob_next = next_prob(HLA_cmpt, HLA_prob) + diff = prob_diff(HLA_prob, HLA_prob_next) + HLA_prob = HLA_prob_next + HLA_prob = choose_top_alleles(HLA_prob) + iter += 1 + + HLA_prob = 
[[allele_pair, prob] for allele_pair, prob in HLA_prob.items()] + HLA_prob = sorted(HLA_prob, cmp=HLA_prob_cmp) + + success = [False] + for prob_i in range(len(HLA_prob)): + allele_pair, prob = HLA_prob[prob_i] + allele1, allele2 = allele_pair.split('-') + if best_alleles and prob_i < 1: + print >> sys.stdout, "PairModel %s (abundance: %.2f%%)" % (allele_pair, prob * 100.0) + if simulation: + if allele1 in test_HLA_names and allele2 in test_HLA_names: + rank_i = prob_i + while rank_i > 0: + if HLA_prob[rank_i-1][1] == prob: + rank_i -= 1 + else: + break + print >> sys.stderr, "\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, allele_pair, prob * 100.0) + if rank_i == 0: + success[0] = True + break + print >> sys.stderr, "\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, allele_pair, prob * 100.0) + if not simulation and prob_i >= 9: + break + print >> sys.stderr + """ + + # Read variants with clinical significance + clnsigs = {} + for line in open("%s.clnsig" % base_fname): + var_id, var_gene, var_clnsig = line.strip().split('\t') + clnsigs[var_id] = [var_gene, var_clnsig] + + vars, Var_list = {}, {} + for line in open("%s.snp" % base_fname): + var_id, type, chr, left, data = line.strip().split() + if var_id not in clnsigs: + continue + left = int(left) + if type == "deletion": + data = int(data) + vars[var_id] = [chr, left, type, data] + if chr not in Var_list: + Var_list[chr] = [] + Var_list[chr].append([left, var_id]) + + var_counts = {} + + # Read alignments + alignview_cmd = ["samtools", + "view", + "hla_output.bam"] + bamview_proc = subprocess.Popen(alignview_cmd, + stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + + for line in bamview_proc.stdout: + cols = line.strip().split() + read_id, flag, chr, pos, mapQ, cigar_str = cols[:6] + read_seq, qual = cols[9], cols[10] + flag, pos = int(flag), int(pos) + pos -= 1 + if pos < 0: + continue + + if flag & 0x4 != 0: + continue + + if chr not in Var_list: + continue + + assert chr in chr_dic 
+ chr_seq = chr_dic[chr] + + NM, Zs, MD, NH = "", "", "", "" + for i in range(11, len(cols)): + col = cols[i] + if col.startswith("Zs"): + Zs = col[5:] + elif col.startswith("MD"): + MD = col[5:] + elif col.startswith("NM"): + NM = int(col[5:]) + elif col.startswith("NH"): + NH = int(col[5:]) + + assert NH != "" + NH = int(NH) + if NH > 1: + continue + + if NM > num_mismatch: + continue + + read_vars = [] + if Zs: + read_vars = Zs.split(',') + for read_var in read_vars: + _, _, var_id = read_var.split('|') + if var_id not in clnsigs: + continue + if var_id not in var_counts: + var_counts[var_id] = [1, 0] + else: + var_counts[var_id][0] += 1 + + assert MD != "" + MD_str_pos, MD_len = 0, 0 + read_pos, left_pos = 0, pos + right_pos = left_pos + cigars = cigar_re.findall(cigar_str) + cigars = [[cigar[-1], int(cigar[:-1])] for cigar in cigars] + cmp_list = [] + for i in range(len(cigars)): + cigar_op, length = cigars[i] + if cigar_op == 'M': + chr_var_list = Var_list[chr] + var_idx = lower_bound(chr_var_list, right_pos) + while var_idx < len(chr_var_list): + var_pos, var_id = chr_var_list[var_idx] + if var_pos >= right_pos + length: + break + if var_pos >= right_pos: + assert var_id in vars + _, _, var_type, var_data = vars[var_id] + contradict = False + if var_type == "single": + contradict = (read_seq[read_pos + var_pos - right_pos] == chr_seq[var_pos]) + elif var_type == "insertion": + contradict = (right_pos < var_pos) + else: + contradict = True + if contradict: + if var_id not in var_counts: + var_counts[var_id] = [0, 1] + else: + var_counts[var_id][1] += 1 + + var_idx += 1 + + if cigar_op in "MND": + right_pos += length + + if cigar_op in "MIS": + read_pos += length + + for var_id, counts in var_counts.items(): + if counts[0] < 2: # or counts[0] * 3 < counts[1]: + continue + assert var_id in vars + var_chr, var_left, var_type, var_data = vars[var_id] + assert var_id in clnsigs + var_gene, var_clnsig = clnsigs[var_id] + print >> sys.stderr, "\t\t\t%s %s: %s:%d %s 
%s (%s): %d-%d" % \ + (var_gene, var_id, var_chr, var_left, var_type, var_data, var_clnsig, counts[0], counts[1]) + + + +""" +""" +if __name__ == '__main__': + parser = ArgumentParser( + description='HISAT2 genotyping') + parser.add_argument("--base", "--base-name", + dest="base_fname", + type=str, + default="genotype_genome", + help="base filename for genotype genome") + parser.add_argument('-f', + dest='fastq', + action='store_false', + help='FASTA file') + parser.add_argument("-U", + dest="read_fname_U", + type=str, + default="", + help="filename for single-end reads") + parser.add_argument("-1", + dest="read_fname_1", + type=str, + default="", + help="filename for paired-end reads") + parser.add_argument("-2", + dest="read_fname_2", + type=str, + default="", + help="filename for paired-end reads") + parser.add_argument("-p", "--threads", + dest="threads", + type=int, + default=1, + help="Number of threads") + parser.add_argument("--num-editdist", + dest="num_editdist", + type=int, + default=2, + help="Maximum number of mismatches per read alignment to be considered (default: 2)") + parser.add_argument('-v', '--verbose', + dest='verbose', + action='store_true', + help='also print some statistics to stderr') + parser.add_argument("--daehwan-debug", + dest="daehwan_debug", + type=str, + default="", + help="e.g., test_id:10,read_id:10000,basic_test") + + args = parser.parse_args() + daehwan_debug = {} + if args.daehwan_debug != "": + for item in args.daehwan_debug.split(','): + if ':' in item: + key, value = item.split(':') + daehwan_debug[key] = value + else: + daehwan_debug[item] = 1 + + if args.read_fname_U != "": + read_fnames = [args.read_fname_U] + else: + if args.read_fname_1 == "" or args.read_fname_2 == "": + print >> sys.stderr, "Error: please specify read file names correctly: -U or -1 and -2" + sys.exit(1) + read_fnames = [args.read_fname_1, args.read_fname_2] + + random.seed(1) + genotype(args.base_fname, + args.fastq, + read_fnames, + args.threads, + 
args.num_editdist, + args.verbose, + daehwan_debug) diff --git a/evaluation/tests/genotype_genome/paper_sensitivity/sensitivity.py b/evaluation/tests/genotype_genome/paper_sensitivity/sensitivity.py new file mode 100644 index 0000000..527d5d7 --- /dev/null +++ b/evaluation/tests/genotype_genome/paper_sensitivity/sensitivity.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +# +# Copyright 2017, Daehwan Kim +# +# This file is part of HISAT-genotype. +# +# HISAT-genotype is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT-genotype is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT-genotype. If not, see . 
+# + + +import sys, os, subprocess, re, resource +import inspect +import random +import glob +from argparse import ArgumentParser, FileType + + +""" +""" +if __name__ == '__main__': + aligners = [["hisat2", "graph"], ["hisat2", "linear"], ["bowtie2", "linear"]] + genes = ["A", "B", "C", "DQA1", "DQB1", "DRB1"] + + samples = ["NA12878", "LP6005041-DNA_A01", "LP6005045-DNA_D07"] + for sample in samples: + for aligner, type in aligners: + sample_dir = "%s_%s" % (sample, aligner) + if aligner == "hisat2": + sample_dir += (".%s" % type) + if not os.path.exists(sample_dir): + continue + + fq_fnames = glob.glob("%s/*.fq.gz" % sample_dir) + assert len(fq_fnames) == 2 + + regions, region_loci, region_count, region_read1_count, region_read2_count = {}, {}, {}, {}, {} + for line in open("%s.locus" % ("genotype_genome" if type == "graph" else "genotype_genome.linear")): + family, allele_name, chr, left, right = line.strip().split()[:5] + + # DK - debugging purposes + if family != "HLA": + continue + + region_name = "%s-%s" % (family, allele_name.split('*')[0]) + assert region_name not in regions + regions[region_name] = allele_name + left, right = int(left), int(right) + if chr not in region_loci: + region_loci[chr] = {} + region_loci[chr][region_name] = [allele_name, chr, left, right] + + + aligner_cmd = [aligner] + aligner_cmd += ["-x", "genotype_genome" if type == "graph" else "genotype_genome.linear"] + if aligner == "hisat2": + aligner_cmd += ["--no-spliced-alignment"] + aligner_cmd += ["-X", "1000"] + aligner_cmd += ["-1", fq_fnames[0], + "-2", fq_fnames[1]] + # print >> sys.stderr, "Running:", ' '.join(aligner_cmd) + print sample, aligner, type + align_proc = subprocess.Popen(aligner_cmd, + stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + + prev_read_name, extract, read1_extract, read2_extract, read1_first, read2_first = "", set(), set(), set(), True, True + for line in align_proc.stdout: + if line.startswith('@'): + continue + line = line.strip() + cols = 
line.split() + read_name, flag, chr, pos, mapQ, cigar, _, _, _, read, qual = cols[:11] + flag, pos = int(flag), int(pos) - 1 + strand = '-' if flag & 0x10 else '+' + AS, XS, NH = "", "", "" + for i in range(11, len(cols)): + col = cols[i] + if col.startswith("AS"): + AS = int(col[5:]) + elif col.startswith("XS"): + XS = int(col[5:]) + elif col.startswith("NH"): + NH = int(col[5:]) + + if read_name != prev_read_name: + for region in extract: + if region not in region_count: + region_count[region] = 0 + region_count[region] += 1 + + for region in read1_extract: + if region not in region_read1_count: + region_read1_count[region] = 0 + region_read1_count[region] += 1 + + for region in read2_extract: + if region not in region_read2_count: + region_read2_count[region] = 0 + region_read2_count[region] += 1 + + prev_read_name, extract, read1_extract, read2_extract, read1_first, read2_first = "", set(), set(), set(), True, True + + if ((aligner == "hisat2" and NH == 1) or (aligner == "bowtie2" and AS > XS and read1_first if flag & 0x40 else read2_first)): + if chr in region_loci: + for region, loci in region_loci[chr].items(): + _, _, loci_left, loci_right = loci + # there might be a different candidate region for each of left and right reads + if pos >= loci_left and pos < loci_right: + extract.add(region) + if flag & 0x40: + read1_extract.add(region) + else: + read2_extract.add(region) + break + + if flag & 0x40: # left read + read1_first = False + else: + assert flag & 0x80 # right read + read2_first = False + + prev_read_name = read_name + + for gene in genes: + gene = "HLA-" + gene + if gene not in region_count: + continue + print "\t%s pair: %d, left+right: %d" % (gene, region_count[gene], region_read1_count[gene] + region_read2_count[gene]) + diff --git a/evaluation/tests/one_snp_test/evaluate_one_snp_reads.py b/evaluation/tests/one_snp_test/evaluate_one_snp_reads.py new file mode 100644 index 0000000..3809a68 --- /dev/null +++ 
b/evaluation/tests/one_snp_test/evaluate_one_snp_reads.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python + +import sys, os, subprocess +import multiprocessing +import string, re +import platform +from datetime import datetime, date, time +import copy +from argparse import ArgumentParser, FileType + + +""" +""" +def evaluate(read_fname, + verbose): + aligners = [ + ["hisat2", "", "", ""], + ["hisat2", "", "snp", ""], + ["bowtie2", "", "", ""], + ] + num_threads = 3 + + cwd = os.getcwd() + genome = "genome" + align_stat = [] + # for paired in [False, True]: + for paired in [False]: + base_fname = "common_snp_reads" + type_sam_fname = base_fname + ".sam" + type_read1_fname = base_fname + "_1.fa" + type_read2_fname = base_fname + "_2.fa" + + type_read1_fname = read_fname + + def get_aligner_version(aligner, version): + version = "" + if aligner == "hisat2" or \ + aligner == "bowtie2": + if version: + cmd = ["%s_%s/%s" % (aligner, version, aligner)] + else: + cmd = ["%s/%s" % (aligner_bin_base, aligner)] + cmd += ["--version"] + cmd_process = subprocess.Popen(cmd, stdout=subprocess.PIPE) + version = cmd_process.communicate()[0][:-1].split("\n")[0] + version = version.split()[-1] + elif aligner == "star": + version = "2.4.2a" + elif aligner == "gsnap": + cmd = ["%s/gsnap" % (aligner_bin_base)] + cmd_process = subprocess.Popen(cmd, stderr=subprocess.PIPE) + version = cmd_process.communicate()[1][:-1].split("\n")[0] + version = version.split()[2] + elif aligner == "bwa": + cmd = ["%s/bwa" % (aligner_bin_base)] + cmd_process = subprocess.Popen(cmd, stderr=subprocess.PIPE) + version = cmd_process.communicate()[1][:-1].split("\n")[2] + version = version.split()[1] + + return version + + def get_aligner_cmd(aligner, type, index_type, version, read1_fname, read2_fname, out_fname, cmd_idx = 0): + cmd = [] + if aligner == "hisat2": + cmd = ["hisat2"] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + cmd += ["-f"] + cmd += ["--no-spliced-alignment"] + if index_type: + index_cmd 
= "../grch38_snp_hisat2/genome_snp" + else: + index_cmd = "../grch38_hisat2/genome" + cmd += [index_cmd] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += [read1_fname] + elif aligner == "star": + cmd = ["%s/STAR" % (aligner_bin_base)] + if num_threads > 1: + cmd += ["--runThreadN", str(num_threads)] + cmd += ["--genomeDir"] + if cmd_idx == 0: + if type == "gtf": + cmd += ["%s/STAR%s/gtf" % (index_base, index_add)] + else: + cmd += ["%s/STAR%s" % (index_base, index_add)] + else: + assert cmd_idx == 1 + cmd += ["."] + + if desktop: + cmd += ["--genomeLoad", "NoSharedMemory"] + else: + cmd += ["--genomeLoad", "LoadAndKeep"] + if type == "x2": + if cmd_idx == 1: + cmd += ["--alignSJDBoverhangMin", "1"] + cmd += ["--readFilesIn", + read1_fname] + if paired: + cmd += [read2_fname] + if paired: + cmd += ["--outFilterMismatchNmax", "6"] + else: + cmd += ["--outFilterMismatchNmax", "3"] + elif aligner == "bowtie2": + cmd = ["bowtie2"] + if num_threads > 1: + cmd += ["-p", str(num_threads)] + cmd += ["-f"] + cmd += ["-x ../grch38_bowtie2/genome"] + if paired: + cmd += ["-1", read1_fname, + "-2", read2_fname] + else: + cmd += [read1_fname] + elif aligner == "gsnap": + cmd = ["%s/gsnap" % (aligner_bin_base), + "-A", + "sam"] + if num_threads > 1: + cmd += ["-t", str(num_threads)] + cmd += ["--max-mismatches=3", + "-D", "%s/GSNAP%s" % (index_base, index_add), + "-N", "1", + "-d", genome, + read1_fname] + if paired: + cmd += [read2_fname] + elif aligner == "bwa": + cmd = ["%s/bwa" % (aligner_bin_base)] + if type in ["mem", "aln"]: + cmd += [type] + elif type == "sw": + cmd += ["bwa" + type] + if num_threads > 1: + cmd += ["-t", str(num_threads)] + cmd += ["%s/BWA%s/%s.fa" % (index_base, index_add, genome)] + cmd += [read1_fname] + if paired: + cmd += [read2_fname] + else: + assert False + + return cmd + + for aligner, type, index_type, version in aligners: + aligner_name = aligner + type + if version != "": + aligner_name += ("_%s" % version) + if 
aligner == "hisat2" and index_type != "": + aligner_name += ("_" + index_type) + two_step = (aligner == "tophat2" or type == "x2" or (aligner in ["hisat2", "hisat"] and type == "")) + print >> sys.stderr, "\t%s\t%s" % (aligner_name, str(datetime.now())) + if paired: + aligner_dir = aligner_name + "_paired" + else: + aligner_dir = aligner_name + "_single" + if not os.path.exists(aligner_dir): + os.mkdir(aligner_dir) + os.chdir(aligner_dir) + + out_fname = base_fname + ".sam" + duration = -1.0 + + # Align all reads + aligner_cmd = get_aligner_cmd(aligner, type, index_type, version, "../" + type_read1_fname, "../" + type_read2_fname, out_fname) + start_time = datetime.now() + if verbose: + print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd) + if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa"]: + proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE) + else: + proc = subprocess.Popen(aligner_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc.communicate() + finish_time = datetime.now() + duration = finish_time - start_time + duration = duration.total_seconds() + if verbose: + print >> sys.stderr, "\t", finish_time, "finished:", duration + + assert os.path.exists(out_fname) + correct_reads, correct_multi_reads, num_reads = 0, 0, 0 + prev_read_id = None + for line in open(out_fname): + if line.startswith('@'): + continue + read_id, flag, chr, pos, mapQ, cigar = line.split()[:6] + if chr.startswith("chr"): + chr = chr[3:] + pos = int(pos) - 1 + true_chr, true_pos, true_cigar = read_id.split('_')[1:4] + true_pos = int(true_pos) + + if read_id != prev_read_id: + num_reads += 1 + + if true_chr == chr and pos == true_pos and cigar == true_cigar: + correct_multi_reads += 1 + if prev_read_id != read_id: + correct_reads += 1 + + prev_read_id = read_id + + print >> sys.stderr, "\tfirst: %d / %d (%.2f%%)" % (correct_reads, num_reads, float(correct_reads)/num_reads*100) + print >> sys.stderr, "\tall: %d / %d 
(%.2f%%)" % (correct_multi_reads, num_reads, float(correct_multi_reads)/num_reads*100) + + os.chdir("..") + + +""" +""" +if __name__ == "__main__": + parser = ArgumentParser( + description='test HISAT2, and compare HISAT2 with other popular aligners such as TopHat2, STAR, Bowtie1/2, GSNAP, BWA-mem, etc.') + parser.add_argument('read_fname', + nargs='?', + type=str, + help='input read file') + parser.add_argument('-v', '--verbose', + dest='verbose', + action='store_true', + help='also print some statistics to stderr') + + args = parser.parse_args() + evaluate(args.read_fname, + args.verbose) + + diff --git a/evaluation/tests/one_snp_test/simulate_one_snp_reads.py b/evaluation/tests/one_snp_test/simulate_one_snp_reads.py new file mode 100644 index 0000000..178cb06 --- /dev/null +++ b/evaluation/tests/one_snp_test/simulate_one_snp_reads.py @@ -0,0 +1,392 @@ +#!/usr/bin/env python +# +# Copyright 2015, Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . 
+# + +import sys, math, random, re +from collections import defaultdict, Counter +from argparse import ArgumentParser, FileType + + +""" +""" +def reverse_complement(seq): + result = "" + for nt in seq: + base = nt + if nt == 'A': + base = 'T' + elif nt == 'a': + base = 't' + elif nt == 'C': + base = 'G' + elif nt == 'c': + base = 'g' + elif nt == 'G': + base = 'C' + elif nt == 'g': + base = 'c' + elif nt == 'T': + base = 'A' + elif nt == 't': + base = 'a' + + result = base + result + + return result + + +""" +""" +def read_genome(genome_file): + chr_dic = {} + + chr_name, sequence = "", "" + for line in genome_file: + if line[0] == ">": + if chr_name and sequence: + chr_dic[chr_name] = sequence + chr_name = line.strip().split()[0][1:] + sequence = "" + else: + sequence += line[:-1] + + if chr_name and sequence: + chr_dic[chr_name] = sequence + + return chr_dic + + +""" +""" +def read_snp(snp_file): + snps = defaultdict(list) + for line in snp_file: + line = line.strip() + if not line or line.startswith('#'): + continue + try: + snpID, type, chr, pos, data = line.split('\t') + chr = chr.split()[0] + except ValueError: + continue + + assert type in ["single", "deletion", "insertion"] + if type == "deletion": + data = int(data) + snps[chr].append([snpID, type, int(pos), data]) + + return snps + + +""" +""" +def getSamAlignment(chr_seq, read_len, snp): + snp_id, snp_type, snp_pos, snp_data = snp + + # Define MD, XM, NM, Zs, read_seq + read_seq, MD, Zs = "", "", "" + left_read_len = read_len / 2 + pos = snp_pos - left_read_len + assert pos >= 0 + if snp_type == "single": + cigar_str = "100M" + read_seq = chr_seq[pos:pos+left_read_len] + snp_data + chr_seq[pos+left_read_len+1:pos+read_len] + MD = "%d%s%d" % (left_read_len, chr_seq[pos+left_read_len], read_len - left_read_len - 1) + Zs = "%d|S|%s" % (left_read_len, snp_id) + elif snp_type == "deletion": + del_len = int(snp_data) + cigar_str = "%dM%dD%dM" % (left_read_len, del_len, read_len - left_read_len) + read_seq = 
chr_seq[pos:pos+left_read_len] + chr_seq[pos+left_read_len+del_len:pos+read_len+del_len] + MD = "%d^%s%d" % (left_read_len, chr_seq[pos+left_read_len:pos+left_read_len+del_len], read_len - left_read_len) + Zs = "%d|D|%s" % (left_read_len, snp_id) + else: + assert snp_type == "insertion" + ins_len = len(snp_data) + assert ins_len < read_len + cigar_str = "%dM%dI%dM" % (left_read_len, ins_len, read_len - left_read_len - ins_len) + read_seq = chr_seq[pos:pos+left_read_len] + snp_data + chr_seq[pos+left_read_len:pos+read_len-ins_len] + MD = "%d" % (read_len - ins_len) + Zs = "%d|I|%s" % (left_read_len, snp_id) + + if len(read_seq) != read_len: + print >> sys.stderr, "read length differs:", len(read_seq), "vs.", read_len + print >> sys.stderr, pos, cigar_str, MD, Zs + assert False + + ref_read_seq = chr_seq[pos:pos+read_len] + return pos, cigar_str, MD, Zs, read_seq, ref_read_seq + + +""" +""" +cigar_re = re.compile('\d+\w') +def samRepOk(genome_seq, read_seq, chr, pos, cigar, MD, Zs): + assert chr in genome_seq + chr_seq = genome_seq[chr] + assert pos < len(chr_seq) + + # Calculate XM and NM based on Cigar and Zs + cigars = cigar_re.findall(cigar) + cigars = [[int(cigars[i][:-1]), cigars[i][-1]] for i in range(len(cigars))] + ref_pos, read_pos = pos, 0 + ann_ref_seq, ann_ref_rel, ann_read_seq, ann_read_rel = [], [], [], [] + for i in range(len(cigars)): + cigar_len, cigar_op = cigars[i] + if cigar_op == "M": + partial_ref_seq = chr_seq[ref_pos:ref_pos+cigar_len] + partial_read_seq = read_seq[read_pos:read_pos+cigar_len] + assert len(partial_ref_seq) == len(partial_read_seq) + ann_ref_seq += list(partial_ref_seq) + ann_read_seq += list(partial_read_seq) + for j in range(len(partial_ref_seq)): + if partial_ref_seq[j] == partial_read_seq[j]: + ann_ref_rel.append("=") + ann_read_rel.append("=") + else: + ann_ref_rel.append("X") + ann_read_rel.append("X") + ref_pos += cigar_len + read_pos += cigar_len + elif cigar_op == "D": + partial_ref_seq = 
chr_seq[ref_pos:ref_pos+cigar_len] + ann_ref_rel += list(partial_ref_seq) + ann_ref_seq += list(partial_ref_seq) + ann_read_rel += (["-"] * cigar_len) + ann_read_seq += (["-"] * cigar_len) + ref_pos += cigar_len + elif cigar_op == "I": + partial_read_seq = read_seq[read_pos:read_pos+cigar_len] + ann_ref_rel += (["-"] * cigar_len) + ann_ref_seq += (["-"] * cigar_len) + ann_read_rel += list(partial_read_seq) + ann_read_seq += list(partial_read_seq) + read_pos += cigar_len + elif cigar_op == "N": + ref_pos += cigar_len + else: + assert False + + assert len(ann_ref_seq) == len(ann_read_seq) + assert len(ann_ref_seq) == len(ann_ref_rel) + assert len(ann_ref_seq) == len(ann_read_rel) + ann_Zs_seq = ["0" for i in range(len(ann_ref_seq))] + + Zss, Zs_i, snp_pos_add = [], 0, 0 + if Zs != "": + Zss = Zs.split(',') + Zss = [zs.split('|') for zs in Zss] + + ann_read_pos = 0 + for zs in Zss: + zs_pos, zs_type, zs_id = zs + zs_pos = int(zs_pos) + for i in range(zs_pos): + while ann_read_rel[ann_read_pos] == '-': + ann_read_pos += 1 + ann_read_pos += 1 + if zs_type == "S": + ann_Zs_seq[ann_read_pos] = "1" + ann_read_pos += 1 + elif zs_type == "D": + while ann_read_rel[ann_read_pos] == '-': + ann_Zs_seq[ann_read_pos] = "1" + ann_read_pos += 1 + elif zs_type == "I": + while ann_ref_rel[ann_read_pos] == '-': + ann_Zs_seq[ann_read_pos] = "1" + ann_read_pos += 1 + else: + assert False + + tMD, tXM, tNM = "", 0, 0 + match_len = 0 + i = 0 + while i < len(ann_ref_seq): + if ann_ref_rel[i] == "=": + assert ann_read_rel[i] == "=" + match_len += 1 + i += 1 + continue + assert ann_read_rel[i] != "=" + if ann_ref_rel[i] == "X" and ann_read_rel[i] == "X": + if match_len > 0: + tMD += ("{}".format(match_len)) + match_len = 0 + tMD += ann_ref_seq[i] + if ann_Zs_seq[i] == "0": + tXM += 1 + tNM += 1 + i += 1 + else: + assert ann_ref_rel[i] == "-" or ann_read_rel[i] == "-" + if ann_ref_rel[i] == '-': + while ann_ref_rel[i] == '-': + if ann_Zs_seq[i] == "0": + tNM += 1 + i += 1 + else: + assert 
ann_read_rel[i] == '-' + del_seq = "" + while ann_read_rel[i] == '-': + del_seq += ann_ref_seq[i] + if ann_Zs_seq[i] == "0": + tNM += 1 + i += 1 + if match_len > 0: + tMD += ("{}".format(match_len)) + match_len = 0 + tMD += ("^{}".format(del_seq)) + + if match_len > 0: + tMD += ("{}".format(match_len)) + + if tMD != MD: + print >> sys.stderr, chr, pos, cigar, MD, Zs + print >> sys.stderr, tMD + assert False + + +""" +""" +def simulate_reads(genome_file, snp_file, base_fname, \ + paired_end, read_len, frag_len, \ + num_frag, sanity_check, verbose): + if read_len > frag_len: + frag_len = read_len + + genome_seq = read_genome(genome_file) + snps = read_snp(snp_file) + chr_ids = genome_seq.keys() + + sam_file = open(base_fname + ".sam", "w") + + # Write SAM header + print >> sam_file, "@HD\tVN:1.0\tSO:unsorted" + for chr in genome_seq.keys(): + print >> sam_file, "@SQ\tSN:%s\tLN:%d" % (chr, len(genome_seq[chr])) + + read_file = open(base_fname + "_snp_1.fa", "w") + ref_read_file = open(base_fname + "_ref_1.fa", "w") + if paired_end: + read2_file = open(base_fname + "_snp_2.fa", "w") + ref_read2_file = open(base_fname + "_ref_2.fa", "w") + + cur_read_id = 1 + for chr in snps: + assert chr in genome_seq + chr_seq = genome_seq[chr] + chr_len = len(chr_seq) + if chr in snps: + chr_snps = snps[chr] + else: + chr_snps = [] + for snp in chr_snps: + # SAM specification (v1.4) + # http://samtools.sourceforge.net/ + flag, flag2 = 99, 163 # 83, 147 + pos, cigar_str, MD, Zs, read_seq, ref_read_seq = getSamAlignment(chr_seq, read_len, snp) + # pos2, cigar2_str, MD2, Zs2, read2_seq = getSamAlignment(chr_seq, frag_pos+frag_len-read_len, read_len, snp) + if sanity_check: + samRepOk(genome_seq, read_seq, chr, pos, cigar_str, MD, Zs) + # samRepOk(genome_seq, read2_seq, chr, pos2, cigar2_str, MD2, Zs2) + + if Zs != "": + Zs = ("\tZs:Z:{}".format(Zs)) + # if Zs2 != "": + # Zs2 = ("\tZs:Z:{}".format(Zs2)) + + read_id_str = "{}_{}_{}_{}".format(cur_read_id, chr, pos, cigar_str) + print >> 
read_file, ">{}".format(read_id_str) + print >> read_file, read_seq + print >> sam_file, "{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:0\tNM:i:0\tMD:Z:{}{}".format(read_id_str, flag, chr, pos + 1, cigar_str, chr, pos + 1, read_seq, MD, Zs) + + print >> ref_read_file, ">{}_{}_{}_100M".format(cur_read_id, chr, pos) + print >> ref_read_file, ref_read_seq + """ + if paired_end: + print >> read2_file, ">{}".format(cur_read_id) + print >> read2_file, reverse_complement(read2_seq) + print >> sam_file, "{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:0\tNM:i:0\tMD:Z:{}{}".format(cur_read_id, flag2, chr, pos2 + 1, cigar2_str, chr, pos + 1, read2_seq, MD2, Zs2) + """ + + cur_read_id += 1 + + sam_file.close() + read_file.close() + ref_read_file.close() + if paired_end: + read2_file.close() + ref_read2_file.close() + + +if __name__ == '__main__': + parser = ArgumentParser( + description='Simulate reads from GENOME (fasta) and GTF files') + parser.add_argument('genome_file', + nargs='?', + type=FileType('r'), + help='input GENOME file') + parser.add_argument('snp_file', + nargs='?', + type=FileType('r'), + help='input SNP file') + parser.add_argument('base_fname', + nargs='?', + type=str, + help='output base filename') + parser.add_argument('--paired-end', + dest='paired_end', + action='store_true', + help='single-end reads (default: paired-end reads)') + parser.add_argument('-r', '--read-length', + dest='read_len', + action='store', + type=int, + default=100, + help='read length (default: 100)') + parser.add_argument('-f', '--fragment-length', + dest='frag_len', + action='store', + type=int, + default=250, + help='fragment length (default: 250)') + parser.add_argument('-n', '--num-fragment', + dest='num_frag', + action='store', + type=int, + default=1000000, + help='number of fragments (default: 1000000)') + parser.add_argument('--sanity-check', + dest='sanity_check', + action='store_true', + help='sanity check') + parser.add_argument('-v', '--verbose', + dest='verbose', + 
action='store_true', + help='also print some statistics to stderr') + parser.add_argument('--version', + action='version', + version='%(prog)s 2.1.0') + args = parser.parse_args() + if not args.genome_file or not args.snp_file: + parser.print_help() + exit(1) + simulate_reads(args.genome_file, args.snp_file, args.base_fname, \ + args.paired_end, args.read_len, args.frag_len, \ + args.num_frag, args.sanity_check, args.verbose) diff --git a/evaluation/tests/repeat/commands b/evaluation/tests/repeat/commands new file mode 100644 index 0000000..db6a962 --- /dev/null +++ b/evaluation/tests/repeat/commands @@ -0,0 +1,64 @@ +date; time bowtie2 -X 800 --extends 1000 --score-min C,0 -p 3 -x 20 -f -1 1.fa -2 2.fa > bowtie2.sam + +date; time hisat2 --no-spliced-alignment --score-min C,0 -X 800 -p 3 -x 20 -f -1 1.fa -2 2.fa > hisat2.sam + + + +# Chromosome 22 + +/usr/bin/time -l hisat2-repeat -p 3 --save-sa --load-sa --repeat-length 51-300,76-300,100-300,101-300,151-300 --repeat-count 5 22.fa 22_rep + +/usr/bin/time -l hisat2-build -p 3 22.fa --repeat-ref 22_rep.rep.fa --repeat-info 22_rep.rep.info 22_rep + +hisat2_simulate_reads.py --dna --num-fragment 1000000 --repeat-info 22_rep.rep.info 22.fa /dev/null /dev/null sim + +hisat2_simulate_reads.py --dna --num-fragment 1000000 --error-rate 0.2 --repeat-info 22_rep.rep.info 22.fa /dev/null /dev/null sim + +cp 22_rep.rep.info ../../data/ +cp 22_rep*ht2 ../../indexes/HISAT2_22 + + + +# 3-base +/usr/bin/time -l hisat2-repeat -p 30 --save-sa --load-sa --repeat-length 26-100,51-100 --repeat-count 5 genome.fa genome_rep +/usr/bin/time -l hisat2-build -p 30 genome.fa --repeat-ref genome_rep-26-100.rep.fa --repeat-info genome_rep-26-100.rep.info genome_rep + + + + + +# Human genome + +/usr/bin/time -l hisat2-repeat -p 3 --save-sa --load-sa --repeat-length 100-300,101-300 --repeat-count 5 genome.fa genome_rep + +/usr/bin/time -l hisat2-build -p 3 genome.fa --repeat-ref genome_rep.rep.fa --repeat-info genome_rep.rep.info genome_rep + 
+/usr/bin/time -l hisat2-repeat -p 3 --save-sa --load-sa --repeat-edit 0 --repeat-length 101--300 --repeat-count 5 genome.fa genome_rep-101-300 + +/usr/bin/time -l hisat2-build -p 3 genome.fa --repeat-ref genome_rep-101-300.rep.fa --repeat-info genome_rep-101-300.rep.info genome_rep-101-300 + +hisat2_simulate_reads.py --dna --num-fragment 1000 --repeat-info genome_rep.rep.info genome.fa /dev/null /dev/null sim + +hisat2_simulate_reads.py --dna --num-fragment 1000 --error-rate 0.2 --repeat-info genome_rep.rep.info genome.fa /dev/null /dev/null sim + +hisat2_simulate_reads.py --dna --num-fragment 10000 --repeat-info genome_rep.rep.info genome.fa /dev/null /dev/null sim + +hisat2_simulate_reads.py --dna --num-fragment 10000 --error-rate 0.2 --repeat-info genome_rep.rep.info genome.fa /dev/null /dev/null sim + +hisat2_simulate_reads.py --dna --num-fragment 10000000 --repeat-info genome_rep.rep.info genome.fa /dev/null /dev/null sim + +hisat2_simulate_reads.py --dna --num-fragment 10000000 --error-rate 0.2 --repeat-info genome_rep.rep.info genome.fa /dev/null /dev/null sim + +cp genome_rep.rep.info ../../data/ +cp genome_rep.*ht2 ../../indexes/HISAT2 + + + + +# Bisulfite-treated sequencing + +/usr/bin/time -l hisat2-repeat -p 3 --repeat-length 100 --repeat-count 2 --repeat-edit 0 --forward-only --CGtoTG 22.fa 22_rep + +/usr/bin/time -l hisat2-repeat -p 3 --repeat-length 200 --repeat-count 2 --repeat-edit 0 --forward-only --CGtoTG genome.fa genome_rep + + diff --git a/evaluation/tests/repeat/generate_repeats.py b/evaluation/tests/repeat/generate_repeats.py new file mode 100644 index 0000000..2227245 --- /dev/null +++ b/evaluation/tests/repeat/generate_repeats.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python + +import sys +import struct + +chr_name = "20" + +chr_seq = "" +for line in open("%s.fa" % chr_name): + if line.startswith('>'): + continue + line = line.strip() + line = line.replace('N', '') + chr_seq += line +chr_seq += '$' + +chr_sa = [] +f = open("%s.sa" % chr_name, 
"rb") +while True: + fourbytes = f.read(4) + if fourbytes == "": + break + + num = struct.unpack('I', fourbytes)[0] + chr_sa.append(num) + + if len(chr_sa) % 5000000 == 0: + print len(chr_sa) +f.close() + +assert chr_sa[0] + 1 == len(chr_sa) +chr_sa = chr_sa[1:] +assert len(chr_seq) == len(chr_sa) + +# for i, num in enumerate(chr_sa): +# print "%10d\t%s\t%d" % (i, chr_seq[num:num+100], num) + +seq_len = 100 +i = 0 +repeats = [] +while i < len(chr_sa) - 1: + pos = chr_sa[i] + base_seq = chr_seq[pos:pos+seq_len] + for j in xrange(i+1, len(chr_sa)): + pos2 = chr_sa[j] + cmp_seq = chr_seq[pos2:pos2+seq_len] + if base_seq != cmp_seq: + break + + if j - i >= 200: + repeats.append([base_seq, sorted(chr_sa[i:j])]) + + i = j + + if i % 5000000 == 0: + print i + +found = False +print len(repeats), "repeats" +deleted = set() +for i in xrange(len(repeats) - 1): + for j in xrange(i + 1, len(repeats)): + if j in deleted: + continue + + num_close, num_close2 = 0, 0 + pos_seq, pos_set = repeats[i] + pos_seq2, pos_set2 = repeats[j] + + k1, k2 = 0, 0 + while k1 < len(pos_set) and k2 < len(pos_set2): + _pos, _pos2 = pos_set[k1], pos_set2[k2] + if abs(_pos - _pos2) < 300: + num_close += 1 + if abs(_pos - _pos2) < 1100: + num_close2 += 1 + if _pos <= _pos2: + k1 += 1 + else: + k2 += 1 + + if num_close > min(len(pos_set), len(pos_set2)) * 0.95: + deleted.add(j) + + if num_close == 1 and num_close2 < 5: + found = True + print pos_set + print pos_set2 + print pos_seq + print pos_seq2 + + file1 = open("1.fa", "w") + file2 = open("2.fa", "w") + + pos_seq2_rc = list(pos_seq2) + pos_seq2_rc = pos_seq2_rc[::-1] + for k in xrange(seq_len): + nt = pos_seq2_rc[k] + if nt == 'A': + nt = 'T' + elif nt == 'C': + nt = 'G' + elif nt == 'G': + nt = 'C' + else: + assert nt == 'T' + nt = 'A' + pos_seq2_rc[k] = nt + pos_seq2_rc = ''.join(pos_seq2_rc) + + for k in xrange(1000000): + print >> file1, ">%d" % k + print >> file2, ">%d" % k + print >> file1, pos_seq + print >> file2, pos_seq2_rc + + 
file1.close() + file2.close() + + break + if found: + break + + print i + +chr_seq = "" +for line in open("%s.fa" % chr_name): + if line.startswith('>'): + continue + line = line.strip() + chr_seq += line + +N_ranges = [] +prev_nt = None +for i in xrange(len(chr_seq)): + nt = chr_seq[i] + if nt == 'N': + if prev_nt != 'N': + N_ranges.append([i, i]) # inclusive + else: + assert len(N_ranges) > 0 + N_ranges[-1][1] = i + prev_nt = nt + +to_joined_list = [] +for N_start, N_end in N_ranges: + if len(to_joined_list) == 0: + if N_start > 0: + to_joined_list.append([0, 0]) + else: + to_joined_list.append([N_end + 1, 0]) + else: + N_size = N_end - N_start + 1 + to = N_end + 1 - to_joined_list[-1][0] + to_joined_list[-1][1] - N_size + assert to > to_joined_list[-1][1] + to_joined_list.append([N_end + 1, to]) + +to_genome_list = [[y, x] for x, y in to_joined_list] + +N_ranges_tmp = [] +for i in xrange(len(to_genome_list)): + to_genome = to_genome_list[i] + if i == 0: + if to_genome[1] > 0: + N_ranges_tmp.append([0, to_genome[1] - 1]) + else: + to_genome_before = to_genome_list[i-1] + N_ranges_tmp.append([to_genome_before[1] + to_genome[0] - to_genome_before[0], to_genome[1] - 1]) + +assert N_ranges == N_ranges_tmp + +file = open("%s_rep.info" % chr_name, "w") +def print_rep_info(rep_name, rep_pos, rep_len, pos_set, pos_seq): + print >> file, ">%s*0\trep\t%d\t%d\t%d\t0" % (rep_name, rep_pos, rep_len, len(pos_set)) + for i in xrange(0, len(pos_set), 10): + output = "" + for j in range(i, i + 10): + if j >= len(pos_set): + break + if j > i: + output += " " + + def convert(pos): + for i in xrange(len(to_genome_list)): + if i + 1 == len(to_genome_list) or (pos >= to_genome_list[i][0] and pos < to_genome_list[i+1][0]): + return pos - to_genome_list[i][0] + to_genome_list[i][1] + + assert False + + pos = convert(pos_set[j]) + assert chr_seq[pos:pos+seq_len] == pos_seq + output += ("%s:%d:+" % (chr_name, pos)) + print >> file, output +print_rep_info("rep1", 0, seq_len, pos_set, 
pos_seq) +print_rep_info("rep2", seq_len, seq_len, pos_set2, pos_seq2) +file.close() + +chr_seq = chr_seq.replace(pos_seq, 'N' * seq_len) +chr_seq = chr_seq.replace(pos_seq2, 'N' * seq_len) +file = open("%s_mask.fa" % chr_name, "w") +print >> file, ">%s_mask" % chr_name +for i in xrange(0, len(chr_seq), 60): + print >> file, chr_seq[i:i+60] +file.close() + +file = open("%s_rep.fa" % chr_name, "w") +rep_seq = pos_seq + pos_seq2 +print >> file, ">rep" +for i in xrange(0, len(rep_seq), 60): + print >> file, rep_seq[i:i+60] +file.close() + diff --git a/evaluation/tests/repeat/test_repeat.py b/evaluation/tests/repeat/test_repeat.py new file mode 100644 index 0000000..cd10009 --- /dev/null +++ b/evaluation/tests/repeat/test_repeat.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +import sys, os, subprocess, random +from argparse import ArgumentParser, FileType + +""" +""" +def reverse_complement(seq): + result = "" + for nt in seq: + base = nt + if nt == 'A': + base = 'T' + elif nt == 'a': + base = 't' + elif nt == 'C': + base = 'G' + elif nt == 'c': + base = 'g' + elif nt == 'G': + base = 'C' + elif nt == 'g': + base = 'c' + elif nt == 'T': + base = 'A' + elif nt == 't': + base = 'a' + + result = base + result + + return result + + +""" +""" +def read_genome(genome_filename): + chr_dic = {} + genome_file = open(genome_filename, "r") + + chr_name, sequence = "", "" + for line in genome_file: + if line[0] == ">": + if chr_name and sequence: + chr_dic[chr_name] = sequence + + chr_name = line[1:-1].split()[0] + sequence = "" + else: + sequence += line[:-1] + + if chr_name and sequence: + chr_dic[chr_name] = sequence + + genome_file.close() + + print >> sys.stderr, "genome is loaded" + + return chr_dic + + +""" +""" +def generate_random_seq(seq_len): + assert seq_len > 0 + random_seq = "" + for i in xrange(seq_len): + random_seq += "ACGT"[random.randint(0, 3)] + return random_seq + + +""" +""" +def test_repeat(verbose): + random.seed(1) + + backbone_seq = generate_random_seq(500) 
+ mm_seq = backbone_seq[:] + mm_seq = mm_seq[:50] + ("A" if mm_seq[50] != "A" else "C") + mm_seq[51:] + mm_seq2 = backbone_seq[:] + mm_seq2 = mm_seq2[:450] + ("A" if mm_seq2[450] != "A" else "C") + mm_seq2[451:] + del_seq = backbone_seq[:] + del_seq = del_seq[:50] + del_seq[52:150] + del_seq[152:] + del_seq2 = backbone_seq[:] + del_seq2 = del_seq2[:350] + del_seq2[352:450] + del_seq2[452:] + indel_seq = backbone_seq[:] + indel_seq = indel_seq[:30] + indel_seq[32:130] + "AAA" + indel_seq[130:] + indel_seq2 = backbone_seq[:] + indel_seq2 = indel_seq2[:30] + "AAA" + indel_seq2[30:130] + indel_seq2[132:] + + seqs = [ + # dummy_seq, + ["bb01", backbone_seq], + ["bb02", backbone_seq], + ["bb03", backbone_seq], + ["bb04", backbone_seq], + ["bb05", backbone_seq], + ["mm01", mm_seq], + ["mm02", mm_seq], + ["dd01", del_seq], + ["dd02", del_seq], + ["dd03", del_seq2], + ["dd04", del_seq2], + ["id01", indel_seq], + ["id02", indel_seq], + ["id03", indel_seq], + ["id04", indel_seq], + ["id05", indel_seq], + ["id06", indel_seq], + ["id07", indel_seq2], + ] + + for id, seq in seqs: + print ">%s" % id + print generate_random_seq(20) + print seq + print generate_random_seq(20) + + +""" +""" +if __name__ == "__main__": + parser = ArgumentParser( + description='') + parser.add_argument('-v', '--verbose', + dest='verbose', + action='store_true', + help='also print some statistics to stderr') + + args = parser.parse_args() + test_repeat(args.verbose) + + diff --git a/evaluation/tests/the_small_example/COMMAND b/evaluation/tests/the_small_example/COMMAND new file mode 100644 index 0000000..1723cfb --- /dev/null +++ b/evaluation/tests/the_small_example/COMMAND @@ -0,0 +1,2 @@ +../../../hisat2-build --offrate 0 --ftabchars 1 --snp small.snp small.fa small +../../../hisat2 -x small -c GCTAG diff --git a/evaluation/tests/the_small_example/small.fa b/evaluation/tests/the_small_example/small.fa new file mode 100644 index 0000000..e9219f3 --- /dev/null +++ 
b/evaluation/tests/the_small_example/small.fa @@ -0,0 +1,2 @@ +>small +GAGCTG diff --git a/evaluation/tests/the_small_example/small.snp b/evaluation/tests/the_small_example/small.snp new file mode 100644 index 0000000..aab0624 --- /dev/null +++ b/evaluation/tests/the_small_example/small.snp @@ -0,0 +1,3 @@ +snp1 single small 1 T +snp2 deletion small 4 1 +snp3 insertion small 5 A diff --git a/example/index/22_20-21M_snp.1.ht2 b/example/index/22_20-21M_snp.1.ht2 new file mode 100644 index 0000000..47f7462 Binary files /dev/null and b/example/index/22_20-21M_snp.1.ht2 differ diff --git a/example/index/22_20-21M_snp.2.ht2 b/example/index/22_20-21M_snp.2.ht2 new file mode 100644 index 0000000..009e07d Binary files /dev/null and b/example/index/22_20-21M_snp.2.ht2 differ diff --git a/example/index/22_20-21M_snp.3.ht2 b/example/index/22_20-21M_snp.3.ht2 new file mode 100644 index 0000000..4ccadf0 Binary files /dev/null and b/example/index/22_20-21M_snp.3.ht2 differ diff --git a/example/index/22_20-21M_snp.4.ht2 b/example/index/22_20-21M_snp.4.ht2 new file mode 100644 index 0000000..c699c82 Binary files /dev/null and b/example/index/22_20-21M_snp.4.ht2 differ diff --git a/example/index/22_20-21M_snp.5.ht2 b/example/index/22_20-21M_snp.5.ht2 new file mode 100644 index 0000000..b8a78cb Binary files /dev/null and b/example/index/22_20-21M_snp.5.ht2 differ diff --git a/example/index/22_20-21M_snp.6.ht2 b/example/index/22_20-21M_snp.6.ht2 new file mode 100644 index 0000000..ba901df Binary files /dev/null and b/example/index/22_20-21M_snp.6.ht2 differ diff --git a/example/index/22_20-21M_snp.7.ht2 b/example/index/22_20-21M_snp.7.ht2 new file mode 100644 index 0000000..ee180e2 Binary files /dev/null and b/example/index/22_20-21M_snp.7.ht2 differ diff --git a/example/index/22_20-21M_snp.8.ht2 b/example/index/22_20-21M_snp.8.ht2 new file mode 100644 index 0000000..32bfe42 Binary files /dev/null and b/example/index/22_20-21M_snp.8.ht2 differ diff --git a/example/reads/reads_1.fa 
b/example/reads/reads_1.fa new file mode 100644 index 0000000..6ef20e6 --- /dev/null +++ b/example/reads/reads_1.fa @@ -0,0 +1,2000 @@ +>1 +GCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACT +>2 +AGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCAT +>3 +TTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCA +>4 +CTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTG +>5 +ACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCC +>6 +CAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCA +>7 +GTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGA +>8 +AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG +>9 +AGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGT +>10 +GATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAG +>11 +CAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGG +>12 +TGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTT +>13 +CTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCT +>14 +CCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCT +>15 +ACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCA +>16 +TACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCC +>17 +CCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTC +>18 
+AACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCAC +>19 +GCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCCCA +>20 +AGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAG +>21 +TACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGT +>22 +CCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCT +>23 +TCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCA +>24 +CGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACT +>25 +CAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATG +>26 +TGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGA +>27 +CAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAG +>28 +CAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCC +>29 +TCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTG +>30 +TGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTG +>31 +TGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTT +>32 +CAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGA +>33 +CAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAG +>34 +CCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTG +>35 +AGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAG +>36 
+CACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGT +>37 +GGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGG +>38 +CAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCA +>39 +CCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGC +>40 +GCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACA +>41 +CAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCT +>42 +AGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGG +>43 +TCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGG +>44 +AGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCT +>45 +CCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCC +>46 +GGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGG +>47 +ACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGAT +>48 +GGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGAC +>49 +AACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCAC +>50 +GCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGG +>51 +TTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCG +>52 +ATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGT +>53 +AGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGT +>54 
+CCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCG +>55 +CCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACAT +>56 +GGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGG +>57 +CTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCA +>58 +TGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTG +>59 +AGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCT +>60 +CCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGG +>61 +CGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGA +>62 +CGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAG +>63 +CTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAAC +>64 +TCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCC +>65 +CGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGA +>66 +CATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTG +>67 +AGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGT +>68 +GTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTG +>69 +CCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCG +>70 +ACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTA +>71 +GCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGG +>72 
+AGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGG +>73 +CCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCT +>74 +AGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTA +>75 +CCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCC +>76 +ATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCT +>77 +CATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTG +>78 +CCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTG +>79 +TCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTA +>80 +TGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAA +>81 +TGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTC +>82 +GAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTC +>83 +CCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTG +>84 +GCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACT +>85 +AAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGT +>86 +CTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAA +>87 +CGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGA +>88 +GCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCG +>89 +TCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACC +>90 
+CATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCC +>91 +GAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAG +>92 +TGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGA +>93 +ACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGAT +>94 +CCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGT +>95 +ATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGC +>96 +GCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAA +>97 +CTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGC +>98 +TAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCA +>99 +TCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTG +>100 +GCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACT +>101 +GTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGG +>102 +CTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGG +>103 +TGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAG +>104 +CCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGG +>105 +CAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCA +>106 +CCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCAT +>107 +CAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCA +>108 
+AGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGG +>109 +AACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCAC +>110 +TCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTC +>111 +AGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGA +>112 +TTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTA +>113 +GGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACA +>114 +AGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTC +>115 +GCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGA +>116 +TAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCG +>117 +TCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGG +>118 +TGAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGT +>119 +GGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGG +>120 +GTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGC +>121 +ACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTG +>122 +TGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAA +>123 +GAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATG +>124 +CCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGT +>125 +CCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCC +>126 
+CAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAG +>127 +CCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGG +>128 +TGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTC +>129 +TGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTT +>130 +CACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGC +>131 +CCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCAC +>132 +ATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGG +>133 +CACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGT +>134 +CAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCA +>135 +CTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGG +>136 +CCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTT +>137 +GGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTC +>138 +TGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGC +>139 +CCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCAT +>140 +TCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTA +>141 +AGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTG +>142 +ATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACA +>143 +CTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATC +>144 
+AGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTC +>145 +TGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTG +>146 +AGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCG +>147 +GGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAG +>148 +CAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGC +>149 +ACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCA +>150 +TCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTG +>151 +GGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAG +>152 +AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG +>153 +ATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCT +>154 +AAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAG +>155 +CAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGT +>156 +TCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCAC +>157 +AAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACG +>158 +AAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTT +>159 +AAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGA +>160 +TCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGG +>161 +CAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGC +>162 
+GAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCA +>163 +ACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCAC +>164 +ATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTT +>165 +AAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGAC +>166 +TGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAA +>167 +AAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGA +>168 +GGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGG +>169 +TGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAA +>170 +TCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCA +>171 +GCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGA +>172 +CAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAG +>173 +AAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTG +>174 +TGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGC +>175 +GCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATAC +>176 +TGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGG +>177 +GAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGC +>178 +GCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGG +>179 +CTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACG +>180 
+GGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTC +>181 +GATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGA +>182 +TCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCAC +>183 +GGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGG +>184 +CACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGC +>185 +TCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCC +>186 +TTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCG +>187 +AGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTG +>188 +CCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCA +>189 +CCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTG +>190 +GTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCA +>191 +ATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCT +>192 +AGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGG +>193 +TGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAAC +>194 +ATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCT +>195 +TGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACA +>196 +AAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGC +>197 +AGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGA +>198 
+ATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCC +>199 +AAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCC +>200 +CAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACA +>201 +CACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGT +>202 +TTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGG +>203 +CCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAG +>204 +GTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGC +>205 +TGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTC +>206 +CAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCC +>207 +AAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGC +>208 +GGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGG +>209 +CCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGA +>210 +GTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCA +>211 +CCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCT +>212 +GAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGA +>213 +AGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAG +>214 +GGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGC +>215 +CCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTG +>216 
+CATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGT +>217 +GCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTC +>218 +ATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCT +>219 +CTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCA +>220 +GCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTC +>221 +AGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAG +>222 +AGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAA +>223 +TGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTT +>224 +CCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGC +>225 +GGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGG +>226 +CACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGT +>227 +TTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTA +>228 +GAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCA +>229 +CGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAG +>230 +TCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGG +>231 +TGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAG +>232 +GCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACG +>233 +GGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGG +>234 
+CAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCA +>235 +CCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCC +>236 +ATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGG +>237 +CCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTC +>238 +CCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCC +>239 +CACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCC +>240 +GTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTG +>241 +GCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCG +>242 +GCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCG +>243 +GACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGA +>244 +CACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCC +>245 +GAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCC +>246 +TCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGG +>247 +GGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGA +>248 +CTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGC +>249 +CAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAG +>250 +GGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGT +>251 +TACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCC +>252 
+GAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCC +>253 +CTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGG +>254 +AGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCC +>255 +TTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCT +>256 +CAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGA +>257 +TGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGC +>258 +CATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTG +>259 +CAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCC +>260 +ACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGA +>261 +ACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCC +>262 +CCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTC +>263 +GGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGC +>264 +ACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGAT +>265 +CCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCC +>266 +CCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCC +>267 +TGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGT +>268 +GAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGC +>269 +TTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTA +>270 
+GCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAG +>271 +ACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCC +>272 +CCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAG +>273 +GCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCC +>274 +CCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCC +>275 +ATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTT +>276 +GAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCC +>277 +GTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGC +>278 +AAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGC +>279 +CTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCC +>280 +TTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTA +>281 +TCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAG +>282 +CAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACA +>283 +CAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCA +>284 +AGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAA +>285 +AGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCT +>286 +GTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGG +>287 +ATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGC +>288 
+GTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTG +>289 +CAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTT +>290 +CACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCA +>291 +GGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGG +>292 +ACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGG +>293 +TCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCAC +>294 +TGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTG +>295 +CCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGA +>296 +AGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGC +>297 +CAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAG +>298 +GGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGG +>299 +GTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAA +>300 +ATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACA +>301 +ATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCC +>302 +GCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGA +>303 +GCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAG +>304 +GTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGC +>305 +AGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTG +>306 
+GCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAG +>307 +GGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTT +>308 +GGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGT +>309 +TCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCC +>310 +TTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGC +>311 +CATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGT +>312 +GATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGA +>313 +GATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAG +>314 +CCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCT +>315 +TTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGC +>316 +CCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCC +>317 +TCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCA +>318 +CAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCC +>319 +GAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTG +>320 +GAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGG +>321 +CACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGT +>322 +CACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCA +>323 +CTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTT +>324 
+GAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGAT +>325 +GTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGA +>326 +CCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGC +>327 +AGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGAC +>328 +CCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCC +>329 +GCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGAC +>330 +GGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGC +>331 +TGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGG +>332 +CTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTT +>333 +GGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTC +>334 +TCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGC +>335 +TCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTA +>336 +CAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGA +>337 +CCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCT +>338 +GTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGG +>339 +AGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCAC +>340 +GCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAG +>341 +CCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAG +>342 
+CAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTC +>343 +CCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAG +>344 +CTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCC +>345 +CCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGC +>346 +AGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGAC +>347 +GCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATC +>348 +GCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCC +>349 +GTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAA +>350 +GGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTG +>351 +GCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGC +>352 +ACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACT +>353 +CCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCA +>354 +TCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCC +>355 +CCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAG +>356 +CCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTC +>357 +CTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCG +>358 +AGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGG +>359 +CGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACT +>360 
+CGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGT +>361 +CCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCT +>362 +TCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACAC +>363 +CATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTG +>364 +GAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCA +>365 +AGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAA +>366 +AGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACC +>367 +ACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACC +>368 +GGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAG +>369 +GTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATA +>370 +GGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGT +>371 +GTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGC +>372 +TGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCT +>373 +GCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGA +>374 +GAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCT +>375 +CGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAG +>376 +CTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCT +>377 +GTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTT +>378 
+AACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGT +>379 +CTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGG +>380 +ACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGG +>381 +GAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCC +>382 +GGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGG +>383 +CTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGG +>384 +CACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGAT +>385 +CCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGG +>386 +TTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGT +>387 +TGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGA +>388 +CACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTT +>389 +CCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCC +>390 +ACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCA +>391 +ACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGC +>392 +GACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCG +>393 +AAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACG +>394 +ACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGAT +>395 +CAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCAC +>396 
+CCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCG +>397 +AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG +>398 +CGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACG +>399 +CACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGC +>400 +TCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCC +>401 +CGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACG +>402 +CAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGG +>403 +GCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGAC +>404 +GCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCT +>405 +CATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTAT +>406 +GGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCC +>407 +TTTCTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGA +>408 +AATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACT +>409 +ATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATG +>410 +TACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGT +>411 +CATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCC +>412 +GGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTG +>413 +AGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGT +>414 
+GCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACT +>415 +GCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAA +>416 +TGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAA +>417 +ACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCA +>418 +CTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTA +>419 +AAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACG +>420 +TGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTG +>421 +GGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGG +>422 +AGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCT +>423 +CTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTA +>424 +GCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTC +>425 +CACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGC +>426 +TTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGC +>427 +CATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTG +>428 +GGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTG +>429 +CTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTC +>430 +CACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCA +>431 +AAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGC +>432 
+TCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGG +>433 +GCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGG +>434 +CGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGT +>435 +GGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAG +>436 +GTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTT +>437 +AGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGAC +>438 +GAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCC +>439 +GAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGC +>440 +GGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGAC +>441 +TCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTC +>442 +GCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGG +>443 +CCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCG +>444 +CAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGT +>445 +GTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGC +>446 +GACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCT +>447 +ACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCC +>448 +CAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTC +>449 +AACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGAT +>450 
+TGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCA +>451 +CCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTC +>452 +GTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGG +>453 +CCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATC +>454 +ATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCT +>455 +ATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGG +>456 +CTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTT +>457 +GCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCT +>458 +CCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGG +>459 +GTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTG +>460 +GAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGG +>461 +ATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCT +>462 +GGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCCC +>463 +CGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACG +>464 +TTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCT +>465 +ACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTG +>466 +TCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGC +>467 +ATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACA +>468 
+ACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGG +>469 +ATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTG +>470 +ACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATC +>471 +CCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGC +>472 +GGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCC +>473 +GCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTC +>474 +GCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTG +>475 +CACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGT +>476 +AAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCAC +>477 +CTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCA +>478 +CAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGA +>479 +AAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAG +>480 +GGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGT +>481 +AGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCT +>482 +CCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCC +>483 +TCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCG +>484 +CAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCC +>485 +CGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAG +>486 
+GCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGG +>487 +GGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGT +>488 +CAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCC +>489 +TGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAA +>490 +AGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGC +>491 +CTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATC +>492 +TGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTAC +>493 +AGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACC +>494 +TGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTC +>495 +AGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGG +>496 +ACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTA +>497 +ATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGC +>498 +CTGAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGG +>499 +GATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGC +>500 +GTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGT +>501 +ATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACA +>502 +CATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGT +>503 +CAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGG +>504 
+CGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGG +>505 +CAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCC +>506 +GGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACA +>507 +GCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCG +>508 +AGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGC +>509 +CCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCT +>510 +GCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACT +>511 +GGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTAT +>512 +CTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGT +>513 +GGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGC +>514 +AGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAG +>515 +CCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCT +>516 +ATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGT +>517 +GATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAAC +>518 +TCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACA +>519 +AGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAG +>520 +TACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCG +>521 +TGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGC +>522 
+TTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCC +>523 +AAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTT +>524 +ACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTT +>525 +GAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCA +>526 +CCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGG +>527 +CGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGA +>528 +TGAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGT +>529 +GGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGC +>530 +CCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCT +>531 +GCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCC +>532 +AAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAG +>533 +CAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGG +>534 +GGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGC +>535 +ACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATG +>536 +GTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACA +>537 +TACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCG +>538 +AGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCT +>539 +GTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCC +>540 
+CGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGC +>541 +ACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATC +>542 +CCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCA +>543 +TGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTT +>544 +ATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACA +>545 +TGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCT +>546 +CCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCA +>547 +GGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGC +>548 +TCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTT +>549 +TCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTA +>550 +CAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTC +>551 +CCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGG +>552 +ATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCC +>553 +CATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGT +>554 +CCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAG +>555 +CCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATC +>556 +CCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTT +>557 +GGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGAC +>558 
+ATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAA +>559 +AGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGC +>560 +AGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATA +>561 +TCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCG +>562 +TGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTT +>563 +GCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCC +>564 +CATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTG +>565 +CCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTC +>566 +TCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCC +>567 +GGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTAT +>568 +TGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGG +>569 +AAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGA +>570 +CGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAG +>571 +GGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGT +>572 +GCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACA +>573 +CCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACA +>574 +TGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTC +>575 +ACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATG +>576 
+GGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGA +>577 +ATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGT +>578 +GAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCA +>579 +TGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGC +>580 +ACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTA +>581 +CTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCG +>582 +ACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGC +>583 +ATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGT +>584 +ATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGG +>585 +ACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGAT +>586 +CCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTC +>587 +TTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCT +>588 +CCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCC +>589 +CTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGG +>590 +GGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTAC +>591 +CCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCC +>592 +TGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGC +>593 +TACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGT +>594 
+CCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGG +>595 +CCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAG +>596 +AGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAG +>597 +TGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAA +>598 +CAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCA +>599 +GGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCC +>600 +TTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAG +>601 +GACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTG +>602 +CTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAA +>603 +AACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGT +>604 +GCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCG +>605 +TTCTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGAC +>606 +CCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCA +>607 +TTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCT +>608 +CAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACA +>609 +GACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCG +>610 +CCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCT +>611 +AGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGC +>612 
+AAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGC +>613 +CTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAAC +>614 +CCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACA +>615 +CATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCA +>616 +CAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAG +>617 +CCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGG +>618 +AGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGG +>619 +CAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCA +>620 +ACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTT +>621 +GACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTG +>622 +CAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAG +>623 +CAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGA +>624 +CCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACAT +>625 +TCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCC +>626 +TGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCC +>627 +GCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACG +>628 +ATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTG +>629 +CATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTAT +>630 
+GACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCT +>631 +GGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTC +>632 +GGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGA +>633 +AGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCG +>634 +CTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTT +>635 +TGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTG +>636 +GGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGA +>637 +CAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGG +>638 +TGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAAC +>639 +CCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTAC +>640 +TCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGG +>641 +TGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGA +>642 +AGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTT +>643 +GACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTG +>644 +CCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGA +>645 +TTCTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGAC +>646 +ATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCT +>647 +TCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGG +>648 
+TCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACAC +>649 +GGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGA +>650 +CTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGG +>651 +TGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCT +>652 +GAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGA +>653 +CGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGC +>654 +AGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGG +>655 +TCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCA +>656 +GCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCG +>657 +GTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGT +>658 +CCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCC +>659 +GTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAA +>660 +TGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAAT +>661 +CCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCAT +>662 +CATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCC +>663 +TCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACAT +>664 +ATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGC +>665 +GGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTC +>666 
+GAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCA +>667 +CAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCA +>668 +CACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCA +>669 +CTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCT +>670 +GGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGG +>671 +TGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTG +>672 +TTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACAC +>673 +AACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGT +>674 +CAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAG +>675 +CCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTC +>676 +TTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCA +>677 +CACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACAC +>678 +GTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACT +>679 +ACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTC +>680 +GATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGC +>681 +GGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGG +>682 +GGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTG +>683 +CACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCC +>684 
+GAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCA +>685 +AACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCAC +>686 +GGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGA +>687 +TCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACA +>688 +GCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTG +>689 +AAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGAC +>690 +CCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCC +>691 +CGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGA +>692 +AGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATT +>693 +AGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGT +>694 +TGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCT +>695 +CCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCT +>696 +TGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCT +>697 +AGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAG +>698 +AGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCT +>699 +AAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGC +>700 +AGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGG +>701 +CTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTG +>702 
+TTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGC +>703 +TCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTC +>704 +AGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCT +>705 +CCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGC +>706 +CAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGG +>707 +TGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCT +>708 +CACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGG +>709 +CCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTC +>710 +TGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAA +>711 +GAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCT +>712 +GGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCCC +>713 +CTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGC +>714 +GAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTG +>715 +GGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGG +>716 +CAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGC +>717 +CCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCC +>718 +TTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCG +>719 +CACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGT +>720 
+GATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTG +>721 +ACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTG +>722 +GGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCC +>723 +AGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAG +>724 +ACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTG +>725 +ACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGG +>726 +CTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGA +>727 +TGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTG +>728 +CCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCC +>729 +CCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCC +>730 +GAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCT +>731 +GAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCA +>732 +CCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTAC +>733 +TCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGG +>734 +CCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCC +>735 +CCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCT +>736 +ACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCC +>737 +CATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAG +>738 
+GATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAG +>739 +GTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAG +>740 +TCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGC +>741 +GCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTC +>742 +CTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCT +>743 +TGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGC +>744 +TGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTG +>745 +CATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCC +>746 +TGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCAT +>747 +TCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGG +>748 +GGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTA +>749 +TTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCT +>750 +GACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGA +>751 +AAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTG +>752 +CTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGA +>753 +GACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGA +>754 +CCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCC +>755 +CATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCC +>756 
+TTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCAT +>757 +GAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATG +>758 +TGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAAC +>759 +GTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCA +>760 +ACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCC +>761 +CGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAG +>762 +GTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAG +>763 +GCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGC +>764 +GCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACT +>765 +CACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCA +>766 +CCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCT +>767 +AGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCT +>768 +ACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATG +>769 +CCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTAC +>770 +ACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGG +>771 +GGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGC +>772 +TTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCAT +>773 +GTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGA +>774 
+CCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGC +>775 +GCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAA +>776 +CTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCA +>777 +GGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGT +>778 +TTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAG +>779 +AGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCA +>780 +CTGAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGG +>781 +GAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATG +>782 +CTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAAC +>783 +GATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAAC +>784 +TTTCTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGA +>785 +TCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCC +>786 +CGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAG +>787 +GCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCC +>788 +GGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGG +>789 +CTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTT +>790 +GCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAA +>791 +CGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAG +>792 
+TCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACAC +>793 +ATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGT +>794 +CGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGA +>795 +GAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGC +>796 +GCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCC +>797 +CGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCA +>798 +CCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAG +>799 +CATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCC +>800 +GCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTG +>801 +TACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGT +>802 +GTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGC +>803 +GGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGG +>804 +CTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGC +>805 +TCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGC +>806 +AAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGC +>807 +CTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCT +>808 +GAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGG +>809 +GAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCT +>810 
+GACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCT +>811 +ACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTC +>812 +AGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGC +>813 +GTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCC +>814 +GAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTC +>815 +TGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGC +>816 +TGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTG +>817 +CCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCT +>818 +AGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGG +>819 +GCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAG +>820 +TCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTA +>821 +TTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCT +>822 +GGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAG +>823 +GTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGC +>824 +CCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCG +>825 +AGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAG +>826 +CAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAG +>827 +GTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCC +>828 
+TGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAA +>829 +CTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTT +>830 +CCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTT +>831 +TGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAA +>832 +CCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGA +>833 +ACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATC +>834 +AGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGG +>835 +TCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGG +>836 +GGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGG +>837 +ATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTT +>838 +GTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGC +>839 +GGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTAC +>840 +CGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGA +>841 +ATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCT +>842 +ACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCC +>843 +CAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCA +>844 +CCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATC +>845 +GTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTG +>846 
+CCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGC +>847 +TTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCAT +>848 +CCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGG +>849 +TGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCAT +>850 +GGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCC +>851 +TGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTG +>852 +TAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCA +>853 +TGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAAC +>854 +GGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGC +>855 +GCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAA +>856 +AGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCT +>857 +TGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAA +>858 +TGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTG +>859 +ACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCC +>860 +CTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGA +>861 +GCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCC +>862 +TGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTC +>863 +ATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCT +>864 
+GGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGG +>865 +CAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCAC +>866 +ACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGA +>867 +CCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACA +>868 +GCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTAC +>869 +CACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGC +>870 +TTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACAC +>871 +TTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCC +>872 +TCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACA +>873 +GACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAG +>874 +GGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGC +>875 +CCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAG +>876 +CCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCC +>877 +CATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCA +>878 +AAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACG +>879 +GAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGA +>880 +GCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTG +>881 +CCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCG +>882 
+GAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCA +>883 +CCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGG +>884 +GCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTG +>885 +GTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGC +>886 +TGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGA +>887 +CGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAG +>888 +GACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCT +>889 +GCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTG +>890 +AGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAA +>891 +GTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCC +>892 +GGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGAC +>893 +GCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTG +>894 +CTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGA +>895 +CCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGC +>896 +GGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGG +>897 +GAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGT +>898 +TGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTT +>899 +CCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCC +>900 
+TTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCG +>901 +GGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGC +>902 +CAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCC +>903 +CGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGT +>904 +CTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCA +>905 +CCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCC +>906 +GATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTG +>907 +AGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGT +>908 +AGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGT +>909 +ATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTC +>910 +CCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGG +>911 +TTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCG +>912 +GTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTC +>913 +GTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTT +>914 +AAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCA +>915 +TCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCC +>916 +GGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGT +>917 +TACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCG +>918 
+CAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAG +>919 +CTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGG +>920 +AAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAG +>921 +GCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATAC +>922 +GGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGA +>923 +GTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGC +>924 +CAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCA +>925 +GCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTC +>926 +AAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGA +>927 +TGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGC +>928 +AGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGG +>929 +GAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGG +>930 +AGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCT +>931 +CAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTC +>932 +TGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTC +>933 +CACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCC +>934 +TACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCG +>935 +CCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGG +>936 
+TGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAAC +>937 +GCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTAC +>938 +TGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAG +>939 +GAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTT +>940 +GGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAA +>941 +AGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGC +>942 +GGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCCC +>943 +TCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGG +>944 +ATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTG +>945 +CCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCC +>946 +AGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCA +>947 +AGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTG +>948 +TACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAA +>949 +TGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCT +>950 +ACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGAT +>951 +AACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGT +>952 +AGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGG +>953 +CTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGG +>954 
+GAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGA +>955 +GCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTC +>956 +TGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGG +>957 +CAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCA +>958 +CAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCT +>959 +CTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGT +>960 +TCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAA +>961 +TGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAA +>962 +TTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTA +>963 +TTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCA +>964 +GGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATG +>965 +TGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCAT +>966 +AAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCAC +>967 +GACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCC +>968 +AAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTG +>969 +GTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCC +>970 +CCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAG +>971 +CTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGG +>972 
+CCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGC +>973 +CCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCC +>974 +GGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGG +>975 +CAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCA +>976 +GCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCG +>977 +AAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCC +>978 +GTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCC +>979 +ACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCA +>980 +CGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCA +>981 +GCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTG +>982 +GGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTC +>983 +GCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTC +>984 +GGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGT +>985 +AGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGC +>986 +GATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAG +>987 +AGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTA +>988 +GGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTG +>989 +GGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTC +>990 
+GCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGG +>991 +CCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCC +>992 +CACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACAC +>993 +TCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAG +>994 +GAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTC +>995 +CTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATC +>996 +GGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGG +>997 +TTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAG +>998 +CTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAAC +>999 +GCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCA +>1000 +TCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACAT diff --git a/example/reads/reads_2.fa b/example/reads/reads_2.fa new file mode 100644 index 0000000..b533b80 --- /dev/null +++ b/example/reads/reads_2.fa @@ -0,0 +1,2000 @@ +>1 +CCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGT +>2 +GCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTC +>3 +GAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGA +>4 +CGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGC +>5 +TTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTA +>6 +GAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGA +>7 
+TGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCAT +>8 +TCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCA +>9 +CCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGA +>10 +CAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATG +>11 +TACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAA +>12 +TTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACAC +>13 +GAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGAT +>14 +CTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACAC +>15 +TCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTC +>16 +CCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGC +>17 +CTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTC +>18 +CGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGA +>19 +CTGAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGG +>20 +GCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGG +>21 +ACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGAT +>22 +CATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTC +>23 +AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG +>24 +GCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCT +>25 
+CTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATG +>26 +CCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGG +>27 +CTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGT +>28 +CCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTG +>29 +GCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCA +>30 +TCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAG +>31 +TTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACAC +>32 +CAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCA +>33 +GACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTG +>34 +GCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTG +>35 +TCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTT +>36 +CCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCT +>37 +CCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTG +>38 +CAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGA +>39 +AGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAG +>40 +GAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATG +>41 +CGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTA +>42 +ACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCAT +>43 
+GGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCC +>44 +GGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATG +>45 +CTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGT +>46 +CTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGC +>47 +CCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCT +>48 +ATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGT +>49 +CGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGA +>50 +AGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAG +>51 +ATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTT +>52 +GGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGAC +>53 +AGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTG +>54 +ACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCA +>55 +GCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGT +>56 +GGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGT +>57 +AGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCA +>58 +GGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGA +>59 +GGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAG +>60 +AAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCAC +>61 
+TCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTA +>62 +ATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGG +>63 +GGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTC +>64 +ACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTA +>65 +TCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTA +>66 +GCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACT +>67 +AGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTG +>68 +CCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCC +>69 +TGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTG +>70 +TCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCC +>71 +AGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTG +>72 +CCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAG +>73 +AGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGT +>74 +GCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGC +>75 +AACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGT +>76 +AACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGAT +>77 +TAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCA +>78 +GACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGA +>79 
+CGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGA +>80 +GGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTG +>81 +CTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATC +>82 +GGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCC +>83 +CAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCC +>84 +CCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGT +>85 +GTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTG +>86 +GAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGG +>87 +CAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGA +>88 +GGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAG +>89 +GCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAA +>90 +CTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCG +>91 +GAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAG +>92 +CCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGG +>93 +TACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGT +>94 +AGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTT +>95 +GCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGA +>96 +TCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACC +>97 
+GGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGG +>98 +CATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTG +>99 +GCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCA +>100 +CCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGT +>101 +AGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAG +>102 +CTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCG +>103 +GTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCA +>104 +CACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTT +>105 +GAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGA +>106 +ATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGT +>107 +AAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCA +>108 +TCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGG +>109 +CGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGA +>110 +ACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCA +>111 +CATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGT +>112 +ACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCC +>113 +TCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCG +>114 +GGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTA +>115 
+ATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCT +>116 +CTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGA +>117 +AGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGG +>118 +GGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCCC +>119 +TTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCAT +>120 +CCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTAC +>121 +GCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTC +>122 +TGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGG +>123 +GCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACA +>124 +GCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACT +>125 +GGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTC +>126 +CTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGT +>127 +ACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACC +>128 +TCTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACT +>129 +TTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACA +>130 +CCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGG +>131 +ACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGC +>132 +GTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGC +>133 
+AAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTC +>134 +CAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGA +>135 +CAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGG +>136 +CCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCC +>137 +GTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACA +>138 +TGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTC +>139 +ATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGT +>140 +CGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGA +>141 +AGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGT +>142 +TCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCG +>143 +TGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTC +>144 +GGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTA +>145 +GGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTG +>146 +CCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTC +>147 +AGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCT +>148 +GGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGG +>149 +TCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTC +>150 +GCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCA +>151 
+GCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCG +>152 +TCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCA +>153 +GCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTC +>154 +GGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTG +>155 +CAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGG +>156 +TCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAG +>157 +CTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTG +>158 +GAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCA +>159 +AGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAA +>160 +AGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGG +>161 +GGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGG +>162 +AGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGAC +>163 +GGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTG +>164 +TTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCG +>165 +TACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCG +>166 +CACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCC +>167 +TGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTT +>168 +AGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGG +>169 
+GGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTG +>170 +AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG +>171 +AGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACC +>172 +GACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTG +>173 +TGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGA +>174 +GTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCA +>175 +TGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTC +>176 +TGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAA +>177 +CACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCA +>178 +GCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAG +>179 +TCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGC +>180 +GTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACA +>181 +CCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGA +>182 +TCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAG +>183 +TTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCAT +>184 +CGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCA +>185 +GATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAAC +>186 +ATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTT +>187 
+GATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGC +>188 +GAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTG +>189 +GACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGA +>190 +TGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAG +>191 +GCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGA +>192 +CGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGA +>193 +TCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCC +>194 +GCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTC +>195 +GCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTC +>196 +AGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCT +>197 +CATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGT +>198 +TCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCC +>199 +CCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCC +>200 +TTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAG +>201 +AAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTC +>202 +GAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCA +>203 +AGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCA +>204 +TGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTG +>205 
+TGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGC +>206 +GCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGA +>207 +AGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCT +>208 +TTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCAT +>209 +ACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGG +>210 +GAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGG +>211 +CACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGT +>212 +TTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCA +>213 +GCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTG +>214 +AGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGC +>215 +CAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCC +>216 +AGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGA +>217 +AGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCAT +>218 +GCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTC +>219 +GTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGA +>220 +ATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCT +>221 +TCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTT +>222 +GTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGC +>223 
+TTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACA +>224 +GCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGC +>225 +GCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAG +>226 +AAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTC +>227 +ACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCC +>228 +GCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAA +>229 +TGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAAT +>230 +TGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGG +>231 +CCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCC +>232 +ACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTA +>233 +TCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAG +>234 +GAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGA +>235 +AGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAA +>236 +CGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAG +>237 +CTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTC +>238 +CCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCT +>239 +TGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAA +>240 +CCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCC +>241 
+GGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAG +>242 +AGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGG +>243 +CCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTG +>244 +TGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAA +>245 +TTCTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGAC +>246 +AATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACT +>247 +CTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCC +>248 +GGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGG +>249 +GACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTG +>250 +AGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCC +>251 +CCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGC +>252 +GTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTC +>253 +CAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGG +>254 +GGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGT +>255 +TTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCC +>256 +CAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCA +>257 +TGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTC +>258 +TAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCA +>259 
+GCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGA +>260 +CTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCT +>261 +CAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAG +>262 +CTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTC +>263 +AGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTT +>264 +GCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCC +>265 +CCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTT +>266 +CATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAG +>267 +GGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCC +>268 +TACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCG +>269 +ACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCC +>270 +GGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAG +>271 +CAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAG +>272 +TGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCT +>273 +ACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTG +>274 +CCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGAC +>275 +TTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCG +>276 +GTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTC +>277 
+GTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACT +>278 +GGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAA +>279 +GGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGA +>280 +ACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCC +>281 +CAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCC +>282 +TTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAG +>283 +CGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAG +>284 +GTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGC +>285 +AAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGC +>286 +GAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCC +>287 +TCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTC +>288 +AAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGT +>289 +GCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTAC +>290 +GGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGC +>291 +CCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTG +>292 +CGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTG +>293 +TCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAG +>294 +CACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGG +>295 
+AGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGT +>296 +GGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGC +>297 +CTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGT +>298 +TCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAG +>299 +CAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCT +>300 +TGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGG +>301 +AAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCT +>302 +AGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACC +>303 +GCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGG +>304 +CCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTAC +>305 +AGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGT +>306 +GCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGG +>307 +TGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTC +>308 +GTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAA +>309 +ACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATG +>310 +ACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGC +>311 +AGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTC +>312 +CCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGA +>313 
+CAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATG +>314 +CGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGG +>315 +ACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGC +>316 +GAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGG +>317 +AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG +>318 +TCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAG +>319 +CCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCA +>320 +CTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAA +>321 +CCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCT +>322 +GGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGC +>323 +TTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCT +>324 +CTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCT +>325 +CTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCA +>326 +GCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGC +>327 +GAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCA +>328 +CCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTT +>329 +TGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTAC +>330 +AGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTT +>331 
+TGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAA +>332 +CCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACA +>333 +GTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACA +>334 +CTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACG +>335 +CGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGA +>336 +CAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCA +>337 +CGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGG +>338 +GAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCC +>339 +CAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTC +>340 +GCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGG +>341 +TGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCT +>342 +CCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGC +>343 +TGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCT +>344 +GGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGA +>345 +CAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTC +>346 +GAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCA +>347 +GACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCC +>348 +GTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTT +>349 
+CAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCT +>350 +TGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAA +>351 +CTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGG +>352 +AAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCG +>353 +GAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTG +>354 +TGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAAC +>355 +ATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGAC +>356 +GGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGC +>357 +TCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAA +>358 +ACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCAT +>359 +GCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCT +>360 +CTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCT +>361 +CACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGT +>362 +TTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGT +>363 +GCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACT +>364 +TTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGG +>365 +CCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCC +>366 +GCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGA +>367 
+GCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCAT +>368 +GCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAG +>369 +AGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGG +>370 +CTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGA +>371 +GTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACT +>372 +CCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAG +>373 +AGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACC +>374 +ACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTT +>375 +CAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCA +>376 +GAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGAT +>377 +GCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCC +>378 +CCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCC +>379 +GCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGC +>380 +GTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGA +>381 +GTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTC +>382 +TTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCG +>383 +TGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACAC +>384 +GCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTG +>385 
+TGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGA +>386 +TCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACAC +>387 +CCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGG +>388 +CCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGG +>389 +GTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTG +>390 +TCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTC +>391 +TTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGC +>392 +TGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAA +>393 +CTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTG +>394 +GCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCC +>395 +TGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGT +>396 +ACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCA +>397 +TCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCA +>398 +GACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAG +>399 +CGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCA +>400 +ACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTA +>401 +AGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCT +>402 +CTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCT +>403 
+TGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTAC +>404 +TGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCT +>405 +GGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGAC +>406 +TGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGT +>407 +AAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCA +>408 +TCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGG +>409 +GGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGA +>410 +ACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGAT +>411 +ACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATC +>412 +AAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAG +>413 +CCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGA +>414 +CATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTG +>415 +TCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACC +>416 +GACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCG +>417 +TCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTC +>418 +CCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACA +>419 +CTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTG +>420 +TCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAG +>421 
+TCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAG +>422 +GGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATG +>423 +CCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACA +>424 +ACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTG +>425 +CGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCA +>426 +TCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACAT +>427 +GCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACT +>428 +ACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCAC +>429 +CCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTC +>430 +GGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGC +>431 +GGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAA +>432 +GGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCC +>433 +GCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAG +>434 +CTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCT +>435 +CTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATG +>436 +GCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCC +>437 +GAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCA +>438 +GTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTC +>439 
+AGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCG +>440 +ATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGT +>441 +ATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGC +>442 +AGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTG +>443 +ACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCA +>444 +CTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCA +>445 +AGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAA +>446 +CCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCC +>447 +GTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCA +>448 +CCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGC +>449 +ATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCT +>450 +GTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGT +>451 +CTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTC +>452 +AGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAG +>453 +TCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGC +>454 +GCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTC +>455 +GTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGC +>456 +TTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCT +>457 
+CGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACT +>458 +ACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACC +>459 +AAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGT +>460 +CTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAA +>461 +GCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTC +>462 +TGAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGT +>463 +AGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCT +>464 +TTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCC +>465 +GCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCC +>466 +GCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAG +>467 +TGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGG +>468 +CGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTG +>469 +GAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTT +>470 +CATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCC +>471 +AGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGC +>472 +TCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGG +>473 +AGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCAT +>474 +TCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTG +>475 
+AAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTC +>476 +CCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGG +>477 +CAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGT +>478 +ACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGA +>479 +GAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGT +>480 +AGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCC +>481 +CGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACG +>482 +CTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGT +>483 +CTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTC +>484 +TCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAG +>485 +GAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTAC +>486 +AGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTG +>487 +CTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGA +>488 +GCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGA +>489 +GGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTG +>490 +GGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGC +>491 +GACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACAC +>492 +GCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGAC +>493 
+GTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCC +>494 +CTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATC +>495 +CGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGA +>496 +GCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACG +>497 +GCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGA +>498 +GCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCCCA +>499 +AGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTG +>500 +TGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCA +>501 +TGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGG +>502 +AGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGA +>503 +CTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCT +>504 +CCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCT +>505 +CCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTG +>506 +TCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCG +>507 +GGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAG +>508 +GGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGC +>509 +CGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGG +>510 +CATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTG +>511 
+ACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCC +>512 +CAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAG +>513 +CCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTC +>514 +GCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTG +>515 +CACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGT +>516 +GGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGAC +>517 +CCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGG +>518 +GTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCT +>519 +TCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTT +>520 +AAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGAC +>521 +TGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTC +>522 +TTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCT +>523 +GAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCA +>524 +GAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCT +>525 +AGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGAC +>526 +ACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACC +>527 +AGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGG +>528 +GGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCCC +>529 
+CCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTC +>530 +GTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGC +>531 +GTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTT +>532 +GGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTG +>533 +GGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGA +>534 +CCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTC +>535 +TCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCC +>536 +GGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTC +>537 +AAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGAC +>538 +ACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATG +>539 +AGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACC +>540 +CTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTG +>541 +CATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCC +>542 +GAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTG +>543 +TTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACA +>544 +TCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCG +>545 +CCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAG +>546 +GACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCAT +>547 
+CCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTC +>548 +AGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAG +>549 +CGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGA +>550 +CCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGC +>551 +GATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAAC +>552 +AAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCT +>553 +AGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTC +>554 +ATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGAC +>555 +TCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGC +>556 +CCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCC +>557 +CATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTAT +>558 +AAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACAT +>559 +CACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACAC +>560 +GCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCC +>561 +GGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACA +>562 +TTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACA +>563 +GTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTT +>564 +GCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACT +>565 
+CTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTC +>566 +GATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAAC +>567 +ACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCC +>568 +ATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACA +>569 +TGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTT +>570 +CAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCA +>571 +CTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGA +>572 +GAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATG +>573 +GTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAA +>574 +TGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGC +>575 +AGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCT +>576 +CAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGG +>577 +CCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCAT +>578 +TTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGG +>579 +CTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGG +>580 +GCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACG +>581 +TCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAA +>582 +CCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCAC +>583 
+GGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGAC +>584 +GTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGC +>585 +TACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGT +>586 +CTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTC +>587 +TTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCC +>588 +GAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGG +>589 +TGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACAC +>590 +AGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGT +>591 +GTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTG +>592 +TGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTC +>593 +ACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGAT +>594 +CACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGC +>595 +AGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGG +>596 +GCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTG +>597 +TGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGG +>598 +CGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAG +>599 +CTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCT +>600 +CAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACA +>601 
+CAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAG +>602 +GAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGG +>603 +CCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCC +>604 +GGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAG +>605 +GAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCC +>606 +GAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTG +>607 +CTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTT +>608 +TTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAG +>609 +TGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAA +>610 +CTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACAC +>611 +GGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGC +>612 +AGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCT +>613 +GGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTC +>614 +GTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAA +>615 +AGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATT +>616 +CTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGT +>617 +CACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTT +>618 +CGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGA +>619 
+CGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAG +>620 +GAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCT +>621 +CAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAG +>622 +CTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGT +>623 +CAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCA +>624 +GCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGT +>625 +TGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAAC +>626 +GGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGC +>627 +ACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTA +>628 +GAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTT +>629 +GGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGAC +>630 +CCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCC +>631 +GTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACA +>632 +CTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCC +>633 +GAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGC +>634 +CCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACA +>635 +CTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGC +>636 +CAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGG +>637 
+CTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCT +>638 +TCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCC +>639 +GTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGC +>640 +AGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGG +>641 +AAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTG +>642 +CCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGT +>643 +CAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAG +>644 +AGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGT +>645 +GAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCC +>646 +GCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTC +>647 +AATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACT +>648 +TTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGT +>649 +CTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCC +>650 +GAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCA +>651 +GCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCT +>652 +CAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCA +>653 +GTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTC +>654 +GTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATA +>655 
+AGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAG +>656 +AGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGG +>657 +TGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCA +>658 +AACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGT +>659 +GGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGT +>660 +CGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAG +>661 +ATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGT +>662 +CTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCG +>663 +TTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGC +>664 +GCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGA +>665 +CCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCC +>666 +AAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTT +>667 +CAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGA +>668 +GGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGC +>669 +ACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGA +>670 +TTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCG +>671 +GGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTG +>672 +TGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTT +>673 
+CACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCC +>674 +ACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCC +>675 +AGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCG +>676 +GAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGA +>677 +AGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGC +>678 +GTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGC +>679 +TACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGA +>680 +AGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTG +>681 +TCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAG +>682 +AAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAG +>683 +AACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGT +>684 +GCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAA +>685 +CGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGA +>686 +ATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATG +>687 +GTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCT +>688 +CCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCA +>689 +TACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCG +>690 +ACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTT +>691 
+AACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCAC +>692 +CATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCA +>693 +CCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGA +>694 +CTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGC +>695 +CCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCC +>696 +CCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAG +>697 +TGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGC +>698 +GGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAG +>699 +GGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAA +>700 +CGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGA +>701 +CGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGC +>702 +ACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGC +>703 +ACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCA +>704 +GGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAG +>705 +GCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGC +>706 +ATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTC +>707 +CCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAG +>708 +TGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTG +>709 
+AGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCG +>710 +GACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCG +>711 +ACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTT +>712 +TGAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGT +>713 +GGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGG +>714 +CCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCA +>715 +CTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGC +>716 +GGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGG +>717 +CCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTT +>718 +GGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGG +>719 +CCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCT +>720 +CCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAG +>721 +ACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCC +>722 +TTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAG +>723 +TGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATG +>724 +ACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCC +>725 +CCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGA +>726 +GGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCC +>727 
+GGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTG +>728 +CCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGAC +>729 +AAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCC +>730 +ACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTT +>731 +AGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGAC +>732 +GTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGC +>733 +AGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGG +>734 +GACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCT +>735 +CTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACAC +>736 +CAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAG +>737 +GGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGA +>738 +CAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATG +>739 +CGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATC +>740 +GCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAG +>741 +ACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTG +>742 +GGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCC +>743 +TGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTC +>744 +CCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCG +>745 
+CTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCG +>746 +GTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGA +>747 +GGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCC +>748 +AGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTC +>749 +CTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTT +>750 +CCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTG +>751 +TGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGA +>752 +TAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCG +>753 +CCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTG +>754 +TGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAG +>755 +CTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCG +>756 +GGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGG +>757 +GCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACA +>758 +TCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCC +>759 +TGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAG +>760 +ACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTG +>761 +GAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTAC +>762 +CGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATC +>763 
+CCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGC +>764 +CCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGT +>765 +GAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGC +>766 +CCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCC +>767 +CGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACG +>768 +TCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCC +>769 +GTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGC +>770 +CCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGA +>771 +AGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGC +>772 +GGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGG +>773 +CTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCA +>774 +GCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGC +>775 +GAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCA +>776 +CAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGT +>777 +GTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAA +>778 +CAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACA +>779 +GGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGA +>780 +GCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCCCA +>781 
+GCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACA +>782 +GGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTC +>783 +TCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCC +>784 +AAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCTCCA +>785 +ATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCC +>786 +ATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGG +>787 +GTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTT +>788 +AGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGG +>789 +TTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCT +>790 +TCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACC +>791 +GAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTAC +>792 +TTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGT +>793 +CCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCAT +>794 +TCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTA +>795 +AGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCG +>796 +AGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATA +>797 +CACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGC +>798 +ATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGAC +>799 
+CTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCG +>800 +CCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCA +>801 +ACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGAT +>802 +CCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTAC +>803 +TCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAG +>804 +GGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGG +>805 +CCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATC +>806 +AGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCT +>807 +CAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGG +>808 +GTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCA +>809 +ACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTT +>810 +CCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCC +>811 +TACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGA +>812 +CCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGC +>813 +AGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACC +>814 +GGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCC +>815 +GTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCA +>816 +CTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGC +>817 
+CCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGG +>818 +GCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCG +>819 +GGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAG +>820 +CGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGA +>821 +TTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCC +>822 +GCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAG +>823 +CCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTAC +>824 +TGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTG +>825 +CCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGC +>826 +GACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTG +>827 +TCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCT +>828 +GACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCG +>829 +TTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAGATCT +>830 +CCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCC +>831 +TGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGG +>832 +AGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGT +>833 +CATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCC +>834 +GGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGG +>835 
+TGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGG +>836 +CTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGC +>837 +TTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCG +>838 +CCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCT +>839 +AGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGT +>840 +CAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGA +>841 +GCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGA +>842 +TTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTA +>843 +GAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGA +>844 +TCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGC +>845 +CCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCC +>846 +CAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTC +>847 +GGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGG +>848 +CACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGC +>849 +GTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGA +>850 +CTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCT +>851 +CCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCG +>852 +GGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGG +>853 
+TCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCC +>854 +CCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTC +>855 +GAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCA +>856 +GGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAG +>857 +TGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGG +>858 +GTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGC +>859 +GGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTAT +>860 +TAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCG +>861 +AGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATA +>862 +GCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATAC +>863 +GCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTC +>864 +TAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCA +>865 +TGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGT +>866 +CAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGA +>867 +CTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTA +>868 +CAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTT +>869 +CCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGG +>870 +TGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTT +>871 
+ATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACC +>872 +GTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCT +>873 +CGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACG +>874 +CACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCA +>875 +TGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCT +>876 +AACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGT +>877 +AGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATT +>878 +CTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTG +>879 +TTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCA +>880 +CACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGAT +>881 +TGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTG +>882 +TTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGG +>883 +CACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGC +>884 +AGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAG +>885 +AGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAA +>886 +CCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGG +>887 +GAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTAC +>888 +CCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCC +>889 
+CACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGAT +>890 +AAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGA +>891 +TGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAG +>892 +CATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTAT +>893 +AGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAG +>894 +GGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGT +>895 +AGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGC +>896 +GGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGT +>897 +AAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAG +>898 +TTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACAC +>899 +AGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAA +>900 +GGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGG +>901 +CCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTC +>902 +TCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAG +>903 +CTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCT +>904 +GTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGA +>905 +AACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGT +>906 +CCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAG +>907 
+GGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTAC +>908 +CCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGA +>909 +CAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGG +>910 +CACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTT +>911 +GGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGG +>912 +CGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGC +>913 +GCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCC +>914 +TTTCTGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGA +>915 +ATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCC +>916 +TCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGT +>917 +AAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGAC +>918 +CTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGT +>919 +GCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGC +>920 +GGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTG +>921 +TGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTC +>922 +GCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACT +>923 +CCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCT +>924 +CAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGA +>925 
+TGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACA +>926 +AGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAA +>927 +GTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCA +>928 +GGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGG +>929 +GTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCA +>930 +GGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAG +>931 +AGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCAC +>932 +GCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATAC +>933 +AACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGT +>934 +AAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGAC +>935 +CACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGC +>936 +TCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCC +>937 +CAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTT +>938 +GTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCA +>939 +ATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTG +>940 +CCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGG +>941 +GGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGC +>942 +TGAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGT +>943 
+GGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCC +>944 +GAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTT +>945 +GACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCT +>946 +CCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAG +>947 +GATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGC +>948 +CAGTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGG +>949 +CCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAG +>950 +CCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCGCCGATGTTCAGGGACATGGAGCGCT +>951 +CCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCC +>952 +CGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGA +>953 +CTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCG +>954 +CAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCA +>955 +ATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCT +>956 +TCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGG +>957 +CGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAG +>958 +GTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAA +>959 +CAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAG +>960 +CTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACGTGCCGGTTCATGCTCCCCTTGGCCCCG +>961 
+CACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCC +>962 +ACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCC +>963 +GAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGA +>964 +AGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCT +>965 +GTGGAGTAGATCTTCTCGCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGA +>966 +CCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGG +>967 +GCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATC +>968 +TGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGA +>969 +TCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCT +>970 +TGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCT +>971 +TGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACAC +>972 +GCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGC +>973 +AACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGT +>974 +CCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTG +>975 +CGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAG +>976 +AGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGG +>977 +CCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCC +>978 +TCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCT +>979 
+CCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTGCAGCTCTGACCAAGTGGAGATCTTCTCCAGCCTGCTGCAGCG +>980 +CACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGC +>981 +TCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTG +>982 +CTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAAC +>983 +TGGCCTGGGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACA +>984 +GGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGG +>985 +CACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACAC +>986 +CAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATG +>987 +GCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGC +>988 +AAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAG +>989 +GTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACA +>990 +GCGAAGCACATTGCAGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAG +>991 +AACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGT +>992 +AGTGGAGATCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGC +>993 +GGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTGAGGGCCCTCTTCCTGGGGAGCACAGGGCCCCTGGTGTGTACAGTGTGTCATGGG +>994 +GGGCGTCTGTGTATCACGGTACTTCTGTAGACGGGGAACACATTTCAGTGAGCTGCTCCTGGAAGGGCAGGGAAGAGAGTCCATAAGGTGAACTTGGCCC +>995 +TGCTGAGATAAAGGAAGCAGACCCCCTGGCTGCCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTC +>996 +CCTCGGAAGCAAGTCAACCCAAACCCTGTCCCCCCGAAGTGACCCCCCACTACATCTGGATCGACTTCCTGGTGCAGCGGTTTGAGATCGCCAAGTACTG +>997 
+GGGCCAAGTTCACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCC +>998 +GGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCAGCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTC +>999 +TCTTCTCCAGCCTGCTGCAGCGCTCCATGTCCCTGAACATCGGCGGGGCCAAGGGGAGCATGAACCGGCACGTGGCGGCCATCGGGCCCCGCTTCAAGTG +>1000 +TTCCCCGTCTACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGTGCTCCCCAGGAAGAGGGCCCTCACTTGAAGC diff --git a/example/reference/22_20-21M.fa b/example/reference/22_20-21M.fa new file mode 100644 index 0000000..1a0771f --- /dev/null +++ b/example/reference/22_20-21M.fa @@ -0,0 +1,16668 @@ +>22:20000001-21000000 +TGGGAAGGTGGGGAGGTGCTCACCTTGGGGAGCCCCCCTGAGTGCTCTGCAAGCTCTGGG +GTCACCTGCCTGGCCAAGGACCCAGTACCAGCCGCCTCAAAGCGGGGGAATGACCACGCC +CTTTATTGGGCCCTTTTCCCAGTGAGGATGGCCTGGGCCTACCCATGCCATGGGACCCCC +ACACTGGGTCTCCCCACCAAACTGTCCAGACCCGATGGCTGGCCATGGGGCTCTGTCAGC +CCCTGGCTACACCCCACTGATGCCCTAGTAAGCCCTGTGGTTCTGGCACGGTATCCATGG +TCCAACCAGAGGGCTGAGAGGTCTCACACTGGGGCATAAGCCTGGCCCAGGCCACACAGC +CAGATTGGCAAGCTACAGTCCTTTGGATAGCTGACGGCACAGGGCAGCTCCCATGGGTAC +ACACTGGGCCCAGAACTGGGATGGAGGATGCAGCCTGGGCTGGGTAGGCAATGAGGGGCC +CAGGTGAGCTCGCTCTGCACTCCCTGCAGTGTGGCCTCAGCAGGCCACCCCCTCCCCAAG +TCTCCTCGCTTCCTGCTCTATGAGCGCGAGGTGACACTGACTGTAGGTAGCTGAGAACAT +GCCCTGGCAACAGCCAATGCAGGCAGGTGAATGGAGTGCCCCGTGGCCAGTGGACAGGGA +AGGTCCATGCTGCTCAATGACAATGCTGTCCACTACAGCAAAACCGAGTGTTCTCCTAGG +CCTGCTGCCACCCTGGGCACATAGTGAGAACACGCCCACTTCTGCTGTGGACATTGAGGC +ACAGACCACACCTGGGAAGGCTTGGGAGGCCCCTGGGCACACACTTGAAGGACACTGCAC +TTTTGGGGCTACGCAGGCACAGGGCTAGTCCTTCACTGTCCCTGCTGAGCCTGTGTGTGG +TCACCCACCAGCTGGCCTTCAGGCTTCCTTCTGCCTGCACCAAGCATGTTGGGGGTACCG +AGGGGCCTTGCTTGGCTCCTTTCCAACTCCACGCCCTCAATAGGCGGCTTGGAGGCCGAG +CTCCGGTCATCCAAGCCAGGTGTCTGGGGAAGGGTCCTGGTCTGCCGCAGGAAGCTGGGT +GGAGCCGGTCTTGCCTGCTGTGGCGGTCCCAGCTGCCCTCTGGTGGCCATGCTCAGCTCC +CCGTTGCTCTGCCAGAGGCGGCAGCCCCTCAATCAGCAGGGGCAGTGGGAAGGGCCTGTG +CATAACTGGTTATGGAGTGGGGGGAACTCTGTGGCCCCAGGCACCCACCAGCTCTGGGTC 
+ATCTTCCCGACCTGAAACAGGCCAAGATATGAAGGCCCTGAGCCAGGAAAACCTACTAAG +GGATCCCTGATCCCAAGTCCCCTCAGAGTGACCCAAGACTTCATCTTGGGTTTTTCTGTT +TGTTTTTTTGTGGGTATTTTTTTTTCTTTTTTTGAGACGGAGTCTCACCCTGTCACCCAG +GCTGGAGTGCGATGGCACAATCTTGGCTCACTGCAACCTCTGCCTCCCAGGTTCAGGCAA +CCCTCCTGCCTCAGTCTCCCCAGCAGCTGGGATTACAGGCATATGCCACCAGGTCCAGCT +AATTTGTTTTGTATTTTTAGTAGAAACAGGGTTTCACCATGTTGGCCAGGCTGGTCTCAA +ACTCCTGACCTCAGGTGATCTGTCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAG +GTACGAGCCACCGTGACCAGCCCATCCTGGTTGTTTTGTTTGTTTTTGTTTTTTGGAGAT +AGAGTCTCGCTCTATCACCCAGGCTGGAGTGCAGTGGTGTGATCTTGGCTCACTGCAACC +CTCACCTCCCCGGTTCAAGCGATTCTCCTGCCTCAGCCTCTTGAGTAGCTGGGACTACAG +GCGCATGCCACCATGCCCGACTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATG +TTGGCCAGGCTAGTGTCAAACTCCTGACCTCAGGTGATCCACCTGCCTTGGCCTCCCAAA +GTGTTGGGATTACAGGCATGAGCCACTGCGCCCGGCCTCATTCTGGTCTTAATTGAAGCT +GCTGACAAAAGAACAGACACCTCCCCATGACTAAGCCACCATCAGGGAAGTCCCCAGGAC +CCCCAAGGCCCAGCTGGTGCTCTGCAGTGCTGTCCCTGGACCCCCACCCTCCATTCCTGG +CCCTGTCTACAGAATGGGCCCTGACTCCCTATCCCAGCTCTACCCTGTCACTCCCTCTGC +CAGGGCCAGGCGGGGAGCACGACAGATGGGCTGGACAGAGGGTGGTGCCTGGCATCCGTG +TGCAGAGCTCCCTGCTCCACCCAGAGACCGGGGATCCTGCTGCTCCTGCACCTTCTGTCC +TTACTCAGGACAGCCTCATGCTCCCCCAGGCCAGACTCCTGGGGTGAAGGAGCACAGGGA +ACTCACCCTGAATCCCTGTGTAGCACAAACAGAAGGAGGGGTGCTACCCTACCCTCACAG +AGCCACAGAGATGCAGAGGGCCCCGGGACTGGGGGGAGACTACAGGGCCACCTCTGGAGG +GAGTGCCAGAGCAAACATGGGAGCCAAGCAGCCCAGATGTGGTGGGTGGGGAGACTCAAA +TTTAGCAGGATTGAGGGTCAGAACTTGTTCTCCCAGGAGGAACCTTGGTCAGAAGGTCCT +TGTGTTTATGCTGAAAAGCCTGACAGGGCAGCTCAGCCAAGAGCTGGGCAGGGGCTGGCT +TGGCTGTGCTCAACTACCCTCTCAGCTGGGACTTGAGTCCCTTAGCGAATGTTCTGTGCC +CAGTCCTCTGCTGGGCGCTGGTGGACCCCAGAATGGGGGTAGGGTGGGGACAGGGGACAG +CTGGGAGCAGACATTGCGGATAGGATGGGGCAAGACTGGGGGAAATGCCTTCCCAGACAC +GCTGCGGCTCACACTGTCGCCTGATCTAAGGCCTGGATCACCTGCTTAGGAGCTGACCGG +CCACTGGCATGGCACACCGGGCAAGAGAGCGCTGACTGCCCCCCTGCATTCAGCACCATC +CCAGCTCCAATAGAGTGAGTACAAGATAGGTAACACCTGCCTTGTTTGGACCAGACCTCT +AAACACCGCCCAGATCCCAGAGTAAAGGCAGATAAGGCAGTAGTTAAGAAGTAGGAAGAA +GTAAAGGCAGCTACCCCAGAGAAGCTAAGGTCGGGAGAGGTGGAGGCTCCCACTGCACCC 
+CCCTGGCCCATACCATAGGGGAGGTTTCCCCCACAGCAGCCTGGGTACTCACCTGAGGTT +ATTAGACAGCAGCATTAAGAGCCTACTCTAAAAAATACCCACGGGGCACCTTTCTTTTCC +TTATTTCTAAATATGCCTGGATGTAGGGAAAGGGAGCTGAAGTCCTTTAGTTCCATTTTA +AGTATGTTACATTACACATAGCTAAGAAACGGTGAAATATTGTAAGCAGCCAATATTTAA +TTATTTAAAAATTAAAATAGGCTGGAGCTCTCAGGCCACCTGCCCAGGGCGACTCCCCAG +CAAACCGAGCAGTCTCCCTCAGAGTGGAGATGACATGTCTCCGCACCGGCACCGAGGGGG +CGCAGGGCGGCGTGGGGAGGGGGACCCAGGCCGGGGGTCCGGGTGGCGCGCCCTCTCTCA +CCCCCGAGGCGGCAGGAGCAGCGCGGCGCGAGGACACCTCCGGGCCGTCGGTGCCCCTGA +GCTTCCCGGCGGCGCGCTGCCCCAGGAAAGTTCCCGGCCTGCCGCCCCTCGGGCTGTTGG +AAAGTTTCGGCGGCTGCAGGGCCCTGGCAGCTAGGCGCCAAGCGTCGCGCGGGGCGGGCA +CGGCTGCGGGGTCGGGCCGCGCTCGGCTGGAGTGCTCAGTGCAAGACGCCCGGACGGGTA +GGGTAGGGCCCCAAGGCCACGCACAAAGAGACCCGCCCCCGCCGGCCCGGGGTCTCGGAC +GCCGCAGAGCGCGCGGACAAGACGCGGGTGGGGCGGAGATTGGCTCCGACGTCCGGCCCA +GACCCTCGCCCGGATCTCGACTCCAGGGATAGGACCCTGTCCGGACCGCTGCCCGGAATG +CGTCCCATATCTGCAGCCCCGCCCCCAGTCCCGGCTCCGACCCGAGGCCCCAGCCCCGGG +TCCCACACCCCGCCCACTGGCTCAGGCTTACCTGGAGCGCAGCGTGGGCGGCCCCGCAGC +GCGGCCTCGGACCCCAGAAGGGCTTCCCCGGGTCCGTTGGCGCGCGGGGAGCGGCGTTCC +CAGGGCGCGGCGCGGTGCGGCGCGGCGCGGGTCGCAGTCCACGCGGCCGCAACTCGGACC +GGTGCGGGGGCCGCCCCCTCCCTCCAGGCCCAGCGCGCAACCCGAGACCCCGGGCCAGCC +CTCCCGCCCTCACGCGGCCCGTGCGCAAAGCGGACCGGGCCGGCGACGCGAATCTCCGCC +CCGCCCCATCTGACAGCAAATTTTCCCATTGGTCGAGAGACCCGGCGCGGCACGGCAACC +GGCGCCGATTGGGCGGATGCGCGGAGATGACGGCTTGCCATTGGGCCTCCGGGATGTCAG +TCCCGGCGGATGACCGGCCCCCTCGCGGAGGGGCCGCGGCCCCGCACCTGCCCGAACCTC +TGCGGCGGCGGTGGCAGGGTACGCGGGACCGCTCCCTCCCAGCCGACTTACGAGAACATC +CCCCGACCATCCAGCCCCGAGCAGGGACTGGCAGAGCCGGCGCCCCTTCCCCGGACTCTG +GCCGCGTAGCCTCCGCCACCACTCCCAGTTCACAGGTATCCTTCTTCAGCGCCTGCTACT +CGCTGCGCGCTGCGCGCCGCTCCCTCCTGTCGACATCGAATTGTATTCTTCACGCAAGCC +GCGAGAGAGAGTTCTGCTGCTATCCCCATTTTACAGTTGGGAAACAGCCTCAGAGAGGTT +CTCACTTGCTTGAACACAGTCGTCTGACCCTGGGGCCTGGGGGCTCTCTCATCCTCACGC +CCCTGGGTCCCCGCAGCGGGGAGACGATGGAGGTAGGAGCTGCAGTATTTCCGAGGGCAG +GAAGGGACCCCCTCGGTAAACCAGGGGCTCACCTCCCCCACCAGGCCTGGATGTAGAGAC +CTCTTGGGCTTGTCCCACCCTGCAGGCCCCCCATGGGGCCGCCCTTTCTTACCTCCTCCT 
+GGCATTCTCACAGCCCCACAGCTCTGCGTCTGGGGAGGCAGAGTTGTGCGTTTATGCCCG +CACCCCGTACTACTGTCCCATTTTCTTCACCCTACCTGGAGTCAGGAAGAAACGGTGCTT +CACTGCCTGTTGAGATTCACGTAGGTTACATTTCCTTGCCAGATTAAGAGCACCTGACAT +GAGTGGGACCATGACCCTCCGCCAGCTCTGCGTAACCTTGTGGTGTAGGGGGGAACCAGC +AGGGATGCTGCCCAGTGGGCTTTTCTAGGGGGAGGTCCTGAGATGACTTCTTAGCCAGTG +ACCCTGGCTCACGCATGCTGTCATCCTTTCCTGCCCTGGATGCACCTGGAGATGACCAGT +TCCTCGGCTCATTGCATTCTCCAAGGCCGCCTGGGGAACCAAACTGCCCACAACCATGCT +TATGAGTGGTGGAGGGGTGTGACCAGACACTCTTGTCTTATCCACCCATATTGGGAGTCC +AGTGTGTTCATCCAATCATTCCTTGGCCAGGACAAAGAAAATGAAATGGCCCAAGGCAAG +AACTCACTGCTGGGACCACCGCTGGGTCCATCTGTGTGCTGGCAGCATGGTGAATCGATC +CCAGCTGCCTTTCAGAGCCAGGCAGCTACTATCAATCGATTGGAGCTGGCACTTGACGTG +CCAATTGGCCTCCTGGCATAGGGTTGTTGCTTGCTGCTCTGGGAGGGCAGAATGAAGTAG +CTGTCCCTAGAATAGGGGCCCTCACAAACCAGGTAGCTGTCATCCTGTGGAGCAGCACCA +TGGTGACATTCTGGCTCAGGTGCTGTGTAGCTGAGGGGTCGTTTCCTCCATTTAGGAGTA +GACGTAATGAGTCCAGCTGTCCCCAGCCATCTCTTCAAGGCCATTGGACCCTGGTTCTGC +TGGAGCCCCTCTTCTCACAGTTACCTCATAGTCCTGCACCACCCCCTCCCCACACTTGCA +AGAGGGACTGGACAGACCTCTGTTGAAAGAGTGCAAAAGACAGAACCATACACCAAAATC +TGTGCAAGTGAGACTACCTCTCCTCTCATCACCACCCCTACCACCCATAGGGTTGCAGAG +ACCTGTCTGCCCTCGGCAGGCAGGATCATTAGCTGAGGTCAGAGGATGTCGGGGCAGCAC +CAATTCAAACAGGACTCAGGACCGCTGTCCAGCAGTCGGCCCATTAGGCTGCAGCCAGGG +GTCAGAAACACAACAAGGGAGGCTGGGCGCAGTGGCTCATGCCTGTAATCCCAGCACTTT +GGGAGGCCAAGGCAGGCAAATCACCAGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACAT +AGCAAAACCCCATCTCTACTAAAAGAATACAAAAAAAAGTAGCGGGGCATGGTGGTAGAC +GCCCATAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGATCGCTTGAACCTGGGAGGCGG +AGGTTGCAGTGAGCTGAAATTGCACCACCGCACGGCAGCCTAGGAGACAGAGCGAGACTC +CATCTCAAAAAAAAGGAAAAAAGAAAAAGATAAGAAAAACACAGCAAGGGAGACCACAGG +GACCAGTAGCCCAGTGTTCTCCCTCTCTCATCCTCTCACCTAGCTCCATGCGGACAGCTG +TGAGGCCCCCGGAGAAAGCCACAGAGAGAAAATGCTTTCAGCTGGGTGTCAAGAACTTTT +CAAGAGTGGCAGCTGTTGAAATCAAAAGAGTGGCCACAGAAGGTAGTGAGCTTCCTATCT +GAGTTTGTGTGCAATTAGAGGTTGGATGCCAGTGAGCCTGTTGCAGCCCCAAGATCCCAG +GGCTTAGGTGCCCATCCTAGACCCATTTCATCTGCCGTCACCTGCTTCCTGGCATCGCAT +GTCATCCAGACTGGAGTGTCAGTGGTGACAAAGAAGATCAGAGTGTCTGCTCTGGCTCTG 
+TCTAGCTGGGAAAAGTATGGCTGGGGGTGGGGATGGCCCAAGGCCACAAGGTATGACAGA +ACAGGATGCAAACCACCATCTGACATGTTACTGCATCCCTCTTTGTGGGCAAAGGGAAAC +CTTGGTCAACTCATGTCCAGCTTTGAGGTGCATGGCCTGGTGCTTTGGGGAAGCCCATTC +TCCAGGTGCTGCCATGAACCCCAGGGCACAGTCCATCACCCCCAAGCCCCAGATCCCACT +TGTCCACCACCACCACCTCTTGGCCCTATAGATTGCATAGGTTTGGCTGCTCAGAGGGAA +ATCACTGGTACCAGCCCCACATGATCCCACTGCTGGATGGTGGGAGAGACAACCATGTCT +GGCTGGGTCTGCGGCCCGCTGGCCATGAGTCCCTGGGCATCTGGAGTCCTTGTAAAAGGT +TCTGTTGGGTTCCAGGGGAAACACAGATAAGATAGCAAAAGTGTGGAGCACAACGGGCCC +GCATGTAGGTCCAGGTTAGACTCTTGCCGTTGCTGATTACCAGCCAGGCTCTTCCCATCC +CTGAGCCCCAGTTTTCTGCAGGGCATCAGGGCACCTAGGTCTTCAACACAGGTTGAAGCT +AATGTGGCCTCTCTTGCTTGCCACCTTTTTCAACTTCAACTGAATGACATAACTGCTCCA +AGTCTCAGTGTTTACATCTATAAAGTGGGATGAAAGAACTGCTGACCCAGACATTGGGTT +TAGAGTGCTTTACACATTATCATTATCCCTAATGTCTTTGTCATCCATGTCATAGTTTTT +TGAATGCTCACTTTGAGGCAGGGCCTGAGCCTCTGGGCTTTGACTCTCCACCTTTCTGCC +ACATAGGCCCAGGCACAGCCCAGAGGCTACAGGAGCAGCGGTCCCAGCTAATGCCTCTGT +CCTGTTGGGAGGGCCCCAGTCAGTGCCCACCTGCTCACCCTGCACAAGTCAAGCTGGGCC +AGCCCCTCGTCGCTGGGGCCTGGACCTGGATGTGAGCTCAAAGGCTACACTGGGGGGTGG +AAGTGGGGGAGGTGGTGGGTGTGCAGGGTATAGTGCTCAAATGCCTAGTCCAGGGTCTGG +GGGTACATACTGGGAATGGCAGGGACATCAGGGAGGAAAGAGCCGACCTGGAGGCCATAA +AGTGAAGGAAGGGATGACCAAGAGGAAGATGGCCAGGTCCAGAGGTGGGGTTGAGTGAAC +AGGATGGGGAGACCAGGACGCAGTCTGAGGGGGCCAGTCCGGAAGTGAGCATTCCTGTAC +CTCTTCCGAGAGAGGCCGCAGGGTGAGCAGGTGACAGATGGTGTCCATGAGGGCATGGAC +TCAGGGGCGGCTGCGACCTCCCATGGCGTCCCTGCAGGCAGGGAGCCGCGGAGTCCCCGA +AGGCCGCTGGATCAGGAAGCGCTCAGACGCCCGCGCAGCCCGAAGCTTTGGCGCAGCCCC +GGGGGCGGGGCGCCAGGAGGTGGCAGCCTCCGAGCGACAGCGCGCCCAGCCAACGGGACG +CCGGCATGCGGCGCGCCGCCCCGCCCCTCGCTGCGCGGCCCAATGGTGAACGCGCCGGCT +TCGGGCTGGGCGGTACTGGGCTGGCTGCGGTGGCGCGCGGGCGCGGCACCCGGAAGTCGG +CGGCGGTGGCGGAGGCGGTGAGTGCGCGGCTCCGGGGCTGGCCGACTCCGCTAGTGGCCC +GGCCGGCCTGGGCTCGGGGGCTCCGGGCTCTGGGCTCTGGGTGCGCGGACCGGGCCAGGC +TGCTTGAAGGTGGGTGGGCTGGCCGCACTTCCTCACCCTGCCTCAGTTTCCCCTTTCTGA +GGTATGGTATAGAATCAGGCGGCTGGGTCAGGGGCAACCACGTCGATCCCGGCCGACGAA +AATGCCCCGCGCGTTACTGGAAGGTGTTTTTGATAATCCCCGCTGGCACGGGATGCGAGG 
+GACTCTGGCAGGTGGGACCAGACTCGCTGCGGGGAGGCTGAGATCGGCAGGGTTGGGAGC +TGCGTGGTCCCTGCAGGGTGTGTGGGCTGCTCGGCCTTGGCCAGCATCAGGGACAGCTCT +GGCGCCCGGTCACTCTGCCCCCTACCCGCGGCCTGCTGCGGGCCAGCAGGGTGACAGCTA +ATGTGGGTGTGAGTAGATAGTAAGTGCTGGGTCATCCCTGCGCCTGGATGCGGCCTTCCT +GGGGAGGTTTGGGTAGTTAGATCCCCCACCCCCAACTCCAGAGCGTCAGAGGTAGGGGCT +GGCAGGGCTGCACTCAGGGCAGTCCTTGCCCCACCGCTGGGCTTTGTAGCAGCCTGGGCT +TTGCCCGTTTTCCTCTAGGAAGTGGGCACAGAGGGCTGTTTGCTGAGACCCCCACCTTCC +CTGCCCCTAGCACCTCTCCTGTGCTCCCTTGGCTTCCTGCTGGACCACAAGCTTTAGAGT +GAACCGTCTGCCCTTGGACAAGTCCCTGGGCCTCAGGCCCCCTTCCTCTTCAGGAATGTC +ACATGCCCTGCCCTGGCCAGGCCTGTTTGCACTGTCGTGTGGCTCTAGACCTGCTGACCA +CTGTTTTGCTGCCACTCTGGGCACAGTGCCCTCTTCCATGAGGCAGATGTGACGGCAGCT +ATAAGGGGGTAGCTGAGTAGCTCTTTGGAAGGCTTTCTCACGCTAGCTCATTTGCCTCTG +GGCAGCCTTTTGGGGCAAGCATTCATTGATCCAGTCCTGCTCATACAAGGAAAACTCAGG +CTAAGGTGGCAGATCGTGTTCCCAAGGTCATTCAGAGCCAGGGCCTGCTCCCAGTGCCTC +TACTACTCCTGGGCCACTCTCTTAGGAGTGTCCCTGTGTCTGTGTCCTGCGTCTGTGTTG +GGTGGGCTGGGAGCAGCTGGAGAGGGTGACCATGTTGTTAGATGGACACAGCAGAGCCTC +TGTTCCCACAGGATCCTAGTGCTCTGGTGGTCCAGGGTAGGGTGCCCTGGTGCCCCCACC +CCCTTTGACCCCTACACAGCTTTGTGTTACTTGCATGTCCTCAGACTGCAGGCCCCTCAG +ACCAAGCCCCTGCCTGCTGTGATGTGTGGTGGCCATAGCCCTTCCCTGAGCTAAGGTGAC +CAGCTGTCAGCTCTTCCTGGGCTGCCCAGCCTCTTGTGCTTCCTTGGTCTTCACTTGTGG +GCTGTCATTCTGATTACGTTTTGCAGTGTGTGGAATCACGTGCAGTGTCCTGGGAACTTG +ATCCTGTGCCCGGCTTCTGCGCCAGACGGCGAACTTGGAGGGCACCCCGGTGCCCACCCA +AGGGTGCTGCTTCTGAAAACACCCACAGGGATATGAGGGGCTTCTCAGGGACCCCAGAAG +GGGTCTGGAGAGAAATGGCCCGGGATAGGGAGGCTGGGAAGACTTCTCGAAGTAGGGCCA +TGCTTGTCATTGACACTCCAGATGCATGAGAAGCAGGGCCCATCCTTTGGTGACGTGTCA +GTCAGCACATGGCAGCAGCATGGCAGAGCCTGGCCTTTGTTGGCCCCTACCATAGGGCTT +GGGCTGTCTTGAAAACAAGCCTATCCCTACCATGAGGATGGGCTCCGTGAAAACAAGGTC +ACCCCAGAAAGACTGAGCTGCTTTTTGAGGTTATCACAGCTTTTTCCCCTTTTTCTCTCA +ACTTCCCTTTTAGACTGCAAAAATAGTTTATGCATGCTTAAAGATAAAAAAGGCTGGCCT +CCTTCTCCCAGCGGCTGCTCCCCTGGTGCACTTGCTGACATTTGGGGGTACATCCATTGA +GATCTTCAGTGCCATGCTAACTTGAAACTTGAAGTGCTTGCCTTCATCCTCTTTGCGGGC +ATGGTAGGAGTGCCCCCCTTTTTTTTTGTTTTTACTAACCACTCAGGCCGCAGGGGCCGC 
+TGCGAGGCTGGCCTGTCCCTGGCTTGTGAGGAAGCTGAGTGTTCCTCCCAGGAGGGCCTG +AGGATGGGGCCACCCTGTGTCCAGAGCTTGCTGTCTGCTCACGATGCCTCTTGTTTGTGC +TATTCCTTGGAACTGGGAAGAGACAGTGATCCTCTGGTGTGAGGCTGAAGAAATGCCCCG +GAAGCATTGTGAAGATCTGAGGAGAGAGTACCACTGTAGAAAATGTGTCATGATAGGCCA +AGCACGGTGGCTCACGCCTGTAATCCCAGCAGTTTGGGAGGCCGAGGCGGGTGGATCACG +AGGCCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATA +CAAAAAAATTAGCCGGACGTGGTGGTGGCAGGCACCTGTAGTCCCAGCTACTCGGGAGGC +TGAGGCAGGAGAATGGCGTGAACCCAGGAGGCAGAGCTTGCAGTGAGCCGGGATCGTGCC +ACTGCACTCCAGCCTGGGCAACAGAGCGAAACTCCATCTCAAAAAAAAAAAAAGAAAATG +TGTCATAATAAAATGCCATTTAGGCCCGGCACGGTGGCTCACACCTATAATCCCAGCACT +TTGGGAGGCCGAGACAGGTGGATCACCTGAGGTCAGGAGTTCGAGATCAGCCTGGCCAAC +ATAGTGAAACCCCGTCCCTACTAAAAACAAAATACAAAAATTTGCTGGGCATGGTGGCAG +GTGCCTGTAATCCCAACTACTCGAAAGGCTGAGGCAGGAGAATTGCTTGAACCCAGGAGG +CGAACGTTGCAGTGAGAGAGGTCACCCTCTCCAGCTGCCCCCAGCCCACCCAACAGGACC +ATTGCACTCCAGCCTGGGCGACAAGAGCAAAACTCCATCTAAAAATAAAAATAAAAAATG +CCATTTAATAGCCAGAGAGTGTAAACTGGTGAGCTCTTGCTGCGGAGATAGGAGGGAGCA +GGGCATTGCAGTGGACTGGAGACTCAGAGGGTTTCCTGACGTCAGGACTTTCTTCGCCCA +GGCCCCGTGAACAGCGACACTGTGTTCCCACGTGTGTTTACCTCCTCTGCTGACAGCCAC +CGGGAGCCCGCGTCACATGTCTGTGTCTGCCTTTGAAAAGAGCAACTCGTGCTTTCAAGC +TCTGCCCCTGGGGGCTGCCACCGCTGATGGAGTGGGGATTACAAGGGATGGGGATTGGGC +GCCCGGCTGCTCAGCCCTGCAGCACCAGCCCTTGTGGCCATTGAGGCCATTAAATGGGGG +CAGCAGACTGTAGGGCTTAGTCACCCTCTGTTCAGGGACTCACCATGTGCAGCCCCAATA +GGAGCCCGACTTGATGGCTACCCTCCCAAGTGTCTAGGTCCCCTTGACCCTGTGGTACTA +ATAGCTTGCCATTCCTCTGTCACCACTGCTTTCTGCTTAGCACACATCCCCTCTGCCCAT +GTGCTATGCTTGCCTTTTCGCTTGCCAAAGGGAAGTGGTTCCGCCAAAAGCGTGTGTCCC +ATCCCCTCTGGCCCCCAGGGTCTCCAGGTGTGGTGTTGCTGCGACAGCCCAGCTACACCC +CGTCCTATGCAGAGCTGTCTTGCACCAGGGCTGATTGGGTCTTCTCTTGCTTTCTGTGAA +CAAGTGACTCTTGGGTTCTTGCCCTTCCTTCCCTCTCTTCCCTGCCCATAGAAATGTCCC +AGCCCTGGCAGGGAAGGAGGGCAGGATCTGTACATTTTCTTTTTTTTTGCGTTTTGAGAC +AGAGTCTTGCTTTGTCACCTCAAAAGGCTGAGGCAGGAGAATCGCTTGAATACCGGAGGC +AGAGGTTGCAGTGACCTGAGATCGTGCCACTGCAACCTCTGCCTCTCAGGTTCAATGCGA +TTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCGTGCATCACCACGCCCTGCT 
+AATTTTTGTGTTTTTAGTAGAGACGGGGTTTCACCATTTTAGCCAGGCTGGTCTTGAACT +CCTGACCTCAGGTGATCCACCCACCTCGGCCTCCCAAAGTGTTGGGATTACAGGTGTGAG +CCACTGTGCCTGGCCAGTGCACTTTCTTTTGAGGGTAGACATTACTGTTCCCCGCCGGGA +AGCATGCTTACTGGGTGTGCACGCTTTCAGTGTCTGGCGTGACAGATTGTACAAAGTGAA +CCCAGCCACACAGCTACCGTCCATATCACACCACCACCAGCACCCAGATATCTCCCTCGT +GTCCCCCAGTGGCTGCCTAGACCAGGTGGCCCTGCCCTGCCTCCTGTCCCCATGGGCAGA +TGTTGCTTATCTCTGGCCTCTGCATGAAAGACCAAGCAGAGTGCTATCTTCTGGCTGAGA +CCTCCCTGCTGCCTCATCACCATGGAAAGGCACAGGCTGGCTTCTTTTCCAGGTTATGGA +CTTTCTTACTGTGTCACTTTGTGGACTTGGGCACCTGCAGAGTGCCTGGCATGGTGAGCA +GGAGTTGGGGCATGGCGTGATCTAGGCAGGGTTTGGAAGGTTCTTTGAATAGCCAAGGGG +GACAGTCTGGAGGGTGCCCTCCAGACACCTGAGAATGCATTGAAGCTAGGGTGACTACTG +CAGCATCAGAGGCCTGGTGACACATTTAAGGCCTCAAGGGGCCCATGTGACTAGAAGTCC +AGGATGTGGCTGAGGTGGGCCAGACTGGATGGCCAGCGGCCACCTAAGGACAATTGGCTG +TGCGTTTGGGAAGCATGGGTGGTCCCCACCTCACAGCAGTCGCACAGGCAGACAGTGGGC +AGATCCAAGAGCTTCACCAAGGCTGGGCGGGGTGGCTCACGCCTATAATCCCAGCATTTT +GGGAGGCTGAGGCGGGGGGATCATGAGGTCAGGAGTTCAAGACCAGCCTGACCAACATGG +TGAAAACCTGTCTTTACTAAAAATACAAAAATTAGCCAGGCATGGTGGCACGCACCTGTA +ATGACAGTTACTTGGGAGGCTGAGGCAGGAGAATTGCTTGAACCCAGCAGGTGGAGGTTG +TAGTGAGCTGAGATCGCGCCGTTGAACTCCAGCCTGGGCGACATAGCGAGACTTCGTCTC +AAAAAAAAAAAAAAAGAGCTTCACCACTGTGCAACCATACAAGGTAGATAAGAAAAAGGA +GTTCTCATTTTGGGACAGGGAGGGCCTTTAGCAGGACGGGACATGACTGCTGTACAGTAA +TGATGGGAAAATCCCATGTAATATAAGGCTATTTCTACTAGACAGCATCACAGGCAGAGC +TGAAAGGCAGGGGATGGCTAGGGAGAAAACAGTTGCAGGCCTGGGACAGGCACATGGTTG +CCCACATCACACCAGCTGCACTACGGGTCAGTGAGACAAAGGCCTAGCAGTCAGTACCCA +TGTTGGTGAATTGTTGGCAAGAACATACAGGAGGTCACAAGTTGCCCAGCCAGTGGGGGA +TGCCCATAAAACCCTGGCGGGAACATTTCCCACACTGCATACACCCTGGAGGCAGGTGAG +TGGCAACAGGTTCTGGTGCTAGACGTTAGCGGGGACTTGGCCAAGTCTGTTTATCTGAAA +GCGTCACCCATGGATGGAGCAATTCCACCTCTTGCGTCTATCCTAGGAATGTCCTCAGCT +GCTCTCCCACTGTTGTAAAGAAATACCTGGGACTGGGTCATTTATTAAAAAAAGAGGTTT +AATTGGCTCTCGGTTCTGCAAGGTGCACAGGAAGCATGATCCTGGCATTTGCTCAGCTTC +TGGGGAGGCCTCAGGAAACTTACAATCATGGCAGAAGGAAAAGCAGGAGCCAGCACTTCA +CACGGCTGGAGCAGGAGGAAGAGAGAGATGGGGGAGGTGCCACACACTTAAAACCAGATC 
+TCGTGAGAACTCTATCACTGTACAGCACCGGGGGGGATGGTGTTAAACTTTCATGAGAAT +TCTGCCCCCATGATCCAATCGCCTCCCACCAGGTCCTGCCTCCAACACTGGGAATTAAAT +TCAACGTGAGATTTGGTGGGGACACAGATCCAAGCCAAATCAGCCCATGTGCAGAGCATG +TGCTAGGGATGGTCGCTTGATGGGACGAAGACACCAGCCACAGCTGAGAGGCAGAGCAGC +GGGAAAGCTGGGCAGTGGTGAACCAGTGGGGACATGGCTCTGGTGGAGACAGACCTCATG +GCCAATCAGGGAGAACGGAGCAATGGGACCCTAGCGTGTTAACCCCACAGCATACATGTG +TCTGCTCACATGTGTGTGCAAGCAGGTTAAGAACACTCTGGCGCCACAGGCAGCCCTTGG +CTGAGGCTTTTTTTCCTTTTTTTTTTGAGACTCAGAGTCTTGCCTTGTCACCCAGGCTGG +AGTGCAGTGGTGCTATCATAGCTCACTGCAGCCTCAAACTCCTGGGCTCGAGCAATCCTT +CTGCCTCAGCCTCCTGAGTAGCTGGGACTACAGATGCATGTCACCATGCTCGGCTAATTT +TATTTTATTATTATTTATTTTATTTTACTTTACTTTTTTGAAACGGAGTCTCGCGCTGTC +ACCCAGGCTGGAGTGAAGTGGCACAATCTCGGTTCACGGCGCCCTCCGGCTCCTGGGTTC +AAGTGATCCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCCTGCACTACCATGC +CTGGCTAATTTTTGTATTTTTAGTAGAGACCGGGTTTCACCGTGTTGGCCAGGCTGGTCT +TGAATTCCTGACCTCAAATGATCTGCCCGCCTCAGCCTCCCAAAGTGCAGGGATTACAGC +CATGAGCCACTGCACCCTACCTTATTTTATTATATTATTAGTATTATATTGAGACAGAGT +CTCCCTCTGTTGCTGTGTTGCGGAAGTGCAGTGACATGATCATAATTCACTGCAGCCTTG +ATCTTCTAGGCTCAAGCCATCCTCCTACCTCAGCCTCCCAAGTAGTTGGGACCACAGGCA +TATGCCACTGCACCTGGCTAATTGTTTTTATTTGCTTTTTGTAGACAGGGACTTGCTGTG +TTTCACAGGCTGGACTTAAACTCCTGGCCTCAAACAATCCTCCTGCTTTAGCCTCCCAAA +GTGCTGGGATGACAGACATGAGCCCTGAAGGGATGGGGCTTTGGCCCGTTAAATGTCACT +GGAAATGTTTCCCTCCAGAATGTGCTCCTGGGTGACCTGAGCGATGGTATTAACAACTAT +GGGTCCCACCAGCCTCCAGAGCTGCCAGTCGTGGCTACAGGAGACTCAGCCTTAGGCTGG +GGCACAGAGAGGCTGAATAGAGGGTGAATGAACAGGCAGCGTATGGGAGGGACCAGCGTT +TCTTGGTATTATACAAAAGCTAGTGGAGGTGGTGCTGGTTGGTGTGGGGTGGGCGACTGG +CACGGTCAGATGCCTGATGCCTGTTGGCCTCCTCAGAAGGGGAGGCTGGCAGTAGTGGGT +TCTGGGACATGCAGGAGGGTGGCCTGGATTTGCCCTGCAGCACTGAGGTCTGCCTGTTCC +CCACTGGGTCCTCCATTTGAGGCCCTGTCAGCATGGCCCTCGTCCCCCTTGCTTCTTTTC +CCCATGGGCCTCTGCAAGGTTACTGCCCACCCTCCACCCCACCTACCTGCCTGTGGTGGG +CACTGAGAGCCAGGGAAGCAGATCCTCACTGCCAGGCAGCAGGCCATGTGCGCATCCATC +AGGGTGCCAGACCTTGTCCCCACGCAGCCCTGGCTGGGGTCACTGCTGCTGCTGCTCTCT +GCCTGTGGCTCTGGCCCATCTTTGCCCAGGCGTTCTGTCTGAATGGTTTTCTTGTCGTGC 
+CTTTTTTAGACACCGCATGGAGTGAGGTTTGGGGCTTGCCTCAACTGGAGTCTCATTCTT +CTTAGAGAGGGAGTTTAGCCCGTTTCCATTTGGCCTTAGGACAGAAGGCCCTGTTGGTTT +CTCTGAGAAGCCAGGGGAGGAGCCCTAGGCTGGAGAGGGGATTCTAGAGCGGGGTGGCTC +TCAGAAGCAGAAGGTGGGCAGAGAGGACGGTGCACGCTGGGAGAGTTCTCGAAAGCATCA +GCAGATGGGCCCAGAGTGCCTGGAAAGCTTCCGTCCTTCACAGACAGCGCCATGCTCTAA +GAATAACCCATGCCAAGTCCAGGCTTTCTTCCTGTTTTGCAGAATTACCAGGTTGTTAGG +CTAGGGGAACGTATGCTGGGATTCTGGGAAGAATTCCACCCGCACTGGTATTCTTAGAGG +ACGGGAAGCCCTTGGCTGCGGTGTGAAGAGGGCTCTCTCTTTCCATTCATATCTCATAGA +TCCTCTGGCCAGGCTTGGGGGTGTTGAGGGACTCCAGGAGTGTGGGGGGAACTTCCTCCC +TGGAAGAGGCCTGTGCTCTGTCCCTGCTGAGGCTGGTGGCATTTGCAGTCACATGGTGAG +CTCACAGTGACTGTCCCCCTCTTACCTCCCTGGCAGGGTTGGCAAAGACCACACCAAAGT +CCCAAGTGATTAGTTCCCCGGCCTCCCTGCACAGAGGCTGTGCATGTCCCCTGGGCATTC +ACTCTAGCTGTCTGGGCTGCAGGGAGGCAGGGTGCCTGGCCCAACTGGAGATGGCACTCT +ACTGGGGGTGGTTAAGTGTTGACCATCACAAAGGCGACCTCTTTTGTCACATCTGTGTGA +GAGGAGCTGGACTTTCTCAGATATGGATGAGCCTCACGACGCACATCCTTTCGTCCCTTG +CTTGTCTACCAAGTCCCTATGTGCTCCAGGAGGCAGTGGTACAGGCCAGCCGCTCCGTAT +GACCAGAGAACCAGTGCCTCACACTCCAGCGCATTTCCTTCCCAGGGTGCACCAGGACTC +TTGTAAAGAGGGCATTCTCGTGTAAGCAGTTGCTTTTTTTTTTTTTTTTTTTAATTTTTA +TTTTTTTGAGATGGAGTTTCCCTCTTGTCTCTTAGGCTGGAGTGCAATGGCACGATCTCG +GTTCACTGCAACCTCCACCTCGTGGGTTCAAGTGATTCTCCTGCCTCAGCCTCCTCAGTA +GCTAGGATTATAGGCACGCACCACCACACCTGGCTAATTTGTTTGTTTGTTTGTTTGTTT +GTTTTTTGAGATGGAGTCTTGCTCTGTTGCCCAGGCTGGAGGACAGTGGTACGATCTTGG +CTCACTACAACCTCTGCCTCCTGGGTTCAAGCAAGTCTTCTGTCTCAGCCTCCCAAGTAG +CTGGGACTACAGGCGCCCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACAG +GGTTTCACCATATTGGCCAGGCTGGTCTCGAACTCTTGACCTCGTGATCCACCCACCTTG +GCCTCCCAAAGTGCTGGAATTACAGGCGTGATAATTTTTGTATTTTTAGTAAAGATGGGG +TTTCACCATGTTGGCCAGGCTGGCCTCAAACTCCTGACCTCAGGTGATCCACCCGCCTTG +GCTTCCCAAAGTGTTGGGATTACAGGCGTGAGCCACCTCGCCCAACCAGCAGTTGCTGTA +AAATTTTTTAAAAGGATGTCACTTATACTACCCAGTCATTAAGACTTCCTGAGGTGGAAA +GATCACTTCAGCCCATGAGTTTGAGAATAGCCTGGGCAACATAATGAGACCCCACCTCTC +CAAAAAAATTTAAAAATTAGGGCCAGGCGTGGTGGCTCATGTCTGTAATACCAGCACTTT +GGGAGGCCAAGGTGGGTGGGTCACAAGGTCAGGAGATCGAGACCATCCTGGCCAACATGG 
+TGAAACCCCATCTCTACTAAAAATACAAAAATTAGCTGGGTGTGATGGCACGCACCTCTA +GTCCCAGCTATTTGAGAGGCTGAGGCAGGAGAATCGCTTGAACCAGGGAGGAGGAGGCTG +CAGTGAGCTGAGATCGCGCCGCTGCACTCCAGCCTAGGTGACAGATCGAGACTCTGTCTC +AAAAAAAAAAAATTAGCCGGGTGTGGTGGTATGCAGCTGTAATCTTAGCTACTTGGGAGG +CTGAGGTGGGAGGATGGCTTGAGCCCAGGAGTTCGAGGCTGCAGTGAGCTATGATCACAC +GACTGCACCTGGGCGACAGAGTGGAGGTGATGTGCTTCCTCAAATAGCCATGAGAACAAC +GCTTCTTTTCTTGTCCTGGCTCAGGAACCTCACCACCATGCCACGTTGCCCTGCTGCGCT +GTGCTGGTGTCGTAGGTCGGTGGCAGGACATTTGCCCACCTCACAGGCTCTGCTTTCAGT +CTTGAAGTATGGCTCCTCTCGACTGCTGCCCACACGGCCTCCTGTTGTCAGCCTGGGCTA +CATATCTGGGCTCTTCCATCCCTGTGGGCCTGGTGAGGCCTGGCCATGTGCCTGTGTCAT +GCCAGTTGCTTTCAAGATGATGTTTCTAGTAGGACCACATGTGAACACAGTGACCACCAT +GGCCAGGAGGAAGCTGGCTGCCCATCACCAGCTTGCCCAGGGACCTCTTGCACCCTGGCG +GGGATCAGCAGATCCTGCTATGAGAGAGCAGAAGGCAGCATGCTGACTTCCAGGAGTGTC +CACAGCGTGGCTGGGCAGAGCAGAGCTACAAATGAACAGATCCAGTGCCGAGGCCATCCT +GACTCTCTCAGGGGCTGCCAGGCCGTGGGTGCAGGGTCCAGCAGGCCCACTTGATCCCAG +CATCCCACGTGTGCCTGCTCGGGGTGGCCAGCTCACTGCTTCTTCACCTCTAGCCCAACC +AGCAGGTGCCCCACCATGAGCCCCTCCAGGCTCACCTTGCCCATCCCTGCCATGTGACCT +GGGACTCTTGAGATGAGATCACAGCTGCAGAGCACACAGCACAGGGAGGAGCTCATCAGC +ATCTTCCCACCTGTGGTGCCCTGGCCCAGCCTCCTTCCGGTTGGGGTTGGACCCGCCCTG +GCAGCCAGGTTTTGCGTCACCTGCCCCACCCGCCCTGTAACCTACTTGCCCAACTGCTAT +CCCGGTGGCCGTGAGGCCAGTTGACAGAATGGAGAGGGGTAAGCATTTTCCGCCTGGAGA +GGATGGGCGTGTGGCCATCTGACTGCTTCTGCAAGCCTGGTGGTTGTGTGCAGGCTGCCC +TGGTGGTCTTAGCCCCTTGGGGCAGGCGGGGTCCTGTGCAGGATGCAGAGACTGGGCGTG +GCTTCCTGCAGATGTTCAGATGCTTCTGGGCTGTGGCCTCTGAGACCCAGGGCAAGGCTG +CTGACTGGACCCCAGACCCTCCTGCCTGCCTCTTCCTCCAGCTGCTCTTTGTGGTCTGGT +TCTGGCCGCCTGGGCCTCAGGGCCAACCCTGCATGGCTTTGACCACTCTGGCCTTCTTGG +ACTGCCAAGGACATGCTGAGAGTTGGTTGGGCTTGGCCCAAGTGCTGAGCCCAGGGCAGG +GGTGGGGACAGGCAGGGCGGAGCCTCACCTGGAAAGCCAGGCACGATTCTCCCTGGGACA +CCAACCCTCCTGTAGAGTTGGCCAATTCGGGCAACCTTGAGGGCGAGCTCTGTGTGGCAC +AAGAGATGCCCTGAGGAGCCCAGCCTCACCTGGTGTGATGGGACCTGTGGAAGTTGTGGG +AAGTGGTCCTGGCCACCCGTGTGCCCACATGCCCTGCGCCCAGATCAAGATATGGTGACC +AAAAGGACAAGGACTATATCCATGGCTCCCTCCCAGAGCCTGGCAAGTCAGCAGGCATGA 
+GAGGGTGTTGGAATGCTGTGGTGGGCCTGTGTGGGGACCTTGTAGGACCTAGGGAACCTG +CAGGGCTTGGCTTAGGGAGCACACAGGGGCCCAGGCAAAGGCAAGGTCACAGGTCGGGGG +AGGTGAAGCTGGCAGGGGGAGGGGGAGACCTGCTGGCTAGAGCTGGGTTGGGGGCCGGTG +GGCAGTGGGCCTGGCTCGAGCAGGGGGCGAGGGATTGGAGAGAAAGGCAGTTCCTGATGG +TCCCCTCCCCAGGGGCTGGCTTTCCTCTGGTCCTTCCCTCCCAATGACCGCGTCTTCGTC +GAGGCCACAGCCCTTGGCTCTGCGCCCACACCTCCAGTGCCAGGCTGTCCGGAGATCTGT +TTATGGCCTTCCCTTGGACCATGGAGCCCTCCCTGCCACTGGTGCCTGGAGGGCTGGTCT +GCTGCCCCTGCACCCTGGCCAGCTAGGATGGTGGGGTCCTGCAGCTGAACTGGGGGTGCC +TTTCTGCCTGTCTGTCCTACGTGTATCAGTGGCCAGAACTGCCCTCATCCATGGCCTTGC +CAGCAGCTGGGCGTGCAGCCCAGGGAGGGCTGAGGTCGGCTGAAGTTGATTGAGACCAAC +GGCCTTCTCTAACACCCCAGAGGAGCATGTCCCTGGTGAGCAGGCCTTGGGCAGCCCTGG +GCTCCAGGCCAGTGGGATTCCAGTAGAGACCAGCTGGCAGGGCTTCTCCAGTGACTGCAG +CCAGGGGTTCTCATGTGCAGGAAAGACCAGCTCCTCCAGAGTAAAGGGACTTGTGTCTTC +TGGCCACATGGTGAGTGCTGGGCCCACCCCATCAGTGGCACCTTGGGTGCACTCCCAAAG +CCCAGCGTGTTCCCCATCTTCCTGCACTGCATTCGGGATGGTTCTGCATCATCAGCTGCA +CCTGATTGCTCTTAAACCCACAGCTGAGCTTTAATGTGCAGTCACTGAAGCTTTCATTTC +TAAAATAGCTTCCTAAGAGTTCTTTTGGCCAGGCATGGTGGCTCATGCCTGTAATCCTAA +CACTTTGGGAGGCCGAGGTGGGTGGATCACCTGAGGTCAGGAGTTGGAGACCAGCCTGAC +CAACATGGAGAAACCCCGTCTCCACTAAAAATACAAAATTAGCTGGGCGTGGTGGCGCAT +GCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATTGCTTGAACTCGGGGGGCG +GAGGTTGCGGTGAGCCGAGATTATGCCAGCCAGCCTGGGCAACAGGAGCAAAAACAAAAC +AACAACAACAAAAAAAAACCGAGTTCCTTTTGCTGCTTTGTAAACCCAGTTTGCTCTTCA +TCACTTCCTTTGCCCTGCAGATGTTTCTGAGCTGTCTGTTTTCTTTCTGCCATCAGAGCA +TGGTTGGTTGATGATGCATGTACGGTTATCTTTCTGCTGGTTCTTGCTCATGGTGCTTCT +TTCATTATGTGGTTCTTTCTGACTATGTACTGCTTCTGGTCTTTGAAAAATGATTTTGAG +ACTTCTTTGAAGTATAGGATGAAGGTATCTTTCTCCAGAGAGAACTTCAGGCACAGGGGC +ACTACGAGTGCTGGAACCCCTTAAAACTGTGCTTGGCTTGTGGGTCCTTGTACCCCCAGT +GTGGACCTGGACGGCTGGCTGTGCCACAGCTTCTCAGAAACTTCCCTTCACTTATGTGCA +ATGTCCCCATAACCCCTGGGGGTGGCTGCGGCTGTAGTTCATCTCTCCCTAAGCATCGCT +CTTTGGAGTCCCAGCCTAACTTAGAGGAGAGTCTTCTGTTAGGTTCTTCATCCTGTGGGG +GTGACTGGACCTTGTCTTCTGTCTCCTTCGCCCCCGAGGCCACCAGAGTATGAGCCCCAG +TGTGTTCAGTGGGCAAACGCTCTCAGGCAGGATTGGCCACTGTGTTCTGCTGTCCTCTGT 
+GGGCCACGCTCTCCGAACTGTTGGCCAAATGCTGCATGAGCTCATCAGCTCCTGGCAATT +TTTAAGATGTTTTCATATTTCATCCAGCATCTTGAGTTGTTTTTACCAAAAGGATTGCCC +CTAGTAGCCTGCCCACCCTCGGATGATGGCCCCCTGTGGGCGCTTCCAGGGCACCCGGCA +AGGGGACCCAACCCACTTCCAGCCCCACAGTCTGCTTCCCACAGGGCCCCCTTGCAACAA +CACACCCTTTAGCTTCCCATCCTATGCAGAGTGGTGGCCAGCAGACCATCTCAAATGGCT +GCCCATCCCCCTGACCCAGTGCGTCTGCACGTGGCGCTTATTGCTATCCCAATACTTCAA +CTGCACAGCCACCAGCTCCTCACTGGAGCAGATGGTCCCCAATGGCCATCCCTAATGCCC +GTGCCTATGGAGGCACATACACTGAGCCCTCTTCACCTCCGTCCCCACTCATTAGACTTT +TCTCAGAGGAATCTTTTTGGGTCTCTTCAGTATTTGAACAGCTTCTTGGCTGTCCCATGT +CCCAGCTGTGAGCACTGGCTCTGCCCTGGAGCCGTGTCACTAAATCATTGCACATGTGCA +CATTTCAGGGAACTTGGGTGTCACCCTTGGGTGGCCTCGGAATCTAATCTCCGTTCTGCA +CTCGTTTGCATTGTGGAAAACTGCCTGCTCTGGGCCCCAGAGGTCCCTGGGTGGGGACGA +GGTGTATGCTAAGTGTGCAGTCACCCAGCGTGTCCCCGGCACGTGTCCATGCCAGCTGCT +GTGGGTGTGTGGGGCCTTCGGGTCCTGTGGGATGGCGAGTGTGTGTGCTGAGCAGGACCT +GTAGGGCCTGCATCCCAGGTGAGTGGGCCATGCTGTTGGAGGCGGAGGCAGGGCTGCCCC +CACTGGTGGCGTGTGGGGTGGGTTTGGTGCTGTTGCAGGGGAGGAGATGTGCATGTGGGG +GACAGTCTTGAAGGAAGAATGTATACAGTGGTCTATTTCCAAGACAAAGTGTCTTAAATT +GGCTTAGCTCAGCAAACTACAGAAGAAACAGGATATACTAGGCCACCCTGCTTGGATAGC +TGATGTGTGCTTGTCGGCCTCCACTTTCCCCGCACTTAGTTGCCTTCACCCGAACCAAAG +AAGTTCAGTCTAAGCTGAAAGTTTACTAGCCTGCAAAATAGCTCATTTTGTCTGTTCTTA +TCAGCCTGCCTGCTACTTAGGTCGTAAGTTAAATACTTAAAAAGTCCCTGAGCTAACTAG +GACTGCAATGCATTGTGGGCTGCAACAAAATACAGCAAGACAAGCCCCCAAAAAAACACC +TGAAGCCCCTACCCAACAATTAATAGGCGACGTCCGGGAAAATTGTGACCCCACAGTACT +CGGCCTATGAGGAACTGGGGGAGGGACCTGCGTACTAGGGGATAAATTGCTTGTCGAAAC +TGTGCTGGGTGTGCCTGTCTGACACCTGATCTTGCAAGACTGTCATTAAAAGTCTCACTT +TCGCTGTTCTCTGGGTCTGAGTTCATTCTTTAGATTTGGATGGGTAAGTTTGTTTCTCAC +AGTGTGGCTCCTGCCCTCACTTCTGCTGCCCCCCGGGGACCCCCTCGGCACCCCAGCTCG +GTGTGTCCAGGCCTTAAGACTACCATGGTGCTGATGGGATGCTTCTGTTCAGGGCTGGTG +TTGCAGGTGGTGGAGAGACAGTCTGGCCAGTGCCAATGAGTGCATGGGTTTGGGAGTTGT +TTGTGTGGCACCGGCCAAGAGTGTGGTGTCCAGTTCCCCCACACCCAGCAAGTAGTACAG +ACACCACAGAGGTGGTTCTCTCTGTTCTGGCCTGTTGCAGGTTCGGAGGGCAGCCCTGAG +TGTCTGCCATCCGCTCAACTCAGTGTTTTCCTTTTCCCGCAGACCTCGCGACCTGTGTCA 
+GCAGAGCCGCCCTGCACCACCATGTGCATCATCTTCTTTAAGTTTGATCCTCGCCCTGTT +TCCAAAAACGCGTACAGGTAACCCCCTCGCTCTGCATCTGCTGCGCCCTGCAGGGTCCTG +GGTGCCCAGCCAGTTCTCATGCCACCCAAGCTGCTGTGTGCAGGAAGGTGTGTGGGCCAG +GACGGGGCTGCACAGGCCTGGCACTGCCCTCCAGGACAGGGTCACTCAGTGTGGGATGCT +GTCAGAATGCCTCTCGGGGCGGGGACTCCAGTCAATGTACAAAGACGTGAAGACTCAGCC +ACAGAAGGCAGCCACAGGTAGGACAGAGGAGTGACATGGGTCCAGGTGGGCTGCAGTTCT +ATGGGCTTTGAGGAGGGTCGGGCGGCAGAACTGGGACAACGGCGTCAGTGAGTGAGAGCC +AGCTTGGGCTGGCGGGTCACAGCTGCTGGTGAGAAGTCAAAGTGGCGGCCAGGCTAGGGG +TCTGTGGGTGGGTCCTGAGCTGAGGAAAAAGAACCTGGGGAAGAGGGGACGCTGAGGACC +AGCCAGAGGAGTCAGGAAATCGAGGCCAGAAGCCACCCCTGACCCTGGACTCAGGACGGC +CACACCTCCATTCCCCAGGACACCCCAGCATTTATCCACACAGAGAACGAGGCAGAGATA +TAACCCACATCTAAAATGAAACGTGTAACCACTCTCTAATGCAACTGCTCGGGAAGTTTT +CATCATTTTAAAATCTGTACCAGAAATGATTTGAACCATTTCAAGAGGGTCTCAGATTTG +AGTCACTGCAAAGTTGCATAGACACCATGCTGCTTGACTTCAAGGCTGAGTGAAGTCATT +TCAGGCCAAGGAATCAGGAAGGAAGGGTGTCTGGTGTTTTCCCTTTTTGCAGTTGGGTGA +GTGGCAGGTCCCAGCAGCAGACAGCATGCTGCAGGGAAGGTGGAAGAAATGTTAGGAAAG +AACTATTGACTGCTACAGGGACTGGAGTAGTGTTGGGCTGGCTAGTGAGTGAACAGAACT +CTAAAGAATGTAGGGGCTGGCCGAGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTGG +GAGGCCAAGGTGGGCGGATCACAAGGTCAGGAGATCCAGACCATCCTGGATAACACATCT +CTACTAAAAACACACAAAAAAATTAGCTGGGCATGGGGGTGGGCGCCTGTAGTCCCAGCC +ACTTGGGAGGCTGAGACAAGAGAATGGCGTGAACCTGGGAGGCAGAGCTTGCAGTGAGCC +GAGATTGCGCCACTGCACTCCAGCATGGGCAACAGAGCGAGACTGCATCTCAAAAAAAAG +ATGAAGGTAAAATAAGGTCATGTGGGTAGGCCCTAATCAATGTGATGGGTGACTCAGATA +CGCACAGGGGAAGCTGGTGTGTATACAGAGATAGAAGCAGTCGTCTGCATACCGAGAAGA +CCCTCAAAAGGAGCCAGCCCTGCTGACGCCTTGATCTTGGATCTCCAGCATCCAATGCTG +CGAGACAATAGTGTCTGTTGTTGCAGCCGCCTGGTCTGTGGTGCTTAGTTACGGCAGCCC +CAGGAAACTAGCACGCTGAGGAGAGCCAGCCAGCTCTGTATGCGTTTCAGGGCTGCCTCC +TGCAGGCTGCTGAAGAGACTCAACCCACCCCGAGTGCTCCCATCTCTGGAAAGGAAGCTT +GATGGCTGGCCTGCGGGTGCTGGTCAGTGCCACCCAGCCCCTTGGGTGGAGAGGAGGAGG +GTTGACATGGGCCCCTCTTGCTTTCACTCAGAACCCGTGTGTTGCTGCCAGGACCTGTCC +TGCGGGTCCTGCCATTCCAGTTGGCCTCGGGATTTCTCTGGACTATTTTCCGAAGCCCTA +AGCCAAAGACGAAATGTGCCACCTCTGGGTGACTGAGCAAATGTGCGCTGTGTTTTCCTG 
+ACTTCATGGGAGGGTGTGAGACACTTCTGGTGGCTGCTTCTGCTCTGCCTGTGGGGTGTC +CAGCCAGGGGAAGGGCACGCTGCAGGCTGGGAGCACAGACACTTCTATGGAGCCCCTGAC +AGCACCTCTGCAGCACTTTGGGAGGCCTCGTACAGGTACCCAGCTAGGCTGGGCTGGACT +CCTCACCTGTGGACTGGAGTAAAAAGTAGGGTGCTTTAGGCCACTATGTTTTTTTTTTTT +ATTATTATTATTATTATTTTAGAGACAGGATCTTGCTCTGTTGCCCAGGCTGGAGTACAG +TGGTATGATCATAGCTTACTGCAGCCTCAAACTCGCCGGCTCAAGCGATCCTCCCACTTC +AGCATGCCTAAGTAACTGGGACTACAACACCATGCCCGGCTAATTTTTTTTTTTTTGGTA +GAGACAAGTTCTCACTATGTTGTCCAGGGTGGTCTCGAACTCCTGGGCCCAAGTGATCCT +CCTGCCTCAGCCTCCCAAAATGCTGGGATTACATGCGTGAGCCACCATGCCCGGCCTCGG +CCACTGTATTTGGTAATTTGATACCCATCACGGAAAACCAAGACCCTGCAGCAAGATAAC +ACAATCACTTCATGATGTCACCCTATCTCTTTCAAAACTGCCTGTTTACAGCCGGGTGCG +GTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGTGGATCACTTGAGGT +CAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAAA +AATTAGCCTGGCATGGTGGCGCACACCTGTAATCCCAGCTATTAGGGAGGCTAAGGCAGG +AGAATTGCTTGAGCCTGGGAGGCAGAGGTTACAGGGAACTGAGATAGCGCCACTGCACTC +CAGCCTGGATGACAGAGTGAGACTCCATCTTAAAAAAAAGAAAAAACGAAAAAAAACCTG +CCTGTTTACAGCCCCACGTGATGCCACTTGCTGGCTTCAGCCTAGAGTGGCTGCAGGACA +AGCTGTTACTCCATGGGACAGCTTCCATGGGGTCACCACTGAGGGTTTGATCACTGGATC +TCAGGCCAGAGTACCTCCTATCTGTCACCAAACTGTCCCCCAGACAGTGCACACACTCAC +CCCCCGGCAGAACACACTCACCCCCCGAGATGATGCACATACCCCCCGGACAGTACACGC +ACTCACCCTGCCTAGCAGCGTGGGTGTCAGGTGGTGATGGATAATTAGTGGTTTCAGTTA +TTGGAACAAAATAAAGCCGAGGTGACTTGCCATCCTCAGGGACCTCACCGTGGTGGGTGC +ATCCCTGTCTCTCTGGACGTGGGGCGCAGATAGGCTTCTACCCATGTGCAGCCCCACCAG +TGATGACTGAGAGAGCTACTTCCTCCTCCCTTTGGGGCTTGATCTGGGTGCTCAGCATGC +TGGCTTGTCCCTGAATTATGTATTCAATGAAATCCCTGGCAAACCACCAGCAATTCAGCT +AAACATGACTGCTGGAACAAAAATAACCTGGCAATTAATGTTCCTGAGTTATTAGGCAAA +CATATATAATTGCTCCTTGTTTAAAAACAGACAGCTGAACATCTTAGGACAAAAGTCCAT +GCTCAGTCAGCTTTTCTTCCTACTTTTCTCCCCTTTATTTTCCCTTTGTAAGAGCATTAT +GTCAGAATCAGCACCCGAAGCCAACAGCCCCAGCCTGGTTTGGGGCTGGACTGGGATGAG +GTGTCCCTGGCTCCCCCGACCCCTGTTCACCTGCAGGGCTGTGGGACGTGGAAGCAGAGG +AGGAAGGTATTTAAGAGAGAATGCTGAGCAGAACCACATGGAGCTTGACCCCACCCATAG +GGTGGAGGAGCTGTCATCCCAGACCCTGGCACATCCCCACCATAGCACTCAGGGACTGCG 
+GTGTTCCCCCTTCTCCTGCCCGCTGGCCAGGACACAACCAATGGGCACCTACAGGACTGG +CCCATCCAGGTCTGGGGCAAGTGCACTGCCTTCTACTAGCACTGTCCAGCCCAGGGAAGG +TGGGCTCTCCTGCCATCCCTGCTGCAGAGGTTGGGGGGCACTTCTGCCCAGAAGGGACAT +GGAGACTTGGGCTGGGTCTGGGTTGAAACATGGCAGTTAGAGGTACAAAGGCTGAGGCTC +ATCAGGGACAGAGACCACCTCCCCCTACCCCAGGCAGCACATCCCTGAGACCTGGTGCAG +CCCTGGGGCCAGCTGGGTGTCAGACCAGGCTGAATGGGGCACCCTGGAGGTGGTAAGGCT +TCCATAGCAGACCCCAAGGGCTCTAGGGCATGGCACAGTCCCTGGCAATGACCAAAAGCA +TTTCCCCAGTTCTGCTGGCAAGAAGCTTCAGGGGCAGGCACTGAGTGGGGGGCACACATG +TGCACACAGGTGTGAATGCCTCCAAGGGCATGAATGGTGGGTGCTAGCACCATGTACTTT +TTTGTTTGAAATGGAGTTTCGTTCTGTCGCCCAGGCTAGAGTGCAGTGGCACGATCTCAG +CTCCCTGCAACCTCCGCCTCCCGGGTTCAAGCAATTCTCTGCCTCAACCTCTCGAGCAGC +TGGGATTACAGGCGCCCGCCACCACACCCGGCTAATTTTTTTTTTTTTTTTTTGAGACGG +AGTCTTGCTCTGTCACTCAGGCTGGAGTACAGTAGCATGATCTCGGCTCACTGCAAACTC +TGCCTCCCGGGTTCACACCATTGTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGC +GCCCGCCACCACACCTGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTTACCGTGT +TAGCCAGGATGGTCTCGATCTCTTGACCTCGTGATCCTCCCACCTTGGCCTCTCAAAGTG +CTGGGATTACAGGCGTGAGCCACCGGGCCCGGCCTAATTTTTGTATTTTTAATAGAGATG +GGGTTTCACCATCTTGGCCAGGCTGGTCTTGAACTCCTGACCTCATGATCCACCCACCTC +GGCCTCGCAAAATGCTGGGATTACAGGCATGAGCCACCGTACCCAGCTGTGAGCACCACT +TTCACTCCCAATTTTATGAATGTCTGTTATTTCACATTTAATCAGGAGCAAATGTTGTTT +TATGAAATTGATGTGCAATGATCAATATGGAAATTATTTTTAAACACTTGTCGGGCTCTC +ACCACTGCAACTTGGTTTCAGACTCAGATTGTGATGGCAGGACAGCTGAGCTCTGCTCCT +TGGAGCTGCCTCTCCGGCACAAGTGCTCAGGGGGTCTTCAGCCATCACTTTTTTTTTTTT +TTTAGAGACAGGGCTTCTCTGTTCCCCAGGTTGGAGTGCAATGGTGCAGTCACAGTTCAC +TGCAGCCTCAAACTCCTGGGCTCAAATGATCCTGCTGCCTCAGCCTCCCCAGTAGTTGGG +AATACAGGCATGCACCACCACACCTGACTAATTTTTGTATTTTTTTTTTGTAGATATGGG +GTCTTACTTTCTTGCCCAAGCTGGTCTCAAAACTCCTAGCCTCAAGTGATCCTCCCACTT +TGGCCTCCCAAACTGCTGGGATTACAGGTGTCATCCACTGTGCCTGGCCCAGCCATCGCT +TCTTAGACCCTCAGGTCTAATGGACATTGCAGGAACTGACCCCTTCCACTCAGTCACTGC +TGTGTCCTTATTTGTTTTCTTCTAATGTCCAAGCTGGGACTTGAAAGTCAGTTTTCTTTC +CTCAAGACCAGGGCTCTTAAAGTTTAGATGTCTCTCCTAATTCCAAAAGAAATATTTGTT +TGATTAGGCTGGATGTGGTGGCTCACACCTGTAATCCTCCGCCTCGGAGGCCGAGGCGGG 
+TAGATCATTTGAGGTCAGGAGTTCAAGACCAGCTTGGCCAACATGGTGAAACCCCATCTC +TACTAAAAATACAAAAAATTAGCTGGGTGTGGTGGCGGGTGCCTGTAATCCCAGCTACTC +AGGAGGCTGAGGCGGGAGAATAGCTTGAACCCGGGAGGCGGGAGTTGCAGTGAGCCGATG +TCACGCCACTGCACTCTAGCCTGGGTGACACAGAGAGACTCCATCTCAAAAAATAAAAAT +AAATAAAAATGGAAATATTTGTTTGATTCATAATGCAAAAAAACTAGTAAAACAGTGAAG +TTCCCATCCACTCCCAACCAACCCGAGAAATTAACTCCAAAACACACAGAATGGAGCAGC +CCTCAGAGGCCCACATGCTGTGCCATTCAATCTGTATCCTTGAGCTCACGCTTCCAAGAG +CCCACGGGTTTGTTGTGGGTGTTCCCAGGGCTGGGGACAGATGGACAGAAGGAGGAACAG +CCCCCCACCCTCCAGCCTGGGCTCCCGGCATGGAGCCCAGGCTCCATGGTGCCCCTAAGG +GCCTGGCAGACCCGTCTCCCGCCATTGAATGGTGTGGATGTGTGTCCCTGCCTAGCATCC +TCCTGTTTCCCGTGCGGTCCCAGCTGCAGGCTCCCGAGAGCTCACCTGGAGCCACGGTGC +TCCTTCCTCCAGTGTGGGTGCAGCAGGACTGCCGGCTTCCTGCCCTCTCTGAGACTGGCC +CTTGTTGCCACTGAGGCCAGTTTTGTGTGTGAAAAGAGAAAAAGACTTGGCTCGCTCGTT +TCCATCTGAAGCCATCAGTGATGCTTTCCTCTTGCAGGCTCATCTTGGCAGCCAACAGGG +ATGAATTCTACAGCCGACCCTCCAAGTTAGCTGACTTCTGGGGGAACAACAACGAGATCC +TCAGTGGTGAGTCTTCCTGCGTGCTCAGCGGTGGCTGCGCCTTGTTGCTTCCCTAGCCCC +TGCGTGGCCCGAGTGACCCTAACTGAGTGGTGCTGCTTCCAAGTGTGATGGCGGGGTGTA +GAGGTGGAACTGATGGGGCTCGTGGTCGGGGCCAGCCCCAGCACCGGGCATTGCCAGAGC +TGCCCACCCAGCACTGTCTGCGTGGCTTCCTGTGCCATGCGTCTGCATGGGGGCCCCATC +ATGTGAAAGCATCAGTTGGTTTTATATGGATCCCTCCTTTTTGCTGAGGGCATGTGGGTG +GATTTGTGTGATGTACAGATCCACACTCAGTCACTTTGTGAAGCAACCCAGTCATATGAG +TGTGTGCATGCGTGTGTATGCGTGCGTGTGTGTGCCTGTGTTTGTGTTTCTGTTTGTATA +TGAGCACCCTTGCTTCAGGGATGGTGGGGCTGAGTTTGGGAAGCTGGTGGTGCTGAAGCC +CCGTCACCTGCACCATGCCTGTGTTGCATTAGGTGTACTTATAGGGCAGAAGTAGGGGTG +AGAACCAGGTCCTCCCCACCAAAGATACCAGCCTCATTGAAGTCCTGTTGGACAGGAACC +AGTTCCTGCCCATGGGGTGGTGGGAGTCTACCTGGAAGGTGCGCATAGATGGCTGGTGAG +GCCACGGGTTACTCTGCCTGCGTACCAGGCGCCAAGTGGGCCCGAGGCATCCTGGCATGC +TCCAGGGTGCCCACTCAGGCCAGATGTGTGACCTCGTGGGCCCATGGAGGCTGTGTGTGC +CCTGTGCTGCCTCCCAGGCCGGGAGGGCAGGTGTTGGGATTATAGGTGTGAACCCAACAC +TTAAGGAGGCTGAGGCAGGAGGATTACTTGAGCCCAGGAGTTCAAGACCAGTCTGGGCAA +CATGGTAAAACCCTGTCTCTACAAAAAAAAATACAAAAATTAGCCAGGCGTGGTGGCATG +TGCCTGTGGCCCCAGCTACTCAGGAGGCTGAGGCTAGAGAATTGCCTGAGCCCGGAAAGC 
+AGAGGTTTCAGTGAGCTGAGATTGTGCCACTGCACTCCAGCATGGGTGACAGAGCGAGAC +CCTGTCTCATTCAATCAGTCAATCAATCAATCAAATAAAAGCTCAGCTGTGGGGTTTTGT +GAGGACTCCTGTGTAGGCCTGTGGCCTGGCAGTAGTGTGAGAATGCTGCTGAGGCTGGGT +GGTCCCGGCACCGTGTGGGAGGAGGACGGGTGCATGGAGACTGAGGCAGGGTGGTCCTAG +GCCTGCTGGTATAGGAGGATGCAGTGGGTGCAGGGCTGGTCAGGACGGGTGGGTGAAGGC +ACAAGACTGATGTCCTTGGAAGGTGGAAGGTTTTCATAGATGAAGATGTGGCGGCTCAGA +GGGTTACCCTCTTCTGGTTCCTTGACTATTGTGCCCTCCTGGGCTCCATCCCACCCTGGC +ACCCAGGTCTGTCAGCTGCCTCTGAACTAGTGCCAACCCCAGCAGCTCCAGTCTTGTGGA +ATTGTTCACCCCTGTGGTGTTTTGATGGTGGCAGCTGGTGCATGGATGTACACGGGCTTC +TGCCAAGTGTCGGGGTGACCATATCACTAAGGCCCACATCCTCTCACTCTCTGGCGTCAC +CGGATTGTATGCAAAAGTTTAGGACAGTGGTTCTCCAGGTGTGGTCCTTGGGCCTGTAGC +ACCAGCTTTACTTGAGAACTTGTTAGAAATGAAATCACCCTGGGCCGGTGGCTCACACCT +GTAATCTCAGCACTTTGGGAGTCCAAGGCAGGAGGATCCCTTGAGGCCAGGAGCTTGAGA +CCGGCCTGGGCAACGTAGTGAGTCCCTGTCTTTACAAAAAAAAAAAAAAAAATTAGCCAG +CTGTGGTGGCATATGCCTGTTGTCCCAGTTACTCAGGAGGCTGAGGTGGGAGGATCGCCT +GAGCCTGGAAGGTAGAAGCTGCAGTGAACCGAGACTGAGCCACTGCAGTTAGCCTAGGCA +ACAGAGTGAGACCCTGTCTGAAAAAAAAGAAAAGAAATGCAAATCAAAATCATTAAATTC +ATAAAAATTTGGCCATCACTTCTTTTTTTTTTTTTTTTTTTTTGAGATGGAATTTTGCTC +TTGTTGCCCAGGCTGGAGTGCAATGGTGCCATCTCAGCTCACCGCAACCTCCATCTCCCG +GGTTCAAGCTGTTCTCCTGCTTCAGCCTTCCTAATAGCTGGGATTACAGGCATGCACCAC +CACACCAGGCTAATTTTGTATTTTTAGTAGAGATGGGGTTTCTCCATGTTGGTCAGGCTG +GCCTTGAACTCCCGACCTCAGGTGATCCGCCCGCCTTGTCCTCCCAAAGTGCTGGGATTA +GAAGCATGAGCCATTGCACCCAGCGGCCATTACTTTTTCAAGTAGTTTTTCTGTCTTCTC +TCTCACTCTCTCCTTCTCTCCAGTTACACACATATATTAGTTCACTTGCCCTTGTCCCAC +ACCTCTGCGATGCTCTCTGCGTACTTTTCTCGGTGCCCCTAGATCTTGAGGTTTTCCAGC +CTGGTGTCTGGGAGCCAGCCCTACTCCCTGCCCTGTGTGAGCACTGGTGGTTCTTGCCCC +AGTGTCATATAGTCTCCTCCCTGAGATGCTGATCAGACATTCTCTGTGTAGCTCCTTCCT +CTCTGGACTTTTGTGCCACTGTCTTAGTTCGTTTGTGTTGCTCTAAAGGAAAAAGCTGAG +TCTGGGTAATTTTTTTCTTTTTTTTTGAGATAGGGTCTCACTCTGTTGCCCAGGCTGGAG +TGCAGTGGCACAATCCTGTCTCACTGCAGCCTCTAACTCCTGGCCTCAAGTGATCCTCCC +ACCTCAGCCTCCCTAGTAGCTGGGACTACAAGTGCACATCACCATGCCCAGCTAATTTTT +AAAAATAATTTGTAGCGACAAAGCCTTGCTGTGTTGCACAAGCTGGTTTCAAAGTCCTGG 
+CCTGAAGTGACCCTCCTTTCTTGGCCTCCGAAAGTGCTGGGATTACAGATAATAAGCCAT +GGTGCCCAGCCAAGGCTGGGTAATTTTTTTTTTTTTTTTTTTTTTGAGACAGAGTCTTGC +TCTGTCGCTTAGGCTGGAGTGCAATGGTGCGACCTTGGCTCACTGCAACCTCCACCTCCC +AGGTTCAAGTGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCACCTGCCA +CCACACCCTGCTAATTTTTGTATTTTTGTATTTTTTTATTATTATTCTTTAAGTTCTAGG +GTACATGTGCAGAACGTGCAGGTTTGTTACATAGGTATACATACATGTGCCATGTTGGTT +TGCTGCACCCATCAACTCATCATTTACATTGGGTATTTCTCCTAACGCTATCCTTCCCCC +AGCCCAATTTTTGTATTTTTAGTAGAGATGGGGTTTCACCAGGTTGGCTGGGCTGGTCTC +AAACTCCTGACCTCAAGTGATCCACAAGGCTGGGTAATTTATAAAGAAAAGAGGTTTATT +TGGCTTAGAGTTCTACAGGGGACCTGAAGCACAGTGTCAGCATCTGCTTCTGATGCAGGC +CTCAGGAAGCTTCTACTGATGGTGGAAGGGGAAGGGGAGGTGGTATGTGCAGATCACACA +CGGAGGGAGGAAGCCAGAGAGAGGGAGGAGGCGCCACGCTCTTTTAAACAACCAGTTCTC +AAAGGAACTAATAGAGCTAGAACTCATCACTGTGAGGACAACACCAAGCCGTTCATGAAG +GATCTGCCATTAGGTAGATCTGTCTCCCATTAGACCCCACCTCCAACACTGGGGATCAGT +TTTTTGGTTTGTTTGTTTTTTTTTTTTTTTTTTGAGACAGAGTTTCACTCTTGTTGCCCA +GGCTGGAGTGCAATGGCACGATCTCGGCTCACCACAAGCTCCGCCTCCAAGGTTCAAGCA +ATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCGCCTGCCACCATGCCCAGC +TAATTTTTTGTGTTTTTAGTAGAGACGGGGTTTCACCATGTTGGTCAGGCTGGTCTCGAA +CTCTTGACCTCAGGCAATCCACCCGCCTCAGCCTCCCAAAATGCTGAGATTAGAGGCGTG +AGCCACCTCACCCAGCCCGGGGATCAGATTTTAACATGAGGTCTGGGGGACAAACATCCA +AACTATGGCAGCCATGAACTCTGGTTGTCTTAGTTTCCCCAAACACTAAGCCCCTCTTCT +CAGCTCAGGGAGTCTGCTGAATTCGGCCTGGTCCCCCCACTCCCTGGGCTGGGTCCTGGA +GCCTCTCTAATCATGGGCTCACTTTGTTTCCTGTCTGTCATATCCTTTTTTGTAAAACCC +ATTTGATATGGTTTGGCTGTGTCTCCACCCAAATCTCATCTTGAATTATAGTTCCCCAAA +TCCCCACATGTCCTGGGAGAGACCCAGTGGGAGGTAATTGAATCATGGGGCATTACCCCC +ATGCTGCTATTCTCATGATAGTGAGTTCTCATGAGATCTGATAGTGTTTTGTTTTTTTTT +TTTTGAGACTGAGTTTCGCTCTTGTTACCCAGGCTGGAGTGCAATGGTGCGATCTCGGCT +CACTGCAACCTCCACCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGTAGCT +GGAATTACAGCCGCTTGCCACCACGCCCAGCTAATTTTTGTATTTTTAGTAGAGATGGGG +TTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCTGCCTCG +GCCTTCCGAAGTGCTGACATTACAGGCATGAGCTGCCGCTCCCAGCCAAGATCTGATGGT +TTTATAAGGGGCTTTTCCTGCTTTGCTTGGAACTTCTCCTTCCTGCCACCATGTGAAGAA 
+GGACATGTTTGCTTCCCCTTCTGCCATGATTGTAAGTTTCCTGAGGCCTCCCCAGCCATG +CTGAACTATGAGTCACATATCTTTCCTTTATAAATTACCCAGTCTCGGGTATGTCCTTGT +AGCAGCATGAGAATGGACTAATAAATACACCATTGTTTCTTGAGTTTTATTTATTTATTT +ATTTTTTGGTTGTTTTATGCAGGAGGGTAAATCTGGTCCTTGTTGCTTGGGCAATTTGGA +AGTTTTGTTCTTGTTTTAATTATTGATTTGTAAGAGTTATTTACCCATTCTGGATATGAG +TCCTTTGTGCCTACTCATTTCCTTAATGATAGCTTATGGTGAGCAGAATTTTTTTTTTTT +CTTTTGAGTGGGTGGAGTCTTACTCTGTTGCCAGGCTGGAGTACAGTGGTGCGATCTCAG +CTCACTGCAACCTCCGCCTCCCAGGTTCAAGTGATTCTTCTGCCTCAGCCTCCTGAGTAG +CTGGGACTACAAGCATGCACCACCATGCCCAGCTAATTTTTGTATTTTTAGTAGAGATGG +GGTTTCACCATGTTGGCCAGGATGGGCTCAATCTCTTGACCTCTTGATCTGCCTGCCTTG +GCCTCCCAAAGTGGTAGGATTACAGGCGTGAGCCACTGCGCCCGGCCAGAATATTTCATT +TTAACAAAGTACAGTTTATCTTATCTTTTATGAGTTGTTCTTTTTGTGCCTTATCCAAAA +AGTCTTTGCCTACCCAGAGGAAAATATATAACAATATTTTCCTCCATTTGCTTCTAGAAG +CCTAATTTTTGTTTTTATGTTTGGTCTATGATTTATCTCAAGGTTTTGTACATGGTGTGA +GGTAGGGGTTGAGATTTACTGCCTCCAACCCCCAAATCTCTCTAGTTGTTCCAGCATCAT +TTATTAAATGGATTATCTTTTGCCTAGTTCATTACTATGGCAACTTGGTTGTACATCAGT +TGAACATATATGTGTGGGTCTATTCCTGACTCTGTTGTATCCCATTGATCTATGTATCTG +TCTTGATGCCAACACCATGCAGTCTATGAGTGCTGCTGCTTTTTAGTAAGTCTTGAAATC +AGGCTGAGCACTGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGACCGAGGTGGGCAG +ATCACCTGAGGTTGGGAGTTCAAGACCAGCCTGACCAACATGGAGAAACCCCGTCTCTAC +TAAAAATACAAAATTAGCTGGGTGTGGTGGTCCATGCCTGTAATCCCAGCTACTCGGGAG +GCTGAGGCAGGAGAATCACTTGAACCCGGGAGGCGGAGGTTGCGGTGAGCTGAGATCACG +CCATTGCACTCCAGCCTGGGTAACAAGAGCGAAACTCTGTCTCAAAAAAAAAAAAAAAAA +GAAAAAAGAAATCAGGCAATGTGGGTCTGCCTTCTTTTTCGGGACTATTTTGGCCATCCC +AGTTCCTTTGCATATGCATAGATATTTCAGCCAGTCTGTTAATTGCTATAAAACGAAAAG +CCCCCTGGGATTTTGACTGGGCAGGTTTTGAATCTATGGGTCATGTGGGGAGAAGTGTCA +CCTTAACAATACTGAGTTGTCTAAGAAGTACCCAGTGAAACAAATAATAACAAGAAGAAA +CACTGTTGAGGCTGGGCACGGTGGCTCACGCCTGTAATTCCAGCACTTTGGAAGGCCGAG +GCGGGCCGATCATGAGATCAGGAGACCGAGACCATCTTGGCTAACACGGTGAAACCCTGT +CTCTACTAAAAAAATATGAAAAAATTAGCTAGGTGTGGTGGTGGGCACCTGTAGTCCCAG +CTACTTGGGAGGCTGAGGCAGGAGAATGGCCTGAACCCGGGAGGCGGAGCTTGCAGTGAA +CTGAGATCACACCACTGCACTCCAGCCTAGGCGACAGAGCAAGACTCTAAAAAACAAAAA 
+CAAACAAAAAAACACTATTGAATCTTTCAGTGAATATGCATGTCTCTCCTTAAATCAGGT +CAGCTCAACTTGAGACCCTCAACAGTGCCCTGGAGTTTTCAGTCTTTGTCACTCATTTTT +CATTAAATATTTTCCTGGTGGTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTTGGT +CTGTTGCCCAGGCTAGAGTGCAATGGCGCCATCTCTGCTCATTGCAACCTCCACCTCCCG +GGTTCCAGCAATTCTCCTGTCTCAGCCTCCCGAGTAGCTGGAACTACAGGTGCCTGCCAC +CATGCCCAGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCATCATATTGGCCAGGCT +GGTCTCGAACTCCTGACCTCGTGATCCGCCCGCCTTGGCCTCCCAAAGTGCTGGGATTAC +AGGCGTGAGCCACTGCACCCGGCTTTTTTTTTTTTTTTTAATCACACTAAGGATTTTAAA +AATGCTGTTATAAATGGTATATTTAAATGCTCACTTTCCAGTTCTTGCTAGCATTTACTT +ATTAGTTCTAATACATCGGTTTTGGTATTCCTTTTGGGTTTTCTTCATATATGATCCTGC +TGGTTGCAAATACAGTTTTGTTGGGTTTTTTTTTTTTTTTTGAGACAGAGTTTCGCTGAG +AGTGCAATGGTGTGATCTCGGCTCACTGCAACCTCCACCTCCCGGGTTCAAGCAATTCCC +CTGCCTCAGCCCCCCGAGTAGCTGGGATTATAGGCTGCCTCAGCCTCCTAAGTAGTTGGG +ATTACAGGCATGCACCACCACGCCTGGCTAATTTTGTATCTTTAATAGAAATGGTGTTTC +TCCATGTTGGTCAGGCTGGTCTTGAACTCCCAACCTCAGGTGATCTGCCCGCCTCGGCCT +CCCAAAGGGCTGGGATTACAGGTGTGAGTCACCGCACGGGGCCACAAATACAGTTTTATT +TCTTTATTTTTGATCTTTACATCTTTTCTTTTTCTGTTATAATTAATTATTGTTATAAAA +ATAATATCTTAGGGCCAGGCACGGTGGCTCACACCTGTAATCCCAGCACTTTGGGACGCC +AAGGCGGACGGATCACCTGAGGTTGGGAATTCGAGACCAGCCTGGCCAACATGGAGAAAC +TCCGTTTCTACTAAAAATACAAAATTAGCCAGGTGTGATGGCACATGCCTGTAATTCCAG +CTACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAACCCAGGAGGGAGAGGTTGCAGTGAG +CCGAGAGCACACCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCTGTCTCAAAAAG +TAATAATAACAATAATAAATAAAATAACATCCTAGCCAGTGCAGTGGCTCACTCCTGTAA +TCCCAGCACTTTGGGAGGCTGAGTCGGGAGGATCGCTTGAGCCCAGGAGTTCGAGACAAG +CCTGGGCAACACAGCAAAACTTTGTCTGTACAAAAAAATGCAAAAATCAGCCGAGTGTGA +TGGTACCTGCCTGTGGTCCCAGCTACTTGGGCTGAGGTGGGAGGATCGCTTGAGCCTGGG +AGGTGGAGGTTGCAGTGAACTGTGTGATTGCCACTGAACTCCAGCTTGGGTGACAGAGTG +AGACTCTGTCTCAAAAAAATAAAAAATGAAATAAAATAATAAAAATAACATCTTAATGAC +AGCCTTTTTTGCCCCAATCTTTGGGTAAAATGACCCCCTGGTCTAAGAAGGAGCCCTCCT +GTGGCTTTCAGAGCCCTCCAACCTTGTGGCATTAGCAGTGGCCCATGCCCTGCAGATTTG +GCGGGGGTGAAGGTCCTGGTTGTGTGCTGGGCTCCTAGTGAAGGGCCTGCCCTTTGGGCC +TGGTGCATCCTGCTGGGTGAGGACAGCCAGCGTCCCTGCACCCCATTCATGCGATTGGAG 
+CCTAGTGCTGCCCCTCAGCCTTGACGCTGGCCCAAGTGGAAGGATGTGTTCTGCAGGGCA +TCTGTTGGGATAGGTGTGGTTGGTGTCATAGTTGTCCTGCAGGACAGGGATCTCACTTGC +ACGACCCATGAACCCCAGGGGCTGCCTGGCCTTGAAGAGTTCCAGCCACGATTCCATAGC +TGGGGCTGCTGGGCCAAGCCGCATGTCGAACGTCCTCGAGGAGCTTGAGGCAGGAGCTGG +GCCGAGTATGCGTCTCCCAGGGCTGGGAACCAGGTGGGGGACTGGAATGGGTGGCATCAA +TGGTGGTAACACTGTCATCTGCCACAGGGCTGGACATGGAGGAAGGCAAGGAAGGAGGCA +CATGGCTGGGCATCAGCACACGTGGCAAGCTGGCAGCACTCACCAACTACCTGCAGCCGC +AGCTGGACTGGCAGGCCCGAGGGCGAGGTAAGGCGAGTGGGGTGGGGCCAAGGTGAGACA +GGGTGGGGTGGGGCAGGCCTAGGTGAGACAAGGTGGGGCCAAGGGACTACAGGACTGGCC +AATGTATGGTGGAGTGGGGCGGGCCAAGGTAGGAAGTGCCAAGGTGCTTGTGCTTGGGAG +TGGGATGGGGCAGGGCTGAGGTACCGTGGGGCAGGGCCAGGTTACAGAGTGGGCGGTGAC +AAAGCGTGGCAGGGCAGGGCCCAGGTAAGGGTGGGGACAAGAAGGGGGAAGCTGGGCCCC +GTGGCAGGTGGGTGGCTTCCGTGGGCATATCACTGCCGCCACTGAGGTGTGCTGCAGGGG +TATGCGGCAGAGACAGGAAGAGTGAGGAGGGGAGCATCCCATTTCCCACACCCTGGCCCT +CTGGCGTCGCAGCCAGGAGCCCACTCCCACCTGACCCACGCAGCACTGTTTACTCCTCTC +AGGGCGCACTCTCCCCTTGTCCTCATGCATGGCACTCCTGACTGCTGTGTCACTGTCCCC +TGCCTGGAATGTTTGTAGAGTAGATGCCTTCCCCGAGACGACAGCTCTGTGGACACACAT +TGTCCCCACTGAAAGGTGTTGTTTACTTCATGCTGGATTTCCATAGGAAGACATAGTGGG +GAAATTTTTTTTTTAATAAAATGAGCAGTTTTCAGTGCCGACCTGAGTGGCAGCCAGGCT +AATGAGGTGTGGCTGACTCTTGCGGCAGTCATGACAGGCCAGGGCTCCAGTGGGACCCCT +CATCTTCCTGCATCCTCCCCAGGGGGCCCCATATCAGTGTTCCTGGGAGAAGAGCACATG +CCTGCAGCTGTGGACACAGCATCTGTCCCCTGCCTACAGGTGAACTTGTCACCCACTTTC +TGACCACTGACGTGGACAGCTTGTCCTACCTGAAGAAGGTCTCTATGGAGGGCCATCTGT +ACAATGGCTTCAACCTCATAGCAGCCGACCTGAGGTGGGTCTGCCAGAGGGGGATGGCGG +CTCTGCCCAGCACTGCCTCGGGGGCAGGCCTCAGGCTCATCAGGAAGTGGGGTTCCACTG +GGGGCTGTGGCAGCCTCTCCAGGGACGTTGCTCCTCGTGACTTGGCAAACATTCTGAGGA +CTGAGCCTGTCACAGATGTACCAACTCGGGTATTTGGGGGTGAAGCCCAAGGCTTCCTGT +GATGGCATGAGGGCTGCCGGATCCCTGGAGAAAGCTGAGGCTGTGAAGAACAGTCACATA +GAGTGGCTCTGGCCGGGTGTCCTCCCTCCCACGGGCAGGCTGTGCAGAGCAGCCCTCCGC +AGCCCTGGCTTGGAGTGACTGGGATGCTGTGCTCCCAGGGAAGCTCCTGTTGGAGATGAG +GCCGCAGGAACTGCGGAGGTTTTGGTGTTTTGCCCTTCCGTGAAGTGAGCATTCCTCCCT +CCTGCCCTCCTCCCCCTCCAAAGCCCCAGAACTTGGCCAATGCTGTTAGTCCGGGGCCTT 
+CTGCCAGGACAGGCAAACGGGAAGTGGTCGGCACATTGTGGCGGGCCCTGAAGCTGCCTC +CTTCCACAAGGGCAGGTCCATGGGCCTCTCTCCTGGCTCCCGAGCTCCAGGCCTGCTGTT +CGTTGAGCATCCCTTGTGTGTGGCATGCCCAGCCCTGCCCCACATGGGTCTGCACACTTG +CTATGGAGGCTGGGAGAGTTGCAGTGTCCCCTAGGTGGAAGGCCGAGGGCTGGTGCGGGG +CACTTGCTGTCCGGAGCCGTTCTTCCTGCCCATGCCTTCGTGACTCCAGGTGGCAGAGCC +CCTGTGCTCTCTGCATGCCCCCATGCTCAGGAACCACTTTTCCCGGTCAGTAGTTCCATG +TGGGTCTGATTAGTCTCAGGAGAGTTCCTCTGCGGCCGTGGCACCAGCAGGACTTGGCAC +TCAGCAGCTGTGTGGAGGTGGGGTGCCAAGGGTGCCATGCCTTGAGCAGATACACGGGCT +CCTGTGGTTGGGGGTTGGGGGTTTGGGCTCCTCAGAGCCTGTGCAACTCTCCCGGCTCCA +AAGGCTGCAGAGTGAGGGTCAGCCTCCTGGACCTAGCCTGAGGATACAGGCAGTGGCCAG +GAGAGCATACCACAGGTCAGGCAAAAGCTGGTGCCAGGGCCGAGGCTGGAGGGCGGGGGC +ATCGGAGGTCAGTGAGCAGCCCTGCTGGTAGGTGCAGATGAGGAAGCCAAGGCCTGGAAG +GGAGAGAGGCTTGTCATGGCCACAGCTATCCCTGGGTGAGGCCTGCCCTCCGCCTGGCCT +GTGGTATAGTGCTGGCAGCATGGGTGTCCAGTTCTGTAGGTCCTTACACAGAAAGCCCAG +AGGAGATTTTTGCCTGCAAACAGTCTCTGAAATTATGAACCAAGTATCTTAGGAAAGTTG +CCAAAGACCATGGTGAGGAAGAACGGGAGTGGAATGTGAACATAGACGGGGGGGCCCTTG +AAGCCTACCAGGGGTGGGGAAGGTGGGTCCCACCTGGGGGCAGCACTTCGGGGAGGAAGG +CACCATGCCTGGCTCTTTCTCTGCATCTGTTCCATCTTGAAGGCCCAGGGCTCCATGCCC +AGCATCTGGCCTGGGAGGGCATCTCACAGGCCACAAAGAGATCCTCTTCACTTTGCAAAA +AAGGCACTCCAAGTTCTAGAAGACTTACTGTGGTATTGAGGATTGGAATATTTTCAGCTG +GGGAGCATTTTGAACTATTTTCTTTTTAAAAAATGATTTATTTATTTTGTAGAGATGGGG +TCTTGCTATGTTGCCCAGTCTGGTTTCAAACTCCTGGCCTCAAGCAGTCCTCCCTCCTTA +GTCTCCAGATTAGCTGGGACTATAGGCATGAGTCACTATGCCCAGCTCTACTGTCCCTTA +ACTATTTTAAAGTGTACACTTAACACTTGAGAGTAGGAAAATGTGGTCTTTGGCGCTTTC +TTTGGAAGCCTCGATCACCCTGGGGCCACACTGGGGCCACCTGTCCAGAAACCCTCATAG +ATGAGCTGTGGTAAGGGGCCTGATGGAGAGGTTGGTGATGTGGGGATGGGGCTCAGCCCT +GCAGTTGTCCCCATGGGCAGCCTCTGTGTGTCCATGCCATGGGGCATTCGGCATGGCAGG +AGGGCATTTTAGATCCTGTGCCTCCTGGGCTGGTGGGCCCCGAGGTGAGCTGGGAACATA +CCCGTGATGCTGGACAAGAGCTGGGGCCTCCATGACCCCGCCTGCCAGGCACCTGAGGGG +CTGGGTCCTGCAAGCTCCTGACCTGGCCGCAGCGGGCTACTGCGCACATCGCTAGCCTCC +AGAAACAGGGCTGTGAGATCACCGGGCAGTGTCGGGGAGGACGCCCTATGGCTGTAGTCT +GACATTCTCTCCCCTTGGCCTGCAGCACAGCAAAGGGAGACGTCATTTGCTACTATGGGA 
+ACCGAGGGGAGCCTGATCCTATCGTTTTGACGCCAGGTGAGCCTGCCCTGGCAGCCTGAT +GGGGTGGGGGACTGTTTCTATGCAGAGGTCACCCTTGTGCTTTTTAGAGACCAGGCACTT +GTCATATTACTCTTCTGAGATGACTTTGTGGCCATTTAAGTGCCTTATGGTCTGTGGGCA +CCTTGCCCTGCACCTGGACACTTCAGAGAGCCCTGGCTCCCTGCTCACCCCCTCCCCATG +GGCTTTTCCTGCTGAGCAAGGTGCGCCCCTGTTCTGGGCTTCCCACCGCAGCCCTGCAGC +CCAGCCGGCCACCCCCAGCCTCACTTCCTGTGCCGTCCCAGTGTGCCCTGTGCCTGTTCC +TTCTCAGAACCTCTGGAGCCATCACCACCTGGGGTTCTTGCCTGGTCTGCTCCCTGACGC +ACAAGTACACACCCCACTCAAGCCCCACACAGATGCACGCCTCCTGCACCCAGTCCCCAG +TCTGATCCCCGGTCCCCATCCTCAGGGCCTCGAGCCCCTCTGCGCCAGGTCACACTCCAC +GGCTGGGCCCTTCCTGCCCTGGCTCTGCACCACTGCCCAGGCTGTGCCTCCTGCTACATG +CCCTTCCTTTCCCCCTCCTCGGGTGCTGCGATTTCCTGGAAGCTTCCTCTGCCAGTGCCC +CAACCTGGAGAGAATGTGTCACATTGCCCCCCTCTGCAGCCATGTTCTGGGAGGGAGGTC +TGCATCTTGTCCCAGGTCTGGTGCCAGGACAAAGCAGCACTCCTGGGAAATGGCATTTCC +CCTGGGTGCACAAAACTGTAGGGACCATAGCCTAGGTTCCGGGGACTGCTGGCTCACTCA +CTCCCACACTCGCTGCCCACAGACACTCCTCGCTGGTGGCATCGTGGAACTCTGTGCCTT +AAACGCCCGCTCTGGTCCCTCTACTCAAGCTGCTGGCTCACACAGGGTGTTCCGGCGCCT +GGGGAGTGTGTCAGTGCCCTTAGCGCCACCCACACCCCTCTGTGAGGGGGCTCCCTCACA +CCCCTGGCTCCTCTGAAGCTCCATGCCCACGTCACACAGACTTTCTTTTGCATTTGGCAC +AAATCACCAAGTGAGGTTGCAGGTCACAGGTGCACACCTTCCCACTGGGTGTACACGGTG +GAACTGAAGCCCCAGGCGTCCCTGGAGGGGCCCGAGGGAGATGATAAGCAGTCCCCAGGA +CAGCTGAGTGGCCCCATCCCACCTGCATCTTAGGGGGCTGGTGGTGCTGACGGGCTCATC +ATGAGTCCTGTGGTCCTTGCCTGGCCCACACTGGAGCTGGCTTCCAGCTTCCCACGTCTG +CCCAGTGATGTTTCATCCACAGATAGGCCCAGCCCCCATTTGCTAGGGTGGTGCCCAGAC +CCGAACCCCTGGGCTAGTTGGGATCTCCAGAGGAGGCATCTGCCTGGAGGGTGGCTTCCA +GTGTGTCCCACCCACTGCCCCCACGCCTACCAGCTGGTGCCCTCTCCTCCACCTGGGGCT +TATCCTTCCGGCCACCCTCTGTGACCCTGGTCTTCCCCTCCCTACCTTGGATACAACTGT +GGGCAGCCCACCTGAGCTCCCAAGAGCTGGGCAGTGGCACAGACACAGAGACTACCAGCG +AGGCAGGGATGTGGCCCCAGAAGGTCCTGAGCTTGCAAAGGATTGTTCTGGCTGTCTCAG +CCAGCTCTTGCTGTTCCACCTGCCCCAGAACCTCCGCATCAGTTGAGGGTCGTGGGGTTG +GAATGTGTGGCGAAGTAGGTCTGTGGCAGTGGAGTTGGGCCATGCTGAGCCTGTGACTGC +CCAGGAGAGAATTGGAAACTAAAAGGTGCAAAAGCAGTCAGAGACTCCCTACCACCCCCT +CACTGTGAACATTGGCCCAGCGGCCTCTGGGGCAGTCGCTGTAACCATCCACGTGGCATC 
+CTCGGTTCTCAGAGGCTGAGACCTCCGTGGCTGGAGATGCTTATTTTCTGCTCAGTTGTT +GTTCTGTAGAACTTATTTCTGTCTACATCCTGCCTCATCCCTGAGCCTGCCCCACCTGGT +CCACATTGCCTTTTTTTTTTTGATAAAGTTTTATTGAGATATGACTGACTAGCGGCACAG +TTCACCCATTTAAAGTGTATCATTCAGTAGGTTTCGTTTCATTCACAGAGTTGAGCAACT +GTCATCACTACCTAATTCCAGGACATTTCTTGATCCCCAAAGAAAGCCATACCTGTCAGC +AGCCACTCCCCATTCCCCGTCTCTTCAGCGCCTGCAACCTTTAATCTGCTTTCTGTCTCT +GGATTTGCCTATTCTGGACACTTTGTATAAAGGGAATCCTATAATATGTAGTTTAGGGGA +CTGACTTTTTTCACTTAGTGTTGTCGTCTAAAAACTCACCAGTGGCGAGGCACGGTGGCT +GACGCCTGTAATCCTAGCACTTTAGGAGGCCAAGGCGGGCAGATCACGAGGTCAGGAGAT +CGAGACCATCCTGGCTAACACGGTGAGACCCCATCTCTACTAAAAATACAAAAAATTAGC +CGGGCGTGTTGGCAGGTGCCTATAGTCCCAGCTACTTGGGAGGCTGAGGCGGGAGAATGG +CGTGAACACAGGAGGCGGAGCTTGCAGTGAGCTGAGATCGCGCCACTGTACTCCAGCCTG +GGCGACAGAGTGAGATTGCATCTCAAAAAAAAAAAAAACAAAGAAAAACTCACCAGCATG +GCTAAATGATAGAAAGGAGAGTTTTATTTGCCATATCAGTTTGCAGACCCGGAAGAGAGA +GTCTCTAGCATGGACTGAGGTGCTCTCTTTGAAGAGGGGAAGGGCAGTTTGAGTTTTATG +CCTCCCAAGGTCCATATCACACAATAGAGTCATACATATTCAGCAGGTTTGGGGAAAAGC +CATACATATTTATGAGGGGACCCGAGAACATATGCAATAGATAAACATACATGTAACATA +CATCCCATGTTCACTTTGGGGCAGGGTTTTAGCATTAAAATGAGGTAGAATCTGGATCTT +TCCATCAGAAGGTGAGCTCTAGGACACAGTTGGAGTGCAGCCTCTATGAACTGCTGAAAC +TGGCTTATGGTCTGTAGTTGCTTATCAGAAAAGAGTGTTTGAAGGCTGGTTCTCTGTCCA +GTCAGGGTTGTGGTGGTCTGGGCTGTCAAGAGTTAGGAGGGTCTGATAACGTGTGTGATC +TCTACTGTTGTGAGGGAGTTTATCAAGACTGTGGTTTTTCTTGTAGCTGTAGGAATTTAG +GGAGTTGCTGAGACAGCTGCCCTGAACCCTCGAGCCAGTTGGTACTTTTTGTTTCTTCCC +TTTATCCATTAATGGACACTTGAGTTTTTTCCACCTTTTGGCTGTTGGAAACAGTGCTGC +TCACAGTGTGACCTCACAGCATTTGTGTGGTCATATGTTTTTATGGCTCTCATGTGAATA +CGTAGGAATGGAGTTGTTAAGTCGTATGGTAACCCTGTTTAACCTTTTGAGGAACATCCA +AACTGTTTGCCAAAGCAGCCGCAGCATTATACATTCCCACCAGCAGTGTATGCGGGTTGC +AGCTTCTCTGAATCCTCACCAAAACTTGCTATGTTCTGTGTTTTTGATGATAGCCACGAT +AGTGGGTGTGAAGTTGTGTGGACTCGTGGTTTGGTTTGCGTTTCTCTGATGGCTATGATG +TTGAGCAGCTATTCTCATGCTCGTTGGCCATTTGTATGTCTTCTTTGGAGAAATGGTTAT +TCAGACTGGGTTGTCTTTTTGAGTTATAGGAGTGACTTATACATTCATTCTAGCTACAAG +TCTCTTATTTAAAGCACAAAGCTTAAAATTTTTAAGTCCTATGTCTCTCTCTTTTTTTTT 
+CTTTTGTTGCTCATGCTTTTGGTTTCTTATCTAAGAAACCATTGCCTAATCTAAGGTCAT +GAAGCTTTCCCCAATACTTTCTTCTTAGAGTTTTATAGCTTTTGCTCTTTCACTTAGGTC +TTTGGTTTGACTTAAGTTAACTATTTTTTTTTTTTTGAGACAGGGTCTGGCTCTGTCACC +CAGGCTGGAGTGCAGTGGTGCAGTCATGGCCCACTGCACTGTCAACCTCCTGGGCTTAAG +AGATCCTCCCAGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCAA +GGCGGGTGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAAACCGGTGAAACCCCG +TCTCTACTAAAAATACAAAAAATTATCCAGGCGTGGTGGCGGGTGCCTGTAGTCCCAGCT +ACGTGGGAGGCTGAGACAGGAGAATGGCGTGAACCCGGGAGGTGGAGCTTGCAGTGAGCC +GAGATTGCGCCACCGCACTCCAGCCTGGGCGACAGAGTAAGACTCCGTCTCAAAAAAAAA +AAAAAAAAAAAGAGATCCTCCCTCCCAAGTAGCTGGGACTGCAGATGTGTGCCACCATGT +TCAGCTAACTTTTATGTTTTGTAGAGATAAGGCCTCACTATTTTGCTATTTTGCCGAGGC +TGGTCTTGAACTCCTGGGCTCCAGTGATCCTCCTGCCCCAGCCTCCCGATGTGCTGGGAT +TATAGGAGTGAGCCACCTCTCTTGACCTGAGTTAACTTTTGTGTATGGTGTCAGGTAGAG +TCCAACTCCCTTCTTTGCCCATGACTATGCAGTTCAATGGTCTTCCATGTTGTTCATGAT +GGAAAAAGCACTGTGACACCTGGGGCTGGCGGCTTGGCTCTGGGGGCCCCAGCAGAGCCA +GGCTGTCTCCCTGGGAAATGCCTCGGCCAAGATATTCTCCTGGAAGTCTGACAGGGAGAG +GGTGGTGTCTTGGTCTCCCTGGTGCATCATTTGGGGTGCTTGGCTCCCATCAAGCCCACC +CCTGCTGCCCTGGCCTCTAGAGAAGAGCCTCTGAGGTGAGGTGCTTCTCCACCTCAAGTC +CCTGCCCTTCACTTTCTGTGACATGAGAAACCCTTTTCCTGTTGGCCATGCCTTCTGCCA +ACACTCTGGGAGCTTGTTCAGAGTCTGTGCAGAGCCCCCCGGCTCTCCTAACCTGGGGCC +TGGCTGGTACGCAGAGGTGGGCCATGCTCTGCTGAGGCCCAGCCTACACCTGTGCCCACT +GTGTGGCGCTGTTGGGGGTTCCCCAAACAGGGACATGATTGTGGTGGGTGCCTCCTGGCC +TCTGGTGGCTAATTCTCCCACTCTTGAACTTCTCAGGTGGCCCCCCAACCTGGCGTGAGG +TGAAGAGGTTCCCTCTTGGCTGTCCCCTTTGCCTGTGAGATCTCAGGTTGCTCATTTCTT +ATTCATCCATCCATCGAGGCTGCTGGGCATCTGTGCCTTGTGGTCTCCTGTGGGTGCCCG +CTTCCGCCTACTGCTGAGTTCTCCTCTCCTTGCCATGCCATCAGGTCAGGAATGAGCATG +GGGAAGGAGCTGGAGCATGGAGCGGCTTGGCCTCAGTCTGGCCTGATTTCATGGTCCCCA +GCTGTACCCCGTGTTAGGTGGGTGGCAGGTGGCAATTTGCCCTGACATGGCACAGCAGGG +CCTCTGCATGGCCCGCTGATTGCTCCTCACAGGCACCTACGGGCTGAGCAACGCGCTGCT +GGAGACTCCCTGGAGGAAGCTGTGCTTTGGGAAGCAGCTCTTCCTGGAGGCTGTGGAACG +GAGCCAGGCGCTGCCCAAGGATGTGCTCATCGCCAGCCTCCTGGATGTGCTCAACAATGA +AGAGGCGTGAGTGGGCGGGTCCTGCTGGGGTGAGCCCCAGTGTCCCGCCACCAGGGCAGA 
+GGGAAAGGCAGGCCCTGCTGCCCCGGGAGGCCCATGGAAGTGGCCAGGCTGGGTCCCCAG +CTGCAGGGAAGCTGATGGATATCCTGTCCTCCCTGCCTGTCCCCTTGAGCCAGCTGAGGT +TTCCAACACCAGGGACTTTTGATGACAGAAGATGGGCCCAGGCCCAGGCCCAGGCCCAGG +GGAGGCTCAGTGCACCCAGAGGCTTGATGCAGCCACGTGGGGCCCAGAGCAGATGGATGC +TCCCCTCTGCATGCCGAGAGCTTGTGGGAAGTTCCAGAAGATTCTAGACACTCAGCAAAC +CCAGGCGGAAGAGACACATCATGGAAATGAGTGGCTAGGGTCAGCACGCTCAGGGTTGAT +GGGCTTCCCGCAAGCGGTCTGTCCCCAGCGACCCTGGGCTGAGTGCTGGGCAGGGACTTG +CAGTTCCGGGAACTGCCTGGGTGTAGGAGCTCCCACCCACTTCCTTGCCCGTAGGCCTCA +AGGCCCCTCAGGGGCCAGGCACTGGGGTCTTCGTTGCCCCAGCCGGGGGTCCTGGTGCTT +GCCATGCCAGCCTTTCTCCCACTGGGCGAGATGCCCCACCCATCCCTCGCTCCCCACCCA +GGTCAGACCACTGCCCTGCCCAGGTGATCCAGCGTCCACTCCCACCCTGTCCAGCTTGCC +CCCTTCAGAGAAGCACCCATGGACCATCTCCCACTCCTCCCCCTGGGGCCCCTTTCTGCC +CCCTGCCTACCCTCCTTCTCCTCTGGGCCTCTGATAGGCGGCCTCGTCCTTCAGCCTCAG +CATCGCTGCCTCACTGGAAGAGCCCCACCTGGGCATGCCCAGCAGCCCCTCCCAGCTGTA +AGTCCACACAGCCCCCATCAATGTGCCATCTGGGGCAGGCCGAGGACCGGCCCTGGCTGC +CCTCACTGATGGCTGGAGCCCCGATGCTAGCTACAATGGCTGGCTTGAGTGGACTTCTGT +CACAGAGGCAAATGCCCACAGGGCACCTCTTGGGGCAGGCAGGGAGCTTGTATGTGCCAT +CTGGGGCCAAAGCTCCACGATTGAGTCATAAGTTAAGGGTAGATGGAGATTTTCCGCCTT +GAGCAGAAACGTGGAGGAAACACGCCTGGTGTGTGACTCACGCCTGTAATCCCAGCACTT +TGGGAAGCCGAGGTGGGCGGATCACCTGAGGTCAGGAGGTCAAAACCAACTTGGTCAACA +TGGTGAAACTCTGTCTCTACTAAAAACACAAAATTAGATGGCTGGGTGGTGCACGCCTGT +AATCCCAGCTGACGGCTGGAGCCCCGATGCTAGCTACAATGGCTGGCTTGAGTGGACTTC +TGTCAGGAGAATTGCTTGAACTCGGGAGGTGGAAGTTGCAGTGAGCCAAGATCGTACCAT +TGCGCTCTAGTCTGGGCGACAAGAGTGAAACTCCTCCTCAAAAAAAGAAAATAAGAGAAA +AGAGAAGAGAAGAGGAAAAAGAAAAGAAGAAAAGAAACAACCCTTGCAGTTCCCCGGCCA +CTCCCCAGAGCCAGTTAGGCCACCAGGTGGGCTGGGGCTAGACCCGGCTGCGGGCGGGCC +ACAGAGGGACTAATGGCCACCACTTCTCTCCATCCTGCAGGCAGCTGCCAGACCCGGCCA +TCGAGGACCAGGGTGGGGAGTACGTGCAGCCCATGCTGAGCAAGTACGCGGCTGTGTGCG +TGCGCTGCCCTGGCTACGGCACCAGGTATTGCAGCACCGTGGGTGCGCCACCTCCTATCC +CATGTCCCACCTCCCACGCTAGAGGGCCGGCAAAGAAGCCAAGTGCCCATAGCCAGAGCC +AGGCTTCTTCCTCGCCTGAGTGGATTCCAGAGCTTCTGCCCTGTCCAGACGCAGCTGCAG +GGTGAGCAGATACCACAGGCTCCCGGGGCCCCACAGGCTGGGCTTAAGGAGCCAGTACAC 
+AGCCACACAGGGCCAGGATGGCTTCAGGGCTGCCTGCACATTGGTGGATCCCTGCGGGCA +AACGCCCCGGCACCATGGCACTAGGCCTGCAGGCTGTACCCTTGGGTGGTGGCTGCTGGC +CAGCCTTTCCTGCCTGGGGCTGGCTGGCCCATTGGCAGGTGCAGGGTGTCATTTCCTTCA +GCCCCTCCTCTGTCATAGGCTCATCTTCCTGTGTGTGAGGGGAAGGCAAGTGCCAACTCC +ACTCCCTGGCAGGCCGCATAGCCGCTGGGTCCCACCCCACGGGGTGCACTGCGTATGTGT +CTGCCCTGGGAACCAGCCTGTGCTGGGGCTGTTGGGGGATCCAGGGCCACCCTCCGTCTG +CAAAGCCCACGGCAGAGAGGGTTGGAGGAGAGGAGAGTGTGCTGAATGTTTGCTTCAGGT +GGGCTGGCATGGCCTGGGCAGTGGAAGAGAGGGCACAGGAGGGAAGCCTTCCAGACAGAG +GAGGCCTGGGCCCCAAGTGGTCAACAGCTTGACCTCTTGGAAGAACCAAGCATGGCTGTG +TCTGGGAACCTGGGCCTCCAAAGGGGAAGTGTGTGGTCAGCCCGCAGGCTGCCCTATGCC +TTGGCTCATGCTACACGGGGAGAGCAGGGCAGGGTGGGGTGGTTCCCAGTGAAGGTGGCA +GGACTCAGCAAGTGTGGGCCTGTGGAGCCAGCCAAGAGCTCACACGCCACTGATGGCCCC +TCATCATCCCCAAGACTGAGGCTGCTGCTCTCTGTGACTTGGCACAGCCTCCAGGCATGG +GGTTCCTAACTCTACCTGCCTGCATTCTTGCCCTATTTGTGGCTGTGACAGGCAGGGCAG +GGCTGAGGGACACCAGGTGAACGAGGGCCCCTGCTCTCTTTCAGAACCAACACTATCATC +CTGGTAGATGCGGACGGCCACGTGACCTTCACTGAGCGTAGCATGATGGACAAGGACCTC +TCCCACTGGGAGACCAGAACCTATGAGTTCACACTGCAGAGCTAACCCCACCTCTGGGCC +TGGCCAGTGGGCTCCTGGGGGGCCCTGCCTTGAGGGGCACTGTGGACAGGAAACCTTCCT +TTGCCATACTGCATTGCACTGCCCGTGGCTTGGCCAGCATCCCCCGGATCAGGGCCCTGT +GGTTTGCGTGTTACCCATCTGTGTCCCCATGCCCAGTTCAGGGTCTGCCTTTATGCCAGT +GAGGAGCAGCAGAGTCTGATACTAGGTCTAGGACCGGCCGAGGTATACCATGAACATGTG +GATACACCTGAGCCCACTCTTGCACATGTACACAGGCACTCACATGGCACACACATACAC +TCCTGCGTGTGCACAAGCACACACATGCAAGCCATATACATGGACACCGACACAGGCACA +TGTACGTGCACAGGTGTGCTACACATGTGCACACATGCACAGTTGCACAGACACACACAC +ACAGGTGCACACACACGATGCCGAACAAGGCAGAAGGGCGACTCTCACCTCTCATGTGCT +TCTGGCCAGTAGGTCTTTGTTCTGGTCCAACGACAGGAGTAGGCTTGTATTTAAAAGCGG +CCCCTCCTCTCCTGTGGCCACAGAACACAGGCGTGCTTGGACTCTTGACAAGCAGACCTG +CTCCTGCAGAGGAGACAGCCACATTTGGAATTGGGCACCGAGAAGACCTGAGAAAAACCC +ACTCTCTCTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCACCCAGGCTGGAGTGCA +GTGGCACGATCTCGGCTCACTGCAACCTCCGCCTCCCAAGTTCAAGCAATTCTCCTGCCC +CAGCCTCCTGATTAGCTGGGATTACAGGCGTGTGCCACCATGCCCAGCTAATTTTTGTGT +TTTTAGTAGAGACGGGGTTTCACCATGTTGGTCAGGCTGGTCTTGAACTCCTGACGTGGT 
+GATCCACCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCTGG +CCGAAAAACCCACTCTCATAACAGAAGTGCAGACTCATTGCTAGATTCAGTGCCCTTGAG +TGTGCCAGAGGTCCTCTGTGTTTGAGACAATCCTGTGTGTGCCAGGAGGCTCCGTGTGCA +CCAGGGGCTCTCAGAATCCCGCTTACCCAGCTGGAGACCATGCCTCTGGCAGCCCCATCT +CAGCCAGCCCTGCTCTCTCCCTCTTCCCTCCAGGTGAGGCAAACTTCATAGGAATCTGTA +CCTGAATGTGAGCTCCTGATAATAAAACTCTGAGGCTTTGGTGAGCGCATTTCGAGGCCT +TTCCCTTGTATGCAGGGTGCCAGTGGGAGCTGCTATGTCTCCTGTGCAGCCAGACACCCT +GGACGAGGCCCTTCCCACCTCCCCCTTTCCTCACGGCTTCTTCAGCACCACATGTGGATG +CCTGTGGGTCAGTCCATCTGTCCATGGGCAGGGGCCGCTGTGAGAGACCCCATGCGCGTG +GGATGTGGTAGATGCTGATTATGGGGCCAGCCACCCATACAGGTCTATCAGGTCGCAATG +GAGGCCCCACTGGAGCGGGAGGCAGGCTGGCCTGGACTTCTGTTGGATGATGGGGGCCAG +GGGATGGTAGGAGGGGCAGGAGAGCAGGTCTGCATCTGTCTCAGCCCCAGGAGCTCCTGG +TTTGGGGCCAGCTCTAAGGAGCTTCATCCCTGAGCCTGGCCGAGTATAAGGGGGTCCCGG +ATATCCAATGCTGAGACCTAGACTTTTCTAAGCCTCTGCACCCAACCCCTAACTTGGCCT +TTTCAGACCCATGAGAGTGCAGGGTGGGTGTGGACCTGCGCCAGGGCTTCCCAGCCTGAT +TGTGACATGATGTGAGTCCACCTAGGGAGGAGCAGGGGCCTCCCCGTCTCTTCCCCTTCT +TGCCCAGACCACTTTGCTGGGCTCAGAGCCTTCCATGGCTCACTTTGAACTCAGAAGAGT +TTCCCAGCTGCGACCCAGGCTGCACCCCCAGGCCTGAGGAGAAACTGAGGCCCCACATCC +CATACGTGTCCTCTGAGTCAGGACAACAGGCTCCCGGCTGGGGGCACTGCCCTTGCTGCC +AACACATTCACCTGCCCCAGTGGCTACCATGTCCTGTGCCGGTGACCAGTGCCAGTCCCC +AACCTGGATGGTGCCCTGCCCTCATGAAGAGCCCCCCGGATGTTCACCTCTGCCCAGAGA +CCCGGCCTGCCCAGGAAGAGCAGTCTCTGCTCTTGTGTGGGTCCCTCTGCCCCCACCCTG +CCCAGGGCTCCCCAGCAGCCTGAGGCCCCTAAGGAAGAACAGCCCTCAATGGTGATTCTA +TTGGTGTTATGAGTTGTGTCCCCAAAGAGATATATCAGTGTCCTGACCCACAGTACCTCA +GAATGTGACTGCATTTGGAGATAGGGTTCTTCCAGGGGTAACCAAGGTAGAATGAGGTCG +TTAGTGTGGGCCCTAATCCAATATGACGGCTATCCTTATAAAAAGAGGAACATTTAGGCT +GGATACAGTGGCTCATGCCTATAATCCCAGCACCTTGGGAGGCTGAGGCAGGAGGACTGC +TTGAGCTCAGGAGTTCAAGACCAGCCTGGGGCAACATGTCGAAACGCTGTGTCTACCAAA +AAAAAAAAAAAAAAATACAAAAATTAGCTGGCCATTGTGACTCACACCTGTAGTGCGAGG +CTGAGGTGGGAGAATCGCTTGAACCCAGGAGGCAGAGGTTGCAGTGAGCCAAGATTGCAC +TACTGCACTCTGCCTGGGCAACAGAGGGAGACTCTGTCTCAAAAAAACAAAAAAGGAAGA +TTTGGGCTGGGCGCAGCGGCTCACATTGTAATCCCAGCCCTTTGGGAGGCCAAAGCTGGA 
+GGACTGCTTGAGCCCAGATTAGCCTGGGCAACATAGCAAGACCCCATCTCTAAAGAAAAA +TGAAAATTAAAAAATTAGCAGGGCATGGTGGTACACACCTGTAGTCCCAGCTACTTGGGA +GGCTGAGGTAGGAGGATCACTTGAGCTCAGGAGGTGGAGGCTGCAGTGAGCCGTGATTGC +ACCACTGCACTTCAGCCTGGATGATGGAGAAAAACCCTGTCTCTTAAAAAAAAAAAAAAA +GGAAAAAGAAAGATTTGGACACGGACGGATACGCGCACAGGAAGAACACCGTGTAAAGAC +TGGAATTCTGCTCCACGAGCCAAGGCATCACCAGAAGCTGGGAGAGGGGCCTGCTTCAGA +GCCTCTCTTAGAGCCTTCAGAGGGAGCAGACCTGCTGTCACTCTGATCATGGACTTCCAG +CATCCAGAGCTGTGTGACAATAAATGTCTGTTATCTTAAGTGGTTCAATTTGTGATGCTT +TTTTTTTTTTTTTTTTGGAGACAGGGTCTCACTCCATTGCCCAGACTGGAGTGCAGTGGC +AAGATCACAGCTCACTGCAGCCTCCACCTCCTGAGGTCAGGCAATCTTCCCACCTAGTCC +CCTGAGTAGATAGGACTACAGGTGCATACGACCACGTCCAGCTAATTTTTTGTATTTTTT +GTAGAGACGGGGTTTTACCATGTTGCCTGAACTGGTCCCAAACTCCTGGGCTCACATGAT +CCTCCCACCTTGGCCTCCCATAGTGCTGGGATTACAGGTGTGAGCTACTGAGCCCAGACT +GTGGTGCTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCACTCTGTCACCCAGGCT +GGAGTATGGTGGCACAGTCTTGGCTCACTGCAACCTCCACCTCCCGGGTTCAAGCAATTC +TCCTGCCTCAGCCTCCTGAGTAGCTGGGACTACAGGCACCTGCCACCACGTCCAGCTAAT +TTTTTGTATTTTTAGTAGAGACGGGGTTTCACTGTGTTAGCCAGGATGGTCTTGATCTCC +TGACCTTGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCAC +CGCTCCCGGCCGACTGTGGTGCTTTTTTAAGGCGGCCCTAACAGACTAACACAGTAGTGA +TCTTCACTTTACAGATGAAGGAACTGCGGCTTGGGGAGATACCATAACGGCCCCCCGGTT +CCGAGACAGGTGGAGAGGCAGAACCATGGGCCAGGACAGAATAGGATGAGTACAAGGTGG +CATCTCTGGGGGTGCCTGCCCCACAGATCCATGGTCTTCCCATGCTGGTGTCCCCACAAT +GTCTGGCACTGCTCCCCGCTTCACCTTACTTGCCCAGGCCTACCCCCATACCCTCAGCTC +ACAGCTACACACTCTGCACAGCTCCCACCCCAGGGTCTTTGTGCTCACAGTTCCCCTGCA +GATACCCAAACCACACTCTTCCCCCAGTTCCTGCAGGTCTTTACTGAAGTCTCGGGCAGG +CAACTCATTTAAAGTTTCAGTTTAGCCCAGCACGGTGGCACACACCTGTAATCCCAGCTA +CTTAGGAGGCTGAGGTGTGAGCGTCGCTTGAGCCCAGGAGTTTGAGGCTGTAGTAAGCCA +TGATTGCACGACTGCACTCCAGCGTGTGCGACACAGCGAGCCCCCATCTTGAAAGAAAGA +AAAAAGAAATTAAAAAGGAGGCTGGGCACTTTGGCTCACACCTGTAATCCCAGCACTTTG +AGAGGCCAAGGTGGGCAGATCACTTGAGTTCGAGACCAGCCTGACCAGCATGGTGAAACC +CCATCTCTACTAAATATACAAAAATTAGCTGGACATGGTGGCGGATGCCTGTAATCCCAG +CTATTCGGGAGGCTAAGGCAGGAGAATTGCTTGAACCTGGGATGCGGAGGTTGCAGTGAG 
+CTGAGATCGTACCACTGCACCCCAGCCTGGGTAACAGAGCTAGATTGTATCTCAAAAAAA +AAAAAAAAAAAAAAAAAAGGAAGGAAGGAGGAAGGAAAGAAGGAGGGAGGAAGAAGGAAG +GAAGGAACTCTCTTTTTCTCTATGGCAATGAGCACGTACCATCTGGCATGCTGTGTGCTA +GGATTGGTTCTTGCCTGTCTCCATCAGAATGGGAGCTTCAGGAAGGCAGGGACCTTCCTT +CTCTTGCCCACTACTGTACTCTGATGCCCAGGCCAGGCAGGCATTTGAGCCAAGAATGAC +AGTGCTGGTGCTTCCCAAGCCCCCATCTATTTGGGCCAACCCCTTGATGCAAGGCCCTTG +TGCTAATAGAGGGTATAGGGGTCCAAAGCCAGTGCCTTCTAGAAGAGCTTTGGACGTGAC +ACCAGGAATGGCTGGGCCTGGACAATGGCCTTCCTGGCACTGCCCAGCCAGTCCTCTGTG +CTTGTTGCCAGATGCACCTGATCCCTGAGTCCCCAGCATGGGTCTCTTTCTCCCAGGCAG +ATGGGGAACTCACCTTCTTCTCAGATGCCCCTTCCCAACATAAGCTGCTTCTGCCCTCTG +GCCCCGAGATGGCCAAGGTGAGCTGTGTAGACATAGAGTCTGGAGGCATGGGGCTGGCTA +ACGTGGAGGCAATTCTATGAACAGACATGGCCAGGCCAGGAATGCGGCATGGCCTCAGGT +CTCATCCTTGAAGGCCTGGGCTCCTGGTCTGCCTGCAGACTTCCTGAGACCACGTCTTGT +TCTCATCTGCCAGCAGATGCCTGGCTCATGGGTGAGCCGGGCTTAGGTGGAAACACAGGA +GCCCAGAGGGGAAAGGATCCTCCCCCAGCTTGGGCTGGAGTCCCAACTTCCAATCATCAA +TCTTTGCAACCCTGGGGCTGGACCTGGCTAGTTCCATGGTAAGGATCCTTCTCCGTCACC +CAGTCAGTCCCAGGCTGTGTGGGAAACTGGCTGTGGTGGGCAGGTCTGAGGTCTCAAGTC +TGACAGGGGCCACTGGAGCCTCCAACTCACCAATCAACCAAGTGTGAGAGGTTGCTTTGG +TTGAATGGCCATGTGCTGGGGTCTGACTGGCCCAGCCACAGGGAGGCTGGCATCCCCTAG +CTGAGTCCTGGACCCAGACCCTCCAGGGCATGGAGCCCATTGTGAGGTGTCTGGTGCTGA +AGTGGTGGGGGAGGCCCGTGCAGGCCTACAGCTTTGTCATCTGCAACATTCCTCTCCCCA +CTTTCTTTAAACTTTTTCATTTAGAAATGATTTCAATGGCCAGGCGTGGTGGCTCATGCC +TGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGTGGATAACCTGAGGTCAGGAGTTCGCG +ACCAGCCTGACCAAAATGGAGAAACCTTGTCTCTACGAAAAATACAAAATTAGCTGGGCA +TGGTGGCACATGCATGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGTAGATAACCTGAG +GTCAGGAGTTCGCGACCAGCCTGACCAAAATGGAGAAACCTTGTCTCTACAAAAAATACA +AAATTAGCTGAGCATGGTGGCACCTGCCTGTAATCCCAGCGACTTGGGAGGCCGAGGCAG +GAGAATCACTTGAACCCGGGAGGCAGAGGTTGCAGTGAGCCGAGATCATGCCATTGTCCT +CCAGCCTGGGTGACGAGAGTGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAATGAAACCT +CGTCTCTACAAAATATACAAAAATTAGCTGGGCATGGTGGCAGGTGCCTGTAATCCCAGC +TACTTGGGAGGCTAAGGCAGGAGACTCGCTTGAACCCAGGAGGCAGAGGTTGCGGTGAGC +CAAGATGGCACCACTGCACTCTAGCCTGGTGACACGGCAAGACTCCATCTCAAAAAAAAA 
+AATTTTTTTTTCAATTTACTAAAGGATTTAAAAAGTAGAACAAAAAATTCTAGTGTCTTT +CACCCAGTTACCCCAAATGTGAATGCCGCACCCAATCACAGAAAAATGATTACAACCAGG +AAATTAACACTGATAATGTTAAAGAAAGAATTACTGAATGACATTTGCTAAAGCACTGTA +GGGAAGACTTTATTCAGGACCGTCTTGCTAGGTCTAGGGACCACAGGAATGAGATTCTGC +AGTGGGGGAGAGAGATTAAGCTCAAGTCCGAATGGGCAAGTGGGAACTGATGATAGCTGA +GGAGCAGGGCGGTGGGAGCTGGGTCAGCACATGGAAAATGACTAAGAAGAAACACCCCGG +GTGAGGGAGATTCTGGCTACACCAATTAACAGGATTTTTGCTGAAGACAGGCCACAGTGA +CCGGACATCTTCACCTGGGGGACAGTGTAGGAACGGATTAGATAAGGAGGGTGATCAGAT +GTCGAGGGTCAGGGGATGCTTCTTGCTAAAGTGACTTTGCAGGGTTCTTGCTCTAAGTGA +AATTTACAAGGAAGTGCACAGATGGGCCTAGAAGAAGGTTCAGAAGCCTGGTTTGGCCAA +GCAAAGAATCTTTGTCGGTCCCTCCTCTTGTTCAAGGAAAGAAGAGGTTATCTTTCCTTG +AATAGTACGAGTCCATTTCCTTGCTGGGCTGCCTTTTGTTCATTAAAGGCCAGCTGAATC +ATCTGCTGGGACCGGCAGTAGAGGAGATATCCTCCATGGTGTGAGCCATCAGGCGTTGCA +TGTTGAAGATAAAAGAAAAAGATATTAGTTGGAACCAACTATAAACTCAGTTTCTGAGTT +CAGAGGGCAGCTACTCAAGATTTCCAGGTGCTGTGCTTGAAGCATCTTCAGATGGAGTAG +GGGCTAACAGACTATCATGGACGGCAGTTTGCGTGTCATGAAGTCTGCCCACAAATTATC +TGTTGTGGTGCCATCTTAAAGTTTCTTTCTTTTTTTTTTTAGACAGAGTCTTGCTGTTGT +TGCCCAGGCTGGAGTGCAGTGGCGCAATCTCAGCTCACTGCAACCTCTGCCTCCCAGGTT +CAAGCGATTCTCTTGCCTCAGCCTCCAGAGTAGCTGGGACTACAGGCACACACCACCATG +CCCAGCTAATTTTTGTAATTTTTGTAGAGACGGAGTTTCACCATGTTGGCCAGGATGATC +TCAATCTCTTGACCTCGTGACCTGCCCACGTTGGCCTCCCAAAGTGTTGGGATTACAGGC +GTGAGCCACCGTGCCTGGCCTAAAGTTTCTATCAAGTTGTCCGGGTTCAGCTTAAAGGGC +TTCAGGAAATGGGAAGTTTTAATTTTTAGCGGTTCCAGGCCAGAAGGGTGGGAAAAAAAC +TGGAAACATCAGTTTAGAGAAAATCAAATATTGGAGGAAACTAGAAGAATTCAGGATCCA +ATCCAGTTTGTAGGTAAATATCAAAACCTCAAAAACAATGAACAGGGCTAGAATCTGATA +ATGGGTGCACTATAGTTTTCTTCTGAAACTAATTTTTCTCTATATAGTCACCCCACTTCT +ATCAAAGATGTTGAAGTAAGACTAATTTGTTTGTAGCTAAGTTTAGGTTGATTAAACTTG +GCCTAATTACTTACATAAGTGCCGCAACAATAGTGATTGAACACATAGGCTTTCTTTAAG +TTTCCTTTGCTAGGACTTTTTTTTTTTTTTTTTTTTTTTTGTCACGGAGTCTCGCTCTGT +CGCCCAGGCTGGAGTGCAGTGGCGTGATCTCCGCTCACTGCAAGCTCCGCCTCCCAGGTT +CACTCCATTCCCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCACCCGTCACTATG +CCCGGCTAATTTTTTTTTGTATTTTTTTTAATAGAGATGGGGTTTCACCGTGTTAGCCAG 
+GATGGTCTTGATCTCCTGACTTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGCAT +TACAGGCGTGAGCCACCGTGCCCAGCCTGCTAGGACTTTTAATAAGGCATCTCAGATTGG +GCTTTTAAAAGCCTTGCCAGGCTAGGAAGCCAAGCCGAAGCTTCACCATCATACTTCATT +TGCAATCATTATAGGTTTGGGTGAATTCCTCTCTTTTTGAGGTCCCCCAAAATATCCTGA +CATCCTTGGGCCTGTCAGGAAGTGACTTTCCTTACTCACCTACAAGGCAACCACATGACC +CATGTATTCAAGGTTTAAAACCCATTTTTCCCCAAGGGCTTTATTGGTTCCATAAAGTCA +TCTTAGTTACTTAAAGTTGTCTGGCCATAGCTAAAAATATGGCATTCTAGTCAAAGCTTT +GGTAATATAAACAGCATTTCCAATTGTGTCCTGTTACAAAGAGAACAGGTTCTTACTGAA +CTTATGTAAATAACCATATTGCCATTAAAATAAAAATACTCATGAATAGTTTCCGAATTA +TGGAAGGATAAGGTACAGAGAAAAAGTAATTGTTTCACTTTTGTTTACAAAGGCATACTT +TACCAAACTGCTGTAAGCTATGGATAACTTAAAAGAGAAAAAAAAGTTTCCTTAAATCTG +GAAAACAAAACATTAAAGAACCAGCAATGTTTCAAACAAAACATAAAAAAATTATCCTGG +CTGGGTACAGTGGCTCACACCTGTAATGCTTAGGGAGGCTGAGATGGGAGGATCATTTGA +GGCCAAGAGTTTGAAACCAGCCTGGTCAACATAGTGAGACCCTATCTCTACAATAAAATT +AAATAAATAAATAATTGTAATTGGTTTATTCATTGCCATGTAATTTATTCTTGTTCTGTT +TGATCTTGATCAGCAGTTTCACGAACCCCTCAGTGTCTTCATTCAAGTTCTGGAAATTCT +GGCCAGGTGTGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGCAGA +TCAGTTGAGGCCAGGAATTTGAGACCAGCCTGGCCAATATGGTGAAACCCTGTCTCTACT +AAAAATATAAAAATTAACAGGGCGTGGTGGTGGGCGCCTGTAATCCCAGTTACTTGGGTG +GCTGAGGCAGGAGAATCGCTGGAACCCGGCAGGCAGAGGTTGCAGGGAGCCAAGATTGCA +CCACTGCATGCTGGCCTGGGTGACAGAGTAAGACTATGTCTCAAAAAAAAAAAAAAAAAA +AGAGTTCTGGAAATTTTTTTTTTTTTTTTTTGAGACGGAGTGTCAGTCACCCAGGCTGGA +GTGCAATGGCACGATCTTGGCTTACTGCAAGCTTCGCCTCCTGAGTTCAAGTAATTCTCC +TGTCTCAGCCTCCAGAGTAGCTGGGACTAAAGGCACACACCACCACACCCAGCTAATTTT +TGTGTTTTTAGTAGAGATGGGGTTTCACCATATTGGCCAGGATGGTCTCAATCTCTTGAC +CTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTATAGGTGTGAGCCACTGCG +CCTGGCCCCAGCCACATTTCTTAAGTAATCAAAAACCTAATAAAAGACAATATGAAGAAC +AAGGAACTATCTTGATATAACACAAAAATCTTTGTTTCCAAGGTCAATTATTTAAAAGGT +AAACAGAGGCCAGGCGAGGTGGCTCACGCCTGTAATGCCAGCACTTTGGGAGGCCGAGGC +GGGCGGATTACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACAGTGAAATCCCGTCT +CTACTAAAAAAATACAAAAAATTAGCCGGGCATGGTGGTGGGCGCCTGTAGTCCCAGCTA +CTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGCGGAGCTTGCAGTGAGCCG 
+AGATCGTGCCATTGCACTCCATCCTGGGCGACAGAGTGAGACTCCGTCTCAAAAATAAAA +AGTAAAATAATAATAATAATAATAAAAGGTAAACAGAAATCTTCATAATCTCAAATACTG +TAAGAAAACTTTGTCATTTCAACAGAGAAGATCAAGTTAAAGTTCTGCATCATAGCACTA +CTAATAAAGCTAATTTTAACAAAACCTTATAAATGAATCCATCCAATCTCATGCAAGATA +ATATTTCTTTTCCAAGATTTCTTTTCTATAGATCTTTTACAACTTAAAAAAAATATCTGG +CCGGGCACGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATC +ATGAGGTGAGGAGATCGAGACCAGCCTGGCTAACACGGTGAAACCCTGTCTCTACTAAAA +ATATAAAAAATTAGCCAGGTATGGTGGCGGGCAGCTGTAGTTCCAGCTACTCGGGAGGCT +GAGGCAGGAGCATGGCGTGAACCCGGGAGGCAGAGGTTTCAGTGAGTCGAGATCTTGCCA +CTGCACTCCAGCCTGGGTGACACAGAGAGATGCCATCTCAAAAAAAAAAAATCTATCAGT +ACTTTTTTTCCCCTACATTTTTTTTCTTTCTCATTCTGGAACAACCAGTTGCTATATATA +TTAGCACAGAGTCAGTTATTCCTTTAAGGGATTTTATAAATTAATTTGGTGCTACTATCT +GGAGGTAGAAAAATATCATGTATATATAACATACAAACATACATATGTAGTTGCTCTGCA +ACTGAAGTTGGCAGAGGCCAGACCCAGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAG +GCCAAGGCAGGCAGATCACTTAAGGCCAGGAGTTTGAGACCAGTCTTGCCAACATGGTGA +AACCTGGTCTCTACAAAAATACAAAAAAATCAGCTGGGCGTAGTGGCGTGCGCTTATAGT +CCCAGCTACCTTGGAGACTGAGGCAGGAGAATAGCTTGAACCCAGGAGGTGGAGGCTGCA +GTGAGCCGAGACTGCGCCACTGCACTCCAGCCTGGGTGACCCAGTGAGTCTCTGTCTTAA +ATAAATACATAAATAAATAAACAAACAAATAAATAAAGTTGGCAGAGTATATAGTCTGAA +GGGGTTTGAGGAAAGGGGATTCAGGTGACTGAGAAGTTCCGATGGGAGAAGTAGGATCCA +ATAGAGAGAACAGAGAGGCCTCACAGTGGGAAGAAAAGACTGCCAGCCCAGGAGTCAGGG +AGTGAATCCCCATTCAGAATGAAGAGCCACAAGGAAGACCTTCCAACCCAGGAAGTATTA +TACCTTTCAAAAGAAGGCTGGGACCTTAAAAATCAAAATCTGTCCTTACCAGCTAAAATA +AAAGTTTTTCTACAGTCATGGCTCAGGAGTTTGACTCACCAGTAGATCCCCAATCAGCCA +GTCAGAAGACAAAGGCTGCAAAGTTGCACATTGAGAGTCCTGGGTCAGAGGCCTGTGGCA +CCACAACCATTAAAGAAAAAAATTATTCAGCCCTTGTCAAAGCACGGTCGGGAAGGCCTT +ATTCAGGAGCATTGCCACTGGCATATGGACCACAGGCAATGGGGTTTTGCTGCAGGGGAG +AGATTGGGCTGAACTCTGAATACAGCATGGACAAGTGTGAACTGATAGCCAAGGAGCAGG +GCGGGGGTCAGTGGATGAAAAATTACTAAGAAGAAACATAAGTGATAAGGGGGATACTGG +CTAAGCTGACCTAAGAGGATTCTTGCTGAAGACAGGCTGGGGTGATCAGACATCACCTAG +AGGATGGTGGAGGGATGAGGAATTTCATCAGTATTGAGGGCGATCAGATATCAAAAGCAG +GGTGGTTCTTGCTAAACTGGAATTCTCAGACGGGCCTAGAAGATTTAGGAGCCTGACTAA 
+AGTTTGGCCAAGCAAATAATCTTTTGTCAATACAACAACACGATACTTTTTACTGTTTGT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACAGAGTCTCCTGCTGTCTCCCAGGC +TGGAGTGCAGTGGCGCGATCTCAGCTCACTGCAACCTTTGCATCCCGGGTTCAAGCAATT +CTCCTGCCTCAGCCTCCCAAGTAGCTGCAACTACAGGTGCGTGCCACCATGCCCGGCTAA +TGTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCCTGTTAGCCAGGATGGTCTCGATCT +CCTGACCTCGTGATCTGCCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCC +ACCGCGCCAGGCCAATACAATAGTATTAACTAATCTACAGACCTTGTTTGAATTATATCA +ATTGTCCCATTAATGTCCCTTTTCTGAGCCAGGATCCCATCTATCGTCTGGATTCCACAC +TGCATTTTGTTATCTCTTTTAATTGAAGATGGCTCCTCAGTCCATCTTTGACTTGAATGA +TCTCGATACTTGAAAAGCACTGGCTGGTAATTTTGCAGAATGCCCCTAATTTGGGGTTTG +TCTGCTCTTCATTCAGATTCTGCCTTTTGGGCAAAAATACCCACAAGTGAACTTGTGTTC +TTCTCAGGAGGTCCACAGTATGTCCTCTTACTGGAACTAACTTTTTTCATTTCTATTGAG +GCAGAATTTATATACAATGCAATACACATGTTTAAATGGTACAATTTGCTAAGTTTGCTG +GTGACCTTGATGGCTTGGTTAAAGGTGGTGTCGCCCAAGTTTTCTTCCACTCTAAAACTA +CTATTTTCCCTAGGCAATTAGTATCCTGGGGGAGAAACTTTGCAACACTGAAAACAAAAC +AAAACCTGTTTGTCATGTTTTTGCCCACTATTTTTTTTTTTTTTTGAGATGGAGTCTCAC +TCTGTCACCCAGGCTGGAGTGCAGTGGCGTGATCCCGGCTCACCACAACCTCTGCCTCCC +AGGTTCAAGTGATTCTCCTGCCTTGGCCTCCCGAGTAGCTGGGACTACAGGCACGTGCCA +CCATGCCCAGCTAATTTTTTTTGTAGTTTTAGTAGAGACGAGGTTTCACCATCTTGGCCA +GGATGGTCTTGATCTCTTGACCTCGTGACCCACCCGCCTCGGCCTCCCAAAGTGCTGGGA +TTACAGGCGTAAGCCACTGTGCCTGGCCTAATTTTTTTGTAGGGGGTTGGGGGCCGGGGA +CCGAGTCTCGTTCTGTTGCCCATGCTGGAGTGCAGTGGCTGGATCTTGGCTCACTGCAAC +TCCTGCCTCCCAGGTTCAAGCAATTCTCCAGCTTCAGCCTCCGTAGCTGGGACAACAGGC +ACCTGCCACCAAGCCCTGCTATTTTTTTTTGTATTTATAGTAGAGACAGGGTTTCACCAT +GTTGGTCAGGCTGGTCTCGAACTCCTAACCTCAAGTGATCCGCCCGCCTCGGCCTCCCAA +AGTGCTGGGATTACACGCATGAGCCACTGTGCCTGGCCTTGCTCACTATTTTTTCTGGAG +ACGGTCTCGTTCTGTCACCCAGGTTGGAGTGCAGTGGCCCGATCTCAGATCATTGCAACC +TCCACCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAG +GTGCCTGCTACCACGCCTGGCTACTTTTTGTATTTTCAGTAGAGATAAGGTTTCGCCACG +TTGCCCAAGCTGGTGTGGAACTCTTGAGCTCAGGTGATCCGCCTGCTTCGGCCTCCCAAA +GTACTGGGGTTACAGGCGTGAGCCACAGGGCCCGGCCCACTATTTTTATCACTCAAGATT +TTCGTCTACAGCAGCTATTATTGGGGTGGCTACCCAGTGGAAAATTTTTCTATTTCCACT 
+ATTCCTTCCACATTTATTGACAGGAAAGCTAGTCTAAGGAAGAGCTGTACCCTCACTTAC +TCAGACCCTCGTTTTCCTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCACTCTGCCGC +CCAGGCTAGAGTGAAAGTGGTGCTATCTCGGATCACTGCAGCCTGGAACTCCTGGACTCA +GGTGATCCTTCCACCTCAGCCTCCCGAGTAGCTCGGACCACAGGCGCCCGCCACCACGCG +GCTAATTAAAAATAATTTTGGGCCCGGCGCGGTGGCTCATGCCTGTAATCCCAGCACTTT +GGGAGGCCGAGGCGGGCAGATCAATTGAGGCCAGGAGTTTGATACCAGCCTGGCCAACAT +GGTGAAACCCCGTCTCTACCAAAAATATAAAAATTAGCCGGGCGTGGTGGTGGGCGCCTG +TAATCCCAGTTACTCGGGTGGGTGAGCCAGGAGAATCGCTTGAATCCGGGAGGCAAAGGT +TGCAGTGAGCTGAGATCGCGCCACTGCACCCCAGCCTGGGAGACAAAGCGAGACACCGCC +TCAAAAAAAAAAAAAAAAAGAAAGAAAATTTGTAGATACTGGGCTCTCACTATGTTAGCC +AGGCTGGTCTCGAACTCCTGGGCTCAAGTAATCCTCCTGACTCGACCTCCCAAAACTCTG +GGATTACAGGCGCAAGCCACCGTGCCCGGCCGAACCACCGTGCCCGGCCGAACCCTCCCT +GTTTTGGAAATCCTGTATTAGCAAAGGGGACAGCGCCAGACAGTTGAGCACACGCGTTCT +TTATAAATGATGATGTCTGATACAGTGATGCTTTTAACAGGGTTGTGTAACGCCTGTGTG +TGCACGGTCTCTTCTGCAGTGTCCACCGAAGGTGGCCCGTCTGCGTGTCCGCTCCCGGCG +TACTCCGCGCTCCCTGTTCCCGGCATTCCCCGCGCTCCCCGAGCTTCCCGCGCTCCCCTG +CTCCCGGCATTCCCGGTGCTTCCCGTTCCCGGCGCTCCTCGCGGCCGCCGCCCTCAGTCC +AGCGAGCCCCGCCTGGGCAGCGACTGTTAGCGCGACCGCGGCCTGGGCGCCTGGTCCCTC +CCGCGCGGCCGCGCTATTGGGCCGCGCGCGGCGGACACACGCGCTACGCGCCGCCGATTG +GCGGCCTCCGCAGCGCGCTCTGCGGCTCCCCCAATGCGGGGCGGCCGCGCGCGGGGATTG +GCCGCACGCCGCGAAGGCCCGCCCTCCGCTCGCCCGGCGCGGCAGGCGGGTGCCGGCGAC +CGGAGAGCCTGGACAGGCTTTCCAGATGGCTGCGGCGGTCGGTCGGTGAGGCTTTCCCGG +CTGTGGTTTGGCTGCGGGCGGCTTGGGCAGCCCGCGGGCGCCTCAGGTAGGTGCGGGGCG +CGAGGGGCGCCCGCGGGCGGTTGGGCGGGCGCCGCGGCCTGCGCGAGGGCGCGCCCTTCC +CTCCCGCCTCCCTCCGGGACCGAGGCCGCGGGCGGGGCGGGGGCGCGGAGCCCGGCCTTT +GTGAGGCAACATGGCCGCGGGCGGTGGAGAGCGCGGGGTGGGGACGCCTCCGGGGCTGGG +GGGCGGGCCCCGGGCCCAGGCGTTGCCGACTCTCGTCGCTGTCCGCCCGGGTAAAGAGGC +TCCTCTCCCGGAGGGCCCGAAGCCTAGCATCGCCTTCAGGCGGGAAAGGTGTGGACAGGG +CCAGTGCTCGCCTGGGACCCTGCCGGCTTTAACACAGTTTGGGAAATCCAAGAAGTCGCC +TAAGTCTTTTTGTCCACAGAATGCTCACGGGTTGAGTGTCACGTAGATAATTGGGATACG +GTTTTATTGTAACAATGAGCATGGCGGAAATAAATGGAAGGTGGCAACCTGTTGACAGTG +ATGCCTGTCTGGTGTCAACCGATGGACTTGGGTTTCTTGTGGCAGGAGTCTGATTTGCCT 
+TTTTGCATGCTTGCATGTACCTTGTTCCTGCACGGCAGTCCTGTCTTTGGCTGTCGACTG +TTCGAGGCCAACCACCAGGCGAGGATCCACTGTTTCCTGGCCAGACTAGACTCCCAGCTG +TGGGAGTGGCCCAAACCCTGCTGCAGGCCTTCACTCACAAAAGGGAGTGAATGGCCAGCC +CGGGGTACCCAGTGTTGCCTCTAAGTGCGGGATATGAGGCCTGTTGGAGGATGACAAAAG +GAACGGATGAGGTCATACTGCCGCTGGGTTGGGCGGGAGGCTGCAGCGGGAGAGACTTGA +CCAAAAGGCAGCCTCCATGGCGCTGGCACTGGTGCTGGCCCGTCACAGCCAGTGCCTCTG +TTAGCCAACACAGCAAGTGGGCTAGGAGGAAGGGTGCTGTCCAAGTGTACGGGTTTTCTC +CCCGTGCCAAGGGTCCAGGGGAGCTTGCCGTGAGCCCGTTCCTGTCCGATAGTCCTGTTG +GCAAGGCAGGGTTTGCATGCGGTGTTTGGTCAGCTGGAGCAGTGGGGAGGCCTACGGGCA +GAGAGCTCCCCTTTGAGCCCTTTGCGGCCAGGCGCTGTGTGGAGCACTTCCCAGGGGTTG +TTTTGTAGTCTCACCGTGACCTTGGGGTAGGAAATGTGCTTCTGGCTGGCTTGCCCAGTC +TTAGACTCTCTTGTTGTAGCCCTCTCCTTTGTGATGTCACCCAGCCTCCTGACTTTAATG +CCAGCTTTCCTTGACATCTTCACAGCTCCAGCCCTGGCCTCTCTTCTGATTTCCAGCCTG +GTGTTTCTGGCCACCTCTTCCATATCTCCATGTGTTACTTTGGCGCCAAGTAGCCATATC +CAAACCAAACTCAAATCTTCACCCATGCCAAAACATTCCTTGGCCTATCTAAAGTCCCTT +TGATTCCTCTCTTTCTTACACCTTCATGCAGTCCTAAGGAGGCCCCGTCAGCTCACCTAT +TTCTAAGCACAGACCATTTCTCACCACCTTCAAGTTAACATCCTGGCCCAGCCTACCTGG +TGAGCCTCTAATTCTCTTTACTGTCACCTTAGGTCCTCACTCCCCATCCACCATCTATAG +TTTTTCTGGACACAGAGTGATTCTTCTTTCTTTCTTTTTTTTTTTTTTTGTTTTTTTGAG +ACAGAATCTCGCTCTGTCGCCCAGACTGGAGTGCAGTGGCGTGATCTCAGCTCACTGCAA +CTTCTGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTGC +AGGCGTCCACCACCACCACGCCCGGCTGATTTTTGTATTTTTAGTAGAGACGGGGTTTCA +CCATGTTGGCCAGGCTGGTCTCGAACTCCTGGCCTCAAATGATCTGCCTGCCTCAGCCTT +CCAGAGCGCTGGGATTACAGGCGTGAGCCACTGTGCCTGGCCACAGAGTGATTCTTTTTA +TTTTTTTGAGACAGAGTCTCGCTTTGTTGCTCAGGCTGGAGTGCAGTGGTGCGATCTCCC +GCTCACTGTAAGCTTCGCCTCCTGGGTTCAAGCCATTCTCCTGCCTCAGCCTCCCGAGTA +GCTGGGACTATAGGCGCCCACCACCACACTCGGCTAATTTTTTGTATTTTTAATAGAGAC +GGGGTTTCAATGTGTTAGCCAGGGTGGTGTCGATCTCCTGACCTTGTGGTCCGCCTGCCT +CAGCCTCCCAAAGTGCTGGGATTACAGTCGTGCACCACCGCGCCCGGCCCAGAGTGGTTC +TTTTAAACCATAAGATGGTATAAAACAGAGGCGGTGTAGATGCAAGCCAGCTTGCCTGCT +TGTAAAGCAGCTCCATCACTTCCTGGCTGTATGACCTTGAGCATGTGGGTGAACCTTTCT +TCCATAATGCTATGTGAGACCTGGGTGAGTTAAAATGCTTAAAGTGCTTAGAACAGGGCC 
+TGGCTGAGGTTTGGTTGCCACTGCTATCCTGGAATCAGCTGATGCCACTGTGTGGGCCTT +CCTATCCGGTGGCCCATCTCTCTCACTAAGAGCCAAAGTCCTGGCAGCAGCCTGGGAGGA +CCAGCCCTGATCCATCTGCCCTCATGGACTTGGCCCACTGTCACGTCCCCTCAGCACACT +GCCCCAGCTGGCCACACCACATTCCTGACCCCAGCCCTCTGCTTTTCCTCTCAGTGTCAC +CTGGAACATCCCCTCCTCCCAGCCCTGTAACCCACCTGGCTCACTCCTGCCTTCACGTGG +ATGCAGGCCATGAGGCCTTCTTGGATCCCCCTTCCTAAAACTGCCTCCCCTTCTTCAGGC +GCTTTCCTTGTTTTGTTTTTGCTTGAATACTTCTTGGCATGTGACAGGGTGCATGCTTTT +CTTAGTTGTTAGCGGTCCATCTCCCCCACTAAAATGTGAGCTCTGTAGGGCAGGGTTTTG +TCCACCTTGTGTCTGTAGCATCTAGGATGGTGTGTGTGTGTGTGATGAATGGAGAAAGCT +TATAGAGGGGAGGAAGGAACTTGCCCAGGACTGCCCACCACTGAGGTCTGGGGCTGGGCA +GGTTTCCTGCTTTCCTAAGGAGCTCACTCACTTGTAAGGCACCAGGCTTTCCTGCTGTGC +ATTTCTTCAGTGCCGCCGCTTGGCCCCATTAGCACCTTTGCTTGGAACGTCTACTCTAGC +TGCTTCTTCGCCTTCTGTGGTAGCAGGCTTCCCCATTCCGTCCCACTGTTGGTCTCTAAT +CCCCTGCTCCTTGTGGCTTCTTCATGTAGCTTTTTGTGTCCCAGAATCATGGATTGACTC +ACCAGGGTCTCTATGGGACTGCCAGGTTCTGAGGTGTGTGGCCCTGGTCTTCCTTGTAAT +CTGTGGCACTGGGCGCATTGACCCCTCGGTCTGTTTTCTCTAATGGTCTTTCTTGGTTGC +CAGGAGACTGACCTCACCCCGCTTAGTGTTCCTCATGCCTGCCTGGTTGCTGCCCACAGC +CTCTTGAGCGTCTTCAGCACTTTATAGTCAGTTGTTAGCTCTGTGCTGTGTTTCCTGGAT +CCTCAGCACCTTTGATGGACACATCTTTGGGAACATACGGTGTGAAATTCCCATTAGATT +CAGAAATGTTAGGACATCTTAGAATCCAGGAAATGGGGTGACAGTGCAGTGGTCATTGAC +CTGCCAGTGTGTCCAGGCCCTGTGACCTTGTCTCTTGTCCATATAGTGCGTTCCTCGGAG +TAGATGGCCAGTGGATAGTTGTTAAATGTATTGATTAAACCTAAACTAGTCTCTGGTTTT +CATGTTGTGTCCCCTTCGAAGTACATCCACCATGCCTTGTCCCTTGTTAAATGTGTGATG +TCTGCATCTCCTGTTCTTTCCGGGAATCTTGGGAGGATGTTAGGTCATGCAGTGCGGTTT +GCATGTGTGCATCTTTACTGGTATGCACGCATGTGCATGCATGCGTGTTCCTGTGCACGT +TGTGGGAGTGTGCATGTGCGTGTGTCTGCATGTGTTCCTTCTGCTCTGATGCAGGGTTTT +CTGGCTGCCCCCTCAGACCCATCTTCCCTGCTTCATGTTGTCCCTTTCTTCATGCTTCCT +CTGTAGTAAGGCTTCCTAGAGTTGCATCCTTAACCCTCATCTCTGGGTGGCCTTCCTCCT +GTGGGTTTGGCGACTACCTCCTCAGTGCGTCCTGGCCTTCCGTATCTGGACCAGTCATGG +CTCCAGAGTTGTGGCCTCGGCACCTTCAGAGCCTGCCTGGAGGTCCCCTCTGTTCTCCTC +GCCAGGCTCCTGCCTGCATGCTGGAATTTGGGTGTCCTTCCTTGTCTTTAGTTCCCTCTT +TTCTCTGCTATCCACCTCCGTTGGGTCAACAAGTGCGGTCAGTTCTGTGACACTGTTCTT 
+GGCCCATTGCCAGGGCCTCTTTCACACCTTTGTCCTCTGGCCTGGAGGACTGTCCCCGTG +CCTTGGATGCAGTTGCTTCTGCCTGGCACAGCTGTGGGTCTCCCCTCTCAAGTAGGAACC +TACTTGTGGCATGAGAGTCGCTTCACCTTCAGTGGCTCCTTATTGTCCCCAGGGCGGAAC +GGGCTGCAGGTGGCACCCAGGCCTCTCTGTGCCTCGGCTCCAGCTGTCTCTTGTGGCTCC +CCACCCCAAATCCTGGCAGGTCCCTTCCCCACAGCCACCCACCCCTCTCCTCCATCGGGA +ACAGAACTATGCTGCCACCTCTGGTGACCTCAGCACGCTGCATCACTGTCCCCGTCCACG +TGCTACCCTGTGGGCCCAGGAGAGCCCTGGGGTCCCTGGGTAGCAGAGCGCCTGGCCATG +CCTCTGAGGCCCCTAGTGCCGCAGAGTTGAGCTGAGGGTCTCGCGCTCGCCCTCTGACTG +ACCCAGCCCTTGCAGGTGAGTGGATTGCTGTGCTCTGGTGGCCTGAGGGAGGCCACGCGC +CTTCTGTGTGTTCCAGAAAGGGTGCCTCCCACTGCATGCTTGCTTATCTGAGTTAGAAGA +ATGCTGTGGTGGAGTTTAGTGTAAATTTTTAAAATATTTTTTGAGCCTTATGATTATATA +GTTTTTGTGTTTCTGAAGTAGGAATTAAAGTGGGCATTAACAAAATATTTAACTTTGGAC +TTAAGTTATAATTCAGGTTCTGAAGAATAAAAGTAAGGTTAGTTTGTTTTGATGCCTAAA +AAGTCCTCTTAGGGAATATTATTTTGAAGCCCTTTACTATGCTGTTAATAGTGCTTGGCT +TTTAACTTGGTACCAGGGAATTGGAAGGTTTCTGTCATTTTGTGACGATATTTTTAAATT +TCTTTGCAGGTAGAAGAAGAAAGGTGCCACTCCGGCATGAAGACAGACTCGCTTAGTCGC +CAGTCACTTAAGCTGAGTGCATTGTGATTTCCAATAATTGAGGCAGTGGTTCTAAAAGCT +GTCTACATTAATGAAAAGAGCAATGTGGCCAGCTTGACTAAGCCGCCAGCGCACAGCGCG +GCAGGACGCGCCCGGGTCTCAGCGGACTTGTGCATGTTAGCTGTGTAGATTTATGTGAGG +GCTTGTAAAACTCTGGTCTTGTAAACTAGTCTTAAGCGCTTTTAATATGGAGACAGATGA +GAGCCCCTCTCCGCTCCCGTGTGGGCCCGCAGGAGAAGCGGTGATGGAGAGCCGAGCTCG +CCCCTTCCAAGCGCTGCCCCGTGAGCAGTCTCCACCACCTCCCCTGCAAACGTCCAGTGG +TGCAGAGGTAATGGACGTTGGCTCTGGTGGTGATGGACAGTCCGAACTCCCTGCTGAGGA +CCCCTTCAACTTCTACGGAGCTTCTCTTCTCTCCAAAGGATCCTTCTCTAAGGGCCGCCT +CCTCATAGACCCGAACTGTAGTGGCCACAGCCCGCGCACCGCCCGGCACGCACCTGCGGT +CCGGAAGTTCTCCCCTGACCTTAAGTTGCTTAAGGATGTAAAGATTAGCGTGAGCTTTAC +CGAGAGCTGCAGGAGTAAGGACAGGAAGGTGCTGTACACAGGAGCAGAGCGCGACGTGCG +GGCGGAGTGCGGTCTGCTCCTTAGCCCTGTCAGTGGGGACGTGCATGCTTGTCCCTTTGG +CGGGAGTGTTGGTGACGGGGTAGGCATAGGGGGTGAGAGTGCTGATAAGAAGGATGAGGA +GAATGAGCTGGATCAGGAAAAGAGAGTGGAGTATGCAGTGCTCGATGAGTTAGAAGATTT +TACTGACAATTTGGAGCTAGATGAAGAAGGAGCAGGCGGGTTCACGGCTAAAGCAATCGT +TCAGAGAGACAGAGTGGATGAAGAGGCCTTGAATTTCCCCTACGAGGTATGTTGGCAGCC 
+CCTCCTCTAGAGGGCTCTTAGCAAAACCCAAAGAGAGATTTGGGAATTGCAGCATCTTTT +GAAAGCAGGGAAATTAAAAAAAAAAAAAACAAAAACCAAAATCCCCTCTGAGGTGGAATA +ATGTTAATGTGGAGAAGAGAAAGATGTAAGGAGTCCAGATTTTTAAAGTTTCCTAATGAA +AAGTTTGGCCCATGGGTAGGCCCTGCATCCCTGATCTAGCGCGTGGGGCAGCAGGTGCTG +CTGAGTTACGCTCCTTGGCAGTGTGTGCCCCTGGACCAGGTGTGTTGGTGTCAGCTGGTA +GCTTCATCCTGTTTGTTTTTCAGATGATCATGCACCCTAAGGGCACATCTAGGCCCCCTG +AGAGCACCTCCTTTCTGTGTCTGTTCTCAGGAATGCTGTTGAGCTCTCCTGTTGCAGGAG +CATGAGCGCCAGGGGCTCTGGTGTCTGAACAGCGTGTTTTGCAGGATGACTTTGACAACG +ATGTGGATGCTCTGCTGGAAGAAGGCCTTTGTGCCCCCAAAAAGAGGCGAACAGAGGAAA +AATATGGCGGAGACAGCGACCATCCGTCCGATGGAGAGACAAGTGTGCAGCCGATGATGA +CCAAGATTAAAACAGTGCTCAAAAGTACGTGTGTGGGTCAGAAGCAGTGGGTGTTCCAGG +GCAGTGGAGGGGTGGTTGCTTCCTTAGCAGAAATGCTTTGAGAGAGCTCTGGTGCCAGGT +AGTGGGCTGGGTGGAGGCATGTTTGAGGACAGGACAGAAATATCTGAGGAGGGAAACACA +GGATGGGAGGACGTCCTGATGCATGACCCAGATGCGGATCCTGGTGTGTGCTATGGAAAC +CACAGAGCAGCAGGCACTGTGCAGAGGAATGGGAGGGGACTTCTGAGGGGGTCAGGAGGG +GATGTTGGTATATGCCAGGAGCCAGCATGACCATCAGCTGGAGGGTGTGGAGACCAAGGG +CAAGGTGGGTACACAGTGAGATGGCAGGTGATAGTTGTGATCCAGGTTCTAGTCCTGGAC +CAAGTGTCACAGGAAGCCACCAGAAGCAAGTGGTGTTACCTGATTTTTGTTTAACAACAT +GGCTTCTGGGCCACAGCATGGTGTGTGCACTTCAGCTGGGCAGGGTGGAAGTGCCCTGGT +CGACGTGGCACTTCCTCAACTTTGAGTTGAGGGTAGTGGGGAGAGCACTTGAGAGGCGCC +CAGAGTTGCACTGTGAGGCTCTGTCATCGATGTGGTGCAGGTGTTGGGGGAGTCGGAGTT +GGTGGCTTGGAGGCACCTATGAGGGTCCTGGTGGAGATGTGGGGCTCGGTAGAGGGACTG +AAGAGATGCACGGGGACTCAGGCTCACAGGTGAGACCCCAGGCTGAGGGCTGGGGTGCAC +AAGGTGGCAGGGTGCGACTGGGAGCAAAGCAAGGGGCGGGTGGCACATGGCCATCCTCCT +TGGGCCATTGAGGGGACAAGTTAGCTATGCCAGGTGCCACATGTTGCTGCCTATGTCGTG +TTGGTAGCAGTTAAGCAGGAGGGCTTTTGGTCTAGACGGTTGCAGGCAGGGTAAACCTGG +GGCTTCAGCTTCTGGGGCTGGGGGCAGGAGTCTTGACCTCGGGGCCTCTAGGAGGCTGGT +TTGGTGCTGCATTTTCAACAGGCTCATTCAGAAGGTAAACCCACATTCGTCCTGCACAGC +TCACTGGTCAGCAGCCATTCTGGCCTGGATGCCTGGATGTTCACAGACGGACAGCTGCAT +GTGACCCCCACCTCAGGTGGCTGGGCCAACTTTGTGACCACTCACTGTCACAGAGGAGCA +GCCCCAGCGGACCCGTGGCCCGTCTCTTGCTTGGCTCCGTATTCCCTTTGCTTCTTTTCC +TGACTTAACCTAAAGCTGGCAGGAGGCAGCTCCATCTTGGCCTGACTCTGTGGGGAGAGA 
+GCCCACGCAGTGTGCATGCACCCCTGACCTTTAACACTCCCTCCCTGAATCATCACCTGA +TGGTGGCCTGGGTACAGGTGTCAGGTCCAGGGTCAGGGCCACTTGGTGGGTACCTAGCCC +TGAGAGTCACTGGCCCTTTTTCCCCTTATGTATATTTTTTAAGGTTTCATTTGCTTTCTT +CTGTTTTTTAATTTTTTTTTTGAGGCGGGGTCTTGCTGTTACCCAGGTTGGAGTAGCTCA +CTACAGCCTCAGCCTCCCAGGCTCAAGTGATCCTCCCACCTCAGCTTCCCAAGTAGCCAG +GACTATAGGTGTATGCCACCATGCCTGGATAATTTTAAAAAGTTTTTTGTAGAGTCAGGG +TCTTGCTGTGTTGCCTAGGCTGGTCTTTCTAAGTTTTTAACTTGTTCCCAATACCTGTGA +GAAAATTAAGTTTTAGTTACATCTTTGCCTTACGCTCTTCCCAGCAGTCAGGCCTGGTGG +CAGGTGTCAAGTTGATAAGTCGTAAACCTCTTCCTCACAACTGCCTATAAAGCAAAGAAG +GGGTTGGGAGTAGTGTATTAAACCAACTTAGAAACGGAATCATTTGAAGCAGTGTTTTGT +ATGCTGCTGAGAGTGCAGCTCAGTTACAATAAAGACTTGTCACCTGGTCACTGGGAGCAG +TGGCTGACAGCCATGCAGTTTATTAGCAAAGCTCATCCAGCCCCAAGGCGCCTCTGGCCA +CAACCAGACAGGCAGCAAAGGAGTCCAGTGTCCACAGTGCTTACCAGATGTGGAGGCAGG +AGCTGAGGCTGGGACTTATATCTTGCACAGACCTGTTCTGTGCTCCTTCATGTGCTGGTT +TGACACGTGTAAATTCCCACCTCAGGCCACATGCTATCTGTCCTAGGCCTGGAGGCATAG +CAGTGAGCAAGGCAGAGAGGAATTTCGATTATCTGTGAAGACCCAGTTAAACACATGCTA +GTACCCAACAATTTCTTGTGCAGGAGGTGCTGTGGCAACAATTCCAAGTGTTTTTACAGT +GATTATAGGATGTTCTTGTCTTCCTGTGCAGGTCGTGGCCGCCCACCTACAGAGCCGCTG +CCCGACGGGTGGATCATGACATTCCATAACTCTGGAGTCCCGGTGTACCTACACAGAGAG +TCTCGGGTGGTCACCTGGTCCAGGCCATACTTCTTGGGAACGGGAAGCATACGGGTAGGG +GAGGCATCAGTCGTGACTTTAGGCTTGTAAGTTCTCAGACCAAAACGTGCACATCCACAC +ACATGGCACCGCAGCCAAGCAGAGGCCGGTGAAAGGCATCAGAGTTTCAGACTCTCGGGT +TGTCCTTGTAATCCATATTGCAATTTCACTATCCACAGAAACACGACCCTCCTCTGAGTA +GCATCCCTTGTCTGCATTATAAGAAAATGAAGGACAACGAGGAACGGGAGCAAAGCAGTG +ACCTCACCCCTAGTGGGGATGTGTCCCCCGTCAAGCCCCTGAGCCGATCTGCAGAGCTGG +AGTTTCCCCTGGATGAGCCTGACTCTATGGGTGCTGACCCGGGGCCCCCGGACGAGAAAG +ACCCACTAGGGGCTGAGGCAGCCCCTGGGGCCCTGGGGCAGGTGAAGGCCAAAGTCGAGG +TGTGCAAAGATGAATCCGTTGGTGAGTTTTTGAAGGACTCTTCCCTTCTTGCCTCCTGGG +ACCCATGAGCTTTGTTTTTCTAATGAAGGCCATAATTGTTCATAGTTCCTAGTTGTACTT +GTGAGCAAGGGGCTGTGCACCTCCACTGTTGGTGTGGCTGAAAGGCTTGTCTTCCTGTCC +TTGCAAGAACCTGCCTGGGTGCAGCACCCTAGGCTCCTGTCTGTGGGCTCTGTGGTGTCT +GCATGGCTGAGGCTGCTGGGGAGATGGCTGCTGGCACTGGCAGTGGGCCGTTTTCTCTCT 
+GGTTGTTTGTATGATCTTTCCCTAAGTCTTGCTTTTTCAGTAGGATGTGCTCATGTGGCA +TTGTTTTGATTTGTCTTGATGAGCTCAGTGAGCCTCTTTAATCCATGACTCAGTGTTGGG +AAACTGTCACCATTCTGTGGATAGTTCCCTCTTCAATCCTGCAATTGCTTGCTGGATGTG +TACTGGTGTTTCCCATTCTCTCTTCCATGTCTTTTAGCCTAGTCCCGCCATTTAAGTCTT +AGTTCTCTCTTCAGGTGTGGCAGGTCTCTGTGAACCCACCTGCTGGATTACTTTCAAAGA +GCACTTTCCATTTTCTAGGTTTCTATTTGTCTGTACATCACTACCCAGCCTGTCTCATAA +CTCCGTGGAGTGTGGGGTGCCCTAGAAGAGCCTGTGATGGGGGCTGTTGCTGAGGATGGG +GAAGTGGCCCGGGAGGAAATGTGACAAATGCGAGGCCCTGTGCAGGGAGCTGCCTGGGTG +TGTTGCTGACTACAGAGGTGGCCAGGATGGCCAGAAGGAAAAGAGTGAGGTGGTCAGAGA +GGTGGCGGGCGCATGGAGCCCGGGTGCTGCTGGCATTCGGGCACCTGTTTTGATACAGGG +AGGAAGTCCCGAAGGCCATCTGACTTTTTTAAGCTGTCTTTTTTCCTTTTCATTTTTGTA +TTCTTCCTGCCCCAAATAAAGTAAAAGAACATCTCAGAAATTCATCCAGAAGGGACCTGC +CAGAATAATCTGAGCTGGTGTAGGGAGGCCTTCATTCTTTGCTTCCCTCCCCTTTGAAAG +GGACGGGGAAAGGAAATAGTGTCATGTGGCCTGGGCCTGCCCCATGCACTGGGCTGTGAG +AACCTGACTCCTATGTTGGAAGTTAAGTAATTTGTTTCTCTGGTAAATCTGGGACAGATC +TCGAGGAATTTCGAAGCTACCTGGAGAAGCGTTTTGACTTTGAGCAAGTTACTGTGAAAA +AATTCAGGACTTGGGCTGAGCGGCGGCAATTCAATCGGGAAATGAAGCGGAAGCAGGCGG +AGTCCGAGAGGCCCATCTTGCCAGCCAATCAGAAGCTCATTACTTTATCAGTGCAAGATG +CACCCACAAAGAAAGGTATAAGCCTCTGCATTTTAACATCAGCAGACTGGTTATGCTGTT +ATTTCTAATGTGATGTGTTGAGGGCATGTTTTATGAGTTGCATTTCGTTCAAAGTTTATG +TTTATCCCATGAATGCAGGGGTCTGCCACATTCACGGTCGTGAGCCCCTAGTTACTGACA +TGGTAACAGGAAGCTGTGTGCTAGCTTGTGGCACTGCTTCACACTTGCTGAGATGGTTCT +TTTTGTCACAGAGTTTGTTATTAACCCCAACGGGAAATCCGAGGTCTGCATCCTGCACGA +GTACATGCAGCGTGTCCTCAAGGTCCGCCCTGTCTATAATTTCTTTGAATGTGGTAAGTC +TAACCTTCCCCATTTCAGTCCTAAAGAATCACAAGGTGTAACTTTGGTCAGGGCTGGGGA +GGCAGGCGCTGACCGCTAGCCCTACTCTACTGGAACGGGAGGAAAGGGAAGGGTAGAGAG +CAGCGTGCTGCAGATGGGTGGCTTGTTTCCTGGAGCCGCGCCCCATCTGAGTGGGTGGCT +CTGCTTTTCCCTTCCAGTCTGTCCCCTCTCCCCACCTTCCTCTTGTCCTGCCCCATCTGC +CTTGCTGGTTTCTGATCCTGCGTTGTCATGGACAGCCTTATTGCGAGCAAGGCCTCCCCA +CTGTGCACTGGGTCCTTTCCCTGGTCTCCTGGTGCCCCTGGGTGTCTGAAGGGTTCTTGG +GCATGCATTTCCCACCAGCTCTGTCTTCCTTGGGTGCCCTCATCACACCTTTGTGAAGAC +CAGTCTGTCTTCCCCTTGGAAGCTGGAGCCTGGGGGTCACTCATCCTTTGAGGTGGTGTT 
+TCTTTGGCAAGCCACATCTTGTGCTTTCCCATTCCGGCAGTCCTGTGTCCGTCTTTCGTA +GACCTTGGTCCCTGTCGTAGGAGGCTCTGCCCCCAGCCTCCTGCTGCCCCTCTGGCTCAC +CATCTCCCACGTGCCTCATTTCCCTCCACTTTCCCAGCTGTGCCCTTTGCTGCTGCTTGT +CTGCTGCCCGTCTCCAGTCCCAGGGAGCCCCAGGTCTCTGCAGTCAAGCACAGGCCCACT +CTCTGCAGGGACAGCCCCTGACTGCGCCTTGTCTGTCGGTGTGGGCAGACTGTGCACACG +CTTTTGATGTCTTGACTCGTATGTTTTAAAACAAGTAATTTTAATTTTAAGAGAACCCAA +GTGAGCCTTTTGGTGCCTCGGTGACCATTGATGGTGTGACTTACGGATCTGGAACTGCAA +GCAGCAAAAAACTTGCGAAGAATAAAGCTGGTAACGTGCTTGCTTGGGTGTCAAAGATAC +GTGCTGCCTGCTGTGTCTGCCTCGCTGCTTGGTTAGGGAGGGGCTGAGCAGTGGTGACAA +GTGGGGGATGTGGGTGGGGCATGTTTATTTTATTTATGTAATCATCTACTTTGATTTTTA +AAATTACATGCTCATTGGAAAAGACTCAAATAGAGCAAAATGTGAGACTTAAAAGTAGAA +GGCCTCGGCCGGGCGTGGTGTCTCATGCCTGTAATCCCAGCACTTTGGGAGGCTGAAGTG +GGCAGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCTCTCTC +TACTAAAAATACAAAAAATTAGCCGGGTGTGGTGGCGGGCACCTGTAGTCTCAGCTACTC +GGGAGGCTGAGGCAGGAGAATGGCGTGAACAAGCGAGGCAGAGCTTGCAGTGAGCTGAGA +TCGTGCCACTGCACTCCAGCCTGGGCGACAGTGCAAGACTCTGTCTAAAAAAAAAAAAGT +AGAAGGCCTCTCTACTGAGTGACAGCTGGGCCAGTATGTCCCTCCCTAGTCCTGGCCCTG +CACATATTCAGGGGTGTGCCTAGAGCCAACTGGGCTGTGCTGGGCACATGCTGTAGTGCT +TGTTTGTTTTTAAACTGAGCAGCATGTGGGCATGCCAGGAGGGAGGAGAAGCGGGCCAGC +ATGCTGTTGGCATCCTCCCCCAGTGGTGCTGGCTCTTACCGGCACTATTTCCTGAGTCCT +CCCACTCCATGTTTGGGCCTAGCCACACAGAACACCTGGCACAGCGCAGCTCTGGGAAGG +CTGTGTGTCCTGGCGAAGACCCACTGCCCTTTTGGACCATGCCCTTGCCGTTTGCATTCA +TGCCCTGCATTCGTGGTAGTTGACATTCTTTCACAAAAAGCCATTTGCCTCAGCGTCCAG +GGTTAGTTGGCTTCTTGTAGGGTGGTGTTGGGTGTACCCCAGGCCCTCGCACCCACAGGT +TGATTCTCGAGGTATCTGTCATTGCCGTGTGTTTTCCAGAAGGCCTGCTGCCCATCTTAG +ATTCTGCCTAAGAGACTTTTGCAGCTCAACAAAGGGGGTGTGGTGGAGGTTTTGACAGAT +AACTTGCAGTGGGGCCTTTTGTGGGGGTGGAGATGCCTTGGGCGTGGCTGTGTGCTCTTC +CCTCTGCGGGATCACCCGCTTTCCTCCCTATGTGCTGGAACCCACTTCCCTTGCCCAGAC +TCTTGGCTGCCCTATAGACAGCTTGGCTCCATGTCTTGCCAGCACTGATAGCTCTGTGGG +GTCTATGGAGCCATCAGCCCTTGGCTATGGCAACTCCGACCCCAGTGGGCATATCCGTGC +CCTGCCCTCTAGCAGCTCCCTCTAGCGTCTCTCAGACCTGGCTGCATCTCTCCACCCACG +CCAGCCACTGCTGCTGCCACCTGGTCTAGGTACCTGCAGTAGCCTGCTCTCAGGCCTCCC 
+TGCATGGTCCAGGGCCCGCAAAGCTGTTTTCTCACGTGCCACCGAAGTGGTTCCCTAACA +TACAGCTCCGAGCTCCTCCCTGCCTGCTCCCAGGACCCCAGGGTGAGAGCGCACACACCC +AGTGTGGCCCGTGAGGCTTTCTGGTAGCCTGCCACACGTCTGGGCTTTTCCTTCTGTCCT +CTCCTGACCCTGTCACTGAAGTGCTGTCTCTCCTCAGGGCCCTCGCTCAGCCTCTGAGCT +GTGGGGATGGGAGGCAGCAGTGCACAGTTCACTCTGCAGGGTGGTGAGAAGAGGGCAGTG +CCCAAGCCTCACCCTCGGGCTCTTTTTTTTCATAGCCCGAGCTACACTGGAAATCCTCAT +CCCTGACTTTGTTAAACAGACCTCTGAAGAGAAGCCCAAAGACAGTGAAGAACTCGAGGT +GAGTGTTGTGGTCCTGCCCTGCTGGGAGCTGTGTGTGCAGTGCGGCTACCCTGCCCTGTT +AGTGTGAGCTGGGGGTGTCCGTGGGCTGTGCTCATGCCTTCCATGCCCTGTAGGTTAGTC +TCATCCAGCCTCCAGGTTCTTCTGTTTGTATTTCATAAAGAGGAAACTTGCCTGTCTTTG +TATAGGCAGGGAAAAAATGTCCTTTTTGTTAGAGATTTTTGTCCTCTTTTGCAGCATTGA +GTGTGCACCGTAAATGTCGGGTACACATAGGTTTGTGATGGGAATGAATCTTTGAGGCAG +TCTACAAGATTTAGATTGTTCTTTTTTTTGAGACAAAGTCTTGCTCTGTCACCCAGGCAG +TGACAGAGTTTGATCTTGACCCACTGCAACTTTCACCTCCCGGATTCAAGTGATTCTCCC +ACCTCAGCTTCCTGAGTAGCTGGGATTACAGGTGTGTGCCACCATTTCTGGCTAATTTTT +GTATTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACC +TCGGGTGATCCACTCGCTTCCGCCTCCTAAAGTGCTGGGATTACAGGCATTAGCCACTGC +GCCCGGCCCTAGATTGTTCTTTATATACTTGACCCTTGAGTAACGTGGAAGTTAGGGGCA +CCAGCCTCTCACGTGGTCGAAAATTCACGCATCACTTTCAGTCCTCCCAAACCTTAACTA +CTTACTGGTAACCTACTCTTGACCAGAAGCCGTACCAGTAACAAACAATTGATTAATACA +TATTTTGTTATGTACATATTACTACATCTGTCCCCAACCTAGTTTTGGCACCAGAGATCA +GTTTCAGGGAAGACAATTTTTCCACAGACGGTGTTGGGGGGCGGGGATGGTTTCAGGATG +ACTCAAGCACATCACATTTATTGTGCACTTTTTATTTCTATTATCACTACACTGTAATAT +GTAATGTAATCACTATACAACTCACCATAATATAAAACCAGTGGGAGCCCTGAGCTTGTT +TTCTTGCAACTAGATGGTTCCCTCTGATGACGATGGGAGACAGTGACAGATCATCAGGCA +TTAGATTTGCATAAGGAGTACACAACCTAGATCCCTTGAATGCACTGTTCACAATAGGGT +TCACACTCCTATGAGAATCTAATGCTACTGCTGATCTGACAGGAGACAGAGCTCAGGTGT +AAAATGAGTGATGGGGAGCAGCTGTAAATACAGATTAAGCTTCACTTGCTCACTTACTCC +CCCTGCCCCCACCACTCATGTCCTGCTATGCAGCCCGGTTCCTAACAGGCCATGGATTGG +TACCAGTCCATGGCCTGGGGGTTGGGGACCCCTGATATATACTGTATTCTTACATTAAAG +TAAGTTACAAAAAGAATGTTATTAAGAAAATCATAAGGAAGAGAAAATTTATTTACTGTT +CTTGAAATGGAAACATCATCATGAAGGTCTTCATCCTCCACATCTTCATGTCGAATAGCT 
+GAGGAGGAGGGGGAAGAAGAAGAGTTGGCCTTGGTATCTCAGGGGTGGCAGAGGCAGAAG +AAAATTTTTGTATAAGTGGGCCCATGGAGTTCAAATCCATGTTGTTCAAGGGTCAGCCAT +GTGAAGAACCATTTAAAACTAGCTGTAAACCTTTTTGGGTAAATGGTTATTTGTAGCAAG +GGCTTGTGGAATTTTGAAGGGTGCATTCTTATTTATATAATAGCTTTATTGAGATAGTTT +ACATACTATAAATTTGGCCTTTTAAACTATATAAGTGGATTTTTATATATTCAGAGTTGT +ATAACCATCATAACTACCTAATTTTAGAGTATCTCAACACCCCCAAAAGAAACTCCATAC +CCATTAGCATTCCCTCTTAGTTCTCTCCAACCACCAAAAGCCACTAATCTGCTTTCTGTC +TCTATATATCTGCCAACTCTGGACATTTCATATTAGCGGAATCATACAATATGTGGTCTT +TTGTGACTGGCTTCCTTCACTTAGTGTGCTATGTCAGCTGTAGGTTTTTATCAGAATAAG +GAAATTTCCTTTTGTTCCTGGTTTGCTGAGAGTTTTTGTAGGAATGATGTTAGATTTTGT +CAAATGTTTTTCTGCTTCTATTCAGATGATCATGTTTTTTTTTGTCTTTTATTTTAGTAA +TATAGTGTATTATATTGGTTTTTGAATGGTGAGCCAACCTTGCATTCCTGGGATATATCC +CACTTGGTCATGACATATAATGATTTTTTTAATATGTTGCTGGATTCATTTTGCTGGTAT +TTTGTTAAGGATTTTTGCATTTGTAGTTATAAGCAATATTGATCTGTAGTTTTCTTGTGA +TATCTTTGTCCAGTGTAACAGCAGGTACAGGTATCAGGATAATACTGGCCTCATAAAATG +GTTGGAACATGTTCCATTTTCTTCTGTTGTGTGGAATAGTTTGTGAAGGATTGGTGTTCA +TTTTTTAAATGTTTTGTAGACTGCGTTCAGTGAAACCTTCTGGTCCCAGGCTTTTCTTTG +TCCAGAGCTACTTAATTGCTAATTCAATCTTACATGTTATAGGTCTATTGAGATTATCTA +TTTCTTCTTGAGCCAGTGTTGATAGTTTGTGTCTTTTTTGGAATTTTTTTCCCATTTGAT +GTAGATTATCTAATTGATTAGCATATATTTGTTCCTAGTATCCCCATATGACCCTTTTTT +ATTTCTATATGGTTGTGATGGTCCCTTGCTGTCATTCCTGATTTCAGTAATTTGAGGCTT +TCTTCTTTTTCTTGATCAGTAAATTGTTACACTTGATGATACATCACAGGTCTCTGAGGC +TCTGTTGTTTCTTCTCTGTACTTTTTTCTTTCTGTTCCTCAGTGTGGATTATTTCAATTG +CCCTGTCTTCAAGTTTGCTCCTTTATCTTCTGTTGGCTCACATTCTGCTGGTAAACCCCT +CTAGTGACTGTTTCATAATTTCTGTCTTTTTTAAAAAAATTCTTTATTGGGTAATGCATT +GTTTTTATACTTTAATTCTTTTTTTTTTTTTTTTTAATTATATATATAGGGTCAGGCATG +GTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGCAGATCACAAGGTCA +AGAGATCGAGACCATCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA +TTAGCTGGGTGTGGTGGTGGGCACCTGTAATTGCAGCTACCCGGGAGGCTGAGGCAGAAG +AACCACTTGAACCCAGGAGGTGGAGGTTGCAGTGAGCTGATACTGCGCCACTGTGCTCCA +GCCTGGTGACAGAGCGAGACTCCTGTCTCAAAAAAAAAAAAAGGAAAAAGAAAAAAAATA +TATATATATATGGGGTAGAGATGGGGTTTCACCATATTGCTCAGGCTGGTCTCGAACTCC 
+TAGGCTCAAGCAATCTGCCCACCTCAGCCTCCCAAATTGTTGAGATGACAGACGTGCCAC +TGTGCTGGCATTATACTTCAGTTCTTTAGACATGGTTTCTTAGTCCTTTGAGTATGTTTT +CTGTAGGTGATGTAAAATCTGTTTCTGGTAAGTCCAGTTCTCAGAGAGCGTGTATATTAA +ACTGCCTTTCCCTTCATGTATGGGCTGTCTCTACTTTCCTATTTCTTTGCATATCTCAGA +ATTTTTTGTTATTGTCAAGAATTGGACATTTAAAATATATGGCAACTCTGGAAATCACAT +TACCTCACCTCCCTTAAGTTTATAGTTTTTGTCATATGTCCTTGCTCTCTATTTGTGACT +GTATCAGTTTGCTAGGGATACTGTAACAAAGTGTCACAGACTGGGAGGCTTAAATGACAG +AGATGTATAGTCTCAAAGTTCTGGAAGCCAGAAGTCCAAAATGAAGGTGTCAGAAGGCTA +AGAGAAGGCCTTTCTCCGTGGGTGGAGGATAGCGATCTCCATGTCCATGCCTTTCTCTCT +GTGTGCATGCTGGTCTCCACATTGCCCATTTGATAGAGACACCTGTCTCACTGGATTAGG +CCCTACCCTAATGACCTCATTGTAATGTGATGACCTCTGTAAAGACCTTGTCTCCAAATA +CTGTTGCATTCTGAGGGACTTCAACATGTAAATTTGGGGGTGACAGTTTACCTCATAATA +GTGACTTTCCTAGATTGATTCTGTGAAGTGTGTATTCTTTATGATTTGTGGCCACTGAAT +ACACGGGCCCTTTAGCTGAGTGGTCTCCTGATGCTTGGACAGGGATTTTCGTAAATTCCC +TGAGCCAGTAAGTCTCCCGGCCTGTGATGAGAGCTGTGGGTCTGTGGGGCTCACCTGGTG +TGCTCCGGTGGCTGCCAACTGTCTTAGCCTTTGTGTGCCACCTGCACAGAGTCTCCAGGT +CAGTCAGCTGGGAGGTTTTCCTGGGCCTGTGCGCAGTCATCTCCAGACATACGCTAGAGC +CTTTCAGAGCCCCATGGACATCTCCAGCTTTTCCTTCTTGGGTTCTTGGCCAGCCTCTTG +TTTGCCCAACTAGTGCCCCAGCCTCTGGCAGCTGCAGGGTGAAACAGTTGCCACTGGTTC +CTTTTGACAAAAGCCCTCAGGAACTGGGCTGTTTTCACTGAGTGAGATCAAATAAAGACA +AATCCTGCCATGAGGCTTTTCCAGAGGGTGGCCAGGTAAACCCAACAGTGACAGTTCTCT +GGGCCTGGGGCCTTGAGGACCCGTTCTCCATTGAGTGGCTACCACTGAGCTGGGGAGAAG +GGCTGGGAATGGGGCGAGTTAAGATGCCAGCAACTCCTTGTTCTTAGCAAGACTCGTTGT +CTTTCTTGAATCAACACTCCTCAGATTGTTGAAAGCCTTTCTTTAGTTTCCAGGGTTCTG +AAAGAATTAATTCTGACCATTTTTGCTAGTGTTCTTGTTGCTTTAATGGAGGAGAGCAGA +TTTTGGAGGTCCTTATTCGGCCATTCCAGAACTACTTTAGGATGGGTTTTGTAAAGTTTT +CTCATCTTTGTGGACTGGTGCTGTTATAAAATATGGTTAAGTGTCTATGGTAGTACTAAC +TTGCTCTTAAGTTTCTTGAACTCCTGGCTTCAAGCAGTCCTCCTGCCTCAGCCTCCCAGG +ATGATGGGATTACAGGCGTGAGCCACCACACCTAGTCTCTTGCTCTCAAGTTTCTACATG +TTTTCTGTGCTTTATGTAACTGCTTCATCGTGGACAGGCTACAGACAGATTGCAGACCAC +ATGTGGATCAAGCACAGTCTCCAAGTACTAGCTGTTCCAGGAAGAGTCACATTTGTCCCT +TTTTTTTTGAGATGGAGTCTCGCTCTGTTGCCAGGCTGGAGTGCAGTGGTTCGATCTCTG 
+CTTACTGCAACCTCCGCCTCCTGGGTTCAAGCGATTCTTCTGCCTCAGCCTCCTGAGTAG +TTGGGACTACAGGTGCACGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGATGG +GGTTTCACCGTGTTGGCCAGGATGGTCTCTATCTCTTGACCTCGTGATCCGTCCGTCTTG +GCCTCCCAAAGTGCTGGGATTATAGGCGTGAGCCACTGCACCTGGCCACATTTGATTTTA +TTTCTGAGTGACCCAACCGTGGGAAATATGGCTCCCAGGGAGGCTTCCAAGGCCCTCTGG +GGAAGTGCTTGCGTAGACCCTGGCCCACCAAAACTCTTAGTATGGGTTGCTGCAGACGAT +GGTGCCTGAGTTGTGTAGGTCTCACGCTGCCTTCCTGTCTGAGTAAAAGCCCTGCCTGTT +CTTTGCTATGGGGTGGTGGAAATGTCCATGAACCACTGTGGTTTCTGGTGCCAGGAGAGG +GCCCAGCGGGGAGTCAGGTTCTCAGCCTCCACTCTGGGGTTCTTCTGTGAAGTGGAGAGG +CTTTGCAGAAACCACATTGCAAAGTAGTCTGAGAAAGAAGAGGTTTCCCCAGGTGGAGAA +GGACATGGGGGTGGGGCATGTGGTGGCTTCCGCTGTAGACTCCCCTGCCTGGGGCTTTTT +CCACACCCCTACTCCGATCCTCCCACACTGCAGCCTGTCTGGAGCTAGCGTCATATCTCC +CCAGGTAAGGAGCTCAGTTTCGTGGGACTGCCCCCACTTCCTGTGCCAGCCGCAAATGGG +GGCCCCTGCCACCCGGCACAGGACCCCTCCTCGTGCTTGGAAATTTGCTAGGACTCAGGA +ACTCAGGAAAAATTTTACTTATGAAACTTATTTTTCATGAAGGGCTGAAAGTTCCTCCCC +TCTACTGACACATTTGATTTCTCCGGGGAGCCCACCCTGGCCACCTCCTCAGCATAAGCT +CTGTGGTGGGGTCGTTTTGAATCACAAAAGACAAACCTAGCACTCAGGAAATCCTCAGGG +TTTTAAGAACTCTGTTCTGGGAACCTGAGAGAAGACCGTGTGTTTTTATGATGCCATGCC +TTGGTTTAGGGTGCCTTTTGTATGTGCCCCTCCTGAGAAGCTCTTTGACCCTCTTATTTG +TATATATGTCATCTGTGGGCTGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGG +AGGCCGAGGCGGGCAGATTACGAGGTAAGGACATCGAGACCATCCTGGCTAACACGGTGA +AACCCCGTCTCTACTAAAAACATACAAAAAATTAGCCGGGCATGGTGGTGGGCGCCTGCA +GTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGCGGAGCTTG +CAGCGAGCTGAGATTGCGCCACTGCACTCCAGCCTGGGCGACGGAGCGAGAATATGTCTC +AAAAAAAAAAAAATGTATATATGTCATCTGTGATGTCCCCATTTAAAAATCTGTCAGAGG +GGAAAGTAGCTTTTCCCTCCGTTGTAGGGTGGTGGGGGTGTTTAGACAGGGGTGGCCATC +TCCTGTTCCTGGGAAGTTTCTATTTGCTGGTCCTCCCTAGCGAGGCTCTCAGGTTGTGTC +CCCGCCCCCTGCTCCTCCCAGCTGCCTGTGCAGAGGTGCCAACACCTGTCCTTGGACTGT +CCCCTGGACAGCCTGGGGCGTGGGGTAGTATCCCCTCCTTGTGTGGACTCCGGAGGTGGG +GTTGACATGTCAGGAGGGGCGGGTTGATGCCTGTGAGGTGTGGTGGCCACGCGTGTATGC +TACTTGATTAAAAAGGGAAAAAAGGAACAACTGGCATCATTTCCTTTTCACAAAGCTTGT +GGTGATCTCTTGGTATTTTGTTGCTAGCTGTGGGTTGGTGGTTCATGGCTATTAAATTTA 
+GGTATTAGTGACCAAGTGTTTTCTTTTTTTTTTTTCATCTTATTTTAAAATTTACATAGA +GTTAAATTCACTTTTTGGTACAGTACTGTGAGTTTTGACAAATACACAGTCATGTAACCA +ACACTGCAATCAAGATGCAGAACATTTCTGTACATTCCAACATTCTTCTTTGTTGGTCAA +ACTCTCCCACCTGTATCCTCTGGTGGGCACTGAACTGCCCCTGTAGTTTTGCCAGATCCT +GAATGTCCTCTTAGTGGAGTCACCTGGTACGTAGCCTTTGAACCTGGCCTCTGGCTTGGT +GTGGTGTGACGCATCCGTCGTGTGGTCATTTGTACTCCTGAGTGCTGGCCACCAGGTGGC +TGTGCCCCTCAGTGCGTCTCTCCATTCTTCCTTGAGGGACACTGACAGGTTTCTGGGTTT +TGGTGATGGGCAGTAAGGCTGCTGTAAGGTCGTCCAGGTCTGTGTGTGATGGGCGGATTC +GTTTCCTTCAGGTAAACCCTTGGGATGGGGTTGCTGGGCTGTGTGGCAGGTGCACGCTTA +TGTTAGCATTGCCCTCACACCTTTCTCAGTCACTAGGTGACCGTGGAAGTGTGTCTGTTT +TGGGGCTGTCTACTGTCTTTCGAGCTGTGTGTCCTTTTCCTGATGCCACACTTTCCTGAT +GACTTGAAGTCTTGATACTGGGAGTCCTCCAACTTCGTTATTCTTTCTCAAAAAATTGTT +TTGGCTATTCTAGCTCCTTTGCTGTTCCATGTAGACTTTAGAAACAATATGTCAGTTTCT +ATAAAAAATTCTGGGCTGGGTGCAGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGC +CGAGGTGGGTGGATTACCTGAGGTTAGGAGTTCGAGACCTGCCTGGCCAACATGGTGAAA +CCCCATCTCTACTTAAATACAAAAATTAGGTGGGCGTGGTGGCGGGCACATGTAATCCCA +GCTACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAACCCAGGAGGCGGAAGGTGAGGTGG +CAGTGAGCCGAGATTGTGCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACCCCATCT +TAAAAAAAAAAAAAAAATCTGCCTGCATTTTGATTGGAATTGATCCTGTAGATCAATCTG +GGGAGAATGGACATCCTAACAATATTGGATTTTCCAAATCATAAACATAGTTCATCTGTT +CATTTATTTAGGTCTTCTTTGATATCTTTCATGGGTGGTCGGCAGTTTTCATCACACAGG +CCCTGTGTATATTCTACTAGATTTATAGTTAAATATTTTATGTATGGCACTGTGATGAAT +AGTACCTTAGTTCCAATTGATTGTTTATTGCTAGCTTACAGAGCTAGAATTTGTTTTATT +TGCACTGATTTATTCTAGTAGTTTTTCTGTAGATTCCTTATCCAGTTTTCTTCTCTAATT +TGTTAATGTAGTTAATTACGTTGTTTTTTTAAAAATGTGTTCCTGGGATAAGTCCTACTT +GGGCATGGTATATGTTCCTTTTCATGTGTTAATGGCTTTAATTTGCTAGTTCCCTGCTGC +ATCTGTGTTCTTGGTGTTAAGGTCATGGTACGGGTAGTGTCATCAAGCGAAGTGGGAAGT +AGCCCTCCTCCTGAGTTTTTCAGAAGGGCAGGACAGATATAGGATTGGTATCATTTCTTC +CTTAAAATTTGGGCCTGGAGTTTTCCTAATGGGAAAGTTTTAAACTACTACTGTATGAAC +TACTAACTATAACTATGACTACTCTGAACTACTGTTGAGTTTCTTGAACAGGTGTAGGGC +TGTTGAGGCTACTGAGATAGTTTGTGTCTTGCCAGGCATTGGTCCATCTCATATAGATAG +TGGAATGTAGAGGTGTCGGGCGTCTGTGGTGCTCCCACACGGTCCTTTGGCATCTGCCAG 
+GCACTGATTGCTTAGGTTTCTCCTGTGCTGTTTGTCAGAAGCAACAGGGATTTGTCTGCA +GGTCACTTCTGCCATCTGTCTGCTCTCATGAGCTGGAGCCTCCACCTGTCCCTGGCGGGT +TAACCCTCAGTAGCACATGTGGCCTCAGGCAGCTTGGTGCAGGGCATCCTGGTGTCGGTC +TTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCCGGACTGCGGACT +GCAGTGGCGCAATCTTGGCTCACTGCAAGCTCTGCTTCCCGGGTTCACGCCATTCTCCTG +CCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCTGCCACCGCGCCCGGCTAATTTTTT +TTTGTATTTTTAGTAGAGACGGGGTTTCACCTTGTTAGCCAGGATGGTCTCGATCTCCTG +ACCTCGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCG +CGCCCGGCTGGCGTCGGTCTTTTGCCAGAAACTTCTGAGCGTGCTCTCTAGCTGTGTGTG +TGTCAGCCACTGCCTCAGCTTCTCCAGGGCCAGTCCCGAAGCCGTGGGGTATGTGGGTTC +TTGTGGGCCGACAGCACTGGACGTTGCTAATTGGTTGGCTGGTTGACAAACCCTTGAGGT +TCCAGGGCCTGATGTCTTGGTGAGTGACCTTCCCTGGAAAAAGACCCATGTTGATTTGTG +CACTTGAGAGGAGCAGTGTCTTCTGGGGACCTATTTGAAGACAGTAGCCAAGCCATACCT +CGAGGATTTTTCGCTTGCCAACCTCAGGGCTAGTGCCTTCCTCTTCTATGCAGTTGCAGC +GAGTTGGCCTTGCTGCTTGCTGGAGCGGCGAGGGCAGAGGAGCTGCCCACTGCCCTGTCT +GTAGGGCCAGTGTCCCTGTGGAGCGGGCTTGGCACACTGTGGAGCCTGCACAGCGAGGCG +TGTGAATCATTTGATTCTAGGTTTCCTCGCTGCCCATGGGCCAGGCAGGCTCCAGGTCCC +TGCAGACCCCTCCAGAACAGGGCCAGTGGGGGAAGCACTGTGAGGGTGTGGCTGATGGGT +AATCTCCAGTGCTTCAAAGTGTGGTCTTTTTTGGAAAGAGGATGATTGCAGATAGAATTA +GGTTAACATGAGATCACACTGAAGCAGAGCAGTGGTGTGATCCAGTGTGTCTGGTGTTTC +TCTAAGAAGAGGGAGGAGGGGCAGGGCTAGTAGTCCATATGAAAATCGGAGCAATGCTGC +CATAGCCAAGGGATGCCGGGGCCACCAGGAGCTGGCAGAGGCTCCCTAGAGGCTTCTGAA +AGAGGATGGCCCTGCTGAGGCCTTGATTTCAGACTTCAGAATGGTGAGAGAATAAACCTC +TTGTTCTAAGCCACTGACTTTGTGGTATGTTGTCACAGGAGCTCCACCAGGGAGGTTGTC +CATGCTCCAGAGTGGGCTGGATAGCATCTTCCTTTCACCCAGGAGGCACTCTCAGCTTCA +GCCAAAGTCCTAGTCTGGGAAGGCACAGTGGGCAATGTCTCTGCACCTCAGCCTGCTGCA +GCTTCCCTATGCCTCACTGTGTGGAATCTCCCCATCACCTCTTGTTTCCCTGTCTGCCTC +TCTTGTCCTTCCTGTGTGTTCTTGGGTTGGCTGCAGAGCCAGGGTGATGACACTGTTGGA +AGCCTTGCTGGGCTGGGCCTCTGCTGGCCACCTGGGGCTGGGCTCTGTGTAGCTGTGTGG +GGCTTTGGCCTTTGGGGAGCTGGGCCTTTTAAGGCCAGCTTTTGGGTCTGGCTGACTTTC +CCTCTCCTGTAGTCTCAGTTGTGGCAATAGTAAAGCCTCTCTGCAAAGGTGCTTTATCTT +TATAGAGTTCTGAGAAGGGTGCCTTATGGATCAGAATCAGATCCAAATAAGGTGGGCCTG 
+GCCAGCCAGCCATGTACCTGGGGCCCTCTTAGGATACCAGACTCCATCCCACAGTGCTCA +GGGTGGAAGGGTGCTGTGGGCTCTGGAGATGGGCAACTGGACGGTGAGGTACGGAGCATG +TCTTGTACCCGGTGGGTTCCTTGCAGAGTTGAGGTATGAAGTGCTAGGCGAGCTCACAAG +AACAGACATTAAGCATCTGGGGAAAGCCAACCGCAGGCCCAGCCATGAAGAGGCCGGTGG +GCCTGGTGGGGACTCACAAGCCTCTGCTTTGTGTTGTAGTATTTTAACCACATCAGCATC +GAGGACTCGCGGGTCTACGAGCTGACCAGCAAGGCTGGGCTGTTGTCTCCATATCAGATC +CTCCACGAGTGCCTTAAAAGGTAGGGTAGGGGGGTGCCTCCCCCCATGAGTCAGGTCGGG +GGAGCTCCTCTCTGGCGTCTCATATTCTTGTGGCTGTTTGTCCCAAGGCAGAGGCATGGC +CAGGTTTCCCTGGGTACACAGCAGCCCCTTGGCCCTGGCCACCAGTCAGTCCCACAGGCC +TGAATCGGGCGTGTGGAGAATTCCCTCCTCTGGCTGAGATGCAGTCTGGGGAGAGGCTTG +GTCATTTCCTAAGGGCTTCCCAGAGCAGGCCTCCTCAGAGGCAGCTGGCTGCTCTGCTCA +GGGGACGCCATCTGTTGTCTGTTTTCTCTTAAAGAAACCATGGGATGGGTGACACGTCTA +TCAAGTTTGAAGTGGTTCCTGGGAAAAACCAGAAGAGTGAATACGTCATGGCGTGTGGCA +AGCACACAGTGCGCGGGTGGTGTAAGGACTCCTTCTCTGCTGCCTGGTGGCCGCTGGGCG +GGCGGCCCCTGGTGTGGCCCTGCGCCTCTGTGGCTTGGTCTCAGCTGTTGCCCTGGCATT +GTCCCTGAGCCCAGCTGTGTGTGCTGGCCACCACACGTTCAGCCTAAGGCGGACTGGCAG +CCGTCCTGCCTGCACTTTCCCTGTCTTTTCCCTTTCCCCCGCTGTCACCCCTGTCTCCGT +CCAGCCCTTGTCCCTGTGGCAGGAGGAAAGGCCCTCCTCCTCGAACAGCCAGCAGAATCC +TGCTGCTCCCTGTTTCTGGAGGTGTGCGTCTTGGCGGGAGGGAGGCTCTCGGCTGCGTGG +CTTTGAGTGGTAGTGTCCTCAGCTCCTAGACTCTGAGGGCCACTCTCTCAAGGTGGCAGC +TGGGTACTGTGAGACTCAGGTGCCCAGAGCAGTGGCAGAGTGCTGCCTATTCCTGTAGAA +TGTGCCCTGGCTGGCCCTCGGGGTGTGGGTCAGGAGGGCTGGGAGCGGCAGGGGGCCCCA +TGAGCACTGGGTGTTTGTGACCCCCTCTCCATGCTTGCTCTTTCTCTGTGTAGGTAAGAA +CAAGAGAGTTGGAAAGCAGTTAGCCTCACAGAAGATCCTTCAGCTGCTGCACCCACATGT +CAAGAACTGGGGGTCTTTACTGCGCATGTATGGCCGTGAGAGCAGCAAGATGGTCAAGCA +GGTAACTGGCCATCAGCAGGTCCCAGGGCAGCCTGTGCTGCCACCCTGGAGCGTATTCCT +GAGGCTCTGTGCTCAGAGGGGGCAGAGCTGGGCAGCTCTGCTGTTGCTCACAGCTGCCTC +TCTCCTGGGCCACTTGTGTGGCTTTGTGGGCAGGGGTGGGGTCGTCCCAGCTACATCTCC +TATCCCAGCCTGCTGGGCATGGTCAGTGAAGTGTGGGTGATTGGGAACGCAGGCTGGGCC +CAGCTATGTGGCTTGTAGGGAGATGGCCAAAGTGCAAGGCAGGGCCAGGCAGGAGATGCT +GAAGGTGCGCCCGGCTCGGAGGTGTGCAAAGGCTCAGGAGCCAGGGCCAGCTCTCCCTGC +TTGAAGGAGAGCGAGTGTGTCAGGGTAGCTCTCCCTGGTACCAGTGCCGGTCCATCTCTA 
+ACAGAAGAGCATATGTGTACCATCTGTCTTGTTTCTGCTGTTGTGGCCAACATCCTGCAG +TGTAAGCCAGTGCCCACTCCTGAGGGGTGGAGGGTAACATGGGGAGGTGAGTGTAATCAT +TTTCCTCAAATGATTACAGAAACTCTCTGGGTTCTTGCCCTCCTGGTCATTGGAAGGGGA +AGTAGTTGTGGAGGGGTTAGGCCTGCAGGTGGGGCAGCCCTAGGACTGTGTGGTGGGTGG +GCCCTCTGCCCAAGCCAGCTGCCAAGAGCAGGGCCTGCCTTTAGGTCCCTGAGACCGAGG +CCTCGGCACTGTCCCCGAAGAGGCCTTTACTCTGGACATGATAAGGGAGGGTGAGGGGGC +TCACACGGGCCACCAGGGCATGTTAAGGAACTTGAGAGAGATTTTTGGGAAGCAAGTTGA +AATGTAGGTAGCACTTGCTTCTGCCCTTGCTGTGCATGGTCCTTGGCTGTGACGGAGGGA +GAGAAGGCAAGCCCCACTCTGGCCCGGGGGTGGGAAACTTCCCTGAGCAGGAGGGGGAAA +GGTCCCAGCCAGGCACAGCCACAGCTTGGGGGAGCTCTGGCATCCCGAGGAACCACAGCA +CCCTTGGGGCTGGCCTAAGACGGGAAGGGGCTGAGGCCAGCCTGATGCTGCTCTGCATTC +ATGGAGTGTGCAGAGAGCTCAGTGCCTGAGGCTGATGGGCTGGGCTGCAGCGTGCTGTTC +TTACTGGCTATGGCCACACTCTGGTTTCACTCCTTGATGAGGACTCTGGGCACCACGAGC +ACCTCACCTCCCTACTCTGCCCATGGCAGGAGGCCATATAAGTGGCGTGGTGTGGGCAGA +AAGGCCTGGCAGCTATGGACTGCCCCTCGCTCTCTGCTGCTGTCAGTGGGCCTGGCATCA +CTGAGGCGGGCCAGTCAGCATGCAGTTATGTTGCCACAGCTCCTGGCTGTTTCGTGTCTG +CCAGACCCTGGGCGCGCCTGCCTTCAGGGTCCCAGGCACACAGCTGCTGGGCAGCGGGCA +GGCCGTAGAGCTACAGCCTGCAGTCCTGAGCGTGAGGTGCTATACTTCCCAGGAGACATC +GGACAAGAGTGTGATTGAGCTGCAGCAGTATGCCAAGAAGAACAAGCCCAACCTGCACAT +CCTCAGCAAGCTCCAAGAGGAGATGAAGAGGCTAGCTGAGGAAAGGGTGAGTGCCACCCT +AGCGGGAGGGCTGCCGGGCCACGGCCATTCCTGTGGCACCTGTGGAGGGACAGCAGACCC +CAGGACCCGTGCCTGAGCTGACACCTTGGAAATGCTTGGATCCTCCTTCCAAAATCAGAT +ACAGGCTTTTCTTTGAGCTTCGACATGTGTGATAGTTTTATAAATCACTTCAGTTTTGGT +TCTGTTGTTAGAGAGCTGGCAGGCCAGCCTGTTGGTGGACCATGAAGGTCACAGTCTTGG +TGGGCCACCTGAGTCCCATGTCCTCCCTGGGTGTTTGGAGGTAGGTCAGTGCACCTGCCA +GGGTTGAATCTGTAGGGCTTGGTCCTGATGAAGCCTCCTAAAGCCACCTTGGGAACCCCA +CTCAAAGAGGGGCCTGTCAGGCTGGCAGTGGTGTCTTCTGTCTTCAGGCATCGTCTGCCT +GGCCACGGCGTGACCTGCCCACCTGGGCCTTCTGGGTGGGAGCTTTCTTAAGTGTGGCAG +TCCCCTGGCCTGGGTGAGAGCCCCAGGAGGTATCTGCTGCCGCCCAGGCCAGCAGGTCCC +TTGCTGCTCCCTGGGGTGGGGCTTGGCCTCCTGGAGCCAGTGGGACTGCCTGGTTTGCCC +CTGGTCGGAAGAGGGGCTGTGGGGCTCAAGGGCAGCAGGTGCAGGACATAGTGCTCCTGG +CTGGGCCTGGGGCAAGCAAAAGCTGGGCAAGGGAGGAACACCCAATGGGGAGGCCTGGCC 
+TGTCCCTGTAACTGCACCCAAAATTCTTCACTTCTTGGTACATGTCCTGATTGCCAGGGA +GGCTGCACATGTTGTACTCTGATGGCAATGCAGGGGGCCTCTGCAATCTCAGGCTGGCCT +GCCCCAGGCCTTCCCTGCTCCTCCTAGCCTGGCATCACAAGCACTGGTGCCGTTGGGGCT +CCAGGGCCTCCTGGCATGATCTGCTGGGCTCTCCCTCCACCTTGTGTCTTCCCGAGCCTC +TGCCAAGCCCACCTCACTGGTACCCCTGACCTTTGTTGTTCTTTCAGGAGGAGACTCGAA +AGAAGCCCAAGATGTCCATTGTGGCGTCCGCCCAGCCTGGCGGTGAGCCCCTGTGCACCG +TGGACGTGTGAGGGAGGTGGCACGGGCCAGGGCGCGGGGGCCGCCAGCCGCACTTCTGAG +GAGACCAGCAGTCATGCATCGTGCACCACAGTGTCAGGCCTCCAACCCACGCTCCTTCCC +TGTGGCCAACCTGTGGGCCCGGCCTTAGGGTGGAGGCTTTAGTGTACAGGGACAGCCATG +GCCACACAGCACACATGTGGAGCAGCGGCTCTCCCTGGAAAGCTCCAGGCCTGAATGGAT +GGACTCAGCGACTGCACCAGTGGCAGCTGGTGACTGTGGACAGTGGTGGACCCTGCTTCT +GTGCACCTGCTGCAGGCTCTTTTTATGAAGGCTTTCATGAATTTTAGTATGTAATACGCA +CTGACGACACATGATGCTTGGATGACAGATGAGAGGGGATGGCTGAGTCCTGTGGCTGGC +CCGTGATGCCAGGTGGCCCATGTGCCCAGGGCGCCTGCAGGGCTGCTACAGGGACCTGGT +CAGGAGGTGCACATGGTGCCCTGCCCTCACCCACCCTCTGTGTTTCCCCTTCTTTGAAAA +GGTAGAAGAGAAAGGAATATTTTAAACCTTTTTGGCTTAAACAGAATTTTAGCATCAGAA +CTAGCTTTCTGGGATTGGAGGCAAACCATCAAGGTGGTCCCTCTCCAGTCTGGACACGAT +GCCAGCAAGGATGACGTCCTGCCACCTCCTGGAGTTACCCTGGCCTCCTAGGGTCCCTTT +TTCTGATGAAGTCTTAATTCCCTAAAAGCGCCTCTTTGGACACTGAGGCCCTCTCTGCCT +TTCCTGGCCTCCGGCAACAGTTTTTTACAAAGATTTTTTGCAGTCGAGTCCATATGTCCA +CCCATTGATTTTTAAAGCTTTTGTGATATTTTAGCATTTTGAAAGACTTTCACAGTGAGA +GTAGAAGGTAGATTTGGAATCATGCATTTTAGCAAGTGGACTTGTTGAAACAGGAAGCAA +GGGCCTTCAGTGTAGCCCATTCTTGATCCAGAGCTGTTGCCTGTGACAGCGGTTTCTCTG +GATGTCAAAGGCAGCTGCCTGGTGCCCAGCTTGCTTCTCGACTGGTGGCCCCTATGGGTG +GGTGTGCGATGGAAATGTGTTCCTGCCGGAGTCTGAGGCACCAGGGTGTGCTCAAAGGCT +GGCCCTGGTGGTGGACTGGCACCTGTGCAGAGTGCCGTGTGCTTGTGGTGCGCCATCTGA +AGCAAGAGTCCAGCGTTCTGCCGTGTCTGTCCCCCACCATGCCCCCTACAGGCGGTACTG +ATGGCGCTTTTTTTTTTTTTTCTGTCAGGAAAACAATGTTGGCCTGTGGGCCGCCCACAA +CATATCCTTCCCTCACTACCTGTGTGACCAAGGTTGGCTTCTGTTGACCTTTAAAAAAGA +AACCCTCAACTCAAATTGCTATAATTAGACACTTGCTTCTGTCTTGCCTCCTGTCTGCAG +CTGTGAATAGTCATTTGACTGTGACTGTTGCCCTTAGCCAGCCAGATGCGCCTGTGAACC +AAAGCTTCGTGCACATGTGTTCCCCTAAAGGTTGGGGAGCCTCGCTGTGTCTTGCTGTTC 
+CCAGGCACCACCACAGCAGGTGCTGCCATACTCTTGTGGTCTCTGTGCGCCCCCCCCCCC +CCCCCACCCGTCTGCCAAGCATGGGTATGAATCGTGCACACAGCCATGCTTCAAGGCCGG +GGCAGGGGAGCCTGTGCTGATGCCATCCAGGGCACTGGGCTGTGCCTGGAAGGCGAGCCT +TGATTGTCTGAACACATAAAGCAAACTGTCCAGAAGGGAATGGCTGATGTCTTTATTCTG +AGGGTGAGAGGCTGGCACCCTGTGGACCCTTGACTGGCCAGATCCCCTCCTGGGGCCTCC +CTGCAGGCTGGACCCTGCCTTTGTAACAGCTGAGCAGCACCCCAGCATGGCCCCTTCCTT +GGTTGATGCCCTCTCTGCTGGGCTGAGGGCCAGGCTGGCCACAGAGGGGCCAACTCTGCC +CAGTGTTCTCCACCCCATGCCCCCAGCTGGTGCAGTTCCCAGACCTCTCTCTGCCTCTCC +TCAAGTTTAAGTATCAAATCGAACATTCTTTTTTAAAGTAAGCTCTGCCCTGACTCCCCC +AGCCATGCAGAGAGGCTTGCAGAGCTGCCTAGGCCTAGTTTGGCCCTTTCCCTGGCACCC +AGGCCCTGTGTTCAGACCCCTGGCTCTCATCACAGAAACACCCTTTGTTGAGCAGTTGTT +TATTCTGGCCCTCACAGCCTTGGCTAGTCCACAAAGGCCCTGGGGATGGGCAACAGGCTA +CAGGAACCCACCTGTCTTCCTGGTCAGGGCCCCTGGCCCCTAGCAGCAGGCCAATCCTGG +TGGGGCACAGGGTTCTGTGCCCTTTGGCTGCCTACCTCTGAATATCCTGGCCAGCAAGCC +ATGCCTTCCCCGCCCCTGGGGCCCTGGGAGCCCTTCAGCTCCTGTCCCCATAATGGGTCC +TGGGCCTAGGATGAGGGGAAGGTCCCAGTTTCTTGTAGGGTGTTATCTGGGGGTCCTGGT +GTGGGTTGAGCTGGAGGGCTGTGGGGCCCCAAGACCCCTGTGCCATTGGGGTGCTCCACC +CTCTCAAACAGGATGAGCATCTCACAGTGCGGGGTCTGCGGGAACAGGTCCACTGCCACA +GCCTTGACCGGCCGGAAGGGAATGCCCTTCACCCGGTTAGATGGGGCTCTGCAGAGGCTG +TGGGGGGAAAAGGGGGGCGCTAAGGTCAGCCGATAGGCTAATCAGGGGCTCCTGTGGGAG +CTGGGGGCTTGGTAGCAGGCCTAGCCCCCACCCAGGCCCCTGCAGACACTCACTCCACAA +AGTTGCCCATGGCTGCCCGGGGGTTGCATGAGACGTACAGCAGCCGCCTGAGGTTCTTAG +CTCTCCGGATGGCCAGGATCACCTTGGAATCTGAGGAGGCAAACACCAGCTGGGTCCCCA +CAGCCAGAGAGTGCATAACATGGTGAGCCACCCCATGTGTCCTCGTTCCCGTGCCACCTC +CTTAAGCAAAAGCCACTCACGCAAGCCAGCACGGGGTGGGTCCAGGATGGCCACGAGGTG +CTGGGAGGCCAGTCTGCTCACCAGGGTGGGCACCAGGTCCTCGGCCCTCCCGCAGTGGAA +CTCCACATTACTCAACTCTGAAGAGATGGCACCGTGGCCTGTCAGAGGGCCACATGCCCT +CCCAGCAGGGCCAGCCCTGGGGAACCCCTAGCCAAAGAGCTTGCTCACTTGCTCTACCTG +TCGGGCAGCCCCCAAGCCCTGGCTGTGGCTGAGGCTTGCAGCAGGGGTGGCGGCTGCCCC +CATCCCCACCCCCACCCACGGCCTTGCCACTCACCATTGTCCTGGGCGTTCACCCGGGCG +TCCTCCACAGCCTCTGGGCATAGCTCGACCCCAATGACCCTCTTTACCTTCTGAAACAGG +AAAGCGTGGTGTCGCCCCACCCAGGGAGGGGAGTACATGGGGCCCTGTGGGCAGCAAAGA 
+GGAGCTGGGCCTGGGGCATGATGGAGTCAGCTGTGGTGACTTTGAGCTGGGGTCAATGGG +GGTCTTGGGGTGGGGAGAGGGTGCCCTCAGCAGGGCAGGAGGGCTCACCCGGGCCAGGGC +CAGGCCAATGGTGCCGGTGCCACAGCACACGTCCAGCACCATGCTCCCCGCATCCAATTG +GGCCCAGTCCTGGATGACTGTGTAGAGCACCTCGGCTGCGGGTGTGTTCACCTGGGGGCA +GGCGGTGCCCAGGATGTCAGTGGCTCCTCTCCTGTGGTGCCACTGGTGCCCCGACGCCCA +CATTCCTGGTCCCTCCATCCTGCCGGGACCTCCGGCGCCCACTCCCCACCTTGGTGCCCA +ACGTCTTCCCTGACCCCACCTTGGACCAGCCTTGCTGATGCCCTCATCCAGCCCCTGGCT +TTAAAAGCCAAGCCCGTGCTAAGCACAGCCTCCTCCTTGCCCCACTTGGTTGAGGGCTCA +TGAGGCCCCGCAGGCCAATAGGCTGTACCTTCCCTGCCCCAGCAAGGGGGCTTTGCCTCC +CACAACTCACACCGGGGCCCCTGGAATCACCCAACTGCAGCTCCCACCCCCACACCTAGC +CCTCCAGGAGGTCCTGCCAGGCCTGTCTCCAAAAACGGCCGCTCCAGGTCCCCACAGCCT +TCCTGCAGCAGTGAGCAAACAGTGAAACAGTATTTCCCAGCCCAGAGGCCACCATGGCCT +CCCTCCTGCCGAGCCCAGAGCAGTGCCCTGTGGTAATGCACTGAGGGCCACTCCTCATCC +TCCCTGTGACAAGGGCCACTGCCGTGTGTACCTCTAGGTTATCCCGAGTCAAATCAGTGA +TGACCAACCTCTTTCTTACTTCACCCTCCCCTGTGGCTCACCTGCCTGGATACTCCCCCA +AATCTCACCGCCTGAGCCCACTGTTCCCATCACGTCCTGATGGTGCGTCAGGCCTCCCTC +AACATGTCCCCATGCCCACCAGGGACTCATGGCTACCTGGAAGAAGGCGTGTGGAGAGAT +CCGGAAGGTCAGCCCTAGCAGGTCCTCGTGGATGCACCGGTCCCCAGCCACATGCTCCAG +GGGCAGGCCCTCCTGGCTAGGAGTCTTTCTGTGGGCGAAGGTGCAGGTCCTTCAGTGTCA +CAGTCCCTGCAGGGACCTGCCCCGCCCCACTCGGGCTCCTTACCGCTGTCCCTCCTCCAC +GAAGTAGAGGCAGGTCACTCCACTGGCCCTGCCTGGCCCTGCTGTGAAGTGCTGCGCTAG +GGAGGTCTTCAGCTCTGCCAGCTCCTCAGGGCTCAGCTTCTGGAGTAAGTGGTGAAAAGT +TCCCATCAGGCTGTGCTGAGGCCCACCTAGGCTAGGCACCCTCCCCCAGCAGGGCCCCCG +TTGAGACCTGGGGGTGGAAGTAGGCAATGGCCATGGCCTGGTGGCGGCGGCTGGTGCGCA +CAGTCAGCTGCTTCCAGTGGCCTGTGTACGTCTCTGGGTCGTATGCCGAGTATGGAGTGG +ACCTGTGGGAATCACGAGCTGGCCCAAGTGCCCACAACTGGGCAAGCAGTCCCCCTGCTC +TCTCCTCTGGCCACACCTTCCTTCATCTGGCTGAAATGGCAGGCACTCCAGAATTCCCGG +GGCAGGATCACCACCCTCTGCTCCCACACCGCATAGGACTTCCCGGGAGCCCCGTCACAG +GTCCTCACCGGATGAACTCCTGGAAGGCCTTCACCACCTGCTTGGTGGCTTCGGGGATGT +GCACGGTGTCAAACGGGGCTGCCACAGCACACGTCCCGCCCTTGTACTTGCCGAGCCGAC +AGCCCACGGTGTTATCCTCCCCATCCACCCCGACGCCAACCAGAAACTCACACTTATTAC +GATACTCAGTCTGCAGGGAGAGAGAGCTGCACCTTACCCTGGCTGCCCAGCGCTTGGGCC 
+TGGAAACCCTGATTTCCCCACAGGGGCTGGCAGTCAAACAAGAGGAACAGCTGACAACTA +TGTGATGTTACCAGAAGAAAACACAAAGCAGGGAGATCAACAGGAAGGGTGAGCAAAAGC +AAAGTTTTTCAAAACCCAGGATAGGGGTTTGTGGCCACCGAAGTCTAGTCTGACCTGCTG +GGGTGATGGCCTGACCCCCTCCAGCGGGCAGCAGGCCTTGTTGTGCTTGTGCCTCTGCTC +GAGCAGCCAGGGCAGCAAGGCACGGTTGGTGCTCCCGATTTCCCTGTAAGAGGAGCAGAT +CGGTGGTTGGACTCCACCTACTGCCCCCGACCCATGCCAGGCCACTCCCCAAATGTCCAG +GCTCCTGACCCCTCCTTTGGGATGTCATTGTGCCCACTGTGACGTGGGTGTCTTGCTGGC +TAGCTCCCAAAGGGGCCGCCAGATCCTGTGCTGGGCCTCCCAACTGGTGCATGGACAGGC +TGAGGAAGCACAGGCCGGGCAGAGCCCTGGCCAAGAGGGGCGTCTTGCGCCCACACTCAC +TTGGCAAGTTTCTGCAGCACCTGCTCGCACTCCAGCTGCTTCCGCTCAAGCTGCTCAGCA +TAGGGCACTGTCCATAGAGGGGTCACCACGTCGGCCACTCGTGTTACTGGTGGCTCACTC +TCACCCTCCTGTCGCCTCCTCCTGGCCATGGGGTCGGCCTTGGGCCGGGCCAGGCGCACA +CTGAGTGGGCGGCCTTTCCAGAGGGCACCATGCAAAACGCGCAGGGCCTTGTCCCTCTCT +GCAGCGCTGCGGAATGTCACAAAGGCGCAGGGTGGTTGCCCAAAGAGTTTGGTTTTGTGG +GGCTGCAGACCAAAGCGGCCCAGGAAGCGCCGGACGTCGCTGAAGCTGGCGTGGCGAGGC +ACGTTCTGCAGCTCCAGTTTAAAGATCTCAGAGGTAAACAAGTCATCCCTGATGTAGCTG +TAGAGCCCGGGCTGAGGCCCCGGCCCTGTAGCCGCCCCAGCGCCCTCTTTCTCCACCTCC +TCCAGGGCTGCCGGGGCTGCAGGGGGCACCGAGACGGTAGGGCAGCTCAGGGCACTGCTG +CTCTCCTGGCCACAGCTCTCCATGGGCTTCGGGCCCTGTGTGGGGACAGATGGGGTGCTA +GGGTGAGGACTAGGCCCTGGGGGCCTGGCCCCCCAAGCTTCCTTATGGGACACCCAGACC +TCCGTCGGTGCACTCTGCACTCAGCCCTGCCCCTCCCCCAGTCTACCCTCCCGACCCCAT +TCCCGTCTCCTTTCCTCCCCTATCGCACCTGCCTCGTCCCCACCTATTCTCTGGCAGGTT +TCCTGACCCACCCCGCCTCCTCCCACCCCATCCCCGTCTCTACCCAGCGTCTCCCCGCAC +TCTACCTCGTTGTCGAGGTTCTCACTCATCGCCCAGGCGGTTCTCCGCCTAGACCAGGGA +CGCCATGGGGGCCGCCTGGCCACCTCGTCCTGGGCCTGTCACAAGGGAAGTGGCCTGTCA +CAAGGGAAGTGCTCAGAGGGGAGGTGCTCACAGAGCCGGTGCAACGCCGCGAGGTCGCCG +CCACCCCCGGCTTCGCCCCAGGCGGGGCGGGACTCGAACCTGCGATGCTCAGGTCCGGGT +CTCAGGCTTGGGGCTGTACCGCCCGCCCGCCAGGGGCCCGCGCCGGCCGCTCGCTTCGCC +AGCCACTCTTAGTCCGCCAGCGCGTGCGGCGGAGGCCGAGCGTCTCTATGATCCTGGCTT +CTGGCAACGTCATCGTCACGCGCCGGATCCAACCCCCAACCACTTTAGCCAGCTCTAGAG +GCGCGCGTGGCCGGGACGGAAGTGCGCGCGGGTGTCGCCGGGAGTGCGCGCTCCTCTGGC +TGACGGGCGGGCCGGGCATGCGCCGCGGGCGTTTTGGCGGGAAGCGCGGGGCGGGCCGGA 
+CAATGAGAGTGTCCGCCTCCTGAGCCAATAAAGCTGTACTGGTTTTGAATCGCGGCGCGT +TTCCCGCCGCTGGGGTCAGGGGTCGAGGTTCGGGTCGTGGGGCGGAGGGAAGAGCGGGCG +GGCGGGAGGCGCCGGCGCCAGACGCGGAGGGAAGGAGCTACGAGTAGCCGCCGAGAGGCC +GCGGAGCCAGCGACGACCGACCCAGCCGAGCCGCCGCCGCCGCCGCGCCCCCATGGCGGC +CGCCAAGGTGCCGCCGGGCCCAAGCGGGGACAGGGTGGGCGGGCGGGGCCGGGGCCGTTA +TCAGCGGCCACGTGGGCGGCCCGCGCCGCCGCCACCAACCTCCGCCGGCCTGCAGGCTCG +GCCGTCTGCTGGGCCTCGGTGGCGCGGAGTCGGGGCGCGTGGGGTTTGGGGGCTCGAGGC +CTGCGGAGTTGGGGCGTTTGGGGGTGCAGGCTCTGCAGAGGCCTGGGGGCTGCAGGGCCC +CCGCGTGACCTTGGGGTGGGTTGGGGAGCGGCCGTCGCCACGCGGGACGGCCCCAGCCTC +CAGGAACTGGTGCGGGCGGCGGGTTTCCTCCGGCTCGGCCCACTTGGGGCTCCCCACTAG +TACGGTGCATTTATTCTTTCCGTCGCCCACGCAGCACTTCATTCATTCGGTTCTTACGTC +TGGAACCGCCACTGGCCTCTGTGGCAGAAGCTCGCGTTCCAGCGTGGGGCACAGGTCCTA +GATGCGCCGACCCAGGCGGCGGCTTTGTTGGCTGCGGTGTCTGGTTGAGTGCGGGCCGCT +GCAGAGCCGCAGTGCTGTGGGGAACAGTGAAGAGCAGCGAAAAGCTTATGTTTTCTTTTC +CAGTTTGAGTCTAGTGGTGAAGTCTTGGGTCTCTTACGGACTTCTGGGTGACCAAGGCAT +GTGTTCTGGAAGACTCGAACCTAAAATATGATTATTGGTGCTCTCTTATCCTCTAAATTG +GGCCAGGATTTCATGACTTTCACCGGTACAATTTATCTAGTTATTAACTTAGTTTTCTGA +ACTGTGAAAAAATTGCTGGCAGAGCTGTGAATCCAGTGTAAATACTAGCAGTCCTGTAGC +GACTTTTTTCGTTTTACTAAGCAAGAGCACAACTATTTCCTAATTAGTCAAAACTGAAAG +AGTTACATTTTGCTTTAAAGTAATATTTCAAAATCTATTTCGACCCATCATTGTGTCCTT +TGATCTTCACACAGCAGGTATCATCTGCATTTTATGGATGAGGAAGAAACTGATGCTCAA +AGAGATGAGGCAGTTTCTGCCAGGCCACTCTCTGCAAGTGTTGGCGGCTTGTAGGGTCCC +CCATCAGTCTGTGCACAGATCTGATTGATGTCTGTGAATCTGGGGGAGGGGCCCCAAGTT +CTGGTGTCTCACTCTGTGCTCACATGGAGCTGGGCATAAACGCAGAGCTTTCCTAGTGTG +GAAGAGTGGGCCTAATTTCTTGCTTTGGGGTTGGGCCCATCCTTGTATCTGAAGTCCCTT +CATTGTCGGAGGGCTGACCACTGCAGGCACTGGTTGGGCTCTGGTTGTGAGCCAGATCCA +TAGGCCAGCAGTGTAACCTCTTTGTATCTCCACAGGACACTCATGAGGACCATGATACTT +CCACTGAGAATACAGACGAGTCCAACCATGACCCTCAGTTTGAGCCAATAGTTTCTCTTC +CTGAGCAAGAAATTAAAACACTGGAAGAAGATGAAGAGGAACTTTTTAAAATGTAAGTAG +GCTGATGTCCCAGAAATAGGACTCTTTAAGGTTGAGGTTACAACTTTTATGGGTGTCCAG +GTTTTGGCCTGACTTTTAATCAGACTGAGGCTGCTTAAAGAAAGGGCCTAAAGTTGGTGA +CTAATCCCATGTGGAAATTTAACCCGTTCCTTTCATGAATCTTCACCAGATGCCCTTGCT 
+GCCCTGCAGCCAGTTGCATGAAGGGCATCCCATCTTGCTGCTCTGTCCACTGGCATGGCG +GCCCCACTGCCCTTTTTTGCCACTGGCTTCTGAGGATCCAGGGAGTCCCTTGGTTGAGCA +GGGGGTTTTAAGTTTTTGCAGCATAGACTTCTTTGGCATCGTGAAGCTTATAAATCCCCC +ACCCGGTGTCTGTTGTTGTTGTTGTTTTGAGATGGAGTCTCGCTCTGTCGCCCAGGCTGG +AGTGCAGTGGTGCGATCTTGGCTCATTGCAAGCTCCGCCTCCCAGGTTCACGCCATTCTC +CTGCCTCAGCTGGAACTACAGGCACCCGCCACCACGCCCAGCTAATTTTTTGTATTTTTA +GTAGAGACGGAGTTTCACCGTATTAGCCAGGATGGTCTCGATCTCCTGACCTCATGATCT +GCCCACCCCAGCCTCCCAAAGTGCTGGGATTACAGGCGTCAGCTACCGCGCCCGGCCCAC +CCTTAATATACAACATGATGCTTGCGATAACACATGAAAATAGGTGATGGTTTAGTTATT +AAAACCACTTAAAAAAATTCTGTACATGTGCTTCTTGACGCTTTCAGTTAAGAGCTGGCT +ACAGTGGTGGGTACCAGCCTTGTGTCTTGTGACAGTGTCACAGCACCACAGAGACTGTGG +CCTGTCGCCTACACTCAGGATTGCAACAAACATCATGTTTAGTTACACACCTCAGTGAAA +ATGAGGTTGTAGTTTTTCCCTGTTCAAGTTCATGGAATCCCCCCATCCTTGAAGTTGCTT +TAAGAGTCCAGGTTAAGGTTTTGTTGCTCTGGGGTGGGAGAGGGAAGCAGAGAGAGTCAT +TCCTACTGTGAGTTTCCTATCCAAGGAAGGGGGCCAGGAGGGATACCTGGGAAAGGAGAG +CTGTGTGTCACCCTTTGGTCACAGAGGGTGATGGACATCTCCCTGAAACCACTGGGTGGC +CATGGCTCCACTCTGCTGATGCATTGCCGTAGCCCCTGTTTGGGAGTAGCTCTTTGCTGT +GGTGAAGTGCTTGCCTCTGTTCCCTCACTGCCCATGTCGGGTGGGAGGTGCTCCTGGTGG +CCTGGGCCCAGGCCCTTCCTGAGGGAGGTGAGGCCAGCAGGGTGGGCCCGGGTCTTTGCA +GTAAAGCTCCAGGGGCATCCTGTTGGGGAACCCTGTGAGACAGTCTCTTGCCAGAATCAT +GTTGGGGCAGAGAGCAGAGTCGTCTGAGAGAGGCAGGCAGAGCTGTGCACAGAGGGAGGG +CGCCAGCCTGAGCCCTGAGTGCACTGCTGGATTCTGGGTGCCCTGGTCCCCAGCGGGAGG +GGCTGCTCCTGGGCAGGAGGGCCAGGTGTTGGCCAGGGCCTGTTGCCTGATTAGAGGCCT +GGGCCTCTTCTCTGGGTCATCTGACCTCAGATGACCCGCCCGCCTCAGCCTCCCAAAATG +CTGGGATTACAGGTGTGAGCCACCGCGTCCGGTCATCTAATTCTCTTGGTTGTGGAAGTG +GAATTCAAGATGACCTGGAGTTTTTTGGGCTGCTGTGCTTCCTGGTGGTGTCGTGACAGA +GGTCCTTGGCCTGCACCTGATCCATGTTGTCCCTGCAGATCTCTGGGGCTGGGGTGCTCT +TGCACAGTGACCATGGGACGGAGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCGCG +ATCTTGGCTCACTGCAACCTCTGCCTCCCAGGTTCAAGCAATTTTCCTGCCTCAGCTTCT +TGAATAGCTGGGGCTATAGGCGCATGCCTCCATGCCTGGCTGATTTTTTGTATTTTTAGT +AGAGATGGGGTTTCACCGAGTTAGTCAGGATGGTCTGGATCTCCTGACCTCATGATCCAC +CTGCCTTGGCCTCCCAAAGTGCTGGAATTACAGGCGTGAGCCCCCGCGCCTGCTCTCTCT 
+TTTTTTTTTGAGATGGAATTTCGCTCTTGCTGCCCAGGCTGGTGCGCAATGGTGTGATCT +CAGCTCGCCGCAACCTCTGCCTCCCGGCTTCAAGGGATTCTCCTGCCTCAGCCTCCCGAG +TAGCTGGGATTACAGGCATGTGCCACCACACCCGGCTAATTTTGTATTTTTAGTAGAGAG +GGGGTTTCTCCACGTTGGTCAGGCTGGTCTCGAACTCCCGACGTCAGGTGATCCACCCAC +CTCAGCCTCCCAAAGTGTGGGATTACAGGCGTGGGCCACTGTGCCCTGTTTTGTTTTGTT +TTTTTTTAAAATAGGGTCTCACTCTCACCCAGGCTGGAGTGCAGCAGTGGTATCATGGCT +CACTGCAGCCTTGAACTCCTGCGCTCAAGGGATCCTCCTCCCTCTGCCTCTTGAGTAGCT +GAGACTACGGGTGTTAGCCATCATGTCCAGCTAATTTTTATCTTTTTGTAGAGATAGTGT +CTTGCTATGTTGCCCAGGCTGGTCCCCACTGAGGGAAATATTGTGACTGTAGGAGCAGTG +GGGCAATCTCCACTCACTGTATCCTCTGCCTCCTGGGCTCAAGTGATTGTCGTGCCTCAG +CCTCCTGAGTAGCTGGGATTACAGGCGTGCATCACCACACTCAGATGATTTTTGTATTTG +TAGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTTTTGAACTCCTAACCTCAGATG +ACCTGCCCGCCTCGGCCTCCCCGAATGCTGGGATTACAAATGAGCCACCGCGCCCGGTCG +TCTAATTCTCTTGGTTGTGGAACCGGAATTCAAGATGACCTGGAGTTTGTTGGGCTGCTG +CGCTTTCTGGTGGTGTTGTGACGGGCAGTTCTTGGAGCCTGAAGAAGGGCTGGGGCGTTC +GTGGAGGCATGGGGTCCTGCAGTGCAGAGTTGTCCTGTGTCCTCCTGCGCAGGCCCTGCA +TGTGGGCTGCAGGTCTGCACTCTTAACCTCACGGCTTTTCTTGCAGGCGGGCAAAACTGT +TCCGATTTGCCTCTGAGAACGATCTCCCAGAATGGAAGGAGCGAGGCACTGGTGACGTCA +AGCTCCTGAAGCACAAGGAGAAAGGGGCCATCCGCCTCCTCATGCGGAGGGACAAGACCC +TGAAGATCTGTGCCAACCACTACAGTAGGTGGCATGAACGACCACCTCGACAGTCCCCAG +CAGCTTGGCCTGGGACCTTTGGGAAGATTCAGGGCTGATTGGGAACTGGGGAGCGTGTTA +TTTCATGGAGTGCAAGTTTGTGGTGTGTTTGTTGAATTTAAAACCAGAAATACGTTTTTC +AGACGTGCCTCTGAACTCAGAGCAGGCCCGCAGCGAAGCTTAGAGCACACGGGGGTGTGG +AGTCAGCCAGACGGGCCCTGATGGGAGGACGCAGCGCCCAGGCTGGGCTCGGGACGAGGA +GTGAGTGCTGCAGGGCGAGGGGGTGCTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG +TGTGTGGTGTCTGGGGGGTGGGGGTTGGTGTCTGGGGGGGGTTAATGGGGGGTATGTGTA +GTATCTAGGGGGCTGTGGTGTCTACTGGTGGTGGTGTATAGGGGTGTGTGGTTCGGTGTG +TGTGGTGTCTGGGGCAGGGGGCTGTGTGGTGTCTGAAGGTGTGTGGTGTCTGGGGCGGGG +GTTGTGTGGTTGGGTGTGGTGTTTGGGGGTGTGTAGTTGGTCTGGTGTCTGGGGGGTGTG +GTGGGGTATGTGGTTGGGTGTGGTGTCTGGCGGGGGTGGTTGGGGGTGTGTGGTTGGGGG +TCTGGTGTCTGGGTGTTTGGTGTTTGGGGTGTGTATGTGGTGTCTGGGGGTGTGTGTAGT +GTCTGAAGGTGTGTGGTCTGGGGGGCTGTGTGGTGTCTGAAGGTGTGTCGGGGCATGTGT 
+GGTGTCTGAAGGTGTGTGGTGTCTGGGGAGGTGTACAGTGTGTGGTGTCTGGGGGGCGGG +TGTGGTATCCGGGGTGTGTGTAGGGGTCATCTGGTGTTTGGTGTCTAGGGGGGTTTGTGG +TGTCTAAGGGGTGTGGTTGGGTGTGTGTGGTGTCTGGGGGTGTTGGGGTGTGTGGTGTCT +GGGGGTGTTGGGGTGTGTGGTGTCTGGGGGTGTTGGGGTGTGTGTTGTCTGGGGGTGTTG +GGGTGTGTGGTGTTGGGGGGGGTGTGTGGTGTCTGGGGGGGTTTGGTGTCTGAGGGGGTG +CGGTATCGCGGTGGGGGTGTGTGTGAGAGAGATTGGGGGTGTTCGGGCAGTGTAAGAGCA +GATGTGTGAGGTGCCCAGGCACCAGCCTCCCCAGTTATGGTGTGTTCTACCCTCACCAGG +ATCTTCCTGGGGAGGAAACGGGAGGGGAACGGGGCTGAGAAGGTGGCCAGGTCCTGGGCG +CCTCTGAAGCTACATAAGTAGAGGCCCTTGGGCAGGTTCAGCCAAGAGCTCCAGCCACCC +TGTGAGGGGTCCAGCCTGGCCTGCAAAGAAGTGGCAGTAGGTTGTCCTGAGTATGGCTGG +CTGCCAGCTGTCTTTAGATTGAACCTGCACTGCCCAGCTTGTTGGCATGTTTGGAAGTAC +ACTTGTCCTGGGGTGTCCCTTGGTAAGCTTCCTAGTGTCGTTTGCTGAGTGTCATGCGTG +TCCGGCGGGGCAGGGTCCCTCATCCCAGGTGCAGGATGACTGCTGGCAGGAGCCTGAGTG +GCTCAGCCCTTGAGGGCGATGCAGAGGGGTGGGTGGTGGTCTGCCTATGCCATGGGCACC +TCACTGCACGGCCTGGCCTGCCAGACTTCCTTGGGAACTGGCTCCTGGGTTCCCCTCCCT +CCCTGCTCAGCATGTCTAGTCCTGGGGAAGCTTGCACTCTGGCTGGTCCTCCTCCTCGAG +CCGTTTGAATCATTTCCTTGGGGGTCTTCCCTGATGCCCTGTGCTGCTCCCAGAGCCTGG +GTCAGGTTGTGGGTTGTTGTGTGTGCATCATAAGTTGGTTCTGGGCTCCTGGGCATCTGG +GCAGAGCCATCTGTGTAGCCAGCAGTGTCTTGTGCCCATCATGGGCCCTTGGTAGACAGG +GAAAGTGGCAGTGGGTAGAGGCTGGCTTCTGGGTAAGGTGTCGAAGAGGCTGTGGGCTGG +GTCGGGGCGGGGTGTGAAGGGGGCATGCAGGGGGCTTGGCATCCGCTGTGGCAGGCTGCA +GTAGACGAGACCCCCCCCCAGCTCCCAGTTCAGATAGTGACGTGTAGGGGTATCCTGGGG +GTGCATGTGGCCTGGACTGGGCATGGGCACCTGGGATGGTCTGGGTTCTCAGGGAGCTGT +TCTGAAGAGTGGTCACCTGCAGGGCCATCTCCCCTTGGGGCTGCCTGTGCCTGGGGAGTT +GGGGTGGAGGCGTCCATTGCAGGCCGTGGCTCTGTCTCCTGTGCCCTCCAGGCTTCAGTC +ATGGGGATGCAGGTGGGCACTCCTGCATCAATCATGGGGATGCATGGCCACGGCCCCCAG +TGGGCTCCCTTCTGGCAACTGCCCCACTTTCTAGCCTTCTGCTGCCCCCCACTTTCAGAT +AGGCTGGCTCTATGTGTTGGGTTCCCTTAGGAAAGACTGGGCCCCCTGGGCCTTGTGCCC +TGATATCTGACCCTGGGCTTCTGACTTCACGGCCAGGGTCAGAATCAAGGGGATCAGGGT +TGGGCTTGAGTGCAGCCCTGGGATACAGCTGGGGCCTGCTTTGCCAGCTCTGCTTCTCTG +CATTTCTGGAGGTGGGGGTCTGTGTGCCAAGTGATCTTGAACACAGCTCAGGAGCTCCTC +GTGGAGATGGCCTTACGGTCAGGGAACAGGTGCCCATTCCTATTTAGGATGGTCCTGGTT 
+TTAAGTGAATATGGAGGATTTTCAAAGAGGTTTGAATAAAACTCAGGCGCCGTGGTACTT +TGTCTAAGCCTGGTTCTCCAACCCCAGACTGTCCCCGTGTTGTCTGAAGCAGACCCCTCA +GGTTGGTGAGCAGGGTGCTCTGTGGCCGAGGGCTGGGTGGGCTGGCCTTTGCAGTCATCT +CACCATCTTCACGAGCTTCTCTCTCTTCAGTCACGCCGATGATGGAGCTGAAGCCCAACG +CAGGTAGCGACCGTGCCTGGGTCTGGAACACCCACGCTGACTTCGCCGACGAGTGCCCCA +AGCCAGAGCTGCTGGCCATCCGCTTCCTGAATGCTGAGAGTGAGCCAAGGGCCCTGGGGA +CCTGCCTGACTTGGGGCTCACCTGCGTGGCAGCGTGGCGGGTTATGTGCAACAAATGCTG +TTGCCTTTTGGGGAGAGGGGGCAGTAACTGGGAAGTGTGTCGTGTGGGCTGCCCTGCCCT +GCTGTTTGGGGGCCATGCCCAGACTGAAGCCATGAGCAGCGCCTTCCCCCTTAAACTCAA +AGCTGTGCCAAGTAGCTGGTGAACAGCAGTGCCCGCTCAGCCGCCTTGTGGCTGGCACTT +TGGTTTGCTCTCCAAGCTCCGGTTCCCATTAGTTGTTGCGGAGCCTGAGGCCCAACCCAG +GGCTTCCTCCCAGTGCTGATCCTCGTGCCAGGCTTCTGGTTCACTGGGGCCAGGGGCCCC +TCATGGTTGCCTGGGTGCCAGCAGGGGCTGGGCAGAGAGATCCACAGGCCCAGGCTGATC +TCTTCCCTGCTCAGAGGTCTTTCCAGTCCAGATCGTCCAAGCTGCGTCCAGTGGCTCCTG +GAGGAGTCAGTGGGTACTGTGGCCTGGCGCCTGGTAGCTGGCCGTTTGGTGCCAAGTGAC +CTTGTCCTTGCTGTGGGCGTGGCTGCCCCAGCAGCCTTTCCAACACGGTTGGGAGCCGAG +CCTGTGGCAAAGAGGCCCATGCCCCTTGGTTGCCTGAGGGTCCGTGCTCTACCGCTCTGC +CCAAATGGGTGCAGTCAGATGAAGGAGACAGAAGGCCAGGGCCGATGTCAGGTGGACCAG +TCCTTTCTTAGGTCAGCAATTACCATGAAATGGGTCTTCTGTGAATCCTGACTCTTCAGA +CCCGGCAGGGCATGTCTCTTCCACTGCTCACAGCTCTTAGGAAGTCTTCGCTCTCCCTTC +CACAGATGCACAGAAATTCAAAACAAAGTTTGAAGAATGCAGGAAAGAGATCGAAGAGAG +AGAAAAGAAAGGTGACGTGGTGCCATGGGTTGGGGGGCTTCTTTGCAGACTCACTCTGCA +TCTGACTATACTTCCAGGCGGGTGCTTTTTCTGTCTGCCAGATAAACATTCCAGGGTGCT +GTGGCCGCCTCACGTATCCAGAGTGATGCAGCTCCCTGGGGACACAGGTGCTTCCTGGGG +AGCCCTGAGCACTTGTCAGTGTTGCTGGCCTCAGTCCAATTGGGCAGGCATTGGAGTCCG +GGCACTGCCTGGAGCTCACCAGAAGGTGCTTTATGATGGTCCTCGGTTTGTCTCTCAGAG +GGCTTGGTCACCTCTGCCATGAGGCTGGACTGCAGCTGTGGTGGCAAGTGACCAGATGTC +ACTGAGCAGCCTGACCATGGTCTAGGCAGTGCTTGAGTGCATCCACCTGGCTTCCTTTCG +TCACTGAATTTCAAGAAGACAGACTCCAGGAGGCGGCTTGGCCAGGCAGGAGTCTGTCCC +GAGGGCGGGCAAGCCGCTGTGGGTGGGTGACGGCACTTGGGACCAGCCTGGCGAGGGCCA +TGTGCTTGAATGCACCGTGCTTCTGTTTTCTCACTGTGCTGCTTGTGTTTTTAGCAGGAT +CAGGCAAAAATGATCATGCCGAAAAAGTGGCGGAAAAGCTAGAAGCTCTCTCGGTGAAGG 
+AGGAGACCAAGGAGGATGCTGAGGAGAAGCAATAAATCGTCTTATTTTATTTTCTTTTCC +TCTCTTTCCTTTCCTTTTTTTAAAAAATTTTACCCTGCCCCTCTTTTTCGGTTTGTTTTT +ATTCTTTCATTTTTACAAGGGACGTTATATAAAGAACTGAACTCAACATTCAGGTTGTTT +TTTTTTTTTGTTTCTAAGTTTTTGCCCTATTGAAGATGACTTCAGAAAATCCATTCCCCA +GTCATGAAAATGTACTGTGCTAACTTTCTTTTCCATAGTGGAAACACTTATTTATAGTCA +TCAAAAATAGTGAATAAAAAACACATTTGGAACCTGGGCCAGATGGCAGGACTGTGTGTG +TACAGGCCCCTGCGCTGTGAGCGCCGCTCGCCTAAGCCTCCCGGGTGTATGTCCAGGCCT +GTCCGCAGCACAGGTGGGTGCTGCACCTCACGGTAGCCTGGGTTCCTGGGAGAGACGCCC +TGGTGTCTGAGGTCGTGCATGTATCTTGCTGTGGGCCTTGAAGCTGAGGTGTCTGAGCTG +GCGCTGGTGAGGGAGCCTGGCCAGGGGTGTCTGGTGCCGGGTGTGGTACTGATGCACGAC +TGTGGGCTGGGCAGCGTGGGCTCTGCCGAGGCAGGCTCCACTGTGCCTTATGGCCTGAAC +AGCTACTGCTTTTTTTCTATGGTGAGGAGGGGTGGGCTTTCTGTGTTTTGAGGAGAGAAA +GGAAGACCACTGGAGTACCTTGGCTTTGGAGCAGGGAGGCAGTAGCCAACTGAGCAGAGT +CTCTAGCATCGGCCAAGGCTGACTTTGGCCCCTGTGGCAGAGTGGCCCCTGTGGAGGGAA +AGGCCAGTCTGGTGTGCTGGGGGTGGGGGCTCGGCCCTGTAGCTCTTCTTTTGGAAAGCC +TATTGTACCCTGGCCTTGGAGAAAACTGGGGTCTTGGCCTCCTGGACTTGAAGTCCACAC +TTGGCAGTGTTGGAGTTATGGGCATCTGCAGTAACTTTAAATAAGGAGATATTTTCCAGT +ACGCCATTGGGGGTTGTCATGTGTAATGGATTGTCTCTGCAAATACCATCTTAAGTGTGT +GGTCCCAACTGCCATAACGTGCCCTGTCACCGTTTGTCATGAACACCTACCAGACGTGCT +TACTGTGAGGCTGGCTGCCTGCTGCCATCACTTGCTCTTTCGTGCCCTCCACCTGGGCCT +GGGCCCTGTTCTCTCTGACCTCCGTGTGCTGTGTCCTGTCTGGTGACTGCCTGCTGGTTT +AAACACCAGTCCTTTGTCTCTCTGGGGCAGGGGTCCCCAATCCCTGGCCTGCCGACCGGT +ACAAGTCTGGCCTGTTAAGAACTGGGCAACACAGCAGGAGGTGAGTGGTGGGCGAGCGAG +CTTTACCACCTGAGCGCCTCCCCTCAGATAAGCAGGGCATTAGATTCCTATAGGAGTGCG +AACGCTAACGTGAACTACGCATGTGAGGAATCCGTGTTGTGCACTCCTTATGAGAACCTA +GTGCCTGATGATCTGGGGTGGACAGTTTCATCCTGAAACCACTCCCCCACCCCTGGTCCA +TGGAAAAATTGTTTTCCACCAAACTGATCCTTGGTGCCAAAAGGGTTGGGGACTGCTGCT +CTGGGGGATGTACTTCCAAAGTCAACTACTCTCAGGTTTGTTTATTTTTACATAAATGGG +AACCACTTTATGGTCTCTAGCATCAGCCAAGGCTGACTTTGGTGGGTCCCTGTGGCAGAG +TGGCCCCTGCAGAGGAAAAGGCCAGCTCAGGAGAGAAGGCTGTGCCGACTCCTCATGGTT +TCCAGTGCCCTACATGTTCAGAGAAACGTCTTTGGTAATGAACTATGGAAATGGTCCCTG +AAGGTATGGTCTTTGCTCTAGAAAAAGCCCAACTGATGACTCCTCACTGGCATCTTGACT 
+GTGTTCTGTGAGCTGTGGCCCTCATGCTCATGGACTGCTTGCCTCCAGCAGGTTGGCAGG +GGGACAGTGGTGCACTTTGCACCCAAGAAGAGGTGAAGCCTGTGCCCTATGCCCTGTGCC +GAGTGAGGTCCTCGTGTTGGGAGGGTGACGCCGTCCACTGCAGATGGTGCCTTGCTGTGC +TGCACTCTTGGTGACACCCGGGGCCCCAGGTCGGGTACAGGTGTCAGTTGGAGGCTTTGA +TGTCTGTGAAAGGGAGGGCCCCTTGCACTGGCTGGAGCTTCCAGGAGAGAAGCCCTTGGC +TAGGACAGGCACCCCCCAGCACAGAAAGGATGTTTAGCATTCTGTATTTGATTTGGGGTT +CCAATAAAATTCCAGGCTGACTTCCTTCATCTGGGGATGGGAGAAATGTCTCCAAGGGAG +AGTCCCCAGCAGTGAGAAGGCCCCTCGGAATGGCTGGGAGACAGTGCCATCCTGTGGTGG +GGCTGTGTTGCAGGGTGCATTTGAGGATGCAGCTTGTTGACACTGCAGGATGCGATGGTG +CTGCGGGAAGGGAGCTGTGCTGGGAGACAGAATGCGTCCGCGTGCTGGCCCCACGGGACC +ACAGGGTAGGCTATACAAGGACGCCCATGTGGGCACTCGGGCCAGCCGTCCAGCCCAGTG +TGTTTCCATTGGCTTCCCTTTGGCAGACAATGGTGAGCAGAATGGGGTCCTGCCGGTGCT +GGGGACTGTGTGAGAATGCAGTGTGCTTCAGCCTTCCATCCCATGTCTGTCTGCCGGGCT +GCAGGCTGGGGAGCCTCCTGCTTTCTGCTTCATTTTCCTTTTCCCTGGGATCTTAGCTCT +TCTTGAGGAGGAAACTAGGCCATGGGCTTGGCAGCCATGTTGGGTGGGGAGGCCGGTGCC +AACCTCGCTTGGTTCTCTGTCTCCAGTAGTGGGTCTGCACTACTTGGGACCCCTCTGCCC +AGGCCTCTGTGTTAAGGAAAAGGGATCCCAAGGCCAGATGCAATGGTTGACACCTGTCAC +CCCAGCACTTTGGGAGGCAAACGTGGGAGGATCACTTCAGGCCAGGAGTTCAAGGCCAGC +TTGGGCAACATAGTGAGAGACCCTATCTCTACAAAAAAATATAAAAAAAATAGCCTGGCA +TGGTGGTGCACGTCTGTGGTCCCAGCTACTTGGGAAGCTGAGGCAGGAGGATCACTTGAA +CCCTGGAGGTTGAGGCTTCAGTGAGCCATGATTGCACCACTGCACTCCAGCCTGGGTGGC +AGAGTGAAACCCTGTGTTAAAAACTCAAGGGAAAAGGGATCTCACCAAGTAGGCCTGATT +CCGCTTCTTCCCACTGTCAGCATTTGGCAGTTTACTCCAGGGTTCAGCTGGTGGAGGAAG +GTGACAGTCCTGGCACCCTGCACAGAGCATGGGCATGGAGTCAGACCTGGATCCTGAGGG +GCTCTGTGGCATGTGCCACTAGAGGAGGCACTTCATGGGGGAAGGTAGGAGCAAGGGCAA +GGCTGGAGATGGGCACCAGGTGGCAGGTGTGTTGCCAACTGAGAGGCTCACTTGTGGCCA +CTGCAGAGCCCAGGTAACGAGCCATCTGCTCCAGTGGTGTCTGCTCTAGGCTCACCATTG +TGCCCTGAGACCCCTCCACTGCCTCAGAGCTACCATCAGTGCTTGGGAGCGCTCCAGGCT +GCACCTCCTGGAGCAGGGCACCCTGAGAACAGAGCTTACCACTACGGCTGCCTGGGAGAA +AGCCCATCACCATTTCTGCTGTGCAGTAGTTAGTTGCATTAAGGGCTGGGTCCCGGCCCC +AGTCCTGGGTTCCTCCTTCCAATAGCCACAACCCTGGCCTGCCCACCCTGAGGAGCTGCG +GCCGGGTGCTTCCCCAGGGGAGCAGGGGCCTGCAGGCCAAGGAGCCTCATGCCTGTGCCG 
+CAGGGTAGGGCCGCTCGGGGTTCTGGTTGCAGAGGAGCAGACACTCAGGGCTGGGGGCGA +TGGGTGAGGTCCCCCCACAGATCTCCACCTAGGGAGGGGTCTAATGGGTCTGGGCTTCAG +AACAGGGGACGCTCGGGGCATGTTGGCGTGGCCGAGGACAATGACCCTCACTGGGGACGC +TCCGGAGCTTGACTGAGGGTCTCCCGCTGGCACCGCCTCCTCTGCCGGCGTGTGTGTGTC +TGGGGGGGGGACCCACACGAGGCCCCGCCCCACCCCGCCCACAGCGCCGTCGGCCCCGGC +CCGGTCCACCTTAAGCGGCGGCGGGGCGGGTGGGATTTCCTGCGGGAGGAGACGCGCCGG +GCCTATCGGGCAGGGCCGGGGCGCCGGGCTTCCCAAAGGGCCCCGCCTCGGGGAGGATGG +GGGCTCCAGCCGACCGTCTCTCAAGCCTGCAGCTCGGGCCAGGACGTGGAGGGCCCGGCG +CCCGCTGGCCGGGCTCCTCCAGCCCAGTGGCCCGAGGGAGGGCTGCGAGGCGGTCAGAAG +GCAAACAGCCACGACGCCGCTGCGGTGTCACACCCGCGCCCCCTCGTGAATTCCAAGGGG +CTTCAGACCAGAGGGGTCGTTAGGGAAAGGGCTGCGCCCACAGACGCCCCCGAGGCCACC +TGCACGGCTCCGCGGCCCGGGACCAGGGGGGCCTGGCCTTCGCCGTGGTGCCCACCACCC +GGGCCGCAGGGGTGCGTGGGTGCCCGGCCCCCGAGAGCCGCGGGCGCCCGCCGGGCTGGG +GTCCCCGAGGCCGCGGGGTCCTGCCCCCTCCGAAGGTCCCGCGAACCGGTGGCGGTCCCC +GGGCCCCATCAACTCGCCCCGCCCGGTCCCGCCCCGCCGCGGCCCCCAGCGCCCGGTCCA +CCTTAAGGGGCGCGCTGACATAAGAGCGCAGCAGCCGCCGCCGCCCGTGGGGTGGGATTT +CCTCCGCCGCCGCCGCCGCCGCCGCCGCGGGTCCTGCGCCGCGTCCAGCCCGCCCGCCCG +ACCCCGGCCCGACCCCGGCCGGCCCTGCCCGCCCGGCCCCGGGGAGGGATGCGGCGGCGC +GGCGCCCAGGATGCCCCGCAGCCCCGGGACGCGCCTCAAACCCGCCAAGTACATCCCGGT +GGCCACGGCCGCCGCGCTGCTGGTCGGCTCCAGCACCCTCTTCTTCGTGTTCACGTGAGT +CGGCGCCGCGTCTGGGGGCACGCGGGCAGCGATGGGCAGGGCCCGCACAGCCCGGGCACG +GGGGCAGCGGTCGCGGGGGCTGGCGGGGCCCCGGGCAGTGGGCGGGCGGGGCGCCGGCTG +CAGGCGATGGCAGTCCCTCTCCACCCGCTGGGGCTGGGGGAGTTGAAGCGGTGTGCCGGC +GTCCCGGGCACCGGGGTGAGTCCCTTGTGTGGGGACACCAGTGCGGGCTGGGCACCTCGG +GAGTGTCCAGGCAGGCCCTGGCGGGGAGCGCTGGGCCCTGGCAGGGGAGGAGGCCGGGCT +GCCCTGGGTGTGGCTGCAGCCCTTCCCTCCCCATGGGACCCCCGAGGGGACGTGGGCCAC +CCACAACCATGAGTCTTCTCTGCAGGTGGCCGTGGGTGCCTCTCGGGTCTCCTCCTGGCT +GCCCGCCCCGGGCCAGAGTCAAGTTGTTCCCACGTAGGTGTGGCCTGGCCTCGTTGCCAC +GTGCCTGCCCCACAGCCACTGGTGACCCGGGCCTGGCCCTTGGCCTCCTTCTCAGGAAGA +CCCAGTGGTCATCACAGCTGGAGGGCCCAGGCCAGGCCACACCCAGGGCCCGATGTTAGC +ACATCACCTCCGTGGGCTGGCCATGGTGCACACCAGGACCCTTGAGGTCCTGTTTCTGGG +AGTGAGAGCCCGCGCCTGCAGGACACTGGGCCCCTCCAGCCTCAACTTCTTCACCCAAGA 
+TACAGGCTGGCTTAGGACCAGAGAGGCATGCGGACAGCACAGCCTCCCTCTGTGGCTGTG +GGGTCACCCCAGGGCTCCTTCAGGCACAAGGAGGGTTGGGGGCTCCAGGCTTCGGTTGCA +CAGATCTTCAACCACCCACCCCCCTTCCACCAATCTGGGGTTGAGACATAGCCCAGGTGG +CACGAACGATGGCCACCTGTTCCAAGGTGGGGCCACCCCTGAGCATGTGAGCCTGGAGAG +GGGCAGGTAGGGGCTGCCTGGCGCTCGTCACCTTTGCAAGAGTTTGGGGGGCCCCCACGT +GCATCGCAAGGCTGAATCGGGTGCCCGTGGAGGCGAGAGAGTTACCTGCAGGCTGGGGCT +GGGTGGGCACCGCAGGCTGCCAAGAGCATGTTTAAAAAAGGCCTGTGTGGACGGGCTTAT +TTCTGGCTCGGGCAAGCCAGTGTGTTCTTGTCACTTAGTTGCTGCTGGTGTGGGGCAGAG +AGAGGTGGGTGGGGGGCAGCCTGGGGAGCCTGGCCTGGGGGGACACATCCCACCAAGGCT +TCTGGAGGGGTGGGCCCAGCCCAGTGCTGGTCCCCAGGGCCTCTGTCCTTGGGCGGGGGG +TCTGGCTTGGTTCCTTACTCTGTCCATGCAATTCAGTGTCCTTTCTCATGACTTTAAAAG +TAATGTGTGGGCCGGGCGCGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAAG +GTGGGCAGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGCGAAACCCC +GTCTCTACTAAAAGTACAAAAACTAGCCAGGCGTGGTGGTGGGCACCTGTAATCCCAGCT +ACACAGGAGACTGAAGCAGGAAAATCACTTGAACCCGTAAGGCGGAGTGAGCCGAAATCG +CACCACTGCACTCCAGCCTGGGTGACAGAGCAAGACTCTGTCTCAAAAAAAAAAAAAAAA +AAAGTATGTTCTTTATGGGGAACTTGTGAAATACAGAAAAAAAAAATGCTAAGAAATTAA +AATCAACCCCACACACCCCCCATCTCCTGTCCTCCTTGGAGGGCTCCTCTGTTCCTGGCG +CCGCGTCTGCAGCCTGGCTGTGTGGTGGCTGCAGCTGCTGGTTCCCTCACGCGTGATGTC +AGAGGGCCCTTTACGTGACTCTGACTGTGAACCTGACCCAGCCCTAGGCATGGAGTGCTT +GGGTGGCTCCCACTCTGCTGAGCACCAGTTTCCCCTCCTCTGTTCTGGGGTTCTGGTTTC +GGCGACACCAGCAGTCTTGAGGTATGGGGTTCCCTCCTGGGGCAGTGCAGGCCATGCCTG +CACCCAACCCTACACCTGGCCTATGCTGACCTCTGCCGGCCCTGCTCCTGGGAGAAGATG +CCATGTGTCCCTGTAGCAGGATGGTGGCCTGGGTGCTCACCTGGGTTTGATGTGCAGTGG +CCACCTAGCAAGGCTAAGAAGCCCTGCTTGCCCCCTCAGGGCAGGTGAGGCCACACCCCT +GGGCTGAATGGCTGTCAGTGATCCTGGCCTCTTCCCCGACCCGCCCCGCCTCTGCTGAAC +TCCTCCGAGACTGCTCAGTTCCTGAGAAGGGGTTCTGTTGGGTCGGCTGGCCCATTGATG +CCACCCATAGCAGCTGTGCTGATTCTGGGTAGGCAGGGGTCCAGGCGGCCCCCTTCTTGG +GGCAGGCCACTTGTCTACCCGTTGCCCCACCTCCTGCCCTGTATCTGGTCCAGGCACACA +GGAAGGGGAGCAGGTTGCTCTTGTGCCCCGAATCCCTGCCCTTAGTGCCTATACAGGGTT +CCCGACTTCACTGTTTGCTCCTCGGACGTTACCAGACAGTGAGTGGCCCCAGCCCCCTCT +CTGCACTCAGTAAACATGAGTTGCTGCCGGTGGGGTGGCTTCACCCATGCACCAGGTGGG 
+CAGGAGTTCTGGGTGTCATGTGGTGTGGGGAGCTGGAGGCCTGAGAACCAGGGACCCCTC +CCTCAGCCAGATCGCCAAGGGTCTTGGGACCAGCAGGACATTCACACATCTCTCTAGGAG +GGTCTGGGCATGGAAGGATGCCCAGTTGTGAAGAGCACCCTCTCAGGATGGGGCACATAC +AGCAGGGCAGTTTCTCAGAGGCCCTCTTCCTTCAGGAAGCCTTCCTAGGATTTTGTTGTT +GTTTGTTTGTTTGAGACAGGGTCTCACTCTGTCACCCAGGCCAGAGTGCAGTGGTGCTAC +CTCGGCTCACTGCAGTCTCGACCTCCTGGGCGCAAGTGATCATCCCACCTCAGCCTCCTG +AGTACCTGGGATGACAGGTGGGCACCCTCACACCTGGCTAATTTATTTATTTTTTGTTTT +TTTGTTTTTTTTTAGTAGAGATGGGGTCTCGCTATGTTGCCCAGGCTATTCTCCTGAGCT +CAGGTGATCTGCCAGCCTTAGCCTCCCAAAGTGCTGGGATTACATGTGTGATCACTGCTC +CCGGCCCCTTCCTATGTTTCTGGGGGTTTTGTATCCCCCTGGGCTTCAGAAGCCCTTTAT +ACCATGGTATAATGCTAGGCTGGGTCAGCTGCTGGCAGAGGGTTCGAAGACCCCTAGGCC +AGGCTCCCGACCCTGCCCTGACTTCACTCCAGAGCCACATTGCAGAATGACAGGGACCCT +GTGAATGTCCGAGGGCTGGGGCTGCCTGGCAGGCTGCCAGTGCCTGGGGCTGAGCCAGGC +TGGCTTATCTCTGTCTGGGGCATCTGGGATGGTGTGGCTGGTCCCTAGATCGTCCTGAGC +CCTCTCCTTGCCTGGAGGCCCAGCTCAGGACAGCAGTCAGAACAGGCCCTGGGCCAGACT +AGCCTTGGTGGGCCAGGAGGGACTGAACTGCTGACTGGGACCTCATGAGTCTGGGGAACC +TGGGAGTGAAAACTCAGTGTGAGCCCCCCTGCTGGGACCCACCGCATGTGGCCTGGGGTA +TTCCTGGCCCCCGAGCCTCAGTTTCCCCATCTGTCCAGGTGTCTATAGGGATTGGACAGG +TAGGTGCAGGTACTTCGAGGCCCAAAGGCCTGGCCTGACAAGGGCGCAGCAGCTTCTGCC +CCGCCGGTCCCGAGCGCCCCACGGGCGAGTGCCACTCATACACAAACAGTGCTAATGAAG +AACCTGATTTAAATGAAAACGTCTGAGTTAAAATTTTCCATTCCACATTTAGCTGTGGCG +GACATTCTGCCCCTAAGCCACCCCTCTTGCTCGGCGGTATCTCCCTGCCAGCCCAGGCGG +GACTTGGGGCCTCAGCCCGGGTGGGGCCTATGGTGGTGGTGCATGTCCCTGCCTGTGGCC +CTGGAGGAGGGATTGGAACCTCGGGGCTGGGCCCCCTGGACCTGCCCTCCACCCGTAGCT +TGGCTGGAGCCAGCCAGGAGGGGCTCAGAGGGACATGGCGGGCCAGACACTCCACCTCCA +GCCCAGGCCCAGAGCCTGTCTTTGCCTGGCCCTCCGGCAGGCGAGCCTGGTGTGCACCAT +CAGGTCTGGGTGGGAGTGGAGCCTCCCCGAGCCTGGACCCAGACTACTGCCTGCTAGGCC +CTGCCTGTGGGAATGCGTGTCTTTGAGTGTCCCTGAGCATCCACACATGGCCTCCCCTGT +CCCCATGTGTCCTCATGCATCCTTGCATGTCCTTGTGTGTCCTCCCGTGTTCTTGCGCAC +CCTGAGAGTGCTTGTGTGTCCTCACACTTGCCCTATCCACAGAGGGGCAAAGGGAAGCCC +AGAGAGTGGGAGGGCCGTGTCTGAGGCCCAGAGTGAGGGGCAGCCGAGCCGAGGCTGGAT +AGGGCTGGGTGCAGTGTGAGGAGGCATGTGTCTATGCATGCCTGTGTTCACGCGTGTTCA 
+CACATAGGTCTTGGTGGGTGAGTGATGTGCAGGCGTGTCTGCGTTCATGCATGTGCACCA +GTGTGTGCACGTGTGCATGTCTGCATGCAAGTGTTCATGCATGTATATGTGTTTTGTATG +CAGACATGATTGCATGTGTGCATGTGTGCCTTCTGTGCACCAGAACAGGTTGTCTGTGTG +TTTATGTGTGTACATGCCTGTATATTTGTGTGTGTGCACACATACCCATGCACATGTGCC +CTGGAGCATGTTTGTGTGTGTGTCCATGTCTTCACACGTGTCTGTAGGTGCCTGCGTGTG +CTCCTCATCTTCTCTCCCACTGAGCCCTGAGAGCTCTGGGCCCTGGTGGGTTGGCCTGTG +AGCTGGTCCCTGTTAGGCAGGATCTGACCTTGGGGACTACAGAGAGGACAGTTGCCTCAC +TCTGGAGGACTGGAGGAGTGGGCAGCAGCTCAGGTCCCAGTCAGAGAACCCACAACAAGC +CTCCCCAACCCCTGGATTAGGGCCCATATGGGCCCTCACAGGCATGTGGCAGGTACCCGC +ACTGGGCAGGAGGGACCCGGTCACACTGCAGATTTCCCAAAGTAGCCCCACGGCTGCAAG +GCTGCTGTCCAGCCCGCCTGGTGTGGGGGAGTCCAGGCTCAGGGAGGACACAGCAGTACC +GCCTGCTTGGGCTGGCAGGGAGCTCAAGTCAGCATGGGGTTCAGGCCCCGCCTGCCTGAT +GGGCGAACACACCCCCAAGGCCCCCGTTAGGGCCAGGAGGCAGGCTGGCAGCCAGCGCAG +GTGCAGAGGGGCCCCGAGGCTTCCATCCTGAGGTCAGCACTGGGAGGACAGAGGCTGCCT +CCTTGCAGCCCGAGGGTTGACCAATGGCACTCGCCGCATCCCTTCCCAGAGCAGGCCAGC +CCCACGGGCATCATGAGGGCCTGCGTTTGGTTTAATGCTCCGCTGTCAACATCTTGAAAT +TCTCTGTAATTTTTAAATAAGGGGCCCGCAGTTTCATTTTGCACAGACTATGTAGCCAGC +CTTCCCCAGAAGGCTCGTGGAGAGCTCCACAGGCCCTGCCTCTCCCTGGGGGAGCTGCTC +ACCTGCCTGTGCCCATCTTGTTGGGCCGACCCTGCCTTAGGGGCCTGTGGTCCTGGCTGA +ATGGCGTTGCATTGGAGGCCAGGCACATGTTTGAGTGCTGCCTGTGTGCCAGGCAGGGCT +GGGGAGGGTGGAAGGAAGCCTGGGGGCTGAGCCACTGGGCAGGGACAGGGAGAGCCCTCC +CAGCAGAGGGAGGTGTGGCCGCAGCTGTGTCCTCACCCTGGTGTCTACCTCCACGGGCTG +GGCCCTGGACAAGAGGAAGGGTAACTTGGGCTCCCAGCCCCGCCGAGGATGGCTGTCGTG +GGAAGCGGAGTCTTTAAATAGCCTCTTGCTTTCCACTAAAAATAGTGAAGCTCTGCTGCC +GTGGAGGTGGTTGCCGGGGCGGCGGGGCTGGCTCTTGGCCAGGCGGGCGTGGGTGCAGCA +GCGTGGCCACGTGCGCTGGAGTGGTAGGCAGCGCGTGGGCCCCATCCACTGCTAGTGGCC +ATAGAGGTCAGGGCTGCTGTGGGGGATCCCCACTAGCCCCCCGACTATGCAGAGGGCCTG +GGTGCAGGGAGGGACGCAAACCCCTGTGTTCCTGTCCCTGCTGCTTGTCACAGCAGCTGT +CTCAGGGTGCGCAGGGGAGAGACAAGGAGCCTGAGGCCAGAGGGGCTTCTTGGAAGCTGA +GTGGGTGGATTTAGGAACCCGCTGCAGGCCGTGTGGCCCGAGGACCGGCAAGTCTCTTCA +GAGGGGACTCGGGCCAGGCCCGTTAAGGGCTGAGGAAGGCGCCTTTCAGGAAGGTGTGTG +AGGGCACACCTTATAGGGAAGCTGCAGAGGGCAGCCTGGAGTCCTGGAAAGCTTCCCGGA 
+AGAGGGGGTGGCCTTGGAGCCAAGTGCTGATGGCTGGGAAGATGGGAAGTGGTGGGGGGT +GTTGTGCATTCTAGAGTCCCTAAGTGTTCTACTTTGTCATCTGGGGAGGGAGGTGACAAA +CTAAAAAGAAGGGGCTGTCTGGATGGGGATGTGCCTGACTGGGAAGAGTAGGGGAGACAT +GCCCCATGCACTTACTGACCAAGGACTGGGGCTCCCCACACCCCAGAATCTGCCTGGTGA +GAGGCCCCACTGGGCCAGGGACAGGCCTGGATGAGGCATCGTGGCGCATTTTCCATCTGG +GGCTCCCACCTCCAGCCTGTGTACCGCCCCCGCCCCCTACCCCGCCCCAGTCATGGAGCC +TAGGTCGCTGGAATGGGGTGACCTTAGGTCTGTGGGCCTCTCTGAGACGGCTGCCCTGAG +AGGTTGCCGGGTCAGGGCACGGGTAGCCCACATGCTGTACTGATACGCACAGCCTCTGGC +CGTTACTCGAAGGAGCGATGTGGCTGAGGTTTCTTGGCAGGGCTGTGGCAGCCTCCCCAC +TGCCTCTGAGCTCTGAGGGTGACCTTGCAGGCCCAGCCTGCCGCACCACATAGCTCTGCG +CCTGCATCCGCTCTGCACCCCCAGACTCCACTCTCTGCCCCCACTGTCTACTGCAGGTGC +CCGTGGTTGACACGAGCTGTGTCCCCAGCTGTTCCCGTCTACAATGGCATCATCTTCCTC +TTTGTCCTGGCCAACTTCAGCATGGCCACTTTCATGGACCCTGGTGTTTTCCCCCGAGGT +AGGGCCCTGTGCTGCGGCAGCTCCTCTCACTTTCACTAGAGTGTGAGTGGCTAACTTGAG +ACAGCCTTAGGGATTCCTGGTGGGTGGGCTGGACTTGGGGGAGGGATTCACCTGCCAGGA +AAGTCAACTTCCTGCTCACTGCCTGGCTCCTGGTCTGTAGCGGATGAGGATGAGGACAAG +GAGGACGACTTCCGGGCTCCGCTGTACAAGAACGTGGATGTGCGAGGTATCCAGGTCCGC +ATGAAGTGGTGTGCCACGTGCCACTTCTACCGCCCGCCGCGCTGCTCCCACTGCAGCGTC +TGTGACAACTGTGTAGAGGTGACCGCCCTGCTGCCCCGCGCTCCCCCAGCCCTGTGCCAC +ATCCCTGCCTGCCATGTGCCCTGATGCACTGCTCCCGCCCAGGACTTTGACCACCACTGC +CCCTGGGTCAACAACTGCATCGGGCGTCGAAACTATCGCTACTTCTTCCTGTTCCTGCTG +TCACTCAGTGCACACATGGTGGGCGTCGTGGCCTTCGGCCTGGTCTACGTGCTGAACCAC +GCTGAGGGGCTGGGAGCCGCGCACACCACCATCACGTATCCTTGGTTCCTGTGGGCCTCA +TTGCAGAGGCGATGTGGACCGGGGACACGTTCACCAGGGAGATTGAGCCTCAGAAGGCTT +GGTTCCTGCCCAGGGATCCCCAGTGGGCAGGTGGCCGTCCCTAGGTTGGGAGGAGGTTTG +TCCACAGGCACCTGCTCTGTGCTGCTGCGATGGGGTCTGCGGTGTCCTGGCCTGCACCGT +TGGCCTTAACGGGCTAGCATGGCTGTCATGTGTGTGGCCGGCCTCTTCTTCATCCCTGTC +ATTGGCCTCACTGGCTTCCATGTGGTGCTGGTCACTCGGGGGCGCACCACCAACGAGCAG +GTGCAGACCCCATGGGGGATGGGTGGCCCCAAAGGGCCGAGAGAGTTGGGGCTGCATGGG +GACAGGGTGGGGGCTGGGGCACCCTTGCCTGAGGTAGCTTGTGCAGCTGTGTTGAGACGA +GGCTCCAACTCTGAGACCCCCACGTGGGGCTACGCCTGCCCCGTCCCCTGCGCACATCCT +GCCTGTGGACACTGTGGCCACCAGTGACCCAGAAATCCCACAGCTCCCTGCAAGTTCAGG 
+CTGGGTAACCTATGTGAGGGGCAGCCCTGGGCTGATCCCACCTAAGCCACTTCCCGCAAG +TATCAGCTTCTAAGGAACCCCAGCTCCCTTGCCCAGCCCCCTGCCCACCCTGGCCTGGAC +CTTGGGAGGCCAGGCTGGCTGAGGGTCCTGCATCCACAGGTGACTGGGAAGTTCCGCGGG +GGTGTGAACCCTTTCACCCGAGGCTGCTGTGGGAATGTGGAGCACGTGCTGTGTAGCCCC +CTGGCGCCCCGGTGAGGCCCGGCCTGGGCAGGGTGGAGGGGGGCCTCTGCTGGGTGTGGG +GCGGGCAGCCTTAGACCCTCTTGGTCCGTTTGGCTCTTGCCAGGTATGGCCAGCCCTGCC +CTTGTGTGTTTGTGGCAGGTGGGCTGGCTACTGACCCCTGTGCCCTGGTGCAGGTACGTG +GTGGAGCCACCCCGGCTGCCGCTCGCGGTGAGTTTGAAGCCGCCTTTCCTTAGGCCTGAA +CTCCTGGACCGAGCTGCACCGCTCAAGGTCAAGCTTAGTGACAACGGGCTGAAGGCTGGC +CTGGGCCGTAGCAAGGTGGGCGCCTGGGCTTAGGGATGGGCTGGCAGTCAGGCCCTTGGA +TGGGGAAACAGGCAGGTTGGTGGGGGCCTAGAAGAGGTGGATGGGGCAGACAGAGCCGTA +GACAGATTCTTGGGAGGGGGCTAGGTCCCAGGCTGAATCCCTAGTGAAGCCCTGCCCCTC +ATCCAAGCCCCACACACCACTGACCCCGCTCCCTCCCAGTCCAAGGGCAGCCTGGACCGG +CTGGATGAGAAGCCACTGGACTTGGGGCCACCACTGCCCCCCAAGATAGAGGCTGGCACG +TTCAGCAGTGACCTGCAGACCCCGCGCCCAGGCAGTGCTGGTGAGGTTGGGGCAGCCACG +ACTAGGGAGGGATATGGGTTGACCCTCCTCTGACCTCAGTCTGACAGTGGTCTGCATCCC +CAGAGAGTGCCCTGTCGGTGCAGAGGACCAGCCCCCCGACACCTGCCATGTACAAGTTTA +GGCCGGCTTTCCCCACGGGTCCCAAGGTGCCCTTCTGTGGACCAGGCGAGCAGGTAAGGA +GGCCTAGCCTGCCCCCTGGCAGGCCTCGTCCCCCAGGCCCGCCTCTAGGAGCTCGCCCCA +CAGCCTTCACCCCGTGAAGGCCCCACCTCCAGAGCCCCATCTCTCCAGCAGAGGCCACAT +CCCCTGAGGCCATGCCCCTCAGGGCTCTGCCTGGGTCACCAAGGGCCTTGTGTGACTGGT +AGGACCCTGTCTGTGTGCACCCAGAGCTGGGACCCACAGAGGGGAGCAGGGTGTCCTGGG +AGGGGCTGGCGTGGGCCATCCAGGCTTGGCGTCTGTCCCTTGGCCAGCATTGCCCTAGGC +CAGGTGGGCGGCTGCCCCCAGGAGCACTGTGTACCAGGATGCCTGGAATTCCACCATTAG +TCACTCTGTAAGGCCCACGGGAAGACTTTGGAAGAATTTGAGACGTGCTTGAGAAAGCAC +ATTTCTCTTCCCTGAGTTGCATGAGTGCTCTGTGGTCACTGGAATGGTGGGCAGTGCGGG +GCGGTCTGTGCAGTGATCAGGAACACCAAACAGTCCCAGGGTGCCATCCTTCCTTCTGGG +CCACCCCGAGAGGCAGGCCTGTGACTGGGCCCTGTGCACCTAGACCAGCCCCATCCCCAG +GGACCTCTCCCCTCAGGACTGATGTCTTTCCTGGAGAGAGTCACTGGGTGAAGCAGTGAG +TCTGAGCCTCCTTGGCCAAGGGTCTGAGTCTGCAGGCATGTAGGGGCCGTGCCTCGCAGG +AGGAGGCATCCCTGTTTGGAGATGAGGGCTCTGGGTATGTCATCTTCCTGGGACTCCTGG +GTCCAGTGGGGGATTGGGTCTGGTTGGCCAAGTGCATTGAGCCCACATTTGCCCTGATCC 
+ACTTCTGGCTTTGTGGTTCCTGAGGCTCTTGTCCCAAGCCCAAGCCCAAGCCCATGCCTA +TGTGCCCGTCACAGGGGCTAGACGAGGCTCCTCCTCCCTCTGGCTGCCCTGAGTGGTGGG +CCCCTGGGGCTGCATGGCTCCAAGGGCAGAACCCCAGGTGTCTGCAGTAGCCAGCTGTGG +CCTCCCTCCTGTTGCAGAGCCCCTGCCCCTCAGCCGTGGATGCTCAGGGACAGGGCCATG +GGTCACTATGTGGCTCTTATGGCTCCTTGAGGCCCAGTTCCTGAGGCCACGGTGCCATGT +GTGGCCCCCGTGGGTGCTGACTGGCGGCTGCAGGCGGGGTGGCAGGTGGGCAGTGGTGAG +AATGCCTTCTCCCTACAGGTTCCAGGCCCTGATTCCCTGACCCTGGGGGACGACAGCATC +CGTAGCCTGGACTTTGTGTCCGAGCCGAGCCTGGACCTCCCTGACTATGGGCCAGGGGGC +CTGCATGCAGCCTACCCGCCATCCCCACCGCTCAGCGCCTCTGATGCCTTCTCGGGCGCT +TTGCGCTCCCTGAGCCTCAAGGCCTCGAGCCGGCGGGGCGGGGATCATGTGGCCCTGCAG +CCCCTGCGCTCTGAGGGGGGGCCCCCCACGCCCCACCGTAGCATTTTTGCCCCCCATGCA +CTGCCCAACCGCAACGGCAGCCTGTCCTATGACAGCCTGCTCAATCCTGGCTCGCCTGGT +GGCCACGCCTGCCCTGCCCACCCAGCAGTTGGCGTGGCCGGATACCACTCACCCTACCTG +CATCCTGGGGCAACGGGCGACCCGCCACGGCCCCTACCCCGCAGCTTCAGCCCCGTGCTG +GGCCCCCGCCCCCGGGAGCCCTCGCCTGTGCGCTACGACAACCTGTCCAGGACCATCATG +GCATCCATCCAGGAGCGCAAGGACAGGGAGGAGCGTGAGCGCCTGCTGCGCTCCCAGGCC +GACTCACTCTTCGGCGACTCAGGCGTCTATGACGCTCCCAGCTCCTACAGCCTGCAGCAG +GCCAGTGTGCTGTCCGAGGGCCCCCGAGGTCCCGCGCTGCGCTATGGCTCCAGAGACGAC +CTTGTGGCTGGGCCCGGCTTCGGTGGCGCCCGCAACCCTGCCCTGCAGACGTCACTGTCC +TCGCTGTCCAGCTCCGTGAGCCGTGCACCGCGGACGTCGTCCTCCTCCCTGCAGGCTGAT +CAGGCCAGCAGCAACGCCCCGGGGCCCCGGCCCAGCAGTGGCTCACACAGGTCACCTGCA +CGCCAGGGCCTGCCCTCCCCGCCCGGCACTCCCCACTCACCATCCTACGCGGGCCCCAAA +GCTGTCGCCTTCATCCACACGGACCTCCCAGAGCCACCGCCCTCGCTGACCGTGCAGAGG +TGGGTGCCGGGAGGTGCGGGTGGGCTTCCTGGCACAGGGCAGCTGTCCAGCCGTCTCCAG +GGCCCCTCTTGGCTGGGCACTCATAGGTCATCCCAGGGGCCTGAGTTTTTCTCTGGGGAC +GGGACAGGGAGGATGCGCCTCACCCTGCCTTGAAGGCCTTGGCTAGCCCGTGTGGCCCAG +CTCTGAGGATGTCCTGGGCACGCTCTGGGGACAGGGTATGAGAATTTGTGTCCTGAGCCC +GACGACTCGTTCTTCTAGAGCTGGCACAGACCCAGCATGTGGGAGGCCATGGGGCTTTCT +CTTTGGGGCCAGTATCATGTGGCAGCCTGGGCCTCCTTTAGGCTGTGGCTCTGGCTTCCT +GGGTCACAGTGGCCCTGTCCACCCAGGGCTCTCCCTGGGCCTGCTTGGCTGCATCTCTAG +GAGGCTGGGGCTGAGGATATTACCTAGCAGCAGAATGTGCCACCCTGAGTGGCCGGGCCC +TGGCCCTCCCTGTAGGACATAGTTCTAGGAAGCAGGGGCTTCCCTGCTCGTAGGTGGGGA 
+AGCTGAGGACTGGAGAAAGCAGGCTGGCTCCAGGCACCCGGCAGGTTGGTGACTGAGAAG +GCGGGGTCAGCTGGCTCACAGGCCCAGGTCCAGGTAGGAGAGCTGCACACCTGCTGGGGC +CAGGACCCACGCCACAGACGCATGGTGTCCTCACGGCATCTGTCGCCCTGTCCGGGCCTG +GAAACCTCAAGCACAGGCGCAGTTGTTTCCCGTCGGCAGATGCAGCCCCAGAGGGAGAGC +CCAGCCCAGCCCAGCCCAGGCCACTTGGCCACAAAGGGGCTGAGGGCACAGCAGTCCAGC +CCTGAGCGGGTCTCCTCTACCCCACAGGGTGGGACCTGTTCAGCTTTTGTGGAGTTGGGG +AGGAGGCTGCCACTGGGAGTGATGAGTGGGGAGCCCGAGACCTAGAAGTGGGCAGCATTG +CAGCCAGGCAGGGATCGCAGCCCGGGGACAAAGGGCTCTCGCCTCAAGAAGGTTTAAGTA +GCAAAGATACAGCTGCTGTTGGTCAGCGCTGGTGGGCAGGAGGGCAGCCCCTCGTTTTCC +TGAGAGAGAGCAGGAATGAGGCCAGGCAGAGTGGCTGGGCTCCCAGTGCAGGGCACGGCA +GGGGCTCCATTCAGTGCCCACCTGGCTGCCCCATGCTCTCTGTGGCCAGTGCTGGCTAGG +AGGGACCCAGGGACTCGGGAGGGTGGTGGCAGTTCTGACAACATCTGTCTGTCCTGTGAC +AGCACAGCTGCTCCCCAGGCAGGCTCTGCATCCCTCACCTGCTTTCCAGGTGAACCCTCC +AAGCCCAGAGGGCAGGGACTGCACGGGGCCCCACAACTCGGCTGCACTAGTCCACGTCCG +CGTCGTCTCTGTCTTGCTGCCTCCTCCGTCCTCTGTCCCCGTGTTCGTGTGTTCACCGTG +TGCATGTGTGTATGTGCTTGTTTTGATGCAGGGACCACCCTCAGCTGAAGACTCCCCCAA +GTAAGCTTAATGGGCAGTCCCCGGGCCTGGCCCGGCTGGGACCTGCCACCGGCCCCCCAG +GGCCCTCTGCCAGCCCTACACGGCACACGCTGGTTAAGAAGGTGTCCGGCGTGGGTGGGA +CCACCTACGAGATCTCGGTGTGAGGACTGACTGCCACACATCCGCCATGGTGCCACGGGG +ACCAGGACCCCACAGCGCACCCCCCCTCCCCACCAACTTCTCTGCCCCAGGGACCCGAGG +CCACCCCAGCCTGGTGTGGACCCATCGGCGGGAGAGAGTGCCACGCCTCCACAGCTTGCC +CCAAGCGCTCTGCCTGCCCGTCCACTCATCTGCCCATGGGGAAGTCGGCTCACTGGGACA +AGGGCCACTGGGCTGGTCTGTGTCTGGGCCTGTCCCATGGCTGGGGCAGTGAGGGGGCCC +AGTCAGCCTCTTTGGGGCACCCTCTCTCAGCCAGGCTTGGCCCACTGCCATCACCCAGCA +CCCCAGATCACCGCCAGGCCAGCCCCCAATGGTCCCCTTACGGACAGGTCCCAGAGATGG +ACAGAGGCACCCAGGGCCCCCACCGTCCTTCTGACACAGCCTGTGGGCTCCCGGACCGAG +TGTCCCCCGCCAGGCTACTCCTAACTAACGCGTTGCCTTTCACGGACCCCGCTGGAAGCT +TGTAGCTTGGCAAGGCTGATGCTTCTGCCCTGGCCTGCTCTGGGTGGTGGTGGATAGGTG +GACAGACGGCCAGCCAGCCAGCTGTGGCCGGGGGCCCGGCTCCATGTGTCCCGTGTCTGT +GTGCTGTGCTGCCGCGCCGTGTCTGATGTGTCAGTGCTCCGGCCGCCGCTGTCCCTTTCA +TCAAAGCCTTAACCTTTGCTTTATGCTCTTGTGGGAGGCGACGGGGGGGCAGGCGGGAGC +AGGCACGGGGGTGATGCTGCCACAGGGGGCTGGTGACACCCAGAGCCCCCTCCCCAGCCC 
+TCAGGCCCTCCCTGCCAAACTGGAGAACCCCACCCCAAGGCATGCCACGTCCGCAGCCCC +GGCCTGGCTGCGGTGCTCGCGCCGTGGGAAAGCACACTGGGGAGGGGTCAGTGCTTCCCT +TGGTGTCAGGGACCTGAGAGTAAGCACATGACAGCGTCTGCTTGCGTTGTGTCTGTTTTA +TGTTTTTATATCTACATCTATATATCTATAATTTTATTAAAAAAAAGAAAAAGAGTTATT +TTGATTCTTTCCTTGGGCAAGGCCAGGGCTATTGCTGCAGGATCCCGAGTTAGGGGAGGG +CAGGGGCCGGGCCCCAGAAGAGGGAGTGGCTGCTGTCTGGTGTGCAGGGGTCGGTGGGTT +CCTCAGGGGCATTTCTGCCGACTTGGGCTGAGTCAGAGGCTTGGGAAGAGGGGGCGGCCG +ATGGTCTGTGGCTGGGAAGTGTGAGGGTCTCTCGCCTTGGGTCTGTGTCAGTTCTGGCAG +TGACAGGGTGTTTGGGGGAAAGACTTGGGTCTGCCGCTACCCACAGGGGACTCTCAGGAA +CCCGAGAGCTTGGGGAGATGAAATGGGGGTGCATAGGGGCCACCTGTTGGCTCAGGGCCC +TGTGGGGGCCGCTGAACCTGCTGGAACTCTCCTGCCTGCCACATGCCAGGGGCAGGGCTG +AGGGAGTGTGAGGGATGCACAGCTGTTCAGGGCTGAGACATTCTGGGCTGGGGCTGTCCC +AGCGTGGGAGGCGTGCGGGCTGTAGGGCCCCGAAGCTGACCTCCACCTTTCTGCTTCTCT +CTCACGGACGCCGCGGCCCGCAGGGGGCGGATTGGCACCTGCACCCGTGGATGGGGGCGG +CGTGGCCAGCCTTGGGTGCCTCCTGGGCTGCACCTGTGCCACCTTGGCCGCCCGGAGGAC +CGCCCACCACTGCGGGCCCCCTGGAGCCAGGCCGCCGGGGCACCCCCACGCGGGGCCATG +TGCCGCCTGCACTTGGCTGCCTCCAGTCTTTTCCCCAGCCTCTCGGGGCCCTAGCAGGAT +GACAAGTAGGCGGCTCTGGGGCCCAGGACAGCCCAGCTGGGGACCCAGGAGGTCAGACTG +CAGTGGACCCTGGGGCAGGGCTGGGGGTGGGCTGGGCTCTCTGCTCCACCAGCCACAGCT +TGACAGATTCCCAGCCTGCCAGGGCCTGAGACCCTGTGTCCACATGACCTCAGGGAGTCC +CCCACCTGCTGCAGGGGGTCCAGCACCCCACAGGGGGGCAGTCCCAGAGCTGTGGGGACC +GGCACGACCTTTGCCCAGCCTCCCTACCCAACCAAGCACTTTAGACTAAGCCACTTCCTC +CTCGGGGAGCCCAGGCCTCCGTGGGTTGGGCTGGGTGGGGGGGGGGTCTCAGGTTGCCCC +TGAAGGTCTCTGCACTCCTCCTGCCCTTCCCCTGACACATGAACAGATGCCTTAACTTCC +TGGAGCCACCAGCCTGGTGAGCCATTGGCCTCTGCCTGCCACCAAGGTCCTGTGGTCTTG +GCCAGCTCCGCCTGGGCCCCACTGGGGCTGCCTGCACCCAGAGACGATGCCGGCGGGATC +TCAGAGGGCCTGAGGCCCAAGCCCTGTGTCCTCCAGCAGTGGTACGGCCTGCGGCAGGGT +GGCACTCCGGCCAGCCCTTCTCCGTCACAGGGTCCCTGTCCCTGGGTCCACCCTGGGCTG +TGGCTCTACATCTCCCATTTGGGGACGAGAAAGCCACAAAACCATTCTCTATTGTTCTTA +AGGGTACCCCTGCTAATTAATTCCCCAAATAAAATTTTTGGTGTTGATCAGCTCTGTCTA +CCAATGTGGTCCCCTGGACCGGACAGGGCCCTGGGTTGGGGAGGTGACAGGACCTGGGCA +GGGAGGGTGGTGGCCAGGAAGGGAGGGGAGTTGCATGAGGGGCATGAGGTCAGTGGATCA 
+GCAGGTTCAGCGCTGAAAACCTGACCTCTGAGGTCCCTGGGAGGGCCTGGCCCAGGGGGC +AAGACCTGAGGACCCCCCAGTCCTTACTGCTGTTGCCTTTCAGTGGAGACCCTCCTAAGA +CGCCTTGGGGGGTCAGAGCCCCGTGCAGAGGGGCTGCCTTCTGGAGGGCAGGTAGGATGT +GGGGTGGGAAGCCGGCCCTGAGCTGGGGTCTCCACCCTGAGGGACCCTCGCTGCAGGATC +GCCTGGGCATCCCTCCCCCAATTTCCACAGAGACGTGCTCACCGGCCACTTATGTCATGA +TTCTATGTCCAGCCACAGGCCCAGCCCTCAGGACAGACCCCACCTCCACCCTTCTCTGCA +GCTTCTGGCAAGGATGGTGCACAGTGGTGCCATGTGCAGGGGGCTGGCACGGCCACTGGG +CATCCGGGGCAGTGCTGGTCGCCTGGCCTCCTTGCTGGGGCCGCTGGGCCTCTGGCCTAG +AAGGGCAGGAAGCCGTCCACTTTGAGGCGCAGGAAGGGGTCCAGCAGGGCCCGGAGCTTC +CAGACGGTGGCACGGCTCAGCAGGGGTGGCACCAGCCCCTCGAAAGGTGTGGGGTTCACC +ACGTACACGTAGGCAGCGGTCCAGACCAGCAGGGCGCCCAGCAGCAGCCTCAGGGTCAGC +AGCCTGCGGGCGGCGGTGGTCAGGGGCAGGGGCGCGCGGGGGGCCTGGGGGCTGGGCTCT +GCCGTGCAGTCACCTACCAGAGCTGGCCTGCCAGGCCCTTGGAGCTTCCCAGCTTTGGCC +TCCCTTTCTGTCGGGGAGGGGCTGGATCAGAGCCCACCCCGAGTGTCCAGGCTGAGGGCA +GGGATCCCTGGCTTTGGGAAACCCTCCTCAGACAGGGCTCCCCGCACCGCAGCCCTGGCC +CCTTCCTCCTGGACAGGCTCCTGCAGCCACTTTTCCGATGCCCCCTGCCCTGGACCTGCC +CTCCTGGAGCCCATTTCTCCCTCTCGCCCAAGCAGGGCCCTGGGTGGGGGGCTGGTCTCC +AGACCCAGAGTGGGGCGTGGGGGGCTCACCGACATGCTTGCCATCTGCTTCTCCTTCCTG +GCTCGAGTGCGGCACTGGGCCCGCTCCCTGCGGGGGCCTCACTGTCAGGACCCCACACCC +TCCACCCCCAGCCTGGCACCCACCCCCTGGCTCGGTGCTGTGGAGGTGGGGGGTCAAGGA +TGGGTCTGCCCTGTCCTGACCAGGACACCTTCTGAGACCCTGCTCAGCTGGCCCGGCCCC +TGCCCCCGTGTTACCTGAGAATCTCCAGCTGGATGTCCTCCATCTGGTCCAGCACGCCCC +GGATGTCCTTCTTGAGGTTCTGGGGGCCAGAGGAGGTAAGCACAGCAGGCCCCTCGCCCC +GCCCTCCGTGGCCTCTCGCCCGCCGGGCCCCGTCCTTCTGGGTGCTGCCCTGTGGGGACC +TACCAGGAGCCCGGTGGCCTGGTTGTTCAGGGCCATGTGCATCTCAGCCACGTTGTCCCA +CACCTAGGGGGAGGTGGGGTCACGCCTGAGGAGCAGGACCCACTGGGTGGGCCCCCAGCA +CCCCTCCCCCTCAGCCAGGCGGGCACCTCTGTGATGACCATCTTCTTCCTCTCAAAGCAC +AGGCGGATCTGCTGCAGCTCGTTCTGAGGGTGGGGCCCAGGATGGGCTTGGGCAGCATCA +GCAGGGTGGCCAGTACCTCCCCGCTGGGCCCACTGGCCTACTGCACGAAGACCTCCCTCC +CTAATGAATCCCTCAGGCCCACCTGGAGTGCTGTTACGAAGCTTCCTCCCCCCCCACAGC +CCCTCCCCCAGGCCCACCTGGGACTGTTGCACGAAAGCCTCTTGCCCCTGGCACAGCTCC +CGCCGCAGCAGCTGCAGAGGTGCCCTGTTCCCCAAGGGCTGCGTGCCAGCCGGGTCGGGG 
+GGCTGTGGGAGAGCCCCCAGATCAGCCCTAAGACTGCGGGCTAGAGGGGCGTTGGCTGCA +TGGGGTCCCTGGGCCCTGTATTTACCCAGGCCAGAGCAGTGCAGCTTGAGTGTTCGGGAC +ACAGGGGCAGGAACTGCTGCCCCGGCCCCAGGAGGGCCCCCAGCTGCTCCCGAAGTTCCT +GATTCTGCCTTTTCTGGGGTGGGGGGCAGAGAGAGGGGGCTGCCTGTCATGGCCGCTCCT +GCGGCTGGGGCTGGGGCAGGGGCAGGGGCGCCAGGTGGTGGGTGGGGCTGGGGCTGGGGC +AGGGGCAGGGGCAGGGGCGCCAGGTGGTGGGTGGGGCTGGGGCTGGGGCAGGGGTACCAG +GCGGTGGGTGGGGCTGGGGCTGGGGCTGGGGCGGTGGGTGGGGTGCTCACCAGGCTGTGG +TTCTCCTGCTCCAGCTGCAGGAAGGACTGTTCTGCAGAGCCCAGCAGCCCGCACTGAAGC +TGGGACAGGGCTACCCTGGGCGAGGCCAGGGCCCCTCCCTCCCGTGACAGGGGTGGGCAT +GGGCAGGGTCTGGTCCCTGAGCCAATGGATCCCCCCTGCCTGGGAGCCCCCTGGTTCGAC +CCTGGGTGCAGGGGCCAGCCCCACCCAAGCCCTGCCTCCAGGTCTCCTTGTCTCGGCCCC +TCAGTGTCTCTCGCTCTCTGCTCCTGGCTGGACAGAAAGAGCCCGGGAGCCTCGCCCTCC +ACTGTGGGCCCACTGCCCCCTGCCCCTGGGACTGGGAAAGGTCTCTGGGACTGCACTGAG +TGAGCAGAGGAGATGGGGCCCAGGCAGGGCCACCCTACAAATCCCTGGCAGGGCTGGTCC +AGGCCTCCTCCATGGCTGCTGGAGGCTGGGGTTGGGGGACACTGGGGGTGGGGGTGGCCG +CAGGGGCCCAGGGTTTTCAGCCCCTCCATCCCTCCCTGCAACCCCTTCCTGATCCCAGGT +CCAGGCTCTGGCCTGGCCCAACCTGCCACCCTCTTACTGAGGGGGCTGCCCCAGGCAACA +GTCACAAAGGCCTGGGTGCAGCCAGGGCGGTCACAATGGGCCCCTCCCACCTGGTCCTGA +AGCTGCCCCGCCTGATGACCTCACTGCCTCTGACCCACTGACCCACCATGCGAGTTTTCT +TTCTTTCTTTCTTTTATGGGAAGGGGACTTGCTCTGTCACCCAGGCTGGAGTGCAGTGGC +ACAAGGTGTCTCATTGCAGCCTCCAACTCCTGGGCTCAGGCAATCCTCCCACCTCAGCCT +CCCAAGTAGCTAGGACTACAGATGTGTGCAACCATGCCCGGCTAATTTTTAAGTTTTTTG +TAGAGATAAGGGTCTCGCTATGTTGCCCAGGCTGGTCACAAACTCCTGGCCTCAAGCGAT +CCTCTTACCTCAATATCTGAAAGTGTTGGGATTACGGATGTGAGCCACTGCACCCAGCCT +AGTTATAAAACAGATAATACACACACTCACAGTGTGGATTCAAAAAGTATCAAACAATAA +AAATGGTGTCCAAGCTTTTCCCACCTTTATCTCCTGTCCCTGCAGACAGGAGCTTCCTCT +TTAAGCTCCTAGAGGTGGGCTGCCGTCCCCTGCCCCTGGTTCACACAGAAGAGGTCATCC +TGTAGCCCAGCTTTGCTTTTCACCTAATATATCTCTGAGGTTTTAAAACATTCTTTTCTA +GCTATGTATTGAATATTTACATTATGCCAGATCCAGTACATGCCATTGTTATCCTCTGTT +TTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGC +AATCTCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCACGCCATTCTCCTGCCTCAGCCTC +CCGAGTAGCTGGGACTACAGGCGCCTGTCACCGCGCCCGGCTAATTTTTTTGTATTTTTA 
+GTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCATGATCC +ACCCATCTCCACCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCTGGCCTGT +TATCCTCTGTTTATGGGTGGGAAAGCTGAGGCAGGAAGCCCTTTAGTCACTTGCCGAAGG +CCACGCTGTTAACGGTCGGACCAGATTTGGGTGGCGGAAGAGAACTCATCCGACAGTAGG +GCCAGATTCACGCCCCAGGCCCGTTCCCTCCTGCTCCCTGGTGCTCCTCACACCATCGGC +CCTGCTGCCCCTCACTGCCCCTGAGTCCCTGTGCCCGTGTCCTCCTTCTTGAACCCATCA +GCCGTCAGTTAACCCTCAGAAAGCTGGCTCGGAGAAGTCCTCATATGGTAGGAAGAGCCC +CGTTCCCTCCTCTCCCGGTGTCCTGGCCCCCCTGCAAGGTGACTTCACCTCCTACATCAA +GAGGCGGGTACTGGTCTCCACCTCTGGGCTCTGGGCCGGCCTCGGATGCACTTGGCCAAT +GGGCTGCTCCTAAGCTGCTGTGGGCAGAGGTTGGCACGACACTTGTGCGCTGGGCCTGGA +CCTCTCTTGCTTGGCTCTGGGGACTGTCCTCCCTGCAGGAGGCCTGGGCTGGGCTGCAGG +ATGAGAGGCCATGGGTCAGGAGAGGCTTCTGTCGTCCCAGCCGGTCCCCAGCTGCCTAAA +GATGGGTGGACAAGCCCAGCGAGGCCCCCAGAGAACCGGCCCATACGTTAGCCAAGTTGC +CGGCCATGTACCATCGAGCCCCGGGCTGTCATGGTGGTTTTGCCACTGAGTTTTGGAGTG +TGCTGTGACGTGGGAGGAGACATTGAGAGGCCTCACAAGCCTAGAAGCGTCTTACCCTGA +CCTCACACCTGCTGGACTTCGGGCTGGGAAGAGAACTGGGGCCGAGGGCGTTTTCCCACA +GGACTGGAAGGCCGGTCGTGGTCCTTGGTGGTGAGAGCTGGCCAGGCTGCCAGCCTTCTC +CCCGGAAGCCTGCAGGGTCCCCTCTGATCCCTGCGCCTCTGGGGGTCACACCAACTGTGG +GTCTTTCCTTCCTCACCACGCTGGGCACTTGGTGAGCACATTCTGCCTTGAGACGGGGCA +TTTCTTTGATAACATTTCTTCTCATTTCCTGCTTGTTCTGGAACTCCTGTCAGTCAAATG +TTGGGCCTTTTGAATTGATCCTGTGAGTCCTTCTTTCTACCTTTGCTTGTAGATAGACTT +CCCTGAACCCAAACACAAACACTTTCTCTCTGCTTTTTAAAAAATTGGTTTTGGGGCCGG +ACGTAGTGGCTCACGCCTGTAGTCGCAGCACTTTGGGAGGCCAAGGCAGGCAGATCACTT +GAGGTCAGGAGTTCTAGACCAGCCTAGCCAACATGGTGAAACCCCGTCTCTACTAAAAGT +ACAAAAATTTGCTGGACGTGGTGGTGTGCGGATGTAATCCCAGCTACTCGGGAGGCTGAG +GCAGGAGACTGGCTTGAACCTGGAAGGCGGAGGTTGCAGTGAGCCGAGATCAAGCCACTG +CCCTCCAGCCTGGGTGACAGAGCCAGACTCTGTTTTAAAATAAATAAATAAATAAAAATA +AAAAATTGGTTTTGGATTTTAGCAGTCCTGTTTTTCATTTCCAAGACATTTCCATTGTTC +CCTGAGCTCCCCTCTTCTTGTTCCAGAGAATTCTGTTCTGCTCTTAGGGATAAAGTGGCT +TTGGGGGGAGGCGAATCTTTTGCTTTCTGCATTTTCTCTCTTGCGGTTCTTTTTTGTTTG +TTTAGTCTTGGAGCTTCACATGAGACTCAGTGCTCAAATGTCTGGCTTTTCTTTCTTTTC +AATCCCTGTTTATAGGGGAGACAGGCAGAGCTGCCGGGCACTCCTCTTTCCTTGTTAACT 
+TGAGGAAGCCGGACAGCCAGGTCTTGGAAGGGAGGCCTCCTGCCTCCAGGCGGCATCTCG +GGCTGCAGGGCTGTGGTCTGGAGTGCGGACAGTACCAGCTCCCACTGCTGTCCTGCCATG +CTTGTTCGCAGTACTCGTCTCCTCACACCAGTGACGAGGCCACTGCTTGCCAGCCGCTTG +CCTTCTTCCCGCCAGGCTGTCCTGCTACGTCAGCCTCCGCTCTCCACCTCAAGAAGTGTC +TCGAAATATCTTCTCTGCAGATGACCTCTCTCCTGTTCTCAGTGCCACGTGGGGCCTCCC +CAGCAGAGGCGGCTGGACACCTGGCCCTGGCTGCTGGAGGTTGTTTAGCTGCCAGAGGGT +GGCGGTAGCCTCCAGGACCCTCCCCTGGCCTCTGCCCTGTTCATCCTTTGCCGGAATGTC +ATGGGTTTCAGGATGTGCTGAGCTCCGAAACCCCATGGGGCAACTTGGGATGGGGGCTAA +GGAAGGCCTCTTGGAGGAGCCCTGAGGCGAGGTGGCCCTGGGGTGGGGTGGAGGAGGCCC +TGAGCAGCAGGGGAGGGGGTTGCACCCTGCTCTCTGTGTCCCTGAGTGAATGGGGCAGGG +GCTGCACCCTGCTCTCTGCGTCTCTGAGTGAGTGGGGCGGGGGCTGCACCCTGCTCTGTG +TGTCCCTGAGTGAGTGGGGCAGGGGCTGCCCCCTGCTCTCTGCATCTCTGAGTGGGATGG +GGGCTGCCCCCTGCTCTCTGCGTCCCTGAGTGAGTGGGGCGGGGGCTGCCCCCTGCTCTC +TGTGTCCCTGAGTGGGGTGGGGGCTGCCCTCTGCTCTCTGTGTCCTTGAGTGAGTGGGGT +GGGAGCTGCACCCTGCTCTCTGTGTCCCTGAATGAGTGGGGTGGGGGCTGCACCTGCTCT +GTGTGTCCCCAGCCTGCTAAGTGCCCCACAGGGGCCAGGATGCTGAGGAGTGAGAGGTCT +TCACAGAGACAGGTGGCTCCTGTAATAAGGAAGACAACACTGGCTTCCTGGGCGAACAGC +AGGGCGGGGTCCTTAGGTCCCACTGTAGGTGCCCAGGCCCGGATGAGATTCCCCAGTGGC +CCTGAGGGGGCGCCCGGGACACCCTGGATGGCCCAGTGTCCTCCCGTCACCCTCGCCCCT +GGGCACCATCCTCCTTGGCCTGGAAACCCCAAGCTCCCTGACGCCCCCGCCCCTCTCCCC +GGGCCTGTGCCGGGGAAGGATACTGCTGCTCCGTCAGTCAGCCATAGAGCCCCTCCTCCC +CCAGCCCCTCTCACCCCTCCCTGTGGTCCTTGCCAATCGCCCTGAGCCCGAAAAGGTGGG +AGATTCTTGGAGAGATGGGGCCCTGGATCCCCGGCCCTGGATCCCCATCCCTGGGGTCCC +CCACTCACGGGGTCAGATACGCTAACAAAGGCCAAGCGCCCTGCCAAGCGGTGTCCTGGA +TTGGGGTGGGCGTGGGGGTTCGCCTCAGGTCCAGGCTCCGGGATGTCGTGCAGGACAGGG +CGCGGTTGCGGCGGGGGCACTGGCTGGAAGCCGGAACTCCCGGCCTCGAGGGCGGAAAGT +GTGGCCAGCGCAGCGTGCTGGGGGGGCCATGGCCCCGTTGCCAGGCAGCGCCTCCCTGGA +CGCCAGTTGCCATGGAAGCCGCTTCCTGCCCATGTTGCCAGGGCGTCCAGATGGCCTCGA +GGAGCCTGGTTCAGAGGGGCCGCCCTATGCCCTTGCCCCTCCTCCGCTGCCCAGGGATGG +GCTGGGAGGACAAAGAGGAGGAGAGGCTGGGACTCCCAGCATGGAGATCCGAGTCCTTCC +TGCTGGAGCCAGGCTGCGGCCCAGGGCAGGGGCCAGATGGGGCTGTTCTGCTGTCTGCAC +CGCGGACTTGATGTTCATACCCCAAGCTCATTTTATTTATTTTCTATTTTTATTAAGAAA 
+GAGTCTCACTCTGTTGCCCAGGCTGGAGTGCAGTGGCGCGATCACAGCTCACTGAAGCCT +CCAGTTGGGCTCAAGGGATCCTCCCACCTCAGCCTCCCGTGTATTGGGGACCACAGGTAT +GTGCCACCATGCCCAGATAATTCTTTATTGTTTTATTTTTGTAGAGACTGGGTCTCGCTA +TGTTGCCCAGGCTGGTCTTGAACTCCTGGCTCAAGTGATCTGCCCGCCTCAGCCTCTCAA +AATGCTAGGATTACAGGTGTGAGACACCTCGCCTGTGCTTGGCTGCTCCCGAGCATATCT +CAGAAGCCAGGAAACAGGCCCTGAATGGATGGAGCTCGGGGTCACTGGGCCAGCCACAGC +CCTCAGAGGACCCCATCTCTGTGCCCTCTAACCTGGGCTCCTGGCCCACACCCACCCAGG +CTGCCTGTCAGGATCTGCTCCCTCTCCAGCCCTCAGAGCTGCCCTATGTGGAGGGTTGGG +GGTCCCAGAAAACCTGGGAGAAACGGGTCCTGGAAACAGGGTCGGGGGGAAGGGCAGAGT +GCTGGGGATGCTGGGCCCACGACCTCCCTGTCCCTGGGGCTTCCCCATGTCAGAGGTGAG +GCTCAGAGAGGTAAGGGCGCACCCTTTGTCTGCCCCCTCCCCAGCATGGCCAAGTCGCTC +CCAGGGTGCAGGCGATGGCAGGCCATTTGTCTCCCTCCTGGGTGAGTCTCTGGACATGGA +TTCTCACATTTTTTATTTAAGAATCAGAGAGATATAAGAATGTCAAGGAAAAATCCTCTC +ATGGACAATGCAGCCCCAGTAAATGACTGTCAGCCGGCGTGTCCAGGGCTTCAAGGCCCC +AGGAAGTGGCCATGCTGGGGCTGCCAGGCCTCTGGCTCCAGGGTCACTGGGGCTGAACTG +TCTGCCCAGGCCCGGAGACACCCTGCCCCTGAGGAGCCCACCGGCTTGGCCAGTCCATCT +TCTTGGCACTCCCTGACCACCACTTACCTTCTAGTCGGACAGTGGAGCCTGGGGGGACAG +CGTGGCAGAGTGCCTGATGGTCGGTGACAAAGTCATCTCCAAAGTCCTTGCTGGGGCCAA +GAGCCAGGACTCCTTGCCCGTCCCCGTCACTGCCCTGTGCCCGCCCAGCACCTGCTGGGG +ACTAGGCTGCCCATTGGAGAAGGAAACACAGTGCTGGGCTGTGAGCTCCTGAAGCCTGTC +TCTGTGCCCCAGGACCAGGCTCCTGGGTGGAGGGAGAGACCAGGGGCAGGTGAGGAAAGG +CAGGGCCCCCAGAATCCCTCCATGCCTGCCCCTCAGTCTCCAGGACTTATGTGCAGGTAC +CGTTTGGAGCTGTGGTGCAGTTCCCAGTCTCACCACCAGATGGCACCATGCCCCTGCAGA +AGCAGTGCCCAGAGCAGGCCAGGTGGTTCTCGGGGGCTGCGGTGGAGGAATCCACCCAGC +CGAAGCTCTGGCAGGGAAGGGGCAGTGCTAGGGGGAGCCCCCTCCCCACTTGGATCCGAA +GTTCCCATGGGGCCTAGGGCAGGGGCTTGCCAGGTAGCCAGAAGTTCCCTTGCTGAGTCT +AGGGTGTCTATCCCACATTTCACATGGAGACGAATCATCCCAGACACCCCGGCACTGTCC +CGGGGCCGTTTGCCACCCCTCCCAAGGGGCAGGGCTCCTGTCTTGGGGCCTTCCCAAGCA +TAGTAGCCTTCGGGCAGCCCCGGCCTGGCCCCAGGTTGTCCCACTTGTCAGTGGGCAGCT +GGGGCTGGGGGAGGGGTCCCAGCTCCTGTCCAAGGTCACCATTGCCCCTGACCAGGGTGT +GGCAGCTGCTGTGGCCTGGGTCAGTGAGGCCGTGGACGGAGGGTGGCCTGGGTGTAGGCA +CTGCTCTCAGCTAAGAGAGAGCCCATCTGGAGGAAGAACCAGGAACCAGAGGGATGGCCG 
+GGTTCCCAGCGTGCAGGAGGCCAGCTTAAGTTCAGTAGGGTCCCCCTGCAGGGACTCCAA +GAGGCAAACCACATGGAGAGTCACCTGAGAGGGAATGATGGAGGCTGGGGAGGTGGAGCC +CCCACCTCCAACTGCTGCCTCGGCCCCCACAGCAGGGGAGGGCGGGAGGCTGGCACTGGC +ACTGCCCTGGGAGATGCCACCTGGGCCTTGGTCTGGGGAGGAGGGCTGGCCAGGAGGCTG +GGTACGGCTACTGCTCTGGAGTGACCGAACCATTTCCACCCTGCTTGCCTTATTCTTCTC +ATGAGGGCCTCAAGGGGACACACCAGAGGTTTACTGGGGACATGGCAGGGGAGGGATCTC +AGTGCTGGCCTTCTGGGGCCCTGCGTCCACTCCGCCCCTTTCTGACCTGGGCTGGGGCCT +CTGCCCTCCCCGGGGCCTGGAGACAGAGCCCTTTTGGAGGCCCTCAGGCTTGACAAGCCT +ACTTCTTAGAGCCAGGCTGTGGGCTGGGCTGTATCCTGGTGGGATGAGGGATCTGGGGCT +TGGGTGGGAGTGGGGCTGGGAGCAGGTGGAAGTTGGTGCCTCAGGCTCTGAGGAATGGGC +CTGGGATGGCTGCAAAGTGGAGGAGAGGCTCTCTCAACCCTGGGCTTTGGGGTCTGAGCC +CAGCCCTGTCCTCAGGCAGTCTGCCCCTGCAGCCCCGCATTGCCATGTTGGTTGGAGTTG +CGATGAGAGGTGCTGGGGCCTCCCTTGCCTCTGCCCAGCACAGCTGAGAAGTTACGTGCT +GCTGCCCAACTGTGGGGGTTTCAAACCCCGCTGGAGCCTCCAGGTGGGCCCTTGGGCAGC +AGGGCCTTGTGGCGGGAGGAGGGGACCCTGGGAAGTCAGGGACCAGGCGGGGGAGGGAGA +GAAGGTGGGAGCACAGTTGTGGGTGTGGGTACCCGCCCCTTCCTGAACTGTGTTCCTCCC +GGCCCACCTGACTCTAGTCCTCTGTGCCGGCTGCCAGGGGATACCCGAGGCCACCTGCGT +CCCCAAAAGAAGGTCTCACCTTCAGAGCCAGCCTTTGAGGCTCCTGGGTGACCTCAGCCT +GTTACCCTTGACCCTGAGTTCACCTAGATCAAGTGGGAGGCCGCTCTTTGGGCCTCAGTT +CTCCCATCCTCAGGGAGTAGGTCCTCTGTCCCCTGACAGCACCCCTCCTCGCTGTGCCAC +CACCACTGCCAGGGTTCTGATCTGTGGATGTTCCCCAGGGGGTGGCTCTGGGCTCTCAGG +CAACCGGCTACTGTGGATGGTGGTTTGCACGAGAATAGTGTCACTCTTGGGTCCTTTGCT +TGAAGTTGGCTCTGCTGAGTGCAGCCTGCAGACATTTTCAGGAAACTGAATCCACAGGCT +GGAGCCTTCTGCTTCCCCACAACCCAAACGGCCCCCAGCCAGCCAAACAGCCACGAGAAC +CCAGCTGCTAGCAACTGCCAGCAGGAAACCCACTACCAGCCGCTCACAGATGGACAGCCC +ATCCCAGGAGCTGGCCCCACCTCCCCGCCCCTCTGCTGACCAACCCAGCAGCTCCTTGGG +TGGGTGTGGCCACCCTGCTGCCCCAACCTGCTCCCGGACCCTGTGCCAGGAGGTGCTTCG +CCTCAGCGGGGCCTTGCCTCCCCCAGAGCCGCTCTGCTGCCTGACCTTGCAATGTCATGG +GTCCTCTGGGCACTGGCGCAGTTGGGTGGGATCAGCTTTGGCCAGCAGGTGGGGGGCAGG +TGGGTAAATCCTCCCTCTCTGCCATCTCCGCTTCCTTCCTCCTGCCCGGCTTCCCTGCCT +TTTTCCTACTCTGGCTCCAGGGTTGCACTGCTCAGAAAGCTGTTGCTGCCTGGCCAGCAC +GCAGGCCTGATTTTCCTGGGAGCCTGGACTAAGACCCTGACCAGTCCTCCCACCCACCAC 
+CCACACCACCACCCAGCTAGCCAGCAGCTAGGCCAGGAGTGATCAGGAGGGGGCCCAACC +GACCACCCACCCGGCGTCTCCAGCCGGCCAACTCTGCCCACTCAGGGCCGTGAGCTGGAG +GTGCCAGGAGCCTCCCGGGAGGCAGTGTGAGGCATTCTTCTTAGTAAGTCCCGCATCGAT +GATAAAGGGCAGTATCAGCCCCTCTCTGATGCGCAGTGAGCGGCTCACCATACAGGGGAG +GCGTTTAATTTATCTCGGGAACTGGGCGCAAGTTATAAAAATGGTAATTTCTTGGAGATT +CAATTACATGTTTTCATTGTTACCCTCGATAATCAAGGAAACGATTTTTTATAAAAGGCT +GTGGGATGATACTGATCTCAGGAGAGGATCTGGGCCGGGTGGGGGCTTTGGGGACGAAGA +GGAGGGCAGTGTCTGCTGGGGGCAGGCAGGGCAAGTGGAGCTGGGCCGGCCGGCCCACCA +GCTCCTGCAGCCCACCAGTTCCTCCAGCTCCTCCAGCCCTCCAGTTCCTCTAGCTCTCCA +GCTCCTCCAGCCCACCAGTTCCTCCAGCCCTCCAGCTCCTCCGGCCCTCCAGCTCCTCCA +GCCCACCAGCTCCTCTAGCCCTTCAGCTTCTCTGGCCCACCAGTTCCTCCAGCCCTCCAG +CTCCTTCAGCCCTCTCTGGCCTCAGTCTTCCCATCTGCCGACGGGCCCATCTCTGGTAGG +GAGGGTCTGCGTTTTGGCATGAGGGCTGATGAAGTGAGGCGTGACTCCTGGGCCGTGAGG +GTGGCTGCGTCCCTGCCCCTCCCTTCCTGGTGCCCGAGCAGTCTCGTCCATCTCCTGAGG +GCTCTGCAGGGAATAGAGAGGGGTTCCTGGTGGAGTGTGGACCTGATTTTTTCCCCAAGT +GATGTCTTGGCTCTAAATAACATCTCCAGGATTAATGATGCCGGCCCCCAACAGCACCAG +GTGATTGGAAAAAGGGTTTTATGGCCCCGTGGGTGAAATTGAACTTGCATCTGAGGTGTA +TTTCCAAGATGGGATTTTCCTCCCTGGCAGAGGCGGCACATTTCACGCTAATGATAATGG +CAGCTCCTCGACATTCATCACCTATAGTTGGGGGGACACCCGTCTGGGGCACAGGGGCAG +GGTGTCCCTGGGGTAACCTCCAGGTCTCCTAATCTCAGCAGACTCTGGGGGGCTGGTTCT +AGGAGATGCGCCCTCACCCAGTGCAGGGCCAGGGAAGGGGGAGGCAGGAGCCCCCAGGGG +ATGAACGAGCTCTAGCTGGGCTGCACCAGGGAGGAGGCTGGAAGGAGGCCCCAGGCTGAG +GGTAGGACCCGGCTAGTGCTTGCCCAGGGACCCACAGGCCCCTTGCTGAGCAGACTCCAG +CGAGGGGCCTGGCCCATCTTGTTGCAAAGGGCTAGAGTCCTGGGCTGAGCTGGGCAGTTG +ACCTTGGATCTGACTCTGTCTCCCCTGAAAGCTGTTCCACCCCAATGCAGGCTCCTGCAG +CCTAGCACAGCGCCCAGCCAGGATGGGCAGGAGGAAGCCCTTCTTCCGACGGGCTGTCAC +CGCGCCCTGCCTGAGGGGCCTCAAGCCCCAGGGCTCTTGTCTCAGAACTGGGCCGTGGAG +ATTCAGGAACTATGGGAGTTTGGGGGATCTTGGTGGCTGAGGGCTGTGCTCATGGAGGAC +TCCTTGAGTTCAGAAGATGAGGCCCACCTTCTGCATAGCCAATAGGTCATTGTCACAGGG +ACACTGACCACCAGCACCAACCTCCATCCCCCGGCACCTGCCTTTATCACCCATTTCCCA +GGTGAGGACAGGGGCTCAAAGAGGGGCAGGGTTCCTCTGAGGTAACACAGCAGGTGAGCA +ACAGAGCCAGTGAGGGCCTGGGGGTGTCCTGCCACGGGCTGAGGTGTGCCCACATGCAGG 
+AATCCAGGGGTCCGTGGGCCAGGACACTGCCAAGAGTTGGGGGCTGGGGGAGGGAAGGGG +GCATTGCCAATGCTGCAGCCCCTGCCCTGGGGGGTAGGAGGGAGATGCCCCTCCGGGGGC +CCAAGGGCAGGGTGGGCCCATGCTGTTCCCTCCCACCTGCCACGGGTGAACCTGACAGGA +GTGGGGATGGAGCCTGGCATCCCGAGGGAACCTCCTGGAGTAATCACTCCACATGCTCGC +GGTAAATACTGGGCCCTTCTTCCCCTCCCAGTAATTGAACTTATTTACTTTTAACTAAGA +TTAAATGTTCTCTGCGAGCTCAAGATACTGACTGCCCTGTCAGGCTCAGTCCACGCTCTG +CTCTTCCTGCCTCTGGGGACCAACCCCAGGCAGGGAGGGACCAGGCAGCCCTGGAGAAGC +ACAGAAATTGTTTTTCCAGGAAACTCTGGGTGGATAATCCACTTACATCATCACCACTGT +CACCATGAGCATGGCCATAACCACCATCATGGTCAGCTCCACCAGCATCACCACCACCAC +CACCACCATCACCATCACTGCCACCACCACCGCCACCATGACTACCACCACCATCACCAC +CACCACCACCATCACTACCACCACCACCATCACTACCACCACCACCACCACGATCACTAC +CACCACCACCACCATCACCATCACTACCACCACCACCACCATCACCATCACCACCACCAT +CACTACCACCACCACCACCACCACCATCACCATCACTACCACCACCATCACTACCACCAC +CACCACCACCACCACCATCACTACAACCACTACCACCACCACCACCATCACTACGACCAC +CACCATAACCATCACTACCACCATCACCACCACCACCATCACCATCACCACCACCATCAC +CATCACTACCACCACCACCACCACCACCATCACTACCACCACCACCATCACTACCACCAT +CACCATCACTACCACCACCACCACCACCACCATCACTACCACCACCACCACCACCATCAT +CACCACCACCACCACCACCACCACCATCACTACCACCACCACCACCATCACTACCACTAC +CACCACCATCACTGCCACCACCACCACCACCACCATCACTACCACCACCACCACCATCAC +TGCCACCACCACCACCACCACCATCACTACCACCACCACCATCACCACCACCACCACCAT +CACTACCACCACCACCACCACCATCACTACCACCACCACCACCACCATCACTACCACCAC +CACCATCACCACCACCACCACCATCACTACCACCACCACCACCACCACCATCACTACCAC +CACCACCACCACCATCACTACCACCACCACCACCATCACTACCACCACCACCACCACCAC +CATCACTACCACCACCACCACCATCACCATCACTACCACCATCACCACCACCACCACCAT +CACCACCACCACCATCACCATCACTACCACCACCACCATCACCACCACCACCACCATCAC +TACCACCACCACCACCACCACCACCATCACTACCACCACCACCACCACCATCACTACCAC +CACCACCACCATCACTACCACCACCACCACCACCATCACTACCACCACCACCACCATCAC +CATCACTACCACCATCACCACCACCACCACCATCACCACCACCACCATCACCATCACTAC +CACCACCACCACCACCATCACTACCACCATCACCACCATCACCACCACCATCACTACAAC +CACCATCACTACCACCATCACCACTACCACCATCACTACCACCACCACCACCACCATCAC +CATCACTTCCACCACCACCACCACCACCATCACTACCACCACCACCACCACCATCACCAT +CACTTCCACCACCACCACCACCACCATCACTACCACCACCACCACCACCATCACCATCAC 
+TTCCACCACCACCACCACCACCATCACTACCACCACTACCACCAGGACCACCATCACTAC +CACCACCACCACCACCATCACTACCACCATCACCACCACCACCACCACCATCACTACCAC +CACCACCACCACCATCACTACCACCACCACCATCACCATCACCACCACCACCACCACCAC +CATCACTACCACCACCACCACCATCACCATCACCACCACCACCACCATCACTACCACCAC +CACCACCACCATCACTACCACCACCACCATCACCATCACTTCCACCACCACCACTGCCAT +CACTACCACCACCACCACCATCACCATCACTACCACCACCCCACCACTACCACCATCACC +ACCACCACCACCATCACTACCACCACCACCACCACCATCACTACCACCACCACCATCACC +ATCACTTCCACCACCACCACTGCCATCACTACCACCACCACCACCATCACCATCACTACC +ACCACCACCACCACCATCACCATCACTACCACCACCACCACCACCACCATCACTACCACC +ACCACCACCATCACTACCACCACCACCATCACTACCACCATCACCACCATCACTATCCCC +ACCATCACTACCACTGCCACCACCACCACCATCACTACCACCACCACCACCATCACTACC +ACCACTACCACCACCACCACCATCACCACCACCACCACCATCACTACCACCACCACCACC +AGCACCACCACCATCACTACCACCATCACCACCACAACCACCATCACCACCACCATCATC +AGAACCACCACCATCACCATCACTACCACCACCACCACCATCACCACCACCACCATCACC +ATCACTATCACCACCACCATCACCATCGTTACCATCACCATCACCATCACCACCACCACC +ACCATCACCATCACTATCACCACCACCATCACCATCGTTACCATCACCATCACCATCACC +ATCACCATCACCATCACCACACCTCCATCACTATGGTCGCTGTGACTGAGCCCATCCTCA +CTACTGTCATCAGCACTGTCACAGCACCATGCTGACAAGCCCACAGTGGGTTTGAGGGCT +GCCATTGCTTGGGACTAGTTCCTCAGGCACAGGAGGCTCCACGCTGGTCTGATTGCATTG +GAGGGGATGCCTGACAAAGGTCTTTATGGGGTGTGCATTGTGGAGTGGGGAGGGGGAGGC +TCAGGGAGGTCTCGAGGACTTCATGCCGTGGGCATTCTCCCCGGCCCCCTTCTGTCTCAG +CAGCTCCCCACGCTTCTTGACTGAGCTGGAGCAGAGGAAGCAGGCAGGGGTGGGAGGAGG +GGCAGGGGCAGGAGAAGGGGCAGGGGCAGCCACAACCACCTCCACTTAGGTTGGAGCACA +GCCTCCTGGCCTCCCTAAAGTGAAGGGCCACCCAGAGGAGGCACCTAACCATTCTCCCCT +CCGGACTCCACCCAGTGCTTGGATAGCCAGCGGGGCAGCTTGGGGTCGGGGCTGGGCACT +CAGCCAAAGGGGCTGGAGAGCATGCCGTGTGCAGAGCCTGCTCCCATGAGGTGGTGAGAG +CCTGCTGCACTGGTCTCCTGCCCCGAGCCTTGGCTTGCTCATCTATGAACGGGTGACGTG +GTGCCTGCTGCACAGCAGGCCACAAGAGCAGAGCGACAGGACACAGTGAGGGGCGGTAAG +GTGCTGCCTGGTAAGAGAGCTGTGGGCTGGCCTCAAGGGGTAGCCTGTGTCCTCTTGCTG +AGAATAGTGGGAAGCGAGCTGCGAGCTTGGGCCCACAGTGGTGGTCTTAGGCTGCCGTGG +TCACATAACGCCACCTCCCAGTGCCTGCGGCACCACCCCTGGGCTCCGCAGCCCCAGCCA +GCCGTCCATGTGGCCCATCCTGTCCTCTCCAGGCCTGTCTGTGTGCACCCAGTGGAGGGT 
+CTCCTGTCTCCTGTGGCTCCCTGAGGCTGCTGGTCACAGAGCTGATGAGGACACTAGAGA +CCGCTGCGGGGCTGGGCGGGGGCCCTGTGGGAGCGCAGCAGCCTGGGATCTGCAGCTGGG +GCTGACCTAGCACAGCAGGCCACTGATGCCACCTCTCAAGTGGCAGGGGCTTCACAGCCT +CCCACTGCAATCCCGGCAGGCCTCGGTGCCAGGGCGGCCCCATCCTGGGGGCCACTGAGG +ACCTGCATCGGCCACAGGGCTCGTGAGCATGGGCACCCCAGGGCTCAGTGGGGGCAGCTA +GAGGGAGGGTTCAGTCACCTAGGCCTGCCCAGAGCCCTGCTCAGGGCCCAGGATGAACTG +ATGGGGGCAACCTGCAGGCGGTGCCAGCGAGAGTGGGCGCTCCCCACAACAGGGGAGCCA +CAGCCAGCCTCACCGAGAGCAGGGAGGGAATGTGTGGAGATGCAGGAAACCTCTGCCACG +TCCAGGAAATATTATTAACAATTAAAAATTTTAAATCTTGGTAACATTCCCGATGGCCAG +ATAATTAATATAAGAGCAGATAATAATTACTCAATGATTAATAACTGCACTGAATAGTTT +TCAAAGTAATTGTGATAAAGAGAATGTTAAATGAAAACCCATCAAAGTTGCTGATGCCCT +GCACAGCGAGGGCACCTTAGTGCTGGGGGTGGCCATGTCGACTGGGCTGAGACTGGCCCC +CCGGGCCCACTGTGTCCTGGTGGCACGTATGGAGTGTGAGGACCACCTCCCCAACACACA +CGGGGTGAGGACCCTGCACGCACATGCAAAGGCAGGGGTCTCTCTTAGTATTGGGGCAGG +TGGGCAGAACCTGTCTCTCTTCCTGCCTCACTGGGATTGTGGATGACCATGGGGTTGGCT +CAGGAGTCTGCCCTGTGCCTGCAGATGTTCTGGATATCTGGGCTAGTGTTGGGGGGTATT +ATCTGACCCATGGACCAGTCTCTGGTTCTTCTATTGCCCCAGCCACTCCAGGCCTTTGGT +GAACCCCCTCTACCATTCAACTTGACCTGCCATGTTCCTGAGATCTAGCCCAGGGCTACT +CCATCCACCAATTCTCACTAAAGTGTGAGTGGTTGCTTGGCACTACCCTAATCTTTGCCG +CCTCATGTGTTTATTGGTGTGAAGCCTTCAGCTGCAGGGGGATGAAGAAGGTGATTCCAG +GCAGAGCAGGCCTGCTCCCTCCACCCTTAGGCTACTGACTCTGCAGTCCTGGACAGTGGC +TGGGAGCTACCAGGAGCCCAGGTTGGGGCAGGACCTGGGGACCTACTGCCCCTCACATAA +GCCAGTTTCTCCAATCCTGGTTCCTTGCCCTGTCTGCCATGCCTCCCAGCTGACACCACC +GTGCGGCTCCCTCACCTCATCCTCTGAACAGGACCATCCTGAGTTGTCACAGCTTCCAGA +CCCCCCGAAGCCACCTGGTCCCCTATCAGGCTGAGGCTGTCCAGACATGGCCTGCCACGA +GGCCCTGAGTGGATATGCTGGCCTGGACACCAAGGATGAGGGACCCTGGCACTTCGCATC +TCAGGGATGCCGGGTGCTCACCCGCAAAGACCCAGAGGAGAGAAGGGGAGAAAAAGTCTG +AAGGGCTCGTGACCACCCTGGGGTCCTGATGGGGCCAGGCACGGTGGCCATACTTGTAAC +CCTAGCAGTTTGGGAGGCTGAGATGGGAGGACTGCTGAAAGCCTGCCTGTGCAACAGGCA +TAGAAGGCCCTGTCTGTATGAAAAATAATTAGCTGGGTGTGGCAGCAAGTGCCTGTAATC +CCAGCTACTCAGGAGGTGAGCCCAGGAATTGGAGGCTGCAGTAAAAGTCATGATTGTGCC +ACTGCACTCCAGTCTAGGTGACAGAGTGAGAGCCAGTCTCAAAATAAAAAAATAAAAAAT 
+AAAAAAAAAGAAGGACTCCTGGATCACCCTGGGGCCGCGCAGGGATGGGTGGGCTCTGGA +GGCCACAACTCTATTCTAACCACAGTGGTAATGATGGCTTCGGGCAGGACCACTGATGGC +ACAGAAATCACCGGGTGAAAGGCTGATGCAGATGGGCCTGTGGCTGAGCTCAGCTTCCCC +ACAAGATCCTGCTTCCAGAAAGAAGAAGGCGTCCTCCTGGTGGAGAGCCCTCGGAGCTCA +GCCCATGAAGGAGGGACATGTGGTGCCCCTGCTGCGGCTGCCGCTCCCCAAGTCACCAGG +AAATGCAGACCACTGTCTGCGGCCACTGCATTGCTTCCCTTGTTCAGAGGTCGTCATCCC +ACAGTGATGGCAAAGATGGCAGTGACCACGGGGCAGGGGAGAGCCCAGTAGGCGATGGGC +CAGACAGGTGCGGGCCCGGGCAGGGCAGATGGGGCGCTTAAACTGCTCTTGCTGCTTCTT +TGAGAGTTAAATGTTTCAAGGCACAAGACTTTAAAAAGGAGGCACCTGGAGACATGAAGG +CCAAGGGTGACAGCCTCGTGGGGAGGGCCGTTGGGGGCCTTCCCGATGCTCGGCACAGAT +GGGCACTGTGGGCTATGGGCCGGGCAGGTGTGGTCCTGGCCTCAGGTTCCTGTCTCCCAC +TTCTGGGAGCTCTGCCTCCCTTGTGGCCTCTGTTTCAAGGTCATTTCTGGATGGGCAAAC +TGAGGCCCCAGTGGCTGGGGTTGTCCAAGGTCCTGAAGGCCAGTGTTGGAGCCTCCGGGC +ACTTGGCCCCTGCGGCCCCCACGCCGCTACCCCCAGCCCAGGCCCTCAGGTCCAGAACCT +CTTGCCCTCCAGCCTGGGGCCACTGTTCTGAGAGTGTGCCCCGAACGACACTCCCTCATT +AGCCGAGTCCCAGCTCTGTGTTCTGGGGTTGGAGCTCTGGGGCTGTTGTCCTCTGGGCCT +GGCTGCTCACTGAGGCTCCTCCAGGGGCCCTGCCACCCACCTTGTGTGCCTGCAGATGTC +CTTCCCATGCGAGCACCAGTGCCACAGCCCTGACCCCCCGCCCTGCCTACCCCATCATGG +CTCGCCACCAGCCCAGCTGAGCTGCTACCCCGAGCCCACCACTTCCCTAAGCCCCAGCTT +GCTCCTGGCACCTGGCATCAGCCTGGGAGGGGCTGATGATCTTGGGGGAGGATCCTGCGA +CAACTGCCGGGCCTGGGACAGTGATGGAAGTCCAGGCCTCCAGGGAGGCTAGGAGGCTGG +TCCCAGTCCCTGCTCAGCCATTAGGGCTGCCCCTCCCCATGTTGGCCTCAGTTTCCCTGT +GCGTAGCATGGGTTTGCGGTAAAGGTGATGCACTGCCATGGCAAGGTTTGTTATTGAGGG +TCCTTGGAGCGGGGCCACTGCAGCAGGAAGTCCCCGTGGGGCCCCCTGCGTGGGGTTGGC +CTGGCTCAGGAGATGGTCCCGGCTGCTATTTCATGTATCGTGAGGACACCGAATCCTACC +CCAAGGCCATGAGGGCAGAAGGCCAAGGCTGTGAGCCTTCCCTACTTGGAGACATTTGCC +CTGGGGAGCGGCTGTTCCCAGACCCTACATGCCTCCCCCACGAGTGATCTGCGGACTGGG +GAGGGCACTGCCTACATCTGCCCCAAAGGCAGTGGCAGAGGCTTTGTGGGATGAGGGTGG +TTTGTGCTCTCAGAGGCACTGGGCTCAGCCAGCGGCTGAGGTAGAGCTGGGGCCCTCCGG +GGCAAAACATCAAAAGTCTCTGAGCTCCTGTCCAGGGCTTGGATGAGTTGGGATTGGTGC +TGCCCCAGCACCCAGGATTCCCCAATGCACCTGCCACCTGGCAGTCCCTCCCCAGGGCTC +AGCACCCACCATGAGTCCTCAAGGATTTGGGTGATTCAGTGGCCTGCCTCGCTGGGCCTC 
+CCCCTTAATAATGCATTCTTGCTAATAACCTTCTTTCCTGTGTGTCATTAAACACATCCT +CCTGCTGGTGGGTAGTGGAGCAGCCGGTGGTGGCTGGGGAGGCCCGGGCCCCCAGGAGAG +AGGGAACCCTGGGAGGCTGGGGGGTAGGAGGGGACAGGGCTGGTGCACCACCTGAGAATA +GGTACAGGGGCTCCAGCTCTGGGCAGCATCATGAGACGCTTGTGGGGTCGGGGGCGGCAG +GGCAGAAAACAGCCTACAGGGGCCAGGCTGGGGGCTGCCCTCCTCGTCCTGTGGTCACTG +GGCTATGGGAATGGGAATGAGGGCAGGGCAGTGCTAGTGGCCCACAGCCCTGGGGGGCAG +CAGCCCAGGGATGGTTACAGGACAAGGTTTGCGTAAAACCAGGCTTGGAGGCCGGTGTGG +GGTGATCCCTCTTGGAGGTGAAGGAAGGGAGCAGTGTGGCAACGTCCTGGAAATCATGGT +CAGAGGGCCCAGCTGGGGCGTGTGGCCTGGAAAAGGCACTGCCCCCACCCCCACCAGCCA +GGGTCTTTCCCTTGCCCCTGCCCCCTCCTCAGGGCCCCGCCCTGTCAGCCCTGCCTGTGG +GTTTCAGCCCCATGGGCAGAGCATCAGACTTAGCCCGGGTGGAGGACATGGTCGGTCCTG +CGGGCAGGTCGGGGGAGTCCCAGGCCAGGGCCCAAGGCCACCCGGGGGCAGCCCTGCCCA +CGCCTCATCTGGTGGACCAGGGAGAAGTCCTCCATCAGGCATGGTTACAAAACTTCTCAG +ACTTAATTAAAGTCAATCTCCAGTTACTCAAATGCCATTGATTTGGATCTCGACTGATGG +GGAAAATATGATCATCATCATGGGAAACATCGATAGGAAGGCGCTGAGTACCTGGGCACA +GGGCCGATCTGGAGCCAACACAGTCACGGAGCCCCTGCTCTGGGCCCGACCACGGGGCCC +TCATGCAGAACCCTTGTCCAGGCCCTTCCGGAAGGGTCTGCCACTTGCTGCCTGGGTGTC +CCTGAATTCTCACCCCCTGGCTGAACCCAGGGGCCTCACAGAACCGGGATCCTGCAGGGC +TGATGGGAGGCTCAAGGGTGCGTGTCTTGGGGCCGGCAGCAGCCAGCGCTCTGTGGAGAC +CGTCTACCCTCGTCACGTGGCTGGGGTCCTCTTGATGTTGCACCGGGGACCCTGGGCTGG +CAGGACTAAGTCTAACTGGGGATCCAGGGCTCCTGCCTATCAGCCCCCAGTGAGTGAGTG +CAAGAGGGAAGGCCCTTTGGCCAGTGCTGCACCTTTTACTTGCGTGGGCTTTGAGGCTTG +GACCCAGCCCGAGGCCCATTGGGCAGCAGATGTCAGCACCTCTGTAGAGGGCAGCTGAGG +CCAGACTTCTCCATCCAGCCTCAGCAGAGAAGCAAGGGCGGAGAGCAGGTAGAACATCAA +GGCGGAAGGAGGTGATGCTGTTCGGGGGCTGGGGAGAAGTGCCTTCAACCAGAGCACCTG +AGGTTCTCTTCTGGCCTTGCAGCCTGGAGCCTGGTCTGGGGCTAGGAGGTGTCGCAGCTG +TGAGGCCCCAAGTGCAGGCCCAGGCAGAGGAGACCTGGAGGCAGCTAATGTGGGGGAGAG +TTGACTGGGGATCGTAATCTAGTTTTCTGTTTTTCCTTGGAAAAAGAAAGACAAAGTCCC +CAACTCACCTGCCCTTAAAATAAAAAGTCACAGGCTGGGTGCATTGCCTCATGCCTGGAA +TCCCAACACTTTGGGAGGCTGAGGCAAGTGGATTGCTTGAGCCCAGGAGTCGAAGACCAG +CCTGGGAAACATAGAGAGGGAAACCCCCTCCCTACAAAAAAATAGAAAAATTAGCCAGGC +ATGGTGTCGCATGCCTGTAGTCCCAGCTACCTGGGGGCCTGAAGTGGGAGGATCCTTTGA 
+GCTCTGGAGGTGGAGGCTTCAGCGAGCTGGGATTGCACCATTGCACTCTAGCCCGGGTGA +CAGAGTGAGACCCTGTCTCAAAAAACAAAACAAAACAAAAAAAACCACCAAAAACCAAAG +TCACATAAAATGTCTTCATAAATACAATAGAGGCAAACCTCATGTGATTTGTGTTCATGG +ACTTGGAGGGGCCAAAGGCCCAACAGAAGTGAGAGGAGGAGGGAGGGGAGGGGTGCAAGG +GCCAAGGGTAGAGGCCGGGAGGGGCCAGCATGGTGGCCTGAGTCCACTGCAGGTGGCACT +GCTGGCTTTGGTGTCCTCATTTCATCTCCTGTGACCTCTGCCCTTGGCATGTCCCCAGGT +ACCTCCTGTGGGCTCCTGGCTGGGGTGGGGTAGGAGGAGGAAGCTGAGGGTGGCCAGGGT +CTGTGTCCATCGAGGGGTGTCCAAATTCTAGTGGAGGGAGGGCTGGAGGGGAGTGGGGAG +GGGCCTTTGGCCAAAGCCAGGTGCTGGGGTGTGACGGATCTCCACGGTTGCTGGGACCTT +GTGGCCATTCCTGGGACATGGCTTTCACCTTAACAAGGGCCCTGGGTGTTCCAGGGGTCC +CCATCCCCTGCCCCTGGGGGCTCAGCACACAAAGCCCAGCCTGGGGCCTGGGCCATGGTA +GCCAGGGAGGTCAGGGTGTGAGGCCTTGGTGGCCTCCAGGTGGGCGGTGAGCCAGGGGGT +GTGGACATGTGGACAGTGCCGGGCCTGGGTCATGCACTTCGTCAACCCTTCACGCCTAGA +CCACAGCCTCTCTGAGCAGGGCCCAGCTTGCGTTGTGCTGAGTCTTTTGTTACTTTAATT +AATGCCCAGCTTCCTGACGCTTAATTAAACTGGAATTGCTCCATGTCCAATAGCCAAGCA +TCCCATTAGATACAAGTTAATGCAACTGTCGCAGTGGCCATGAAATGAGGCTGGTGCGTC +CCCCAGGGAGGGATGCAGAATGCATCTTGGAGGACAGCATGGCCTGACCGGGCAGCTGCA +CACAGGGCCAAACTGAGCCCGTACTGGACAGCCAGCCCTCAGCCCACCCAGTACTTCCAC +AAGACCCTTCCCAGGCCCCTCGAGGAGGACTCCCCATGGGGGCCCTGGCCTCCAGCTCCT +CTGCACGCCCGAGGGCAGCCTGCTTGCTTCCTGCTGAGGTTTGCTGTTCCAGCTCCATGA +GAAACTCATGCTAGTGTATCAACATTTGGAAAATGCAGAAAAATTAGAGAAAGGAAAGTA +GGGACTCCCCACCCCAGAAAGGTGCCATCCTCCCATGGGCCTGCGTTTTGGGCTCCGTAG +GGGCTACAAGGTGCTCTCTGGGGCCTGCAGTGGGGGCCGGGGCAGGAGCTCTGGGAAGGC +GTGGGGGCAGAGGCCACATTGTGAGGAGCCATTGGTTCTTCAGGAGCCGGAAGTTCTAGG +ATGGGGGGCAGGGCTGGGAGCCAGCGGGGGTGAGGCAGGCTGGGGATGCTGGGGTGATAC +CTTGCACCCACCAATCTCCAGTGCCAGCCTCTCACCCTGATGGCCCCGTGTCCAAGGCAG +CCTCACGGTGCCCTCTGCCAGTGGCCAAAGGCATTGTGCCTGGTTGGTCCATCACCCCAG +AGTCTTTTAAAATTAAATTAAATTAAATTAAATTATAATTAATTTTAGAGATAGGGTCTC +GATCTGTCACCTAGGCTGAAGAGCAGTGGCACCATCACAGTTCACTGCAGGCTTGAACGC +CCACGCTCAAGTGATCTTCCCACCTCAGCCTCCCAAGTAACTGGGACTGCAGGTGTGCAC +CACCACTCCCGGCTAATATTTAAACACTTATTTATTTATTTATTTATTTATTTGTTTTTA +GATGGGGTCTTGCTCTGTCACCCAGACTGGAGTGCAATTGTGTCATCACAGCTCACTGCA 
+ACCTTGAACTCTTGGGCTCAAGTGATCCTCCAGCATCAACCTCCCAAGTAGCTGGGACTA +TAGGTGCAGGCCACCACACCTGGCTAGTTTTTAAAGTTTTTTGTAGCGAGGGGCTTTCAC +TATGTTGCCTAGACTGGTCTCGAATTCCTGGCCTCAAGCGATCCTCCCACCTTGGCTTCT +CAAAGTGCTGGGATCACAGGCCTGGGCCACCACACCCGGCCAGCCCAGACTCTTGGTTCA +ATCCCCTAGAGGGCTTGAGTGGGGCAAGGAGGTGGCAAAGTGGGCAGCAAGGGGGCCGCC +CTGGACAGGTCTCCCTGTCCCCCTACCCGAGGGTACTGAGCGGCCTCTGCATTGGGCTCC +CAGGCTGGGCTGGGTGGGTCTGAGGCAGGAGCAGCCTCCCCCTGGAGGCCAGCTCAGGTG +GCAGAGGCCCGGTCTGCCCCGGGTCTGCGGAGCCGAGGGCAGGTCTCTGGTGCGTAGGTT +GCTGGGCCTGGCTCTCTCTCAAGCTTGTTTGCTGTTTGGGTCCTCCTCTTTGGGGCCAGG +GCCACACGCACCTCTGCGACATTAGCTTGGTGCCTGTGAGGGGCTGGGTATCTCAGGAGA +TCGCAGCAGGTCCTAGCACTACCCTGTTGCATTCTTCAAGGACTCCCCGCAGCCCAGTGT +CTGTGGAGTGGGGCGTGAGGTGCTGCCTCCCATGTTGCTGCTTAGAAGGACGCAGCCCTG +GAAACCCTCACTGTGGAGTCTCTGAGCCCCTCATCCGCAGAGCAGCAGTTGCTGCTGTTT +GCACAGACAGCAAATCCGGGGCATGTTTGTTTAGTAAGATTACCTCTGCTGGGGCCTGGC +CCAAGGGACAGGCCAGGCACAGAGGGGTGGGGGCTCTGTGCATACTGAACACCCCCCCGA +GAGTGGCTCTGTCTACAGCCGGACCACTCTGGAAGTGACGGCACTGCGGGCCTGGAGGGC +TGGGGCAGTCCAATTGGGCACCATGCCCTCACAAACAACACACACACCAGCGGTGGGGTT +GCGAGGGGGCGCTGAGGGAGGATCCTGGCTGCCCTGCTTAAGCCCTGCAGCCTGTGGGGC +TGTGTCCACCCGGGTCGCATGTTGCTTCTACCTGCACGGTGGCCCCACCACACAAGGTGC +CGTCCTGCCCTCCAGTCAACTTCCTGGGGCTCTGAGCAGCCTGTGGTGGCACCTCACCTG +GGCAGAGGGTGGGAGGAGCAGCGCCCTTCCTTCTGTGCAGGGAGTCTGATCAGGGTTCTG +TCTCACTGTGGGACAGTGGGGGCCGGGGAAGAGCGGCAGGTACCAAGTGCAGACAGCAGA +TGTAGATGCATGCGGGGGTCCGGGCTGGGGCCAAGAACACAGCCCTGGAGTCAGGGACAC +CGGCTTCATGTCCTCACTCTGCCATTCAAGAGCTGGGGGACCTCGGGCAGGTGGTGTAGG +AACAGGGAGGAGTCTGCCTGCAGCACGCAGAGGAGGGCTGTGCTGGTCAGGGGGTGTTTT +GGGGAGAGGACCTCAAAGGGAGGTTGACTTTGGGAAAGGGGTCAGAGTACAGAAAAAGTG +GGTCCAAATCCTGCTTCAGCACTGAGACCCTGCCTCGGCAGGGGTCTCCAAGCTTGGCTC +CACCCCCTGAGGGGCCAGTTACTGCATACCTCGCCCACCCAGGAGCTGGGGGCCCTGGTC +CTGTTTCCCAGGGAGCTGTTGGCACTTGTGGGCTCCTGGCTGGGGTCCAGGTGGGCTCTG +CTCCTCTGCCCTGCCCAGCGCTTTGTACTGAGCCCATCCTGCAGAGACTCAACGGGGCCA +AACGCACATTCCTTGACCTTCTGTGAGTCTGTCAGTCTTCATCCCTCCCTGGACCTAGGA +AGGCCTGGACAGCTTTGACCATTGGTGTTGGGTGGACATGGCGCTGCGCTGCACCCCAGG 
+CCCTCAGAGCCCAGCTGTTCTGCTTTCTGCTTTTGGTCCTGAGCTGCCTTGAAGGAATCT +GACCAGCAGGGAGAGCCCACGGAGCGGGAGGGCCTGTGGGGCTCAGCCCTGCAGCCGGTC +TGCCGAGGCCAGGCCTGGGGGCACAGCTGGGGCTGAGTACACTGAATGCCCCCTCGATGC +CATGCACAGCAGAAGAGCTGCCCACACCGCCCTGCTGGGCCCTGCCCGGGCTCCAGACCC +ACAGACCAAGCACACAAGAGACTGGGCTGCTTTGTCATGCAGCAGCAGGTAGCCGGACAC +ATGTTTCCCCATTCCCCTGCCATCCTCTGAAATCCTGTGGGGAGAAAGCATGTGAGGTCC +AGTGGGGGTTGGGGTGAGGGTCTGACTGGGGGCTTTCTAACCCACGTCTGAGTCCCATCC +GGCGCCAGTGCCACAGTGGCGGGTGACTGGTGCTGCTCACACTGGCAAGCTCCTACCTAG +ACTTTGGAGCCCAACTCCACAAGGCCCCTGAACTACCCCTGTAGGCACCGCAGCCCCCTT +GCCTTCCTCTGCTACGACATCAACCACTCTGATGGTATTTGTGGTTTGATGTGTTTAAGT +AAAAGGCTTGTTGCAGAGTTTAATTGACCCCATATGTCTGCCTGAGGAGGCCGTGAGCTG +CAGAGGGCAGGTGTGAGCTGGGCTCTTTCCTCAGTCCCCATGTCTTGCTGGGTACTCACA +GGGCACACACGAGCACGTGCGGCAGGGGCAGGGGCCAGCGGGGGATGGGGAGGCTTGGCG +CCCACGGCCCCGCTGCCCCATGTTTCCACTGCAGCCGAGGAGAAAACGAGGACTTTACGG +CTGTCACAAACATTTCATCTTGTGCATTTTACAGTCCACGACTGCTGTGAACACTGGGGC +TCCTGTCGGAGCTAGAAGATTCATAAAATGCATGTGAATTATTTTATTTTATTTTATTGT +TTTGAGATGGAGTCTCACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCGATCTTGGCTCA +CTGTAACCTCCACCTCCTGGGTTCAAGCAATTCTCCTGCCTCAGCCTCCCAAGTAGCTGG +GACTATAGGCGTGCGCCACCACGCCTGGCTAATTTTTTTGTATTTTTAGTAGAGACGGGG +TTTCGCCATGTTAGCCAGACTGGTCTCAAACTCCTGACCTCAAGTGATCCTCCCACCTCG +GCCTCCCAAAATGCTGGGATCACAGGTGTGAGCCACTGCGCCTGGCCAAATGCATGTGAA +TTTTTATGTTCAATTATTCCAGGTTCCCTGCTCGGGGCCCATGGGCTTCCCCATGTACAA +CCCACCGGTAGGTCCCCGCCCAGTCCATGCTCCTCAAAGGCCAGCCTCACACCCATCCCA +CGTCCTCTTGGCTGGTAAACACTCTCCCCCTTCTTCCTTCTCCCTCCACATCTCAACTGC +TGGAAGGGAAGAGCTCCTCAGTCTACTTGGAGTCCCTCCTGCACAAGAGGATCCAGGGTG +GTCCAGGAGGGCCAAACCCACCTCTTTGGGTGCACCCCTGGGCAGTGGATTGGGGATCCA +GGGCTTGTGGGGGTATGCACCCACCAGGGCAGGTCCCTCAGGCTCACTCTCCCTGCGCCT +CAGTTTCCTCTCCTGAAAAATGGGCTTGTGACTAAATGTTGCAGGGAGCTGCTGAGAAAA +CGCCTGCTGAAAGCTTGGCAGGAGGTCATTGTGACCAGGGATGGGGCTGGGTGCTTCGGT +CAGGCCAGGGCTGGAGGCAGGGGCTCACACTGGGGTCAGCAGTGCAGGGGCGTGGCTGGC +GTGGCAGGCGCATGGGGATTGGCTCATCAGTTGCGGCCTCACGTCCCTTCGTTATGGCGC +CTGCAGAAGATGTTCTCTGATTAGTCTGCTGAATTATGAATTAGGTTCCGGGTCCCTGTG 
+CAGCTGTTCCCGGCTTGTCATCCCCACCACTTTATTTCCCCATCTGTTTATAAAACCAGG +AGCGCCGTCCTGCAATAAACACTTCAAACGCTTTTTTTAGGGGCTAATTAATTGTGTTGC +ATTTCTTCGACACAGCATGCAGGCCGGACCTGCCCCGAATTCCCAATGCGGGTGTGGCTC +CTGCTGAGCGCCTGGGGAGGGAGCCAGGCCTGGAGCCTGGGGGGCTGCATGTGGCCGCCC +CAAGTTTGTTTTCTCCAAGGCAGCCCTCCACCTTTATGTGGCGGGCTGGGCTGTCTCAGG +AATGCGCCGGCCATGTCAGGTTCCCCCTAGATTCCCCCTCCGCACCCTTTGGCTGAAGTC +CTCCTGCTCGGACCATAGGACACTCTTGAAATTATCTGTGGCACATCCAAGGTCAAGGTC +ACGCCCTGCTGCCAGCTACAGTGCTGACCCTGAGGGCTCCACCTCCCAGGCCCACTGAGC +ATCACGCTGTCTTCCCTGCCTTGCCTGCCCAGAGCACCTCTGCCCTCCAGGACCCCCTCA +GCGAGTGCGGTGGGCGAGTGCAGTGGGTGGGTGGCCCATCTAGGCAAGCCAATCTCTCCT +CGGATCCGTGCACCTCTTTGGAGAAGGCCTCTGGGAGCCCCAGCCCTGCTGTGCACCCTG +AGGCCTCACTCCGTCTGAGGAACCCAGAGAGCATCGCGGAGGAGGCTTAAAACAATGACA +ATAAGAAAACAAAATCACAATCACCAAGTCACGAAACAGCTCAGAGGCAATTGAGTTAGG +CAGCCCCCTGCCAGTGCCCAATGCCGGTAAAGTGGCTCTTTAGGAACTGAGTCCTAATGT +GGGCTGCGCAGCGGGTCAGAGGAGGTGTTCAGCCTGTGGCCAGGCATTGGGGCCCCAAGC +CAGTCCTCCACTGCGGACAGAACCTTGCCCAGCTGCAGCTGGAGCCTGAAGGCATCCACT +GCGTGCATCTGGAGGGGTCGGGGGTTCTTTGTCCCCAAGGCCACTGTGGCTCCTGTGGGC +CACCGTCATGTCCACTGGGACTGGTGGGAAACTGGGTCTGGGGACTTGTACCCTCCGCTT +CCTCATGTCCTCATCCTCAGGTGCCCTCCCAGCCCTTCAGGGGTCCTGTTTTCCAGCTGT +GGGCAGGATGTTGCTCCTCTGCCCTGTCTGTCCACGACTCAGGGTGCCAAGGCTGGTGGA +GCCTCCCTGCAGCCCCAAGGGAGGAGGCTGGGGCCAGAGACGCCCTGGGGTGGGGCACGT +CTCTTCCCAGTGAACCTCAGATCCCTCGGGCGCCCACGCTCCCCACTGAGGGCCTGGGGC +CACAGCACTCTGCCTGTGAGGGGTGGGCCCTTGGAGGCTTGGGGACATGCGCGCCCACAG +GCACGTGGGGAGAGCCTGCTGAGCGCTGTCTCAGTGATGGTTCCCCACACTGGCCCCTGA +CATGGGGCTTGGTGTGGCAGCTTGTTGGGGAGGTGGTGCGTGGAGCAGGATGGGGACCGG +GAGGTGGTTGCTGTGGGCTCTGCGGGGCGTGAGGGCCGGTGTTTGCCCCAAGCTCAGTGC +CTGCCTGTGACCACACCAGCTCATAAGTGACCTCTGGGTGGCCCAGGGGTGAGCGGAGAG +GGGACCTGAGCAGGCAACACGGGCCTGGAGAGGAGCTTTGTCTCTCTAGGAGAAGGCAGC +CTGTCCTGCCAGACCTGGTATCACGCCACCCCAAGGCAGGCCCCGCAGAGCCCTCGGGAG +CTCCACTCGGAGATGGAGACGCGCTTCCCTGTGTCGGGGCTTCGCTCAGAACATTTTTAT +GGGCTGTTAATTGTTATGCTTTCATTTAATTCTGGGTGTCATTCCATGTCAGCCAAGCCC +GACAGGCTGGGATTTATGCTTCCAAATTATGGGGCTCGGGCTGTTCAGAGGTAAAACAAT 
+TACCCATCAATCAGCCACCAACCGGCTTGTGCTGGGGCTGGCAGCCCCATTAGCGGGCGT +GGGCAGCCATGGCCCAGCAAGACCCCTTGAGGGACAGGACTGCCTTCGAGGGCAAGGCAG +TCTGGGTGAAGTCTCGCCCCAGGTTGGCCCTGTGCTGTGGCAGGGCGCAGCTCTCTGGGG +AGGCGGCCGCTCAGGGAAGGACCCCAGGGTCCACATTCACACCCAACTGGCTCCAGTGCT +GCAGCCCTCGGCCTCCCATATGATGGTGTGACCCTAGGTGGGCTGAGGGTGACCCGCCGG +AAGACGCTGCAGCATCAAGGTGGGGGGACCATGGGTCCCATATGCTGGGGCCTGGGGAGC +CTTGGCCAGCCATGGTCACTCAGTGGGAGGGGCAGGGTCACCTGCCTCCTGAGCACCCCA +TCCCCAGGAGGGAGGGACCAGGTGTGACCCACTTCACAGGGAGGGAAGCCTTGGCTGTCA +GAGGGGACACTGCGTCTCTCACTGGGAGGACCTCCCGGCCTGGGCTTGAGGCCCACCTGG +ATCTGTGCAAACGTAGATGTGGGTAGGGTGGGGCAGGGGAGGGGAGGTGGGCAGGGGAGG +TGGAGTGGGCTGGCTGTCAGCTCAGGAGTGGGGGTCGGGGAACCTGGGCCACAGAGCGGA +TCCTGTCCCACCTTGACCCTGGTATGGTTGAGTCGGCGGCTAAGCCAGGCCCCGTGGTCC +TTCACCCAGGGAGAGCCTGTGCCAGGCCAGGCACTGCCAGGCCAGCACACAGCCTTGACT +GAGCGCCTCCCCGTCTCAAGTTGTCTCCTGGATGCCCGGCTGGCTGCTCATGGGGACAGG +GGCAGGATGGGACAGCGAGAAGAGGTGGGCTCCCCCAGCCCCAGTGACCCTCCAGGCCAG +GGATTGGCTTCTCTCCCTGCAGAGATCGCCCTTCCCTGCCCCTCTGCCCAGCTGCCCTGG +GACTTGTGTCCCAGCCCTGCCCAGCATTCATCCTGGGTGATCTGCACGTGGGCATGGCGC +CTCACCGGGTCAGGGTGGTGTCCAGGTGAGCAGGCGGAGGACTCTCCTCACTGCCCAGCA +GTGTCCTGCTCCCCCAGCTGCTCTGACAGCCAAACTGGTCCCCAGCTGGGGCATCTCACG +CCCAGACCTGCTGCCCGTTGGTGCAGTGCAGCCCCCGCCAATGCAGACACTGTACCTCGG +TCTCCGGGACCCCCTCTGCCCCTGCAATTCGACGCTCTGCTCCTGCCCTGCTGGGATCCC +ACGTCCCTGGACCCTCCCTATGGCAGTCTCGCCCCGCGACCCTGCTACAGGCCTGCCCTG +CCACCCCCGGGCACACTCCCTCATCCAGAGTCTGTGCAGTCACATGTGGACCCCTGAGGC +CGGCCTGCGGCTCCCTGCTCACGGGCGGCTAAAAGGGGGCTTTCCAGTCACGCCCTGCCC +TGGCCTTCTGCCTAAGGCCCTGGCCCTCATGGGCCAGGCATGGTCCTCAGGCCCTTGGCT +CTGCCAAGGCTCTGAGACCCCACCCTAGAGAGACCCTGGCCCACCCAGCCTGGCCCTGCA +GCAGTGCCCCAAGTGCCCCCGAGCCCGGCCTGGCTGCTGCCAGGGTGACCACCAGGGGCA +GTAGTCAGATAGAGGGCGGTGGCCGGGCCCAGTGTGTCTGGGTGACCGCAGGGCCTGAAG +GTGTGGTGGCCCACCCGGGTCAGAGTGAAACTGAACTCCTAGGTCAGGGTCGGGTCCTCC +CAGGGTCGTGTCCTCTGGAACTTGGGGCGAGTCACTACGCCTCAGTGTCATCGTGGGCAG +AACCTTCCACATGTGGCATTGAAATCTTTCAAATTGAATAGGAAAAGCCCCACGTTTTCT +GAGCCCTATATATAGACCAGTACTGTCCTAAGTGTATTCTAGGGAAAGACAGTGGCCAGC 
+CCTGTAGCTCAGTTATTTCTGTTTAAAGTGTGAACGGGCTGGGTGCAGTGCTTGAAAATC +AAGCATGAAGTGAAACCCAGTGGGCCGTGCATGGCTCTGACACCCATGAGTCATTCACAC +GATGCCACTTGCTCATTGCTGTCCCACCCCAAATGCAGCACTGGTGGGCGAGACCCTCTG +GGCATATTACCTGCGGGATGTGGCTCCAGCGCCTCACTTCCAGCTCACTGCTGCCTCAAA +CTCCTGGACTCAAGGAATCCTCCTGCCTCAGCCTTCTGAATAGCTGGGACTACAGGCATG +TGCCGCCACACCCGGTGAATTTTTTTTTTGTAGAGAGGGGTTCTCTCTATGTTGCCCAGG +CTGGTCTCAAATTCCTGTGCTCAAGCGATCCTCCCACCTTGGCCTCCCAAAGTGCTGGGA +TTACAGGTGTGAGCCACTGCACCCAGCCTCTTCACCCTTTAAACAGAAACAACAACTGAA +CATACAGGGCAGACCAGTGTCTGTCCCCAGTGCCTGTGCTGGGGGTGACTTTCCAGCTGG +GGCTCAGCATTGCGAGTGGGAAGACCTCCTGGGGCCAACATGGCTTGGATTGTGAGGGCA +CGTGGGTCGCCGTGGGGCCAGAGTCCAGCAGTGGCAGAGCTGCACAGCAGTGGGGAGGGG +CAGGGCAGGGCAGGGAGAGGGCCCCGGCTGGCCCGGCTGGCCCAGCCTGCCCAGGGCTGC +ACGGTTTCCATCCCTGTCCCCAGCTGCAGTGGGACACCTGCCATGGGGGAGGCTGGGGGA +GGCCTGGGGTGGCGCCTTGGGCTCTCAGCCAGGAGTTCGCTCTCTCCAGGTCAGGCTGGC +AGAGCAGCCCCAGGGCCCCTGCACCCCCACTGCCCATGCCCACCTTGGGCTCTCTGATGC +CCGGCAGGGCCTCTGGGCTTCTCCCTCCCAAGCTTGGGTGGCAACGCTGGCATTTGGAGC +CTGGGGAGCTGCTGACAGTGCTGGTCCTGCCCGCGAAGGAGGCGGTGCCAGGCTGTTTGG +AAATGTCAGGCTGATGGGTGGTGGCACTGCCCAGCACCCCACCCCCGCTCCCACCCCAAG +AGGCTGGACACGGGCTGGGGCCCCGGTGATGGAGCCAGGCCGGGAGCCAGGAGATGCTGC +TGAGCAGGGATAGCTGGCTGAGGGCTCCTGGGCGGCAGGGGATCCGAGGCTGATGGACAA +GGGGCCTCTCCTTGCCACCGCAGCTGCGGCCTTGGTGGGCCGGTCGATGCGGAAGAGTGA +TGGTGCACGTGGGGCCGTGGGGTGTCGGAGGGAGGTTTATTGATGTCGGCTGATTTCCCT +GGGGGCCCCTTGCTGGCAGCAGCCACCATGCTCCCCGAGAGCTAACCGTATCAGGCCGGA +AGGGTGGTGGGCAGTGTGATGGATGGACTCCGGCTGTCGGCTGGCTGGGCCTGGATGGAG +CCCCGAGATGGGCACACTGGGCCGTGACAAGAGTGCCGTGGGGTCCCTGGGGGAGCCCAA +AGAGGCCTCTGAGAGGGTCTGTGCTGGGAGGTAGACCTGAGAGAGGGGAGACACCATCCC +AGGGTGGGCACAGGCATGGGGCAGAGGGCACGTGACGCCAACCTAGGGCCCCAGGGGGTG +GGACGGAGGGGCCTGGTTCTGGGGCATTGGGAGAGGCCTGGGACTCTGGACCTGGTCTGT +CTTTCTTGTCCCCAGTGTGAGCACCAGCCCTTGCAGCTGAGCCCTGGGAGGCGCCTCCTG +CCCACGCAGCAAAGCTCAGAGCTCTGTGGCCTCCTCTGGGCTCCTGCACAGCGGGTTTGC +TCCCAGCTGCTCCCCTGACACGGACACCTTCAAGGGCCACGTGACTGCCATCCCCAGGCC +TTGGCCCTCGATGAGGAAGGGCCCTTCTCAACCTGTGGCTCTGAAATAGGTCTCATGATG 
+GTGCCAGTTTAATGACAGGAGCAGTGGCTAATCCAGTGGTCATTATTGCATGGGCTGTCA +TGCCCTTCTCTATCCGTGCTCCCTGAAGAGGCGAGGGTCCACTGCCGGCTGCCACACAGA +GAAGCATGGCCACAGAAGGAGGCTCCTCTGCTCTGAGTGGGATGTTGGCTCCTCGGGCCA +CACTGGGCTGGAGGACAAGGGGAGGCAGGGGTGGGAGGGCCCTGGGCTGGGGTGGATGGG +GTGGGCCTCACAGATGGCATGGCCCTGTGGACAGCGAGTGGCTTTTCCTCCCCAGGTCTC +GTCCTGAGCCCTGCTCCTCCTCCTGGCCTCCCCTGGCCCCACACCTCTCCCACTGGCTTC +CTTTCCTGGCCTCTTTCTCTCTGCTCCTTGTCTCCATCTCTCATTCCTCTGGGCCTTGCT +TCACGGAGTCACCAGCCCGTGTGACAATGTCCACTTTCCCTGGCCTCATGTCTCCAACTT +AGTGTGGGACCTGCCTTTCCAGGCTCTTTGGCCACATGAGATCATGGAAACACATGGCTG +CTGTACCCACGCTGACCTGGCTCAGCCCTGGCTCATGATGCTTCCTGTGCACCCTGGTCC +CATCTGTACCTCAGTTTACCCATCTGCCACACAGGGCTGAGGGCATCTGCCTTGTGTGTG +GCGGTCAGGCTGGCCCAGTGACCGCCGAGCCATTTTGGCTTCCTCAGGTCTGGCCACTGT +GAAGGACATGGCTGTGCCCCCCGTCTGCCCTGTGTGGCCCTGGGCACATTAACCCCTTCC +ATAAGCCTGGTCTGCAGACGAGCGCCCACCGGGCTGGCAGTGCCTGAAGAGTCTCGGGCG +ACGTCCACTCACCTCCCAATCTGGTCCCAGCTCTGCTCAAACTCAGACTGGAGGATCCCC +CAGGGCAGTGACTCCAGTTCAGAGTCTAGCTGACTGCGTCCCTGGCAGCCAGGCACGGCC +CAAGGTCCAGAGCTGGAGACTGACCCCCCTGTGTAGGACCTCTGGGAAGTCCCCAGGACC +CAAGACGGCTCAGGGCTAGTGAGGTAGGGACAGGTGGCTATTTCAAGCCCCCAGCGGGCT +TCCTGCCTGGGGCACCTGGGCACAGTGGAGAAAGATGGCTCCCGGCAGAACACATGCCCC +CATCCTTCGGGGCCAGATGCCTTGGGTTGGGCACGGAGCAGGGCCCCCAGCAGACACAGA +GGCCAGGGTGGGAAGGGCCAGGGCACCGCGCAGTCTCTTGCCCTGTGTCAGGGCTCCAGG +CCTATGGTGGGGCTGGGCCTACCCACCCTGTCACTTTCCCCATACGGCTTTGGGGTCCCA +GGGGTTCAGGGGCCCCTCCTGCCTGACCCAGCCTTAACTTTTTGTTGTTTCTTCTCATTA +CAAAAGAAGCATGTTCAAAAGAACCCCACAGTCCTCCATGAACCCCACAGTACCCCGTGA +ACTCCAGAGTCCTCCATGAACCCTACAGACTCTCATGCACCCCACAGTCCCCCATGAACC +ACATAGTCCCCTATGTACCCCACAAGCTCCCATGTACCCTATAGTCCCCGATGCACCCCA +CAGTCCCCTATGTACCCCACAAGCCCCCATGCATCCCACAGGCCCTTATGTACCCCACAG +GCCCCCATGCACCCCACAGTCCCACATGCACCCCACAGGCTCCCATGAACCCCACAGGCC +CCTGTGCACCCCACAGTCCCACATGCACCCCACAGGCCCCCATGAACCCCACAGGCCCCC +GTGCACCCCACAGTCCCCTATGTACCCCACAGGCCCCCATGTACCCCACAGTCCCCTATG +TACCTACCCCACAGGCCCCCATGCACCGCACAGGCCCCCATGAACCCCACAGTCCCTCAC +AGCCCCCATGCGGGGAAAGGGCCTCACTTCTGGCCGCTGACTGACTTGCCAGTTTCCTGG 
+AGTTCTTTATCCAACTTTCAAGGTTGCTGAGTCCCGGGAACACCTCAGAAACCTCACGAA +ACAGTGACAGGTTGTCAATCAAGCAGCAAGGTTGACATGTTTGAGCAACGTTCAGCCTGG +GAGGCGCTGGGGCCAATTAGCAGAGACTCCCAGGTAGCAGAGCCCCCGGGCTCATGGCTG +GGGGGTGGATGGTCAGCCCTGACGCTGCTGCCCCCACCCGAGGATTTCTCCCCAGCCATG +GACTTGGGGCTGAGGGATTCCTGTACCCTCCCGGCCACCTGGACAGGGTCCCCGCCGTGG +CCCGGCCTAGGGCGTTGGCTCAGACCACACCTGCCCAAGGATGGAGCCTGACACCCAGGA +AGCAGCTCCTTGCAGCGCCCTGGGCCTCCGTGGGGCCCCTCTTGGCAGAGTGACCTGGGG +ACCAACCCCCTTCTGTGAGGCAGGTGAGGATGCTGCCTTGTGGGCTGAAGGCCAAGGGGA +AGAGGCATACTGGGGCCACTCAGGCTGAGCAGTGGTCACAGCGGCCACCCCTCCCTCTCC +AAGGCTCTGGCGACCACTGTTCTGTTTAGCCCCTACCCCTCCCCACCTTTTAAAAAGACA +GGGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGACACAATCAGAGCTCACTGCAGCCT +CCAACTCCTGGGCTCAAGCGATCCTCTTGCTTAAGCCTCCGAGTAGCTGGGACAACAGGT +GTGTGCCACCATGCTCAGATAATTTTTTTAAAATTATTTTTTGTAGAGATGGCGGTCTCA +CTTTGTTCCTCAGGCTGGTCTCAAACTCCTGGCCTCAGGCAATCCTCCCTTCTTGGCCTC +CCAAAGCCCTGGGATTATGGGAGCCAGGTTCCCAAACACTCTCTGCTCTTCCCTCCACCT +GGCATCTCCCTCCCCAAAGCTCTGCCCCATCCTGAAGTCCCCACTGCAGATGCTGCCCCT +CCCAGAAGGTAGCTGGGCATGGGCTCTGCAGCTGCAGAGTTATGGACTGGGCCTCCCTGG +GCAGCTTCCTGGGAGCTGCACCATGCACGGGTCACGGGACCCTCCTGGGTAGGTTGAACT +GGGAGGCGACAGAGACCCTTCTAAAGTCCCTATGCCGGGTGCTGGGCTGTAGTCACATGG +TGCCCTCCCCAGGAATCCCTGAGGGGTGGGTGCGTCTCTCTCAGCAGCACCAGCAAGGAC +CAGCGGGGAGGGGTGAGCATCTTTGTTCTGTCCAAGAGCCTCTTTGGAGCACGAGCACTT +GCCACATGCAGGAGAGAGCCATGTTCACTTTCCTTTTATGGCTGAGTTTGAGATGTCTGG +ATAGAGAGTTATCAGCCAAGGCACTGATAAGGTCTGCGGGGGGAATATACTGTAAAGGGC +TGAGCGTGGAGTGGGGGCTACCTCTCCTGCTCGGGCTGGTGTTGGGGGTTGGGGTGGGCT +GGGTGAAGCTGCCACTCCTGGCCTGGATATATGGAGCCCTGGGCAGTCCTGCCTGTCCCA +TGGCTTTGCTGGACACAACAAGAACTGCCAGATGTCATTTGTGTGGGAATCATTTTGCTG +TTTGAGCGAAGTTGGATTTGTTAGTGTGCTCACCAGTGTGGATGGATGAGGTCTGACCAG +GAGGCTGCCTGTTGTCTGTCTTTCACAGAGGGCCCTCCACACCCAGGGTAGAGCTGGGGA +GAAGGAGTCCTGGGGAAATACTGAGTTAAACAAGGCTGATGTATCCTCTGCAGGACTTCT +CAGTGCCTTTAGCATGGCGGGTGTAGGTGAGTCTAAGAGATGGCCAGGTAGCCTAGGTGT +GGATATCACCAGGCTAGGGCTGCTGTCTGTCTCCTCTGCTCACGCTCCTGTGCAGGGAGG +GACTTGAGGGTGGGCCTGGGGAAACTGTGGAAAGTAGGAGTCTCTGTCCCCAGACACTGT 
+CTCTGAACTGGGGCCAAGATTCAGTCATTTCATGTCCCCACTAGGCAGTGCTTACAGATG +TTGCCCAGTATCAGCTGCATTGATTAAAGGAAGAGCTTTCTGACCATCAGGTGGGGGCGC +TCTCCTGGTGAGTGGGGCTGGGAGGCAGAGCCCTGTCCTCCGCGGTGTGACTGGGACGTA +ATCCCCTCCTGGGGCTGAGTGGAGCAGGGGCTTGCGCCTCCCGCAGGCCTAGCTGTGTGG +GATCTCATGGCACTCTGCTTTTATTCTGTTTGCTCCAATACCTTCTGCTTGCCACCTGCG +ATGCTTATTTTCCATTTCCAGTGGTGCTGTAAGACTTCCTTTCAAAATATAATCACTTAA +GTTAAAAAAGCCCATTCGAAGAAAGCCCTCGAGTAAACAATAGTGGAGCAGGTGGGCAGG +TGGAGCTGGCCCTTGGCCGGTGTGTGGGCTGGGGGCAAGTGGGAAGGGACAACGTGGGAA +GGAAAGGGGAGCAGGGGGTGTGGGGACCCTCCCTGGGAAACTCCCAGCCCGCCCTGGCCT +CTGTTACTGCCTGGCCCTGCTGAGCTCCCTGGGCTGCTGTCTGGTGGCCAACACTGGCGT +TGGCTGCCCACATGCCATGGGGTCTGCACACAGCAGGGGCTGGGGGACAGCGTCACAGGA +GGAGGAAGGCCACCTGTCACAGGAGAAGGAAGGGCACACAGAGGCTTGTCTGAGCCCCAC +GTGAGGATTAACAGAGGGGATGCATTGCAGTAAAGAGCACGAAGCTGCTGTGACCTTTGC +CTCTGACCGCCCCCCTTGCAGGGCCACGCCTGCACGTAGCCTCTGGGCAGGTGCCATGGG +GTCTGCCAAGGCTGGCATTGGCTGCCCAGATGCTGTCCTGCCCTGAGGTTCCATAGTCCT +CCGGCCACTGTGGCCTCACACTGTGTCCAGCTATGCTTTTTATTTACCTGTTAACCTCCC +AGCATACTTATTCATGATAACAGTGACAGGGAATGGGGCGAAAGTCTGAAGTGTCCTTCA +CCAGCAAGGGGCTGGTGGAAAAGTGGGACTCCTGTCTGAGGCCACTCTGGGTGGTGTCTG +TGTCCCAGGACTCTACAGCCAAACCCAATGCAGCTGGGGGCCTTCCATCCACAGACCCAC +ATGAGCAGTGGTAAGGACCTGCTATATTGAGCCAGCTCCTCACACACGCCCTCTTCCCAG +AGGGTAAACTGAGGCCCAGGATGTTGCCTCTTGCCTGAGGCTGCATGGCTGGGTGGAGCC +AGGCCCTGACCTCTTTGCTTCTTCACCTGCCCAGGGCAGGTGTTTGGATGTTGAGTCGAC +TGATGCCCCAGGGCGCCAGGCCCTCCTTCCATCTCACGGTCCCTGCCCCTGAGGCCCGTG +TGTTGGGGCTGGGTGGAGGTGGCCCTGTCTCCTTTCCAGGTGAGTGGTGCGCACCCTGGT +GAGCAAGGACCCCAGGCCCTGGCTGCAGGGCCTCCCTGTGGCTGGAGGTGTCTTCCACAC +GGCCAGTGTCCGGCACGAGCACTCTGGGGTCGGACACTTGAGATGGGCTGGAAGGAGGGG +GCATAGAGCCTTGCACAGACAGGGGCAGCTCAGGCCTGGGTTTGAATCCTGACCTAGGCA +CCCTGTGGTGAGGACAGCCACAGGCTGAGGACACTGGTGTCCAGCATGTGCCCAGCTGTC +ACCTCCACCCTGGGTTTGGCATGGGGCTGGCAGCAGGCCCAGGAGGGATGGGAGTCAGAG +TGTCAGGGAAGCAGTGGGAACTCGGGACACTCTGCTGCTCCATGGGAACAGCATTGGCCC +TGTGGTGTGGGCAGCCTCAGCAACTGGGCAGGGGCTGTCAGAGCCCAGGGGATGCTCCCA +GAAAGTGGGATGGGAGCCCAGAGCCCACAGCCCCTGATATCACTCAGCCTGGGAGCAGGC 
+CCACTGCCTCCACCGTGCAGACGAGGAAACTGAGGAGGGCCCTTGGGTCCCCTGCCCAGG +CCTCACAGAGAGCAGGTGACAGCTGGAGCCACCCCAGGCTTGTCCACCCCCCTGAAGGGA +TCCCCAGGTGCCCACCCTCTTCCTGCCATCTGGGGGCATGACTCCTACTGCTGACTTGAG +AGTGAAGCTACCGCCACCGCCTCCCTCCACACCTGGGCCAGCCTGGGCCATGCCAGGGGC +ATGCCTGCTTCTGGCCCCATGGGCCTACTGGTCTTGACCATTAAAAAGCATGTGTTGGGG +CTGGGCGCGGTGGCTCACACCTGTAATCCCAGCATTTTGGGAGGCCGAGGCAGGCAGATC +ATTTGAGTTCAGGAATTTGAGACCAGCCTGGCCAATAGGAAGAAACCCCGTCTTTACTAA +AAATACAAAAATTAGCTGGGCCTGGTGCCTCCCACCTGTAGTCCAGCTACTTGGGAGGTT +GAGGCAGGAGAATCGCTTGAACCTGGGAGGTGGAGGTTGCAGTGAGCCGAGACTGCGCCA +TTGCACTCCAGCCTGGGCGACAAAGTGAGACTCTGTCTCAAACAAACAAACAAACAAACA +AACAAAAAAGCACGTATTGGGACTACAGTCAATGATAATTTATTGTGCATTTAAAAATAA +CTAAAAGAGGCTGGGCACAGTGGCTCACGCCTGTAATCCCAGCGCTTTGGGAGGCCGAGG +TGGGCGGATCATGAGGTCATGAGAACGAGACCAACCTGGCTAACACAGTGAAACCCTGTC +TCTACTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTAGCTAGGCTTGGTG +GCAGGCACCTGTAGTCCCAGCTACTCCAGAGGCTGAGGCAGGAGAATGGTGTGAACCTGG +GAGATAGAGCTTGCAGTGAGCCAAGATGGAGCCACTGCACTCCAGTCTGGGCGACAGAGC +AAGACTCCATCTCAAAAAAACAACAAAAAACAAACAGACAGAAAACTAAAAGAGTATAAT +TGGGGTGTTTGTAACACAAAGAAATGATGAATGCTTGAGATAATGGACACCCCATTTCCC +CTGATGTGACACTAACCATTGTATGCCTGTATCAACATATCTCATGTACCCCGTATCTCA +TGTACCCCATAGATATACCCACCTACTATGGTCCCACAGAAAAGGCATGTATTGGGGCCA +GGCCCAATGGCTCACACCTATAATCCCAGCACTGGTTTGGGAGGCCGAGGCAGGAGGATC +ACTTGAGGCCAGGAGTTCAAAACCAGCCTGGACAACATAGCGAGACCCCCATCTCTACCA +AAAACAAACAAAACAAAAAAACTTAGCCAGGTATTGTGGTCTGCACCTATAGTCCCAGCT +ACTTGGGAGGCTGAGGCAGGATTGTTTAAGTCCAGGAGTTTGAGGCCACAGTGAGGTATG +GTTACACCATGGGTGACAGAGCAAGACTATTTCTCTAAAAAACAAACAAAAAACAAAAAC +AAAACACAAAAAAGCATGTGTTTTCCTGTTCTCCCACCACCCCACGGCAAAGACAGTACT +GGCTATTGACACAAAAGGTGTGAAACGCCCACATGCTAAGGAAGACAGAAGCCCCCAGAA +TCCAACCTGGGGGTGTTTCAGAAGTGCTGAAAGGGAACAGTGTGGTCCCTTTAAATGATA +CGAAAGAGAGGAAGGGGGCTGGGCGCGGTGGCTCACGCCTGCAATCCCAGCACTTTGGGA +GCCTGAGGCGGGCGGATCACCTGACGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG +AAACCCTGTCTCTAATAAAAATATGAAAATTAGCCGGGCGTGGTGGTGGACTCCTGTAAT +CCCAGCTACTTGGGAGGCTGAGGCAGGAGAATTGCTTGAACCTGGGAGGCAGAAGTTGCA 
+GTGAGCCGAGATCAGGCCATTGCACTCCAGCCTGGGCAACAGGGCAAGTCTCCATCTCAA +AAGAGGGGAAGGAAAGTGCTGGGTAGAGGAAGGCATGGTCCCTGGCTAGGGTTCCACCCC +CATGGACCTAGGTGAGGACAGGCATTTCCTGCCCACATGTTGTGGCATTTCCCAAGACTA +CCCGGGCCTGCCATGCTCCATCTTGTGCCTATAAAAACCCCTGAGACCCTAGTAGGCAGA +TACTCTAGCGGCTGGACATTGAGAGGAGTGGATCAGTGGAAGAACGCACAAGTGGCTGGA +CGTCAAGCGGAATCCACAGACAGGCACTGGCAGGCCTGGCATTTGTGAAGCCGGCTGCGG +CAGAATGATGTGGAGTTTGGCTGGGGCAGTCGTAGGAGAGCTCAGGCCGCCCAGGGGAAA +ATCATCTCCCTTCTGGCTCCCCCATCTGCTGAGAGCTACTTCCATTCAATAAAACCTTGC +ACTCATTCTCCAAGCCCACGTGTGGTCAGATTTACACAAGGCAAGAACCCCAGGATCCAG +AAAGCCCTCTGTCCTTGTGATAAGGCAGGGGTCTAGTTGAGCTGGTTAACACAAGCTGCC +TATAGATGGCTAAACTAAAAGCACCCTGTAACACGCCCAGTGGGGCTTCACGAGCTGTCA +ACATCCACCCCTAGACACTGCCGTGGGGTCAACACCCCACAACTTGCCCGTCTGTATGCT +CCCCCAGAGGTTTGAGCAGCAGGGCACCGAAGAAGTGAGCCACACCCCCATTGCATGCCC +TGCGAAGGGGACAAAGGAACTTTTCCTGTTTCAGGGGAATGGTCCTCACTGCCTCCTCCA +GATTTTCTGCCCTGAGTTGTGAGGACAGCACAGCTTGGGCTCTGTCCACAGCTTTTACTG +GGAGCCTGAGGCTCCGTCACTCACTCGGCTGCTTTTTCCTGTGCTGGATGAAGCCCAGGA +GCTGGCTGTGCAGGTGTGTCTGGGATTTCATGGGCATAGATGACTCCAGGACAGCTCCTT +AAACACCAAGCTCAAGCAGGTATTGCCAGCTAAATGAGGTCAATTCCTGGCAGTGGATCT +CAGGGCGCCGGCCTTGGACCCCACCTGCTTGCCCACCCCCAGCAGGCAGAGACCTTTCTA +GAAGGTCACAGATGCTGCTGCGGGATGGATCGGGTCCCAACCCTGGCTGGGAGTGATGCC +CGACAAGCTGCTGTAATTCTCAATTCTGCTCCGGAGGCCTTGGAGATGCTGCAGGCGGGC +AGGTGGGGTTGGGGGTCAGCGTCACAGGAGGGGAAGGCCACCCATGGGGGCACATAGGGG +CTAGTGTGAGCCCTGCATGAGGATTAACAGGGCATGCATTGCAATACAGAGCACGAAGCT +GCTGTGACCTTCCCCTCTGATCGCTGTCACAGGGCCTAGCCTGCACGTAGCCTCCTGGGC +AGGCACACCCCTGGACCCCTCCCTGGTGCTCTGCTGGAGGCCACGCTGGGCTCTGGAATC +TTACTGCACAGAGCTCCTGGAGACCCACCCTCAACCCTTGGGCCCCCTCTTCCTTCCTGG +GTCCTGGAGGGGGCTGTTAGGGGGCATGAGGCCACGCAGCCCTTGAGCACCTACTGTGTG +CAAGGGCCTGGCGTTTGTGAAGCAGGCTGCGGCCATCCTGGGTGGGAGTCAGGGAATGGG +GGTCCCGGAAGTGCTGGCTTTTCTGGTGGCTGTGGGGGATCAGGGGGCCTTGCTGTGTAT +CTGTGAGCTGAGCCATCCTCTGAGGGGCTGAGCGCTGGCAGGAACCTATCACGTTTCCCA +GGGGCAGGGTTGGGCATGAGAAGCCCTATCCTGCCTCTCAGCTAGGCCATGTTTCATACC +CCACAGATGCTTTGGAGGTGACAGATCTGGGTCAAGACTGGCCTTCCTGGTGACCAGCCT 
+TCAGATGGACACTGGGCGGCAGGTGTTCCTGGGCTGCCATGGCTGGTCTCCTGTGCTGGT +CCCCAGAGCAGCCTTCCTCTCCCCTCCCCAGACACCCCATCCTCTGCCGAGGCCTGTGAG +CACCTGCTGTGTGCCCACCTTTTGCTTAAACCCCAGGACGGCCTTACCCAGATGGGGAGC +CTTGGTTGGATTGAAGGGGCTTGCTGGAGGTGCAGGGGGAGGGGAGGTCAGAGCCCACAT +GCATCCTGAGCTCCCTCATCTTCCACACCCCATGCTCATCCCCCCAACCTGGCCCCGATG +CTATCAGCCCTTAGCTGGGGGAGGGCGGACCCCGGGGACCTCAGAGGCAGGGCTCCTGCA +AGCACTTTGCTGCACATGTCTCTGTTCCTGAAAATGCCCCAACCCCACCAGGGCAGCAGG +CAGCAGGGTCTGTGGGAAAGTCAGGAGCCCCGCTGATGGTCCTGAGGGGCCACCCCAGCC +TCAGGAGGAGGACCTGCCCTTCTCAGGGCCCTGCTGTTGCTCCTTCAGCTCGCCTCCTCC +ATGCTGGCCTCCGGGCTGCTCACACGAGGATTAGAGGCAGGAATGAAGAAAACCAAAGTC +AGACCACTACCAGGACCAGGGTACAAGAGACCAGGCCTGGACACCACTTATGGCTGGGGG +CCTTGGGCTGGTCAGCGCCTCCCCCTCAAGGCATATACACAGTGGATCGGTAAAGGCTGT +CCCATCTGGGGCTGTGTCAGGCCTCTGTGGGAGGAGGCATGTGGCGGGCCCAGCTCAGTT +GATGGATTCAGACCCATGCATTTGGCCAGCCCTAGTCAGGGAACATTGCAGCCACTGCCT +GCCCAGCACTGGGCAGGAGGAAGTGTCCTTTCTAAGAGTGGAGGGAGCTCCTGCTCTCAG +GCCTCAGAAACGAGGTGCTAAGGGGGCTTTCAGGCAGCAGCCTCAGGCAGGAGGAGCTCC +CGTGCCATGCGAATGGCTTTGCCCAGACACCGGGTGGCTCAGGCAGGTCACTCTGCCTTC +TGTGTCCTCCTCCTTGGTGGGCTGGTGGGCTGAGCCAGGAGTCGGGCTTGGGGCCTGTTC +CAGGCTTGCAGGCAGGAACAGGCCCGGGACACCCCAGAGACAATATTGATGCTGTTTCTG +CCCAAACAAATGCCAGCTACTAATTCCGGTCTCTGTTGGAAACACAGAAATCAATGCTCA +CAGAACAGGGAGGCTGGCCCTCGAGGAGCAGTGTGGTGTTGGGTCCCCAGGCAGTGATTA +CGACAGGACAGGCAGGCAGGAGAACAGAGCTCTCGACCTTAGACACGTCCCATGCGGCCC +TCACCTGGCGACCTGGACTCGGCAGCACGTTGGGGGTCAGAACAGTGGGTTCTCAGGGAC +CTGGCCTCGGGGGGCACCCGTCATCCCTCAGCACCCTCCCCTGCAAAAGGGAAGCAGGGC +CTGGACATGACCCCCAACCCTGGCTCCTGCCACATTCCCCTGACTTTATTTACCACCCGA +GAGACTCATAAATAATAGTTGAGGGGTCAGTGAGCGAATGAACTAGCCAAAAAACCAGCT +GGGGAATGCGTGTGCAGGTGGAGAGAATGGGCGCTGGGCCTGCCTGGGCACAGGTTCACG +GGCCTCTGGATGCAGCTCACCCCGTGGGAGCCCACAGCCCTGCAGGCCGGGCAGCCTGAG +GGCCAGGACAGCCCCACCCTTCTCTCAAGGTGCTGGTCCAGGGCCTGTTTCCCCATCCAT +GTAGCATTCTTGATCTCTGGGTCCTACCACGTCCCTTGGATCTCAGGCTCCCAGCCAGGA +GGGGCTCCCACCACCATCTCATGACCTTCTGAAACCCCAGCGCTGGGTGTGCACCTTCTT +GCCAGGCTCCATGGGCAGCAAGCACCCGCCCCTGGCCTGCTGGGCACTGCACCCAGGAGG 
+TGCCCAGTACTTGCTGGGACAGCTGTCTTCTGCTATGTCCTGGGCTCGTGTCCTGCCTGC +AGCTTCCGGGCTGGGGAGCTTGGGCAAGGCTCTTGGCCTCCCCCACCCCCACCTTGGTAT +CCCTCAACCCATCCCCCTGGACATGGGGTTGGTGCCTCTATTGGAGGAGGTGGTAGGTCA +GGCAGCCTCTGGCCCGCAAAGTCGGCCCTGGGTCCACCAGCCGCCCCTCTGCCCTGCCTT +TCCCGGGTCCCGCCTTCCCTTTCTAGGGCTATGGCTGCTTGTGGACAAGCGTCTCACGGG +TAGGAAACGGGACCTCTGAGAGCTCCTGGCCTGGCTGGGTCCTCAGCATGTGGGTCTGTG +AGCCAGGTCCACGCCAGGCAAGGGTTTTTACCTCTCTGAGCCTCGGTTTCCTCGTCTGTA +AAAATCAGTGAGGGGATGTCTCCTTGTTGCACACACAGCAGGACTGAGATTAGAGCCACA +GCCTGCCCCACCCCAGCCCCTGGTCAGGTGGGGGTGCTGCCATCTCCCAGCAGCCTGCCC +AGGCCCCTCCCTCACCTGTTCCGAGACTTCCTGGCCCCACATGGGCACCCGGCATCCTTG +TGGGCTCACCACAGCCTGATGCTGCCCACAGCCCATCTGTGTGGCCTGGCAGGGGGCCCC +ACAGCTTGGAAATGCCGGTATCTCCTTGCAGACCCCAGGTCAACACTGTAGATTCCTCCC +TCTTCTGTCATGGACCCCAGGAAGGAGGGTGGGTGGGGCCCTGGGCTGGTGCACCGCCCC +AGAAGCCCTCAGCTCCTGGCAGGACCCCCCTCCCCCTGCAGGCCCTCCCCTGGGAGCAGC +TGCTTCCCTGTTCATTCCAGGCTGGGTGGGGACCCCCAGGATGGCCCTTTCTCTGCTGTC +TGTGGGCCTAGGGTGGGGTGGCCCTGGCCAGGCCCTGTATGGCCTCCCAGGCTTCCCCTT +CACCCACTCGTTGTGGGCATGTGACACATTCATTCTAACTACAAACTGACACCCACTCAC +TGAGGGCATTGAAAAACCGGAGAGCCCCAGGAGCCAGGTCACGTGGTGACCCCGACAGAG +ACACCCCACAAGGGTGTCCCAGCCAGGTAGAGTGGCCCTGGGTTGGGGAGGCTTTTAGGT +TGGATGCTTGGTTAGGCACCTGCCAGGTGAACCTCAGCTGGGGGCAGAGAACCTGGGGCC +AAAATCTGGGGGCCGTGGTCCTGGGGCGCTTCCCTGGGCCCCACTGCAGGCACATGCCCT +CCCACCACGCGGGGGAGCTCCCCCTGCCCGTGAGCCCTGCTTGCACTTCCTCACCCGTCC +TCACCCTGCCCCAGCCTCCAGCCCGCCCTGGAGCAGGCTCTGTCCCTCAGGCCCTCTTGG +CCATGGGGTCAGCCTGGTGCCTGCTATTTTGCCCTCAGGACCCCTTTCTCCTGACCCCTG +CCAGGAGCCCTCAGCATTCACTGAGTTTTGGAGGAGCTCCCTAAACACAGCTGTGTTCAC +TCTGGAAGGCTGAGATGTGGGGGCAGGTGCGGGTGGGGTAATGATGTTGGGTGGGCTGGC +AGTGGAGCCAGAGAAGGCCCTCCCAGGACGAGGGCGCAGGGGGCTGGAGGGGAGGGCACT +CGGGCCCTGCTTGAGATTGAACTCAGTTACTGAGCAGCCAGTCTGCCAAAGTCTGATCTC +CCGGCTTTCATTCTACATTAAAAATTGATCTGAGATGGGAGAAGTGAAGGCAGTGGCCAT +GCCACCGAGAAACACTAGCTGTTCTCTGAGGCTGCTGGAGACACAGCCTCCTCCAGGAAG +GCTCCCAGAGGCAGCTCCAGACCACCAGGCCCGCTTCCTCCATGCTCATCTGCCCCACGC +TTGCCTCAGACTCCTGGCAGATCACTAACCCCTCTGAGCCCCCAGCCAGGCCTGGATGGG 
+TTTGTGTTTGCATGAGGAGGCCCTGGTCCTCACAGGCTCCCCCTGTAGCCAGGGCAGTCC +ATCTCATCCTCACCCCCGTCCACTCTGCCGGGGCTCCTCTGTCCCTGAGTTCTGCCTGAG +GGGCTGAACCACCAGCCTCGGGGCGGGGGAAGCCCACTGCACACCACCTCCCTGACATCC +CGGGACATCTTGCAGAGAAGGAAACTGAGGCTTAAGAGAGGAAACAGTGTCTGCACAGGG +CACAAAGCCACAGAGCCGCCTTCTGCACAGCTCCCACCTGCACCTGCGTCTCCAGGGGAG +CTGGGATTTTCCAGGAGTAGGTCCTCAACTCTTGGGCATGTGGCTGGGCCCTTCCCGACC +CTCAGAGGAGAGCAGGAGCCACGGGACAGGAGTAGACATGAAGAAGCCTCCACAGGGTTG +GGCTTGCCCAGGGCAGCCTGTGGCCAGCGCTGGGGGTCACGGCCCCAGTGGCCTTCCGAG +GGAATGGACAGGTCTGCCCCATCTCTCGCCTTGGCTCAGGCTCTGTGTGTGCTCACTCAA +TGCAGCCGCCAGCTGCTGCTCCCCCACGGGGTCTCGCTCACTGCATGCGGGCTCTGCTGC +TGCTGTCGGTCCTTGGCCTTGAGAATTCCTCTCTGGGAATGCGTGTTTCTCTCTGTGAAC +ATGTTTTGCCTTCTACAGGCCTCCTTAGCCTCATCAGAGACTCCTGAAATGAGTTTCCCG +GTCCTGTGTGGGCCAGGGGCCACCCGCCTCTCCAGACCCCCAGGGCCCATAGTGGCCTGT +CCTTTTGTCCTGCGGCCTCGGGCCCCTGGGGTGTGTGGGGGGGTTTCCTGTCCTTCCCCT +GAGCCCTGGGGCACGGGGACCCCTGGGTGGATGCCCCTCATCCTCATCCAGTGCGCTTGC +TGCCGGCCCCAGGCCCCCCGCGAGGGGCCCTATGGAGTCGAGACAGGAGCCCACCTATAG +GCCCTGTGTGGCCTGGAAGGTGGGGATGCCAGGGGAGCTCTCCTGCAACACCCCTGACTT +CCAGACTCCCCTGAGGGAGGGACTTCCACAGGGAAATTAACCCCGGACGAGCTTGGTTCC +CAGGAACTCACTTCGAGCCTCCAGGAAACAAACAGCCCCAGAGGGCAGGACACTCGGTGT +CCGAGGCACAGGAGTGAGCAGGGTCTCCTGGGCACCGGTGACCTCCACCAGCTCATCCTT +TGTCCACACCTGGGTGGGCTGTCCCTGGCGGGCTCTCAGGGTAGAGGCGGGGGCTGGGTG +GGGGCAGAGCAGGTGCAGAGGCTGGTGAGGCCTGCTGGGTGGGAGGTGGGGGTGACCGCT +CTCTGGCTGCCTTGACTCCATTCATCAGCCTGCAGACCCCCCTCTCCAACCCCCAGTCTT +TGAAGCCACAATAAATATTGATTTGGGCAAATCGATGCATTTCCGAGGGCCTGGGTCGAT +AGCAAAGTTACAAGGATCCGGGCAAAGTCATTCACGTGCCCAACTCCAGCTCTGAGAAGA +GCCTGGCTAGAGCTAAAAGGTGACTGGCCATGTCCCGGGTGCCAGTGCCACCAGCTGGGC +TACCCCAAGGGATGGGCAGTGTGGCCCCTCACTCTCACCTGGCCACAGCCAAGCTGGGGT +TGACCGTCAGAGTCCGGCTGCCTGTCTCTGCGTCTCCCCGTGTCTGCCCCCTGCCCAGCT +CCTGGCTGCATGGCTGCCTCTCCCCGTCCTGTCTGGGTGTCTTCCCGCCTGGATCCCAGA +CACTCTTATGGGGCGCTTGGCACTGGGATCTGAGCAGTTCTGCACATGGTAACCACGGGT +CCAGGGTAGAGCTGAGGGCCCTCATGCGACGCCTCCCTCGGGCTCTCACAGTGGCCCCTG +CCCCTAGATGGGAACTCATTGCTTCCTAAGCTGCCTCTTCCATCTCCACCCGCAGCAGTG 
+CCTCACCCTGACCCTGGGCCACAGAGGGGACATCTCCCCAAATGCACACACCAGCCTTGG +CCTGCTTAAGGTCACCTGGCCATCTCCCCGCTCTGGCCTCAGCCTCTGTGTTTCTGCTGT +CTTCTCTGACTCCCCGTGGGGTCTGTGGAGACCTGACCACTGCGGCCCCACCTGGGTGCT +GTTTCGCACGTGGCTCCCTCTGCTCTGTCCACCGTCCACAGTTCAGCCCTTCATCCAAGC +CCCCCAAGTGCCTGAACTGCCCTCTCTAGAGGGCAAGGCTGGGGCCCAGGACCCTGGGAG +GAGGTGCCTGTCCTCTGAGTCCTCCTAAGCCAGAGGCTGGGCAGGGATGCGCTCAAGGGC +GGTGGGCATCTGGGCTGAGCCCTGCACCCTCTTTTCTCCTGACATCCCTTGCCTCCCCCC +ACCCCGCCTGGTCTCCCTCCCTCTCTAATCATTTCTCATGGCTCCAACCCACTCTGCCTT +TCTTCCTCTCCTCGAACTACAAGGCTGGGTCTGGTGCACTCTGGTGGGGCCACCTTATTT +ATGAGAATTTGGGAAATGCAGATGTGATGTTTCCAGCTAATAGCCCCAAATCACTTGCAC +CGCCCCATTAGCGGGGAAACCAGCCTGTGTACATTACATACCACCTAAGTGGGAGCCGAG +ACAGCGGTTTTATGACCCCATAAAACTTTGGGAGTTTATTGCTATTGAACAAACATGTAA +TCCGATGCTTCAAGTTCAATATATATAGCCAGCAGCATCTAGAGCCAACCATGGCCGTGG +CTCCTACAGCTGGGCCAAGCTGCTGAGACATCTAGGGGATTTGGAGCCGATTGGCAATTG +ATCTGCTGTAGGACAGGGGCAGGATTTATGGTGGCCTTGCAGGGCTGGTCCTTCTGCATA +CTAAACAGTCCAGAGGACAGCAAGGGGAGGGCAGCCGCCTGGAGCCCAGGGCAAGTGGCT +GCTTGTGGGTTGCTGGGGCAGAGCCTTTAGCAAAGGGGTATCCACCTTGAGGACCTGCCC +CTGTAAAAACCCAGCTTCCTTGGAGACTCAGCTGCGGTGGGAGCCATGCAGAAAGTGGCC +CTGTGGCTGTGTGCCCTGCCCTTCAAGAGGCACCATTGCCTCCTGAAGCCTGTTTCCTTG +TCTGCAAGATGTCCCAGGATGGTTTGTTGCCCAAGAGGTGGCCCAGCAGAGCTCCTGGAA +GGAAGGGGCAAACACACAGGCCAGAGCAGCGGTCAAATGGCTAAGCCTTATCCACCAGAG +CCCCTCACCCAGCTCCCATTCATCCACTGTCCACCATCCATTCATCTAAACACCCATCTA +TCTAACCATCCATCCTTCTACCCATTGATCCTTTCACCCATCCACCTGCCACTGATTTAT +CCATCCATCCCTCCACCCATATACCTATCCATCCATCCATCCATCCATCCATCCTTTCAT +TCATCCATCCACCTGCCCATCCACCTGCTCACCCATCCATTCATGTGTCCATCCATCTAT +CCATCCATTTACTCACCTGTCCATCTCTCTATTCATCCACTCACTCATCCATGCATCTAT +CCACCCATCTGCTAGTCCATCTGCTCATCCGTCCATCCACTCATCTATCCATTCATCCAT +CCCTCCCACCATTCATTCCTCTATTTGTCCACCTGTCCACCTACCATTCATCCACATAGC +CATCTCTCTATGCACCCATCTGTTCATCCCCCATCTACTCACCCATCCATTTTCACCTAC +TCACTGTCCATCCACCCACCTGTCCATTCACCCACCTATCTGCCCATATACTTGTGTGTC +TGTTCACTCATTCATTCACCCTCCATTCATCTGCCCATATACTTGTATGTCTGTTCACTC +ATTCATTCACCCTCCATCCATCTGCCCATCTGGCCTTCTATCCTTTCCCTCAAAGCTGCC 
+CCTTGTCCAGGGACAGGGCTGTCCTTATAGTCGCAGAGATGATTGGAGCCTGAGAGAGCC +AAGGTGACTTATCTCACCTTGCAGAAGGTGCTGGCATTATTGCTTGCTCAGGACACCCGC +ATCCTCTTGCCCTGGGCTTCACACTCCAGGAATGCCTTTCTCATGTCTGTGCTGCATATA +CTTATTGTAAGTAAAGGAGAACCTCATGGCCGATGATAGTTTGTTCATTTCCTCCCAGTC +TTATTGTGTGCACCTATTCTTTCTTCCAAGTTCACAGTCTCTCTGGGCTTGTATGGCGTG +CATTTCACTCACCACAGATGTCTCTTCGGGGCTGGTCCATGGTGTTACTCTCCCAGCATT +GACTTAAACATTTCCCCACTGTTGGATAGTGTAGCTGTTTCCACCTCTTTGTGGTTATAA +ATATGCTGTGGTCATTATCTGAGCTGGCAGAGTGAAGTCCGCCCCTCTACGTGTTTAATT +AGCATAGACTCCAAGAGGGGGCATTCATGGGTTAAGCATCTCATGACTTGGGACACCCCT +TGTCCGATGACAAAAACCTGGCCCTGTGATCCCTGCCCTCAGTGGAGGAGGGAGGGCCTT +CCGTGGTGCCCGGCCGGAGGGGCTGGGATGTGTTCACTGTGAGTCTGGTGGGTCTCACAT +GTAGGCAGAGTGGGGACAAGGCAGCCTGCTCTGTGGCGTGTCCCATGGCGTCCGGGTTGC +CTTGTCTGCAGTTCACTCACATTGCCCTTGGTGTTGCTGCTTCAGATAAGTTCCTGCCGT +GTTAATGCTATTAATTTTTTGTCATGTTAGTGTCAAAAATTTACCCGCCTCATTTCTTGT +CTTTTAGCTTGATGTTTTCCTTTGAGCCACCTGAGAATTTAGCTTCCACACAGTCAAAGT +GAACTCATCCTTCCCTTTATTCTTGGAAAGTTCCTTCTCACCAGGGATTCGGTAAATATT +TGCTTCTGTTTACTTCCTGTTCCTTTCAATGGTTTAAGTTATTTTCTGTCAGCATCAACT +TATTCCCCAGCCCATATCCGCAGGCCTCACACTCTAATTAAGAGGATGCCCTATGGCCGG +GCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGCCCGAGGCAGGCTGATCACTT +GAGGTCAGGAGTTGGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT +ACAAAAATTAGCCGGGCATGGTGGTGTGTGCCTGTGATCCCAGCTACTCAGGAGGCTGAG +GTAGGAGAATTGCTTGAACCTGGGAGGTGGAGATTGCAGGGAGCCGAGATCATGCCATCG +CACTCCAGCCTGGGCAACAGAGTGAGACTCTGTCTCAAAAAAAAAAAAAAAGTAAAAAAT +AAGAGGACACCCTGTGTCCCTCCCTCTCTCCCTCCCTACCCTTCCTTCCTTCCTTCCTTC +CTTCCTTCCTTCCTTCCTTCCCTCTTCCCTCCCTCCCTCCCTCCCTTCCTTCCTCCCTTT +CTGCCTAAACTTTTCTCAATAGCCTCAAGGCTGCAAATAAGACCTGCCCCTGGCCTCAGT +GACTTGGTGGCACAGACGTGCAAAGCACAGAACGCAGAAATTCAAGTCAGGGCTGAGGAC +TGGGGGTGGATGGGGGCCTGCTGGGGGGCTGGGGGAGAGAGTGGCAGGGTGAGCTGGGGA +CATTGGAAAACTCTTGCAGCTGTGACTGATGATGGTGGGCAGGTGTAAGGGGTGGCTTTT +CGGGGGAGGCACATTTTGGCACTGGGTTATGTGCTTCAGCACCAGGAGCAGGGGTGAGGT +AGCAGACGGAGGGGAGGAGGCCATACACCTGGGCAGGGGCTGGATGGGACTGGCTGACAG +CTGACCTCCCTCCTGTGCCCAGAGAGCCACAGCCTGCTCGGTGGCCCGCATGGGATGAAA 
+CCTTTGTGGTCCCAGCTTCTGATCACTGGAGGTGAGAACTAGTTTTTGAGAGACGGGAGC +AACTGCTGAAGACCCAGGGGCTGGGGCACGGCCCAGCGTTTGCTTTTGAGAGCTTCGCCA +TGGCCCGTGTGGAGAGGACAGCAACCACCAGGGAGGGGGCTGGGTGCCCCAGCCTGTCTG +CCAGCTTCTGAGCCCCACCTGCGTCTGCACCTACTGGAGACAGCTGGGTGGGCTCTAACC +TGGATGGTCCTGGGTGGGAATCCCAGCTCTGCCTCATGTGAGATGAGGGCTGAGCCTCTG +CTCATCTTAGAACCTCAGTTTTGTCACCCATACAATGGGATGGTCAGTTAGCAAGGAAAC +GCGTGTAGGTGCCCCGTGTAGTGCCGGCTGCCTGGGAGGCGCTCCCAGAATGTTTCAGAG +GTGATTCTTCTCAAGGACTTCCTGGGCCCCTTGGTCATAAGGCTCTGAATTCCCAGCCTC +TGGCCTGAGTGCCACAGACCCTAGAGGGTTCCTGGCCACTCCTGGGGTCTGTGTGGACCT +CGCTGCTTAAGGCCCCCCGTGTGCACTGAAGCGCGACACCCCGCACGCAGGGGCCATCAG +GTCTTTGGGGATTGATAGAATTAAGCCCATGGAACGTGCATTTGCTTGCTGTGAAGCCGC +AACAATCTCGCTGCCACAGGGCAGAGGACGTTCTGAACTGAGTGTCCCCTCGGCTCCCTC +ACCGGCCTCCTGCACCAGCAGATGGTGCCTGATGTGGTGGCTGGCACTGGTGCCCCTTGC +CCTCTGAGCAGCGGTACCATCAAAACCCATCTTCCCAGCAGCTCAGGATCACCGCGGTGG +AGTTTCCAGTCGCAGATGCCCTTGGCAGGGGCTGTGGGACTGCAGGGGAGACGGAGGTCT +GGGCCCTACCATGTGGGGGCAGCATGCAGGAGGGGCCCATTCCTGCCCCCCGGCAATACA +AGAAGCCTACGGGGGTGCCCAGGGGTCAGGGACCTGAGAGATGTGCTCACTGAGACCTAG +CATGCTCACCCCCAGCTGCCAGAGGGGGTAGTTCCCAAAGAGGGGCAGGCCCTCAGAGTG +AGCTCCTGTCCTGAGATGGGCAAGGGAATATTCCTGGGACACTGCAGAACTGTTACCAAG +CCAGGCACAGATGGGGCCAGGCCCCTCGGCCTCTAAGACAAGGCCGGCCCATGGATGGAG +CACGATGGCCCTGACCTTGGGCACAGACCCGACGATTGAGTGAGATGGAGACACAGGCTT +GGCTTGTCCCCAAGAAGAAAGAGGCACAGAAGCAGCTACAGAGTTTCTTGCTTCCCTGTT +TGGGTTGCTGCTGGCTCAGGTACCACAGCTGGCAACAGAAACAGAGGTCAAGGCCCAGAG +GGAGGGAGGGGTCGGGTCTGCTTTGTCCAGGGCTGGCTGGCCCCAGGAGGGCCCCGAAGC +TGTGGCAGAAGCAGGTACCAGATGGTCAGTGTAGCACTGTGGCTAGCACCCAAAACTGGA +CATGGGCTGGGAGCAGCCTGAGATGCAGGGCAGAGGACAGCAAGCCACGAGTCTGAATGG +CCCCTGTTCTGGAGGCAGTGCTGGGAGGTGGAGGCTGGAGGGGGTGTCTCGCAGAGCCTG +GTCTGCTGAGACATAGGCTGGGACAGGCTGCTGGGTGTACTGTCCCTGGGAAGTGCAAGG +TGGCGAGTCAGGAAGGCCCCAGGAGCAGGGTTGGGGCAGGAGCCAGTGAGCAGGGCAGGA +CCCTCATCTGGAGGGGCTGGGGGAGGTCTCCAGTCTGGGATGGGGGTGTTCCCGGCACCA +GCCCAGTGTTTCTGGTGCAGCAGCCACGTCATTATGGTTGGAATGTGACCCCATTAGCCC +TGGGGCGGGGCTGTTCAACATCCTGAGGATAATTGATCAGAGGGAGGAGGGGCTGCACCG 
+CTTCCTAGCAGGTTGAGTTAATTATCAGCCTGCATTAGCACTTGACCTGTGGCATGTGTC +AACTCATTACTGCTAATTGCAGGGACTCAGTGACCTCTCCGAACTTCTGTCACCCCTGTC +ACGGCCCCTGCCCTGTGGAGGCCTGTGGAATGGGAAGTGGGGAGGGGAAGGAGAAGGGGG +CGGAGTCAGCCGGAGCCTCCTGGCCAGGCCTGGAGTCACCCTGTCCTGCCACAGCTCTCA +TGGGTTTAGAGTGGGCCCTGGGGCAGAGGGTTTGCAAGGATAGGGGTGGGTGGCTCTGCC +GAGGGCGGTGTTCTGAGTAGGTCTCATTCCCCAGTCTGCCCAGCCTGGCCCCACCCCAGC +TCTGCTGCCCTGTTACTTTGTCCCCCTGTGTTCACCCTCCACTTCCTGCCGGGGACACTC +CAAGGCCTGCCAAGGTTGGAGCAGGGGTGGGACTCAGCCATTCAACAAACGCTTCCTTGG +TTATGCAGGGCTGGGCATTGGGGTGGGGGGCTGACCTGGACCCTCCAAGACATCAGATTG +TCCAACTGCTCAGTTTTCATTGGGGCAAGTGCCTCGCTGAGGGTGAAGGGGGCTGTGAGT +GCCCATCAGGGAGCCTGGACAGGTCCCGCCTCTCACAGCTGTCCCACTCTCTGGAGCTGC +CTTGGTGGCAGATGGCACTGTCCTAGAAAGGTGAGGCTGGAGCTGTCTCTTCCACCAGGG +GTGGCCAAGGACGCTTCCAGGGTCTTCCATGCAGGCCTCCAGGGTCACCCTGGGCGTGGC +TGAGGGCATGCAGACCACGCTGTGCTCCCCCCCACCCCCCATGCCCGTCGCTTGCCTGAC +ATGCTCCTCAGCTGTCTTGCCCCGCAGAAGCTGTGTGGTCTCCCCTGTGAGTAGAGTCGG +GCTCCATGATCCTCCGGAGCAGCGCCAGGGACTCTCCCCAGGCACAATTTTAAAGGGGGC +CCTAGGACCTGCCAGGTCCACGGAAGTGGTCCTTGTAACTCTCTCCCTGGTGGCAGCCTC +TGGTGCATTCCGGCAGGGGCAGCCCCTCCTCATCACAGGGACCCTGTGTGCCAGGCCCCA +GCTGCGTTGCCGCTGAATTATGGATTTAATAGCAGAGAAGCCTTGCAGGCCGCCTGTGCT +GGCTCGGGAAACCGGGGCCCAGCCGGGGCCTGTCACGGTCCTGGTGCCGCCCTGCTTCCT +GTCGGCGCCACGGAGCAAGCGGAGAGATTTATCTTCCCGGCTTTAAGAGGTACTAACGGA +GGAGGCTGAGAGGCAGGAGCTGAATAATTGATGGCCTTCTCTGGCTCAGGATCTGCTGAA +AATGACACGGTGCAAGGCAGAGGGGCTGAGAGTGCTGACCACAAAATGCTGAGAAGCCAC +TCCAGGGCGGTCACGGCCAGAAGGAGGACCCCCACTCAGGACGGGGTCACAGGCATTTGG +GGCGGGTGTCAGAGGCTCCTGCCCTGGGTCATTCCAGGGGGCCAGGATACCCAGAAGCTG +GGCCTAGAGGCCCACAGGGCTGGCTCACCCCCACTTCCCTCTCACACGCCTGCCATCACC +GGTGCTTCAGCCCAGCAACCCCATGGCTCGTTGGCCCCGTCCACAGCCCCCACTGCCTCC +TGTGTGCTGGGCGTGGGCCTCTGAGCTCTACTCTCAGCCTCCTTAGCCCCAGGAGAGGAG +TGGAGGACAGGGGTTTAACGTGGGGTGTGGTGCGGAGCGGGGAGGGCTGTGGCGCCCCCT +CAGCCCCTCCTTTCCTAACAGCCTCAGCCTCCCCCCTGGAGAATCCCCTACCTGCTGGGC +ACATCTGGCCTCTCCCCACCATTCTGTCTCCAGCCGGGAGATCTCTACCCAGTGAAACAG +TGAAACAGGAAGGGTTCCCTGGTTACCCCGCGCAGGGCGCAGGGCATGCAGCGGGGGAGT 
+GGCTCGCTTCTTCAGTGCCCCACCGCTCACACCTCTAGGGGAGCATACAGACGGGCAGGC +TGTGGGGCTCTGACCCCATGGCAGTGTCTGGGGGTGAATGTTTACAGCTCTTGAAACCCC +AGTGGGAGTGCGTTACCGGGTGGTCTTTTAGTCTAGCGGTCTGTAGGTGGCTGGTGAGAG +TGAGCTCAATTAGACCCCTGCCTTATAACAAGGACAGAGGAATTTCTGTATCCCAGGGTT +TCTTGCCTTGGTGTACCAAAAAATTGGATCACACGTGGGCTTGGAGAATGAGTGCAAGGT +TTTATTGAGTGGAAGTAGCTCTCAGCAGATGGGGGAGCCAGAGGGGAGATGGTTTTCCCC +TGGAGTCAGGCAACATTCTCGGGATGTCCAGCAGCTTGTCTTCTTCCACCAGTGTGTTCC +TCTCCCCACACAGAGGCTTCTGTCTCTGCCTTGCTAGGGTCTTGGGTTTTTATAGGCATA +GGATGGAGGCATGGCAGGCCAGGGTGGTCTTGGGAAATGCAACATTTGGGCAGGAAATGC +CTGTCCTCACCTAGGTTCGTGGGGGTGGAGCCCTAGGCAGGGACCACGCCCTTCTCTACG +GAGCACTTCCCGTCCCTCCTCTGTATCATTTAAAGGGACCACGCTTTTCCCTTCTCAGCA +CTTCCTTTCCGTATCAACAGGAGGCCAGGGACTGGCTGGACCCCTAGGAGCCAGGTCCTA +CGGATAGGTGAGCCGGCCCCCCAGGGCAAACACCTCCACTGGGCAGACCTGACTGCCCAG +AGCTGGATTCTTTACTTGTAAAAGGGGCCATGAGTGACACAGGCTCGGCCTGCTGTGCCT +TGAGGAAAGGAGAGGCTAAAAATTCTGCCTTCTCTCCTAAAGCCAGCCGGGCTCTTTATG +GTGGGGGAAGGAAGGATTTGTTTATTTGCACTCTTCTGAAATAAAAGTTCTCTATCCTCC +CTCCTGGCTGGAGTTGGGAGGGCCCCATGGGTGGGCTCTGAGATGTGGCACACGGTACCC +AGGGCAGGGCTGGGAAGGGTGGGGTGCAGGGGCCTCCCTGGCTGCAGGGCCCACCCACCA +GGTGCCTGGGCTTGCTGCACTGAGCAGGCATGGGCTCTGGCCCCCCTCACTGTCCCTTCC +AGGCCCAGCCTGTGTTCAGGGCCCAGGACAGGGTCTGGCCAGCAGGCCCCAGGCCTCCAA +GATGTTCTCAGCCTCTGGAGTGTGTGTGATGATGAGCACTCATGGAGCTTCCTGTATGCT +GGGCCCTGGGCTTCTGAGAGGGCCACCGGGTGGCGACTGAGTCACGAGGTGGAGGGCCAG +TCCCAGGGACTCCAGGTGACCTCGTGTCGGCACCACGCCCTTCTCCCATGGGCAGTCCTG +GGCGCCCCTTCCTGGCCTTGGGCCCAGTGGCCTGTGGCCTGACCCCGTCCACTCCTGCTT +GGTTGTCCCAGCAGCAGCAGGAGCCCATGCTGGAGAAGGTGTTATGGGGCAGGTGGAGGG +CCCGGCTGACTCAGATGATCCCCCAAGTTTCTTTTCAATGAACACTTGTTTAGTGCAGTT +TTTCCTCCTCAAATCCCCCTCACACAGGCAGACACCCTCTTGGCCCAGGGTAATCTGCTG +AGCCTTTGCTCAAATATAGCAGCTTTGCTGCTTTCTAAACACAACTAATCCAATTTGGGA +TTTGGGGATTCGAGGTGGGCAGCACTGGGCCTCAGATGAGCCCCCATCACCCCAGACCCA +TGCAGGAAGAGCCAGGCAGGGTGGGGCCCTGGGGCCACCTGGTGGGCTCAGGGCCAGGGC +ATCGCGATGTCCCCATCCTTCACACTACGGGGCTTCCTGGCTGCAGAGTTGGGGACTCAG +CTGGGAATGCTGGCCATAGAGGCAGAGAGCCCCCATCCTGCTGGGCCAGCCCTGACCTGG 
+CTTCTCCAGGGTGGCCCAGTGTCCCCTTCTGCCCTTCCCCTGGCTCCCACCGTCCACATC +CCTCTATCTGCCCACCCTGGACAACAACCAGCACAGTGCAGTGTGGCCCTGCAGTGACAC +CACCTGCAGAGAGGTCAGGCCCAGCCTCCCCCGTCTCCTAAAAGGCCAGCAGGTCCCAGG +AGCTGGGGGTGGGGCCTGGCTGTGGGGCCTCCATGCCCCAGGGGCCAGGGCAGACCCTCC +TTAGCCGGCAACCACTGCAGATTCCTCCCCTGGCCAGCACTCGGGGACCTCCCTGCCCCT +GCCCCGTCCCCCTACCTTGCCTGCTGAGACATAGGGCTCCACGGGTCTCTCTCCTGGGGC +CGGGCTGACTGTGGCCTGCGAGGGGCAGTCATCGTGTTGGGTTTTCCTGCCAGAGGCAGA +AACCACAAAATTACCTGGAACATACACGCCCCAAGTGACAGATTCAATTCAATTCCACAA +ATATTGACCTCGCGTCTAATCCACTCGTCCTCCAGTGCCAGCTCATTCCTGAGAAGCCCA +GCCTGGCAGCCGCAGATCCCATTTAATCCCAGCCCCACAATGTCACCAGCTCGCAGCTTG +GGAGCTGGGCTCATCCAGGCCCTGGCAGGTGGGGCCAGAGCTGGGCTCTGACCTCTGTAC +CCACACTTACCACCATGTAAAGTGCTGGGAAGAGGGGTGGGGAGTCACGGCAAGTTGAGG +GTCCAGGCTTGCCCCAGCTCGCCAGATGCTCGCCCTGGCTGCCCTGACCTAGCAGCTAAT +CTCTGCGGGGCACTGCGGCTACATGGTGACTCCAGCACGGGGACATTTATCAGAAATGGA +ACTGATGGATTAAAGTGGAACCAAGCGAATTGATTTTTCTCCCTGGGCACTGAAATGCTG +CGCTTAATCAAGCGATCAACTCCGTGGGCAGGGTGGCTGTGGGGACGGAGGTGGGGGACA +AGGGGATGGTGGCCATGGTGGGGACTGAAAGGTGGCTATGGGGACAGATGAGGTGAGCCA +GGTGGCCCTGGGGTCGAGCTCCAGCCCAGCCTCTGGCAAAGAGGCCCTGTGTGCAGAAGT +CCGGGTGTGATGCCCTCCCCCCACCGATTGTGTGATACCGGCCAGTCCTGTCCCGGCTGA +GCCTCGGTGGCCTTGATGTAACCCTGTGGGGTCTGGTGGCAGGGACTGATTCACTCATTT +GCTCATTCATTCATGTGTTTGACACACACTGCAGCAAAGGAAAGGTGAGAGCAGCCACAG +GCTGGCCAGGGACGATTCAGACTTCAGAGTGTAGCGTGCCCGCCCCTCCTCTTGTGAGGT +CCAGCCTTGGTGTGTGTGGATTCCCAGGCACCCTCCAGGTCCACCCTCCACTCTGCAGTG +CCCCGCCCACCTGTCCACATACTCTGTTCTGTGGTTTGATCGCCATCCTTCTGCTCTGCG +GCCAACTGGATGGCCCAAGCGGACTGAGTTGGGCAGGGTGGTCATACCTGTGGAGCCTGT +GGCTGTGGTTGGAGGCCCCGTGGGCAGCTGAGGCAGCCTGGGTGGGCATGTCCCACTGGG +ACTTAAGCCCCTAGGTGGGCTGTCCGAAGTGACTCCTGAGTGGCTGGGGTGTGCTGTCCT +TCCAGCCTGGCCGTGGTGGCTGGGTGGTGTTGGAAGGCACTGAAGGAAGGGCAGGTGAAT +TCGTGCTGGACCTGGTCTCAGTGCCATGGGCCCCAGGAGCCCATGACATCACTGGCCTCA +TGGACTGTGGGGATTAACCCTGTCACTGGACACCCCCAACACACCCCCTCCAAAAGAAAG +GTGTTGGCACAAGTGAAACCCCTTGAGTGATGGCTGGGATTTCAAGCTCCAGACTGGTCC +CCACTAGGAAGCTCTGGGTCTGGCTGGTGGAGCAATGTGTACCCGCGCCTCTGCGTCTCC 
+CTGCATCTGGCAGCCCTCCCAACAGGGCTCATGTGGGGTGCAGTGAAGCAGCAAAGCCCC +CCACCCCACCCCACCCCAGACTAAACACAGGCTGTAGACCTGGCCCAGGGTGAGAAGGGC +CCCTGTGGACCACCTACCCAGGTGGTCTGTGGCAGCCTCTCCCAGCTTTCGGTCCAGGTG +GCCCAGGGAGGTGGCAGCCCTGACCCTTCCTGGCGATACCCTGATTGATCATAGTGAAAC +ACTGAGCTGGGAGAGCCAGGGGCACCCAGGGCTGAGCTCCCGGGGCAGCTCCCAGGAGCT +GCAAGTGGACCTGGCACTAGGAAGGAGCAGTCCACCGAGCACCTAGGCCAGGGCCCGGGA +CGGTGCCAGGCACCAGCTGAGGCCCTGCCAGGCCAGCCGCATCTTCACATTCAAGACCCC +AGGCCCCAGCCACGGCTCTGAGCTCTGGGTGTCTGCAGCCACGGTTGGCAGGGTGGGCCC +CACCTTGCCCTGACTGGGCCACGGACTGGTGTGAAGAGCTCTAAATTTCACTTACTCCCC +CAAAGTAGAAGTGGCCTAGGACTGTGTGGTGGCTCCTGGTCTCCAGAACCCTGCCAGGCT +CCAGGGCAAGCAGGGAAGTCAGGCTTAGCTGCAGGGCAGCACCTGTCTGCACACCCCGTC +CTGCCACGCTCCCTGCAGGGACCTCCGCCTGTTGTCCATGGTCCAGCTTGCTGGATGGGG +CAGCAGCTCCTACACTCAGAGCTCTGCCATCCCCAGCCCCAGGCCAGGCTCTCCCCACAG +CCCGTTAGGCCCTGACGTCCCCTGACAACCAGGAGTTGGACAGGCCTGGCCCCCAGGCTG +CCCCACACTGCCATGAGGACCCCAGCTGGCTGAGTCCCATGTCCAGGCTCACTTCCTCTG +TCCCTGCAGCAGTGAGCACCTGGGACCATCATGGTGAGCACCACCCACCTTGCCCCTTGG +CCCATACTCGGGGACCTGGGTTCCCCAGGTGATCAGGGTGGCCTGCAGGGCCTGAGGTGG +CCATGCCAGCCAAACTCCCCCTGCCCCACAACTGGGTCACCTCTTCACCCTCCCGCAGCC +CTTGGGAACCCAGTGAGCTCCTCCCCCTTGCTGCTTTGTGAGTGCTGGCAGTCCTGGGTT +GCTGTCCTGGAGCTCCAAGACCTCCCTTCTGGAAGGCTGGGGCTTCCTGGTGGAAGATCC +CGCTCACCTCAGCTGTGGCGTGGGCCTGGGCTTCAGCCTGGGGCAGCCCTCATGGGCAAC +TGCTTCAGACTGGACCCCATGTGCTGGGGTGGCCAGGGCTGTGCATGGCAGGTTCAGGGC +ACAGGGTGCAGGCAAGGGTGTGAGGGTGTGGGAGGGCTTCAGTGGGAGGCCCTGATGCTG +AGCAGACACAGTACAGCCTGGTAAACAAGGCTCGTGTGCTGGGCACTCATCGATCTTGGC +CATTCGGGCCTGGCCAACAGCCGGTGGGCTCAGGCCTGGGCGGGGGCTGGGGTGGGAGTG +AGCTGCCTCAGGCTGGGCCGTGGGCACTGCAGGTGGCTTGTCCGGACAGCCCTTTCCCTG +GACCCCTCATGCCCATGGCTCTAGCTGAAAACACAGCTGCTCTTCTCTACAGGCTCGTCC +TGGGGTGAGGAGGGTGTGGGAGCTGCCTAGCCCTTCCTTTCCCAAGGGGTTGGGTGCCCC +TGGCTGAGGAGGAGACCCTCATGGCTGGAACTTGGTGGTGGTCAGAGCCTCCTGGAAGTT +GACCAGCTGAGCTCCAGCAAGTCCATGCCTCCTGGGACACAGCCTGGCCCCGATGAACCC +TCAGCTGCCATCATCCTCAAAGTCGCAGCAGGCTGGATGCTGAAGGCACAGGGGGCCGCA +TGGACTGTGGCTGCAGCTCTCTCATTGTCATGGGGAGGGAATGGCCTGCAGCCCCATCTT 
+GTTCCCTTCTCTTAACAGCCTCTTTCCAAAGATGGCCCGAGCTCCCATCCCCGCCCCCAC +AGGGCTCTTCCTGCACTTCCCCAGGCATCCTGGGAGGAGAGGTGGGCTGTGGCTGGAATG +TGCCGCCCTCTCACCCTCCTGCCAGGACCACTGTGCTGGCTGCCTTGGTGGCCTGGAGGC +CTGCAGAGCTGCCCAAGCGGGGGCTCCATGCGCAGCTCCCACTCCATGTTCTGTCCCTGG +GGAGGCCCACACAGCACTGCCCCTCTGTCTCATTCCAGGTGCCTGGTCCCCAGCGATCCG +CCTTGACCCCAGATGCTCAGACTTGCTAGCCCAGGGCCAGAGGGGCACAGGACAGGGCTT +TGGAGAGGGGGCTTCCTAGGCTGGTGCTGGCCCAGCCTGCCCAGCCCTGCCCTGCTCGGC +CCCTGCCTGTCTGGGTGCTGGACCTGGCCTGATCCATGAAGCTTCTCCTAGCGAGGTGTT +TTCCAGGCCTTTGGAATCTTAGGGATGATTTCGTATGGGGAGGGGCCTTCTCCCCAGCCT +GCTGTCTTATTTTTGTGCCGATTCCCCGCAGCCCGAGCTCCTGCCCCCAGCACCTGCCTT +TCCAGCCTGTGTCCTGTCTTCCACCAGCGGACTCCAGCCTGGCTCCCACTGCCACGTGGC +TGCCTGGCGCATGCTCCACCCCGTGTCCTCCCAGGGTCCCAGTGAGGGCCCAGCCCCGCA +GTGCGCAGTCTCCGCAGCCATCGAGTAGGCGGCCAGCTGAGCAGAGGTGCTGGCCGGCTG +CCAGCCCAGCACCCGTCCTGCTGAAGTTGCTCCTGGCACGCTTCCGGCCCCAGGCTTCCC +TGGGTGGTGGGGCAGACTTTGTGTTGCTGGTGTGCAGGAAGGGCCCACGACCCCCACCCA +ACATGTGAGAGGGGCATTTGGCAGCCATGGGCAGGACACCACCTTCGACCATCAGCTAGG +GGTATGTGCAAAGGGCCTGAGTGAGAAACAAAAGACAAAATGGACCAAATGTGCTTGTGT +GGCCGGGTGTGTGAATTGTGTGCAGGTGAGTGTGGAAAGATGTGTGAGCACAAGCCTGTC +AGCATACACGTGTGGGCGTGGGCATGTGGGTGAGCATGCAAGTGTGTATACATAAGTGAG +TGAGCACGTGTGTGTGGACGTGCATGTTATGTGGATGTGCATGTGTGCATATGAGTGTGT +GAGTGCATGTTTGATGAGCATGTATGTGTGGGCACACGTGTGTGTGTCAGCGTGTGCTGA +CTGCCATGGACGTGCACCTGGCTGAGTGTGGAAATGTGTGTGTGAGTGTACTGGCAGTCC +AGGTTTTACACCCACACGGCTTTGCCTCCCCAGGAATCCGTTTTAATAGCAGAGTGGGCT +GGAAGGGCTGGCCAGGCCCTGAGGCAGTTTTCAGGCCAGCAGTCCCTGGCTCCTCATATG +CGGTGACAGCCCAGGAGGCCGGGGCCCTCAGAGTGCAGTCTTTGGACATGCCTGGTTGAT +GGATCCGTCATTACCAGCCTGGGAGCTGGGCATGGGCTCCGGGGCTGGAGGACACCCCTT +TCCATCCCTGGCCCTGGCGAGCCCGGCCCTCCTCTCGCCCCGTGATAACCTGCAGCCTCA +TGGGGAAAGGCACGCGGAGGTGGTGCCGCACCCCCAGCCCTCACCCCTGCTTTGCCAATC +CTGGATCCTGGCCTCCGCCCGCCAAAGTCCACCATTGGCCACAGGATGTGGGTGCTGTGC +TGTGTCCTTGAAAATCTGAAGATGAGGACAGCTCAGTGGGCGGGGGCGTTTCACACATTC +GACACATGCCCTCACGCCTGTGGGTGTGTGGCGGGCTGGTTTACAGTTACGTGAGGATTT +CAGACCCAAAGTGCAGAGCCTGCAGCTGAGCAGTGCCCAGGGAGGGCTGGGGAGGAAGGT 
+CCCAGCCAGGAAAGTAGGGTGACTCTCATCAGTGGCTGCGGTGTCCAGCTGGGATGATCA +GGGCCGTGCCAGCCTGGTGCGCCTGGGGGACCCACAGCAGAGGTCCTGCATGCACTTGGC +TGGCACGTGTAGTTCTGAAGGCCCCTGGGGAGGCCTGGCACATGCAGTGTTTGGGGACCC +TCTCTGCTGCCTAGACACTGCCACAGACCCCACACCCAACCTTGCTGTTTTTCCCCCAAG +TGAGGGGAGCTGCTTTTTCCAACTCGGATAGCCCCCCTGCACTGTGCAGCTCGCCCACCT +CCCTCACAGCTATTCCAGCATCTCTGGGCGGGTGCGGGAAGCCGGATGCAGCCAGTACCG +TTGCTGTTTTTGGAAAAGAGAGGTTCAAGGCAGGGAGGGCCCAGCCTATGTTCCAGCAGG +GTTGTCTGGCTGGGGGGGTCTGCACCTCGCCCCTGCACCCCCAGGTTCCTGGCTCCCCAC +CCCCGTGCACGAAGCCCTTCTTGCCTGGCAGGCCTGGGGGCCCCCGGCCGCCATGCGCCC +CACCGCGGGTGTGATTTGGCAGCAGCGTTCTCCTCGCCAGCTTGTTTCTCACATAGGGGA +TGTTTCCATTTTTACAGCAGAAAGAAACCGATTCCAAGCGCCTCAGCCACAGCCGGAGCG +CGACGGCCCCCAGGGAGCTGCTGCCACGTCAGGTGCTTTGGGGGTGTGGGCTGCTCTGCC +AGCCGTGGTCCTAATTGCTGTCACTCAGGGAGCCAGCTCTAGCCTGGGGCAGGCAGGAAC +CATGAGGGGGGTCTTCGGGGCAGGCACTGGTCCCTGGCGGAATCCGAACCTCTGCCTGGT +TCTCCTGGCACCCCCTCCCCAGAGACCTGGAGCACCTCCCGACTTTGGGTGGGATGGGCC +TGTGGTTCTGGAACAGGAGCTTTGCCTGTGGGACCCTCCCATGGGGGCGGGGTGTCCTCT +CTTAGCCCAGGTGGGTTTGAGCCCCACACTGCTCCTTCCACCGCTCAGATTCCCGCCTGC +AGGTCCCTGTCACGGGGCTGCTGAGGACCGACCAAGGTGACTGCCCATAGCCCCACGCCA +GCATGGGGCACAGCTGCCTGGCGGCTCTGCCGTCACCCCCCGTGCTCTGACTCTGTGCTG +TGGATTTGTGTCACCTGCAGACTTGGGTGGCATTGGGCACAGGGGCACGGTGTCCTCTGT +CACTTCCCCACCTTGCTTGGGGCTGGTCTCCCAGGATGCTGAAGCATTGAAGTGCCTCGA +GGACTGTTAGAAAGATTTTCTTAGAAAGAAACATTTGCTACCCAGGGCACAGAACAGCTC +CTCGAGGTCTCGTAAACCAGTGTGGACTCGGCCCTGCTCCCGAGACTTGGAGGAAAGGCA +GAAAGGGCGGCAAGTCACACGCAAGGCAATTTTCAAAGAAAAGGGCACAACTCATAGGAG +GCAGGGGAAGGATTCCATTTCACCTACAGTCATGGCCCCGCGTCTTCAGGGAATAGGTGC +TGTCTGGTGCATCACATTATTAGTGCTGGAGGGCATCGCAGTCCTGGGGGAGGTCACTGC +CAGCATGGCAGCCCATAGGAAGTGGGAGGGTGGGATTCAAGGGCAAGGCGGGTGGTGGGT +CCTGGCCCCTGATGAGGCAGGTGGATGGTGGACCTGCCTCCACCCTACCCAGCAGGCTGG +CCACCAGGGCGTGTCCACAGACTCCAGACAAGAAGGAGAGAGTAAATTGAGGCATGTGGG +GCCCAGGCCCTGGGCGCAAGGCTGGAGGCCTGAAGTGGCATCCCATCAGCTGGAATCAGG +ATTGCCTCGATCTTTGATTCTTGAGACAGGAGGAGTGTGGCGGTGGCCCCAGCAGGATCC +CTGAGATCTTACACAGGTGAGGTGGTTGCAGAAGCACTACACGGGGCTGGGGCCTGAGGG 
+TGCAGTGCTGGGTCTAGGTCTCCCTGAGAGTCCACTGACAGCTGTGGAACCAGGCCTCAG +GGGAGCTGGGTGCAAATCCCACCCCTCCACAGAGCAGCCGTGTGACCCGAGGCTGTTTTG +CCCTCTCTGCCTCAGTTTCTCTCTGGCAGCCGCTCCTCCCTGGGCCCCGACCTGACCCTT +CAGGCTCAGGAGGCTGCTCCAGGTCCAGAACCTGGTGCTGCACTGGGTCCAGCCTCCAGG +CCTTTGCTAATGTTGTGGTCTCTGCCTGTGATAACTCCTGCGAGAGCTCCTGATCTCTTA +TGACCCTCCCCTGCCTGGGCAGATGCTCCTCCTGTCACCGAGGGTCCCATTAGCTGGCAG +CTACCCTGCCACTTGGTGGACCCAGGTGATGTGGGTGGGCCTCACTCTTCATCCACAAGT +GGGTGAGAAAAGGCAGGAAAGGAGGTGAAGCCAAAGATGGCATGCTCCCCCTGCACAGGG +TGACTGTATGGGGGGGTCACGGCGGGGGAGGGAGGGGAGGAGAGTGGTCCATCTAAGGGC +CCTGCCAGGGCCCAATGCAATGTCCCATCCTTAGATCCATCTTCAGTCACATCCCCACGC +TAGGGGGAGCAGCATTTCCAAAACGCCTGCCCTGGGGGCTCTCAGAATTCAGGGGTTCCC +TTTGAGGTCACACAGGCCCCACTGAGGCCAGACACAGCAAGTCCTTGCTCCTTGGCCTAA +GCCTGCAACCCCTGAGCCTCAGTTTCCTGCAGTAAGTGGGACATGATGCCTTTCCACTCC +TCTAGGCTTCTGCCCATCCTGTGTCACCGTCCCAGCAGATGAGTAGCTGGTCTGGTGGCC +TGAGACCTTGCTTCACTGGGGGACAGGGCTGCCATCCTCGAAGGCCAGGGTCTGGGCCTC +TGGGCCGGGAGCTCCTCCATCAGCTTGCAGCCCTGGGGGAGGCTTGCAGCCCTGGAGGCC +ACTCCCTCTAGTGACAGAACCCATACCTCTGGGAACTGCTTCCTCTGCCTGCATGCCCCA +AACTTGGACTTGGAGGTTGTAGATGGCCGTGGCAGGGCCCTTGCCTGGCCCCTATCCTCC +TGCAGCCTGGCCTAGGCATGGGTGACTTACCCAGTGCCAGCAGGCTGCCTCAGCCCGTGC +CCACTAAAAAATTTGTTCCTGGCCGGGCGCAGTGGCTCACGCCTGTGATCCCAGCACTTT +GGGAGGCTGAGGCAGGTGGATCGTCTGAGGTCAGGTGTTTGAGACCAGCCTAACCAACAT +GGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGATGGCAGGCGCCTG +TAATCCCAGCTACTCAGGAAGCTTAGGCAAGAGAATCACTTGAACCTGGGAGGCGGAGGT +TGCAGTGAGCGGAGATTGCGCCGCTGCACTCCATCCTGGACAATAGAGCAAGACTCCGTC +TCAAAAAAAAAAAAATTGTTCCCATGATATTCCAAATACAGTCATATACCATAATGACAT +TTCAGTCAACCATGGGCCGTGTATATAGCGGTGGTTCCATAAGATGAGAATACTGTATTT +TTACTATACCTTTTCTATGTTTAGATACACAATACCACTGTGTTACGATCCCCTACAGTG +TCCAATAGAGTAACACGCTGCACAGGTGTGTAGCCTAGGAGCAAAAGGCCAAACCAAATA +GCCCAGGTGTGTAGTGGGCTCTGCCATCTATGCTTGCTTAAGTTACTCTATGATGTTTGC +ACAATGACGAAATCACCAAACAATGCATTTCTCAGAACATAGCCCCATCATGTAATGGCA +TGAGAGGACACGGGCAGAATTCTTGGATGCCCATTTTGGGGACAGCCACCATAGTTTGGA +CATTAATACATTTGGGCACATGCACATGAGCACACACATGCTCACATGCACATGGATGCA 
+CACGTATGCGCGTACCACTCACTCCCGGGCACATGCTCATTCCCGGGCACATACACAGTG +GGGCGGGGCAGGAAGGCTGGATGGGGCCCAGGGGAGTCAGGTGCTGTGGTGCGCTGGGGC +TCAGGCCCTTCCAGCATTCAAACAGAGAGTCTCCTAAGAAAGCAGGGAGCCCACAGGGCA +TGGGATTTGGAATCTGAGTCAGGGATTTGAATCCCATCTTGTGCCCTGGGCATGCTAAGT +CTCCCTCCTAGTGCAGTGGGTGCCTGCTGCCCAGGCTGCAGGAGCGTTTGAGGAGGCTTG +TCAGCCTATAGGGAACTGCCTGGCGTCCTGCTCAGTGCTGCAGGCTGCATGAGGGGCCAA +TTGAGGACCCCCTAAGCCCAGGAGGGGCAGAGGCCAGGGACCAGATGCCACAAGTATCTG +TGGGTGGGCTTGGCTGTGATGAGCAGGACAGGGCTCAGCCAATGCCCAGGGGCAGAGCCC +CCGAGGCACTGATGACCACAGTGAGGCCAGCGTCAGGCTCTGGGTCTTGGGTCTTGGTGC +TGCTCCAGGAGTGGACCCACATGGCCCCTCAGATGCTGCTCTGTGACTCCAGGCCTGGGT +CAAGCCGACCTCTCTGCTTGTCCCCCCACCTCCCTCTAGGTAACTAGGCCCCAAGATGTC +ACAGCATCAGAAATACAGAGAAAAAAAAGCATAGTTAATACTGCTCCTGCTGGGAAGGCC +CACAGCAGGGCGGATGGTAGTGCACCCAGCAGGTGTGACAGCCCAGTCTCCCCATCTGCA +GGGAAACCCCATGCCCCCTCAACCCTTACTGCCTGTCACCAGCTGGACCCCGGTCTCTCC +TGGCTGCTGTCTCACCTGCCTTCACATGGTTTCTGGTTCTTTCATCTTTTCATTATAATT +TTTGTTTGGAGAAACTTTAAAAGTAAAAGAATGTGGGCCAAAAATATGTTAACCCATTCC +CACCAGACATGCAACACTGGGGGGGCTGTTCCTCCCAGGCTCCTTGGGCCCCCCAGCTGC +CCATGACCCCTGGGTGTGCCCAGACAGCAGGGGGGGTGTAGTCCACCCCCACCACGTCCA +CAGCCCCTGACTCTCGGGTGGGTCTGGGGGCTGCTCGCCCTGGGAGAGAAGCCATCCCCT +CCCGGCGGGGCCATCTATCAGGACCTCTCTGACATCAGCATTTTCCTCTGTGGCTCTAGC +TTCCTGCCTCCTATCTCTGTCCTCCTGAGAATGTCCCATCTGTCCCATCTTTCCCATTAG +CAGCCCTGTCCTTGAGCCCAGTGCAAGCTCCACAGTGGAGAGGCCACTCAGCATTGTAAG +TGAATGCTTGTCCTCGTGCCTCCCTCCCCGGGAGCTCTGCCTGGAGCTCCTTCCCGGCTC +CGGTGAAGCTGCAGGCAGCATTTCATTGCTTTGACCTGTCCCAGGGCACCTTCCTTCTAG +AAGGGCGACTTGAAACCTTTTCATCACAGCACGACATATGCCCGGTCTTGCTTTGGTCAT +CCAGTTTTATGTGTCCCATTTTATACTCATTTTCTTCCCCTCATGTTTTGTTTTTTTCTT +TTAACTAAAACGATCCATTTATTGAGTTAAAAGATTGTATATTCAAGACGTTCACTCAGT +ATGTGATCCTGCATAGTAAACCTCGGGTGAAACAAAGACAACTATTTTTATATATTTTTA +AAAATTTAGTGGCATATAGACTTCTAATATTTTGGGGGGGGGGATATTCTTTTTTTCTTT +AATTATGGTAAAATATCCATAATATAAAACTGACCACTCGAATCATTTCTAAGTGAGCAG +TTCAGTGGCATTTGGCATACTTACATTGTTGTGCAGCCATCACCCCCATCCATCCACAGA +ACTTTTTCATCTTCCCAAACTGAAACTCTGTATCCATTAAATAACAAATCCCCACCCTTC 
+CCCGCCCCTGGCAACCACTGTCTACTTTCTGTCTCCATGGATTTGACTACAGTGTGTCTA +AGTGGGATCATTCAGCATTTGTCCTTTTGTGACTGGCCTATTTCACCCCTCTGTTTTATA +CCTTTCTGCTGCGTGGCGCTTTGGCTTTCTATGTTCTGCGGCGGTGGGTGGTAAAGTGGA +TGTCCTGCTTTTAATTCTGAGGGCTCGCTTCTCAGTTTTCAAAAACACACTTTAATCTCT +GATTCTCCAATTATCAAGCATTTGCCATGAAATGATACTTGCTGCTCCCCAGCACTGCAG +ACTGTGTAAGAGACATCTCAGCCAGCATTTGGATTAAGTCAAACCCCAGGAATCCGCACC +TCCTCCACCTCATCACCCAATAAAAAAGGTAACGCTGAGGTATTAGAACATTCACATTAG +TTCCATGAGCATGCCCTCTTATGGGCCTGAGAGCACCCAGGCTCTGGTGCAAGCCCCCCA +CAGCCCAGGGAAGTGTTTCAGGACTGGAGCCCTGCTCAGTGCTGTGGGCTGCACAAGGGG +TCAACTGAGGACCCCTAAGCCCAGGAGTGGCAGAGGCTGGGGACCATCCCATGGGCTCCC +AGGATGGCCTTGGGGCCACAGGGATTATGGCCAGGCTTGTACTTCCAAAGCCCTTCCAGG +GAGGAAGGCGCTCCACTTCCTGCTTTGGCTTAAAAAAGTCCAGGGAAGCACCTGATTGGC +CTGGCTCAGGTCACATGACACTCCCTCTGTCCAATCACTGTGGCCAAGAGAATGCGCTAT +TGTGATTGGCCCAGCCTGGGTCAGAGCCTTCCCCTGCTGCCTGGAGCAGGGTCTGTCTCC +AGGAGAGAGGAGTAGGGTTCAGCCATTCCAATGGCGCTGCCGCCTGAAGAGCTGTCAGTC +CTACTTTGCAGTTGAGCTGGCTCTGAGAAGAAAGTGCTCTGCCTAGCCCTGTGATTGATC +CACAACTTCATGGGGACTCAAACATGGGTTCTTTTATTCCAGGTGGAGAAATTGAGGCCC +CAAGCAGGGGCAGACTCACTGGGGTCACATGGTGAGTTGGTGGCATGAATGGGTGCTCCG +GTCTACACTGTACCACCTCCTCCAGGAAGCCTTCCTGGCTTGGTCAAGCCAGGCCTGGGG +TGGTCACCACCTGATGTCACATCCCCTTGAGACAAAGCCCCCCCCCTCCCCCGCCACCAG +GCACCTATCCCCAGTGTCCAGGACATAGCTACTGGATTCAGGCAGCTTCTGCAGATGAGC +TCCCAGCCCTATGTCCAGGAGGTTTCTGGAATCGACAGCTCTCCATGCCACAATACCCTC +CATGGCTATTTAGGGAACACCCAGTGTGTGCCAGGTTCGGCTGGCCTGGGAGAGTGCACA +GTGGACACTGGTCACCAGGAGACACAGGGGGCCAGTCCACACCCTTCCGCAGTCGGGCCT +GGGGAATGGGTTGGGTACTGACCTGGAGGAGGGAGGGACAGCAGGAAGGGCTGGCTGGGC +CTCAGCTCTCTTGCTTACCTTCCATTCTCTCGATGGCCCTGGCTAGACACATGCTGGCTC +GCCCAGGCCCTACTCATGTTTCGGCACAACGACCCTGCTCTGCCTCCGTCCCCAAGACCC +TGGGCTGTGCAACCAGGGCCAGACCTGTTCTTCAGCCGCACCAGGGCCTGGCTGTGCCTC +TGCCTGTCAGTGGCCAAGCCCTGAGCCTGGAGCCATCCAGAGCCGCACGGAACCCACAGC +CTGCAGGGTGGGGCTAGACATGGCTCCTCCTGCCCGCCCGTGGGTCGTACACCAGCTCAT +TAGAGCCAAAGCAGCATCGATTTTGTGAATGATGAGCATGTGTACGTGTTTTTGGAGCCC +TGGTGTGATTTGGCGTGTGCAGCTGATGATTCAGGGGGGTCATGTGCAAACGACACGCCT 
+TATAAATGTGAACCCACCAGAACGCCCCGTTACGGAGATTTACACTGATCCTTAAACATC +GTAGATTAATTTTTGGAAAACATCTATAAAATAAGAGATTTATACCTAAAACAATACCCT +ATGCTGTGACAGGAAGGGATTAGAGTCAATTTATTCCTGTATTAGTCAAAAACTCAATTT +CATATATTGTGTGGAAAAATTAAAACCCTTTCTACATTTTATTACAAATTATGCATACTC +CATATATGGAGATAAATAATTGATAGCATTACAAATACTTAATCTAGAAAATATTTATTA +GGAACAGACAACTCATTTTTCATGTGTTTTGTTGAGAACAATATTGCACACTTTAAAAAA +TTAATGATGACTCTTTGCAAAAATAAATGTCTACATCCTAATTCTTTTGTTTAGCATTGT +GTCCAAGCCAGGCAGACGCAGCCGGGCAGTGGGCAGGTGGCCATGGAGCTGGGATGTCCA +CTCCCTGGCCCCTGGCTGTGGATGGGTCAGAGGGCTCATGTGCTGTGGCAGTTGGCCAGG +GAAGGTGCATTTGGAGAGTGTCAACATACTGTTTGTCTTCTGCTGTGCAAACTCTCCTCG +ACTTGTAATTTAAAACTAAAAGTGTTTCCGACTTAATTTCTGAGAAAGCACCAAGTAATG +GGATTTGTGGATGGACTTGAAGAGCCAAGAAATCAACACTGACTGCCTCTCGACCTGGAG +AATTACGGAGCCCATGAATTCTCTGGGCCTCAGCTCTGGTCCCAGGCTCTGTGCACAAAG +AGAGGTGGGCCTGGTGGGTGGTGCGGAGCTTGGCCTTGTACTTCGTGGCTGGCTCAGCCT +GCCCTCCTGGGACAAGTGGCTCTGCCGGGTTCTGTGTGCCTCCTGGCATCTCTCTGAGGG +GAGCTCCCAGCCTCTGCCTAGAATACCTGGGCAGCGTGTTCAGACCTGGGGCCCCCTGCA +CCATCACCCCATTCCAGGCTGGGAAGCTTGAGGACCCAGGCAGGGAAGCTTGAGGACCCA +GGCAGGGCCCCAGTGTGCCACAGGGCTTGGCCACTGTGCAACTGCACTGCAGCTCTGGCC +TCTGGCCGGGCCTTGCTTACCGACCTCTGCCCTCTGCCGTGTGTCCTCCACCTTGGCCAT +GCACAGCGGGGGCTAGGCGGGCACATGGTGGAGGCTGCCAATGGTGCCCTGGCAAGTTCT +CTTCGCCCTCGTGCCAGCCACTCCAGGGAGCACCAGGGATTGAATAAATAACTGCCACGT +TTATTAAAAAATAAATTTGGTGCCTAACGGAAGTGCTGTTTCTATACATAGAAAAAAATG +TTTAAAATAAAAACCAAAACAGTTCTCAAAGGCGCTGTGGCATTAATTATTCATCGCTTG +CTAATGAAAACATGGCTTTGCAGGCGTGGCCCCCACCTCCCTCCAAATAAGACTCGGGCT +CAGGGAAAGCCTGGCCTTAAGGGCAGGGAGCTGGTGACCCCTGAATAAAATGGGGGTGGC +TGTGATATCCTCCATGGGTATTTAGGGAGCACCCAGTGTGTGCCAGGCTCGGCTGGCCTG +GGGGAGTGCACAGTGGGCACTGGTCACCAGGCAGGCACAGGGGGCAGTCTGCACCCCTCC +TTAGTCAGCCTGGGGAATGGCTTGGGGTCTGACCTGGAGGAGGGAGGGAGCAGCAGGAGG +GGCTGGCTGGGCAGCAGAGCCACCTCTTGGAGGAAAGAAATGGGGACAGAGGCCACTCTG +GCCAGCACCAGAGGACAGTGGCTACATGTCTGGGCACCCAGGCCAGGGGCCTGGCAAACC +CCCACAGGGTGGGTGGGTGAGGCCCTGTTCTAAGTCATTTGATGACCCAGCCACCCTCTC +AGATGCTGCTGGATCCCAGACAGAGGGAGAGAGGGAGGCTGCAGGCCTTGCCGGAACCTA 
+GGCGTGCCATGCATGCCCCAGCAGCCAGAACCAGCCATGTGGTGGTGCCACACCAGAAAG +GGCCGGCCTGCCAGGCAGTCAGGGGCAGATGGGAGCAACTTGGGAAACAGCAGCAGGAGC +CTCCACTCCCTCTGCCTCCACCTCTCCAGCAGACTATATGGCACACCCTGCCCAGCTGTG +TGCAGCAGGGAAATTTGTGCCAGGCCTCGGGCACCTGAGCCCACACTGGGCATCGCTGTC +CTGTCACTGATCCTGGGCACTGATAGGAACTGTGAACAAAGCTCGGTCCAGGGCCCTGTG +GACTGAGCTCCCCATCCTCAGGGCCCGGGAGCCCCTCCTCCTGCAGTCACCTGGGGCAGC +CCTATCTCTGTGGGGAGGCCTGGAGCATTTGTTCTTTTGTTCATTCATTCGTTTATTTAT +CCATAAATGCCCCCGAGCTGGCTGCCTGGTCTCCTGGGGAGAAGGGATGCTGATCCCATC +AGTCACCTGCCTGAGCAGGGGGTCTGGGAGGCTGAGAGGAAGCCTCTCTGAGCTGCTGTG +GGTGCAGGTGGTATCCAGGGCGACAGCTCCTGGGCAGGAGATGAGCTGTGCCCTGAGGTG +GTCAAGTGGAACCCCAGCAGGGGCCTTTGAAGCTCCGCCAGCCCCTGGGGCAGGTCAGGG +ATGGGGAAGAGGCTGCAGGGGGCGGCAGGAGAGTGAGGCTCCATCTCTGCTGGGTGGCTC +CGTCCTGGTTAGCAAGGTCTCTCGCGTCCTGGCCCCTGTGCTGGCTCCTGGGACGTGGAG +TGGAACCCGCATCCCTGCTGCCCATGTGTGAGGACCCGGGCTGCCAGGGCTCCAGCTGGG +CCCCAGGCTCAGGGTTTGTGTGCTGGAGACCCAGCAAAGCTGGACAGAGCCTGGCGGGAC +CTGGCTCTGCGATGGCCCAGCTTGGTGTACTCAGAAGGCCTGATGGGAGCTAGCCTGGAC +ACCTTGGCCTGATTTAAGCCTGGGAGCCGCGTAAGGGGAGGCAGTTGGGCCTGCCGCCGA +ACCGACGCCCCGCTGCCCCCACTTGCCTCTCCCGCCGGTGGGCCTGACCTTGCTGGAAGC +ATCCCTCAGCCTGGTATGGATTTCCTGTCTTCACCGAGGCCGCAGGGCCAGCCGGGCGCA +GGAGCCGAGCGGTGGTCAGGCCCGGGGATGGTTTTTTGTTTGTGGGGGCAGGGGCTGAGG +GGACCCAAACCTTCCAGAGCTGGGGGGTTTTCTTCTCACCTGTCACTCCGTGGCTCCCTT +TGGTGGCTCTTTGTCTGGCATGGGTGCTGGGCCCACCCTGCGGTGTGGGCAGCTGTCCAG +CCTAGAGCCTGCCTCTGACCCTGGTCCTATCCTCAGGGCCCCTTCCAGGGCTGGAGCAAC +AGGGACACAATTGCTGAGCTCAGCCTGTGTCAGGTGCACAAGTGACCTCACTTGGGAGAC +AGTGCAACTGCAGTGGGCCTAGAGAGGGCTCTTCAGCTCCCAGTCACACAGCCAGGCCTG +GAGAGGTACACATCCTGGTCCCTGATTCCCCCATCGGCACTGGGTGGACTGAGGAGTGGA +GTGGGGACCCTGGGTGGGGGAGAGGAAGGAACTGGGCACAGGGGGAGAGCCAGGGACCAG +AGGGCATCATGGAGATGAAAGGACAGGCGAGCCCAGGAGGGCACACACCATGGCACGGCA +GGCAGACAGACATAGACATAATGCCCATATGGCAGCATGATCTCTCACACACACACATGC +TCACATGGGCAGCACACTCACACACACGTGCATATGGACGGCACGCTCACATACACACGC +ACACATGGGCAGCACGCTCACACACACACGCACACATAGGCAGCACACACACACGCACAC +ATGGACAGCAGGCTCTCACACACGCACACATGGGCAGCATGCTCACACACACACATGCAC 
+ACATGGGCAGCATGCTCTCAGATACATGCACACATGAACATGGGCAGCACACACGTACAC +ATAAGCAGCATGCTCACACATGCACACATGGGCAGCACGCTCACACACTTGCACACATGG +GCAGCACACACACACGCGAGCAACACGCTCACACACACAACATGGGCAGCACACACATGT +ACACATAAGCAGCACACACAATGCACACATGGGCAGCACACACATGCACACATGGGCAGC +ACACACATGCACACATGGACAGCACACTCACACGTGTACACATGGGCAGCACGCTCACAC +ACATGCACACATGGGCAGCATGCTCACACATGCACAGATGGGCAGCACACACACACATGC +ACACATGGACATCACACACGGACATCACACACATGCACACGTGGACAGCATGCTCATACA +TGCACAGATGAGCGGCACGCTCACACACTATGAGCAACAAGCTCACACACATGCACACAT +GAGCAACAGACACGCACACAGGCACAACATGGGCAGCACACACACATGCATATGAGCAGC +ATGCGCACACATATGCACACATGAGCAGCACGCTCACACATGTAAACAGGCAGCACACAC +ACAAGCACACAAGAGCAACACACACACATGCACAACATGGGCAGCACACACATGCACACA +AGCAGCATGCTCACACATATGAACACATGGGCAGCACGCTGACACACACATGCACACGTG +AGCAGCATGCTCACACACGCACACGTGAGCAGCATGCTCACACATATGCACACATGGGCA +GCATGCTCACACACACGCACATAGGAGCAGCACGTTCATACACACATGCATACATGAGCA +GCACACTCACATGCACATGTGTGCACATGGACAGCACACAGATACACATGCACATGTGGG +CCACACATACACACACATGCACACATTTATGCACATGGGAAGTATACTCACACACATGCA +TGCACACATGGGCAGCACATTCACATGTGTACATATGTGCATACCTAGGCAGTATGCTCA +CACACACACACACTCATGTGGGCAGCACACACACGCACACCTGGACAGCATATTTACACA +TGCATGTGCAGCACACATACACGAGGCAGCATGCTCACAGACACACGTGCACACACACAA +GCACACTCAGAACAAGCTGTGGGGCCTGCGGTGAGGACACCAGGCAGGCAAGGCGGCTGC +TGCAGAAATAATTAGTCACTGGTAGACAGGCCTGGCTCACCTTCTCCAAAGCCAAAGGGG +GGCTACCAGCCCCCAGCTCTGGGAAAGGGCTCTGAGGGCCCGCCCCCACCTTGCAGTGGT +TCCTGGGACCTGCAGGGAGCATCCCAAGCACGGCCCGTGGGGTGCGGGACTCTGCCATGT +GATCCCTGAGGCTGGGCCTGGCCGGACGTGCCTTGGCCCCTGCAAACCGTGCACATCCCC +AGGTCCCCCAGCACAGCAGCAGTGCAGGGAGTGGAGACGTGAAGATGGGGAGTGGAGACG +TGAAGATGGGGAAGAAGGAGGTTGGAGGTGCAGGGCCGCCCTGTGTTCTTGTTGGAGTGG +GGGTAGGAGCACTCCAGGTCTGGTTGGGCCCTGTGAGGGTGTTGGGGACCCCAGGCCAGG +CCTGGGTCGGATCCATCACTCCCTGGAGGAGGCATCTGAGTCCAGGGATGCCGGACAGGC +TGGTCCCAGGTCCCCAGGCTGACTGGGCCTCAGGCATATGGGCATCTGCAGCTGTCTGGG +GCCAGGCCCAGGCTGAGGGGCAGCCAGCAGGGTGCAGGAACTCCCAGGCCCTCAGCTTGC +AGCGCAGAGAAGGGTGACTGCTCTGCTGTTAAAACAATTTATTTTGCTCTTCAAAAAGCT +AACAGCTGTTTACAGTCAGCATGTGGGAGCTGCCCCCTGGGTGATGGATTCTGCCAATCC 
+AGCCCTCCCTTGCCTTCCGGCGAGGAGGGGTCATCCCAGGCTTCGGGTCAGGATGGGCAG +AGCAGACCACTGGGGGCCAGGCCATGATGTTCCTGGCCACTCCACGTCCCCTCGGCTGCC +GTCCTGTGTGCTGCCCTGGGAGAGTGCAAGGAAGGGTGGCCCCCTGAGGCTGATACGAAA +TGGTACGGTGATGTGAGGGGGTCAGGAGGGAGGACCACTGTGGGAGGCGTGAGGTGCAGT +GGAAAGGTCAGGGAGACCTTCTGAGAAGGCCAGCAGGGAGGGCTGCCCAGGGCCTGGAGA +GCGTGCATAGCTGGGGGCAGGGTGGGGGCAGCCGGCAAGGGCAAAGCCCAGTGGGTGTGG +CGATGCCCTGGGGGATGGGCCCCGATGCCCCTAAACTGGTTCGGCTCTCTAGACCAGACT +AGTGTGCTATGTCGGAGCCGAGCAACAGTCCCCCTCAACTGTATGCATGTACAGTAGGCG +CTCAACAGACACTTGCTGGATTTGACAATAGCCCTGGACGGAGGACTGAGGCCTGGGCTG +CACACATTCTGGGCTCCTGCAAGCCCCTGGCAGCCCTGAGCTGGTTTCTGAGTGCAGAGG +GAGGGGCTGTCAAGGGTGGAACCCACACATGCCAAAGCCTGGAGGCCAGTGCAGGGGCAG +GGCACCTGCTGCCTCGGGACAGCGCTGAGGTTCAAAGCCACCTGCTGGTTCCCACCTGCC +TGGCACAGGACCCTGTGTCGGTGGGGGTGGCCAGATAGGGCAGCTCCTTGTCTCCCTGGA +ACCTGTACCCTCAGAGGTCCAGGATCCCTGGGGGAGGTTCCGGGGCTGGAGAGACGGCCC +CTGTGGCAGGGCAGGGACAGACGGGCTGATTCTATCTAGGATGGGGGCACACAAAGACTT +CCTGAGGGGCGCTTCAAGGCCACAGCAATCTAGGTGGGACAGGGACATCAGTCAGAGGTG +CCCTGTGGAGACCCCCAGGTCCAGCTCCTGGTACTGTGAGAAGCCAGTTGTTCCCTCGCA +GGCCGCCAAGGCACCGCACAGTCCTGGCCTGAGTCGTTGTGGGGTCTTGGGGAGGAGGGT +TCTCTGGAAGGACACTGAGCCTGGACTTTGGGGCAAGAGACCCAGAGCCCCTGGATGCTT +GCGGCCTCAAGGCCCAACATGCAGCCAAGCAAAAGCTGGGGGCACAGGCAGGGGATCCCC +TCTGGGGAGGATGCAGAAGCCGAGCCAGGCTGGGCCGAAGCTCCTTACCTGATCTGGTGT +CCTCCTGTGCGATGGTACAAGAGCCTCAGGGCACATCTGGTCTCTGTGACACCCCCCGAG +ACACCCGGGGAATGTTCCCAATGACCACTGTTGGGCCACTGCCCGACCACGTCACGGTTG +CCCCTGCCAGGACTCAGGCAAAACCCAGCTGTCCCTGGCGCTGTCTCCCAACAGCCCTGC +AGGGCCCAGTGCTGGACCAGGGCCTTGTCTCCAGCCTCTCCCCCATGGGACTCTGGGGCC +TTTGGGGGCCCTTGGGGGCCTTCCTGCACCCTCACACCTACTCATCTCTGGACTCAGCCT +CAGTGTGAATGCTCACCTCGGGCTGAGCCAACCCCTCCAGACACGTTCTGCAGCTCGAGG +AAGGAACTGCCCTTTGCAGCTGGGATCTGCCCGAGTTTCAGCAGCAGCCTTGAATTGTAA +TCCCGAGCGGCTGGCGAATAACACAGCACACAGCACTCGAGAGCCCAGGTGAGTCCTAAG +TGGGGCTGGGCTATGGGATCTCTGGCTTCTGGGCCTTTCTCAGAACAGACCCTCCAGGCC +ACTCTCCCCTACATCCCCGGTTGCTGCACTGGGTGCAGGGGTCCAGAGCCAGGTCAGCTG +CTGCCCAGGGAGAGGGCCAAGACTGCAGACCCCGCCAAGCCTGGGGGCAGCCTTCCTGAC 
+AACAGCACCATTTGCTGCTTGGGATCAGCCAGGCCGGCTCCCCGCAGATGGGAGGGCTGT +GAGGTGAGCACCAGCGAGCCCCTGGAGGTGCCAGAAAATGCCAGTGTGGGCACCAGGGGG +CTTCCTTGGGCTGGCATGTCAGGGGAAGCCCACTGGGGCGGCTCAGTGCCCATGATGGAA +GCTCAAGTCCCACGGTGCTGCCCCAGGAGCTATACCCCAGTGAGGGTCGTTGGCTGGCGA +TGTGTCTGGCTGGCATGGGCTGCCTGGTAACGAGGGGGGCCCATGTGTGAGGAGCCTTAG +GTAAGGATCCAGCTGGCACACCGAAGACCCCAGGCCAGGGGACACAGGCTGGGGGAGGGG +GCTACAGAGACATGAACCCAAAAGAGAATGTCTGCTGACATCCCAACCCTGACAGATAAC +CAGCTGGGTGACAGGTGGGGGCTGTGCTCAGATCACCCTCCTCTGTCCTGTGCACATGTG +TGGGTGCATGGGAGTCCATGCGTTGCTCCTGGCTTGCCCCCTCCTCTCCTCGGCCCAGCT +TGTGGTGCTGCGCTGTGCATGGAGCTCGGTTTCTTAGCAAGTTCATGCCATGAGCCGCCA +GCTAGGCTGAGGAGGAGTGGGTCTGGGTGCCACTGGGGCGGTCTGGGCTTCCCTCAGCTC +CAGGGCTCTTGCCCTTGCTAGCATTGCTGTGATTCTCACCATTGCGCCCTGTTACAGAAG +AGACAACTGAGGCATGGGGAGGTGATGGGACCCCCAGGGCCACCCGCAGGAGGTGGCCAG +CCCAGCAGCCCTGCCCAGTCCCCCGCCGGCCTTGTGCTCCCATCTGGGCCCCTATCTGTA +GTTTCCCCAGTTAACAAAATCACAACAGTCATTCCTGGTTGTTCTTGCTGAATTCTCTCC +ATTTGAGGAGCAGCATTTTGGGGTCTGTGTCCTAAATGGACCCAATCACAGGAGTCACCC +AGGGCACTGGCTCAGCCCAAGACAACACCTTCCCCCATGTGGACACACACACATGGGGGA +AAGAAAACCTTGCCAACCCGCCCTCTAGCAAAGGGGCCTGGACTCGCATTTTTAACAAGA +ACCAAGAGATCCCGTGGAGGAGGTAGTCCACAGGGGGAAAAACACCCTTGAGTCTCAAGT +CCCCCGTGGCTCCCCAGCTCTGCCAGTGGCTGCATGGCGGGTGAGATGATGGTGGCTCTT +GGGTCCCTCTGAGTCTTGCTGCCCAGGCTGGCACCGGCACATTTTGTCTTGGCTCCATGC +TGTCGGGCCCTGAGCTGGGCACTGGGCTCCCTGAGTCTCGTGGGAGCTGTCTGGGTCCTG +AAGCAAGTGGTGGCCCGAGCAGGGGAGGCGGTCCCCACCCTCGGGGACACTGTCTGCTCC +GGCTGTGGGCTCCGGGGGCTGAGAGCTATTTCGGCTGCACTGGCCTCTCCACTCCTGCCA +GGCTTCAGTGAGCTAAGATTGTGCCAAAGTCAACAGAACAGAAAAGAGAGGCTATGGCGC +TCTGTCAGGGAGAAAGCCCAATTAGGAGGGGCTTGTGGGGGCAGGGCGGGAGTGCAGAGG +GAGCCCCGGAACCTAGCCAGCCCACTGTGATGACAGGGCCTTCGACAGCAGCCCAGCACT +TGGACTGTGCAGAACTGCGGAGATGGTGCTGTGAGCCTGGGGAGGGTGCCTTTCTCCCCA +GGGGCCCGCATGGGGCCAAGTCTACCAGACTTCTGAGCCGGCATGTCTGGGTACAGGACC +TGGCCCTGCAGAACTAGCCTGGGCATCAGAGCCCACCAGAGGTGACTGCCAGCGGCCAGG +ACCCACCACGGAGTCTCAGGGAGCAGCACACGGGTGGGGAGCGGGTGGAGGGCACACAAC +GCTGGTGGGGCATGGGCCTGGGCACCTCTCCAGCTGGACACTGCACATGACACCCCAGGC 
+CAGGCCAGGGTCCCCTGACTCACAGACCCCAGGCACAGGGACATGCTGTTTTCCTTTTAG +TCCAAGTGTACGGGGTGAGGAGCTGCAGCCCCCACCCTGAGGGCCTGCTCTGTTTAGAGG +GGGTGCCAGGGTCCTGGGTCCCTCCCTCTGTTAGAGCTGTCTGGAAATCCTGAGTCGGGT +AGGTGAGGTAGGAAAGGGAGACCTCCCCACCCACTGCCCTGCCTGAGCGGCTCCCACGGT +CTAAACCCTGTCTTGGGGGTGGTTCCCAGGAGGAGGCAGCAGAGCCTGGTGAAAAGAGAC +CAGACAGTGTGGACCTCCCTGGCCCTGGGATCAACAGCCTCTCCTGGGCCCAGCTTCCCC +AGGCCTGAGCTTGGGGTTGGAGGATCGGCAGGAGAGTGAGTCCATGAAGGCACCATCCCC +ACCCCTCAGCACCCTTTCCCTCCATGTCATCTCGGCCACCCATCTCAGAGCCCAGCTCAG +CACCCGGGCCCTACTGGGCCACCCCGTCAGGCCCTGAGTCCCCATGGAGGGTTTGGAGGC +TGGAGGCCGGTCAGCTGGGGAGGGGAAGGGCAGAGGAGCAGGGGTGAGCCACTTGGCTCA +GGGCTGAGGACCTCTCGCACTCTGGCCACTGCAGAGGGTGTCCGGCTGGGGCAGGAGGGC +TGTGGGAGAGAGATCACATCGTGTGTGGGAGGGGACAGTCCCCTCCTGCCCAGTCCCCTC +CTGCCCAGTCCCCTCCCCACAAGGCCTTAGGGAAGGGTGGGGGTGCCACACACCATGGGC +TGGTGCCCCCATCCTGGCTTTGCCTACCTAAGGCTCTGGGATTGGCAGGCATGGAGTTCC +CCAGCCCTGAGCGTGGCAGGGGCAGGATGACAGACCCACGGTCCCCTAAGCCCTCCCTCC +TCCACTTACACCCTCGCCCGCCAGACCCCAGTCATTCTCCTGCCCTCAGGGCCTGGATTC +AGCCCCACCCCCAGCCTCCTTGTAAGCCCGGCCTTATTAGTGCTGCTTCACCCCGACTTT +GCAAGTCAAATTCCGCATTACTGCGCTCCTGCTCTGTTTCCACCTTGATGCATGGAGAGC +TGGAGAGCAGCTGGAGCAGAGAAGGCACTGCAATGAGGCGGCAACCCGTGCTGGCCAGGG +GCCCCAGGGCCCAGAGCCAGGTGGGGTGGACCCCGAGGCCCTGTCTTTCCACAGGTGTCC +AGGGTCACTCCCACCCCACCCTTGGCCTCCTGCTGGCTCAGCCCCACACCCCCACCAAAG +GGCCAGGCTCCGGGTGAGTGATCCTCAGGGCCCCCGCACAGGCTGTTCCCTCTGCACACA +GGCTATCCCTGCCTTCCTGTCCAGCCAGTGGGTGGGGAGAAGCGAGTCCCTCCTTTCAGG +GATTCTCAGGAACCGAGTGGCTATCAGGGTATGCAGCTGTGATGCACGTGGCAGGAGAAG +ACCTCAGACGCCACAGCCTAGCTCTCTGTGGGGCAAGTGTGTCTCAGCCTCTCTCTGAGG +AGAGGAGAGCCCCTGGACTGGAGGGTGCCCTGCTCAGCTCTGCCTGGGTATTCTGTGGCT +TGCAGGGTGGAAGGTGAAGGGCTGCAGGCGCTGGCCTTCCCAGGCCCCCCACTGTCTGCC +CAGCTTTTCCTTCTTTGGCAGCCCCAGTTCCTTCCTTATACCCAGTGGGACAGCAGGGCC +CAGCCCTAGGGGCCGCAGGAGACAGACATGCCCCAACTGAGACAGACAATGGCCAGCATG +GCAGGTGGCCCAGTCTCGGCCACCAGGACGTCCTAGGAGGTACGGCCCCTCAGCTAAGCT +GGCTGTCCTTAGTGGCCCAGCTCTGGCTCTGGCTGGTCCTGTGAACTCAAGATGGCAGAC +ACTGTTCCTGGTCTCAGGCCAGGTCTTGGGTGAAGAACGTTGGGTGTAGGACCCTGTACC 
+TCTCCTCCTGGAACACCCCCAGTGGATGGCGAGGTCACTGAGGCTCCATCCAACGGGCAG +CCTCCAGGGCAGCCCTCGCTCCTCCCTTGCCCTGGAGGTGGGCACGGGTCAGTGGAAGGA +TGGGACTGAGGGGCTGGGCCCTTCACCGACCCTGGAAGGTGGCTCCCTGTGGCCTTCCCT +GCAGTTTCCTGGTCCAGTGTGGTTCAGCCTAAGACACAGGTGAAGGCCTGGTTATCACTT +GGCTGCTTTTGCCAAAAGAGTGGCTTCCAGTGAAATCCTTTGGCTGCCGAAAGGCAAAGT +CACCCTCCCTCCCTGTGGCCCCAACACCTGCAGGGTTTGTCAGCAGAAGGTTTTCACGGT +CCACTGGGCATTCGCGGGCCTTTGGCATCCTCGAGGGCCTGCCTTTGATTAGCACACACC +CTTTGCTTGACCCAGAGGGTCCAGATGGCACTGGCTCCACGGCATGGCCTTGAGAATTGA +GGCCGGCTTCGGCGAGGATGGTCCCACCTACTCTGGCCTTCAGGCCTTCACCAGGAGAGA +TTCAGGCTGCTGGCCCAGCCAGTCTCTGCAGGGACACCCCGGTGGGCCCAGGGAGGGACT +GGGGTCAGCTGGGGCCCTGGACTTACAGTGGATCCCACTTCCCCCACGTCTTGCTTGGTT +CTGGGCATAAGGGTCCTAGTTCTTAGGGCCTGGCCTGGCCCTCTCACTGCTCCCGGGAAA +CCTCGGGAAACATGGCAGGCTGTGTTCCCCATCCTCAGGCTCCCAAGTCGCTTCTGTCCA +GTCCTCTGCTGGTATCTGTGACTCAGCACCATGACGGGGCCCTGTCCAGGGCGTCCTGGG +CAGAAGGCTGCTCCGAGTGCACAGCGGGCACCCCCATGGACAGGCTGCGTCCATGTGCCA +CTCCTTGGCCGCCCTCTCTTCATCTGTAGTGATGACACAGTGACCCCCACCTGCAGGCCT +GGTGGCACTGCTGTCACCAGGCACAGGGAGCCTGCCCCCAACCTGGGTGGTGTTGTGGGA +GTGGCTGGGCAAGGAACGCTGCTATCCCTGCTGAGCTGGGTGGGCACTGATGGGCATGGG +GACCTGGAGTGCTGACCCCTAGCCCTACTGTCGGCAGGCCACGCCTGCCCCTCTGGGGAA +CTGCTGTGTCCCCCTTGGCTGGCCTAGAAGTGCCTAGAAATGTCTTTTATGTGGGGAGTC +GGTGGGTGAGGGCCCCACTGTCCCATCCCTTGAGGAAGGGAGGGCTCTGGGCAGGGTCTG +CACCAATTCCCCAGTGCCTGCTGAGGCCTGTGGCTGCCTCAGGGACAGTGTCAGTCACCC +CTTCACCCTGGCATAGGTGTCCTATTTCCAGAGTGTAGTGCTGGATGCTTCTGTTTGGCT +GTGTTAATTGAGACCAGCTTTTACAGAATGAGCCTCTTGGGGTGGGAAAATCTGCACAGC +CTTGGTGTTTAGGGTAACCTGAAAGTGGAGAGTGAGCCTGGGGTGCAGTCTCACTCCCCC +ATGCCCCTGGGCACAGCTACCCACAGGCCCACAGCAGGCAGGCAGGACTGATGCCATCCT +CCAGATCCCCCGACCAGGCCCAGAGGTGGGGCCAGGAGAGATCTGGGGAGTTGGTGCCAC +CAGATTTCCCAACCATGACAGGATGTCGGGGGGTGGGGGGCAACAGCAGGACTGCCCCAG +GCTGGGTCCCCACCCAAGTGAGGCTTGGTCGTGGCCTCATGGCCAGTTTGTCCCTCTGCG +GATCTGGAGGGGTCCTCGGCTGCAGCAGGGGGCCCAGATGATCCTGACCTGGGGATGGGA +AGGCCTCTGTCCGTGGTTCCCCAGCAGAGCGGGGCCTGTTGAGAGGAGCAGGGGATGGTC +TGTTGAAGGAGTCGGTGAACGGAGAGGCATGCAGGGATGGATATGGACAGGGCGGGTGCT 
+CAGACCCTACAGTCTGAGGTGGAGGGGTGGGGTTGGAGGCTGGAATGGCCGCGGGGGAGC +CTAGAGGGGTAGGGCTGTGGCTCCACCACTGTTGGCTAAGGGACCTCAGGGAGGGCTTCA +GAGGCTGGGGCCCTGGCTCCCAAGGCCTCAAGACCCTCCTGCTCCCCCTGAAGGTAGCAG +TGACCGCCTCTCAGAAACCCCTGGCATCCCTGGAGGCTGCAGTCCCCCGCCCCGCCTCCA +CTCTTTCTCTCTGCTCTGTCATTTTTCAAAAATAATTGTGCAGCAGCACCAGGCTGTCTC +TGTGCCCCTCCTGTAGGTTGCCCTACCCCACCAGGTCCCTGGCTCGGCGTCCCCAGACTC +TGCCAGGCTGGCCTCACCGTGTGTATCGAGGTGTTCTCCCTGCAAGGTCTTCATCATTCC +CCAATTGCTCGCATTTATTTTTCTCCTTCCTTGGTGAGTGGCCCTGCCAGCCTCAGCCTT +GTCTCTCCTTGGTGAGCGTCATTAATGCAGCTTCCGCGCCACGATCAGAGGGGGCTGCCC +CCACCCTGCCGGCCCTGCCGCTTCTCCTCTGCCCGCCTCGGGATGGATGTTCATTTATTG +CCCATAAATCTCTGCACAGCCCTTTGTAAAGACAAATTGGAGGCCTCCTGTTCCCCGGTG +TTGGTTGGGCCGATCGGTGGTGGGTTTAATTGGCCCTTCCCGTGGAGGCTGCCCCTGCAG +GAGAGGCTCGTTGTGGGCTGGGCCAGATGCACTGCGTCTGCAGAAGGGGCCATTCTCTGC +CCCTCATTTTGTCTAGCGTCTGAGGCTGGGCCATGTGCAGGGACAAAGTGTGGATCCTCC +CTCGGGCTTCCAAAACATGGCCTCCCCGCATTCACGTGGCCTGGCAGCAGCCCAGGCCCA +GGCAGGGCCCTCTCTCACCCAGGGGGTGATGCCTGCAGCTTCAACAGCAACCTCCAGGAA +GGAGGCGCATGGGTTCGTTGAGGTGATACAGGAGTGGAAGAGCTTGGTCCCTTGAAATGA +ATGATAGGGAAAAGGGGAAGTGCTGCGTAGAGGAGGGTGTGATCCCTGGCTGGGGCTCCA +TCCCACAGACCTAGGTGAGGACAGGCATTTTTGCATTTCCCAAGACCACCCTGGCCTGCC +ACACCCCCATCCTATGACTATAAAAACTCGCGAGACCCTGGCAGGCACGCACAAGCTGCT +GGACGTGGAGAGGAGCGGATTGGCAGAGGAGCACTCAGGCGGCTGGACGTCGAGAGGAAC +GCACTGACGGGCGCCGGCACCCCGGCAGGCTACCAACCGGCAGCAGAATGACCTGGAGTT +TGGCTGGGGCAGCCAGAGGAGAGCCCGGGCCCGACTCCAGGGGAAAACCGTCCCACTCCA +TCTTCTTCTGCCTTCCCCCATCTGCTACCTCCACTCAATAAAACCTTGCACTCATTCTCC +GAGCCCACGTGTGGTCCAATTCTTACACCAAGGCAAGAACCTTGGGATGCAGAAAGCCAT +CTGTCCTTGCGATAAGGCAGGGCTCTAACTGGGGTTTAATTGAGCTGAGTAACAGAAGCC +ACCGATAGAGTTAAACTAAAACAGCAAAACTAAAAGAGCACCCTGTAACACGCTCACTGG +GGCTTCAGCTATAAACATTCACCCCTAGACACTGCCGTGGGGTTGGAGCTCCACAGCCTG +CCCGTCTGTATGTTCCCCTAGAGATTTGAGCAGCAGGGCACTGAAGCAAGCCACAACCCC +ATTGCATGTACTGTGAGGGGAACAAGGGGACTTTTCCTGTTTCACCAGCACACAGCAGGT +GCCTAATAAACGCACAGCGAACTGGTCAGAGAGAGAGGGACCCTCCACCGAGCCCAGCCA +CTGCACAGGCTGGGAGGGCAGGACTCCTGCCGAGCTTGGCACCCTTTCCTGCACCCAGGT 
+GGCTGTGAGCCTGGCCCTGCCCCGGTCAGCAGGCCTGTGCTGACCCCACATGCAATCTGC +TGTTGGTGCCAAGGAAGTGCTACCTGTGCAGCTCTCAGAATGTCAGGCTGGGTGCTCCTC +CACTGAGGTCAGGGTCAAGAGACCCTAGGCCAGCTCTGGGGGGCCCACAGGGCGTGAGGG +GGGTCCAGGCAGCCAGCCCGGCCCAGCCTGGTTCCACTGCTTACTGTGTGGCCTTGGGGG +GCTCCCTTGGCTTTCTGGGGCCTGGGGAACCAGGCGCGAGCTTGGTCCTGTGGTCTGAAT +CCAAGAAGACAGGTTGTGGGTGGGGACATTTGGAGGGAGGGACAGCCTCGGCCTCAGCTT +TGGGACCGTGTGGCAGAAGCACCCTGTCTGGCCCTCTATGAGCTCCTCCAGGGCTGGGCC +CAAGCAGGGACCTCTTCTTCCATGAGGGTTTACTCATTCCTCAAAACAGGCACAAAAGCA +GAGGGCTGTTGAGGGGTAGTGACCTGCCCCTGGCTCTGGCCATGCGGGTCTGGGCACTGT +GGATCCACTCCATGGCCACCTGGCTGGGTGAGGACCAGCCTGGAGGATCAGGAAGAGCTT +CTTGGAGAAGGGAGTGGTGTCAGAACTGAATACTGAGCAAAGAGGAAGGACCAAGAAGAA +GATCCTAAGGAGTTCTGGGGCTGGGGGCTGGGGATTGGGAGCAAAAGGAGAGATGAGGCT +GGACAGGTGCCTGTTCCTCCACCCACTGGCTCAGGACTCCAGGCATGGCCCAGCAAGGGT +CCCCTGCCCCTGGGAGCAGGACTCCCCTGCAGCAGACAGAGGCAGGTGGGGAGTGCGACG +CACCTGGAAGGGGCTTCCGGAGAGGTGTGCCCTGGCAGAGCCCCACCTCCCCCAGGAAAA +GACAGTAGCACCAGGCCCCAGCCCTGGAGAAGCCTGACCAACCAGAGAAGTCCCACGGCC +TCCTCCATGGAGTTTGGGGCTCCTCCTCTAGACCTGGGATTGGCACTTTGGGGTCAGGCC +AGCTGCTGGGGAGCCCTGGGCCCCGACACATGCACCCTCCCAACAGGAGTGGGGCTCCGG +GGGCCACTGGGCTCATGCCCATCGCAGCCACTCTCCAAGGCCTTGGCATAATGGGATCCT +TGAGCTGCTGTGCCCGGCGGGGTGGGGGGGGATGGGCTGAGATACACCCTTTGCTGACAG +TGGCCATTGCACTTTTCCCACCCAGGCCTCATGTGGGGAGCCCTGCCAAGCCCAGGCTGT +GGTGTACACAGGGGCTGGGCTGCAGGGCAGGGCTCCGGAGCGGCCTAGCCCAGAGCAGGG +TGGGAAAGGAGCCAGCTGGGGTCCAGGCAGAACAGAGGCCAAGGCCACCAGTGAGAAATC +TGGGCATGAGGGTGGCCACCAGGCCGGCAACGTGGGGGAAATTGAGTTGAGGGCTGGGCC +TCCTCACTGTGGCTGGAGGCGGCCCAAAGGCCAGCTGACCAGACACAGAGCTGCCTCAGT +CACAGAGGTGCCACAGCTCAGGGTGCCACCCCGAGTGACATCCCTAGGCTGACTGACACA +GTGGAGGGAATTGACACCCAGGGTCTTGTGATGTCCCCGGGCACTGGGATGGAGAAGCAG +AGCAGCACTCTGGAGCACAAACACAGTTGGCAGCGAAGGGTCTGCTCCCGCGTGGCCTCG +GGCCCAATCCTGCCTGACATGTTGCTTCATGTCTCTGGGCCTCCGTTTCCTCATCTGTGC +CCTGGGGACATTAGCAGTGTCTTCCTGCGGGGATGTGAGTGTGAGGTGCCCTACCACCCC +AGGGAGGCTGTGAGCTGCAGGCAGCCCGGCAGTGAGGAGCCCCCACCCCTGCCGCCGCCG +CAGTGGCCCGCCCCGGGCCCCAGCTCACACCACAAACACTTGCCTCCCTCTCAACTTGTT 
+TCCGGCATGTTCTCAGTCTCGCTGGATGTTAGTTTACCATCGGCTGCTGAACATTCGGCA +GCTTCTATTATTTCAGAACTAATTTAAGGATTAAAAAGTAATTCTCCCAATTACTAGGCA +AAGCCCTGGGCAGCCAGATAAACAGGGGGCAGGCGCTCCCATCCAAGGACAGGGCTGCTC +CTGCAGCCCTGGGCAGACAGGTGTGCAGGGCAGCTCTGGTGCAGCTCAGCCCCCAGGGCC +GCAGGGGTGGAGTCGGGGTGGTTGAGAGGAATCCCCAGGAGCAGCCCAGCCTGGGCCCGA +CATCTGAGGGTTGCTTGCCCCTATCCCCCACCCAGGTCGGCTACAGCCACCTAGTGGTGA +GCCCAGCCCCAGCTGTCACCTCCACCAATTGTGCTGGGTGGTGCTGCCCTGGCTAGGCCT +GCAACACAGGGACAGGCCCATGAGCTGTGAAGGATTCGGTCCACAGGGCCCCCTGGGGCT +GGCCAGGCATCTTTGGACGGGGCCAGGAGAGTCTGTGGTAGACCAGCAGCCCGCATGGGG +TGGGGGCCAGGATTTGATACCCTCAAGGCTGATTCACGCAGCTTTAATTAGCTTTCTGCA +CCTGCTCTGGGGTCTGGGTGATCAAGGCTGGACACCAGCTGCCACCCCCAACCCCTGCCT +GGTCCAGCACCTGACCTCAGGGGCCCAGCCGGCCGGGGCTGGTCAGGCAGAGCTGGGGAG +TGCCGGGGTCGGCGCAGGAAGGAGCTGAGTCCCTGGGCCCTGGCCCAACGCCCCGCCCAC +TCCGGGCCCCTCGTCAGCAGCGGCCGGCCGGTGAGCTCATCTCTTCCTGGGCTGTAAATA +TCACCCAAGTGCTCACGACTCCCAAATGTGTCCTCCAGCCGGGCCCCTCTGCTGAGCTCA +GCCTGCACAGCCCACGCGACACCCCCGCCCAGATGTCTCCTGGGCCTCGACAAAGCGGCC +GCTCCTCTCGGCGCCCCCACCTCAGGGACGGGCGCCCCGTGCCGGCTGCTCAGGCCTGGG +AGCCCTGCCTGCCGCCTCCACCCCATGGGAGCGTCCATGCATCCTGCTGAGTCCAACTTA +AAACTCGTCCACTTCCTTCCGGATCCTCTCACCCCGTCCTCAGGTCTGGTTGGACTGTGC +CCAACTCCCCACCCCTGCCACTGCACAGGCCCCCACTGTGGCCTGGAGGGGAGGGTGCAG +AGCCCTGAGGGCAGCTGGTGGTGACAGTCAGCCCCGAAGCTGGCCTGAGGGCGACAGGTG +CCTGCTTCTTAGTTAGGAGTGGGCCCAGCAGAGGTAATGAACTCCTTGTCCTGGGAGGTG +TGCAAGCTGAGGCAGGATCTGCTGTCCTTCCACAAGCCATTGAGGGCGCCAGCATCCACT +GCAGTTGGGTCACTCAGGTGGGGCTCAAGGCTCTTTGGAAATGGGTGGCTTCGGTTTCCT +GGCTCTGACTGCAAGGAGCCCCCAGCCTCAGCTGAGAGCACTAGCCCAGATCCAGCAGTT +TAGCTGCCTCTTCTCAGGCATGACAGGGGCCACTTGGGAGAAAAACTGAATAAAACCCTG +CCTATGCCAGTGCCCAGGCACAGCCTGGCAGAATCCTGGGCCTGGCTGTGGGGTCACAGA +GACTGGGGGTGCCAGGGGTGCTGATGGTAGAGCTCCAGGCACAGGACAGAGACTGCACAA +CATTTTGACATCAGGCCATCCATGGAGAGGCCTCTGCTGGGGCTGCCAAGGGTCTCCTGG +GCCCAGAGGGGCTGCAGAGGGAGCCCTGGGCAAACAGCACCCCACCCCGGCAGGGCTGCC +CTGCCTGAGCTGCCCCCAGCCTTGGGGCAAGGCTGCCGGCCCTGGATCTCAGGTGAGCAG +AGGGGCAAACTGCAGGTGGAGGTGTGCGCAGCCACACGGGGGGCCTCAGCACCTGTGGGC 
+ACAGGCACAGGGTCTCTCAGGAGAGACAGGACATGTGGGAGGACCCTGAGGAGGGTACAG +GCCAGACAGGACCACTGCATCCTGGCTCTGCCCTGTGCTTGATGGGAGGGGTCTGAGCCT +TGGGCCGCAGCACACGCACAGAGAAGCGGGCGGAGGAGCTGGGGCCTTGGCTGCTAAGCG +CGTCCTCCCCGCTTCTTCTCCCTCCTTCTCCTCCCCCCAGGCTTCTGGAGCCCCGGCCGC +CCCCACGACCATCTGGCTTCACCAGCAACGCTCCCCCACACCCTCCCCCACGTCACTGTT +TTTCTCCCTCTCCCCCTCCCCAGCCACTGCCCTGGCGGGGAGGGGGCCCTTTCCTGGGAA +GGCTTCCTGGCCACGTGGCCCTTCCCTTTTGTTTCTGAGTCCTCTTCGGCCCAGGCCGTG +GGAAAGCGTGCCTGTGCATATTTCTGCCTGTGCATATTTTTGCCTGGATGCGTGTGCTGC +TCTCGAGCCCCCGGTGCCTGCATGTGTGTGGACATGTGTTCCCTGCCCTGCTGTGTGTGC +ATGCGTGTATGCATACATGGGTGCAGGGACATGCAGACCCACCACGGCCTATGTTTGGGG +TGTGGGCATGCTGTAAGTGGGGGTGCCCCAGACACGCATGTGGGGTCTAGTGTGACCACC +AGCAGCGGTGTGAGGGTGCATACAGCCCCCACTGGGCCTTGAGGCCAGGCCTGACCCGCC +CAACTCTCCTTCAGGGCCTTCCTCTCCTGCCACCCACCAGGGGTAGGAACAGGGTCCCTC +ATCCTCAGCCTCCTCCTGGGGGCTGCCCGGGCAGCCTGATTGGGGTGGGGGGCCCGAGAG +CAACTGTCCCCACAGGGGCTGCTATGTCCAGACCCATGGACCTGGCATGTGCTCCATCAG +CCAGGCTCAACGACGTCAGCCACCAAGCATGCTCCATCTGTGCGGCTGGGGGGCCAGGGG +CACCTGACATTGGAGCTGCCCTTGGCCAAGGCTGTCCCACCTGTGGTGCCAGGCAACTCC +CTTCCTCTGGGGGCCTTGTGATGGGGGAGAGGCTGCCCGTCCAGCACCCACCCCTGCTTC +TCACCGCCCACAGCACCCTAGCCTGAGAGGCCACAGCTTAAGAAAAGAGCTCTTTATTCC +ACGTCGTCCGATATTTTTACACAAGTAAAATAAAATGCATATCTCTATATACCGCGATCT +GGGTGGGAGGCGGCGTTCTGGAACAAACGCTGCCGCCGAACCCTGTAAACATGATGGGGT +GGAGATGGGGGTGGCGGGCGGCAGGCGTCCATCAGGGAGGACCTGGCCTGGCCTGCCCCA +CGGGTCGGCCGCCCGGCTGGCTTGGCGGCGTGGAGAGAGACCCCGTATGTACACACACCT +GGCTGCTGAGCACGCTCTTGTGTCCGCTGGGGGTCAGCAGGGCCCAAGCACTGTCCACAG +CACCAGCGCCAGGCCCAGGGGGGTGAGGCTGCAGGTGAGGCTGGGTAGGGCACCTGAGCC +TTCTGAGTCACCAGTCCCGCCACCCCCGCTGCCTGCCTGGCCCAGACGGCAGTGGCTGCG +GGTGCGGTTCTTGCGTGAACAGCCTGGCCTCCGGCGAGGGCCCGAGGTGGGGAACCCTGG +TGGCTCGGAGCCCTCGGGCCGCACTGCAGTGAGCGGGGGCTCAGCAGAGCCAGGCAGAGT +CCCAAAGGGTGAGTCATTGATGTGCCGTGGGCCAGAGCCGTTGCCCGGCGGGCTGTCACC +GGGCGGCACGCGTCCCTTCAGCGCATTGCCTGCCGAAGCTGGTCTTCCAGGCTCCAGTAC +TGAGGCCTTGTCAGCGGCATCTGGCTGGCAGCACTTGGGAAGCCCCAGCGGCTCCTCATC +GGTGGCCCTGCCGGTCCAGATGGGATGGTAAGGGCCGGTGGCCACAGCGCAGCCCTGCAG 
+GTCATTGGCAGCTAGGCGTTTGAGGTCACGGCCAGCCAGGCGTTGCGGGAGGCTGCAGGG +CACCTCGGAGGAGGAGCCGCGGAACTTCTGCAGCCAGGCCCAGAGTGGGCGTGCCCGGCA +GTCACACACCCAGGGGTTGTCGTTGAGCCTCAGGTACTGCAGGGCACGCAGGGGGGCCAG +GGCCTCAGTGGGCAGCGCTGATAGATTGTTGGCAAACAGATAGAGTGTCATGAGGCGGCC +AAGGTCACGGAAGGCATGCGGGTGCACATGGGCCACGCGGTTCTGGTGCAGTAGGAGACG +GTCGAGGCTGTGCAGCCCACGGAAGGCGCGCTCGGGCACGCTGGAGATGCGGTTGCCGTG +CAGGAAGAGGTGTGTGAGGTTGCCCAGGTCGCGGAAGGTGTCATCAGGCAGTGCCTGCAG +CGCGTTGTCCTGCAGGTAGAGGTACTGCAGGGCAGCCAGGCCGCGGAACAGCCCCGGGCC +CAGCTCCTGCAGGCCGCAGCGGTCCAGGTGCAGCGTGTGTAGGCGGCCCAGGCCGTGGAA +TGTGGCAGGGTCCACAGACCGGAGCTGTGCATTATCGCTGAGGTCCAGCTGCTCCAGGAG +GGCCAGGCCAGTGAAGGCAGCCGCATCAATTCGGGCCAGCACATTCGAGTGCAGCCACAG +GATGGTGAGGTTGCGGCAGGCACGGAAGCTGGCAGCTGGCACATGCGAGATGCGGTTGCC +GTGCAGGAAGATGCGCTGGCTGGCAGCAGGGATGCCCACGGGCACAGCCTGCAGGCCCTG +CTGGGGGCAGCTTGTCGTCACCTTGGGCTCATTGTAGCATACGCAGGCACCTGGGCATGG +GGCTGCCACCTGCCAGGCCTGCAGCCACAGCACCCATGCCAGCAGCCGGCTCCCTGTGGG +CAGAAGACAGAGTGGTTAGCAGGAAGGGCAGGGGTACTGGAGAAGCTGGAGGTTTTGCTA +GAGCCAGGTGGGCCCCGGAGATTGGGTCTCAGGAGGACCTGGGGCTGGGTCCAAGATGTG +GCCACCACCCCCGGGAATCGCAGTGTTGGGATCCCTGCTTCACAGGCCTCATGGACGATG +CTGGGCTAGAACATGCCTTGGAGCCATGCACTCCACCCTGGAAGCCACATCTGCAGACTC +CCCATACCACACCCAGGAGCAGCCCACTGGGTCACCAGGGGCACTCACTTGAAGGCCTCC +TGGAATGGACACTGGTGACTGCAGGTACCTCCTCAGGACTCCAGTGAACACAGAGCCCGC +ATTGCAGGAGTCCCCGGGGGGTCCCAGTGGGGGTTCCACCCACAGGGCCTAGGAAACACT +GCAATGGGGCTGGGGTCCCTCTGGGCACGTCTATCTCAAAAACGGGCGCCTGGATGCCCG +AGGGCGGGGGTGGAGGGCAAGTGTGTCCAGGGCTTAGGAGGAGTCGCGTCCCACAACCAC +CCAGGCAGCCTGCTCCCTCCTCCACACATCCTCCTTTCAAGCACTTCCTGACTTGGCAGG +AACCCTCCTGAAGCCTCCGCCCCCTGGCCTCATTCTCTGCTCCATCTCCTCCCTGGGAGG +AGTTCACTCCTCCCCAGCCAACCCCTGCAGTGCCAGCTGCCTGCACCCCTCCCACCCAGA +CGCCATCAGCAGGGATAGCCTCCCAGTGGGTTCTCCCCCAGCCCCAGGTGCCTGCAACCC +CTCCTCCATTACAGGGCCAGATACACCTGGGGCTTCACCAAGGCCCTACATGGGCAGTAG +TCCCTCCCGGGGTGCCGCATCCAGAGCTGGTCTCACCAGCGGGGTCACCCACTCCGGAGT +AAGCTTTCCTGCCTCTCATTCCACCATTGAGCCCCCACCCCTTTGGAATGCTGAAGATTC +TTTCTGCCTTCAGCAGTGAGTCTATTAAAATGTGAATCCCCCTGGGAAGGTTATGGCTTA 
+TCAGCTAACACTTGAGTTATCAGCTAACACCTGCGTTGACAGCTAACACCTTCCACGGCA +GCTGGCTAAGAGCAGGATTCTGCAGACATGAGGCCGGCTAAGTGCTGGGGTTGGAGCCGG +GCAGGAGGAAGGAGTGGACCACAGGCCAGCTTCCCCAACCTAGAACTGTGACCCTGGGGC +CCTCCTACCCACATGGGGCCCCTGCCCCACTGGGTCCTCTCTACTTGGGCAGACAGTGCA +GTCGCCCTAAACGCAGAGCCCAGCAAGCCCAGCACGCGCAGTGACCGCTGGGCATGTGCG +TCCTCCCAGACTCAGGTTCATAGGTCCTCAGACCCCCTCCCCACAGCCTGGGGGCCTCCT +CTGCCAAACCCCCTGCCCCAGGGCCATTTGTTGGCTACCAGGGAGATGGCAGCCCAGGGA +TGGGGGTCCGGAGGCCCCTCCCTGGGTGGGCTGGGTGAGGAAAGCACCTGGCCTCGTGTT +CTGTTTAAACTGCGGACACCCTCTCTGCTCTCTTTCTACACACTCTTCCTAGGAAGGTGG +GGCCTGACCATGTCAGCTCCCCACTGCTGTCTCACCTGCTGTCACCAGAGGGAAAACTGT +GCTATGTAAATGGCACTTCCTGTCCTGGGGCCAGGTGGGGTGGAGGCCCCACATGCTCAT +GGGGCAGGGTCAGGACTCACTGCCACATTATCGACCACACCACACCACTGACTGTGGGGG +TCACCAACTGAGCCTTGTCACCAACCATACCATGCGGCTGACTGCACCCACCAAGACTAG +CCACCCATGGTGGCCACGCCCGCCACATTACCACTGAGCCATGCCACCATCCCACCACTG +TCCTGTGGCTCCAGGCCCGAAGCTGTGGGCTGTGAGTCTGGCTGTGGTCAGGGCTCAGCT +CACTCGCCTTGAACCGAGCCCTGTGCTCATGGATCTCCCTCCCGAAGCAGCTGTGTGTCC +ACTCTAAACATGCCCCCAGTGAGGTGCCAGAACTGAGCCACACGGTGGTGCGGGGTCGGG +GGAGACAGGGGAGGACATGGGGGAGTGGCCACGTGGCGCTCAGTTCTGCCAGGAAGTGGG +TATTGGGCGCAAAGACACGGCTCGGTTCCTGCCGAGACAACTGCTTTTCGTAAACCAACT +TCTGAGAGTCGGGTGAGGTGGGCTCTCTCCTCGCTGCAGCCATGGTAATTGGCTCCAGCC +TGGCCCCCCTTGGGGCCTGCCCGCTGGTCTCCATGGCAACCCCTCTGCTGAGCGAGCAGG +AGGGGGCATCTTTTTATGAGATAATCAAACCCAACTTTGCAGCATGGGTTTGTCTTCACA +GCCGGGGCCCTGGGATGAGGGAGACGCAGCCAGGTGGGGGGAGGGTGTCCTCTGACCCCA +TCCCAGGGGCCAGACTGGTGACTCACCAGCCTCCTCCCTGGCCCCATGGTGGACTGCAGC +CCCTCCCCACCCCAAAAGACCTCCCTGGCCCGTGGCCCCTGACCTGTCCCCTGGGCTCTG +CAGCCTCAAGGCCACAAGGCCTCTCTGGCCCTCTCGCCCACCTTTCTCTCTCCTCTATCT +GCCTGGCCAACTCTAATAGGATGCTCTACTGTCCTTCTGGAGATAGCATGGCCTCCCTGC +CTGCCATCACCCCACTTTCCACTCTCACCCAGTCTCACCCTGGCCCCAGTTCTCACGGAG +CCTCCAGTATGAGCTCTGTCCTTCCTGCCCCTTGCACACGGGGTGAGCAACCAGCCCTGC +CATGGCCCCAGCTCTCACAGAGAAACCATGGATAGCGTGCCTGTCCACCGTCCCCACCCA +GGTTGGCCCTGCGGCCACCTGAGCCTGCGAGGGCCCCAGAGGGCTCTCCTCAGCCTCCAA +GCTGCACACAAACTGGTACATCTGTGCCTCTCCCACCCTCCCAGGCCAATTAACATCTCT 
+TCTTTCCTCCAGGGTCTCCTCCTGGGACCTCCCCGGAGGATGGCCCTGCTGGCTGTCAGG +GCTGGGTCATAGCTAGGAGCTCCTGTCTCAGCCCAGGCCTGGCTCAGGAGCCACAGTCAG +GGGAGGGCTGGCTCTCCTGCCTGGAAGGCTCTGAGCTTTCCCAGCGGGGACAGTGACCGG +ATACTGACCCAGAGCCTCTCCCTGCGGCAGCCAGACTGGGCAGGGTAGGACCCAGGATCA +CCAGGCTGAAGGCCCAGGAGGCTGGGCAGCAAACAGTCCAGGACGCATCCCCCTGACGGG +AGCAACAGGCAGCCCAGGTGGGCACCTGCACCATCAGTTCTGGTGAGTGGAGGGCTGTTC +ACGTTCACTGCAGGAGGGAGTGTGGCGGGGGGAGGAGGAGGTGGGAGGAGAGCGAGGAGG +GCAGGAGCAGAGGGGAGGGGTGGAGAAGGGGGATGGGGAGGTGGCGGTGGGAGGCTCTGG +GGATATGCCCAGCCATTGGGCAGGCAGGCGGCTGCTGAGGTCAGCAGCTCACCCCAGCAG +CCCAGGGAGCTGAGCCGAGCCGAGCAGGGATGGCCTTCTTCATGTGGTCCCTGGCCCCCA +CTGTTCCTGGGGCCCCGCCCACCTCCGTGCCCTCCCAAGCTTGGGCTGAAGTTCCACAGG +GATGCCTTTGCCTCTGCCCAGCGATGCCAGGAGGCAGCACAGGGCAGGCAGCTGTGGAGG +CGCCGAGCACCGGCTTTCATCCACGCACAGCTTGGTGCCCTCGGGCCCCAATGCCGCCCC +TGGGCTGGGATCACCGGAGGGTCCCATCACCTGCTATAAAAAGTGAGACAGCAATACATT +TACAGATTAGGAAACTGAGGCAGTGAGTGGTTAAGCCCTGCCCTGCTCCCACTCCAGACT +ACCCAGGTGGAGAGGCGGCCAAGGGAGACACTCACAGTGCTGCCCGGAGGGGCCACGAGG +CATGTCCTGCCCAAGCAGGAAGGCTGGCGGCCTGGCTAGTAATGGGGGAGGGTGTGGAGG +GGCTGGGCTGCAGGCCACGTGCAGAGGGGCAGGCAGCGCCCTCCTGTGTCATGGGTGGCC +GGGAGGGGAATCTCCAGGGAGGCTCTGAGCTGCTGCTTCCCAGGGCTGGGGTAAGAGGCT +GCAGAGGGCTGGGGCTGGGGGTGGGACCAGCTGTAGCACAGCCGGGACATCAGTCAGCTC +AGGATCCTCTCGGACACCAGCCATGTCCCCGTCAGGCGGTCACGTCACAGACCACAGTTA +TGAGGCAGAACAGGAGGCCACAGCTCAGTGCACCAAGGCCAGAAATAGCTCCAGGCGGGG +GTGGCGGGCCTATGTCATCCCCACACCACAGAGGGTCAGGGCACCCCTCCCACGCCTGCT +CCCCACTCCTGGCCCTCCACCACTGACTCACTCCCCCCGCCCCCCACCCATCTGTCCCCC +GATCCTCGCTTGCTTCTCACATGTACACCCACCCGCCATCCATCCTCAGGCCCACAGACA +CGTGCAGGGGCAGCCAGAATTCCCATGGTGTTTCCTGTGCCCTTCCAGGGCTCCAGGTGC +TGCGGCAGGAGATTGGCAGCAGCTTCTGGTGGGGGCAGTCTGCCCTTGAGGGGTCCTGAG +TCTCCTAGGGAAGTGAGTACCAGCCCCTGATGCCCTGGCCCTGGCCCCTGCTCCTTGTTG +ATCTTGGCTGCAGGGGCAGGGTAGGCAGACCAGGTCAATAACAGGCAAGGACACCTGCCA +TGGTGAGAGGTCACTGGACCCAGACTGAAGCAGCTCCAGGCCTAGCTAGCTACATGCTGT +GGGGGAAACTGAGGCCCAGTGTGGGCAGAGCCTTCAGGACTGGCCTCCTGAGTGTGCCTA +GTGCTTCCTCAGCCCCCTCGGCTCCACTCTTTGCCCTGGCCGGCAGGGCTCGTCCCTGCC 
+CCGTGATGCTGGGCTCACCAGAGACAACGAGATGTAGGTAGGTCCAGGGCCAGCAGGCCA +GGCCAGAGGCTGGGCCCTTCAATCCAGCAAATTCTCTTGGAGGGGGTGTCAAGTTTATTG +ACCCATTTTACAAATGAGGAACATAAGAACTGGTGTGGGTATAATCAATAGCAAACACAA +TCCAAGGTCAAATCCAACCATATGCTACTTACAAGAAACATGCCTAAAAGTACACAAAAT +CTTGCAATTAAAGATATTATCAGAAAAATAATAACAAGTAAAGCAGGAGAAGCCACAGCC +CCAGGGCTAGCGGGCTTCCCAGTCTCATGGCCCCAGGGGTGCGAAAAGGTCTTGCTGGGC +AGTAAAGGCTCAGCCACCCCCGCTCCTAACCTGAGCTCCACAGTCCTGTTTCCAGCCAGC +AGGGCACCTGAGAGGGTACACCTTTCAAGTCACAGACACTGAAGCTAGCAGAGGATGGCC +TGGCCCAGGTGAGAGCTGGCAGCCCTGGATGGCACCCTCACCCCAAGGACACATCAACGT +GGGCCACACCGTGCAGGATCATCCACTCGCTTGGCCCTCCCTCCGTACCTTGGCAAGGTG +TGAAGTGGCCCCTGTCCCAGGTGCTGTGCTGAGTGCCGCCCATACAAGACCAGGCCGAGG +AGCCGGGTCCCAGGCACTCACGGTCCAGGGCCAGCTGACCACGGGCAGCCACCCTGCTGC +ATGGCAGGTGCCCTATGGGAGAGTTAGGAGCCACAGAGGCCTGAGCCAGCCCCAGGCAGC +ATGAGGATGTTGGCAGGAGCCCAGGAGCCCACTACCAGCCAAAGGGTCTTGGCAGAGCCC +CAAGTGGGCAGGATCCTTGGCCTAGCACCTAGGCCTCGAGGATTCATCTTTGGTCAATGC +TGGGCTGCCTGCGTGGAGGGCTCCAGAACCTTGCCCCATCCAGCCCATACACCCCTCACT +GGAGGTGCACCCTCCTCGGCTCCCACCCCAGTGAGCCCCACACTGGGGGCCCCTGGAGGC +TCTGCCCAGTGGAGGCCTCGCCCAGCCCAGGGCACAGCCAGTGCCCCCTTGCCCCACAGG +AGGCTTGAGCCCACCTGCTCCTCCATGCTCCACCAGGAGCCAACAAACACCTGCTGCCTA +ATTATCTGGAGAATCCTGTCTCCACCAAGGGCTCATCTTGGTCCTGCAGCTAATTCAATC +CAGCAGCCAGAGCAAGCTGAGTCCCCAGAGGACACTAATGAGCAAGTGACCGGTGGCCGG +CCTGCTCCCACCCTGGGGACGCACTCTGAGCCAGGTCTGGGCCCAGCACTGCCATACACC +TGCCTCCCATGCCCATCTGCCTGCCTGACCCTGCCCGCTACCTGGTGCCTAGCCCTGCAG +GGCCCACATCCACTCCCATGGGCTAGCATCCAGACCTAAGCCCCAGCCCTCCCACACTCC +CCCAGGACCCTGGATCTGCTGCATGCACAAGGCTGACCACTGCCTGTGAGCCCAGAGTGT +CGGCCTTGACAGGGCAGGAGGAAGTACAAGAGCACATGCTCAGGGTGCACGAGCCAGGAG +CACAGACTCAGTGCATGCAATGGGACCCCTGGGGGTGCACACGATAGGCGTGCATACCGG +AAACACACAATAAGAGCCCCCAGGGCTGGTCAGGGCTCAGGCAGAGTCCACTGAGCAGTA +GAGGGTTGCTGGGATGCGAGCCCCTCCCAGGAGCCAGGGCCCGGCTGGAGGGTCTGCATG +CCGCTGCGGCAGAGGAAGATGTGTAGGCACATCCCCATGTGTTCTCTTGCCCTCAGCATT +TGTAAAACATGATCAACCAGGCAGGCGAGCGGCCTCTGTCCGAGCAGAATTTATTCTGCT +GTGAAAAGACACGTGTGGGTCCTGAGGCTCTTGCCGGCTTGTTCTCAGCAGCCTTGGATC 
+AGCCTCAGTCAGCAGAACAGCAGCTGGACAGGCTCCCATTGGAGCCATCCCAGGGACTGC +CCAGCCCTTCACATTAGAAACTGAGCTGGGGGCAGCAGCTGGCCCACGGCACATAAGCAG +TGCCTGCTAAGACCACAGAGAAGCTCAGCCAGGCAGGGCCAGTGGGATCCAGCACTCCCC +AGGGGATGAGGAATCTAAGAGGCAGGAGAGGATCCTCCTCAGCCTGGGATGTTCCCATTG +GAATTCCCCCAGCCAAGGCCAGAGCTCCGTGTCCGGGGGCCAGAGTGGAACCCTTGTCAG +AGCCGACAGGCCTTGGCCTCCTATTCTACTGGCACCTGGCAAACTGGCCCAAGGGCTGGC +TGCTGCTTCTGTAAAGGATGTGCTATTGGCAGCCAGCCACACCTATGTGTTCACGCATTG +TCCATGGCTGCTTTCCCGCAACAACAGCAGAGCTGAGTTGTTATGACACACTGTGTGGCC +CACAAAGCTGGAATTATTTAGTATCACACCATTCACACAAACAGTTTGCCGACCTGGTTT +GTGAGCAAGCCGAGTGTGGGTGCTGCCACCTTCACCCTCACAATGGCCCCAGCAGTTCAC +TCAGACCCCCACTAATGAAAAGGAAACCAAGGGTCAGAGAAGACCACGGTCCTGCTGAAG +TCACATAGCCAGGTGAACAGAGCCCAGCAACCTCAGTGCAGGCTCAGCTCTGTGGAGACC +TGGGCAAGGTCAGCATCTCACGGAGGCCTAGGGTGAGGGGTTCTAGGCAAGAAAGAGACT +CAGAAATACTGAAATACTCTGCAGGTCAAAACAGAAATGATCATTTTTCTAAAGAGGCTG +TCAGGAGTCTCACAGTCAAATATGCATAAAATAGGACAACCTAACACAAATAAGTTGGGA +GGGGTGGCTCACACCTCAGCCTCCTGAGTAGCTGGGATTACACACACACACATGCATGCA +CACACACACACACACATACACACAAAGAGAACAAAATGGTTGTAAGGAGAGGGGGACAGC +AGAGGAAGTGCTGAATGACACAGTCCGGCTTCGGAGGAAAGGACTTGGGTGGCACTTCAG +AGGAGACAAATCCAGTGCAGGTAGTTTTGAAGAGGGCAGCCGTTTAACTGAGGTAGGGGG +TCTCTGGGGACAGGGCCCCTCCATGCCAGTCCACCCACCTTGGAACCCTGCAGAGGCTGG +GCCGAGGCCTCTCCCTGCACCTTGTATCAGGGACTCTGGCCAGGTCACACCAAGGTGGCT +TATGACCCATTAACATCAAGCTGCCTCCCTGGGCTGGACAGGGCCTGGAAAGAGGTGACC +TGGGCAGATCCGTGTCGATGTACTTCCCAGGACATCAGGATTCCAGCCTCTGCTAAAGGG +ATCTTCACAGTTAATAACAAAGAAATATTAAAGATGAGAACACTTGGTGCAGGAAACAAG +AGAGGAAAGGGAGTGCGGTCCCTCCTTGCAAACAGGATGCCCTGGACCACCCTCCACGGC +CTCAGGCCTGGATGCTCCTGCAGACCCATAGCCAAGATTGTGAAGGAAAAGTAGGCTGTG +GAGGCATTTTCTGTCCTTGTTATCATCATTGTAAACATACTCATCATCACCATCACCATC +CTCATCACCATCACCACCACTATCACCACCATAATCATCAGCAGCAGCATCATCACCATC +ATCACCACCACTATCATCATTACCATCCTCATTACCATCACTATCACCACCATCATCTTC +ATCACCATCCTCATCACCATCACCATCCTCAACACCATCCTCAACACCATCATCACCATC +ACTATCATCATCACCATCACCATCCTCAACACCATCATCACCATCACTATCACCACCACT +ATCATCCTCATCACCATCCTCATCACCATCACCAGCCTCAACACCATCATCACCAACACT 
+ATCACCATCATTATCATCATCACCATCCTCATCACCCTCACCATCCTCATCACCATCATC +ACTATCACTATCACCACCATCATCATCACCACTATCCTTATCACCATCATCATCCCCAGG +CGACCAGAGCAGCACCATCCTTATCCTCATCACTATCACCATCACCATCACCATCATCAC +CATCCTCATCCTCATCACCATCATCCCATCACTATCATCACCATCCTCATCACAATCACT +ATCATCACCATCACCATCCTCATCATCACCATCGTCCTCATCACCATCACCGCCATCAGC +ATCCTCATCACTATCATCATCACCATCCTCATCCCTGTCACCATCATCGTCTTCATCATC +ATCACCGTCATCATCACTATCATCACACCAGCTCTGTGCAAGGCCCTTAGATAAAGGGCA +GCCTGGGGCTGACCTCACACAAGGATAGGAAGCCCCAGGTGAGCAATGGGTCAGTACTGG +GTCGACACCAGATATCCCTGGGCAGAGGCCCTGGCCCAAGGATGGGGAAGGGAAGAGGAC +AAGGGATGGGGAGTAAGAAATGAGGGCCCCTCGCCTCCCTGGGACAGCCCAAGGTGCAGC +CTTTCCTCATTCCTGGAGGTTTCAGGATATCCCTGTGCTGCATCCTCCCATTGTACGGTG +AGGGGGTGACTGCCAACCCACTCCTCAGATGAGGCACTGACTGAGGCAAGTGCCTTGCTG +GCCACCTTGCAAGGCCAGGAGACAGTGCACACAATCTGGACTCCAAGATCTCTGCCCACC +CCCATCTTCATCACCACTATCCTTATCACCATCATCATCCCCAGGCGACCAGAGCAGACC +CTGAGCCCCAGGGATGACACAGCAGGGAGGGGCAGCCACTCTGGGCTCCCCAGTCCCCCA +TGGCACCTCAACCTCAGCCCAGGAGGCTGCCCTCCCAGGGATGGCTGAAAGTGGGCCCTG +GGGTCTTGCAGCCCTGGGCTCAGATGTAGCCTCTGCACCCCCTCACAGAGTAGGCAGAGA +ATCTGCCAGCCCCCTCCAAATCTGTCCCCACACCAGTGATGTGAGGACAATGAGAGGCCA +GCACCTACCTCCCCCCAGGACCCCAAAGAAACCCCACCCAGGCCCCCACCCAGCATCCTG +CCTCCCCAGAGGCCTGCCAATGTGACCTGGGTCAGCCTCGTCTCAGGTGAGGAGTCCAGG +GAGGCAAGGTACAGCTCACAGCGTCCCAGAGCCACACGGACCCCTCTGCTCCAGCGGTCT +TGCAAGGCCACTCTGGCCTGGCCTGGCAGTAGCCCTGCTTTATTGAGGAGATATGGCAGG +CAGGATCCTGGCTGCATCTGGGCTGGAAGAGGGGGCAGCAGCCAGGCTGTGCTACAAACC +CACCTGGAGCCACTTCCTCGCTGCTTCTGCCCATCTCAGCCTCAGCAGAGAAAGAAGGCT +TGAGGACAGGCCTGGTGCAGCCTGACTCATCTCATGAGAGACCCGGCCTGTGCTCAGAGT +CGAGTTGCTGTGGCTTCTCATGCAACACTGATGGTCCCCCTCCCCTGCTCACCCACCATG +TGTTAGACACGGGGATCTGCATGAAAGACACGTGTTCCTCCCCTTCCAGAGACCCACCGA +AATGAGGGTGATGCAGAGTGAGGAGTGGGGAGCACAGTGGCAGGCTGCTCCCCTGGAGAA +ACACCCAGAGAAAGGGGTTCCACCACTGGTATCCAGGAGCACCCAGGGCCGATGGGCAGA +AGAAAAGCTCCATCTCAGCAGAGCAGCCAACCAGCTTCCCTCATAAAAGTGACCAGAAAA +CCAGTGACCACCAGGCATCCGGAAAACATATCAATGCAGAAAAAGAGGGAGAATGATGCT +AGAGGAAGCCGACAGTGTGGGAAAAAGACAAAAATTAGGAGAAACATCTAATTTGTGTCT 
+TCCTAGGCATAGATGGGGATTTAAAACAAGAACAGGCTGCTATGAACAAGGAGGCATCAG +ACAAAAAGAAAGAGTGCCTGGAAATTAAAAATAGAATCACTAAAATTAAAAAAAAAATCA +ATACAAGTGTTAGAAAACAAGGTCAAAGAAATCTTCCAGAAAGAAAAACAAAGGAGAAAG +ACAAATGAATGAGATTTGGGAGACAAATATAGGTTGAACATCCAAGGGGAGTTCCAGAAG +AGAAAACAAAAAAACTGAGAAAAGCTAGCAAAGAAACAATACCAGAAAACTTCCCAAAGC +TGAGTACCCAGTCCTCCACTGGGCATTCACAGTGCACTTTCAGGCCCCCAGGGGTCAAGG +GAAGTCTGAAGCTCCAGGAGGAGAAGCTGGTCACAAAGAGGCAACACGAAGCCATGAGCA +GCAGTGCACGCCGGTAATCCCAGCACTTTGGGAGACTGAGGTGGGAGGACTGCTTGAGGC +CAGGAGTTTGAGACCAGCCTGGGTAACACAGTAAGACCCTGTCTCTACAAAAAATTAAAA +AGTTAGCCGGGTGTGGTGATGCATGCCTGTAATCCCAGCTACTTGGGAGGTTGAGGTGGA +AGGATTGCTTGAGCCCAGAAGTTTGAGGCTGCAGTGAGCCATGATCATGCCACTGCACTC +CAGCCTGGGTGACAGAGTGAGACTTTCTCCAAAAAAACACACAACAAAGAAGATGCACTA +TGGTATTGGCCTCCTTCATAGCACACTAGATGCCAGGAAACAACAACACTGCAACAGCTT +CATAGCTCTGAGGGGAAACTAATTTCAACATGGAATTCTAGACCGAAACAATCCAGCTTG +AACATACTTGTAAATACAGGAAGACTTCCGGAATGATTCTTCACCATATCTTCTTTAGAG +AGTGAGTCAGGCTTATGTTTCAGCCAAAATGAGGTGCAAACCCAGAAAGAGGAAAACGTG +GGGCCCAGGAAGCATTAACTGAAATTTCAACAGGTGGTAAAGGGAAGTTATGGGGCAGCA +GGACAAGTGCTTGGGGTGAATCTTTATCCAGGGAAAAAAATGAGACCCCAATAGAAGCCC +TGATGTGGGGAAGAGTAAGCCCATGAGGGTGGTGGATGGGTAAAAAACATCAACTGTAAG +CCCCAAGGAAAACAAAAGGCCATGAAAGAGAAGGAAGGTCACCCATCCCACGGCAGGATT +GAGAAGAACATTTATGGGACCTGGGTTGGGAAACACTGGCTGTGAATTTAACTGATGGCA +GCGAGACAGCCATGCTGGGGAGTGTGCTGGAGGAGACAGAGCTGGAGTAGAAGCTGAGGA +AATGTGGCGGCAACCACTGGCAAAGCCAGGGAACAGAGGCGACAGTGGCTGCCTCCCAGG +AGGACGCGGGGCAGGGGCAGATTGAGCCCATTCTCCAGACCAGCCTCCGAGGCTGGGTGC +AGGTGTTAACTCTGCAATATGTTGTTGACCGAATGTTATTATTTTTTAAAACAACTGCAT +ATGTGGCACTAAAATCTCAGAAAACAGATAAATAAAAAGAAAAAAAAGAAAGTGTATCTT +CCAGAAGGTGGAGACAAATGTAGGTTAGAAGCAGGTGTCTCCAAGGTGCAGGAGGAGAGC +GAGGGACAGTGAGAGCTCAAGAGCACCTGGACATACAGCCCCAGGGCAGCTGGCATCTGG +CCAGGGAGGATCCACATGCTGCTTTTTGGTCCCCTGCTCGGCCAAGGGTGGCTGAGGATA +CATCTGGGTCTCCCAGGACACTATGGGCTCTTGGCCCCAGAGACTCTGCAATCCAGACAC +CCGAAGGGGCAGCTTCTGGCCTAAAAGCCCAACTCTGTGCTTGGGCCTTCCCTGCAGGCC +AGGCAGCTGCTTCCCTGAACCAGCCTCAGAGTTCCTGCCTGCCCACCCAGCCCAGCCCTC 
+ACACTGTGATGAAGCCCCACCCCTGGATTCCCCTTACCCCTGCCCGCTCCTGTCCCCTCC +TTGTCACAGATCTGAGCCCCCCACTCCAGGCTGGGGCCCCTCAGGAGCCGGGGGTAGGGG +TGTGCGGTCAACACCCAGCAGGGGTCGCCTAGCACTGGGGTGGGTGTGCAGCCCCCAGCC +TCCTTTCTGACCGTGCACACGGGCTTCTCTCAGCTGCTATGAGAGCCCCCAATGCGTCTC +CACAACCAAACCGAGGCACGCACGCAGCGGCGACGTGAATAAGTAATTGCTCTTTTATTA +ACAAGTGATAAATTATTCCTATATGATTGTGTGTGATAACTCTTCCTTATTTGAAATCAA +TTTGGGATGGAAACTGTTGTCGGCTTCTCATCCACGAGGGTGGGAGGCCCAGGTGCCGCT +GCCTCTTGGAAAACGAGGTCAGTGTATTTCAGCTACTTCCTGAGGCTGATGCGCCGCCCC +ACTGCGCAGGGCCAGTGGAAGGGGGTCAGCTGCCCTCCGGGAGGCGGGGCAGTCCGGCCT +TATTTCCTCAGCATCTCTGTCTGGGACCCACGCCACTGAGCCCAGGGCACAGAGGGGTCC +CTCAAGTCCAAGGCCACACTGGGCGTCACCACAGGGTCATCAGGGGTCAGGGGGCCATGC +ACAGGCCATACTGCATTCATCCCCCCATGTAAGCCCCAAAGACTGGGCAGCCTGACCTTT +CATCCCACAGAGGACACTCCAGCACCCAGCTGGTCTCCTGGCCTGCCCGGGCCACCCCAC +GGTGGCTTCTCAAACCTGCAAAGGCATAAGCCATTCTCCTAAAATTGTCCTGATGATTTT +ACAACAGCCCTGGCCCCATCCCCTTTTCTTCCTCCCCCAGCCTCACAGAGGAGGGTTCAG +GGACTCAGCCTGTTCAGGGCAGCCCCTGGGCCAGGCCTCAGCACCTCTTCTGTTAATGCA +TATTTAAAACCTTGCAACCCACCGGGCAGCCAGCCAGCCGGTGCCTCCCTCCCAGGCCAG +GCCTGCAGGAAGCCATGCCCCGTCCCGCTCACTCCTCCACTGGCTGCTTCTCCGACCCCG +ATCATGGCTGCAGCAGGCCTGGAGGCCCGGTCACCCCCAGGCCCAGTCACATGGCCCTAC +TCAGGCACCACCTGTGCTTCTTGGCTGGGTTTCTCATCTGAGAAATGGGAAGGCTGGGTC +TGGGTGAGGCACTTCTGTGGGCCCACTCCCACGGTCTCCTTGACACCCCTCCACCTTCTG +CCCACTGCCAGGGTGCCCATTCCACTCCACACCTCGCTGGCCCACTTAGCAGGCTTCTGT +GTAGCTTGTTTCTGTCGGTTGAGGCACCCTGGGCTTTGGGCTCCACAGGGGCTGGTGACC +CGCATACAGGAGGTGCTCCATAAATGTTAGTCATGTGAATGCACTAAATGCTCACCACTA +AGGGCAAGTGGCAGGCTTTGTGCAGCTATAGAGACAGCATTCCCCAGGATGTCCAGCCCT +GGGAGAGAGCCTTGGCCCACAGCCCATGAGGCAACGGATGTTCCTGGGTTGGGGGCATGG +CTCCAGCACCCCAGTGCCAAGGGCACGGTCTCTAAGTAGCAACATGGATAGCTTTACTTC +TGTTACAGCTCCATGTCTGCGTCTCCCCCAGATTCACGTGTCGAAGCCCTAACCCCCAGT +GTGATGGTATTTGGAGGCGGGGCCTTTGGGAGGTGATTACGGTTAGATAAGGTCACAAGG +CTGGGGCCCTCACCATGGCATTAGTGGCCTGGAAGAGAAAGAGACCTCAGAGAGTACATG +CCCTCCTCTCTCTCCACCCTTCCATCCCCTACCACGTGAGGGCACGGTAAGGAGGTGGCT +ACCTGCAAGCCAGGAAGGGAACTTCCACCAGAACCAGCCCATGCTGGCACCCTGATCTCA 
+GACTTCCAGCCTCCAGAACTGTCAGGAAATGTGCTTCCGTTGTTTAGGACACCTTGTCTA +TCGTATTTTGTCATGGCAGCTTGATCAGACTAATACAACTTCTTTCTTAAAACTTTGGTG +TATTTTTCCAGATTTTCTACAATGAACATGACCTCATAAAATCAAGAGTGTTTTTCGTTT +GTTCTGTTTGGTCTTATGCTGACACTGGGCCAGGTGGAAGGGTGGGTGAGTCCGTCTTGC +AGGTGGCAGGAGCCCCCTCCATGAGCCCCCACACCCCAGGGTCCCTGCCCAACGTGATCG +GCCCTGCAGGGCAGTCCTGGAACCAGTTCAGGGCTTGGGGCCCTGGCTCTGCCTAGGGAG +GTCTCAGGCAGGCCCCCCAACGTCTCAGAACTGCTTCCCTCAGTTGGAAATGGAGATAAT +AAGGCCCACCTCATGGCACCCACAAAGAATGGGGCAGCCTGCACCTGGCCTCTCCCTGGA +GGACCTTCTCACATCACCCCCGGAGGGACCACCATCCCCACCCCACAGAGGAGAAAGCCC +AGCTCCAAGGCCAAGTTGCAGCCAGGAAGTGCTCTGCTGGCATCCTACCTGGCCCTGACG +CCAGGCCTGAAGCCAGCACCACCCGGGGCTCTGTTGCTCCAAGGCCATTGCCCCTGCTGG +CGTTTGCTCAAGGAGCTCCCCCTCCAGAATGCCCTGCCCTATTCCCAGACACACTTCCTC +TGGGGGGCTCTTCTCCCCCAGCCCCACCCCTGCCACCCCTAAGGAGGCTGGCCCCTTCCC +AATCCCTTCCTCGTCTGCACCACTGCCTTGCCCAAGGTGTCCATCCTCAGACCCAATGGG +GCCACCATGGGAGCCCGGCTGGGTAATGGCCCATAAGCTCCTGCCCATGGAGTACTCAGG +CAGGGAGGGGCACAAGCTTGGCTGAGCATGAGTGACAGGCTGGGTGGGTCTCAACCTCAC +CCTGGGCCTGGGGTAGCCCCTTGCAAGTGAGGATGTTGAACACGGTGGACCCCCAGGCAG +AAAGTGAGCCACGGGGGGCTATGCAGGACTGCAGCCCTCCCCTCCCCTCAATTCACTGGT +GAGTAGCCTGCTACTCCCCTGGCTCCCAGGGCAGGGCCCTCATCACCTCGAGACCCAGAA +GCAGTGATGGAGACCGTCCTGCAACATCAAGCTCCCTGTGATCTGGGGGCCAGGGGATCC +TGAGTACCCTGCCTCCTGGAGTGCTCTGCCTGCAGGGGGCAGTAGCAGCACCTTCAGGTG +GGGTGGTTGGGGTGGCTTCCCGGAGGAAGTGATAACTCAGCTAGGAGTGGGGCGTGGGGG +TGGTCCTGACATGTACCAAGGCTGGGGGTCAAGTGTGCACTGTCTCTCAGCCAGGAGGGT +GGAGGCGCTGGACCAAGGCTGGGGGTCAAGTGTGCACCGTCTCTCAGCCAGGAGGGTAGA +GGCACTGGACCACTGGGGCCTCAGGTGTCCAGGTCACTTGTCCCTGGAGTCCCTGGCTAT +GACAGAGGCCCTGGTGGGCAGCTCGGGGTTGGGGACAGGCCAAGGCATAGGGGAGGGCAC +AAAGGGAGGTTGGTGCCCCACCCATCACCCATCATCCGCCTCAGGCCACCCCAGGGGACC +GGCCTCCGCTCCGGCAGCGGGTGAAGAATGGACTCCATTAGGGCCGCCATCTGTCTCAGC +CATGACGGGTCCTCGCGGTGACATCCGGGAGGTATGTAGATGGAATATCTTTATGGGGAG +AGTTTTTCTTTTGCCAGTTTTCAAATAGGAAGCCAGACACTTGTCACACCTGTCCCTGTG +TCAGCCTCTGCCTGACATAAATTGGGGCCGAGGCAGGGAGATGAAAGACAGCCTGGAGGC +AGCCGGGTGCGCAGGGCCCATGTGTCAGGCCGAGGGAGCACCAGAGCTGCAGGCAGCCTC 
+CTGCTGGGGCCAAGGAGAGCTCGCCCCGCAGTCCACCTCCGCAGCCTCTGCCCCTCCCTC +CACTCCCCTCTGGACCCACAAGTTCTTCACCTGCCCTCCCCCAACCCAGAGCCCTCCAAG +CATCTCACCCGGGCAGTGGCCTGTCCTGCAGCCACTGGGCTTCTGTTAGGGGACACTGGA +GCCTCCTCAGCCCCCAGGCCAGCAGCTCCAGCTCCTCTTCCAGACCTGGGGCCAGTGTGG +CTTTCATGAGCGGTCAATGTGGGAGGCTCAGAGTCCTCAACCTCACCCCCGCTGCCACTC +CTAGGGACCTTGTGGGCTTCAGCATGTTTCCCTGGAATCTGGGGGACTTGGGGGTAGAGC +ATGTCAAGAACCTCAAGGTCCCTGCCGCCCAGCATGGGACTATCTTAGAACTGATGCCAG +GAAGAATTTGCTAAGAGAATGAAGGGAGAAGCAAGTGACCCCTGTGGGCCTCCCCTGCAT +CGGGGACTGGACCAGGGCCCACTGGGAGTGCATCTTGGTGCCTGAAACTGCACCAAGGAT +GAGGAGGGGAGGACCCTCCCCAGGGGACACTTGGGGTTCACCTGGCAGGCCACACTCAAG +TCCACAGTCCCCCATGGCCCCAGCGGGTGGGGCAGGGGACGCACCCAGCTGAGGGAACTG +TGGCAGTCCTGCTGTTGTCTCACGTGGCCCTAGGGGGCTCATCTGTAGCCCCCACGGGAG +CCCCAAGAAGCCAATCCCTGCTGGGCTCATGGGCTACCACCTCATACAGCCCCATCCCTC +GGGAGGCCCTGAGGAGACCCTGGGGCTGGGCAGGGCGTGACCGGCCAGCTCCATGCTTCC +TGACAAGGGGGCCTGGATGTCCAGGGAGAGCCAGGAGGGAAGCGGAGACTCAAGTGGAGA +GTGTGTGGGAGGGAACGTTGCTGGGCTCCACAAAGGAGGGGGCACCAGGGCTGACTCCCA +GGTTCCTGGTGGCATGCGTGGAGGCAGGTGGGGCCAGGAGCACCAGCGCAGCACATCCTG +CGTGACCTTGGGCCAGCTCCTCAGCCCTCCCTCGACAGCCGTGCTGGTAGCTGGCCAGGC +CCGGGTGAGTGAGTTCCCCAGGTAGGTGGAAGGGGCTTCTGGGGTAGGGGCCACGCCCGG +GGGCAGGTGGCCCACCCTCCCAATACCCAGTCCCACCCCGCCCTACCACAACTGAGGACT +CTGCCCAAGGACCAGTCAGGGACGAGGCATCCTCTCCAACTGCACTGCAAGGCCAGCACA +AGTGAAGGGCGGGACAGAAGCCTCCACACAACAAAGGCAGAGGACCCCATACTTCTCTCT +GAGTCAGTGCAGAGGTCCTGCGGCAGAGCTCCTGCTGGAACCCAAACTGCGCCGTAGGCT +CCTCACTGAGAACTAGGGACTTCTCCACATGCACCCAGCCTCCCCTCCGGACTCCACAGC +TGCTAGCCCTGCCCGGACCCTGTGCACCCCACTCCCACCCCGTCATAGCCTCTCAGCCCC +ACTGTCATTTGGGCTCCCCTGGCATGAACTAACGTGTCTCTCAGGATGTCCTACGAGGAC +CCTCACGGCAGAAGGACCGATGGGGAGGAGTGGCCCCAGAGAGCAGTTCCTTCTCTGTAC +CCACCAGGGTTCAGCAGAGGCTCCTGCTCCAGGCAGTACAGTGGGGCCCACATGCCCATC +CTCACTGGCCACATGGGCTGGTCAGAGTCCAGCCTGGATCCCCCCAGGGCAGGACAGGAC +AGGTAAGAAGGGCTGAAAACCTGGGAGGCCTCCAGGAGGTGAGGGACCTGCCACCCCTTC +CCACAAGGCCCTAAGGCCCCAATCACTAGTCTCGATTTCCACAGCTAGTTTAGTCCAGGC +CAGGAAAGGGCGTGCAAGGGGCCTGGCCTGGAAGAAGGGGTCCCTGGAGCCAGGTGCACC 
+AGGAGCCTCCAGCTTGCCCAAATCAAGCTGCCACTATGGCCGGGTCCTGTGGGCAGTGCT +CACTCACCACCGCTCCAGCTTCACGATAGCCACGTGCGGACAGGGAGGCTCAGAGGGTAG +GTGGCTCCCCGGGGGAAACAGAGCAACCCTTGTGCAGCCGGCTGGGCCAACTGATCTGCA +CCTAACAGCCAGTCTTGAAGGCCACCAACCCTGGGGTCTCCCGGAGCTGACCCAGGGACA +GAAGGGCACCAGGCACTGCCCACAAAGGTGCAACTGATCCTCCTCACCGGCGCCTGGGCC +CTCCCTCCCTGCCAGTGTGGCTGTCCCACCCTTCATGGCTTTCTGCATCCTCAAGCTAGG +CCTGGGCTCTTCAAGTGGAGAGGGGTAGGCAGACACAGGTGCCAGGCACTGGGAACTAGG +GACGGGGGCTTCCCTGGTGAGGATGGAAAAGAAGGTTCCTACATGAGACCCTCTAGCCCA +GGCCCTGAGAACGAAGGGGCCAAAGCTTCACCTAGGGGCAGCCATGGGGGAGTTTAGGCA +GGCAGTGCCTGGCTGGAAACATGTCTTAGGGAAGCACTTGAATCCCGGGGTGGGGAACAA +GATGGTAGGCAGGGAGCCGGGGCCGGCTGTGGCCACAGGCCGGGGACAGAGGCCAGGTCT +GAGGATGGCCCTCAGTGAGGAGTAAAGGGGATGGACCGGGAGACAGGGTGCAGGTTTGAC +ACTGATTTTGCCTTTGGGGATGGGTCGAGGGAAGAGGCTGAGACTCTGGGCTTGGACGTT +GGGTACGGAGGATGTGACACTAGCAGACAGGCTGGATCTGGAGGCCCAAGGTGTGTAGTG +TGGGGAGCTGTGTGTGCAGGAGAGATGCAGGGCACTAGGGAGCCTGAACCCCTGCTCCAG +TGCACAGGCCGGGCACACGGCAGGGAGGAGGCAGTCACCATTCCCTCACTTGCCGGGACC +TGGGAGGCAGGGCAGAGTGGGTGATGGGCCAGGCTGGGGCCTTGGAATGACATCTGGCAT +GTGGCAGCGGAGGGGGGACATGTAACTGGGGACTGTGGACTCCCAGCTGGAATCCACCTC +CACCTTCCACATCCACGCACTGCATTTTGGTCTTTACTGAGCACCTGCTGTGTGCAGGGG +CTGCTCTAGCCCCTGGGACCACCAAGGGCCAGCCCTCCACAACCCCAGACCTGCACTCAG +CCCCCACCCACTTCCTCCAGAGCCCCACACCCCACACACTGAGGACTCTAGGCATTTTCC +CACTCTTGAAGACACTGAGCTGGGAACACCACCTCTGGCTACCAGCCGTGGACTGCAGAG +AGGCCTACGGTGACCAGGAAGGGTGGAGCTGCCCAAGGTCACACAGCAAGCCAGCACTGC +ACAAGCTTACCCATGGCTGCCCAGCCTTGGGGAATGGGGGACTTCTGGCATGTCCCCGAG +CTGGGCCCTGGCAGAAGGCAAATAAAGGCAACATCTCAACAAGGTTGTGATAGCCGGGGA +TCCAGGCAAGCAAGGGACAATCCAGGAAGGCTCCCAGGAGGAGGTGAGGGGCCTGGAGCC +CACCCTGGCCTGAGCCCCTCCTTGGCACTCCAGCCTGCCTAGCCCTGCCCCCTTCCTTGA +CGGGCCCCTCACCCAGCCTCAGCCTTGGCCTGACATTTCCCGTGACATTTGCTGCAAAGA +AATAGAGTCATAAATCCAAAGGTTAAATTTGCACATCGCCTCAAATTGCTGGGAGAAGCA +GTGGTCCCTCCACCCACTGGGCGCCCCGTGGGTGCGGGGTTTGTGTGGCAGGGTCTACCC +ATCCCAGGGCCCTTGCTGGAGGCCCACAGGTCTCCTCCTCTGGTCCTCCGGGGCACTTCA +ACCACTGTTTCCCAAAACAGGTTCCTGGGCACACCAGGCCCTGAGGTACATCGCATAGGG 
+GTGCCTCTCAGAGAAGGTGCTGCCCACCCCTCTCCAAAACAGGGCCCAAGACGCCCTGCG +GGAAAGGAGCACCATCAGCACCATTTATTCCAGCAGGTCCCACATTTGACCACAAAGCCC +TTATGTGATACCCCCAACATTCCAAAGAATTTCAACTCCATGGACCGTGACCCGTGACAG +CCAGCTAAGAATGCCAGGGTGCTGACTGGAGAGCCAAGAGGCACCCGGGGGCCCAGGGCA +GGCTGCTCTGAGTGAGGCTGGGTCAGCACCCGACACCTCACTCCCAGATCAGCCCAGCTC +CATGTGCCAGGGCCCTGAATGGGGCTTTCCACCTGCCAGGCCTGTGTAGACCTACATGGG +GACCCTGGCCTGTGCCCACTCAGCCCCTCCTAAGGCCACTCTCGAGGTGTGCCAACTGCC +CTGCCTTTCTTCTCCACCTCCTGGTCTGGCTGCTGTGGGTTGCCTCCATCGGACAGGACA +CAGCTGCGGAACAGTGTCAGCATCACCCCATGCCCCGCTGCCCTGCTGCCCCGCAGAAGA +GGCCCCGCAGGCCATAGCAGGCAAGACCAGAGGCTGTATGAAAGAGGCAGGGGAGAGGTG +GCCCCAAGGCACCCACTGCACAAATGGCTGCCCAGACTCTACTCACTCACCATCACCTAC +CCGGCCAGCCCACGGTTGTCAAAGCCTCAGTGGGCTAGGAGGGGCAGGATCTGGCCCTGC +TGCCGAGTCCCCTGAAATCAGCCCAAGCATCGGGGGAGAGGCACGTGACCAGACCCCAGA +CCTGGTGGGTGCCAGCTGAGGACATGGTGGGGTGGGTGTGCAGAGCTGCATCTCCCAGTG +TCCAGCTCCACGGCTGCAGGCAGTGCAGGAGGCCGGTGCAGGTTTGGATAAATAAATGAT +GGTGCATTCTCAGGAAGGGATTCCACACACACCAGAAAACCCATGATGCACAAGAATAGT +TCCCGATGTGCAAAATGTTTAAAATACAGCGATAAAAGAAGAAACGGCAGCACAGAACAC +CTGGTGTGGGGGAGCCCAGCTGGGCCCCAGGGACAGTGCAGGGAGATTTGTGGGGTGGGG +CAGCATGCCCGCCGCTCCGATCGGGCTTCCCTGGGCCACTCTGTGACCTCCTATCCTTTA +AAGCATTTAATCCTAGAGATGCCCGATGCCAAGGATCACACCCCTTGATTTGCCTGGCTC +AGAAGAGAATCTGACAGGCTAGGAACGAACGAGTGAATGAATGAACAAATGAGAAAGTGA +ATGGATGGTTGGGGCAAATGCAGCATCTGGGATCTGGCTTTGAGGAGGGTGCCTGTGACC +TTGGCCTGGGTGCAGCCAGACCGCAGAGCAGGGACACTGAGGGGTGCAGAGAGGGACCCG +AGAGGGCATCTGCACAGCTGCAGAGGCAGCAGGCGGCCAAGCCAGCTCCCGCAGACCCTG +AGGCCCAGGCCCTGGGTGGCTGCAGAGGGTCAGGCAGACCAGCCTGGGGGTCAGAGGTCA +GGCTCACCAGAATGGGGGCGGGACGGCATGAACCTCATTGATCAGATAGTAGCCAAGTGT +CAGGAGCCCTCACTAACCCTGTGCCAGGGCCGGGCGGGGCGCCTGGCTCACACACGCATT +TGTTCATTTCCAGGTCATCCTGAAACTCCCCGTGTGCTTCCTGCTCTTGACCCCTTTCCC +AGGTGCACAGACCAAGGCTCCCGGTGAGAGGTTCCGGGCCACAGGCCCAGGACAGCAGGA +GGGCTCCTCTGCCTCCCGCCCGTCCTGTCCTCACCAGCTGGACCCTCCTGGTGTTGCACC +GGCTGCACAGGGCCGTCCGTGCCACAAAGCATGAATTCAAGGCTGGGGCAGGGACGAGGA +GAGGGCAATGCTACAAGGCTGCACTGTTCCTGGGCAGAGGGAGGGGGAGAGCACGGTCCC 
+ACACACCCTGGCCCTCACAGCCTGCCTGTCACTCACAGCAACAGCAAACAAACCCTGGCC +CCTCCACAGGCTGTGGGCCAGTTCCCAGCCTCCCAGGACAGCAGGACCTTCCCCCACCTT +CCAGGGCCCCAGCCCCTGTAGGGGTGTCCTCCATGGACCCTCCACAAGCCCTCTGGCTTT +CAGAGACGTGCTGGGCCAGGCCCTCCCTCAGCCTCTCCCCTGCTTCTCACTCCATGCAAA +CCTACGTTTCTGCCAGTCCCAGCAGAAGGACCCTGGCACGGGGAAACTGAGGCTCAGGGG +TTCCACAGACCCCCAGGTGAGGAACAGGGGGAGGGCAGGGAGGGAGTGAAGGTACTATGG +GAGGGTCTCCCAAGCTGGGCAGCATCCCTGCCCTGTCCCCTCTCCACTCACCTGTGGCCC +GAGTTGCCTGACTAGGGGCTTTTATTGTCCCTTCTGCACATGGATGGGGTGAGGGGCCAG +CTGGCAGTGCAAAGGTCCCAGGAGGACTCTGCTGAGTGGGGGTGTGGGGAGGTGTTGTGG +GCATTCTGAGGCAGAAGCACCAGAGTCAGAACTCCACAGGGAAAGCCGGGCCAATTCCGC +CCAGCCTCAGCCTGGAAGTGCCCCGATAATCCCTGACCACTGGCCCATGGTGCAGGTGAG +GAAGACTGAGGCAGAGAGGCGACCTGTCACACCCAAGGTCACACCGCCAGCAGGTGGCTG +AGCCAGGACCCAAATCCAGGTCACGGTTCTGCCACGGCAGGCCCTGCACGTGCCCCTCTG +CCAGGTGGCGTCTAGGTGCTGATGGCCACCCATTCCCACCCCCGGCATGGGAGCTCACTT +CCTCCTCACCTCAGTAGAGAAGAACAGGGAACAGTACCCCCTTTATAGATGGGAAGACTG +AGGCTTAGAAAAAGAGACTGGCCAGCCCCACTGCTTCAGCTGCCCCTCTGCTAGTATGTT +TCACCCTCACACAACCTACAAGGATAACCTGCTGGTCCCCACTCTATAGATGAGCCTCTG +GTGGTGGCCAGGGGAGCAGGAGGTGTGGCCTGCCAATCCAAGCCCCAGCCCAGGGCCCTT +GCCACAGTGCTAGTGGGCCACCCTTGCCCATCATGCAGCATGGCATCCATCTGCTCAGCA +CCCTACACAACACCGGAGGTAGGGGAGCCCACCACAGAACCCCTCACCAAGAAGGTCCTC +ACCATGGAGAAACTGAGGCACTGGAGGCGGGGCCTCTGCAGTCACGCTGGGTCACGTGCT +CCAGTCAGCCAAGCTGCAGCGGCCACCAAGGGTGCCCAGGGTCTGGCCTGTTGGTCACAG +TGTTGACCTCATGGCCTGAAATGGGTCTTCCCTGCCCAGACAGGTTCCAGCCCACCATCA +TCTATCTGCCTCAGCCCTGGTGGGTAGGGAGTGGCTGGGAGACCCTGCCCCTCACACCCT +GGGCTTTGCGGTATTCAACGAGCCCCCAACACACATTCCAGGGCCTCTGCTCCAGATTAG +GCCACAGCCTGTCTAGGCACACAAGGGGCACCCACACCCTGGAGGCAAAGAGACATGCCT +ATGTGCCCGCAGGCACACATGATCACGCGCACAGACACACGTGCAGATGCATACCAAGTT +ACACACACGCACACAAGCCCGAGGCGGGGACCCCCAGGACACGGAGGCAGGCAGGCAGAG +GTGGGGCCCAGGATGTCTTGCCCCTGTCTGTGGGCCTGGCCTCCCCACTCAACAGACGAG +CATGTGGGTGCGGGAGTGGTTATTGCGAAAGGGGCCCCAGCATGTGCCACCCGTGGAGAC +CACAGCAGGCATGCCGCGTGCATGCATGTGTGGCCGTGGGTGTGCAGGCGGGGGCGAGGA +CACAGATATGCACCCTCAGAGTCCGTGTGTGCACCCAGTGTTTGTGCATGTGCACACTGG 
+GGCGGCGTTGGCTTGCATGTGCACCAGAGGTGTGCAAGTGTGCAGTCCGTGTAGACAAGT +GCACGTGCAAGGAAACGTGTGTGCAGCCTGCGGCACCATGGACATCAGCAGGGCAGCCTG +GGTGGGCACCTGGGGAGCACCCTCCAGGCGGGGCATGCCCAACTCACATGGAGCCCACCC +TCCCTTGCCCGCACCGCGGGACCTCACAGCCGGTGGGGCGGGGGAGTCCCACGAGGGCTG +CTAGGGAGGGGTTGCTGATTGCCCCACCGCCCCTGATTCCCCCAGCCAAGCCTCTCAATG +TCCCTCGGGCTGCAGATGGTGGGCGTTGCTGCCTCCTCTCACCGCCCGCCCTGGAGGCAG +CGCTGGGACAACCCTGTCCCCCACGCCCTCTACTGCCGCCAACGCCCCCATTCATCCCAC +TGTGGACTGACCTCGGAGAAACGGGAGGTTCTGAGCCCAGACCGTGAGGCTGGGGTGGAG +GCGGCCCGACACCCCCACCCACCCTCGGTACCCACGTTTCTGCAAAAACCCGCTCGTTAA +CCCCTTGGTGGCCGGGGCGCTCCGAGGCCGGGCCCCGCCCTTCCAGCCCACGGCCCCCGC +CCTCGACTTGGCCCAGCATCGGGGACCCTCGGACCGCCACCCTCGGCCCTGCCGAAGCCG +TCAACTTTCCCGGGAAGCGGCAACTTTTCCTGCCGCCTCCGGCTCCGCCCTCCCGGGGCC +GGGGCGCCAGCGGCCCCAGCCTGGCGCGGCACTGCGGTGGCTTTCCCTGCCGCCCGGCTA +CGCTTCCCTCGCCTCGGCAGCCCGGGACGGGCCCTGCGGCTCCGGGGAGGGGGCGAGCCG +CCCCCCTCCGCGCCCCGCCGCCGGCCGGGCTCGGGTGCAGCGCGGACACTCACCTCCAGC +GGACGCCCTCTTCATCGTAGGGGTTGGGCGGGGCGCGTCGGGGACTGAAAGTCGTTTCGG +GGCGGGCGCCCGTCTCCCCGCGGCCGGGTCCGCATCCAGGCGCCGCCGCTACGGCCCGGC +CCCGGCCCGGCCGCGGGACGAGGCTCGGCGCGCTCCGCTCCGCCCGGCTCTGGCTGGGCT +CGGGCCGCGGGTGCGCAGGGCGCGCAGGGCGCACAGGGCGAGGGCGGCGGCGGCGCGGGG +GTTGGGGCGTGGGCGGCGCGGCGGGCCGGGGCGCGCAGGCCGGGCCGCAGGAGGCGCTCA +GCCCGCCCGGTGCACCGGCGCCGCGCGGTCCCAGACAAAGGCGGCGCATTCCTGAAGAGG +CGGCGGCCCCTGCAGGCGGCGGAGGGGATGGCGGCCCGGGCCGGAGCCGGCCGGGGACGC +GGGCCCGAGGGTGTGGCGGCCTAGGCTGGCCCGGTCCCCAAGCGGGCAAGGGCTTTGTCC +CGAGGCCTAGGAATGGCCTCTGCCTGACCCACTCCCTTCCTGGCCTCACGAAACCCATGA +ATCCCCCGCCCCCTGCACAGAAGCCCCTTGCTGCAGCACGATCGCCCCCATTTTTCAGTA +AGAAACAGGCTCAGGGCTTAAGAGAGTTCTGGGGGCCCTCCTGGGCCTTTGTGGCCCTCT +CTGCACCATCCTAGAGAAGGGACCTAGGCCTGATGTTACCCGTAACCCTGCAGGGGTCCC +CCACCTCTTCCCTTCTGCGGAATGGGGGCTCCAAACCCAGAAGACGCTCTCCGAGGTCAC +CTGCTTAGTATCCCCCCACTCTACCAAGCCAGCCCCCCACGGGCGGTGTGAGGCATGGGG +CCAGAGAATCCAGTGACGCCCCTGCACACACCACACAGGGGTGCAGGTGTCCCTTCTCAA +TCTTCTGCTCGGGGCACCTACCGGGCCTCACCGATGAGCTCCTAAGTGCATCCCGGGGTC +TTCAGACAACTGCCAGGCTGCCCCTGCACCTGGAGAAGTGCTGGGAGTCAGCTCAGCCGG 
+GCTGGGTCCTCCCCAGGCCCACCCTGCATGAGAGGACCATGCAGATGAGGGTCTTGCAGG +CCCACCTGGTGTCAGTCTGTAGGACCCTGACTCAAGGACGGAGTGGGCCAGCCTGATGCC +TTGATGGCCCCCCTCAGAGCCCAGGTCCACTGATATGAAGCAGAGAGACCTGGTGACTGG +GCACCCCTGTGCCCTCTGCCTGCCCCTGGCCATGGAGGTGGCCCTGAGGGAGATGAGCAG +GGCGCAGGGCCGGTGACCTGCCTGCCTTGTGCAGAGGAAGTGAGGACCTCCCAGAGCCCT +GGTGCTCCCATCTGATCCCAGTGGCAGGCAGCTTCGCAGGGTGGAGCCATGCTGGGTGGG +CCCTCTGCCTGGTGTGGAGGCTGCACTGCCCACATGCCTCTGGCTGCCGCTCTGTGGCCC +TCATCACTTTGGCCTGCTTTATCCCTATCACCCTTGTCCCAGCCTAGGCTTATGAGGGCA +TTGTGGAGTGCATGCCATTCCCAGCCACACAGAAGGTGAGGTGGGGCCCAGACACCCCCA +GCCACTCTGTCCCCTGGCCTCCTGAAGGGGTGGGCTGTGCACACACTGCCACCTCCCCAG +AGGGTGCTGGCTGCCGAGTGCCACCAGCTCAGCCCCTGAGGTGGCTCAACAGGTCCTCCC +AGAGCCACACCAAGACGTCATCACCCATTGCACAGATAGGGAACCTCAGGCGAAGCTCAC +AGAGCCAGTGGGAGGCTGAGGCCTGACCCCCAGACAAGCATTCTTCTCCAGGGTCCCTGC +ACACAGCAGCCCCCAGGCTAGGAACTGAGCTCTCTCCACAGGGATGTATCAGACCCTAGA +TGAGTCACGTGGCACTGACCATCAGTCTGAGGACAGCGCTGGGGTGGACCAGGCTCCAGT +CTCGGGCTGCTCTGTGCTTGCAGAAAGGCCTTGAGAGGACACTTGCTTGCTTACTCCGCA +GGCGTCTCTTCAATTACCAGTGACCCAGAGCAGCCCACTATAGCTGAAGATTGTGACCCC +ACACCCTATCTCATAGGTTTAAAGCCCCTCCCACCCCTCCTGCCGCAGCCGAAGTGACCA +TTCTGGGGCATGGGATGGGCATGGGGTGAAATGGGGAGCAGGGGTGGCCCTGGAGGCTTC +TGGCACCAGCACCCTCTCCAGCCACCAACCTGGCTGTTCCCGGTTGCTCTGCGCTGGGAC +CTGGGCCTCCACCTCCCCACAGGTCTCTGCATGGGAGCAGGGCTGAGGCCTGCAGAGGGA +GCGCTGGGGGATGGGCTGGTTTCCTGTGTCCGCCACCTCGGTCAGCACGTTGGCCAGCAC +CCTCCCCACTGGAAGCTGGGCCCACTCCTGGGTGCTAGCTGGGATGGTTAGGCGTCGGCC +AGCGCTGATAGGGAGTTATGGGGGTGCAGAGGCAGAGTTGAGCAGGGCTAGGCAGCCGGT +GGGTCTGCTACCAGCAAGGCACAGCCCACTTCTCAGGCTGCGCATGTGGGAGGGTTTGGT +AAGCCACATTTTTCAGGGTGTAGCCGGGTGAGGGGTAGCATGGGGAGTGGTGTACTCTTT +GGGCAGGCGACCACGGAGCCATCACCCCCCAGGGCCCAACGGGCAAGAAGGGGAGCAGCT +GACGGCTGGTGAGGGGCTAGCAGGGAGGGCTTGACAAGGGCTGGAGCCCCTGGTCAAGGG +GCACAACCAGTTCTATCGGGTCACCTTGTGGGAGGAGCCAGGGACCCTGTGGGCAGAGCC +TGGCCGTCTGCTGCCCGGAGTCTACACCAGTCAGTTCCTAGGAAGCCCCATCAGGGGTGC +TGGGAGGACAGGCCATCTGTGCCCAGCTGATGCAAGCTCCCAGGCTTTCTGGACACGTGG +CCCTGGGGATCAGGAGTGAGCTGGAGACTGCAGACAGCTGTGCCCTCAGGGCCACACAGA 
+CCTGTCACACCCTGCCTGGACTGGGTCAGCAGGGCCACACGGGCATCTGCCCGAATCGTT +CCCCACCTCACACCCACCATGTGGGGCACATTCTTTGCCAGCTTCCCCAGCCCACCACCC +CTCTACAGTGAGGCCAGGGGCTGTCTCACCTCCCACCCACACCCACACCCAAAATGTGGG +GCTCAGCCTCCACGACCAGCATGCCGGTCATGAGGGCCCTGCCCAGCTGTGTGACCTCAG +CCCAGTCCCTTGGCCTCGCTGAGCTTGCACAGCATGTGGACGAGGCAGACGCTAATGCCT +CAGGTCCTGGTCAGCTGGTCACCATCCCTCCTTTGTTCACCTCGCCTGTCCCCTGAGAGG +ATCATGCAGGGTTTTCACTCAAAACTCCAGGACTGCCCATAGAAGCCAGCTTGAGATGAG +GGGAACTTTCGCGGTGGCCTGTCTTGGGCTGACAGGGACACGGTTCAGCCTCCTTGGAAG +GTCACTGCTTCCCAGGGCTTGGGAAGACTCATCTCCTGCCTGGAGAGATAGGCGTGCCAT +GGCTTTCCTTGGCATGCCTGTGCGGTACTCACGGACATCCAGGCCCTCAGTGTCTCCTGG +GGAGGGATGGGGCAGCAAGGAAACTGGGTCTTTGCTCCACACATTCTGGGCCACTCCAGG +CCAATGCAAGGAGAGACCAAGAAGGAAGCTAGAAGGTCATGTGGCAAGCACATGCCTGCT +GCCCAAGTGTGTCAATGTGACTGACCTTCCCTGCACTCTGAGTGGGCGGCTGGGGATGTG +GTCCCAGTGCCAGGGAGCTGGGGGGTGTTGGCCTGCCATCTGCTCTCACCATGGAGGGCG +GCTGCATGCCCAAGGCCTAAGAGGACTCTGTGCCAGGCCTGCTGAGTGAGTCATCCCATT +CAATCCTACAGTGTAGACACTACTACCACTACCCATTTTCCAGACCAGAGAAGGAGGTCT +GGGGAGCTTAGGTGGACCCACATCTTGGCAGCCCCTTTCTGCCCTCTCTTTGTCCACAGC +CTCCGCCTCCTCCAGAAGCCTCTCACCATCCCCCATCCCCGGGGCTGCAGAGGGTGTCCT +GGCCTTCCCCTATCAGCTCTGCACCCCTGAGGACTCCCATCCCTAGGAGTCCCTCTGCCC +AACTTTGGTGCATGTTAAGCCATAATTAATCTGCTTCCTCATCTCTTATTCATCAAAGCC +ACAGCAGCAGCAAAGGGGGCTGGCACAAGGAAGGGACCGACAACCAAACCCTGGCAGTGG +CCCCAAGAATCCCACTGGGCCTGCCCATCTCCAAATGCCTCTGACCCTTCACTAGCGAGG +GCCTCCTCCCACAGAATGGCATCCTGGATTGTGCCACACTAGGCAGGCCCCCTGACGGAC +CATGCGCACCTCCCACCACCTGGAGGCTGCTGGGGACCGTCAATGGGACGTATGCATTGC +ACAGTGATGGCCCTTACCTCTGCGTCCCCCAGCCCTGGACTGGATCCCTTGTGGCCCATC +TCCAGGATGGGAGCCTGAACGGAGATGAAAGGACACTCGGCCAAGTGGGACATGCCAGGC +TGGCACCTGGACCCTGGTCCGCCACTCTTCCTGGTGCCTAGAGGAAGGGGTGAGTCCCCA +GGGCTCCTGGACCTTGAGATGAGCCTCGGGCCACTTGTGTTTGCCAGCAGATGGGCTCAG +GGTCTTCTCTCAGTGGGCACAACCGAGTTGGTGGCCTCTCTCTGGGGAGGCCGGCTGGTG +GGGGCTGGGGAGTAGGCGGCTTCTGGTCCCTGAAGGCCCAAATCTGGTGCAGGGGCAGAG +CTCCGGAAGGCGGGGGTCCCTGGGCTGTGCCCAAGGCATCCGCTGTGGCTTCACGCCCCT +CCCCCGCCCCTGCAGGGAGTGTGGATCCTTCTAGAGCCACCCACCTGGCAGTGACAGCTA 
+AATTGGGAGCCGCTGTTCACACCAGGAGCCCAGCGGGCAGTCAAAACCGGGTCGGGGGGG +AGGGGCACCCGCTGAAAACACCCACAAGCTCCCAGTGGGGCAGGGAAGGCAGGGAAGGCA +TGGTGATAGCTGTCAGCAGGAGTGGGTGGGGAGTGGAAGGCAGGGTAGACCGCCAAACCC +ATCTTGGGGCCATAGTGAGATGGCCCCTGGGATGCTTCGGCTGGCCAGTGCCCAGCCTTG +CTCTTGTAGGAGCTCCCTGGGCCCCTGGGTTCCCCACTCTGGGCCACCACAATCCATGCA +GCATGCAGGTTTGTTCCGTTTCCCAGATGGAGAGGCTGAGGGCTTACCACAGGGTTCCGT +CAAGGACGCAAAAGGGCCAGCCTGACTCTGAGCTCAGTAGCCCCACCCTCCACAGCCTCC +CTTGCCGCCTTCATCTGCTCACTTCCTCAGTCACTCAACAAACACTCTGGGAGGCAGACT +GTAACACCCTGCATACCTCTGCTGGACCCCCATTCCACATATCCCCACTGGTTACAGGTG +GGGAAACTGAGTGCCTGGGAGGCGGGCCTTGGCCTGAGTCCCCCAGTCAGTGATTCCAGG +ATAAAGGGCATAGCCATGCCATTTGGTCACTTGAAGCCCACAGGGGTGACTGGGAAGAGC +CCACCTAGCACCCCACATTCTGCTAGGGTGACTCAGACCAAGTCCTTGGGGCTCTGCCCC +CCAGCCAGTGGCTCTGCCTCTGAGAGCCATGGGACAGGGAGCTGTGCAGGGTGGGGCCAG +CAGAGCCTGTGCCCCTGCCTGAGCTAGTCGTGATGGGGCCTGCCCTGACCCCTTCCCAGG +CTCATGGGTGCGGAGCAGGGCGGGGCCTTACAGTGAGATGGGGGCCTCTGTGGGCAGTCC +TGGAGGCCAGGCCCTGGGGCTGGGACTCACTGGCACAGGGCCTAAGTTTCCTCCTGTGTC +CGCAGGCCTTATTGCCTGCAAGGGAGGTGGAGGGGCACAGGGGCGGTGCCCAGCGTGAGG +AGGGGGCTCTGAGGAGGCTGCAGCAGCCGGCAGGCCTCTGAGACTGGGATATTTTTAGCC +AGGATTCCTGCTGACGGTCCAGGCTGGCCGGGGTTTGCTGCCAGCCTTAGAGGCGCTTCT +CCTAGTCTGTGTCCGGACATCTGGGGTCTGCTGGCCCCACCCACCCTCTGCTCAATGCCC +CCGCCCCACTGGCCCATCCAGCCAGGCGAGGGGCTCCCAGGAGGCCGGCCATGGTGGGAG +CAGAGCCTGGGGCCCTCGGGGTGTGGCTGAGAAGAGGCCACATGGGCTGCTGGGGGCTGC +CTGTGTCCTGGGTCTCCAGCTCCCAGCTGCCACTCTTAGCCGACCCGCTTAAGGGGCAGG +TTTCTGGAGGCTCCGGTTCATGCCCAGGCTCGAGTTCTGAACTGAGCATTCTCAGTCCGC +TGCCTGCTGCCTGCTGGAACGGATGGCTCAGCAGAGCGGCCACAGCTTGGCTCCCTCCTG +GGCACTCCTGCCTGCCCTTGCCGCTTAGGTGCTCTGGTCATCAGGCCTCTCTGGCTGGCT +CTACCAAGGTGCATGTGTGCAGGGCCTGGGCTTGCAGGGCCTTGTGGCTCAGCCAGGACC +ACCTCCCTTCCCATGGGCAAGTGAGGACATCCAGAAAGAGGGTCCTCTGCCTCGGCCACT +CCCTTGGGACACCCCTCCACCCCAAGGCCTCTCGTGGAAACAGAGTCCAGCACTCAGCGT +CATGCCTATGCACTCCACCGGCAGGACTGGCTGAGTCCCGAGGAGCCAGGCCCTGGGGCA +CAGGCCAATCGGATGGAACCCAGATGAAAGCCCAGTGCTGGGACCCACGGCCCATTCTCT +GGCCTGGCTGAAGCCTGATGGCTTGTTCCTTGAATGAATCAGGAGGCCCATCCAGAGCAA 
+TTGTGAAGTTCACATGAAAGAATGAACGTACGGAGAGAGATGGGCCATGTCGCATCCTGA +AACGCCTTATATTCTAAGACCATGATAATTAACACAGACCCTGGACAGTCACACGATGGG +AAAGAGTCCAGAAAGAGGCTCAAATGCAAATATGAGAAGAGGCAGGTCAAATCCAAGAGG +AGAGATGGAATATTCAACAAATAATCCCACATATACAATCAACACTTGGCCTTAAGACAA +CGGAATTGACATATGGAGGAAACATTTGGATTCCTACCTCACTCCTTGACGCTAAAACAA +GTTCCAAGTGGAACTAAGATTTAAAAAGTAAAAATGAAACCATAAAACTCCTAGAAGAAA +GCATGGATGGCAAAAAACCAATCTTGAGTCACTCTAAGAATTATACAAAACCCAGAAGCC +ATGAAGTAAAAGATGGGTATATTCAACCTCGTGAAAATAAAAGGCAGATGCATGGAGAGA +AAAAAACCACCATAAAATGATAACCAACAAAGGGTAAAAACATCGGCAACTCAGTTCACA +AAGGGCCGTTTTTGGTTCTCAATACAAATCAGTAAGAAAAAGAGCAACAGCCCAAGAGAA +AAATGGGCAAAGGCATGAAAGGAACCTCCCATCACCCCCTGCTGACCTTCACTCAGAGTG +AGAAACATGCGGATCTAAGCTGCAGTGAGATGCTGCCCCTCACCCACAGGGATGGCAGCC +GCATCAGGCTGGGGGTTCCCTAGGGGGGCTGCTTAGGGGCTCCCACAGGTCTGGCTGTTG +GAGGGCAGGGAGAGGCCTGCGGGTCTGGTGGGGCCGGGGTGAGGCACTCACAGATATGTC +TAGTGGGGGTGAGGAGGGGGACCGCCTGCAGGTACAGCTGGGGGTGTCAGGTCGCTGGGG +ATGGGGTGGAAGGAGAGGATACCCATTCAAAACAGGAAATATATATATTTTAAAGTAAAA +CAGGCTAAGTGCAGTGACTCACACCTGTGATCCCAGCAGAGCGGGGAGAATCCCTTGAGG +CTAGAAGTTCGGGACTAGCCTGGGGAACATAGTGAGTCCCAGTCTCTATAAAAATTAAAA +AATAAAATTATCTGGCTGTTACGATGCACGCCTGTAGTCTCCGCTACTTGGGAGCTGAGG +CGGGAGGATCACGTTAGCCCAGGAGTTCGAGGCTGCAGTGAGCCATGATTGTGCCACTGC +ACTTCAGCCTGGGCAACAGAGCAAGACCCTCTAGAAAAATAATTAAAAAAAAATAAAAAT +AAATATAAAATAAAATAAAAACCAGCTGACATCCATCCTCTCCTTGCCACCACTGACATG +GCCGTGGGCCGAGTTCTCATGGCCATCCCACCGCGAGTCCCCAAGCGCTCCTGATGCCTG +CTCACCCTTCTTGTCTTTGTTAGCGCCCCGTTAGCAGAGGCCTGCTTAACTTGTCTCTGG +TCTTGCTTCTCAGGAGCCGTGTCACATTCAACAGTGGCAGCACAGAGGCCCTGCTCAGCC +CCAGTCCCTCACCCTAGGCCTCCACTGCCAGATGTGGTCCTGGTGGTCCTGCGGACTCAA +GCTGTCCCCCCCTGCCCAACCTTCACAGGCCACCAGCAAGTCCCAGGCCTCCCACAGGCA +TGGCTCTGGGCAGTATCTTTACCTTCCCAGCCTCAGTTTCCCTGATGGTGAAACAGGGTG +AGATCTACTTCCTAGCAGAGAATCCGGGGGAATGGAGGCCGTGGGAGGAAGAGGACTTGA +GGAAAGGAGCATTCCTGAGGGGTCCCAGAGCGGGCTGGGGCTGACAGGACTGAGTTTGCA +GGGCCTGGGGACATTCCCTGGGGAGGTGCTAAGATGCCGGTCAGCTAGAGCAGCGGGGAG +TGATCCAGGATGGCTGCCCGGATGAGGGGCTGGGTCGCTGGTCCCTGAGTGCCAAGGTGG 
+TCTGTCCATGTCTGCAGGAACACACCCACCCTGCTGCCTCCAGGAGGGGTGCTGGACAGG +CCTGAGCTCCAGGATCATAGGAGGCAAGAGCCACTCTCACCAAAGAACAGCAAGTGTGCC +TCCCCTGCCAGCAATCTGTCCGGTCCAGCCCCTGCCCAGCTATCTGGGGAAAAGGCCACC +TTTTCAGTGGAGGCTGAGCTCAGGGCAGGAAGAGGTTGAGAGGGGAGAAATGGAGTGAGA +ACTGGAAGGTGAGAGGTGTGAGTGAGAGGTAGGGGTGTGAATTGGGGGTGAGATGTGGTA +GGTGAGAGCGGCAGGTGAGAGGTAGGGATGAGAGGTGGGGGTGAGAAGTGGAGGTGGGCA +GTCATGGTGATAGCTTGTGGGGTGAGAAGTGGAGGGTGAGAATTGGGGTGGGAGTTGGGG +TGTCACCTCCCAAACCCTCTCACCCTCAGATTAAGAAGCCCCTCCTATGGGCCCTGGAGG +AACAGGCCTTTGACCCACCCCAACCCCACTGGTGTCTTGGGCAGGGAGATCTCGAGGTGG +GGGCCTCAGCTCCCCGTAGGACCGTCTAGCCCTGGACCACCTGACCACAGCTACAGTGGG +ACAAGGCCCCTGTTGCCTGTCTGCTGGGGGCCGAGCGTGGTATGGAGGAGCCAGCAGCCC +CCGCCTTCATCACAGAGGAGGAATCCGAGGCCCAGGAGGGTGTCTCTGGCACCCAACACG +TGAGACCCTTTGAAAATCCCCCGAGTGTGTGGGGGGCTGGGATGAACATCTGGGGGCGGC +AGCAGCAGGTGGCCCACCTGGCCAAACACACTCCTGTGTCCCCTTGTTTTTGGCAATTTC +AATGACCAAAATCAGATCCTGGTATGGAGAAAAAGACATCGGTAAGGACGAAGGAAATGT +GAACACTGCATGGACGATGGCTGATAATAGTGTGCCGCTATTGGCTCAGCAACTCTAACC +TACCAATGCCAGATGCTCACAACGGTGAAACTGGTGTGTGGTAGATGCGATCTCTCTGTA +CTGTCTTCTCAATTTTTCAGCAAAGATAAAACTGTTCTAAAGTTGAAAAGTTTATTAAAA +AAAAAAAAAGAAAAAAGACGGGCTTATTAGCAAAAATCTGAGATAGAACCAAGCAGTAGG +GCCCAGCCCACCCCCCGCTCAGGGCGCACCAGCAGTACCCCTCTCTCAGGGACATGGCCA +GGCCTGGGCCACTCCGAAGAGCTCTTCCTGGTGCTGGGGGGCCTGGTGGGTTTATTCCAC +CTGTCAGTCTCACTGCAAAGTGTGGCAGTGGAGTCTGAGTTAGGCTGACTCTTACCCTGC +CAAGGACACCCCTGCACTCAGGACCCAGCCCCGGCCCCCTTGCCCTGTGGGCTGGAGGCG +ACCCTGTGTGCAGATGGCAGATGGAGGGGCATGGTGCTGTTGCTCTCAGGTCAGGGGCAG +CAGCCTCAGCCCTGACCAGCCCTCTCCTGCGTGGTTGTGGTTGGGTCTCCTGGTCTCTGG +CCTCCTTTCCATTCCAACCTGTAGAAAACAGTGTGGTGGGGGATGCTCGGCCTGCAGATC +AGACAGGCCGGGGTCAAAGCCCCGAACTACCACCTGAACTAGTCCTGGCCGAGTGGCGCT +GATGCTCTAGGCCGAGGGCCCCCTTGACAGAACAGCCATCATCTAGCTCCGCCGCAGGGT +AGGGGAGGTGGCAGTGTGGGCCGCAGCCCTCGGCTGAAGCGGGGGCCAAGCGGGGCTCTG +CCTGGTGCAGGGGGTTTCCTCTATGGGGCAGGGGCTCCCAAATTAGCCCTGGTGACATCT +GGAGCTCACCTCTGGGGGCTCTGGGGGCTGAGAGAGCAGGCGGGGCCCAGTTCTATCAAT +CATCTGCCTTGTTCCTGGGGGATCGCGGTCAGTCCCAGCCTCCCCACAAGTCCTGGGGCG 
+GGGCAGGCCTGCTGGAAACCCAAGATGCAGAGAAGAGCGCCAAGCACCTGAGACAGGGCT +CTGAGCACGGAAGGCAAGGACGCCTCCCTCAGCCTCAGCTGGGGAGCCAGTGGCCCTGCA +TGCACTTTCTCCCGTCTGGGGAGCCTGAGCCCTGACCCAGTCCCGAGGCCCATGTGTGCA +CAAAAGCCCCCTGCTTTCTCACCCAGGACACTGGCGATCAGTGGGTAGGCGAGCCGGAGC +CCAGCTGCTGCGCTCCCTTCCCTGCCAGGACCGGGCCAGCTGCATCCTCATGCCTGGGTT +TCAGGGCTTCTCGGGGGATGTGATGGCACTGCCTGAGGATGTGGGATCCTTCCACTGAAA +AGGGCCACACGGCTGTCCCCAGCACCCTTCCAGAAGGGACAACCCACTCCTGGGAGGTCT +TAATGTAGACATCGAGGTCCCCCGAGCTTCCTCCATGGCCTGGCCACTTCACAACCATAT +TTCTTTATGTTCCACGCGCCTTGCCTGCTCCACGGCCAGGACCAGGGCCACCATCTTTGC +AGCCGGTGAAGGGACAGCTGCCGTGGGCTGGGCTGCTGAGGCAGGACAGGGGTCCGGGCC +AGGGAGGGGTGGATGGAGGCCAAGGATGGGCCCCTTGGAGACAGCCTTGGGGTGCAGCCG +GCTCTGCCACCTCTCCCTGGCTTCCTTCCCTCCAACCTGTGACTCCTGGGAAGTCCCTGA +TGGGCCAGCCACTTGGGGCCGGGAAGTAACTGTGGTGTGTGGTGGTCACCCTCACCAGAG +TTCATGGGGTGGAGCAAGGGGGAAGAATCTCCCCCAAAATCAGGTGTCTTGTTCCCAGGC +CCAGCACAGAGACCTGGCCCCCAGACATCTCTGGACATTTGTTAAGAGTGGACAACTGCT +TCCATTACACAGGAGAGGAAACCGAGGCTCTAAGGGGTCCCAGACTTGCCTGGCTGCCAC +AGTGAGCAGGCAACAAATACCAAGTGGCTGCCGCAATGGGGTAGGTGGGAGGCAGTGGCT +GAACCAGAAAGTGCTGCTGTCGATTCTGCTGGTTGCATGAGTCTTAGGAGATCATCAAAG +GGCAGATTGTGGGGAGATAAAAAGTATGAGTGGGGAACAGGATTGGGGGGAGCAGAAGAG +CCTCAAAGACCCTGCGACATTCTCAACGTCTCGATGGTGACTCAGAGATAGCTCCAACCC +GCATGGCGGGCAGAGTGCGGAGGGGACGGCTTTGTGGCTGGATTAGTCATCCCGGTGCAG +CTCACCCCAAATCCCGGCTTCCAGCCCTGGGCCGTCCATGCTGGGATGGAGGAAGGGCAG +CCTCTCCGCAGGGCCACGTCCTGGAGCCTGATAACCTGGCCAGCACACTGTGCAAATGGT +GCATGAGACAGGGAGTTGGCTCTGTGTTCCCCACGGTGGCCAAGGGACCAGCTGTGCCTC +AGTTTCCCTTATTCTATCAAACTTTGAAGAAGGTCCCGTAAAAGCGCATCTGTTTCCAGG +GTAGGGGCCAGAGTCAGCCACCCACACCAGAGTGAGAATTGGTGGGTGCCCTAGCCCTGG +CAGGATCTGGCATTTTGCTGGACGGAGAGAGGAGGCAGCACCCACTGGGGCTCGGGCAAC +CATCCCGGCTACCCCCGCCCCGGCCCGCCAGGAGAGGAGGGAAGCCTTGAAGTGCCAGGC +CTTTGAATCGCCCATCTCCATGGCAACGCGTGGGCACAAAGGGCCGGGCCGGCGAGCAGG +CGGCGGCTGCGCAAGCTGGAAGGAGGAAGGGAATCTTTTATTTATGGGGAGGGAGAGTCG +GGGAGCCAGGCTGCGCCCACGCTCAGGGCCAGGCCGCAGTCTTGAGGCTGGGCCCCCTGC +CCCATAGCCAGGGCCTCTGAGCAGCACCTGCCCACACACCCCTGACCCTCCGTGCCTGGC 
+TGGGTGGACGGAGCATGGGCAAATGGAGGGCAGAATGGAGTGAGTGTCCAGCATGCACCC +AACCTGGAGATCGGCCCAGAGGGTCCCTGGGAGCCTGCCATCCTCCATACCCCAGTCTCT +GTGATGCGGGCACCCGGGCAGATACCATGCTGAGAGCCAGCTGGGCCTGCAGTGAGGAAA +CGTAGCCCAAAGCAGACTGCAGAGCTGGAACTCAGGTGGGGCTGCTGCCATAGGCTCTGA +AGACAGCGCAGGAGACCGGGAGGGGTCACCAGTGCCAAGGCCCTGAGGCAAGCCAGAGCT +GGGAGCATCTGAGGATGGGCATTAGCGTGGGGCTCCAGGGTACAGCCGGTCCACTGCAGG +CAGCCTGGCCTCCCCAGAGACCCAGCCACGGCTCCTCACCTTGGACAGGGGGCCCTGGGC +CTTGGGCATCAAATGACCTGCTTGCCGCGGTGGAGGCTGCCATACCAGGACGCCGGCCCA +CGGCCTCCAGCGGCCCGTCCCCACCTTCCCAAGCCCCTCCCCTCATGGCTCACACCCTGT +TCCCTGAGCCCCCAAACATGGGAAATGGCACCTGTTGTGCCCCAGCGGGGGAATTCTGAC +CCCAGACCCTGGGGCCAGTGCAGAAACCACCACTGTATCAGATTTTATTTCATCTAAAAA +ATATCTGAGCCAGATTGAAAGCAGCGGGGAAAGGGTCCCATGGCTGCTTCCCCCAGCCCC +ACCCTCCACAGGCCCTGGGCCCTCCCAGCCCCCATTTCCTTTCCTTGAGAAAGTCTAAAG +TGGGGCAATATTTTGTGAAATTGGAGGCACCACGTTGGAAATTCCAAACCCGTGTGTACG +ATGAGCTGCACGCGCGTTTTGAACCTGCCCTGGTTTTGAGCTGAGGGAAGCGGAGGGTGC +AGGAGTTGCCCACTCCAGAGGCCCTGGGTGTGCATGAGGAGCCCTGAGGCTGGGCCGAGA +GCCGCTTGTGGGAGGCACTGTGCCCCCTGCCCCCGAGATCAGGGCCTGGGTGCCCAGGAC +ACTACAGACCTTTTGGGGATCCTGGGGGTTGGGAAGCCTAGAGCTGGAACCCACAGGCCA +GAGTCTGAGCTTTCATCCCCCACCCCCGAAGACGCTCCCCCAGCCACTGCCTGCGCATCG +GGGGAGACAAGCTCCCCCCACGCCCACCTTGCCTCCAGCAGGGCTGTTCCTCCCCTCATC +TGCTGGCATCAGTTTTTTCTTTAACTGAGCCCCATGCTGGGGAAGGGAGAGCTCCAGGGG +ACCAGGCAGCAGAGAGGGGAGGAGAGAAGCCATCAAACAGGCCGGGGGCTGGGAGGCCAT +GCACTCAGCATAGAGCCCCACATGGCAGCAGGGGGCTGGATGGCCGGCTTAGAGCCCATT +CTCTCCTGGACACTGTTCTGGAATGGCCCCTGGCCCCTCCGTTCCAAAGATGGGACCCTC +CGGTCCCCGGGGACCCGGGATGATGTCCCCCAAACCACCTCTCTTCCCAGACTTCATCAA +TGGCAATGAATGGTGCATCCTGGCTGGGGCCGCCGCCCCTCACTGTGAGACTTCAGCCCA +GACGAGCGCCCAGCCCCTCCTGGGGTCTCAGATAGTCAGAAAGAAAGGCTCACGGCTGCC +ATGCACTTCCACAGGACTTTGTGGTTCCTCCTGCACCCCCGGTGATGCTTTCATCCCTCT +CCAGCACCATTCCAGAGGCCAGAAAACCATGATGTTTGGGGTCAAACAGCTGGGAGAGGC +AGTTGGAACTCAGGCCAGGCCTGTCTGACTGGACAGTGCAGAGACTGGGACCATGCTCCC +CTACAAGTGCTGTCAGTGCACCCTCGATGCCCCCCAGAGCTTGCCATGCCCCGACAGTCC +CTTCATGGCCAAAGTGTTCCCTGGCACAGGATACCCTGGGGTGCCATGTCTGCAAGAGCT 
+CACACCCTTCTCTCCCCAGCTGCTGCTAACTGCTGCCAGAGGTGGGAGTGTGGAGGGCTC +GGGGCCTCTTCACCCTCCCTGCACCAACTCAGCTCCAGAACATTCCCAGCAGTGAGAAGA +GGGTGGCCACTGTGCTGCAGCCCCAAGGGATGGGCAAGGCCATGCCAGCTGGAGTCATCC +ATGTCCCCTTGGACATGGTGGCCTCATTCAGAGGGCAGCATGGAGGAAGGTGCAATGAGG +GTGGCTACCAGAGACAGTGGAGATGAGGCAGTCAGGAAGCCCAGGGACAGGACTGGAGGG +GACACACGGCCAGCCTGGGACAGGGCATGAGGCCTTTGTTCTGCGTGCTGTGGCATACGG +TGTTGGCTCTGGCCCTGGCCCATCTTTTGTTAGTGGTAGGAGGGTGCTCCTTTAATCAGC +CAACTCTTTGTTTCAAATTGCCCCTGTCCTTGAAAACACTGCTTCAGGGAAGCGTGAGAC +AAAGCCAGTTCAGTGGGGTGCCTACCTGGCCAGCAGCCCTCATCCGCAGGGGCTGCTCCT +GGGGCACGTGCCAGAATCAGACAGTCCCACCGTCATGCAGGTGCTGGGGCCTTCAGTCAG +CAGGACCAGGCAGCCCATGTCCTGGGGACCCTTGGGGAGGTGTGGGCATTTGCTGAGTTG +GGCTGTCATTTGCTAGACAGGCGCTTGGCGAAGGGGCCCTTTTGTGCATGGATGCCCTGG +GCAGGTGTCAGTCCTGAGTCCTGGAGAGGAAGGCAAGCTTTGGAGTTGATGCTGGGTCCT +AGGAGGGAGCGTGTGGTTTCGCCAGCTTCTCAAAAGTCACCTGAGACCCTGCCAAGGGAC +CTCCAGACATTCGGTGCAGGCTCCTGGAGGCTGGACAGAGGGCACCAAAGGGCGACGGGC +CCCGAGGACAGAGGTGTGCAGCCAGGTGGGGCCATGGCCAGGCTGGGACCTGCTGGAGCT +CTTGTGTGTGAGCCTGGAGGAGCTGCCAGGCTGGGGGAGATCGGGGCTGGGGCTGGGGGC +GGGGCTGGGCGTCAGCAGGGTCTGCAGCTCCCATCTTCCCAGCTTATAGACCTGAATATC +CAGGTTGAGAATAAACCATTCTCCCAGGGCACCATAGCCTCCAGGAAGCACCTCCTCCTT +CCACCATCCGCCTTTCAGGAGGGGCCTCCCTGGGGGGCTGTCTCTGGGACTGCAGAGGCT +GCTTGGTGTATCTGGGGCTCTGACATGAGTGAGATTGTTCCGGGGCCTGTGCTGCCCAGC +CTCTTCCCCAACAGCTTCCTCTGGAGGCAAGGGGCTTGGCAGGAGCTGCAGGGAGCCAAT +TATCCCTGAGGCAAGCTGGCTGCAGATAATTGGGAAGACAGGCTTTTCACCAGCAGCCAA +GCTCTGGGGCGCACTCGCCACACCAGGACTTGGGACTTGAGGATCCAGAGGTCTGCCAAA +CCCCACGCCTGATTGGGAGCCCCATGTGCTGCCATCTACCAGGGGAGGTGGCATTTTCTT +GGCAACTGAGGGGGGAAGCCAGGTGCTGAGGTGTGGCCCACGGGACGCCTCGCCTGGGGA +CCTCCCAGCCCTTCTCCTCTGCCTCCTAGGGGCCTCCTCTCCTTACACATGAAAACCTCC +AATGTCTAGCCCAGGGCTCCAGATTAGGGGTCAGTGCCGCCCCCAGCCCAACTTCCTCTG +GGAAGTGTGGGGTTCTCCCCGGGGCACTCAGGCCTCTTTCTTACAGGGCTTCCCAAGGGG +AGGCATCCAGAGGAAGGCAAAGGCTGTGATCAGGTAGAACCCGCTGGACGCCCCCACCAT +CTCCCTCCATCAACATCAAAAGACAAGCTGACCACCCCCAATGCAAGGTGGACATTAGGG +TTAAACCCTGAGTGACCAGCACTCAGCTCCCAGCCTGGCCGGGATGCAGTGGGGTGGGGT 
+GTGGTGGGCTCATGGGGCTGGTGAGAGCCCGGTGGGGTGGGTTTCAGGTGCTGCTTGTCC +AGGGCTCAGCTCTGTTCCTTGGGGCTCCCTCAGCCCAAGGGCCCACCTCCCCTCTGGGCA +GCCCCGATCGGCGTGTTCTGCCTCGGCCAGCTGTCAGCCACCTGCAGCTCCTCCCTCATT +GCACCACCCTGGAGCATCACGGTGCCCAGGGCAGGCCGTGGAGACCTGGCTAGCCCATGG +TCTGCTCAGTCCTCACCAGCTGTTGTGACAGAAACAAGTCTGTGCTGATGAGCTGAGCCG +ACCAGGCTGCAGCCCTCACTTCAGGAGAGGGTCAGGCCCAGCCAGGCCTGGTGAGCAGGT +GTGGGGAAGGATGGGTGCTCATGTGCGGGAAGTTTCTCAAGAGTGGTGGAAGGAGGATAG +ACAACGAGGGCCCAGTGGATAGGGCGGGGAGGCAGGAAGAGGGGACAGAGAGGGGACCAG +TGAAGGAGAGTGGAGGGTACTGCCCCCCTGGGGGAAGGATGGGGTGGGGCTGGTGTTAGC +TGGGTTGGGTGTGTGGGTCTGGCAGGGAGCCGGGCATGGGAGTGGGTCCCGAGGAATGCT +AGGTTGGAGCCAGGACCATGGGCCTCACCAGTCATAGAAGCCTCACCTCAGGGCAGAGGG +GGCTGACAGACTGGGCTGACGGGAAGGGGGGCTTCCTGTCACCAGAGAACATAAAGGGAC +CACTAGTGGCCTGGGCCATGAGGCTCTGTGGCCCTCAAGGTGCTGAGCTCAAGGTGGGCC +CAGCCCATGGCTGCCTTTGTGGGGACCCCACAAGGAGATGCTGGCCTCTGAGGATGCTCA +GACCTGGTTCAGCATGGCCAGGCCCTGGCCTAGCCCAAGGTCAGCACTGGCCACTACCCT +GGGGGCTGAGTCTCCGTCTCGCCCTGTGACATGGTGCCCACACTATGAGCCAGTGTCAAC +CCTTTGCCCATACACCGGGAGCCTTGTGTGTCCATACTCATGGCTGAGGGCCTGTTTGGG +GGTCTCTGTGTTGGGGGAGGTGGGTCCAGGGTGCGTGGCCCCTGGGATTGTTCATCATGA +GTGCCACCTGGAAGGTGCTGGAGCCTCACCCGTCCTCCAGAGTGTTTTGGGGAGATCCTG +TGCTGGCATCTTGGCTGGGCCAGGATGGTGTCTAGGAGGGACCCTTGGGAGGGAAGCAGG +GGCATCAGGGGGCCTGGTGCTGGGGTGGGTGGCAGGAGAGGCCTGGAGGGCGGCGGCCCA +AGCCAGTGGCTGGGCAGGTGTCAGGAGGGAGAGGGGTGAGGCCTGGCCTCCCTGCCACAG +CTTAGCCAGGGCCTGGGGTGTCTTGGGGTGGGACAAGGCCCAGGGCAGAGGCAGGGCTGC +TTGCATGCAGGGTGGCCACATCCCGGCCTGGTGAGTGCCGGCCACTCAGTTCTGTGGCCC +TCTGCTGCCCTCTGCCGGCCGCCTCTGTCCCTGCAGGCCCAGCCATCTACCTGCAGGTCT +GGACAGACCCTCAGAGCCTAGGCTGGGAGGTCTCAGGAGTCAAGACAGGGTTGAATCCCA +TTTGAGGGCATCTGAAGGAGTGAGAGCCTATGTCAATCCCCTGCCCAGCCTTGCCTCGGC +CAGTGGCTCCCAGGGGACTCTGGTGGGCAGTGGCTAAAGATGACAGAGGGGCTCAGAGAG +GCTGAGCTGCTCTGTAAGGTTGCACAGTGAGCAGGGGCCGAGTGGCCTTGGGAGAGGCAA +CAGCAGGAGGGTCCTCGTCATGGCCACAGCCCAGGGACAGGAAACCCCTGCTGGGTTCCC +CCACCAAGTAGCTCCCATTCCCAGCCCTTAGGGTGTCTCAGGAGAGGTTCCGGGGGAGGT +CCCACCTGCCTCCTGCCTTCTCCCCCAACCACACCCTAAATGGGTCCCAGCTCCCCAGCC 
+CCTCCCACAAGGGCTCACCCGCCAGAGCTCGGGGAGAGGCCCTGCAGGGCGGCTGTTCCT +CCGTGTGCTGCAGTCCTCTGGACACTGAAAGCCAATCCCATCCATCTTAATGGCATTTAA +TATGCACAGTGAAAGTCAGCAGCCACAGCCACGGGCGTGTGCTCAGGGCAAGGTTTACAC +GCGGATGCCACCAGGCTTAGCCCATGAGTGGCCTCCTGGGTCCCAGGAGACCTGTGGGGT +GGGGGTGCAGCTGGGTGGGCCATGGTAGCAGAGACCTGGGGAGGGGATGTGGGAGGTAGG +AACTGGGCTAGGCAGGGACTCAGGGGAGGCTGAGGCCTGGAACAGGGAGACTGAGACCTC +AAGGAGGCTGGGACATTGGGAGTCAAGACTTGGGGAAGCTGGAACATGGGCAGGCCGAGA +CGCAGGGGTTTAAGAATGAGGGTCTGGTGCAGACCTGAGTTGGAGCCCAGCCGCTGCTGG +CCCCACGTAGCTGGAGAGTGTGCTGACCCCATGGTGGCTAGAGAATGTGCTGTTGGTAGG +GAAGGCAAGTGCTGGGAAGGGAAAAGTGTGGTCCATTTAAATGATACGGAAGCAGGGGAG +GGAAGTGCTGGGTAGAGGAGGGCATGGTCCCAGACTAGGGCTCCAACCCCACGGACCTAG +GTGAGAACAGGCACTTCCTACCCAAATGTTGCATTTCTCACGACCACCCTAGCCTGCCAC +GCCCCCATCCTGTGCTTATAAAATCCCCCGAGACCTTAGCAGGCAGACACAGGCAGCTGG +ACGTGGAGAGGAACACATCAGCGGAGACACACACGGGCGGCTGGATGTCGAGATGAACGC +ACTGATAGGCACCGGCATGCTGGCAGGCCACTGACTGGCAGAACCACACGGAGTTTGGCT +GGGGCAGTTGGAGGACAGCCCGGGTGCACAGCGGCCCCACTCCAGGGGAAAACCTTCCCA +CTCCACCCCCTTCTAGCTTCCCCTATCTGCTGGGAGCCACCTCCCCTCAATAAAACCTTG +CACTCATTCTCCAAGTCCACGTGTGATCTGACTCTTCTGGTACACAAAGGCAAGAACCCC +AGGATCCAGAAAGCCCTCTGACTAACACAAGCCACCCTACAGACGACAAACTAAAAAAGC +CCCCGTGACACACGCCCACTGGGGCTTCAGCTGTAAACCTTCACCCCTAGACACTGCTGT +GGGGTCGGAGCTCCACAGCCTGCCCGTCTGTAAGCTCCCCTAGAGGTTTGGGCAGCAGAG +CACTGAAGAAGCGAGCCACTCCCCTTCTCACACACCCTGCTAGGGCAACAAGGGAACTTT +TTCCGTTTCACTGGCCCAAGGTGGCTGGACAGTGTGCTGGCCCCACGGTAGCTGGAGAGT +GTGTCGCGCTCCCTGGCTTTCCTCCTGATGTATAAAATGAGTTAGCGGGTCCTGGGGGCT +GTTCCCACAGTGACGTGGTGGAGGCTGACCCTGACACCCACCCCTGCTGAATGCTCAGTG +AATGGTGGTTGCTGTCAGGAAGCATCATTGCTGTCCTCCATCAGTGTCTGTGGGGCTAGG +TGACTCTTGGAGGATGCTTCCAGCAACGTTGCACTGCCAAGACAGAGGCCGTCCACCTGC +CTCTGGGCTTCCTGGTGTGAACAGATGGATGGATGCGTGGGCCTGCTGCGTGCAGGGAGC +TGAGGCTGGGTCTTCACCTATGGGCAGGAGGCTACACAGGGCTCTGGGAGAGACAGCACT +GTGGCTGGGACAGAGCCCATGTGATGAGATGCCCTCGGTGCCCCTGGCCTCCCTCCTCGC +TCCTGCCCCTCTGGAACATCACCCAGAGCCCCCCTTGCTCTTTTGGGGGATGTCTGGGTG +TCTGCGGTTGCTGTCACTCTTGATCCCACCCACCCACCCCTGCAGGTTGCTGGCAGGACA 
+AGTGCAGGAGTCATCTCATGGCTTTCCCTGGGATCTCTGTACCCGCTGAGAGCCGGGAGC +TGCACACAGCCTGTCCACCATCCTGGGAACTCACCCTGTTGCTGCCGCTTGTCCTGGGTC +ATCCTGGGGCTCACTCCTTGCCATTCAGACCCCTGTGAGGAGAGCACTGGCTGTGAGTTG +CTCTGTGGCTAGTGATGGCAAGGCAGGGCCTGGAGTAGATCCATCACGAATAGTTCTTTT +TTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCACCAGGCTGGAGTGTGCGATCTC +GGCTCACTGCAACCTCCGACTCCCTGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGT +AGCTGGGATTAAAGGCACATGTCCCTATGCCCAGCTAATTTTTGTATTTTTAGTAGAGAT +GGGGTTTCACCATGTTGGCCAGGACGGTTTCGATCTCCTGACCTCGTGATCCACCTGCCT +CAGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACGGTGCCCAGCTGAGAATTCTTA +ATGAAATGGGAACATGCCTCCCACGAAATGCACAGGGGCCTGCAGTGATAGAGTGGCCTC +AGGTATGTGCACAGGTGTGAACACGCCTTCCACAGAGGGCAGTGGACGTGCACATGCCTC +ACACACACTGTGGCGCAGGGCAAGTGTCCCACATGTGTTCATGCTTGCGCACATGTCAGT +GGGCTGGATGTGTGTGTGCACACCTGTGTCATGCCTGCAGAGAAGACAGTGGAGAGAAAT +ATGCCCAAATCTCCCCTCTGGGTCCCTGCCCTGGGTGCTGGGATGTCAGAACAGGTCGAT +GTCCTTTTATCCTTCCCCATATCCACCACGCTGTCTACCAAAGCACGCTTTAGTTTTATA +ATCAGAAATGAAATGCACCGTTTCAAGAAAGCCATGGGCCAGCCCCTCTGTGCCCTGGTG +TTGGCGGGGGCAGAGGGGTGGGGTGTGTAGGGGGAGATTAGCCTCATGCAGCCGTGGGGC +CAGGGAGGCCAAGGTCAGCTTGCTCAGCACCTGGAGGTACAGCGAGTACCAACAACACAG +CCTAGCTTGACCGCGGAAGGAAAATCAATGCTGGGCGCAAGGACTGACGGCAGACTCACT +GGGAACACTGTCCACCGACGTCAGAGCTGCTCACAGGGCACCGGGAGCTTGCCAGCGAGC +TTGGCTGCCTGTGTTTGGAACAACCCTGGTGGGCGAGCACGGGCCCCTCACCTGACTCCA +CTCAGTGCCCTTCCCAGAGGGGAGACAAAGGCTGTGAACAGCTCTGGAGGCCCTCCCTGC +CAGTGCCCCCCAGGCCTGGCTAAAGAAATCTCAAATACCCCGAAGGGCCTGGCTCCAGCG +GCCCCTGCTATGGGTCAGCCTCAGGGGGATGCTCATCTTCAATTGACCCTGACTGTTCCA +CATTACAAGATAAACTGTGGGGTTAACTCAAGGCGGTATTACTTTGTAAAGACTGCTCTT +TATACAAGAGCGGTGGATGCAAAAACAAAAACATTGCCATGTAAGACGGGAGTCCTCCGA +GTCCTGTCTTCCTCCATTCCGTGTCCTTGCACCCCATCCATGTGTGGCCTGCATGGACAG +ACAGCTTCACACACTCAGACTCTTCCCTCCCCACGCATCAGAGATGTGTGTCCCTGTTAC +TGACATGCAGGGAGGGGCTCATCATCTTGGGCACACCCCATGGTGCTCTGAGCCCATCAT +CACCTATGGTGGACGTTGAAGGAGTTCCCAGGTTTTCCTTTTCAGGAGCCATGGCTCGGG +CCAGTGCCTAGGAGGTCAGAGGTTGGGGGCCCAGGGAGAGGTCACTGCCCAACCGTGCCA +CCTTGGACTCAGAGTGCCGGCCCAGGGAACCAGGCAGGCCCGGCTCTGACCGTGCCGCTG 
+GGCGACCGCTGGCTGAGACCTGGGGCTGAAACACATCCCACATCAGCCAAGGAAACAGGC +TCTGCATGAGGCCTGAAGCCCCTGAAGGGCTCTGGGACAGGAGGAGAGCCCAAGCCTTCA +GCTGTGATGTGCACAGCACCGTCTGTCCTCCACCCAAGGGAACTCACCTTGGTGACCCTG +CTGGAAGGCACAGGTTGCTGACATTCTGACCCCTGACATTCGGACCTCCAGCAGAGCCTG +CAGTTTGAGACCTCTGGTTCAGGAGCTGGGACTGATGGGGCCCTTGGGGATTCCCGTGCT +GCTGGAGGAAGATGGGGGCATGGGGGTGTCCACAGCAAAGAAGTGAGAAGTAGAGGCCGC +CAGCCCCCCGCCCCGACCCTTCCCTGGCTAGAGTGGGGCAGGCTTGGGGGTGAGTCTGGG +GTGCTGATTTCACCCTCCTGGGCCAAAGTCTCCCCGTGACTTTATGGACTGTCTCCCTGT +GCTCCCTGAGACCCTGGTTCTCCCTCACCAGCCTGGGTCCTTCTTGTCTCAGCTCAGCTG +TCACCTCCTCCGGGAAGCCCCCAAGCCCTTGAGTGGGTGCCCAGCAGGCATGGTCAGTGC +TGAGGCTACAAGCCCTGCCGGCACCCGGCAGGGAACACAAGGCCAGGTCAGGCTCTGGGG +ACAGTAGACAAGCTTATCCCTGCACACTGGGGACTGCCCACCTGGGGGAGAGAGCTAGGT +CAGGCTGGGGAGACGTCAGGGGATGGCCGGGCACCCCTCCCTGAGGAGAGCTCCACCTGA +GCAAGAGTGTCACCAGGCCCACTCCAGACACATGCACTGCCTCAGACGGGCTGGAGAGAG +GGTGTGGCCGGGCCATGGGATGAACCATTCTGGGCGGTTGTCCTGGCATCACCAGACATG +TGGCTCTATTAAGCATCTCGACTCCACAGGAGCCCATGTGGCATGTCCTCTGACCTCCTG +TTCACTTCTGAAAGGGGCGGCCCTCTGAGATTCTGCCTGGAACCTGTCAGCCCTCCTGAC +AAGGTCTCTGGTTCCCGGCCATCACAGGCACCCAGCCACCCTACAGAGAGGCTCTGGGAG +GCTCAGCCTTGCCTGGGGCCCCAAGACTGATGCCAGCTGGGGCATCTCACAGGGAGCACC +CCTACCTGTAAACCGCCCAGCTCAAGCCCTGCCTCGACCTCTCCAAGTCAGCAGCCGACC +TTCCCCAGGGCCCCTCCCAGCCTGCAGGGGGTCTGAGGCTCTGAGGCTTTTCTGCAGCCT +CTCAGTCCCCCACCCTGCTACCTGCTCAGGCAGAGAGATCATGGCTCTGGGTTATTGTGT +GCTTAGTGGGTGCTGGAGCTCGTGCTAAACAACTTGGGTGCCCCCTGAATCTTGACAATA +ATCTGAGGGGGCATTATTATTACTATTCTCACTTCCCCGAGGAAGAAGTCCCAAACCCCG +TCTGGCACAAAGCCAGCGCCCCGAGTCTCCCTCTGCAGTCCATGTGCTCCCACGTGGGGC +ATGCCTGGGACATCACAACATCTGGGGTCAATGCGGCTGCTGTTGTCACTGTGGCATTCA +GCTTTCCCAAGGCTCACATCTGCTCCATCCAGCAAAGGGGTCTTGCACCCCAAGCCTGCC +TCCCTCTTCACACACACAATCCCACTTCGTCAACCACAAAGATGCATGGGTTTACCCAGC +CTCAACCCCATCGGCCCCTCCCCTTCCAGTTGCCGGAGGGCTCGGGCCCCTTCAGCTGGG +CTCCCACATTCCTGACCCCTGGCCTGCAGGCCCTCAAGACTATGCCAGCCCGGGCTCACC +TGGTGCCAGACACCTGCATCTCCCTGGGGTGGGGGTTGGGGGCAGTAGGCTCCGTGGACC +TAGCACACGTATGATTCTTGGAAAATGTAAATGTCTCTGAGGACAGCGATCCCAGCTTCA 
+AATATTGCTGTGGGCTCTCGGGCAAGTCCCTCCTCCTCTCTGAGCTTCAGTTTCCCCCGC +ATAAAATGGAGCAGTGATCGTAGCTCCTGCATGGGAAGGTTGTGAACACCATGCTTGTCA +GCAGAATATACCCTAATTTTGCAAGGAAACAATATGGCAGGGGTGGGGGGGATCTAAATA +CCTTTCCTTATACACGCATTGAAAATGATTATATAAAGTGGTGAAGTGTGATTGCATTCA +CATAAACAGTGGCATGTTCTGGGATCTAGCAGACAATCTTAACTTCAGGAAATGCAATCA +TATTTTACTAGCTTTGAGAAAAGTGAATAAAATGTCATAGCTAAATAGAACTCAAGTAGG +ATCTGTGAACGGCAGCATGGAGTGGATAAGGGGATAACGTACCGATGAGGGAATTTATAT +ACCCAGCAGCCCAGGTTTGGTGCAGGCTGATAACGGAAAGACAAACTCCAAATTAAATGA +ATGTGAATGAATTCCTTTGCCTGCGGACAGACCAAAAATATCGGTAAAAGTGGCTACAGG +GCAAGCAGAAAAATGCAACAAATTAGAATGAACGCAGGGAACCATGTTTCTGGCAGAGAA +ACAAATATTTCAGAGTACAGGTAAGAGGCACATGGCAAAAAAAACAACAACCTGACTTTG +CCAGAGCTCCAATGTCATGCTCACGGCTGAGGAGAAACAATTATCTCGCTGGGAATGAGT +GCAAGTGAGAGAACGGAAGGAGTCCTGAGCACGGTTGACTGCATGTGCCCAGGGGCCCGG +ACAGGCAGCACAGCGGCCTCTCTACCCTCGAAGGTCTGTTTCTTGTCTGCTTCCTCCTTG +AGAGTGGCAGGCCCTGGTAACTGGCCCAACCTCGGGGGCGCCCAGCAGCTGGGTCCCCAC +CACATGCCCAAGTCTGGCCCTCTCAGATCTGCCTGTCTCCTGAGCTGGAGAACGGAGCCC +ACCCCTCCATCCCCGACACTCATCTCCCATCCAGTTGATCTCCCTGAGGACCCCCCCACC +GACAACTCCCACCTGGATCCCCTGGCCCTGGTGGGGCATCATTCTTTCAAAATAATCCAT +GACTGTGTTCAGTGGCTCACGCCTGAATCCCAGCACTTTGAAAGGCCAAGGTGGGAGGAT +TGCTTGAGCCCAGGAGTTTCAGATCAGCCTGGGGTAACACAGCCAGACCCTGTCTCCACA +AAAAACTATTTTTTAAAAATTAGCTGGGTGCAGTCCAGCCTATAGTCCCAGCTACTTGGG +AGGCTGAGGCAGGGGGATCACAAGCCCAGGAGTTCAAGGCTACAGTGAGCTGTGATCACA +TCACTGCACTCCAGCCTGGGCAACAGAGACCTTGTCTCTTAAAAAAAAAAAAAAAAAAAA +AGCCATCAGCTGGCTCAGGGTAGCCACCTGCCAAGGGTCCCCAGCTGAGCGTAAGACCTG +CTTCCTAGGATTTACAGAGCTCCTGTGAAACTCGGTTGTTTCATTGATTGTCACATGGAG +CCAGTGACTTTCAGGCCAGCCTTGTTCCCACAGGTGGGCTCTCCCGTGGGCCTGGGCACC +CAGCGCTAGGCCTGCCCTGCTATGGTCTGGATGTGAGCAGAGATCTCTGTGATCCCTGTA +CCCCTCGCTTCTCCCCCTGCCCTCTCTGCAGCCCTGCCCCTGGCAGCCCCTGCCCACCTT +CGCCTCTGTGCAGGGCCACAGGCTTCCTTGAAGCAAATGCTAGAACTTTGAGCTCTTTGC +TTTTTAAAAAAAGATAACAGCTTTATTGAGATATAATTCATTATACTATTCATTTATTAC +CTTTTGCATTATACCATGCAATTTTTTCCATTTCAATGGCACAATCGAATGGTTTTTTAG +TCTATACACAAGGTTGAGCAGACATCAACACTCTGTAGCCCCAGAGTATTTCCAGACCCC 
+AAAGAAACTCCGTGGGCCTTGGCAGCCCCTCCCACTCCCCCTGTCCTCCACCTCCAGGCC +CTTACCAGTGCCACAGTCACCAGGCTACATTATGGGCTTGTCTTCGTGTGACTTCTGTCT +CTATGGGTTTGCCTATTCTGAACTCTTCATATAAACTGAACCATAAAACTTGTGGCCGTC +TGTGTCTGGCATCTTTCTTTCTTTTCTTAAAAAAATTTCACTTTTATTTTAGAGACAGGG +TCTCCTTCCATTGCCCAGGCTGGAGTGCAGTGGCACGATCATAGCTCACTGCAGCCTCAA +CCTCCTGGGCTCAAGCAATCCCCTCACCTCAGCCTCCTGAGTAGCTGGGATAACAGACAT +AGACCACCATGCCTGGTTAATTTTGTATTTTTTTATAGAGATGAGGTCTTATTATGTTGC +CCAGACTGGTCTAGAACTCTTGGGCTCAAGTGATCCTCCTGCCTTGGCCTCCCAAATTGC +TGGGATTACAGGTGTGAGCCACCACACCTGGCTATTTTTATTTTTTAATTGACACATAAT +AATTGTACATATTCGTGGGGTACGTAGTGATGTTTCTATACATGCAATGTATGGTGATCA +GATGAGGGTAATTAGCACACCTATCATCTCAAACATTTATCATTTCTTTGCATTACGAAC +ACTCAATATTCTCTTTCTAGCTAGCTGAAAATACATAATTTTTGTTTTGGTTTGGATTTT +GGTTTTTGTTTTTTGTTTTTGAGACAGGGCCTCACTCTGTTGCCCAGGCTGGAGTGCAGT +AGCACCATCACGACTCACTGTAGCCTCTACCTCCCAAGGCTCAGGTGATCCTCCTGCCTC +AGCAACCCGAGTAGCTGGGACTACAGGCATGTACCACCACACCTGGCTAACTTTTGTATG +TTCTGTAGTGATGGGATTTGACCATGTTGCCTAGGCTGGTTGTGAACTGCTGGGCTCAAG +CAATTCTACCCCTCTAGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCGTGACACCT +GACCCATAATATGTTGTTGTTAACTAGAGTCATCCTACATTGGTGTAGAACACTAGAACT +TATTCCTGCCATGTAGCTATAATTTTGTATCCTTCAACACATCTCTCCCTCCCTCTCCCT +CCCTTCTTTCCACCCTTCCCAGCCTCTACTATCCTCTGTTCTACTTTTGACTTCTATGAG +GCCAGCTTTTTTTAGCTTCCGCATATGAGCAAGAACATGCGGTGTTTAACTTTCTGTTCT +GTCTCATTTCACTTAACATAAGTTTCTCCAGTTCCATGCATGTGACTGGCTTCTTCGATT +TTGCATACTTTCAAAGTTCATCCATGTCATGGCATGCATTAGTACTTCATTCCTTTTTAT +AGCTTAATAGTATTCCACTGTATGGATGGATCACGTTTTGTTTGCACCGTTTTGCTGTTG +TGAATAGTGCTACTGTGCACCAGGGTTGTATTCCTCTGTGTGCCACTGGGTTGAGGGTTG +GGGGATGGTAGGTGCTCCCATAAAGGGGGAAATTGACTGATACTGGCTGAAATGGACCAG +GCTCTTCATTAAGCTGGCCTCTGAATGTTGCAAAAGCATTCTGTTGGTTTCCAGGATCAC +TGCTTAAGACGGTTCCTTTCAGTACCACTGTTGTCCAGGGGAGGGATGGGTTCCCGGAGC +TTCCTGATCTGCCACCTTCTCTGATGTCACCCTCAGACCCTCTGCTTTTGAGCCCCAGGC +TACAGAGTAAAAGTCTGATGGTAGGATGTGAGGTGAGGGACCTTCCTTGACCCGGCACCC +ACATGAGGCAGGACTGAGAACACCCTGAGGACTGCCCGGGTGGCTGGGGTGCTGAAGCCA +GGCTTGGCCCCTCCACAGGCCATGCATGACAAGGACCACCCTGTCTCTGAGCTTTGCCTT 
+GGCCTTGCCGTGACATTTTCAGGTGTGTTTCAGCTTAGATTCTGCTCTGGGGTTTCCCGT +GTAAGAACAGGGATGACGTCTAAGCCCTTGTCTGGCATTTGTCAGGAATATTGGGAGAGG +CAATGTGGCAGTGTGAGAAACAAGCTCACCCATCCAAACCTGAAGAATGGACTCAGAGGC +ACGAGCAGTGAAAGTGAGACTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCG +CCCAGGCTGGAGTGCAGAGGCGCCATCTCGGCTCACTGCAAGCTCCGCCTCCCGGGTTCA +CGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGTGCCCACCACCACACC +TGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCT +GGATCTCCTGACCTGGTGATCCGCCCGCCTCCGCCTCCCAAAGTGCTGGGATTACAGGCG +TGAGCCACCATGCCCGGGAAAGTGAGACTTTTAATAGTGGTCTTGCGAGATTGGTGTCTG +GTGGATAGGCGCACTCAGGGCAGTCACAGCAAGTAACTTATGCCCTAGCACACAAGTCCC +TCCTCCTCATTGGTCGAGTACTATGGGGTTACAATCTTCCAGGACATTGCCTAAGTTTCA +TTATCCACCTTATAAGGTTATACCCCGTCGCCTTCCCCGCTTAAGTTTCAATTTTCCAAT +AACGAAACTTTCTTCCCTTTTATGGGCTGACCCTTTTTCTACATTCTGTCTGCTTATTGT +GATCTTCTACGTGCATGAGCCGTGCGGTTTGTTACATTCTCAGGCTGGCTGCCAGTACTT +AGATTTATCATGCCTTGAAAATGGGCCATTTAAAATGTTTTCTCATGGCGGTAAATCGGT +GAGCTGACTGTCATACACTCAGTCATTTTGCCCAAAGTGTGTGTGATGTGGAGGGCTGAC +AGCCTTCTCCATTACCAGTGCAGTGAGAGGTGGCACAGCTGGTGGGACCCCCTCTCTCTC +ATGGATATGAGTGCGTTAGAGGGGAGGTCTTGGGCCAGGAGCAGATCAGATCAACAGGAT +TATCCAGAAAATGGCAGTCATGGCGAGTATTGGCTCCAGATGGGGAGGGTTAGGGTCAGG +GTCGGCTCTTCTCACCTTCCTGAGCGAGTCACCAGCGTCGACAAGTTACTCCCAACATAC +AGGGCCTGGGTGGAGGCTGAGTGCTAGGAACCCAGGCTCCAATTCTGGTAACCAGGTGCA +GATGCTCAGGAGAAGAATGCAGCCCAGACAAGCGTGGTGTGCCCTCCAGAGGAGAGGACG +TGGCATCAAAGGGCTGTCTTCATGGAGAGCAGGAACAGAGACTTTGGGAAGCCAGGGGGC +CTTGGCCTCAGCCCTGCTGGCAGAGGGTCCCCACCATGTAGCTGAAGTGCCAGGGTGCTT +GTGAGGACTGGGTGTGGTCCAGGGTGACTCAGGGCAAGGCCTGGGGGATGCATCTCAAAT +CCACAGTCACCAGCCTAGAAACTGGGGTCACACTTCCCACATGGGTTCCCACATGTGGAA +ATTTCATTCACTAAATAACGACACATCAAGGCAGCAGTATGGATGGGGCACAGCTGTCAA +AGTCACCAGCAACAAGTGCAAGAAGAGAAGAGGGAGTTGCAGCCACATAGGTGCCCAGGT +GGGCCCTGGGGGTCAGAGAGGGCCTGCCAGGGGTAGCACCCACCATGCTGACCCTGATGA +GGAGAGGAAGGAGGGTGTGGGATGCAGGGGAGGGAAAGGTGCCTGAGAGCAGCATGCAAA +CAGCCTGGGCTCAGGAAGAGCCGATGGCATGACAGGTGCTGGGGAGAAAGGCGTGATAGG +TGTGGACAGGGGCCACAGGAGGAGGGATGGTCCTCAGGAAGGGGAGACTGGTGGCTATGG 
+CCGGCTGCCGAGGCTAGCCTCTCTCCAGCAAAATGAGCCTGGATTCATCCACTCCTTTTG +TCTCTCCCCTTTGCTCTGTGGTGCCGGCAGGAGGAAGAGGAGCAGAAAAACTGTTGCTCC +ATTCATCAGGAAGCCTCTCTCTCTCTCGAGTGGTGGTGATGGTGTCCCCCAGGGGCACCT +GGCAATGTCTGGAGACATTTACGGCTGTCCCGACTGTGTATGTCTGTGTGTGTGTGTACT +ACAGGCCTCTGGTTCCTGGAGGCCAGGGATGCTGATACACAACCTGCAATGCACAGGGCA +GCTCCCACAAGGAAATTATTCAGCCTCAAATTTCAACATTGCAGAACAGGTCAGGGAGTG +CAAGGTACCTCCTGCTCTTTCCAGCAGGTGCAAGGTGCACAGGGAGAAGCAGGTGGGTGA +GGTGGGAGGAGGCTGGAGCCAGACCCAGCGTCTGTGGGGTGCAGGAATATCGTGAGGTGA +CGAGGGGCCCAGCGGTGAGTGATGGGGAGGGGTGCACTACGGCAGGCCGCCAGGGGCGGT +TCCACCAAGAAAGTGACATCTGGGAGGGGCATTGAGGCAAAGGGGCCGCGAGAGGGTAGG +TGCTCTAGAGGTGGCCCACGGGCGGGCGGCACTGAGGCCGGGCCTGCAGGGGAGGCGGTG +TGGCCAGCGGACTCTTGTTGGGTGGCAAGGGAAGGGAAGAACCACAGGGCTGCACAGGTT +CCGAGGGGGCCTAGTGGTCCGCATTTGCCCTTGGGTGGGCGGTGGGACTGGGAGGACAGG +AGAGGAGCGCGGGGCAAGCCCCTAAGAGGCCTGAGGGGTGGGAGGGAGAGGACGGCTTCC +CATACGCCCATTTAGACCAGAGGAGAAGCGGACCCACTTTGAGGGCTGGATTCTCAGCCG +ATCCGGCCTCAGCTGTGGGTCCACGCGGCTCAAGGACCAGGGAGGAACCCAGGTGTGCGG +CTGCGCTGATCGGCGGGCTCCGGACCCACCTGGCTGGCCTGGGGACCTGGGAAACCCGGT +GTGGTGGGGAGACCGCAAGCCTGGTCAGGGGTCCCGCTCCCAATGGGGCGGGTCTCCTGT +GAGGAAGGCGGAGCTCAGGGCGGGTAGCCCCGGGGACCGGGTCTCTCGCGGGGGTGTGGC +CACGGCCCGGTGGGCGGGGCCTGCTGTGGACGCACAGAGGTCTGCAGCGCGCGGGGCGGA +ACCTCCCGAGGAGAGGGCGCGGCTGGGAGCGCGGGGCGGGGCCTTCCGAGGAGAGGGCGA +GACTGGGAGTGTGAGGCAGGGCTTCCCACGGAATGGGGGCGCGGAGCCGGTCCTCCCGAG +GAGCGGGCGCGGCTGGGAAGGCGGGTCGGGTCCTCTCGAGGAGCGGGCGCGGCTGGGAGC +GCGGGGCGCGGCCTCAAGAGGAGAAGGGGCGGTGGGTCCTGGGGCGTGCCAGAACCCTTG +AGGAAGAAGCGCTGGTGTTCTGGAGTATGACCACCGGAGCCGGCCGGGCCTCGGGTTGGG +GCGGGGCCTCAGGGGTGGGGTTGGGACTTCGGGATGGGTTCTCCAGAGGTGAGAGCTGGG +GTCGTAGGGTGGACCGGCGTCTGCTGAGGAAAGGGAGCTGCTTGGAGCGCGGGGCGAGGG +ACTCACGTGGAGAAGGCGTGGGCCCTACGGCGGGCGGGGCGGGGCCTCAAGAGGAGCGGG +TGCAGATGCTAGGGAGGGGCGGGGCCCCTGGGATGGGGCGGTGTCTCCCGAGGAGTGGGC +GCGGCTCGGAGCGTGGGACTTCTAAGGAGAGAGCGCGGCTAGGGGGCATGCGGGGCGGGG +AGGGGCGGGGCGGGACCAATAGCGCATACTTAAGCGGCCCGGGCGGGTACCGGCGTCCCG +CCATGGCTCTGCGGCGCGTCTTGCCCGCGCTGCGCCCCTACATTCCCCGCTTCGCGCCGC 
+TGTCCACGGCGCCGGCCGCCAGCGAGCAGCCCGCCGCGGGCCCAGGGGCCGTGCCAGGAC +GTGGGTCGGCCAGGGCAGTGCGGCCGCTGGTGCCCGCCGTGGACTTCGGCAACGCGTAGG +AGGCGTACTGCAGCTGGCGAACCTGGGAGCTGGCGCGCAGCCTGCTGGTGCTGCGCTTCT +GCGCCTGGCCCGCGCTGCTGGCGCGCCACGAGCAGGTGCGCGGGGTGCAGCCGGGGCGCG +GGGCTTCTGCCCGTCCCGGGAGCCTTTACGGAGTTTCCTGGCGTGAAACGGGTGCCTCCC +AAGCATCTCCTGGAGCAAGGGAAGCCAGGAAGGGAGGCGTTTCCTGCAGTCCCTTTCTGA +TATCGCGAATTTTCCTACTTCTCGATGTCTACCAACTAAAAACAATGGTCGTTTTAATAT +TTACGATATATACCACTTGTGGGAATACAGAGTTATCACTTGGAATTTAAAATGGAGTAT +TCTTCATGTTGACACAAGAGAAGCCTGGTGTAGGGCTCATAGCCTGGAACCCCAGAGTTC +AATCCCCGGGACCCAGCCGCTTCCCAGACACGGCCACTCTGAGAACACTGTTGGGGAAAA +ATGGGAAGCCCCTCAAACGCCCAGCTTTGAACTCACCCATGGTAACTTTAAAAAGTGTCA +GGGCTTTTGGTGGCGGTGGTGGGGGGCAACAAGCAGAACACTGTTTTAGCCGCATCAGCC +TAGGGGTACTAGAGATGACGGTTATCTCCAGGTGACCCTGGGAAGAGTTTGCAAGGTTCC +GCTTTTTGGCGCCAGGGTATCTGGTGGTGTTGATTTCTCAGGTGAGACTGTACAGAGGTC +GGGGGCTGCACCCCTGGCCTCAACAGGAGGCGGGGGAAGGCGGGGGACGGTGTGAGATGA +GAAGCACCGAGCCACACTTGAGCTTAGGTGAGGCCTCGGCGGGTGCGAGGTGAGGGCAGG +GATCCACGAACTCAGAGTCAGATGCTGTTGGCCCGAATCCCATGGGGAGCGCCAAGGTCA +GCCTAGGACCCGTGGTGGCGGGAGTGCGGAGGATGGTGGAGGAAGAAGGAGCTGAACTGG +ACAGATGGAGGGCTCTGGGAGGGCACTGCTGTGCTTGTGCACACAAGGATGTCTTAAGGT +CAAGTTTTGGCCATAGAGAGCCCCGGCCAAGGAGACAGGAGGGCGAGAAGGGACATCCCA +GGTAGAGGAACTGGCAAGGTGAGGGACGGCGATGTGGAGGCTTTGGGGAGTGCCCACATC +TAAGGGGCATCCCAAAAGAGGAGCCAGTGCTGGTGGTGCTGCAATAGGAGGAGGAGGCAG +CAGGGCAGGAATGCCAGGGGAGGGAGTGGCCAGCCACAGGGCAAGGACGGGCATTCTAGG +TGGCAGCTGAGTGGGACAGAGACTAGCCATGCAGAAGCCATGCCCACCTTGTGGCCAGGA +TGTAGTGGGGGCTAGGAAGGCACCAGGTCAGGTTTCTTTTTTCTTTTTTTTCTTTTTCTT +TTCTTTTCTTTTCCTTCCTTCCTTTTCTTTTCTTTTTTTTTTTTTTTATGGAATTCGCTC +TGTCACCAGGCTGGAGTGCAGTGGCGCGATCTCTGCTCACTGTGACCTCCACCTCCCTGG +TTCAAGCGATTCACCTGCCTCAGTCTCCCGAGTGGCTGGGATTACAGGCACGCGCCACCA +CACCCAGCTAATTTTTGTATTTTCAGTAGAGACAGGGTTTCATCATGTTGGCCAGGATGG +TCTCAATCTCCTGACCTCATGATCTGCTCACCTTGGCCTCCCAAAGTGCTGGGATTACAG +GCATGAGCCACTGTGCCCAGGGAGCTGGTGCCGCATCTTCTGCCTGGGAATCCCTTCCTG +ACACAGCCTCTGGGGGCCAGAGATGGGGGGAGCAGGGCTCCAGGGGTAGAGAAGCCAGAG 
+GTCACTAAGGCCCAGTGTCCTTCCGTGTTAGAAACACCCGACCCTGCACAGCCCCATGCT +GGCCCCAGCAGCCCTCCGTCAGGGGCAGCGGGTCCCAGGCCTGTTTATGTTTTTGTTTGT +TTGTTTCAAATGAGACTGGGTCTTGCTCTGTTGCCCATGCTGGAGTGCAGTGGTGTGATC +ATAGCTCACTGCAGCCTAGACCTCCTGAGCTCAAGCCATCCTCCCACCTCAGCCACTTGA +GTAGCTGGGTCTACAGGTGCATGCCGCCACACATGGCCAAGTTTTTTTTTTTTTAATTTT +TGTAAAGAGGAGGTCTCACAGTTGGGCGTGGTGGCTCATGCCTGTAATTCCAGCACTTTG +GGAGGTGAGGCTGGTGGATTGTCTGAACTCAGGAGTTCAAGACCAGCCCGGGCAACAGGG +TGAAATCCTGTCTCTAATAAAATACAAAACATTAGCCAGGCATGGCGGCAGGTGCCTGTA +ATCCCAGCCACTCGGGAGGCTGAGACAGAATTGCTTGAACCCGGGAGGCAGAGGTTGCAG +TGAGCCGAGATCACTCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTCCGTCTCAAA +AAAAAAAAAAAAAAAGAAAAAAGATGAGGTGTCACTATGTTACCCTGGTTGGCCTGGAAC +TCCTGGGCTCAAGTGATCCCCCCACCTTGGCCTCCCAAAGTGCTGGAATTACAGGCGCAC +ACTACCACACCCAGCTAATTTTCATATTTTTTGTAGAGATGGTATTTCACCATGTTGCCC +AGGCTGGTATCAAACTCGTGGCCTCAAGTGAACCTTCCACTTTGGCCTCCCAAAGTGCTG +GGATGACAGGGCATACCACTGCATCTGGCCCTTCTGAGCAGTGCTTGGAAGCCACCTCCT +GACGGGGGCTGGGGGTCAGAGTGGCCAGCTGAATGCTGGGCCATCAGGGTGAGGGGCGTT +TTCTCCTCATCCATTGCCTGTTCTCCTGGGGCAGATGTAAGGGGAAGGCCCTGCACTGGG +GTGGTGTTAAACAAGATGAGATGGTAAGATCTGCTTTCACTTTAAAGGAACATTTCTGGT +TGTTGCTTGGAAAGAAGTGTCAGGGGCAAGAATGCTGCAGGAGGGCTGGCCTGGAGGGCA +GGGTGGGACTGCAGGCTGGCTGCACGGGGGCAGGAGCCTGCGCTTGCGTCAGAGGTTCCT +GAGAGACTTGGGCTCTGGGGCAGAGGCCTGAGCAAGGGGGCCTCGCTGGGGTGTCTCTGT +CTTAGCCACACTCGGTGTTTCTGGGTCCTGTGACCCATGTGTGAGCAGCCTGTGGTGCTG +GTGGCACAGGGGAGGAAACCGAGGCAGGGAGCCTGTGGGGGCTGCTCGGCAGGGGACAGG +CAGCTCCTGTGCCCACTGCTGTCCCACACCTGCATGGAAACACAAACCCCACCACGGAGC +CCCCTCTCATCTCTACAGGGCCTCCAGAGCGCCATGTGCAAGAACTTCATGGGCGGGGTG +GGGTGGGGTCAGGAGCCTGCTGTGGGCTGGCCAGGCATGAACACCAAGCTGGAGGTGGCG +GTGCTGCAGGTGGGGCAGCCTTCCCCTTCCCCACTCCTGTGGCTTGTGAGGGGGTGGGCA +CCGTGGGGGCGCACAGGGGGCATGGTGGGCAGTGGTCGGAGGTGGTTTTGGGAACTCACC +CAGGGCACGTGGCTCACTCTGCAGCTGCTGGGGTTGGCCTGGTGGGGAGGAGGAGGTCGG +AGGGACTCTGAACAGAGAGGGGAGATGCTGGCAGGAGGGCTGGAGACCCAGCCCTGCAGG +GCGTGGGCGTGCATCAGCAGCGGGGTGGGAATAGACCTGTCTATGCAGTAGAAACAAACC +AGACATGGATCCTGTTCTTACTAACGATGTAATAAAGCCAGAAACAGCACCAGAGGCCAA 
+TGGGTGGGGGGAAGAATAAAATGTGAGCGAATGGGGCTGATCCAGGCCTTCTCACTTCTC +AAGGAAAGTGTCGCAAAGATGGGCATCGCATCCAGGGCTGAGATTGAGGACTGGTTCACG +GCAGAGACCCTGGGAGTGTCTGGGTGAGAGCAGCCCCCAGGCTGAGAGGACACGAGCATC +AGCCATGGTTCCAGCCAAGCCCCAGTGACGTCTCCCAGTCCTGGCTGCCCTGCCTGAGAG +GGTGGGCCTTGGGATGACAGTGCTGAAGGAACTGCTTTTCACACAGCAGGACTTGATGGC +CTCAGGAGTCCCTGAGGCTGGGGCTGGGCTGGGCAGGGGAGACACAGGGCCAGCACAGAC +CCCATAGAGGGCTCTTTATAGAATTATCTGGGTCCAGAGCAAAGGTGGCACTCTCAGCCC +TCCCTGGAGTCCACGCCGGCAGGTGAAGGATCAGAGCCCCATGCCCCATCCTTGGGGCCA +GGGGCTGGGACCCAGGTCACCAGGAGCCTCCAGTTGAAGTGGGGAGGTGACTGGGGTGGT +GCAGTAGTGAGCCATCCCCTCACACACTGCCAGCCTTGCCCACTGTGTCCTCCCTGGCCG +CCCGGAGCTGTGTCCGCCTCCTGCCTGGCTGCCTGTGCTCGGCCCCTCCACAAGCCCAAA +ACAGCCCTAGGGTACTCAGTGTTTTCAGAGCCGCCTAGATGCAGGACTGTTTGTGTTTTG +GTTTTAAATTTTTTTAATTAATTTATTTTTTATAGAGACCAGGTCTCACCATGTTGCCCA +GGCTGGTTTCAAACCCCTGGGCTCAAGTGACCTGCCCGCCGCAGCCTCCCAAAGTGCTGG +GATTACAGGTTTGAGCTACTGCACGCCACTGGTTTTAAATTTTTAAACAGAAAACAATAT +TTACATTTCCTCTGTCTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCAC +CCAGGCTGGAGTACAGTGGCGCGATCTCTGCTCACTGCAAGCTCCGCCTCCTGGGTTCAT +GCCATTCTCCTGCCTCAGCCTTCTCGTCTTTTAAAACTGAACACTGAGGTGGGTTTTGTG +GTGGTTGGTGAACGGACAGGTTTGGGCCTGGATCCCCCACCCGACCCTAGCACCCACAGG +TGGGGTCGCCAACTCTGCTGATGTGCCTCCTCCCTCAGCACTGTGGACCTGCTGGACTGG +AGCAGCCTCATCGACAGCAGGACCAAGCTGTCCAAGCACCTGGTAGTCCCCAATGGACAG +GTAACACCTCCGCTCCTCAGTGAGGCCCAGCTCAGCAGGGCGCTGCGCTAAGAAGGGAAT +TCAGCCTGCCACATGTTTCTCTTGTTGCCTACCCTGGGAACTTAACATGACCAAGATCAC +TGCACACTATGGCCCCACAGACCCCCTATGGTCCAGGGGGAAAAGAAGACAAACCCACTA +GTGTCCACAGAGTGCTTGGTGGGACAGAGACCCACGCGGAGCCTTAAAACACCACAAGGA +GAGGGGGTCTGGGGGCCTTCCCCAAGTCATCTGTCTTGTGCATCAGGTCACCCCAAACTT +CAGAGGCCTAAAACGGCAGCAGCCACTCTGCTTCTGCCTCATGGTTCTGGGGTTGACAGG +GCTCAGCTGGGCAGTCCTCGCTTGGGGTCTCCTGGGTAGCTGTACTCACAGAGTGGTGGC +GCTGGGACCTGGGGGAGGCGCCCACTCACATGTCTGGGAGTTGGTGCTGGCTGTGGCTTG +GGCTTTTTCCCAACATGGCAGCTGGGCTCTAAGGGCCAGTGTCCCCTGAGACGTGAGAGC +CAGGCAGAAGCTGGGTGACCTTTTCCTGCCTCACCTCAGAAGTCACACGGTGTCTTTTCT +GGCAAGTTCTGTTTGTTAGAAGCATGTCACCAGGCCAGCCTGTATACTGCGGGGAGAGGA 
+ATGAGACTCATCTGCCTGTGGGAGGGGGGCCCAGGACCTGGCTGACTGCTTGAAACCACA +ACCTAAACTGGAATTCGAAGAAAGAACAGAAGCACCTTCAGAAAACATCCTGCAGAGGCA +GCTTTCCATGACACATCTAGCCCCAGTCCACTAAGGGCCCAGCCTCAAACTCAGAGCCCC +CGACCCAGAGCCTCGGGGCGCAGTGCGTACAGTGTGGCTGTGCACCAAGGCCACCAGCTG +TTGGAGACTTTAGCATGGTGTAGCAAGGGATAGAGTGGTACTGCTTGTGCCCAGTGAGGC +AAGGCTGCTGGGGCCAGCAAAGGGGTGAGCGGGAGGCCCTGCGGGATGGGTCCCCACGGC +CTCTGAGCACCAGCTCCTTGAAGGCCTCATCCTGGCGGTTGCCCTGGTCCCTGGCTACGT +GTCAGCACGTGCTGCACAACTGCCCTGTCTGCCTGTAGCCAGGGGTCTTCCTCCTGGGCC +CTCTTCCTGCTCTGGGTGGAGCCCTTCTGTTTGACGATTGGGTGGTCAGTGTTCACTTCC +TCAGTGTCACATGGGAGCTGCAGGGGGCCCTCCCACAATGGTGGGGCCTCTGTGAGAACT +GGGGAGGCTGTTAGAGTACACACTTCGGAGTGCAGGCCTGGACTGAGCCTGGGGCCCACT +GACATGGGCACTGAGCTCCCTCCTGGTACCCCCAGGGCCCGCGTAGGGCAGTGCACTGGC +TGCGTTGGAGGTTCCGGGGCTCTGTGCTGCTTGTCCAGGTGGCCCTCCCTCCCGGCCCTG +CATGAGAGGGTTCCCGTGGGCTGGGAACACTAAGAGAGGTTTAGCCTGGGCCTCAGGGTG +GCACTTCCTGGCCCTTGCTGTGGCCTGACCATGTGCTGTACCCCCAGACAGGACAGCTGG +AGCCCCTGCTGTCCCGGTTTGCTGAGGAGGAGGAGCTACAGATGACGAGGATGCTACAGC +GGATGGATGTCCTGCCCAAGGCGAGCTGTCCCGCTCGGGCAGAGGGCTGGGGCCTTGGGG +ACCCGGGACTCATGCAGAAGAGGGCACCTGCCTCACTTGAGCAATAGTTGCTCCCACTCT +CCCACCCCCTGCGCCTCTCAGAAAGCCACAGAGATGGGCGTGCGGCTGATGGTGGATGCC +GAGCAGACCTACTTCCAGCCGGCCATCAGTCGCCTGACGCTGGAGATGCAGCGGAAGTTC +AATGTGGAGAAGCTGCTCATCTTCAACACATACCAGTGCTACCTCAAGGCAAGCCCCCGC +TCAGCCCCTCCCTGCCTGCCTTGGGGCTTGGAGACCCCACAGGAGGGGCTCCTCTCATCT +GAGCTCCCTGGCATTTGCCTGGTTTTATACTCAGCACCCACCAGGGAGCTCACGGGATGG +AGGAGGGAGTGGGCCGTGCCCCCAAGGGGTTGTTGGGGTCTTCTGCAGCACAGCTCACCC +AGCGGGCCCCAGGGTCCAGGAAAGAGCCCCTTAGGTGGTGGCCACAGCCAGGCCTGCAGA +GGAGGCCGAGGCCAGGGTGACTGCCTCAGAGTAGCAAGAAAGGCCCGCTGCCCGCAGCCT +CTCCCCTGGTTCTCAGAACCAGCCAAGAGTGTCTAAGGAGCAAGAGGAAGACAACGATCT +TTATGAGGGTCCACTAGATGTACCCGACACCCCAAACTTGGAAGGTGTCACATGACCATT +TCCTCCAGCGTGTTTTCAGTTCAGCGCCCCTAAGAACCTCATGAAAGCTTTGGGCCCTCC +CCGGAAAGACTCACCCAGGATGTCACATGTATTCAGGGAGCCCATCCTAGCCTGTGGGCC +CCAAACTGCTCAGGGATGTTAGCAGGTCCAGAGCTCTCAGGGGTGAGCCGGGTGCCCAGA +GGACACAGGGTGGCATGGGGAGGGGGTCCAGGCAGGAGCATGCAGTATGCACCCCTCCTG 
+GTCGGCCTGGGAGTCTGGAGCACATGATGGCTGCTGGCGCCTGGGCTGCCCCCATAGGGC +TGTCCACAGGCCACAGAGGAAGCTGCAGATGTCCCCACTGCCATTGCTCCCTGGAGACAG +AGGTTGGAGAACCATCCTCACCCACACCTGGTCCAGGTGCCTTGAGCTGCACCTCCCCAG +GCAGCTGCTGCCCAGCCCTGGTGACTGCACCTGGGCCCAGGAAGGCCCCATCCTGCTGTC +ACTTGGGCTTGCCCAGGCTCCCTGTGACTTTTGGCCCTGGCCCCACAGGATGCCTATGAC +AATGTGACCCTGGACATGGAGCTGGCTCACCGTGAGGGCTGGTGTTTTGGGGCCAAGCCG +GTGCGGGGCGCATACCTGGCCCAGGAGCGAGCCTGTGCAGCAGAGATCGGATATGAGGAC +CCCATCAACCCCATGTACGAGGCCACCAACACCATGTACCACAGGTGCGCAGCTCTCCCA +CCCCTCTGTCTTCTCAGGGCAGGGCGCCAAGAAACCAGGCCTGAGGGAGAGGCTTGGAGA +CTGAACTTGTCAGCCACATGTGTCCCCAGGTCAGAACTGGTATTTCCTCAGCTTTTATGT +CATGTGTTACTTTTTCAAAAGCATTAAGGTTATTCTTTTATAAATATAGAAAAGCCTATT +GAACAAAGTAAAATTGGCCACAGTTCCCTGAGCCTGCTCCTGCAGACACTTAGATCCAGA +GATGTGCATGAACACTCGGTCCTCCCCACCCTGCAGGCTCAGCCTGTTTGCTTTTCAGTT +CACACAGTGGGATCGCCCATCAGCTGTTCTGCACCTGCTTTTACTGCTCTAGTTTCTTGG +TGATCTTTTCATGTAGCACAGGCTGGCCAACCATGGGCTTTTCCTCTTAGAGGCGTGTGT +GCATGCTGCAAATTCAGAGAGACCCCAAGTGCCTCCTGGTTGGCTCCAGCTCCCCTCCTG +CAGCCGCTGTGCATGTCGTGCCACAGCGCACACATCTCCGCCATTGTTGTTTGCGCTGTA +GCGTGTCCTCTGCCCGATGTGTTTTTCATTTCGCGTTCCATCTGGGTCCTCTTGTGTGGG +CTTGCAGAGTGGTGCTGTCAACAGCCAGCATGGAGTGCCCGGGGCTATGTGTCCAGCATC +TTTGATCTTACTGGTTTTCAGTCTGGTTATTGTAACAGGGACTCGCTGGACATACCGTCA +CCCTCTGTGGTACACACGGGCCGTCAGGGGTTTTGTGTCTGAGGATGCAGCTGTCCTAGG +TTGCTGGAAATGGAATTGCTGGGTCAGAGAGAACGTGCGCGTGGTCAAGCCTGCTGAGTG +TCCTCGCTGCAGGAGGGCCGTACTGCTCGGGCTTCCTGGTGCACCGGGATGCCCACCCCC +ACTGTGCCCTACAGGGACCTGCTTCCCTTTTATTTCCTTTCAGGGCCAGGGGTGCACTGT +ACGATCCCTCCCCAGACGCAAAGCTGTGTCTGTCATGAGGCTTATGGGCATGTTTGGGCT +GGTCCCCAGAGTCACGGCATGTTTGATGCAGGGACGCTGTGCCCCAGCTGGTTCTGTGTG +AATTGTCTGGTCCCAGACATCGCTTTTGCTTGGTTGCTGGTCTTTCTTTTTCATGAGGTG +CTGGGAGGGGCAGATGCTGCTCCTGTTCCTCCTGATCCTGTGGATACCCTTGGAAAGAGC +CAGCTCACTGGATGGATCTGGGTCTGCCTTCGGCCTCCTCTCTGTGCAGCTGGTAATGAG +AGGTGCAGGCGACCACAGGCCTGGAACTGGGCTCACATGGCACTCTTCACAGGTGCCTGG +ACTACGTGCTGGAGGAGCTGAAGCACAACGCCAAGGCCAAGGTGATGGTGGCCTCCCACA +ATGAGGACACAGTGCGCTTCGCACTGCGCAGGTAGGTGTGCCCCACCCTGCACCGAAACC 
+CCAACCTGAGCCGTTGTCTCCTCCAGCTGGGGAGAGGTGGCAGCAGTGCAGAGCTCAGGG +TGGGCAGAGTGACCTGCTGGGTGGTCGGGCATTTGTGGGGAAAGGCTGGTGAAATGAGCT +GGAGGGTGGCTTCAGTGGGCAGGCCCAGGCCCGCTGGCATTCTGGACATCGGACTGTGAG +AGCACAATGGCTCTTTGCAGTTACGTTGGTTTTTAAAATTAACTTTTTTTTTTAGATAGA +ATCTCGCTGTCACCTATGCTGGAGTGCAGTGGCGTGGTCTCAGCTCACTGCAACCTCCGC +TTCCCAAGTTCAAGCAATTCTTCATGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGCAT +GTGCCACCACGCCTGGCTAATGTTTTGTATTTTTAGCAGAGACAGGGTTTCACCATGTTG +GCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATCCTCCCTTCTTGGCCTCCCAAAGTG +CTGGGATTACAAGCATCAGCCACTGTACCCAGCCTTTTAAAAATTAACATTTGACTTATT +AAACTTCACAATTAAAAAACTAAATTTCATCAGGGTATTCGGATGGCTCCTGAAAGGTAC +AAATGTAGTGTTGGAGCCCAGTCCCTCAGGAGGCCTCCACAGCGCTGCCTCATTTATAAG +CAGTTCCCTTAACATTTTAACTACATCTGGAATGTAACGGTTTGTTTTCTCTTTAAACAT +TTGTCCCATGGAGCACAACTTCGGTAGAATTCTAACAAGTCAAATGCAGAAGTTAGGTGA +AGTCAGTTCTTGGATACTCCTGTACTGTCACCCTGGTCTTATCTCATGCCTTAGCCCAAG +CTATGCACACAATGGGGTCCTAGGTCCCCCTCCCCTCCCAGATTCCGCCTTCCCAGGGAT +GGGACCCCCTAGAACCCTCGGAGGCCTGGGCAGTGGCCTCGCTGGCTCTCGCCTTCCTAG +GAGCTGAGCAGGAGCTCCACTCTCAGCAGGGCAGGGTGCCCCAGATCCATGACTTGTGGC +ACGAAGGGGCTCCCCAAGAAGCTGTGAAGCCACAGGGCCAGGCTTGGGGCCAGGGCTGCA +GCCAACAGGACACATCCTGGTGCTGCTCGGGAGCTGACTCACGGCTCTGAAATCATTCAG +TGATGTCCACATTCAGCCTTTGTTTACGATGCTGCTACTTTTTCATGGCCTGAAATTGCA +GCACACCCATTTTTGTGCCCTGATCCTCTGCTAATTGTCATAGATGTGTCGGTGATGGGA +CAGACTCACTTTCTCCACACGCACAGGCCTGGTGGCCAGCACTGCCCAGCACTCGGGAGC +TGGTGGGGGTTCCTCTCAGCTTGTAATCAGGACTCCCAGCCTCTGGGTGGACTCTCAATG +CTGGTGGACTCACAGCCTGACTAGACACATCACCATCCAACTCCGAAAGGACAGCCCGCC +CTCAGCAGGGCTTCCTCTCTCCAGCTTAGCCCCGGCACCTGTCTGTCCCTACTATCTTGC +TGTGGCAGTGGCTTCCCCAGTGGACACAGCTTCCCCCTCCCGAGTGGCAGCCGGGCTCTC +TCCTCTTGGCCCCAAGGGCCCTGTGCCGGGCTCCCCCAGTACCATGCTCACATCCCTGCC +TGAGAGGCTGGACCCCTGCAGGCTTCAGCTCTGCCCCTGCTGTGGGTGGCTGGGGAGCTC +TGGGGAGCTGGGGTCCCCTCCCATGGGTAAGTGGTTCTAGACTATGGCAGCTGCTCTGTC +CTGTGCTGGGCCCAGACTGCCACACCACCCACCCTCAAACAACCCGAGTTGGCTTTGGCC +AAGGAACAAACCAGCCCCAAACTTAGAGGCTGGAACAGTGGCATGTTCTGTAGGCCAGGG +GTGGGCTGGCTCAGCTGATGCTTCTGCTGCTGGCTGTGCCTGTGTCCCTCCTGCGTCTGT 
+GATCTGGGGCCGGTGGGGCCTAATGAGCTCATTCATGGCTGGTGTAGTAAAGGAGACTAC +ATCCTGGGTCCAGCGTCCCACAGGCCAGCCCAGGCTTGGGCACGTGGTGGGCTCAGGCTT +CCAGAAGGCCTGGAGAGGGCAAGCCTCTGCTTGTGTCACCTTTGGTACCTCCCTGTTGGC +CAAAGTGCCTCACGAGGCCAAGCCCAGATGCCAGGTGGAGTGAGCCACTGTCCTGTGCTG +GGAGGAGCCGCAGTCACATGGCAGAGGCTGTGCACACCAGCAGGGGAGGGGGCAGCCAGC +AGTGGCACCCGGCTCTCGGGTTCATGTCCTGGGGTTGAGGAAGACACAAGATGGAAGCCA +GGGCGATGTGGCAGAGATGGGTGCCTGCTTTAGACTGGGTGGTCCCGACACTGGAGCCCA +GCCCAGGGATGGAGGGAAAGGGCCTCTGGCCGATGGGTACTCTGGGAGCCAGGGCTGGGG +CAGATGGAGAACAGGGGAGGTGAGGCCCTCGGGGAGGGAGTGGGAAGCACAGCACAGGCA +CTGCCAGTCACCCGGGTGATGCGTGAGCAGCAGGCGGAACTCGCCCCAGGCTGGGGGACC +CAGTGCTGTGCAGACCAGCGAAGTCGGGATACCTGGATATAGTAAGCTGGAGATGAAACT +TGAGAACTGTCACTGCGTGGATATTTCAAACCCTGAGAATCTTTGTAAGACCCTATACGG +GGTGGTGTGAAGACAGGACAGAGTGGAGGCCCCACCCCAGTATCTGAGCAGTGCTAATAG +ATGTATAATGCAAGCCACATCTGTAATTTAAAGTTTTTTGGTAGCTACGTTAAAAGATAA +AATGAATTTCAAGAATACATTTTAATTAACCCAAAATATACAAAATATTACTTCAAATTG +TATTCAAGTTGATTTTAGAAAAGCAATGCGGCATCGTGCATTTTTCTAGAAAGTCTCCTA +AATCCCGTGTGTATTTGACATTTGCAACACATCTCGATTCAGAGCAGCCGCCTTCCGAGG +ACTCGGGGGCTGGTGGCAGCTGTGTGGGACACAGCAGCTGTGGGGATGGGGCAGAGCAGC +AGCCCACAGAGGGCGAGGGCAGGGGCCGGAGACCAGTCATACCACAGAACCCTGGAGGGG +CTTGAGGAAGGGATGGCAGGAGAGCTGAGGAGCTTCTCTGCCCTGTGGGGCAGGCTGCTG +GGTGGGTCAAAGCCCCAGAAGGGCTGCACCTCCAAGAAGGGAGGTAGAGGAGAGGTGGGA +GAGAGGAGGGTAGGGAAGGCAGGAGGGGGGCCTTGTTGGCTAAGGGCTGAGGCCTCCCCA +GGGAGAAGAGTAGGGCAGCCCAGCAGAGTCCTGCTGGGAAGAGGCCTCACTGGGAGAGGG +CCGGACAGAGCCTATATCCCAACCCCACTTCAAAGGGGATAAAGTCTGGAGAGTGTAGAT +GGGGGACCTGGGGTGGCAGGACAGGGTCAGAGCTGGGACAGGGGCCCTGTACCCCTGCAC +CGGGGACAGTGTCCTGGAATTAGCCCTCAGCTCCTGGCTTTACCTGCACAGGATGGAGGA +GCTGCGCTTGCATCCTACTGTACTTTGGACAGCTGCTAGGCATGTGTGACCAGATCAGCT +TCCCGCTGGGTGAATGGGGCCCTCCCTGGTGCGATGGAGTGTGGTGCTGGGCCTGGGCCT +CATGGCATTATCTGCCCCCATGCAGGCCAGGCTGGCTACCCTGTGTACAAGTATAGGGCT +GTAGGTGCTGTCATACGTGTCCTGCCACGCCCTGGAGAACAGCAGCCTCGTGAAGGGTGC +CCGTCGGGAGCGGCAGCTGCTGTGGCTGGAGCTCTTGAGGTGGCTCCGAACTGGCAACCT +CTTCCATTGCCCCACCTAGCACCCCCCCGCCCCCGCCCAGGCCATCACCACAGCTGCAGC 
+CAACCCCATCCTCACACAGATTCACCTTTTTTCACCCCACACTTGCAGAGCTGCTGGAGG +TGGAGTCAGGTGCCTCCCAGCCCTGCCAATGGTGGGGGCACTCAGGTGTGGGCTGACCCT +GATACCTGCCTGGGATAGCCGCTGGTAACTGTTAGGAACTTTCCTCGGATGTGTGGGCCC +AAGGCCCCCACCTCTGTGACCCCCATGTCCTTGGACCTAGAGGATTGTCCACCTTCTCCC +AAGGCCAGCCCACATAGCCCGAGCCCCTCGGGGAGCAGTGGCCGGGCTGGGGAGGCCTGC +CTGGTCAATAAACCACTGTTCCTGCAGCTGAGAGCCCTTTGCCCTCACACAGGGTCAGCT +CTGGGCAACAAAGAGGGAGTGGCTCAGACAGGGTCAGAGAAGTCTCATCCCAAACTCTGG +CTTCCAGCATCATCTGTTCCTGGAGGCCAGGCCCTGCGCAGACCTGGCCACCCAGGGTGC +TGGACACCCCCAACCCTGACTCCACAAGGACCCAGCAGGGCCCATGGGCCCAGGCCAGCA +CGGAGGGGACTCATGCAGATGCAGAAGCCAAGCTCCCCAGGTGGCCCTGGCATGCCTCCC +CGCAGGGCTCCTCCTCAGGCCATTCCTGCCACCCTGAGTCCTCTATCCCAGTGACCTGCA +TCAGGCTCCTCCTAGTGGTTCCCACAGGACACTGACCAGGCTGCGGCAGGCGCATACTTG +CACTCCGGGCCTTCTCCCATGCTCTCCAGCCCCCAGCCCCACAAAGCAGGCTGGATGTGG +TCCTCATGCCCTGGGGCTTGGATGGGGGCCCCTGTACAGATGGCAGTGACAGCATCCTGG +GACCAGGGCTGGGGCAGGAAAGGCTGGAGGGTCCCAGTCTCCAGGCAGGGGGAGGGGTGG +TGGGCTGGGCACACAGCCCAGGAAGGGGCCCCCAGGTAGAGGCCTCTGCTGCAACAGAGG +TGGGGGCCCTACCAGGGCCCCGGTTATGGCAGAACCTGCAGCTATCACACAAGCCACATT +TGGGCCACACTGAGGCATCTTTATTTCTCTGGGTCACACAGAGCTTGGGCCTGGGAGCCT +CTTCTGCATGGGGGCTGCAGCTGCCAAGGGAGCGGCTGAGACCAGAACAAGCGCTCGGGT +GGGCCCCAGTGCAGGTTGGGATGTGCCCGGTGGAGTAAGGTGTGAGAACCCCCAGCCTCA +CTCTCTGCCTGGTCCTGAAGCAGACAGCAGCAGGCTGGCCCAGCCTCCCCTTTATGAGAC +TATCCTAGGGTTTGACAGCAAGTCCCAGATGAAGGGTGACAGGCAGCTGGGGTCCACCTG +GTCTCTCCTCAGCAGGGGGAACCCCCTGCGGGCAGCTGGGAAGGCAGTGGCCAAAGGTCA +AGAAGATGAACTCTGCCCAGACTTCTGCTGCCTGTGGCTATGGTGGGACAGGGCTGCCTT +TCTGGTCACACTGGGCAGCAGGCGACTGCCAGGGACCTCCCAGTCCCAGGGCTGCATTCC +CTGCCCGGCAGCCCCTCTGCTGCAGCTTTCGGATGAGTTCCAGCAGGTTCATCTGCAGCA +TCAGCTCCTGGGATACAGAGGGGCCCCATGAAGGCCAGAGTGACCCCCAGGCCCCTTTCA +TCCCGGCCCAAGGGTGCCAGGCAGTGAGCCCACGGGGGCGACCATCCTGTGAAGCCTGGT +CATGGCCTGCCAGGACAGGGTGGAATCTGAGCTTCTGCCCCCAACAGGGCCACTCGGAAT +CTGAGCCTCTGCCCCCAGCAGGGCCACTCGGAATCTGAGCCTCTGCCCCCAGCAGGGCCA +CTCGGAATGGCATGCGGGAGGGGAGCAGAAGAGGGCCCTGTGTGGCACAGCAGGCCTGGA +GAGGGTGGGCAGTGCCCATGGCTGGCTGGCTCACCCTCCTGTAGGGAATGGGCCTGGCCT 
+CCGTGCTGGGAATGGCACAGAAATCCTTTCCAGAGCCTCAATCCTTCCACTTCACCCCCA +TACCCTGAGCCTGGCTCCCCAGGGACCAGCAGTAAAACCCCAGATGGAATGGGGCCCCGG +GGCACTCGGGCTAGGGCAGAGCAGGCCCAGGCACTGACCTGTGGGTTGGTGGTCACGTAG +AAGCCAGCCACCCCCGCCTTCTCCAGTGTGCTCTGCTGGTCAGCCACCTTCCGGTCCAGC +TCCAGGATGATCTTCTGGTCCATCGCCCGCTGCTCCTCACGGATCCGGTGTTCCACGGCC +TGCCATGGGGCCAGGGCGCGGAGGGGGAGAGGTGAGGGCAGCTCTGCCATCAGGGGGCGG +GGAGGTGAGGGCAGCTCAAACACATGCTCCTGCTGCCGTCCCCAGTGGCAGCAGCTCTGA +CACCACCGGTGCATCCTCACCACACCCAGGCCTCTCCTCGGCCATGTGAACCCCTGAAGG +GGGCCCCAAGGGGCTTCCACGAATGCCCTGGCTTGGGACTGCCTTAGGACCACACACACA +TCTCCACAGGCACACCCTGCCCCACGGCTTGTTGGTTACATGCCAGTCTCTGGTTGTCCC +TGTGCTCCCTGGGAGAAGGATGGGCCCCTTCCTGGCCCCAACACCCTACCATTGCCTTTG +TACCCCCCTGAGGCCACCTGCTCTGTGGAGGCCCAAACCCACGAGGAGCTGCTATCCAGG +GGAGCACTGGTGGGTTCTCAACCTCCACCCAGCCCCAGCACACTTGGTATCCGCAAAGAG +GAGCCACCTGGTGGGACTCACTAGGCCTCCAGTGTCCCACTCAGGCCCAGGACAGCCACC +CATGCCTGAGCCCCCACACCCCTTCAGCTCAGCCCAGTCAGCAGAGCCAGGTGGTGCCCA +CAGGAGAGCTGGGCCGGCCACCCTGCCTGCGCCCCCAGTACCTCTAGTTCTCGCTGCTGA +GCCGCCTGAACCACAGGCAGGTTGTGGGGCCGGCAGGCCTGCTGGGCTTCCTGGTGCTTC +TGCCGCAGCGCCTGCCTGAGCACTGGGAGGGATGAGAGCCCGTCAGCAGGCGGTGGGACT +CGGCAGCAGCCCTCACCCATGGGCACCTGCCTTGCCAGCCCAGCAGAGCAGCCAGGCCAA +AGGGGTGTCTGCAGTGGGGAGGATGCCAGCCTCCTCCATCCTCCCACCCCACTGTCACCC +AGGGCCGGGCCCTTATCTGGTGGCAAGTCTCCAGGGGACTCCAGAAGGTGCGGGGCACTG +AAAGAGTGGGGTGGGGAAGACTGGCCAGGTGGGGCCAAAGCTGCAGCCCCAGACCCAGGG +CAGCTGGAGGAGGCAGGCGGGAGGGAGGCTGCAGTCTGAGGGCTTCAGGGCCCCCATGGC +CACAGGCCATGAGACATCTACTCAAGTGTCAGAGACCACCTCAGGCAAAGGCATGGGAAA +GGAGCAGGAGGAGCGGGGTCGCTGACCCTGAGCGTGGGGCCTCTGCCTACCAGGGGTGTG +GTCGGGCCCTCCACCCTCAGTGCTGCCCTTCAGGGTAGACAGCCCCTCCCTCATTTGCAG +CTCCCAGCCCCCGCACAGCGCCAGAGGGAGAAGGGTGGGCAGACCTGGGAGGCCCAAGTT +TGGGATGAGCTTGGGGTGCCATAAGCCTGTGTGGCTCTGAGGGGCTCACCCTCAGCCATC +AAGTAAAGCTCCACGGTCCTTGCTAAACTGTGCACAAACACAGATGTGTGCAAACTGTCT +GGTCTGAAGGGAATGTACAGAGGAAGCGATCATGGGACATAAACCACCATGAAACAGAAC +ATGCAGCACCCCACAGCTGGCGCCAGTGACCAGTAAGTGCCAAATGGCTACAGCTCCCCA +GGAGGGAGGGGTCCCTGGCCAAAAGAAAAGGTGAAACCCAACACTCTGTTCCTGGGAAGA 
+GCCCTGATGTTGCAGCTCTTCTCTGGTGGCCAGTCCCCAGGGAACTCCAGGGCCCCTGAG +GCTCCCCTGCAGGTAGCCTGGCTCCCGCTGTGCCTACAGCCTTCCCCGATCTCCTGTCTC +TTCACATTCTGTACACACCTCGACGATGGTATTTACCAAAAGCCCCCCTCAAGACAGCTC +CATAACACAGGGAAGCAGGGAAATGTCATCTCCTTCACTCTACTGATCAAACATTGCTAC +AAGTGCCAGGTCCACTACCAGATTCCACTAGATAGACAACCAAAAGGCAAGAAGAGAGCC +ACGCTGGCTGCCTGGGCAGAAGGGGCCTGGCCTTAGACATGACCAGGGAACACAAAGGAG +GCAGGAGCAGCCAGTCGCCCCTGCCAAATCAGCAGGAGAAGCAGAAAGTCAATGCAGGCA +TGCCTGCTGCTGGTGGAACCAGGGCCTGGCCCAAACCTTCTGAAGCACAAACAGCATTGG +GCCCAGGCAGTTACTAAAACTCCCCATCCTGCTATAAGGAACTATAGCTCTGTGGAAAAA +TGGCCGATTCCAGGACTGAGACACGGAGAAAACAAGAAAAGATGAGCCTGAACACATACC +TGAACCAGAAAGCAAGAAAGTGCTCACAGAATGATGGGGCGGCATCTAAAGGTGCCTGGA +GCGCCCCCAACTGGCCACACATGGCACAGTCTGGGCATTAAGGAATAATGAGCCATGCGC +TTGGGATTAGTAAACTACATACATCTTACAGCTCAATAAAAAATTTTAAAAAGACAATCG +CCCAGACATTTAGGGGAAGAAGTAATGCCAATTCTACACAAACTCTTCTGGAAAACTGAA +GAGGAGAGAACACTTCGAAACTCATCCTATAAGGCTGGCAAAAACCCTGATACCCAAACC +AGATATTACATGAAAACTATAGACCACAGTTTATCATGAACATAGACCAAAAAATTCTTA +AGATTTTAGCAAATCAAATCCAGCATACATAAAAAGGATACTACAGCACGACCCAGTAGG +ATGGAATTCAAGTGACTGTAATTCACTGTAGTAATAGAAAATGATTCACCACGTGATCAT +CTCAAGAGCTGCCGAAAAGGGAACTGCCAAAATCCAACATCCATTCTTGATAAAAACACA +CCAATCCTGGAGCAGATGGACACTGCCTGCCTTCTGGGGAAGGGGGCCTTGCACTGCTAG +GAGAAATTTTTGCCACATTAAACCTGCTTCTAATCAGGCTTCTACCAGCAGTCCAGTGTA +CAGGAAACCCAGAGAGTGGAGAAGCAGGTTAAATGCAAGAGGAAGCATTTCGTCTGATAG +ATCAAGAGCAGGGCATTCTATAAAATTAGAATGCCCTGGACTCTACAAAAAAGTCAATGC +CACAATAAATGTTGGGGACTGTTTCAGAAGCAAAGAGACCAAAGTAACCAATTGTAGTAC +ATGAACTTGGACTGGATCCTTACTTATAAAGCAACAATTTGCCACAAGTTAGGTACCATG +ATGTGGCCGTGCACTGAAAGTCAGGAGACACCCGGAACAGTTAATTTCTCAGGGGGCTAC +CTTCTGCAGCTGTGTAGGAGACCAGTTTTGTTCTAGAGCACTTGCTCCCAATCCAAGGCC +AAGGTTGACCCCACCACCCAGGTTGTCTGGAGGCATTCTTGGTTTTCAGGACTAGGGAGG +GTACCCCTGCCATCCTGCAATGCGCAAGACAGCATCTCACCAAAAAGACTTACCCAGCCA +CAAATGTCAGCATGGCTGGGTTGAAAATCCTGGTCTCCAGGAATGCTGAAGACTCTTGGT +GAATGCTATGCTGTCACATGTATATATTTTGGGATGTCAAGAACTAGAGCAAAACGTTAA +CTATTGAACTTTAGTGAAGAGCACAAGCGTATGCACTATACTATCTTTTCAGCTTTTCTA 
+AATGTTTTAACTTCTAAAATAAGAAGTTTGGGAAAAATAACAAAGGTGGAAACTGATGGT +GCAGGGGGACCTGCCCTATATAATTTGCAGGAAAGGGCTCCCAAGCCCACCAAGCAGGCC +TGGCTCAAGTGGGAAGCTCTTCGGGTTTGGGAGGGCGCCACAGGAGACAGTGCTTAGGAA +TTTGATGTGCAACTGGAATGAGGCCCTGCGAGGCCTGAGAGGGAACTGACCCTGAAAGAC +GCTTGCTGCACAACTGTCAACAGACTTCCAACTACTGAGGGGCAATGCTGCTGCAGTCAG +CGCCACCACCAGAGTCTGCCTGCAGGCCAATCAGGCACCGAGGCCTAGGGCCATCCTGTC +TGGAAGCGCGTGGGGCACAGGGAAGCCCGAACAGGGAGTTTCTGCTAAGAAGGTGGGTGC +TGCGCAACTGAAAACAAAACTGGTTGGACCCACTGATCACCAAAAAGGCGCTGTCTCAGT +CTCTGCGCCCGTTGATTTTGTCAATCTTAGAGACCTCTTCCGAACACTTCCAAGGAAGGA +GCTGATCTGCACAGATACCAAGAGCTGCCCGGAGGCGCCGACCCGGAGGAGGCGGCACCT +CATCCCGCTGCCCCCGCGCACCTCGGTGCTCGTTCTGTAGGCGCAGGCGCTGGTTGTACA +GGCTCTTTTCGGTGAGGTGCTGGATCTCCAGTAGCCCCTGCACGATTTCGAACACGGTGC +CGTCGAGAAGCGCCAGGGCCAGGTCGCTGAGCGTGGTGTAGGACAGGCGCTGCTGGAAAG +AGCTGCGGGTAGGGGGGCGCGGTGAGCCCCGGCGGGAAACGAAGCCGCCTCCGCAGGCCT +CCGCCCGCCCCGCCTGCGTACCTGGGCAACTCCTTCACCAGGCTCTGTAGCGCCGACAGC +AACTGGTAGTGTCGCTCCTGCTGCCGGGCACCGTCCGCCACCTCCTCCAAGGCGGCCGCG +TAGCGCTCCATGGCGCGGACGCCCGCTAGCCGCCGGCGGCGGCGACGAGCTCCCCCAGCT +TCACGACATCCCGAGCGCGGCGCGTCCCGCCCCTTTTACGATTGTCCCACGCGCGGCGCG +CTCCGCCCCCTTTTACGACAGTCCCGAGAGGGCCTGGCCGCCTGCCCCGCCCTGCCCCGC +GCCCCGCCGCCTGCTTATTCAGGAGGCGCGCCCCGCTCCTCTGCGGAGACTCCAAGGAGG +GTTTTGCTCAGCCCAGGCTCGCACCTGCCTGCTCCCCGCGCGACATTAAAGGCGAGACTC +CGCCGTCTGGATATCGCAGGACCACGGAGAATCCCACCGGCCCTTGCTGAGTCATTGCGG +GCCGAGGTCCGGGCTCCGCGCCGGCGCGCCTGCGAGGTGGACTGTCCCGGTCACCGGCCT +CCCCAGAGGCAGGGGTCTCGGGCCAGAGCCCACAGGACCTGGGTCTAAGGCACCATCCGC +GGCCGAGCAGTGGCGCTGGGGTCTGTCTGCGTCCTCCCCGCCCCCGGGGTGAGACAGACA +GGACCGCCCTAGAGCCTCTGGGTCAGGACAGTCGGAGCCAGAGTCCCCATCTGGGCCTCC +GTGGCCCCCAGCCCGTGAGCCCAGACTGCGCTCTCCCCGCTCCACAGGCCAGCTGCTGGG +CCGGGGCTGGGGGGCTTCTGGTGGCTGGGGCTGAACCGTCTAGGAACAGGCTTTAATAGG +AGGGGCTCGGGTGGAGGAATGGCTCCTGCAAAGGCCCTGCCAGAGTCAGGTCAGGTGGGT +GGGCGAGAGGACCAGTGTGGCCAGGCTGCGGAAGCCAGCACAGGAGACAAGGGCGGGTGC +TGTGCATTACAGAGGCTGCTGGGGGCTTGTCCATAGAGCAGCAGGGGGCACGGAAGGGCT +TGGGGACGCAGACACGATCAGACTTGCATTTCATCAGGCACTCTTTAGCTGCCTGCTGGG 
+TGGCTGGGCTGTGTGAGATGACCCTGCTTTGACCAGGTTAGGAATTATGACTGGTTTCCA +GAGAGAACATTGGTTAGTAATTGGCTATGGGGAAGAGGGGGCGTTGAGCCAACCAACTGG +GGTCCCCCTGAAGTGCCAGCAAAGTGTCTGGTTTTCTCCAGGATCTGTTTCCTCAGTCCC +TGGCAGGATTGGCTCCCCTGCACGCCATGTCCTTGTGGTCCCTTCCAGACAGAAACCAGG +CATGAATAAAGTGAATAATGACAAGCGCTCAGGCCACTGCCGTGGAGGGCGGTCCACAAC +CCAGAGCTGGTCATGCGCCGGGCACCCTGGCCTCCAGGTCCCTCAGAATGGCAGGAGCTG +GAGGTGGGACTCGAGATTGTGGGAGCTGAGTCTGTAGTGAGTGACCAGGAGCCAGCTGGG +GCTGGGCTTGGTCACTGTCACCTCCCCCGGAGGTGGTGAGTGGGATGATTAGCTGGGTGA +AGACGCCTGCCTGTTTCACCAGTGTCCCTGCCCTCAGTCCCCCAGTCCATGAAGAGTGTC +TGTGTGGAGACTGGGAGGATATAGGGCAGCCCCACGTGATGATATGTCTGTGGCCAGCTG +GGCAACACTGTCGCCAGCTCCGGGGTCTGCCAGGTGGGGTGGGGGCTGTGAGCCACAGCA +GGAGCAGGCCAGGCCTGGGTTTGGACACCTGCTCCTCTGCCTTGTCCGGGTGCTGATGCA +GGAAGGCCCCTTCTCCAACCCTTGGTGCAACGTCTGACCTGAACCAGCTTCCCCAGCCCA +GCAACACTGCTCCCCTAATGTGGATGTCAACCTAAAGAAACAAACTGAACCACAGTTCAT +ATAGAGAGTTTGTCTGGGCCAAGGCTGAGAGAGCTGCCCAGGACACACCAAGTTGCCCTG +GGAGTGCTTCATTCAGCCTTATTACAAGCAAGTTTTTAAAGACTGAAGGGGACAAGGAGT +GGGCTGATATGAAGTCAGCAGGAATTCTGACTGCTTTCCAGAGAGAACCCTTAGTGACTG +GCTATAAGGTGTTGGACTACTGGGTAAGAGTTATGGTGTCCAGCGTATGGCTTTTTATGC +CTACTTGGTGTCAGTCTGGAACCCACAGAGCAATTGGCTTCAAGAGGTCGTTAGCTCAAG +TAGGAGTGGGATGTGACTGCTGTTTCATTCCAGTGCCTCTCTGGGCCAGATAATTAAAGG +GAGCTCACATTCCTCAGATAAAAAAGTTCCTTTCGCATTAGGAGAAATACGTAATGTAGA +TGATAGGTTCATGGGCACAGCAAACCACCATGGCACGTGTATACCTATGTAACAAACCTG +CACGTTCTGCACATGTACCCCAGAACTTAAAAGAATATATATATGAATATATATATATAC +TTTATACATATATATATAAACTTCCATTGTGCGCGCGCGCACACACACACACACACACAC +ACACACACAAAGTTCCTTTTCTTTTTCATGGACATGAGCAGTATGTCACTCAGGCATGGG +GACTCCTGGGCCACCTCTGGGTCTTCAGGGAGGGCCTGGTCTGGAAGGAGGGGTCTCATC +CTCCAGAGCCCCAGGGATGGGTGATATATGTGTAGGAATGGGGGAGCTGGTTCGCAGACG +TGCGTTTCCCCTGTGTGTGTGGACACGATGGGCCCCATGGCTCTTCCCGTCGGGTGGTAT +CCACCGAAGGCCTTTGCTCTGCGGCCACTGTCCACCTCCACTTGGCCTCCCGAGTCCACC +AGGCTGGCTGGTGACACCTTGGGTTAGTGTGGGGTGACTGGTGTGTGTTGGGGAGCTGCA +GGTCCCCTGTCTCTGACACAGCTGTGTCTGGGCAAACACCAAGGTTTGAAGGAGTGGGTG +CCTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGCGCGCACGTGTGTCAAAAGGCTTT 
+GCTGCATGCCTTTTGTGTGGCTCTTTGTCACATGTGCCTTTCTGTGTTTCTCTGTGAGTT +TTCATGTGTCAGGTTGTATGTCTGTTGTGTGGGCGGAGAAACAGCTCTGCTGTGGGGCCC +TGAGCCATTGCAGGATCCCCATAGGCCATGCAGAGGTGCACACAGCACCTGACCTAAGCC +TGCTCCTCCCAGAACATCAGGCTGCACCCAGGCCACCGCTCACTCCGTCTGTCTCCCGAG +ACTGTTCCTCACTTCCTGCCTCACTGGCCCTGGTGAGGGTGAGGCTCCCTGCCCCCGGCC +CTCAAAACAGAGCTGCAGATGAGGACGCCACCCAGCTGCAGTGTCAAGGGGCTCCTCCAC +AGTGGGTGTCCCGCAACGCATGCTCTCTGACCCCTTTTGTTTCAGTGTTGTCCTTTTTAG +TGTGTGGCACCAGGACAGCAGGGAGCTGCCACCTCAGGCTTCTCTTATTTTCACTCTGTG +AGTAATAAGGTCTGAATGAGAGGGGCTCATTGTGTCTTTACTGTCAAATCAGTCAGGCCT +TGGCCTTGGCATGTCTTGCGTGTGCTTGACAAATGTATTGCTCATGGGGTCTGGTGGAGG +CACAGCACCCTTTTAGAAGACAGGGTCTGTGCGGACCAGTTACGGGTGACATTTCCTAGG +CACAGCTCTTGCCTAAGTGGTGTGGGATGGGAGGAGGGCGGGGCTTCCCACTGTCCCACC +AGCAAAATGGGCTGTGTGGCCAGCTCCTCAGGACCCGAGAGCATTTGCTCCAGGTTGATA +AAATGTTGCGGTGTTGGGTGGCAAGAGTAGGATTGAAACCAGCCACCCAGTTGGTGCTTT +GTTACCATCAGAAGTGTTTGCTGAGTCAAAGCAAATGGCCACTCAAAACTGAGTGGGCTG +AGTGTGGTGACTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGATGGGCAGATGGCT +TGAGGCCAGGAGTTCAAGACCAGCCTGGGCAACAGGGTGAGAGCCTGTCTCTACAAAAAG +TTTAAAAAATTCGCTGGCATAGTGGCATGTGCCTGAAGTCCCAGCTACTTGCCACTTGGG +AGACTGAGGTGGGATCACTTGAGCCCAGGAGGTCCAGGCTGCAGTGAGCTGAGATCACAC +CACTGTACTGCAGCCTGGGCAACAGAACAAGAATCAAATAATAAATAAGTAAGTAAGTGA +GTAAATCCATCCATCCACCCCAGAGTGAGTCAGCTGGGGCTGCTGCATTGCTCCTCCCTC +ACCCACAGGATTCCCTGGTTCCAGCCAGGAGACGATGGTCAACAGCCACTGCTGTAGTGG +TGCCTAGTTAAAACTCTTTGGTTTCTGTCTCTGTAAAAAAAAGTGTCTAATTGCATTTGG +GCAAAAAACTTGCAGTGAAGAGAGAAAAAGAGATGAAAGCAACCTGTGAGGGAGGCTCGG +GTTGACGTGCGGCATGTGCTGTTTGAGTGTGATGAGTGTGTCGGTTTGCACTGTAGATGC +TCAGGAAGTGAGGATATGTGGAGTGCTGACTGTATACATGATGCCTCCACTGGCTGGCTG +AGTAGAGTTTCATGAAACAATGTTGGTGGACTCAGCCCTGAATCCCTCAGGCATTCAGAG +AGCAACCTCTGTCTTTTTTTTTTTTTTTTTTTTTTTTGAGATGGAGGCTCGCTCTGTTGC +CCAGGCTGGAGTGCAGTGGTGCAATCTCGGCTCGCTGCAACCTCTGCCTTCCGGGTTCAA +ATGTCCTGGCTTTAATTTCACAGATTCTTTCTTCTGTTTCTTCTGTTTGATCAAGTCTGC +AATTGAAATTCTCTATTGCATTTTCATTTCATTCATTTATTTATTTTTATATATTTTTGA +GACAGAGTCTGTGTCACCCAGGCTTGAATGCAGTGGTGCCATCTTGGCTCACTCCAACTT 
+CCACCTCCCGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCTAGTAGCTAGGATTACAGGC +ATATGCCACCATGCCTGGCTAATTTTTGTATTTTTAATACAGATGGGGTTTTGGCATGTT +GGCCAGGCTGGTCTTGAACTCTTGACCTCAAGTGATCCGCCTGCCTCGGCCTCCCAAAGT +GCTGGGATTACAGGAGTCCGCCACGGCACCCAGCTTGCATTTTCATTTTATTCATTGTAT +TCTTCAGTTCTAGAATTTCTGTTTGGTTCTTATTATTTCTGTATCTTTATTGAACTTTTA +GTTTTGTTCATCTACTATTTTCTTGATATTATTGAGTTGATATATACATTCTAGTAAATT +TCACTGAGCTGTCTTAAATCAATTATTTTGAATTGTCAGACAATTTGTAGATCTCTATTT +TTGGGGGGTTGATTACAGGAGATTTATGAGTTTATTTTGGTAGTGTCATATTTGCTGATT +CTTCATGATCTACAGACTTTCATTAATGTCTATGAAGAAGCAAATACCTCTTCTTTTTTT +TTTTTTTTTTTTTTTGAGACAGAGTCTTGCTCTGTCACTCAGCTGGAGTGCAGTGGCGTG +ATCTCAGCTCACTGTAACCTCCACCTCCCAGGTTCAAATGATTCTCCTGCCTCAGCCTCC +CAAGCAGCTGGGATCACAGGCATGTGCCACCACGCCTGGCTAATTTTTTTGTATTTTTTG +TAGAGACAGAGTTTCACCGTGTTGTCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGGTC +TGCCCGCCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCATGCCCAATCTC +TTTCTGTCTTTATAGATTGGTTTCAGCAGGTACAAACCTTTTCCTGCTGGATCCCTTGAC +TGGATCACAGTCAAGTGGGCCTGGAGCCATATCACATGGCTGCTGCCTGGTCTGCAGCTG +AATCTCTGATTGGCAGGCCTGCTATCAAGGCATAGGTTAGTGATGCAGTTTCTGCTGGAT +CCTCAGGAGAACTGGACTGCCTCTGATACCCTGATTGAACAGGACTGGAGCCAGGTCATG +GGGCCACTTCTAGTTCTACAGTCAAGTCTTCAGATATCAGGCCTATTACCAAGGGCATGG +ACTGGTATAGCTCCCTGTGGGTCCCAGATTGAGCTCCTGCTGGTTTACTAGGTAGGTCCA +TGGGAAGACAGGACTGCCTCCAGACCACAGTAGAGCAGGGCTAGAGCCAAGTCACAGGAC +AGCTTTGGTGACCACATTTGGGTTCAAGATTGGTGGTCCTCTTATTAGGAGAATGGATGG +TATGTCTTTCACCAGGTCCCAGGATGGGCTGGACTGTGCCCAGACGGGCAAAGCAAGACT +GGAATGGAGTCACAGGGCTACTTTAGTGTCCATAGCTGACACTGAGATCAGCAGGCCTGT +TACCAAGGGCATGTAAAGGCATCACTGAATTCCTGGGCAGGCAAGACTGACTGTGGTAGA +GTGGGGCTGAAGCCAGGTCAGGGCTGCTTTAGTTTCTGCAGTCAGGACCATGGTTAGAAG +GCCTGTTACTGGGGGCATAAATGGTCATGGTTCCTCCTAGGTGCTTAGTGGATGGGGCTA +GTTGCAAGAACATGATCTAGTGGAGCTGGACCCAAGTCCATAGGAGGACAAAGCTGCTTT +CAGTCTGCAACTGGGAACCTGTCACTGGTGTGTGGACCTGCCTTCTCAAAGCAGCTCTCC +TTGGTTTTGGGCTTTGCTAGAGTTTTGCCACCTCCTGCCTGGATATTAAAACTCTTGCAA +AGGCAGTTTTGTCCATGAATGGCTCCAGATCATTGTTTGTGTGGGGAGAGGTGAGTGGAG +GGCCTCCTGTTCTGCCATCTTGCTGATGTCACCCTAAGATGATTATTTGAATTCTTTGTC 
+AGGCAATTTGTAGATCTTCATGTCTTTGGAGTCAGCCACTGGAGTTTCATTTTGTTTCTT +TGGTGGTGTCATATTTTCTCATGCTTCCTGTTCTTTGAAGACTTAGATTGCTTTCTTCAT +GTTTGAAGAAGGAGTCATCTTTTCCACTCTTTACTAACTTCAGGAGAGAAAGACCATCAA +TTAGCTAAGCTATAGATTCTGGGGGTCTCTCAGTCCTTTTCTGTGGGTGGTCCTTCCCTT +TTAAGGGGGATGTCTTAGGATTTTGTCCCTTGTCTTCATTTCACAAATGAATAAAACAAC +CAGACCAGACATAAGTAAAGGAATACAGCACTTGAACAACACCTGAAAAAACAACTAGAC +CTAACAGACATACACAGGATATTCTACCCAACAACATAATACACATACTTCTCAAGTATA +CATGGGACATTTTCAGGATAGACCATATAACACATCACAAATTAATTCTCAATAGGGGCT +GGGTGCAGTGGCTCACGTCTGTAATCCCAGAGTAATTTGGGAGGCTGAGGCGGGTGGATT +GCTTGAAGCCAGGAGCTTGACATCAGCCTGGCCAACACGGTGAAACCCCATCTCTACTAA +AAATACAAAAATTAGCTGGGCATGGTGGTGCGTGCCTGTGATCCCAGCTTCTTGGGAGGC +TGAAGCGTGAGAATTGCTTAGGAGCCCAGGAGGTTGAAGCTGCAGTGAGCAGAGATTGTA +CCACTGTACTCCAGCCTGTACTTCATGACAAAGAAAATGTACCATTGTACCACTGACAGA +ACGAGACCCTGTCCCAAAAAAGGAAAAAAGCTCAGTAGATTTAAAACGATAGACATCATA +CAAAGTGTCTTCTCTGACCACAACAGGATAAAGTTAGAAATCAATAACAGAAGATTTTAA +AAAGTTCACAAATTAGTAGAATTTAAACAACACACTCTCAAACAACCAATGGATCAAAGA +AATCACAAAGAAATTATAAAATTCTTAAAGACAAATGAAAATGAAAGCACACTATATCCA +AACTTATGGGTTGTGGCCAGTTGTGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGA +CTGAGGGAGGTGGATAGCTAGAGGTCAGGAGTTCAAGATCAGCCAGGCCAACATGGTGAA +ACCCCGTCTCTACTAAAAACACAAAAATTAGCTGGGAGTGGTGGTACGTGCCTGTAGTCC +CAGCTACCCAGGAGGCTGAGGCATGAGAATTTCTTGAACCCAGGAGGCAGAGGTTGCACC +ACTGAGCTAACACCACTGCACTCCAGCCTGGGTGACAGAATGAGACTCTGTCTCAAAAAA +CAAAGAAACAACAAAAAACCACAACTTATGAGTTGTGGTGAAAGGAGTGCTAGGGAGGAA +ATTTATAGCTATAAACACATTAAAAAAAGAAACACCTCAATTCAACAACATAAGTTTACA +CATTAAGAAACTAGAAAAAGAAGAATGAAACTAAACCCAAAGTTAGCAGAAGGAAGGAAA +CAATAGAGATCAGGGCAGAGATAAATGGAAAAGAGAATAGAAAAACAATAAAAAACAAAA +CCAAAAGTTGGTTCTTCAAAAAGATTAATAAAACTGACAAGACTACACTTTGGGAGGCCG +AGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCCCAGCTAAAACGGTGAAACCCC +GTCTCTACTAAAAATACAAAAAATTAGCCGGGCATAGTGGCGGGCGCCTGTAGTCCCAGC +TACTTGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCGGAGCTTGCAGTGAGC +CGAGATCCCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAAAA +AAAAAAAAAACTGACAAGACTAAGGAAAAGGGAAACAATCTAAATTACTTAAAACAGAAA 
+TGTATTTGAGAATATCTTTATATATTTCTGTCTGTCTGTCTGTCTGCCTGTCTATGTTTT +AGAAACAAGTGTCTGTCTATGTTTTAGAGACAAGGTCTAGCTCCATCGCCCTGGCAACAA +TCAGATGCAACCACAATCAGTGGCACAATCGGCTCACTGCAGCCTCGAATTCCTGGGCTC +GCCACTATGCCAGCTCTTTTTTTTTTTTTTTTTTTTAAGAGACAGGATCTTGCCATGTTA +TCCAGGCTGATCTTGAACTCCTGGCCTCAAGGAATTTTCCCACCTCGGCCTCCCAAATTG +TTGGAATTACAGGCATGACCCACCATTCCCAGCCTAGAAAGGATTATAAAAGATTACTAT +AAATAATTGTGTGCTCATAAATTGGATAACCCAGATGAAATAGATGAATTCCTAGACACA +CAAAACCTACCAAGACTCAATTATAAAGAAACAGAAAGTCAGAATAGACCTAACCTAGTA +AGGGAATTGAGTCAGTAATAAGACAATCTCATGACAAAGAAAAGTCCTGGACCTGATAGC +TTTACTGGTGAGTTCTGCCAAACGTTTAAAGAAGAACTAACACTGATTCTTTTCAAGCAT +TTCCAAAGAGTGGAAGAGGAGTGAATACCTCTTAACTCTTTCTATGAGGCCAGCATTACC +CTGATACCAAAGCCAAAAACACTATAAGAAAATAAAACAACAGATCAATATGCCTCTGAT +CATTGATGCAAAAACTCTAAAAATACTAGCATACTGGGTGGGGCATGGTGGCTCATGCCT +GTAATCCCAGCACTTTGGGAGGCTGAGGTGGGTGGATCACCTGAGGTCAGGAGTTCAAGA +CCAGCCTGACCAACATGGAGAAACTCTTGTCTCTACTAAAAATACAAAATTAGCCATGTG +TGGTGGTGCATGCCTGTAACCTCAGCTACTCGGGAGGCTGAGACAAGAGAATCGCTTGAA +CCCGGGAGGTGGAGGTTGTGGTGAGCCGAGATCATGCCATTGCACTTCAGCCTAGGCAAC +AAGAGCAAAACTTTGTCTAAAAAAAAACTATCATACTGAATTCAGCATCATATTAAAAGG +ATTATACACCATGACCAAGTGGGATTTATTCCTGGAATGCAACGATGTTTCAATATATAA +AAATTGATCCATATAAATTCAGACAATTAATTTTAACTGATGTATAGTTTCACATTATAT +GCACATACCACGATTTACTTTTCCATTTTCCTAGTAATGAGCTTTAAGATTATTTCCAAT +TTTTAGCTATTACTAATAATGTTTCAACAAACACCTTTGGACATCTCTTCTGCAAATGTG +TGTGTATTCTCTTGGAGAAGGTTTACCTTGTGGAATTTCTAGTGTGTATGTGTATGTGTG +TGGGTATATATATATACACATATGTATATATTTTCAACTTTACTATATATTTCCAAAAAA +CTCTTCAAAGTGGCAGATTTAAACTCCCCTTCGCCGTATATAAATTCACGGTTCCTAAAG +CCCTTGGCATAGTTTTGTGTTTTTATAGAAAAGGGCATAAAGTCTTATTGCTGTTGAATT +GTACCTTGCTCTTTTCAATTGTCTTTTCACAATAATTCACTATTTATCATCTGTTGTGAA +TATTGTGGAGTAGTTCTTAATCATTTTTTCTACTAAACTATTTTATATCATTAATTTGTA +GAAATCCTCTTTATATAATGGATATTATACTTTTCAATTATATATTTTGTAAATATTTTA +CCCAATTTTCTGGCTTGCATTTAACCTTTATTTAGGATGCCTAGGATTTTTCATTTTAAT +ACAGTCTAATTTATTTTTCTTTTCTTCTTCATGCATTCTGGCATTTGATTAATAAGCCAT +TTGTTGCCCAACGTCATTAGTTCTCTGTATATACTTTTAAATATCTTAAACATGTAATAT 
+TCATAAGTAATTTCACAAGTAGTTTAAGATTTGAGTCTGTAACCTAAATAGATACAGGAT +TTTGTGATATGCTGTAATATACAGATTTTTATTTATATATGGATAAAAATTTATATGTTT +TTTCTGACAGCATTTATTGGATAGTATAATTTTTCTACTGACTTTTAGTATCATATCTTT +TATTACAAGTCCAATGTATTCAGCATACCTATGGATCTATTTCTTGGTGCTTCCATTTTT +TCCCATTGTTTAACGGTGTATCACTGAGCCAAAGCAAACCATTTTAATTATTATAATTAA +AACACATTTAAATATTGATAGGCTATTATTGTTCAAAATAACTTTGGTTCTTCCTTTCCG +TTGATTACTTTAATATGACTTTTCCAATCAAGTTCTAAAAAAATACTGGTGGTATATTTA +TTGAAATAACTTTAATAAAATTAAATGAAAGATCATGTATTTGTTTTCTGAATTAATTCT +TAGATACATTAATGTTTTATGTTACCATGAATGTGATATTATAATATAATATTTTTAATT +GGTTGCTACTGTTTATAAGAATTTCATTTTCTGTTTACTTTGCCTTCATATCTGAAAACC +TTGCTGATTTGATTAGTGCATCCACAAATTTTCTTGGATTTTCTATGGGTAATTACAAAT +CTCCACACAATGAGGTTGCAGTGAGCCAAGATCACACCACTGTACTCCAGCCTGGGCGAC +AGAGTGAGACACCATCTCACAAAAACACATAAACAAACAAACAGAAACTCCACACAATGA +CAACGTATGTGTTTTCTTTTTTTCTTCCTCTTTCTGTAATATTTCTTTGTCCTATCTTAA +CTGAACTGGCCAGAAACCCCAGGACAATGATAAATACGAGCAGTGTCAACAGACATCTCA +TTCCCTTTCCTAGCTTTTATAAAAAATAACGATTATGCTTCAACATTACATATGGTGGTG +TCGATGGTTTTGTTATAGATAAGCTTATCAGGTTAAGAAATTTGTCTGCTTTTCCTAGTT +TGGTATAAAGATTTTAATATAAATGAATGTTGTATTTTACCATCTTATTTTTTTCCTACA +TCTGCTAAGATAATCCTGTGTTTTCCCTTTTTCAATCTCCTAATGTGGTGAATGACATTA +AAATACCTTCTATTGTTAAAATATTCTTGCAACGCTGTATAGAACCAATGCCTTTATTCT +GTATTGCTGATGGATTTTTGAAAAATATGTAGGTGGACTTAGTTTTCTAAGGGGAATAGA +ATTTCTAATATATTTAAAATATTTTGCATGTATGTTCTGAAGGACATTGGTGTGTCATTT +CTATACCATCTGGCTACGAGAGGAGCCGACTGAAAGTCACACTGCCGGAGGAGGGGAGAG +GTGCTCTTCCGTTTCTGGTGTCTGTAGCCATCTCCAGTGGTAGCTGCAGTGATAATAATG +CTGCGGTGCCGACAGTTCTGGAAGGAGCAACAACAGTGATTTCAGCAGCAGCAGTATTGC +GGGATCCCCACGATGGAGCAAGGGAAATAATTCTGGAAGCAATGACAATATCAGCTGTGG +CTATAGCAGCTGAGATGTGAGTTCTCACGGTGGCAGCTTCAAGGACAGTAGTGATGGTCC +AATGGCGCCCAGACCTAGAAATGCACATTTCCTCAGCACCGGCTCCAGATGCTGAGCTTG +GACAGCTGACGCCTTGGATCATCTGCCACTGATCTCTGGTCAACATTTTTATCACCCAAC +ACAAAAGAAGCAGAGATTTATCAAGTTACTTAACCTGACCCTTTCATCTTTTGCTACACA +TACTCTTGTAATTGATCTCTCCATGAATTGTTTTCTGTTTAAAATATCTAGAATGTTTTC +TGCTTCCTGACTTGATTCTAATATTGTATAGGTACTAGAAATGGTTCTAGAAATAGATCT 
+TTATAGATGAGAATCTGGAAGTGGTTTGCTGACCTGTTTCAGTCTGAATGAATTCCTGAC +CTTCTTGTCGGGAGGAGACAGAAACCTGATCATCTGTAGTGTACGGTGGTATCATGATTA +CTTAAATCATCAAATGTGGTTATTGGGAATGATGTGTTTTTTAAAGTGGTACATGAGAGG +TAAAATTGCTATTGTAGTTGACTGTTGCAGTTATAATTTTGTCAACATGGTCTGTAAGAG +TGCAATGGCGTCGGCCCGGTGCGGTGGCTCACGCCTGTAATCCCAATACTTTGGGAGGCC +AAGGTGGGCCGATCACGAGGTCAGGAGATCCAGACCATCCTGGCTAACACAGTGAGACCC +CATCTCTACTAAAAATACAAAAAATTAGCCGTGTGTGGCGGCACACGCCTGTAGTCCCAG +CTACTTGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCAGAGCTTGCAGTGAG +CCGAGATCGCGCCACTGCATTCCAGCCTGGGCGACAGAGCAAGACTCCATCTCAAAAAAA +AAAAAAAAAAAAAAAAAAAAGAGTGCAATGGAAAGCTGGTAGAAAATGGACATTGTTTAA +AGACCAAAACAACCACACTACTTTGCTAATCCCTATCAGCTAAAGACTAGAGAATATATG +AGGCATAGATTTACGTGGTGTTTGACTAAGGCATGCAGAATATAATCTTGACATAGGCTA +AGTTTATCAACATAGATACATAGAGATTCTAAAGTTAAAGTCTTAGCCCAAGAAGTTAGA +AGTGGTGCTCAAGTTTGTTTGTCTGACAGAAACTGTGAATCCACACTGGCCTGCTTATTT +TGAGGTTGCGTTGCCAGAGCTTTCCTAGCATAATAAAGAGAGGTGCATACAAAGGAATAG +GAATAGTAGGAGGTGGGGGTAAAAATATCAGGTGTGATCCACATGCCAAGCCGACCCCCA +CTGTGTCCCACAGGAAGCCCCAGAAGATGTTTCTCCAAGAAATTAGGATTCGTTTGTTGG +GGAGGGCTGCTGGCATGCTTGAAAAGTACTGTGACGGCTGAATTTTGTGGGCTTGGGGAG +ATTGCAGGTTCTCTGATTTCAAGGGGAATGATGTGATCCTAGAGTTGCAAAGAACAAGTG +ACAGTGGAGGCGCTTATGCTTTGTGACTGCACTAGAGACAAGGAAGACACAACTAGAATA +ATGGGGAGCAGGAATGGAGCGGCCAACAGAATATCTGACTGTTAGGGATCTTTGATGAAG +GCTGATTCTCAGGGAGTGAACTAGATCAGTGACCAACTATTTGTCTTTATATAACTGGGT +AGTGTGGATGGATTCTAATAAAGGGACTACTTACAGCACAGCAGGAAAGTCACAAAGAAA +CCAGACAGAAGAGTGTAAGTAGTAAGGGGCCAAGCAGTCATCTGACTAGAGACAGTGCCA +GCTTGCCAAAGAAGGCACCAGACAGAAGCTGTGATCTTCAGCAAAGGGACACAGTCTGCC +TGTGCTGACCCTGCAGGGGCAGAGGTGGGGGATAAACACACTCTTCTCTCACCTATCTTC +TGCCACCCCCTCCATTAGCTGAACCCCAATAAAAGCATGAGGGTAAGGGAGATCTCTGAA +GTATCCAATTCAGGTGAGCCTCCTAAGGAACAAAGCAGAATGCAGAAAAATTAAGAGTGG +GTCTAGGGAATAAAATAGAGATATGCACCAGAGTATGATGATGTGTCTGGGAAAGAATAT +ACAAATACTTTTAAAATTACTAGACAATAAACCTGAGATGACACTAATACAGGTAAATCT +CATTTAATGGTAATATATTCCAAGAAATGCATCATTAGGTGATTTTGTGGCTGTGCAGAC +ACCAAAGAGTGTACTTTATACAAACCTAGACAGCGTAGCCTACTACATACCTAGGCTATA 
+TCGTACAGCCTATTGCTCCTGGGCTACACACCTGTGCAGCATGTCACTGTGCTGAATACA +GTAGGCCAATGGTCCGCAACACCCAAGCCATGGACCAGCACCGGTTCGTGTCCTGTTAGG +AACTGGGCACAAAGCAGTAGATGAGTGGCTGGCCAGCCGAGCATTACTGCCTGAGCTCCG +CCTCCTGTCAGATCAGCAGCAGCATTAGACCCTCACAGGAGCGCAAACCCTATTGTGAAC +TGCCCATAGAAGGGATCTAGGTTATGTTCTCCTTATGAGAATCTAACTAGGCTGGGTGCT +GTGGCTCACGCCTGTAATCCCAACACTCTGGGAGGTCGAGGCAGGTGGATCACAAGGTCA +AGACAATCCTGGCCAACATGGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCTGGGT +GTGGTGGCGCATGCCTGTAGTCCCAGCTACTCGGGGGACTGAGACAGGAGAATCACTTGA +ACCCGGGAGGCAGAGGTTGCAGTGAGCCGAGATTGTGCCACTGCACTCCAGCCTGGCAAC +AGTGGGAGACTCAGTCTAAAAAAGAAAAAAAAGAAAAAAAAGAATCTAACTAATGCCTGA +TGATCTGAGGTGGAAGAGTTTCAGCCCAAAGCCATCCCCACTCCGTGGAAAAATTATCTT +TCACAAAACCAGTCCCTGGTGTCAAAACTTTGGGGCCCACTGCTGTAGGCAGTTATATCA +CAGTAGTAATATCTAAACATGGAAAAGATACAGTAAAAACATAGTACATTGGGAGACCGA +GGGGGGCGGATCACCTGAGGTCAGAAGTTCGAGACCAGCCTGACCAACATGGAGAAACTC +CGTCTGTACTAAAAATACAAAATTAGCTGGGCATGCTGGCGTGGGGCTGTAATCCCTGCT +ACTCGTGAGGCTGAGGCAGGAGAATCACTTGAACCCCGGAGGCAGAGGTTGCAGTGAGCC +AAGATCGTATCATTGAACTCCAGCCTGGGCAACAAAATAAAACTCCTTCTCAAAAAAAAA +CAAAGAAAAAAATATATATATATTACAAAATTTAAAGAGAGTGGTACACCAGTAAAGGAC +ACTTAGCATGAACAGAAGTTGCAGGACTGGCAGTTGCTCTGATGAGTCAGTGAGTACTTG +GTGAGTGAATGCGAAGGCCTAGGATATTACTGTACATAACTATAGACTTTATATGCACTG +TACACTTAGGCTACACTAAATGTATTTAAATTTTTTCTTTCTTTAACAAGTTCATCTTAG +CTTATCATGACTTTATAAACTTTTAATTTTTTTAATTTTTTGATTCTTTGGTAATAACAC +ATCTTAAAACAAAAACACATTGAACAGCTGTACAGAAATACTTTATATCCTTATTTGATA +AGCTTCATTTATTTTGATTTTTTATGTTTTAAACATTTTTGTTAAAAACTAAGATACAAA +CACACACACTAGCTTAGCCCTGTAAGGGTCAGGATAATCAATATCACTGTCTTCCATCTC +GAAATCTTGTCCAAGTGAAAGGTCTTCAGGGTCTTCAGTGGCAATAACATACATGCAGCT +GTCATTTTCTATGATAACAAGGCTTTCTTCTGGAAGAACTCGTGGAAGACCTTCCTGAAG +CTGTTTTATAGTTAATTTTTTTTAATGAGTAGCAGTACTACACTCTAAAAATATGAATAA +AATCTATAGTATTATACATACTGTTGATCCTTGAACAGTGCAAGGGTTAGGGGACCAGCT +CCTCTGCAGTTGAAAATCCATATATAATTCTGGGATATCCCCAAACTTAACTTAATATAT +GATATATATCATATATAATATATATGATATATATTACATATGATATATATCATATATATT +ATATATGATATATATTATATATAATATGTATGATATATATTATACACAATATGTATGATA 
+TATATTATACACAATATGTATGATATATATTATACACAATATGTATGATATATATTATAC +ACAATATGTATGATATATATTATATACAATAAGTATGATATATATTATATACAATATGTA +TGATATATATTATATACAACATATGATATATAATATACTATATGATAGAGATTATATACA +ATATATATGATAGAGATTATATACAATATATATGATACAGATTATATACAATATATATGA +TACAGATTATATACAATATATATGATACAGATTATATACAATATATATGATAGATTAGAT +ACTATATATGATATAGATTATATACAATATATGATATAGATTATATACAATATATATGAT +ATAGACTATATACTATATATGATATAGATTATATACTATATATGATATAAATGGTATATC +ATATATGATATAAATGGTATATCATATATGATATATACAATATATCGTATATATGATATA +GATGATATATCATCTATAAGATATAGATGATATATCATCTATAAGATATAGATGATATAT +CATCTATAAGATATAGATGATATATCATCTATAAGATATAGATGATATATCATACCTGAT +ATAGGTGATATATCATATATGATATAGATGATAAATCATATATGATATAGATGATATATA +TCATATATGATATAGATGATATATATCATATATGATATATCATATATTATATAATAAATG +ATATATATTATATATAATAAAAGCTATATATTACATAATAAATGATATATATTATGTAAA +ATATATGATATATATTATATATTATATCTGATATATATTATATATTATATATTATATATT +TTATCTGATATATATTATATTTGATATATTATATATTATATTATATATAATTATATTATA +TTATATTATATTATATATATTTTATATTATATTATATATAATTATATTATATTATATTAT +ATATTATATATAATTTATGATATATATAATGTATTATATATAATTTATGATATATATTAT +ATATTATAAATCATATATCATATATATTATGCTATATTATATATAATATAGCATAATATA +TCATATATTATATAATATATAATATAATATAATATAATATAATGTAATCATATATAATAT +ATAATATATTATATTATAAAATATATTATATTATGATATACTATATATTACATATTACAT +TAGATAATATTTTATATAATATATAATATATATTATATATTATACTTTATTATATTACAT +ATATAATTATACATTATATTTCATATATAATTTATAATTATATATAATTATATATTATAT +TATATAATATTATGTATAATATATTATTAAATATAATATTATATATTATATTAAATATAA +TATTATATATTATATTAAATATAATATTATATATTATATTAAATATAATATTATATATAA +GGATGCAGGATGTAAAAGGAAATTATATATATGTTATATATTATATATATTATATTATAT +ATAATTATATATATATTTGGGGGTGCCCTATTTCCTATCTCATAACTTATTTTAAGAAGC +ACAGCATAATAATGTGTGGACTTGGGATTCAGTTTTTGAAACGACACACTGAGCCTTCCA +TGACCTTCCTGTACATGTGAAAGCACACCTGTCTGCATGGCAGCATTTGGACCTCACAGT +GTGGATTGTGCCTTCACCCTGGAATGTTTATGCCCTATCGCCATGGTGATGTGATTAGGG +ATCTGCTGCCCTTGGTCCTAAGTGCCACTATCTGTGCTGAGTTTTTCAAAGGTCAGAGCA +GATTGAACCTTTGTGGTTCCATTTTCCCTGATTTTGATTTTTCTTATGGGGAACCTGTGT +GGCTGCATTCAAGGTATGTTCATACTGGCCTGTCAAATGCGATCTTTTCAAATTACTAGT 
+TAATGCTTTCAAAATATGTTATTTAAAAAATTAGCCTCTGTATTTTCCATATGCAGTTAT +AAATATGTTTCATGGTTAGGTTTTATTCCTCAATTTATATATTTGATTCTTATACCAAGC +AGAGTACCTTTGAAATTTTTCTTCATTTAAAAAATATGTATCTTGGCTCAAGCCTGTAAT +CCCAGCACTTTGGGAGGCCAAGGCAAGAGGATCACAAGGTGAGGAGATCAAGACCATCCT +GGCCAATACGGTGAAACCCTGTCTCTACTAAAAATACAAAAAATTAGCCAGGCATGATGG +CAGCTGGTGTAGTCCCAGTGTGAATTGGGATTCAGTTTATTCCCAAATTCCCAAAATATA +TATATATATATAATATATATTATATTATATATAATTTTATATATAATATATATTATATTA +TATATAATTTTATATATATATATTTGTCGGTGCCCTATTTCCCATCTCATAACTTATTTT +AAGAAGCCAGCATAATAATGTGTGGGCTTGGGATTCAGTTTTTGAAACAAAACACTGAGC +CTTTGATGACCTTCCTGTACTTGTAAAAGCCCACCTGTCTGCATGGCAGCAGTTGGACCT +CACAGTGTGGATTGTGCCTTCACCCTGGAATGTTTATGCCCTATCGCCATGGTGATGGGA +TTAGGGATCTCCTGCCCTTGGTCCTAAGTGCCACTATCTGTGCTGAGTTTTTCAAAGGTC +AGAGCAGATTGAACCATTGTGGTTTCATTTTCCCTGATTTTGATTTTTCTTATGGGGAAC +CTGTCTTGCTGCATTCAAGGTATGTTCATACTGGCCTGTCAAATGCGATCTTTTCAAATT +ACTAGTTAATGCTTTCAAAATATGTTATTTAAAAAATTAGCCTCTGTATTTTCCATATGC +AGTTATAAATATGTTTCATGATTATGTTTTATTCCTCAATTTATATATTTGATTATTGTA +CCAAGCAGAGTATCTTTGAAATTTTTCTTCATTTAAAAAATATGTATCTTGACTCAGGCC +TGTAATCCCAGCACTTTGGGAGGCCAAGGCAAGAGGATCACAAGGTGAGGAGATCAAGAC +CATCCTGGCCATTACAGTGAAACCCTGTCTCTACTAGAAATACAAAAAATTAGCCAGGCA +TGGTGGCAGCTGGTGTAGTCCCAGTGTGAATTGGGATTCAGTTTATTCAGTTTATTCCCA +AATTCCCAAATTATATATATATATATATATAATTTCCTTTTACATCCTGCATCCTTCAAC +GTTCCATCCCCCACCCCACAGATTAAGTTATTCCCCAGGGGAGAATATGGCAAAGTCTAT +TTTAATGCAGTTTTTAACCCAATTAAGAACCTATGAAATCATTACTTTCCAAAACTTTGG +AACAAAGCCACAGTAGTATGGATCCGTTGGAGGCTTTTCACACAATAAAATGTACATCTC +TTTGTTTTTAACATGTTTTTCCCTTCCTCTCTTCTTTTTTTGTGAAATGTGTATTTACTT +TAATATATTTGTAGTAAGTCACTTCCATGCACATATTAATTTTTTAAAGTAATAAGTATG +TGTATTGTCTACGTGTGAAAGAAAACACACATTTATTTTTATGCCTTGGAAGTTATCCAG +AATCATGGAATTGTCAATCACAGTCAATCACCCAACCTACTCACCTTTCCAGTGTAATCT +TAGTCAAATTTTTTTTTTGTTATCCAATGAGATGCAGTATTTCAACTCAGAAAGATAAAT +AGAGTGAATTTATAGAGACTATTAACTAAGAACATACAGTTTTATTTATACTCAGAAGCA +AGTAGATTATGTACATATATATGAAGATAAAAATTAAAAGGATAATTGTGTAAATTTGCA +TGTAGAGAGCTTTGAAAACCTGTTTACTTGTTAATGCTGTTTTGATGTATTGTGTCTTTG 
+TTCTCCCGACCCATCATCCAGAGCTCTCTGCAGGAGCTAAGTGCTCATCAGTTCCATGAT +TTGGAAACTGTCTAAGTTTAGAGGCACTTGTATTTGTTAGTAAATAAGGCAAGATGATAT +TGTTTCACAGGTTTTAGTGCAGAAGACTGAATAGATAAGCTGCTCCACCCAGTACACTGG +TGTTCATTTCATGGTCATCTCATCTGTTAACCATGGATATAAAACATTTATCTTCAATGA +TGGGGTTTTACCATGTTGGTCAGGCTGGTCTCGAACTCCTGACCTCAAATGATCCACCCA +CCTCCACCTTCCAAACTGCTGGGATTACAGGTGTGAGCCACTATGCCTGACTGATTATTT +TCATAACCAAGAAAAGAAATAAATACAATTAATGCTGGTGCATGGTATTAAATCTAGTTT +TTAAAACATTCACACATAAACAAGGCAGAACCCTATACCCTCCATGATAAATGCAGTAGC +AGTGTATGTGGGTCTGTGGAGGTTGAAAGGGACTTGGTAGATGTCAAGAAGGTAGTGGCA +GTCCTGCTGGGCTTTTAAAGGGTCTGAAGAAGTGACAGGATGCTGTGGTTGAATCCTAGC +ATGTATTTTAGCATTTGTTCATTTGGAGTTGATTATTTCACGTTGCTTTCATTTGCCATT +ACCTGGAAAGCCAAGGGCTCTACTCTCATTTCCTTGCTGCTCTTTCTTTGCCTTCCTTGG +TCCGTGAAGAAGATGGTCCAGGAGAAGCTCATTCCATGCTTGTTAACCAGGCACGCCCCT +AAGTTCCAGTCCCTGAGTCATTCATGAGTAGCACTGCCAATGAACTGACAGCAATGCTGT +GTCCCTCCACATCCCCTAGGTGACTCGAAGAAGCCTTCCAAAAAGCGTGTGAAAAGGAAG +CCCTACTCTACTACCAAGGTAAAGTAGCCTGTCTTTGCCTAAGATGCAAATGTTGTTTTC +TTGGATCCTTTATTTTTCAGTTGATATCAGCTATGGGAAAATTATCCACTACATTATAGG +TGTTAGATAATATTTCCTTGGGGGTGGAGGAGGTGTATTTTACCAACTGACACCTGATTC +CAGAGGACGTGCAAAATTGGCAGTGTCAGATAGTACACTGGGTGCTAAGGGATGTTTTCT +TCAGGAACAAGCTTTCCACTTTAGATAAGAATTCTGCAATTTCTACTCAAAAATTACCTA +GACAGAAACATTCTTCAAGAAAAGCTCCTGTGCTTTCCTAAGGGAACTCTACTCTAGAGT +TGGGGCTTTTGACTTGAACCTTATTTCCAGTCTTGGTTACCCAGAGTTTCCAAGTGAACA +AAAGACCTGTGTGAGCCATCCATAGCATAGCCTGATTCTCAGAGTGTTTTCCTTCTCTAA +TTACAGGTGACTTCAGGGAGCACATTCAATGGTACGTATTCTGGAATCACTCACTGGTTG +TTAGAAAAGGATTCTACAGGAAATCTGGAGCTTAACTGCTGGCTTTTGTCTGGAGAGCCT +CCATGATCCAAGACATCTGGTGGGAATGAGGATGTAGGGTATAGTAAAAGAAACTGGTTT +TCCTGGTGACATACTCTTTTTATCTATGTATAGTTTCTGGGAACATGTTCACATTAGGTT +GTGTGTGGGTATGTGTGTATTAGGGCGGGGGTGGGGTGAGGTGGTCTGTGTGCAAGTCTG +CATGATTTGCTTGTGAATGTGTGTCTATGTGTGTTTCCCCTACGAAAAAAATGTTGTGTT +TACCCAGCACAACTCTCAGTGCCATGTTTCTTAATTTAACAAATCAGACCACATACTTTA +CTTACATTAGTTCACACCTCATCATCATCATGCCCATATGTTGTGAGCTTGTTTACTGAG +CCCACATGCCAGATGGAGAAACTAAGCCACATAAATAAATGTGCCCTGGTTCACTTGCTG 
+CATAGTGAAGAGTCAAAATGTTTACTCATACGGTGCTAATGTTGAAGGCCTGAACTACAA +CCTCTATTTATCAGCCAGTGAAGAGATCACTATTCACCATGCAAGGGAGTTCCAGCACCC +TCTATGCCTGGAATTACCCACACCTGCAGAGATCCCAAACGCCATCCCTCACATAAGAGA +ACGTCATGATCTCATAATCCAGGTAGCTATGTAGACATCTTCCTGCAGGTGTCACATAGT +CCTTAGTGTGAAACCAACATAGAAAGCCCATGTTTCTGATCAAATCACAGGTTCTGAAAC +ACTAAGGGAGGCACTAAGTAGGACAATGTGGTGCCTGCGTGTCATAGCTGGGTCTCCTCA +AGACATGGATCAAGTCCAGTAAGAATTGGGGAGATGCTTTAGAGTCTTGATGGAGTTATC +ACCACAAGCCCTCTGAGCTACACACTTTAGGGATCATGACCATTAAGTACTCAAATTACC +ATTTGGTTGTTATCCGGGTATCCGTCGTCCTTGTGGCAACCCTCTTGTGAAGCTGGTGTG +GACAGCCTCAGTGCTGGAGCTGTGCCTGCCTTCTGAGTGGACCCTTTCTGTGTTAGCAGG +TGGGTACAAGCGTGGGGGTCAGCACACTCAGTGGATTTACACACACAGCGTTGAAGAGTA +AGGCTGGGCTTCATTATTTATACATTTTCAATAAATGATGATCTTCATAACATAAAATCA +ATGATGTAGTACACTAGAATACTGTCCCTAGTATTGAATCTTGTCTCTCAGCAAAGGGTT +GCTTAAAGTCACGTGACAGATTCCATTCAACTGATGGCACATGCTGTAGCAGCAGTTAAA +GCAGTCATTTGAAAAGGCTTTTACTATAAACTTACGTGTGAGCCTGAAGTGGGGGATAAA +AGAGGCGATTAGCTCCCCTGTGCCATGTTTCTCTTATGTGCGTGGTGGAGGAAAATTACA +CAGGAAGGTGATGGAGAGAACAGAGCAAAGGATTGGACAGGTCCATTGAACCCATAAGAC +TATGGTGAGGTTAGTGAATGAGATTGGTCATTTTAGGTCAAATTTTACCCAGAGCTGGTG +CAGCCACTGCCCATTCTTAGCCAGACCTTATTGCAGGCAGCTCTGATCAATAGTCAAGGA +GGCAGTGGGGGTTGTAGACTTTACTCATTAAATCACCAAAGCACCAGCCCACACGGCCAC +TTTTCCAGTTAATTGACAGTAGCTTGCACATTCAGGTTTGATCAGTGGAAGGGAAGTTAC +TCTTTGCAGACCCATTTTTGACAATCATTTTGCGGTGTCGGAAGGTCTGAGCAGCCTCGG +GAGGCAAGCAGTCCCTGGTCCCTCAGTGTAGTCACTGGAGGAGACAGTCACTGAGAGGCA +GCTGGCAGGGTGAAGGGAAAGGGGAGGCAGGCCACAGAGATGACAGCCTTTAAGCTGTCA +TACTGGGAGGTCAAGGATCTGAAAGAGGAAGGAGAATTCTTTATCATTAAGGACCTGTCC +TTATCTCAGGCATTTCCTCCAGAGCACCACCTTTGTCCACCCACACACCTTGGGCTAGGA +GGACTGGAGAAAGACAGTGAGGGGTCTCTTGGGTCTCTGGCACAGGGCGTGATGAAGAGA +TGGCAGTTTTTCAGGAATCTCTCTCTTTAGGGAACCAAATACATTTCCCATCTCAGGTCC +TTCACTCAGCGGGGTTGAGGTTCTGCTCGTCACTTATCATCTCTGAATGTCAGCACCCTC +AAGTGTAAAATCTCAGCCACAGCCCCTCCTCTGCACCCCCTGCAGGGCTGATGTTCTCTA +TAAACCATAAGGCGTCATGCCCACGGAAAAGCCGAACAGGAAAGCATGCTCCACTGCCCC +GGAGCCATCCAAGTTCCCCCTCCATATTCCGCCACTGCTAAGTGTCCAGCTTATTCCTCC 
+TGGCATGTAGTAAACACTTAGAGAACATTACTGAAGTACCAGTCCTCTCTAAGGTTTTCC +TGTATTTAGTGATTTTTTAGCCCTGTACTGTGATACTAAGAAGTAGGGCTTAAATAGGGC +CTAAAAAGTATTGCTAAAATTACATTATGACAGTGCAGAGAACTGAGGGCAGAGGGAGGA +CATGAGCTTGCCAGGTCCACATGGCTTAGTGGAATTTGAATCCGGGCCCCCACTCTGCAC +CAGCCCTGCACTCACAGTCATCCTGCTGTGTTCCCCTCTCCAGGAAGGCACTGCCCACGC +AGTCTGTCTGATAGAGGTGTTGAGTGCTCACTGAACTCCGTGATCTTCCTGAAACCCAAC +TTTGATTCAGTGGGCTCTGCTTGGAAGCCTGTAAAGAAAAGGATCATAAGTTTAAACTTA +GAACAGATTATCACTATTTTCCCTCTGGTCTTCTGTCAGCAAGATGTCAACAGCCCTATC +TATTGTCAATGCATTAACCAGCATCTTCTCTGATAGAGAATACAAGAAGATATGCTGTGC +ACACCAACCAGTGTAGGAGACCTCATGGCTCCCGGGTAAAGAAGAAGAGGTACCCACAAG +AAGGTACTGTGGAAGTTCATTAATTAAGTTGATTCAAGAATTGCAGTTGCGGGGAGTATT +CAGTGTCCCATATGTAAGAGGAAACTATGAAGAGACTAAGCCATATTTTTTAATGTGTCA +GGATTCTAATTTGCCTGGTCAGTAAATATTGCTACCACCACAAAAGTAAATATCTACTTA +AAAGTCAATTTTGGTTCATGTTTAATGATAGACAATGTTTCAAGCTAATGTCTAGAACTT +ACCTGGTTGTTAAACATAAGCATAGATCTCCCTGAAAGAGTGGTGCTATATTATTATTTT +TCAATTAATATATTTCTTTAGAGAGTTTTAAATTGACATAAAAACTGAGCATATGGCCGG +GCGTGGTGGCTCACACTTATAATCCCAGCACTTTAGGAGGCCAAGGCAGGCAGATCATCT +GAGGTCAGGAGTTGGAGACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAAT +CCAAAAAAATTAGCCGGGTGTGGTGGCAGGCGCCTGTAATCCCAGCTACTCAGGAGGCGG +AGGCAGGAGAATCGCTTGAACCCAGGAGGCAGAGGTTGCAGTGAGCCAAGATCATGCCAT +TGCACTCCAGCCTGGGTGACAAGAGTGAAACTCCATCTCAAAATTAATAAATAAATAAAT +AAATAAATAAATAAATAAAAATTGAGTATATAATACAGGAAGTTCCCATATTATTCTGTC +TCCTCACCCTCACTTCCTAATTTCACCTATTAGTAACATCTTACATTACTGTGGTACATT +TGCTAGAATAATGAGAAAATATTGACACATTATTATCTGAAGTCTACATTTGCATAATGT +TCATTCTTTCTGTTATACATATATATGAATTTTGAAATATTTAAAACATTATGTTCACCC +TTATGGTCTCATAAAGAAAATGTTCACTTCCCTAAAAATCCTCTCTTCTCATTAATCTCT +GTCCTCTTTCTCCAGAAACCTTGGCAACTATTAATATTTTTACTATCGCTTCAGCTTTGC +CTTTTCCAGAATGTCATATAGTTGGAATCATATATTATGTAGTTTTTTCAGATGAATTTA +TTGCACTAAATTGATGTACGCTTTAGCTGCTTTCATGTCTTTTTTATGCCTTAATGGCAA +AAAATGGCACATTAAATCACCAAATAATATTGCATTCAATGAATTTTTGTCTTTTTATTC +ACCTGTTGAAGAATTCGGTAGATTTCATGAGAGAAACCATCTGGGCCTGGTGCTTTCTTT +TTCGGAATGCTCTTAATGTGAATTCAACTTATTTAATAGACATAAGTTTATTCACATTAG 
+GATTCTAGCGTGACCTTGGGAAGATTGCCTTTCAAGGAATTGATACATTTCACTGAGGTT +ATCAGACTGCGGTCATAGAACTGTTCATGATATTCCTTTTAATGCCTAACAGTTCAGTAG +AGATGGCTCCTCTTTTATTTCTGAAATTGGTCATTTGTGTTATCTTCTTTTTCTTGGTTA +GCCTGCATATCAATTCATTCATTGTAATGAGCATATCAAAGAACCAGCTTTTGGTTTTAT +TGATTTTCTGATGATTTCAGTGTTTTAATTTTATTGATTTCTGTGATGTTGTTTATTACT +TTTACTTGCTTTCCATTGCATTCCTCTATTTTCTATAGTTCCCTAATTGAAACATGATAT +TACTGATTTTAGGTCTTGTGATTTTTAGTATATTGCATCCAATGCTGTAGATTTCCCTCT +AAGGACTGCTTTTGCTACATCCAGAAATCTTGCCAAGTCACATTTTCTTTTAATGTAGTT +AAAAGTATTTTTAATTTTCTATTGAGACTTCTTAACCCATGAGTTATTTAAAAGTGCATT +GCTAATTTGCAAATATTTGGGGATTTTGTGGCTCTTTTACAGTTGTTGATTTTTTGTTGT +CAGGTATGTGTTGCAAAAGCAGTCGTCTACCTCATCTTGCCACCACCCAAGATGGCCCAG +GATGTGGGCTCTCCCTGAGTGAATCTTTGGCAATCTGCCAACCTGATGTGTTCGGCCTCC +TTCTTTAGTCTGAGCTTGCCTTCTGCTTAGAAAGGGCCATTCTCAGTTCTGGCAGGGAGT +TTTCCCAACATTGAGAAGGTGGCATTCTTACTCCCCACTGCAGCCTGCACCTCTGACCGG +TGGTCAGCAGACAGGACAGAGGTCCTCATTAGACAGAGTTCAGCAGGGTCTCTGACCAAA +GTGCATCTTCAGAGTCTGCACCTACCCACTGTGACCACGGGCAGGCTCTGAGTCCTAAAG +CAGGAGGAACTGTGCGACCATCCTGATTGGAAATTTGTGAGGATCACCGTGTTACTCAAG +TAAGGTCTTTGGAAAGTGTCGTATTACTACTGTTTGTGAACTGCTTGTTGGTGGCCTGGC +TGAGCCACACACTTTATGAAAACCAGGACCCCTCAGCTGGTGTGGGTGTCTATGCAGCCT +GAGACCCTCATGTGAACAGCCTCGTGGCAGCTGTCTTTGCCCCTTGCCACCATCAGTGCC +TCCTTGTTCCTGGGCACTGCTTTCTCTGATGGTGCTCCATTGTTTTCCTGCACCTCAGTG +TCTACAGCTGGATGTCTCTTCCGCAATCTAGGCGAGGGGGCATCAACGGCAGTTCTGCTG +TGGCACTGCCCTCCTTCTTAGCTTGTCTTGCTCTGTCTTAGGCTCCCTCAAAGATCCCAC +CCTTCAGGTTCTTCCACAAGTTTCTTATTGAAATCCGAGCAGAAAACTATGACCAATATG +ACCAATCCTATACCCACTGAAGACATGAATGAAGAATTAAAACAATTCTCCATGGACTCT +ACCATATAGATCCCTAGAAGTAATTTCTAAAAAAAAAAAAATCAAGGAAAATGTAATAGT +TTTCCATAAATTAGAATACCCTACATGTACAATTAAATGAAATGGCTAGTATAGTCTTGA +AACCAAAACCAGATAAGGTAAATTAAATTCTGTGATATTTTAAAATACTTTAAATTCTGA +CTAATGTGAGTTAATCTCAAAATATGAAATAGTAGATTAACATTGAAAATGCAATAAATA +AAATTAGCTACCTCAAGAGTTTAATGGAAAAAAATGTGATTATTGCAATAGATTCAGGAA +ATTCATGAATAACATTCACCCTATATTTGTAGGACAACTATCTGATTAACTTTAAGTGAC +CTGTTACAACCACTTGAATTAATGCTGCTTTCACAGCATATCTCTTGGCTTGTTAAAAAC 
+CCGACAAGAATTTCCGTAACATTATTTTTAACACCTATATTGGGTGTGAACCCACCATAA +AGTTTGCCCACTGAAAAGGTCTACAATTTGATGCTTTATTAAATTGATACTGTGTGCACC +CAACACCACGATCTAATTTAAATATGTTTCCCTCACCCAAGTTCTGTCTTGCGCACTGGC +AGTTAATCCCCACTCCCATCTCCAGCCCTAAGCAATACTGCTGTGACATTCCATCTCCAT +AAATTTCCCACTTGCTTTATAGAAATGGACATATATATATATTTGGAATCTGACTTCCTT +CATTTAGCATACTATGTTTGAAGTTAATTGACGTGTTAGCACGTGCTGGTCATGTGTTTT +CCTTCATAGTCTGCTGTGTTTATTCATACAAATAGTGTTTATTCATTTATCAGTTAATGG +ACATTTAATTGTTTTGTTATTTTCTTTGATGAGTAATGTAGCTTTGAGCATTCACATACA +GTCATGTAATGCATAATGACATTTTGGTCAAAAAAAATTTTTTTTTTTCTGAGACCCAGG +CTGGAGTGCAGTGGCACAATCTCGGGTCACTGGAACCTCCATCTCCCAGGTTTAAGCAAT +TCTCATGCCTCAACCTCCCGAGTAGCTGGGACAACTGGCACACACCACCACGCCTGGATA +ATTTTTGTATTTTCAGTAGAGACAGGATTTTGCTGTGTTGGTCAGGCTAGTCTCAAACTC +CTAGCCTCAGGTGATCCACCCACCTCTGCCTCCCAAAGTGCTGGGATTATAGGCATGAGC +CACCACACCCAGCCTAATTTTTTTAAGAAAGAAGGGAACTATTTTCTAAATTACTTTTGC +CAATTTATATTCCTACCATGATGCATAGCACTAATTTCACCGTACAATGTATGGTAGGCC +CCAATATGTAAGAAATGATGAAAGTAACACATAAAGATTAGTATAAGACAAATGAGATTA +TCGTTGTTGCTATCATCTTTGTCAAATTCTGAAAACAATCTGAGTATATTTTTATATAAA +TATGCTTGGCAACATAGCTGAAAAAAGCATTATCAGTTACATTTATCAGTAACAAAGACA +TAAATTTGAAGGGGGAAAAACACTTGTACTAACAATGCAATGTCAGAATTAACATAAAAA +TTCTGCTGGTCACTTTGGAATATTTAATTGCCTGGGGCAGTGTTTAGTAGACAAATGAGC +ATCTATGGAGCACCCAAAGTAGGGGAATCAACAGAACTTGGGTTTCAAAAGTTATCTGGG +TTTAGAGCGTGAAACTTTGTTAGAGGACACACACCTTGCATGAGTGAGGTGCCTTGGTGT +GTGTGGACGTATCATTATGCTTGGAGGTACAGCATAATGGTGGCTTCCTCCAGAAAGGGA +CATTTGGGGTAGATTCATGCCATCTAGACAACACAGCCTGATGTGGCATGGACATGAATG +GAGGTGAAATGGTCAGTAGTTGAGAGGATCAGTCCTGACAAGGGCCGAGGTGAAAAACCT +GGGAACACCTTCAGGTGCAAAGTCTTCAGTTGAAAAAGGAGGTGGTCACAGGAAATACTG +AGATGGGTCAGCAATGCATGGGAGACAGAGTTCTTGGCCCTGCAGGGTGAGTAGTGTGGA +TTCTCAAGTTTTCTCCTCTCTCCATTAATTTCTTTCCCAATGCAGATGACTTCCATCATA +CAGTCTTCAGCAACCTTGAAAGATTGGACAAGCTTCAGCCCACTCTTGAAGGTAAAGGAA +GGCAGCTAACAAGACTGGCATCTGGGCTTGGCTGTGCATGTTTTCTATCGTGGGGAAATA +TATGTAACACATTATTTATCATTTGAACCTTTTAACCAAAGTGTGCACTCCGTGGCATTC +AATATATTCACAGGGTTGCATAACCAACACCACTATCTACACCCACAATTTTGATGATTT 
+CTTACAAAACCTTGTCCACAATAAGCAATATAGCACCTTCCCCCTATTTCCAGCCCATGG +TGATTCCTATCCGACTTTCTCTTGTATGAATTTGACTATTCTAGGCACTTCATGTAATTA +CAATTATACAATATATTCCTTTTGTGTCTGGCTTATTTCACTAAGCATAATGTTCTCAAT +GTCCACCCATGTTGTATCATCTATCAAAATGATGTTCGTTTTTTACAGATGGATGATGTA +GCATTGCATGCAGACCACCTTGCTTTTATTACATTCATTTGTTCACTGATGGTTGGATTA +TTTCCACCTTTTGGCTCCTGTGAAAAGTGATGCTACAAACATTAGTATACAAACATCTGT +TTGATTTCTGTTCTCTATTCTTTGGGGTGCCTAAGAGTAGAGTTCCTGGGTCCAACAGGG +GTTCTATATTTAACCTTCTGAGCCACTGCAGACTGTTTTTCACAGTGGCTGCAACTTTAT +CCATTTCTACCATCAATGTATCAGGGTTACAATTTCTTTACGTCCTTGTTCACACTTATT +TTCCTTTAAATCATCCTAGTAGGTGTATATTGGTGGCTGCTTGTGTTTTTCATTTGCATT +TCCCTAATGACTAATGATCCTGAGCAGCTTTTCCTGTGCTACTATCTGTGGCTGTATCTT +CTTTAGGGAAATATATGTTGAAGTCTTTTGCCCATTTTTAAAGAGTTGTCTGATTTTTAT +TTAGTTAGTTTGTTGCTGTAGATTTTTGAATATATCTTAAATATATTTAAAAATATTCTA +AATTTTAGTCTCTTACAAGATAAATGATTTGCAAATATTTCCCCCTTTGTGTAGAACTTT +AGATTCACAAACTTCATTAATTTGTATGAAATCCTCAGCAGTTGACCCCAAACAGATAAG +ACTGAAGCAGTATTTTAGGAATAGTTGAAAGTATGATCACCACAAAACATAAGCGTAATC +AAATCCTGCAAGCTACATGTAAGGCACAATGACAAATAAGGCAGCAAAGGGCCATCTGGT +GATTAGTTCACCACACTTGTTGCAACTGTTTGCACTGCAGAGTTAAAACATACCAGCATT +CAACCCATGTCTCCTGTCTTGAAGTAAACTGTCGTATGTTGGCTGGCCTGAACAAGCGTA +GATATTCTCCATCCTCAATTAATATGCATGCATGACAAAGAAAAGGAGGCCTGGATGAAA +AAATACTGTGTGATTAATAATTATGCTTTAATTAATTTTAAAGGATATAATTTCAGTACT +TCTAATTCTCCCATCAGCAGTTATAACAAAGGATTAGTGAATAAATACCATAGACTGTTT +TGCCTAGAATTGAATCCAACCTGTCTATTAAACTTTGCTTTTATTCAAGTGCAAAATGCT +AAAACACATAATAACTGCAGTGACAGCCACTGTGGATCCTCAGAGGTAAAAGTAGTCTTG +GGACATAAATCCTGCAAGTAATATTGTTTTTACAGGTTTAGAAAACCATTTAGCTGGGTT +TCAAACCTCACAGTGTGAGCAGTGGGACTCTCATCAAACTATAGCATGTGCTTCAGTACC +ATTTGTAGACTGACTCATTCCCATTGCCTTAAGTTGCCATCAGCAAAATGCCAGGGACTC +TATTTCTTGCTCCTTAGCTCCTCGTTCTTGCCTGTCTTTCCACGAGGGAGGATTTTCTAG +CAGGAGCTCAAGCTGTGCTTTTAATGAAACACATCCACACACACTGTCCTGTTGTCCACA +TTAAGCAGAGCTCCCTGAATAACTCATGAACAAAAGCATCTATGACTAACTGTTGCTCTG +TGTCCTCCTAGCCTCTGAGGAGTCTCTAGTTCACAAGGACAGAGGAGATGGAGAGAGGCC +AGTCAACGTGAGGGTAAGGTTGCCTTGCTTTCTCTGAAATAGAAATGTTCCTTTCTTGTT 
+GTCTTTCTTTTTCAACTGACTTTACATGTGAAAAGATGACAATGTCCATGACAGGTATTA +AATGCAGTTTTCTGAGGGGGAGGAAGAAGTGACTCTTAGCAACTGATATGTAATACAAAA +TGGCATTTAGCTATGATGGCTTCAGGTTGTAGACTGTATCCTTGGGGTCCTTGTCCTTGG +AAGCAATGTCTTCTCCTTGGATTCAGTATTTTGCACTTGCCAACCTACGTGGACCTGAGA +GATCCACCATCCAGAAGCTGATGTCTTTTCCAGTGTGTATCCTACCCTTGTTTTGGAGGC +CTTGAAGTTGACTACACTTTCTGATCAAGTTTTCAATATTCATTGAGAGAAACACAGCCT +TGTGCAAACAATCCACAACATGACATACCCCTCAAAAAGCTTTGTTTCTGTATTGCAGGT +GGTGCAGGTGGCCCCTCTGAGGCATGAATCTAGTAAGTATTCTGGAATCACTTACCAAGA +AAACAATCTGGATGCCAAGAAAGGTGTGGCATCCTTGCCTGGTTTCAATGTGAAGAGCCA +CCCTGATCCTGGGATTGTGATAGGAATAAGTATAGGGGAAGTGTTTTTTTAAAACCTGAA +TTCCCCAGGGAAAAATTATGGCCAAATTTTGAGGAAGCAGCTGTGCTCCCTTTTGGGTGG +TGCTGAGTTGGGTGCTTGAGGATTGGTGGTGTCTTGTGTGAGGCTGCATCGTGTGGTGTG +AATGTGTGTGTTTCTGTACAGGTGAGGCTGTGTGTTTTCTCAGGAGAGATTTCCCACTTA +TACAACCCAATCACCAGTGTCCACTTCTAACAATAAAATCCACCCCCGCTCTACTCTCTC +TGTACAGTGACTCCTCCACCCTCACCAGAGCCATCCCCGGGTCTGCCTTATTATCCCCAC +TGCTCAGGTGGAGAACCTGAAGGGCCAAGGGAGTGGCCCCAGCTCCCGAGTTCCTGAATG +AAAAAGTGAAAACACGAACCCAGGAGTGTGGGCCAGTGCTGACGCTGACATGCACTTAGT +CATGGGGTGTTCACCACCACACAGGGAGTCCAGCATTCATGTATAAACCCTAAGGCACCG +AGCCCAAAAGGCCCCAGGCACTGCCCATCATCATAAAGTGGTCTCCGTGGTCACACAACC +CAGGGCAGTTATAGGTTCATCTCCCCACGGACAGGCATAGTCATCAGTGTGTCAAAAGCA +CAAAGATCCCCAGGTGTTTGGCTCAGCTCACCGATCCTTTTTTTTTTTTTTTTTAACTTT +TAAGTTCAGGGGTACATGTGCAGGATGTGCAGGTTTGCTACATATATAAATGTGTGTCAT +GAGAGTTTGTTGTACAGATTATTGCATCACCCATATATTGAGCCTAATATCAGTTATTTT +TCCTGATCCACCCCCTCCTCCCACCTCCCACCCTCCAGTAGGCCCCACGCTCACAAATTC +TAAGAGGAGTGGGGGACCACAAAGGCCAGTGTGGCCCACTTCAGTTGTGAAGTTAATTTG +CTCAGCAACTGGCCAAAGTCTATAAGGATGGGTGATGTATTTTAGTAGATTTAGTAATAC +TATCTTCCCAAGCCCTAAAATGCTCAAATCCTGCCAGCCAAAAATGGTGAGGAGGGACAG +ATAGGAACTCTGTGTGGCACTTGGTTATTAGCCTGGCTTCCATCCCTTAGTGGCAACTCT +CTTGTATATGTGGGTTAAAGACCCTCAGCCTCAAGCCAAGCCTCCTCCATGAGGAGCCAT +CTCACTATTGACTGGCTAGTGCCGGGTATGGCCACCAGCCCAACTGAAACAAAATGTTGC +TTTAAAACAAGTGTAAATCTCATACATACAACAGGCAAATGCAGAAGCAGTGTGGTCTCG +CAAGTTGTAAAGAGGACAGTCGCAATTTTGCTGGACTTCAACCTGGGTAGAAGACACGAG 
+GGAACTCTGTCACTAAATCACGGCAGAGTTCAAGGCCGCTTGTAGACTATTTCGTGTTAT +AGAAGGTGGCCTTTAGCTACTAAGCAAAGGCCTCTGTTTCTCATTTCTTTCCTGTTCATC +TCCTTGGTCATCCTTCTTCCACAAGGGAAACGAGCCCAAGCAAAAGGCAGTTTCAATATT +AATTTGACCGAGGTTTTGTGCAGTTAATTATCATCCAGGTAATCAGGTGCAACCCAGTCT +GCCTAGCAGCCCCCCTATCTCTGCTCTGTGTTTTCATTTAATAAACATTTTGGTCTACTT +ACTATGTGCTAGATTTTCTCGAGACCAAGTAAATGAGATAGAATCCTTATGTTGGCAGCT +AAGTTAGATTTCACATAACTGACAAAAATTAAAATTTCTGATTTCTTGTAAAAATATTTT +GTATGTGTGAATGCATACTGAATGTAAAGTGGATAAAAAACTCACACTTGCACTCATGGA +AGGCTTTTCATGAATTTGTCAATTTTTATTTTTTATATTTCCCCACTTCACTGGATAATG +CATACCTGAACCTGGAAACTGATGCCCACTGCAGAAAGTGTTCTGAGCCACATCCCTTAG +CTTCACTAGTGCAGGTCCACCTGGGAGGATGTCCCAGCATCAGCTTGGCCCATGCTGTGA +TCAGCCACCTCCATGCACCACACCAAGCAAGCCCCTGGGTGATTCACAGTCTCCACTACC +AGGGCACTGACCTTAACTCTGTGTTCTTCTAGCTCCCCATGAGGACACCGTACACAACAT +CACTAACGAGGATGCCTCACACGATATCACTAACGAGGACGCTGTCCACGGCATCGCTAA +CGAGGCCGCCGACAAGGGCAACGCCAACGAGGACGCCGCCCAGGGCATCGCCAACGAGGA +CGCCGCCCACGGAATCGCCAGCGAGGACGCCGCCCAGGGCATCGCCAACGAGGTCGCCGC +CCAGGGCATCGCCAACGAGGACGCCGCCCAGGGCATCGCCAACGAGGACGCCGTCCAGGG +CATCGCCAACGAGGACGCCGCCCACGGCATCGCCAACGAGGACGCTGCCCACGGCATTGC +TAACGAGGATGCCGTGCACGGCATCGCTAATGAGGACTCCGTATACGACATCGCTAATGA +GGATGCCATATATGACATCGCTAATGACACCGTACAAGGCACGCTAAAGAGGACGCTGTA +CACGACATCGCTAATGAGGACACCATACAAGGCATTGGTAATGAGGACGTTGTATATGAC +ATCGCTAACAAGGACACTCTACAAGCCGTCGCTAACAAGGACACTGTACACAACATCGCT +AATGACGGCACCGTACAAGACATCACCAATGAGGGCGCTTTATACGACATTGCTAATGAT +ACCGACAAGGCACGCTAACGCGGACACTGTACACGACATCGCTAATGAGGACTCCGTATA +CCACATCGCTAATGAGGGTGCCGTATATGACATCGCTAATGACACCGTACAAGGCACGCT +AACGAGGACGCTGTACACGACATCGCTAATGAGGACACCATACAAGGCATCGGTAATGAG +GACGCTGTATACGACATCGCTAACGAGGACACCATACAAGCCGTCGCTAACAAGGACACT +GTACACAACATCGCTAATGACGGCACCGTACAAGACATCACCAATGAGGGCGCTTTATAC +GACATTGCTAATGATACCGACAAGGCACGCTAACGTGGACGCTGTACACGACATTGCTAA +TGAGGACACCGTATAAGACATCGCTAGTAACTATCGCAAGAACAAAAAACCAAACACCGC +ATATTCTCACTCATAGGTGGGAATTGAACAATGAGATCACATGGACACAGGAAGGGGAAT +ATCACATTCTGGGGACTGTTGTGGGGTGGGGGGAGGGGGGAGGGATAGCATCGGGAGATA 
+TACCTAATGCTAGATGATGAGTTAGTGGGTGCAGCGCACCAGCGTGGCACATGTATACAT +ATGTAACTAACCTGCACAATGTGCACATGTACCCTAAAACTTAAAGTATATATAAAAAAA +AAAGACATCGCTAGTGAGCACGCTGTATACGACATCGCTAATGAGGACACCATACAAGGC +ATCGCTAACGATGACGCTGTACACAACATCACTAATGATGACACCGTATAAGACATCGCT +AATTATGACGCTGTATACGACATCGCTAATGACACCGTACGAGGCACGCTAACAAGGATG +CTGTACACAACATCGCTAATGAGGACAGTGTACAAGCCATCGCTAATGAGGACACTGTAT +ATGACATTGCTAACGAGGACACTGTACAAGGCATTGCTAACGAGGACGCTGTACACAACA +TCGCTAATGAGGACACCATATAAGACATCACCAATGAGGATGCTCTATATGACATCGCTA +ATGACACCCACAAGGCATGCTAACGAGGACGCTGTAGACGACATTGCTTATAAGGACACC +GTACAAGACATCGCTAACGAGGACGCTGTATACGACATCGCTAATGAGGACGTTGTATAT +GACATCGCTAATGAGGATGCTTTACAAGACATAGCGAATGAGGTTGCTGTATATGACATC +GCTAATGAGGACATTGTATATGACATCGCTAATGAGGACGCTCTATACGACATCACTAAT +GAGGACGCTGTATACAACATCGCTAATGAGGACGCTGTATATGGCATCGCTAATGAGGAT +GCTGTATACGAATTCGCTAATAAGGACGCTGTATATGACATTGCTAATGAGGACACTGTA +CAAGACATCTGTAAAAAAGAAGATGCTGCCAATGTAAGACACTTTTCTTTGTCTTGAACA +GAAATGTTACTTTCCTGGCTTCTTTCCAATCAGATGTAGACATGAACATCTGCCAGTGTG +CATTATCGATGTCATCTGCAGTTTAATCAAATGTAGACATGAACATCTGCCAATGTGGAC +TATTTATGACATCTGCAATTCCCTTGGTGTGGTGCTATTGATTGGCGGCCTCTCACCAAC +CCATGCCAGGCACACTGGGGTGTGGTAGATGGCAGCATCCACGATCCACTGCAATGCAGA +GGTGTTTCCCTCCACAGCAGTTTTCCCCCATGGATTAAGAGTTGTGAAACTGCCAATCTA +GATACACTTTAAAGATAAATTCTGTGGGAAAAGGTCTTGTCTTTTCCACAGGTGTCTTCC +GTGCCAGTTTTGGGGGACTTTGACCTTTGACTCAATCACTATACCCCTTCTTATTTTCTC +TCTCAAGTTGTCGAGAGACTATCAGATCTGTGTGACGTGTATGGCATCATTTCACCCTCC +TAATGTTTTCTTTTCTATAATTGCAGGAGCCATTGACACTGGAGAATGATACCTACCCTG +AAATAACTCACTTCCTGAGGAAAAAACGCCATCTCTAGGGTACAGAAACCTGATTCTGGG +CTCCTTTTGGGAAGGAGGATTTGGAGTCTGGTGAGAGCAAATGATTTTGCAAGTATAAAA +CAATGTCCAGAGAGGCTGTAGGGATATCTGTGAGCCCAGAGGAAACACCAGGGGATCCTG +TGCGAAGCACCATGGCTTCAGCTAGGGTGGGAGGAGTGGGTGGGCCTCTCTCTAATGACT +TATCCTGGTGTTTGTGTTTCTAAAGATTTGATTGTGGAGAGCATATCTGATGATGGGGAT +TTGTAGGTAGGTAACTACTTTCCACGTAAGATCCAATTGGAGAGAGTTCCCAGGGGCCTT +CGGGGTATCCATGCTGCTTGGGAGGTTAAGGGAGGGGGCATGAAATCAAAAGGAAACAGG +AAATATGTGTCATATTGGATTTGGTCTTTTCCGGGTTTATTGGCATAATAGTTAGAACTG 
+TCTCTCTGGGCTATGAGGGTGCTGTGATATTTAAAGGTGGTCTTTCCCAGAACACCTGGC +CTTTTCTTTTCTGCCTCTGCCAAACATCACAGCCTTTGGGTTGGATTAGTCAGCACCCCT +TGGGATTGTGCAGAAGAGGTTTGGGGTTGCATCGAGTGTCACCTGTGGTGAACAGAATCT +GAGGGACACAACTCTCTCACAGGCACTTCCTTCAACCTGGAGACAGAGTTCTCCTGGTGT +GTGCCCAGGGGTGGAGGAGAAATTGACAGTCTGCCTCTGAACTTTCAGGACTTTAAAAAG +CACTCATGTTTCCATCCTCACTGTTGACTCCTGGCTTAAAGGGATCTCCCGGGGTGAGTG +AGGAGGCGGGATCGGACCCTGGCAGTCTGACGGCAGCACCTGTGTTCCTCTGCACTGGGC +CGTGGATGACATTACACACCTTGGTGAGAATCAGGAATTGAGGCTAACCACATCTGAAAT +TGAGATGGGCCTTGAGTCATATAAATAGTTTGGAAAAGATGCATTTTACTACGCTATTGA +AAGAAACCATTTATTTCTCACTCCAGCAGGATAAATGGTTTTCAGTATCCACTTAACTGC +TCATTGACTCTTACTGTAGATGAGGAGGTGGCCAGCAGCCCCTGCCCTCCCCCAGTTGTA +GGCCCAAGGTAACCAGCAATTGACTGGATATAATGGAAGAGTGGTGCATTCGGAGGTATC +TGTATTAATGGGACCCACATGATATGGATGAGAGCTATTAGGGTGAGAAAAAGCCTGGGA +GCACAATGAAATATTTAAATATTAAACAAAACATTGTTGAAATCTCCATTGTACTTTAGT +AGTTGAAGTCATTCTTGTGGTCATCACTGCCTTTCCCAAGCATAACAAGCTACTTAATAT +CACATGGACCCGTGCCATGAGGAATGATGATCAGTTTGTCAAATGCCAATAAAACAATTG +CCTCTATAAGCCACAATGTTTCATCCATATATTTCAATTTCCATGTGTAAGTATAGTTCA +AATTTCAGAAATTTATTATTATCTAATAGAATATGCATGGTATATCAATGAGCAATTATC +ATACTGTTTCTATTAACAATTATTTGTATGATGAAAAAAGCAGACTCCCATTCTTGGATT +TTTCTCAGTTTGCACACATTAGCATGACAGCCCCATTTCCACCTGACATGTGCCAGCAAG +AGGCCAGGAACAGAGGCTTTTCTTATTAACTAAGATTTCTAAATGTATTACGTATTCACA +TTTAGAAACTCTAAATATCATAAAAGGTTAGCAAGGAAGTTTCCCTTCCACTCTGAACTT +CCAAACACCAAGTCAACGTTTTTGTTTGCATATCATCCCTGCAATCTATGTGCAAATAGA +AGCATGCACCTGGAATGCAGGCTGATGTGTGATCGTGTTTACACAAAGTCCTCTGCACCT +CTGCATATATCACTGGGCAATGCACCTTAGTTATCATTCCACATTTCAAATGTAAATCCA +TTGTATTGTTTCAGAGCTATAAAGTACTGCACCCCATGACTATTCCCAAAATTACTTAAG +CACCCCGCTATGGGTATCCGTTTGTTCTGTTTCCAGTCTTGCTCTTATAACCAATGCTGT +AGTGAACAGCACTGTGTTGAGAAGCGGTGAACGTGGGCATCTTTGTCTTGTTCCCTTCCT +CAGGGGGAATGCTTTCAACTCTCCCCCATTCAGGAAAATGTTGGCTGTGGGTTTGTCATA +GATAGCTTTTATTACCTTAAGGTATGTCCGTTCTATGCTGATTTTGACGAACGGTTTTAA +TCATAAAGAAATGCTGGATTTTGTCAAAGGCTTTTTCTGCATCTATTCAGATTATCATGT +GATTTTTGTTTTTAGTTTTATTGATGTGATGTATCACATTTATTGACTTGCGTATGTTAA 
+ACCATCCCTGCATCCCTAGTATGAAACCCACTTGAATCATGGTGGATTATCTTTTTGATA +TGCTGTTGGATTCAGTTAGCTTGGTTGTAGCATTTCTTATTATTCCATCTGTGGAATGTA +TTGGTTGAAATAATGAAAACATGTTCTATCCTCACTGCTTAGCACTTTGTGTTTCTTTAA +TAGCCTTCCCAACAGGGCAACATAAAAGCAGGAGCCCTGCTAGTCTCCCCTTAACCCGGA +ATCCCCCCTTCTCCACAGCTCGCTCATTGGACAGGATAGACTGGGCGCCCAGGCTTCAAG +GTAAGGACGTGCTCTGTCACCTAGAGGTGCAGTGCTTGGGAAGGCCAACCTTGGAGGGTT +GCCTGCCAGCTTTACAGTGACAGAGGTGTTGAGAGGGACTGACCACCAGTGCATAAGGCT +GTGCTTTGTTGGTGACATAAAGGATTGTTTCACAGATTGTTGGGGAGGGACAATCCCAAG +GCCTCCCCTGGCCCTGGTGCTGGCTCTGCACAAAGGCAATAAGAGAGGGATGCTGGTAAG +GGCTGACCTGTTGCTGTGCTGGGGAGGAAGGTGCTGGGCTGAAATTCAGGAGGCTGAGGA +TGCAGCAGTCCCATAGGAGGTACATGACCTTCAGGATACATTTTCTTCATTGATGATCAA +TGGAAATGAGAAATCACTGACTATTTTTTCTATCATTGGAATCTACTCTCCACTGCTCAT +GCTGTTCCTGTCTTTTGGGGAAGATGGAGGATCAATCAGTGTGCGCTGCACTGAGTGGAA +GGAAGGAGAACTGTGACAAAAATTAAGGAAGGATGAGAGACGGGAGGGCCCTTCATCCAG +CTGCTTGCAGAGTCCTCCTGAGGAGGAAAGCCCCGTGGCTCCCTGGCGAAGGAGCAGTGA +GGGCTGCGTGACTCCCACAGTGAAGTGTGTGGTATGTCTGAGGACACCCAGGCTGGTGGT +CCATGAGGAGCCAGTGGCAGAGTGAGAAGAAGAAAGGCCAGGAGGGTGGCTGGAGGCCAG +GCTCTGAGTCATTCTCCATGTGATGGAAACAGCCGGAGCCCAGTGGGCTTGGAGGTACAG +GATGCGGTGGCTGATGACAGAACAATGTGGAGAGAGGCGTCATTTGTCAAATCCTTACTT +TGTTCTGGGCATTGTACTAAAAATTCTAATGGCTCATCCCATTTAGGGGCTGAAAGTTGC +AGAGGTTTAGGAAGCTCACCCACGATACTGGAGCCCCCATCTCTTGCCCTAGTGCTGTCC +ACCTTCTCACCCAGCCACCACCTGTTTCGGGGGAACACACAGAAGTGGTAACCTCTTATG +GAGAGGCAAGTAAATTCTGCTGTTTTTGTTATTCACAGAAAAACACTGGCTCGTGTGGGT +TGGGAAGGTGAAATACCAGAAGTATTTCATCTGGTTATTTCTACCCATGCAACTCCTATA +GTATTGAAATGCATAGGTTAGCATTTTTGGCCAATTTACTCAGCATTCTGGGTTAAAGGC +TTTTATTTATTTTATTTATTTATTTATTTATTTTTGAGATGGAGTTTCACTCTTGTTGCC +CAAGCTGGAGTGCAATGGTGCGATCCTGGTTCATTGCAACCTCCGCCTCCCAGGTTCAAA +CTATTCTCCTGTCTCAGCCTCCCAAGTAGCTGAGATTATAGGCACATGCCACCACACTGG +GCTAATTTTTTTGTATTTTTAGTAGAAATGAGATTTCACCATGTTGGTCAGGCTGGTTTC +GAACTCCTGACCTCAGGTGATCTGCCCTCCTCGGCCTCTTAAAGTGCTGGGATTACAGGC +GTGAGCCACCACGCCTGCCCTAAAGTCTTTTAAAATTCACTTGTATAAGTTGACTTAGTT +TTCTTTAACCTTGTAGAAAAATACAAAAATGGCAATCTCTTTTATCACACAAATAATGTC 
+TTTTTAATGGAGTGATTTTTTTCTAATTGAGGTATTATGTACTTTTCATTTACAAATTAT +TGTTTACATTTGAAGTGTTTTATGAATTAATATTTAATTGCATAGATGAAGATTACTAGT +TATAGGCATTTTACTAACCAATACTCATTAAGCATAGCATGGATTCATATGACATCAAGG +AGCTATTTTATTTGGTAAAACGAAAAAGCACAAGAATGAACGAATGCAAGAACTGAAACA +GTGGAGACACCTAGAATGACTTGTCTAAGATCTAAATCATTTTGTTGTCTTCCCAGCATA +CTTATTATCCTGATCATTGTCATCAGCATTGTTTGGGTCCTTTTAGCACAGATTTCTCAA +AATGGGTAACTCCATAACAGTTGGAAGCTTACGAATTCATATAATTTGTAAGAGGTCAAT +TTGGAAGTACCTATCTATTTTAAAATTCCAATAACCTGGGAATTTCATCCCATGTCTAGA +GTCTTTTATGTAAAATATTTCCACAATTAGGAGAAATATGTGCATGGGGATTTTCTATGT +AGCGGTGTTTTGATAGAATAGAAAATTGGGATAAATCAAATTTCCATCACGAAGGAAATA +GTAATATGCTGAATAATAATACAGCGAATATTATGCAGGCTTTAAACATCAAAAAAGAGT +TCAACTTCTGACTTCCGATGATGGTGTTGAAGCAGGTCACTGCTGGTTTACATTTGATTT +TCATGTGGGAACTCTGGAAGTCCGCCTTAGTGATTTTACATGTGGCTAAATTGAGCTAAT +GACAAGCCGTTCGAAGTATGGCAAAATGGAACTTTAAAACAGTATCTTGTCAACAACCAA +GAGGACCTGTTTCACATAAAGCCCATGCATTCATCTGCCTGTCCATCATTCTGTCTGTCC +ACACGGGCATCATTCGTTAGTGGAACTGAGTGCCCACTGTCGAGCTGACAAGCCCATAAC +CTCCCTGTTCCTAGTCACACATTAATTCTTCAACAAGTCCCTTTTGATAGATTGTGATTA +AGCTTAGCTACTATTTCCAATTGCTTCCCCAAACGTACTTCTCACTGTTCTCCCATCACA +CCCTTCAGCCCATCCATGCGGGGTTCCTTTGCTTTTCCCACCTTACACCAAACTCCCTAT +TTTTACTCCCACTTTTACCTCCTCTCCAAGACAAAACAAACAAAACTAGCATTTTAAAAC +TTAGTTGTAATCTTTCTTCCTTCATGAAAATTTCTCCAACAGCCACTCCCACGGTCCTGT +GTGTTCCAGATATTTTAAAATAATGGCTATAAGGCTGAGCACTTCAGGATATGCTGTTTT +GCTGTGTGCAGATGGAGGCAGTGGCTGGAGTGAATGAACGGCAACACTTGCTGGCAACCG +GCAGAAGCTGAGAGACAGGGAACAGGCTCTCCTCCAGAGCCTCCAGGAGCCAGGCCTTTG +GACACCTTGAATGTGGGCTTCTGGGAGACCATGCGTTTCTGTTATAAGCAGCCCAGTCTC +TGGCAGTTTTTACGGCTGCCCCGGAACACTCATCTATACCTGTCTGACAAGGTCAAGCTC +CAAGGAAGGGATTCTCTACATATCTACATTGTTTGCAGATTTTACAATAATCATTTATTC +TTGCATGGCTGATCATTGTTAACCAATACAAATAAAATAATAAAGAAATGACCCACATTT +TATGTTGGGAGTTTGATCTGCCATTTATCAAGTATGGAATCTTGAACAAGGGGTTAAACA +TCTGAATGTCTCCATCACTTCATCTCTAAAGTGGGGGTGCTCACACCCACTGGGCTCCCC +CACCCAGGTTGGTGCCGGACTCTCCCTGGGCCCCCCTGTTCTCTCACCAGCCACATCCAT +TCTCCCCCCAGAGGCGCTAGTGACTGTGCGTGGCTTTCCATTCCCACCATGTTTGTCTCT 
+AACCCCAGTGGCAGATCAGTGTAAGAACACAGCTGAGTGCTCCTCGCCTCCTTGCCCCTT +CAAGGGCTCCTCACCACCCACCAGATCAGGTGCAAACTTCCAAGCCTTACTGGATCCCCT +TCCACATTCTGAGCTCCGCCTGCCTTCCCATCGCTATCCTTCCCCACCTGCCTCCCTGGT +AGAGAAAAGCAGAGTGTGTGATGCTGTCTGAATGCTGAGCACGGCCTTTTGCAGCCAGTC +CACTGTGTATGCTGCCCCTATCGGAGACCTCCACCTTAACCCTTTCCAGCCTGGAGGCTC +CTCCCAGGGCCCCACAACAGAAGTGACTTCCCTTGCCTTTGAATTTCTATAGCACAAGCC +CTACTGCCCCCCGTTAAAACTGCAAAGTCCTTTTGTGGAAAATAACTTTATTCATGACTG +TGTTTATCACACTATCTTATGGAGAAGAGATGATCAATAAATATTTGTTGAATAAATGAA +TAGCAGTTACAAAACACTTGATTCATATGGAATTAATGTTGGTTCTCAAAGTGAAAAATT +ACAAACAGCACTGATATTCAGCCAGTATACAAGTCTGGTCACAGCAGTTGTATAATACTG +AAATACCCCCTGCCACTGACCTTTGGCCCCCAGATGCCTCCCACTGCCACTGCTCTCCCC +ACTGGGAACCCCTGAAGTTCCCACAGGCTCATAACTAAAGGGCTAATGTCTCGCACAGCA +GCGAGCACCCAGGACCGAGCAGCCACATGGCCGGGTCTGCTGGTGAAAGCATCCATTCTG +ACTGATCAGGACCTGAGGGGCCTCATGGTTACATATTTTGATAATATCCCTAATTATAAA +TAAGGCTCAGTTATATAGTTTGAAAACAATGCTTCTCCTCATCGCAAAATCTCTTAGAAG +ACTCCGTAGATCCAGGAACGGAAATGGAAAATGACAGCGTGTCAATCTCTGAAGGTTTTG +GGCATTTCCATTAGCACTCCATCTTCATGTAAACCAGAAGATATGCAGTTTCCTGCCTAG +AGAGAAGAGAAGACACATCAGCACAGCGGCATGAAAGCTTCATCAGAAAACAATGCTTCA +TTAATCCGTGACAGGACAAGCGTCAGCAAACTTCCAGGCGGCTGGATTAGGCCTTCATCT +ATCCATCACCTTGGAGAGGAACAAAATAGGTGGCCTGGGAAGATAAGCACTATGTTTCTA +TTCCTTAATATCTAAAGCGGAGGTTAACAAGCTATGGACACACAAGCCAAACCCAGCCCT +CTTGGGGTTTTTTAAATCTACTTTCAACTTTTATTTTAGATTCAGCGGGCACATGTGCAG +GTTTGTCACGTGGATATGAGCATACTCCCCAACAGTTGGCCTTTCACCCCTCCCCTCCCT +CCCCATCCAGCAGTTCCCAGTTGTTGCCATCTTTAAGTCAATGAGTCCCCATGTTTAGCT +CCCATTTATAAGAGAGAACATGCATTATGTTTTGTTTGGTTTTTGCTGGTTTTTTTTTTT +TTTTTTAATGGAGTCTTGCCCTGTAGCCCAGGCTAGAGTGCAGTGGCACAATCTTGGCTC +ACTGCAACCTCCGCCTCCCAGGTTCAAACGATTCTCCCTCCTCAGCCTCCCGAGTGGCTG +GGACTACAGGCGCCCGCCACCACGCCCGGCTAACTTTTTGTATTTTTAGTAGAGACAGGG +TTTCACCGTGTTAGCCAGGATGGTCTCAATCTCCTGACCTCATGATCTGCCCACCTCAGT +CTCCCAAAGTGCAGGGATTACAGGCGTGAGCCACCGTGCCCAGCCTTTTGTTTATTTTTT +GACGAGACGGTTCTTGCTCTGTCACCAGGCTGGAGTGCACTGGCACAATAATAGCTCACC +ACAGCCTCGTGCTCCTGGGCTCAACTGACCCTCCTGCCTCAGTTTTAGCTTCCTGAGTAG 
+CTAGGACTACAGGTGTGTACCACCATGCCTAGCTATAATAATTTTTATTTTTTTGTAGAG +ATGGAGTCTTGCTTTGTTGCCCAGGCTGGTCTTGAAGTCCTGGCTTGAAGTGATCCTCCT +GCCTCGGCCTCCCAAATTGCCGGGATTAAAGGTGGGAGATCGCACCCAGTCTCCAACCCT +CTTTTTGCAAGTAAATGTAACTGGACCCCAGCCATGCTCATCTGCCCATGTACTGTCTAC +GGCTGCTTTTGCTCTACAGGGCAGAGTTAAGTGGTTGCAACAGACACCGCACAGACCACA +AAGTCTGAAGTACTTTCTCTCCAGCCCTTTACAGAGAAAGTCTGCCAACCTCTAATCTCA +ATAACAGGGAAATCAATGACAACCACAAAGTGACAAAGACTGGGTGTCTAAGATGGATGC +TCAGAATAAACAAGAGAGAAAGATGAAAAGTAGAAGGAGGATTTCAAACGCAAGCTTCAC +CTAATCCGTTATTTTTCAAATGACCAGGCCTATCTCTGTAGCTGAAAATCACCTCAAATA +AGATCTCTGATATACAGTCTCCAAAAGCTCAGCCAAGAAACTTACAAAGTCTCTCTGCCT +TAACTTCATCCACCTTTTTTCTCTCCAGCTTCTCCTCGGTAGTTAATGATTATAAAAATA +TTTATTGGCTCATGCCTGTAATCCCAGCACTTTGAGAGGCCGAGGCGGGCAGATCACGAG +GTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAATCCCGTCTCTACTAAAAATACA +AAAAATTAGCCAGGCGTGGTGGCGGGCGCCTGTAATCCCAGCTACTCAGGAGGCTGAGGC +AGGAGAATGGCATGTACCCACAAGGCGGAGCTTGCAACGAGGTGAGATCCCACTACTGCA +CTCCAGCCTGGGCGACAGAGCAAGACTCCATCTCAAAACAAACAAACAAACAAACAAAAA +AACAGTGTGATGGCCAGGCGCAGTGCTCATGCCTATAATCCAAGCACTTTGGGAGGCTGA +AATGGATGGATGGCTTGAGCCCAGTAGTTTGAGACAAGCCTGGCAACATAGCGAGACCTC +ATCTCTACAAACATCTTTAAAATATGCCAGGCATGGTGGTGCATGCCTGTAGTCCCAGCT +ATTCAGGAAGCTGAGGTGGGAGGATCACCTGTGCCCGGGAGTTCAAGGCTGCAGTGAGCT +ATGATCACACCACAGTGCTCCAGCCTGGGCAACAAAGCAAGACTCCATCTCTAAAAATAA +AATAAAATTAAAAAAAAAAGATCTTCGCTGTAAAAGAGGTACACTCAAATGCAATAAAAG +CATATAAGAAGGCCGGGTGTGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCGA +GACGGGCGGATCACGAGGTCAGGAGATTGAGACTATCCTGGCTAACGCGGTGAAACCCCA +TCTCCTCTAAAAGTACAAAAAAATTAGCTGGGCTAGGTGGCAGGCGCCTGTAGTCCCAGC +TACTCAGGAGGCTGAGGCAGGAGAATGGCATAAACCCGGGAGGCAGAGCTTGCAGTGAGC +CTAGATAGCACCACTGCCCTCCAGCCTGGGTGACAGAGCGAGACTCCGTCTCAAAAAAAA +AAAAAAAAAAGAAAAAGAAAAGAAAAGTTCTTGTGACATTTGTGTATGAAATCAGCCTTC +ACTACATGGATAGGACCAGCACGCTTCTGCGGCACGACTCTGCAATCTTACTACATTTTT +TTTTACTTTGTATTTTATTTATTCCTTTTGAGACAGAGTCTCACTCTGTCACCCAGGCTG +AAGTGCAGCCGAGATCTCGGCTCACTGCAACCTCCACCTCTTGGGTTCAAGCAATTCTCT +TGTCTCAGCCTCCCAAGTAGCTGGGACTACAGGCACACGTCAAAACGCCCGGCTAATTTT 
+TGTATTTTTAGTAGAGATGGAGTTTTGCCATATTGGTCAAGCTGGTCTCGAACTCCTGAC +GTCAGGTGATCGACCTGTCTTAGCCTCCCAAAGTGCTAGGATTACAGGTGTACATTTATT +TATTTATTTGAGATGGAATCTTGCTCTGTATTTATTAATTTATTTATTTGAGATGGAGTC +TTGCTCCATCGCCCAGGCTAGAGTGCAGCGGTGCAATCTCGGCTCATTGCAACCTCTGCC +TTCCAGGTTCAAGCGATTCTCCTGCCTCAGTGTCCCAAGTAGCTGGGATTACAGGTGCCT +GCCACCACAGCTGGCTAATTTTTGTATTTTTAGTAGAGACAGTGTTTCACCATCTTGGCC +AGGCTGGTCTCGGGCTCCTGACCTCATGAACCACCTGCCTCAGCCTCCCAAAGTGTTGGG +ATTACAGGCCTAAGGCACCATGCTCGGCCATATTTATTTATTTAATTATTTAGAGACAAA +GTCTTGCTCTGTCACCCAGGCTGGAGTGCAGTGGCGCCATCTCAGCTCACTGCAGCCTCC +GCCTCCGAGGTTTAAGCAATTCTCATGCCTCAGCCTCCTGAGTAACTGGGACTACAGATA +CTTGCCACCACGCAGGGATTTTTTTTTCTATTTTTTTTGTAGAGACACAGTTTCACCATG +TTGGCCAGGCTGGTCTCGAACTCCTGACCTTAGGTGATCTGACAGCCTCGTCCTCTCAAA +GCACTGGGATTACAGGCATGAGCCCCTTGCCCGGCCTCTCACTACATTTAAGTGACGCCA +TGGCTCATGCCTGTAATCCTAGCACTTTGGGAGGCCAAGGCAGGTGGATCACCTGAGGTC +AGGAGTTCGACATGAGCCTGGCCAACATGGGGAAACCCCGTCTCTAGTAAAAATACAAAA +ATTAGTCAGGTGTGGTGGTACAAGCCTGTAGGCCCAGCTACTTGGAAGACTGAGGCAGGA +GAATCACTTTAAGCGGGAGGCAGAGGTTGCAGTGAGCCAATATCATGCCACTGCACTCCA +GCTTGGGTGACAGAGTGAGATACTGTCTCAAAAAAAAAGAAAAAAAGAGAGAAAAACATA +TGATGCCAGGGCATCTCGGCCTCAATACCTGGGTGAGCACAGTCATGTCCAGGCCAGGGC +TGCTGGTCGAGGTCCGGCCCCATCTCTTCCAGCAGAAAGGGAGTAAGCTTGCAGGGAGGC +TGGGGGACAAGATCCCAGGATCTCAGCCTCTGCTCATGGATCAGCTCTGAGACCCCGAGT +GAGCTGGGGGTGCTCTGTGCGCATTGGTTTCCCCAGCTGTCAAGTAAAGAGATTGGATGA +GGAAGTCTTGTCAAGGTGGAATGATCTCAGATTTGGGGCAGCAGTGAATGATCCCGCTCC +CTGGGCCATGCCAGTGGCCCGGCCTCGGCTGAACACAGCCCCAACACTCTGGAATGGGGA +TGAGGGGGCAGTCAGCTCTTGCTCCTAGTAAGAGAGATGCAACAGGGCTCTGTGGCTGAG +CTGGGTGCCTTGCCTCACACCTGTAATCCCAACCTTTGAGAGGCCGAGGCAGGAGGATTG +CTCGAGGCCGGGAATTTTGAGAATAGCCTGGACAACATAGCCAGACCCCATGTCTACAAA +ATAATAATAAAACACACAGCTATAGTCCAAGCTACTTGGCAGGCTGAGGCAGGAGGGTCC +CTTGAGTCCAGGAATTGGAGGCTGCATTGAGCTATAATCGCACCACTGCACTCCAGCTTG +GGTGACAAAGTGAGACCCTGTCTCTAAAAGAAAAAAAAATTGGCCTGTGAGCATGGGCTT +GATTTTCAAACAGGACCCGGAGGGTAGGGTAAACGTGTGGGTAAATCTAAATGAATGTTA +TTGGTATAAAATTACAGTAGTATAGAAAATGATATCTTGTGGGGTTTAAAATAAATAAAA 
+CATACTGAAATATGTATGGGTACAGTTATATATCTGGGATTTGCACTGAAATAATGTGGG +GTAGAGGGAAGCAGGAAAGAGTATACATGAAATGAGCTTGGCCATAAGATTGTTGTTGAA +ATTGAATGGATACTCTGGGCTTCATTACACAATTCTCTTTACTCTTACATAGCTCTACAC +TCTCAACATAAATAAGAATAAAAACACAAAAAACACACAGATACATCTATGCACACACAC +ATATTTAAAATACACAAAAATATTAGCATATAAGTCACTGGGGGTAAATTTAGTTCCTGT +TCCAAGGTTCTTGTACTGACTAGGAAGAGGATAGAAGTACTAACTCATAGGCTGGGCGCG +GTGGCTCACGCCTGTAATCCCAACACTTTAGGATGCCGAGGTAGGCAGATCTCTTAAGGT +CCGGAGTTCAAGACCAGCCTGGCCAACATGGTGAAACCCTGTCTCTACTGAAAAAGAATA +CAAAAATTGGCCGGGCATAGTGGTGCACACCTGTGGTCCCAGCTACTCAGGTGACTGAGG +CAGGAGAATTGCTTGAACCCAAGAAGTGGAGGTTGCAGTGAACCAAGATTGCTCCACTGC +ACTCCAGCCTGGGCAGCAGAGGAAGACTCTCTCTATCTCAACCACAACAAAAAGTACTAG +CTCATGTTAGACTTTGATAAGGGAAGGATGCATGTTGTAAGCTCTAAAATAATCCAGTCA +TCTTTTAAAATAACTCTAAGACTGCACAGTTATGAAACTAATAGAGAAGGAGGAAATTAA +ATAATAAAACTAATAAATCCAAAACAAGATGTGAGAGGAGATAAGAAGAAATAGAATAGG +CATGGAAAACAAATTGGTGGTGGGTTTCAACCCAAATAAATCATTAGTTACATTTAAAAG +GACAATAAAAATTAAAATAATTGAAAATAAAGTAAAACCCAACTAATGCCTTTTATATAA +GGGTACAGAGAGGTGGAAGATCATGAAAAATATGTCATGCATGTACTAACCAAGAAAGCT +GTATAACTTTTTTTTTTTTTTTTTTTTTTTTTTTGGAGATAGAGCCTCACTCTGTCTCCC +AGGCTGGAGTGCAGTGATGTGATCTTGGCTTACAGCAATCTCTCCCTTCTAGGCTCAAGC +GATTCTCCCACCTCAGCATCCCAAGTAGCTGGGACTACAAGTGTGCCAACTTAGAATTAT +ATTAGCCACACCCAGCTAATTTTTGTATTTTTTGTAGAGGCAGGGTCTCGCCATGTTGCC +CAGGTTGGTCTTGAACTCCTGGGCTTCAGTGATCCACCCACCTCGACCTCCAGCAAAGTG +CCAAGATTACAGCCATGAGCCACCATGCCCAGCATAACTATTTTTAATGAAGTAGACTTT +AAGAAGAAAAGTATTATTAGAGGTAAGGGACACATCACAGAAAAGAAGAATTTACTAGGA +GCCAGGCGCAATGGCTCGTGCCTGTAATTCCAGCACTTTGTGAGGCCAAGGCGGCGGATC +ACCTGAGGTTGGGAGTTCAAGACCAGCCTGACCAACATGGAGAAGCCCTGTCTCTACTAA +AAATACAAAAATTAGCCAAGCATGGTGGCACATGCCTGTAATCCCAGCTACTCAGGAGGC +TGAGGGAGGAGAATTGCTTGGACCCAGGAAGTGGAGGTTGCGGTGAGCTGAGATTGTGCC +ATTGCATTCCAGCCTGGGCAACAAGAGCAAAACTCTGTCTCAAAAAAAAAAAAGAAGTTA +CTAGCTAGTTTCGGTAATTCTTAACATCCAGGAAACTGGATGTGAAAGTTTTTCAGAGAA +ACTAAACCAATAGATTATACATAGAGAGAGACTTATTTAGGAATTGGCTCACATGATTGT +GGGGACTAGCAAGTTTTAAAATCTGTAGGGCAAGCCAGCAGGCTATAAATTCAGGTAAGA 
+GTTGATCTCGAAGTCTGGAACCTAAAATCTGTAGAGCAGTCAGCAGGCCAGAAACTCAGG +CAGGGTTTGTGTGTTACAGTCTTGAAGCAGAATTCCTGCTTCTCTGGGAAACCTCAGTTT +TTGTTCTTAAGGCCTTCAACTGATTGGAGGTGGCCCACCCATATTATGGTGGGTAATCTG +TTTTACTTAAAGTCAATTGACTGTCAGTGTTAATCACATCTATGAAATAACCTCCCAGCA +AGATATTGACAAGTATTTGACCAAACAACAGGACACCATAGCTTAGCCAAGTTGACACAT +AAATTAACCATCAGGAGCAAGTAGAATATCCAAAAAACAACATACTAGGGGTATTATATC +TTATATAGCTATTATAATTATATAAAACATATAATTATAGAATGACGATATTAAGATAAC +CATTAGAACAAAAATATAAACTTTTCTTTCTTTTTTTTTTTTTTTTTTTGAGACCAAGTC +TTGCTCTGTCACCCAGGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGCAACCTTTGCC +TCCTGCGTTCAAGTGATTCTCCTGTCTCAGCCTCCCAAGTATCTGGGATTACAGGCACCT +GCTACCATGCCCAGCTAATTTTTGTATTTTTAGTAGAGACATGGTTTCACCATGTTGCCC +AGGCTGGTCTCCAACTCCTGACCTCAAGTGAGCCACCCCCCTTGGCCTCCCAAAGTGCTG +GGATTACAGGTGTGAGCCACCACACCCAGCCAAAAATCACCTTTTTTACAAGGATCAAAA +CAGTCATTATGCTGGAGATGACAGACCTCACTGTCACCATGCTCCTTTTGTATGTCTACT +AGGCACGGTGCTGGGTCCACACTCACAGAAACCTTAGGAACTCGCACCCAGGGGCTCCGG +CTGTAGCAGAATCCCAAGAATAAAACCTGGTGCTGAAAGAGTAGGAGATGAGGCCGGGTG +CCATGACTCACTCCTGTAATGCCAGCACTTTGGGTGGCCAAGGCGGGTGAATCAAGAGAT +AGAGACCATCCTGGCCAACACGGTGAAACCCCGTCTCTACTAAAAATACAGAAATTAGCG +GGGCGTGGTGGCTGGCACCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCAT +TTGAACCGAGGAAGCAGAGGTTGCAGTAAGCTGAGATCGCGCCACTGCACTCCAGCCTGG +TGACAGAGTGAGACACCGTCCCAAAAAAAAAAAAAAAAAAAAAAAGCAGGACACTGAACT +CTGGGAGGGCCTCCTGGTGAGAGGTGAGCACAGAGGGGAGAGATGGAGGCAGGAGCATGG +GCTTCTGGTGGCCCCAGCAGACCCTGTGGCAGCGTGGCCAGGGTCCTCTGCAGGGAGGAA +TCTTGGCCAGGATGACGCTGTAGCAGGCCTCTTCCTGAGGCCTCCAGCCAGCCCGGCCAG +GGTCCCAGCGTCCAGTGACCCCTGTTTCACAGCAGCAGCTGGGGCCAGCCCCAGGCTCTC +TTCCACTCCCAGCTTCTTAAAACTGGAAGTGGAGAGAGTTGTTTGATAAAACACTGGGGC +AAACCACATCCTCTCTTCACCAAGGGAGAGTTCGAGGGGATGCCGGCAGAGGGAGCTTTA +GAGTAGAGACCCCTACCCAACCAGTGACCGTCACGCACACAGCAGGGCATGCTATGGAGA +CCCCCAGACAGTCACTCGGGGAGACCCAGCAGGTCCAGACTCTTCAGAGATCTGTGGCAG +CAGGTCCCCACTCCCAAAAGCCACGTGCCCACGGGTGGTCTCTGGTGCCTGAGACCCCAG +TCTCATTTGCATCTTTGCAACTTCGAGTTTAAGTGGGTGTCGCATCTCTGTATGTCCTCC +CGAGCAGAGGAGGGGCACAGCCTGGGGTGGCAGCTGGCGTCAAACCCTCAAATCCCCTGA 
+GAGCCACTGGGGAGACTAAGCAGTCCCCAGCCCCCACTTGTCCCTGAGCTGCCATTCTCA +GCCCTGTGGGAGGAGACAGAAAGCCCTGAAGAGAAACCAAAGGACCAGGTCAGGAGGGGC +TGGGGGGGTGGCATGAGCAATCAGGGCAGGGAAGGATGGACCGATGGGGGAATGGAGGGA +AGAAGGAATGAATGAAAAGGTGAATGAATGAACAAAGAGAGAGAACGGCCACTCCTCCCT +TGCTTTAGTTTACAAAGTACTGGGATCCTCCCAACAGCCTGCAAGACAGAATTTCTGGGA +AGCAGACCAGGTGGCTGGCAGGGAGGGGAGGCTTGCCCTGGCTTTTGTGGGCCCAATGGG +AGGCAGGGGGCAAGAAGGGGCATCCTGTGTGTGTCCTCCCTGCAGTGGCAGCAGCACCTT +CCTGGAAGAGGGTCAGGAAACACCCGCTGTGGCCCCTCTCCACCACGCCCTCATCCAGGA +CACCAAGTATCAGTCACTCAGCTCACGAGACCCAGGCCCTGACTCAGGGAGAGAGGATGT +GAGGGGTGGGGCACCGGGCTCCTCAGGACTGAGAGACCTGGGATGTGGCCCCGGGCTGGG +TGTAGGGGCAGACTGGCTATGGCAGCATTGTGTGTACCCCAGCAGGCCAGTACCCACGCA +GGGAGCCTCCAAACCCCTTCACCCATGACCCTGGGAGAAGACCGCAGCCTTGGAGAATTG +GCCTCACTGAAGGGGCCTGCACCGGCCAGCAGGGTCAGGCGGGGCCAGACAGGTTCCCAC +CTGGGATATGCAAATGGGCCTCCTGAATCCTGGAGCCAGGTATGGACTCACACACCACCA +TTGTCCCCAAGTCCCCATCTGCCCCACGGGCACACCCTGCCGCCTGTTCTGTGCAAGGGC +CCTGAGGCTGTCTCCTTGCGCCCAAGACCTGCAGGTGCTGAAGCCCACACACACGGCTCC +TGCTTCCTGGGCCAGTGCACGTGCACACACACACACGTGCACACACACACACACACCCAC +AAATATACCCACACACAATCACACACATTCACACATACCCACCCCCCATACTCACACACA +CATTTACACACACCCACACACCCACACTCACACACTCACAGTCACACACACCCTCACACA +GCGAAACACAATCACACACATTCACACCCACCCACACCCCACACACTCACACTCACATAT +ACTCACACACACCCACACACACATACACAAACACAATCACACACATTCACACACACCCAC +ACTCACACATACACACACCCAAACACGATCACACACATTCACACACACCCACACCCCACA +AACACACTCACACATATACCCACACACACTGACACATAATCTCTCACACACACACATGCT +CACACACACACGCCTTCTCCAGGAGGGGCTGGCTGCCAAAGGCCACCCAGCTTCCTCCCA +CGTCTCACTCACCGTACAATATTTGAGAAGACCTTGGAGTCAGCAGCAACAGCGGCGTGG +GCAAAGGCCGGGGGTCAAATGGGGCCTGGTGTCGAGAGAGGACCACAGCCAGCACAATGA +CAGCCAGCGCCAGCCCCAGCCCCAGCCCCAGCAGGACCAGGTCACCCATGGCTCCGTAGT +CCTGGGCCATGGCTCTGCGGCCCAGAAGGAGAGGGGAGGCCGGTGGGCAGATGGAGGGAC +AGATGGGTGGGCAGATGAATGGACAAGAAGATGCATAGATAGACTCACAGGTAATTGGAC +AGATGGACAAACAGGTGGGGGCTGAAGACAGACACGAAGATGGATCGACAGACAGGCCAG +ATAGCTAGACAAAGAGGACAGTAAGAGAAAGATGGTCAGATAGACAATGGGACAGAGATG +GGCTTACAGATGGGCGGACAGACAGACAGGTCTGAACAGCGGGCTGCCAGATGGACAGAT 
+GGGTGAATGGACAGATGGCTGGCAGCTGTGGCGAGCTGCTGCCCTCACCAAGTGCACACT +ACGGAGTGGCCAAACTCATGCCTCAACTTCTAGTTTTCAGCTCCTGCTTGTTCCTGGCAG +GAGGCCAGGCAGCAAAGCGTCTGAGGGGAGTTTTCTTTGCCTAGAGAAGTCAGCTGCTGT +GTTAACTCCCTCACTGCTGGTAGGTCCAAAGGCCCCACCTACCGCCCGCCAGAGCCCATG +GTCACACTGTCGCAATGTGCAGGAGAACTTGGTGCGTGCTGCACTGCGGTTGCCAGGTAG +GGGCAGGGCTCCCTGGAACCTCCACACCATTCCCCAGGTTCTCAGCAGCTCTGGGAAAGC +AGAGCTGGGGCCGCTTAACTCTGCCCTGGATCCGGCAAGGCTGCCCCCCTCCCAGAGTGG +AGCCCTGCTCCCCAGCTCCCATCTCTATCCCCTAACCCTCTCCGCATGGCCCAGCCTAGT +CAGCATCAAGGTGGAGCTGAACAGAGGCAGAAGGAGGAGGACCCAAGGTGGTGTCACTCA +GGACCCGGGTTCAAGTCCTTATGCTTCTGCAGCCTGGCCTGGGTCCCCCAACTCCCCCAG +GGTGACCAAGGGCTTTCCAGTCTGCACAGAGGACAGGGGGTCTTGACAGCATCAAATTCT +GGTGACTACGAGACACTTACGTGGGAAATGCAGACAGACCATGCCTCTAGCCCTTGGCAC +CAGGCACCATCCATCCCTGGGACTTGCTGTCCTGGAAATGCAGCATGGTCCTCCAGGGAG +GGGGGCTGTGCCATGTGGGGGCCCCACCCCACCTGCAGCTCTTTCCCACCCTGGCTGCAG +GTCTGCTTCCCTGAATCCAAATCCGCTACTACTGTGCTGGCAGCGCAGCCTCTCTGGGGA +CACTGGCCTGGCTCTGTTCTCCCCAGGCCTCAGGGTGCCTAAATGGGAGGCAACCAGGGG +AGTGAGGACCCACTGAGGGGCTCCGTTGACCAGGCTCAGCAGGGGTGCAGGTGATGTGGG +GTGGAATCCTTCCCACATGGCCCCCACAGTCCTCCCCGCTTCCTCCCCAACTGAACACTG +CCTGCTCCAGATGTGTACACCTGGAGTCTGGGCCCCTCCATCTGGGCAGCAGAGAAACTG +AGGCACAGAGACAGACTGTGTCCTTACAGGGCACACAGCCTGCCAGGCCCCTATGTCCGG +CCAGAGCCCCTGGTCAGCCTGGGCTGCAGTGATTGTTTAGAGGTAGGCTGTTCCCACGGC +TGCCTCTCACGGTAGGGGGGCCTGTGGACGCCTCCTCCCGCCCCCACCCGACTCCCAAGC +CTCAGTGACATTGCTCAACCAGGAGCTGAAGTGCATTCCTGGGCTCCGGCCAGCCCACCC +ACCCACCCGCTGCAGTCCTGGAAGCCCAGAGGCCTGGGCAGCAGGAACAGTGGAGACAGC +AGTGTGGGGGACGTCCCCCTCCTCTCCCCACCATCCTCGTCAGGCAGAGGCCAGGGTGCA +GGGACCACCGGAGCAAAGGCCCAGGGAAATGAATGGGTGTCATTCTGGTCCTGACCCGAG +GCACAGCCAGGAAGGTCCCTGTGGGGAAAAGAAAGAGATATCAGACTGTTACTGTGTCTA +TGTAGAAAGAAGTAGACGTAAGAGGCTCCATTTTGCTGTGTAGTAAGAAAAATTCTTTTG +CCTTGAGATGCCGTTAATCTGTAACCCTAGCCCCAACCCTGTGCTCACAGAAACATGTAC +TGTGTCGACTCAAGGTTTAATGGATTCAGGGCTGTGCAGGATGTGCTTTGTTAAACAAAT +GCTTGAAGGCAGCATGCTTGTTAAGAGTCATCACCACTCCCTAATCTCAAGTAAGCAGGG +ACACAAAACACTGCAGAAGGCCGCAGGGACCTCTGCCTAGGAAAGCCAGGTATTGTCCAA 
+GGTTTCTCCCCAGGTGACAGTCTGAAATATGGCCTCGTGGGAAGGGAAAGACCTGACCGT +CCCCCAGCCCGACACCTGTAAAGGGTCTGTGCTGAGGAGGATTAGTAAAAGAGGAAGGCC +TCTTTGCAGTTGAGATAAGAGGAAGGCATCTCTCTCCTGATCGTCCCTGGGCAAAGGAAT +GTCTCAGTGTTGATTGTATATTCCATCTGCTGAGATAGGAGAAAACTGCCTTAGGGCTGG +AGGTGGGACATGCTGGTGGCAATACTGCTCTTTAATGCATTGAGATGTTTATGTATATGC +ACATCAAAGCACAGCACCTTTTTCTTAACCTTGTTTATGACACAGAGACATTTGTTCACG +TGTTTTCCTGCTGACCCTCTCCCCACTATTACCCTATTGTCCTGCCACATCCCCCTCTCT +GAGATGGTAGAGATAATGATCAATAAATACTAGGGAACTCAGAGACTGGTGCCAGCGTGG +GGCCTCCATATGCTGAGTGCAGGTCCCCTGGGCCCACTTTTCTTTCTCTATACTTTGTCT +CTGTGTCTCTTTCTTTTCTCAGTCTCTTGTCCCAGCTGATGAGAAACACCCACAGGTGTG +GAGGGGCAGGCCACCCCTTCAGGTCCCTGAATGTCCTTCCTCAGGAAATGATGGGGGAAG +GGGCGATGAGAATGAAGGAGACGATTTAAGTCCCTCACCCCCCGAGGTAGTCCTGGGCTG +AGCCCCATGGGACCTAGAGAACCAGGGTGTACCCCACCAGCATGTCGGGTCCAGGAAGCC +TCGTGGCCAGCTCCCACTTCTCTTCCTGCTGTGCAACCCAGAGCAAGGCCTGCCCCTCCA +GCTTCAGTCTTCTCCCCTGCAAATGGGGCCACGGCCTTTCCTCTCAGGCCAATATAAGGA +TTGAGGCCGGGTGCAGTGGCTACCCCTGTAATCCTAGCACTTTGGGAGACTGAGATGGGG +GGACTGCTTGAAGTCAGGAGTTAAGACCAGCCTGGTCAACATAGTGAGACCCCATCTCTA +TTGGTTTAAATTTTTTTTAAAAAAATTAAATAAATAAAATAAGGATTGAAGAGTGACTTG +TACACCAGTTGAGCCCACCTCCATCTCACCCTTGCAGAGCCCCAGAGACACAGCCCTCCA +GAGCTCAGACCCAGTGGGACTTGACTCCACAGGCATAAAACCCTGTTTGTCTATGGGCCC +TTTGGAATCACCAGGTTTTCGGGGCTCCTGAAGGATAGCCCCGACCTGGCCTCACCTGGC +CCCTGGCCCCAGTGCCCCTGGTGATATCCAGGTGCTGGGCTGTGATCACCGCCTCCCACC +AGCCCACCTCCACCAGCCCTTCCCAGAACCCTGCTCCAGGTGTTGGAACTGTGCACAGAG +GAGGGAGCAGGCCCCGAGGGAGGCCTGGAGGGGCTGCCAATGGTGAAGGCTGCTGTGTCG +AGCTGTTTCCTTCCGGACCCACTCCCTCTGGGCTGCGTCCCCGGCTGGTCCAAGCCCTGA +TCCCTGGGATCTGGGGACATCTTCCCGTTTGCTGTTCCCTGAGAACCAGGCCTCCCTGTG +GAGAGGATCACAAGCTTGGGTTTCACTCTGGGCTTGCTCTTGGGAACCCCCCAGGGGCAT +GGCTCTGACCGAGATGTTTTCCTCCAGCCTGTTGCCCAGTCCCCATTCCTCGGACCCTCA +GCTTCACCTCCAGTGTCATCGGCAGGGTGAGCTGGACGCCTACGGGTCTGAGAAGGCGCC +CGGGTTCCCAGCATCGGCTGGCCACCCTCTGCCTAAGAAAGCGCCAGGGTCGTGACACCC +CCTGGTGGCTGATCCTAGGTAGTGTCACTGCCCAGCCCCAGTAAGGGAGGGCCTGGCCCC +AAAGTCTGAGGGATCAGGGTGGGAAGGGGCAGGGTTTGGTGTGAACCTTCCCCTGGCCCC 
+CAGCCATGTGCCTTGCTCTCCCCATGCTGAAGATGCTGAGGCTAGTTCCAGTGCCCGCAT +TGTGAAGATCTCCGAATCCCACCTCTCTGTTCCTCCCCAGCCAGATGGCTCCATTTCACA +CACAATACACTGAGGCCCAGAGAGTGGGGAGACAGGCCAGGGAGGCCACCTGGAGCCTGG +CACAGTGGCCTCATTTATTATGCTGCTCTGCTGCTCACAGGGGAAGCCCGTCCCCCAAAG +TCCTCTTCCTCATCCTGGTGAGTATCTTGTCCCTGGATTGCTTGTCAGCCTTGTCTGCCT +GGAGCAATCAGTAGCCAGCAGGTTCCCCGCCTTTCCTGGAGTCCGAGGCAGCTGCCCAGC +CACCAGCCGTGCGGACGATGGCTTGCACCACAGCGATGAAGGTGGATGCGATCTGGGTGT +GATGGTGCCGGGTCTCCAGGGCTGCAGTCACTGCCTGGGGGTGGGAGGAGAGGGGAAGCC +TGAGCAGGGCTCCAGATGCCACCTGAACCACACCTGTGTGGTCACAGGCCTCAGCCCAGG +TGGTGCCATTTCAGGCCAGGTCATCAGGAAGAGCAGGTTGGGGCCTGCTGGGTCTCACTG +GAGCAGGGGGCTTGGCTCTCATGTCACAGGGGCTCCAGATGGCCCAGGCACTAGAGAGAG +GACACCAACCATTGTCCACTCTGTGATGATCCAGGCCTCCAGCCCAGGATGCCCTGGGAC +CCCACACCGTGACTCAGTTTCTCCAACCCCTGGCCCACCTGGTCAATGTTTCTCTCCACT +GTCGTGACGTTGGGCAGAAGCTTGTTGTGCAGCCGGGGCTCCTCCACGGCCCTCTTCACG +TCATAGCCGAACCAGAGGTTGTAGATGATGGCCTGGGGCATGGGAGTGTGATCAGCGTGG +CTTGGGGGCTGTGCAGAGTGGGCAGGGCCAGGGAGAAAAGGGGTGACACATACCAGTGCA +GTGTCTGTGGTGATCTGCGTGCCCCCAGCAGCTCCCACCACCATCCGGACCTGGCCGTCC +TGGCCCACCATGATCGTCGGGCACATGGACAAGAGCGGCTGCTTCCCTGCGGCCGATGGG +AGAAGACAGGGATGCCCGTCAGCTGCCTGCCCAGGACACCCGCCCCTCTCCACCCCAGTC +CCCCACCCCCCAGACCTCCACCCCATACCTGGCTGGATGAAATTGGCAGGTGAGGGGGGT +GCCCCAAACTCATTGGTGAATGCTGGGAGAGCTGAAGTCGTCCATTCATTATTGAACAGG +ATCCCACTGACCGGGGAGCAGACCTTGGAGCCAAAGCTACCGCCCAGCCAGGTCAGACAG +CACCCGACCTTGCCTGGCCCAGCCTGGTCCCTATCCACCCACTGAGGCTCAAACATACTC +ACTGAGAGGCCCAGGATAAGCTACCAAGGTTGGGCCTCAGTTTCCCACCAGGAAAAGAGG +TGATGGAGCCACCTTACTGGATAAGTGGGCAGTCCCTGGGCCACCCGCCCCTGGCCCTTT +CCCACCCAGGCGGCCCAGCAGCCCCTACTAGAGGTTGATGGTGCTGGTGGCGGACACAGC +ACTGCCGTCCTCTGCGACGACAGACAGGTGAGCAGTGCCCCCGTCATCCGGCGTGTAGAA +CTCGGGCTTGTAGTAGGAGATCGGGTGAGTGGTGTGGTCAGAGATCTGGGACCGGAGCTG +GGCAGCGAAGAACTCAGAGGTCATGTTGCGGACCACCTGCTGAGACCCCAGAGCTGGCCT +GAGGAGGTGGGGAGGGAGGGTGGGGAGGGGGCACAGGTCTCAGAAGGCCCTTGACTGTGA +CTCTGACCGCAACCCTCTGGCACCCACAACCTTCCGTGGCTCCCCAGGACCCAAGGGCAG +GCCCAAGACCTTGCATGACCAGTCTGACTCCCTGTCTCTGTCGCGTTCCAGCAACTCTGA 
+ATGTCTGTCTGCCTGGTCCTCAGCCTCCAGACCCTTGCCGCATTCAATCACTCATTCTTT +CATGCAAAAAATATTTCTACAATTTGCACTGCATGCCTGGCACTGGGGAATCAACAGGGA +ACAGACACTTAGGTCCTGCCCTCATGCCAAGAAAAACAAACACACACAGGGAAAGTGCTG +AAACCACAGGCCAGGTAAGGGGAATCAAGAGGCATGAGGTATGGGCAGAGTGGTCAGGGA +GGGCTTCTCAGAGGAGGCAACGTGTGAAAAGAGCCTGGAAAGTGGCCTAAATGGTCAGTG +CAAAGGCCCTGAGGCAGGTGGCATAGGCTGGTGAGCGATAGGCAGAGAGTGAATGGAGTA +GGGTGGGGAGAAGAGGATGAAGATGCAGGCTGGGGCCCATCCCACAAGACCTCCTAGGTC +CCATAACAACTGGCTTTTGCTCTGTGCCATGCAGGCTTAGGACAGAGGAATGAGCAGGCT +GGGGAGTGTTTTCACAGGGTCCCTCTGGCAGCTATGACGGGGATAAGGATAAAGCCCAAA +GGGGAGGCTGTGGGTATCAACCAGGCAAGAGATGATGGCCTGGGTGGGAGAAAGAGAAGA +AGCCTGTGCGTTCTGTCCATGTGATGAGGTGGGCTCCTCTCAATCTTCACACAGCACAGC +TGAGCCTCAAACCCAGCACTCACCCTGACCTCTCACCTCCCCACAAGGTAGGAAAACCTG +TTGCCCAGGCTGGAGTGCAGTGGCACGATCCCGGCTCACTACAACCTCTGCCTCCTGGGT +TCAAGCGATTCTCCTGCCTCGGCCTCCCAAGTAGCTGGGACTATAGGCGCCCACCACCAC +GCCTGGCTAATTTTTGTATTTTTAATAGAGACAGGGTTTCACTGTGTTGGCCAGGCTGGT +CTTGAACTCCTGACCTCGGGATCCACCCACCTCGGCCTCCCAAAGTGCTGGGATTACAGG +AGTGAGCCACCGCATCCAGCCCCTATTTCTTTTTAAGACACGGTCATGATTCATTGCCCA +GGCTGGAGTGCAGTGGCACGATCACGGCTCACTTCAGCCTCACACTCCTGGGCTCAAGCA +ATCTTCCTGCCTCAACCTCTTGAGTAGCTGGGACTATAGGAATGCCGCACCACACCTGGC +TAATTTAAATTTTTTTTTTTTTTTTTTGTAGAGGTCTCACTATATTGCCTAGGCTGGTCT +TGAACTCTTGGGTTCAAGCAGTCCTCCCACCTCAGACTCCCAAAGTGTTGGGATTACAGT +TGTGAGCAACCATGCCCAACCAGATTTTATTTTTATTTTTATTTTTGAGACAGGGTCTCA +CTGTATCACCCAGGCTAGAGTACAGTGGTATGTTCACGGCTCACTGCAGCCTTGGCTTCC +TGGGCTAAGTAATCCTCCCACTTTAGCCTCCCTAGTAGCTGGGACCACAGGCACACACCA +CCATGCCCAGCTTTTTATGTTTTTATTTTTTTATTTTTTGTAGAGATGAAGTTTTGCCAT +GTTGCTCAGGCTGGTCTGAAACTCCTGGGCTCAAGCGATCTGCCTGCCTCAGCCTCCCAA +AATGTGGGGATTTCAGGGGTGAGTCACCACACCTGGCCCAGATTTTATTATTTAAGTTAA +CATTTTGTTACCATTGCTTAAAGCCACCTGGTCCTTTTCATCAGATAGTTTCTTACAGCC +AGGCACAGTGGCTCACACCTGTAATCCCAGCACTCTGGGAGGCTGAGACAGGTGAATTGC +TTGAGCCCTGGAGTCTGAGACCAGCCTAGGCAACACAGTGCAACCATGTCTCTACTAAAA +GTTTTGGTCTCGATCTACTGACCCCGTGATCCGCCCGCCTTGGCCTTCCAAAGTGCTGGG +ATTACAGGCGTGAGCCACCGCGCCGGCCTAAATATTTTTTTTTTAATTAGCTGGGTGTGA 
+TTGAGGCTGGGCACTGTGGCTCACGCCTGTAATCCTAGCACTCTGGGAGGCAGGTGGATT +GCCTGAGCTCAGGAGTTCAAGGCCAGCCTGGGCAACATGGCGAATCCTCGTCTCTACTAA +AAATACAAAAAAAAAAAAAAAAAAAAAAAAAAAATTGGCCAGGTGCAGTGGCTCACGCCT +GTAATCCCAGCACTTTGGGAGGCCAAGGCGGGTGGATTACCTGAGGTCAAGAGTTCAAGA +CCAGCCTGGCCAACATGGTGAAACCCTATCTCTACTAAAAAATACAAAAATTAGCTGGGT +GGTGGCGGGTGCCTGTAATACTGGGTACTCGGGAGGCTGAGACAGGAGAATTGCTTGAAC +CCAGGAGGTAGAGGTTGCAGTGAACCGAGATCACGCCATTGCACTCCAGCCTGGGCAACA +AGAGCGAAACTCCGTCTCAAAAAAAATTAGCTGGGTATGGTGGCATGTGCCTGTAATCCC +AGCTGCTTTGGGGGCTGAGGCATGAGAATTGCTTGAACCGGAAGGCAGAGGTTGCAGTGA +GCCAAGATCGTGCCACTGCACTCCAGCCTGGGCAACAGAGTAAGATTATCCAAAAAGAAA +AAAAAAAATTAGCGGGTGTGGCTACACCTGTAGTCCCAGCTATGTGGGAGGCTGAAGTAG +GAGGATCGCTTGAGTCCAGGTGGTGGAGGCTGCAGTGAGCTATGATTGTGTGACCGCACT +CCAGCCTGGGTGACAGAGCCAGACCCTGTCTCAAAAAGAAAAGGGTCATGATGCCTCCCC +AACAGGATATGGTGGGGACGCTCCAAGACCATGGGAGAAGAGGATACGAGCCGGAGCTAT +GGAAGAAGTGAACACGTAAGACTCCAGAGGCAAACTGCCCGTCTAGACAGCTCTGGCCTG +CAGAGCCCACCACTCCCAGGCCAGCGCCGTGAGGTCCTGTTGAGCCACCCACCTCACTGT +GATCATCCCCAGGTAAACCCTCGGACGTGGGCCCGGTGCCCTCCTTGCTGAGGCTGAAGC +CACGTGGCCATGAAAACCTGAGCATCACAAGCCGCTCTCTGCGTTTGTGGTGTGATCAGG +GCCTTCTACCACAAGCCATGTGCCCAGGCTGATCCTCTCTAATGTGGAGAGGCGGCTGCG +CTGGAAAGGCCTCAGTAAGAGCAGCTGGGCCCTTGCAGACAGACATCACGGTGTGACCCT +CGGGACTGGCTCCCTGCGGCCACTGCAGCACAGGTGTTCACATCTCACAGTGGAGGCAGT +GGAGACCATGCTCGCCTGCAGGCTCTCGGCGAGGCTCTGAGTCTGGCCCCTTGCGTAGCC +CCTGGTGACTCCTTGGCTTCTGGAGCGGAACTCCATCCTTCACATGGCGCTTTCCTCGAG +GGCCTGTGTGCGTGTCATCTGTGGCCAAGTTCCCACTTCATAAGGACATCAGTCATATTG +GATGGGGTCCCACCCTGCTCCAATGAGACCCCATCTTTTTTTTTTTTTTTTTTTTGAGAC +AGAGTCTTGCTCTGTGGCCCAGGAGGGAGTGCAGTGGCACGATCTTGGCTCACTGCAAGC +TCCGCCTCCTGGGTTCACGCCATTCCCCTGCCTCAACCTCCCAAGGAGCTGGGACTACAG +GCGCTCGCCACTGCACCCGGCTAATTTTTTGTATTGTTAGTGGAGACGGGGTTTCACTGT +GTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCACCCTCCTCGGCCTCCCAAAG +TGCTGGGATTACAGGCATGAGCCACCGCGCCCGGCTGAGACCCCATCTTAATCACACCTG +CAGTGACCCTGTTTTCAAGTTAAGGCCACATTCTGAGGTATTGAAAATTAGGACTCCAAT +ATGAATTCGGGGGGACCCAATTTAACCTGTAATATGTATATCCCAAATGCAATTTTCTCC 
+TTTTTTCTTTTAGACAGTTGCTCTGTCACCCAGGCTGGAGTGTAGCTCTTGGCTCACTGC +CTCTGCTTCCCGGGCTCCCGCAATCCTCCACCTCAGCCTCCTGAGTAGCTGGGACCACAG +GCGCACACCACCACACCCAGCTAATTTTTGTATTTTTGAGAGAGATGAGGTTATGCCATG +TTGCCCAGACTGGTCTTGAACTCCTGAGCTCAAGAGATGCACTCACCTCAACCTCCCAAA +GTGCTGGGATTACAGGTATGAGCCAGCATGCCTGGCCTCCAAATGAAATTTTCAGAAAGG +CCTTTTCATAATAGTAACGGGAAACAAGTGACATAGGCATAGCCTTGATAAATGTGCCAA +AAGCCCATTTGGAAATAGCACATTTGTAGGAGGCCTGAGGCAAGACTTGTGGGATTGGAC +GGGAATGCTCTGTTCCTGGGACCAATACCTGATGATGGAGAGTGTCCATTCTTCCCTTTA +AAATTTTTATTTTAATATTTTATTTTTAAAAAGGGGTCTTGCTATGTTGCCCTGGCTGGT +CTGGAAGTCCTAGGATCAAGTGGTCCTCCCACCTTGGCCTCCCAAAGTGCTGGGATTACA +GGTGTGAGCCACTGTGCCCAGCCCATTCTTCCCTTTTAATATATACGCTTCACATACAGG +CTTTGTTTTTGCAACTCCTATCAAATCCCAACAATGTATTTGGGAGAATTTGACAAGACA +GTTCTAAAGTTTATCTGAAAGAGAAGCAGCAGATCCTAAATGGCTTTCCCCAGGCCCAGC +ACTGGCAGATAGCACCTTTCTAACCTTCGGGTGTGAATCCAAGGGTGGTGGTGGAGCTTC +AGGACACATGCACCCTCGTGTCCCTCCTCTATGTAAATGGAAGCCTTCAGTGCCAGCTCC +TTTAGGTTGTAAGGACTGAACACGCTCACAGAAGTCGTGTGCACAGTGAGGTGGGGGCTG +AAATGCAGCAGCTCCTCTCCCTGTGTTCTGTCCCCGCCCCACCTCTCCCCCACCTCAACT +TTGACCACTAGGGGACCAATTATAACATGAGATGGCAAGAATAGCTCATGTAAAGCCACC +CCACCCCAGTCACCTGGTTGCTGCATAGTGTCACATTCATAGGAGTACAATTTAGTCACT +AGTAATCCTTTGCGAATAATACAAACGTCCAGTAATTCTTTGCTGCATAACAGGCCTTGT +GCTCAAAAGGTTCTGCAGACCTTGCCTGGGTGCTCTGGCCAAGCTGCAGTCATCCCAGGG +CTGGCTGGGGGCTGGGGGTTTGCCGGCATGCCTGCTGGTTGGTGCTGCTGCCAGTCCCAG +CTACTCGGGAGGCGGAGGTACGGGGACGGCTTGGTCTCAGGAGGTTAAGGCCGCAGTGAG +CCATGATCGTGCTATTGCACTCTGGCCTGGGCAACCAAGCAAGACTCTGTCTCAGAAAAA +AAAAAAAAATAAGGGGGTGAGAGGGGCTATAAAGGCTCTGAGGTCCACGCTCTGGAGGCC +CACAAAGTCTTTTCTGCATCATCTTGTTTGTCAGAGCAAGACCCAAGGGTGAGCCTAGAT +TCCTGAGCTGGGGAAACAGGCTCCACCTACTGGTGGTGAGAACTGTAGACAATCTGTGGT +CGTTTTGAGTCCACTATAAGTAACCAAAATACCTTCAGTCTTGCTTGCATTTCTCAACAT +TAGTGAAAGGGGACCCAGTGCTTGGTTGTAGCAGGCGTCCTAAACTCTCCTTCTGACCTG +CAGAGTTTTCATCTGCAGAATGTCCCCTCCTGGTGCACAGCATTCATGTCCCTTGTCCTC +ATCCCTGGTGGGTGCTCTCGCCGCCTCCTTTCTGATCCCATCCTCCTCCTTGTCCTACCA +ACCGTCTGTACTCACCCTGTGTATTTAGTTTATATAAATGTTAATGCCTGGCCAGCCGTG 
+GTGGCTCACGCCTGTAATCCCAGCACTCTGGGAGGCTGAGGCGGGCGGATCACCTGAGGT +CAGAAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTTTCTACTAAAAATACAAA +AAATTAGCTGGGCATGGTGGCGAGTGCCTGTAATCCCAACTACTCGGGAGACTGAGGCAG +GAGAATTGCTTGAACCCTGGAGGCGGAGGTTGCGGTGAGCTGAGATCATACCATTGCACT +CCAGCTTGGGCCACAAGAGTGAAACTCTGTCTCAAAAATAAAAAAATTAAAAATAAATGT +TAACACCTGTAGTCCCAGTGCTTTGGGAGGTCAAGGCAGGAGGATCACTTTAGCACAGGA +GGTCGAGGCTGCAGTGAGCTGGGATGGCGCTACTGCTCCTCAGCCCGGGCAACAGAGTGA +GACTCGATCTCAAAAAAAAAAAAAAAAAAAAAGACCAGCGGGGGTGGTTCATGCCTGTAA +TCCCAGCATTTTGGGAGGCCAAGGTGAGTGGATCACTTGAGCCCAGGAGTTTGAGACCAG +CCTGGTCAACATAGTGAGACTTCATTTCTACAAAAAAATAATCAGCCATGCTGTAGTCCC +AGCTACTGGGGGTGCTGAGGTGGGAGGATTCCTTGAGCCCAGGATTTCAAGACCGCAGTG +AGCTAGGATCAAGCCACTGAAGTTCAGCCTGGGTGACAGAGCAAGATCCTTTCTCTCTAA +CAAAAAATTAAAATTAAAAAATATTGGCCAGGTGTGGTGGCTCACGCCTGTAATCCCAGT +AACTTTGGGAGGCTGAGGCAGGTGGATCATTCGAGGTCAGGAGTTCGAGACCAGCCTGAC +CAACATAATGAAACCCTATCTCTACTGAAAATACAAGGATTAGCCAGACGTGGTGGTGGG +CGTCTGTAGTCCAGCTACTTGAGAGGCTGAGGCAGGAGAATCACTTGAACCACCACGCCC +AACCTAATTTTTTTGTTGTTGTTGTTGTTAGTAGAGGCAGGGTTTCACCATGTTGGCCAG +GCTCATCTTGAACTCCTGACCTCAAGTGATCCACCTGCCTCAGCCTCCCAAAGTGCTGGG +ATTACAGGTGTGAGCCACCGCGCCCGATCTGAAGACATTTTTGATTGGTTGATTGAGTTG +GGGGTCTCACTGTTGCCCAGGCTGGAGTGTGGTGGCATGATTATAGCTCACTGCAGCCTT +AAACTCCCAAGCCCAAGAGATCCTCCCAGCTCAGCCTTCTGAGTAACCGGGACTACAGGT +GCACACCAGCACACCCAGCTCATTTTAAATTTTTTCTTTTTTTTTGAGATGGATCTTGCT +CTGTCACCCAGGCTGGAGTGCAGTGGCGCAAGCTCCGCTCACTGCAAGCTCCGCCTCCCG +GGTTTATGCCATTATCCAGCCTCAGCCTCCCGAGTAGCTGGGACTATAGGCACCCGCCAC +CACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGA +TGGCCTCGATCAGGAGATCGTCCATCTTGGCCTTCCAAAGTGCCGGGATTGCAGGCGTGA +GCCACCGCGCCCGGCCCCGTAGTTTCTTACATGTTCACAAAGAGACAAACTTAAAAGAGC +AAAGCATTACGAACTTTAAGAGGCCAGTGCAGGAGGATCACTTGTAGCTAGAATTTTGAG +ACCATCCAGGGCAACAAAGTGAGACTCTGTCCCTACAAAAAAAATTTTTTTTTTTTTTTT +TTTTGAGACAGACTCTCGCTCTGTCGCCCAGGCTAGAGTGCAGTGGCGTGATCTTGGCTC +ATTGCAACCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGGGTAGCTG +GGATTACAGGCACTCGCGACCATGCCCAGCTAATTTTGTATTTTTAGTAGAAATGGGGTT 
+TCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTCGTGATCTGCCCTCCTTTGCCT +CCCAAAGTGCTGGGATTACAGGCATGAACCACCGTGGCCAGCCTCTTTTACTATTTTTTT +TTTTTTTTTTTTAAGATGGAGGCTCACACTGTTGCCCAGGCTGGAGTGCGGTGGCACGAT +CTCGGCTCACTGCAACCTCCGCCTCCTGGTTTTATGCGATTCTCCTGCCTCAGCCTCCTG +AGTAGCTGGGATTATAGGCCACCATGCCCAGCTAATTTTTTGTATTTTTAGTAGAGGTGG +GGTTTTACCATGTTGGCCAGGCTGGTCTCAAACTCCAGACCTCAGGTGATCCACCCACCT +TGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAACCACTGTGCTTGGCCTTTACTACTAC +ATTTTTTTTTTTTTTTGAGATGGAGCCTCCCTCTGTCACCAGGCTGGAGTGCAGTGGTGC +AATCTCGGCTCACTGTAACCTCTGCCTCTCGGGTTCGATTCCCCTGCCTCGGCCTCCCGA +GTAGCTGGGACTACAGGCAAGCACCACCATACCCGGCTAACTTTTTTTTTTGTATTTTTA +GTAAAGACAAGATTTCACCATCTTGGCCAAGCTGGTCTTGAACTCCTGATCTCATGATCC +ACCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGTGCCCGGCCGTC +TACTACTTCTTAAAGGGTGAGAGGCGGAAGGATCACTTGAGCCCTGAAGTGTGCGACTGC +AGTTAGCTTTTATCGTACCACTGCACTCCAGCCAGGGTGACAGCAGGACCCTGACTCTAA +AAAAAAAAAAAAAAAGCAAAAAAAAGCATATACTATTAATACTTCCTCCTTACTATAATG +TTTACTGTGGCCTTTATCAGACTAGAGAGTGCTTTTATCTCTCCCTTTTGTTAGAGCTTT +TAGAATTATCAATAGATGGTCAGGTGAGGTGGCTCAGGCCTGTAATCCCAGCACTTTGGG +AGGCCGAGGTGGGTGGATCACGAGGTCAGGAGTTGAAGACCAGCTTGGCCAAGATGGTGA +AACCCTGTCTCTACTAAAAATACAAAAAAATTAGCTGGGCGTGGTGGCGAGTGCCTGTAA +TGCCAGCTACTCTGGAGGCTGGGGCAGAGAATTGCTTGAACCCAGTAGGCGGAGGTTGCA +GTGAGCCAAGATTGTGCCACTGCACTCCAGCCTGGGCAACAGAATGAGACTCTGTCTCAG +AAAAAAAAAAAATTATTAATAGATGGTGACCCTCATCTTCATGTTTTCTGCATCTATTGA +AGTGCGCCATTGAGATCGTGGAGTTCTTCCCTGTTTGTCCATATGGTGAACAACACTGAT +TTTCTGATGTTGAACAATCTCTGGAATAAACTCTGTTTGGTTTCCATGGATTTCTGTTTC +CCTTTAGATTTTGTTAGTAATGTATTTTGGATTTTTGCATCCAAGTTCATAATTGTAACT +GGACAAATAAAGGCAGATTTTAAAAGGACAAGTTGGGAGGCTAAGGCGGAAGGATCACTT +GAGCCTGGGAGATTGAAGCTGCAGCTAGCCTCGATTGCTCCACTGTACTCCAGCCTGGTG +ACAGCAGGACCTGCCTCTAAAATAATAATACTTAAAAGGACGAGTTTACCTACAGTCTCA +CCAAGCAATGAAAGAGCTTATCTTCCTCCTGTTTCCTTTACGGATCTTGCTCATGTATTT +TATCTTAGTTACTTTAGCATAAATGCTAAATCAAATTCTTTTTCACTAGACATCATCATA +GCTCTAGGCATACCACATAATACCCAAGCACATTTTAATCATTAAAATTGTTTCCTTTTT +TTGTGGCTACTGTAACCACTTCAGCGAACCATTTAGTGTGTAGTGGGTTTAAATTATTTT 
+TTCTCCTTTCTGGGATAAATTTCCCAGGAGAAGGATAATTGGGTCAAAAGCTACGAACAT +ATTTATGGGTTCTTTTCCACAAGAGATCGATCAGTTTGCAAGTGCCATCTGCAGTGTGGG +GGAACCAATCTTCAACTTGCCTGCATTTGGTATTAGTATTTTATTAACCTTTCATTGATC +CAATTACGGCTCTTTGACACAATTTTCATCAGTGATGGACTAAGTGTGATCCGGCCTGAC +CCGCCTCCGGCCGGCGTGAGAAGGGGCATGTGTCGGGCTACGCTCGGGCTTCCCCTGCCG +CCCATTGTGATCCAGCCCGCTAGGCGCTCCCTGCCGCCCATTGTGACGCCTGCCAGCCGC +AGGCTGGGTCCCCGAGGCGGGCGGCATTTAGGCTCGGTCTCCACAGCCATGGCCGCGACG +CAGGAGCTGCTGCTGCAGTTGCAGAAGGATAACCGAGATGGTCGCCAGCGGAAGCAGGAG +CTAGAGAAGCTGATGCGCGGGCTCGAGGCCGAGAGCGAGAGCCTCAACCAGCGCCTGCAG +GACCTGAGCGAGCGGGAGCGGAGGTGCGCGGGGAACGCCGCTCTTCTACCTGGCGGGCGC +GCGAGGGTCGGTCCCGCAGGCAGCGCCGCGAGGTGCTTCGCAGAGTACCAGGCTGATCCG +CCCGGGCCCGCATCTCTGCTCTAGGCCCTTGGGAACGGGTGATCCACCCAGCGGACCCAG +GTGGGGGACTCGGCCAGGACTTCCCAGTCCTCAATCATGAGCCTGCGGCTGGTCCTTCCT +GGCGACTGCGGGATCCTGAGCGACCCAGTCCGCCTTGTAGCGCCAACCTCAGTTTCCCTC +TGCAGCCTGCTGCGGAGGCGAAGCCAGGCAGCGCAGCCTCTGCAAGGGGAGGCGCGCGAG +GCGGCGCGGGAGCGCGCGGAGCGGGTGCGCAGAAGACTGGAGGAGGCGGAGCGCCACAAG +GAGGACTTGGTGAGGAAGAGTCCTAGAATGGGGCTGGACCCAGGGTGGGGTGAGGCAGGG +CGGGCGGAAGGGGGCGGGATCCGGGGGTGGGGTGAGGTAGGACCGGCGGAGAGGTCGGCA +CCGCCCCAGGACCCCGTCCGCAGGAGCAGCACAGCAGGCAGCTGCAGGAGCAGTGGGAGG +AGCTGTCGAGTCAGGTACGTGCAGGAGATGGGAGGGCCTGTCTCTTGGTTCCTCTCGGAA +GTCCTGCCCTTGTCCTCGCCCTTGTTGTCTCCCCGTCCCTGTCTCCCCTGACACTCGTTC +CTCCAGAAGCCCGGTAAACCCCGCCCTTACAAGCCCCGCCCCTAGCTCTTCTACGGAGGG +GAACCGCAGAGCCAGAAGAGCACGGAGCAGCAACTCGCAGCCCAATTGGTGACGCTGCAG +GTGCTTGAGCGGGACCCTGAGGTCTTTAGTAGGGGCGGAGCAGCAGCGTGAGCGGGGCCG +TGACCACCTGGGGGTGTGGCTTAAGGCAGGCCCTGAAGGCGTGGGCGGGGCGGGGGATGT +GGGCGGAGCACAATCGCATGGGGGCGGGGCTCTGAGGGCTAGAATAGGGGCGGAGCGCGG +AGGGGGCGTGGCCATGACCAGTTGGGGCGTTGCTTACGACTGGTCCTGAGGACGCGGGCG +GGGTCATGATCGCCTGGGGGCGGGCACTGAGGGCCGGGGGCGGGGCCCGGAGGCGCAGCG +GGTTGCCGGCCTGCGGACCTCCTGACATTCCCTGGGTCCTTCTCAAGAATGAACTGGAGC +TGGCGGAGACCAAATGCGCCTTGCAGGAGGAGAAGCTGCAGCAGGTGAGGGCAGAAGCGG +GTTCTGTTGGAGGAGGGTAGGCTTTCGAGTGTGGATGGGGAAGGGCCTGTCGCCCCGACG +CCGCCGAGTCTAACCCGGGTGTCCACACCCAGGACGCGCTGCAGACAGCGGAGGCCTGGG 
+CCATATTCCAGGAGCAGACCGTAGTCCTGCAGGTGCGGCCCCACTCAGACGCCAAGGTGC +CTCCCGCCTCTCCTCCCCCAGACCTGGGGCGGTAAGTCTCCCAACCCACCGCCAGGACGC +CTCCCCGAGGCCTCAGTCCGCACTCTCACCCGCTCCAGGAGGTGCAGGTGAAGGTGATGG +AGGCTGCGGAGGAGCTGGACGCCTGGCAGAGTGGCCGGGAACTGTAAGGGAGTTGGGCCT +GCGGGCGCGGCGGGGCACTGTGGGGCCGGGCTGGGCTCCCACCTGCATGCCTGTCCCCGC +AGGTGTGACGGGCAGCTTCGCGGAGTGCAGTACAGCACCGAGTCGCTCATGGAGGAGATG +GCCAGGGCGGACCGAGTGAGCGCCTGCGCGGGTCCGGGCGGGGTGGGCTGGAGCGGGACA +ACCCTCCCCGTCCCCCCCGCGGTACCGCCTCCCCCTCCTCCTGGAAACCGGGCCGGCGCC +GCGGGCGCGGAGGTAGCTGGATGCGGCCCTCTCTCCCCGCAGGAGACGCGGCTGTTCGGC +GGCCCTCGCGCGCTGGCCATCAGGTGAGCCGGGCGGTGGGCGCGGCCGCGGTCCCCCACC +TGCCCGCCTTTCGCCCCGCAGGCGGTGCGTGCTGGGCGCGCTGCAGGTGCTGCTGACGCT +GCCGCTCCTCTTCCTGGGGCTGTCGCTGCTCTGGACGGTGCTGTTGGACCCCGGCGCCGT +CTCCGCGTGGCTCTGGAGCCTCACCTCGGAGACGACGCTGCGCCGCCTGCGCTACACGCT +GTCCCCGCTGCTGGAGCTGCGCGCTAACGGGCTGCTGCCAACCTAAGTGCAGCGCCCCGC +GCCTGGCTCCAGGTGGACTCCAGGGCACCTGGCTTTATTTCTGGTGCACTCCTCTCCTGA +GAGTGTAGACCAAGGTTGCCTAATAAACTCAAGGGATGAAGCTCGTGGGTTCGTCGTCTG +TCTCCCATGTCATGTAGGAGCTTGACTGGCTTTTCAGCCTCCAAAGATTCCTTCCTTCCT +TACAGCTCATGGATTTAGAGCCACTCCCCAGTATTGTAAACAGCATTTTTAGTTTTTCAG +GATACATAATTTGCCATATTGCCAGAAACTTGTACATAGCATTTGACACTTTGCCAATCT +GACAAGCTGCCTTTTCAAGTATTGTGATTTGAAATTATTTCTACTATATTTGAATTTATT +TCTACCATTTGTAACATGTTGATCCTGATTTTTCAATGCTCCCCTTTCCTTCTGTTTATT +TGGGAAGTCACACTCTGTATTCTGGTATTCTTACTACTTGAGCTCAGCTCATCGCTGTCA +TCTTCCCCTAAACCGCCTTAAAGCATCTTAGGTCTTTTCCCTCTGATCACATGTGCTACT +TTTTTTTTTCTTTTCTTTGGAGACGGAGTCTTGCTCTGCGCCCAGGATGGAGTGCAGTGG +CATGATCTCGGCTCACTGCAAGCTCTGCCTCCCGGGTTCCAGCGATTCTCCTGCCTCAGC +CTCCCAAGTAGCTGGGACTTCAGACGCCCGCCATCATGCCCGTTTAATTTTTTGTATTTT +TTGTAGAGACAGAGTTTCACTGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGAT +CCGCCCACCTTGGCCTCCCCAAAGTGCTGGGATTACAGGCATGAGCCACCGTGCCTGGCC +AACATTTTCTCCAGATATCTTACTGAGTCCTAATTTCCACTGTGGCCAGAGAATACACTC +TGCATGATTTTAATTCTATCTCATTTACTGAGACTTGTTTTGTGGCCCAGCACAAAGCTT +GTAATGAACACTGAGAAGAAAGTGTATTGTTGCTGTTGACAGAGTGATTTTTTTTTTTTG +AGACGGAGTCTCGCTCTGTCGCCAGGCTGGAGTACAGTGGTGTGCAATCTCGGCTCACTC 
+CAACCTCCACCTACTAGGTTCAAGTGATTGTCCTGCCTCAGCCTCCTGAGTAGCTGGGAC +TACAGATGCATTCCACCATGCCCAGCTAATTTTTGTATTTTTAGCAGAGACAGGGTTTCA +CCGTGTTGGCCAGGATGGTCTTGATCTCTTGACATTGTGATCCGCCTGGCTTGGCCTCCC +GAAGTGCTGGGATTACAGGCATGAGCCACCGTGCCAGGCCGACAGAGTGTGTTCTAAATA +CCCAGCAGGTCCAGGTGAGTGACAGGCTTGTTCAGGGCTCCTGTGTTTACTTTCTTGTTG +GTAGCGCCGTCAATTGCTTATTGTGAAATGCCCCTTCATAAACACCCATACACATTTTGA +TTATGGATGGCCAGTCTTTTTTCTATCTGTTATTTTCATCTTACATCTGTCTTCATATGT +AAAGTGCTTTGGTTATAATCAGCACATAACTGGGTCTCACTTCTTCAATTCATGTTTGTA +ATCCCTGACTTTTAAATTGGGCTGGTAAGTCTATTATGTTTCTCCTTTTCTGCCTTCTTT +TGGATTAGTATCTTTTAAAACTCCTTTTAACTTATACGCTGACTCATATTTTTTATTAAG +ACAGTCTCACTCTGTCATCCAGTGTGGAGTGCAATGGTGTGATCTTGGCTCACTGCAACC +TCCACCTCCCGGGCTCAAGCGATCCACCTCAGCTTCCTGAGTAGCTGGGACTACGGGCTT +GTGCCACCATGCCCGGATAATTGTTGTATTTTTTGTAGAGATGGGGTCTTGCCATGTTGC +TCAGGCTGTTTTCAAACTGCTGGACTCCAGCAATCCACCCACCTCAGCCTCCGAAAGTGC +TGGGATTACACATGTGAGCCACTGTGCCCAGCCTATATTTTTATTTGTACTGCTCAGGAC +TCAACTGTGCTTCCTAAAGCTGAATATTTGTGTTTTTCAACTCTGGAAATCCCATTAGAA +GTTGATTGGGGCCGGGCACGGTGGCTCCAATCTCAGCACTTTGGGAGGCCGAGGCGAGCA +GATCACCTGAAGCCAGGAGTTCAAGATCAGCCTGGCCAACATGACGAAACCCCTTCTCTT +CTAAAAGTACAAAAATTAATTAGCTGGGTGTGGTGGCGTGCAGCTGAAATCCCAGCTACT +TAGGAGGCTGAGGTAGGAGAATTGCTTGAACCTGGGAGACAGGTTGCAGTGAGCAGAGAT +CACGCCACTGCACTCCAGCCTGGGCGACAGAGTGAGACTCCGACTAAAAAAAAAAGAAAA +AGTTGACTGGAGCTTTTCATTCAACTCTTCTTTTTTTTTTTTTTTTTGACCGTTACCCAG +GCTGGACTGCAGTGGTACCATCATAGCTCACTGGAGTCTTGCTCTTCGGCTCCAGTGATC +CTATCTTTCTTACCCTCCTGAGTAGCTGGGACTACAGGCATGCACCACCATGTGGCTAAT +TTTAAATTTTTTTGTAGAGACAGGGTCTTCCCGTGTTGCTTATGCTGGTGTTGAACTCCT +GGACTCAAGCAATCCTCCTGCTTTGGCCTGCTGAAGTGCTGGGATTAGGAGCCACCCCGC +TTGGCTTTCCTTTCCTTCACACTGCATGTTTGGTCATTTCACTATTCTGATCACTGATTT +TCTCTTCTTACCTGCTGCTGTTTAGCTGATCTTTTGGAGTTTCTCTTTTGTTTCAGGTCT +TTATTCCTATCTGCTTCAGATGCAGGGCCTCGGTGGGTGGGAGTAGATGCTTGCTGGGTC +TAGTCACACAAAGATTTTGGAGTTCCAGAGCACAGCACTTGAAAACAAGCAAGAGGGAAA +GACTAGAGCAGTGGGAAGATGCTGCGGAGAGCCACAGACAGGCCCCCACGGCAGCCCTGA +GGGAGACAGTTTTGGAGCGGGCAGACAGCAGTCAGGACAAACACAATGTGTGTTTTGGTG 
+CACCTATGCCGTGGTGAGATTATGGGAATCTCATGTTTGTTAAGCCACAACCCGTGCTCC +TGGGCCCCAGCCTATGAGTGCAGCCAACACTGGGCCCCCCTCTCTAGGGGCAAATCCAGG +AACTGCCCTTTGGCTGAAGGTGGACTTAGGACTTGACACAAAACCTCACAGATTCCAACA +CAGCACTATTTTGGGTTTTTATTTTGTTGATGTTGGTTAAATCTTATCTCTTTTTTTATA +CACAATACTTCATGTACCTATGAAATAAAACAGGTAGGGAATATGTCCAGTGCAAACAGA +GGACTCACACCTGTGCATAGACAGCACCATCCACTGATTGTCGCTGCAGTCCACGGCGTT +ACTAAGCCTGCGCCACCCACGTGCTGCCCCAGGAGGCGCTACCAGGCTCTTCGGGCCACA +GGCCTCTCCTCCACTGCATGTGGCGGCAGGGCGGGTAGGTCGCAGGGCTCCATGATTGTG +GGGCAGCTTCAAGGGCACATGGGGCAGAGGCCCTCGAAGGTCCCCTCCTCAGTAGGGGAT +GTCATTCTGATAGTACTGGATCATGTTGTAGGTCCGGCTCCTGAAAGGCCAAGAAGAGTG +AAGGGAGATTCGAGGAGCCAGCAGGGTCTGGGGTCCCTCCCTGCAGGGAGCCCTACCTGT +CCAATCAGCCCCTTATGGCTGCCCAGCACCTGAAGGTACAAATGTCCCAGGCGTGCCCTC +CCCCACCCAGTGACCACATCTGCTCCAACCCGGGGGACTCTGAGGCCACCTCATGCTCCT +CAGCCAAACCGCTTCCATCCGTGGGAGCCACTGCTCTGCCCCATGGGCCTCGGGCCCCTT +GGCTCGTCCTGGCCAGATGCCTCCAAGGGGTCTGTACGCTCTCCTCGCACCTCTGGAGAG +CCCCTTCTCGCCCTGCCCCAGCCCCTCTTGAGGTCCTGGCCGCCTGGCTTCTCTGCTTCC +TATCAGCAAGAGCTCCTTCCGTGCCCAGCCTCCACAGTGCCCTTGTTAGGGTGACGGACC +TGGGCGCCACCTGGGGGCAGGTCTCAGCTTCCTCTCACGCTCCTCCGGCAGTTCCCCATG +CAGATGGCCACACCTGGGCCCCTCCTCCTAGTCTGGATGTGGTCTGTCCCATGGCACAGC +CTTGGGCTCCCACATGCCCCTCAGGGCCTTACCAACCCCACATGTTTAAGTGCTGCCCCC +TTCGGGGATGGCTCACTTGCTGGGATCGCCATCCTCCCTTTGTTCAGGCCGGACCCCTCA +AAGCCACCTCTGACTCCCACAGCAGGGAGCCTGTCAGCCCGGTGCTCTGCCTTCAGACCC +TGAGCCAGCCCTCCCCAGGGCCCCCTGCACCCATCTCCTCCCACCACAGTAGTGACCAGC +AGCCCGGCCTGCACCTCCAGTCACTGCCCACGAAGCAGCCCGAGGCAGGCGCAGGGAAGG +AGTGGGCTCTGACTCCTCAGCCCTCCTGGGAGGAGGGAGGCCTGACACCACACTCAGTTC +TAATACTCCTGGCCTTGTGTCCCTATTGCTCCTTCGGCCTCACGAGCCCTGCCCAGGTGG +GCCCGGCCTCTGCCAAGTGTTCCCCTCGGCACACCACAGCCCCCTAAACCAGCACCCCAC +TGCTCCTCAAGAGTTCCTGTCAAGACTTGGGTCTTCATGAGAGGGGCGGCTTGGGGACAG +CAGAATCCTCATCGCCTGGTGCAGGCAAGGCCCTGCAGGTGCCCAGTGGCCACCGAGGCA +GTGGGAGAAGGCAGGGGGGCGGGGCACTCACCTGTTGCTGAGGAAGCAGCTCTGGATGAC +CTTCATGATGAAATTTGCAGCCTCGCGCTCAGTCATGTTGGGGCTAAACCTGTGCCTGGA +GAAGAGGCTGTGTCAGGGCTGCCATGGGCAGGGCCGTGCTGGCTCCCTGGCCCAGTGGGA 
+GGAGGGTCTTCCATGGGGACGGACTTCAGCTGAGAGCCATGCCCTGGAAATGTACCTTTG +GGGTCCACATGTTGGAAGATGGGGTGCTGTGAAGGCCACACCTGGCCTATCATGGGCCCT +GTCCCCTTCCCAGCATCACCTGAGTGGCCCCATGGCATTAGGGGACTAAGCATTGGGGAG +CTAAGCTACTGCAGCCCCAGACCTTAGGGTGGAGGTGGGGTGGGCGTAGCATCCTTGACA +TAAATAGAGGCCCCTGGGTGGGTCTCTGGTGTGGCCGGCACAAGCAGGGGCCCCTCACAG +TTGTGGTCTAGGGGTAGAGCCTCACCTAGGAACCCTGTCTGCTCTGAGGTTCCAAGGAGA +TGACAACCACAGTGACAATTACATGAAAGGTACCTATCTTGGATGGAGCCTCAGCTAATG +GACAACTGTCCCCCAGATGGCCTGCGTGTCCACCAAGGAACCTACTTCAAGAACTTGATT +GTCTGGCCGCGAAAACAGGGCAGGCCCGTGTCCAACATGATAGTGACCAGGGAGACGACC +ACATCCATGTAGGGCCTGGGGAGAGACAGGAGGGAGCGGTGGGCAGAGGCCAGCCTAGGT +GGTGGCCCTGCCTGTAGTCCTGTGGACTGGCTGATGCCAACAGCCTCAGGTGTGGGCTCC +TGCCACCCACCTCGCCTGCCACATCTTGCACATCCCCGAGGCAACTTTCGATCTGCTGCA +CTCGGTCACCCGTACTGCCCAGGCAAGGGCTGCCCATACGCACTCTGGACAGGCTGAGTG +TCCTGCCCTGTCCCCCACATAAGGCTGCCGGCCATGGCTTCTGCACCTGGGTGGGATGCA +GACACGCTGACCTGCCTTTCTCTGCGGGGCAGTGGGGATGAACCCAGGTTGGACTGTGGC +CTTGGCCAAGTGACCTGTATATGAAACTGGGACAAAGCCCATCTTTGGCACGTAGCCTGT +GGGGTGGCAGGTGCTCAGGCTTTGGTGACAGGGTGGATGGGATGCCCAGAAAGGGAGAGC +CCATGGCTGAAGGCGTGGGCAGGATTGTGGGGAAGGTGGTTGGAATTAGATGCCCAGAGC +AAGAATTTACTGGCACAGGTGGGCAGACAGAGGTGACCAAAGGACAGGTGTAGGTCAGCA +GGTGGCTGCTAGCACCTACCTCACTCTCTGGAACCGGATTCCCTTCATCCTAAAGGGGAT +CTCAGAACGTTCCACACACCCCCTCCGCCTCCACCCTGGTCCTCACCCAGGCTCACCGCA +CAGCCAGGTAGCCTGGACACACATCTCCATGAACCACTTGAAGGGTGTGGCCTCCATCTT +GCCCCCCATGATCATCACCATCTCATCCGTCAGCTTGATGTCGGGTTCCCAGCCGAGATT +GCCGCCCGGCGAGCTTTCAAACATGAAGCCAAAGTCTGCAAAACCCCAAAGAGCTGCCTG +TGACTGGGTAGGAGCCAGGGCGGGCAAGGACGAGTGGTCTGTTTTGAGGAGTGGAAAAGG +ACTCTGCAACAGGAGCACCCCCTCCACCCCCAAAAGGCAGGTTGTGTTTTCTTGGAGACA +GTGATGGGGTGGGTGGTGGGGCAGCAGGCAGAGAAAGAGAAGGGAGGAAGTGGAGGAAGG +AGCCAAGCTGGGGCACTGAACCTGGACGAGCCCCACTCCGCCCAGCTCCAGCTTCTGACT +CAGAGCAATGGCGGCTCTCGCCCCAGCTCCCTGGGGCCGGGGCCAGGCACCCTCTACAGC +AGAACAGCTTGGTGGCCGACAGTTCGGACCTCAGAGCTGGACCCTGACACTCCTGGCAGG +GTGGTCCTGGGCATTCTCCTCTCTGTGGGGTGGGGATCCCTATCCACCCCTGGGTGCCGG +GGTGAAGGGAGAGGAGGGTGGCGCTGTGGCTGGCTGACCGATGTGGATGATATGGCCCTT 
+CTTGTCCAGCATAATGTTGCCGTTGTGTCTGTCCTTGATCTGCAGCAGGAACAGCAGGAG +GCTGTAGGCGGCCATGCTTCGGATGAAGTTGTAGCAGGCCTGTGCAGAGAGCGCCCTGGG +CTCAAAAAGGCCCTGGGGCCTGTGGGCATTCTCCCTGGTCCCACACCCAGGATCCCTGGG +CCTGTGGGCACTCTCCCTGGCTCTGTGCCCCCACTATGGAGGCAGAGCCCGAATCAGCAA +GTCAGTCTTCGGCAGCAGGAGTGACGGGCTGTCTGGATGTGGGGGTGCAGGCACTTCCTC +CCACACTCAGAACTTAACTTTCTTCTAAGGAGTCCAGCCCAGCTCTACATTCTTTTGACT +CCCAAAGTGGCTTACAGATGCCCTGGTGTTTTTTTTTTTAAATGGAGTCTCGCTCTGTCG +CCAGGCTGGAGTGCAGTGGCATGATCTTGGCTCACTGCAACCTCTGCCTCCTGGGTTCAA +GAGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGTGCCCACCACCATGCCC +AGTTAATTTTTTTACTTTTAGTAGAGATGGGGTTTCACCATGTTGGTCGGGATGGTCTCG +ATCTCTTGACCTCATGATCTGCCCGCCTTGGCCTCCCAAAGTGCTGGCATTACAGGCGTG +AGCACTGCGCCCGGCCAGGTGCCCTGGTTTTTTTTTTTTTTTTTTTTTTCAGATGGAGTC +TCACTCTGTTGCCCAGGCTGGAGTGCAGTGGTGCAATCTCGGCTCACAGCAACCTCTGCC +TCCTGGGTTCAAGCGATTCTCCTGCCTCAGCTTCCTGAGTAGCTGGGACTGCAGGCGCGT +GCCACTATGCCCAGCTAATTTTTGTATTTTTAGTAGAGACAGCGTTCACCATTTTGGCCA +GGATGGTTTTGATCTCTTGACCTCGTGATCTGCCCGCCTCGGCCTCCCAAAGTGCTAGGA +TTACAGGCGTGAGCCACCGTGCCCAGCCCAGGTGCCCTGGTTTTAACCCTTAACAAAGGA +TGACTGAGGAGAGCAGGGTGTGGGTAGAGGCTGGGTATGGGTGGCTGGGGTAGGGTGGAG +CGCACACGACTTTCCCCGGCCTGTGGCCACCCTGGCTACCTGCTGGAAGGCCAGAGTGGA +CTCATCCCCGTACTGGCGTGTGAAGTAGTCGTACATGCCAAAGTCTGTCTGGCGGCCCAG +CTGGTCCCGGGAGGTGCAGTCGGGGATGCACTCGATCACCCCGCACTAGGAGGAAAGGCC +AGTTCTGAGGCCCGCCGGGTGCGAGGTGCCCAGGGCTGCCCTACTGGCTCCACTCAGGGA +ACTTACCCCAGGGGCAGTGGCCACCACGCGGTAGGGAAAAACAAAGAGGTCCAGGCCGAC +CAGCTGGAAGATGTTCTTGAAGAGGTCGATGATCTGCAGGGCCAGCATGTCCTGGGAAGC +CGGGAGGCGCAGGATGCGGTCAGTTGGCGTCCTTGCACCCCAGTAGCTCTTCTGGCTCAT +GCAGGGCAAAAGCCGAGAACCCAGAGATGGAGTGAGGTGGGGAGACCACAGCCGAGCAAG +AGTCTGGAAGGCCTTCCTTTCTGACCACCGGGGGCTGGACTCAGGCTGCTGGGACCACCA +GGCCCTGGGCTGAGCATGAGGCTGAGCTGTCTGGTGGGGTGTGTGAGATAAGGCACCCAA +CCCCTGGACAGTTCCCTCAGGTGGGCACTGGCATGGCAGGGACAGTGGGAAAGGGGCTGG +AAGGAGAGGCCTCTGGGTTTTGCAGAAGCCCTAATTTACCCCGTGGCACCTGAACCATGT +AAGAGAAGGAAAAGATTCACATTTCTGCATATGAGATTGGACTCTGGTGGGCCTGGAGCC +TTGGGGAGCAGCAAGCCCAGTCCCCAAGCCTACTTGAGGCCTGACCCTGCTTACCTGCCG 
+GCAGTCGTCTCCCAGTTTGAAGATGGCTGCCTGCCAGGAGATCTTCTGGCCGTCGGCCTC +CTGCGTGCTGCACTCATCCTCAGAGTCTGAGCGGCACCGCAGACCTGCCCGCAGGGAGAG +AGGCCACTGTTAGCCTGTAGGCAGTGAGAAGCCCTCTGAGGGGGCCAGGGATGAGTCCTC +TCACATGCCTGAAAGGAACCCCAGCAATCTTGGGGATCACTTCTTCAAAACCCCAAGGAG +GCCAAGGAGGAGCCCAGCAGGGCCCAAGGAAAGCCTTGGAACAGCAGGTGGAATCCAGGG +TTGGAGTCTAAGAACATTTAGGCTTAAATGGAACTAAAAACAGAGCCCCATTTCATCTGC +AGTGAAGACCCAGGTGTGCCTGAGTCAGTGTTGGGTGCTGTGAGCTGGGGGGCAGGAGGA +ATGTGTCTGTACACTGGGGTCCTGGGAGGGCTCGGGGCCAGTGTGCAGACATGGCCATGG +GAGGCAGGTCCCTGCAAGGGGTGGCAGCCCCCTCTCCACAGCTGGCCACACCGTCTGTAC +TGGGTTGAAGAGTGTCTCCCTAAAATTCATGTTCACCCCAAACCTTAGAATGTGTCCTTA +TTTGGAAACAGGGTCTTTGCAGATGTAATTAGGTTAAGACAAGGTCACAGTGGATTAGGA +CAGTCCCTAATCCAATGACTGATGTCCTTCAAGGGAGATCATCATGTGAAGAGGGAGGCG +GAGATGGGAGTGGAGCATCTGCAGGCCAAGGTGAGCCCAGGACTGCTGGCAACACCACCA +GAAGCTGAGGAGGCAAGGTGTGACCTGCCCAGAGCCTTCAGAGGGGGCACGACCCTGATG +ATGGACTCCAGCCTCCAGAACTGAGATCGTCCGTACAGTAGCCCCAGGAAACTAACACCC +CCCAACGGCCCCGGGCCCCTCAGCCTCAGGAAGGAGCCTGCTGGAGCATGCCAGCTGACT +CATGGCAGACAGACCTCTGGGGTGGCACATCTGTCAGTGCATTTGAGCTCCCTGCCATCC +CCTAGCGAGGGGTCTGACTGAGGGCAGACAGATGGACAGACATCATCTTTTATGGAGAGC +TGTACTAAATTTAACACATGCCAGAAGAAACTGGCATTAGCAAGGAAAAGCACTGAGCTC +CCAAACAAAATCAGGGTTCTTTACTGACCTTCTTTTTCAAGTTCACTAACTCCACATCGC +TTCACCTTGAACTTGGCCAGATATGGGGCTTTTGCAGCACTGAAAACAACAAAAAGAATG +TGGCACCCATGATGCAGCCGAGAAAAACTTCACACGCGGACGCTGTGGAAGCGGGGGATG +GGTAGGGTGAGGCGCTCACCTCTACATCGGGGTCCCAGACTTGTAGTCGACGTCCAGCAC +AATGGCTTCAGGGTTGCTGGGCAGGGAGCAGCCTGTGCAGGGACAGAGGCAGTCACAGGG +AGTGCATGTGTCACCACAGGTGAGCCAGAGTCAGTTTCCAACACGGATGTTACGTGCGTG +TCCACCCTGCAGGCACACCCCACCCCTGTGGAGGGGCCTCTGGTCCTTGTCCAGGGCACC +ACTAACTTATTATTCTGTGGCCGCTAAGGGTCCGTCCTCCTTGACTCTTCGACTCTTCCA +ACCATCGACCAAATGAGTCTGCACCTTCTGGGAAATGCTCTTCCTAGAGGAGCACTGAGC +CCCATCCTCACGGATACAGCCCTACTGCTTCTGGGGGTGTGTGTGAGGCCTGTCCTGCCT +CCACCTGCCTGAGCAACAGCCTCTGAACAGCCCTCCATTCTTGGAGTCCCTGTTTTTTTT +TTTTTTTTTTTTTTTTTGGATAGAGTCTCACTCTATCACCCAGGCTGGAGTGCAGTGGTG +CGATCTTGGCTCACTGCAACCTCCGCCTCCCAGGTTCAAGCAATTCTTCTGCCTCAGCCT 
+CCTGAGTAGCTGGGATTATAGGTGCCTGCCACCATGCCTGGCTAATTTTTATATTTTTAG +TAGAGATGGAGTTTCGCCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAAGTGATC +CACCCGTCTTGGTCTCCCAAGTGCTGGGATTACAGGTGTGAGCCACCACGCCCGGCCTTC +CTTACCCATTTGAACACTACTCTCTCAAAAGCCTTACTTAAATGTCACTATGTCCATGAA +CTGTCCTTGGTTCTCCCAGCCTTTAGGACTTTTGTCTAACACGTTTCTTTTTTTCTTTTT +CTTTTTGAGACGGAGTCCTACTCTGTCGCCCAGGCTGGAGTGCAGTGGCTCGGTCTCGGC +TCACTGCAACCTACGCCTCCCAGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGC +TGGAATTACAGGCGCCCGCCACCGCGCCTGGCTAATTTGTGTACTTTTAGTTGAGATGGG +GTTTCACCAAGTTGGCCAGGCTGGTCTGGAACTCCTTACCTCAGGTGATCCACCCGCCTT +GGCCTCCCAAAGTGCTGGGATTAAGGCATGAGCCACCGTGCCAGGCCAACATTGTTTCTT +TATGCTTATTTTATATCTCTGTGAGTTCAAGCTTCTTCAGCTCAGGGACCATGTCTTACT +CATCTTTTTAACAGCCACAATACCCAATGTCATGCCTTTTCTTTTCTTGTTTTTGCTTTT +TTGAGACAGAGTCTCACTCTGTCACCCAGGCTGGAGTTCAGTGGTGCGATCTCGGCTCAC +TGCAACCTCCACCTCCTGGGTTCAAGTGATTCTTGTTCTCAGCCTCCCAAGTAGCTGGGA +TTACAGGCACGCGTCACCATGCCTGGTTAATTTTTTTTTTTTGAGACCGAGTGTCGCACT +GTCATCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTCACTGCAAGCTCTGCCTCCCGGG +TTCACGCCATTCTCCTGCCTCAGCCTCTCGAGTAGCTGGGACTACAGGCGCCCGCCACCA +CACCCGGCTGATTTTTTGTATTTTTGGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATG +GTCTTGATCTCCTGACCTCAGGTGATCCACCTGCCTTGACCTCCCAAAGTGCTGGGATTA +CAGGCATGAGCCACCGCGCCCGGCCCAGGCTCCCCTTTCTCAGTGAGTGCTCAGTAAATG +TGTGAAATAAGAGGACAAGGGAAAGCCTGTTTCTGGCCAAGGAACTGCCAGTCCCCAAGG +GGGATGTGTGCTCCTTGAGCCCTGAGAGGTGGGCTTTGAGGGGAGCCAAGCTTGGCCTTG +CTGTGGGGTACAGGGAGAGGGGGGCATGCCATCTCCTCCTCTTCCTCACCTGCCCTTCTG +CCTGCTCCAGGCTGTGGCTCACACTCAGGCCCCTCTGTGCTCTCCTGCTAGAGCCCTGCT +GGCTTCCCTGACCTCTGGGGCAAGACTCAGCCAACACAATCTAGAGCCAGGGGCTGGGGA +CTTCTGTGCATGCCCCCTGCAGTACAATGCTTCCAGCTCTTTTTGCCCCTCCCCAGCAGT +TGTGGCCTTCCCAATGCGGGAACAGAGGCCCTGCATACATGTCCTGCCCTCTCTGGGAAG +CCGTGCTGTTGTGTGAGCTCAATACAATGTGTTGGGAAGAAATGGTTAAACCGGGGTCCT +GCAGGGCACTCAAGGACCAGCCGCTGTGAACATCTGCACGGGACAGACAGAGTTTGGAAG +ACAAGTTTGTGCCAATAGTTAGAAATGCAGAGAGAGGCCGGGCACAGTGGCTCATGCCTG +TAATCCTAGCACTTTGGGAGGCCGAGGCGAGAGGATCACAAGGTCAGGAGATTGAGGCCA +TCCTGGCTAACACGGTGAAACCCTGACTCTACTAAAAATACAAAAAAATTAGCCGGGTGT 
+GGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCGACAGAGCAAGACTCCA +TCGCAAAAAAAAAAAAAAAGAAATGGAGAGAGAGCTGCCCAGTGAAGTCTGGATTTCCAT +CTTCTCTTGAAAAGCTCGGCTTGAATTCCCATGTGGCCACAGCCACAGGCACCCAGGAGC +GACTTCCCATGGAGGCGGTGAGGGGGTGGTGCTGATGGGTCCCAGCACACCTGAGCCCTC +CCACACTGGGCCCGCCATCCCCTTGTCTGTGTGACCTGCTCCTGTAATGCTGGAGCCTAG +GAACCCTCTTCTAGAGTAACAGCTACTGAGGTCAGTGTGAGCCTCCAGAAAAACAGCAGG +CAGCAAGGAAGATGAAGGGCACCCACTGACGAGCAGCTCCCGCTCACTAGCGGCAGATTT +TGGGGAGGGGCTGGGGTCCTGGGGCACCAGGCACCGTGGGCCCAAGCAGGTGTGGGGCTG +GCCAAGCCACTGTGTCCACATCCTGCAGTGCCTGGAGAGGGCAGCAGCCTTCACGCCCCG +CCGCCCGCCTGCAGGAAGAGGTTGGTCTGAGCCTCCAGAAGCCACCTGCTCACCTGGCTG +CACCGTCACTTCAGACAGGGCCGACAGACAAGCCTTCTTTCTCTGGTCGCCTTTAGGGTA +GGGCCTGAGAAACAGGAAAGAAATTCTTGCTTTGTCTCCTGGAGGAACTGGCTACTGGGG +GACAGCCCAGGGCCCCTCCTGCCTGGATTGCCCTCTCTGTCCCTCTCTTCCCGGCTCTAG +CTGCTCCTAGCGCAGCCAGGCACCACTGGCAGCGATTCTTGCCCTGCATTCCTCATGGAG +AATCCCCGACAGCCCCGGAAGGGCTGTTTTCTGGCAGGCTCTGCCCCTGCTGTGTGCGGG +GGTGTGTGGCTGGGGGTGGTGGGGGTAATAATGCCTGCCACCTGCTGCCACCCAGGAAAC +GCTAGCTCCCGTCCTTCATCTCTTGGGTTAGAAGCTGCCAGCACTGCTATTCTCAGCACC +CAGCCCAGAAAAGGGCCTCAAAGTGACCAAGGTCAGAGCAGTGTACGCAATGGAGACAGC +CCAGCCGGCGTGCCCAGCATTGATGACTCCAGGGTTTCATCCCTTATGAATGCACAAAGC +CACCAACATCCAAGGATGGTGTCAGGTCCACATGAATGGAAGGTCTAGGAAAACGATGAC +AGGAGCTGCAAAGGCAGTCTGGTTTTCCTGACACCAAGAGGATCGATCCTTCAGAGTGTG +GAGAGGGCAGGGTGACCTCAAATTGTGGAGCCATGTGCTAAATTCCACAAATCTCAGGAA +TGAAACTAAGCTGTGTTTCCTGAAACTGGCGTGGCCGTGCATTCTGCCATACCTTCAAGA +GCCCTGACTCACCCCAAGCAGTGCTTCCCCACCCACCCTGGCTGTCCATCAGAACCACCA +GCAGCCCCGCTGCTAGAGACTCTCATTCATCTGGTTGGGGCCACGCACGGGTATTTTTAA +AGCTTCCCAGGCAATTCTGTTAAGCTGCAGGGACTGGAACAAGATTCCAGAAGGTGGTTT +CCGAAGCACTGCACTTACTTGATGACAGCCGACACGTTGGTGATCTTGTTAAAGAAATCA +AACTCCCGCTGGTAAAAGTCCTTCGCTGGGCCCGACAAGGAGCCTGTGTTCTCCTCTACT +AACTGCTCCCGGAGGTCGCCGATGTCAGCTGCCAAGGAAACAAAGAGGCTGAGTCTCTGT +GGCTGTGGCAGAGGCCCCTCAGGAATACCAGCCCTGTTTCCCAGGCCCCAGACTGGCGGT +GCCCAGAGTATGCTCTGCAGGCTTGGTGAGACCATAACAGCTGCTGTGTGCCCCTTTTGC +TGTGTTGACACAGCCGACACTGCTGGGAAGCTAAGCTGGTGCCTAAGCGGAGCTCAAGGT 
+AGGGGCCAAGCGCACAAGTGGACGTGGAATCCTCACTGCTGCACACGCTACACACGCAGG +AAGAAAAAAGCCCGTGTCATTTATGAATGTCACAGGGCATAGAATCCTCACTGCTGCACA +CGCAGGGAGAAAAAAGCCCGTGTCATTTATGAATGTCACAGGTGAAGCAATGAAAACTAA +TTCTTATTAAACTCTACCCTGAATACATGTCTTTTTTTTTTTTTTTAATAGAGGCACACA +GTGGGCGGGGCTCTATATTAAATAGTGTCTCTCACTATTCAAATCCCAGGCTGAATTTAA +ATTCCTGGGCTCAAGAGATCCTCCTGTCTCAGTCACCTGAGTTGCTGGGACTACAGCTAG +TAGGTGGGACTACAGGTGCGCATCACCACATCTGGCTTTTAAAGTATCCTTTGTGATGAA +ACGGGATGTGCACGAAGCACTCTTGCCGCATCTAAGGCAGGCGCTGTCCTGAGAAGCACC +TGGAGCTTGTCTGATTTTGGAGCCTGGACTAACCTTTTTCACAGACCTTTGTTTTTACTT +GAAGAAATGACTGACAGACAACATATAGCTATTCAGACTTAGGGATGTGGCAGATGTTTT +CCTGAAAATGAACAAAAAATAAATCTTCCACTCAAAGGAACTGAAACATTTTTTTGGAAA +ACTTGGATCTGCTGCTGTGAGCTCAATAGCTTCCCTTAAAAACTCTTTCTGGGCCGGGCG +CGGTGGCTCATGCCTGTAATCTCAGCACTTCAGGAGGCTGAGGCGGGCGGATCAGAAGGT +CTCAGCCAGGCAATGTGGCCCACACCTGTAATCCCAGCACTTTAGGAGGCCGAGGTGGGC +GGATCAGAAGGTCTCAGCCAGGCAATGTGGCCCACACCTGTAATCCCAGCACTTTAGGAG +GCTGAGGCGGGCGGATCAGAAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGTGAAG +CCCCGTCTCTACTAAAAAATAAAAAAATTGGCCGGGCACGGTGGCTCATGCCTGTAATCC +TGGCAATTTGGGAGGCTGAGGTGGGCGGATCACGAGGTCAGGAGATTGAGACCATCCTGG +CTAACACAGTGAAACCCCATCTCTACTAAAAATACAAAAAATTAGCCGGGTGTGGTAGCG +AGCGCCTGTAGTCTCAGCTACTCGGGAGGCTGAAGCAGGAGAATGGTGTGAACCCGGGAG +GCGGAGCTTGCAGTGAGCGGAGACTGCGCCACTGCATTCCAGCCTGGGCGACAGAGCGAG +ACTCCGTCTCAAAAAAAAAAAATTTAAAAAAAAGTCCGGGTGCGGTGGCTCACGCCTGTA +ATACCAGCACTTTGGGAGGCCAAGGCGGGCGGATCACGAGGTCAGGAGGTCGAGACCATC +CTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAAAATACAAAAAATTAGGCGGGCGTG +GTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCATGTACC +CGGGAGGCAGAGGTTGCAGTGAGCCGAGATTGTGCCACTGCACTCCAGCCTGGGCGACAG +AGTGAGACTGTCTCTCAAAAAAAAAAGAAAGCCCTCTCTCGTCTCTCAGTCTGTGCTTAG +TAATCAGCTAGGCGCTGAGAGAACAGGGCATGGCCTCCTTACAGAGGTTTGCTGCTTCTT +TGCGTGGCATGTTTTCATTTGGCATTCCCTTCCCTGGAATGCTCTGCCCTGTCCACCCTG +CATGCCGGTGCAGCCTTTCAGACTCAGCTCAACTGTCTCTTTTCCAGAAAACCATCTCTG +ATTCCCCAACTCTGGATGGGCCAAGTGTCTGTACCGGAGGACTGCTCCCACAGTTCCCTC +AAATTGAAATCACCTGTCCATTTGACTGCCTCCTACAAGACAACAACAGACCATACCCCA 
+GTCATGTAGGTGTGCACCAGCCTCAGCCCAATCCTGGGCCGTCAGCTGCTCAGAGGGGTG +TGTGAACGCAGCTGGTGCCAGTGCAGGGAAAGTGCGGTGAGTGGAGTCTGCTCAGCTGCC +TCGGGTGCGTGTTTATGCTTCTCAGCTCAGGCCAGAGGAGAAGCAGAGCCAGGCATGCAC +CGGCCTCCAATGACCGTGCAACACAGTCAAGGCCAGGGCTCAGGGGGGTCTAACTGCTTT +TCCGTCCCTACCTCTGGCACCCTCAGGGCTGCCCACGTTGCACCCCTGTCCTGCCTGCCT +CCACCATGAGCAGCTGCACTGTTGAGGGTTACCTGGGGGAACAAGCTGGCTGGCGGTCAG +GTACTTCTTATCTGAGAACATGGCGGTCCAAAATTTAATCATGATGCTTATGTCTTCACG +CAGCCGCTTCTCTCCTTGAGTAGGAAACTTTGGGGGACAGCTTCAAACAACCATAAGAGG +ACAACGGCTGAGTGTCTGGAGTCAGGGACTAGAGGCCAGTCACACAAAGCAGTGAGAAGG +TGATGCAGACAATTTCTTAATGACAAAAGGTCAGCCCAAAGCCAGTCATTAGCAAGGATG +ATCTGGCCCATTCCCATGGGAAGGTGTGGTGGCTCGGGAGCCCTCTTATGCCCTCCTCCC +TCACTGCCCACAGCAGCCCCTACAGCTTTCCACTGAAGCTGCACTTGTGGCAGATCAGGG +CAAGGAAGCTGCACTCACACAACAGCACAGCCTATGGGTGCAGAGCCCCAGGCAGTGGGT +GTTTGCAATTCTTCGTGCAATAAGCCTTCACATTAACAGGGAAATTAAATGGACGCACTT +AATCTCCTAAATCAGTTGAACTGAGGGAAAATTTGCCTACATCTCGCTGGGAGGCTAGCT +CTAAGTACACGGGACACATGCACAGGGCTGAAGGGTATTTAAATCCACAGGGCCAGGGAT +ACTGAAGGAGGAGGTTGGTGAGCACAACTGGGCAACTGTGTATTCCTGTGAGTGACTTCC +TGGGGGTCTGGGGCGATGGAGCACTTGGAGGGGCTGGGCCGTCCCTGGATGCCGCACAGC +ACATCTTGGGGGAGTACCTGAAGTAGTCAAAGGCAGTGGAGTAGATCTTCTCGCGAAGCA +CATTGCGGATGGTTGCATTTGGAACCACATCGGCATGCAGGAGGGACAGCCCCAGGGTCA +GCAGCCTGTGAGGGAGCCCCGGACCCGGTCAGAGCAGGAGCCTGGCCTGGGGCCAAGTTC +ACCTTATGGACTCTCTTCCCTGCCCTTCCAGGAGCAGCTCACTGAAATGTGTTCCCCGTC +TACAGAAGTACCGTGATACACAGACGCCCCATGACACACTGTACACACCAGGGGCCCTGT +GCTCCCCAGGAAGAGGGCCCTCACTTGAAGCGGGGCCCGATGGCCGCCACATGCCGGTTC +ATGCTCCCCTTGGCCCCACCGATGTTCAGGGACATGGAGCGCTGCAGCAGGCTGGAGAAG +ATCTCCACTTGGTCAGAGCTGCAGTACTTGGCGATCTCAAACCGCTGCACCAGGAACTGA +GCGAAAAGAGGAAGACGCTGTGGTGGTGGGGCTGAGCCGGGCTAACCCTGGGGCCGGCAG +AGACAGCTGAGGAGGGGTTCTGGGCTGACTGGCCGAGCTTGCCGAAGGCCTTGGGCTCGG +CTACCCCCACCCCAGGAACAGTCCTGGCCCCCCAGGTGGAGCACCCCACAGCCTTGGTCG +GGGAACAGGCACGTACGTCGATCCAGATGTAGTGGGGGGTCACTTCGGGGGGACAGGGTT +TGGGTTGACTTGCTTCCGAGGCAGCCAGGGGGTCTGCTTCCTTTATCTCAGCAGAAAACA +GGCCAAATTTCTGCTCCACTGTCATGTGCCAGCCCCTGCCATCTCCCGCATGAACTACAG 
+GTACAGAAGGAATGTTAGCTCCTCTGTGAAACACAAAAAGGGGCTACTGTCAAGTTTTCT +CTAATTAAAAAAAGGAACACGTGCTCACTGAATAAGATTTAGAAACCAAGAAAAGTATGA +AGAAAAAAATGAAGATCGCCTATAGTCCCACATCCCAGGGTCATCTGTGTCAGTGCTTGG +TGTATTTTCTCCAGCCTCTCGAGGCTGCATTCTGTTCAACCCGGTTCACCGTGTGGACAT +TGCAGGTGTGAGGTGGCAGCACCCGGGATGTACCTGATGCCCTGACAGGCACTGGGTAGG +TGACTCTTCTCTATAACCCTGATAACCCTCTGTGGCTGTGAGCGGGAAGACCCCTCTGTG +GGCTGCTGAGCTGGCAAGGGCAGAAGAGGGTTCTGGATCCAGGGCCTGGCTCCTCCTCTC +CACCCCACACAGACCCTACCCGATACCTTCATCCCGCTAACCAGATTAGGAAGGCCCATG +GAGCCGAGGCGAGCCTGCAGCCACCATGGCCAGGCCTGGGAGCCCAGGGTCAGGCTGCTC +CCATGGCCCAGTCATGCTCAGCCAGGTCCCCATTCCGTGGGGGGGCATGAGGGGTGCCCA +GAGGGAGCAGCATGGTGCCCGGGAGACACCTGGGTGCTGCAGAGAGTAAGAGGTGCAGCT +CACACATGGAAAGCCCCCATGATTTCTGGGTGTTTTTACAAGGAGACCAACAAACAGCAT +TTGGCATCTGGCCAAAGAGGATTCTATCCCTCTGTTGTCCCAGCAGCACCAGCTTCTACA +AGGGCTCCAAGCGCCTGTGCAGAGGTGGCAACGAGGCCTAGTGAGTAGAACACACACCGC +GGCTCCCTCCCCAGTACCCATCCCCTCCCTTCTTCCTCATGAGGTGGATCAGGGTGGCCA +CGTGGCCAGTCACAGGGCTCTCAGCCAGGGTCCCTGCTGGCTGAGGCGGCCACTGTTCTC +CCCGGCCACTCTAATGAGCCAGGGAGAGTGAACTGGGCAAGGCCCTGGGAACACATGCCA +GGGTGAGCTTCATAAGGTCAAGGCAGCTGGCCTCTGCCCTTTGCACTCTGTCCTTGTCTT +CCTTCCTGCTGGGAGGCACAGAAGCCATCTGGAGGCTGTGAGGGTGGAGACAAAGGCCAC +ATAAGCCCAGGATGGCCAGGAGCCGACAGGAGCCTGGGGCGGGGCTCTGTCTGCTGCTTC +CCTCTAAGCTGCCCATGGGGTGAGGACTCCTGGGGGACGAGGCACTACAGCCTGCTTTGT +GTTTCCTGCAGACCGATTAGTCTTATGGAAGCAGACAAGGGAGCAAAAAAATGGCGGTGG +GGGGGACCCCAGAACTGCCACGGACAAGAAGCCATCGAAGAGACACACTCCCATCTATTT +GTTAAACAACCACATCCTGGTGGCTGCCTCTGTGCCAGGGCCTCGTGGACTCTGGGCATG +GTGCTCTGAATGAAGACAAACATGACCACACAGTCTATGGTGGGGACCACAGACAACAAG +AAATAAGCTACACACACTGAGAAACACTAAGAAACAGCAGTGCGAGTGAGCGTGTGGCCA +GGTGGCCAGGGAGAGCCCTGGCAAGGAGGGGACATCTGAGATGAGCCCTGGAGGAGACGC +AGGAGTGAGGCGCAAGGCACGGCCATCTGAGAAGAGGGAATGCATGACGCGGCCTATGGT +GGGAGCAGCCCAGGAGGGCAGCTTCAAAAGTGAAACTGATGGGATGTGCTGAGAATCATC +CGTGGGAAGTGCAAGCAGGACCCCAGCATGGCCACAGCACAGCAAGCGAGGGGAAGAGAG +AGGATGGAAGTCAGGTGGGAAGGGGGCTTGCAGGTCACAGAAACAAGCCCACCTTCGAGT +CTGTAGGGGGGCTCTGATTCCCCCCTGGCTTGCGCCAACCGCATGTGGAGAAGGGGAAGA 
+TGCCTGGGGTTGGGGAGCAGGAGGCCAGGCCAGGGGGCAGCAGCTGGGATGCTGGGACTT +GTCAGATCCTGGATAAACCTCAAAGGTGAAGGTGATGGGATGTGCTGAGAGTCATCAGTG +GGATGTGCATAAGAGGGAGTCAAAGATGTCTGAGCAGAGAGAAGGACCTCGCTGGCATCT +ACAGTGAAGGGAGGGACCTCGCTGGCATCTACAGTGAATGGAGGGACCTCGCTGGCATCT +ATAGTGAAGGGAAGGGTCACGCTGGCATCTACAGTAAAGGGAGGGACCTAGCTGGCATCT +ACAGTGACGGGAGGGACCTCGCTCACATCTACAGTAAAGGGAGGGGCCACACTGGCATCT +ACAGTAAAGGGAGGGGCCCCACTGGCATCAACAGTGAAGGGAAGAACCACCCTGGCATCA +ACAGTGATGGCGGATGGGCGACCAGTGCAGCGGTGTCTGGAGCTGAGGTCGGACCCATTA +GTTGGGGCTGCTAATCAGTTCACCATGCAGGCCGTGGGCCAAGCCACTGAAGGGATCCAC +CTGGACTAGGAGTGACTGGGCTGGGTAAGCAGTGGAGCTGGCCAGAGTAGCCAGGCAGGG +AGGAGGAGCCCTGGGGCATGCCAGTACTGAGAAGGCCCTGAGCTCAAGAAGGTCCCTTTA +TTCTGGAGGATGAAAGGGCTACAAAAATTACAAAGGCCTTAAGTCCCTGGGAAAAGCAGT +TAGGCTCCCAAAACAGCTGCTTTCAAGACCCAGAACTCTGAAGGAGGCTGTGAGCAGCTT +TCCCTGAGCTGATCTGTCCTCCTACCATCTGGGACCTGCTAGTGATATCTGTACTCCAAC +TGCCCTGCCCCCAGGGGTCAACCAACCCACTCCTATCTGAGGACCCACTCCTATCTGAGG +ACCAGACAGAGAGGCCAGAGGTTCAAGGGCACCTGGACTAAGGAAGTGGAGAGGAGAGAG +ACAGATCTGGGCTCCAGAGCCCCAGGTACAATGTGAGGGCTGACACGTGGTCAGGAGCAC +CCCCAGTGCCCTGTGAAATGGGAACGGCTACAGCCCCTACTCTTTTTCTTTTTTTTTTGA +GACGGAGTTTCGCTCTTGCTGCCCAGGCTGGAGTGCAATGGTGCGATCTTGGCTCACCGC +AACCTCCGCCTCCTGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCTAAGTAGCTGGGATT +ACAGGCACATGCCACCACACCCGGCTAATTTTGTATATTTAGTAGAGACAGGGTTTCTCC +ATATTGGTCAGGCTGGTCTCGATCTCCCGACCTCAGGTGACCCACCCGCCTCTGCCTCCC +AAAGTGCTGGGATTACAGGCGTGAGCCACTGCGCCTGGCTGAGCAGCCCCTACTCTTAAG +GCCCACCGAGGTCTGAGGCTGAAGCAGCATATGGAAAGGAATCCTGACCATAGCAGCACT +GTGTCCTGACAGTCACAATTTGTGGTGAAACCAGGGCTTTCTGGTGACCCCAGAAAACAT +CAGTGCCGGCTCTGCCAGCACCCAGACACCTGGTACCCCGAGACTGGCAAGGGGACTGGA +AGAGGGCAGTGGCCTCCAGGCTCCCTCCACACTGCCCAGTGCTGTGTGCACACAAAGGAC +CACGGGTCAGACAGATAGAGGCCGAGCTGTGGGGCTGTGTGCCACCCAGAAACTCCGGGG +AAAGGGGCCAGTGGCGGTCCCCCCTCCCCTGCGCCTCACCAGAGTGCCAAGGGCCCAGAC +ACCATGTGAGCAGCAGCCAGCGGGGCGGGGGGGCCTCCTGAGCCCGTGGCCTTCCTGGGG +GTGCATCAATGCCTGGTCACCTGGGGAAGAGAAGACGAACACAGTCACCCAGACAGCCAT +CAGCGAAGAAGGGAGGGAAGGGCGATGAAGGGACGCACCTGGGGGTCTCTGGCTGGGGCA 
+CCAGGGGCAGCCCAGCAGGAGACTGTGACCACAGAGCCTGGTAGTTGGATCTGACAGCCA +GGAAGAGTAAGGACCAGCGAACAGGTCTAGGGAGTTCGGTGGGTGCAATACCTGCCCCTC +TAACACAGGACATTAAGGACACCCAGGGGCCACCTAGGCCAGGGGGCGCTGGTGGCAGAC +ACACAATTCTTGGCCTGCGGGTGTGGTAGCAGGAGGCCTGGGCAGGGCCCTTCCCAAGCT +GGCCACATTTACATAACAAAAGGTCAAAATTGGAGCCACTTGTGCAAGGGCTATTTATAA +GAACCAGTCTCAACCGCTTTTGGCTCCCCCGTTCCTTCCCTTCCTGATCTGCTCTCACCG +CCTCCCTGCCCAGAGGGCCCAGCCCCCTAGCCCGCCTGGCCTGACCACATGGCCTTCCCT +CTCCAGAGGTTTCACTGCCTAAGGGCTCACCCAAGCTGCAGGCCTGGGCTTTCAATACCG +GCCCACCAAAGCCTTGGGCAGCTGCCGCCAGGAAACGAGGGGTTCCCTCCCACCCCCAGA +GCTCTGGGTCCTGCAGGAGGGGGGATCCTGAGGTGGTGGGTCCTTGCACCCCACACCTGC +CAGCCTGGGCCCCCACGGAATAGAGCCGGGGTCGGGGAAGGGGCGTGCAGCGGCCCGTCC +CCGTTTCCCCTTCCCCCACTCCACCCCCAGGGCCAGGGAGGCTCAGCAGGTCCCTCAGGA +ATGTGACCAGACCCGGGGTGCCGGCCCAGAGCTGGGAGGCCTAGGCTGCAGCTGGCCAGC +TGTCTCTCCAGCCCCTAGTGCAGCTCCCCTCTGGGTGGGGGGTGTGAGGGGAGGGGGTGC +ATGGCTGGTCCAGGACCCCACCAAAGGCCCAGAGGGCAGCAGAACCAGCTCTGTTGGTAG +GAAGCCCAAGGGCGTCCACAGTTAGCAAGGGTTGGGCCCAGTGGGCTAATGCTGGGATGA +GGCCAGTAGGGCAGGGGCCACTGAAACACTGGCGGGAGCAGTCGAAAAACTGGGTAGAGG +GTCTCGGTGGCACACTGGAGGCAATCAGATACCACCATCCAGCCCTGTGCCACTTCATCT +GCTCCCCAACCACTGCCCCTCAGGACCCAGTCAACCATGGGTAGAGCGCATGCCAAACTC +TGCAGCCAGGGGTCACCTGACCTTGCCAGAGGCCAGGCCAGCTACTGGATGCCGCGCAGG +GAGTGGGTGGCAGAATAGGACTGGGGGGCACCTGAGTGCCATGGGGGTGGGAACACCCTC +CGTCAGTGCCCACTGCCTGGCCCTGTCGGGCACTTTACAAGCCCAAGGCTAGCACTAGGT +CTCCCCACCAAGGAAGGATGGCCCCAGGCTGGGTCAGCTGTGATGTAGAACAGGGCAGGG +CTCCTCCCTGAACCCTCCACTTGAGTTAGGGGGTGTCAGAGCCACCTGCAGAAAGTTCAC +AGCAAGCACAATGCCACCATGCGTGCCATCCACACCAGCTGGGCCAAGTCTGGGCCACAT +CTAGGCCGCCACTGTGTCTGCGAGCAGCAATGCAGCCTCACCCCATGGCGGGGCAGGGGG +TGGCACACGCCATCCTGCTGCGAAACGCCTACTAAGCACTGCTCCTTCCTTCTCAGCCAC +AGAGCTCTGCCCTACCCGCTCACCGCTGGACATGAAACAGCAAGTGCCTATCCTGGAGGG +CTCCGCCTGGGGAGTGCCAGGTAACTAGAAACACGGCTCTGGGTGCTGGCCCGGATGCTG +GTGCTCAGCCCTGGAAGCAGCTGACTGCAGGCTGCGATGAGAATGCCACCTAGCACCCAG +CAGCCCCTGGCAGCAGGCCCGGGCAGCACGCCTATCAGCCCCCAGGCTAGCACGCCCAGC +CCTGGCCTACTGCCCATCCAGAAACGCTGGCTCCCAGGACGCCCAGGATCTGAGGCAGTA 
+GGGCAGCAGGCCTGGGCTATCGGGGCCAGCTGGGCATTCGCCTAAGGCTGTGACCCATCC +CCTCCCTAGGACAGAGCCTTCAGGGGTCCAGGGGAGAGGCAAGAAGGGTAGCACAGAAAC +TATCTCCCAAGTTAGGATCCCTCAGCTCCAGGATTCCCCCTGTAGGTGGACTGGCCCCAG +CTGGGAAGGACAAGAGCCCCAGCAGCTGGAGGGAGGGCCAGCTCTCTCTGCGGAATCCTG +AAGGTCTGCAGGAAGGAACACAAAGCCTCGGGGTTACCTTCCCAGGCCATCTGTTAGAGC +AGACACACGGCCCTGTTTACGGAACAGCCAACATGCCAACTGCTCTCCTTTCCTTCCCCT +CCCAGTCCCCGCCACAAAAAAGCCAGAAAGCCATGTCCCCAGACACTGAGGGCAGGCCCA +GGGCCCAGGCAGGTGCTAGCGGGCTGGAACAGCACTCAGATACTCCAGCCTCTCAAGCCA +CTGGTGGCACATGCCCAAGCTCTGTAGAACTGGGCCAGCCACAGCAGCCCCACCCCCGCT +CTGTTCGCCCTCAAAGAACTTACAGTAAGGCAGCTGCTCTGCAAGGCCCAGTCAGAGCCT +ACCCGGCCCAGCAAGTCCATTCTGTGGCAAGAAAAACCTTGGGGACACACATACCCAACT +GCTAAGTAAGATTATCTCTGGGGAGCGGGATTACAGAAAAACCATAGTATCTGTATATTA +TGTTCATTTCTGTAATACTCAACATTTTAAAAAGTATTACCTTTTTTTTTTTTTTTGATG +GAATCTCATTCTGTCCCCCAGGCTGGAGTGCCATGGCGATCTGGGCTCACTGCAAACTCT +GCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGTCTCCCGAGTAGCTGGGACTACAGCCG +CCTGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTT +AGCCAGGATGGTCTCGACCTCCTGACCTTGTGATCCGCCTGCCTCAGCCTCCCAAAGTGC +TGGGATTACAGGTGTGAGCCACCGTGCCCAGCCAAAAGTATTACTTTTAAAACCCTAAAA +ATACAAAGGGCAGAAATGATGCAGAGACATGGATGAACCTCACAGTCGTGGCACTGAGTG +GAAGAAGACTTTTGCGAGAGTACAGACTGCATGACTCCATTTACATCAAGTTCTAGAATA +GGCAAAACGGACTGAGGGTAAAAACCAGAGCAGAACAGGCTGTGAGCGACAGGTGAGGAC +TGTCCAGGATGGGGCGAGGGACAAGCGCCCACTGCCTGGCCCTGACAGGCCTATCCAGGA +CGGGGCATGAGGAACTCTTTGGGGTGAGGGCAGCTCTCTAGCTTGCTGGGAATCTGGGTT +AAGGTAAATGAAAATACAATTTTAGGCCCTTAATTGTATGTAAACTTTACTTCCAATGAA +ACCTCCAGACAGAGGCTTTCACAGGACATTCTCATCTCAGAAGCTCAGCACGATACCCTG +GAAACACCACCAGCTAACCTTTCCTGGGCACTGTTGTATGCCCGACCATGTGCTATGTCA +CTGGAGCCGCAGGGCCACCCAGGAGCCTGTGCGTTCTGTCCATGTGATCAGGTGGGCTCC +TCTCAATCTTCACACAGCACAGCTGAGCCTCAAACCCAGCACTCACCCTGACCTCTCACC +TCCCCACAAGGTAGGAAAACCTGGGGCTGAAAGTGTGCAGGGACATGTCATCTTTAGCCT +TGGGCAAAGGCAACCTTTCCCACTGATGATTTCCAGATCACCCAGGGGTATTGGTTCTGC +CTCAATTTTCCAACTGGCACAGAGATCAAGGGCGACGGTGAAGCAGATTGGAAGGGCAAA +GACGCCACGGTGGGAACAGGTTCCTATCCCTCCCGGCTGCTCTCCAGCTGGGTGGCCCTC 
+AGTGCATCTCCAGAGGGCAGTCAAAAGCCCCAGGCCAAGACCACCCTGCACTCTGGATCC +CTCTACTCGCTCACCAAACACAGACCAGCCAAATAACCAACGGGGATGGCAGAGGCCAGG +GCACTGTGCACCCCTCGCTGCCTCCTGACACTCGAAGCAGCCCCCCATTGGGGGACCCAC +TAGCCCATGATCCTACGCGAGCGAGGTCAGGCCACACCTGCCCGTGTGTGGAGCCTCTGG +TCAGTCCACATGGCTGCAGGGGCCTGTGGGGAGGCTGGAACAGGGTTACCTCAAGGCCTT +TCTTCTAAGCACCCAGCAAAGGGCAGGAGAGTGTGCTCTAGTCAGCATCTTTCAAAGGCC +AGGAGAGAGGAGGTTTTCAGATCAAAGTGATCCCTGCTGAGGCAGAAAGCAGAAAACTCC +AGCCCCAATTCTGAGAGGCACTGCCCCAACCCAGCCTGGATACCCACCTTGCCTAGAACC +TTGGCCTCCAAGAAGCCAGGCATTCAGAACTGCTCCAGGTGCTGGGCCTCCACATGGCTC +CTTCTGGAGGCCATGACTCTGGGAGACAGGAACACATCTCACGCCCATATCCAGGCCCAC +AGGCGGGGGTCAAACTCAGGGTCAAACTCATCTCAGCCACATGATAACCCTGCCTTCAGA +CTGAAACTTCTGGCTCACGCCCAGCCAGGAGGGAGGCCTGGGGAGAAGGGTGAAGACCAG +GAAAGGGCGCCACCCCAAGCAGGCCATCCCGCCTAGCCCTCCTAGGATCCTTCTCATCCA +CCTTCCTCCCTGGAAGCTATCCTTTACTACCAAGAAAGGGGCTTCCTGCAGGAGGCCTGG +AGCTATGCTGGAGGACTGAGGAGAAGCTCCCCCCAGCCCACTCCCAAGGAGCAGCACCTT +AACCACCTCCCAGCCTAAACAAAAGTAACAGTGAAATGAGAACCCTCCAGCTAACGCCAA +GACAGCTGAGAGTGAGCAGGGCCGCCCACGGCCCGCCCGAGCTCCACTGTCAGGTGGGAG +GGACAAGCATGCCAGGCTGAGAAGCCATGCTAGGCAGGGCAGGGCCACCAGGCAACTTCC +ACAACCCAGAGGTCTAAGTACATCTGACCAGGGTCAGAGAGAGCCCCAGACTTGCTGCCA +CTGCTCCCAAACTTCAGTCTAAGGTCAGCATCGGCCCTGGTTCCAGACCAACCCCCAACT +ATAGTCACCTCATACTTCAGGTGTGGCCGGGACAGATGCAGCTGCAACGGGTGGCCCTGT +GTGAGCTCAGCCTCTTCCCAGGAGCCCCTAAGCCATGGGAGAGACTGCCTGGAAGGCACA +AGGCAGCCAGGCAGGAGGCAGCCAGTATCCAGAAGTGCCCCAAGGCCCAGCCCCTGGACA +CACCAACCAGCCCTGCACAGTCACAAGCCCAGGACTTTGTTCATCCTATCCCCATGGCCT +GGAGTCCTCCCTGAGGAAGCCTTGGTAGACAAGAGATAGCTCCAAGGGGCGAAGGCTGCA +CCTCGCCCACCGCCACAGCCCTGTATGAGATGGCAGCCTACATGTGGGCAGGTCTGATCT +CCTCAATCTTCCCACAGGCCCAGTGAGCGGGGCGTTATCTCTGCAGTTTCAGATAAGAAA +TTAAAGGTTCTTGGGGGTTAAGCAACTTGCCCAGGGTCACAAGAGCATGTAGGTAGCAGA +GTGAGGACCTAAAGCCTGACTCCAGTGTTGTAGTCTGATGACCCCCCCGCCCAACGCAGG +CAGGGCCAGGGATGCCACTCACACATGGCTTCCAACTGCCAAGCTGCTTGGGGGAGCTAG +AGGAGGCACCGCCCTCCATGCACTGGTGTGCCAGGAGCAGGGACTCCAGTCTGCACCTCC +ACCCTCACTCCCCCACCCCCCAGCACCACCTGCCTATCCACAGCAGGGCCCCACCTGGGT 
+GTATTCTCCCAAACTCACTGTGAGGGCATGGGGTCTGCCCAGCTCCTCCACCAGAAGTGA +CAAGGTGGAGCCCCCATGCAGCCTGCTTTGGAGGCCCAGGTTGTGCTTGCTGCTCCTCTG +GGAACACCCCCACCCTCCATGACCTCCTTCCCTGTCCCTTCAGGAATGGGCTGGTGGTGG +GTGGGGCTCACTGCTCTGGGCAAGCCCAGCCATGTAATCCAGCCCAGACCTGGATGCCGG +CCAGCAGCTCCTGGCTCTCCTCCCAGACCATCCTCCTGGAAAGCAACAGGCTCCACAAAT +GCCAGGAGCCCAGCTCTCTCCAACGCAGCTTCTACAAAATAAACACTCGGGTTCCACTCA +ACACATTGGTGATGCAGCGATTGGGACCTGAGGCTGGCCTCCTCCTCCTCTGCTCCTCAG +GCCAGCCCTGTGGCCTAAAGTGAGGGAGGTAGCATCCACCCCAGCCCAGTGACTTTCCCA +AGCCCATCCTGAAATGCCCATGGGACCTACTGATGACCTCGTACTGCCCACAGCAGGGCC +AAGCCCTGCATGGTCCGAGCCACCCGCCCATGGGTACTGTCCCTTCTGAGGTGAGGAAGC +CCGGGCTAACACCCCAGCACAAGGAAAAGGCTGGAATGGGCTGCTGAGAGGAGAGCTGGA +CCGAAAGTGCCTCCAAACTGTGCTTCCCCCAGAAGGTACAGCCTCAGTCCCCTCCCTCTC +CTCATCACTTCCGGGTTTGGAGCCAGCAAGGAATGAGGCCAGGAAGACCTTGGAGACCTG +CAGCTGATTTCCCCTGCTAGTGCTGGCACCAAGGAGGGCCCCAGGCCTGGCACAGTCTGG +CTGACAGTTCTGAGACAGGATATTGGAGACAGGCAGGGGAACATTGCTTTAACAAGCAGA +ATAACGGCCAGGTGCGGTCTGTAATCCCAGCACTCCGGGAGGCTGACATGGGTGGATCAT +GAGGTCAGGAGATCAAGACCATCCTGGCTAACACTGTGAAACCCCATCTCTACTAAAAAT +ACAAAAAATTAGCTGGGTGTGGTGGCACGCGCCTGTAGTTCCGCTACTCCGGAGGCTGAG +GCAGAATTGCTTGACCCTGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCAC +TCCAGCCTGGGTGACAGAGTGAGACTCTGTCTCAAAAAAAAGAAGCAGAATAACAATTTA +AGGTTGTGATAGTGACTACGAATGCTGTGCTCACCTCCAAACTCCATCCCAACACTACCC +TGCCCCCTAAGAGGGGCCTGAGCATGGGGTAAAAGGCTCGCTGACAGCCACAGAGCAGGA +GCCCACGCAAGACCACCAGAGGATGTGCAGCTGCTGTTCCTAGTAGGTGAGGAAAAAGCA +GGAATCTGGCATCCACACATGTCAGGAGGCAAAACAGGCCCTGTGCTGGTTTCTGCAGTC +CTCTCCAGCCCACTTCCCTGCTCTTCTTGCTCACTGGCCATGACCCAAGAAACAGCAGCA +GCACCACCAGGGATCTTGTTAGACACGCAGAGGCTCAGACTCCATCCAGCCCTAACCACC +AGAACCCTGGGGGCTCAAACACACAGTCAAGTCTGAGAAGGGCTAGGTTAGAAGCTTTTT +TTCCTTCTTTTGTTTTTACCTGGACACCACTGATTGGAAAGCTCTCTACGGCCTGAAAGC +CAAGCGCTCCACAAGTAGTAAACGCTCAAGGGATGGGCATCAAGTGGAAGGAGAAAGCCC +CCAAAACGAAGATACATGTCCCATGTACACAAGCTCACAGGCTGGTGAATACAAACGGTA +GACCTAGAATGTGCCACACCAAGGCTGAGCGTGTTACCACAGGTACCAGAATGGTGACCA +CGCTGGAGGGTTCCACAAACCCTGATCAGAGAAGTAGGCGTGAGTGGCAGGCAGAGAACA 
+CAGCTCTGTGTGGCAGGGAGGGAGGACAAGCAGTGGCAGCACAGCAGACAACAGGTGGAA +ATTCTACATGAAAGGCACTAAATGGGCGAGGAAAGCCTGGGCCGCCCAGGTGGAAGGGAC +TATCGTGATCATCTCTCTCACCCTAGGGACACCTTAAAAGGAGGTAACTGAGCCCCAAAG +GGAGGCACCCAGCACAGCTGAGTCACAGAGCTGCAGCCATCCAACCCACAGCTCCAGGAG +GGACTCTCCAACTTCACAATTTCAAAATCAACCCTAGCAGCTAACAATTTCTTAATGCCA +AGTAAAGACTAGAATTAGAAACAAACAGAAACCCTATCATCTTCTACCATCATTACTTCA +CAAAAGAAAACATTTCAATGATCAAAGAGCAGGAAGAACTACTGGCAGTCCTGCACCTAT +GCAGTTCTCCAGGCCTGGCAAAGGCTCTGAGGCAACCAGCATGAAGGAGCCACTGCACCG +TCTGTACGCAACACCGCCTGACCCCCGGGCTGGGACAAGACTGAGATGACATGCTGAAAG +GGAATCAGAAAAAACAAAAACACAAAAATTCAACAACAGATACTAAAGACGGGATTTTGA +CTAGCCAAACAAAGACTAAAAGAGTATCTCCCAGGGATGAAAGGAAAATGGGGATGTTTT +TAAACAAGACCAGGACTGTTCCAGAAACAATAAGAACGTCAAAGCCCATCAGGAAAAAGG +TAACACCAAATGTTGGCCAGGCAGTAAGGCAACCAGAAGTCTCACATACTTTATAATTTT +TTATTTTTTTGAGAGAGAGTCTTGCTCTGTCACCCAAGCTGGAGTGCAGTGGCGAGATGG +CTCACTGCAACCTTCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCCGAGTAG +CTGGGATTACAGAAGTGCAGTGCGCCACCACCCGCAGATAATTTTTGTATTTTTCGTAGA +GATGGGGTTTCACCATGTTGGCCAGGCTGGTCTCCAACTCCTGGCCTCAAGTGATCTGCC +CATCTCAGCCTCCCAAAGTACTGAGATTACAGGCATAAGCCACCGCACCTGGCCAGAAAA +CTCACATGGTTTAGAAAACCCTTGGATGGCACAATCTAAAACTGTGGGCCCAAGGCCGTG +GGTGGTGGCTCACGCCTGTGATCTCAGCACTTTGTGAGGCCAAGGTGGGTGGATCGCCTG +AGGTCAGGAGTTTGAGACCAGCCTGGCAAACATGGTGAAACCCCGTCTCTACTAAAAATA +CAAAAATTAGTTGGGCGTGGTGGCAGGCTCCTGTAATCCCAGCTATTTGGGAGGCTGACG +CAGGAGAATCACTTGAACCCAGGAGGCAGAGCTTGCAGTGAGCTGAGACTGCGCCACTGC +ACTCCAGCCTGGGCGACAAGAGCCAACTTCGTCTCTAAATAAATAAAGCAAACAAGCTAG +CTAGCTAGCTAGCTGTGGGCCCAAAAACTCACCCAAGGTATACGCCAACAAGAATGCACA +CGTATGCACACCAAACAGGCAGGCACACTCATGAAGGCCCAACAATGGAAAACAACTATG +CCCACCCTGAGAAGGGACAGCTGCAGGTATAAAAGACTATGAGCCAGCAGTGAACAGGAC +AGAACGTTTAGGTTGAACTGTATGAATTTACCTTTTAGGGGTAAAAAAACAGCTGGCTGC +TGATAATTTCACATGGTTTGACTTACAAACAACGCAGATGAATCTCTCAAATATACTTTT +GAGCAAAGATGCCAGAGCCAAAAAGACTATTACAGTTCCATTTGCATACATTCAAAAGCA +GGCAGCACTAATCTTATGGGTGGGGGTTGAGAGTGACTACAAGCTTCAGGATACGTGAGG +GGCTTCCAGGGTTGTGTTTCGCACTCTGGATGCTAGTTACAAGGGCATAATCATTTGTGA 
+AGAACTCTTGGAGCTGTGCACACTTCTTACGTAGGCTGTCCTTGAATTCAATACGTGCAC +AGAAACCTTCAGAAGGGAAGCAGAGATCAACTTCATCCTGATCAGCAAGTAGACTCAGTC +CAGGCAGCAACCTCTCCTGGACCCCCAACCAATAACAACAACACAGGAAGCAAAATATTG +GCAGGACAGCTGGCAACGCGGCAATGTCCAGGATGGAGTGCCCATCCTAGAGGAGGGAGC +TCACACAAGCCAGCCAGGAGCCCACAGGAAACAGAGAGACTACTGTAAATCCTTATTAAG +AGCAGACTCATGTCTGCTATCTAAAAGATAAAAACCTGATAATGTTTTTAAGTTGTCCTA +AGGTCCAATTGACTTTTTTTTTTTTTTTTTTTGAGACAGAGTTTTCCTCTTGTTGCCCAG +GCTAGAGTGCCATGGTGCAATCTCGGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGA +CTGTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTCCAAGTGTGCACCACCATACCTGGCT +AATTTTGTATTTTTAGTAGAGACGGGGTTTCTCCATGTTGGTCAGGCTGGCCTGGAACTC +CCGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAAGTGCTGGGATTATAGGCGTGAG +CCACCACACCCAGCCCCAGTTGACACTTCTAAAGGAGAAGGGGTTCTGCAAATCCTGAAA +GGCAAGCCCAGGAGAAACAGGAAATCTGGGAGCACGGCAAGCAAGGTTCAAACAGGACAC +CAAATAATTCCCAGCAACCGCCTCCAGCAGGTCCTGGTGGCAGCCAGGTAGGGCAGGAGC +CAGCTGGGCATATCAAAGGGAGGAAGCCAAGAAGAAAATGGGCGAGACTCTCAGGGATTT +GGGGTCAAAGGAAGCCGCTGGGAGAAGGAAACAGCCCACTTGCTCCTTCATAGGTAATCA +ACACACAAACCCCCCAGTGCCTCAAATGGCTCAGGAGCTACAGCCAAAGATCAAGGATCA +AGTTATTTTACTCCAAGAAAGAAAAAAAAAAGAAAAAGTTATACATTCGGCATCCATTGG +AAGTTTTGTGATTGTTTGGAGACAGGGTCTTGCTCTGTGGCCCAGGCTGGAGTGCAGTGG +CGCGATCATAGCTCTAATTTATGGGCTCAAGGGATCCTCCCGTCTCAGCCTTTTGAGTAG +CTGGAACTAAAGGTGCATGCCACGATGCCCAACCAATTTTTAAAATTTTTTGTAGAGATG +AGGTCTCACTATGTTTTCCAGACTGGCCTCAAACTCCTGGGCTCAAGTGATCCTCCTGCC +TCAGCCTTCCAAAATGTTGGGATTACAGGTGTGAGCCACTGTGCCAGGCCTTAAAAAGCA +ATTTTTACAGATAAAATTGGGCAACTGCTATGGAAAATATTACAATCTATGGTCCTGTGC +CTGAAAAGCCTCCAAGTGTGATACTTTATAGAGAAGAGGTCTCTCCTGTCCAGTAGGTAA +CTTGAAAGTGTCTAAGAGACAAGTGCCCAAGAACTCAAAGAATGAGAGAGGGATCCATTC +AAACTCAGGGAGAAAAATGCTCACAAAGTTTCACTGTACTTCTAAACTGAGTCTCAGTGG +TGCATTAAGGAACTGGATTTAGAGACAGAAAACAAAATAAACTGGCACTTCTCTGCCTCA +AGAAGCATAGCTTGGGGGTGAAAAGTCAGCTACTTAGAAAGCAGCAAAAACAAAACCCAG +AAACCCACCCAAATGCTCACTGCCAGCAGGACAGGTGATTATGTGTGGATGGCGCCACCT +TGGACACTGAAATGGAGAATGGGAACGCAGGAACTGCTGCTCCTGGTGTCCCAAGAGTGC +AGGACACCCGGCTCCCAGCGAACTGAGCCAAAATCATTCGCAGTTTCAGGATCCAGACAA 
+AGGTGGGAAACCGACAAAAGCCAGCGAATGGCTGACCCGCAATTCAGAAGACTGTTGGCT +TCTAGAGGGAAGTGGGAATAAGCCAGGGAGAAGTGCATGGGGTGTCCCAGGAACTTGGGA +GGCTATTTCTTATGTGGCCATGAGGGAACAACGGTGTCACGCTTAGTGGGACACCCACGA +TTCAACCAATAGTGGGTTTACCACTACTTATCTATTCTTTAAGATTTGTGCACACACATA +TATACGTGTAATTTACCTTTGTTTTTTTTTTTTTTTGAGTATTCCTCTGTCGCCCAGGCT +GGAGTGCAGTGGCACAATCTCGGCTCACTGCGACCTCTGCTGCCCGGGTTCAAGCGATTG +TCCTGCCTCAGCCTCCCGAGTAGCTGGGGATTACAGGCACCCATCACCACACCCAGCTGA +TTTTTGTGGTAGAGACAGGGTTTCACCATCTTGGCCAGGCTGGTCTCAAACTCCTGACCT +CAAGTGATCCGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGGGTGAGCCACCACG +CCCGGCCTATTTACACCTTTGTAAATAACTTCTTAGAGTTCAATGGCTAAGTATACTAAG +GAAGCTGAGCCTGCTCTGTGCAGAAGTCTCAAAAGAAAGGAAAGTCGGGTGTGGTGGTTC +ATGCCTTTAATCTGAGCACTTTGGGAGATTGAGGCAGGAGGATCGCTTGAGCCCAGGAGT +TCAAGACCAGCCTGGGCAACAAGGCAAGACCCTGTCTCATTTATAAAACAAACAAACAAA +CAAACAAACAAAAAACAGAGAGAGAGAGAGAAAAGAAAGAGAAAAAAGAAAGAGGAAAGA +GACCTCCCTGGAAATCCCAATCCAGGGAGAAGCCAAGAGGCTAAAGCTGAGGTGCACTCA +CAGAGCACCTCCCTTTCACCACCCGAGAGATCACCTGGTCCACAGCCGTCAAACTCAGCA +AGGCCACACACTTGGGCAGCTATGACAACAATGCACAAATGAGGGCTTTGGAGGGCTCAG +TTCCCACAAGAGATAACCCAGAAAATATCACCCTGTCCTAATCATACAAAGCAAAAGAAC +CTGGGGCCTCCACACCTGGGGTAGCAGGTATGCTTCACTCAACAGAAGTTGTCCAGGAGG +GAGAAATCCCAGGCAACCTCCCAAGGTCTACAGGAGAAAGAAGGGCAGAATCAGAGCCAC +TCTAAGTGAAAATATCAGCGTTTAGAACACAATCACCCCCCTGCAACCCCTCCAGCCAAC +CCTCGCCTAGTCCCTGTACCCTCCAATTCAGAGTCCAATAACCCTGAGTTATCTTTCTCA +ACACCCATCTCATCAAGCCACTCCCCTGCTTAAAGCTTTCAATGGGCCGGGCATGGTGGC +TCACACCTTTAATCCCAGCACTTTGGGAGGCCCAGGCGGGCAGATCATCTGATGTCTGGA +GTTTGAGACCAGCCTGACCAACATGGAGAAACCCCATCTCTATTAAAAATACAAAATTAG +CTGGGCATGGTGGCGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATC +ACTTGAACCCGGGAGGAGGAAGTTGCAGTGAGCCAAGATCGCGCCATTGCACTCCAGCCT +GGGCAACAAGAGTGAAACTCTATCTCAAAATACAAAAAAAAAAAATTAAAAAAATAAATA +AAGCTTTCAATAAATTCCTGATGCCTAGCATAAAGTCCATGCTCAGGCCTGGCATGCAAA +GCACTTCCCACAGGGGCATCAAGCCCACCTCATCCCTTCCACAGACACACCTGAGAGCTC +CTGCAGGCCGTGCCCTACTCTGGGAGGAGCCTGCCCCCTCACGACTCCCTCCTCTGTGAA +GCCAGAACTGCCTCTGTTGTCCCCCCGCTTCCAACGTCAGCACCAGCTGCCAAGCTGCTT 
+CCCTCCTGCCTACCCCCACCACCCCATAGACCAGTACCCAAAGTTAGACCCTGTGTCCCA +AGCAGCCACAGCAGCAGGCATGCAGCAGGCACTCATAAACATTCATTAGGTGAGCAAATG +CTGGTCAGACCCAATGCTGAGAAGAGACAGAGTGAAACAACATGTGAAAGATAAGGAGGC +ACGTGGGGAAGATGGAAATGTCCAGTACCCTCCTTGCCACTGGTTCTAAAATTGTGTTGT +TTACATTCTGAAATAGTCAGGGCTCCTAACGAGCATCTGCTTATGTGGCTTCTCTATTAC +TGTTTTGTGGGGTTTTTTCTGAGACAGGGTCCTGCTCTGTCACCCAAGCTGAAGTGCGGT +GGTACAATCATAGCTCACTGCAACCTCGAACTCCTGGCTTCAAGCCATCCTGCAGCCTCA +GCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACTGCATCTGGGCTGTTGCTGTATTT +GATGTTACCATGTACTACAACAGTGCCCTCTTATTCAGTTTTTTTTTTTTTTTTTCCTGA +GACGGAGTCTCGCTCTGTGGCCCAGGCTGGAGTGCAGTGGCGTGATCTCGGCTCACTGCA +AGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCGGCCTCCCGAGTATCTGGGACTA +CAAGCGCCCGCCACCGCGCCTGGCTAATTTCTTTTTGTATTTTTAGTAGAGACGGGGTTT +CACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCTGCCGCCTCAGCCTCC +CAAAGTGCTGAGATTACAGGCGTGAGCCACCGTGCCCGGCCAAAAAAATTTTTTTTAAAG +GCTGAGCACAGTGGCTCACGCCTGTAGTCCCAGCACTTTGGGAGGCTGAGGTAGGTGGAT +CACTTGAGGTCTGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTA +AAAATACAAAAAATTAGCCAAGCGTGGTGGCAGGTGCCTGTAATCCCAACTACTTGGGAG +GCTGAAGCAGGAGAATGGCTTTACCCCAGGGGAGGGTGTGGGGTGGAGGTTGCAGTGAGC +CAAGATTGTGCCATTGCATTCCAGCCTGGGCAACAAGAGCGAAACTCTGTCTTAAAAAAA +AAAATAGGCTGGACGCAGTGGCTCATGCCTGTAATCCTAGCACTTTGGGAGGCCGACGCG +GGTGGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGCCCAACAGGGTGAAATACAAAA +ATTAGCCAAATGTGTTGGTGCATGCCTGTAATCCCAGCTACTGGGGAGGCTGAGGCAGGA +GAATCACTTGAACCTGGGGGAAGCAGAGGTTGCAGTGAGCCAAGATCACGCCATTGCACT +CCAGCCTGGACAAGAGCAAAACTCCCTCTCAAAAAAAAAAAACAACAACATTTAAAAAAA +TACAAAAATTAGCTGGGCGTGGTGACAGGTGCCTGTAATCCCAGCTACTCAGGAGGCTGA +GGCAGCAGAATCCCTTGAACCCAGGAGACGGAGGTTGCAGTAAGGAGAGATTGTGCCACT +GCACTCCAGCCTGGGTGATAAGAGTGAGACTCTATCCGAAAAACAAAACAAAAAAAGGGC +TGGGCACGGTAGTTCACACTTGCAATCCCAGCACTCTGGGAGGCCGAGGCGGGCGGATCA +CGAGGTCAAGAGATCCACACCATCCTGGCCAACAGGTGAAACCCCATCTCTACTAAAAAA +TATATATATACATACAAAAATTAGCTAGGTGTGGTGGCGCGCACCTGTAGTCCCAGCTAG +CTAGAAGGCCGAGGCAGGCAAATCACTTGAACCCGGGAGGTGGGGGTTGCAGTGAGCAGA +GATCGCGCCACTGCACTCTAGCCTGGCGACAGAGCGAGACTGTCTCAAAAAAAAAAAAAA 
+GAAAAGAAAAGAAAAAAAGAAGGATAGCATTGCTTTACACTTTTATAAATCTCTTTAATG +TCTAGCCTAATAGAAGAAGCTGGATTCTCATATCTCCTGCATTCAGTCTGTTAGGATATG +CGGTTTTGATTAAAGTCTTTAAAGAAATTTGGCCTTACACAGAAAGAAGGAAGGAGTATT +TTAATAACTTTTTCAGAAAACTGTCTATTCTTCATTGATATTACACTAAAGCTCGACAAA +TGAGTTTCTTAAAAGTTGCTATGGGCCAGGCGCGGTGGCTCACGCCTGCAATCCCAGCAC +TTCGGGAGGCTGAGGCAGGTGGATCACTTGAGGTCAGGAGTTCAAGACCAGTCTCGCTAA +CGTGGTGAAATCCCGTCTCTACTAAAAATACAAAAATTAGCCAGGTGTGGTGGCAAGCGC +CTGTAATCCCAGCTACTCAGGAGGCTGAAGCAGGAGAATTGCGCAACTGCACTCCAGACT +AGCAACAGAGCAAGACTCCGCTTCAAGGAAAAACATAAATAAAAGTTGCTATGTAGGCCA +GGTACGGTGGCTCACATCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGCGATCACAA +GGTCAGGAGTTTGAGACCAGCCTGACCAACATGGAGATACCCAGTCTCTACTAAAAATAC +AAAAATTAGCCGGGTGTGGTGGCACGTGCCTATAATCTCAGCTACTTGGGAGGCTGAGGC +AATAGAATTGCTTGAACCCGGGAAGCAGAGGTTGCAGTGACCCAAGATCGTGCCACTGTA +CTCCAGCCTGGGCAACAGAGACTCCATCTCAAAAAAAAAAAAAAAGTTGCTATGTAGGCT +GGGCACAGTGGCTCAACACCTGTAATCCCAACACTTTAGGAGGTCGTGGTAGGAGGACTG +CCTGAGCCTAGGAGTTCAAGACCAGGAAGATCCCATCTCTGGAAAAAAAAAAAAAAAACA +GCTGGGTGTGGTGGCACATGACTGTGGTCCTAACTACTCGGGAGGCTTATGAAGGAGGAT +CATTTGAGCCCAGAAGGTGAGGCTGCACTGAGCCGTGATTACATCATCACTGCACTCCAG +CCTAGGCAACTGAGGGAGACCACATCGTCAAAAAAAATTAAAAATAAAAAAAAAATTGTA +AAGCCTCATGCCGGGCATGGTGGCTAATACCTGTAATCCCAGCACTTCGGGAGGCCAGGG +CGGGTGGATCACCTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGCGAAACCCAG +ACTTTATTAAAACTAGAAAAAATTACACCAGGCGCAGTGGCTAATGCCTGTAATCCCAGC +ACTTCGGGAGGCTGAGGCGGGCGGATCACGAGGTCAGAAGTTCGAGACCAGCCTGACCAA +CATGGTGAAACCCCATCTCTACTAAAAATGCAAAAATTAGCTGGGCATGGAGGCGCATGC +CTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCAGGAGGCGGA +GGTTGCAGTGAGCGAGATCGCACCACTGCACTCCAGCCTGGGCGACAGGGAAAGACTCCG +TCTCAAAAAAAAAAAAAAAGAAAAAATTAACCAGGCGTGGTGGCAGGCGCCTGTAATCCC +AGCTACTCGGGAGGCTGGGGTAGGAGAATCGCTTGAACCCAGGAGGCGGAGGTTGCAGTC +AGCGAGATCGCACCACTGCACTCCAGCCTGGGCGACAGGGCAAGACTCCGTCTCAAAAAA +AAAAAAAAAAAAGAAAAAAATTAACCAGGCGTGGTGGCGGGTGCCTGTAATCCCAGCTAC +TCGGGAGGCTGAGGCACGAGAATGACTTGAACCCAGGAGGTGGAGGTTGCAGTGAACTGA +GATTGCGCCACTGCACTCCAGCCTAGGCAACAGCGAGACTCCATCTCAAAAAAAAAAGCC 
+TCATGCATCCTTCCTATTTCATCACACAGAAAAGTAAAAATATGTGTAATTAAAGGTCTA +GATTTAATACTCAGTCATCTGTACTGCCTCCCCTGGACATGTGTAGAACTGGTGGGTCTC +AGCCCCCAGTGCTGGCTAACAGTGGGGAACACAACTCTGGCAGTGCAAGTGTCCACCCAG +GGCAGAGGCCAGTGATTTTTTTTTTTTTTTTTTTTTTTAGATGGAGTCTCGCTCTGTCAC +CCAGGCTGGAGTGCAGTGGCAAAATCTTGGCTCACTGCAACCTCCGCCTCCCAGGTTCAA +GCCATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGACTACAGGCGTGCGCCACCACACCC +AGCTAATTTTTGTATTTTTAGGAGAGGCGGGGTTTCACCATGTTGGCCAGGCTGATCTGG +AACTCCCAACCTCAGGTGATCCACCCGCCTTGGCCTCCTAAAGTGCTGGGATTACAGGCG +TGGGCCACCGCATCTGACCTTGTAACTTCTAAATATACTGAAAAAGGTATCGGGGACCCT +CAGGGTCTGCAAACAACTTTGAGAATAGCTGACAAGCACTGCCTCAAATATTTCTAACAA +CTCGTTTACAGGCAAGGAGAATGAGGCCTGGAGAGCTTAAGAGACTGGCCCACGGAGGAA +GCAGGAGTCATCTTTCCACAAAACTACACTTCCCCAAAAGCTGAACTCTTGGACAAGGAG +CCCACTGCCAAACCTAAGAGTAACAAATTTCAGCAAATAAAAGGAAGTGCCATTTCCAAT +GGCATCCTCGAATACTCTAAGAGGCGGGGCTGGCAGAATCCATCAACAGGCCTCCAGAGG +CCTGCTACTAAGAGCCAGCAACGCTGCTGAAAGCGGCACTGCCTCTGCAGGCTGTGTGAC +CAGGACCCGGCGGGGGCCCTGCACCTGGAAGCAGAGGGGGTCGGTGGGGGAGGGGTGCTA +CCAGGAAGCAAGATGTGCATGCTGCCTGCTGGGGGCATGAAGTCAAAGTCCCCTCCTGCC +CAAAGTGGCTAGGCCAGCCCTAGGCTGCCTTATCAGCATCAACACATGCCTGGAATGTGG +GACTTTGGCAACAACCCTCTCCCTTCGGCCAGCACGGGGCTGACGCCTGGTCCCTGGGCT +CTGGTTCACAGTCAGAGGACAACCACCTTGAAGCACCAGAACCCGCACCCCCAGCCCATC +TACCAGAAGACAATCTACCAAGAGTCACCGAGAGGAGCTATCCTGCCTGTGTCCCCACAC +CACCAAGGGCCGTGTCTGAGCACCTCGTTTGTCCACACTCAGAGAATCTCAAGAGCTCAG +CTCTGGTCCCTGCAATGTCCAGACCATCCCCCACATGGCTCTCCTGGAGGTGTTCGGCAA +AGGCACAGGCTGCCGTGGGGCAGAAGATTCCCACATACCAGACAGTAAGGAGCTGCGTGT +GACTGGAAGGATGAGGACGACCCAGAGAATGCTCTCTGGCCCCTGGTCCTTGGGGAGGAA +TGGGGGACCTGCAGGGGACCACCTTCCCACTCTGGGAGCCAGATTCCCTGGCTTTGTCCA +GAGCAGCCAACCCACTACCCCACTCAAAATGAACACCTCAGCTGCCCTCCAGCCCAAGCA +CTGCGCAGAGTAAGGAGTCCTCACTCACAAATGTGTCCGAGGCTTTCACACACTTCACCT +GGAGCTTCGCCACTCACTTCTTAGATGTACGACTCTGACTTCTCTGAGCCACTTGTACAA +CGAGGACTGACAAGATGATGAGGACCACACCGGTGTGTTCTGACAACTGAACCGGGCATA +GAATGCCCACTGCCAGGCTGAGCCCAGGAACGAGCCCAGGATGAGTGCTTACCCTCTTCA +CCACTACCTTCCTAGCCTTCTGTCAGAACTCCGTGGGGACCATCCTCTTCCCATCTTCTA 
+AGCCAAGTACCCTGGTACAAGCCAGACAGAAACTATAATAGCAGCTGCTTCCATAAAGGG +GAAGGGCCATGGGGTCCCGGACAGGGCTGGGGTGAAGGGGCACAAGACCTAGGGCAGGAA +TCCACCCCCAGATCACTCACTCAGGACAGGACCACCTGGACCCAGCTACCAAGATGTGCA +CCACCCCCTTCCTTCTGAGCCCCTTTTCCCATCTCCTCTACTGCCCCTGGCCAGGTTGCT +GTCCCACCTCTAAGAGGCAAGATTTCCTACCCAGGATGCTGACCAAAACCACACCCAGGG +GCCTACCCGTCCCCTCACATGATAGGCTGCTCCTAGCTGGCAGGAGCCGTCTTATAGCCT +CTCTCCCTCAAATACAAACAAAACCCCAGAACAACCTATTGTTCTGTTCCAGCAAAGGGG +GCCAGGCCCAGCCAGGCCCTACCTGAAGCTCTCTGTCTCCCGTCCCCCTTCAGTCCCCTC +CCACTCGTCCTTCTCTGCCCCTATTAGTGGCCTCATATACCTTGTCCAGATAACACAGCC +CAAGAAACCAACCTCAGGGACAAATCTCTTCTCTTCCAGGGAGAAGAAGACAGAGGTCCC +AGGAGTCCCAGGGGCCCCCATTACTGGCCACCTCCGCTCAACCCAAACTGCACTATGTGA +GGTCCCGCCAGACTACATACAGGCCAACGAGAGCCCAAGAATGCAGCCAGGAAACCAGGC +CAGGCAGACTCCAAAAGCCCGTGCTACGGGGTCAGTAGGACCCGCCTACAGCTGAGGGCT +ATAGGCAGCTCTCCAGAGGACCTCCCAGGAGAAGCCAACAGTGCACATGGGCTAGGTTCC +TCCTGCTGTTCACCCTCTGTGCCCAGTAACTACCCAGGAACAGGCATGACTGACTCTGCC +AACTCATTCACTGGGTAACCGCTGACCCCACCAGGCCCTCGGAGTAAAGCAGTTAGTGAG +AACCGCAGGTAAAAGCGCGGGCTACTCATCCCTCCGCCTGTGGATTCTATTATCAACAAA +GATGCATCACCTGCTAGAAGAGCGGTTCTCAAACTTGGCTGCACATCAGGTTCACTTCAA +AAGCTTCAGGAAAAGCTAGTGCCTTGGGCCCACCCCAGAGACAGGTATAGCTCAAGGTGA +GAACCAGAGCACCAGCAGGCAGGCCTCTGCCAGGCAGCATGGGCAACGTGTGCTGGGTGA +ATGTGCGGCCTGCACTCATCACCCAGAAGAGCCCCACTGTGTCCACACAAGGACAGGCCA +CCGAAGTCCCAGTCCCCACTCTCCCTTCTCACTTACTTTGTGAATGAGAGCTCACCAACA +GTTTACCGTCTTCCGAGTGCTGCGGCCTCACCGCCCGACCCGCAGCAGGGGTGAGGGCTT +GCAAAGGCCATGCAACAGATCCTCCCCCTCAAAGGGGCTCAGGCCTCAGTGTGGTAATGG +GAGCGGCCCCTTCCCACACTTGTCAGAGAACCCACCCACCCGGTGGAATGAAGCCGAAGC +CCAAGCCGCAGGGCAACAGCCCGGGACGGCAGGGGACAAGTCTGTACAGCAGCCACACCC +CTGCCAGAGCCAGGAGCTCTTCCATTTCCACAGCAGGCCAACTCTCACCTGGAAGCACCC +ACCTAACCAAACCCAATCCCTGCCTTCTTTTAGTCACACTGGGAACTGCCACAGGAAGGC +TGGGGACTCCACATACTCCCTCCTCACCACACCCCATGTGTGATCACCCTTAAACAGAAA +ACATGCACCTAAGATCCTACGATGCGCTGCCCACCTCACAAAAAACCGGTGCCTCTAGGT +AAGAGCACCTAGCTGACCCATCCCTGTTCCCTCTCACCATTAGAAGGACCCCCAAGTCCC +CCTGTTCTAACACCGAGGCTGCACTCCCACTCTCCACCCACTAACATTACCATAAAAAAT 
+AAATTGAATAGTTAGAAGGAGGGGCTCCCCAAAGCCTGGCCACCTTCCCGAGGGTCCCAA +GCAGTTTCTTGGAGTCCCTCTCCCCACTCAGTCCCGCCACACCCCTCTCAGGCAGTCTGG +CCCTTCCCTCCTGAACTTTGGCGGGCTGGAACCAGCTCCCAGCCTCTCCTCCCTTATCAA +GCTCACTGGGGGCCCACACAGAGCTCCCATCCCCACCAGCTTTTCTCGCTTTGCCACTCC +CAGCTAAAACTGGGCTCAAGGCAAGCAAGCCTGCAGTCCCTCAGTCCCTCGGGGCCCAGC +CCCTCCTCAGAGCCCTCCCCCAGCCCAGGTCCCTAACCCTTAGGGCAAGGCCCCTCCCAG +CAGGAAGCGAACCCTGGCGGTGCCAGGCAGAGTCAAACTGGAAGGGCTGGTTCAGAAGCC +CCTAAATGGGAGCCAGGACCTCTCTTAGGGAGGGGGCTTTCAGCCCCATCCTCCCTAGTG +CGATCTCAGATCTCTCCAGACACCACACTATATGGGCCTCCACAGGCGGTGGAGTCAAAG +GCATTTACCCTCCAGCCCCCAGCAAGCTACTTAACTTCCAAAAGCAGCTCCTTCCCTCCT +CCAGAGGAGAACGGGCCGCGTGCTGAACTGCAAGCGCCTCTATCACCCCGCCCAGGCAAG +TCTGTGAGCACCAGAAAGCAGGGGCCAACATGACTTGGTCTAAACCAAGGGGGCCAGGCA +CTGACTTTGCGCTCAGAAAATGTGGCGGAATGAATAAAAAGCTTGAAAAAGAGCCGCATA +CATGCCTGGCTGGCAGTGGGGTCTCCCAAGCGGGCTTCACAGCTCACACAGTACCCGTGG +GTCCGCGTCCTCAGTGCTCTCCAGCTGTCTCCACTGCCACCCTCTACCCTACCACTGCCT +TTTAAAACCCAGTAGCAGTCACTTCCTGGTTAAAACCCCTCCCCTCCAGGCCAACCCCAA +CCCACATGGCATCCAGGGTTCTCAGCCAGCCCACGGCCGGCTCCTCCCACGCCACCGCTA +TGCCCTCTGCGCTCCGCTAGATGCCAGCCCTTATCACCCCATCTTATAATCATTTGTTGA +GTGTCTGCCTTCCCTAGGCTGAGCTCCAGAAAACAGCGATACATGAATGTAAACTCCAGT +TCTGTTCCCATTGTAGAAACGAGGCAGGAGAGTCAATCACTGCTCAAGGTCATGACGGGA +GTAAATGGCAGAGCCAGCAATGCACTACAGACTTTGCGACCGACCAAGTCCAGTTCCCCT +CTCACTGCCCCACAGGGGTCCGTCCTAGGCCAGGCTCGGAGGAGACAAGAAGGGAGGAAC +CCAATGTGTTCTTTCTTCCCGCGGTGAAAGCTGCCTCCCAGGCCAGCAAGAGCAGCCCAG +AAGAAGTGCCCCACTCTCCCAGGGATCAGGTACTGGGGGCAGCGGGCAGGACAAGGTATG +GGGTGGGGCTGACTGCTCCAGGCCAACAGACCAAGCAATAAGGTTGGAACCACAAGTCCC +CTGGGTAAACTGAACTTTATTCCTTCCTCACGGCTCTCACTCTCCAGAACTGCCCCGCCA +GCTCTTCTCCAGGGGCTGGCTGCTGATTAAATGGCACTTCCCCACCCCTCAGATCTGACC +CCGCAAACAATAAGGACTTGAGGGGAGGCGGCAGGCTATCAGCTCAATAATGCAAAACCC +TGTTGCTCCTCGTCCACAACAGCTGACTTCAAGTGGATGGGAGGCTGCGCTTATTAACAA +AATGAGAAATCTGATCTACGGAAAGAAAACACTACGTGAGGATTAATCCGCGACTGCAGC +TTGTGGAGAAGGCTGGGCTGCTGGCCAAGACCAAGGATCGAGGATGGGATCGTGCCTACC +TGTCCCCAGAGCAGGTATCATGCAGCACAACAGGACTCATCACCCCTGCCCCACCTGCGC 
+CTTCTCTTCTCCTTCCAGCAACTTCGACAATCATTTCGGTTTTGCTTTAATTAAAGGCCT +GACTCCCTCGGGCTGCTTCTTCCTCTACACAGAGGCAGCAACCAGAGGGAGATTTTTCTT +TTCAGGAATCGGTCGTAAAAACTCAGTGACTAATCTACAAGTTCCAACAACTGGCAGAAA +AACACCGCAGGGAGAGGTTGCTGGGCACACAGCAGCACTCAGAGCCAACTGACCAGAGAA +GCTGGGCCACAGGCACTCTACTACAACCTCCCTCCCCCAGCCCGACACTGGCCTGCCGAC +CCACCTGAGAGCTCTGACCTCCCAAGGCAGGCAGCTGGGGAGCCTCTTCCCAGCCTTCCA +AGTCTACTGCTCGTCCTGTGGGACCCAGAGATCTTCACAGTCTCAATGGCACAAGACGGA +ACGTTCACTTCCAGGCAACAATCCTGGGCCACAGGACTCAGTTCAGCCCCCAAGTATCAG +ACTCCCGGCCCTGTTCTTGGTGTTCAGAGCCCACTAAACCAGCAGTCTTGCCACACAGCT +TTGTCTCCAGAGGCCAGCTCATCTGCTTTTTTTCACACTACTCCAGCAACCCTGCTCCAC +CAAGCACGCACACTCTCCTTGACACAGGGCTCCAGACCAACCACATGGCCTGTGCTCCTG +AAAATGCCTGCCTGAAGGCCCAGGGAGTCCAGACACTGGCAGATGAGAATGGACGAAAAG +AAGCCAAGCCAGAAGCCAGGAAGCAATAAATGCTAGACCCAGAAAGGCCTTAGGTTTGAG +TGACCCAGCTCCATCCCTCCATCCTGGATCCTTGGAGGACAGCCCCAAGTTCAGCCTAGG +AGCTCCCAAGGCCCCTCCACTACCTGCCAGCTCTCCGACAGCACAAGAGACACAGCAATC +CCACATCAGTCAACAACCCACCGCCACAGGATTCCTAGGGGCAAGGCTCTGCCCCTCCCC +CCAGCACATATGTCAGAGGGGTGGCTTCCCTGGGTTACACCCCTCCTGCTGTTGGGGGAG +TGGGGGGTCGGGGAACACAGCTTTCCAGATGTGCTTGGCAACTCCTACTAGAAAAGACTA +GGGGTGCGGGGGACAGGAGAAGACATGGATAAATAACTTTAACACCGCTCCAACCCATCC +GGCTCAGCACTGGGCTCCCTTACACAGGCAGTCCCAGGCAGTTCTGGGTGGGGCCGTTGG +GAGCATTATCTGGCCTCATTCCCTCCTCTCAGCAAGTCACTGCCCCCCAGGGAGAGGCAT +TCCCCCCCCCCCCCAACCACGCACACACCCAGGCCTGTGTGAATATGAACCATCTTGGAA +GATAAACAGAGAGTGACAAGCTGGGGCCCCTCAGCCCCCTCCCTCAGCCAGCCGGTCCCT +TCCCCCTGAAAGCCGACCCCCTCCCCCAAGCCCATCTCTCTTCCTGCCAAGCGGCTGCCT +ACAGGAGAGGTCTGGGGGCAGGGCAGCGAGCCCACCAGCCGCAGAGCCCGGCCCTCTGGG +AGCGGCCCCTCCAGGCCCGCCCCTGCCCCTGCCGTGGCCAGTCTTCCCGCGGGGAGGAGG +CAAGAGGAGAGGAGACCGCAGACGGTGCAGGACCGCAGTCCTGGAAATCGCAAAATCCTC +TAGCGAGGGGGCGGCCGCGGGCGCAGGGCCGTTTGCATAATGGGAGCCCTCCCGCCTGTC +AGGCAGCGCAGCTCGCCCGACGCTGTTCGGATTAGATTGCTAATGAAAAGGCACAAAGAG +CCGGCGCCCGCTCGCCCGCCGACCCCCGCTCCGCAGACCCGCGCCGCCTGGGCCTGGCGC +GGCCCGGTGGGCTTTGTGCCCCGGGTGCCCCACCGCCCGCGCCCCTCCGGGCATCTGGCC +CACAAAGCCCGGGCCTGCCAGGGGTCCCGCGTGCGCCCCAGCCCAGGCCCGCCAGACGCT 
+AGCGCGTCCCCCACGCGCGCAGTCCCACGTCGCCCGGGCGCGCGCGCGTCCACGCCCCTC +TCCCCGGGGACGCGCCCAGCCCGCGGCCCCGGCCCCCGCCCGCTCCCCGCGACCCCCGCC +CCCCAAGGCCGCCCCTCACCTCGTGTGCGTCGGCGGCGGCGCTCCGCCCGCCGGCCGGCC +GGCCCTACCAGCGGCCCTTGTCCTCAGCGCCCGGCTCGCGCCGCACGCGCCCGCCCCGTC +CGCCTGCCGCCAGCCCGGCTCGGCTCCCCGCCTAGCGCGCCCCGAGCGCCGCTCACAGCC +GCCCGCCCAGCGCCATCTTGGAAGCTTGTGACGTCGGCGCCGCCCGCTCACCCCTGACCC +ACATCTGAATGGGCGAGCGGCGGGGCGGGGACAGGGGGCAGCTGGGGGCGTGGCCTGTGA +ACAGGGGCGGGGCCTCGGGGGCGGGGCCGGGCCGGACAGCGGTCCCAGCACTAGGCGGGC +GGGCTGCCGGGGTCCGGCGCCGTGGGGAAGGGGTGCGCGGGAGGAGACGGGGACCCAGAC +CCCGGACATCTAACCGGACTCCGACCTCAAGCGCCAGGGCAGGACCGCGACCTCGCCCCT +GAAATACCCGAACCGCATACCGGCCCCCGGGACAGGGACCCTGGCCCCCCCCGACAGGCT +GACGCCCACCCCCTCAAACTCTGGTGGACTTACCCCCTTTTAGCCCTACCCTGACCCCTA +GGAGCCCCGAATTAGGGACCTCTATCGGCCTACGCGCCCCCTCCCCGACCCCTTTGCGAC +CCCTGCTCGACGCTCCCTGCGGTTGCCCGAGGCTCAAAGGCGCAGCCAGCAGTGACTGCA +AGCTCGGGGGTCTGGGCTCCTGGGGAAGCCCGGGCTGGTTGGGTGCAAAAGAGAAGGGGC +GCCCCTCCCGTGACCCCAGCGCCCCTCGGGCCCCCGCGGGCGCACCCCCGCGAACCCTAC +CTGCTCCGAGGGCGCGGAGGACCCAGCACGCTGCGCTCAGCCAGCCCCTTCCGGTGGCCG +CAGCCCCTCGCAGGCCCCAGGGGTCAAGCGCCTGCCCGAGCCGGCCCACCAGGACCCGGG +CTCCCGGCCGCCATGCAGATAGCCTTCCCAGGGGCTGGGCTGGCCTGAGCCGCCACTGCT +TCTCTAGGGAGCTAGTTAATGGACCTCTCTCTACTTTGAACACCAAACAAAGGAACACAC +CGCTATCGAGGTCGCCCAAGTCCAAGAGGAGCCCAGGTCTGCCTTACAGGGAAGTTGTGC +CCCAGCTCCAGTCAAGATCAAAAACCAGTTTCAATAACCCTTCGCCAAGCTGATGATGCA +ACCTTTCTTTCCTAATGGATGTATAGGTTTAATGTCATCCTAACAACATTTCTTAGACTT +CACAAAATTCTAAGTTCACCTGGAAAAATAAATAGCCGAAAACAGCTATGGTTTTCCCCC +CTTGGGAACAAAAGCAGGCATGAGGGTAGACTTGCCCTAACAGATATCAAAACAAATTGT +AAAGCTACCGCAGGCATTTCAGTGTGGTACAGAACAGGAAATAGAATAGAAAGCCTAGAA +ACATCCTAGAAATGGTAAGAACACAATAGATGAAAAAGGAAATATCAAACATCTGTGGAA +AAAAAAGAGCTATCAAACATCCCTGAAGAAGAACAAGAATATTCCACAAATAGAGCCTAG +CAGAGGACTCACTATTTGAAGGAACGACTGAAAAAAAAAGTCAACCATTCTACTAACCCA +AGACATATAAATTAAAACTATAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACT +TTGGGAGGCCAAGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACAC +GGTGACACCCCGTCTGTACTAAAAATACAAAAAATTAGCCAGGCGAGGTGGCGGGCGCTT 
+GTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCCGGGGGGCGGAG +CCTGCAGTGAGCCGAGATCGTGCCACTGCACTCCAACCTGCGCGACAGCGAGACTCCGTC +TCAAAAAAATAAATAAATAAATAAAATAAAACTATAAGAAGCTGGGTTTTCTCCTAAACA +AATTAAAAATCAGTTTCAAGTGATAACCTTCAAACACAAGGTCTGATAGGTAATCACTCT +CATATACTTTTGGAAGCACATAAATCACCTCAAAATTCCTGGACATGAGTTTGGCGGTAT +GAGCCAAAAGTTGTTTTTTTGGGGTTCTTTTGTCTTTTTTTGTTTTTTGGAGACTCGGAG +TCTCACTCTGTTGCCCAGGCTGGAGTGTAGCCTCAAACTCCTGGGATCAAGTGATCCTCC +TGCCTTCCAAAGTGCTGGGATTACAGGCATAAGCCAACGCACCTGGCCCCAAAAATATTT +TAAAATAATTATACCCTCTCAGACCAGTATTCCAGGATTCTGTTCTGGGGAAATTAGCCA +CGTTTGCAGACCACGTACTTATTATCTGTCTCTCTACCCATAAGTAAGTTTGTACCTGTA +AGTTTGGAACCCATGCCTAGGAGCACACCTGAGACAACAGGCATTCCACGGTCCCTTGTT +TCATCTGCATTTGAAAACTAATGTGGCTGGCTCATGCCTGTAATCCCAACACTTTGGGAG +GCCAAGGCCGGCAGATCGCCTGAGGTCAGGACTTTGAGACCACCCTGGCCAACATGGTGA +AACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGATATCGCGGTGCATGCCTGTAGTC +CCAATTACTCGGGAGGCTGAGACAGGAGAATTGCTTGAATCTGGGAGGCTGAGGTTGCAG +TGAGCTGAGATCATGCCATGACACTCCAGCCTGGGCAACAGAGGGAGACTCGGTCTCAAA +AAACAACAAAAAAGATAACTAATATATGATAATATTTTTTAAGTGTCATGTATAATAGTG +AAAGAAAGCATATACAGGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAG +GCGGGAGGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGTAAAACCCC +GTTTCTACTGAAAATACAAAATTAGCTGGGCATGGTGGCGTATGCCTGTAATCCCAGCTA +CTTGGGAGGCTGAGGCAGGAGAATTGCTTGAACTCCAGAGGCGGAGGTTGCAGTGAGCTG +AGATCCTGCCATTGCACTCCAGCCTGGGCAACAGAGCAAGACTCCGTATCAAAAAAAAAA +AAGAAAAAAGCATATACAAATTTCTCACAATGGGGTTAAATAGTCCACATAATGGAATGT +AGATTAGGCCAGGAGTAGTGGCTCATGCCTGTAATCCCAACACTTTGGGAGGCCGAGGCG +GGCAGATCACCTGAGGTCGGGAGTTCGAGACCAGCCTGACCATTATGGTGAAACCCCGTC +CCTACTAAAAATACGAAAAGTTAGCTGGGCGTGATTGCGCATGCCTGTAATCCCAGCTAC +TCGGGAGGCTGAGGTAGGAGACTTGCTTGAACCCGGGAGGCGGAGATTGCGGTGAGCTGA +GATCGCACCATTGCACTCCAGCCTGGGCAACAAAAGTGAAACTCCGTCTCAAAAAAAAAA +TTAGCCAAGCATGGTGTCACATGCCTGTAGTCCCAGCCACTCAGCAGGCTGAGGCGCAGC +ACGAGAGTTGCTTGAACCTGGGAGGCAGAGGTTGCAGTGAGCCGAGGTGGTGCCACTGCA +CTCCAGCCTGGGCGACAGAGTGGGACTCCATTTTAAAAAAATAAATAGGCCAGGCGCGGT +GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCCAGGCAGGTGGATCACGAGGTGAGG 
+AGTTCAACACCAGCCTGGCCAAGATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATT +AGCCAGGCATGGTGACGGGCACCTGTAATCCCAGCTATTCGGGAGGCTGAGGCAGAGAAT +CGCTTGAACTCAGGAGGCGGAGGTTGCAGTGAGCCCAGATCACACCACTGCACTCCAGTC +TGGGCGACAGAGTGAGACTCGGTCTCAGAAAATAAATAAATTAATAAATAAATAAATTAA +ATTAGTAGGATGGGCACGGTGGCTCACTCCTGTAATCCCAGCACTTTGGGAGGCCGAGGC +GGGTAGATCACCCAAGGTCAGAAGTTTGAGACCAGCCTGGCCAACATGGTAAAAGCCCGT +CTCTACTAAAAATACAAGATTAGCCAGGCATGGTGACGTATGCCTGTAATCCCAGCTACT +TGGGAGGCTGAGGCAGGAGAATTGCTTGAGCTCCAGAGGCGGAGGTTGCAGTGAGCTGAG +ATCCTGCCATTGCACTCCAGCCTGGGTGACAAGAGTGAAACTCCGTCTCCAATAATAATA +ATAATAATAATAATAAAGTAAATTTTTAATTAAAAAATAAAATTTTTATTTATTTATTTA +TTGAGACAGAGTCTCACTCTGTCATCCAGGCTGGAGTGCAGTGGCATCATCTCAGCTTGC +TGCAACCTCTGCCTCCCAGGTTCAAGCAATTCTCTGCCTCAGCCTCCTGAGTAGCTGGGA +TTACAGGCGCCCACCACCACACCCAGCTAATTTTTGTATTTCTAGTACAGACGGGGTTTC +ACCATCTTGGCAAGGCTGGTCTGGACCTCTTGACCTCATGATCTACCCGCGTTGGCCTCC +CAAAGTGCTGGGATTACAGGCGTGAGCCACTGCGCCCAGCCAAAAATAATTTAAAAAAAA +AAAAAAAAGAAGAGGCCAGGTGCAGTGGCTCACGCCTGTAATCCCAGCACTTTGGAAGAC +TGAGGCAGGAGGATCACTTGAGGTCAGGAGTTCGATACCAGCCTGGCCAACATGGTGAAA +CCCCATCTCTACTAAAAATACACACAAAAATATTAGCCAGGCATGGTGGCACACCCCTGT +AGTCCCAGCTACTCGGGGGACTGAGGCAGGAGGATCACTTGAACCCAGGAGGTGGACGTT +TCAGTGAACCAAAATCACACCACTGCACACTCCAGCCTGGTCAACAGAGCTAGACCCTGT +CTTAAAAAAAATTGTATTTTTTGTACAAAAAAATTAGCCGGGCCTGGTGGCAGATGCCTG +TAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATGGCGTGAAAACCTGGGAGGCGGAG +CTTGCAGTGAGCCAAGATTACGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGATTCCG +CCTCAAAAAAAAATTTTTTTTTAATTGCATTTTTTAAATGCATGCAAAAATATTGATATA +TGGAAAAGCAACTGAAGAAAATAAATCAAAAATATTTACGGAGGTTCTCTCTGGAAGTTG +GGAATTTTTTTTCTACACATGTATACTTTAGCGTGTGTGTTATTTTTATTTTATGAAAGT +AACATTTCTAATCGGAACAATTCTAGAGATGCTGAAGCACATTTTATTTTTATTTTATTA +TTTATTTTTGAGACAGAGTCTAGCTCTGTTGCCCAGGCTGGAGCACAGTGGCGTGATTAT +GGCCCACTGCAGCCTCCACTTCCTAGGCTCAAGTGATTCTCCCAGCTGGGACTACAGGCA +TGCACCACCACACCCGGCTAATTTTTTATTTTCTGTAGAGACAGGGTCTCACTATGCTTC +CCAGGCTGAACTCAAACTCCCAGACTCAAGCAATTCTCCCACCTCAGCCTCCCAAAGTGC +TGGGATTACAAGCATGAGACACCATGCCCGGTAATTTTTTTTTTTTTTTTGTGCAGATGG 
+AGTCTCGCGCTGTCACCCAGGCTGGAGTGCAGAGGCGCGATCTCCGTTCATTGCAACCTC +CGACTCCCGGGTTCAAGCGATTCTCCTGCCTCGGCCTCCCAAGTAGCTGGGATTACAAGT +GAGCACCGCCACACCAAGCTAATTTTTGTAGTTTTAGTAGAGATGGGGTTTCACCATGTT +GCCAGGGTTGTCTCGATCTATTGACCTCGTGATCTGCCCACCTCGGCCTCCCAAAGTGCT +GGGATTACCGGCGTGGGCCACCGTGCCCAGCCCATGCCCAGCAATTTTTTAAAGTTAAAT +TCTCCCTCCTCCTCCACCCTAATCAGTCCTTTCTGAAGTAACATTAACAGTTTAACTCTC +TTCTCTGACATCAATATGTATCTACTCACCTTACAAAAAAGATCCTACTATTTTGAAATC +ATAGACATTTTTCTAGTTCATTTCCATAATTCATTCTTTTTTTTTTTTTTCATGTCAGAC +GGGTAATGTGCCTATGTCGTAACAAGATTTGAAGGTGGCGGCCGGGCGCGGTGGCTCACG +CCTGTAATCGCAGCACTTTGGGAGGCCAAGGCGGGCGGATCACGAGTTCAGGAGATTGAG +ACCATCCTGGCTAACACAGCGAAACCCCATCTCTACTGAAAATACAAAAAAATTAGCCGG +GCGTGGTGGCGGGTGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATGGCAT +GAACCCGGGAGGCGGAGCTTGCAGTGAGCCAAGATCGCGCCACTGCACTCCAGCCTGGGT +GACGGAGCAAGACTCCCTCTTAAAAAAAAAAAAAAAAATATTTGAAGGTGGCACATCTTA +CATGGGAACGTGTGTGAACACATAATCATCATGCTTATGAACTACAAAAGGATCATAATT +CATTCTTTAAGAGCTGACAAATGTTCCAGAGTATGGAGCTTCTGTAATTTATTCACACTT +CGTTAACCATGGTTGTTCAGTTTATTTACAGTGTGGTGCTTTTTGTTTTCTTTTTTCTTT +GTTCTTTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTTGCTCCTGTCACCCAGGCTAGAG +CGCAATGGCACAATCTTTGCTCACTGCAACCTCCACCTCCCGGGTTCAAGCAATTCTCCT +GCCTCGGCCTCCTAAGTAGCTGGGATTACAGGTATCTGCCACCATGCCTGGCTAATTTTT +GTATTTTTCGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACC +TCAAGTGATCCACCCGCCTCAGCCTCCCTAAATGCTGGGATTACAGGTGTGAGCCACCGC +ACCCAGCCTACACTGTGGTGCTATTAAAAAATATTTTTAGGCTGGGCACAGTGGCTCACA +CCTCTAATCCCAGCACTTTGGGAGGCCGAGGCAGGTGGATCACTTGAGGCCAGGAGTTTG +AGACCAGCCTGGTGATCATGAAGAAACCCTGTCTCTACTAAAATATTAGCCAGGCATGGT +GTCACATACGGATAATCCCAGCTACTCGGGAGTCTGAGGTGGGAGAATCACTTGAACTAG +GGAGGCAGACGTTGCAGTGAGTTGAGATCATGCCACTGCACCCCAGGCTGGACAACAGAG +CGAGACCTCGTCTTAAAGTAATTAATTAATTAATTAATTAATTAATTAATATAAATAAAA +TAGCAGATTTCAAAAATTTGTTCTCCCTGGCAGCAACCCAGTAAAGCTAGATCTGGGCGG +AAGGTAATATGGAAATATAAAAACTACAGATTAGCAGGGATGGGAAGACCCGCCAGAAAT +GGGTTCATTTTGTGTCCTCACTCCCTCCGGATTCTCTGTGTGTCTAGTCCAAGATCTAAC +CACTATGATCTGCTTTCCTCATCCATTTGGGAATCAGGGCACCACCCCCTTGGGAGGCTT 
+GAATGACATGAGGTTGAGCAGGCAGAGCACGGAACTCAGGCTCTGGTGAGCCCTGGTACA +GTGACTATGATTGTTGTGTCATGTAGGAAGGGTCTGTTATTTTACCATTATCTGTTTTCA +ATAAAAAACAGAAAGAAAACAAATATACAAAATACCCCCAAACAGTAAATTTCAGCTTCT +TACTATGACAGCTAGATTTAGCATTTAACAGTATGCCCAAATTTGAATTTCGGATAAACA +ACCAATAATGCTTAATATAAGTATGTCCCGGCCGGGCGCGGTGGCTCACGCCTGTAATCC +CAGCCCTTTGGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCAAGACCATCCTGG +CTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGCGGTGGCG +GGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCCAGGA +GGCGGAGCCTGCAGTGAGCTGAGATCGTGCCACTGCACTCCAGCCTGGGTGACAGCGAGA +CTCCGTCTCAAAAAAAAAAAAAAAAGTATGTCCCATACAATATCACTTCTCACCTTCCCA +TATCCCAAGCTCAAGACACCTTATATACGCCATGATTTCTCCTGCCATCTCTGGGCATTC +ACACCAGGGCTCCTCCTGTCCAGGCCAGCCTTCTCCCTTCTCCCACCTGTGCCTGGTGAA +CAACTCAGTTCAGACATCATCTCTTGTAGGAAGTCTCCTGTGATCCCAGGCTAATCACCC +TGATACAGTCCTGAAATACTGCTGAGCAGTTGGCATGAGGGCAGGAAGGCTTGGTCTTGT +TCACTCTTGTATCCAGCTTATACATGGCACAAAGTAGGTTCTCAATTAAAAGTCAAATGA +GGGGCCAGGTGCAGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCAAGGCGGGTA +GATCACCTGAGGTTGGGAGTTCGAGACCAGCCTGGCCAGCATGGTGAAACCCCATCTCTA +CTAAAAGTACAAAAATTAGCCAGACGTGGTGGCAGGCACCTGTAATTCCAGCTACTCGGG +AAGCTGAGGCAGGAGAATCACTTGAACCCGGGAGGGAGAGGTTGCAGTGAGCCAAGATTG +CGCCATTGCACTCCAGCCTGGGCAACAGAGCAAGACTCCATCTCAAAAAAAAAAAAAAAG +TCAAATGAAGTCGGGCACAGTGGTTCGCGCCTGTAATCCCAGCACTTTGGGAGACTGAGG +CAGGAGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAGCATATGAAATCCTGT +CTCCACTAAAAATACAAAAAAAAAAAAAAAAAAAATAGCTGGGTGTGGTAATGCACGCCT +GTAATCCCAGCTATTTGGGAAGCCGAGGCAGGAGAATTGCTTGAACCCGGGACACAGAGG +TAACAGTGAGGTGAGATCATGCAACTGCACTCCAGCCTGGGCTACAGAGCAAGACTCTGT +CTCAAAAAAAAAAAAAAAAAGTCAAATGAGACCACTCATGGTAGCTGGCGCCTATAATCC +CAGCACTCTGGGAAGCCAAGGCGGGAGGATTGCTTGAGTCCAGGAGTTTGATACCAGCCT +GGGCAAAACAGTGACATCTCATCTCTACAAAAAATTTTTAAAATTAGCCGGGCATGGTGG +CACTCGTCTATAGTACTAGCTACTTGGGAGGCTGAGGTGGGAGGATTGCTTGAGCCCAAG +AATTCGAGGTTGCAGTGAGCTATACTTGCGCCATTGTACTATAGCTGGGCAACAGCCAGA +CGCGGCCTCAAAAAAAAAAAAATTTTAGCCGGGCTTGGTGGCATGTACCTATAGTCCCAG +CTACTCAGAAGCTAAGACAGGAGGATCATTTGAGCCCAGGAATTTGACGCTGCAGTAAGC 
+CAAGGTCACACCACTGCACTCCCGCCTGGGAGACTGAGTGAGACCCTGAGTCTAAAACAA +ATAATAATAAAAACGAAAATGAAAGTCAAATGAATGAGGCCAGGGGTGGTGGCTCATGCC +TGTAATCCTAGCACTTTGGAAGGCTGAGGTGGGTGGGTCAGCTGAGCCCTGGAGTTACAG +ACCAGCCTGGGCAACATGGCGAAACCCTGTCTCTACAAATACAAAAAAGAAAAAAGCCAG +GCCCATGGTGGTGAGTGCTTGAAGTCCCAGCTACTCAGGAGGGTGAGGTGGGAGAATAGC +TTGAGCTCAGGAGGTCAAGGCTGCAGTGAGCTGAGATTGCATCACTGCAGCCTAGCCATG +ATGACAGGGCAAGACCCAGTAGAAAACCAAGGTTAGAGAGGTTAAGTCACCTGCCCAAGG +TCCGTAGCTAACCTCAGACTCCAGCCCCTCAGACTGACATGAGCTCAGTTCACCCTCTTA +GGAACACCATCTGAAGAATGCATCCCCAGCAAGCCTATTCCCTATAGCAAAGGCCAGGCC +TGTGATGGGTGCCCCGCTCCTCACCACACACCTCCCTGTCAAGTCACCACTCCTTCACCC +CTAGCAGAGCCAGGCACCTTGAGCCCAAGGGAGGCCACTCATGGAACTGTCATTCAACCA +ACAGATGCTGACAGCAATGATGAGGTGGGCACTGGGCTAGTGAACAGCACAGATGTGTTC +CCTGGCCTCAGAAGCCCACAGACTAAGGAGAAGACGACAGAATACTCACAAGGGTGCTGG +GGCTGGGCAAAAAGGTGCAGGCTGCTTTGTCTGGTCATAAGAGGGGAACTGACTTCCCTG +GTCTGAGGAGCCAAGGAAGGGGCCGTGCCCTTGCCATGCCCTCTGCCTGCCATGCCTTGC +TGTTCTTCAGTGCCTGCTCTAAGGTCACTCCTAGTGTGGTTTTCTCTGGCTGCTCTTTTT +TTTTTTTTTTTTTTTTGAGACAGAGTCTCGCTTTGTCACCCAGGCTGGAGTGCAGTGGCA +CCATCTTGGCTCACAGCAACCTCCGCCTCCTGAGTTCAAGTAATTCTCCTGTCTCAGCCT +CCTGAGTAGCTGGGACTACAGGTGTGCACCACTACGCCCAGCTAATTTGTGTATTTTTAG +TAGAGACGGGGTTTCACCATATTGGTTGGCCAGGATGGTCTCAATCTCTTGACCTCCAGA +TCAACTCCCCTTGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCC +ACTTTTTTTTTGGAGTCTCGCTCTGTCCCCCAGGCTGGAGTGCAGTGGCGCGATTTCGGC +TGACTGCCAGCTCCGCCTCCCGGGTTCACGCCATTCTCCCACCTCAGCCTCCCGAGTAGC +TGGGACTACAGGCGCCCGCCACCACGCCTGGCTAATTTTTTGTATTTTTAGTAGAGACGG +GGTTTCACCCTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCGCCCGCCTCG +GCCTTCCAAAGTGCTGGGATTACAGGCTTGAGCCACCACGCCTGGCCTCTTTTTCTTTTT +TGTTGAGACAGATTCTCACTCTGTCGCCCCCAGGCTGGAGTGCAATGGCGCAATCTTGGC +TCATTGCAACCTTGGCCTCCTGGGTTCAAGCAATTCTCTGCCTCAAGCCTCCCGAGTAGC +TGAGATTACAGGCGCCCACCACCACACCCGGCTCATTTTTGTGTTTTTAGTAGAGACAGG +GTTTCACCATCTTGGCCAGACTGGTCTTAAACTCCTGACTTCGTGATCTACCAGCCTCGG +CCTCCCAAAGCGCTGGGATTACAGGCTTGAGCCACCGCACCTGGCCATGCTCTTTCTTTT +TTATTTGTATTTTTATCTGTTTCTTAGAGACAGGGTCTCACTCTGTCATCCAGGCTGGAG 
+TGCAGTAGTGTAATCATAACCCACTGCAGCCTCAAACTCATGGGCTCAAGGGATCCTCCT +GCCTCAGTCTCCCAAGCAGCTGGGACTACAGGTGTGCACCACTATGCCCAGCTATTTTTT +TTTTTTTTGAGACAGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGTGCCATCTCG +GCTCACTGCAACCTCTGCCTCCTGGGTTCAAGTGATTCTCTGCCTCAACCTCCTCAGTAG +CTGGATTACAGGTACCTACCACCATGCCAAGCTAATTTTTGTATTTTTTTTTTTTTTTGA +GATGGAGTCTTGCTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCA +AGCTCCACCTCTCAGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTA +CAGGTGTCCGCCACCATGCCCGGCTAATATTTTGTATTTTTAGTAGAGACGGGGTTTCAC +CGTGTTAGCCAGGATGGTCTCAATCTCCTGACCTCGTGATCCGCTGGCCTCGGCCTCCCA +AAGTGCTGAGATTACAGGCGTGAGCCACCGTGCTCGGCCTAATTTTTGTATTTTCAGTAG +AGACACGGTTTCACCATCTTGGCCAGGCCGGTCCTGAACTCTTGACCTCATGATCCACCG +GCCTCGGCCTCCCAAAGTGTTGGGGTTATAGACGTGAGCCACCGTGTCTGGCCTGGCTCC +ACTTTCTTAGGGAGCTTTGCTTGCCCTCTGCTTGGGGTAGTTTGTGAATTCAGTGCTCTC +TCTCTCCCAGGTCCTGGGTCTCCTGCTCCTGATCAATCTGCCACGTGCCATTTGATCTCA +GAGTTGTCTCCACCATTAGACTGGCAGGGTCTGTCTTCTGGGGTCTGTGTGTAGGATACA +GTGGCCCCAGGATGGGTAAAGGAGGGGACCCAGTTGTTCTTCTGTGATAGGTGGAGACAA +GGGCCAGCAGGGCTGTTTCTCGGTGAAACCCATGGGGAGATAGTCGAACCAGGAACCAAA +TCCGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGTGGGC +GGATCACCTGAGTTCGGGAGTTGGAGACCAGCCTGACCAATATGGTGAAACCCCATCTCT +ACTAAAAAAAAATATATACATATATATACAAAATTAGCCTGGCGTGGTGGCACATGCCTG +TAATCCCAGCTACTCGGGAGGCTGAGGCAAGAGAATCACTTGAAACTGGGAGGCAGAGGT +TGCGGTGAGCCAAGATCGCACCATTACACTCCAGCCTGGGTGACAGAGACTCAGTCTCCA +AAAAAAAAAGTCCAAAAATTAAAAAAAAAAAAAATGCCGAACTCAGTGGCTCACGCCTAT +GATCCTACCACTTTCAGAGGCCGAGGCGGGCACGGTGGCGCGTACCTGTAATCCCAGCTA +CTCCGGAGGCTGAGGCAGAAGAATCACTTGAACCCGGGAGGCAGAGGTTGCAGTGCGCTG +AGATCGTGCCACTACACTCCAGTGTGGTGACAGAGCGAGACTTGGTCTCAAAAAAAAAAA +AAAGTCCTGGCTGTGCCACTAGGCCAGACGCCCTAGAACTGACTTCTATGGACCTCAGTT +TTCATACCTGTGAAATGGAACTCAGAGCCATATCTACTTCATTGGGTTGTGTGAATGTGA +GGTAGTTACAGGATGGAGCAGGCAGAAGGTCAGCACAGCCTCAGTGGTTGGAGGAGTGGG +GGTCAAGTGGACAGAGAGGGTACCCCAGGGCCAGAGGTAGAGGCAAGGTGAAGCAATGCT +TCATTTCCCTGCTGAGGTTCCTCTCTAGCAGTTTGGAGGTGGGGCAGGAGTGGAGAAAAC +ACATCCCCTCCTCAATATGGGCTGAGGAACAGGGTTCTTGAGGCACCAAGAACCTCTTTG 
+GGGCTCTGGCTTGCCTCCTGTAGTGAAAGGGATGGGCTGTGTGCAATGAACTCACACCTG +TAATCCCAACACTTTGGGAGACAGAGTCAGGAGGTCCAGAACAGCCTAGGCAACATAGCA +AGACCTTGTTTCTACAAAAAACTTTTTAAAAAATTAGCCAGGCGTGGTGGTGGCACCTGT +AGTCCCAGCTACTTGGGAGGCTGAGGTGGGAGGATTGCTTGAGCCTGGAGAGGTTGAGGC +TGAAGTGAGCTATGATAGCACCACTGTACTCCAACCTGGGCAACAGAGTGAAACCCTGTG +TCTAAAATAAAAAGAGGGCCTGGCGTGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGA +GGCCGAGGCAGGCAGATCACTCGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG +AAACTCTGTCTCTACAGAAGTACAAAAATTAGTCGGGCGTGGTGGCAGGCGCCTGTAATC +CCAGCTGCTTGGGAGGCTGACCTGGGAGAATCTCTCGAACCTGGGAGGCAGAGGTTGCAG +TGAGCTGAGATCACACCATTGCACTGCAGCCTGGGCAACAAGAGCGAAACTCCGTCTCAA +AAAAAAAAACAAACAAATAATAAAATAATAATAATAATAATAATAATAATAATAATAATA +ATAAAGAGGGTGGGAGTGGACCAAGGCCTAATGGCATGGAATTTGCAAACTTTATCTTCC +TAGTTTTCTCTGGGGAGCCTTCCTCCATCACCAGGGCCCAGCAGGGCCTCAGATGGCGGG +GGGTGGGGCGGGGGCTACACATATCCCTCATGGCCAAGATGCTAGGAAGCCCCTGAGCTG +GGGCAACAGCCTCTGGTCACAGTCATGAGGGAGGCTGGGTGCAGTAGCGCTTATCTTGCC +TGACCTTGTGGAGACTGACCACTCCCTCCTTCTGGAAAACGTTGCCCCCTGCCCTCTCCA +AGGCCTCCTCCTTCTTACATCCCCTGGTCCTGGCTAGGGCCTCAGCACCCAATTCTACTC +CTTGACCCACCACCCTGTGGTGCTTTTGCCGGGCCCCAGCCCACAGCCATCTGCCAAACA +CCTCCCCCTGGAGTCCCTGCGGCGTCTCTGACTCAGCAGGTCTGTGCCTGCCCCTGCTGT +TCCCCCCACCCAGTGGCTCTGCAGAGCACCCTGTTGCCCAAGCCCTGAGTACTGGCCACT +CCCAGGGCCTCGCCTTCCCCCACCACCAGCACCACCAATTTGAATTCCTATCTCTCCCTC +TGGCCTCCTCCCTAGACCCAGGCCCTGCCCCAGGGTGGGGGAAGGAATCCTACAGATAAG +GGCATTGGTCAAAGGGACACACCTGGGTTCGGGCCCCCAGCTCTGCCCCCAACCTGCTAA +ATGACCACACCCCTCTCCAAATCTGAGTTATTCCCTATGTAAAAGGGGGAAATAATTAAA +CTAGCTGGGTATGCTGTGCTGAGCAATATGTGAGCTCAGGAAAGGTCAGTTTTCCAGGGG +AAGTATGGCCGCTGAGAGCCTCAGTCTAGTGCTTCTCATTCACGAGCTGGTTGGTGCCTT +TCCTAAGTTGGAAATGGGTTAGTCCACAAATCCCGTTGCCTGGTCCATGCCAGGTCCAGG +TACCTGCATTCAGGGTGCAGGGTGGAGTAAGACAGTCTGAAGGGTGAGGTAGCAGGATGA +ATGAGTAATTCCTGGGTGCGTTTTGGGATTATGCCTAAGGGCTGTCCGAGCACGTGGGAG +GGGTAGCTAAATCCTGTAAGGGAGGGGAGAATGGGGAAGGCTGCCTGCAAGAGGTGGTGG +AGGGTGCTTTCAAAGGATACCTGGGAGTGAGCTGGCTGTAGGCACAGGGCAGCTGGAAGG +CCAATTTTAGGGAAGAGGAAATTGAGGCTGAAAGTCAAAAAACAAAGGAACTGCCTGAGG 
+TCTCTCAACTGAGAAGTGAGTGGGCCAGGGTTCACATTCAGCCCCCACTCCATTCTCAGG +CTGGGCAGGAAGGTACTTTGCCCTCAGGAGGGCACCTTCGACCCAAAGCACCCTGACCAA +GAGGCTGTGCACCGTGGCAACCCTAAGACTGAGGCCCAGGTCCTGGAGTTCCACCAAGGT +AGGTACAACCAGCCAGGCCGTCAGTGACAGGAGGATTTACTGGGGCTTCTAGGGACCCCG +TGAAAGGGCAAAAAGACGGCACAGAAGCCTGTGGATATCAGATATAGATAGTGAGGGCTA +GGGCTTTATGGGGAGGGTGGATGGGCCCTGTGGGCGGGGGATGTTTAGGAGATCTTCCAA +ACAGAGCATTCCAGCCTGGGGGAACAACCCATTATAGACCCCACCCAGCACCGAAGACAG +GGACAGGTGGGGCTGAGCCTGGAGGCTGGCAGGACCTGTGCGGCCGGCTGCGGTGTCAGC +AATGCCACCGTGTGTGGAACCCTCTGTGCCTAGGTTCCTGGAGCCAGCTCCACCTGGCAA +TGTGCTTTGTTTCACAAGACTAGAGTTGGCCCATCTTGTTCCGCCTCAGATGGAACACCT +GACCTCCTGCACACCCCACCCAGCCTCCAAATTCAACCACCCCAGTAGTGAGACAGGGAG +AGTAGGGTGTGACCAGAAGGTGGAATGAGCTTAAAATAGGCTGCAGAGATATCTCTGAGC +TTTTAAAAAGTCGTATCTTTCATATCGTGTTCTTGTGAAATCTTCTCCAGATTTTTTACA +ATGACTCTGAAATTGATCAGTTAGCCTCATGCTTTCAAAATCCATAGCCGCAGTGAGCGA +GAGGGGGCAGGTCAGCTAGAGGTGGAGGGGGCAGATGGGCCAGAGGCGCCTGGGCAGGGG +TGGATGGGGCCTGGACAGGCTTAGTGACCCTTGCACCAGAACTCTCCGGACACCGCCCCT +TCGCCCACTCGTTAGGCTCCTCTGCCTTCGGGCCGGCTGGGCTGCCCATCCCCCCTCGTT +CCCCTCCTGGGCAGGTCTGGACTGGACAGGTTTTGGGCCTCCAGGTCGTGCAGTCAGTCA +GGGAAGGGGCCTGGTGCAGGCCACAGCCAGGCATCCACCTCCTTCCCAGCCCACCCCTAC +CCCCCAGCAGCCTGCCAACTTGCTAGACCTGTGCGGCCTTCCCTTCTGCTGAAAGTCTCC +AGATTCTCAGGGCTCTGCACGGGGAGCCAAGCTGGGGCTTAGAACTGTGCTCCCTTGCCC +CCACCCCCGACCTGTGCCCTCATCAGCTTTCTTTTTTTTTCTTTTTTCTTTTTTTTTTTG +AGACACAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGTGCGATCTCAGCTCACTGC +AACCTCTGCTTCCCGGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGAGT +ATAGGTGCGTGCCACCACACCCCGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCA +CCGTGTTGGCCAGGATGGTCTCCATCTCCTGACTTGGTGATCCACCCGCCTCGGCCTCCC +AAAGTGCCGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTATTTTAAATTTTTTTTGTA +GAGACAGAGTCTCACCATGTTGCCTAGGCTTGTCTCGAACTCCTGGTCGCAAGTGATCCT +CCCCTCTCTGCCTCCGCCGTAGCTGGGATTGCAGGCACAAGCCATGACGCCCCCCTTCTC +ATCAGCTGTCTGGGTCTTTCTCGGGGTCCTGACACCAGGCCAAGTGTTCTGCACACCTGA +CCTGCTCAAAACAAAACCAGTTCTGCCACAGTCTGGGAAAATGAGGCTCAGAGAGGCCTT +GCAGCTGTCTGTGAGCAGGCTGGCATTTGGATCCAGTGACTGGGTTCTGGAGCCCAGTGG 
+CTCTGTGGCCCTGCCTCTCCAGCCTTATGCTTCTTTAAAACTGCCCATGGCCTCTGAAGT +TGAGGATTTAGGGGAGCAGGGCTGCCAGAGGGGTGAAGAAGGAGATCTTTCTGTGGGTGG +GTGTCAGAATGACCAGGCCTGACACTGGTCCCTGCTCCCCTGGAAGGACAGCCCCTCTCG +GTTATGTGGGGGGTGGGCTGCCTAGGGTGCTCTGGGGAGAAGGGAGGGAGAGAATGGGGC +TGGTCGGGTGGGGTGTGGATTCCAGGCCTGCTTGGTCAAAAAGATACAGTATTTCCTATT +CCCCCTTACACTGGGCAGGCAGGTGTGGCCCAGGCAGCTCTGCCAGGTGTGCAGCCCCTG +GAGAATGACCCTAAAGTGCACCTAGGCTGCCCCCTGGCGGCAGCATGAGGTCCTCCTCCC +TGGACTTGTAACCCTCCCAGGGGGAGGAGGTCCACACCAGCTGATGGGTTGGCACCTGTC +CCCAGTCCCTTTCTTCTTTCCTGTGCCACTGCTCCCAGAGTTGCCCAAAACACACGATCA +GGTGCAGTGGCTTGAACGAAACCCTGTCTCTACGGAAAATACAAAAATTAGCCAGGCATG +TTGGCTCATGCCTACAGTCCCAGCTACTTGGGAGGTTTAGGTGGGAGGATCGACAGCCCA +GGAAGTCAAGGCCGCAGTGAGCAGTGAGCTGTGGTTGCGCCACTGTACTCCATACTGTAG +CCTGGGTGACAGAGTGAGACCTTGTCTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCA +CTGTGTCATCCAGGCTGGAGTGCAGTGGCGCGATCGGCTCACTGCAAGCTCCGCCTCCCG +GATTCATGCTATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTATAGGCGCCCCTCAG +CCTCCTGAGTAGCTGGGACTACAGGCGCCCGCTACCACGCCTGGCTAATTTTTTTGTATT +TTTAGTAGAGACGGGGTTTCACTGTGTTAGCCAGGATGGTCTCGATCTGCTGACCTCGTG +ATCCACCCGCCTCGGCTTCCCAAAGTGCTAGGATTACAGGCGTGAGCCACTGAGCCTGGC +CGAGACCTTGTCTTAATTAAAAAAAAAAAAAAAAAGATAAAAACCTAGGTAACATGCTGG +GGGGTGATGAGGGTTATGGAGGAAAATAAAACAGGTGGGGGCTGGAGTGAAGAGTGGGGG +TGCCCCTCATAACTCTGACAAAGACTACAACGCAGCTGTCAGAAAAAAAGCTGATTTGAG +GCAATGGTCAGGGAAAGTTGTGAAATTAAGTAAAAAGTGATGGAACAGGCTAGGTGGGAA +GTGGTGGCTCACACCAATAATCCAAGTGCTTTGAAAGGCCAAGCAGGAGGATCTCTTGAG +GCCAGGTGTTTGAGATCAGCCTGGGCAACATAGCAAGACCCTGTCTTTACAAAAAATGTA +AAAATTAGCTGGAGGCTAGGCTCAGTGGCTCATGCCTGTAGGGAGGCCAAGGTAGGTGAA +TTGCTTGAGCCCAGGAATTTGAGACCAGCTTGGGCAACATGGCAAAATTCCATCTGTACA +AAAAAATTGCCAGGCATGGTGGCTCATGCCTGTAATCCTAACACTTAGGGAGGCCAAAGT +GGGCAGATTACCCGAGGTCAGGAGTTTGAGACCACCCTGGCCAACATGGTGAAACCTCGT +CTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGCGCATGCCTGTAATCCCAGCTAC +TCGGGAGGCTGAAGCAAGAGAATTGCTTGAGCCTGGGAGACAGAGGTTGCAGTGAGCCTA +GATCGTGCTACTGCACTCCAGCCTGTGTGACAGAGCAAGACTATGTTTCAAAAAAAAAAG +AAAAATTAGCTAGGTGTGGTGACATGCACCTGTAGTCCCAGTTACTCAGGAGGCTGAGGT 
+GAGAGGATCACTTGAGCCCAGGAAGTTGAGGCTATGATCGTGCTACTGCATTCCAGCCTG +GGTGACAGCCAGATTCTGTCTCAAAAATCAAAACACAACAGGGCACAGTGGCTCACTTTG +GCCTTCCAAAGTGCTGGGATTACAGGTCTGCACCACCATGCCTGGCCTGAGAGTGCATGA +ATTAAAGTTTCTATTCTCTTGCTAGTCTGTCATTGCTTCCCTAGTGTGTGGGAACCATAC +TTGTCCCCACTTCTCAGTCAGAGAGCAGCCTGCCAACCTCCAAGTCTCTTGCCCCTGACC +CAAAAGGTCCCAGGCTGTCCTCTCGGCAGAAGCAGCTGTCTTGTGGCTTACTGTGCTCAG +ACTCCATTTCTAGGCTGTGGGCTCATCAGGGATCTAGTGCTTTGAAAAAGTGTAAAGGAG +AGCAGCAGTATTATTCACAATAGCTAAAAGGTAGAAACGACCAAAGTATCCATTGACAGA +TGAATGGATAAACAAAATGCATTACATACACACAATGGACTATTATTCAGCCTTAAAAAG +GAAGGAGGCCAGGTGTGGTGGCTCACACCTATAATCCCAGCGCTTTGGGAGACTGAGGCA +GGCGAATCACTTGAAGCCAGGAGTTGGAAACCAGCACAGACAGCATGGGGAAACCCCCTC +TCTACTGAAAATACAAAAGTTAGCCAGGTGTGGTGGCACATGCCTATAATCCCAGCTACT +CTGGAGGCCAAGGCATGAGAATCGCTTGAACCTAGGAGGTGGAGGTTGCAGTGAGCCAAG +ATCATGCCACTGCACTCCAGCCCGGGTGACAGAGTGAGACACTGTCTCAAACAAACAAAC +AAAAAAAAAGAAACGTGTGGTGGAAGAAGGGAAGAAAATTGGCCAGGCACACTCTGGTAG +GGGATGGTTGGATGATGAAATACAGACAATAGAAAAACCTAGATAAAGACCTGGCGTAAT +GGCTCATGCTTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGAAGGATCTCTTGAGGCCA +GGAGTTTGAGACCATCCTGGTAAATATAGTGAGACCCCAGTCTCTTAAAAAATAATAATT +GGCTAGGCGCGATGGCTCACGCCTATAATCCCAGCACTTTGGGAGGCCAAGGCGGGCGGA +TCACCTGAGGTCGGGAGTTCGATACCAGTCTGGCCAACATGGAGAAACCCCATCTCTGCT +AAAAATACAAAATTAGCTGGGCGCGGTGGTGTGTGCCTGTAATCCCAGCTACTCGGGAGG +CTAAGGCAGGAGAATCGCTTGAACCTGCAAGGTGAGGTTGCAGTAAGCTGAGATTGTGCC +GTTGCACTCCAGCCCAGGCAACAAGAGTGAAACTCCATCTCCAAAAAATAAAAATAAAAA +TAAAAATAAAAAAGAAATAGGCCGGGTGCAGTGGCTCACGCCTATAATCCTAGCACTTTG +GGAGGCCAAGGTGGGGGCGGGGTGGATCACTTGAGGTCAGAAGTTCGAGACCAGCCTGGC +CAACATGGTGAAACCCAATCTCCACTAAAAACACAAAAAATTAGCCGAGCATGGTGGTGG +GCACCTGTAATCCCAGCTACTCGAGAGGCTGAGGCAGGAGAATGGCTTGAACCTGGGAGG +CGGAGGTTGCAGTGAGCCAAGATCACCCCACTGTACTCCAGCCTGGGTGACAGAGTGAAA +CTGTCTCAAAAAATAAATAAATAAATAAATAAAACTTTTAAAAAGTAAGAAGAAGAAGAA +AAAAAATATATGGAAATTAAAAAACAAGAAAAAAATAATAGGCCAGGTGCAGTGGCTCAT +GCCTGTAATCCCAGCACTTTGAGAGGCCGAGGTGGGCGGATCATGATGTCAGGAGTTCAA +GACCAGCCTGGCCAACATAGTGAGACCTCGTCGCTACTAAAAATAGAAAAAAATTAGCCA 
+GGCGTGGTGGCGGGCACCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACT +TGAACCCATGAGGCGGAGGTTGCAGTGAGCCGAGATTACGCCATTGCACTCCAGCCCGGG +TGACAGTGCGAGACTCCATCAAAAAAAAAAATAAATAAAATAAATAAATATTAATAATAA +TTTTAAACAATTAAAAAATATGGGATTTTTTTGAGACAGAGTCTCACTCTGTCGCCCAGG +CTGGAGTGCAGTGGCATGATGTCAGCTCACTGCAACCTCCGCCTCCTGGGTTCAAGTGAT +TCTTCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCACGCGCCACCACGCCCAGCTA +ATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATATTGGCCAAGCTGGTCTGGAACTC +CTAACCTTGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGGAACTATAGGTGTGAGCCA +CTGCACCCGGCCAAAAAATATGTTTTTTAAAATAATAGAGATGAGGCCGGGTGTGGTGGC +TCACACCTGTAATCTCAGCACTTTGGGAGGCCAAGGTGGGTGGATCACTTGAGGTCAGGA +GTTCGAGACCAGTCTGGGCAACATGGTGAAACCCTGCCTCTACTAAAAATACAAACCTGA +GCTGGGCATGGTGACGCATGCCTCTAGTTCCAGCTACTCGAGAGGCTGAAGCAAGAGAAT +CGCTTGAACCCGGGAGGCGGAGACTGCAGTGAGCCAAGATAGCACCACTGCACTCCAGCC +TGGGAGACAGAGCAAGACCCTGTCTCAAAAAATAAAATAAATAAAATAAAATAAAATAAA +ATAAAATAAAATAAAATAAAATAAAATAAAATAAAAAATAAAATAAAATAAAATAAAATA +AAATAAATAAAATAGATGAAGGCCAGGTGCAGTGGCTCACGCCTGTAATCCCAACACTTT +GGGAGGCCAAGGCAGGCAGATCACCTGAGTTCAGGAGTTAGAGAGCATCCTGGCCAACAT +GGTGAAACCCTGTCTGTACTAAAAATACAAAAAAATTAGCCAGGCATTGTGGCGGGCGGC +CAATAATCCTGGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTGAACCTGGGAGGCGGA +GGTTGCAGTGAGCCAAGATGGTACTACTGCACTCCAGCCTGGGTGACAGAGCAAGACTCT +GTCTCAAAATAATAATAATAGGCCGGGCACGGTGGCTCACACCTGTAATCCCAGCACTTT +GGGAGGCCGAGGCGGGCAGATCACAAGGTCAGGAGATCGAGACCATACTGGCTAACATGG +TGAAACCCTGTCTCTACTAAAAATACAAAAAATTAGCCGGGTGTGCTGGTGGGCGCCTGT +AGTCCCAGCTACTTGGGAGGCTGAGGCAGGACAATGGCGTGAACCCGGTAGGCGGAGCTT +GCAGTGAGCCAAGATCGTGCCATTGCACTCCAGCCTGGGCGACAGAGCGAAACTCCACCT +CAAAAAATAATAATAAAATAAAATAAAAATAAAAATAATAATAATAGAGATGAGGTTTTG +CCATGTTACCCATGCTGGTCTCCAACTCCTTGGCTCAAGTGATCCATCCACCTTGGCCTT +CCAAAGTGCTGGAACTACAGGCTTGTGCCACCACCCTGGCCTTAAACAATTATTCATGAA +AGGAAGGAAATTCTGACACATGCTGCAACATGGATGAAGCTCAAGGACATTATGCTAAGG +GAAGGAAGCCAAAGACCTGCCAGCCGAGGTCCCAGTCCGTCAAGGGCTCCAGTGAGCGAG +CAGGATTGAGCAGGTCCCTGGGCTGAGCAGTGGGAACTCTGCTTTGCTGTGAGTGTGGCA +CGGGTGGCGGCAGACTGTGGAGTGCAGGCTCTGTGGAGCAACTGGACACTCTGTTGAACG 
+AAGTGCAAGGTGGTAGGTTTTTGCTTTTTTTTTTTTTTCTGAGACAGAGTCTCACTCTGT +CTCCTAGGCTGGAGTGCAGTGGCATGATCTCGACTCATTGCAACCTCTGCCTCCCGGGTT +CAAGCAATTCTCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCGCGAGCCACCACAC +CCAGATAATTTTTAAAATATTTTTGGAAGAGACGGGGTTTTACCATCTTGGCCAGGCTGG +TCTCGAACTCCTGACCTCGTGATCCACCTGCTTCAGCCTCCCAAAGTGCTGGGATTACAG +GCATGAGCCACCGCGCCCGGCCACAAGGTGGTAGTCTTTCTCAAGGACGCTGGGGCAACC +CCATTGCGTCTTCACCAACACCCTGGTACAGATCCTGCAGCTGCAGTCTACCCCGCATAC +AACATGTTCAATGATCCCTAGCTAGACAAGGCATTGAAAACTATTCCATAGGCCGGGTGC +AGTGGCTCCTACCTGAAATCCCAGCAGTTTTGGAGCAAGGCGGGTAGATCATCTGAGGTC +AGGAGTTCGAGACCAGTCTGGCCAACATAGCGAAACACCGTCTCTTCTAAAATACAAAAA +ATTAGCTGGGTATGGTGGTGCATGCCTGTAATCCCAGCTACTTGGGAGGCTGAGACATGA +GAATCACTTGCACCAGGAGGCGGAGGTTGCAATGAGCCGAGATCACACTACTGTACTCCA +GCTTGGGTGACAGAGTGAGACTCTGTCTCAAAAAAAAAGAAAAAAAAAATTAGCTGGGCA +TGGTGGCATGCACCTGCAGGCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAA +CCCAGGAGGCTGCAGTGAGCCAAGATCGAACCACTGCACCACAGCCTGAGTGACAGAGTG +AGACTCCATCTCAAAACAACAACAACAACAAAAATCAACTATTCCTTGGGAGGCAGAGGC +AGGAGGATGGCTTCAGGCCAGGAGTTGGAGCCAAGCTAGGCAACATAGCGAGAATTCCTC +ATCTCTACACACACACACACACACACACACACACACACACACACATTAATTAGCTGGGTG +TGGTGGCACATGCCTCCAGATACTCAGGAGGCTGAGCGGGAAGGATGGTTTGAGCCCTGG +AGGTTGAGGCTGCAGTGAGCCATGATCACGCCATGACAGAATGAGGACCAGGTGGAAGAA +CTGAAAACGCTGGGGATCCACACATCCGCTCCATCCTTTCAGATGGAAAGAAATACCAAG +ACTCAAAAAAATGAGGGTGCCCAGGTCCTCACTGAGCAGAGACTCACTGCTAAAAAAAAG +CCTTACCTATTTGGGTTTTCACTAGTAAGCAGTTGGTTTGTAAGCAGTTGGTGATTTTAG +TTTGTCTGGGTTTCAGCCATGAATATTCTATTGTAAACTTAATTATAACAACTGCACTGT +AATAATTCAATGTCCTATTATGATGTTGTTATAGACAAAATTTGCCTTTACATTGTCATT +TATTTTATTTTATTTTTCTTTTGAGACAGGGTCTCACTCTGTCACCCAGGCTGGAGTGCA +GTAGCTCAATCTTGGCTCACTGAAACCACTGCTTCCCAGGCTCAAGCGATTCTCCCACCT +CACCTTCTCGAGTAGTTGGGACCATAGGTGTGAACCACCATATCAGGCCAATTTTTGTAT +TTTTAGTAGATACGGGGTTTCAACATGTTGCCCAGGCTGGTCTTGAATTCCTGTGCTCAA +GCGATCCACTTGCCTCGCCTCCCAAAGTGCTGAGATTACAAGTATGAGCCATTGACATTT +AATCTTCCTTCCTTCCTTCTTTCCTTCCTTCCTTCCTTCCTTCCTTTTCTTTTTTTGAGA +TGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCACGATCTCGGCTCACTGCAAG 
+CTCCACCTCCTGGGTTCAAGCATTTCTCCTGCCTCAGCCTCCGGAGTAGCTGGGACTATA +GGCGCAGGCCACCATGCCCAGTGATTTTTTATTTTTATTTTTTTTGAGACGGAGTCTCAC +TCTGTCACCAGCCTGGAGTGCAGTGGCGCATTCTTGGCTCACTGCAACCTCCACCTCCAG +GGTTCAAGCAATTCTCTGTTTCAGCCTCCCGAGTAGCTGGGATTACAGGCACCCACCACC +ACACCCAGCTAATTTTTGTATTTTTAGTAGGGATGGGGTTTCACCATGTTGGTCAGGCTG +GTCTTGAACTCCTGACCTCGTGATCCACCAACCTCGGCCTCCCAAAGTTCTGGGATTACA +GGCGTGAGCCACCGCACCTGGCCTACTTTTTTAAACTTAAATTTTAAAAAAAATTAAGTT +TATTTTTAATGTGATTTATTTATTTATTTATTTTTTAGATGGAGTCTTTCTCTGTCGCCC +AGGCTGGGGTGCAGTGGCACTATCTTGGCTCACTGCAACCTCTGCCTCCCGGGTTCAAGT +GATTCTTCTGCTTAGCCTCCCAAGTAGCTGGGGCTACAGGTGCATGTCACCACGTCCAGC +TAATTTTTTTGTATTTTTAGTAGAGACAGTGTTTCACTGTATTGGTCAGGCTGGTCTTGA +ACTCCTGACCTTGTGATCCGCCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGTGTGA +GCCATGGCACCCGGCCTATTTATTTATTTATTTTTTAATTTTTTTTGAGACGGAGTCTCG +CTCTGTCACCCAGGCTGGAGTGCAGTGGTGCAGTCTCGGCTCACTGCAAGCTCCACCTCC +CGGGTTCATGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCC +ACCACGCCCAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTTCACCGTGTTAGCCA +GGATGGTCTCGATCTCCTGACCTCGTGATCTGCCCACCTTGGCCTCCCAAAGTGCTGGGA +TTACAGGCGTGAGCCACCGCGCCCGGCCTCTTTCAATCATTTCTGATGACACCGGAGAAG +GCAGTGTCCTTTCCTTATTGTCTCTTTTTTTTTTTTTACCTGGGTATTCTGGAAGTGTCT +TTCCTCTGACAGTGTTGTTGGACGTGGGGAAACAAAAGGGTATCAAAGAGAGGGAGAGAA +GGATAAAGACCACCCCTCATTCCCGCCCTCCCAGAGGGGCAGCCTGGGGAGTCTATGGAG +GAGGTGACTTTGAGCCATGATGTGACCTTAGATGAGGGGCCTCTCTGAGCCTCAGGAAGC +TCTTCTGCAAAATGGGGCAAATAAAGAAATGCCCTCTGTGTGCAGTGGCTCAGGCCTGTA +ATCCCAGCACTTCGGGAGGCCAAGGCAGGAGAATCACTTGAGCGCAGGAGTTCCAGACCA +GCCTGGGCAACATAGTGGGACCCCATCTCTACCAAAAAATTTAAAAATTAGCCAGGCATC +AGGTGTGGTGGCTCAGGCCTGTAATCCCAGCACTTTGGGAGGTTGAGGCAGGCAGATTGC +TTGAGCCCAGGAGTTTGAGACCAGCCTGGGCAACACAGTGAGACCCCTGCTTCTACACAC +ACACCAAATTAGCCAGGTGTGGTGGTGCACATCTGTAGTCCCAGCTACTTGGGAGGCTGA +GGTGAGAGAATCATTTGAGCCCTGAAGCTCAAGACTACAGTGAGCTGTGATTGTGCCACT +GCCCTCCAGCCTGGGCAACAGACCAAGACTCTGTCTCAAAAAAAGGAAAGGTCCAGGCGC +GGTGGCTCATGCTTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGCGGATCATGAGGTC +AGGAGATTGAGACCATCCTAGCTAACACAGTGAAACCCCGTCTCTAGTAAAAATACAAAA 
+AATTAGCCAGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGG +AGAATGGCGTGAACCCGGGAGGCAGAGCTTGCAGTGAACTGAGATTGCACCACTGCACTC +CAGCCTGGGCAACAGAGCAAGACTCCATCTCAAAAAAAAAAAAAAAAAAAAGTGGAAAAG +AAGGCCGGACGCAGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGCA +GATCACGAGGTCAGGAGTTTGAGACTAGTCTGGCCAACATAGTGAAACACTGTCTCTACT +AAAAATACAAAAAATTAGCCAGGTGTAGTGGTGTGCACCTGTAATCCCAGCTACTCGGGA +GGCTAAGGCAGGATAATTGTGTGAACCCGGGAGACAGAGGTTGTGGTGAGCCGAGATTGT +GCCATTGCACTCCAGCCTGGGTGACAGTGCGAGACTCTGTCTCCAAATAAAAAAAAAAAA +GGAAAAGAAATTCCATCCCAAGGTGATCCCTCACAGGAAGTGACTCAGAAAAGCAGTGAG +CTACGAAAGCTATCTGAATGAAGCAAGATCGTCATGTGGGAGACACTGACTACCCACTCC +TAGCAAAGGGCTCAGCAGCACACGGCCTCCCTGCGGGGATGCCCTCGGGGAAGATGTGGC +CCAGAGGAGTTTTTTGGGCCTTGCTCCTCAGTCCTGGCTCTTAGTAGGACCCTCTGCAGC +CAAACTAGCATATCCTGAGCCAGGCTGACCTTAAAAAGTAAGCATTCAGGGCCTAGGGAC +CTTGGGCAAAGCAGAGAAGCTGGTGTCAGATGAGGTGCAGCAGGTACAGTAGGATAAGGT +GTTCTCAGGTCGCCAGTGCAGCCCCAAGATGAGCCTGCAGTATTTTCCTTACATGATCTG +GTCCTACTGTGGGCAGCGCTGCTGCCCAGAGCCTGAGAGGATTATGAAAACATGGCAACG +GAAGTGAGGCCAGGGGACACAGCATGGGGGCATGAGGAGCAGGGGACAGCGGGTGGGGCT +AGAGGAGAGGGAGGTTTAGGGAGGCCTCTGCTGCTGTGACTGTGAGCCCCAGCCAATGAT +GACGTGGCCACCGCACAATCTGAGTTCTGGGAACCATGTGATGGAATGTGTTCTGGGAAT +AGACTGCAGCCAGGAAAAAAAAAGCAGAAGGGTGCACCCTCCCTTCCTCAGCCAGGAAAA +CATTTCTGTAGCCCTTGCGGACCCAATGCGCAGAAGTGAGAGGGAGGAAAGCCAGGCTCA +TGTTTCCTTGGTTGTGCTTCAACTTCTAAAATATGTTTTTTCTATGAACATTTTAATGAG +GGGTTGGACATAAGACAGAAAAATAACATTCTGAGAGCTCGTTTGGAGGTTCTAGCAGGG +GAGCGCAGCTACTCTTATACTCTTGACTGAAGACCGATCGTCCTCTATCGGGGATGGTCA +TCCTCTTCCATTGAGCACACAGCTTCTGGAGGGACGCACATGGAGTGGTGAGGGAGGAAG +GGGATACCCGCCTAGTCAGCCAGATCAGCCGAATCAACCCTGATGATCAATGGGGTGACA +CATGTCGCAGCCAGATCACCCTCACATCCCAAAAATAACACTCAGATTGCTTTTTCTTTT +TTAAGATGTAGACTTTTCTTGAGACTTTCGTACAAGGCAATGGACAGATAACCTTAACCT +CCCAGAGAAAACACACATTGGCCACAGAAAAAGGCGCTTCTCCGTATACCTGCCATAGTT +ATCAGCAACTTTGAAATGGCTTCTCAGACTCTCAGGGGTTGGCTTGTTGTGTTTGCTTTT +GCAAATAACTCAGGCTTAAGTCCTGCTACTTTCAGAGCCTAGGGGACGCTTAAGGACTAC +CATCAGTTTAACACAGTGGCACTTGGAGCACATGCTACTTATGGTGGTGAGGAGGGGTGC 
+TGGTGTGAGGGTCTGAACGCTTGCTTCTCCTCTGGCAGGGTGTGAGCAGCCAGAGGCCAC +TACCTGGGCAAGGATGCTTGCTCAGTGTCCATGGAGGGACATGTGATCCAGCAGGTGGGC +AGGAACCAGGCCCCTGTGGCCGCCCAACTCTCCATAATAGAATCCTTGGTCATCCATGGG +CCCGTAAACCATGACCACGTCTCCTGCCCTCAGCGCCAGCCTGCCCTTCCCCTGGCCCCC +CATTTGCCCATCCCCAGGATCATAGTCCAGAGCTGCTATCATGATCTTTGGAGTCCACAG +TGGGAGTCTCTTGGAGTTCCCCTGGAGCACCAGGGAGGAACCCTGGGGGATGGTGAGCCC +CTGAAAGTCCTCGAGGTGGGCCACAGAAGGCAGGTTCCCTTGGGCCGGAGAACGCCACCT +CCTATCAGTCTGCTCTGTCCCCACCTCCATCTCAGCCACTAGGCGCCCGGGGATATTGCC +CACTTGCCTGTTGCACTCGCTGAGGTAGAAATCATGGGTGTCCTGAGAGCCCCACACTCT +TAGCAACTGCCTTTTCTGGAAGACCAGCTCCTCCTCTGCAGCCTTGAGGTTGGCAGACAT +CACCAGGGGGTTGTAATCAGAGAGGGCCACAAAGACCCTGGCTGGAGTGTTGGCCCCCGT +CCCCAGCTGTTGGGGGCCACCCCTGGGCATCTTGATGACTTTGGCGGATGGAGCTGGACA +CAGTGCCGAGCTGGGCTCATGGAGCTGGCACCCTCTCTTCACCCCCAGAGCTTGTCCTTG +CCTGCTGTGGGGCTCAGGCTCCCTCCTCTCCTCTCGCCTCTCTGTGCCCCACAGATCCAA +GCACAGTGCCTCCTGCTCCTCCTTCAAAACGTTATGGAAGTCAGATGCATACTGTTGGCT +GGCGCCCAGCTGGGGAGGTGTGAACCCTTGGGCATCTTGCTTTTGCCGAAGTACCCTCTC +AAGGGCAGCCTTTTCCTGACACGGTTCTTTCCTGGGCCCACACTCGGTGCGTAGATGGAT +GAATCCTGGAGCAGGGCTTTTGCTGGTGCCCATGTGCTGGGAGCAATTTTCCTTCTCCCC +AGGCTGGTCACTGACTGAAGGTGGCCTGTGGTTCTGGGGACTCTTCTGAAAGAGCAGGTC +CTTTCTACAGCCCTCCCAGGCCTCTGCAAGCTCCTGGGCTTGGCTGCCAGCCCCTGAACT +CGGACATTCTCCTTCTGAGCCCAGGTTGGACACTGGGGATTGCCTCCTTGGGGGTTCTTC +AAAGAATGCTTCTAGGAACTTGGCCTGGGGCTCCCCGCAGCTTCCAGGGTTGTGGGGGCT +GGCCTTGGCACTCGGAGGAGCCAGTGACAGCTTCTGGGGGCAGACGGGGAAGGTGACTCT +GTAGGTGGATGGGTCGCCACAAGTGTAGCTAAAGGGTGGAGTCTCTGGCCATCGGTGACA +CATGAAGAAGTCCTCGGGGATCTGAGCAGGCACTGAATCCAGGGACTCACCACAGAGTGA +CATGGTTCTCACTGAGACCTTCTGCCACGTGAGGGGCACCTGTAGCTGGGAGAATTCCAA +TAGGGTGCTCCCAGCAGTGGCATCGGCGACCTCACAAACCTTAAGCCCATCTGCATACAC +AGCATAACCGGTGACCTGGACTCCATTGGAGGACCCAGCTGAGTCAATGGTCACAGGGAG +CCAGCTGACCACCAGGACACCTGGCGAGGCATGGCGCTCCACCAGCACATCCAGCGGTGG +GTAGGGAGGTCCTGCCAAGAGTGTGTCGAAGGTGACGGTGGAGGACATAGTTCCCCAATA +CACCTGCAGCAAGTCCCGTGGCAGCCGCACCTCCACCCGCGCCCGGTAGTGCGTGCCGGG +GCACAGGCCCTGGAAGGTGTAGCAGCTCACGCCCGCTGGGGTCAGGGCATGCTCTCGGTC 
+ATCAAGATATACCACATGGGGGTGGCGGTGGCTGCTGTAGACCCAGGTGATGTTGGCTGA +TGTGGCTGTGACATTCTGCAGGTGTAGCTGCATGGGAGCCATACGGAGCACCCCTTTGGT +CCCCAGAAGGGGTCTGGAGAGGCCCTGCTTCCCCATGCTCTGCAGCAAGCCCAGCTGGCA +GGCTTCCGTCTTGGTATCCAGGATCTCTGTTGCTACTTCTGTCTTGGAGCCCACCCTGAC +CATCTGGCACAGCCCTCTGTCCACCACTCCCTGGGCTTTCCCAGATAATAAGCTGTCTTC +CTCCAGAGCTTCATCCTGCCCCGCTGGGAGTTGACTGGGGCCAAGATCAGGGGATTTGGC +AGGCAGGCAGCCTGGGATGTAGCTGTCCGGAATCTGCTCCACGAAGTTGGAGGGCACCAG +CCCCCGCCGGCCATCCTCAAGCTCCCCCTCATAGAAGCCATCCTCATCCATGTCCCCGAA +GATATATATGTAGTCCCCAGCTGTGAGGGGCAGCTCACCCTCAGGGTGATCATTGGGCCC +CTCAAATGGGTTGTAGTTATACTGAGCCATGAAGATCTTGAGCTTGGGGGCAGCAGGAGC +CGCCGAGCCCCCCATTTCCAGGGCCAGGGACACGCTGTCAGGCTCCAGGTCATCCACCTC +ACTGGCTGTGTCCCTGTCCAGAGTAGGGGAGGACGGCACGGTGGCCCACATCGACCCCTC +AGAGGAGGAGTTTGACTGGGAGCTGGTTTTCTTGGACAGAGGTTGGCTGGCAGGGACTGT +CTCTGAAACTTGGGGGACACTGGCTGACTCCCCAAGGGCAACTGGGCTCTCCTCGAGGGA +GGCTTCCCTTTTCTCCTGTATGTCTCTGTTGGGCTGTGACACTGCGTAGTCTTCAGGCCG +TGCGTGGAACTTGGGCCTTCTGCTACCTTTAACTTGGGACTCCGGCATTTGACTGGTCTC +CCAGGGCTGTTCAGGAGGGTGACCAGGCCGACACTGGAGCGCCTGTATCTCCTTCCAGGA +AGGATGGAGGTCACACAGGGCCTTCTGCGGGTCGCGTTGCAGCTGCTGCTGCCTGTCCTG +CCCGCGCGCCGCCTGCTGCAGCAGCTGTTCGGCGATCAAGCCGGAGGCATCGCGCTCTTG +ACACGCGCGGCCCAGGTGGCCGCGCACTTCGCTATTCTCAGCCTCCACCTTCCGCACCCA +GTCGGTCTTGGCCTGCAGCCGCCCATTCTCCTCCGCCAGCCAGGCGTTTTTGAGCAGCGC +CGCCTGCAGCTGTGTCTCGGCCTCCTCGCCACGTCGCCGCGCCGGGGCCGCCTGCGCGCC +CAGCTCCTGGCACTCGCGCCGCCGCTGGTCCAGCTCGCGCTCCAGCGCCAGCATCTGCCG +TCGCACCTCCTCGCAGGTTGCGCTCTGGCCGCCCGCCTCGGGCCAAGCGCCACCGTTGCC +CTGCTGAAGCATCAACTGCCTCTGCAGGCGCAGCACTTCCCGCTGGGACTCGCGCTGCAG +GCGGTCTAAGTCTCTGACGTTGAGCCACTGGGTGCAGGGGCCGCCCCTTCCAGTATGCAG +GTCGCTGCAGGGACCCGAGGCGACACGCGCCTGCAGGAGGTGGCACTCCTGCCGCAGCTC +TTCGATCTGCTTGTCCTTGGCCAGCGGCGCGCTCGCCTGCTCCGACAGGTCCCGAGCGCG +CTGGCGGGCAAAGACTTGGCACAGCTCCAGGCCGGCGCAACTCTCGCCGGGTATAGGCGC +GCTCACGGCCCGGAGGTTGGTCTCCTGCAGCTTGCGGGCGCGGTCAGCTAGGCGCCGCGC +GAGCCCGGTCAGCTCCGCGCGCTTTACCTTGAGCCGCTTCACCTTCTCGTCTGCCTGGTA +GGGGCAGCCTGCACGCCGCAGCTGCTTGTTCTCCGCTTGCAGGGTATAACAGCGGCGCGC 
+CAGCACCCACAACGCCTTGGCCAGCAGCCAGTTCAGCTTTATCAGCTCGTGGTGGCCCAG +GCCCGGGGGTGAGGGAGTCAGGACTTCGCAGGGCTGGCTCTCAGAGCCTTCTCCTCCCCG +CGGGTTGCTGAGTTTCCTGTGGGCTGACGGTGGTGGTGGCGGCGGGAGTGGCGATGGTGG +CGGCGAGAGCCTCCGGGAGCCAGGGGTGGAGGTGTCTGGTGAGGAGGGCCGCTCCTCGGA +TTTGGAGGCCTTGGGGAGGCTCCGGGTGCTGTCAAGCGAGCGGGAACACGCAGGTGCGAA +GCAATCAAGGGAGCTGGCGTGCGTGGAGAGCAGGCCGTCTGGGGAGCTGGAGCACGCAGA +GGACACCAGGCCTAGCGAGCGGGAGCGCACACCGACTTCAGCACTCAGGCTGTCTAGAGA +TCCGAGTTGGCAGGCGGGTTTGGGGGCGTGGCAAGAGCCGCTGGAGGTCTGGGGCAGCGG +TTCTTCCAAGGAATGCACAGCTTGGGGGTCCGGTGATCCCGACAAAGCCGGGTGCCCGCG +GAAGTGCGCCAGGATGTACTTGAGGAAGAGCTGGCGCTCCACCTCCAGCGCCGCCTGCAG +ATAGCGGATGCGCGCCGCCTGCTCGCCGTCAGTCTGCCAGCGAAGCTGCGCCAACACTTC +CTGCAGGCGACAGCGGCACTGCGCCGCGGAAACCTCGGACGCCCCCGGGCGGCTACAGTG +GCCGCGGTTCACCAGCTCCTCGGCCAGCTGGCGCTGCAGCTCCCGGGCTTGGCGCACCAC +GCCATCGCGCTCCCGGTGCAGCAGCTGCTGCAGCTGCCGCTGCTCGGCCTCCTTCCAGCG +CAGCAGCTGCCGGATCTCGGCCTCGCGTTCCCGCTGCATCTCCTCTTGCAGCTGCCGCAA +CTCCCGGCTGCGCTGTGCCTCCCACTTGGAGCGCAGACGGTCAGCCAGCTGTCGCCGCTC +CCGCTCGGCCTCCTCACGCAGCTGGCGCTCCCGGGCAGCGAAGCGCCGCCGTTCCGCCCG +CCAGCCTGCCCGCTCCGCCTCCAGCTCCGCCCGTAGCTTCTCCAGCTCCCGCCTCTGGTT +CTCCAGCACTGCCGCCGCCGGGCTGGAGCAGCCCGGCTTCTTGGGCGACGCGCCCAAGGG +GCTGGGCGAGTCCTTGGCCATTGCGGCCGCGGCCCGGCTGGTCTCCAGACTTCGCCTGGC +AGGCCTCAGCTCGCCAACGGCCGCCGCCGACCGCCCGGCGCGCCCCTTCCGGCGCCCCCT +GGGGGTCTCCGCGTGCCCCTTCCGCCTCTTGGCATGTTCCCCCGCCTCCTGCCGCGGATG +TCTGGCCCAGGCGCCTCTCCACCTGGCAAGGCCAGGGCTGGACTCCCACACCCCTGCTCA +AACTGGCCCATGTCCCTTCCAGGATGCACGGGGGCTTTTCCGGTCCCTACACTTTTCCTG +CTTCTAGCACCAAGTCCCTTGTGACCCTCCCCAGGCACCCTCCCAGGCACTGTGCAGTCC +CAAGCAGGCCGCGGTGCCCCGAGGGAAGTCTGGGCTGCTGCTCCTGTACTTGGAGAAAGG +GGCAGGGTGTGTGTGTTAAGGCCACCCATGGGGTGTGGGCATACCTGGTCGTGGGTTCAG +GTGGTGTATTTGGGTCAGGTGGCTGGAAAATGCAGCAGGATAGGCCTCAACTTGGCTAGA +CCTGGACTCCCCAGGACCCTTCAGCCCCCTAACAGCCCTGATCACCCAACTGGTCATCCA +CCTTTTGAGGAAAGGATGTGCCTGATACACATTTGTGTTACTGCGCAGAGTCGGCTCAGA +GGAGGTGTAAGCGAAAAGACCCTCCCCAGCAAGGTGGCATAGGGACCCAGGGCCAGATAG +GCATGAGGGGCACTGCCCAAACTGGCCATCTGGCCTGTCTACCTCCCACATCCAGGTACC 
+ACCTGGTGACAAACACTGGAGCCCCAGTGTCTGGGCTACTCATTGGCATCTTAGGTAGAG +GCCTCAGGATGGGCCAGGAGTGAGGGTGAGGCTTCAGCAGAAATGTTGGAGGCATCCTGA +GCTGCAGAAGAAACACACACACACACACACACACACACACACACACACACACAGCTGCAG +AAGAAACATACACACACACACATACTGAGCTGAAGGAATACACACATACACTGAGCTGCA +GAAGAAACATACATACACACACTGAACTGAAGAAATACACACACACACACACACACACAC +ACACACTGCCTCTCTGACTGGTGGCCACCTGTGTGGCTTGGGAGCTAAGAAGCCCTGCTT +CAGTCTGGCTCCTCTTTGCCCTTCCTGTCTGCTCCTGGTGCCAAGTGGCTTTTGGGAGAC +ATGGATGGACACCTGATACCTGGCAGTCTGCATCGAGACATTGATTTTTTTTTTTTTGAG +ATGGGGCCCCACTCTGTCGCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTCATTGCAAC +CTCTACCTTCCACGTTCAAGAGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGATTACA +GGCTTGAGCCACCATGCCCAGCTAATTTTTTGTACTTTTTAGTAGAGACAGGGTTTCAGT +ATGTTGGCCAGGATGGTCTTGAACTTCTGACCTCAAGTGATCCACCCGCCTCGGCCTCTT +AAAGTGCTGCGATTACAGGCATGAGCCACTGTGCCCGACAAGACCTTGATATTGAAGTCA +CATCAGAGGCCCCTTTGCTTCTTGGTCTGTCGTGTCTCCCTGTCCCCAGGAAGCACCTGG +GACCTCTCACAACTGCTGTGTTCACTCAGCTTATGGCCCTCCTTTACAACACTCAAGTTT +TTCAAATCAGTTTCCCCGGAACCCACAGAACATGCCTTTAGGCAGTGTCAAGTAACAAGA +ACATCGGGAATTATGGCTCCAGATGTTCTTTTTTTTTTTTTTTTTTTTGAGATGGAGTCT +CGCTCTGTCGCCCAGGCTGGAATGCAGTGGCGCAATCTCTGCTCACTGCAAGCTCCGCCT +CCTGAGTTCACGCCATTCTCCTGCCTCAGCCTCTCAAGTAGCTGGGACTACAGGTGCCCG +CCACCATGCCTGGCTAATTTTTTTTGTATTTTCAGTAGAGATGGTCTCGATCTCCTGACC +TTGTGATCTGCCCGCCTCAGCCTCACAAAGTGCTGGGATTACAGGCGTGAGTCACGGCGC +ATGGCCCTTTATTTTATTTTATTTTATTTTATTTTATTTTATTTTATTTTATTTGTGAGA +CAGAGTTTTGCTCTGTCACCAGGCTGGCATGCAGTGGCACGATTTCGGCTCACTGCAACC +TCCAACTCCCTCGTTCAAGTGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAG +GCATGCGCCACCACGCCCAGCTAATTTTTGTATTTTTAATAGAGACAGGGTTTCATCATG +TTGGCCAGGATGATTTCGATCTCCTGACCTTGTGATCCGCCCACTTTGGCCTCCCAAAAT +GGTGGGATTACAGGCGTGAGCCACCGTGCCCAGCCCAGATGTTCTTTATGAGAGGTGAGG +CAGATGTAGTGTGTATGTGAGAGGAAGGGGAGGTGAGATGCAGGGATGGCCTTAGCCTTG +AACTGGTGCTGGGCTCCTGGGCATGGTAGCCACAGGGAGGACTATCTTGAGAGAATCTTC +ATCACAGCCCTGTTTTACGGAAGAGAACCTGAGGCTCAGTGAGAGCCACTGGCTTGCCAG +GGCACATGGGCAACTGAGGCAGAGCTGGTGGGTGGGGTTTCACAGCCTGGATGCCCTGCT +GCAGTCTCCAGTAGGGCCTCAACTGTCAGGTCAGGAGGGCAGGCTGGGCCGGGCATGGTG 
+GCACAGACCTGTAATCCCAGCCCTTTGGGTGACCAGGGTGGGAGGATCACTTGAGCCCAG +GAACTCAAGACCTGCCTGAGCATCATAGAGAGACCCTGTCTCTACAAAAAAAATTTTAGC +TGGGCGTGCTGGTGCATGCCTGTAGTGCCAGCTACCCAGGAGGCTGAGATGGGAGGGTCA +CTTAAGCCCGGGAGTTTGAGACTAGCCTGGGCAACAGAGTGAGCGAGACCCAGTCTGCAT +TTCCAGCTGAGATGAAATGATTGCTTGAGCCTGCAAGGTGAGGTCAAAGTCGCAGTGAGC +TGTGATGGCACCACTGCACTCCAGCCTGGGCAACAGAGCAAGACTGTCTCAAAAAAAAAA +AAAAAAAAAAAAAGCCTGGTGCGGTGGCTCACTCCTGTAATCCCAGCACTTTGGGAGGCC +AAGGCAGGCGTATCACGAGGTCAGGAGATAGAGTCCATCCTGGCTAACATGGTGAAACCC +CGTCTCTACTAAAAATACAAAAAATTAGCCATTAGCCAGGCGTGGTGGCGGGCACCTGTA +GTCTCACTTATTTGGGAGGCCGAGGCAGGAGAATGACATGAACTCAGGAGGTGGAGCTTG +CAGTGGGCCGAGATCCCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCATCTC +AAAAATAAAAATTAAAATTAAAATTAAAAAAAGAGAGTAGGCTGTCACCAAGAAAAAGGA +CACTGGGGTGGATGGGTCTCTGGGAACCCCACAGTTACCATCCAGCAGCTGCCCCAAAGT +ATGTCAAAAGCATTCGGGAATGTTGGTTGAGTGGATGAGCAAAGCCAGGCTTCCCCAGGG +CTGGGTTTCTGCCATCCCTGTTCTCATCATGTTGTGCAGTGATTGTCCAGTGACCTGCCT +GTCACCCCCATCAACTCTGATCTTGGGGAGAGCAGAAGCCATGTTTTCCCTTGTCCCTAT +TGCATCCGTGCATGTGACTTAGCACATCACTGGTGCCCAAGACATATGAGAATGAGTTGC +AGGGAGGCAGGAGTCCCTGCATGAGCTCACCATGCTGCTGCCTTCTCCAGGTTCAACTTT +CTCACCTATTAAATGAAAGAGATGGTTTCTTTTTTTTTTCTTTGAGATGGAGTCTCACTC +TGTCACCCAGGCTGGAGTGCAGTGGCATGATCTCGGCTCACTGCAATCTTCACCTCCTGG +GTTCACGTGATTCTCCTGCCTCAGCCTCTCAAATAGCTGGGATTACAAACACCTGCCACC +ATGCCTAGCTGATTTTCATAGTTTTAGTAGAGACAAGGTTTCGCCATGTTGGCCAGGCTG +GTCTTGAACTCCTGACCTCAAGTGATCCACAGGCCTCAGCCTCCCAAAGTGCTGGGATTA +CAGGCATGAGCCACCATGCCCAGCCTGAAAGTGGTGGTTTCAAACTGGGTTCTTTAGGGC +TCCAGGGTTCCTTGGAAATATAGAACTGCTGGGCACACACCTATACTCCCAGCTACTTGG +GAGGCTGAGGCAGGAGGCTTGCTTGGATCCAGGACTTCTGGATTGTAATGTACTATACTG +ATGGGGTGTCTGCACTAAACTTGGCATAAATTGGTGACTTCCTGGGAGCAGGAAACCACC +AGCTTGTCTAAGGAGAGGTGAACTGGCCCAGGTTAGAAAAGGAGCAGGTTGGCCGGGCTC +GGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGGCAGGTCACCTGAGG +TCAGGAGTTCGGGACCCGCCTGGCTAACATGGTGAAAACCCGTCTCTACTAAAAATGCAA +AAATTAGCTAGGCATTGTGGCGCATGCATGCCTAATCCCAGCTACGCGGCAGGCTGAGGC +AGGAGAATTGCTTGAATCTGGGAGGTGGAGGTTGCAGCAAGCTGAGATTGCACCACTGCA 
+CTCCAGCCTGGGCGATAGAGTGAGATTCCATCTCAAAAAAAAAAAAAAAAAAAAAAGAAA +GAAAGAAAAAGGAAAACGAAAAGGAGCAGGTCAAAACTCCTGTGCTGGCTGGGCGCAGTG +GCTCACGCCTGTAATCCCAGCACTTTGGGCAGTTCAAGACCAGCCTGACCAATATAGTGA +AACCCCATCTCTACTAAAAATACAAAAAATTAGCTGGGTGTGGTGGCAGGCGCCTGTAGA +CTCAGCTTCTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGCGAGGCAGAGGTTGCA +GTGAGCCAAGATCATGCCATTGCACTCTAGCCTGGGTGACAGTGCAAGACTCCATCTCAA +AAAAAAAAAAAATTTTTTTTTTTGTAGGCTGGGCGCCGTGACTCATGCCTGTAATCCCAG +CACTTTAGGAGGCCAAGGTGGGTGGATCACTTGAGATCAGGAGTTCAAGACCAGCCTGGC +CAACATGGTGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCTGGGCATGGTGGCAGG +TGCCTGTAATCCCAGCTACTCGGGAGGCCGAGGCAGGAGAATTGCTTGAACCCAGGAGGT +GGAGGTTGCAGTGAGCTGAGATCACGCCATTGTACTCCAGCCTGGGCAACAGAGTGAGAG +TCTGAGTCAAAAAAAAAAGAAAAAAAATTAACAACAGGCCAGATGTGACGGCTCATGCCT +GTAATCCCACCACTATGGGAGGCTGAGACGGGCAGATCACTTGAGCTCAGGAGTTCAAGA +CCAGCCTAGACAACACGGCAAGGCTGCATCTCTATAAAAAAATTACAAAATTTAGCCTGG +CATGGTGGTGCACGCCTGTAGTCCCAGCTATTTGGGAGGCTGAGATGGAAGGATACCTTG +AGCCTGGGAGGCAGAGGTTACAGTGGACTGAGATTGTGCCACTCCACTTCCACCTGGACG +AGAGAGCCAGTCCCTGTTTCAAACAACCAAAAAAACAAAAAACCCACCCCCCCAAAATAA +AAACAATAATAATAATAAAAATAAATAAAACAGAAATACCATAGGCCTTTTGGGAGGAGG +TATGCAGGAAGCGGATGGGGAGAGACAGTGCCCCTACTTGCTTCAATGGGCAGATCTACT +CTTTTTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGTGCAATCT +TGGCTCACTGCAACCTCCGCCTCACGGGTTTATGCCATTCTCCTGCCTCAGCCTCCCGAG +TAGCTGGGACTACAGGCGCCCGCCACCGAGCCCGGCTAATTTTTGTATTTTTTAGTAGAG +ATGGGGTTTCACCATGCCAGCCAAGATGGTCTCGATCTCCTGACCTCGTGATCCACCCTC +TTCGGCCTTCCAAAGTGCTGGGATTACAGGTGTGAGCCACAGCACCCGGCCGGCAGATCT +ACTCTTAAGTGTCACTTATCAGATTCGAAGAAGGGTTTTGGTTTAATAAAGCGTTATGCT +CCTTGAAACAAGTTTCAGAACCCCTGGGTTAGAGGACCTCTTGCCCATTGTGTGCTCACA +CACACACGACACACACATACCCCATCCCAGTACGTAATGACTTCCCTGACCCGGAGAGAA +GGCGATATGTCCCCCTTACATCTCAGTTGGCTCCACATCGCCAGCTCTGTATTCCACTTT +CCCTCTCTCTACAACCCCAGTCTATGACTTTTGGGCCATGGAGTCTGGGTTCCTGGGCAA +CCTGGTTCACATCCTTCTCCTGCCGGCATGGCCTGGCTCCATTTCTGTTCGGCTACCAAA +GCCCCAAGCTTTGTGGAGCCTGCCTGGCCTTGAGGAGGGGAATGGGCTCCATCCTGAGAG +CCTGCAGGAGGGTTTAAGCTTCCCATCCCCCATTGCTACAGTAGCTCCCCCAGCTTTTCA 
+TTCCATATCCTTTATTTTTATTTATTAAAAACTTAATTTTTGGCCGGGCGCGGTGGCTCA +CGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCAGGCGGATCACGAGGTCAGGAGATCG +AGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATAGCCAG +GCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGGAGGAGAATGGCGT +GAACCCGGGAGGCAGAGCTTGCAGTGAGCCAAGATAGCACCACTGCACTCCAGCCTGGCG +ACAAAGTGAGACTCCGTCTCAAAAAAAAAAAAAAACAAAAAACACTTAATTTTTTTTTTT +GTAGAGACAGGGTCTCCCTATGTTGCCTGGGCTGGTCTCAAACTCTTAATCTCGAGTGAT +CCTCCCACCTCAGCCTCCCAGAGGGATGGGATTACAGGCATGAGCCACCATACCCAGCCA +CATATTTTTTATTTTTGTTTCTATGTGTTTTCTGTCTGTTCTTTGCTGTCTCCTCCATGT +CTAGAACAGTGCCTGGCACATGATAGGTGCTCAGTAAATACAGGACTGAAGAAGACAGAG +GCAGACAGGCCAGATGTGGCTGCTGTCCTTAGGGAATTGACAGTCCATGATCTATGAGAA +TGAGCTGCAGACTGGGAGGCAGGTGTCCCAGCATGGGCTCACCAAGCTCCTACCCTCTCC +CGGTCCAGCTTTCTCACCTATGAAATAAAAGTGAAGCTGGACATGGTGGCCCACGCCTCT +AATCTCAGCAGTTTGGGAGGCCAAGGTGGGTGGATCACTTGAGATCAGGAGATTGAGACC +AGCCTGGCTAACATGGTGAAACCCCATCTCTACTAAAAAACACAAAAATTAGCCGGGCGT +GGTGGCAGTTGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATTGCTTTAAC +CTAGGAAGTGGAGGTTACAGTGAGCTGAGGTTGTGCCACTGCACTCCAGCCTGGGTGACA +GAGCAAGACTCTGTCTCAAAATAAATAAATAAATAAACAAACAAACAATAAATGAAAGTG +ATGGATTACATGAATGGTTTTCAAACTGGGTTCCTCAGGGCTCAGTTTGGGGGAGAATGC +AAAGGTTAAATACAAACAACAATGGAATGTAGTAACATTGGCGCTTGGGCGAGTGCTCTG +GGTGAGGAATGTGGTGCTCATGGGGAGAAGAGCCGCATCAGAGCAGGCTTCCTGCATGAG +GCAAGGAGGAAACACAAGAGCTGAGAGCTGAAGGAGAGGGAGTGAGGGCTGCCAGAGGAG +CTGGGAGGCGATTTCAGGCAGTGGGAACTGGAGCTGCAAACCCCCAGCCTCCTGCAGTCT +GGCTATCTGTACCAGCCCTCACCCATGCCCACCACCCTCAGGGCACTCACATCCCTTCTT +GCACTGATATTTTCCCGTCCTATCATGACCCTCGTTTGAGGGTGCAGGGAGGACTGGAGC +TCAGAGAGGGAGGGTCACCTGGCCAGACCCTAACCCGCTGAGTTCCAGTGGAGTTCCTTC +CATCCCCCAGCACTGGATGGGGCTGGATACCTGGAGAGGGACCGTGCTGCTCTTCCTGGG +CCCTGCAGCCTGCCTGGCTGGTCCACATCTTCCTGGGCCAGGAGGGCAGTATGGTGCGAG +CTTCTAGCTCTCTGGCTGGGGCCCCCTGGCCTACTGGGCTGGCTTCCTGCCCTGGTGGCC +CTGGCCCAGCTCCCCAGCTCATGTGCCTCTGCAGCGAGTGGTCAGCCCCTTCCCATCTGC +ACTTTACAAGCCTCATCGCTGAATCCTCCCCATAGCCAGAGAGCCTGGCACTCTCCTCTC +CAGAGAAGGAAACTGTCCAGGGTCACACAGGGACTTATGGGCAGGTCTGTGTGACCTGAG 
+TCTGGGCTCTGAACTGGTGCTTGCAGGGTCTTGCCCTCTGGCTCTGGCAGCCTGAGTGTT +ACTGTAATCCCTGAGCCTTTGAACAAGCAGGCACTGCTTCCTGAGATCCCCATTGCTGCT +GTCAGCGTCTCTGTTTCCCAGCTCCACACTGCTGCCAGGACAGACGTATCTTCTGTCTTG +TAGCATCTCCATTTCTCTTCCCTTTAGATCTATTTTTTTTTTTTTGAGATGGAGTCTCGT +TCTATCATCCAGGCTGGAGTGCAGTGGCACAATCTTGGCTCACTGTAACTTCTGTCTCCT +GGATTCAAGCGATTCTTCTGCCTCAGCCTTCCAAGTAGCTGGGATTACAGGTGCATGCCA +CCACGCCTGGCTAATTTTTGTATTTTTAGCAGAGACGAGGTTTCACCACGTTGGCCAGGC +TGGTCTCAAACTCCTGACCTCAGGTGATCTGCCCACCTCAGCCTCCCAAAGTGCTGAGAT +TACAGGCATGAGCCACCGCTCCCGGGCTGGATCTATTTCAATCACCCTCCTTCCTCTGCA +GCCTGAAACTTGAGGCTTTGCCCACTTCCCAAAGGCACGAGTAGCAATGACCATTTGTTC +TGTCATTTGGCCATAGCTCTAGGTCAGAGGACAGTCCTTCATCAAATGTTTGTTAAGCAT +GCAATCTAGACAGGGGAGGACAAGGGCCAAGTAATGTGTTAATTTCTTCATTCACTAAGC +ACAGAGAATGGAGTAGAAGGAAGGTGGAAAGGAGAAGGAAGGAAAAGAGAGAAGAAGGAG +GATAGAAAGGAAGGAAGGTGGGAGGGAGAAAGGAAAGAAAGAGGGAGTCCTTTAGGTTAT +CTAAGTCCCCTCAGGACTTGGTCTGGCCCTCACAGTTCAAGAACAGACTTGGGCTTGAGC +CTCTGACCAGCCAGATGACTGAGAAGCTGAGGAACAGCTTGAGCCTCAGTTTTTTCCTCT +GTAAATGGGGTTGATAGCAGTTCTGACCCAGCGGGCTGTTGGGGGAGTCACAGAGATGCT +GGGTGCTGCCTTTGCACTCAGAAGGGTCCAGGCTAGGTGTGGTGGCTAACACCTGTAATC +CCAGTACTTTGGGAGGCTGAGGTGGGAGAATTTCTTGAGTCCATGAGTTACAGACCAGCC +TGGGTAATATAGAGACAAACTGTCTCTATAAATTTTTTTCCCAAAAATTAGCCAGGCATG +GAGGTGTGCACCTGTAGTTCTAGCTGCTTGAGAGTCTGAAGTGGGAGGATAACTTGAGTA +CAGGAGGTTGAGGCTGCACTGAGCCATGATCACACCACTATACTCCAGCCTGGGTGACAG +AGCCAGACCCTATCTCAAAAAAAAAAAAAAAAAAAAAATCAAAGGGGTCCAGCAGCATCT +TCCTGTCAGCTTACCTAAGGGAGCATGATAAATTGAGTCCCTGCCACGATGGACATGGGG +AGTGTAGTGGGAGGGAATGGCAAAGGCACCACAGGCAGAGGGAACAGCCTGGGCATAGTG +GGGGGTGTGATGTGGCACAGCACTGATTGTGGGAAGCGAGGGATGTGGGAGATGAGGCTT +CAGGTAGTCAGAGGGAGGTTGAGGCAAAGCCTCACCTGCATGCTCTAGATGAGGTCCAAT +TCCTGGTACCCAGCTCCTAGCCTAGTAATGGGCCACTTCTCCCAGGGCCACAGTTTTCTC +TCCTATCAAATGGAACATGTTAGCACTGCTCTATGGTGCCAATGAAGGGTATAAGTGTCC +AGTGCCCATTTCCTGCCCCAGCAAAGGCACTTTCTTTTCCTTGTCCTCTTGCCTACTGGG +GAACTCCTTCCATCCCTGGTTCCTCCCCCAGCTTCTGCGAGGCCCCTCCTCTGAGCCCCC +ACACCTCATCAAACCCTCAACACCGTCTGGTGTTTTCCTTGGTGCCTCTTCAAGATGCTG 
+TGAGCTCCTTGGGGGCAGGTGGTGCCTGTGTACCTCTTTTTTTTTTCTTTTTTTTTTTTT +TTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAATGCAGTGGCGCGATCTCGGCTCA +CTGAAAGCTCCACCTCCCGGGTTCACGCCATTCTCCTGCCTCAGTCTCCTGAGTAGCTGG +TACTACAGGGGCCCGCCACTGCGCCCGGCTAATTTTTTCTTTTTTTCTTTTTTTTTTTTT +TTGAGACGGAGTCTCGCTCTGTCTCCCAGGCTGGAGTGCAGTGGCACAATCTCCGCTCAC +TGCAAACTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGG +ACCACAAGCACCCGCCACCACGCCCAGCTAATTTTTTGTGTTTTTAGTAGAGACGGGGTT +TCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCATGATCTGCCTGCCTCGGCCT +CCCAAAGTGCTAGGATTACAGGCGTGAGCCACCACGCCTGGCCTTTTTTTTTTTTTTGTA +TTTTTAGTAGAGATGGGCTTTCCCTGTGGTCTTGATCTCCTGACCTCGTGATCCACCTGC +CTCGGCCTCCCAAGGTGCTGGGATTACAGGCGTGAGCCACTGCGCCTGGCCCTGTGTATC +TGTTTTTCTGGACCGCCATCATGTTCAGGAAGGCATCGAATATTTTGTCCGCGACTGCAA +TCACACACTGCATCATCCCTGAAACGAGGTTTCCACGGATAAGGTGCAGCAACAGAGAGA +GAATCCTAGCAACTTTCAATCATTTTCTTTTGAAATGGTTTTAGACTTACAGAGAGTGGC +AAAGACAGCACAGAGCTCCCATATACCCTTCACCTAGCTTCCCTGGATTTTAACATCTGC +AGTAACCAGGCTGATGTGTCAAAACTGAGTCAGATATTGATATAATCCTATTACCTAAAC +TTCAGACTTCGTCTGGATTTCCCAGGTCTCCCACACATGCCCCGCTTCTGTTCCAGCATC +CAACCCAGGACACCAGGTGGCATTTAATCTCTGTGTCTCCTACTCCCTTCCAATATGTGG +TGACTCCCCAGTCTGTCAATGTCTTTTGTGACTTTGACACTTTTGAAGAACAGTTATTTT +GTTGAATGTCCCCTAATTTAGGTTTGTCTGATGCTTGCCCATGATAAGACTAGGGTTACA +GGATTTGAGGGAGAACTCCATAGAGTGGATGTGCCCTTTCTTTTTTTCTTTATCATTTTT +TTGAGATTGAAACTCGCTCTGTCGCCCAGGCTGGAGTGCGGTGGCACGATCTTGGCTCAC +TGCAACCTCTGCCTCCTGGGTTCACGCCATTCTCCTGCCTCAGCCTCCTGAGCAGCTGAG +ACTACAGGCGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTT +TCACCACATTGGCCAGGATGGTCTTGATCTCCTGACATCATCGTGATCCGCCCGCCTTGG +CCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGTAACCAGCCTGCCCTGCGTAATT +TTTGTTGTCGATGATCCAAAGTGCACCCACTCGCCATCTCTGGTAGCTGGGAGGAGCAGG +TTTTTGGGTGAAGGTGAAAGAGCCGGAGTTGGGGATGTGTGTGGCTGGGGGGTACAGGTG +CGACCACTTTGCAGTCCACTCCTGTGAGTAGGGCATTCCTGAGGGAGAGAAGGCCTGGTT +AGCCCTGGGGCTCCTTCAAGGCCTGGCCCCAGGCCTTGATACCTAGAGCAGCCTGACACA +TGTGTGGATGGACTGCCAGGCGGTCCCCTTAGGAGGGGCTGTTCTAGCAGGGACCCTGGC +TTGGGTGACCACCACCTCCTGCCACACTCCAGGATCCCTGTCCCTGCTGGAAGCCGGGCC 
+CAGGGCCATGCCAGCCGTGAGGGGTTCCTCATGAGCCTCCTCCCCGGGACCCAGGCCCCA +TGCAGCTGCCCCCTCCCACCTGCCTGACCCCCCTGGCCTGCACTTCCCTCCATCCGAATC +CTCAATCTCCGCCTGGAATTCCCTTCCCTCCTCAACCCCACCTGGAAACTCTCCTCCTCC +TCCCACACAGCTGGGACTCCCTTTCTCCCACCTCTGGGGGCCCAAAGCTCTGGATGCCAC +CCTAGCCCTCAGCTGGCCTCACCTGTCTCCTCATAGCCCCACAGCTTGATGGTGTCTGTG +TGGGCAGCGCCTCAACGTGCCAGGTCAGGCTGAGGTTGCCTGAGGTGTTGGCGGTGCCGT +AGTATTGCCAATGCATCTCGTTCATCAACTCACTCTTCTCCTTCATCAACACTTTGTTGG +GGTGCACTGGGCAGGGGAGGTGAGGGGTGAGTGAGGGCTGGGCCCAGGGAAGCAGGGAGG +AGGCCCTGAGGCTTCAAGGGCTGTGGGAGCCTCCAGGGCCAATGCATGGCAGCAGCTGGG +TGATGGTCTCTCCGGCCCTCATGGCAGAAAAGGACGCTGAGGCCGGAGAGGCAGGGGCCT +GCCCTCCCTCCACTGCCCACGCCTCTAGCCTCCACCTGGCTTCTCCCCTGCCCTGGTGTG +GCGGAGCTCAGTGGATAGGCACTGGCCCACCTCTCTAGGGACCCCTGCAGCTCCCACTCA +CTCTGGGCTGGGACCGTGGTCTGGATGCCAGGGAGTGGAAAGGAGCTGAAGGAACTCTCG +CAGGCGGGCAGGCCTCCCAAGGGATTGGTTCAGCCAGAGAGGCAGGACACGGACGGGCCA +AGTCTCGGTGTGCATAGATTAGCGCTGGGGAAAGTCCCCGTGGGGGCAGGCTGTGGGCAG +GGAGGAGGGCTCACCAGCCAGCCAAGTGCCCGCACGAGGGAAGGAGTGGCCATCGTCCAG +TGAGATGGTGAAGGGGACGTGGCCGCTCTCATAGAGCAGGGGCGACACGAAGTGCACTTG +CCCGGAGGAGTCCACATGGCCGAGGGTCTGGATGCTCTCCTGTGGCATGCAAACCTGTGG +GCCTGGTGGTGGTAGGAGGTGCTGGGCCACTGCAGGCTGAGCATCTCGACAGTCTGGGCC +CCCAACCTCCCTGGGGAACCCATCCAACCAGCTGTCCTGTGCCCTCCTGCCTGTCCTGGG +TCTGAGGGGCTCCATTCAGCCTGCCAGTGGGTCTGAGCCCTCAGCATGCACAGGGATGGG +GGCGTGGGGCCCCAGTGCCCGGCCTCCTGGGCCTCCCAACCTGCAGATCACACTGGTGTC +TGTGGGGCTGGACATCTTGAAGTGCCGCACCACAAAGTCCTTGCCGCCCATCATGGAGAC +TGAGTAGGGCAATATCTCCAGGCAGAAGTCCTGGAAATCCAAGCAGCAGGTACCAAGGCC +GGAGCACGTCGGGTGGCAGGAACATGGCCTGTCCAGGGCGCCACAGCGCATGGAGCAGCT +CTCTTGGGCATCTGGAGGAGAGGATGCTGAGGGCCTGGCCTGGCGGGTGCTGGACTGGCT +GCCCCATGTGCAGGGCCTGTCTCTGTTTCTCCCACAGCCCATTAGACAGCAGCATCCCCA +GGAAAATGCTGGCCGGGAACTGCACAGGCCACCACGTTGATATGGTAGAAAGTCACAGGG +CCCGTGGGGATGGGATGAATCTTAGCATAGACCATCTGGCTGTTGCCCTGACTGATGGGC +TAGAATGCAGCGAGAAGAGGCAAAAGGCTGCACCCGCTCAGCCCTGCAGGGTCAGGAGAA +GCCCCCTGGGCATGACTGCCCTTCAGCCACCAAGTCCTCTATCCCACAGGCGGGGGTCCT +CAGAGGGCCACGGTAGCTGGAGCCCTCGACCTCTCTTGGCTCCCCAGGGTGTCTGTGAAC 
+AGTGCGCCTTCTGGGGGCCCTGAAGACTGCAGAAGAACCCCTTTTGCTTGCCTGGACTGG +GGTGGGGGATACAGCTGCCAAGAGTCCCATTCGAGGCCTGGTAGGGAATGGTGCAGGGTC +TGAACTTGACCCTTCTCACTCTCCCAGGGGCTGCAATTGAGACCCCAAAGAGAGCTCCGC +CCTCAGGGCAGAAGGCCAGGGCTGCTCCCCTCCTTTCTGGGGGGAGGAGGGACATTTCAA +GCAGGTGTCCCAGCAGTGGGGAAAGGGAAGTGGGGGTGCACACTGCGAGGGGCTGTGGGT +GCTGGGCCCAAAGCATCTCCTATGGGCCATGACACCCACGGCAGGGCCACATTTCCAGGG +ACTGGCATGGAGGAAGAGTTGCAGGGAGGGACTGGGGCAGGTGCGTGCCCAGCTGGGGGC +CCTGAAGGGAGACAGGTCCCATCCCCGGGCTCCAGAGATGCCACCACCCAGGGAAGCCTT +GGCTGTCCCAGCCACACCTAATCCTGCCGCTCAGAGTTTGCATAATTTAATCAGTGTTCA +CCTCCCTTCTTGAGCAGATGAGGAAACTGAAGCTCAGATGCGGACAGGCAAGGCCGAGCC +AGGCCCTTGGTGGCTTCTGGGAGCCCCTGCAGCCAGGGCGGGTGGTAAGGGCTGTGGCCT +GGGACTGGTCACTTGGGCCCTGCCACCTGGGCTGGACGTCCAGACAGGCGGGTGGTCCTC +TCCCAGCTGCAGCTTCTCAAGGTGTCCAAAAGGGACCCTTGCCACACTTGCAGGGAGTCA +GAGCTGGCCCCAGGGCATGAGCTTAGGCCATCTGGTCTGTGATCTGCCCCAGCGCTTGCG +GCCAGGCCAGCCCATTGTTCCCCAGCCTGTTCTCTCTCTTGCCTGGGTGGTGCCTGCTGG +GAACCACAGGGTGTTGGCAGCCACCCATGGCTCCAGAGAGGTGGGCCCCTACTGAAAGTG +TTAGAGGCATGTGCTGGGGGACTTGGCTCCTGAGTCCCCACACAGAGCTGTTCCAGGGAA +GCCACACCCGCATTTGATGTCTTCCCACCTCCCAAAGACATTTGGTGAGAGGCCCCCAAG +ACCTTCAGCTTCTCAGGACTGTTTCCAGAGGGGAGAAAGTCCCTTAACCCACAACGGTGC +TCAGGGTACCTCTGCAGGTGAGTGTCCTGTAAGTGGATGCTCAGGCGCTGAGCCACGGAG +AGCCTGTCTCCTGGCCCCAGGCTGGCGAGGGCATTCATCCCAGGCCCAGGGCTGAGGTTG +CTCCCTACAGCACTGGGTGCAGGTGTCCATCCTCCTGTAGGTGAGGCTCCGGGTGGTTCG +TCTGAGACAGTTCCCAGGAACTGGAGGAGGGAGAGCTGCTGCTTCCTGTTTCTGGCCTGG +GAACCCTTGCTGTGCGTGTCTGGAGACGATGGAAGTCCGGTAGGGGCTAGGTGTGCAGTC +TGGGAAGCTCCATACCACAGGCCCTGCCGTCAGCTGAGTAACTGAGGCTGGCTCATCTCA +GCCCCCCAAAGGTGGGTTCAGCCAGACCCATATGGGGCCGCCAAGCCTCTGGTTCCAAAA +GGGCAGTGCCCAGCCCTTTAGGCCTTTCTTCTTGGGTGGTGGGTGTGTCCAGCCCATGAG +GGGCACAGGGCAGTGAGGAGGGTCCCCATCTCCATCCTGGACAGAGAGCCTGCTCCATGG +GAGGTCCCAGGGGCCAGGAGGGCCCCTGTGGTGCTGGGAGGGGAAGAGAGTTGTGAAGGG +TGGGTTTGGGTCCCTGGAACCTGTGCATCCCCAAGACAGATGGGCCTGGAGCCTTCGGGG +GCGGGGCCAGCTCTAGGGGCTTGTTTAATGCTCTGTGGTGGCTGCCTTAAAATACTTAAT +CATTTTTGGTCAGGCACGGTGGCTCAGGCCTGTAATCCCAGCACTTCGGGAGGCCTAGGT 
+GGGCGGATCACTTGAGGTCAGGAGTTCAAGACCAGCCTGGCTAACATGGTGAAACTCCGT +CTCTACTAAAAATACAAAAATTAGCTGGGCGTGGGGACATGTCCCTGCCCCCTGCCCCTT +GTGCCCGCCCTCTCCCTCCCCCTGCCCCTTGTGCACACCCTCTGTACCCCCCTTCCCGCC +CCAGCCTGTGTTTGCAGTGCTACAGGGTGGCTGCAGCCTTCGCCTCCCACAGCTCCTCAA +GGTCAGAGGGCTGGGAACCCCTGGGATGTGGCTGGGATGAGGACCCTGCCCCTCGTCAGG +CATTTCCAGGCAAGGGGCACAGAGACACAGCCAGCCCAGCAGCTGAGCCCCTCCTATCAG +GCCAGGTGGGAAACTGAGGCATGAACACAACCCCAGACTGGAAATAACAAAGCCATCATG +TACTGTGTGCCTACTGAATGCCAAGCCCTATGCCCTATTCTTTAATGTCATCTTTAATGA +GATGTCTTCACTCAGTTCACCACATGATAGTAGACAGGTATTTATGATGCGCCTACTGCA +TGCTGGGCACTTTTCTGGGCCATGAGGACTCAGGGAGGTCCCTGCTCTGGTAGAAGTCAA +ACCCGACAGGAGACAGAGACTAGGGCCCCACACACCAGGAGGAAGAACCCAGTGCAGAGA +GTGCCAGCAGGAAGCCACAGGGGCAGGGAAGGCGCTCCTGGGAGGGTGGGTGGCTGCGTC +CTGAGGCTGAGCAGGGGGAGCCGGCCTAGGGACAGGATGTGCGCAGGCGTGGAGCAGGAA +GTTCCTGGAATGATGGGCAAGAGGGAGCAGGTCACTGGGGGCAGGTCTAAATACTGGGGG +CTGCATTGGGATAGCAGAGGCCTCGGACAGGCCCGCTCAGCATGTAGACTCCCCTCCCTG +GCCCCTTCCCTAGGATGACAGTGGAGGTCCATCCCCAGGTGGTGGTGAGGACACTCCTGA +CACAGAGACCCCAGCCCAGCTGGCAGGGGAAGGGGAGCCCTCAAGGACCCCTGTAGGTTG +GGGCATGTCCCGCCAGGCGGCCCTCCATGGTGCCCAGCAGCAGCCCTGCTTGCGGACCCT +CTTTAGTCCCCACTGTTAAAAAAATAATTCAGTGAAAATTGACACTAGCTGGGCATGGTG +GTAGGTGCCTATAATCCCAGCTACTCGGGAGGCTGAGACGGGAGAATCGCTTGAACCAGG +GAGTCGGAGGTTGCAGTGAGCCGAGATGGCACCACTGCATTCCAGCCTGGTGACAGAGTG +AGACTCCGTCTCAAAAAAAAAAAAAAAAAAAGAAAAGAAAAAAAGAAAAATGACACTTGC +CAAAGCGCCGTAGGGAAGACTGTCCAGGACCGTGTCACTAGGCATAAGGGACCACGGCGA +TGAGATTCTGCAGTGGGTAAGAGACTGGGCTCAGCCTCGAAGACAGCATGGGCAAGTGGG +ATCGGACCGTCAAGGAGCAGGGGGAAGACTATCCAGGACCGTGTCACTAGGCATAAGGGA +CCATGGCGATGAGATTCTGCAGTGGGTAAGAGACTGGGCTCAGCCTCGAAGACAGCATGG +GCAAGTGGGATCGGACAGTCAAGGAGCAGGGTGGGGGGCAGTGCATGGAAAGTGACTAAG +AGGAAGCATCAGCAGCCAGGGATTCTGGCTTAACTCCCCTAACAGAATTCTTGCTGAAGG +CCGGCCAGGATGGCCAGACCGCACCTGGGGGACAGAGGGGGAACCCGATCAGATGTCAAG +GACGGGGGGCTTCTCGCTAAACTGACTTTGCAGAATTCTTGCTAAAACTAGAGTTTATAA +GGAAGTGCTCAGATGTGCCTAGGAGAAGGCTCAGGAGCCTAAATAAAGCTTGACCAAGCA +GAGAATCTTTGTCACCAGCTTGGTGAGAGTGAGGCCCAGAGTGGGTCAGGGCTTACCTGC 
+TCAGGTTGTACAACTGGGAGGGGTCCGTCAGGAGCCTGGAGTCTTCCCACTGGGGGCTGA +GTCTGGGCCCTCAGGGCACATATAGGTAGATGCTGTCAGGCCCGACACCCCATGCAGTGC +AGACTGGGATGCTGGTGCCTGTGGCACAGACTGGTCAGACCCACCCCAGGCTAGACATTC +CTTGCAGGGAGAGCTGAGGCTGTTGATGCCAGGAACAGACAAACAAGGTCATCACCAGGT +AATGAGCTCGTTACCCACATCAGGCAAGGCCATTGCTGGGTAATCATTTGGCCTCCTGCA +GAGCCACCTGCCCTGGGACGGTTCCCAGAGATCTGGCTAGCCAGTGCCACAGGAGGCCGG +GGCACAGTTCCAGGGCCTAGTGGGCAAGGCAGGGGTATTCAGGGACAAGGTGCACCTGAG +TCCCCACAGGAAGCCCCAGACCCTGCAGTAAGACCTGGAACAAGTCATTCTCCTCTCTAA +GCTCTCAAAGAAGAGGTCAGCTCAGGGGTCGGGGAGGCAAGGTCTCCTAGAGTAGACTAA +GCTGGCCGCTGCCATGCTAACACACACCCTCCCAGCCAGCCGCTGGTCCTGACAGATGAG +GAAACTGAGGCTAGGTGTTTTCATGAGGCCTCTGAACCCAGAACCCAGTACCCAGTGTCC +TGAGCAAAGCCCAAGCCAGACCTGCCAGCTTATGTGTACAGAGGCCCCACTATGCAGCCT +GCACTCCTGGGGCCTAAAAGGACCATCACCTCCCCTTCTAAGAGGCTGGGCACAGGGGCT +GGGTCCCAGCCAGGCCACCCTTCCTCCTCCTCCAAAGGCCCAGCAGCAGCTGCTCCCAGA +GCAAGCAGCACAGTCGCAGCAAAGGCTGGACAAGTGTCCTGTGCTCTGTGGTCTGAGCAG +AGGGCCAGGGGCCGTGACCCACATGGCCCCTCCAGGAGCAGCTCCAAGATGAGCCTTCCT +GTCCCCACCAGTGTGTCCGGGACCTCAGGGTGGATCAGCCGCACCTGCCGGCTGCTCCTG +TTCCTTGCTCTTCCTGGGGCTCCCCACCACCTTTGGGAGGCAGCTCCAAGACCCTGCTGC +CTCCCTTGCTTACCCTCAGGCCCCTGCTGGGTGTGCCCCCTCCCCACACAGACCCCTGAG +CCAGCTTCGAGGACTCTGCCTGCATCTCCACGTCCACTCTGATGACTGTGAAGCTACAGT +CACACTCTCTGCCTCCACCCCCGCCCTGCCGGGTCCCTGCAGCAGCTCTCAGAGGCCCCC +ACAGCCCCTCCCACACACAGAGCTCGCCTGGCCCTGCCGTCTGAGCGCTCCCCTGGCTCC +CAACCGCCCTTGGGCTCTGGGACATAAAACCTCCAGCCAAGGTGGCTGCCAAGTTTTATT +TAATAGCCATAGAGCACAAAAGGGATGACAATTAAGTCACACGGAGAGACTGCCCCATCG +GGACCCCTGGCCAAGCTAAGCAGAGGAGCCCTGGGGTTTCCTCTGACCCCCCACTGTGAA +TGCATTACTGACGGGCAAGGTGGCCAGTTGCCCACCCAGCCTGCACGTCTGCATGCACAG +AGCTACAGCCTGGGCCCAGGGACAGCCCCGGGAGCCGGTGGCTTCCCACAGGCACAGCCA +GCCTCCCCACCCCCACCATGCCTGGCCCCCGGCCCAGGGCACACTGAGGGGAACAGGAGG +GGAAGGAGCAGGCCCTGCACTGGAGCCTCAGTCAGCTCCTCTGTGCCAGGGGCAGCTGGA +GCTGGCTGAATTTGAGGAACTTCCCGGAGGAACTGGAGAGCAGACCTGTGTGCGCGCCCC +GCTCTGCCACGTTCCCTGAGGGACACACTGCTTTGACCAGCCCCCACCCCATCTCCTCTC +CGCCAGTCCCCGGTGTCCACCCCAGCCCCGCTACCGATGAAGCTCAAGCCAGGGTCCCCA 
+TCACCTGCTTCACGGCCATCCCTGCACTTGAGCTCAGGGGCTCAGGGTTGTGCTGACACG +CCTGCCTGTCCACCCCTCACCGACACCTCTGCAGTCCTCCTCAGGGCCCAACAGGGTCCC +CAGGACCCCGCTCCCATTCTGCCCTTGCTCCTGCCAACCTGTCTTCTCTATGCCCATGAC +TTTGGGACCCTCTGCCATAAGTACCAGCCCCCACATCTGTAGCCTCTGCAGGAACAAGCC +CTAATCTGGACTCATTGCAATGGCTCAGGTTCTCTTCCAGCCTGTCCATAGACCTCCAGG +CAGAGATCACAAAACTAACAGCTCTTTCTGCAAATCTGGACTCTCTCCTGCACCTTCGCC +ACCATCCAGGCCTCCCTGTCACTCCTCCTGGGGACCTCAGAGATCTCTTCATTTCACAGG +TGAAGACTTGGCTCAGAGAGGGCAGAGCCCCTGGGTCACACAGCCCGTCTCCAGTAGGGC +TGGGGAAAAGAACCCAGGGCGCTCTTGTTGGCAGTGTTTTAATGACTGGGGGGAGGGAGA +GAATGGCTAATGAGGCTCTAGGGATCAGCAGAGCCAGCTGCTGGAGGTCCCAGAGGACGG +GTGACTCAGCAGGAATGGGGGAGTCTGAGGGGTTGACAGAGTGCCCAATTCTCACCTCCT +CCTGCACCTTCCAGGGAGACGAGCTGCAGGGAGTGATTGAGTGGATGAACATGACTAGAG +GGACCCAGTCTTCTCCTCAGCAGCTGCTTGAGGACAGTATTAGCGGTCCCCTCAATCCCC +CCCCCAACACATGCACACATGCACTGATCTGCAGGGGCCAGACCCCAGTGGACCCGGGGA +CACAGCTTCCAGGGGCTGTCTTGCCATGTCCCTTCCCAGACCTGCCTGGATTCATGCCAG +GCCACCCTCCACAATTTGCTGAGGGAGGCCCCAGGGTCCAGGGCACCCAGAGTGCTAACT +AGCCCCTGGGGGTACAGGGAGAACAGCATGCTGTGTGGGGGATACTACTCTGCCCATACC +TAGGGCCCTGTGGCGGCCACAGGCAGCACTGCTTATGCAGAGGCACTGGGGTGAGAAGGA +GTAGGGTGCAGTAGTGAGGGCTCCACAGAGGGTGGAAGGGCTGCAGGGGAAGGCAGACCA +GCAGGGATGCCATGAGGGCCTCAGATGCCAGGCTGCAGAGGTGGAGGGGCTGGCCAGCTC +CTCTCCCCAGCCTCCTACTGTAAAATGGGAGAGGATCCCAGCCCTGCTGATTCATCCACT +CACTCCATGGCAAGACAGCCACTGACAGCTGTGTCCTGGGGCCCACCCGGGCCTGGCCTC +TGTGCAGGCTAGCACTCAGCATGCTTAGTGCCACACTCGGCCCAGGAGAAATGACCCAGC +CCTGGGCTCAGAGACCCCACAGTGGGTGGATGGAAAACAGAGTCATTTAAATACAACTGT +GAAGGGTGCTGTGGAGGAGGGGCTCGAGGTCCCCAGTGTGGGATGGGGTGGTCATGGAAC +ATTTCCCTATCTAAGTGGGAAAAGTGAGAGCCACCAGGAAAATGGCAGGGGTTGGGGGGA +GAGGAACTGTGCAAAGACCCAGAAGGATGAAGAAGACTGTTGGATTAGAGGAGGTTAAAG +ACACAGACAGGCTTGGACATGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAA +GACAGGAGGATCACTTGAGGCCAGGAGTTCAAGACCAACCTGGGAAACACAGTGAGACCC +TATCACTACAAAATAAATAAATAAATAAAATAAAATAAATAAAAATAAAAATAAAAAAAG +CCGGGCATGGTGGTGCACGCTTGTAGTCCCATCTACTCAGCAGACTGAGGTGGGAGGATC +GCTTGAGCCCAGGAGATGGAGGCTGCAGTGAGCTATGATTGCACCACTGCACTCCAGCCT 
+GGGCAACACAGCAAGATCCTGTTTATAAAAACAAAACAAAACAAAAAACCCAGCTTGAGA +TGGCCAGGGATTTCCAGAGGCCCTAGGCGGGCCACCTTGGAGAGTAAGGGAGACCCTGAA +GAGCTCGCATGGAAGGACCAGCAGCTTGGGGCTGGAGATGCCACCTGGATGGATTCCTCC +AGCTGTGTGGACTAGGGGACAGGGTGAGGAGATAATGGCCTGGACTACATGGAAGGACGT +CGCCCTGAGCCATTGAAAAGGGCTGTGGGGTCCATGACCACTCCTGGCCTTCCCTTGACT +ATGATGGAGACAGTAGCTTCTAGTGTAAAATGTGAGCTCAGTTCCTGACTCGTGTCTAGC +GGGTGGTGCCTGGTCTTTGGCTGGCGATGCTCCATGTCACCTATGCCCAGAAACTGGGCC +ATGCCCTTTGTGAGGACCTGCCCACAACGTCGTTGTTCCCGCAGGGTCCCCGGGGAACCG +GGCTGCCCGGGACCCAGCCCTGCCAGGGTTCCCTGCGGCCCAGGCCGGAACCCCCAGGGC +TTTGGTCGTTCCCAAAGCCGTCCCGCGGGCCCCCTCCACGGTGCGTCGCCTGGGCCCCAG +CGATGGCTAGGCTGCAGCCTGGGGCTCCCGGCCATCCCGGGCGCTGGCGGCGGTGGAGGC +AGCAGGGCGGATGGCGACGCCCGGCCGCCCGTGGGCCCAGGCGCGTAGTGCGTACAGAGC +CAGCGAGGTGCTGCGGCGCGTCACGGGCCGCCGGCGGGACCCGGGGCCGCAATCCAATGG +GCCGGGCCGGGAAGACGCCCGAGCCCCGGGCCGGCTGGCTCGCCTGCTCCGCCAGCTCCG +GGCCGAAGCGGCTTCGCGGTCCGAGGTGCCGCGGCTGCTGAAGCTGGTGGAGCGTGCGGG +GGCCGGGGCGGCGGGCGCGGGCGAGAGGACCGGCGCGCACAGCCGCGGCTCCGTGTGCTC +AGTATGCGGGGAGCCCCGCGGCAGGGCCACCTACCCGGCGGGGGTCCTGGAGGTCAGCGA +GCGGCGGCTGCAGGAGGGACTCGCGGCAGTGCGCGAGGAGCTGGGCGCCGAGATTGAGGC +GCTGCGCGCGGAGCTTCGAGCGGAGCTGGACGCTCTGCGCGCGCTGCTGCCGCCGCCGCC +GCCGTCCCCGCCTGCCCGCCGCGAGCCCCGCGCCGTCCCCCGCGCCGCGCCCCGCGGCCC +GACCCTGCTGCGGACGCTCGGCACCGTGAGCGCCCTGGTCGCCGCCTCCAGGCCCGCAGA +CGACGCCCCGGACGGCCCAGCAGAATGCGGAGCGCACCGAGCCCCGGCCAGGAAGAACCA +CAAGAAGATGCCAGTGCCGCCTGGGGCCCCGCAAGGTGGCGGGGACTGAGGGCGGCCGCA +CAAGGGCAGCCTAGGCGAGGTGCGGAAGGCGTCGCGCTGGCTACTCTGGTACCCCCAGGA +CGGGGCAAGTGAGCAGATCGGTCCCCCTCGTGTAGCGTGGCTCTGAGTCAGAAGGGTGCC +CGGGTGCCGCCAGTTAGGGCTCCGGTACTGGAGGGAGGGGGTGGGCGCGGGACGGGCGGA +GGGTGGGCCACGTGCGTCTGGGGAGTGCGGATGGGAGCCGGGGTCCTTGCGAGAGACTGA +GTCCGGCTAGAGAACAGGGTGGAGCCCCTTTGGACCTTAGAGCTGGGCCTTTGGGCCTTG +GGTCTGGGTCAGCCTTTGGGCCAGGGCTGGGTCAGCCTCCAGGGGAACAGCCGAGACTCC +TCTTCCTTGGGGTGACTGCACGTTCTTTTTCTTTCAAGGTCTCAGTGACGAGGCAGGGTC +TCCACGCACTGGAATAGTGTCAAACCACACACAAGGGCACAGAACCCAGGGACACAGGGC +GGCTGAGACACGCAGCTGGTGATGGCCCTCAGGACGGTGTCAACCAGTATGTAATCACAT 
+TCCACACGGCACCAATTGGACAGCGGCCTCCGGTCAGCCTCACTCTGACTGCAACCCCCT +CTCACTGGACAGCATCACCTGGACACAGAGCCTCACCTAGTCTGTGTCCCGTGGACACAA +CTTTGCTTTGGACAGTGACACCAAGAATACTCACCCCTTTACTTAGCCAGACCACAGGCA +GCCACCGGGACACAAACATCTCTCCAGAGTCACCCTCCACACAGACTCCACAGCAGAACT +CCCAGGGAGGCACTGGGGCATGGCTTCAGAGCACCAGGCAGCGCTCCGCGGTGCCCAGCA +CCCGCTCAGCCAGGGCAGCCTGCAGCGGAGCCTGGAGCCAGCAGCTTCTCATCTCTTGGC +CTCGGGAAATGTAGCTGGAGTCATCATTTAGCAGAGCACGGTGTCCCTGGGTTGGCCACC +CAGCTTAGTTTTAAAATAAAATAATGTAATCTTGGCCTAATGGTGGAATTTCTGACTGCT +AGATGTTCTCCTTCCATCTGACCAGGGGATTCAGACACAATGTATAGGATGAGGGTGGGG +GCCCTTCTCCCTCTGAGAGGAACCTGGAGAGGCCCGGGTTCTGCTGAGGTAGGAGAGGGT +CACTCAGAGGGCTCCCACGGAACAGACAGGCACACTGAGGCCTCAGTGGGATTAGGGCAG +GAAATGCCCCTCTTGCAGTGGGGGACCGCGCCGGGTCACGCACCCACCCCCTCCAGGCCA +CATTAGGCTAAGGTCACTCTGTGCATTATTTCATTATTTCACACGGAGGGTCGCCCACGG +TTGACAGAACCACAAGCTGAGCCCCAGGCGCATCAGAGCGCAGTGCACACCTGCTAGGGT +GGGGGAAGAGCGGGGGGCTGCTGCGGGGACCGAGGCTCTGGGGGAGGCCAAGACTGTGTG +CCCCTTAAGAGGCTGCTGTGGGCCAGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTT +GGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGG +TGAAACCCCGTTTCTACTAAAAATACAAAAAATTAGCCGGGCGTGATGGCGGGCACCTGT +AGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATGGCATGAACCCGGGAGGCGGAGCTT +GCAGTGAGCTGATTCCACCACTGCACTCCAGCCTGGGCGACACAGCAAGACCCCGTCTCA +AAAAAAAAAAGAGGCTGCTGTGGGCGGAAGTTACCCGTGAGCGGTATGGGGTGAAAGTGG +GAGGGTCCAGCCTGGGGCACGGTGTGGAACAGGGTCATCTGAGGGTGCAGGGCAAAGAGA +TCAAAACGGGGTGGGAGAGGGGCAGCGCCTGCGGGAGGAGCCAGGCAAGGCCCGGGGAGG +GGTGTGGGGGGCGAGGGGGGACGGTGGCTCCACCTGCACCCACCGGATGGGTATGGGGTC +CTCGAAGACGGTCCAGAGCACCGTGGGCTCGCAGTCAGGCGGGGTCAGTGACCCAGCGAA +GCGACAGTAGCTCGAGGTGTTGGGCCGCATCGACGCCAGCAGGAAGGTGGACATCAGATT +CACTGAGAGCCCCAGGCGGCAGAGGGAGGGGGATTGGAACCAGCCTCCCCGTGCCCTGCT +GGTGCCATCCCCAGCAAACCCCCCGCCCCTGAGCCGGAAAACGACCCGGTGGCTCCTCAC +CTGGCTCAGGCACCTTCCTCAAGCCCGACACTATGGCGCAGAAGTTGGTGTTGCTACAGT +CCTGCTCCTGGGAGGGGCTCCAAGTGCAAGGGGTTCTGAGTGGGGGAGTCTCTGAAGAGG +GAACTCAGCTGGGATACTCTAAGCAGGAGGCTCCCAGTGGCAGGAGGCATCTGGCATCTG +GGAAATGGGGTGGAGGGTTCTCGCCGCCACTGGCCCTGCCGATTCAGGCATTCCCACCGC 
+CCTATCCTCCCACAGATCCTATAGACCCTAGAGATCCCCCCATGGACTCCACAGGCCTAG +CCTGTCCACTTCCAGGATCCCCATGGACCCCTTTTTTGGAAACATACATAGAATCACAGC +CCTGCCTTCACCGCCAGCAGCACCTCCAGCAGCAGGGCCTGCACTCGAGCCCATCACCGT +GGCGTGGTGCCTCCTCCATGCTCTGGTACTTTGTGTTACTGTGGACCACATGCATCTGGG +GGTAGGCACAGATGTGGGAGATACCCAGCTGCCCCCACCTTGCTCTCTCTCATCTCCGTG +GGAGCACTCCGGCCACCCCACCTTGTGCCCAAGTCTACCTCCATAGCCTGGCGCTGCCTG +TCCAGGCTGTGCTCTGAGTCTGCTCGCCTCGGGCCCCCCCCCCGCCCCCAGCTGATGTGC +AGCCGCAGTGCGCGGTAGACAGGCAACAGCAGCCCAGCCTCCCAAATCTCCAGGTGGTTC +TGAGGATCAGTGTCCATTCAGAGTAGCAATGAAGGGGAGGAGGCACACGACACCCTCTCT +ACCTCACCACCTGCCCACCTTGTGCCATTAGAGCACAGGGTACGAGCAGAGGCCCTTCCT +CAGGCCAGGCTGCTCCCCAGTGGGGCCCCATTGCCTCCATGGAATGGAACCCATGGTGTC +CATGGCTCCTGCTGGTCAGCAGAGCAGAGTTCCACAGACTGCAGGGGAAACCCTCCCCCA +CACACCCAGCATGGCATGGAAGAAGGCAGAGGGCAGCCCTCCTGGGGTGGGGCTCTGAGC +CCAGCTGTGTGTGGTCAGGGATCCTGGCCCTGCCAGAGACCCTGACTCCTGGTCCTCAGG +GGCCATGGCTGGTTGTGATTTGTAGGGTGGGACCTGGCACCTACACAGAGGGCGCATCTG +GACTGCTGTGTGAGGATGCTCCTTTCCTGTGAAGCTAGCCCCTGTGGAGGGTTTTTGGCC +CTGTAGGATGTCTCAGCTCCCGTGTGTAGGGGGTCCTGGTCCCTGTGTACACTCAGGAGC +TCTCATGCCCTGTGTGGGGCTCCTGCTCCTGCCTAGGAGGGGTCCTGACCACAGGGTGCT +GACCTGTGTGGCTGTCATTCTCCAGGGTCCAAGGGCCTGGAGGTGCTGAGTCATAGCCTC +GGAAGATGAAGGGCCCTAGGGTAGAGTTCCGCCGGACCCTGTGAAAGTCAATGTCGATGA +GGGACTGGCCTGGGCCCCCACAGGCAGGGGCCAGCTTCTTCCAGTGGGTGGGGACTGCTG +GAACAAGGACTGGGGCCTTTGGGGTGGGGGTGGGGTCAGGGAGGCAGCCTGGGGGTGCAG +CAGAGAGTGGCCCCCATACACCCAGAGGATAGCCCAGCCTCAGGAAACCCAGCTCCCTGC +TTCCCCCACACCTGAACAGGTCACAGCATCCCCCTTCTCTTGTACCCTGCATCTGGCCTA +TCCATCTAGTCTCCCGGTTGGCTGCCCCATGGTGAGATGAAGGCAAAAGAGTCTCTGGCA +CACAGTTGCACACCCACACTGGGACCTCTATCACCTGCACACACTTGGGCTGGCCACAGT +AACAGGCACACCAGCCTTGACTGACTCGGGACCGGCGATCACAGCAGGCCCAGACTCACA +CACAGGGGGACAGTCACGCACTTGCACCTGATGCCCAGTCAGTCTACTGGGGCAGACGAC +TACAGACAGAAACCTGCAAAGGCCACAGAGTGCAGGGTGCAGACTGTCTCTTGGCCACTC +TTGCACCACTGGCACATGCTCTGGTGGTCCCAGGACACAGACTGATCTCACTCAGCCTCA +CATGACACTCTGTCCTCACCACACTTGGGGTCCTGGGAGTCGTAGCACCAGGCACCTATA +GAGACAGTGGGCAGGAGGCTGAGCTGAGAGACCAGTCATCTGGGTCCTCTCCAAGCCCCA 
+TCCCATCAGCTGGGGTCCAAGAATGAAGTAGGCCAGGGGCTTAGGCCAGGGGAGGCCAGT +GAAGAACCTTCTCTCCTGTCACCCCTACCCCAGTATTTTCTCTCCCCAGTCCCTCCCCAC +TCCCAGTGGCAAGACCTAAGGGGTGGTGGAGCATGGCAAGAGGGTAGATTGGAGACCAGA +CGGGAAGAGTCCTGGTGCTCACTCTCAGAGTCTGCGCCACACCAGCTGCACTGCAAGGCG +AGCATGATGCCAGGGGCGTGCGTGGCTCAGTGGCAGCCGCAGCTAGGAAGGACTAGAAAG +TGGCTAGACCTCGGAGCCAAGATGCTGGCCTGGCTACTACATATTTATTCATTAGCTAGC +TGGGCTAGGGGCGTGGCTATTGAGGAGAGGATGGGAGGGGTCGGTGCCCAGGCAGGGCCA +CCAGGAAGAGCGCGAGCGCTCCAAACTCACCCAGGTGTGCTGTGGTCCCAGGGCTGCACT +GATGCCAGCAACAATCGCTGAGGTTAATCCTGCAGGGGAGGGGTGACCTATTATTATCCC +CACTTACAGTCCAGGAAACAGAGGCTGGGGACCAGAATGACACACCAAAGCCACCAGCAA +GAAAGGTCAAATAGTGAGGGCACCTTGAACCCCAGCGCTCATGACTTTCGGGGCAAGGAT +GGGAGGAAGGTGTAGGATGTCTCTTGCCCCACTGTGGGCCTGGAACTGCCACCCCATCCC +AAGACCACAGCCTCCCTCCTGCCCAGGCCCCCCTGCGGATCCAGGCCAAGACGGAGCCCT +GCAGGAGGTCAGAGGAGGGGACTTCTTTGCACCCTGCGCCTCTTTCCAATCGCCAGAGGG +CAGGGCCCTACCCACATTGGGGATGGGAAGTCAAATGCAGAAGTTAGGTGAAGTCAGTTC +TTGGATACTCCTGTACTGTCACCCTGGTCTTATCTCATGCCTTAGCCCAAGCTGTGCACA +CAATGGGGTCCTAGGTCCCCCTCACCTCCCAGATTCCACCTTCCCAGGGATGGGACCCCC +TAGAACCCTCGGGGGCCTGGGCAGTGGCCTTGCTGGCTCTTGCCTTCCTAGGAGCTGAGC +AGGAGCTCCACTCTCAGCAGGGCAGTTCACTGCAGCCTCTGCTTCCTCAGCTCAAGCCAT +CCTCCCACCTCAGCCTCCTGAGTAGCTGGGAATGCAGACACACACCACCACGCCTGGCAA +ATTTTTGTATTTTTAGTAGAAACAGGGTTTCACCATGTTGCCCAGGCTGGTCTTGAATGC +TTGAGCTCAAGCGATCTACCCACCTCGGGCTCCCAAAGTGTGGGAGCCAAGATGGGAACC +CAAGCATACGGCCCCAATGCTGAGGCTCTGAACTACTGACCTGCCCTCAGCACTCAGCCT +TGGGATCATGAGTCACTGTGCAAGGGAGTTCCAACATCTGCATGTATGTCTGGAATGATC +TGAGCCTGCAGAGTTCCTACACACTGGCCACATTATAGGGTGGTGTCTGTGGTCACACAG +CTCAGGTCAGGTATTTATTAGTACATGAATAGCTTAGCTGTGTCATAGTCTTTATGTGAA +AGGCACATAAAAGGCACTTTGGCAGGCTCAAAGTGTGGGGATTATAGGTGTCAGCCACCG +TGCCTAGCCCACTGGATGACTTATGATATCATATGTGACATTGTGACATCATGTGAGTCA +GGGATGTACCCCGTTCTCAGCTGCTATATGCTATGTTACAGTGACAGAATGGGAATGAAG +AATGTGTCCCACTCTCTCAGCTGTTGTATTATATCATACAAGGTGCAGTGACTAAGTGTG +TCAGCTGTGTCCTCATCCTACATAGCATATGAGAGTGTGTGACGGGAGATAGGATGCAGA +CCTGAGAAGCATTAAACACCTAGGCAATAAAGGTGCCAGCATCAGCTGAGAGCGCAGGTA 
+GACCTCAGTCACATTTGTTACTGTGTAACTAAAAATACAAAATTAGCTGGGTGAGGTGGT +GCACGCCTGTGGTCCCCACTGCTCAGGCGCCTGAGGCAGGAGAATTGCTTGAACCCAGGA +GGCAGAGGTTGCAGTGAGCTGAGATTACACCACTGCACTCCAGCCTTGGGGACAGAGTGA +GACTGTATCTCAAAAAAATAAAATAAAATAATCTAGGACAACCAAGAGAAGGACTCAGGC +TCACCTTACTCTATGTCACATGTGATATATAACACTTGAGGGGGACACACACTTGTCACT +TTGTATCACTATGTCCTATATGATAGCCAATGATAACCCAAGAGGGGGAGGCAGCCTTGT +TCACACCATGTCACTTATGATATTATATGACATCAAGGAGCCCAGACCTCAGTTGCATTA +TGTCACTATGGCACATACAGTATCATGCAACAACTGACAGAAGGGTTGTGGGCCTGATTC +ACAGTGTATGTGAGTCACAAATGTCACTTATGACACCATACAGCAGGTGAGAGGGGATAG +TCATTATTCACCTCGGGTCACATTGTCACACATGATACCATATGATAGCAGAGGGGGATG +CAGAACTGAGTTGTATTATGTCGCTATGTAACCTGTGATAGATGATAGCTGATGCGGGTG +AAGACCAAAGTTACATTTTGTACCTACATTATATGAAATATACAACATGCACGAGGGAGG +TGCAGACACAAATCACACTGTGTGCTGTATCCCATATATGTGTTTAATTTCTGTGTTTAA +GAGAGCTGGGTTCATGAATGAACGTAGTCACTGCACCTTGTATGATATAATACAACAGCT +GAGAGAGTGGGACACATTCCTCATTCCCATTCTGTCAGTGATTGACCACACCTTGTGTTA +ACTGTATGAGCCACTGTGTTTGGCCGTGTATAGCGCACGTGGCACATTGTTCACTGATAC +ATATTTGCTCCGTACTGAAGTGAGGCAGATGCATGAGTGATTGAGAATAATGCTACCTTG +GTGAAGCATTCCATTTAGAAGCATGAGGTTCAGGTACTACCATCTTCTGTGATACATATT +TAAATGTTAAGTTTAACAGTTAGGTCTACCATGAGTGAATGAGCCTTGCATTATCTTTGA +GAATCGATGTGTTTAGCAGGGTGTGGCTCATCTAAGATAGCCTTCTTTAATGCATGTTCA +CTATCTGTGTTTATTAGAAATAGGTTTGTGAATGACTTAGCATAGTGTTTTCCCTATGAA +ACCCTGTTTATTTATTTATTTATTTATTTTGAGATGGAGTTTTGCTCTTGCAACCCAGGC +TGGAGTGCAATGGTGTGGTCTCGGATCACTGCAACCTCTGCCTCCCAGGTTCAAGCGATT +CTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACGGGCGTCTGCCACCACACCCAGCTAA +TTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAGCTCC +TGACCTCAGGTGATCCTCCTGCCTCAGCCTCCCAAAGTGCTGGAATTACAGGCGTGAGCC +ACCACACCCGGCCAAAACCCTGTTTTAAGAAGTGTGTGGTGCAGGTACTACATTCTTTGT +TGTTCCAAGTTCAAACCCTGTGTTTAACAGAGATAGGTGCAAGATTGCCTAGGTGTAGCG +TAGACTCTTGGAAATACTGTGGTTAGCATTCTGTGTTTTCTGTAGCATAGTTTCAGTGAT +GCCTGTTTAGATAGTTTATCAGATCTAGGTACTAGTGTTATGTCAGCGAAACACTGTTTA +GCTGCACGCGGTGCATGTAATACTTTGTTCATTGATATTTGTTTAAATCAATCAGTGTTT +ACCAGAACTAGATATGTGAGTTACTGAGAATAGCCTAATCTCTGAAAATGTTTTCCAGCC 
+TATGGTGCATGTAATATACTCTTCTATGGTACCTCTTTCCAATGTTTTTATTGTGGCAAA +ATATACATAACAGATTATACCATTTTTAAGTGTAGAATTTAGTGGTAGTAAGTATAGAGA +GAACCTTAAAAGCAGCAAGAGAAAAGTGACTTCTCATGTGCAAGGGAGCCTCTAGAAGAT +TATCAGTGGATATTTCAGCAGAAACCCTGCAGGCCAGAAGGTGGTGGGATGATACATTGA +AAGTACTGAAAGAGTAAAGCCTGCCAACTGAGAATACTATATTTGACAAAACTGTCCTTC +AAAAGTGAAGGAGAAATTAAGACATTCCCAGATAAATAAAAGCTGAAGGAGTTTATTACC +ACTAGACCTGACCAATAAGAAATAATAAAGGGGCCAGGCACAGTGGCTCATGTCTGTAAT +CCCAGCACTTTCGGGGGCCGAGGCAGGTGGATCACCTGAGGTCAGGTGTTCAAGACCAGC +TTGACCAACATGGCGAAACCCCATCTCTAATAAAAATACAAAAATTAGCTGGGTGTGGTG +GCACGCACCTGTAATCCTAGCTACTCAGGAGGCTGAGGCAGGAGAATCACTTGTACCCGG +GAGGTAGAGGTTGCAGTGAGCTGAGATCACACCACTGCACTCCAGTCTGGGCAACGGAGC +GAGACTTCATCTCAAAAAAAAAAAAAGAAACAATAAAGGGAGTCCTTCAAGTTGATATGA +AGGGATACTAGACAGTTGCTCAGCCTGATGAAAATATAAAGGTAAATACATAAAGATTAA +AACCTGTATTATTGTAATTTTTGCTAATAACTCAATTTTTAATACTTTAATGGAATTTAA +AGGACAAAAGCATAATAATTATAAATCTATGTTAATGGATACAAAGTATATAAAGTTTAA +TTTGTGACATCAGTAACTTAATAGTGGGGCAAAGATGTAAAGAGTAGAGGTTTTGTATGT +GATTGAAGTTATCTGTTTAAAATAATAAGATAACTTTAAGATGTTCCATGTAATTCCCAT +AGTAACCACAAAGAAAATACCTACAGAATATATGCAAAAGGAAATGAGAAGGAAATCAGA +GCATAGCACTACAAAAAATCAACTATAACACAAAGGAAGACAATAAGGGAGGAAAGAAGG +ACAAAAAGGCTCTAAACATACAGAAAACAATGACCAAAATGGTGGTAGTAAGTCCTTCCT +AATCAATAATTTCATTAAATGTGCATAGGTTAAACTCCCAATAAAAAGACATAAATCGGC +TGATTGGATTTAAAAAAACGGGATTCAACTATATATGCTGTCTACAAGAGACTCATTTGA +AATCCAATACAAATGGGTTGAAAAATGAGAGAATGGAGAATATTCCGTGCAAAAAGTAAC +CAAAACAGATCAGGAATGGCTGTACAAGTTATGGCTGTACAAGTTGTTGGGTTTTATGTT +ACTGAAGAATGAACAGAGATGAGTAAGTGGAGGTGTTATGTAAAGGCATACTGTACTCAA +AATCTGAAGACCTGCAGCAGATTTAAATTCCAGCTCTTATTATAACTTTTTAAAAGATTG +TGAAAATATCAAAATATAGATGAATCAAGTTTTAATATACTGTATGATGGGTGGATGAGG +CTGTCCATTGTACCATTTGTTTGAATTCTCAGGCATGGTTTGGCAGTGCAAGAACTCTGT +AACGTTAACAAATTCAATAAAAAGTAAATATATGGAAAAAAAAGTATAGACAAAATAGAT +TTTAAGTAAAAAACTATTACAAGAGAGGGTTCTGGGAAGAAAGTGGAGTAGGAAGCACTG +GGAATTCATCTCCCCACCTAGAAAATAATCACACTGGCAGAATCTGCCTGATATAACTAT +TTTGGAACTCTAGACTCTATCAAAGGAGGCTTGTAACCTCCAAATGAAGGCTTAAACTAT 
+AATTTTTACTTAATTTTGGTCAATTTCAGCTCTTAGCTCAGCAGTGGCTACCCAATCCCC +ATGCCCCAGCTTCACGGCAAGAAGCTTTGCATGTGTTCCTGAAGCAGCTTGTACCAAGCT +TGTGGGAACAATCATGGGCAATAAGCACTCTGTCCTCCAAGTGTTAGCATCTGCGTTCTG +GTTGTTGATTGCTACTTTTGATTATGGAAGGGCAAACACAGAGGCTGGCAGCCATTATTG +CTCACAACTCCCCACTCCACTGCTGCAAGCCCTTACCAGACTGAAGCAACTTCTAGGAGA +TAGAAAAGGCCAGAACCCCATTTCCCTTCCCCTTCATTGTTCTCTTTTCCTTTTTTGGGA +GCCAAACATTAAAGACTAGGACACTCAAAAGCAATGGCATACCCAGAGGAAATTAAAGTT +ACCACACATCCTTGGAGAGAGGAGTGTGTGCCCAGGGAAAGGAGCAGCTTCAGACCTGAG +AAGACCTCAAGCTTACAACTCAGGTTGATCCTCAGCATGGAGACAACCTACAACAATGTA +AAACATAACAAAACCCCAAAACAGCAAACCCTGAGGAAGAGGAGAGTCTCATCTCCAGAG +TTACTGCATTATTATATTCAAGTGTCCAGTTTTCAATACAAAACACAAGGCATACAAAAA +ACAAGAAAATATGGTATTTCAAAGGAAAAACAACAACAGAAACTGTTCCAGAGAAAGACC +AGATGGTAACCTACTACACAAATACTTTAAAACAACTTTCTTAAAGATGGTCAAAGAACT +GAAGGAAGATGTGGAGAAAGTCAGTAAAATTATGTGTAAACAAAATGGGAATATCAATAA +AGAGATAGAAAACCTAAAAAGAAAAAAAATAAATAAATTCTGGAACTAAAAGTGTAGTGA +TTGAAATAAAAGTTCACCAGAGGGATTCAAAAGCAGATTTGAGCAACAGAAGAAAGATTC +AGTGAATTTGAAGATGGGACACTTGAAATGATCAAGTCAGAGGAACAGAAATAAAGATGA +TTGTTGAATAGCAAGCAGACATTGTGGAAGTCCCAAAAGAAGAGAGGGAAAGGGGCAGAG +AGATCATTTGAAGAAATAATGGCTGAGGCTGAACACGGTGACTCACACCTGTAATCCCAG +CACTTTGGGAGGCCAAGGTGGGTGGATCACAAGGTTAGGAGTTCGAGACCAGCCTGGCCA +ATATGGTGAAACCCCGTCTCTACTAAAAACACAAAATTAGCCAGGTGTGGTGGTGCATGC +CTGTAATCCCAGCTACTTGGGAGGATGAGGCAAGAGAATCGCTTGAACCCAGGAGGCAGA +TGTTGCAGTGAGCCGAGATTGTGCCATTGCCCTCCAGCCTGGGCAACAAGAGTGAAACTC +CGTTTAAAAAAAAAAAAATTAGCTGGGCGTGGTGGCACGTACCTGTAGTCCCAGCTACTC +AGGAGGCTGAGGCAGAAGAATCGCTTGAATCCAGGAGGCAGAGGTTGCAGTGAGCCAAGA +TCACACCACTGTACTCTAGCCTGGGCAACAGAGCGAGACTCTGTCTCAAAAAAGAAAAAA +AAAAAGCTGAACACTTCCCAAATTTGATGAAAGACATGAAAATAAATATCCAGAAAACTC +AATGGACTCCAAGTAGGATGAAAAAAAAAAAAAAGACTCATACTGAGACATTATAATTAG +CCAGTAGGGCCTCTTGAAAGCACCAAGAGAGAAGCAACTAGTCACATGCTAGGAATATAT +AATAGGATTATAAGTAGATTTCTCATCAGACACTTTGGAGAACAGAAGACAATGGGATGA +CATATGTAAAGGGCTAAAAGAAAAACAACCACTACCTCTCAACCAAGAATCCTATATCCA +GCAAAACTGTCCTTCAAAAGTGAGGAAGAAATTGGGAAATCCCCAAATAAACCAAAGTTG 
+AGAAGTTTGCTACCATTAGACCTGCCCTGCAAGAAATCTTAAAGAGAATCATGCAGGTTG +AAAAGAAAGAACACTAGATAGTAACTCAAAGCCATATGAAGAAATAAAGATGCCAGTAAA +AGTAAATATATGGGAAAATATTAAATCTAGTATTATCGTAACTTTGGTTTAAAACTCCAT +GTTTTGCTTTCTACATAATTTAATAGACAAATGCATTAAAAACAATTATTAGTTTATGTT +TATGGACACACAATGTACAAAAATGTAATTTTGTGACATTGATAACTGAAAGAGGAGTGG +CAAAACTGTGAGGAGAGTTTTTGCATATTATTGAAATATAGCTGGTATGAATTCAAGTTA +GAGTGCTATAACTTTAGAATGTTAAGTGTAATCCCTATGGTAACCACAAATAAAACATTA +TATAACATAAAAAAGTAAATGAGAAGGGAATTAAAACACTTCGCTACAAAAAATCAACTA +AATACAAATGAGATCATGCAGGAAATGGACAAAAATGCTGTAAGGCATATAGAAAATGTA +TAGCAAAATGCCAGAAGTAAGTCCCCCCTTATCAGTAATTACTTTATTACTTTTTAAACC +ATTTTGTTGAGGAATGATTTACATAAAAACTGTACATATTTAACGTACACATCTTGATAA +ATTTACACCATAAAACCATTATCATCAAGCCTATAAACATATCCATCACCTTTTAAAATT +TCCTTCTGCCTCTTTATTATTATTATTGTATAAAAAAAAGTTTTTAATGGCCAGGTGCGG +TGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGCGGATCACCTGAGATC +AGGAGTTGGAGAGCAGCCTGGCTAAGATGGCAAAACCCCATCTCTACTATAAATACAAAA +ATTAGCTGGGCGTGGTGGCGGGTACCTATAATCTGAGCAAAGTACTGGGAGGCTGAGGTG +GGAGAATCTCTTGAACCTGGGAGGCGGAGGTTGCAGTGAGCCGAGACAGCACCATTGCAC +TCCAGCCTGAGCAACAAGAGTGAAAATCTGTCTAAAAAAAAATAATAATAATAAAAGTTT +TTAATTAAACAATTTAAGAATATAGTAACATTTTCTGTGGGTACTATGCTGTATAGCTCT +CCAGAACTTACTTATCTTGCATAACTGAAATTTGTACACTTTAACCATCAACTTCCCATT +TCCTTCTCTTCCCCAGCTCCCAGCAACCACCATTCTGTTCTCTTCTTCTGAGTTTGACGT +CTTTAGGTTCCACACATAAGTGAGATTGTACAATATTTCACTTTCTGTGTCTGGCTTATT +TTACTTAACATAATGCCCTCCAGTCCATCTATGCATAGAAATGCAACTGATTTTTGGATG +TTGACTCTGTATCTTGCTACTTTATTGAATTTATTACTTCTAACAGTCTTTTAGTGAAGT +CTTTACAGTTTTCTATACATAAAAATATGTCATCTATGGAGACCATTTTACTTCCATTCT +TATTTCTTTACTTGCTTAATTGTTCTGGCTAGGACTTCCAGTCCTATTTTGAGGAGAAAT +GGTGAGAGTAGGAATTCTTGTCTTGTTCTTCATCTTCGAGGAAAAACGTTCAGTCTTTCA +CTGTTGAGTATGTTACATGTGGTCTTCATTATGTTGAGGTACATTCCTTCTGCACCTAAT +TTGCTCAGTTTGTTGTTTTTTTTAATCATGAAAGGATGTTGAATTTTATCAAGTGCTTTT +TAATAATAAAAATAAAGGATTTTTATCGTTTATTGTGTTGATGTGGTGTATCACATTTAG +TGACTCTTGTATGTTAAAGCATCCTTGCATGCCAGAGATAAATCCCACTTGATCCTGGTG +AATAATTCTTTTTTTGTTTTTTATTTATTTATTATGTATTTATTTTTTTGAGATGGAGTC 
+TCGCTCTGTCATCCAGGCCTGAGTGCAGTGGCGCGATCTCGGCTCACTACAAGCTCCGCC +TCCCAAGTTCACGCCATTCTCCTGCCTCAGCCTCCCTAGTAGCTGGGACCACAGGCACCC +GCCAGCATGCTCAGCTAATTTTTTTTTTTTTTGGATTTTTGGTAGAGACTGGATTCATGG +TGAATAATTCTTTTAATGCACTGTTGAATTTGGTTTACTAGTATTTCATTGAGGATTTTT +GCATCTATGTTAACCAGGGATATTGACCTATAGTTTTCTTATTTTGTAGTGTTCTTATCT +GGCATTGATATCAGGATAATGCTGGCCTCTTAGAATGAGTTTCAATGTGTTCCCTTTTCT +TCAATTTTTTTGGAAGAGTTTGAGAAGGATTGTTATTAATTCTTTAAATGTTTGTTGAAA +TTGACCAGTAAAACCATTTGGTCCTGGGATTTTCTTTGTTGGGAGATTTTTCATTACTGG +TTTAATCTCTACTTATTTTTTCTGTTTTTTTTTTTTTTTTTAATTATACTTTAAGTTCTG +GGGTAGCCGTGCAGAAAGTGCAGGTTTATCACTTAGGTATACATGTGCCATGGTGGTTTG +CTGCACCCATCAACCCATCATTTACATTAGGTACTTCTCCTAATACTATCTCTCCCCTTG +CCCCCATCCCCCAACAGGCCTCAGTGTGTGATATTCCCTGCCCTGTGTCCAAGTGTTCTG +ATTGTTCAACTCCCACTTATGAGTGAGAACATGCGGAGTTTGGTTTTCCGTTCCTGTAGT +TTTCAGATGTGCTCTGGCCTCCAAGGACCATGAAGCCAGGCGGTGGTGGGGGGGTGTCCT +CTGTATAAAAAGTGCTGTCCCAGGAACTTCCTGACGGACACTTTGGGGCATGTGAGCGAT +TCCTGGGGAGGGCACCTCGGCCTTCCTAAGGCTACCCCTGCAGCCAGTGCTGGCCATTCT +CACCAGCAACCAACCAAAAAACATCAGAGTCCCTGTAAACCTGGTTGTGATGATAAAAAC +CAAATGTTTTCTAATCTAAGACTTGTATGCAGAACACAGAAAACTGCAGTTAAGGAACAC +CCTACAAAATTGACCAACAGTATTTTCCAAAAACATTTTTCATCACTTCAAATAATGTAC +AAAATCTACAAAAATCATATTTACCAGGACACATCTGTTAAATAAAAGCATTGTTTCGTG +TTGGTATACATATACACAAGACTATGTATAACAGACTGTTTCCCCTCCCTGCAACCACAG +AACCATCACACACAGGCACAGATACACGTCGGCTATGCCGCTTTCCACGAATGCATGGAA +CCCAGGACGCGAACCCACAGCTCGAGGTCTTATACCTTCACTACTGAGCTGCCGCCACTG +CAGCAGCAACACCTCTGCGGAGGTGTCGCTGAAGTCACTGGTGTCCCTGCCCAAGGTGTC +CTGGTCCTGGTCAACTCTACTGATTGACCCTTCGTGGATACCTCAGGTCTAAAATCCTTT +CCTCCGAGCCAGAGCTCTTCCTGTTGTGCAAACTCAGCCCCGTCTGTACCTTCCTGCTTG +GCCGGTGATCGCAGTCCTTCTTCGCCAGCAAGTGTGGGCTTCCAAAGACAGGGCTGGGCC +TGGCCTGGGACTCCCTGAAGGCCGAGAAGGACAGGCCCTGCAGAGGCAGCCCCAGGTAGG +GGCTGAAGAGGCCGGTCTCCCTGCCCCCCCAGGAGACACCCTTTCCAAAGTGGAAGAGCT +GGGTGGACAGCAGCGGGGAGAAGCCCGCAAGGGGCAGGCTGGGCAGGCCCCGGGCGGGGC +CCTCGGACCCCTTGTCTCTCTCTCCCAATGCCGCCCCCTCCACGCCGCATCTCTACCTTT +GGAGCGCAGTGCCCATGGGCTGGGCAGCCGACTGTGGTGGGCAAAGTCACTCCAGGGCGG 
+GGCGCGGTTGGCCTGGGCTCCAGCATCCCTCTCAGCTCCCGGGCTGGGGGTCAGGAAGCT +CCGGTTCCTGCAGTCCACGTGAAAGGTCCCCCTGGCTCCTCCTGCCTCGGTGCCCGCTGT +GGCACAGCTGGGCCGCTGACCAAAAGCACTTTTGGCAGCAGGCTCAGCCCCTCGTGGGCT +CTGGTCTTTGTAAACGTAAACATTTCTCTTGCTCACTGAGAGACCTTGGCGGAGACACTG +CCGCTGGTCCTCCTGGGTGGCGCATGATCCCCTCCGCCGGATCAGTGGGGAGCCCCTTCC +CACCCTGGCTGAGCAGGCACGCTGCGCCCTGTGCTCCTGCGCTCCCTGCGCCTCGATGCC +CTTTAGCCAAATGTGGGACCCCCCGGCCCTCTGGCTCCGTGTGCACATGCCAGGCAGTGG +GGCCGGCTCCTTCCTGCAAGGTACCTCTGGCCTGGCTGGGCCCCCTGTCCCGAGAGCGGT +GGGGCCCTCTGCCTGAACTTCTGAACTGCTCACCGACTCCTTGGCCTTTTCCACCAAAAA +CTTCCTAATCTCCAGTTCGATGCTATCGTCGCTGTCCACTGAACTGCTGTTGTCAGACAA +GGAGCCAGGGCTGGGAGCTGGGCCCTGGGACTCTCTGGGGAATAGATTCTCTTCGGAGGC +GGAGGCAGAAGCGGGTCTCCTCACCAGGAAGGCCGGGGCTTCGTCCTCTCGGCCCTGCTG +CCAAAGACACTGGGGGGTTTCTTCCTGGAGCCCTCGCAGCTGTCTCTCTTGGACTTAGGC +AGCTCTTCAGCACCTGCAGGTGCCTGTTTTTCCACTCTCTCAGGAGCCTGCCCAGCTGCT +CCTGGAAGTGCGTCTGGGAGGTGCTGAACCTGACCTTCTTCCTGCACACAGCCCTGGGCT +CCCTGGACCTCTTCTTGAGCTTTCACTTGGACCTTAACAAGTCCTTGATGGCTGTGTCCA +GGTCCTTGTCACTGTCCAGGGAACTGCTTTTGTCTTCGGAGCTCTTCTTCTTGTCTAGGT +GCCTTGCCTCGTCTGTCTTACCCTGGCCCTGTGACGTGCGAGTGTCACCGGGCACCCTAG +CGGCGCCCTCTCCTCCCGGGGCCTCGCTGGCTGTGCCCTGGATGGAAAGGTCCCGCCCCT +CATGGCCCGGCCCGGCTCTCCCCTGGCTGCGGTCGGCATCCTGGCCACCCTCTTTCCCCA +CCACCCGCATGTTCTTGGGAGTGGATGGCCTCACTTGGCAGCCGCCTCTATGCTTCCTTT +TGCAGCCAGGAGTGGGTCCAGTGTTTTAGAGAGAGGGGCCTTGGGGCTGCCGGTCTGGCT +GCTGAGGCCAGGTGGTGAAAGTGGGCCCTGGGCAGCCTGAGGGCAGCTCTCACCTCTGGC +CAGCAAACTTCTAGACTGCACCTTGAGGGCCAAAAACGTCCAGATTTCCTGCTCAATGCT +GTCGTCGCTGTCCACGGAGCTACTGTCACCATCAGAGCGGGAAGGCACGTTGGGGGAGTA +GAAGAGTGGGCTTGCGGACAGGGACCCATCGCTGCCCTCCATAGGGCCGGCAGGATCGTC +TTGAAAATGTCCAGGACTGCTTCTACACACATCAGCTCAGCGGAGGTGTCTGCCTGGCAA +GAGGACCATTCCACAAACTTGCTCCTGGAAGCCGGGCTCGTTGGAGGTGGAGCTTTGGTT +TCCTTTGGGATCTTGGGGGAATGGTCAGCGTCCAGATCCCCTGGACCAGGGTCCGTGGTC +TTGGTGGGCACTGGCTTCTTCTTGCTGGGTGTTTTCCTGTGGGTCTCTGGCAAGGCACTT +TTTGTGGCGCTGCTTGTGCTGTGTGCGGGAGGGGCAGGTGCTCTTTCCTCTTGGAGCTGG +ACCCTCTGGGGCGGGTCCCCGTCGGCCTCCTTGCGTGTTTTCTGCACCTGGTACAGCTGG 
+ATGGCCTCCTCAATGCCGTCGTCGCTGCTGGAGTCGGACGCCTCGGGCGCCTGTACGGCG +CTCGTGACTCGCTTTCCCCTCCTTGCGGTGCTGGCGTTCCTTTTAATCCCACTTTTATTC +TGTACTGCTTCTGAAGGGCGGTGGGGGTTGCTGGCTTTGTGCTGCCCTCCTTCTCCTGCG +TGGTCGTGACCTTGGACCTGAGGCTTCTGGGCTGCACGTTTGTCTTTGCTAACCGGGGGA +GGTCTGCAGAAGGCGAACTCCTTCTGGACGCCCATCAGGCCCTGCCGGTGCACCACCTTT +GTAGCCGGCTCTTGGTGGGATTTCGAGAGTGACTTCGCCGAATTTTCATGTGTGTCTGGT +TTCTTCTCCACTGACCCATCACATTTTTGGTTCTCATGCTGTCTTTTCTCATTCAGAAAC +TGTTCGATTTCTGCCCTGATGCTCTGCTCAAAGGAGTCTGCTCTGCTCATGCTGACTGGG +GAGGCAGAGCCCTGGTCCTTGCTGGATCCCACCTGGCTGCCAGGGCCACACCACCTGAGC +CAGGTACAAGTTTTGGGGAACACAGGGCAGTTGGGCACTGCTGTGAGCCAGTTCCGGCTT +ACATCTACTGCCTCCGCCTGCAGCCCTGGAAGGCTGTGCATGGCTGGGCCCCGCTGGCCC +CGGGCTGTGCGGCTCCACTCTTTACCTTCAGGTACTCCTGGATGGCCTCCTCAATGTCCC +GGTCCACGGAATCGTCACTGTCTGAATCTAGCACCAATGGGCCAAAGTCTGCAGTTTCCT +CCTCCCCCACGGGGTCAAAGTCAGCAACAAGACCACAGGCAGCCAACGCAGGCAGCTCCT +TGTGCATGGTGGGCTTGGCAGCAGGCCTGGCGTCGTGGCATCCCTCTGCCCCCTCTGCGC +AGTGCGCTCATCGCTGGTGCCCCTAGCAGCCCTGTCGCTCTGCAGCGTGCTGATGAGCAT +CTGCACCCGGGTGCTCACCGACATGCTCTCCACGCCCTTGTCAGCCTCCGAGAAGCACCC +GGGGAACCTAAAGCTCCCTGGTGGGACAGAGGCCACCCATTTGCGCTGGAGAGCAACCAC +TGGAGGAGCATTCATGAGAAACATTCTGGCAGATGGGGAGCGACGCGCAGAGGGGCGACA +CTTTATTTCTCTGCAGGCTTCACATCCTCCAAAGATTGGCAAGCAGTACCCGTGAAATAA +CTTTAAACCTGCAAATGCTTCTTTGCAGGTTTAAAAGGATGACTATAAACTATGACGTCA +TGCCTAGATTCATTCTTGACCCAACCAACAAGCTCTTGACATTCTCTGAGTCCAGGTTGA +CTGTGATGAAAGGCAGCTAGTGTTCCCAAATGGCCCAGGGATCAGGTCTTCATCGCTCCA +CTCAGAGGGAAGCATCCTCTCTCTGCTTTTTAAATAGACTTTTGACTGGGGCTCCAGCAG +CGCGGGGCGCGCAGACCTGGAGTTGCATGGAGGCCAGAGCCACGACACCCGCCTGGGGAA +CGGAGCAGCCCCAGGCGCTGATCCCCGTCCACCTGCCCCACGGAGCCCTCGCCGCCCGCT +TGCCACTGCCTGCATGGCCCTCCTGTCCCCGGCCCCCCAGCCCTCCTTTCCCCAGCTCCC +CCACCCTCCTGTCCCCGGCACCCCAGCTTCCCAGCCCCCGAAACCGCCCCCCCACCTCGA +CCCGGCCCATGCCGCAAGTCGCCCGCTGCGCGGACCCGGCCTCCGCCCGCCTCCTGCGTC +CTGGGGGAGGCGGCTGCCGGGGGTGGTGGGGGAGGGAGAGGGGGAAGAGGCCGCCCTCCG +CCCGGGTGCGGGGAGGGGGCGCAGGGGTGTCCGGCCAGGCCCCCCGCCTCCCCGCCTCCC +CGCAGCAGCTGCCCCGCTCCCGGGCCGCCTAATACTTTTACATTTTAACTTTTATACTAC 
+AGTGAAAAGTGATTTACACACCACCACTGCAATATTACAGTGTTATGAATGTGACTATAT +ACTTACCTTTCCCTGTGAACTTTTTTTTTTGAGACAGAGTCTCGCTCTGTCGCCCAGGCT +GGAGGGCAGTGTCCATGATCTCGGCTCGCTGCAAGCTCTGCCTCCCGGGTTCAAGTCATT +CTCCTGCCTCGGCCTCCCGAGTAGCTGGGACTACAGGCACCCGCCACCACGCCTGGCTAA +TTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGCATGATCCCCCTCTT +CTGACCTTGTGATCCACCCGCCTTGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA +CTGCGCCCGGCCTACCTGTGAACTTAATATCTTAATGTTTTAATGTTGCTAATCAGAATC +CTTTTATTTCAACTTGAAAAACTGCCTTATAAGGCAGGTGCAGTGGTGATGAACTCCCTT +AGAATTTTTTTGGTGGGAGTCTGGGAAAGACCTTATCATCTCTTTTTCATTTCTGAAGGA +CAGCTTTACAAGTTGTGGTCTTCCTGATTGGCAGTTTTTTTCTTCCAATACATTGAATAT +AGCATCCTATTCTCTCCTGGCTTATAAGGTTGCTGCTGAGAAATCCACTGATAGCCTTAT +TGAAGTTTCCTTGTATGTGATGAATTCCTTTCTTCTTGCTGCTTTTGAAAGTCTCTGTCT +TTGACTTTTGATATTTTAATTATAATACATCTTGGTATTATGGTCTTTGGGCTGGCCTTT +TTTGGGGCCTCTGAACTTCATGTGTCTGGAAGCCCACTTGCCTCTAAGAATTTGGAAAGT +TTTTACCCATTATGTCTTCAAATATACTTTCAGGCCTTTTCTATCTTTTTTACTTCTAGA +AAGTCCATAATATGTTTGACCCACTTCATGGTGGTGTCCTATAAATCCCAAAGGTTTTTA +CTTTATAAACTTTTTTTTCTTTCTGGTCTTCTGACGGGATATTTCAAATGTCCTGTCTTT +AATTTCACAGATTCTTTCTTCTGTTTGATCAAGTCTGCAATTGAAATTCTCTATTGCATT +TTCATTTCATTCATTTATTTATTTTTATATATTTTTGAGACAGAGTCTGTGTCACCCAGG +CTTGAATGCAGTGGTGCCATCTTGGCTCACTCCAACTTCCACCTCCCGGTTCAAGCGATT +CTCCTGCCTCAGCCTCCCTAGTAGCTAGGATTACAGGCATATGCCACCATGCCTGGCTAA +TTTTTGTATTTTTAATACAGATGGGGTTTTGGCATGTTGGCCAGGCTGGTCTTGAACTCT +TGCCATCAAGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTCCGCC +ACGGCACCCAGCTTGCATTTTCATTTTATTCATTGTATTCTTCAGTTCTAGAATTTCTGT +TTGGTTCTTATTATTTCTGTATCTTTATTGAACTTTTAGTTTTGTTCATCTACTATTTTC +TTGATATTATTGAGTTGATATATACATTCTAGTAAATTTCACTGAGCTATCTTAATTATT +TTGAATTGTCAGGCAATTTGTAGATCTCTATTTTTGGGGGGTTGATTACTGGAGATTTAT +GAGTTTATTTTGGTAGTGTCATATTTGCTGATTCTTCATGATCTACAAACTTTCATTAAT +GTCTATGAAGAAGCAAATACCTCTTCTTTTCTTTTTTTTTTTTTTTTGAGACAGAGTCTT +GCTCTGTCACCCAGCTGGAGTGCAGTGGCGTGATCTCAGCTCACTGTAACCTCCACCTCC +CAGGTTCAAATGATTCTCCTGCCTCAGCCTCCCAAGCAGCTGGGATCACAGGCATGTGCC +ACCACGCCTGGCCAATTTTTTTGTATTTTTTGTAGAGACAGAGTTTCACCGTGTTGTCCA 
+GGCTGGTCTCAAACTCCTGGCCTCAAGTGGTCTGCCCGCCTTGGCCTCCCAAAGTGCTGG +GATTACAGTTGTGAGCCACCATGCCCAATCTCTTTCTGTCTTTATAGATTGGTTTCAGCA +GGTACAAACCTTTTCCTGCTGGATCCCTTGACTGGATCACAGTCAAGTGGGCCTGGAGCC +ATATCACATGGCTGCTGCCTGGTCTGCAGCTGAATCTCTGATTGGCAGGCCGCTATCAAG +GCATAGGTTGGTGATGCAGTTTCTGCTGGATCCTCAGGAGAACTGGACTGCCTCTGATAC +CCTGATTGAACAGGACTGGAGCCAGGTCATGGGGCCACTTCTAGTTCTACAGTCAAGTCT +TCAGATATCAGGCCTATTACCAAGGGCATGGACTGGTATAGCTCCCTGTGGGTCCCAGAT +TGAGCTCCTGCTGGCTTACTAGGTAGGTCCATGGGAAGACAGGACTGCCTCCAGACCACA +GTAGAGCAGGGCTAGAGCCAAGTCACAGGACAGCTTTGGTGACCACATTTGGGTTCAAGA +TTGGTGGTCCTCTTATTAGGAGAATGGATGGTATGTCTTTCACCAGGTCCCAGGATGGGC +TGGACTGTGCCCAGACTGTGGCAAAGCAAGACTGGAATGGAGTCACAGGGCTACTTTAGT +GTCCATAGCTGAGACTGAGATCAGCAGGCCTGTTACCAAGGGCATGTAAAGGCATCACTG +AATTCCTGGGCAGGCACGACTGACTGTGGTAGAGTGGGGCTGAAGCCAGGTCAGGGCTGC +TTTAGTTTCTGCAGTCAGGACCATGGTTAGAAGGCCTGTTACTGGGGGCATAAATGGTCA +TGGTTCCTCCTAGGTGCTTAGTGGATGGGGCTAGTTGCAAGACCATGATCTAGTGGAGCT +GGACCCAAGTCCATAGGAGGACAAAGCTGCTTTCAGTCTGCAACTGGGAACCTGTCACTG +GTGTGTGGACCTGCCTTCTCAAAGCAGCTCTCCTTGGTTTTGGGCTTTGCTAGAGTTTTG +CCACCTCCTGCCTGGATATTAAAACTCTTGCAAAGGCAGTTTTGTCCATGAATGGCTGCC +AGATCATTGTTTGTGTGGGGAGAGGTGAGTGGAGGGCCTCCTGTTCTGCCATCTTGCTGA +TGTCACCCTAAGATGATTATTTGAATTCTTTGTCAGGCAATTTGTAGATCTTCATGTCTT +TGGAGTCAGCCACTGGAGTTTCATTTTGTTTCTTTGGTGGTGTCATATTTTCTCATGCTT +CCTGTTCTTTGAAGACTTAGATTGCTTTCTTCATGTTTGAAGAAGGAGTCATCTTTTCCA +CTCTTTACTAACTTCAGGAGAGAAAGACCATCAATTAGCTAAGCTATAGATTCTGGGGGT +CTCTCAGTCCTTTTCTGTGGGTGGTCCTTCCCTTTCAAGGGGGATGTCTTAGGATTTTGT +CCCTTGTCTTCATTTCACAAATGAATAAAACAACCAGACCAGACATAAGTAAGGAAATAC +AGCACTTGAACAACACCTGAAAAAACAACTAGACCTAACAGACATACACAGGATATTCTA +CCCAACAACATAATACACATACTTCTCAAGTATACATGGGACATTTTCAGGATAGACCAC +ATAACACATCACAAATTAATTCTCAATAGGGGCTGGGTGCAGTGGCTCACATCTGTAATC +ACAGAGTAATTTGGGAGGCTGAGGCGGGTGGATTGCTTGAAGCCAGGAGCTTGACATCAG +CCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGT +GGTGTGTGCCTGTGATCCCAGCTTCTTGGGAGGCTGAAGCGTGAGAATTGCTTAGGAGCC +CAGGAGGTTGAAGCTGTAGTGAGCAGAGATTGTACCACTGTACTCCAGCCTGTACTTCAT 
+GACAAAGAAAATGTACCATTGTACCACTGACAGAATGAGACCCTGTCCCAAAAAAGGAAA +AAAGCTCAGTAGATTTAAAATGATAGACATCATACAAAGTGTCTTCTCTGACCACAACAG +GATAAAGTTAGAAATCAATAACAGAAGATTTTAAAAAGTTCACAAATTAGTAGAATTTAA +ACAACACACTCTCAAACAACCAATGGATCAAAGAAATCACAAAGAAATTATAAAATTCTT +AAAGACAAATGAAAATGAAAGCACACTATATCCAAACTTATGGGTTGTGGCCAGTTGTGG +TGGCTCACACCTGTAATCCCAGCACTTTGGGAGACTGAGGGAGGTGGATAGCTAGAAGTC +AGGAGTTCAAGATCAGCCAGGCCAACATGGCGAAACCCTGTCTCTACTAAAAACACAAAA +ATTAGCTGGGAGTGGTGGTACGTGCCTGTAGTCCCAGCTACCCAGCAGGCTGAGGCATGA +GAATTTCTTGAACCCAGGAGGCAGAGGTTGCACCACTGAGCTAACACCACTGCACTCCAG +CCTGGGTGACAGAATGAGACTCTGTCTCAAAAAACAAAGAAACAACAAAAAAACACAACT +TATGAGTTGTGGTGAAAGGAGTGCTAAGGAGGAAATTTATAGCTATAAACACATTAAAAA +AAGAAACACCTCAATTCAACAACATAAGTTTACACATTAAGAAACTAGAAAAAGAAGAAT +AAAACTAAACCCAAAGTTAGCAGAAGGAAGGAAACAATAGAGATCAGGGCAGAGATAAAT +GGAAAAGAGAATAGAAAAACAATAAAAAACAAAACCAAAAGTTGGTTCTTCAAAAAGATT +AATAAAACTGACAAGACTAAGGAAAAGGGAAACAATCTAAATTACTTAAAACAGAAATGT +ATTTGAGAATATCTTTATATATTTCTGTCTGTCTGTCTGTCTGTCTGTCTGCCTGTCTAT +GTTTTAGAGACAAGGTCTAGCTCCATTGCCCTGGCAACAATCAGATGCAACCACAATCAG +TGGCACAATCGGCTCACTGCAGCCTTGAATTCCTGGGCTCGCCACTATGCCAGCTCTTTT +TTTTTTTTTTTTTAAGAGACAGGATCTTGCCATGTTATCCAGGCTGATCTTGAACTCCTG +GCCTCAAGGAATTTTCCCACCTCGGCCTCCCAAATTGTTGGATTACAGGCATGACCCACC +ATTCCCAGCCTAGAAAGGATTATAAAAGATTACTATAAATAATTGTGTGCTCATAAATTG +GATAACCCAGATGAAATAGATGAATTCCGCGNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNGTGTCTCATGCCTGTAATCCCAACACTTTGGGAGCCTGAGGTAGAAGTT +GAGGTCAGGAGGTCCAAGACAAGCCTGGGCAACATAGTGAGTCTACAAAAAAAATATTGA +GAATCTACATAAATATAGTGGGGGTGGGGGTGGGGAACAAGAAAACAAAAAATTCAAAGC +ATAGTGAAAAGAAATATAGCAAAACCCAGCCGGGCATGGTGCCTCACGCCTGTAATCTCA +GCACTTTGAGAGGCCGAGGCATGTGGATCACGGGGTCAGGAGATCGAGACCATCCTGGCT +AACACAGTGAAACCTGTCTCTACTAAAAATACAGAAAAATTAGCCACATGTGGTGGCGGG +TGCCTGCAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCGTGAACCTGGGAGGC +GGAGCTTGCAGTGAGCCGATATCGCGCCACTGCACTCCAGCCTGGGCGATAGAGCGAGAC 
+TCCGTCTCACAAAAAAAAAAAAAAAAGAAAGAAAGAAATATAGCAAAACACAACAAAAAA +ATTAAAATTTGCTGGGTGTAGTTGTGCCTTTAGTCTCAGCTACTGGGGAGGGTCTGCTGG +AGGATCACTTGAGCCCAGGGGTTCAAGGCTGTGATTAAGCCACTGCACTCCAGCCTGGGT +AACAGAGCAAGATCGTATCTCTAAAAAAAAAAAAGAAAAAAGAAAACACTCTCTGGCCAC +AGATGAGAGAAGGCAGACAGGAAGCCAGTAAGCCAAGCAGGCAGAGGGCAGGTGGCCCCA +GCCCAGCTGTCGGGTGGTGAGCAGTGTCAGGGAGACAGGAGTGCCGGAGCTGGTGAGGTT +CCCAAGGAGGTGAGGTTGCCTGTGGCCCCCTCCCAGGGCACAGTCCCACCCCTCCAGTAG +TGCACTCTGTCCTCCCTGGTAGGTACAGGCTTTTCCCCACCACCATGGTGCAGCCATGCC +TGCCCCCAACACCACTGGTGCAGCTCCATCCTCCACGCAGTGGTGCAGCCCCATCCCAGC +GCCCATTCGAGCTGCTGTCCCATTGCAGGGGGCATTGTGACAGCTGAGGACCTGAACAAC +TACCGTGCTGAGCTGATCGAGCACCCGCTGAACATCAGCCTGGGAGACGCGGTGCTGTAC +ATGCCCAGTGCGCGGCTCAGCGGGCCCGTGCTGGCCCTCATCCTCAACATCCTCAAAGGT +GAGTGGTCGCACCACAGCCGTGTGGTAGGACCCATGACACTGCCTCTCTCTCCCCACGCC +CCACCCCTCCTGCATCTCTGCTCGCCCCCCATGCCACGTCTTTCCATCACTGAGCTCCTG +AGGTGTGTCCCTGCGTCACAGCTCACCATGTCCTGAAGGAGGCAGTGCAGAGCAACAGGG +CTGAAGCGGGCAATGCTCAAGGGTTGGAGGAGGAACAGGAGTCATCAGGAGGGAGAGAGG +TGCAGGAGCTCAGGGCTGCAGGGCCGGTCCAGAGGGTACCCCGGTCCAGTGGGTGACCCT +GCCACTTGGTCATTGAATGGCCAGAGCTGATGCGTGACGTGAGGTCCAGGCTCAGGAGCC +CTCACCTTACTTCACTCTCACCACAGCCTTCTGAAGCAGCTGCTGCTATTGGTTATTAAA +CGCTCCTTGAGGTGGGCAAAGTGGCTCAGGCAGGTGTTAACCCTCTGAGCCCCTCAGAGC +CTCTGGGGCTCAGCAACATGCACCTGGCTCTGATCAACCAGGGTACAACTTCTCCCGGGA +GAGCGTGGAGACCCCCGAGCAGAAGGGCCTGACGTACCACCGCATCGTAGAGGCTTTCCG +GTTTGCCTACGCCAAGAGGACCCTGCTTGGGGACCCCAAGTTTGTGGATGTGACTGAGGT +AAGGGGCAGGGGCTGGCTCACTGTGGGTGTGGGGCCTGCCGTAGAGGCATCAGGTGGGCT +CCCCAGGGTGGCTACAGCCTCACATATGCTTTATGAATCCATTCCTGCCACAGAATTTGA +TTGCGGGCCTACTGTGTGCTCGGATGGACTCGTGGGTGACCCCCAGTCTTGGCTTCTGCC +CCACAGAACTGACAGTGTGGGGAATTAGTGGCCACCCTCCTACCTCAGGTCCTTTGCACA +TGCTGTGGTTCTTGAGTGCTCAGTGCTGAGATGAGGAATGCATGGGGGCATTGCAGCCCT +CGGGCATGGTGAGATGGATGGGTGAAAGGGAGAGGGCCAGGTGAACAGAGACCTCGGCCA +CCCACTCCCTGTCACTCCAAACTAACGCTTCCCAGATGCCACCCTCAGCCCTGCACCACC +TGACCCACTCACCCAGTAGTTGCCCCAGCTCCATGCTGGGTCCCTATAAGACCCTGTCAT +ATCCCTTCCCGCTGAGGATCCTCACATCCCTCCTTACCTACTTGGGTCCTTGGCATTCCT 
+GGGCGGATCTGCAGACCCCCCCACACTGACCAGTGACCTCCCAGGAGGGGCGTCAGCTGC +CCGGGTGGTGTCTTCTCTTTCCCTGTGAGCATTCTGCACCTCTGACTCCCGCTGCAGCCA +GTGACCTGGTGTCTTGTCTCTCTAAGGGGACAGAGCCACTGCAGCGTGTCCCTCTGCCCT +CCTTTTTGGCTAAGGCCAGGTCCTTCATCTACTCGCTGGCTCGGGGTGCTGTTCCTACAA +TGCTCCTGTCTGCCTCTGATGATTTATTTCTTTATCACGGATTATTCCCAAAAAAAAGGC +AGCTGTTATTGCTCTAGAGACTTCTATCTGCTGCCTCCTTCTGTCCTTTGCTCCTCTTAG +AGCAAACATGGCTGGGCTATGTCCTCTCTCCCTTCAGGGTATCCCCTCCCCTGCTCTATC +CCCATGCCACCAGATCGCCATGTCCAGCCTCAGTTTCCCCATCAGGCCCCACTCAGCAGC +ATCTCACACAGCTCACCACACTCTCCTCGAGTTTTCATTTTGCAAATTTTCGCACCTACA +AAAATGTAAAAAAAACCCAAAACTGCCCAAGCATCAATGTTCCCTTTTCCTGGATTCTGC +AGTGTGGACATTTCTGCCGGATTTCCTAACTCTCTCCGTCTCCACCCACATCTATTGGGA +TTCGTGTTTTTCTGAGGGATTCCACAGTAGGTTACTGATGTCGCACCTCAGGGTGTGTCT +CAAAAGTGAGACTTGAACATAGCACAGCAGGATGTCGGGGTGACACAACCTGCTGTCCCT +TCTCTTTACCTACAGTAGGCTCCCTTGGCTGTTTTGTTTTTGTTGTGTTTTTGTTTGTAC +ACAGAGTCTCTGTCATCCAGGTTGGAGTGCAGTGGTGTGATCTTGGCTCACTGCAGCCTC +AACCTCCAGGGCTCAAGCAATCCTCCCACTTCAGCCCCCTGAGCAGCTGGGACCAGAGGC +ACGTGCCACCAAACCTGGATAATTTTTGTATTTTTTTTAGAGACAGGGTCTTGCCCAGAC +TGGTCTCAAACTCCTGGGATCAAACTGTCTTCCCACCTCAGCCTCCCAAAGCGCTGGGAT +TACAGGTGTGTTTTTTCTTTGAGACGGAGTCTCGCTCTGTTGCCCAGGCTGGAGTGCAGT +GGCGCTATCTCGGCTCACTGCAAGCTCCGCCTCCTGGGTTCACGCCATTCTCCTGCCTCA +GCCTCCCGAGTAGCTGGGACTACAGGCGCCCGACACTGCGCCCAGCTAATTTTTTGTATT +TTTAGTAGAGACGGGGTTTCACCGTGATCGCGATCTCCTGACCTCGTGATCCGCCTGCCT +CGGCCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCGTGCCTGGCCTGTTTTGTTGA +TTTTTAAAGCCCAGGGCAGTAGTCTTGGAAAATGTCCCACATCGTGGATTTGCCTTTCTG +TTTCCTTGAGGGCAGATTCAGACAGAACACCTTTCCCTGGGATTGGTCAGCTAATCCGTG +GGGTGTTGCTGAAACTTTATTTTATTTTATTTTATTTTATTTTATTTTATTTTTGAGACG +GAGTCTCGCTCTGTCCCCCAGGCTGTCCCCCAGGCTGGAGTGCAGTGGCGGGATCTCGGC +TCACTGCAAGCTCCGCCTCCCAGGTTCACGCCATTCTCCTGCCTCAGCCCCCCAAGTAGC +TGGGACTACAGGTGCCCACCACCGCGCCCAGCTAATTTTTTGTATTTTTAGTAGAGACGG +GGTTTCACTGTGTTAGCCAGGATGGTCTCGATCTCCTGACGTCATGATCCGCCCTCCCAA +AGTGCTGGGATTACAGGCCTCGGCCTCCCAAAGTGCTGGGATTACAAGCGTGAGCCACCG +CGCCCGGTGTTGAAACTCTCTTGAGGCATTTTCTTCGGCCTTCGGGTCCATCCTCTCTGT 
+CTCCTCTTTGACCTCCTCATCTCCTCCTCGCCACTGCCTTGGGGACCTTGGCCAGGCTCA +TGGCATCCAGCAGCCACTCAATGTCAATACCTCCATGTTCATCTCTCAGCCCGCCCTTGC +CCGTGAACCCATGCTTATTTATTTATTTTTTTTATACGTGCACCCATGCTCTTTGGGTCT +GATGAAGTATCCAAAATCAAGCTCCTGACCATCCCCAAACCGGCCCCTTCTGCCGGCCTC +GCTAGTCGGCACCATGCTTGATTCTTCTCTTTCTCCCACCCAGGCCATCATCTCTTGCCT +GGTTGATACCCACAGCCTCCCCTTTGGGCTTTATCCTTATCCCCGTCATAGCTGCCAGAG +GGACCCTGTGAAAACACTCCCCAGCCTCCTCATTCTTCTGCCCTAAGCCTGCATGGCACA +GAGCAAAAGCCAGTTGTTATGGGACCTAGGAGGTCCTGTGGGATGGGCCCCAGCCTGCAT +CTTCATCCTCTTCTCCCCACCCCACTCCATTCACTCTCTGCCTATCGCTCACCAGCCTAT +ACCACCTGCCTCAGGGCCTTTGCACTGACCATTTAGGCAACATTCCAGGCTCTTTTCACA +CGTTGCCTCCTCTGAGAAGCCCTCCCTGACCACTCTGCCCATACCTCATGCCTCTTGATT +CCCCTTACCTGGCCTGTGGTTTCAGCACTTTCCCTGTGTGTGTTTGTTTTTCTTGGCATG +AGGGCAGGACTTAAGTGTCTGTTCCCTGTTGATTCCCCAGTGCCAGGCATGCAGTGCAAA +TTCTAGAAATATTTTTTGCATGAAAGAATGAGTGATTGAATGCGGCAAGGGTCTGGAGGC +TGAGGACCAGGCAGACAGACATTCAGAGTTGCTGGAACGCGACAGAGACAGGGAGTCAGA +CTGGTCATGCAAGGTCCTGGGCCTGCCCTTGGGTCCTGGGGAGCCACGGAAGGTTGTGGG +TGCCAGAGGGTTGCGGTCAGAGTCACAGTCAAGGGCCTTCTGAGACCTGTGCCCCCTCCC +CACCTCCTCAGGCCAGCTCTGGGGTCTCAGCAGGTGGTCCGCAACATGACCTCTGAGTTC +TTCGCTGCCCAGCTCCGGTCCCAGATCTCTGACCACACCACTCACCCGATCTCCTACGAC +AAGCCCGAGTTCTACACGCCGGATGACGGGGGCACTGCTCACCTGTCTGTCGTCGCAGAG +GACGGCAGTGCTGTGTCCGCCACCAGCACCATCAACCTCTAGTAGGGGCTGCTGGGCCGC +CTGGGTGGGAAAGGGCCAGGGGCGGGTGGCCCAGGGACTGCCCACTTATCCAGTAAGGTG +GCTCCATCACCTCTTTTCCTGGTGGGAAACTGAGGCCCAACCTTGGTAGCTTATCCTGGG +CCTCTCAGTGAGTATGTTTGAGCCTCAGTGGGTGGATAGGGACCAGGCTGGGCCAGGCAA +GGTCGGGTGCTGTCTGACCTGGCTGGGCGGTAGCTTTGGCTCCAAGGTCTGCTCCCCGGT +CAGTGGGATCCTGTTCAATAATGAATGGACGACTTCAGCTCTCCCAGCATCACCAATGAG +TTTGGGGCACCCCCCTCACCTGCCAATTTCATCCAGCCAGGTATGGGGTGGAGGTCCGGG +GGGTGGGGGACTGGGGTGGAGAGGGGCGGGTGTCCTGGGCAGGCAGCTGACGGGCATCCC +TGTCTTCTCCCATCGGCCGCAGGGAAGCAGCCGCTCTTGTCCATGTGCCCGACGATCATG +GTGGGCCAGGACGGCCAGGTCCGGATGGTGGTGGGAGCTGCTGGGGGCACGCAGATCACC +ACAGACACTGCACTGGTATGTGTCACCCCTTTTCTCCCTGGCCCTGCCCACTCTGCACAG +CCCCCAAGCCACGCTGATCACACTCCCATGCCCCAGGCCATCATCTACAACCTCTGGTTC 
+GGCTATGACGTGAAGAGGGCCGTGGAGGAGCCCCGGCTGCACAACAAGCTTCTGCCCAAC +GTCACGACAGTGGAGAGAAACATTGACCAGGTGGGCCGGGGGTTGGAGAAACTGAGTCAC +GGTGTGGGGTCCCAGGGCATCCTGGGCTGGAGGCCTGGATCATCACAGAGTGGACAATGG +TTGGTGTCCTCTCTCTAGTGCCTGGGCCATCTGGAGCCCCTGTGACATGAGGGCCAAGCC +CCCTGCTCCAGTGAGACCCAGCAGGCCCCAACCTGCTCTTCCTGATGACCTGGCCTGAAA +TGGCACCACCTGGGCTGAGGCCTGTGACCACACAGGTGTGGTTCAGGTGGCATCTGGAGC +CCTGCTCAGGCTTCCCCTCTCCTCCCACCCCCAGGCAGTGACTGCAGCCCTGGAGACCCG +GCACCATCACACCCAGATCGCGTCCACCTTCATCGCTGTGGTGCAAGCCATCGTCCGCAC +GGCTGGTGGCTGGGCAGCTGCCTCGGACTCCAGGAAAGGCGGGGAACCTGCTGGCTACTG +AGTGCTCCAGGCAGACAAGGCTGACAAGCAATCCAGGGACAAGATACTCACCAGGATGAG +GAAGAGGACTTTGGGGGACGGGCTTCCCCTGTGAGCAGCAGAGCAGCATAATAAATGAGG +CCACTGTGCCAGGCTCCAGGTGGCCTCCCTGGCCTGTCTCCCCACTCTCTGGGCCTCAGT +GTTTTGTGTGTGAAATGGAGCCATCTGGCTGGGGAGGAACAGAGAGGTGGGATTCGGAGA +TCTTCACAATGCGGACACTGGAACTAGCCTCAGCATCTTCAGCATGGGGAGAGCCAGGCA +CATGGCTGGGGGCCAGGGGAAGGTTCACACCAAACCCTGCCCCTTCCCACCCTGATCCCT +CAGACTTTGGGGCCAGGCCCTCCCTTACTGGGGCTGGGCAGTGACACTACCTAGGATCAG +CCACCAGGGGGTGTCACGACCCTGGCGCTTTCTTAGGCAGAGGGTGGCCAGCCGATGCTG +GGAACCCGGGCGCCTTCTCAGACCCGTAGGCGTCCAGCTCACCCTGCCGATGACACTGGA +GGTGAAGCTGAGGGTCCGAGGAATGGGGACTGGGCAACAGGCTGGAGGAAAACATCTCGG +TCAGAGCCATGCCCCTGGGGGGTTCCCAAGAGCAAGCCCAGAGTGAAACCCAAGCTTGTG +ATCCTCTCCAGAGGGAGGCCTGGTTCTCAGGGAACAGCAAACGGGAAGATGTCCCCAGAT +CCCAGGGATCAGGGCTTGGACCAGCCGGGGACGCAGCCCAGAGGGAGTGGGTCCGGAAGG +AAACAGCTCGACACAGCAGCCTTCACCATCGGCAGCCCCTCCAGGCCTCCCTCGGGGCCT +GCTCCCTCCTCTGTGCACAGTTCCAACACCTGGAGCAGGGTTCTGGGAAGGGCTGGTGGA +GGTGGGCTGGTGGGAGGCGGTGATCACAGCCCAGCACCTGGATATCACCAGGGGCACTGG +GGCCAGGGGCCAGGTGAGGCCAGGTCGGGGCTATCCTTCAGGAGCCCCGAAAACCTGGTG +ATTCCAAAGGGCCCATAGACAAACAGGGTTTTATGCCTGTGGAGTCAAGTCCCACTGGGT +CTGAGCTCTGGAGGGCTGTGTCTCTGGGGCTCTGCAAGGGTGAGATGGAGGTGGGCTCAA +CTGGTGTACAAGTCACTCTTCAATCCTTATTTTATTTATTTAATTTTTTTAAAAAAAATT +TAAACCAATAGAGATGGGGTCTCACTATGTTGACCAGGCTGGTCTTAACTCCTGACTTCA +AGCAGTCCCCCCATCTCAGTCTCCCAAAGTGCTAGGATTACAGGGGTAGCCACTGCACCC +GGCCTCAATCCTTATATTGGCCTGAGAGGAAAGGCCGTGGCCCCATTTGCAGGGGAGAAG 
+ACTGAAGCTGGAGGGGCAGGCCTTGCTCTGGGTTGCACAGCAGGAAGAGAAGTGGGAGCT +GGCCACGAGGCTTCCTGGACCCGAAATGCTGGTGGGGTACACCCTGGTTCTCTAGGTCCC +ATGGGGCTCAGCCCAGGACTACCTCGGGGGGTGAGGGACTTAAATCGTCTCCTTCATTCT +CATCGCCCCTTCCCCCATCATTTCCTGAGGAAGGACATTCAGGGACCTGAAGGGGTGGCC +TGCCCCTCCCCACCTGTGGGTGTTTCTCATCAGCTGGGACAAGAGACTGAGAAAAGAAAG +AGACACAGAGACAAAGTATAGAGAAAGAAAAGTGGGCCCAGGGGACCTGCACTCAGCATA +CGGAGGCCCCACGCTGGCACCAGTCTCTGAGTTCCCTAGTATTTATTGATCATTATCTCT +ACCATCTCAGAGAGGGGGATGTGGCAGGACAATAGGGTAATAGTGGGGAGAGGGTCAGCA +GGAAAACACGTGAACAAATGTCTCTGTGTCATAAACAAGGTTAAGAAAAAGGTGCTGTGC +TTTGATGTGCATATACATAAACATCTCAATGCATTAAAGAGCAGTATTGCCACCAGCATG +TCCCACCTCCAGCCCTAAGGCAGTTTTCTCCTATCTCAGCAGATGGAATATACAATCAAC +ACTGAGACATTCCTTTGCCCAGGGACGATCAGGAGAGAGATGCCTTCCTCTTATCTCAAC +TGCAAAGAGGCCTTCCTCTTTTACTAATCCTCCTCAGCACAGACCCTTTACAGGTGTCGG +GCTGGGGGACGGTCAGGTCTTTCCCTTCCCACGAGGCCATATTTCAGACTGTCACCTGGG +GAGAAACCTTGGACAATACCTGGCTTTCCTAGGCAGAGGTCCCTGCGGCCTTCTGCAGTG +TTTTGTGTCCCTGCTTACTTGAGATTAGGGAGTGGTGATGACTCTTAACAAGCATGCTGC +CTTCAAGCATTTGTTTAACAAAGCACATCCTGCACAGCCCTGAATCCATTAAACCTTGAG +TCGACACAGTACATGTTTCTGTGAGCACAGGGTTGGGGCTAGGGTTACAGATTAACGGCA +TCTCAAGGCAAAAGAATTTTTCTTACTACACAACAAAATGGAGCCTCTTACGTCTACTTC +TTTCTACATAGACACAGTAACAGTCTGATATCTCTTTCTTTTCCCCACAGGGACCTTCCT +GGCTGTGCCTCGGGTCAGGACCAGAATGACACCCATTCATTTCCCTGGGCCTTTGCTCGG +GCGGTCCCTGCACCCTGGCCTCTGCCTGACGAGGATGGTGGGGAGAGGAGGGGGACGTCC +CCCACACTGCTGTCTCCACTGTTCCTGCTGCCCAGGCCTCTGGGCTTCCAGGACTGCAGC +GGGTCGGTGGGTGGGCTGGCCTGAGCCCAGGAATGCACTTCAGCTCCTGGTTGAGCAATG +TCACTGAGGCTTGGGAGTCGGGTGGGGGCGGGAGGAGGCGTCCGCAGGCCCCCCTACCGT +GAGAGGCAGCCGTGGGAACAGCCTACCTCTAAACAATCACTGCAGCCCAGGCTGGCCAGG +GGCTCTGGCCGGACATAGGGGCCTGGCAGGCTGTGTGCCCTGTAAGGACACAGTCTGTCT +CTGTGCCTCAGTTTCTCTGCTGCCCAGATGGAGGGGCCCAGACTCCAGGTGTAGACATCT +GGAGCAGGCAGTGTTCAGTTGGGGAGGAAGCGGGGAGGACTGTGGGGGCCATGTGGGAAG +GATTCCACCCCACATCACCTGCACCCCTGCTGAGCCTGGTCAACGGAGCCCCTCAGTGGG +TCCTCACTCCCCTGGTTGCCTCCCATTTAGGCACCCTGAGGCCTGGGGAGAACAGAGCCA +GGCCAGTGTCCCCAGAGAGGCTGCGCTGCCAGCACAGTAGTAGCGGATTTGGATTCAGGG 
+AAGCAGACCTGCAGCCAGGGTGGGAAAGAGCTGCAGGTGGGGTGGGGCCCCCACATGGCA +CAGCCCCCCTCCCTGGAGGACCATGCTGCATTTCCAGGACAGCAAGTCCCAGGGATGGAT +GGTGCCTGGTGCCAAGGGCTAGAGGCATGGTCTGTCTGCATTTCCCACGTAAGTGTCTCG +TAGTCACTAGCATTTGATGCTGTCAAGACCCCCTGTCCTCTGTGCAGACTGGGAAGCCCT +TGGTCACCCTGGGGGAGTTGGGGGACCCAGGCCAGGCTGCAGAAGCATAAGGACTTGAAC +CCGGGTCCTGAGTGACACCACCTTGGGTCCTCCTCCTTCTGCCTCTGTTCAGCTCCACCT +TGATGCTGACTAGGCTGGGCCATGCGGAGAGGGTTAGGGGATAGAGATGGGAGCTGGGGA +GCAGGGCTCCACTCTGGGAGGGGGGCAGCCTTGCCGGATCCAGGGCAGAGTTAAGCGGCC +CCAGCTCTGCTTTCCCAGAGCTGCTGAGAACCTGGGGAATGGTGTGGAGGTTCCAGGGAG +CCCTGCCCCTACCTGGCAACCGCAGTGCAGCACGCACCAAGTTCTCCTGCACATTGCGAC +AGTGTGACCATGGGCTCTGGTGGGCGGTAGGTCGGGCCTTTGGACCTACCAGCAGTGAGG +GAGTTAACACAGCAGCTGACTTCTCTAGGCAAAGAAAACTCCCCTCAGACGCTTTGCTGC +CTGGCCTCCTGCCAGGAACAAGCAGGAGCTGAAAACTAGAAGTTGAGGCATGAGTTTGGC +CACTCCGTAGTGTGCACTTGGTGAGGGCAGCAGCTCGCCACAGCTGCCAGCCATCTGTCC +ATTCACCCATCTGTCCATCTGGCAGCCCGCTGTTCAGACCTGTCTGTCTGTCCGCCCATC +TGTAAGCCCATCTCTGTCCCATTGTCTATCTGACCATCTTTCTCTTACTGTCCTCTTTGT +CTAGCTATCTGGCCTGTCTGTCGATCCATCTTCGTGTCTGTCTTCAGCCCCCACCTGTTT +GTCCATCTGTCCAATTACCTGTGAGTCTATCTATGCATCTTCTTGTCCATTCATCTGCCC +ACCCATCTGTCCCTCCATCTGCCCACCGGCCTCCCCTCTCCTTCTGGGCCGCAGAGCCAT +GGCCCAGGACTACGGAGCCATGGGTGACCTGGTCCTGCTGGGGCTGGGGCTGGGGCTGGC +GCTGGCTGTCATTGTGCTGGCTGTGGTCCTCTCTCGACACCAGGCCCCATTTGACCCCCG +GCCTTTGCCCACACCGCTGTTGCTGCTGACTCCAAGGTCTTCTCAAATATTGTACGGTGA +GTGAGACGTGGGAGGAAGCTGGGTGGCCTTTGGCAGCCAGCCCCTCCTGGAGAAGGCGTG +TGTGTGTGAGCATGTGTGTGTGTGAGAGATTATGTGTGAGTGTGTGTGGGTATATGTGTG +AGTGTGTTTGTGGGGTGTGGGTGTGTGTGAATGTGTGTGATCGTGTTTGGGTGTGTGTAT +GTGTGAGTGTGGGTGTGTGTGAATGTGTGTGATTGTGTTTGTGTATGTGTGTGTGGGTGT +GTGTGAGTATATGTGAGTGTGAGTGTGTGGGGGTGTGGGTGGGTGTGAATGTGTGTGATT +GTGTTTCGCTGTGTGAGGGTGTGTGTGACTGTGAGTGTGTGAGTGTGGGTGTGTGGGTGT +GTGTAAATGTGTGAGTGTGAGTATGGGGGGTGGGTATGTGTGAATGTGTGTGATTGTGTG +TGGGTATATTTGTGGGTGTGTGTGTGTGTGCACGTGTGTGTGTGTGCACGTGCACTGGCC +CAGGAAGCAGGAGCCGTGTGTGTGGGCTTCAGCACCTGCAGGGCTTGAGCGCAAGGAGAC +AGCCTCAGGGCCCTTGCACAGAACAGGCGGCAGGGTGTGCCCGTGGGGCAGATGGGGACT 
+TGGGGACAATGGTGGTGTGTGAGTCCATACCTGGCTCCAGGATTCAGGAGGCCCATTTGC +ATATCCCAGGTGGGAACCTGTCTGGCCCCGCCTGACCCTGCTGGCCGGTGCAGGCCCCTT +CAGTGAGGCCAATTCTCCAAGGCTGCGGTCTTCTCCCAGGGTCATGGGTGAAGGGGTTTG +GAGGCTCCCTGCGTGGGTACTGGCCTGCTGGGGTACACACAATGCTGCCATAGCCAGTCT +GCCCCTACACCCAGCCCGGGGCCACATCTCAGGTCTCTCAGTCCTGAGGAGCCCGGTGCC +CCACCCCTCACATCCTCTCTCCCTGAGTCAGGGCCTGGGTCTCGTGAGCTGAGTGACTGA +TACTTGGTGTCCTGGATGAGGGCGTGGTGGAGAGGGGCCACAGCGGGTGTTTCCTGACCC +TCTTCCAGGAAGGTGCTGCTGCCGCTGCAGGGAGGACACACACAGGATGCCCCTTCTTGC +CCCCTGCCTCCCATTGGGCCCACAAAAGCCAGGGCAAGCCTCCCCTCCCTGCCAGCCACC +TGGTCTGCTTCCCAGAAATTCTGTCTTGCAGGCTGTTGGGAGGATCCCAGTACTTTGTAA +ACTAAAGCAAGGGAGGAGTGGCCGTTCTCTCTCTTTGTTCATTCATTCACCTTTTCATTC +ATTCCTTCTTCCCTCCATTCCCCCATCTGTCCATCCTTCCCTGCCCTGATTGCTCATGCC +ACCCCCCCAGCCCCTCCTGACCTGGTCCTTTGGTTTCTCTTCAGGGCTTTCTGTCTCCTC +CCACAGGGCTGAGAATGGCAGCTCAGGGACAAGTGGGGGCTGGGGACTGCTTAGTCTCCC +CAGTGGCTCTCAGGGGATTTGAGGGTTTGACGTCAGCTGCCACCCCAGGCTGTGCCCCTC +CTCTGCTCGGGAGGACATACAGAGATGCGACACCCACTTAAACTCGAAGTTGCAAAGATG +CAAATGAGACTGGGGTCTCAGGCACCAGAGACCACCCGTGGGCACGTGGCTTTTGGGAGT +GGGGACCTGCTGCCACAGATCTCTGAAGAGTCTGGACCTGCTGGGTCTCCCCGAGTGACT +GTCTGGGGGTCTCCATAGCATGCCCTGCTGTGTGCGTGACGGTCACTGGTTGGGTAGGGG +TCTCTACTCTAAAGCTCCCTCTGCCGGCATCCCCTCGAACTCTCCCTTGGTGAAGAGAGG +ATGTGGTTTGCCCCAGTGTTTTATCAAACAACTCTCTCCACTTCCAGTTTTAAGAAGCTG +GGAGTGGAAGAGAGCCTGGGGCTGGCCCCAGCTGCTGCTGTGAAACAGGGGTCACTGGAC +GCTGGGACCCTGGCCGGGCTGGCTGGAGGCCTCAGGAAGAGGCCTGCTACAGCGTCATCC +TGGCCAAGATTCCTCCCTGCAGAGGACCCTGGCCACGCTGCCACAGGGTCTGCTGGGGCC +ACCAGAAGCCCATGCTCCTGCCTCCATCTCTCCCCTCTGTGCTCACCTCTCACCAGGAGG +CCCTCCCAGAGTTCAGTGTCCTGCTTTTTTTTTTTTTTTTTTTTTTTTTTGTGACGGTGT +CTCACTCTGTCACCAGGCTGGAGTGCAGTGGCGCGATCTCAGCTTACTGCAACCTCTGCT +TCCTCGGTTCAAATGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGACTACAGGTGCCA +GCCACCACGCCCCGCTAATTTCTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCC +CAGGATGGTCTCTATCTCTTGATTCACCCGCCTTGGCCACCCAAAGTGCTGGCATTACAG +GAGTGAGTCATGGCACCCGGCCTCATCTCCTACTCTTTCAGCACCAGGTTTTATTCTTGG +GATTCTGCTACAGCCGGAGCCCCTGGGTGCGAGTTCCTAAGGTTTCTGTGAGTGTGGACC 
+CAGCACCGTGCCTAGTAGACATACAAAAGGAGCATGGTGACAGTGAGGTCTGTCATCTCC +AGCATAATGACTGTTTTGATCCTTGTAAAAAAGGTGATTTTTGGCTGGGTGTGGTGGCTC +ACACCTGTAATCCCAGCACTTTGGGAGGCCGAGGGGGGTGGCTCACTTGAGGTCAGGAGT +TGGAGACCAGCCTGGGCAACATGGTGAAACCATGTCTCTACTAAAAATACAAAAATTAGC +TGGGCATGGTAGCAGGTGCCTGTAATCCCAGATACTTGGGAGGCTGAGACAGGAGAATCA +CTTGAACGCAGGAGGCAAAGGTTGCAGTAAGCCAAGATTGCACCACTGCACTCCAGCCTG +GGTGACAGAGCAAGACTTGGTCTCAAAAAAAAAAAAAAAAAAGAAAGAAAAGTTTATATT +TTTGTTCTAATGCTTATCTTAATATCGTCATTCTATAATTATATGTTTTATATAATTATA +ATAGCTATATAAGATATAATACCCCTAGTATGTTGTTTTTTGGATATTCTACTTGCTCCT +GATGGTTAATTTATGTGTCAACTTGGCTAAGCTATGGTGTCCTGTTGTTTGGTCAAATAC +TTGTCAATATCTTGCTGGGAGGTTATTTCATAGATGTGATTAACACTGACAGTCAATTGA +CTTTAAGTAAAACAGATTACCCACCATAATATGGGTGGGCCACCTCCAATCAGTTGAAGG +CCTTAAGAACAAAAACTGAGGTTTCCCAGAGAAGCAGGAATTCTGCTTCAAGACTGTAAC +ACACAAACCCTACCTGAGTTTCTGGCCTGCTGACTGCTCTACAGATTTTAGGTTCCAGAC +TTCGAGATCAACTCTTACCTGAATTTATAGCCTGCTGGCTTGCCCTACAGATTTTAAAAC +TTGCTAGTCCCCACAATCATGTGAGCCAATTCCTAAATAAGTCTCTCTCTATGTATAATC +TATTGGTTTAGTTTCTCTGAAAAACTTTCACATTCCAGTTTCCTGGATGTTAAGAATTAC +CGAAACTAGCTAGTAACTTCTTTTTTTTTTTTTTTTTGAGACAGAGTTTTGCTCTTGTTG +CCCAGGCTGGAATGCAATGGCACAATCTCAGCTCACCGCAACCTCCACTTCCTGGGTCCA +AGCAATTCTCCTCCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCATGTGCCACCATGCT +TGGCTAATTTTTGTATTTTTAGTAGAGACAGGGCTTCTCCATGTTGGTCAGGCTGGTCTT +GAACTCCCAACCTCAGGTGATCCGCCGCCTTGGCCTCACAAAGTGCTGGAATTACAGGCA +CGAGCCATTGCGCCTGGCTCCTAGTAAATTCTTCTTTTCTGTGATGTGTCCCTTACCTCT +AATAATACTTTTCTTCTTAAAGTCTACTTCATTAAAAATAGTTATGCTGGGCATGGTGGC +TCATGGCTGTAATCTTGGCACTTTGCTGGAGGTCGAGGTGGGTGGATCACTGAAGCCCAG +GAGTTCAAGACCAACCTGGGCAACATGGCGAGACCCTGCCTCTACAAAAAATACAAAAAT +TAGCTGGGTGTGGCTAGTATAATTCTAAGTTGGCACACTTGTAGTCCCAGCTACTTGGGA +TGCTGAGGTGGGAGAATCGCTTGAGCCTAGAAGGGAGAGATTGCTGTAAGCCAAGATCAC +ATCACTGCACTCCAGCCTGGGAGACAGAGTGAGGCTCTATCTCCAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAGTTATACAGCTTTCTTGGTTAGTACATGCATGACATATTTTTCATGA +TCTTCCACCTCTCTGTACCCTTATATAAAAGGCATTAGTTGGGTTTTACTTTATTTTCAA +TTATTTTAATTTTTATTGTCCTTTTAAATGTAACTAATGATTTATTTGGGTTGAAACCCA 
+CCACCAATTTGTTTTCCATGCCTATTCTATTTCTTCTTATCTCCTCTCACATCTTGTTTT +GGATTTATTATTTTTATTATTTAATTTCCTCCTTCTCTATTAGTTTCATAACTGTGCAGT +CTTAGAGTTCTTTTAAAAGATGACTGGATTATTTTAGAGCTTACAACATGCATCCTTCCC +TTATCAAAGTCTAACATGAGCTAGTACTTTTTGTTGTGGTTGAGATAGAGAGAGTCTTCC +TCTGCTGCCCAGGCTGGAGTGCAGTGGAGCAATCTTGGTTCACTGCAACCTCCACTTCTT +GGGTTCAAGCAATTCTCCTGCCTCAGTCACCTGAGTAGCTGGGACCACAGGTGTGCACCA +CTATGCCCGGCCAATTTTTGTATTCTTTTTCAGTAGAGACAGGGTTTCACCATGTTGGCC +AGGCTGGTCTTGAACTCCGGACCTTAAGAGATCTGCCTACCTCGGCATCCTAAAGTGTTG +GGATTACAGGCGTGAGCCACCGCACCCAGCCTATGAGTTAGTACTTCTATCCTCTTCCTA +GTCAGTACAAGAACCTTGGAACAGGAACTAAATTTACCCCCAGTGACTTATATGCTAATA +TTTTTGTGTATTTTAAATATGTGTGTGTGCATAGATGTATCTGTGTGTTTTTTGTGTTTT +TATTCTTATTTATGTTGAGAGTGTAGAGCTATGTAAGAGTAAAGAGAATTGTGTAATGAA +GCCCCGAGTATCCATTCAATTTCAACAACAATCTTATGGCCAAGCTCATTTCATGTATAC +TCTTTCCTGCTTCCCTCTACCCCACATTATTTCCGTGCAAATCCCAGATATATAACTGTA +CCCATACATATTTCAGTATGTTTTATTTCTTTTAAACCCCACAAGATATCATTTTCTATA +CTACTGTAATTTTATACCAATAACATTCATTTAGATTTACCCACACGTTTACCCTACCCT +CCGGGTCCTGTTTGAAAATCAAGCCCATGCTCACAGGCCAATTTTTTTTTCTTTTAGAGA +CAGGGTCTCACTTTGTCACCCAAGCTGGAGTGCAGTGGTGCGATTATAGCTCAATGCAGC +CTCCAATTCCTGGACTCAAGGGACCCTCCTGCCTCAGCCTGCCAAGTAGCTTGGACTATA +GCTGTATGTTTTATTATTATTTTGTAGACATGGGGTCTGGCTATGTTGTCCAGGCTATTC +TCAAAATTCCCGGCCTCGAGCAATCCTCCTGCCTCGGCCTCTCAAAGGTTGGGATTACAG +GTGTGAGGCAAGGCACCCAGCTCAGCCACAGAGCCCTGTTGCATCTCTCTTACTAGGAGC +AAGAGCTGACTGCCCCCTCATCCCCATTCCAGAGTGTTGGGGCTGTGTTGAGCCGAGGCC +AGGCCACTGGCATGGCCCAAGGAACGGGATCATTCACTGCTGCCCCAAATCTGACATCAT +TCCACCTTGACAAGACTTCCTCATCCAATCCCTTTACTTGACAGCTGGGGAAACCAACGC +GCACAGAGCACCCCCAGCTCACTCGGGGTCTCAGAGCTGATCCATGAGCAGAGGCTGAGA +TCCTGGGATCTTGTCCCCCAGCCGCCCTGCAAGCTTACTCCCTTTCTGCTGGAAGAGATG +GGGCCGGACCTCAACCAGCAGCCCTGGCCTGGACATGACTGTGCTCACCCAGGTATTGAG +GCCGAGATGCCCCGGCATCATATGTTTTTCTCTTATTTTTTCTTTTTTTTTGAGACAGTA +TCTCACTCTGTCACCCAAGCTGGAGTGCAGTGGCATGATATTGGCTCACTGCAACCTCTG +CCTCCCGCTTAAAGTGATTCTCCTGCCTCAGTCTTCCAAGTAGCTGGGCCTACAGGCTTG +TACCACCACGCCTGACTAATTTTTGTATTTTTACTAGAGACAGGGTTTCCCCATGTTGGC 
+CAGGCTCGTGTCGAACTCCTGACCTCAGGTGATCCACCTGCCTTGGCCTCCCAAAGTGCT +AGGATTACAGGCATGAGCCATGGCATCACTTAAACGTAGTGAGAGGCCGGGCAAGGGGCT +CATGCCTGTAATCCCAGTGCTTTGAGAGGACGAGGCTGTCAGATCACCTAAGGTCAGGAG +TTCGAGACCAGCCTGGCCAACATGGTGAAACTGTGTCTCTACAAAAAAATAGAAAAAAAA +AATCCCTGCGTGGTGGCAAGTATCTGTAGTCCCAGTTACTCAGGAGTCTGAGGCATGAGA +ATTGCTTAAACCTCGGAGGCGGAGGCTGCAGTGAGCTGAGATGGCGCCACTGCACTCCAG +CCTGGGTGACAGAGCAAGACTTTGTCTCTAAATAATTAAATAAATAAATATGGCCGAGCA +TGGTGCCTTAGGCCTGTAATCCCAACATTTTGGGAGGCTGAGGCAGGTGGTTCATGAGGT +CAGGAGCCCGAGACCAGCCTGGCCAAGATGGTGAAACACTGTCTCTACTAAAAATACAAA +AATTAGCCAGCTGTGGTGGCAGGCACCTGTAATCCCAGCTACTTGGGACACTGAGGCAGG +AGAATCGCTTGAACCTGGAAGGCAGAGGTTGCAATGAGCCGAGATTGCACCGCTGCACTC +TAGCCTGGGCGATGGAGCAAGACTCCATCTCAACTAAATAAATTAATAAATACAGAGCAA +GATTCCATCTCAAATAAATAAATAAATGTACACCTGTAATCCTAGCACTTTGGGAGGCTA +AGACAGGTCGATCACCTGACGTCAGGAGTTCGAGACCAGCCTGACCAATATGGCAAAACT +CCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTTTTGACGTGTGCCTGTAGTCCCAG +CTACTTGGGAGGCTGAGACAAGAGAATTGCTTGAACCCAAGAGGTGGAGGTTGCAGTGAG +CCGAGATCTCGGCTGCACTTCAGCCTGGGTGACAGAGTGAGACTCTGTCTCAAAAGGAAT +AAATAAAATACAAAGTAAAAAAAAATGTAGTAAGATTGCAGAGTCGTGCCGCAGAAGCGT +GCTGGTCCTATCCATGTAGTGAAGGCTGATTTCATACACAAATGTCACAAGAACTTTTCT +TTTCTTTTTCTTTTTTTTTTTTTTTGAGACGGAGTCTCACTCTGTCACCCAGGCTGGAGT +GCAGTGGTGCGATCTAGGCTCACTGCAAGCTCTGCCTCCCGGGTTTATGCCATTCTCCTG +CCTCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCTGCCACCTAGCCCAGCTAATTTTTT +TGTACTTTTAGAGGAGATGGGGTTTCACCGCGTTAGCCAGGATAGTCTCAATCTCCTGAC +CTCGTGATCCGCCCGTCTCGGCCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCACA +CCCGGCCTTCTTATATGCTTTTATTGCATTTGAGCGTACCTCTTTTACAGCGAAGATCTT +TTTTTTTAATTTTATTTTATTTTTAGAGATGGAGTCTTGCTTTGTTGCCCAGGCTGGAGC +ACTGTGGTGTGATCATAGCTCACTGCAGCCTTGAACTCCCGGGCACAGGTGATCCTCCCA +CCTCAGCCTCCTGAATAGCTGGGACTACAGGCATGCACCACCATGCCTGGCATATTTTAA +AGATGTTTGTAGACATGAGGTCTCGCTATGTTGCCAGGCTTGTCTCAAACTACTGGGCTC +AAGCCATCCATCCATTTCAGCCTCCCAAAGTGCTTGGATTATAGGCATGAGCACTGCGCC +TGGCCATCACACTGTTTTTTTGGTTGTTTGTTTGTTTGTTTGTTTTGAGATGGAGTCTTG +CTCTGTCGCTCAGGCTGGAGTGCAGTAGTGGGATCTCACCTCATTGCAAGCTCCGCCTTG 
+TGGGTTCACGCCATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCGCCCGCC +ACCACGCCTGGCTAATTTTTTGTATTTTTAGTAGAGACGGGATTTCACCGTGTTAGCCAG +GATGGTCTCGATCTCCTGACCTCGTGATCTGCCCGCCTCGGCCTCTCAAAGTGCTGGGAT +TACAGGCATGAGCCAATAAATATTTTTATAATCATTAACTACCGAGGAGAAGCTGGAGAG +AAAAAAGGTGGATGAAGTTAAGGCAGAGAGACTTTGTAAGTTTCTTGGCTGAGCTTTTGG +AGACTGTATATCAGAGATCCTATTTGAGGTGATTTTCAGCTACAGAGATAGGCCTGGTCA +TTTGAAAAATAACGGATTAGGTGAAGCTTGCGTTTGAAATCCTCCTTCTACTTTTCATCT +TTCTCTCTTGTTTATTCTGAGCATCCATCTTAGACACCCAATCTTTGTCACTTTGTGGTT +GTCATTGATTTCCCTGTTATTGAGATTAGAGGTTGGCAGACTTTCTCTGTAAAGGGCTGG +AGAGAAAGTACTTCAGACTTTGTGGTCTGTGCGGTGTCTGTTGCAACCACTTAACTCTGC +CCTGTAGAGCAAAAGCAGCCGCAGACAGTACATGGGCAGATGAGCATGGCTGGGGTCCAG +TTACATTTACTTGCAAAAAGAGGGTTGGAGACTGGGTGCGATCTCCCACCTTTAATCCCG +GCACTTTGGGAGGCCGAGGCAGGAGGATCACTTCAAGCCAGGAGTTCAAGACCAGCCTGG +GCAACAAAGCAAGACTCCATCTCTACAAAAAAATAAAAATTATTATAGCTAGGCATGGTG +GTACACACCCGTAGTCCTAGCTACTCAGGAAGCTAAAACTGAGGCAGGAGGGTCAGTTGA +GCCCAGGAGCACGAGGCTGTGGTGAGCTATTATTGTGCCAGTGCACTCCAGCCTGGTGAC +AGAGCAAGAACGGTCTCGTCAAAAAATAAACAAAAGGCTGGGCACGGTGGCTCCCGCCTG +TAATCCCTGCACTTTGGGAGACTGAGGTGGGCAGATCATGAGGTCAGGAGATTGAGACCA +TCCTGGCTAACACGGTGAAACCCTGTCTCTACTAAAAATACAAAAAGTTAGCCGGGCGTG +GTGGCGGGCGCCTGTAGTCCCAGCCACTCGGGAGGCTGAGGAGGGAGAATCGTTTGAACC +TGGGAGGCGGAGGTTGCAGTGAGCCAAGATTGTGCCACTGCACTCTAGCCTGGGCTACAG +GGCAAGACTCCATTAAAAAAAAAAAAAAAAAACCAGCAAAAACCAAACAAAACATAATGC +ATGTTCTCTCTTATAAATGGGAGCTAAACATGGGGACTCATTGACTTAAAGATGGCAACA +ACTGGGAACTGCTGGATGGGGAGGGAGGGGAGGGGTGAAAGGCCAACTGTTGGGGAGTAT +GCTCATATCCACGTGACAAACCTGCACATGTGCCCGCTGAATCTAAAATAAAAGTTGAAA +GTAGATTTAAAAAACCCCAAGAGGGCTGGGTTTGGCTTGTGTGTCCATAGCTTGTTAACC +TCCGCTTTAGATATTAACGAATAGAAACATAGTGCTTAACTTCCCAGGCCACCTATTTTG +TTCCTCTCCAAGGTGATGGATAGATGAAGGCCTTATCCAGCCGCCTGGAAGTTTGCTGAC +GCTTGTCCTGTCACGGATTAATGAAGCATTGTTTTCTGATGAAGGTTTCATGCCGCTGTG +CTGATGTGTCTTCTCTTCTCTCTAGGCAGGAAACTGCATATCTTCTGGTTTACATGAAGA +TGGAGTGCTAATGGAAATGCCCAAAACCTTCAGAGATTGACACGCTGTCATTTTCCATTT +CCGTTCCTGGATCTACGGAGTCTTCTAAGAGATTTTGCAATGAGGAGAAGCATTGTTTTC 
+AAACTATATAACTGAGCCTTATTTATAATTAGGGATATTATCAAAATATGTAACCATGAG +GCCCCTCAGGTCCTGATCAGTCAGAATGGATGCTTTCACCAGCAGACCCGGCCATGTGGC +TGCTCGGTCCTGGGTGCTCGCTGCTGTGCGAGACATTAGCCCTTTAGTTATGAGCCTGTG +GGAACTTCAGGGGTTCCCAGTGGGGAGAGCAGTGGCAGTGGGAGGCATCTGGGGGCCAAA +GGTCAGTGGCGGGGGGTATTTCAGTATTATACAACTGCTGTGACCAGACTTGTATACTGG +CTGAATATCAGTGCTGTTTGTAATTTTTCACTTTGAGAACCAACATTAATTCCATATGAA +TCAAGTGTTTTGTAACTGCTATTCATTTATTCAGCAAATATTTATTGATCATCTCTTCTC +CATAAGATAGTGTGATAAACACAGTCATGAATAAAGTTATTTTCCACAAAAGGACTTTGC +AGTTTTAACGGGGGGCAGTAGGGCTTGTGCTATAGAAATTCAAAGGCAAGGGAAGTCACT +TCTGTTGTGGGGCCCTGGGAGGAGCCTCCAGGCTGGAAAGGCTTAAGGTGGAGGTCTCCG +ATAGGGGCAGCGTACACAGTGGACTGGCTGCAAAAGGCCGTGCTCAGCATTCAGACAGCA +TCACACACTCCGCTTTTCTCTACCAGGGAGGCAGGTGGGGAAGGATAGCGATGGGAAGGC +AGGCGGAGCTCAGAATGTGGAAGGGGATCCAGTAAGGCTTGGAAGTTTGCACCTGATCTG +GTGGGTGGTGAGGAGCCCTTGAAGGGGCAAGGAGGCGAGGAGCACTCAGCTGTGTTCTTA +CACTGATCTGCCACTGGGGTTAGAGACAAACGTGGTGGGAATGGAAAGCCACGCACAGTC +ACTAGCGCCTCTGGGGGGAGAATGGATGTGGCTGGTGAGAGAACAGGGGGGCCCAGGGAG +AGTCCGGCACCAACCTGGGCGGGGGAGCCCAGTGGGTGTGAGCACCCCCACTTTAGAGAT +GAAGTGATGGAGACATTCAGATGTTTAACCCCTTGTTCAAGATTCCATACTTGATAAATG +GCAGATCAAACTCCCAACATAAAATGTGGGTCATTTCTTTATTATTTTATTTGTATTGGT +TAACAATGATCAGCCATGCAAGAATAAATGATTATTGTAAAATCTGCAAACAATGTAGAT +ATGTAGAGAGTCCCTTCCTTGGAGCTTGACCTTGTCAGACAGGTATAGATGAGTGTTCCG +GGGCAGCCGTAAAAACTGCCAGAGACTGGGCTGCTTATAACAGAAACGCATGGTCTCCCA +GAAGCCCACATTCAAGGTGTCCAAAGGCCTGGCTCCTGGAGGCTCTGGAGGAGAGCCTGT +TCCCTGTCTCTCAGCTTCTGCCGGTTGCCAGCAAGTGTTGCCGTTCATTCACTCCAGCCA +CTGCCTCCATCTGCACACAGCAAAACAGCGTATCCTGAAGTGCTCAACCTTATAGCCATT +ATTTTAAAATATCCGGAACACACAGGACCGTGGGAGTGGCTGTTGGAGAAATTTTCATGA +AGGAAGAAAGATTACAACTAAGTTTTAAAATGCTAGTTTTGTTTGTTTTGTCTTGGAGAG +GAGGTAAAAGTGGGAGTAAAAATAGGGAGTTTGGTGTAAGGTGGGAAAAGCAAAGGAACC +CCGCATGGATGGGCTGAAGGGTGTGATGGGAGAACAGTGAGAAGTACGTTTGGGGAAGCA +ATTGGAAATAGTAGCTAAGCTTAATCACAATCTATCAAAAGGGACTTGTTGAAGAATTAA +TGTGTGACTAGGAACAGGGAGGTTATGGGCTTGTCAGCTCGACAGCGGGCACTCAGTTCC +ACTAACGAATGATGCCCGTGTGGACAGACAGAATGATGGACAGGCAGATGAATGCGTGGG 
+CTTTATGTGAAACAGGTCCTCTTGGTTGTTGACAAGATACTGTTTTAAAGTTCCATTTTG +CCATACTTCGAACAGCTTGTCATTAGCTCAATTTAGCCACATGTAAAATCACTAAGGCGG +ACTTCCAGAGTTCCCACATGAAAATCAAATGTAAACCAGCAGTGACCTGCTTCAACACCA +TCATCGGAAGTCAGAAGTTGAACTCTTTTTTGATGTTTAAAGCCTGCATAATATTCGCTG +TATTATTATTCAGCATATTACTATTTCCTTCGTGATGGAAATTTGGTTTATCCCAATTTT +CTATTCCATCAAAACACCGCTACATAGAAAATCCCCATGCACATATTTCTCCTAATTGTG +GAAATATTTTACATAAAAGACTCTAGACATGGGATGAAATTCCCAGGTTATTGGAATTTT +AAAATAGATAGGTACTTCCAAATTGACCTCTTACAAATTATATGAATTCGTAAGCTTCCA +ACTGTTATGGAGTTACCCATTTTGAGAAATCTGTGCTAAAAGGACCCAAACAATGCTGAT +GACAATGATCAGGATAATAAGTACGCTGGGAAGACAACAAAATGATTTAGATCTTAGACA +AGTCATTCTAGGTGTCTCCACTGTTTCAGTTCTTGCGTTCGTTCATTCTTGTGCTTTTTC +GTTTTACCAAATAAAATAGCTCCTTGATGTCATAGGAATCCACGCTATGCTTAATGAGTA +TTGGTTAGTAAAATGCCTATAACTAGTAATCTTCATCTATGCAATTAAATATTAATTCAT +AAAACACTTCAAATGTAAACAATAATTAGTAAATGAAAAGTACATAATACCTCAATTAGA +AAAAAATCACTCCATTAAAAAGACATTATTTGTGTGATAAAAGAGATTGCTATTTTTGTA +TTTTTCTACAAGGTTAAAGAAAACTAAGTCAACTTATACAAGTGAATTTTAAAAGACTTT +AGGGCAGGCGTGGTGGCTCACACCTGTAATCCCAGCACTTTAAGAGGCCGAGGAGGGCAG +ATCACCTGAGGTCAGGAGTTCGAAACCAGCCTGACCAACATGGTGAAATCTCATTTCTAC +TAAAAATACAAAAAAATTAGCCCAGTGTGGTGGCATGTGCCTATAATCTCAGCTACTTGG +GAGGCTGAGACAGGAGAATAGTTTGAACCTGGGAGGCGGAGGTTGCAATGAACCAGGATC +GCACCATTGCACTCCAGCTTGGGCAACAAGAGTGAAACTCCATCTCAAAAATAAATAAAT +AAATAAATAAATAAAATAAATAAAAGCCTTTAACCCAGAATGCTGAGTAAATTGGCCAAA +AATGCTAACCTATGCATTTCAATACTATAGGAGTCGCATGGGTAGAAATAACCAGATGAA +ATACTTCTGGTATTTCACCTTCCCAACCCACACGAGCCAGTGTTTTTCTGTGAATAACAA +AAACAGCAGAATTTACTTGCCTCACCATAAGAGGTTACCACTTCTGTGTGTTCCCCCGAA +ACAGGTGGTGGCTGGGTGAGAAGGTGGACAGCACTAGGGCAGGAGATGGGGGCTCCAGTA +TCGTGGGTGAGCTTCCTAAACCTCTGCAACTTTCAGCCCCTAAATGGGATGAGCCATCAG +AATTTTTAGCACAATGCCCAGAACAAAGTAAGGATTTGACAAATGACGCCTCTCTCCACA +TTGTTCTGTCATCAGCCACCGCATCCTGTACCTCCAAGTCCACTGGGCTCCGGCTGTTTC +CATCACATGGAGAATGACTCAGAGCCTGGCCTCCAGCCACCCTCCTGGCCTTTCTGCTTC +TCACTCTGCCACTGGCTCCTCATGGACCACCAGCCTGGGTGTCCTCAGACATACCACACA +CTTCACTGTGGGAGTCACGCAGCCCTCACTGCTCCTTCGCCAGGGAGCCACGGGGCTTTC 
+CTCCTCAGGAGGACTCTGCAAGCAGCTGGATGAAGGGCCCTCCCGTCTCTCATCCTTCCT +TAATTTTTGTCACAGTTCTCCTTCCTTCCACTCAGTGCAGCGCACACTGATTGATCCTCC +ATCTTCCCCAAAAGACAGGAACAGCATGAGCAGTGGAGAGTAGATTCCAATGATAGAAAA +AATAGTCAGTGATTTCTCATTTCCATTGATCATCAATGAAGAAAATGTATCCTGAAGGTC +ATGTACCTCCTATGGGACTGCTGCATCCTCAGCCTCCTGAATTTCAGCCCAGCACCTTCC +TCCCCAGCACAGCAACAGGTCAGCCCTTACCAGCATCCCTCTCTTATTGCCTTTGTGCAG +AGCCAGCACCAGGGCCAGGGGAGGCCTTGGGATTGTCCCTCCCCAACAATCTGTGAAACA +ATCCTTTATGTCACCAACAAAGCACAGCCTTCTGCACTGGTGGTCAGTCCCTCTCAACAC +CTCTGTCACTGTAAAGCTGGCAGGCAACCCTCCAAGGTTGGCCTTCCCAAGCACTGCACC +TCTAGGTGACAGAGCACGTCCTTACCTTGAAGCCTGGGCGCCCAGTCTATCCTGTCCAAT +GAGCGAGCTGTGGAGAAGGGGGGATTCCGGGTTAAGGGGAGACTAGCAGGGCTCCTGCTT +TTATGTTGCCCTGTTGGGAAGGCTATTAAAGAAACACAAAGTGCTAAGCAGTGAGGATAG +AACATGTTTTCATTATTTCAACCAATACATTCCACAGATGGAATAATAAGAAATGCTACA +ACCAAGCTAACTGAATCCAACAGCATATCAAAAAGATAATCCACCATGATTCAAGTGGGT +TTCATACTAGGGATGCAGGGATGGTTTAACATACGCAAGTCAATAAATGTGATACATCAC +ATCAATAAAACTAAAAACAAAAATCACATGATAATCTGAATAGATGCAGAAAAAGCCTTT +GACAAAATCCAGCATTTCTTTATGATTAAAACCGTTCGTCACAATCAGCATAGAACGGAC +ATACCTTAAGGTAATAAAAGCTATCTATGACAAACCCACAGCCAACATTTTCCTGAATGG +GGGAGAGTTGAAAGCATTCCCCCTGAGGAAGGGAACAAGACAAAGATGCCCACGTTCACC +GCTTCTCAACACAGTGCTGTTCACTACAGCATTGGTTATAAGAGCAAGACTGGAAACAGA +ACAAATGGATACCCATAGCGGGGTGCTTAAGTAATTTTGGGAATAGTCATGGGGTGCAGT +ACTTTATAGCTCTGAAACAATACAATGGATTTACATTTGAAATGTGGAATGATAACTAAG +GTGCATTGCCCAGTGATATATGCAGAGGTGCAGAGGACTTTGTGTAAACACGATCACACA +TCAGCCTGCATTCCAGGTGCATGCTTCTATTTGCACATAGATTGCAGGGATGATATGCAA +ACAAAAATGTTGACTTGGTGTTTGGAAGTTCAGAGTGGAAGGGAAACTTCCTTGCTAACC +TTTTATGATATTTAGAGTTTCTAAATGTGAATACGTAATACATTTAGAAATCTTAGTTAA +TAAGAAAAGCCTCTGTTCCTGGCCTCTTGCTGGCACATGTCAGGTGGAAATGGGGCTGTC +ATGCTAATGTGTGCAAACTGAGAAAAATCCAAGAATGGGAGTCTGCTTTTTTCATCATAC +AAATAATTGTTAATAGAAACAGTATGATAATTGCTCATTGATATACCATGCATATTCTAT +TAGATAATAATAAATTTCTGAAATTTGAACTATACTTACACATGGAAATTGAAATATATG +GATGAAACATTGTGGCTTATATAGGCAATTGTTTTATTGGCATTTTACAAACTGATCATC +ATTCCTCATGGCACGGGTCCATGTGATATTAAGTAGCTTGTTATGCTTGGGAAAGGCAGT 
+GATGACCACAAGAATGACTTCAACTACTAAAGTACAATGGAGATTTCAACAATGTTTTGT +TTAATATTTAAATATTTCATTGTGCTCCCAGGCTTTTTCTCACCCTAATAGCTCTCATCC +ATATCATGTGGGTCCCATTAATACAGATACCTCCGAATGCACCACTCTTCCATTATATCC +AGTCAATTGCTGGTTACCTTGGGCCTACAACTGGGGGAGGGCAGGGGCTGCTGGCCACCT +CCTCATCTACAGTAAGAGTCAATGAGCAGTTAAGTGGATACTGAAAACCATTTATCCTGC +TGGAGTGAGAAATAAATGGTTTCTTTCAATAGCGTAGTAAAATGCATCTTTTCCAAACTA +TTTATATGACTCAAGGCCCATCTCAATTTCAGATGTGGTTAGCCTCAATTCCTGATTCTC +ACCAAGGTGTGTAATGTCATCCACGGCCCAGTGCAGAGGAACACAGGTGCTGCCGTCAGA +CTGCCAGGGTCCAATCCCGCCTCCTCACTCACCCCGGGAGATCCCTTTAAGCCAGGAGTC +AACAGTGAGGATGGAAACATGAGTGCTTTTTAAAGTCCTGAAAGTTCAGAGGCAGACTGT +CAATTTCTCCTCCACCCCTGGGCACACACCAGGAGAACTCTGTCTCCAGGTTGAAGGAAG +TGCCTGTGAGAGAGTTGTGTCCCTCAGATTCTGTTCACCACAGGTGACACTCGATGCAAC +CCCAAACCTCTTCTGCACAATCCCAAGGGGTGCTGACTAATCCAACCCAAAGGCTGTGAT +GTTTGGCAGAGGCAGAAAAGAAAAGGCCAGGTGTTCTGGGAAAGACCACCTTTAAATATC +ACAGCACCCTCATAGCCCAGAGAGACAGTTCTAACTATTATGCCAATAAACCCGGAAAAG +ACCAAATCCAATATGACACATATTTCCTGTTTCCTTTTGATTTCATGCCCCCTCCCTTAA +CCTCCCAAGCAGCATGGATACCCCGAAGGCCCCTGGGAACTCTCTCCAATTGGATCTTAC +GTGGAAAGTAGTTACCTACCTACAAATCCCCATCATCAGATATGCTCTCCACAATCAAAT +CTTTAGAAACACAAACACCAGGATAAGTCATTAGAGAGAGGCCCACCCACTCCTCCCACC +CTAGCTGAAGCCATGGTGCTTCGCACAGGATCCCCTGGTGTTTCCTCTGGGCTCACAGAT +ATCCCTACAGCCTCTCTGGACATTGTTTTATACTTGCAAAATCATTTGCTCTCACCAGAC +TCCAAATCCTCCTTCCCAAAAGGAGCCCAGAATCAGGTTTCTGTACCCTAGAGATGGCGT +TTTTTCCTCAGGAAGTGAGTTATTTCAGGGTAGGTATCATTCTCCAGTGTCAATGGCTCC +TGCAATTATAGAAAAGAAAACATTAGGAGGGTGAAATGATGCCATACACGTCACACAGAT +CTGATAGTCTCTCGACAACTTGAGAGAGAAAATAAGAAGGGGTATAGTGATTGAGTCAAA +GGTCAAAGTCCCCCAAAACTGGCACGGAAGACACCTGTGGAAAAGACAAGACCTTTTCCC +ACAGAATTTATCTTTAAAGTGTATCTAGATTGGCAGTTTCACAACTCTTAATCCATGGGG +GAAAACTGCTGTGGAGGGAAACACCTCTGCATTGCAGTGGATCGTGGATGCTGCCATCTA +CCACACCCCAGTGTGCCTGGCATGGGTTGGTGAGAGGCTGCCAATCAATAGCACCACACC +AAGGGAATTGCAGATGTCATAAATAGTCCACATTGGCAGATGTTCATGTCTACATTTGAT +TAAACTGCAGATGACATCCATAATGCACACTGGCAGATGTTCATGTCTACATCTGATTGG +AAAGAAGCCAGGAAAGTAACATTTCTGTTCAAGACAAAGAAAAGTGTCTTACATTGGCAG 
+CATCTTCTTTTTTACAGATGTCTTGTACAGTGTCCTCATTAGCAATGTCATATACAGCGT +CCTTATTAGCGAATTCGTATACAGCATCCTCATTAGCGATGCCATATACAGCGTCCTCAT +TAGCGATGTTGTATACAGCGTCCTCATTAGTGATGTCGTATAGAGCGTCCTCATTAGCGA +TGTCATATACAATGTCCTCATTAGCGATGTCATATACAGCAACCTCATTCGCTATGTCTT +GTAAAGCATCCTCATTAGCGATGTCATATACAACGTCCTCATTAGCGATGTCGTATACAG +CGTCCTCGTTAGTGATGTCTTGTACGGTGTCCTTATAAGCAATGTCGTCTACAGCGTCCT +CGTTAGCATGCCTTGTGGGTGTCATTAGCGATGTCATATACAGCATCCTCATTGGTGATG +TCTTATATGGTGTCATTAGCGATGTTGTGTACAGCGTCCTCGTTAGCAATGCCTTGTACA +GTGTCCTCGTTAGCAATGTCATATACAGTGTCCTCATTAGCGATGGCTTATACACTGTCC +TCATTAGCGATGTTGTGTACAGCATCCTTGTTAGTGTGCCTCGTACGGTGTCATTAGCGA +TGTCGTATACAGCGTCATAATTAGCGATGTCTTATACGGTGTCATCATTAGTGATGTTGT +GTACAGCGTCATCGTTAGCGATGCCTTGTATGGTGTCCTCATTAGCGATGTCGTATACAG +CGTGCTCACTAGCGATGTCTTTTTTTTTTTATATATACTTTAAGTTTTAGGGTACATGTG +CACATTGTGCAGGTTAGTTACATATGTATACATGTGCCACGCTGGTGCGCTGCACCCACT +AACTCATCATCTAGCATTAGGTATATCTCCCGATGCTATCCCTCCCCCCCCCACCCCACA +AGAGTCCCCAGAGTGTGATATTCCCCTTCCTGTGTCCATGTGATCTCATTGTTCAATTCC +CACCTATGAGTGAGAATATGCGGTGTTTGGTTTTTTGTTCTTGCGATAGTTACTAGCGAT +GTCTTATACGGTGTCCTCATTAGCAATGTCGTGTACAGCGTCCACGTTAGCGTGCCTTGT +CGGTATCATTAGCAATGTCGTATAAAGCGCCCTCATTGGTGATGTCTTGTACGGTGCCGT +CATTAGCGATGTTGTGTACAGTGTCCTTGTTAGTGACGGCTTGTATGGTGTCCTCGTTAG +CGATGTCGTATACAGCGTCCTCATTACCGATGCCTTGTATGGTGTCCTCATTAGCGATGT +CGTGTACAGCGTCCTCGTTAGCGTGCCTTGTACGGTGTCATTAGCGATGTCATATACGGC +ACCCTCATTAGCGATGTCGTATACGGAGTCCTCATTAGCGATGTCGTGTACAGCGTCCAC +GATAGCGTGCCTTGTCGGTATCATTAGCAATGTCGTATAAAGCGCCCTCATTGGTGATGT +CTTGTACGGTGCCATCATTAGCGATGTTGTGTACAGTGTCCTTGTTAGCGACGGCTTGTA +TGGTGTCCTCGTTAGCGATGTCGTATACAGCGTCCTCATTACCGATGCCTTGTATGGTGT +CCTCATTAGCGATGTCGTGTACAGCGTCCTCGTTAGCGTGCCTTGTACGGTGTCATTAGC +GATGTCATATACGGCACCCTCATTAGCGATGTCGTATACGGAGTCCTCATTAGCAATGTC +GTGTACAGTGTCCACGTTAGCGTGCCTTGTCGGTGCCATTAGCAATGTCGTATAAAGCGC +CCTCATTGGTGATGTCTTGTATGGTGCCGTCATTAGCGATGTTGTGTACAGTGTCCTTGT +TAGCGACGGCTTGTAGAGTGTCCTTGTTAGCGATGTCATATACAACGTCCTCATTACCAA +TGCCTTGTATGGTGTCCTCATTAGCGATGTCGTGTACAGCGTCCTCGTTAGTGTGCCTTG 
+TACGGTGTCATTAGCGATGTCATATATGGCATCCTCATTAGCGATGTCGTATACGGAGTC +CTCATTAGCGATGCCGTGCACGGCATCCTCGTTAGCAATGCCGTGGGCAGCATCCTCGTT +GGCGATGCCGTGGGCGGCGTCCTCCTTGGCGATGCCCTGGGCGGCGTCCTCGTTGGCGAT +GCCCTGGGCGGCGACCTCGTTGGCGATGCCCTGGGCGGCGTCCTCGCTGGCGATTCCGTG +GGCGGCGTCCTCGTTGGCGATGCCCTGGGCGGCGTCCTCGTTGGCGATGCCCTTGTCGGC +GGCCTCGTTAGCGATGCCGTGGACAGCGTCCTCGTTAGTGATATCGTGTGAGGCATCCTC +GTTAGTGATGTTGTGTACGGTGTCCTCATGGGGAGCTAGAAGAACACAGAGTTAAGGTCA +GTGCCCTGGTAGTGGAGACTGTGAATCACCCAGGGGCTTGCTTGGTGTGGTGCATGGAGG +TGGCTGATCACAGCATGGGCCAAGCTGATGCTGGGACATCCTCCCAGGTGGACCTGCACT +AGTGAAGCTAAGGGATGTGGCTCAGAACACTTTCTGCAGTGGGCATCAGTTTCCAGGTTC +AGGTATGCATTATCCAGTGAAGTGGGGAAATATAAAAAATAAAAATTGACAAATTCATGA +AAAGCCTTCCATGAGTGCAAGTGTGAGTTTTTTATCCACTTTACATTCAGTATGCATTCA +CACATACAAAATATTTTTACAAGAAATCAGAAATTTTAATTTTTGTCAGTTATGCGAAAT +CTAACTTAGCTGCCAACATAAAGATTCTATCTCATTTACTTGGTCTCGAGAAAATCTAGC +ACATAGTAAGTAGACCAAAATGTTCATTAAATGAAAACACAGAGCAGAGATAGGGGGGCT +GCTAGGCAGACTGGGTTGCACCTGATTACCTGGATGATAATTAACTGCACAAAACCTCGG +TCAAATTAATATTGAAACTGCCTTTTGCTTGGGCTCGTTTCCCTTGCGGAAGAAGGATGA +CCAAGGAGATGAACAGGAAAGAAATGAGAAACAGAGGCCTTTGCCTAGTAGCTAAAGGCC +ACCTTCTATAACACGAAATAGCCTTGAACTCTGCCGTGATTTAGTGACAGAGTTCCCTCG +TGTCTTCTACCCAGGTTGAAGTCCAGCAAAATTGCGACTGTCCTCTTTACAACTTGCGAG +ACCACACTGCTTCTGCATTTGCCTGTTGTATGTATGAGATTTACACTTGTTTTAAAGCAA +CATTTTGTTTCAGTTGGGCTGGTGGCCATACCCGGCACTAGCCAGTCAATAGTGAGATGG +CTCCTCATGGAGGAGGCTTGGCTTGAGGCTGAGGGTCTTTAACCCACATATACAAGAGAG +TTGCCACTAAGGGATGGAAGCCAGGCTAATAACCAAGTGCCACACAGAGTTCCTATCTGT +CCCTCCTCACCATTTTTGGCTGGCAGGATTTGAGCATTTTAGGGCTTGGGAAGATAGTAT +TACTAAATCTACTAAAATACATCACCCATCCTTATAGACTTTGGCCAGTTGCTGAGCAAA +TTAACTTCACAACTGAAGTGGGCCACACTGGCCTTTGTGGTCCCCCACTCCTCTTAGAAT +TTGTGAGCGTGGGGCCTACTGGAGGGTGGGAGGTGGGAGGAGGGGGTGGATCAGGAAAAA +TAACTGATATTAGGCTCAATATATGGGTGATGCAATAATCTGTACAACAAACTCTCATGA +CACACATTTATATATGTAGCAAACCTGCACATCCTGCACATGTACCCCTGAACTGAAAAG +TTAAATAAAAAAAAAAAAAGGATCGGTGAGCTGAGCCAAACACCTGGGGATCTTTGTGCT +TTTGACACACTGATGACTATGCCTGTCCGTGGGGAGATGAACCCATAACTGCCCTGGGTT 
+GTGTGACCACGGAGGCCACTTTATGATGATGGGCAGTGTCTGGGGCCTTTTGGGCTCGGT +GCCTTAGGGTTTATACATGAATGCTGGACTCCCTGTGTGGTGGTGAACACCCCATGACTA +AGTGCATGTCAGCGTCAGCACTGGCCCACACTCCTGGGTTCGTGTTTTCACTTTTTCATT +CAGGAACTCGGGAGCTGGGGCCACTCCCTTGGCCCTTCAGGTTCTCCACCTGAGCAGTGG +GGATAATAAGGCAGACCCGGGGATGGCTCTGGTGAGGGTGGAGGAGTCACTGTACAGAGA +GAGTAGAGCGGGGGTGGATTTTATTGTTAGAAGTGGACACTGGTGATTGGGTTGTATAAA +TGGGAAATCTCTCCTGAGAAAACACACAGCCTCACCTGTACAGAAACACACACATTCACA +CCACACGATGCAGCCTCACACAAGACACCACCAATCCTCAAGCACCCAACTCAGCACCAC +CCAAAAGGGAGCACAGCTGCTTCCTCAAAATTTGGCCATAATTTTTCCCTGGGGAATTCA +GGTTTTAAAAAAACACTTCCCCTATACTTATTCCTATCACAATCCCAGGATCAGGGTGGC +TCTTCACATTGAAACCAGGCAAGGATGCCACACCTTTCTTGGCATCCAGATTGTTTTCTT +GGTAAGTGATTCCAGAATACTTACTAGATTCACGCCTCAGAGGGGCCACCTGCACCACCT +GCAATACAGAAACAAAGCTTTTTGAGGGGTATGTCATGTTGTGGATTGTTTGCACAAGGC +TGTGTTTCTCTCAATGAATATTGAAAACTTGATCAGAAAGTGTAGTCAACTTCAAGGCCT +CCAAAACAAGGGTAGGATACACACTGGAAAAGACATCAGCTTCTGGATGGCGGATCTCTC +AGGTCCACGTAGGTTGGCAAGTGCAAAATACTGAATCCAAGGAGAAGACATTGCTTCCAA +GGACAAGGACCCCAAGGATACAGTCTACAACCTGAAGCCATCATAGCTAAATGCCATTTT +GGATTACATATCAGTTGCTAAGAGTCACTTCTTCCTCCCCCTCAGAAAACTGCATTTAAT +ACCTGTCATGGACATTGTCATCTTTTCACATGTAAAGTCAGTTGAAAAAGAAAGACAACA +AGAAAGGAACATTTCTATTTCAGAGAAAGCAAGGCAACCTTACCTTCACGTTGACTGGCC +TCTCTCCATCTCCTCTGTCCTTGTGAACTAGAGACTCCTCAGAGGCTAGGAGGACACAGA +GCAACAGTTAGTCATAGATGCTTTTGTTCATGAGCTATTCAGGGAGCTCTGCTTAATGTG +GACAACAGGACAGTGTGTGTGGATGTGTTTCATTAAAAGCACAGCTTGAGCTCCTGCTAG +AAAATCCTCCCTCGTGGAAAGACAGGCAAGAACGAGGAGCTAAGGAGCAAGAAATAGAGT +CCCTGGCATTTTGCTGATGGCAACTTAAGGCAATGGGAATGAGTCAGTCTACAAATGGTA +CTGAAGCACATGCTATAGTTTGATGAGAGTCCCACTGCTCACACTGTGAGGTTTGAAACC +CAGCTAAATGGTTTTCTAAACCTGTAAAAACAATATTACTTGCAGGATTTATGTCCCAAG +ACTACTTTTACCTCTGAGGATCCACAGTGGCTGTCACTGCAGTTATTATGTGTTTTAGCA +TTTTGCACTTGAATAAAAGCAAAGTTTAATAGACAGATTGGATTCAATTCTAGGCAAAAC +AGTCTATGGTATTTATTCACTAATCCTTTGTTATAACTGCTGATGGGAGAATTAGAAGTA +CTGAAATTATATCCTTTAAAATTAATTAAAGCATAATTATTAATCACACAATATTTTTTC +ATCCAGGCCTCCTTTTCTTTGTCATGCATGCATATTAATTGAGGATGGAGAATATCTACG 
+CTTGTTCAGGCCAGCCAACATACGACAGTTTACTTCAAGACAGGAGACATGGGTTGAATG +CTGGTATGTTTTAACTCTGCAGCGCAAACAGTTGCAACAAGTGTGGTGAACTAATCACCA +GATGGCCCTTTGCTGCCTTATTTGTCATTGTGCCTTACATGTAGCTTGCAGGATTTGATT +ACGCTTATGTTTTGTGGTGATCATACTTTCAACTATTCCTAAAATACTGCTTCAGTCTTA +TCTGTTTGGGGTCAACTGCTGAGGATTTCATACAAATTAATGAAGTTTGTGAATCTAAAG +TTCTACACAAAGGGGGAAATATTTGCAAATCATTTATCTTGTAAGAGACTAAAATTTAGA +ATATTTTTAAATATATTTAAGATATATTCAAAAATCTACAGCAACAAACTAACTAAATAA +AAATCAGACAACTCTTTAAAAATGGGCAAAAGACTTCAACATATATTTCCCTAAAGAAGA +TATAGCCACAAATAGTAGCACAGGAAAAGCTGCTCAGGATCATTAGTCATTAGGGAAATG +CAAATGAAAAACACAAGCAGCCACCAATATACACCTACTAGGATGATTTAAAGGAAAATA +AGTGTGAACAAGGATGTAAAGAAATTGTAACCCTGATACATTGATGGTAGAAATGGATAA +AGTTGCAGCCACTGTGAAAAACAGTCTGCAGTGGCTCAGAAGGTTAAATATAGAACCCCT +GTTGGACCCAGGAACTCTACTCTTAGGCACCCCAAAGAATAGAGAACAGAAATCAAACAG +ATGTTTGTATACTAATGTTTGTAGCATCACTTTTCACAGGAGCCAAAAGGTGGAAATAAT +CCAACCATCAGTGAACAAATGAATGTAATAAAAGCAAGGTGGTCTGCATGCAATGCTACA +TCATCCATCTGTAAAAAACGAACATCATTTTGATAGATGATACAACATGGGTGGACATTG +AGAACATTATGCTTAGTGAAATAAGCCAGACACAAAAGGAATATATTGTATAATTGTAAT +TACATGAAGTGCCTAGAATAGTCAAATTCATACAAGAGAAAGTCGGATAGGAATCACCAT +GGGCTGGAAATAGGGGGAAGGCGCTATATTGCTTATTGTGGACAAGGTTTTGTAAGAAAT +CATCAAAATTGTGGGTGTAGATAGTGGTGTTGGTTATGCAACCCTGTGAATATATTGAAT +GCCACGGAGTGCACACTTTGGTTAAAAGGTTCAAATGATAAATAATGTGTTATATATATT +TCCCCACGATAGAAAACATGCACAGCCAAGCCCAGATGCCAGTCTTGTTAGCTGCCTTCC +TTTACCTTCAAGAGTGGGCTGAAGCTTGTCCAATCTTTCAAGGTTGCTGAAGACTGTATG +ATGGAAGTCATCTGCATTGGGAAAGAAATTAATGGAGAGAGGAGAAAACTTGAGAATCCA +CACTACTCACCCTGCAGGGCCAAGAACTCTGTCTCCCATGCATTGCTGACCCATCTCAGT +ATTTCCTGTGACCACCTCCTTTTTCAACTGAAGACTTTGCACCTGAAGGGGTTCCCAGGT +TTTTCACCTCGGCCCTTGTCAGGACTGATCCTCTCAACTACTGACCATTTCACCTCCATT +CATGTCCATGCCACATCAGGCTGTGTTGTCTAGATGGCATGAATCTACCCCAAATGTCCC +TTTCTGGAGGAAGCCACCATTATGCTGTACCTCCAAGCATAATGATACGTCCACACACAC +CAAGGCACCTCACTCATGCAAGGTGTGTGTCCTCTAACAAAGTTTCACGCTCTAAACCCA +GATAACTTTTGAAACCCAAGTTCTGTTGATTCCCCTACTTTGGGTGCTCCATAGATGCTC +ATTTGTCTACTAAACACTGCCCCAGGCAATTAAATATTCCAAAGTGACCAGCAGAATTTT 
+TATGTTAATTCTGACATTGCATTGTTAGTACAAGTGTTTTTCCCCCTTCAAATTTATGTC +TTTGTTACTGATAAATGTAACTGATAATGCTTTTTTCAGCTATGTTGCCAAGCATATTTA +TATAAAAATATACTCAGATTGTTTTCAGAATTTGACAAAGATGATAGCAACAACGATAAT +CTCATTTGTCTTATACTAATCTTTATGTGTTACTTTCATCATTTCTTACATATTGGGGCC +TACCATACATTGTACGGTGAAATTAGTGCTATGCATCATGGTAGGAATATAAATTGGCAA +AAGTAATTTAGAAAATAGTTCCCTTCTTTCTTAAAAAAATTAGGCTGGGTGTGGTGGCTC +ATGCCTATAATCCCAGCACTTTGGGAGGCAGAGGTGGGTGGATCACCTGAGGCTAGGAGT +TTGAGACTAGCCTGACCAACACAGCAAAATCCTGTCTCTACTGAAAATACAAAAATTATC +CAGGTGTGGTGGTGTGTGCCAGTTGTCCCAGCTACTCGGGAGGTTGAGGCACGAGAATTG +CTTAAACCTGGGAGATGGAGGTTCCAGTGAGCCGAGATTGTGCCACTGCACTCCAGCCTG +GGTCTCAGAAAAAAAAAAATTTTTTTTGACCAAAATGTCATTATGCATTACATGACTGTA +TATGAATGCTCAAAGCTACATTACTCATCAAAGAAAATAACAAAACAATTAAATGTCCAT +TAACTGATAAATGAATAAACACTATCTGTATGAATAAACACAGCAGACTATGAAGGAAAA +CACATGACCAGCACGTGCTAACACGTCAATTAACTTCAAACATAGTATGCTAAATGAGGG +AAGTCAGATTCCAAATATATATATATGTCCATTTCTATAAAGCAAGTGGGAAATTTATGG +AGATGGAATGTCACAGCAGTATTGCTTAGGGCTGGAGATGGGAGTGGGGATTAACTGCCA +GTGCGCAAGAGAGAACTTGGGTGAGGGAAACATATTTAAATTAGATCGTGGTGTTGGGTG +CACACAGTATCAATTTAATAAAGCATCAAATTCTAGACCTTTTCAGTGGGCAAACTTTAT +GGTGGGTTCACACCCAATATAGGTGTTAAAAATAATGTTACGGAAATTCTTGTCGGGTTT +TTAACAAGCCAAGAGATATGCTGTGAAAGCAGCATTAATTCAAGTGGTTGTCACAGGTCA +CTTAAAGTTAATCAGATAGTTGTCCTACAAATATAGGGTGAATGTTATTCATGAATTTCC +TGAATCTATTGCAATAATCACATTTTTTTCCATTAAACTCTTGAGGTAGCTAATTTTATT +TATTGCATTTTCAATGTTAATCTACTATTTCATATGTTGAGATTAACTCACATTAGTCAG +AATTTAAAGTATTTTAAAATATCACAGAATTTAATTTACCTTATCTGGTTTTGGTTTCAA +GACTATAGTAGCCATTTCATTTAATTGTACATGTAGGGTATTCTAATTTATGGAAAACTA +TTACATTTTCCTTGATTTTTTTTTTTTTAGAAATTACTTCTAGGGATCTATATGGTAGAG +TCCATGGAGAATTGTTTTAATTCTTCATTCATGTCTTCAGTGGGTATAGGATTGGTCATA +TTGGTCATAGTTTTCTGCTCGGATTTCAATAAGAAACTTGTGGAAGAACCTGAAGGGTGG +GATCTTTGAGGGAGCCTAAGACAGAGCAAGACAAGCTAAGAAGGAGGGCAGTGCCACAGC +AGAACTGCCATTGATGCCCCCTCGCCTAGATTGCGGAAGAGACATCCAGCTGTAGACACT +GAGGTGCAGGAAAACAATGGAGCACCATCAGAGAAAGCAGTGCCCAGGAACAAGGAGGCA +CTGATGGTGGCAAGGGGCAAAGACAGCTGCCACGAGGCTGTTCACATGAGGGTCTCAGGC 
+TGCATAGACACCCACACCAGCTGAGGGGTCCTGGTTTTCATAAAGTGTGTGGCTCAGCCA +GGCCACCAACAAGCAGTTCACAAACAGTAGTAATACGACACTTTCCAAAGACCTTACTTG +AGTAACACGGTGATCCTCACAAATTTCCAATCAGGATGGTCGCACAGTTCCTCCTGCTTT +AGGACTCAGAGCCTGCCCGTGGTCACAGTGGGTAGGTGCAGACTCTGAAGATGCACTTTG +GTCAGAGACCCTGCTGAACTCTGTCTAATGAGGACCTCTGTCCTGTCTGCTGACCACCGG +TCAGAGGTGCAGGCTGCAGTGGGGAGTAAGAATGCCACCTTCTCAATGTTGGGAAAACTC +CCTGCCAGAACTGAGAATGGCCCTTTCTAAGCAGAAGGCAAGCTCAGACTAAAGAAGGAG +GCCGAACACATCAGGTTGGCAGATTGCCAAAGATTCACTCAGGGAGAGCCCACATCCTGG +GCCATCTTGGGTGGTGGCAAGATGAGGTAGACGACTGCTTTTGCAACACACACCTGACAA +CAAAAAATCAACAACTGTAAAAGAGCCACAAAATCCCCAAATATTTGCAAATTAGCAATG +CACTTTTAAATAACTGTTGGGTTAAAGAAGAAGTCTCAATAGAAAATTAAAAGTACTTTT +AACTACATTAAAAGAAAATGTGACTTGGCAAGATTTCTGGATGTAGCAAAAGCAGTCCTT +AGAGGGAAATCTATAGCATTGGATGCAATATACTAAAAATCACAAGACCTAAAATCAGTA +ATATCATGTTTCAATTAGGGAACTATAGAAAATAGAGGAATGCAATGGAAAGCAAGTAAA +AGTAATAAACAACATCACAGAAATCAATAAAATTAAAACACTGAAATCATCAGAAAATCA +ATAAAACCAAAAGCTGGTTCTTTGATATGCTCATTACAATGAATGAATTGATATGCAGGC +TAACCAAGAAAAAGAAGATAACACAAATGACCAATTTCAGAAATAAAAGAGGAGCCATCT +CTACTGAACTGTTAGGCATTAAAAGGAATATCATGAACAGTTCTATGAGCGCAGTTTGAT +AACCTCAGTGAAATGTATCAATTCCTTGAAAGGCAATCTTCCCAAGGTCATGCTAGGATC +CTAATTTGAATAAACTTATGTCTATTAAATAAGTTGAATTCACATTAAGAGCATTCCGAA +AAAGAAAGCACCAGGCCCAGATGGTTTCTCTCATGAAATCTACCGAATTCTTCAACAGGT +GAATAAAAAGACAAAAATTCATTTAATGCAATATTATTTGGTGATTTAATGTGCCATTTT +TTGCCATTAAGGCATAAAAAAGACATGAAAGCAGCTAAAGCGTACATCAATTTAGTGCAA +TAAATTCATCTGAAAAAACTACATAATATATGATTCCAACTATATGACATTCTGGAAAAG +GCAAAGCTGAAGCGATAGTAAAAATATTAATAGTTGCCAAGGTTTCTGGAGAAAGAGGAC +AGAGATTAATGAGAAGAGAGGATTTTTAGGGAAGTGAACATTTTCTTTATGAGACCATAA +GGGTGAACATAATGTTTTAAATATTTCAAAATTCATATATATGTATAACAGAAAGAATGA +ACATTATGCAAATGCAGACTTCAGATAATAATGTGTCAATATTTTCTCATTATTCTAGCA +AATGTACCACAGTAATGTAAGATGTTACTAATAGGTGAAATTAGGAAGTGAGGGTGAGGA +GACAGAATAATATGGGAACTTCGTGTATTATATACTCAATTTTTATTTATTTATTTATTT +ATTTATTTATTTATTTATTTTGAGATGGAGTTTCACTCTTGTCACCCAGGCTGGAGTGCA +ATGGCATGATCTTGGCTCACTGCAACCTCTGCCTCCCGGGTTCAAGCGATTCTCCTGCCT 
+CAGCCTTCTGAGTAGCTGGGATTACAGGCGCCTGCCACCACACCCGGCTAATTTTTTTGG +ATTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTCTCCAACTCCTGACCTC +AGATGATCCGCCTGCCTTGGCCTCCTAAAGTGCTGGGATTATAAGTGTGAGCCACCACAC +ACGGCCATATGCTCAGTTTTTATGTCAATTTAAAACTCTCTAAAGAAATATATTAATTGA +AAAATAATAATATAGCACCACTCTTTCAGGGAGATCTATGCTTATGTTTAACAACCAGGT +AAGTTCTAGACATTAGCTTGAAGCATTGTCTATCATTAAACATGAACCAAAATTGACTTT +TAAGTAGATATTTACTTTTGTGGTGGTAGCAATATTTACTGACCAGGCAAATTAGAATCC +TGACACATTAAAAAATATGGCTTAGTCTCTTCATAGTTTCCTCTTACATATGGGACACTG +AATACTCCCCGCAACTGCAATTCTTGAATCAACTTAATTAATGAACTTCCACAGTACCTT +CTTGTGGGTACCTCTTCTTCTTTACCCGGGAGCCATGAGGTCTCCTACACTGGTTGGTGT +GCACAGCATATCTTCTTGTATTCTCTATCAGAGAAGATGCTGGTTAATGCATTGACAATA +GATAGGGCTGTTGACATCTTGCTGACAGAAGACCAGAGGGAAAATAGTGATAATCTGTTC +TAAGTTTAAACTTATGATCCTTTTTTTACAGGCTTCCAAGCAGAGCCCACTGAATCAAAG +TTGGGTTTCAGGAAGATCACGGAGTTCAGTGAGCACTCAACACCTCTATCAGACAGACTG +CGTGGGCAGTGCCTTCCTGGAGAGGGGAACACAGCCGGATGACTGTGAGTGCAGGGCTGG +TGCAGAGTGGGGGCCCGGATTCAAATTCCACTAAGCCATGTGGACCTGGCAAGCTCATGT +CCTCCCTCTGCCCTCAGTTCTCTGCACTGTCATAATGTAATTTTAGCAATACTTTTTAGG +CCCTATTTAAGCCCTACTTCTTAGTATCACAGTACAGGGCTAAAAAATCACTAAATACAG +GAAAACCTTAGAGAGGACTGGTACTTCAGTAATGTTCTCTAAGTGTTTACTACATGCCAG +GAGGAATAAGCTGGACACTTAGCAGTGGCGGAATATGGAGGGGGAACTTGGATGGCTCCG +GGGCAGTGGAGCATGCTTTCCTGTTCGGCTTTTCCGTGGGCATGACGCCTTATGGTTTAT +GGAGAACATCAGCCCTGCAGGGGGTGCAGAGGAGGGGCTGTGGCTGAGATTTTACACTTG +AGGGTGCTGACATTCAGAGATGATAAGTGACGAGCAGAACCTCAACCCCGCTGAGTGAAG +GACCTGAGATGGGAAATGTATTTGGTTCCCTAGAGAGAGAGATTCCTGAAAAACTGCCAC +CTCTTCATCACGCCCTGTGCCACAGACCCAAGAGACCCCTCACTGTCTTTCTCCAGTCCT +CCTAGCCCAAGGTGTGTGGGTGGACAAAGGTGGTGCTCTGGAGGAAATGCCTGAGATAAG +GACAGGTCCTTAATGATAAAGAATTCTCCTTCCTCTTTCAGATCCTTGACCTCCCAGTAT +GACAGCTTAAAGGCTGTCATCTCTGTGGCCTGCCTCCCCTTGCCCTTCACCCTGCCAGCT +GCCTCTCAGTGACTGTCTCCTCCAGTGACTACACTGAGGGACCAGGGACTGCTTGCCTCC +CGAGGCTGCTCACACCTTCTGACACCGCAAAATGATTGTCAAAAATGGGTCTGCAAAGAG +TAACTTCCCTTCCACTGATCAAACCTGAATATGCAAGCTACTGTCAATTAACTGGAAAAG +TGGCCGTGTGGGCTGGTGCTTTGGTGATTTAATGAATTAAGTCTGCAAGCCCCACTGCCT 
+CCTTGACTATTGATCAGAGCTGCCTGCAATAAGGTCTGGCTAAGAATGGGCAGTGGCTGC +ACCAGCTCTGGGTAAAATTTGACCTAAAATGACCAATCTCATTCACTAACCTCACCATAG +TCTTATGGGTTCAATGGACCTGTCCAATCCTTTGCTCTGTTCTCTCCATCACCTTCCTGT +GTAATTTTCCTCCACCACGCACATAATAGAAACATGGCACAGGGGAGCTAATCGCCTCTT +TTATCCCCCACTTCAGGCTCACACATAAGTTTATAGTAAAAGCCTTTTCAAATGACTGCT +TTAACTGCTGCTACAGCATGTGTCATCAGTTGAATGGAATCTGTCACGTGACTTTAAGCA +ACCCTTTGCTGAGAGACAAGATTCAATACTAGGGACAGTATTCTAGTGTACTACATCATT +GATTTTATGTTATGAAGATCATCATTTATTGAAAATGTATAAATAATGAAGCCCAGCCTT +ACTCTTCAACGCTGTGTGTGTAAATCCACTGAGTGTGCTGACCCCCACGCTTGTACCCAC +CTGCTAACACAGAAAGGGTCCACTCAGAAGGGAGGCACAGCTCCAGCACTGAGGCTGTCC +ACACCAGCTTCACAAGAGGGTTGCCACAAGGACGACGGATACCCGGATAACAACCAAATG +GTAATTTGAGTACTTAATGGTCATGATCCCTAAAGTGTGTAGCTCAGAGGGCTTGTGGTG +ATAACTCCATCAAGACTCTAAAGCATCTCCCCAATTCTTACTGGACTTGATCCATGTCTT +GAGGAGACCCAGCTATGACACGCAGGCACCACGTTGTCCTACTTAGTGCCTCCCTTAGTG +TTTCAGAACCTGTGATTTGATCAGAAACATGGGCTTTCTATGTTGGTTTCACACTAAGGA +CTATGTGACACCTGCAGGAAGATGTCTACATAGCTACCTGGATTATGAGATCATGAGGCT +GTCTTATGTGAGGGATGGCGTTTGGGATCTCTGCAGGCGTGGGTAATTCCAGGCATAGAG +GGTGCTGGAACTCCCTTGCATGGTGAATAGTGATCTCTTCACTGGCTGATAAATAGAGGT +TGTAGTTCAGGCCTTCAACATTAGCACCGTATGAGGAAACATTTTGACTCTTCACTATGC +AGCAAGTGAACCAGAGCACATTTATTTATGTGGCTTAGTTTCTCCATCTGGCATGTGGGC +TCAATAAACAAGCTCACAACATATGGGCATGATGATGATGAGGTGTGAACTAATGTAAGT +AAAGTATGTGGTCTGATTTGTTAAATTAAGAAAAATGGCACTGAGAGTTGTGCTGGGTAA +ACACAACATTTTTTTCCTGGGGGAAACACAGATAGACACACATTCACAAGCAAATCATGC +AGACTTGCACACAGACCACCTCACCCCACCCCCGCCCTAATACACACATACCCACACACA +ACCTAATGTGAACATGTTCCCAGAAACTATACATAGATAAAAAGAGTATGTCACCAGGAA +AACCAGTTTCTTTTACTATACCCTACATCCTCATTCCCACCAGATGTCTTGGATCATGGA +GGCTCTCCGGACAAAAGCCAGCAGTTAAGCTCCAGATTTCCTGTAGAATCCTTTTCTAAC +AACCAGTGAGTGATTCCAGAATACGTACCATTGAATGTGCTCCCTGAAGTCACCTGTAAT +TAGAGAAGGAAAACACTCTGAGAATCAGGCTATGCTATGGATGGCTCACACAGGTCTTTT +GTTCACTTGGAAACTCTGGGTAACCAAGATTGGAAATAAGGTTCAAGTCAAAAGCCCCAA +CTCTAGAGTAGAGTTCCCTTAGGAAAGCACAGGAGCTTTTCTTGAAGAATGTTTCTGTCT +AGGTAATTTTTGAGTAGCAATTGCAGAATTCTTATCTAAAGTGGAAAGCTTGTTCCTGAA 
+GAAAACATCCCTTAACACCCAGTGTACTATCTGACACTGCCAATTTTGCACGTCCTCTGG +AATCAGGTGTCAGTTGGTAAAATACACCTCCTCCATCCCCAAGGAAATATTATCTAACAC +CTATAATGTAGTGGAGAATTTTCCCATAGCTGATATCAACTGAAAAATAAAGGATCCAAG +AAAACAACATTTACATCTTAGGCAAAGACAGGCTACTTTACCTTGGTAGTAGAGTAGGGC +TTCCTTTTCACACGCTTTTTGGAAGGCTTCTTCGAGTCACCTAGGGGATGTGGAGGGACA +CAGCATGGCTGTCAGTTCATTGGCAGTGCTACTCATGAATGACTCAGGGACTGGAACTTA +GGGGCGTGCCTGGTTAACAAGCATGGAATGAGCTTCTCCTGGACCATCTTCTTCACGGAC +CAAGGAAGGCAAAGAAAGAGCAGCAAGGAAATGAGAGTAGAGCCCTTGGCTTTCCAGGTA +ATGGCAAATGAAAGCAACGTGAAATAATCAACTCCAAATGAACAAATGCTAAAACACATG +CTACGATTCAACCACAGCATCCTGTCACTTCTTCAGACCCTTTAAAAGCCCAGCAGGACT +GCCACTACCTTCTTGACATCGACCAAGTCCCTTTCAACCTCCACAGACCCACATACACTG +CTACTGCATTTATCATGGAGGGTATAGGGTTCTGCCCTGTTTATGTGTGAATTTTTTAAA +AACTAGATTTAATACCATGCACCAGCATTAATTGTATTTATTTCTTTTCTTGGTTATGAA +AATAATCAGTCAGGCATAGTGGCTCACACCTGTAATCCCAGCAGTTTGGAAGGTGGAGGT +GGGTGGATCATTTGAGGTCAGGAGTTCGAGACCAGCCTGACCAACATGGTAAAACCCCAT +CATTGAAGATAAATGTTTTATATCCATGGTTAACAGATGAGATGACCATGAAATGAACAC +CAGTGTACTGGGTGGAGCAGCTTATCTATTCAGTCTTCGGCACTAAAACCTGTGAAACAA +TATCATCTTGCCTTATTTACTAACAAATACAAGTGCCTCTAAACTTAGACAGTTTCCAAG +TCATGGAACTGATGAGCACTTAGCTCCTGCAGAGAGCTCTGGATGATGGGTCGGGAGAAC +AAAGACACAATACATCAAAACAGCATTCACAAGTAAACAGGTTTTCAAAGCCCTCTACAT +GCAAATTTACACAATTATCCTTTTAATTTTTATCTTCATATATATGTACATAATCTACTT +GCTTCTGAGTATAAATCAAACTGTATGTTCTTAGTTAATAGTCTCTATAAATTCACTCTA +TTTATCTTTCTGAGTTGAAATACTGCATCTCATTGGATAACAAAAAAAAAAATTTGACTA +AGATTACACTGGAAAGGTGAGTAGGTTGGGTGATTGACTGTGATTGACAATTCCATGATT +CTGGATAACTTCCAAAGCATAAAAATAAATGTGTGTTTTCTTTCACACGTAGACAATACA +CATGCTTATTACTTTAAAAAATTAATATGTGCATGGAAGTGACTTACTACAAATATATTA +AAGTAAATACACATTTCACAAAAAAAGAAGAGAGGAAGGGAAAAACATGTTAAAAACAAA +GAGAGTTACATTTTATTGTGTGAAAAGCCTCCAACGGATCCTTACTACTGTGGCTTTGTT +CCAAAGTTTTGGAAAGTAATGATTTCATAGGTTCTTAATTGGGTTAAAAACAGCATTAAA +ATAGACTCTGCCATATTCTCCCCTGGGGAATAACTTAATCTGTGGGGTGGGGAATGGAAC +GTTGAAGGATGCAGGATGTAAAAGGAAATTATATATATATTATATATATTATATATTATA +TGATATATATAATATATATATTATTTATTATATATATTATATGTTATATATAATATATAT 
+AAATAATATATAATGTACATAATATATAATTATATAATATATATTATATATTATATATTA +TATACATAATATATAATTATATAATGTATAATATATATTATATATAATATAATATATATT +ATCTATAATATATATAATATATTATATATATTATATATTATATTTAATATATTATATATA +TTATATATATAATATATTATATATAATATATATAATATATTATATATAATATATTAAATA +CATTATATATATATTTTATATATATATATATTTTTTATATATATATATATAATATATATA +TATAATTTGGGAATTTGGGAATAAACTGAATCCCAATTCACACTGGGACTACACCAGCTG +CCACCATGCCTGGCTAATTTTTTGTATTTGTAGTAGAGACAGGGTTTCACTGTATTGGCC +AGGATGGTCTTGATCTCCTCACCTTGTGATCCTCTTGCCTTGGCCTCCCAAAGTGCTGGG +ATTATAGGCCTGAGTCAAGATACATATTTTTTAAATGAAGAAAAATTTGAAAGATACTCT +GCTTGGTACAATAATCAAATATATAAATTGAGGAATAAAACATAATCATGAAACATATTT +ATAACTGCATATGGAAAATACAGAGGCTAATTTTTTAAATAACATATTTTGAAAGCATTA +ACTAGTAATTTGAAAAGATCGCATTTGACAGGCCAGTATGAACATACCTTGAATGCAGCC +ACACAGGTTCCCCATAAGAAAAATCAAAATCAGGGAAAATGAAACCACAATGGTTCAATC +TGCTCTGACCTTTGAAAAACTCAGCACAGGTAGTGGCACTTACGACCAAGGGCAGGAGAT +CCCTAATCCCATCACCATGGCGATAGGGCATAAACATTCCAGGGTGAAGGCACAATCCAC +ACTGTGAGGTCCAACTGCTGCCATGCAGACAGGTGGGCTTTTACAAGTACAGGAAGGTCA +TCAAAGGCTCAGTGTTTTGTTTCAAAAACTGAATCCCAAGCCCACACATTATTATGCTGG +CTTCTTAAAATAAGTTGTGAGATGGGAAGTAGGGCACCCACAAATATATATATATATAAT +TATATATAATATAATATATAGTATATATATAATATATTTAATATATTATATATATATATA +ATTTGGGAATTTGGGAATAAACTGAATCCCAATTCACACTGGGACTACACCAGCTGCCAC +CATGCCTGGCTAATTGTTTGTATTTGTAGTAGAGACAGGGTTTCACTGTATTGGCCAGGA +TGGTCTTGATCTCCTCTCCTTGTGATCCTCTTGCCTTGGCCTCCCAAAGTGCTGGGATTA +CAGGCCTGAGTCAAGATACATATTTTTTAAATGAAGAAAAATTTCAAAGTTACTCTGCTT +GGTACAATAATCAAATATATAAATTGAGGAATAAAACATAATCATGAAACATATTTATAA +CTGCATATGGAAAATACAGAGGCTAATTTTTTAAATAACATATTTTGAAAGCATTAACTA +GTAATTTGAAAAGATCGCATTTGACAGGCCAGTATGAACATACCTTGAATGCAGCCACAC +AGGTTCCCCATAAGAAAAATCAGAATCAGGGAAAATGAAACCACAATGGTTCAATCTGCT +CTGACCTTTGAAAAACTCAGCACAGATACTGGCACTTAGGAGCAAGGGCAGGAGATCCCT +AATCCCATCACCATGGCGATAGGGCATAAACATTCCAGGGTGAAGGCACAATCCACACTG +TGAGGTCCAACTGCTGCCGTGCAGACAGGAGGGCTTTTACAAGTACAGGAAGGTCATCAA +AGGCTCAGTGTTTTCTTTCAAAAACTGAATCCCAAGCCCACACATTATTATGCTGGCTTC +TTAAAATAAGTTATGAGATGGGAAATAGGGCACCCACAAATATATATATATATAATTATA 
+TACAATATAATATATATAATATATATAACATATATATAATTTCCTTTTACATCCTGCATC +CTTATATTATATATAATATTATGTATAATATAATATGTATTATTATATATAATATAATAT +ATATGAATATATATAATTATATAATATAATATGTAATTCTATATAATTCTATATAATATA +ATATATATTATATATAAATATATATAATATACTATATTATATATAAGTATACATAATATA +TTATATATACGTATACATAATATAGTATATTATATAAACATATATATAATATATTATATA +TACGTATATATAATATATTATATATACGTATATATAATATATTATATTATATATACGTAT +ATATAATATTATATATACGTATATATAATATATTATATTATATATACGTATATATAATAT +TATATATATATTATATATAAGTATATATAATAATGTAATATATTATATATAAGTGTATAT +AATGTAATATATAATATAATATAATAATATATTATAATATATTATATATTATAACATAAT +ATAATATAATATACATTATAATATAATATATAATATTATATATAATATAATATATAATAT +AAAATAATATAATATATAATATAATATATAAAATAATAATTAATATTTTAAATTAATTAT +TATTAATTAATATTAATTAATATTAAAAATAATATAAAATATAATTACTATAATATAATC +TATAATTATTATATATAATATAATATACAATTATATATAATATTATATATAATAAAATAT +ATATAATTATGTGTATTTATTACATATAATATAATACATAAATTATATATAATTATTACA +TATAATATAATATATAATTATATATAATTATTACATATAATATAATATATAATTAGATAT +AATTATTACATATGATATAATATATAATTATATATAATTATTACATATGATATAATATAT +ATAATTATATATAATTATATAATTATATATACAAGATAATAATATATAATATATAATATA +TATTACATAATATTTTATATAATATATTATACTATATATATTATATAATATTTTATATAA +AATATTATATATTATATATAATATAATATTTTATATAAAATATTATATATTATATATAAT +ATAATATTTTATACAATATAATATATAATATATATTATATTATCATATGTTATACAATAA +TATGTTATAGTATAATATATATTATAATCTAATATATTATATATTATTATATATTATATT +ATATATAACATTATATAGTATTATATATTTTATAATAATAATATAATATATAATATTATA +TATTATATATAATATATAATATATTATGACATATTATATATAATATATAATATATTATGA +ATATATTATATATAATATATAATATATTGTGATATATATTATATATTATATAATATATTA +TGATATATATTATATATAATATAATATAATTATATGTAATATAATATTATATATCTTATA +TATCACATATTATATATTTTATATCATATACTATATATTATATCATACATTACATATAAT +GTATGTCACATATTATATGTAATATATGTCACATATATGTAATATATGTCATATATTATA +TATAATATATATCATATATTATATATGTCATTTATTCTGTAATATGTATCATACATATTA +CATAATATGTATTATGGTGTCTGGTGGTCCCAGGACACAGACTGATGTCACTCAGCCTCA +CATGACACTCTGTCCTCACCACACTTGGGGTCCTGGGAGTCGTAGCACCAGGCACCTATA +GAGACAGTGGGCAGGAGGCTGAGCTGAGAGACCAGTCATCTGGGTCCTCTCCAAGCCCCA +TCCCATCAACTGGGGTCCAAGAATGAAGTAGGCCAGGGGCTTAGGCCAGGGGAGGCCAGT 
+GAAGAACCTTCTCTCATGTCACCCCTACCCCAGTATTTTCTCTCCCCAGTCCCTCCCCAC +TCCCAGTGGCAAGACCTAAGGGGTGGTGGAGCATGGCAAGAGGGTAGATTGGAGACCAGA +CGGGAAGAGTCCTGGTGCTCACTCTCAGAGTCTGCACCACACCAGCTGCACCGCAAGGCG +AGCATGATGCCAGGGGCGTGCGTGGCTCAGTGGCAGCCGCAGCTAGGAAGGACTAGAAAG +TGGCTAGACCGCGGAGCCAAGATGCTGGCCTGGCTACTACATATTTATTCATTAGCTAGC +TGGGCTAGGGGCGTGGCTATTGAGGAGAGGATGGGAGGGGTCGGTGCCCAGGCAGGGCCA +CCAGGAAGAGCGGGAGCGCTCCAAACTCACCCAGGTGTGCTCTGGTCCCAGGGCTGCACT +GATGCCAGCAACAATCGCTGAGGTTAATCCTGCAGGGGAGGGGTGACCTATTATTATCCC +CACTTACAGTCCAGGAAACAGAGGCTGGGGACCAGAATGACACACCAAAGCCACCAGCAA +GAAAGGTCAAATAGTGAGGGCACCTTGAACCCCAGCGCCCATGACTTTCGGGGCAAGGAT +GGGAGGAAGGTGTAGGATGTCTCTTGCCCCGCTGTGGGCCTGGAACTGCCACCCCATCCC +AAGACCATAGCCTCCCTCCTGCCCAGGCCCCCCTGCGGATCCAGGCCAAGACGGAGCCCT +GCAGGAGGTCAGAGGAGGGGACTTCTTTGCACCCTGCGCCTCTTTCCAATCGCCAGAGGG +CAGGGCCCTACCCACATTGGGGATGGGAAGTCAAATGCAGAAGTTAGGTGAAGTCAGTTC +TTGGATACTCCTGTACTGTCACCCTGGTCTTATCTCATGCCTTAGCCCAAGCTGTGCACA +CAATGGGGTCCTAGGTCCCCCTCACCTCCCAGATTCCACCTTCCCAGGGATGGGACCCCC +TGGAACCCTCGGGGGCCTGGGCAGTGGCCTTGCTGGCTCTTGCCTTCCTAGGAGCTGAGC +AGGAGCTCCACTCTCAGCAGGGCAGTTCACTGCAGCCTCTGCTTCCTCAGCTCAAGCCAT +CCTCCCACCTCAGCCTCCTGAGTAGCTGGGAATGCAGACACACACCACCACGCCTGGCAA +ATTTTTGTATTTTTAGTAGAAACAGGGTTTCACCATGTTGCCCAGGCTGGTCTCGAATGC +CTGAGCTCAAGCGATCTACCCACCTCGGGCTCCCAAAGTGTGGGAGCCAAGATGGGAACC +CAAGCATACGGCCCCAATGCTGAGGCTCTGAACTACTGACCTGCCCTCAGCACTCAGCCT +TGGGATCATGAGTCACTGTGCAAGGGAGTTCCAACATCTGCATGTATGTCTGGAATGATC +TGAGCCTGCAGAGTTCCTACACACTGGCCACATTATAGGGTGGTGTCCGTGGTCACACAG +CTCAGGGCAGGTATTTATTAGTACATGAATAGCTTAGCTGTGTCATAGTCTTTATGTGAA +AGGCACAAAAAAAGGCACTTTGGCAGGCTGAAAGTGTGGGGATTATAGGTGTCAGCCACC +GTGCCTAGCCCACTGGATGACTTATGATATCATATGTGACATTGTGACATCATGTGAGTC +AGGGATGTACCCCGTTCTCAGCTGCTATATGCTATGTTACACTGACAGAATGGGAATGAA +GAATGTGTCCCACTCTCTCAGCTGTTGTATTATATCATACAAGGTGCAGTGACTAAGTGT +GTCAGCTGTGTCCTCATCCTACATAGCATATGAGAGTGTGTGACGGGAGATAGGATGCAG +ACCTGAGAAGCATTAAACACCTAGGCAATAAAGGTGCCAGCATCAGCTGAGAGCGCAGGT +AGACCTCAGTCACATTTGTTACTGTGTAACTAAAAATACAAAATTAGCTGGGTGAGGTGG 
+TGCACGCCTGTGGTCCCCACTGCTCAGGCACCTGAGGCAGGAGAATTGCTTGAACCCAGG +AGGCAGAGGTTGCAGTGAGCTGAGATTGCACCACTGCACTCCAGCCTTGGGGACAGAGTG +AGACTGTATCTCAAAAAAATAAAATAAAATAATCTAGGACAACCAAGAGAAGGACTCAGG +CTCACCTTACTCCATGTCACATGTGATATATAACACTTGAGGGGGACACACACTTGTCAC +TTTGTATCACTATGTCCTATATGATAGCCAATGATAACCCAAGAGGGGGAGGCAGCCTTG +TTCACACCATGTCACTTATGATATTATATGACATCAAGGAGCCCAGACCTCAGTTGCATT +ATGTCACTATGGCACATACAGTATCATGCAACAACTGACAGAAGGGTTGCAGGCCTGATT +CACAGTGTATGTGAGTCACAAATGTCACTTATGACACCATACAGCAGGTGAGAGGGGATA +GTCATTACTCACCTCGGGTCACATTGTCACACATGATACCATATGATAGCAGAGGGGGAT +GCAGAACTGAGTTGTATTATGTCGCTATGTAACCTGTGATAGATGATAGCTGATGCGGGT +GAAGACCAAAGTTACATTTTGTACCTACATTATATGAAATATACAACATGCACGAGGGAG +ATGCAGACACAAATCACACTGTGTGCTGTATCCCATATATGTGTTTAATTTCTGTGTTTA +AGAGAGCTGGGTTCATGAATGAACTTAGTCACTGCACCTTGTATGATATAATACAACAGC +TGAGAGAGTGGGACACATTCCTCATTCCCATTCTGTCAGTGATTGACCACACCTTGTGTT +AACTGTATGAGCCACTGTGTTTGGCCGTGTATAGCGCACGTGGCACATTGCTCACTGATA +CATATTTGCTCCGTACTGAAGTGAGGCAGATGCATGAGTGATTGAGAATAATGCTGCCTT +GGTGAAGCATTCCATTTAGAAGCATGAGGTTCAGGTACTACCATCTTCTGTGATACATAT +TTAAATGTTAAGTTTAACAGTTAGGTCTACCATGAGTGAATGAGCCTTGCATTATCTTTG +AGAATCGATGTGTTTAGCAGGGTGTGGCTCATCTAAGATAGCCTTCTTTAATGCATGTTC +ACTATCTGTGTTTATTAGAAATAGGTTTGTGAATGACTTAGCATAGTGTTTTCCCTATGA +AACCCTGTTTATTTATTTATTTATTTATTTTGAGATGGAGTTTTGCTCTTGCAACCCAGG +CTGGAGTGCAATGGTGTGGTCTCGGATCACTGCAACCTCTGCCTCCCAGGTTCAAGCGAT +TCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACGGGCGTCTGCCACCACACCCAGCTA +ATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAGCTC +CTGACCTCAGGTGATCCTCCTGCCTCAGCCTCCCAAAGTGCTGGAATTACAGGCGTGAGC +CACCACACCCGGCCAAAACCCTGTTTTAAGAAGTGTGTGGTGCAGGTACTACATTCTTTG +TTGTTCCAAGTTCAAACCCTGTGTTTAACAGAGATAGGTGCAAGATTGCCTAGGTGTAGC +GTAGACTCTTGGAAATACTGTGGTTAGCATTCTGTGTTTTCTGTAGTATAGTTTCAGTGA +TGCCTGTTTAGATAGTTTATCAGATCTAGGTACTAGTGTTATGTCAGCGAAACACTGTTT +AGCTGCACGCGGTGCATGTAATACTTTGTTCATTGATATTTGTTTAAATCAATCAGTGTT +TACCAGAACTAGATATGTGAGTTACTGAGAATAGCCTAATCTCTGAAAATGTTTTCCAGC +CTATGGTGCATGTAATATACTCTTCTATGGTACCTCTTTCCGATGTTTTTATTGTGGCAA 
+AATATACATAACAGATTATACCATTTTTAAGTGTAGAATTTAGTGGTAGTAAGTATAGAG +AGAACCTTAAAAGCAGCAAGAGAAAAGTGACTTCTCATGTGCAAGGGAGCCTCTAGAAGA +TTATCAGTGGATATTTCAGCAGAAACCCTGCAGGCCAGAAGGTGGTGGGATGATACATTG +AAAGTACTGAAAGAGTAAAGCCTGCCAACTGAGAATACTATATTTGACAAAACTGTCCTT +CAAAAGTGAAGGAGAAATTAAGACATTCCCAGATAAATAAAAGCTGAAGGAGTTTATTAC +CACTAGACCTGACCAATAAGAAATAATAAAGGGGCCGGGCACAGTGGCTCATGTCTGTAA +TCCCAGCACTTTCGGGAGCCGAGGCAGGTGGATCACCTGAGGTCAGGTGTTCAAGACCAG +CTTGACCAACATGGCGAAACCCCATCTCTAATAAAAATACAAAAATTAGCTGGGTGTGGT +GGCACGCACCTGTAATCCTAGCTACTCAGGAGGCTGAGGCAGGAGAATCACTTGTACCCG +GGAGGTAGAGGTTGCAGTGAGCTGAGATCACACCACTGCACTCCAGTCTGGGCAACGGAG +CGAGACTTCATCTCAAAAAAAAAAAAAAGAAACAATAAAGGGAGTCCTTCAAGTTGATAT +GAAGGGATACTAGACAGTTGCTCAGCCTGATGAAAATATAAAGGTAAATACATAAAGATT +AAAACCTGTATTATTGTAATTTTTGCTAATAACTCAATTTTTAATACTTTAATGGAATTT +AAAGGACAAAAGCATAATAATTATAAATCTATGTTAATGGATACAAAGTATATAAAGTTT +AATTTGTGACATCAGTAACTTAATAGTGGGGCAAAGATGTAAAGAGTAGAGGTTTTGTAT +GTGATTGAAGTTATCTGTTTAAAATAATAAGATAACTTTAAGATGTTCCATGTAATTCCC +ATAGTAACCACAAAGAAAATACCTACAGAATATATGCAAAAGGAAATGAGAAGGAAATCA +GAGCATAGCACTACAAAAAATCAACTATAACACAAAGGAAGACAATAAGGGAGGAAAGAA +GGACAAAAAGGCTCTAAACATACAGAAAACAATGACCAAAATGGTGGTAGTAAGTCCTTC +CTAATCAATAATTTCATTAAATGTGCATAGGTTAAACTCCCAATAAAAAGACATAAATTG +GCTGATTGGATTTAAAAAAACGGGATTCAACTATATATGCTGTCTACAAGAGACTCATTT +GAAATCCAATACAAATGGGTTGAAAAATGAGAGAATGGAGAATATTCCGTGCAAAAAGTA +ACCAAAACAGATCAGGAATGGCTGTACAAGTTATGGCTGTACAAGTTGTTGGGGTTTATG +TTACTGAAGAATGAACAGAGATGAGTAAGTGGAGGTGTTATGTAAAGGCATACTGTACTC +AAAATCTGAAGACCTGCAGCAGATTTAAATTCCAGCTCTTATTATAACTTTTTAAAAGAT +TGTGAAAATATCAAAATATAGATGAATCAAGTTTTAATATACTGTATGATGGGTGGATGA +GGCTGTCCATTGTACCATTTGTTTGAATTCTCAGGCATGGTTTGGCAGTGCAAGAACTCT +GTAACGTTAACAAATTCAATAAAAAGTAAATATATGGAAAAAAAAGTATAGACAAAATAG +ATTTTAAGTAAAAAACTATTACAAGAGAGGGTTCTGGGAAGAAAGTGGAGTAGGAAGCAC +TGGGAATTCATCTCCCCACCTAGAAAATAATCACACTGGCAGAATCTGCCTGATATAACT +ATTTTGGAACTCTAGACTCTATCAAAGGAGGCTTGTAACCTCCAAATGAAGGCTTAAACT +ATAATTTTTACTTAATTTTGGTCAATTTCAGCTCTTAGCTCAGCAGTGGCTACCCAATCC 
+CCATGCCCCAGCTTCACGGCAAGAAGCTTTGCATGTGTTCCTGAAGCAGCTTGTACCAAG +CTTGTGGGAACAATCATGGGCAATAAGCACTCTGTCCTCCAAGTGTTAGCATCTGCGTTC +TGGTTGTTGATTGCTACTTTTGATTATGGAAGGGCAAACACAGAGGCCGGCAGCCATTAT +TGCTCACAACTCCCCACTCCACTGCTGCAAGCCCTTACCAGACTGAAGCAACTTCTAGGA +GATAGAAAAGGCCAGAACCCCATTTCCCTTCCCCTTCATTGTTCTCTTTTCCTTTTTTGG +GAGCCAAACATTAAAGACTAGGACACTCAAAAGCAATGGCATACCCAGAGGAAATTAAAG +TTACCACACATCCTTGGAGAGAGGAGTGTGTGCCCAGGGAAAGGAGCAGCTTCAGACCTG +AGAAGACCTCAAGCTTACAACTCAGGTTGATCCTCAGCATGGAGACAACCTACAACAATG +TAAAACATAACAAAACCCTAAAACAGCAAACCCTGAGGAAGAGGAGAGTCTCATCTCCAG +AGTTACTGCATTATTATATTCAAGTGTCCAGTTTTCAATACAAAACACAAGGCATACAAA +AAACAAGAAAATATGGTATTTCAAAGGAAAAACAACAACAGAAACTGTTCCAGAGAAAGA +CCAGATGGTAACCTACTACACAAATACTTTAAAACAACTTTCTTAAAGATGGTCAAAGAA +CTGAAGGAAGATGTGGAGAAAGTCAGTAAAATTATGTGTAAACAAAATGGGAATATCAAT +AAAGAGATAGAAAACCTAAAAAGAAAAAAAATAAATAAATTCTGGAACTAAAAGTGTAGT +GATTGAAATAAAAGTTCACCAGAGGGATTCAAAAGCAGATTTGAGCAACAGAAGAAAGAT +TCAGTGAATTTGAAGATGGGACACTTGAAATGATCAAGTCAGAGGAACAGAAATAAAGAC +GATTGTTGAATAGCAAGCAGACATTGTGGAAGTCCCAAAAGAAGAGAGGGAAAGGGGCAG +AGAGATCATTTGAAGAAATAATGGCTGAGGCTGAACACGGTGACTCACACCTGTAATCCC +AGCACTTTGGGAGGCCAAGGTGGGTGGATCACAAGGTTAGGAGTTCGAGACCAGCCTGGC +CAATATGGTGAAACCCCGTCTCTACTAAAAACACAAAATTAGCCAGGTGTGGTGGTGCAT +GCCTGTAATCCCAGCTACTTGGGAGGATGAGGCAAGAGAATCGCTTGAACCCAGGAGGCA +GATGTTGCAGTGAGCCGAGATTGTGCCATTGCCCTCCAGCCTGGGCAACAAGAGTGAAAC +TCCGTTTAAAAAAAAAAAAATTAGCTGGGCGTGGTGGCACGTACCTGTAGTCCCAGCTAC +TCAGGAGGCTGAGGCAGAAGAATCGCTTGAATCCAGGAGGCAGAGGTTGCAGTGAGCCAA +GATCACACCACTGTACTCTAGCCTGGGCAACAGAGCGAGACTCTGTCTCAAAAAAGAAAA +AAAAAAAGCTGAACACTTCCCAAATTTGATGAAAGACATGAAAATAAATATCCAGAAAAC +TCAATGGACTCCAAGTAGGATGAAAAAAAAAAGACTCATACTGAGACATTATAATTAGCC +AGTAGGGCCTCTTGAAAGCACCAAGAGAGAAGCAACTAGTCACATGCTAGGAATATATAA +TAGGATTATAAGTAGATTTCTCATCAGACACTTTGGAGAACAGAAGACAATGGGATGACA +TATGTAAAGGGCTAAAAGAAAAACAACCACTACCTCTCAACCAAGAATCCTATATCCAGC +AAAACTGTCCTTCAAAAGTGAGGAAGAAATTGGGAAATCCCCAAATAAACCAAAGTTGAG +AAGTTTGCTACCATTAGACCTGCCCTGCAAGAAATCTTAAAGAGAATCATGCAGGTTGAA 
+AAGAAAGAACACTAGATAGTAACTCAAAGCCATATGAAGAAATAAAGATGCCAGTAAAAG +TAAATATATGGGAAAATATTAAATCTAGTATTATCGTAACTTTGGTTTAAAACTCCATGT +TTTGCTTTCTACATAATTTAATAGACAAATGCATTAAAAACAATTATTAGTTTATGTTTA +TGGACACACAATGTACAAAAATGTAATTTTGTGACATTGATAACTGAAAGAGGAGTGGCA +AAACTGTGAGGAGAATTTTTGCATATTATTGAAATATAGCTGGTATGAATTCAAGTTAGA +GTGCTATAACTTTAGAATGTTAAGTGTAATCCCTATGGTAACCACAAATAAAACATTATA +TAACATAAAAAAGTAAATGAGAAGGGAATTAAAACACTTCGCTACAAAAAATCAACTAAA +TACAAATGAGATCATGCAGGAAATGGACAAAAATGCTGTAAGGCATATAGAAAATGTATA +GCAAAATGCCAGAAGTAAGTCCCCCCTTATCAGTAATTACTTTATTACTTTTTAAACCAT +TTTGTTGAGGAATGATTTACATAAAAACTGTACATATTTAACGTACACATCTTGATAAAT +TTACACCATAAAACCATTATCATCAAGCCTATAAACATATCCATCACCTTTTAAAATTTC +CTTCTGCCTCTTTATTATTATTATTGTATAAAAAAAATGTTTTTAATGGCCAGGTGCGGT +GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGCGGATCACCTGAGATCA +GGAGTTGGAGAGCAGCCTGGCTAAGATGGCAAAACCCCATCTCTACTATAAATACAAAAA +TTAGCTGGGCGTGGTGGCGGGTACCTATAATCTGAGCAAAGTACTGGGAGGCTGAGGTGG +GAGAATCTCTTGAACCTGGGAGGTGGAGGTTGCAGTGAGCCGAGACAGCACCATTGCACT +CCAGCCTTAGCAACAAGAGTGAAAATCTGTCTAAAAAAAAATAATAATAATAAAAGTTTT +TAATTAAACAATTTAAGAATATAGTAACATTTTCTGTGGGTACTATGCTGTATAGCTCTC +CAGAACTTACTTATCTTGCATAACTGAAATTTGTACACTTTAACCATCAACTTCCCATTT +CCTTCTCTTCCCCAGCTCCCAGCAACCACCATTCTGTTCTCTTCTTCTGAGTTTGACGTC +TTTAGGTTCCACACATAAGTGAGATTGTACAACATTTCACTTTCTGTGTCTGGCTTATTT +TACTTAACATAATGCCCTCCAGTCCATCTATGCATAGAAATGCAACTGATTTTTGGATGT +TGACTCTGTATCTTGCTACTTTATTGAATTTATTACTTCTAACAGTCTTTTAGTGAAGTC +TTTACAGTTTTCTATACATAAAAATATGTCATCTATGGAGACCATTTTACTTCCATTCTT +ATTTCTTTACTTGCTTAATTGTTCTGGCTAGGACTTCCAGTCCTATTTTGAGGAGAAATG +GTGAGAGTAGGAATTCTTGTCTTGTTCTTCATCTTCGAGGAAAAACGTTCAGTCTTTCAC +TGTTGAGTATGTTACATGTGGTCTTCATTATGTTGAGGTACATTCCTTCTGCACCTAATT +TGCTCAGTTTGTTGTTTTTTTTAATCATGAAAGGATGTTGAATTTTATCAAGTGCTTTTT +AATAATAAAAATAAAGGATTTTTATCGTTTATTGTGTTGATGTGGTGTATCACATTTAGT +GACTCTTGTATGTTAAAGCATCCTTGCATGCCAGAGATAAATCCCACTTGATCCTGGTGA +ATAATTCTTTTTTTGTTTTTTATTTATTTATTATTTATTTATTTTTTTGAGATGGAGTCT +CGCTCTGTCATCCAGGCCTGAGTGCAGTGGCGCGATCTCGGCTCACTACAAGCTCCGCCT 
+CCCAAGTTCACGCCATTCTCCTGCCTCAGCCTCCCTAGTAGCTGGGACCACAGGCACCCG +CCAGCATGCTCAGCTAATTTTTTTTTTTTTTGGATTTTTGGTAGAGACTGGATTCATGGT +GAATAATTCTTTTAATGCACTGTTGAATTTGGTTTACTAGTATTTCATTGAGGATTTTTG +CATCTATGTTAACCAGGGATATTGACCTATAGTTTTCTTATTTTGTAGTGTTCTTATCTG +GCATTGATATCAGGATAATGCTGGCCTCTTAGAATGAGTTTCAACGTGTTCCCTTTTCTT +CAATTTTTTTGGAAGAGTTTGAGAAGGATTGTTATTAATTCTTTAAATGTTTGTTGAAAT +TGACCAGTAAAACCATTTGGTCCTGGGATTTTCTTTGTTGGGAGATTTTTCATTACTGGT +TTAATCTCTACTTATTTTTTCCTGTTTTTTTTTTTTTTTTAATTATACTTTAAGTTCTGG +GGTAGCCGTGCAGAAAGTGCAGGTTTATTACTTAGGTATACATGTGCCATGGTGGTTTGC +TGCACCCATCAACCCATCATTTACATTAGGTACTTCTCCTAATACTATCTCTCCCCTTGC +CCCCATCCCCCAACAGGCCTCAGTGTGTGATATTCCCTGCCCTGTGTCCAAGTGTTCTCA +TTGTTCAACTCCCACTTATGAGTGAGAACATGCGGAGTTTGGTTTTCCGTTCCTGTAGTT +TTCAGATGTGCTCTGGCCTCCAAGGACCATGAAGCCAGGCGGTGGTGGGGGGGTGTCCTC +TGTATAAAAAGTGCTGTCCCAGGAACTTCCTGACGGACACTTTGGGGCATGTGAGCGATT +CCTGGGGAGGGCACCTCGGCCTTCCTAAGGCTACCCCTGCAGCCAGCGCTGGCCATTCTC +ACCAGCAACCAACCAAAAAACATCAGAGTCCCTGTAAACCTGGTTGTGATGATAAAAACC +AAATGTTTTCTAATCTAAGACTTGTATGCAGAACACAGAAAACTGCAGTTAAGGAACACC +CTACAAAATTGACCAACAGTATTTTCCAAAAACATTTTTCATCACTTTAAATAATGTACA +AAATCTACAAAAATCATATTTACCAGGACACATCTGTTAAATAAAAGCATTGTTTCGTGT +TGGTATACATATATACAAGACTATGTATAACAGACTGTTTCCCCTCCCTGCAACCACAGA +ACCATCACACACAGGCACAGATACACGTCAGCTATGCCGCTTTCCACGAATGCATGGAAC +CCAGGACGCGAACCCACAGCTCGAGGTCTTATACCTTCACTACTGAGCTGCCGCCACTGC +AGCAGCAACACCTCTGCGGAGGTGTTGCTGAAATCACTGGTGTCCCTGCCCAAGGTGTCC +TGGTCCTGGTCAACTCTACTGATTGACCCTTCGTGGATACCTCAGGTCTAAAATCCTTTC +CTCCGAGCCAGAGCTCTTCCTGTTGTGCAAACTCAGCCCCGTCTGTACCTTCCTGCTTGG +CCGGTGATCGCAGTCCTTCTTCGCCAGCAAGTGTGGGCTTCCAAAGACAAGGCTGGGCCT +GGCCTGGGACTCCCTGAAGGCCGAGAAGGACAGGCCCTGCAGAGGCAGCCCCAGGTAGGG +GCTGAAGAGGCCGGTCTCCCTGCCCCCCCAGGAGACACCCTTTCCAAAGTGGAAGAGCTG +GGTGGACAGCAGCGGGGAGAAGCCCGCAAGGGGCAGGCTGGGCAGGCCCCGGGCGGGGCC +CTCGGACCCCTTGTCTCTCTCTCCCAATGCCGCCCCCTCCACGCCGCATCTCTACCTTTG +GAGCGCAGTGCCCATGGGCTGGGCAGCCGACTGTGGTGGGCAAAGTCACTCCAGGGCGGG +GCGCGGTTGGCCTGGGCTCCAGCATCCCTCTCAGCTCCCGGGCTGGGGGTCAGGAAGCTC 
+CGGTTCCTGCAGTCCACGTGAAAGGTCCCCCTGGCTCCTCCTGCCTCGGTGCCCGCTGTG +GCACAGCTGGGCCGCTGACCAAAAGCACTTTTGGCAGCAGGCTCAGCCCCTCGTGGGCTC +TGGTCTTTGTAAACGTAAACACTTCTCTTGCTCACTGAGAGACCTTGGCGGAGACACTGC +CGCTGGTCCTCCTGGGTGGCGCATGATCCCCTCCGCCGGATCAGTGGGGAGCCCCTTCCC +ACCCTGGCTGAGCAGGCACGCTGCGCCCTGTGCTCCTGCGCTCCCTGCGCCTCGATGCCC +TTTAGCCAAATGTGGGACCCCCCGGCCCTCTGGCTCCGTGTGCACATGCCAGGCAGTGGG +GCCGGCTCCTTCCTGCAAGGTACCTCTGGCCTGGCTGGGCCCCCTGTCCCGAGAGCGGTG +GGGCCCTCTGCCTGAACTTCTGAACTGCTCACCGACTCCTTGGCCTTTTCCACCAAAAAC +TTCCTAATCTCCAGTTCGATGCTATCGTCGCTGTCCACTGAACTGCTGTTGTCAGACAAG +GAGCCAGGGCTGGGAGCTGGGCCCTGGGACTCTCTGGGGAATAGATTCTCTTCGGAGGCG +GAGGCAGAAGCGGGTCTCCTCACCAGGAAGGCCGGGGCTTCGTCCTCTCGGCCCTGCTGC +CAAAGACACTGGGGGGTTTCTTCCTGGAGCCCTCGCAGCTGTCTCTCTTGGACTTAGGCA +GCTCTTCAGCACCTGCAGGTGCCTGTTTTTCCACTCTCTCAGGAGCCTGCCCAGCCGCTC +CTGGAAGTGCGTCTGGGAGGTGCTGAACCTGACCTTCTTCCTGCACACAGCCCTGGGCTC +CCTGGACCTCTTCTTGAGCTTTCACTTGGACCTTAACAAGTCCTTGATGGCTGTGTCCAG +GTCCTTGTCACTGTCCAGGGAACTGCTTTTGTCTTCGGAGCTCTTCTTCTTGTCTAGGTG +CCTTGCCTCGTCTGTCTTACCCTGGCCCTGTGACGTGCGAGTGTCACCGGGCACCCTAGC +GGCGCCCTCTCCTCCCGGGGCCTCGCTGGCTGTGCCCTGGATGGAAAGGTCCCACCCTAG +CGGCGCCCTCCCATCCCGGGGCCTCGCTGGCTGTGCCCTGGATGGAAAGGTCCCGCCCCT +CATGGCCGGGCCCGGCTCTCCCCTGGCTGCGGTCGGCATCCTGGCCACCCTCTTTCCCCA +CCACCCGCATGTTCTTGGGAGTGGATGGCCTCACTTGGCAGCCGCCTCTATGCTTCCTTT +TGCAGCCAGGAGTGGGTCCAGTGTTTTAGAGAGAGGGGCCTTGGGGCTGCCGGTCTGGCT +GCTGAGGCCAGGTGGTGAAAGTGGGCCCTGGGCAGCCTGAGGGCAGCTCTCACCTCTGGC +CAGCAAACTTCTAGACTGCACCTTGAGGGCCAAAAACGTCCAGATTTCCTGCTCAATGCT +GTCGTCGCTGTCCACGGAGCTACTGTCACCATCAGAGCGGGAAGGCACGTTGGGGGAGTA +GAAGAGTGGGCTTGCGGACAGGGACCCATCGCTGCCCTCCATAGGGCCGGCAGGATCGTC +TTGAAAATGTCCAGGACTGCTTCTACACACATCAGCTCAGCGGAGGTGTCTGCCTGGCAA +GAGGACCATTCCACAAACTTGCTCCTGGAAGCCGGGCTCGTTGGAGGTGGAGCTTTGGTT +TCCTTTGGGATCTTGGGGGAATGGTCAGCGTCCAGATCCCCTGGACCAGGGTCCGTGGTC +TTGGTGGGCACTGGCTTCTTCTTGCTGGGTGTTTTCCTGTGGGTCTCTGGCAAGGCACTT +TTTGTGGCGCTGCTTGTGCTGTGTGCGGGAGGGGCAGGTGCTCTTTCCTCTTGGAGCTGG +ACCCTCTGGGGCGGGTCCCCGTCGGCCTCCTTGTGTGTTTTCTGCACCTGGTACAGCTGG 
+ATGGCCTCCTCAATGCCGTCGTCGCTGCTGGAGTCGGACGCCTCGGGAGCCTGTACGGCG +CTCGTGACTCGCTTTCCCCTCCTTGCGGTGCTGGCGCTCCTTTTAATCCCACTTTTATTC +TGTACTGCTTCTGAAGGGCGGTGGGGGTTGCTGGCTTTGTGCTGCCCTCCTTCTCCTGCG +TGGTCGTGGTCGTGACCTTGGACCTGAGGCTTCTGGGCTGCACGTTTGTCTTTGCTAACC +GGGGGAGGTCTGCAGAAGGCGAACTCCTTCTGGACGCCCATCAGGCCCTGCCGGTGCACC +ACCTTTGTAGCCGGCTCTTGGTGGGATTTCGAGAGTGACTTCGCCGAATTTTCATGTGTG +TCTGGTTTCTTCTCCACTGACCCATCACATTTTTGGGTCTCATGCTGTCTTTTCTCATTC +AGAAACTGTTCTATTTCTGCCCTGATGCTCTGCTCAAAGGAGTCTGCTCTGCTCATGCTG +ACTGGGGAGGCAGAGCCCTGGTCCTTGCTGGATCCCACCTGGCTGCCAGGGCCACACCAC +CTGAGCCAGGTACAAGTTTTGGGGAACACAGGGCAGTTGGGCACTGCTGTGAGCCAGTTC +CCGCTTACATCTACTGCCTCCGCCCGCAGCCCTGGAAGGCTGTGCATGGCTGGGCCCCGC +TGGCCCCGGGCTGTGCCGCTCCACTCTTTACCTTCAGGTACTCCTGGATGGCCTCCTCAA +TGTCCCGGTCCACGGAATCGTCACTGTCTGAATCTAGCACCAATGGGCCAAAGTCTGCAG +TTTCCTCCTCCCCCACGGGGTCAAAGTCAGCAACAAGACCACAGGCAGCCAACGCAGGCA +GCTCCTTGTGCATGGTGGGCTTGGCAGCAGGCCTGGCGTCGTGGCATCCCTCTGCCCCCT +CTGCGCAGTGCGCTCATCGCTGGTGCCCCTAGCAGCCCTGTCGCTCTGCAGCGTGCTGAT +GAGCATCTGCACCCGGGTGCTCACCGACATGCTCTCCACGCCCTTGTCAGCCTCCGAGAA +GCACCCGGGGAACCTAAAGCTCCCTGGCGGGACAGAGGCCTCCCATTTGGGCTGGAGAGC +AACCACTGGAGGAGCATTCATGAGAAACATTCTGGCAGATGGGGAGCGACGCGCAGAGGG +GCGACACTTTATTTCTCTGCAGGCTTCACATCCTCCAAAGATTGGCAAGCAGTACCCGTG +AAATAACTTTAAACCTGCAAATGCTTCTTTGCAGGTTTAAAAGGATGACTATAAACTATG +ACGTCATGCCTAGATTCATTCTTGACCCAACCAACAAGCTCTTGACATTCTCTGAGTCCA +GGTTGACTGTGATGAAAGGCAGCTAGTGTTCCCAAATGGCCCAGGGATCAGGTCTTCATC +GCTCCACTCAGAGGGAAGCATCCTCTCTCTGCTTTTTAAATAGACTTTTGACTGGGGCTC +CAGCAGCGCGGGGCGCGCAGACCTGGAGTTGCATGGAGGCCAGAGCCACGACACCCGCCT +GGGGAACGGAGCAGCCCCAGGCGCTGATCCCCGTCCACCTGCCCCACGGAGCCCTCGCCG +CCCGCTTGCCACTGCCTGCATGGCCCTCCTGTCCCCGGCCCCCCAGCCCTCCTTTCCCCA +GCTCCCCCACCCTCCTGTCCCCGGCACCCCAGCTTCCCAGCCCCCGAAACCGCCCCCCCA +CCTCGACCCGGCCCATGCCGCAAGTCGCCCGCTGCGCGGACCCGGCCTCCGCCCGCCTCC +TGCGTCCTGGGGGAGGCGGCTGCCGGGGGTGGTGGGGGAGGGGGAGGGGGAAGAGGCCGC +CCTCCGCCCGGGTGCGGGGAGGGGGCGCAGGGGTGTCCGGCCAGGCCCCCCGCCTCCCCG +CCTCCCCGCAGCAGCTGCCCCGCTCCCGGGCCGCCTAATACTTTTACATTTTAACTTTTA 
+TACTACAGTGAAAAGTGATTTACACACCACCACTGCAATATTACAGTGTTATGAATGTGA +CTATATACTTACCTTTCCCTGTGAACTTTTTTTTTTGAGACAGAGTCTCGCTCTGTCGCC +CAGGCTGGAGGGCAGTGTCCATGATCTCGGCTCACTGCAAGCTCTGCCTCCCGGGTTCAA +GTCATTCTCCTGCCTCCGCCTCCCGAGTAGCTGGGACTACAGGCACCCGCCACCACGCCT +GGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGCATGATCCC +CCTCTTCTGACCTTGTGATCCACCCGCCTTGGCCTCCCAAAGTGCTGGGATTACAGGCGT +GAGCCACTGCGCCCGGCCTACCTGTGAACTTAATATCTTAATGTTTTAATGTTGCTAATC +AGAATCCTTTTATTTCAACTTGAAAAACTGCCTTATAAGGCAGGTGCAGTGGTGATGAAC +TCCCTTAGAATTTTTTTGGTGGGAGTCTGGGAAAGACCTTATCATCTCTTTTTCATTTCT +GAAGGACAGCTTTACAAGTTGTGGTCTTCCTGATTGGCAGTTTTTTTCTTCCAATACATT +GAATATAGCATCCTATTCTCTCCTGGCTTATAAGGTTTCTGCTGAGAAATCCACTGATAG +CCTTATTGAAGTTTCCTTGTATGTGATGAATTCCTTTCTTCTTGCTGCTTTTGAAAGTCT +CTGTCTTTGACTTTTGATATTTTAATTATAATACATCTTGGTATTATGGTCTTTGGGCTG +GCCTTTTTTGGGGCCTCTGAACTTCATGTGTCTGGAAGCCCACTTGCCTCTAAGAATTTG +GAAAGTTTTTACCCATTATGTCTTCAAATATACTTTCAGGCCTTTTCTATCTTTTTTACT +TCTAGGAAGTCCATAATATGTTTGACCCACTTCATGGTGGTGTCCTATAAATCCCAAAGG +TTTTTACTTATAAACTTTTTTTTCTTTCTGGTCTTCTGACGGGATATTTCAAATGTCCTG +TCTTTAATTTCACAGATTCTTTCTTCTGTTTGATCAAGTCTGCAATTGAAATTCTCTATT +GCATTTTCATTTCATTCATTTATTTATTTTTATATATTTTTGAGACAGAGTCTGTGTCAC +CCAGGCTTGAATGCAGTGGTGCCATCTTGGCTTACTCCAACTTCCACCTCCCGGTTCAAG +CGATTCTCCTGCCTCAGCCTCCCTAGTAGCTAGGATTACAGGCATATGCCACCATGCCTG +GCTAATTTTTGTATTTTTAATACAGATGGGGTTTTGGCATGTTGGCCAGGCTGGTCTTGA +ACTCTTGACCTCAAGTGATCCGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGT +CCGCCACGGCACCCAGCTTGCATTTTCATTTTATTCATTGTATTCTTCAGTTCTAGAATT +TCTGTTTGGTTCTTATTATTTCTGTATCTTTATTGAACTTTTAGCTTTGCTCATCTACTA +TTTTCTTGATATTATTGAGTTGATATATACATTCTAGTAAATTTCACTGAGCTATCTTAA +TTATTTTGAATTGTCAGGCAATTTGTAGATCTCTATTTTTGGGGGGTTGATTACTGGAGA +TTTATGAGTTTATTTTGGTAGTGTCATATTTGCTGATTCTTCATGATCTACAGACTTTCA +TTAATGTCTATGAAGAAGCAAATACCTCTTCTTTTTTTTTTTTTTTTTTTTTTTTTTTGA +GACAGAGTCTTGCTCTGTCACCCAGCTGGAGTGCAGTGGTGTGATCTCAGCTCACTGTAA +CCTCCACCTCCCAGGTTCAAATGATTCTCCTGCCTCAGCCTCCCAAGCAGCTGGGATCAC +AGGCATGTGCCACCACGCCTGGCTAATTTTTTTGTATTTTTTGTAGAGACAGAGTTTCAC 
+CGTGTTGTCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGGTCTGCCCGCCTTGGCCTCC +CAAAGTGCTGGGATTACAGGTGTGAGCCACCATGCCCAATCTCTTTCTGTCTTTATAGAT +TGGTTTCAGCAGGTACAAACCTTTTCCTGCTGGATCCCTTGACTGGATCACAGTCAAGTG +GGCCTGGAGCCATATTACATGGCTGCTGCCTGGTCTGCAGCTGAATCTCTGATTGGCAGG +CCGCTATCAAGGCATAGGTTGGTGATGCAGTTTCTGCTGGATCCTCAGGAGAACTGGACT +GCCTCTGATACCCTGATTGAACAGGACTGGAGCCAGGTCATGGGGCCACTTCTAGTTCTA +CAGTCAAGTCTTCAGATATCAGGCCTATTACCAAGGGCATGGACTGGTATAGCTCCCTGT +GGGTCCCAGATTGAGCTCCTGCTGGTTTACTAGGTAGGTCCATGGGAAGACAGGACTGCC +TCCAGACCACAGTAGAGCAGGGCTAGAGCCAAGTCACAGGACAGCTTTGGTGACCACATT +TGAGTTCAAGATTGGTGGTCCTCTTATTAGGAGAATGGATGGTATGTCTTTCACCAGGTC +CCAGGATGGGCTGGACTGTGCCCAGACTGTGGCAAAGCAAGACTGGAATGGAGTCACAGG +GCTACTTTAGTGTCCATAGCTGAGACTGAGATCAGCAGGCCTGTTACCAAGGGTATGTAA +AGGCATCACTGAATTCCTGGGCAGGCATGACTGACTGTGGTAGAGTGGGGCTGAAGCCAG +GTCAGGGCTGCTTTAGTTTCTGCAGTCAGGACCATGGTTAGAAGGCCTGTTACTGGGGGC +ATAAATGGTCATGGTTCCTCCTAGGTGCTTAGTGGATGGGGCTAGTTGCAAGACCATGAT +CTAGTGGAGCTGGACCCAAGTCCATAGGAGGACAAAGCTGCTTTCAGTCTGCAACTGGTA +ACCTGTCACTGGTGTGTGGACCTGCCTTCTCAAAGCAGCTCTCCTTGGTTTTGGGCTTTG +CTAGAGTTTTGCCACCTCCTGCCTGGATATTAAAACTCTTGCAAAGGCAGTTTTGTCCAT +GAATGGCTGCCAGATCATTGTTTGTGTGGGGAGAGGTGAGTGGAGGGCCTCCTGTTCTGC +CATCTTGCTGATGTCACCCTAAGATGATTATTTGAATTCTTTGTCAGGCAATTTGTAGAT +CTTCATGTCTTTGGAGTCAGCCACTGGAGTTTCATTTTGTTTCTTTGGTGGTGTCATATT +TTCTCATGCTTCCTGTTCTTTGAAGACTTAGATTGCTTTCTTCATGTTTGAAGAAGGAGT +CATCTTTTCCACTCTTTACTAACTTCAGGAGAGAAAGACCATCAATTAGCTAAGCTATAG +ATTCTGGGGGTCTCTCAGTCCTTTTCTGTGGGTGGTCCTTCCCTTTTAAGGGGGATGTCT +TAGGATTTTGTCCCTTGTCTTCATTTCACAAATGAATAAAACAACCAGACCAGACATAAG +TAAGGAAATACAGCACTTGAACAACACCTGAAAAAACAACTAGACCTAACAGACATACAC +AGGATATTCTACCCAACAACATAATACACATACTTCTCAAGTATACATGGGACATTTTCA +GGATAGACCATATAACACATCACAAATTAATTCTCAATAGGGGCTGGGTGCAGTGGCTCA +CATCTGTAATCCCAGAGTAATTTGGGAGGCTGAGGCGGGTGGATTGCTTGAAGCCAGGAG +CTTGACATCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAAAAATTAG +CTGGGCGTGGTGGTGCGTGCCTGTGATCCCAGCTTCTTGGGAGGCTGAAGCGTGAGAATT +GCTTAGGAGCCCAGGAGGTTGAAGCTGCAGTGAGCAGAGATTGTACCACTGTACTCCAGC 
+CTGTACTTCATGACAAAGAAAATGTACCATTGTACCACTGACAGAACGAGACCCTGTCCC +AAAAAAGGAAAAAAGCTCAGTAGATTTAAAACGATAGACATCATACAAAGTGTCTTCTCT +GACCACAACAGGATAAAGTTAGAAATCAATAACAGAAGATTTAAAAAAGTTCACAAATTA +GTAGAATTTAAACAACACACTCTCAAACAACCAATGGATCAAAGAAATCACAAAGAAATT +ATAAAATTCTTAAAGACAAATGAAAATGAAAGCACACTATATCCAAACTTATGGGCTGTG +GCCAGTTGTGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGACTGAGGGAGGTGGAT +AGCTAGAGGTCAGGAGTTCAATTTCAGCCAGGCCAACATGGTGAAACCCTGTCTCTACTA +AAAACACAAAAATTAGCTGGGAGTGGTGGTACGTGCCTGTAGTCCCAGCTACCCAGGAGG +CTGAGGCATGAAAATTTCTTGAACCCAGGAGGCAGAGGTTGCACCACTGAGCTAACACCA +CTGCACTCCAGCCTGGGTGACAGAATGAGACTCTGTCTCAAAAAACAAAGAAACAACAAA +AAAACACAACTTATGAGTTGTGGTGAAAGGAGTGCTAAGGAGGAAATTTATAGCTATAAA +CACATTAAAAAAAGAAACACCTCAATTCAACAACATAAGTTTACACATTAAGAAACTAGA +AAAAGAAGAATAAAACTAAACCCAAAGTTAGCAGAAGGAAGGAAACAATAGAGATCAGGG +CAGAGATAAATGGAAAAGAGAATAGAAAAACAATAAAAAACAAAACCAAAAGTTGGTTCT +TCAAAAAGATTAATAAAACTGACAAGACTAAGGAAAAGGGAAACAATCTAAATTACTTAA +AACAGAAATGTATTTGAGAATATCTTTATATATTTCTGTCTGTCTGTCTGTCTGTCTGCC +TGTCTATGTTTTAGAGACAAGGTCTAGCTCTATTGCCCTGGCAACAATCAGATGCAACCA +GAATCAGTGGCACAATCGGCTCACTGCAGCCTTGAATTCCTGGGCTCGCCACTATGCCAG +CTCTTTTTTTTTTTTTTTTTTTTAAGAGACAGGATCTTGCCATGTTATCCAGGCTGATCT +TGAACTCCTGGCCTCAAGGAATTTTCCCACCTCGGCCTCCCAAATTGTTGGATTACAGGC +ATGACCCACCATTCCCAGCCTAGAAAGGATTATAAAAGATTACTATAAATAATTGTGTGC +TCATAAATTGGATAACCCAGATGAAATAGATGAATTCCTAGACACACAAAACCTACCAAG +ACTCAATTATAAAGAAACAGAAAGTCAGAATAGACCTAACCTAGTAAGGGAATTGAGTCA +GTAATAAGACAATCTCATGACAAAGAAAAGTCCTGGACCTGATAGCTTTACTGGTGAGTT +CTGCCAAACGTTTAAAGAAGAACTAACACTGATTCTTTTCAAGCATTTCCAAAGAGTGGA +AGAGGAGTGAATACCTCTTAACTCTTTCTATGAGGCCAGCATTACCCTGATACCAAAGCC +AAAAACACTGTAAGAAAATAAAACAACAGATCAATATGCCTCTGATCATTGATGCAAAAA +CTCTAAAAATGCTAGTATACTGGGTGGGGCACGGTGGCTCATGCCTGTAATCCCAGCACT +TTGGGAGGCTGAGGTGGGTGGATCACCTGAGGTCAGGAGTTCAAGACCAGCCTGACCAAC +ATAGAGAAACTCTTGTCTCTACTAAAAATACAAAATTAGCCATGTGTGGTGGTGCATGCC +TGTAACCTCAGCTACTCGGGAGGCTGAGACAAGAGAATCGCTTCAACCCGGGAGGTGGAG +GTTGTGGTGAGCCGAGATCATGCCATTGTACTTCAGCCTAGGCAACAAGAGCAAAACTTT 
+GTCTAAAAAAAAACTATCATACTGAATTCAGCATCATATTAAAAGGATTATACACCATGA +CCAAGTGGGATTTATTCCTGGAATGCAACGATGTTTCAATATATAAAAATTGATCCATAT +AAATTCAGACAATTAATTTTAACTGATGTATAGTTTTACATTATATGCACATACCACGAT +TCACTTTTCCATTTTCCTAGTAATGAGCTTTAAGATTATTTCCAATTTTTAGCTATTACC +AATAATGTTTCAACAAACACCTTTGGACATCTCTTCTGCAAATGTGTGTGTATTCTCTTG +GAGAAGGTTTACCCTGTGGAATTTCTAGTGTGTATGTGTATGTGTGTGGGTATATATATA +TACACATATGTATATATTTTCAACTTTACTATATATTTCCAAAAAACTCTTCAAAGTGGC +AGATTTAAACTCCCCTTCGCCGTGTATAAATTCACGGTTCCTAAAGCCCTTGGCATAGTT +TTGTGTTTTTATAGAAAAGGGCATAAAGTCTTATTGCTGTTGAATTGTACCTTGCTCTTT +TCAATTGTCTTTTCACAATAATTCACTATTTATCATCTGTTGTGAATATTGTGTAGTAGT +TCTTAATCATTTTTTCTACTATATTATTTATTTTATATCATGAATTTGTAGAAATCCTCT +TTATATAATGGATATTATACTTTTCAATTATATATTTTGTAAATATTTTACCCAATTTTC +TGGCTTGCATTTAACCTTTATTTAGGATGCCTAGGATTTTTCATTTTAATACAGTCTAAT +TTATTTTTCTTTTCTTTTCTTCTTCATGCATTCTGGCATTTGATTAATAAGCCATTTGTT +GCCCAACGTCATTAGTTCTCTGTATATACTTTTAAATATCTTAAACATGTAATATTCATA +AGTAATTTCACAAGTAGTTTAAGATTTGAGTCTTTAACCTAAATAGATACAGGATTTTGT +GATATGCTGTAATATACAGATTTTTATTTATATATGGATAAAAATTTATATGTTTTTTCT +GACATCATTTATTGAATAGTATAATTTTTCTACTGACTTTTAGTATCATATCTTTTATTA +CAAGTCCAATGTATTCAGCATATCTATGGATCTATTTCTTGGTGCTTCCATTTTTTCCCA +TTGTTTAACGGTGTATCACTGAGCCAAAGCAAACCATTTTAATTATTATAATTAAAACAC +ATTTAAATATTGATAGGCTATTATTGTTCAGAATAACTTTGGTTCTTCCTTTCCATTGAT +TATTTTAATATGACTTTTCCAATCAAGTTCTAAAAAAATACTGGTGGTATATTTATTGAA +ATAACTTTAATAAAATTAAATGAAAGATCATGTGTTTTCTGAATTAATTCTTAGATACGT +TAATGTTTTATGTTACCATGAATGTGATATTATAATATAATATTTTTAATTGGTTGCTAC +TGTTTATAAGAATTTCATTTTCTCTTTACTTTGCCTTCATATCTGAAAACCTTGCTGATT +TGATTAGTGCATCCACAAATTTTCTTGGATTTTCTATGGGTAATTACAAATCTCCACACA +ATGAGGTTGCAGTGAGCCAAGATCACACCACTGTACTCCAGCCTGGGCGACAGAGTGAGA +CACCATCTCACAAAAACACATAAACAAACAAACAGAAACTCCACACAATGACAACGTATG +TGTTTTCTTTTTTTCTTCCTCTTTCTATAATATTTCTTTGTCCTATCTTAACTGAACTGG +CCAGAAACCCCAGGACAATGATAAATACGAGCAGTGTCAACAGACATCTCATTCCCTTTC +CTAGCTTTTATAAAAAATAACGATTATGCTTCAACATTACATATGGTGGTGTCGATGGTT +TTGTTATAGATAAGCTTATCAGGTTAAGAAATTTGTCTGCTTTTCCTAGTTTGGTATAAA 
+GATTTTAATATAAATGAATGTTGTATTTTACCATCTTATTTTTTTTCCTACATCTGCTAA +GGTAATCCTGTGTTTTCCCTTTTTCAATCTCCTAATGTGGTGAATGACATTAAAATACCT +TCTATTGTTAAAATATTCTTGCAACGCTGTATAGAACCAATGCCTTTATTCTGTATTGCT +GATGGATTTTTGAAAAATATGTAGGTGGACTTAGTTTTCTAAGGGGAATAGAATTTCTAG +TATATTTAAACTATTTTGCATGTATGTTCTGAAGGACATTGGTGTGTCATTTCTATACCA +TCTGGCTACGAGAGGAGCCGACTGAAAGTCACACTGCCGGAGGAGGGGAGAGGTGCTCTT +CCGTTTCTGGTGTCTGTAGCCATCTCCAGTGGTAGCTGCAGTGATAATAATGCTGCGGTG +CCGACAGTTCTGGAAGGAGCAACAACAGTGATTTCAGCAGCAGCAGTATTGCGGGATCCC +CACGATGGAGCAAGGGAAATAATTCTGGAAGCAATGACAATATCAGCTGTGGCTATAGCA +GCTGAGATGTGAGTTCTCACGGTGGCAGCTTCAAGGACAGTAGTGATGGTCCAATGGCGC +CCAGACCTAGAAATGCACATTTCCTCAGCACCGGCTCCAGATGCTGAGCTTGGACAGCTG +ACGCCTTGGATCATCTGCCACTGATCTCTGGTCAACATTTTTATCACCCAACACAAAAGA +AGCAGAGATTTATCAAGTTACTTAAACAACCTGACCCTTTCATCTTTTGCTACACATACT +CTTGTAATTGATCTCTCCATGAATTGTTTTCTGTTTAAAATATCTAGAATGTTTTCTGCT +TCCTGACTTGATTCTAATATTGTATAGGTACTAGAAATGGTTCTAGAAATAGATCTTTAT +AGATGAGAATCTGGAAGTGGTTTGCTGACCTGTTTCAGTCTGAATGAATTCCTGACCTTG +TTGTCGGGAGGAGACAGAAACCTGATCATCTGTAGTGTACGGTGGTATCATGATTACTTA +AATCATCAAATGTGGTTATTGGGAATGATGTGTTTTTTAAAGTGGTACATGAGAGGTAAA +ATTGCTATTGTAGTTGACTGTTGCAGTTATAATTTTGTCAACATGGTCTGTAAGAGTGCA +ATGGATTCGGCCCGGTGTGGTGGCTCACGCCTGTAATCCCAATACTTTGGGAGGCCAAGG +TGGGCCGATCACGAGGTCAGGAGATCGAGACCATCTTGGCTAACACAGTGAGACCCCATC +TCTACTAAAAATACAAAAAATTAGCCATGTGTGGCGGCACACGCCTGTAGTCCCAGCTAC +TCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCAGAGCTTGCAGTGAGCCGA +GATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCCATCTCAAAAAAAAAAA +AAAAAAAAAAAAGAGTGCAATGGAAAGCTGGTAGAAAATGGACATTGTTTAAAGACCAAA +ACAACCACACTACTTTGCTAATCCCTATCAGCTAAAGCCTAGAGAATATATGAGGTATAG +ATTTACATGGTGTTTGACTAAGGCATGCAGAATATAATCTTGACATAGGCTAAGTTTATC +AACATAGATACATAGAGATTCTGAAGTTAAAGTCTTAGCCCAAGAAGTTAGAAGGGGTGC +TCAAGTTTGTTTGTCTGACAGAAACTGTGAATCCACACTGGCCTGCTTATTTTGAGGTTG +TGTTGCCAGAGCTTTCCTAGCATAATAAAGAGAGGTGCATACAAAGGAATAGGAATAGTA +GGAGGTGGGGGTAAAAATATCAGGTGTGATCCACATGCCAAGCCGACCCGCACTGTGTCC +CACAGGAAGCCCAGAAGATGTTTCTCTAAGAAATTAAGGATTCGTTTGTTGGGGAGGGCT 
+GCTGGCATGCTTGAAAAGTACTGTGATGGCTGAATTTTGTGGGCTTGGGGAGATTGCAGA +TTCTCTGATTTCAAGGGGAATGATGTGATCCTAGAGTTGCAAAGAACAAGTGACAGTGGA +GGCGCTTATGCTTTGTGACTGCACTAGAGACAAGGAAGACACAACTAGAATAATGGGGAG +CAGGAATGGAGCGGCCAACAGAATATGTGACTGTTAGGGATCTTTGATGAAGGCTGATTC +TCAGGGAGTGAACTACATCAGTGACCAACTATTTGTCTTTATATAACTGGGTAATGTGGA +TGGATTCTAATAAAGGGACTACTTACAGCACAGCAGGAAAGTCACAAAGAAACCAGACAG +AAGAGTGTAAGTAGTAAGGGGCCAAGCAGTCACCTGACTAGAGACAGTGCCAGCTTGCCA +AGAAGGCACCAGACAGAAGCTGTGATCTTCAGCAAAGGGACACAGTCTGCCTGTGCTGAC +CCTGCAGGGGCAGAGGTGGGGGATAAACACACTCTTCTCTCACCTATCTTCTGCCACCCC +CTCCATTAGCTGAACCCCAATAAAAGCATGAGGGTAAGGGAGATCTCTGAAGTATCCAAT +TCAGGTGAGCCTCCTAAGGAACAAAGCAGAATGCAGAAAAATTAAGAGTGGGTCTAGGGA +ATAAAATAGAGATATGCACCAGAGTATGATGATGTGTCTGGGAAAGAATATACAAATACT +TTTAAAATTACTAGACAATAAACCTGAGATGACACTAATACAGGTAAATCTCATTTAATG +GTAATATATTCCAAGAAATGCATCATTAGGTGATTCTGTGGCTGTGCAGACACCAAAGAG +TGTACTTTATACAAACCTAGACAGCGTAGCCTACTACATACCTAGGCTATATCGTACAGC +CTATTGCTCCTGGGCTACACACCTGTGCAGCATGTCACTGTGCTGAATACAGTAGGCCAA +TGGTCCCCAACACCCAAGCCATGGACCAGCACCGGTTCGTGTCCTGTTAGGAACTGGGCA +CAAAGCAGGAGATGAGTGGCTGGCCAGCCGAACATTACTGCCTGAGCTCCGCCTCCTGTC +AGATCAGCAGCAGCATTAGACCTTCACAGGAGCACAAACCCTATTGTGAACTGCCCATAG +AAGGGATCTAGGTTATGTTCTCCTTATGAGAATCTAACTAGGCTGGGTGCTGTGGCTCAC +GCCTGTAATCCCAACACTCTGGGAGGTCGAGGCAGGTGGATCACAAGGTCAAGACAATCC +TGGCCAACATGGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCTGGGTGTGGTGGCG +CATGCCTGTAGTCCCAGCTACTCGGGGGACTGAGACAGGAGAATCACTTGAACCCGGGAG +GCAGAGGTTGCAGTGAGCCGAGATTGTGCCACTGCACTCCAGCCTGGCAACAGTGGGAGA +CTCAGTCTAAAAAAGAAAAAAAAAGAAAAAAAGAATCTAACTAATGCCTGATGATCTGAG +GTGGAAGAGTTTCAGCCCAAAGCCATCCCCACTCCGTGGAAAAATTATCTTTCACAAAAC +CAGTCCCTGGTGTCAAAACTTTGGGGCCCACTGCTGTAGGCAGTTATATCACAGTAGTAA +TATCTAAACATGGAAAAGATACAGTAAAAACACAGTACATTGGGAGACCGAGGGGGGCGG +ATCACCTGAGGTTAGAAGTTCGAGACCAGCCTGACCAACATGGAGAAACTCCGTCTGTAC +TAAAAATACAAAATTAGCTGGGCGTGCTGGCGTGGGGCTGTAATCCCTGCTACTCGTGAG +GCTGAGGCAGGAGAATCACTTGAACCCCGGAGGCAGAGGTTGCAGTGAGCCAAGATCGTA +TCATTGAACTCCAGCCTGGGCAACAAAATAAAACTCCTTCTCAAAAAAAACAAAGAAAAA 
+AATATATATATATTACAAAATTTAAAGAGAGTGGTACACCAGTAAAGGACATTTAGCATG +AACAGAGGTTGCAGGACTGGCAGATGCTCTGATGAGTCAGTGAGTACCTGGTGAGTGAAT +GCGAAGGCCTAGGATATTACTGTACATAACTATAGACTTTATATGCACTGTACACTTAGG +CTACACTAAATGTATTTAAATTTTTTCTTTCTTTAACAAGTTCATCTTAGCTTATCATAA +CTTTATAAACTTTTAATTTTTTTAATTTTTTGATTCTTTGATAATAACACATCTTAAAAC +AAAAACACATTGAACAGCTGTACAGAAATACTTTATATCCTTATTTGATAAGCTTCATTT +ATTTTGATTTTTTATGTTTTAAACATTTTTGTTAAAAACTAAGATACAAACACACACACT +AGCTTAGTCCTGTAAGGGTCAGGATAATCAATATCACTGTCTTCCATCTCGAAATCTTGT +CCAAGTGAAAGGTCTTCAGGGTCTTCAGTGGCAATAACATACATGCAGCTGTCATTCTCT +ATGATAACAAGGCTTTCTTCTGGAAGAACTCGTGGAAGACCTTCCTGAAGCTGTTTCATA +GTTAATTTTTTTTAATGAGTAGTAGTACTACACTCTAAAATATGAATAAAATCTGTAGTA +TTATACATACTGTTGATCCTTGAACAGTGCAAGGGTTAGGGGACCAGCTCCTGTGCAGTT +GAAAATCCATATATAATTCTGGGCTATCCCCAAACTTAACTTAATATGATATATATCATA +TATAATATATATGATATATATCGTATATATTACATATGATATATATCATATATAATATAT +ACGATATATATCATACATAATATGTATGATATATATTATACACAATATGTATGATATATA +TTACATACAATAAGTATGATATATATTATATACAATATGTATGATATATATTATATACAA +TATATGATATATATTATATACTATATGATATAGATTATATACAATATATATGATAGAGAT +TATATACAATATATATGATACAGATTAAATACAATATATATGATACAGATTATATACAAT +ATATATGATACAGATTATATACAACATATATGATAGATTATATACAATATATATGATATA +GATTATATACAATATATATGATATAGATTATATACAATATATATGATATAGATTATATAC +AATATATATGATATAGATTATATACTACATATGATATACATGATATATCATATATATGAT +ATACATGGTATATCATATATGATATATACGATATATCGCATATATGATATAGATGATATA +TCGTATATATGACATAGATGATATATCATACCTGATATAGATGATAAATAATATATGATA +TAGATGATATATATCATATATGATATATCATATATTATATAATAAATGATATATATTATA +TATAATAAAAGATATATATTACATAATAAATGATATATATTATGTAAAATATATGATATA +TATTATATATTATATCTGATATATATTATATATTATATATTTTATCTGATATATATTATA +TATTATATCTGATATATTATATATAATTATATTATATTATATTATATATTATATATTTTA +TATTATATGATATATAATTATATTATATTATATTATATTATATATATTATATTATATATA +ATTATATTATATTATATATTATATATTATATTATATATAATTATATTATATGATATTATA +TATTATATATAATTTATGATATATATTATGTATTATATATAATTTATGATATATATAATA +TATTATTTATCATATATCATATATATTATGGTATATTATATATAATTGTATTATATATAA +TAATATATAATATACCATAATATATCATATATTATATAATATATAATATAATATAATATA 
+ATGTAATCATATATAATATATAATATATTATATTATAAAATATATTATATTATGATATAC +TATATATTACATGTTACATTATATAATATTTTGTATAATATATAATATATATTATCTATT +ATACTTTATTATATTACATATATAATTATACATTATTATATTTCATATATAATTATATAA +TTATACATAATTATATATATTATATTATATATAATGTTATGTATAACAATATATATTATA +CATAATATTATATATTATATTAAATATGATATTATATATAAGGATGCAGGATGTAAAAGG +AAATTATATATATGTTATATATTATATATATTATATTATACATAATATATATATATATTT +GGGGGTGCCCTATTTCCTATCTCATAACTTATTTTAAGAAGCACAGCATAATAATGTGTG +GACTTGGGATTCAGTTTTTGAAATGAAACACTGAGCCTTCGATGACCTTCCTGTACATGT +GAAAGCACACCTGTCTGCATGGCAGCAGTTGGACCTCACAGTGTGGATTGTGCCTTCACC +CTGGAATGTTTATGCCCTATCGCCATGGTGATGGGATTAGGGATCTGCTGCCCTTGGTCC +TAAGTGCCACTATCTGTGCTGAGTTTTTCAAAGGTCAGAGCAGATTGAACCTTTGTGGTT +TCATTTTCCCTGATTTTGATTTTTCTTATGGGGAACCTGTGTGGCTGCATTCAAGGTATG +TTCATACTGGCCTGTCAAATGCGATCTTTTCAAATTACTAGTTAATGCTTTCAAAATATG +TTATTTAAAAAATTAGCCTCTGTATTTTCCATATGCAGTTATAAATATGTTTCATGGTTA +GGTTTTATTCCTCAATTTATATATTTGATTATTGTACCAAGCAGAGTACCTTTGAAATTT +TTCTTCATTTAAAAAATATGTATCTTGGCTCAAGCCTGTAATCCCAGCACTTTGGGAGGC +CAAGGCAAGAGGATCACAAGGTGAGGAGATGAAGACCATCCTGGCCAATGTGGCGAAACC +CTATCTCTACTAAAAATACAAAAAATTAGCCAGGCATGGTGGCAGCTGGTGTAGTCCCAG +TGTGGTGTAGCCCCAGCTACCTGGGAGGCTGAAGCAGGACAATCGCTTCCACCCGTGAGG +CAGAGGTTGCAGTGAGCCAAGATGGCACCATTGCATTCCAGCCTGTGCAACAGAACAAGA +ATCTGTCTAAAAAAAATTATATATATATAATGTATATTATATGTACTATATATTACATAA +AACATATAATATATAATATATATAATATATATAAAATATAATATACATTATATATAAATA +ATATATATTATATGTCACGTTATGTAATATCTATGATACATATTACATAATAAAAGACAT +ATATAATATATGATATATATTACATATAATATATGATATATATTACATATAATATATGAT +ATATATTACATATAATATATGATATATATTACATATAATATATGATATATATTACATATA +ATATGTGACATATATTACATACAATATGTGACATATATTATATGTAATATATGATATATA +ATATATAATATATGGTATATAATATATAATATATGATATATAATATATGATATATAATAT +ATAATATATGATATATAATGTTATATTACATATAATTATATTATATTATATTATATATAT +CATAATATATTATATAATATATAATATATATCATAATATATTATATAATATATAATATAT +ATCATAATATATTATGTAATATATATAATATATCATTTTATATATTATATATAATATGTC +ATAATATATTATATAATATTATATATAATATATCATAATATATTATATATTATATTATTA +TAAAATATATAATACTATATAATGTTATATATAATATAATATTATATTAGATTATAATAT 
+ATAATATTATATTAGATTATAATATATAATATTAGATTATAATATATAATATATTAGATT +ATAATATACATTATATTATAATATATTATTGTATAATATGTTATAATATAATATATATTA +TATATTATATTATACATAATATTATATTATATATAATATATATAATATTTTATACAACAT +ATTATATATAATATCTTATATAATATATTATACAATATATTATATTGTATAATATATTAT +ATATATTATGTATTATTATCTTGTATATATAATTATATATAATTATATAATTATATATAT +TATATTATAGGTAATAATTATATATAATTATATATTATATTATATGTAATAATTATATAT +AAGTATATATTATATTATATGTAATAATTATATATAATTTATGTATTATATTATATGTAA +TAAATACACATAATTATATATATTTTATTATATATATTATGTATAATTGTATATTATATT +ATATATAATAATTATAGGTTATATTAGATATATTAATTGTATTTTATATTATTTTTAATA +TTAGTTAATATTAATTAATAATAATTAATTTAAATATTAATTATTATTTTATATATTATA +TTATATATTATGTTATATATTATATTATATATTATATTATATTATATATTATATTATATT +ATATATTATATTATATATTATATTATATATAATATAGATAATTATATATTATATATCATT +ATATATAATTACATATATTATATTATATTGCTATATATATTCATATATATTATATTATAT +ATAATAATACCTATTATATTATACATAATATTATATATAATATAAGGATGCAGGATGTAA +AAGGAAATTATATATATGTTATATATATTATATATATTATGTCATATATAATTATATATA +TATATTTTTTTGGGTGCCCTATTTCCCATCTCATAACTTATTTTAAGAAGCCAGCATAAT +AATGTGTGGGCTTGGGATTCAGTTTTTGAAACAAAACACTGAGCCTTTGATGACCTTCCT +GTAGTTGTAAAAGCCCTCACCTGTCTGCATGGCAGCAGTTGGACCTCACAGTGTGGATTG +TGCCTTCACCCTGGAATGTTTATGCCCTATCGCCATGGTGATGGGATTAGGGATCTCCTG +CCCTTGGTCCTAAGTGCCACTATCTGTGCTGAGTTTTTCAAACGTCAGAGCAGATTGAAC +CATTGTGGTTTCATTTTCCCTGATTTTGATTTTTCTTATGGGGAACCTGTGTGGCTGCAT +TCAAGGTATGTTCTTACTGGCCTGTCAAATGCGATCTTTTCAAATTACTAGTTAATACTT +TCAAAATATATTATTTAAAAAATTAGCCTCTGTATTTTCCATATGCAGTTATAAATATGT +TTCATGATTATGTTTTATTCCTCAATTTATATATTTGATTATTGTACCAAGCAGAGTATC +TTTGAAATTTTTCTTCATTTAAAAAATATGTGTCTTGACTCAGGCCTGTAATCCCAGCAC +TTTGGGAGCCCAAGGCAAGAGGATCACAAGGTGAGGAGATCAAGACCATCCTGGCCAATA +CAGTGAAACCCTGTCTCTACTACAAATAGAAAAAATTAGCCAGGCATGGTGGCAGCTGGT +GTAGTCCCAGTGTGAATTGGGATTCAGTTTATTCCCAAATTCCCAAATTTTATATATATA +TATATATAAAATATATTAAATATATTATATATATACTATATATTATATTATATATAATTA +TATATATATATATTTTTGGGTGCCCTATTTCCCATCTCATAACTTATTTTAAGAAGCCAG +CATAATAATGTGTGGGCTTGGGATTCAGTTTTTGAAACAAAACACTGAGCCTTTGATGAC +CTTCCTGTAGTTGTAAAAGCCCACCTGTCTGCATGGCAGCAGTTGGACCTCACAGTGTGG 
+ATTGTGCCTTCACCCTGGAATGTTTATGCCCTATCACCATGGTGATGGGATTAGGGATCT +CCTGCCCTTGGTCCTACGTGCCACTATCTGTGCTGAGTTTTTCAAAGGTCAGAGCAGATT +GAACCATTGTGGTTTCATTTTCCCTGATTTTGATTTTTCTTATGGGGAACCTGTGTGGCT +GCATTCAAGGTATGTTCATACTGGCCTGTCAAATGCGATCTTTTCAAATTACTAGTTAAT +GCTTTCAAAATATGTTATTTAAAAAATTAGCCTCTGTATTTTCCATATGCAGTTATAAAT +ATGTTTCATGATTATGTTTTATTCCTCAATTTATATATTTGATTACTGTACCAAGCAGAG +TATCTTTGAAATTTTTCTTCATTTAAAAAATATGTATCTTGACTCAGGCCTGTAATCCCA +GCACTTTGGGAGGCCAAGGCAAGAGGATCACAAGGTGAGGAGATCAAGACCATCCTGGCG +AATACAGTGAAACCCTGTCTCTACTACAAATACAATCAATTAGCCAGGCATGGTGGCAGC +TGGTGTAGTCCCAGTGTGAATTGGGAGTCCGTTTATTCCCAAATTCCCAAATTTTATATA +TATATATATATAATATATATAATATATAATATATTATATATATTTTATATAATATATAAT +ATATATAACATATATATTATATATAATATATACATTATATGTAATATATTATATATATTT +TATATTATATATAAAATATATATACTATATATAATATATATAGTATATATAATATATATT +ATATATAATATATATAGTATATATAATATATATTATATATAATATATATAGTATATATAA +TATATATTATATATAATATGTATAATATTATATATTATATATATTATATATATTATATAT +TATATATTATGTATATAATATATATTATTTATATATTATATATAATATATAATATATAAT +ATATAATATATAATTATATATTATATATATTATATATTATATAATATATAATAAATAATA +TATATTATATATATAATATATATGATATATAATTATATATTATATATTATATAATATATA +ATATATAATAAATAATTATATATCATATATATAATATATATAATATATATAAATAATATA +TATTATATACATAATATAATTATCATATATTATATATATGTCATATATAATATATGTCAT +AATATAATATATGTCATAATATATATAATATAATATATGTCATAATATATATATAATTTC +CTTTTACATCCTGCATCCTTCAACCTGCATCCTTCAACGTTCCATCCCCCACCCCACAGA +TTAAGTTATTCCCCTGGGGAGAATATGGCGAAGTCTATTTTAATGCTGTTTTTAACCCAA +TTAAGAACCTATGAAATCATTACTTTCCAAAACTTTGGAACAAAGCCACAGTAGTAAGGA +TCCGTTGGAGGCTTTTCACACAATAAAATGTAACTCTCTTTGTTTTTAACATGTTTTTCC +CTTCCTCTCTTCTTTTTTTGTGAAATGTGTATTTACTTTAATATATTTGTAGTAAGTCAC +TTCCATGCACATATTAATTTTTTAAAGTAATAAGCATGTGTATTGTCTACGTGTGAAAGA +AAACACACATTTATTTTTATGCTTTGGAAGTTATCCAGAATCATGGAATTGTCAATCACA +GTCAATCACCCAACCTACTCACCTTTCCAGTGTAATCTTAGTCAAATTTTTTTTTTTGTT +ATCCAATGAGATGCAGTATTTCAACTCAGAAAGATAAATAGAGTGAATTTATAGAGACTA +TTAACTAAGAACATACAGTTTGATTTATACTCAGAAGCAAGTAGATTATGTACATATATA +TGAAGATAAAAATTAAAAGGATAATTGTGTAAATTTGCATGTAGAGGGCTTTGAAAACCT 
+GTTTACTTGTGAATGCTGTTTTGATGTATTGTGTCTTTGTTCTCCCGACCCATCATCCAG +AGCTCTCTGCAGGAGCTAAGTGCTCATCAGTTCCATGACTTGGAAACTGTCTAAGTTTAG +AGGCACTTGTATTTGTTAGTAAATAAGGCAAGATGATATTGTTTCACAGGTTTTAGTGCC +GAAGACTGAATAGATAAGCTGCTCCACCCAGTACACTGGTGTTCATTTCATGGTCATCTC +ATCTGTTAACCATGGATATAAAACATTTATCTTCAATGATGGGGTTTTACCATGTTGGTC +AGGCTGGTCTCGAACTCCTGACCTCAAATGATCCACCCACCTCCACCTTCCAAACTGCTG +GGATTACAGGTGTGAGCCACTATGCCTGACTGATTATTTTCATAACCAAGAAAAGAAATA +AATACAATTAATGCTGGTGCATGGTATTAAATCTAGTTTTTAAAAAATTCACACATAAAC +AGGGCAGAACCCTATACCCTCCATGATAAATGCAGTAGCAGTGTATGTGGGTCTGTGGAG +GTTGAAAGGGACTTGGTCGATGTCAAGAAGGTAGTGGCAGTCCTGCTGGGCTTTTAAAGG +GTCTGAAGAAGTGACAGGATGCTGTGGTTGAATCGTAGCATGTGTTTTAGCATTTGTTCA +TTTGGAGTTGATTATTTCACGTTGCTTTCATTTGCCATTACCTGGAAAGCCAAGGGCTCT +ACTCTCATTTCCTTGCTGCTCTTTCTTTGCCTTCCTTGGTCCGTGAAGAAGATGGTCCAG +GAGAAGCTCATTCCATGCTTGTTAACCAGGCACGCCCCTAAGTTCCAGTCCCTGAGTCAT +TCATGAGTAGCACTGCCAATGAACTGACAGCCATGCTGTGTCCCTCCACATCCCCTAGGT +GACTCGAAGAAGCCTTCCAAAAAGCGTGTGAAAAGGAAGCCCTACTCTACCACCAAGGTA +AAGTAGCCTGTCTTTGCCTAAGATGTAAATGTTGTTTTCTTGGATCCTTTATTTTTCAGT +TGATATCAGCTATGGGAAAATTCTCCACTACATTATAGGTGTTAGATAATATTTCCTTGG +GGATGGAGGAGGTGTATTTTACCAACTGACACCTGATTCCAGAGGACGTGCAAAATTGGC +AGTGTCAGATAGTACACTGGGTGTTAAGGGATGTTTTCTTCAGGAACAAGCTTTCCACTT +TAGATAAGAATTCTGCAATTGCTACTCAAAAATTACCTAGACAGAAACATTCTTCAACAA +AAGCTCCTGTGCTTTCCTAAGGCAACTCTACTCTAGAGTTGGGGCTTTTGACTTGAACCT +TATTTCCAGTCTTGGTTACCCAGAGTTTCCAAGTGAACAAAAGACCTGTGTGAGCCATCC +ATAGCATAGCCTGATTCTCAGAGTGTTTTCCTTCTCTAATTACAGGTGACTTCAGGGAGC +ACATTCAATGGTACGTATTCTGGAATCACTCACTGGTTGTTAGAAAAGGATTCTACAGGA +AATCTGGAGCTTAACTGCTGGCTTTTGTCTGGAGAGCCTCCATGATCCAAGACATCTGGT +GGGAATGAGGATGTAGGGTATAGTAAAAGAAACTGGTTTTCCTGGTGACATACTCTTTTT +ATCTATGTATAGTTTCTGGGAACATGTTCACATTAGGTTGTGTGTGGGTATGTGTGTATT +AGGGCGGGGGTGGGGTGAGGTGGTCTGTGTGCAAGTCTGCATGATTTGCTTGTGAATGTG +TGTCTATGTGTGTTTCCCCTAGGAAAAAAATGTTGTGTTTACCCAGCACAACTCTCAGTG +CCATGTTTCTTAATTTAACAAATCAGACCACATACTTTACTTACATTAGTTCACACCTCA +TCATCATCATGCCCATATGTTGTGAGCTTGTTTATTGAGCCCACATGCCAGATGGAGAAA 
+CTAAGCCACATAAATAAATGTGCCCTGGTTCACTTGCTGCATAGTGAAGAGTCAAAATGT +TTCCTCATACGGTGCTAATGTTGAAGGCCTGAACTACAACCACTATTTATCAGCCAGTGA +AGAGATCACTATTCACCATGCAAGGGAGTTCCAGCACCCTCTATGCCTGGAATTACCCAC +GCCTGCAGAGATCCCAAACGCCATCCCTCACATAAGACAGCCTCATGATCTCATAATCCA +GGTAGCTATGTAGACATCTTCCTGCAGGTGTCACATAGTCCTTAGTGTGAAACCAACATA +GAAAGCCCATGTTTCTGATCAAATCACAGGTTCTGAAACACTAAGGGAGGCACTAAGTAG +GACAATGTGGTGCCTGCGTGTCATAGCTGGGTCTCCTCAAGACATGGATCAAGTCCAGTA +AGAATTGGGGAGATGCTTTAGAGTCTTGATGGAGTTATCACCACAAGCCCTCTGAGCTAC +ACACTTTAGGGATCATGACCATTAAGTACTCAAATTACCATTTGGTTGTTATCCGGGTAT +CCGTCGTCCTTGTGGCAACCCTCTTGTGAAGCTGGTGTGGACAGCCTCAGTGCTGGAGCT +GTGCCTGCCTTCTGAGTGGACCCTTTCTGTGTTAGCAGGTGGGTACAAGCGTGGGGGTCA +GCACACTCAGTGGATTTACACACACAGCGTTGAAGAGTAAGGCTGGGCTTCATTATTTAT +ACATTTTCAATAAATGATGATCTTCATAACATAAAATCAATGATGTAGTACACTAGAATA +CTGTCCCTAGTATTGAATCTTGTCTCTCAGCAAAGGGTTGCTTAAAGTCACGTGACAGAT +TCCATTCAACTGATGACACATGCTGTAGCAGCAGTTAAAGCAGTCATTTGAAAAGGCTTT +TACTATAAACTTACGTGTGAGCCTGAAGTGGGGGATAAAAGAGGCGATTAGCTCCCCTGT +GCCATGTTTCTCTTATGTGCGTGGTGGAGGAAAATTACACAGGAAGGTGATGGAGAGAAC +AGAGCAAAGGATTGGACAGGTCCATTGAACCCATAAGACTATGGTGAGGTTAGTGAATGA +GACTGGTCATTTTAGGTCAAATTTTACCCAGAGCTGGTGCAGCCACTGCCCATTCTTAGC +CAGACCTTATTGCAGGCAGCTCTGATCAATAGTCAAGGAGGCAGTGGGGGTTGCAGACTT +AATTCATTAAATCACCAAAGCACCAGCCCACACGGCCACTTTTCCAGTTAATTCACAGTA +GCTTGCATATTCAGGTTTGATCAGTGGAAGGGAAGTTACTCTTTGCAGACCCATCTTTTG +ACAATCATTTTGCAGTGTCGGAAGGTCTGAGCAGCCTCGGGAGGCAAGCAGTCCCTGGTC +CCTCAGTGTAGTCACTGGAGGAGACAGTCACTGAGAGGCAGCTGGCAGGGTGAAGGGAAA +GGGGAGGCAGGCCACAGAGATGACAGCCTTTAAGCTGTCATACTGGGAGGTCAAGGATCT +GAAAGAGGAAGGAGAATTCTTTATCATTAAGGACCTGTCCTTATCTCAGGCATTTCCTCC +AGAGCATCACCTTTGTCCACCCACACACCTTGGGCTAGGAGGACTGGAGAAAAACAGTGA +GGGGTCTCTTGGGTCTCTGGCACAGGGCGTGATGAAGAGGTGGCAGTTTTTCAGGAATCT +CTCTCTCTAGGGAACCAAATACATTTCCCATCTCAGGTCCTTCACTCAGCGGGGTTGAGG +TTCTGCTCGTCACTTATCATCTCTGAATGTCAGCACCCTCAAGTGTAAAATCTCAGTCAC +AGCCCCTCCTCTGCACCCCCTGCAGGGCTGATGTTCTCCATAAACCATAAGGCATCATGC +CCACGGAAAAGCCGAACAGGAAAGCATGCTCCACTGCCCCGGAGCCATCCAAGTTCCCCC 
+TCCATATTCCGCCACTGCTGAGTGTCCAGCTTATTCCTCCTGGCCTGTAGTAAACACTTA +GAGAACATTACTGAAGTACCAGTCCTCTCTAAGGTTTTCCTGTATTTAGTGATTTTTTAG +CCCTGTACTGTGATACTAAGAAGTAGGGCCTAAATAGGGCCTAAAAAGTATTGCTAAAAT +TACATTATGACAGTGCAGAGAACTGAGGGCAGAGGGAGGACATGAGCTTGCCAGGTCCAC +ATGGCTTAGTGGAATTTGAATCCGGGCCCCTACTCTGCACCAGCCCTGCACTCACAGTCA +TCCTGCTGTATTCTCCTCTCCAGGAAGGCACTGCCCACGCAGTCTGTCTGATAGAGGTGT +TGAGTGCTCACTGAACTCCGTGATCTTCCTGAAACCCAACTTTGATTCAGTGGGCTCTGC +TTGGAAGCCTGTAAAGAAAAGGATCATAAGTTTAAACTTAGAACAGATTATCACTATTTT +CCCTCTGGTCCTCTGTCAGCAAGATGTCAACAGCCCTATCTATTGTAAATGCATTAACCA +GCATCTTCTCTGATAGAGAATATAAGAAGATATGCTGTGCACACCAACCAGTGTAGGAGA +CCTCATGGCTCCCGGGTAAAGAAGAAGAGGTACCCACAAGAAGGTACTGTGGAAGTTCAT +TAATTAAGTTGATTCAAGAATTGCAGTTGCGGGGAGTATTCAGTGTCCCATATGTAAGAG +GAAACTATGAAGAGACTAAGCCATATTTTTTAATGTGTCAGGATTCTAATTTGCCTGGTC +AGTAAATATTGCTACCACCACAAAAGTAAATATCTACTTAAAAGTCAATTTTGGTTCATG +TTTAATGATAGACAATGTTTCAAGCTAATGTCTAGAACTTACCTGGTTGTTAAACATAAG +CATAGATCTCCCTGAAAGAGTGGTGCTATATTATTATTTTTCAATTAATATATTTCTTTA +GAGAGTTTTAAATTGACATAAAAACTGAGCATATGGCCGTGTGTGGTGGCTCACACTTAT +AATCCCAGCACTTTAGGAGGCCAAGGCAGGCGGATCATCTGAGGTCAGGAGTTGGAGACC +AGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATCCAAAAAAATTAGCCGGGTG +TGGTGGCAGGCGCCTGTAATCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAA +CCCGGGAGGCAGAGGTTGCAGTGAGCCAAGATCATGCCATTGCACTCCAGCCTGGGTGAC +AAGAGTGAAACTCCATCTCAAAATAAATAAATAAATAAATAAATAAATAAATAAATAAAT +AAAAATTGAGTATATAATACACGAAGTTCCCATATTATTCTGTCTCCTCACCCTCACTTC +CTAATTTCACCTATTAGTAACATCTTACATTACTGTGGTACATTTGCTAGAATAATGAGA +AAATATTGACACATTATTATCTGAAGTCTGCATTTGCATAATGTTCATTCTTTCTGTTAT +ACATATATATGAATTTTGAAATATTTAAAACATTATGTTCACCCTTATGGTCTCATAAAG +AAAATGTTCACTTCCCTAAAAATCCTCTCTTCTCATTAATCTCTGTCCTCTTTCTCCAGA +AACCTTGGCAACTATTAATATTTTTACTATCGCTTCAGCTTTGCCTTTTCCAGAATGTCA +TATAGTTGGAATCATATATTATGTAGTTTTTTCAGATGAATTTATTGCACTAAATTGATG +TACGCTTTAGCTGCTTTCATGTCTTTTTTATGCCTTAATGGCAAAAAATGGCACATTAAA +TCACCAAATAATATTGCATTAAATGAATTTTTGTCTTTTTATTCACCTGTTGAAGAATTC +GGTAGATTTCATGAGAGAAACCATCTGGGCCTGGTGCTTTCTTTTTCGGAATGCTCTTAA 
+TGTGAATTCAACTTATTTAATAGACATAAGTTTATTCAAATTAGGATCCTAGCATGACCT +TGGGAAGATTGCCTTTCAAGGAATTGATACATTTCACTGAGGTTATCAAACTGCGCTCAT +AGAACTGTTCATGATATTCCTTTTAATGCCTAACAGTTCAGTAGAGATGGCTCCTCTTTT +ATTTCTGAAATTGGTCATTTGTGTTATCTTCTTTTTCTTGGTTAGCCTGCATATCAATTC +ATTCATTGTAATGAGCATATCAAAGAACCAGCTTTTGGTTTTATTGATTTTCTGATGATT +TCAGTGTTTTAATTTTATTGATTTCTGTGATGTTGTTTATTACTTTTACTTGCTTTCCAT +TGCATTCCTCTATTTTCTATAGTTCCCTAATTGAAACATGATATTACTGATTTTAGGTCT +TGTGATTTTTAGTATATTGCATCCAATGCTATAGATTTCCCTCTAAGGACTGCTTTTGCT +ACATCCAGAAATCTTGCCAAGTCACATTTTCTTTTAATGTAGTTAAAAGTACTTTTAATT +TTCTATTGAGACTTCTTCTTTAACCCAACAGTTATTTAAAAGTGCATTGCTAATTTGCAA +ATATTTGGGGATTTTGTGGCTCTTTTACAGTTGTTGATTTTTTGTTGTCAGGTGTGTGTT +GCAAAAGCAGTCGTCTACCTCATCTTGCCACCACCCAAGATGGCCCAGGATGTGGGCTCT +CCCTGAGTGAATCTTTGGCAATCTGCCAACCTGATGTGTTCGGCCTCCTTCTTTAGTCTG +AGCTTGCCTTCTGCTTAGAAAGGGCCATTCTCAGTTCTGGCAGGGAGTTTTCCCAACATT +GAGAAGGTGGCATTCTTACTCCCCACTGCAGCCTGCACCTCTGACCGGTGGTCAGCAGAC +AGGACAGAGGTCCTCATTAGACAGAGTTCAGCAGGGTCTCTGACCAAAGTGCATCTTCAG +AGTCTGCACCTACCCACTGTGACCACGGGCAGGCTCTGAGTCCTAAAGCAGGAGGAACTG +TGCGACCATCCTGATTGGAAATTTGTGAGGATCACCGTGTTACTCAAGTAAGGTCTTTGG +AAAGTGTCGTATTACTACTGTTTGTGAACTGCTTGTTGGTGGCCTGGCTGAGCCACACAC +TTTATGAAAACCAGGACCCCTCAGCTGGTGTGGGTGTCTATGCAGCCTGAGACCCTCATG +TGAACAGCCTCGTGGCAGCTGTCTTTGCCCCTTGCCACCATCAGTGCCTCCTTGTTCCTG +GGCACTGCTTTCTCTGATGGTGCTCCATTGTTTTCCTGCACCTCAGTGTCTACAGCTGGA +TGTCTCTTCCGCAATCTAGGCGAGGGGGCATCAATGGCAGTTCTGCTGTGGCACTGCCCT +CCTTCTTAGCTTGTCTTGCTCTGTCTTAGGCTCCCTCAAAGATCCCACCCTTCAGGTTCT +TCCACAAGTTTCTTATTGAAATCCGAGCAGAAAACTATGACCAATATGACCAATCCTATA +CCCACTGAAGACATGAATGAAGAATTAAAACAATTCTCCATGGACTCTACCATATAGATC +CCTAGAAGTAATTTCTAAAAAAAAAAAAAAATCAAGGAAGATGTAATAGTTTTCCATAAA +TTAGAATACCCTACATGTACAATTAAATGAAATGGCTAGTATAGTCTTGAAACCAAAACC +AGATAAGGTAAATTAAATTCTGTGATATTTTAAAATACTGTAAATTCTGACTAATGTGAG +TTAATCTCAATATATGAAATAGTAGATTAACATTGAAAATGCAATAAATAAAATTAGCTA +CCTCAAGAGTTTAATGGAAAAAAATGTGATTATTGCAATAGATTCAGGAAATTCATGAAT +AACATTCACCCTATATTTGTAGGACAACTATCTAACTTTAAGTGACCTGTGACAACCATT 
+TGAATTAATGCTGCTTTCACAGCATATCTCTTGGCTTGTTAAAAACCCGACAAGAATTTC +CGTAACATTAATTTATTTTTAACACCTATATTGGGTGTGAACCCACCATAAAGTTTGCCC +ACTGAAAAGGTCTACAATTTGATGCTTTATTAAATTGATACTGTGTGCACCCATCACCAC +GATCTAATTTAAATATGTTTCCCTCACCCAAGTTCTCTCTTGCGCACTGGCAGTTAATCC +CCACTCCCATCTCCAGCCCTAAGCAATACTGCTGTGACATTCCATCTCCATAAATTTCCC +ATTTGCTTAATAGAAATGGACATATATATATATTTGGAATCTGACTTCCTTCATTTAGCA +TACTATGTTTGAAGTTAATTGACGTGTTAGCACGTGCTGGTCATGTGTTTTCCTTCATAG +TCTGCTGTGTTTACTCATACAGATAGTGTTTATTCATTTATCAGTTAATGGACATTTAAT +TGTTTTGTTATTTTCTTTGATGAGTAATGTAGCTTTGAGCATTCATATACAGTCATGTAA +TGCATAATGACATTTTGGTCAAAAAAATTTTTTTTTTTCTGAGACCCAGGCTGGAGTGCA +GTGGCACAATCTCGGCTCACTGGAACCTCCACCTCCCAGGTTTAAGCAATTCTCGTGCCT +CAACCTCCCGAGTAGCTGGGACAACTGGCACACGCCACCATGCCTGGATAATTTTTGTAT +TTTCAGGAGAGACAGGATTTTGCTGTGTTGGTCAGGCTAGTCTCAAACTCCTAGCCTCAG +GTGATCCACCCATCTCTGCCTCCCAAAGTGCTGGGATTATAGGCATGAGCCACCACACCC +AGCCTAATTTTTTTAAGAAAGAAGGGAACTATTTTCTAAATTACTTTTGCCAATTTATAT +TTCTACCATGATGCATAGCACTAATTTCACCGTACAATGTATGGTAGGCCCCAATATGTA +AGAAATGATGAAAGTAACACATAAAGATTGGTATAAAACAAATAAGATTATCATTGTTGC +TATCATCTTTGTCAAATTCTGAAAACAATCTGAGTATATTTTTATATAAATATGCTTGGC +AACATAGCTGAAAAACGCATTATCAGTTACATTTATCAGTAACAAAGACATAAATTTGAA +GGGGGAAAAACACTTGTACTAACAACGCAATGTCAGAATTAACATAAAAATTCTGCTGGT +CACTTTGGAATATTTAATTGCCTGGGGCAGTGTTTAGTAGACAAATGAGCATCTATGGAG +CACCCAAAGTAGGGGAATCAACAGAACTTGGGTTTCAAAAGTTATCTGGGTTTAGAGCGT +GAAACTTTGTTAGAGGACACACACCTTGCATGAGCGAGGTGCCTTGGTGTGTGTGGACGT +ACCATTATGCTTGGAGGTACAGCATAATGGTGGCTTCCTCCAGAAAGGGACATTTGGGGT +GGATTCATTCCATCTAGACAACACAGCCTGATGTGGCATGGACATGAATGGAGGTGAAAT +GGTCAGTAGTTGAGAGGATCAGTCCTGACAAGGGCCGAGGTGAAAAACCTGGGAACCCCT +TCAGGTGCAAAGTCTTCAGTTGAAAAAGGAGGTGGTCACAGGAAATACTGAGACAGGACA +GCAAAGCATGGGAGACAGAGTTCTTGGCCCTGCAGGGTGAGTACTGTGGATTCTCAAATT +TTCTCCTCTCTCCATTAATTTCTTTCCCAATGCAGATGACTTCCATCATACAGTCTTCAG +CAATCTTGAAAGATTGGACAAGCTTCAGCCCACTCTTGAAGGTAAAGGAAGGCAGCTAAC +AAGACTGGCATCTGGGCTTGGCTGTGCGTGTTTTCTATCGTGGGGAAATATATATAACAC +AATATTTATCATTTGAACCTTTTAACCAAAGTGTGCACTCCATGGCATTCAATATATTCA 
+CAGGGTTGCATAACCAGCACCACTATCTACACCCACAATTTTGATGATTTCTTACGAAAC +CTTGTCCACAATAAGCAATATAGCACCTTCCCCCTATTTCCAGCCCATGGTGATTCCTAT +CCCACTTTCTCTTGTATGAATTTGACTATTCTAGGCACTTCATGTAATTACAATTATACA +ATATATTCCTTTTGTGTCTGGCTTATTTCACTAAGCATAATGTTCTCAATGTCCACCCAT +GTTGTAGCATCTATCAAAATGATGTTCGTTTTTTACAGATGGATGATGTAGCATTGCATG +CAGACCACTTTGCTTTTATTACATTCATTTGTTCACTGATGGTTGGATTATTTCCACCTT +TTGGCTCCTGTGAAAAGTGATGCTACAAACATTAGTATACAAACATCTGTTTGATTTCCG +TTCTCTATTCTTTGGGGTGCCTAAGAGTAGAGTTCCTGGGTCCAACAGGGGTTCTATATT +TAACCTTCTGAGCCACTGCAGACTGTTTTTCACAGTGGCTGCAACTTTATCCATTTCTAC +CATCAATGTATCAGGGTTACAATTTCTTTACGTCCTTCTTCACACTTATTTTCCTTTAAA +TCATCCTAGTAGGTGTATATTGGTGGCTGCTTGTGTTTTTCATTTGCATTTCCCTAATGA +CTAATGATCCTGAGCAGCTTTTCCTGTGCTACTATCTGTGGCTGTATCTTCTTTAGGGAA +ATATATGTTGAAGTCTTTTGCCCATTTTTAAAGAGTTGTCTGATTTTTATTTAGTTAGTT +TGTTGTTGTAGATTTTTGAATATATCTTAAATATATTTAAAATATATTCTAAATTTTAGT +CTCTTACAAGATAAATGATTTGCAAATATTTCCACCTTTGTGTAGAACTTTAGATTCACA +AACTTCATTAATTTGTATGAAATCCTCGGCAGTTGACCCCAAACAGATAAGACTGAAGCA +GTATTTTAGGAATAGTTGAAAGTATGATCACCACAAAACATAAGCGTAATCAAATCCTGC +AAGCTACATGTAAGGCACAATGACAAATAAGGCAGCAAAGGGCCATCTGGTGTTTAGTTC +ACCACACTTGTTGCAACTGTTTGCGCTGCAGAGTTAAAACACACCAGCATTCAACCCATG +TCTCCTCTCTTGAAGTAAACTGTCGTATGTTGGCTGGCCTGAACAAGCGTAGATATTCTC +CATCCTCAATTAATATGCATGCATGACAAAGAAAAGGAGGCCTGGATGAAAAAATATTGT +GTGATTAATAATTATGCTTTAATTAATTTTAAAAGATATAATTTCAGTACTTCTAATTCT +CCCATCAGCAGTTATAACAAAGGATTAGTGAATAAATACCATAGACTGTTTTGCCTAGAA +TCGAATCCAATCTGTCTATTAAACTTTGCTTTTATTCAAGTGCAAAATGCTAAAACACAT +AATAACTGCAGTGACAGCCACTGTGGATCCTCAGAGGTAAAAGTAGTCTTGGGACATAAA +TCCTGCAAGCTAATATCGTTTTTACAGGGTTTAGAAAACCATTTAGCTGGGTTTCAAACC +TCACAGTGTGAGCAGTGGGACTCTCATCAAACTATAGCATGTGCTTCAGTACCATTTGTA +GACTGACTCATTCCCATTGCCTTAAGTTGCCATCAGCAAAATGCCAGGGACTCTATTTCT +TGCTCCTTAGCTCCTCGTTCTTGCCTGTCTTTCCACCAGGGAAGATTTTCTAGCAGGAGC +TCAAGCTGTGCTTTTAATGAAACACATCCACACACACTGTCCTGTTGTCCACATTGAGCA +GAGCTCCCTGAATAACTCATGAACAAAAGCATCTATGACTAACTGTTGCTCTGTGTCCTC +TTAGCCTCTGAGGAGTCTCTAGTTCACAAGGACAGAGGAGATGGAGAGAGGCCAGTCAAC 
+GTGAGGGTAAGGTTGCCTTGCTTTCTCTGAAATAGAAATGTTCCTTTCTTGGTGTCTTTC +TTTTTCAACTGACTTTACATGTGAAAAAATGACAATGTCCATGACAGGTATTAAATGCAG +TTTTCTGAGGGGGAGGAAGAAGTGACTCTTAGCAACTGATATGTAATCCAAAATGGCATT +TAGCTATGACGGCTTCAGGTTGTGGACTGTATCCTTGGGGTCCTTGTCCTTGGAAGCAAT +GTCTTCTCCTTGGATTCAGTATTTTGCACTTGCCAACCTACGTGGACCTGAGAGATACAC +CGTCCAGAAGCTGATGCCTTTTCCAGTGTGTATCCTACCCTTGTTTTGGAGGCCTTGAAG +TTGACTACACTTTCTGATCAAGTTTTCAATATTCATTGAGAGAAACACAGCCTTGTGCAA +ACAATCCACAACGTGACATACCCCTCAAAAAGCTTTGTTTCTGTATTGCAGGTGGTGCAG +GTGGCCCCTCTGAGGCATGAATCTAGTAAGTATTCTGGAATCACTTGCCAAGAAAACAAT +CTGGATGCCAAGAAAGGTGTGGCATCCTTGCCTGGTTTCAATGTGAAGAGCCACCCTGAT +CCTGGGATTGTGATAGGAATAAGTATAGGGGAAGTGTTTTTTTAAAACCTGAATTCCCCA +GGGAAAAATTATGGCCAAATTTTGAGGAAGCAGCTGTGCTCCCTTTTGGGTGGTGCTGAG +TTGGGTGCTTGAGGATTGGTGGTGTCTTGTGTGAGGCTGCATCGTGTGGTGTGAATGTGT +GTGTTTCTGTACAGGTGAGGCTGTGTGTTTTCTCAGGAGAGATTTCCCACTTATACAACC +CAATCACCAGTGTCCACTTCTAACAATAAAATCCACCCCCGCTCTACTCTCTCTGTACAG +TGACTCCTCCACCCTCACCAGAGCCATCCCCGGGTCTGCCTTATTATCCCCACTGCTCAG +GTGGAGAAACTGAAGGGCCAAGGGAGTGGCCCCAGCTCCCGAGTTCCTGAATGAAAAAGT +GAAAACACGAACCCAGGAGTGTGGGCCAGTGCTGACGCTGACATGGCACTTAGTCATGGG +GTGTTCACCACCACACAGGGAGTCCAACATTCATGTATAAGCCCTAAAGCACCGAGCCCA +AAAGGCCCCAGACACTGCCCATCATCATAAAGTGGGCTCCGTGGTCACACAACCCAAGGC +AGTTATAGGCTCATCTCCCCACAGACAGGCATAGTCATCAGTGTGTCAAAAGCACAAAGA +TCCCCAGGTGTTTGGCTCAGCTCACCAATCCTTTTTTTTTTTTTTTTTTAACTTTTAAGT +TCAGGGGTACATGGGCAGGATGTGCAGGTTTGCTACATATATAAATGTGTGTCATGAGAG +TTTGTTGTACAGATTATTGCATCACCCATATATTGAGCCTAATATCAGTTATTTTTCCTG +ATCCTCCCCCTCCTCCCACCTCCCACCCTCCAGTAGGCCCCACGCTCACAAATTCTAAGA +GGAGTGGGGGACCACAAAGGCCAGTGTGGCCCACTTCAGTTGTGAAGTTAATTTGCTCAG +CAACTGGCCAAAGTCTATAAGGATGGGTGATGTATTTTAGTAGATTTAGTAATACTATCT +TCCCAAGCCCTAAAATGCTCAAATCCTGCCAGCCAAAAATGGTGAGGAGGGACAGATAGG +AACTCTGTGTGGCACTTGGTTATTAGCCTGGCTTCCATCCCTTAGTGGCAACTCTCTTGT +ATATGTGGGTTAAAGACCCTCAGCCTCAAGCCAAGCCTCCTCCATGAGGAGCCATCTCAC +TATTGACCGGCTAGTGCCGGGTATGGCCACCAGCCCAACTGAAACAAAATGTTGCTTTAA +AACACGTGTAAATCTCATACATACAACAGGCAAATGCAGAAGCAGTGTGGTCTCGCAAGT 
+TGTAAAGAGGACAGTCGCAATTTTGCTGGACTTCAACCTGGGTAGAAGACATGAGGGAAC +TCTGTCACTGAATCACGGCAGAGTTCAAGGCCACTTGCAGACTATTTCATGTTATAGAAG +GTGGCCTTTAGCTACTAAGCAAAGGCCTCTGTTTCTCATTTCTTTCCTGTTCATCTTCTT +CGTCATCCTTCTTCCGCAAGGGAAACGAGCCCAAGCAAAAGACAGTTTCAATATTAATTT +GACCGAGGTTTTGTGCAGTTAATTATCATCCAGGTAATCAGGTGCAACCCAGTCTGCCTA +GCAGCCCCCCTATCTCTGCTCTGTGTTTTCATTTAATAAACATTTTGGTCTACTTACTAT +GTGCTAGATTTTCTCGAGACCAAGTAAATGAGATAGAATCTTTATGTTGGCAGCTAAGTT +AGATTTAACATAACTGACAAAAATTAAAATTTCTGATTTCTTGTAAAAATATTTTGTATG +TGTGAATGCATACTGAATGTAAAGTGGATAAAAAAAATCACACTTGCACTCATGGAAGGC +TTTTCATGAATTTGTCAATTTCTATTTTTTTATATTTCCCCATTTCACCGGATAATACAT +ACCTGAACCTGGAAAATGATTCCCACAGCAGAAAGTGTTCTGAGCCACATCCCTTAGCTT +CACTAGTGCAGGTCCACCTGGGAGTATGTCCCACCATCAGCTTGACCCATGCTGTAATCA +GCCACCTCCATGCATCACACCAAGCAAGCCCCTGGGTGATTCACAGTCTCCAGCACCAGG +GCACTGACCTTAACTCTGTGTTCTTCTAGCTCCCTATGAGGACACTGTACACGACATCGC +TAATGAGGACGCTGTATATGACATTGCTACTGAGGACGCCATATATGACATCGCTAACGA +GGACGCCGTCCAGGGCATCGCAAAGGAGGACGCCGCCCAGGGCATCGCTAACGAGGAAGC +CGTCCACGACATCGCTAACGAGGATGCCGCCCACGTAATCGCTAACGAGGACGCCGCCCA +GAGCATCGCTAACGAGGACGCGGTCCACGACATCGCTAACGAGGATGCCACCCAGGGCAT +CGTTAATGAGGACGCCGCCCACAATATCGCTAATGAGGACGCCGCCCACGGCGTCGCTAA +CGAGGTCGCCGCCCAGGGCGTCGCTAACGAGGACGCCGTCCACTGCATCGCTAACGAGGT +CGCCGCCCAGGGCGTCCTTAAAGAGGACGCCGCCCAGGACATCGCAAACGAGGACGCCGC +CCAGGGCATCGCTAACGAGGACGCCGCCCACGGCATCGCAAACGAGGACGCAGCCCAGGA +CATCGCAAACGAGGACGCCGCCCAGGACATCGCAAACGAGGACGCCGCCCAGGGCATCTC +TAAGGAGGACGCCGTCCAGGGCATCGCGAACGAGGATGCCGCCCAGGGCATCGCTAAGGA +GGACCCCGTCCAGGGCGTCGCTAACGAGGACCCCGTCCAGGGCGTCAATAACGAGGACGC +CGTCCAGGGCATCGCTAAGGAGGACCCCGTCCAGGGCGTCGCTAACGAGGACCCCGTCCA +GGGCGTCGCTAACGAGGACGCCGTCCAGGGCATCGCTAACGAGGACGCCGTCCACGGCGT +CGCTAAAGAGGTCGCCGCCCACGGCGTCGCTAACGAGGATGCCGCCCACGCGATCGCTAA +GCCGGACGCCGCCCGCGGCATCGCTAACGAGGTTGCCGCCCAGGGCATCGCTACCGAGGA +CGTCGCCGACGGCATCGCTAACGAGGACGCCGCCCACGGCGTCGCTAACGAGGTCGCCGC +CCAGGGCGTCGCTAACGAGGACGCCGTCGAGGGCGTCGCTAACGAGGACGCCGCCCAGGG +CGTCGCTAAAGAGGTCACCGCCCACGGCGTCGCTAACGAGGACGCCGTCCAGGGTGTCGT 
+TAACGAGGACGCCCTCCACGGCGTCGTCGCTAACGAGGATGCCGCCCAAGCGATCGCTAA +GCCGGACGCCGCCCATGGCATCGCTAACGAGGTCACCGCGCAGGGCATCGCTAACGAGGT +CGCCATCCACAGCATCGCTATCGAGGACACCACACCGTCCAGGGCGTGGCTAACGAGGTC +GCTGCCCAGGGCATCGCTACCGAGGATGTCGCCGACGGCATCGCTGAGGACGCCGCCCAT +GGCATCACTAACAAGGAGGCCGCCCAGGCCATCGCTAAGCAGGATGCCGCCCACGGCATC +ACTAACGAGGACGCCGTCCAGGGCGTCGCTAACGAGGTCGCCGCCCAGGGCGTCGCTAAC +GAGGACGCCGCCCACGGCGTCGCTAACGAGGTCGCCGCCCACGGCGTCGCTAACGAGGAC +GCCGCCCAGGGCGTCGCTAACGAGGTCGCCGCCCAGGGCGTCGCTAACGAGGACGCCGCC +CAGGGCGTCGCTAACGAGGACGCCGTCCAGGGCGTCGCTAACGAGGTCGAAGTCCAGGGC +ATCGCACACAAGGACGCCATCCAGGGCATCGCTAACGAGGATGCCGCCCACGGCATCGCT +AACGAGGTCGCCGCCCACGGCATCGCAAACGAGGACGCTGCCCAGGGCATCGCGAACGAG +GACGCCGCCCAGGGCATCGCTAACGAGGTCGCCGCCCACGGCATCTCTAAGGAGGACACC +GTCCAGGGCATCGCGAACGAGGACGCCGCCCAGGGCATCGCTAAAGAGAACCCCGTCCAG +GGCGTCGCTAACGAGGACCCTGTCCAGGGCGTCGCTAACGAGGTTGAAGTCCAGGGCGTC +GCACACGAGGACGCCGTCCAGGGCATCGCTAACGAGGACGCCGCCCACGGCATCGCGAAC +GAGGTCGCTGCCCAGGGCATCGCGAACGAGGACGCCGCCCAGGACATCGCAAAGGAGGAT +GCCGCCCAGGACATCGCAAACGAGGACGCCGCCCAGGGCATCTCTAAGGAGGACGCCGTC +CAGGGCATCGCGAACGAGGACGCCGCCCAGGGCATCGCTAAGGAGGACCCCGTCCAGGGC +GTCGCTAACGAGGACGCCGTCCAGGGCGTCGCTAACGAGGTTGAAGTCCAGGGCGTCGCA +CACGAGGACGCCGTCCAGGGCATCGCTAACGAGGACGCCGCCCACGGCATCACTAACGAG +GTCGCCGCCCACGGCATCGCGAACGAGGACGCCGCCCAGGGCATAGCACATGAGGATGCC +GTCCAGGGCATCGCTAACGAGGACGCCGTCCACGGCGTCGCTAAAGAGGTCGCCGCCCAC +GGCGTCGCTAACGAGGATGCCGCCCACGCGATCGCTAAGCCGGATGCCGCCCACGGCATC +GCTAACGAGGTCGCCACCCAGGGCATCGCTACCGAGGATGTCGCCGACGGCATCGCTAAT +GAGGACGCCGCCCACGGCGTCGCTAAAGAGGTCACCGCCCACGGCGTCGCTAACGAGGAC +GCCGTCCAGGGCGTCGTTAACGAGGACGCCCTCCACGGCGTCGTCGCTAACGAGGATGCC +GCCCAAGCGATCGCTAAGCCGGACGCCGCCCATGGCATCGCTAACGAGGTCGCCGCCCAC +GGCATCGCGAACGAGGACGCCGCCCAGGGCATCGCTAACGAGGTCGCCGCCCACGGCATC +TCTAAGGAGGACGCTGTCCAGGGCATCGCGAACGAGGACGCCGCCCAGGGCATCGCGAAC +GAGGACGCCGCCCAGGGCATCGCACACGAGGACGCCGTCCAGGGCATCGCTAACGAGGAC +GCCGTCCACGGCGTCGCTAAAGAGGTCGCCGCCCACGGCGTCGCTAACAAGGATGCCGCC +CACGCGATCGCTAAGCCGGACGCCGCCCGCGGCATCGCTAACGAGGTTGCCGCCCAGGGC 
+ATCGCTACCGAGGATGTCGCCGACGGCATCGCTAACAAGGACGCCGTCCACGGCGTCGCT +AACGAGGTCGCCGCCCACGGCGTCGCTAACGAGGATGCCGCCCACGCGATCGCTAAGCCG +GATGCCGCCCACGGCATCGCTAACGAGGTCGCCACCCAGGGCATCGCTACCGAGGATGTC +GCCGACGGCATCGCTAACGAGGACGCCGCCCACGGCGTCGCTAACGAGGTCGCCGCCCAC +GGCGTCGCTATCGAGGACGCCGTCCAGGGCGTCGCTAACGAGGACGCCGTCCACGGCGTC +GCTAACGAGGACGCCGCCCAGGGCGTCGCTAAAGAGGTCACCGCCCACGGCGTCGCTAAC +GAGGACGCCGTCCAGGGCGTCGTTAATGAGGACGCCCTCCACAGCGTCTTCGCTAACGAG +GATGCCGCCCAAGCGATCGCTAAGCCGGACGCCGCCCATGGCATCGCTAACGAGGTCGCC +GCCCACGGCATCGGGAACGAGGACGCTGCCCAGGGCATCGCGAACGAGGACGCCGCCCAG +GGCATCGCTAACGAGGTCGCCGCCCACGGCATCTCTAAGGAGGACGCCGTCCAGGGCATC +GCGAACGAGGACGCCGCCCAGGGCATCGCGAACGAGGACGCCGCCCAGGGCATCGCACAC +GAGGACGCCGTCCAGGGCATTGCTAACGAGGACGCCGTCCACGGCGTCGCTAAAGAGGTC +GCCGCCCACGGCGTCGCTAACGAGGACCCTGTCCAGGGCGTCGCTAACGAGGACGTCGTC +CAGGGCGTCGCTAACGAGGTTGAAGTCCAGGGCATCGCACACGAGGACGCCGTCCAGGGC +ATCGCTAACGAGGACGCCGCCCACGGCATCGCTAACGAGGTCGCCGCCCACGGCGTCGCG +AACGAGGACGCCGCCCAGGGCGTCGCTAACGAGGACGCCGCCCAGGGCGTCGCTAACGAG +GACGCCGTCCACGGCGTCGCTAACGAGGACGCCGCCCAGGGCGTCGCTAAAGAGGTCACC +GCCCACGGCGTCACTAACGAGGACGCCGTCCAGGGCGTCGTTAACCAGGAGGCCCTCCAT +GGCGTCGTCGCTAACGAGGATGCCGCCCAAGCGATCGCTAAGCCGGACAGCGCCCATGGC +ATCGCTAACGAGGTCGCCGCCCAGGGCATCGCTAACGAGGTCGCCGTCCACAGCATCGCT +ATCGAGGACACCACACCGTCCAGGGCGTGGCTAACGAGGTCGCTGCCCAGGGCATCGCTA +CCGAGGATGTCGCCGACGGCATCGCTGAGGACGCCGCCCATGGCATCACTAACGAGGAGG +CCGCCCAGGCCATCGCTAAGCAGGATGCCGCCCACAGCATCACTAACGAGGATGCCGTCC +AGGGCGTCGCTAACGAGGACGCCGCCCACGGCGTCGCTAACGAGGACGCCGCCCAGGGCA +TCGCTAACGAGGTCGCCACCCATGGCATCTCTAAGGAGGACGCCGTCCAGGGCATCGCGA +ACGAGGACGCCGCCCAGGGCATCGCGAACGAGGACGCCGCCCAGGGCATCGCACACGAGG +ACGCCGTCCAGGGCATTGCTAACGAGGACGCCGTCCACGGCGTCGCTAAAGAGGTTGCCG +CCCACGGCGTCGCTAACGAGGACCCCGTCCAGGGCGTCGCTAACGAGGACGCCGTCCAGG +GCGTCGCTAACGAGGTTGAAGTCCAGGGTGTCGCACACGAGGACGCCGTCCAGGGCATCG +CTAACGAGGACGCCGCCCACGGCATCGCTAACGAGGTCGCTGCCCATGGCATCGCGAACG +AGGATGCCGCCCAGGGCATCGCGAACGAGGACGCCGCCCAGGGCATCGCACACGAGGACG +CCGTCCAGGGCATCGCTAACGAGGACGCCGTCCACGGCATCGCTAAAGAGGTCGCCGCCC 
+ACGGCGTCGCTAACGAGGATGCCGCCCACGCGATCGCTAAGCCGGACGCCGCCCGCGGCA +TCGCTAACGAGGTCGCCACCCAGGGCATCGCTACCCAGGATGTCGCCGACGGCATCGCTA +ACGAGGACGCCGCCCACGGCGTCGCTAACGAGGTCGCCGCCCACGGCATCGCTAACGAGG +TCGCCGTCCACTGCGTCGCTATCGAGGACGCCGCCCAGGGCGTCGCTAAAGAGGTCACCG +CCCACGGCGTCGCTAACGAGGACGCCGTCCAGGGCGTCGTTAACGAGTACGCCCTCCACA +GCGTCGTCGCTAACGAGGATGCCGCCCAAGCAATCGCTAAGCCGGACGCCGCCCATGGCA +TCGCTAACGAGGTCGCCGCCCACAGCATCGCGAACGAGGACGCTGCCCAGGGCATCGCGA +ACGAGGACGCCGCCCAGGGCATCGCTAACGAGGTCGCCGCCCACGGCATCTCTAAGGAGG +ACGCCGTCCAGGGCATCGCGAACGAGGACGCCGCCCAGGGCATCGCGAACGAGGACGCCG +CCCAGGGCATCGCACACGAGGACGCCGTCCAGGGCATTGCTAACGAGGACGCCGTCCACG +GCGTCGCTAAAGAGGTCGCCGCCCACGGCGTCGCTAACGAGGACCCCGTCCAGGGCGTCG +CTAACGAGGACGCCGTCCAGGGCGTCGCTAACGAGGTTGAAGTCCAGGGCGTCGCACACG +AGGACGCCGTCCAGGGCATCGCTAACGAGGACGCCGCCCACGGCATCACTAACGAGGTCG +CCGCCCACGGCATCGCGAACGAGGACGCCGCCCAGGGCATCGCGAACGAGGACGCCGCCC +AGGGCATCGCACACGAGGACGCCGTCCAGGGCATCGCTAACGAGGACGCCGTCCACAGCG +TCGCTAAAGAGGTCGCCGCCCATGGCGTCGCTAACGAGGATGCCGCCCACGCGATCGCTA +AGCCGGACGCCGCCCGCGGCATCGCTAACGAGGTTGCCGCCCAGGGCATCGCTACCGAGG +ATGTCGCCGACGGCATCGCTAACGAGGACGCCGCCCACGGCGTCGCTAACGAGGTCGCCG +CCCACGGCATCTCTAAGGAGGACGCCGTCCAGGGCATCGCGAACGAGGACGCCGCCCAGG +GCATCGCTAAGGAGGACCCCATCCAGGGCGTCGCTAACGAGGTTGAAGTCCAGGGCGTCG +CACACGAGGACGCCGTCCAGGGCATCGCTAACGAGGACGCCGCCCACGGCATCGCCAACG +AGGACGCTGCCCAGGGCATCGCGAACGAGGACGCCGCCCAGGACATCGCAAACGAGGACG +CCGCCCAGGGCATCGCTAAGGAGGACCCCGTCCAGGGCGTCGCTAACGAGGACCCCGTCC +AGGGCGTCGCTAACGAGGACGCCGTCCAGGGCGTCGCTAACGAGGTTGAAGTCCAGGGCG +TCGCACACGAGGACGCCGTCCAGGGCATCGCTAACGAGGACGCCGCCCACGGCATCGCTA +ACGAGGTCGCCGCCCACGGCATCGCGAACGAGGACGCCGCCCAGGGCATCGCGAACGAGG +ACGCCGCCCAGGGCATTGCACACGAGGACGCCGTCCAGGGCATCGCTAACGAGGACGCCG +TCCACGGCGTCGCTAAAGAGGTCGCCGCCCACGGCGTCGCTAACGAGGATGCCGCCCACG +CGATCGCTAAGCCGGACGCCGCCCGCGGCATCGCTAACGAGGTCGCCGCCCAGGGCATCG +CTACCGAGGATGTCGCCGACGGCATCGCTAACGAGGACGCCGCCCACGGCGTCGCTAACG +AGGATGCCGCCCAAGCGATCGCTAAGCCGGACGCCGCCCATGGCATCGCTAATGAGGTCG +CCGCCCAGGGCATCGCTAACGAGGTCGCCGTCCACAGCATCGCTATCGAGGACACCACAC 
+CGTCCAGGGCGTGGCTAACGAAGTCGCTGCCCAGGGCATTGCTACCGAGGATGTCGCCGA +CGGCATCGCTGAGGACGCCGTCCAGGGCATCACTAACGAGGAGGCCGCCCAGGCCATCGC +TAAGCAGGACGCCGCCCACGGCATCACTAACGAGGACGCCGCCCAGGGCGTCACTAACGA +GGTCGCCGCCCAGGGCGTCGCTAACGAGGACGCCGCCCACGGCGTCGCTAACGAGGTCGC +CGCCCACGGCGTCGCTAACGAGGACGCCGCCCAGGGCGTCGCTAACGAGGTCGCCGCCCA +GGGCGTCGCTAACGAGGACGCCGTCCACGGCGTCGCTAACGAGGACGCCGTCCAGGGCGT +CGCTAACGAGGTCGAAGTCCAGGGCATCGCACACAAGGACGCCATCCAGGGCATCGCTAA +CGAGGACGCCGCCCAGGGCGTCGCTAACGAGGACGCCGCCCAGGGCGTCGCTAACGAGGT +CGCCGCCCACGGCGTCGCTAACGAGGATGCCGCCCACGGCGTCGCTAACGAGGTCGCCGC +CCAGGGCGTCGCTAACGAGGACGCCGCCCAGGGCGTCGCTAACGAGGTCGCCGTCCACGG +CGTCGCTAACGAGGACGCCGCCCGGCACATGGCTAAGGAGGACGCCGCCCAGGACATCGC +TAACGAGGACGCCGCCCACGGCATCGCTAACGAGGACGCCGTACACGGCATCGCTAACGA +GGACTCCGTATACGGCATCGCTAATGAGGATGCCGTATATGACATCGCTAATGACACCGT +ACAAGGCACGCTAACGAGGACACTGTACAGGACATCGCTAATGAGGACACCATACAAGGC +ATCGGTAATGAGGACGCTGTATACGACATCGCTAACGAGGACACCCTACAAGCCGTCGCT +AACAAGGACACTGTACACAACATCGCTAATGAGGGCACCATACAAGACATCACCAATGAG +GGCGCTTTATACGACATTGCTAATGATACCGACAAGGCACGCTAACGCGGACACTGTACA +CGACATCGCTAATGAGGACACCGTATAAGACATCGCTAGTAACTATCGCAAGAACAAAAA +ACCAAACACTGCATATTCTCACTCATAGGTGGGAATTGAACAATGAGATCACATGGACAC +AGGAAGGGGAATATCACACTCTGGGGACTGTTGTGGGGTGGGGGGGGGAGGGATAGCATC +GGGAGATATACCTAATGCTAGATGACGAGTTAGTGCGTGCAGCACACCAGCATGGCACAT +GTATACATATGTAACTAGCCTGCACAATGTGCACATGTACCCTAAAACTTAAAGTATAAT +AAAAAAAAAAAGACATTGCTAGTGAGCACACTGTATACGACATCGCTAATGAGGATGCTA +TATATGACATCGCTGATGAGGACGTTGTACACGACATCACTAATGAGGACACCATACAGA +TGGCTTGAGCCCAGTAGTTTGAGACAAGCCTGGCAACACAGCGAGACCTCATCTCTACAA +ACATTTTTTAAAAATATGCCAAGCATTGTGGCGCATGCCTGTAGTCCTGGCTATTCAGGA +GGCTGAGGTGGGAGGATCACCTGTGCCCAGGAGTTCAAGGCTGCAGTGAGCTATGATCAC +ACCACAATGCTCCAGCCTGGGCAACAAAGCAAGACTCCATCTCTAAAAATAAAATTAAAT +TAAAAAAAAAGATCTTCGATGTAAAAGAGGTATGCTCAAATGCAATAAAATCATATAAGA +AGGCCGGGTGTGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGTGG +ATCATGAGGTCAAGAGACTGAGACCATCCTGGCCAACATCGTGAAACACCGTCTCTACTA +AAAATACAAAAAACAATCAGCTGGGCGTGGTGGCACACACCTGTAGTCCCAGCTATTCAG 
+GAGGCTGAGGCAGGAGAATCGCTTGAACCTGGGAGGCGGAGGTTGCAGTGAGCCAAGATT +GTGCCACTGCACTCCAGCCCGGCAACAGAGCGAGACTCCGTCAAAAAAAAGAAAAAAAGA +AAACCATACAAGAAGCCCCTACGAATCTGGGTGAATCAGCAAAGGCTTCACAGGATAGGA +AAAGGCCATGAGTATGAAAACATGAGTGGGGGGTTGGCCTAGATGGTAAACGAGGAGAGG +ACAATACGGCCAGGAAGTGCATGTGCACAGCAGACGGGAGATGGAGGGCACGGCTGATTG +GCAGCTACCTGTACTTTAATATTCCTGGAGTGTGACGTGTGAGGCAAACATGGGGTGGTG +GGGTGAGTGCTGGAGATGAGGCAGACAGTGAGCCATCAGCATGTTAGTAACAGGGGAATC +CATGGGCGTTTCATGCAGCTCCAAGGACCCTATGTGAGCTCATCAGACTCCAATTACCCA +AATCTGTTTCTTACCAGTGGTAGTTAGGATTTCCGTAGGTACACTGGATGTCTTCCCAGG +ACACCTGGAAGCCAGAAACAAATGGGGTCAAGCAGCCAGCTGACACAAGCACCCACAGAG +GACAGGACACTATAATGTGCCCCTCATCCTCCAGACAAGCCCACCCCTAGCAACTCCCAA +GAGCCTGGAGCAACAGCAGCAGGATGTCTTGTGCAGTGAAGTATGTTTAGCGCCCTTGTC +CCTGTGTTTAAACTTGGTCATGCTTTTCCCAGAGGCCTAACTTACATATTTCTAACTCTC +CTTGCTCTCTAGACATTTCCCACAAAGCAGGAAGCTGCAGAGCATGAGCCTAGAGCACCA +AAAACTTAGAAGTGAAGTGAATTGAACCCTTGCAGAGGTGCAAACCACATGGGGACTTCA +GACCGAAAAAGGCTTGAACCCTCATCAAAGGCCCCACACCCAGACAGCAGGATCACAGTG +TGTCTTGATCTCAGAGACCCAGGCAGCCCTGAGCCCAGCATTTGCTAGAGGTGGCCAATC +CAGGGCAGGAGGCAATTGTGGATGATGTTTCTTACCAAGCAAATATTGGAGTCATTGAAG +CAGAACCATTTTCCATCCACAGCATTCCGGATGTAGACACAGTAATGACCGGAGTCTGCC +ATTCCCACGTGCGCAATCACAGCAAAAAGCTCATACTGCCCTCCAGACTGGAAAGAGACA +GGCACGAGACAGTTCACCCACCACCACAAACACGACTGCCTCACACATTTATACCTGTCC +ACCCGAGGCCCGACTCTGGGACTTTAGAATATTAGAAAACAGTCCTGACCCACGGCCAGT +GGGCACCCTCCAGGAGGGGCTTTCCTGAGTGTGGATCCTGAACTGTGAAGGCTCAGCAGC +TTGGCAGGGCAGAGCACACAAGAGATGGAAGGCATGAGGTCATGAGAGAGGCCCCCAAAG +CCAAGGCAAGATTAGAACTTTCCCCTGGGAGGGGTGGGGATCATCAGAACACAGTTCTGC +TTCTCCACAAGCCCAGGAATGACCCTAACCTGAGCCACCGTACCCAGCCAAAAAAGTCTT +TTGTAGGGGGAGGGGAGACAGAGGGCAAGGGCGGCACACGCAAAGCTGAGTCACCCACAG +TGGAGGCTGAAATTATCCCTATTTCCATTTGCAGACTTTGTGCAGAACTTTACAAAATGC +AGGGCAATATCTGTATCTAAATATTAGTTTACAAGAAATAAAAGGGTTATAGATGATGTT +AAAAGATATCAACCAGCCAGTGCTGGCATATGAAAAAAATATTCAACCAAATCAACTGCA +AGGAAAAGAAAAGGAGCGGAAATCTAAAAGGAACTTAAGAAAAGAAAGAAACTTAACCAA +ATGCAAGTGTAGGCGTTGTTTGGATCTAGAGTCAAACAAACCACATAGATTACACAACAG 
+TCAGGAAGTCTGAACGTTAACCAGATATTTGATGCTATTAAGGAATTGCAAGATGGAGAT +GATTATCTTTTAAGTGCTTATTCTTTAGTGATACATAATGAAGTGTTTATAAATGAAATA +GTATGATGTCTGGGATTTGCTTCAAAAAAATCCAGTACAGGGTTGGGTGAAACAAGAGGG +TGGGTTATAGGAAAAGCAAGACCTGCCATGCGTTTTGGTAACTGTTACAGCCAAGGATTC +ACTGTCTAACTCTTCCTTCACTACACACTTTGGTATCTATTTGTAATGTTCCATAACAAA +GTCTTTTTTGTTGTTGTTGCAAAGATGGAGTCTTGCTTTGTTGTCCAGCTGGGCTCGACT +CCTGGACTCCAGCAATCCTCTTGGCCCCGCAAAGAGCTGGGATTACAGGTGTGACCCACT +GCATCCAGCCAAAAAAGTCTTTTTTAAGTGAAAAACAATTTTAAATGTACTTCATCGGGT +AGAATCTGCAGGTAGAAATAAAAAGAACTAGAAGGAAAGAAAAAGAGAATCCTATGAAAT +GTATACGGCCTAAGTAGGGAAAGAAATCATGTTATAATAGGAGCTATTGTAACGTATTAT +CAGGTCTTAATTTTCTAAATGTCATGGTTCTAGAGCATAAATAGCCAAACAGACCAATAA +AACAGAAGAGACACTCCAAAAATAGACCCGTACATATAGAAATACAAAATTTGTTATGTA +GCATTTCAAATCTATGGAAAAAAAGATATTTGGGTCCTTTTTTTTTTTTTTTTTTTTTTT +TTTTAAGTTTTTCTTGTTTTGTTTTTAGAGACGGGGTCTTGCGCTGTCACCCAGGCTGGA +GTACAATGATAGATCATAGCTCACTGCAGCCTCAAACTCCTGGGCTCAAGTGATCCTCCA +ACTCAGCCTCCCAAGTAGCTGGGACTAAAGGTTTGTGCTACCACACCCAGCTAATTTTTT +TTTTTTTTTTGAGAGACGGGGGTCTCATTATGTTGCTGAAGCTGGTCTCGAACTCCTGGG +CTCAAGCAATCCTCCTGCCTCAGCCTTTTGAGGTGTGGGATCACAGGCATGAGCCACATC +ACCTGGCCTTGTGTCTCTTTTTTTCTCAGAATCTCTTGATAAGTTATTATCTCAGGACAA +GTGAATTAAACTTTACTCTGATTAAATGTGGTCCAGTCAACTGGCTACTTTATGATTATC +CTCAAAGTGGGCACAGGCCATCTGAGGAAGAAAGTTTGGTTGGTACTATGAACCCACAAT +GACATGTTTGCCTATGCATTTCTATGTATTCCACATTATTTCCAAAGAAGCCTACTGAAT +TCTGGATTCTGTCCCTCAGTGCTGTGAGGCATATGAACACAAATCCAATATTTAGAAAGA +AGAAAGGCTTTTGAGGCTGGGGACAGTCTCTGAGGAACAGGTGGCTCCTAGCAGACTGGG +GGTGAAGGAAGGGAAAACAGAAAGGACATGGCATCTGGGGAGAGGCAGGATGGCAAGGAG +GTCGGGATCATCCCACCTGCTCCTCAGCATCACAAGACTCTCGCTTCATTGGAAGGATCT +GGCTGAAATCCAAGCTCTGGGGGAAGTACAGGGAGTGGCAGATCTTTCTCGTCTGTGAAT +TCCTGATGGAGAATCGCATGAGGTGGATTGTCAGGGTCTGGGGCAAATGGGTCAGCTTCA +AGACCTGAAAAATCAAAATCACTCTGGAGATGAAGTCCACTTGGAATGAGAGAGTCAGAG +AAATGTCAGGGCTAGCAACGCGTTCTTCCCAAAAAGAGGGGAGCAGGGCGACATCATGGA +GTTCAAGGCTCCTGTTCACTGAATCTGCTGAGCTCCTCTGAGAATAATGCAAAACTTGGC +TATTCTACCACCATGTGGCCCCCCACAAGAGGCACAAGGGCACAGGGCTCTGCACAGGTA 
+CCAGGACCCAGGGAGCCACCACGTCTGCTCCAGGGAGCTGGTCTCGCCCCGAGGGAAATG +CCCTGCCCGCTGCTCTCAGGCCCCCTCCTGCAGTCCCTCTCAGTGATGCGACATGAGAAG +CCACATCCACGGCTCCAGATATCAGAACCACAGCGTCCCAGATACTAGCCAGACCCAGAT +TCCCTTTCTCACCCATCATGGGATGTCCTCGGGGCCTCTGATGATTCTATCAGGAAATCC +ACGGGGGGCAACCCCAGACTAGTGAAGGACAGATGACCCCTGAACAGCACAGGTTTGGAC +TGTATGGGTCCGCTTATACACAAATTGTTGTAAATAAATACAGTCTGCCCTCCCCCATAG +GCACGAGTTCCACATCTGCAACCAAATGCTCATCAAAAATAAAGTATCCCAGATGCACAA +CCGCATAGATGAGGGACTGACTTTTCCTATACGTGGGTTCCACAGGGCCGACTGGGTGTG +ACTTGAGTGTGTGTGGATTTTGGTAAAAGCAGGGGTCCTGGAACAAATCCCCCATATGCA +CCGAGGGACAGCTGAGTCTGATTTAGGGAATGAGTACCTGTTTCCCACGGGTCTTCTTCC +CACAGTTCTCACAGAAGCATTTGCTTTTGCTTGATAACTCCCTGGGCTGGAAGAAGCAGT +GCAGGGCGTCCTCCTGTCCCACCCAAGAGAACAGGGAAAAAGCAGCGTTGGTATTTCCCA +CCTAGAATACTATATTCGTCTGTTCTCACACTGCTGTGAGACAATAGCTGAGACTGGGTA +ATTTATAAGAAAAGAAGTTTGATTGGCCCATGGTTCTGCAGGCCGACCGGAAGCATGGCA +GCATCTGCTTCTAGGGAGGCCGCAGGAAGCTCCCAATCATGGCGGAAGGCCAAGCAGGAG +CAGGCGTCTTACCTGGGAGGAGCAGGTGCAAGAGACCAAGTGGGGAGCTGGAGTGCAGGA +GCGCGATCTCAGCTCACCGCAACCTCCACCTCCCGGGTTCAAGCGATTCTCCTGCCTCAG +CCTCCTAAGTAGCTGGGACTACAGGCTGCACCACCACGCCCAGCTAATTTTTTATGTTTT +TAGTAGAGACAGGGTTTTACCATGTTGGCCAGGCTGGTCTCAAATTCCTGACCTCAGGTG +ATCCACCCACCTCAGCCTCCCAAAGTGCTGGGATTATGGGCGTGAGTTACCACACCCAGC +CTGGCTCCACACTTTTAAACAACCAGATCTCATGAGAACTCACTGTCACAAAGACAGTAC +CACGGGGGATGGTGCCAAACCATTCACGAGAAACACTCCCCCATGATCCAATCTCCTTCC +ACCAGGCTTTACCTCCAACACTGGGGATTACAATTGAACAGGAGATTTTTGGGCAGGGTC +ACAGACCCAAACTGTATCAGGTACAGCCTCCCCACACTTTGCAGCTGAGGGTTTCAGCTT +TTCTTCTGGTTCTCCTCATATCCCCCTTAATGCCTGGCCTGGTGTGGTCCACTTGGCAGG +ATCTCAAAGGGCACCTACTGACTGGAGGAACAAAACAGGAGGGGGCTCCTATTAAGGGAG +CAGTATGAGGGAAGTGGAGGTAGGAGGGAAAAACATTTTTGCCAAAACAAAGGAGAAATT +GGCCAGCGCCGTGAAAATGAAATAAGTGACTTTCCAGAAGCCAGAGATAGATCAAAGAGG +CAAGGGGGAATTGGCCACACTGTGCTGTGTGGTGTGATGGTTTGAGGGTTTGTCCTCCCC +AAAACCCAGGTTGAAACTGAATCCCCAGTGTGGCTGTATGGAAAGGTGGGGACTTTGAGA +GATAATTGGGGAGGGCAAGGTGTCTCATGCCTGTAATCCCAAGGAGGCTGAGGCAGGGGG +ATCACTTGAAGTCAGGAGTTTCAGATCAGCCTGGCCAACATGGTGAAACCCCATCTCTAC 
+TAAAAATACAGAAATTAGCCGGGCATGGTGGTGGGCACCTGTAATCTCAGCTACTTGGGA +GGCTGAGGCAGGAGAATCGCTTAACACCGGAGGCGGAGGTTGCAGTGAGCTGAGATTGTG +CCACTGCACTTCAGTCTGGGTGACAGAGGAAGAATCTGTCTCCAAAATAAAAGGTAGGGG +GGTTATTGCGTCATGAGGGCACAGCCCTCATAAATGAATTAATCCACTCATGGATTAATG +GGTTAATCGGTTATCCTGGGAGTGGGACTGGTGGCTTCATATGAAGAGGAAGAGAGACTT +GAGCCGGCACATTAGCATGCTCAGCCCCCTCGCCATGTGATGCCCTGCACTGCCTCGCCT +CAGGACTCTGAGAGTCCCCACCAGCAAGCAGGTCCTCACCAGGTGCAGCCCCTCAACCTT +GACTTCCCAGCCTCCAGAACTGTAAGAAATAAATTACCTAGTCTCAGATATTGAGTTATA +ACAACAGAAAATGGACTAACACATACAGGTGCAGCAAATACCAAGAGAATCCCCTTACCA +GTGTCTTCAGGGGCTTTGAGTCCACATCAAAAAAAGAAAGTCGGAGGGTGAGCATGCTGC +TGTTTCTGCTACTCTCCATGGCACAGTCAAGGCAAATCAAGGAGTCCTTCATCCGGATCA +TATACAGGGCCTGCAGTCTCTCCACCTGCAAGAGGGAGAGCGGAGAGGTGAGATGGGAAC +ACCACCATAAGCCAGGTCAGGCCAACTGCCAGAAGGTGGAGCCAAGAGACAGAAGCTCCT +CACAAAGGTGCAAAATAATACCAGTAACAATATTATGAAACTGTGATAAGTGTTTTATGT +GGATTAACTCTTTTTTTTTTTTTTTTGAGACAGAGTCTCGCTCTGTCACCCAGGCTGGAG +TGCAGTGGCGCGATCTCGGCTCACTGCAAGCTCCACTTCCCGGGTTCACGCCATTCTCCC +ACCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCACGCCCGGCTAATTTTT +TGGTATTTTTAGTAGAGACAGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGA +CCTCGTGATCTGCCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGGGATTAACTCTTT +TATTCCTCTACACACTACGGGAACAACAGCTGAATGAATGAAATGAGCAGCATCTTAAGC +TTCCGAGTGCTCTGGTTCTAGGTTCTTACCAAGTGCACATCAGCGATCTGGTCCTTAATC +AGGTTCCAGAGTTTGAGGTACAGTTGGGCAGCATCATGTTGGACAAACACTGGCCACAGA +GAAAAGGAACAGCCAATTAGTGAGGTCTTCAGAGTCACTTCTCATGCCCTGGTACTTAGA +CACACATGAACACTGATCACTTTGTCTCTTCCTAGCCCTCCACCCCACTGCAAGGCTAAG +TCGGCACGACAGAAGAAGTGATCAATAAACAAACAGTAAGTTTGGTATTGATTTTTGAGA +ACGCCTCTTAATACTTAAGCTCCAGTGCCAGATGTAGTGGCTCATGCCTATAATCTTAAC +ACTTCGGGAGGCCAAGGTGGGAGGATCGCTTGAACTCAGGAGTTTGAGACCAGCCTGGGC +AACATAGCAAGACTCCGTCTCGATAAAAAATTTAAAAAAGTACTTGAGAGTAGCGGTGCA +CGCCTGTAGTCAGAGCTACTTGGGAGGCTGAAGCAGGAGGATCACTGGAGCCCAGGATGT +TGAGGCTACAGTGAGCTGTGATGTCACCACTGCACTCAGCCTGGGGAATGAGATGTCCAC +AGGGACTTTGAAAAGCACTGATACATTCCTGAGGATCTAGAAGGCCAGGCGCATGCCCAA +GTAAGACCTGAGAGGGCTCCGATCCCTCCCCTCTGGCTGACCACAAGGCCACATGCAAGA 
+AAGCAATGAAGGCGAATACAGAATCATGAACTGCCCAGAAACGATAAAACAGCAACAACA +ACAAACCCTGCTGTGGGGATCTCTGATGTCCAGTGTTGCCACCTTACATTATCTAAAATG +TTCATTTTCCAACAAAAGAGACAAGACCTACAATGAAACAGAAAAGCATGGCCACCCAGG +AAAACAGCAATCGTAGAACCTTCCACTGAGAAGCCCTAGATGTTGGCGTTACTAAACAAA +GACTTTAGCCGTTATAATATGTTCAAAGAACTAAAGAAAACCATGTCAAAAGATGTAAAA +GAAAGTGTGAAAATGCTGCCTCACCAAATAAAAAGTATCAATAAAGAAAAAACTATAAAA +GAAGAACCAAGCAGAAATTCTTGAGTTGAGGAGAACCATAACTGAAATGAAAAATGTACT +AGAGGAGGTCAATAGCAGATTAGAGCTGGCAAAAGAAAGAATCCATAAACCTGGAGCTAG +GTTGGTTGAGGTTCTCCAGACTGAGGACGGAAGGAAAGCTGAACACAGTACCTATGCAGC +GAAAAGCTGAGGAGCTGCTCACTGCGTGTAGAGCTCAACTAACTCTGATACAAAAAAAAA +TGTGATACAAAAGGTTTTATACACAAACGTGAACTTATTCTGAAGCCAGCCTGGGGCAAG +GAGCAAGACACATCCTGCCTTTTAATGTGCCATTTCTCCTTTGGAGGAGAGAGGGGCATT +TTCATAAGGTGTGTTGTGGGGGTGGGGGGAGTGAGCAATGGCGGTTTTCTACCGGGCAGT +TATCAAGTTCCCAGCAGCTGAGTTGGCGCCTTTCCGGGCAGAAAAGTGGCCGAGTGGGCA +TGCTTTCTCCATGCCTTCCTAGTGGGTGAAAGACAAACCCCTAGAGGGTGGAAGTTTCGA +AGCCACCCCCTGGCAGGGAGAGTTCCCTGGTGGGTATGCTTTGGGCTGCAAATCTACTGC +CAAGTCTCAGGAAAGATCAGCTGGAAGATCTTGCCCTGTTCTCAATCAAACCTCCATCAG +CCTACCTCCATTTACAGATTCTTCCTTTTGCCCAGCAGGGAATGTCTGGAGAGGGGTAGG +CGAAAGGTGATATTTGCATTTCTAAAGGGCTAACAGGAAACAGGAAACACGGCCGCGGTG +AGGGGTGAGGGATGGGAAGAGGAGAAGTGAAAAGAAAGTAATAAGAAAACTAATATAACT +ATCTCTTAGAAAATGGGAGTACTCAGTTACACCCAGATGGGTTCACTAGTGAACGATGGG +TTCTACCAAACATTTATGGGAAAAATTATACCAATTCCCTACAACCTCTTCCAGAGGACA +AAAGCAGAGAGAACACTTCCTAACTCATTCTCTGAGGCCAGCATTATCCTAACACCAAAA +CCAGACAGACATTGTAAGAAAAAAACCTAAAAACCAGTATCTCCCGTGAACATAAATGCA +AAATTCCTCAACAAAATATTAGTGAATCAAATTCAACTATGTATAAAAAGAATCACAGAG +GGTGACACAAGCGAGATGGTAGAGAAGGAGGCTCCTGGTGCTCCCCTTCTCCCATGGACA +CACGGTAAAACTCTCATCTACACACAGATCAACTCCCTCTGAAAGAAACCCAGAAACCAG +CTGAGAGAGGCCTGCACAGGGGGCAATGGGGAGAACACCCACATCGCAACCGGTTGGAAA +AGCTGAGACACACTCGAACCACAAGCCCCACCCCAGACACAGTACCTTACTGTGAGGAAG +GAATCCCTACTCCCAGCTTTCTCCTGGAGAGGGGAGGGTTTGGACCACACATAGGCAGCC +CCAGCTTTTGCAGTTCCCACTCAAGGGCTTGGCTGCTGGGTCACCTAGCTCTGGGAGCAG +ACATGGGTTAGCAGTTATGAATCCTCTGCGACCACAGAGGACAAAGAGGCAGTTTTAAAC 
+GGGCACGTGAGCACTTCCAGCAGCTGTGCTCAGCACAGAGCAAACAGGCAGAAATGCCAG +GCTCCCAGTTCCTCCCTGGAAGGATATGTCTGCGGGCTCTCCTAGCTGTTGCCCGAGGGA +TGGGCTTCTATTAATAACTGGCCTGCACCTGGAGCCAATGAGGCAGGTAAACAATAGATT +TCCAGGAGTCTGGACAGGCCTGTGGGCAAGTCCCATGCCTTCTCCCACCCCCTGCTGTAT +GGTAAAATCAGGTCTCTAACTTCACACATTGAGTACACATTTACCCCTCTCAAATGAAGG +TCTAGCTCCTCAACCACCGAGCTTTGGGAGCTGACAAGGCTCTGTATTTGTAAGTCCCAC +AGGGGCACAGAGACCAAAGAACATCCCCCCAGGCCCAGTGCAGCGTGAACAGGCAAAAAG +GCCCGGCTCCTACTTTCTCCCTAGAAGGAGTTTGTCTGCACAGTGCAGCTGCTGCCCTGG +GGTTTCTATGCATCTGGGAGCTGACAGAGCAGGAAACCAGTGCTCCTCTGAGACCCTGAA +CAGGCAGGTGAGCACCTCCACAGCTGCTCCCACTGGTTTGCTCAACAGATAGCGTCAAGC +TTCCAACTAACCTGTCCGTCTCTAAGCAGAGAACAGCCAGCATTTGCTAGGCCCCTGGGG +GTGACAAAGAGTAAAACAATGCATGAAGGAGTGTGCAGTTTAAACTTGAGTGCAGGCACT +TGCCACAGATCCTCTCTCCAGCATTTTGCAGAGCGAGTGGCAGATAAACTCATGCTCCCA +GCTTTTCCCTGAGGATAGAATAAACTGGAACACACATTTAATGCCCCAACGTCTCCAGCT +GCACCTCAAGGGGCTGGTTTCTATCTCCCCTGTCTGGGGCACTGACAGGACATGACACAT +TCTAATCTCCTGAGGGCCGCTAAGAACATAGATGACAGTTTGGACAACACAAAAGGTCAA +GAGGTGCTCCGAGTGTCTGGCAGGGCTAATTGGTAAGTTCGTCTCTTATACAAGGCCCGT +GGGACAAGACTGGGAGAGGTATTTTTCTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTG +GCCCAGGCTGGAGTGCAGTGGTTCGATCTCGGCTCACTGCAAACTCCACCTCCCAGGTTC +ACGCCATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCACCCGCCACCACGC +CCAGCTAATTTTTTGTATTTTTAGTAGAGACAGGGTTTCACCGTGTTAGCCAGGATGGTC +TCCATCTCCTGACCTCATGATCTACCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGT +GTGAGCCATGATGCCCAGCCTGGGAGAGGTATTTTTCTTAACTAAAGTACAGAAAATAAC +AGAGAATCAAGAACAATGAAGAAACAGAGAAATCTGTTCCAAGCAAAAGAACAAGATAAA +TCTCCAGAAAATGCCCCCGGTGAAATGAAGATAAGTGACTTACCTTACAGACAATTCAAA +ATAATGATCATGAAGATGCTTACCATGTCTAGGACAGCAATGCATGAACAAAGTAAGAAT +TTCAGCAAAGAGACAGAAAAAAGTTCAAAAGAAATAATAGAGCTGAAGAATACAATAACT +GAAATAAGAGCTCAAAAGAGAAGCTCAACAGACTAAATCAAGCAGAAGAAAGGATTAGCA +AACTAGAAGATAGGTCACTGGAAATCATTCAGAGGAGCAAAAATGAAAAAGAATGAAAAA +GAGTGAAGATATCTTAAGAGAGTTAGGGGACATCAAGCCGACCAAAATAGGCATTATCAG +AGTCCCAGAAGGAGGAGAGTGAGAAGAGATGGAAAGCTTATCCAAAGAAATAACAGTCAC +GAGTTGCAGATCAACATTTCAGTCAATGGCAGACCGCATGAGCACAGTGGCCATAGCATG 
+CAGCCTAGCTGTGTAGTAGGCTGTCCCATCTAAGTTTGTGTAAGTGCACTCCATGATGTC +CACACAATGACAAATTCACCTTCTGATGCATTTTCAGAACGTATCCCTGTTGGTAAGCAA +CGCATACCTATAATGACTGAAAACTTCCCAACCCAGGGAAGGAAATACAAATCCACATCC +AGGAAGCCCAAAGGACACCAATAAGACTGATCTAAAGCAACCCACATCAAGACACATCAT +AATCAATTTCAAAAGTTAAAGACAAAGAATTCTGAAAGCAGCAAGAGAAAAGCAATGTAT +TATACACAAGGGAAAACCCGTAAGTGTAGGATTTCTTCTGGGGGTGATGAAAATGTTATG +AAATTAGATTGTGATGATGGAGGCACAACTCTGTGAATACACTACAAACCTTTGAATTGT +ATGTTTTTAAATGGATGAATTTTACAGTATGTGATGCACATCCCAATATAGCTATCTAAA +AAAAAAACCTGACCTATTTGTGAATTATTTTGTAGAACATGTTTTCTCTTTTTTATTCCT +TTATTCTGTTTTCCTGCAGTTCCCTAACTGTGGTGGAATAAGCTTGGAGGAGATGCCTTC +ACCTACTCTACTGAGAGGCAGAGAAAGGTTAGGTATGGAAGTAAAATGTCCATAAATAAT +ATCAATTCTGCACCTCCAAAGATGTGTGCCTCTTACACAAATGTCTCTATAATCTGAGGG +GTCCAGGCTACCTAACTTGCTTCTTGTTGCTTCACTGCCCATTAGTTCCTCTCTTCACTC +TCTAAGCTTCCCATCCCCTTTGGATCAGCAGCCAGGAACCCAGTGGGAGGGTATCTTACG +GGGCACGTTGTACTTCTGCAGGCAGTAGGCCAGCTCCAGGGGCCACACTGCTTTCTGCCG +GCTGTCCTGCATCTTCTCCAGCAGCAGAAGCATCTGGAAAGGGACGCTTCTCCTCTGCTC +GTCAGCTCCCCTGGGCACCGTGATCCTGCCACACAAGAACAGCCAGAAACCTCCTTACAT +GTAGTTCTCAAAGCTAGGTGCACACTGGAATCACCTGGGAGAAATTTTAAATCTTCATAT +CCAGGCCACTCCCATACAAATTAAACCAGAATTCTGGGTCCGGGGGGCGGGGCGCGGGCA +GTTATCAGAATTTTTCAGTTCCTCAGATTATTTCAATGTGTGGTCAAGGTTGAGAAGCAC +CATGTGAGTGAATCTGCTCTACAGGGGACCCTCTAGAGAGGGGGATCAGCATGACTGCTG +CCTTGGCCCCTCTGCACCCTGCCCAGCAATCGTGCTAGAGCAGACGGCAGATGCACGAGG +TCCTGGCTCCACATCTCGGGCAAGCAGCAAAATGTCCAGCCTAAAGCTCTCCAGACTGCA +GAGGCACATGCTGGGTTAGGTCCTGGGAAGAAAGGCAACATGAAAGGCTGTCCGTGTGGT +TTGGCAATGATTCATTTAATCACAGGCATTTTTCGGATGGGAAAAAGGAGAGAGCACAGG +GCAGAAAAGGAGTGATAGAGAGAGACAGAGGGAGAGAGTGCATTGTGTGAGTGTTGGTGG +AAGGAGGGGTGTGTACTCCCCACTGACTTACTCCTTCTCCGACTTTGGTCTAGTCCCAAC +TCTGTCTTTTAACCAAGTCAGTGGCTGTCAATCCTGGCTGCATTTAAAATCACCCAAAGA +GGGCTTTTAAAAAGTACCAATACCTGGCCGGGCCCAGTGGCTCATGCCTGTAATCACAGA +ACTTTGGGAAGCTGAGGCGGGCAGATCACCTGAGGTCAGGAGATTGAGACCAGCCTGGCC +AACATGGTGAAAACTCGTATCTACTAAACATACAAAAATTATCCAGGTGTGGTGGCGGGC +GCCTGTAATCCCAGCTACTTGAGAGGCTGAGGCAGGAGAATCCCTTGAACCTGGGAGGCG 
+GAGGTTCCACTGAGCCGAGATTGCACCATCCCTCTCCAGCCTGGGGACAGAGTGAGGCTT +TAGCTCAAAAAAAAAAAAAAAAAAGTACCAATACCTGGGCCGCACCCCAGACCAACCTAA +GAAGTTTTAGAAATAATGTGTAGGCCAGAATATGCAGAAGCCGATCTTTCCTCTGCCAGC +TTTTGAGAGAAGATCTCAAAAAGTCCAAGTCACTGTGAATAACTGAATATATTAAAAATG +AAATAATACAGGCCGGGTGCAGTGGCTCACGCCTGTAATTCCAGCACTTTGGGAGGCTGA +GGTGGGTGGATCACGAGGTCAGGAGATCAAGACCATCCTGGCTAACACGGTGAAACCCCG +TCTCTACTAAAAATACAAAAAAAATTAGCCAGGCATGGTGGCGGGCTAGTCCCAGCTACT +CGGGAGGCTGAAGCAGGAGAATCGCTTGAACCCGGGGGGCAGAGCTTGCAGTGAGCCAAG +ATCGCGCCACTGCCCTCCAGCCTGGGCAACAGAGCAAGACTCCATCTCAAAAAAAAAGAA +AAAGAAATAATACAAACGCTTATCAGCCTGAAGAACAGTCTTACCTCTTCAATATCCTGG +CGAAGTCCACATTCATTACAAACACCTGAATCAAGGAGTTAAGGCAGCAGGTCTGTCCAA +TGTTGTGTAAACCAACCAGGCCTATAAGGGGAAGAGAAAAAAATGCTGAGGGCAAGGCCT +AGGTAAAGAAGTTGACAAGGCTGGGCATGGTGGCTCACGCCTGCAATTCCAGTACTTTGG +GAGGCCGAGGTGGGCGGATCACCTGAGGTCGGGAGTTCGAGACCAGCCTGGCCAACATGG +TAAAACGTGTCTCTACAAAAAAATCCAAAAATCAGCCGGGTGCGGTGGCTCACGCCTGTA +ATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCACCTGAGGTCGGGAGTTCGAGACCA +GCCTGATCAACATGGTGAAACCCCGTCTCTACTAAAAAATACAAAACTAGCTGGGCATGG +TGGCACATGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGTAGGAGAATCATTTGAAACC +AGGAGGTGAAGGTTGTGGTGAGCCGAGATCACACCATTGCACTCCTTTCTGGGCAACAAT +AGTGAAACTCCATCAAAAAAAAAAAGAAGAAGAGAACTTACAGCAGGGTGTGGCGTTTCA +CACCTGTAATCCTAGCACTTTGAAAGGCCAAGGTGGGCAGATCACCTGAGGTCAGGAGTT +CAAGACCAGCCTGGGCAACAGGGCAAAACCCTGTCTCTACTAAAAATACAAAAGCTAGCC +AGGCATGGTGGCAGGTGTCTATAATCCCAGCTTCTTGGGAGGCTAAGGCAGAAGAATCAT +TGGACCTAATTTATTGATTTTTTTGAACTTGGAATATGGTGTCATGACAAAATCATGCCA +CTGCACTCAATCCTGGATGACAGAGCAAGACTCTGTCTCAAAAAAAAAAAGGTTACTTTT +TATTTCCTGAAAAGCTTGACTTCCTGACAATGAAAGTCCTAAGGAGCGAGTGGCAGAAGA +CAGCTGGCAGCTGGCAGTCATTGGTGGGGCAAAGGCAGAGATACCACTGCTGACTCTACT +GGGCCTCGGGCCTCAGCGCTCATGGGAAGGGACCCTCTTTCAAGCTTTTGGAGGGGTAAG +GGAGGGCCAGGTAACAGTGCCTACATCTTAGGCACCGAAAACAACTGGAGCTGCTTCATG +CAGTCAAGAATCCCCTGAAGAAGGAAGGCAGTTACCTCCCTATTCTCCAGATCTCTTCAG +CTGGCCTGGCTCTCATCCTTTCCAAGACCTGGCTGTTTGCTTTGCATTTAGTTATATGAG +CTAATTCATATGCTTCTAATAAATGCCTTTCATCCTTCTCCAAAGTTAGCCCTAGAGTAT 
+ATTGCTGTTGTTTACAATCAAAAAGCCCCAGAGGTGTACTCTCCAATTCTCTTAATAACC +TCATTTTTCTGTCATTACCCCTGTTTTAGTACAAGTAGCTGAGCTTCAGAGAAGTTAAAA +GATGTGCCCAAGGTCGCACAGCTAATGAGTGTCACAGTCAGAATCTGAACCCAAGTCTGT +CTGATGTGGGGTCATGACAAGAGCATGGGTTTTCTTGGGAGAAAAAAATGACTTCAAATC +CCCACTTCCCGCATAAAAATGCACAATGGGTGTAGCAATGCCGAAGCCACAGAGCTATTG +AGGATGGATGGAGGTGACGCATAGGAAAGCACATGGCCTAGGGTATGTATTCCCAACACT +TTGGGAGGCCGAGGTGGGCAGATCACCTGAGGCCGGGAGTTCAAGACCAACCTGACCAAC +ATGGAGAAACCCTGTCTCTACTAAAAATACAAGATAAGCTGAGTGTGGTGGTGCATGCCT +GTAATCCCAACTACTTGGGAGGCTGAGGCAGGAGAATAGCTTGAACCCAGGAGGCAGAGG +TTGTGGTGAACCAAGATCATGCCATTGCAGTCCAGACTGGGTGAAAAGAACAAAACTCCA +TCTCAAAAAAAAAAAAAAAAGAATTAAGAAAAAATATTTGAACACTTCTTTTTATCCCTA +ATTTATTCATGTTTTTGAACTTGGAATAGGAACACTACTGCCAACAAAAGTAATTAGAGA +ATATAGATGTTAAATCTATTCTCTAAATGAATTATTGCTTATTTTAAAATTGCATATTAA +TTAATTACAGGAAATTTGGAAAGTAAGAGACATATGAAGAAAAAAAGGTAAAAATGATTT +GTAATCCCTCTGAGAGTTACCCTTTAATGCCATTTTTGGTATATTTCCTCTCAGTTCTTT +CCCCCTCCACCACCAGTATGTGTGAGTAGTTTATGTTTTATAAAGCTGGGTTCATACAAT +ATGATTTCTTGGCTGGTGTGGTGGCTCATGCCTGTAATTCTAGCACTTTGGGAGGCTGAG +GCAGGCATATTGCTTGAGCCCAGGAGTTCCAGACCAGCCTAGGCAACACAGTGAGACCCC +ATCTTTACAAAAAGTAAAAAATTAGGTGGTGGATATGGTGGCACTTGCCTGTAGTCCCAG +CTACTTGGGAGGCTGAGACCAGAGGATCACTTGAACGGAGGAGGAGTCTGCAGTGAGCCT +AGATTGTGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACCCTGTCTCAAAAAACCCA +AAAAAGATTTCTCTATAATTTCTTTTTTTATTCAGTAAAATATTGTAGTTTTCTCTATCT +TGATTAAACAGTCTAAAATGGTTTTAATGTCACCATAGTATTCCATTTTATAAATTTGTC +AAAATTTATTTAACCCTTATTGTTACATTTCCATTTTTTAAAATAGTGCTGCAGCTGTTG +ATTTGAAAAGATACTTTTTTCAGGTAGTTGACAACCAAAATATCCCCTTGATGGGTTCCA +GCATACCCCATGCCTCAGTATCACACAGTATACCTTTGTAACTAACCTGCATGGTACCCC +CGATTCTAAAATAAAAGTTGAAAAAAAATCATCTAGTGTTTTAAAACAAATTATGTATTA +TTTTTGGGCCAAGCGTGGTGGCTCACACCTGTAATCCCAGTTCTTTCAGAGGTTGAGGTG +GGAGGATCACACAAGAGGATCCACTGAGACCAGGAGTTTGAGACCAGCCTGGGCAACATA +GTGAGACCCCCATCTCTACAGTAATTAAAAAAAAAATTAGCTGGGCATGATGGCATGCAC +TTGCAGTCCCAGCTACTTAGGAGGCTGAGGTGAGAGGATTGCTTGAGCCCAGAAGTTTGA +GGTTACAGGGAGCTATGATGATGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCT 
+GTCTCAAAAAAATATACTATTTTAAATTTATAAATGACAATTGAATTACATTTATATAGT +ACAATGTGATGTGTGATGTATGTATACTGTTAAAGAGAAAAGCCTTTGACTAAATTTGAC +AGAGTTTAGTTGAACAGAAAAAAGATTCATGAACCAGGTAGCACTGAGAACCAAAAGTGG +TTCAGAGATCTCTGCTCCACAGGTGGGCAGGGAATATTTATAGCCAGAAAACGTAAGTTT +CATTGAGAAATAGCCTGTTGGTTACAGCTCTGCAGTTGCCTTGTTTGAACATGTTTTTGG +CAGCTTGCAGCCTGTGAAGGGCTGAGAGCTCAGCTGCTGTGATTGGCTGAGACTGGGCTA +CTTATTCCAAGGGCATATTCTCAGGTTAGGTTGCAGTTTGTTTTGTTTACCTATGTGAAC +CCCCAATAGCTGAGACAGGTCTCAGTTAATTTAGAAGGTTTATTTGCCAAGGTTGGTGAC +ATGCACCCATAACACAGCCTCAGGAGGTCCTGACATGTGCCCAAGGTGGTCAGAGCACAG +TTTGGTTTTATACATTTTAAGGAGACATGAGACATCAATCAACATATGTAAGATGAACAT +TGGTTCGGTCCAGAAAGGCAGGACAGCTCGAAGCAAAGGTGGGACAACTCGAGGTGGGGA +GGGGGCTTCCAAGTCTTAGATAGATAAGAGACAAATGGTTGCATTCTTTTGAGTTTCTGA +TGAGCCTCTCCAAAGGAGGCAATCAGATATGCATTTATCTCAGTGAGCAGAGGGGAGACT +TTCAATAGAATGGGAGGCAGGTTTGCCCAAAGCAGTTCCCAGCTTGACTTTTCCCTTTAG +CTTCTTGGGGCCCCAAGATTTATTGTCCTTTCACATCTATTAAGTTAGATTACAGTGTGC +TATATATGGAGGCAGCTTTATTTTATTTTAATTAATTAATTAATTTATTTGTTGGTGATG +TTGATTTCTCTTGTTTTAAAATCAACATTATAGGCTGGGCATGGTGGCTCATATCTGTAA +TCCCTGCACTTTGGGAGGCCGAGGCGGGTGGATCACTTGAGGTCAGAAGTTCAAGACCAG +CATGGCCAACTTGGCAAAACTCTGTGCCTACTAAAATATATGAAGATTAGCTGGTCACGG +TGATGCACACCTGAAATTCCAGCTACTCAGGAAGCTGAGGCAGGAGAATCACTTGAACCC +AGGAGGCAGAGGTTGCAGTGAGCTGAGATTGCACGATTGCACTCCAGCCTGGGTGATAGA +GTGAGACTCCAACTCAAAAATAAATAAATAAATAAATTAATTAATTAAATAAAATCAACT +TTACGGAGAAAAATTTACATTCAAGAAAATACGCCCATTTTAAGTGTCCAGCTGGATGAA +TTTTCAGAAATGTGTGTACCCTCATGACCACTTCCTCAATCATGATACAGAACATTCCCA +TAACCTGGAAATCCCTGTGGCCCTTTGCTGGCAGTTTCCCCACCCAGCCCCAGGCAACCC +CTGATCTGCCCTCTGTCACTGTAGACTAGTTTTGCCTTTTCTAGAATCTCATATAGAGGG +AGTCATGCAGTATGTACTCTCTTGTAGCTAATGTTTTTGAGGTTCCTTGGTGTTACTGCA +TTGTTTTCTATTGCTGAGTAGTATTCCGTTCTACTCAGTACCATAATTTGCTCATCTACT +CTTCTGTGGCTGGACTTTCCAGTTTGGCGCTAATATGAATTGTGGAGGCAGTTTTAGACC +AAATTTAACTTAACAATACAATATGAAAAGATTAAGTCAAGCTAATGAACATACCTGTTA +CCTCACTTACTTGGCATTCTTTTAAGTTGATGCACTAGAAATTTTCTCTCTTAGACCGGG +CACAGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGGGGATCATGAG 
+ATCAGGAGATCGACACCATCCTGGCTAACACAGTGAGACCCCGCCTCTACTAAAAATACA +AAAAATTAGCTGGGCGTGGTGGCAGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGC +AGGAGAATGGCATGAACCCAGGAGGCAGAGCTTGCAGTGAGCCGAGATCACACCACTGCA +CTCCAGCCTGAAAGACAGAGACTCCGTCTCAAAAAAAAAAAAATTTCCTTAGTTATTTTG +AAATATTCATTATTATTGACTATAGTCTCCTGCTGTGCAATAGACCTCAAAACTTTTTTC +TCCTGCTTGGCTGAAACTTTGTACCTTTTGAAGAGGAAGTCTCCATTCCCTTGTTCTGCC +ACCTCGCCAGCTCTGGTAACCATTATTCTTCTACTGAATGAATTCAACTTTTTTTTTTTT +TTTTTGAGATGTCTCACTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCAGCTCAC +TGCAACCTCCGTCTCCCAGGTTTAAGCGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGG +ATTACAGGCGCCTGCCACCATGCCCAGCTAATGTTTGTATTTTTAGTAGAGACAGGGTTT +CACCATGTTCGCCAGGCTGGTCTCAAACTGCTGACCTCAAGTGATCCACCTGCCTCAGCT +TCCCAAAATGCTGGGATTACCAGCATGAGCCAGGGCGCCTGGCCCAAATTCAACTTTTTT +TTTTTTTTTATGAGTGAGCAGCAGCAAGATTTATTGCGAAGAGCGAAAGAACAAAGCTCC +CACAGTGTGGAAGGGGACCCGAGTGGGTAGCTCCCAAATTCAACTTTTAAAAGATTCCAC +GGGCATGTGAGATTATGTGGTATTTGTCTTTCTGTGGTATTTGTATTTCACTTAGCAGTA +TGTCCTCTAGATTTATTCGTGTCATTCCCAATGACAGAGTTTTGTTCTTTTTTATTTTTT +TGGAGATGGAGTTTCATTCTTGTTGCCCAGGCTAGAGTGTAATGGCATGATCTCGGCTCA +CCACAACCTCCACCTCCCGGGTTCAAGTGATTCTCCTGCTTCAGCCTCCCAAGTAGCTGG +AATTACAGGAATGTGCCACCACACCTGGCTAATTTTGTATTTTTTTTTAATAGAGATGGG +GCTTCACCATGTTGGTCAGGCTGGTCTTGAACTGCCGACCTCAGGTGATCTGCCCACCTG +GGCCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACTGCTCCCGGCTGAGTTTTGTTCT +TTTTAAGGCTGAATAGTACTTCGTTGTATGCGTGCACCGCCTTTTCTGTACGCATTCATC +TTAGGCTGATTGACTTAGGCAGATTCCACATCTTGGCTACTGTAAATGGGGCCGCAGTGA +ACATGGGAGTGCAGACATCCCTGAACACACTGATTTCAATTCCCTTGGATATATTCCCAG +AAGTGGGATTGCTGGCTCACATGATAGTTCTATGTTTAGTGTTTTGAGGAACCTCCATGC +CATTTTCCATCGCGGCTGTACTACACACCGGCAGTATACTAGGATTCCCCTTTCTCCACA +TCCTGGCCAAAACTTACCTTCCATCTTTTTCATAAAACTTGTTCTGATGAGAGGGAGATG +TTATGTCACGGTGGTTTTAATTTGCATTTTCCTAAAGATTTAGGTTGCCACTGGTCAGCT +CCAGGCCCCAGAGCTTCTTCAACTGAAGAAGTACTTGCTGTCTATAGTAAGCATCTCAAA +CTTTAACGTGCATGTGGATCACCTGGGAATCTTGTCAAATGCAGCTTCCAGGTAAGTGGG +TCTGCAGTGGGGCCTGATATTTTGCCTTTTGAACAAACTCCCAGATAATACTGATGTTAC +TGGTCCCAGGACCTGCCTTTGAGGTAGCATTTTTTCATGAACCTATTGGCTGTTTGTATG 
+TCTACTTTTGAAAAATGTGTTTGGTTCCTTTATTCATTCTTACATTGGCTTATTGGTTCT +TTTGCTGTTGAGTTGATTGAGTTCCTCATATATTTTGGATATTAGCCCCTTTTCTAATGT +ATGGTTAGCAGATATTTTTTCTCACGACGTGGGTTATCTCTTTGTTACTTGTTCCTTTTG +CTGTGCAGAAGCTTTTCAGTTTGATGCCATCCTGTTTCTATTTTTGCTTTTATTGCCTGT +GCTTTTGGGGACATACCCAAAAAATCATTGCCCAGATCAGTGTCATGGAGCTTTTCCTCT +GTGTTTTCTTATAGTAATTTTACAGTTTCCGGCATAATGTTAAAGTCTTTGATCCATTTT +GGGTTGATTTTGGGGCATGGTGTAAGATAAGGGTCTAATTTTATTTTCTGCATATGAATA +GCCACTTTTCCCAGCACCATTTATTGAATAGAGTGTTCTTACCCCTTGGCGTGTTCTTTG +TGCCTTTGTCAAAAGTCAATAGACAGTCAGCTGGGCACGGTGGCTCACGCCTGTAATCCC +GCACTTTGGGAGGCCAAGGCAGGTGGATCACCTGAGGTCAGGAGTTCAAGACCAGCCTGG +CCAACATGGTGAAACCCCATCTCTAATAAAAATACAAAAAATTAGCCGGGTGTGGTGGCA +GGCACCTGTAATCTCAGCTACTCGGGAGGCTAAGGCAGGACAATCGCATGAACCCAGGAG +GCAGAGGTTGTAGTGAGCCGAGATCACTCCACTGCACTCCGTCCAGCCTGGGCAACATTC +CTTCTGTGTTCTGGGATATATATTCTTGAGGGTATTCCCATCTGTGGATAGTTGCCAGTT +AGATTTCTGTGGTGGGGAAGTGGAGCTAGAGTATTCTTTTCCTTTTTTTTTGAGACAGCA +TCTCACTCTGTCATCCACGCTGCAGTGCAGTGGCATTAACATGGCTTCATTGATCTCTTG +AGCTCAAGTGATCCTCCCACCTGAGTCTCCTGAAAAGCTAGGGCTACAGGCATGCATCAG +CATGCCCGGCTAATTTTTAATTGTTTTGTAGACATGGGTTCTTGCTGTGTTGCCTAGGTT +GGTCTCGAACTCCTGGACTCAAGCAGTCCTCCTGCCTTGGCCTCCCAAAGTGCTGGGATT +ACAGGCGTGAGCCACTGCTCCTGGTCTGTTTCTTTTGTTTAGTATTTTTGTTTGCTTTGT +TTCATGAGCTGCTATTTTGACTACAGTTTTAATATTTTTAGCAATTTTTTGGATATGCAG +AAATTTAATTCTTATTCATCAAATATATTTTAGTATTTTGTGTGTTTTTTAGATTTCCTT +TTTTAGGAGTCTTTCCAACCATAAACTGCCAAATAGAAATACACACACACACATACACAG +AGGCTATGAATAGACAACTAATTGAAACAGAAAAAAATTGATTAATTAAAGCACCTATGG +GATTTTTCTCCAGCCAAATTAGCATTTTTATTTAAAAAAATCTTTTAGTGTTATTTAGCC +TACATAGGAAGAGGCTTACTTATGTAGTTGATAAGAGTGTGAATTGATCAACTTTTTGGA +AAATAATCTGCCAGTATAATTAAAATTATAATTAAAATGTATCATAGGTCAGTTACACCA +CTTTGAGAATCCTATACTACAGAAAAGAAGAAATATAAAACAAAAACATCAGTGGGATCC +ATGTATATTGATGATTATTGTAGTCATCAGTGGTGGGAATTTTTAAAAACAACCTTAATA +TCCATCTAATAGGGAATAAAGAAATCAGAACTTATCTACAATGCGCATTACTACACAGAA +ATTCAAATGATACATTTGATCTACATTAGCTGGAATTAGTAATGTCCATGTACTCTAATG +GGAGAAAGAAAATTACTAAATAATGTAAGATTTGAGCACGTTCTTTAAACAAAACAAATC 
+AATAAAGCCCTAAAAGGACATACTATTGAAAGCAAGTCACAAAATCCAGAAATGGCAGGG +AAAATGTTCAACAATTTGACTATGTAAAAATGAAAGTTTCTTCAAAACAAAGATAATATA +AACAAAATAAAAGTTATGATACAGACCTTAAGAAACAAACTTGCTAATTATATGGAATTA +ATAAATGTGCAGAATATATAAAGAATATCATTACATGAATTAGAAAACTAGTAAATCATA +ATGAAATTCTCAGCGAGAAATACAATTGGCCAGTAAACATATGCAAAAATGCACAATCTC +TCTAATAAGTGGAGAAATCCAAATTAAAAATGGAAATATTTCTTAATCATAAAATTGGCC +AAGGTTAGCATTGGTCATTTCCTCTGTAACTAAGGGTGTGTGGAAATAGGAACTCTCATA +TACTGATGGTGGGAGTATAATATCATTAAATTTTTTTGAAACACAGATTTTTAATTTTTT +ATCAAAATATGGCCAGGTGCAGTGGCTCACGCCTGCAATCCCAGCACTGTGGGAGGCCAA +GGCAGGCAGATCATGAGGTCAGGAGATCGAGACCATCCTGGCTAACATGGTGAAACCCCG +TGTTATTTGTAAAAATACAAAAACAAAATTAGCTGGGTGTGGTGGTGGGCACCTGTAGTC +CCAGCTACTTGAGAGGCTAAGGCAGGAGAATGGCATGAACCTGGGAGGCTGAGCTTGCAG +TGAGCCAAGATCACACCACTGCACTCCAGCCTGGGCAACAGAACGAGACTCTGTCTCAAA +AAAATAATAATAATATTAAAAATTTATACTTTGCCTCAGTATTTTATTTTTAGAAATCAA +TCATAAAAGAAAAAACCCACCTAACTAAGGTACCAAAAAGATGAATATACAGGTATGCCC +AGGATAGTGTAGGTAACGGTAACAAATAACTGAATCCAACATAAAAATTCCACAGTGAAA +AAGTGACCAAATTATGGTCAGATACCCTGTTGAATAATGATGCAACTATAAATAAGAATG +AGAGAGACCTATATGCACTCATAAGCAATGATCACCACATTCTATTTTATATGGAAAAGC +CAGTTAGGGAACAACACACAAAGCAGGATTACGTTATGTAAAAACATGGGCATGTCTATC +TGTGTGTAAGTGGATGTAGACGACTGAGTGGGCTGGGAGCTGGCGGTGACACATTTCATT +CTACAGACCTAAGCAGTATTTGAATTTTTATAACAATAGCATATGTGTTTCTTATGTGAT +TTACTAGAAACAATTTAATTTCTTCAGTTGTAAAAGGTGTATTTTAAACCTATATAAAGG +TACATGGAGATCGTCATTCAGAATAACAAAATAAAAAGCAATATAAAGACACAAATAGAT +ATAGTACAATATTAAATACTACAATATTCAAGATGATATTAAAATTATATATATCTACAT +CTATATGCATGTAAGGATCTCAGTAAATGATATTGTCTAAAATTAAACTGTGGTAATTTT +TGCAGACCTCTGTGAATATATTAGAAACCATTGAATTGTGTGTACACTGTGTGTGGGTGA +AATTTATTGTATTTGAATTATATCTCAAAGTTCTGTTAAAAAATGAATGGCTTGATATTC +TAGTGCTAAGGGATGGTTTTATTTAAAACATAAACTAATAGTTTCCAGCAGAGTCCCTTA +AAAATATTACAGCAGGGATTTCCAGATCACACTCACTCCCACCTTTTTAGAATTTTTGAC +TATTTTCCAAAGTATTGAATTCAATGAGGTGCAGGTGGAAGAAAGCGTTTACAAAGTCAG +TGGGTTACGGGAGAGCTGGTGTGGGTTGAGGCAGAGTGGGGAGAGAGGCTCCTTTCAAAT +GGAAGCCTCTGGAACCCACAGACGGCAAGCGTAAGGCAGGGCAATCTTCTGGAGACCTAC 
+CAGAGAAAGGCTTTAGACCCAACACTCCCATTCACAAACATGGACAGGGGCATCTCAGTG +TTTCCTTAACTTCACTCTTTCTATATTGACACAAGGAAATAATTAAAAGGGGGTAATTCT +GGTTTAGTTTATTTCATTAAATTATCAACAAACAACTTTTGAAACAAAAACTATTAAAGT +GGAACTCAATAAAATTACTAGGTTTAATAATCTGTATCTGTGACTCAGATACAGATCAGG +CAAACCAGCTGTGGATGAAGCTCCCAGCTGTGGATGATGCCCCCAGACATGGATGAAGCC +CCCAGCTGTGGATGATGCCCCCAGATGTAGATGAAGCCACAAATGATGCCTGTGTGGACA +GACAGAATGATGGACAGGCAGATGAATGTGAGGGTTTTATGTGAAACAAGTCCACTTGGT +TGTTGACAAGATGCTGTTTTAAAGTTTCATTTTGCCATACTTTGAATAGCTTTTCATTAG +CTAAATTTAGCCACATGTAAAATCACTAAGGCAGACACCTGGAGTTCCCATATGAAAATC +AAATGTAAACCAGCAGTGACCTACTTCAATGTGATCATCGGAAGTCAGAAGTTGAGCTCA +TTTTTGATGTTTAAAGCCTGCATAATATTCACTGTGTTATTATTCAGTGTATTACTATTT +CCTTCATGATGGAAATTTGGTTTGCCCCAACTTTCTATTCTATCAAAACACTGCTACATA +GAAAATCCCCATGCACATATTTCTCCTAATTGTGGAAATATTTTACATAAAAGACTCTAG +ACATGGGATGAAATTCCCAGGTTATTGGAATTTTAAAATAGATAGGTACTCCCAAATTGC +CGTCTTACAAATTATATGAATTCGTAAGCTTCCAACTGTTATGGAGTTACCCATTTTGAG +AAATCTGTGCTAAAAGGACCCAAACAATGCTGATGACAATGATCAGGATAATAAGTACGC +TGGGAAGACAACAAAATGATTTAAATCTTAGACAAGTCATTCTAGGTGTCTCCACTGTTT +CAGTTCTTGCATTCATTCTTGTGGTATCTTTTCCCTTTTACCAATAAAAAAGCTCCCTGA +CATCACATTGTGGCAGTCCCCATGGTTTGCCGCAGTTACTGCGGGACTGAACGAAGGAGG +ACGAATGAAGAAATGAAAACCAAGGAAAAAAGGAGCTGTTTAAAGAAGGGTCCAGGGAAG +AAGAAGAGGGCTCCCAGCTTCTAGTGAGCAAGGGCAGCAGCCCTGAGCTTCTACAGCCCT +TCATATTTATTGAGTAGAAAGAGCAGGGAGCAGGAGGTAATGATTGGTCAGCTTCTCAAT +TGATCACAGGTTCACATTATTGCTAACAGGTTTCAGATGTGCCTAATCTCAAGAAACGCC +GCGCCTGGGGCATGACTGCCCTCAGCATTCCCTCTGGGTGGCAGACGCAGTTTGCCAACA +TTCTGCATTCATGAGAACAGTTTACTGTTTACTCATATAACCTCCAGTGGTACACCGAGT +TGATCATGACCCTCCCTCTTTCGGCCTGCAACATCACATGAATCCAATGAGTATTGGTTA +GTAAAATGCCTATGACTAGTCATCTTCATCTATGCAATTAAATATTAATTCATCAAACAC +TTCAAATGTAAGCAATTAATAATTAGTGAATGAAAAATACATAATACATCAATTAGAAAA +AAACTCTATTAAAAAGACATTTGTGTGATAAAAGAGATTGCCATTTTTGTATTTTTCTAC +AAAGTTAAAGAAAACTAAGTCAGCTTATATAAGTGAATTTTAAAAGCCTTTAGGCCAGGC +ATGGTGGTTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGTAGGCAGATCACCTGA +AGTCAGGAGTTCGAGACCAGCTTGGCCAACATAGTGAAACCCATCTCTACCAAAAATACA 
+AAAATTAGCCAGGCGTGGTGGCAGGTGCCTGTAGTTCCAGCTACTAGGGAGGCTGAGGCA +GGAGAATTGCTTGAACCCGGGAGACGGAGATTGCAGTGAGCCGAGATGGTACCACTGCAC +TCCAGCCTGGGCAACAGAGTGAGACTCCATCTCAGGAAAAAAAAAAAAGGGCATATATAT +CCATTATTTCACTGGGCCTTCACACAGCCCTGCTAGTCAACCCTCGAGAATTGAGGAATC +AGAGAACTTAGGAACAAGAACATGACCTGGGACTTCTTATGTAAGTGAGAATCTTAGGCT +AAATTGTCTTGCGGTAAATGCCTTTCCCATCCTCAGAAGTCTATAGTGAGCAGAAAGCAG +TAACAGCATCCTCCCTTCCACTCTTCCCTTCAGGCTGAACACATCCCTGTCCCTACAGCC +TTTGCAGAGACACAGGTGACAGAGCACTCAATATAGAATCCCTGGGCTCACACTGGCTTT +CCTACAGACACAGGGGATCCATGAGGCCCACAGTGACATACACATGTACACAAGGATGCA +CGTACTGACACTGATGAGCCCTGGAAGTCAACACAGAGTGGCATACAGAAGAACACATGC +AGGCACACACACAGCCACAACTGGACCTGCGAGGTCTACAGCCCCACAAATGAAAAGAGG +TGCATACCATCTCACAACAAACTAGAACTTTGAGAGAACCACACACACACACAGGTCTAC +AAATTCACCTGAACAAGACGCCGTCCATAGTCACTGGCGTAAGGCATCTACTCACTATCA +CACAGAACACACTCAGGCAAGCAGACATAGATGATACATCCACAATCTACAGAGATGAAC +TGCAGAGTTACATGCACAGATATGCCAGTCATACATTCAGATCTGTGTAAACAGTGAGGC +ACACACACACATACACATGTGCACTTATACAGAAAGACTCAGACCCAGCCAGGCATGGTG +GCTCAGGCCTGTAATCCTAGCACTTTGGGAGGTCGAGGCGGGTGGATCACGAGGTCAAGA +GATTGAGACCATCCTGGCTAACATGGTGAAACCCCATCTCTACTAAAATTACAAAAAAAT +TAGCCGGGTGTGATGGCGGGCACCTGTAGTCCCAGCTACTCAGAAGGCAGAGATTGCAGT +GAGCTGAGATTGCGCCACTGCACTCCAGCCTGGACAACAGAGCGCGACTCCATCTCAAAA +ACAAAAAAAGAAAGACTCAGACCTGACCTGTTAGTCAACAGCCAGATGCCAACAGATAAT +CACATTAGACTCATCAGCAAGCACACCGTGCACACAAGTTCCACTCCCATGTACAGGTTC +CTGTCCCATGTACACAAGCCTGCACACAAACACGTATCCAGACTCCAGAACTGTTGAGAC +AGTGCCCACATACACACTCATAGAGTCACCTGTGCACAGGTGCACGTACACATTGTTTCA +CAGAGGCACACCAGGACAGAAACACATAAGAAATGTGAGGCAAAAGCGGACCCACACCTA +CACCTGTAAGATACACAGAGAAACAAACACATACACAAACTTACACATAGACTCATGCAC +TGTGTAATCAACAGTTCTATACCCCACCAAGGCTCTTGTCCTGGTGTGTCCAGAATTGGT +GGGTTCTTGGTTTCACTGACTTCAAGAATGAAGCCACAGACCCTCGTGGTGAGTGTCACA +GTTCTTAAACGCAGCGTGCGCGGAGTTTCTTCCTTCTGGTGCGTTCGTGGTCTCGCTAGC +TTCAGAAGTGAAGCTACAGACCTTCGCAGTGTTAACAGCTCATAAAGGCAGCGTGGACCC +AAAGAGCTAGACACAAAAGTTCTCCACGTCCCCACTAGATTAGCTAGATACAGAGTGTCC +ACTGGTGCATTCACAAACCCTGAGCTAGACACAGAGTGCTGATTGGTGCATTTACAAACC 
+TTGGGCTAGATACAGAGTGCCAATTGGTGTATTTACAATCCCTTAGCTAGACATAAAGGT +TCTCCAAGTCCCCACCAGAGTAGCTAGATACAGTGTTGATTGATGCATTCACAAACCCTG +AGCTAGACACAGAGTGCTGATTGGTGTGTTTACAAACCTTGAGCTAGATACAGAGTGCCG +ATTGGTGTATTTTCAACCCCTTAGCTAGACATAAAGGTTCTCCAAGTCCCCAGCAGAGTA +GCTAGATACAGTGTCGATTGATGCATTCACAAACCCTGAGCTAGACACAGGGTGCTGATT +GGTGTGTTTACAAACCTTAAGCTAGACACAGAGTGCCCACTGGTGTATTTACAATCCCTT +AGCTAGACATAAAGGTTCTCCAAGTCCCCACCAGACTCAGAAGCCCAGCTGGCTTCACCC +AGTGGATCCCGCACCGGGGCCGCAGGTGGAGCTGCCTTCCAGTCCCGCGCCGTGCGCCCG +CACTCCTCAGCCCTTGGGTGGTCGATGGGACTGGGCGCCGTGGAGCAGGGGGCGGCGCTT +GTCGGGGAGGCTTGGGCCGCGCAGGAGCCCACGGCGGTGGGTAGGCTCAGGCATGGCGGG +CTGCAGGTCCCGAGCCCTGCCCCGCGGGGAGGCAACGAAGGCCCGGCGAGAAGTCGAGCA +CAGCAGCTGCTGGCCCAGGTGCTAAGCCCCTCACTGCCCGGGTCCGGCTGGCAGCTCCGA +GTGCGGGGCCCGCTGAGCCCACGCCCACCCGGAACTCGTGCTGGCCCGCAAGCGCTGCGC +GCAGTCCCGGTTCCCGCCCGCGCCTCTCCCTCCACACCTCTCGGCAAGCTGAGGGAGCCG +GCTCCGGCCGCAGCCAGCCCAGGAAGGGGCTCCCACAGTGCAGCGGTGGGCTGAAGGGCT +CCTCAAGCGCGGCCAGAGTGGGCGCCAAGGCCGACTAGGCGCCGAGAGCGAGCAAGGGCT +GTGAGGACTGCCAGCACGCTGTCCCCTCTCACTGGGTTGCTATTAAAAATGGGCGCTGAA +TATAAAAATTAACCGGGCGTGGTGGTGCATGTCTGTAAGTCCCAGCTACTCCGGAGGCTG +AGGTGGGAGAATAGTTTGAGCCCGAGAGACTGAGGCTACGGTGAGCCGTGATTGTGCAAC +TGCACTCCAGCCTGGGCGACAGAGTGAGACCCAGTCTCAAAACAAAATCAAACACGGGCG +CTGCCCTGGGCAGGGGAGGTCTGGGCACAGCTGTGCTGATGTGGGCAGTGGACAGGTGTC +ACGTGAGAGCCTGTGGCCAAGCCTAAGCTGCGGTTAAGGTGGGAGGCTGAAGGGTTTGGA +GTGTGCACCACCCCGGACCACCTCTCAGTAGGAATGTGGCAATGCACTTCGGTGCTGGAT +TCCTACTTCTGGCTGTGTGATTTGCGACACGTTACCCCACGCACCTGTACCACAACTTCC +TCATCTCCCAAATGGGATGACAGTAGTTAGCAAACCTCCCTGGACTGCTGTGAGCATTCT +GCCAGAAAAATGTACTCCAAGCTCTTGAAGTAGGCCTGGCGTTTGGTTGGTGCTCAGTAC +ATATTCAATGTTTCTGTCGCTGGCGTCATCACTGTGTTCATGGCAGAGCTCTGTGCCGCC +AGGACTGGGCCGCCACCCAGACCTTCCTTTCCCGCCCCAGGGGTGAACTGTGGACAGGAC +CCTCTGGGGGGACTAGGCCAAGGCTCTCCCACCTCTGGGGACCCGCGCCCATGGGCCTCA +GATATGTGGCTCGAGAAGGGGGAGTGGGGGCAGGGCCCCTGGAAAACCCCTCGAGGCCAG +ACCCACCACTCCCAATATCCGTAAGACCAGGGGCTTGAGTCTTTCCTGCAGACAGATGTG +GGCAGGAACCCACCCGACTCCCAGCTACGTGGGGACGTGGACGAACTAACAGGACAAATT 
+CTAGTTCCTGGACACCGCCCTCCCAGTATCCTCGCAATTAGACACCCATGCAGCGGCACT +ACACTGATCCGCACACGGAGATACACAGCGACATCCGCCCGCAAGAGCATGGTGGGTGCC +GGAGCTCCCGGCTGTGGGCCCAGGAACTACATTTCCTAGAAGGATGTGCTAGATACTCGT +TCGCGTCGGGACGCAAGCACCGGCCCGAACTCGCTCTAGGAAATGGAGTCTGACGCCTGC +GCGGCGCAAACGCTCCCGGGAGGTGTAGTTTGCGCCTATTTCGCGCAGGCGCGCTTTCCC +GCAGCGGCCGCCTGCTGCTCTTTGTGGCAGTCGCAGTCCTTTTGTGGGAGTCCGGTCTGT +CCACTTGCCGGTCCCTCAGACCGTCGGCGGTCTCTGTCCGCTTCGGGACCTGTCCGCTGG +TCGCTCCGCGTCCGATGGCTCCTGGCCGCGGAACCTTAGGCCTGGCCCTGGTCTCCGAGC +GCGGGTTCGCCGGGAGGAGCGTGTGGCGGGGGTGTGCCGGGGCGTGAGTGCGCCGAGCAT +GGGGCTGAGCCTGGTGTGGGGAGTGGGTATCTGCGGAGCCGGCCTGAACCCCACCTCAGC +CGGGCGCGGGGAGGGGGCTCCGTGCGTGTGATCGTGCAGCTGTGAGCGCGTGGCCGCCCC +GCGGGGCTCCGCTGCAGGCCCCTCAGCCCCAGGAGCAGTACTCGCTCTTCAGGGCCTGCC +CTGGATCCTGGAGGCTACACAGCTGCCCACTCCTCCTGGGGAGGCTGCCGTGGAGGCCAT +GGAGATCCCTGCCCCGGAGCCCGAGAAGACAGGTACAGCTTCACTCTTGTAGTCAGTATG +TCTGTGGATTTGCACTTGAGGATATTGACACTCAGAGAGATCAGGCCAGTTTCCCAGAAG +CATCCAGCATCTTTTTTTTTTTTTTTAAATGGAATCTCACTCTGTCACCCAGGCGGGAGT +GGTGCAATGGCTTGATCTCCGCTCACTGCAACCTCTGCTTCCCGGGTTCAAACTATTCTC +CCGCCTCAACCTCCCAAGTAGTTGGGATTGCAGGCGCGCGCCACCACGCCCGGCTAATGT +TTGTATTTTTAGTAGAGACGGGGTTTTGCCATGTTTCCCAGGCTGGTCTTAAGCTCAAGC +GATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCATCGCTCCTGG +CCTCCTGCTTCTTTCTTCCCCGCGCCCAATCATTTGTTCCCACTCAGACGTGCTTGATTG +GGGGTAGCAGCTCTTTTCGTCATGAATTGGAGTGTCTGAGACACAGGCAGATTATTCCTC +GATGCTGTTTCTGTTGGTGCTAGCTACTGGGGCTCGTTCCCCACCTGGCCTTGCTGGCAG +GTTCTGCCTCCTCTCTGATTCTCACTTGTGCCACGACAGGTGGGCTTTAAGCTCAGCCGT +GAGCCTGGTAGCCGTGACCTTGACTCATTTCTCCTTCGCCAGCTCTTTCCTCTCAGGATC +CTGCTCTTTCCCTGAAAGAGAATCTCGAGGATATATCGGGTTGGGGTCTTCCCGAAGCCA +GGTCCAAGGTGAGTGGCTGTGTGTTCTTCCTTCTTTATGAAGAGGGTTGGCACATTCCCT +CCCCAGCCCTGGATCGTCAGCCTTCCACAGACCTTCACCCGGGTACCTGCTGGCCAGTCC +TCAGGAAGCTCTGGGCAGTGGGAACAGCTCCAAGTGAAGAGTCCAGATGTTGCCCGAGGT +GTGCCCTCGAGATGTGCTCTATTGAATGTCTCGCCCAGTCCAATTCAGTGGGTCAGTTGC +TGGGGATCTCTCTGCCCAGCGCTGTGCTGGGAGCTCTGAGGAGTGGTTTTAGTGGAATCT +GAGAAAGTAGTTGACGGCTCACATGGGGTCGCTTTGCCTATCCACGTGCTGGCCAGGGGT 
+TGGTGCAGGATGTTTGCACGGTTGGTTTACTGGGACTGGAATCAGACAGGGGCACGTGGA +GCAGCTCAGGAAGGGCTGGATTGCAGCACTTGGTTAAGAGTGGCGGTAGGGGACACATCC +CTGTGTGCTTTCCTCTGTACCCAGCCCCTGGCATGGGGCCTTCATGGAGAGTCCTGGAGA +TTTAGAGAATCTGGGGGCTCAGCAAGGAATTGGCCCTGGGAACTTCTGCAGCCAAATGGG +ACCCAGATCCCTCCTTTTACAGACTGTGAACTCTGTTCCTCATTTTGTTTTTCTACCGCA +ACCACCTCACCCCATGGTTGTATTTAGAAGGGTGCAGTTGACAAAGAGAGCACAGTAGAG +TGAAAGATGTGGAGGAATAAGGGGTCTTTGGGTTTTTGTTTTTTTTTTAGCATTTTGTGA +TTTTGTCAGTCTGCCACTCAAAAGAGGTGATGGCAGATTATAGCCAGCTAGAAACAGCTG +CAAAACGTGTATTGTGAGCTGTGCCTCACTTCTGTAGTGTGTCACAATGCTTTATAGTCC +TCAAGAAACTTGACAGTCAAATGCAATGCATGTTCCTTGCTTAGGGGATAGATCCAAAAA +AATTGATAAAGGGCAGTTTGGGACAAGAGGAAATTTGAAAATGGGCTGTATCTTAGGTCA +TATTGTATCAATATGAAATTTATTGCATGTGATAATAGTACTGTGATTATGTAAGAGAAT +ATCCACAGGAGATACAAGCTGAAGACTGTAGAGTGAGACATCATGATGTCTGCAACTTAC +CTTCAAATGGTTCAGCAAAAAAAAATGTGTGTGCATGTATGTATGTCTGTATGTGTCTGT +GCAGCTCCGTCTAGGTGGTAGGGCCCTCATGCATGTGTTATTCCTATTCCTAAAATATTA +GCTTGTTAGATCTTTGAATAACCAGTTGACTCTCCTGAGTACACATCAGCTTTGGCATTT +TCTCTTGACAGTCTCATTCTCACTCACAGTTCAAGTTGTTCTGTCTCCTGGGACATCTTG +TGTGGTCTGTTCTGACTTCTCTGCTAGTGTCATTACCACATCAGCATCTGATTCCAATGC +ATACTGTCTGGGTTTTTTATTCCTTTCCCTGCAGAATGATGACACATTAGACATGGACTT +CTCCCAGTGGCCAGAAATACCCATCAAAAAGGCCTGTGCGGGCCTCAGACCCTACCCTAC +TCTTGCTGGTTTTGCTCCCCACACTGAAGCCATGTAGCCTCTGACAGCCTCACTGGCCAA +CACCATATTCTGATACCTGTGGAGCTATTTTTTTGTTTTTCTTTCTTTTTCTTTTTTTTT +TTTTGAGACAGAGTCTTGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCACAATCATAGCTC +ACTGCAGCCTTCAACTCCTGGGCTCAAGCAAATCCCCCACCTCGGCCTCCTAAGTAGTTG +GGACCAACGTGTGTGCCCCCACACCTGGCTTGGAGCCTTGTTTCAGCCCATTTTCTCCTT +CCCTCGGCTATCGTCATCCTTCATGCCTGCATGCCTCAGTTAGGGGTGTGTTCTCAGCAT +TTGTCCTCATCTCTACCTCCTCATCTGAAATCCAGTGTTTTGGGGGGTAAGGTTTTATTG +AGGTGTAATTTATATACAGCACAGTTGTACAATTTTAAGTATACAATTTGATGAGGTTGA +GCAAACGTATACAGTTGTGTAACCAGCACCACAATCATGATATGGGACGTTTCCATGACC +CAAAAATGTCCTCATGCCCCTGTGCATTCAGTGGTCTCCCCTCTCCCCTTGGTAACCACT +ATAGTTTTGCTGTAGATTTTCCTCTTTAAGGATTTCATATGGAATTTATATTATGTGGTC +TTTTGTCTCTGCTTTATTTCGCTTAGCATATGCTTTTGAGTTTATCCATATTGTTGTGTG 
+AATCAGTAGTTTGTTCCTTTTTATTGGTGAGTAATATTCCATTTGTATTAGTCTGCTAGG +GTTGCCATACAAAGTATGGCAGGAGGGCTTAAATGACAGAAATGTATTTCCTCACAGTTC +CAGAGGCAAGAAATCTGAAATCAAGGTGTCAGCAGGGTTGATTTCTTCTGAGGCTTCTCC +TTGGCTTGTAGATGGCCACCTTTTCCCCGCATCTTCACATGGTCTTACCTCTTCTATGTC +TGTGTCCAAATTTCCTCTTCTTATGAGGACACCAGTTATATTGGATCAAGGCCCACTGCA +TTGAATTCATTTTAACTGAATTACTTCTTTAAAGAACCTCTCTCCAGGCCGGGTGCAGTG +GCTCACGCCTGTAACCCCAGCATTTTGGGAGGCTGAGGTGGGTGGAACACTTGAGGTCAG +GAGTTCAAGACCAGCCGGCCAACATGGTGAAACCCTGTCTCTACTAAAAATACAAAAATT +TAGCTGGGCGTTAGTTCTCGCCTGTAGTCCTAGCTACTCAGGAGGCTGAGGCAGGAGAAT +CTCTTGAACCCAGGAGGCAGAGGTTGCAGTGAGCTGAGATTGCACCACTGTACTCCAGCC +TGGGTGACAGAGTGAGACCATGTCTCAAAAAAAAAAAAAAAAAAAAAAAACCTCTCTCGG +AGTTACCAAGCATTGAGACATAAGAATTTGAGGTGAGGGGTATACATTTCATCCCATTGC +TCCATTGGATGGAGCAAACTGCTCCGCAGTTTGCTTATCCATTCACCAGCCGATGGACAT +TTAGGTTACTCATAGGCTTTTGCTATTACAAATAAACCCGCTATAAATATTTGAGTACAC +GTATGGCTGTGCATTTTAATTTCTCTTGCATAAATACCTAGGTGTGAAATTGCTGAGCCA +TATAGTAAATATATGTTTGTCAATTTTATTGATCTTTTCAAAAAACAATTTTTTTGTTTT +ATTGATTTTTCTCTATTTTCTGTTTTCCATTTCAGAGATTTCTGATCCTTATTATCTCCT +TGGTGTCTTAAGATTGAAACTTACTTTACTTACTTTAGTGTCTTAAGATTGAAACATAGA +TATTGATTTGATACCTTTTTTCCTCATATAGCATTCAGCGTTGTAAATTTCTAAGCACTG +CCTCAACTATATCCTACAACTTTTGTTATGTCACATTTTCACTTTTATTCAGCTCAAAAT +AGCTTCTAATTTCCCTAGTGATGTATTCTTCAGCCATATGTTATTTCAGAATGTGTTGTT +TCATTTCCTAATATTTAGGGCATTTTCCAGATAACTTTCTATTATGGATTTCTGATTTAA +TTCTATTGTCATCAAAGAATGAACTTTGTGTGATTTAAAGTCTTTCAAGTCTACCAAGAT +TTATTTTATTTCCCAGAGTGTGGTTTATCTTGGTAAATGTTCCACATGCCCTTGAAAAAA +TATGTATTATGCCTTTGTTGGTTAGAGTGTTATGTAAATATCAGTTAAGTCAAGTTGAAT +GATGGTGTTTTTTAATTGTTCTGTCAGTTTTTGCATTATGTATTTTGAAGCTCTTTTAGT +AGATACATTTATATTTAGGATTGCTATGAGTTCTTAATGAATTAACTGTTTTATCATTAT +GTAATATTCTCTTTGTCTCTGATAGTCCTCTCTCTGAAATCTACTTTGCTATTACTATAG +GCACTTGAGCTTTTTTTTTTTAAATTAGTATTTGCAGGGTTTATCTTTTTCTATCCTATC +ACTTTTAACCTATCTTTGTATGTAAAGTGGGTTTCTGTAGCTTGCTTTTTGAATTCAGTT +TGACAATCTCCACCTTTTAATTGGAGTGTTTAAACTATTCGCATTTAGTGTAATTATTAA +TATGGTTGGATTTACATGTCCTTTTTTTTTTTTTCTTTTGACAGAGTTTGGCTCTTGTCA 
+CCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTCACTGCAACCTCCACCTCTTGGGTTCA +AGCAATTCTCCTGACCTCAGGTGATCTGCCACCTTGGCCTCCCAAGTGCTGGGATTATAG +GCGTGAGCCACCCCACCCGGCCTTGCTATTAGTTTTCTATGTCTTATGTCTTTTTTGTTT +CTCATTTCCTCCCTTAATGACTTCTTTTGAGGTAAACACATACTGTTCTAATATACTCTT +AATTCCTCTGATTTCATTAATATTTTTCACCTTTTAGTTATTTGATTGGATGCTGGACAT +TGTGAATTTTACTTTGTTGAGTAGGTTTTTGGAATTCCTTGTTTCTGGGATTTGTTCTGG +GATGCAGTGAAATTACTTGGCATCTATTTGATCCTTTCAAGGATCAAAGCAAGTCTGGAG +TTGGGTGGTGGCCAGGATGCCCAGGCCCAGGGGGAGCAGGCAGGAAGGATTGGAAGGCAG +TCAGGTGAGGACTGCAGGTAGCCTCTCTTAGTCCCTTGGGGCACAGAACTTGCTGATATA +TACAAGGTTTCCTTCCTTAGAGTGTAATAACAGTTTCTAGTCTGGGATCGGATCCTTTTG +CGTTACTGAGCTTTGAATAGAGTTGCTGGGATTTTTGTACTATTAATATTCTCTCAAATT +TTTAAGTTTGAAATTCTTGACTCCAGGAAGCATAGCATCTCACCCAGGACTTAGAGCCTG +ATACAACAGCCATGTACAGCCCCTGGGTCCCACCGCAAGTTGCCAGGGCCCTGTGGTGTC +TATTCAGACCTGCTTTCCCAATCTAGAAGGTGGTGTGGGAATGCATGCCAATTCCCAGCC +CCCCTCCCTGCCCCTGGTCCCCGAATTCAGTCATGGTCAGTGTCTACAGCACAACCTTCT +GGCATGGGAGAAGGTCACCTGAGGTTAACCCTTTACTTGTAGGGTCCTTAATCAATCTCT +GGCTCCTTTGGAACCATGGACACAGAATCAGTCCTGGGTGAGAACCTATATGTCATTTCA +GGAATCGGTGAGTTTCAAGGATGTGGCTGTGGACTTCACCCAGGAGGAGTGGGGTCAACT +AGACTCCCCTCAGAGGGCCTTGTACCGGGATGTGATGTTGGAGAACTACCAGAACCTTCT +TGCCCTAGGTAAAATCCCCCCAGCCCCAGGCTGGGCGTCGGCTGTCAGCCTTCTTCTACA +TGGTAGATATTAAGGAGAATCCCTTCAGTGCCGGCCCTGGTGCCAGATATATCATGGGGT +GGGAAGCAAGGCCTGTGCCTTGGGGCACTCATGGCCAGGGGCCTCGGCCACGCCAGAAGT +CCCCAAGGGACAGCGTGGGAGCTGGTAGCATCCTTGCTTGGTGTTTCCCCTGGTCCTGGG +ATAGGGGCAGGGAGAGAAGAGAAGAAGGTGGGACCCTGGAGCCCAGCTGGCTCTGGAACA +GTCCTCAAGCAATGAAGTAAGGACGTCAGCACACGTGGGGTCTTCAGAGTATACACTGGG +ATTTGGGTTCCAGTCTGAGACCCTCACTGCTTTTGGCCCAGAGGACCTCCTGTTCCCAGA +GAGGGTGTCCACTTCACAGGCATTGTCCTTGTTAGGGGGCGGCCTGTAAGGTCGACTGGG +CCTGGAGAGCTGCAGCATGCCCAGCCCACTTTCTCTCCACGAGCAGGACCTCCACTGCAC +AAGCCAGATGTGATCTCTCATCTGGAACGAGGCGAGGAGCCATGGAGCATGCAGAGGGAA +GTCCCCAGAGGGCCCTGTCCAGGTGAGCAGAGGCACAGGTGGAAGGGTGCCAGCCCCAGC +ACCCCTGGTGGCACCTCCTCCTATGGCCCCTATTTTCCTCCCTCAGTTTGCCCCGGCTCC +ATCTCCCCTTTTCAGGTCCCCCGCCAGACCCTCCTGCCTGCCTCCCTTCAGCACGTACTG 
+AGCACTGCCTGTGTGCTGAGACCTGTTCTGGGATACAACAGGGAACAAACCTGGCCACTG +GGACCAAATGGTCACCTGCTGATGGGGATGGGGAGGCAGGAATAGAAATATTGAGCCATT +GGTGTTGGCTGGGCAGCCGGGGGTGTGGGATTGGGGGCAGGGGGCCACCAGGGAAGGGGC +AGCTGGCGTTGGTCAGTGTCAGGGTCAGCATGAGCATCAGCATCAGGGCCAGCGTCAGCG +TCAGGGTCAGAGCCAGCATCAGTGTCAGGGCCAGCGTGAGCATCAGCATCAGGGCCAGTG +TGAGCATCAGTGTCAGGGCCATCATCAGCGTCAGGGTCAGAGTCAGCATCAGCATTAGGG +TCAGCATCAGCGTCAGGGTCAGGGTCAACGTCAGCGAGGGCTTCCAGGCTGTGGCCAAGA +TGGCAGCCCTCCGTAAGGGAAAGGGGACTGGGGTGGGTGTTGAGCAGAAGGTAGTCTGCT +TTTCTTCTCCAACATTCTTTTTGTAGACTTCTCTTCTGAGTTGGTTGATAAATAAGTGGT +AAATTCGGCTTAATTTATTGGACAGTGACTGCTGAGCATGTTAGGTGTCTAGAAAGCTCG +AGAAACACCTTTCTGTGGGAAAGGCAGCCGATAAGCTGTGGAGCACAGAACACGGGGGTC +GGGGAAGGCAGCCCCTCCTCCATCCCCCTCCACTGCCACATACCACCCAGCCCCAACCTG +CAACAGTGGCTGCAGCCCCAGCTTTCTGGTCTTTATTTCTCACCCCAGCCTCTGATGCTT +GCTTTGTCCTCTTTAACAAGATTACAGGTTTCAAAAAATTCATATTTTTGTTTGATTCAT +ATATGAATATACTGAAAATGAGGCTGAAAGGTTACTGAAATCCTGCATAATCTTTCCACC +CAGAGATGACGGTTCTCTAATAACTGACCATGTCACTGATTCCTCTGGGCCCTTCCTCTG +TGTGCGGATAGACTTCTAAAGCTACATGTGGCCAGGCGTGATGGCTCACACCTGTAATCC +CAACACTTTGGGAGGCCGAGGCAGGCAGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT +GACCAACATAGAGAAACCCTGTCTCTACTAAAATTACAAAATTAGCTGGGCATGGTGGTG +CATGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCAGGAG +GCAGAGGTTGCGGTGAGCTGAGATCGTGCCATTGCACTCCAGTCTGGGTAACAAGAGTGA +AATTCCATCTCAAAAAAAAAAAAAAAAAAAAATTAGCTGGGCGTGGTGGTGAGCACCTGT +AATCCCAGCTACTCTGGAGGCTTAGGCAGGAGAATTGCTTGAACCCGGGAGGCAGAGGTT +GCAGTGAGCCATGATCGCACCATTGCACTCCAGCCTGTGTGACAGAGCTAAACTCCATCT +CAAACAAATAAATAAATAAAGCCTTATGCACCCTCGGAAGCACAGCACTCACGCCGGAGT +CTGGCTACCACACTGGCCATGGCATAGTGAGCGTCCTCACGTGATGTTTACTGATTGCAC +AGTATCCCTTAGATGAATGTGCCCTGCTGTGTTGTTGGCCCTATTGCTGCCAAAACAACT +TTTTTCTGGCCTGGGCCTCTTCTCTCTGCTCTGTGCAACTTCCCTAGACTCTTACCTCTC +ACTTGGTGTGGATGTGGTCTACACTATAGCAACACCAGCTGAGCTGCAGCCCTGTGTCAC +TGACCATCTCGTGGGCCTCTGCTTGCATGTCTAAAACCAGACTTGGAACTGGCCCTTCCT +CCTCCTTTATAAAAGCTTCCACCATCCACCCAGGCTTGGGTGTTTGCTGAGGTCTCATCC +TCTCCCCCATTTCCTCTCTTGCCACCTTCAGTCAACAGCTAGGGCCATCAGTTTTAGCAC 
+CTAAATGGCTCTTAGATCTGCTGCTTCCTGTTCATTTTCACAGCCTTGGTTCCAGTCTGA +AGTTGCCATTTTTCACCTGAATTATTCAGATGGCCTCCCTAACTGGCCTCCCTAACTGCT +GGCTCCAACCACCTTACACACCACCAAAGCCACCTTCAAGACAAACCATTATGCCAGGCA +CAGGGACTCACACATCTAATCCCAGCACTTTGGGAGGCCAAGGCAGGAGGACCACTTGAG +CCCAGGAGTTGGAGGCTGCAGCGAGCTGTGATTGCACCCCTGCACTGCAGCCTGGACAAC +AGAGCGAGACTGTGTCCCCGCCCTCTGCTTCCCCCGCCCCCCGTCAAGACACCATATGAT +CACATCACCCTCTGTGCAAAAGGCCTTTCTTCCCCGGCTGCCCTCTGAGTCAGGCATCTG +GTCACCTCGAACCCATCCACCTTGCCAGGGTCACCTCTCAGCCTTCCTGCCTCTGCGTGT +CTTGTCTTCATCTTACTGTCTCACCTCTGAGCTTCTGCACATGCTGTTCACTCTCCACCT +GGCTACCTGCCATGTGTGCCATGGCCAGTTCCACTTGCCCTGCTCTTCTGCATGTGTTGG +GGCCACAGGCAGCACCACCATGCAGCTCTGCCCGCTGCACTAGACTGCTCCCTTTTAAGC +CTGGGGAGGACAGAATTAGAAATAAAATCTCCTGCCAGCTCAGAAAACCTTGCTATAGAG +ATAGAAAAGAAAGAAAGCCTTCTTATTGAATAAGTGTTTAACCAGATGTGGTGTACAGGA +GAGAAAGAAACCTCCCCCTTTGACAGAGGAGACACATCCAACCCAGCACACAGGGCAACT +GTCTTCACCTAAAGGAAGAGTACAGCTTTTTGGGTTTTGTTTGTTTGTTTGTTTTTTGAG +ACAGGGTATCCCTCTATTGCCCAGGCTGGGGTGCGGTTGCATGATCATAGCTCGAAGCCT +CAACCTCCCTAGGCTCAGGTGATCCTCCCACCTCAGCCTCCTGAGTAGCTGGGACTATAG +ACAGGTACCATCATTCCTGGCTAATTTTGTATTTTTTGTAGAGGGGTCTCACTGTGTTAC +CCAGGCTGGTCTTGATCTGGGCTCAAGTGATCCGCCACCTTGACCTCCCAAAGTGCTGGG +ATTACAGGTATGAGCCACCGCGCCCAGCCTCTTGTAGCTTTTTGACAAGCAGGAAGTTAC +ATCTTGGACTCGACTCTTGGACTAGGCTCATTTTGCTTCTGAAAAGGTTTGCATGCGTCT +CGAGACAGAGAAAGCATTTGCAATTCCCAGTGTTCTAAAGGGAAGCGCTGGGGTGTGGGG +AGCCCTTTTTTCCTTTTGGCACCACAGACAATTTTTAATTCATTTGTTAGTCCTAGCTAC +TCGGGAGGCTGAGGCCGGAGAATGGCGTGAACCTGGGAGGCGGAGCTTGCAGTGAGCCGA +GATCGCGCCACTGCACTCCGGCCTGGGCGACAGAGCGAGACTCGGTCTCAAAACAAACAA +ACAAACAAAAAACATGTATTTGTCCTGTGTATTGCGAGCACCTGCCATGGTATTGATACA +GAGCAGTCGGTCAGCCGACGTGTCCGTGAATGAGTGTAATCCTCATCAACAGCTCCCAAG +CTTGCCCCTTTCTCAGCCCCCTTGGTGCTTGTAGCTGTTCCTTCCGGGATTCCTTTGGCT +TCTCCGCCTCTCCCGGTCTTTGCTCTCTGGTAGCTCCTTCCGTGTTCTTCCTCTCCCTTC +TTCCCGCTCTTGCTTGCCCATCTCATCCTGGGCGCTCCCTTCTCTCTGCCTGCACCTTCC +AGCCTTGGCCCTGGTGGCCTCTCCTCTCTTTTCTGGGCTTATCTGCATCTCCAGGCCAAT +TCTCAAATCTGAATTCTAGGCCAGGTTTCCGCTTGCTCACAGAAAATCATTTTCAATATC 
+TTACAGAATGGGAGCTGAAGGCGGTGCCCTCTCAACAGCAGGGCATTTGCAAAGAAGAAC +CGGCCCAGGAGCCCATCATGGAGCGGCCCCTCGGCGGGGCGCAGGCGTGGGGGCGCCAGG +CAGGTGCTCTGCAGAGGAGTCAGGCTGCGCCCTGGGCGCCCGCACCTGCCATGGTCTGGG +ACGTCCCTGTAGAGGAATTCCCCCTCAGGTGTCCCCTCTTCGCCCAGCAACGCGTTCCCG +AGGGGGGACCCTTGCTGGACACACGCAAGAACGTCCAGGCCACTGAGGGCAGAACCAAGG +CCCCCGCGAGACTGTGTGCAGGGGAAAACGCCTCCACGCCAAGTGAGCCAGAAAAGTTCC +CCCAGGTGCGCCGGCAGCGCGGGGCGGGCGCCGGGGAGGGCGAGTTCGTGTGCGGCGAGT +GCGGGAAGGCGTTCCGCCAGAGCTCCTCCCTCACGCTGCACCGGCGCTGGCACAGCCGGG +AGAAGGCTTACAAGTGCGATGAATGCGGCAAGGCCTTCACCTGGAGCACCAACCTTCTGG +AGCACCGGCGCATCCACACCGGCGAGAAGCCCTTCTTCTGCGGCGAGTGCGGGAAGGCCT +TCAGCTGCCACTCGTCCCTCAACGTGCACCAGCGCATCCACACGGGCGAGCGGCCCTACA +AGTGCAGCGCCTGCGAGAAGGCCTTCAGCTGCAGCTCGCTGCTCAGCATGCACCTGCGGG +TGCACACCGGCGAGAAGCCCTACCGGTGCGGCGAGTGCGGCAAGGCCTTCAACCAGCGTA +CACACCTCACACGCCACCACCGCATCCACACGGGCGAGAAGCCCTACCAGTGCGGCTCCT +GCGGCAAGGCCTTCACCTGCCACTCATCCCTCACCGTGCATGAGAAGATCCACAGCGGGG +ACAAGCCGTTCAAGTGCAGCGACTGCGAGAAGGCCTTCAACAGCCGCTCGCGCCTCACCC +TCCACCAGAGGACGCACACGGGCGAGAAGCCCTTCAAGTGCGCCGACTGCGGGAAGGGCT +TCAGCTGCCACGCGTACCTGCTCGTGCACCGGCGCATCCACAGCGGCGAGAAGCCCTTCA +AGTGCAACGAGTGCGGCAAAGCCTTCAGCTCCCACGCCTACCTCATCGTGCACCGGCGCA +TCCACACAGGCGAGAAGCCCTTCGACTGCAGCCAGTGTTGGAAGGCCTTCAGCTGCCACT +CGTCCCTCATCGTGCACCAGCGCATCCACACCGGTGAGAAGCCCTACAAGTGCAGCGAGT +GCGGCAGAGCCTTCAGCCAGAACCACTGTCTCATTAAACATCAGAAAATCCACTCCGGGG +AGAAGTCGTTTAAGTGTGAGAAATGTGGGGAGATGTTCAACTGGAGCTCGCACCTCACTG +AGCACCAGAGGCTGCACAGCGAGGGGAAGCCCTTGGCCATCCAGTTCAACAAACACCTGC +TCAGCACATACTACGTGCCTGGCAGCCTGCTGGGTGCAGGGGATGCTGGACTGAGGGACG +TGGATCCCATCGACGCGCTGGATGTGGCAAAGCTCTTGTGCGTGGTTCCCCCCAGAGCTG +GCAGGAATTTCTCCCTGGGGAGCAAACCTCGAAACTAACATGATGTGCTTTGGTGTCAGT +AGCTGCTTTCTGAGCTACTCAACAAGGAAAGCACCCTGGTCCTCCCTGGCTCCTAGATCC +AGACCACCTTCCTCCAGGTGTGGGAGCCTTGCCTTATCACCCCCATCAGGTCTGCATGCC +AGGGTGCCTCCTCTAGTTAAAGTCAGTCACCTCCCCAGAAGGGCCACACTCCAGGAGGAG +TGTTGAGAGTCATTTGAGGTAGTCTTGCCACCTGTTTTCCTTGATGGGCCTGGAAGTTGT +TGACAAGGGGAAAGATCTTTCTTGCCAATAAAAAGAAGGGATATCGTTGGGTGCCATGGC 
+TCACACCTGTAATCTTAACACTGTGGGAGGCCAAGGCAAGGGGATCACTTGAGCCCAGGA +GTCTAGGACCAGCCTGGACAACATGGTGAGACCTCGTCTCTACAAAAAATGCAAAAATTA +GCCAGATGTGGCGGCATGTCCCTGTGGTCCCAGCTACTCAGGAGGCTGAGGTGGGAGGAT +CATTTGAGCCTAGGAGGTCAAGGCTGCAGTGAGCCATGATTCACAGCACTGCACTGCAGC +CTGGGTGACAAAGCAAGACCCTGTGTGAAATTAAAAGGAGGTATATCAACTGTTGTATCC +TCGGACGGGCTCCTGACACGGCTTTAATCAGGAGTTTCCTCCATAAACTATTATTTTCAG +AATAATAATAAAACAAGAATTATGACTAGCATCACTTTCCAGTGAACATTATTATATTGC +TAGAGAGGAGAATAATCTTGGGTGGTGGGCATTTGGAAAAAGTGAATTTCCTGGACTTAA +CTCATGTAAATAGCTCTACTGCAGAGCTGTGTGTTTAGTGACAGTGCAGTCAGGGGCATT +CCCACAGCTGTCACAGCACGGCCCAGCATCATTGTAGCCAGATCCTAACATGCCAACATC +ACCTCTTGCCATTTAGCCCCTAGTGAGAAATTGGGAGCTACCAGGCAGGTGCCCAGTGCA +TTCAGGGAAGATGGGCACAACATCAGGATGGGTGTGTCTGGAAGCTCCTCTTCACTGACC +AGGGCTGGGCAGGGCCACCCTGGGCTGGTGAGGCTGCCCTGCAGGGCTTCTCACTATGTA +GCACCAGTCACCAGCCCAGGTCACAGAAGAAGCCCCTCCCAGCACCCACTGGAGAGGGGA +AGCTGAAGCCCAGGGAGACGGGCTCCATGGTGGTCCCACATGGAGCTGTTTTGCAAACCC +TGGAAAAGGCGGCTCTCCCTGTCCCAACACTCTTCAGAGACAGGAAGACAGAGTTAATCT +TGAATGAATGTTATTTCTACTTAGTCCTAAGCAAGAATAAGCTGCCCTCTTGGTGATCAT +ACTTTATACGGAGTGCTATCGTTTATGAAATGCTTTCAGTATGCTTTTTAAATTATGCAG +GCAAAATAACCACACTTCAGAAAATGTGGACTTCTGAAAAATAAAATCCCCATAATTCTT +AATAAAACTGCCTTTTGCACTTTGATACATTACCCTCTGCTTTGTTCATGTAGAACTTTG +CATTATTTTCAACACAGTGTATCTATAGTTTTTATCTTTTTTCCCTCTTTAGCTTTGTAG +CAGATTTTTTTTTCAAGCATCTAAACTGTCCTCGTTATACTTCTTTTTTTTTTAAATTAT +ACTGTAAGTTCTAGGGTACGTATGCACAACGTGCAGGTTAGTTACATATGTATACATGTG +TCATGTTGGTGTGCTGCACCCAGTAACTCGTCATTTAACATTAGGTATATCTCCAAATGC +TATCCCTCCCGCCTCCCCCCACCCCACAACAGGCCCTGGTGTGTGATGTTCCCCTTCCTG +TGTCCATTGTTCAATTCCAACCTATGAGTGAGAACATGCGATATTTGGTTTTTTGTCCTT +GCGATAGTGTGCTGAGAATGATGGTTTCCAGCTTCATCCATGTCCTTGTTATACTTCTAA +ACAGAGGCACAGTGTTCTCTCTGGTCTCTTAGATACTCTTTTGTTGGATGTTGAAGTTAT +TTCCATGGTGCAGTGGTGTGATCACAGCTCACTGAAACCTCAACCTCCCTGGCTCAGGTG +ATCCTCCCACCTCAACCTCCTGAGTGGTTGGGACTTTGGGACTACAGGTGTGCGCCACCA +CGCCCAGCTAAATTTTTGGGGGGGGCAGGGGGTCTCGCTGTGTTGCTCAGGCTGGTATTG +AACTCCTGGTCTCAAGCGATCCTCCCACCTCAGCCTCTCAAAGTGTTGGGGTTACAGGCA 
+TGAGCCACTGCACCCAGTCTATTGATCTTTCTTTTTGAAAAATATTTGAATATGCCCTTT +GTCCCTCTTTATATTGGAGTCTTGATGTTTTTCTTTTATGTATGTAAAAACTTTTCCCTG +ATTATGAATATTATATGTATACTGCAGAAAATTTTTAAAAATCACAAAATCATAATAAAA +ATGACCCTCAGGTTCCAGCTCAGTGGCTCATGCCTGTACTTTGAGAGGCCAAGGCAAGAT +TGCTTGAGCCCAGGAGTTCAAGACCAGCCTGGGCAACATAGGGAGATCCCATCTCTACAA +AAAATACAAAAAAAAAAAAAAAATTAGCTGGGCATGGCGGTGCGTGCCTGTAGTCACAGC +TACTCTGGAGGCTATGGTGGGAGGATCACTTGAGCCCAGGAGGTCAAGGCTGTAGTGAGT +CATAATGGCACCACTGCACTCCAGCCTGGGTGACAGAGCAAGACTGTCTCAAAACAAACA +CCTTCAGAATCAGTTCATGGATCATTACCATTATTAACATTTAGATGCGTTTTCTTGCAG +TTGTTTTCTATGCATGTATACATATCTTTTTTCTTTTAAAAGAAAATTAGGCTGGGTGCG +GTGGCTCACGCCTGTAATCCCAGCACTTTGGGAAGCCGAGGTGGGCAGATCACCTGAGGT +CAGGAGTTTGAGACCAGCCTGGCCAACATGGTGAAACCCCTGTCTCTACTAAAAATACAA +AAATTAGCCGGGTATGGTGGCCGGTGCCTGTAATCACAGCTAGTCAGGAGCCTGAGGCGG +GAGAATCGCTTGAACCTGGGAGGTGGAGGTTGCAGTGAGGCCAGATTGTGCCATTGCACT +CCAGCCTGGTCAACAAACAAGAGCGAAACCCTGTCTCAGAAAAAAAAAAAATTAGATTCA +TATTCTACATATTATTTTTATCATTTTCCATTTAACATTTTATGATAAGCATTTTCTATG +TTATAAAATATTTTCAGACATAATTGGCCACATCATTTTCTGTCTTACAGTTGTATCTTA +ATTTATCTATTCCCTTACTATTGAACATCAAGATTGTTTGTGCCTATAAATAATTATTTA +ACATAGGCCTCTGTTCCCATCTTTGATCATTTCCTCTGAGTACGTTCTGAGAAATGCAGT +TAACTGGTGAAGGTTATGAACACTGAAGCTCGTTGGTGTTCCTGGGTGGAATCTGATCTG +GTGGACCTGCTGTGAGCTGTTCCCATCGGCTCTGACATGACGCTGTGAGCTGTTCCTCCC +TGGACGAACATGTTCCTCCCATGACGCTGTCACCACTCCCTGGCCAGCATGGGGTCGTTC +CGGCCTTGATTTTTATATCTGCCATTTTGAGGCTTTAAGAATGTTATTTGTCCCTGCTTG +TTGGCCGTTTGTTTTTCTCTTCATATGTGTGGTCTGTTCTTGTCTTTTGTCCACTATTCT +CTTTGGTTTATTTCACACACACACATACACAGAGAGACACACAGATGCATAAATGACATA +CACATACACACATAAACAGATGCATAAGCACAGACACACACACACACATATATATCCCCT +TGGCATGTACATTTTCTGTCTTTGTTACAGCATTTTTAATGCCCAGAGGTTTTCTTTTTC +TTTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTTTATGAGATGGAGTCTCCCTCTGTTGCC +CAGGCTGGAGTGCAGTGGTGTGATCTCGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACG +CCATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCACCTGCCACCACGCCCA +GCTAATTTTTTGTATTTTTAGTAGAGGCGGGGTTTCACCGTGTTAGCCAGGATGGTCTCG +ATCTCCTGACCTTGTGATCCGCCTGCCTCGGCCTGCCAAAGTGCTGGGATTACAGGCGTG 
+AGCCACCGTGCCCGGCCAAAAAGGACTCTTAAAACTCAACAATAAGAACATGAACGGCTG +GGCACAGCCGTTCATGACACAGATGGCTGTGTCATGGGCCATAGAATAATTTAAAAAACA +ATAAAAGTCTCAGCTGGGCACAGTGGCTCATGCCTATAAACACAGCATTTTGGGAGGCTG +AGGTGGGAAGATCACTTGAGGCCAGGAGCTCGAGACCAACATGGACAACATAGCAAGACT +CCATCTCTACCAAAAAAACAAAAAAAATTAGTTGGGCATGGTAATATGCACCTGTAATCC +CAGCTGCTTGGAGACTTAGGCAGGAGGATTGCTTGAGCCCAGGAGTTCAAGTCTAGTGAG +CTATGATTGTGCCACTGCACTTTAGCCTGGGCGACAGAGCAAGACCTTGTCTCAAAAAAA +AAATTCTATTTTTTAATAAATGGACCTGAATATTTTACCGTAGGTCTTCCCAAAGGGTCC +AGGGATGCCATCACCCGATTGCATAGCTAGAAAGCGGATGGACTTTTTAGTAAGTGTGGG +TTCTAGTAAGTGTTGCCGCACTGAAGAACATCTCTGGCCAGGCGCGGTGGCTCACGCCTG +TAATCCCAGCACTTTGGGAGGCCAAGGTGGGTGGATCACTTGAGGTCAGGAGTTCAAGAC +CATTCTGGCCAACATGGTGAAACACTTCCTCTACTAAAAGTACAAAAATTAGCTGGGCAT +GGTGGTGGGTGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCGCTTGAAC +CCAGGAGGTGGAGCTTGCAGTGAGCCGAGATCATGCCACCGCACTCCAGCCTGGGCAACA +GAGCAAGACTCCGTCTCGAAAAACTAAAAAAGAAGAAGAACATCTCTGCTCTCAGCTTAG +GTCCTGGGCCCCTAAAGTCAGCGAGGCAGCCATTGTCCTAGGGTGATACGATGACATGGA +TTTGGGACTCCCTGGAGGAGGCCACACACACACAAAATGCAACTCTACAAGGGGAGGGAA +CAGAAACTGAAACAGGTATCTGCAATGTGCCTGGATCCGAATTTCAAAATGTGTTGAAGA +ATTATGGATTTACACCATGAGAAAAGCACTGGTTGGCTTTTGTGCCTGCTTCACTTTGGC +TCTGAGTTAACTTCAAACACTGCCAAACACAATGCTAGCACATGGTGACCCTCAAACTAA +TAATTTATAAGATGGGATCCACTTCCCCGGTGGAAGTGGCCACATGCTGCCCTCTCCCCA +CCCCCACGGCATACTTGGGCATGACCTGCAGCCTTTTGTTGCCATCCAGGTTCGTGTTCA +TTCTGCTGGTGCAGTAAATCAACCACTATGACATGGTTTTGCAAAAAAGAAAAGATTTAT +TCACAAGGGCACTGATCGGGGAGGTGGATCCTTTTTAAGTCCTTAAGTCTTTCGTCCCCC +GAAGATAAGTAACAGCTTCCTTCTGTGTGAGCATGGCTGGAGTTCATGGCATTTCACAAG +ACACGTGTACAGAAGAGGGTAGCATTAGCATTTTCCAAAGGCGGAGTTTTTGGCCCTCCA +GTGTCAAAAGGCCACCTTTCGGGCACTTGTGCAGGCCCAATTGAAGGGTTGGTGGTCTCA +ACCAGTTTGAACTGGACAGGAGCTGGCCCAAGTTCCTGAAAAACAACTGAAGAAGCCGGC +ACCATGGTGACTTATGCATGTTATCTATACAGTAGCCAGGGAAGGTTAAGTTTCAGCATT +CAGCGGCAAGGCTTTCAGCTACTGTGGCCCTTAAGCTTCACAGAAAAAGGAGAAAAAAAA +ATTAAAAACCAAATGACCCAAAAGCAAGCAGAGCAGGCAGACCTGGCCAAATTAACCCTT +CAGTTTCACTTTCCCTTGCTCTGTTGGGCATTTGTGGCTTGGAAAATGGCCCCCTCCTGC 
+CGCAGACAGACCCTTTACTGCTGACACCAATCATTACATTGATCTGTCGTGGTTCAGTGC +ACCACTGCACTCCAGCCTGGGCAACAGAGTGAGACCCTGTCAAAAAGGAAAGGAAAGGAT +AGGAAAAGGGAAGAGAAGAGAAAGGGGAAGGGGCAAGGGAAGGGGAAGGGGAAGGGGAAG +GGGAGGAGGGAGGGAAAGAAAGGAAGAAAATAAAAGAAAGAAAAGAGAAAAGAAAAGAAA +AAGAAAAAAAGAAAAGAAAAGAAAATGCACAGTGAGTTAGATTTGGCCTGGAGGCTGTAG +CTTGCCAACTCCTCCTCTGGATTCATGGATTAGAGTTTAAGGAAGAGATGGAGAAGTATG +GCAGAAGAACTGGAGTGAGGCCGAACCCCGTGGTGTTCTAGAAGAAAGGCTCTTGACACA +AGCATCTGGCAGTTATTTTTCAGGGGAGAACCTACAAATTATAAGGCTGTGCAGCTCCCC +TTACTAGTCAAGTGATTACACTTTTGTTTTCTTTTATTTTTTTTTAGAGACAGGATCTCC +CTCTGTCACCCAGGCTGGAGTGTGGTGCCGCAGTCACAGCTCACTGCAGCCTCGAACTTG +TGGGCTCAAGTGATCCTGCCTCAGCCTCCTGAGTAGCTGGGACTACAGACACACTCCACC +ATGCCTGGGTAATTGTTTCTAGTTTTGTAGAGATGAGGTCTCCTTGTGTTGCCCAGAATG +GTCTTGAACTCCTCGCCTCAAGCAATCCTCCCACCTTGGCCTCCCAAAGTGCTGGGATTA +TAGGTATGAACCACCATGCCCGGCCTCATGTGACTACTCCTACAGAGGCCCTCGATGGCC +TGGAATGATTGATTCCAGGACCCTGTCTTCTCATCCCAAAGCACCAAACCCTACTGACAA +GGAAAGAGCCCCCATTTCAGAACTTCCATGGCCACCCCCCAACCCTGGGATTGGCAACTT +TGCAACCTGGGCTTCTCCGATAATCAGATTTACGGTAAGACCCCAAAGAGGTCCAGGCGC +TTCTAAAACAAGTTAGACAGAGAATCCAAACCCAAATCCTCAAGATGTGAGCCCACATCA +CATCCCCCAAAATAGAGCATCCGTGACCACTCCAGCCTTGCTAGGGACACAGCATACTCC +TTGACCCTCACCAAAGGACAATTGCTCTCACAAACACGGGGATCCACTGGTTCCCACGCA +CTAAGCAAACTTTTATGGCCTTAATTGACACTGTTTCCCCAGAGACCCCACTAAATGTAA +GGGAGGAACCCTCTCTGTTCCCTGGGACATAAGATTGAAGGCAAACAAGTCCGCCTTAGC +CTGATCACTGGTGCAATAGCTTTGCAGAATATTCCAGTGGTCTCAGTTCTTTCTGCTCTT +GAATTTCTATCACTGGGACAGATGCTCCAATTGGAATAAAACCCACACTTTGTCCCTTAC +TGTTGGTCTTGGCAGATGAGACCCTGAGGACCTGCCCAAGCCATTAAAATCATTAATATG +ACCCAAAATAAATTAAAACAGGGACTTCAAGGATTGAGAGAAGGGGTGACCATCCCCTCT +GTTTCTCCATTTAACAGCCCAATTTGACGTGTTCTTCAAATGGAAAAGAACAAATGACAT +CTCACAGTCGATGCTGAAACCTTAACCCAGGGGGCCATGGGGGCCCCAGCCCCTGCAGAC +TGAAAGGATGGAGCCCATTCAGTCTTTTGCTGGCCGATATTTTGCTATTTTTGCTTCCTT +GCAGTGTTCCACAGTACCTAATTCAACAGGCTCTCAGCCTCTACTCCCATTCTATTTATT +GTGGTAAAATATATAACAAAATTTACCGTTTTAACCTTTTTCTTTCTTTTTTTTTTTTTT +TGAAACAGGGTCTCACTCTGTCACCCAGGCTGGAGTACAGTGGTGCCATTGCAGATCACT 
+GCAGCCTCAACCTCCTGGGTTCAAGTGATCCTCCCACCTCAGCCTCCTGAGTAATTAGGA +TTACAGGCTTAAGCCACCATGCCCAGCCTACTTTAATCATTTTTTAATTTTAATTTTATT +TATTTTTTTAAATTTTACTTTAAGTTCTGAGATTCATGTGCAGAACGTGCAGGTTTGTTA +CATAGGTATACACATGTCATAGTGGTTTGCCGCAGCTGTCAACCCGTCATCTAGGTTTTA +AGCCCTGCATGCCTTAGGTATTTCTCCTAATGCTGCCCCTCCCGCAGCCCCCCATCCTCA +GACAGGCCCCGGTGTGTGATACTCCCCTCCCTGTGTCCATGTGTTCTCATTGCTCAACTG +CCACTTATAAGTGAGAACATGCAGTGTTTGGTTTTCTGTTCCTGTGTTAGTTTGCTGAAA +ATGATGGCTTCCAGCTTCATCCGTGTCCCTGCAAAAGACATGAGTTCATTATTTTTTATG +GCTGCATAGTATTCCATGGTGTATATATGCCACATTTTCTTTATCCAGTCTATCACTGAT +GGGCATTTGGGTTGGTTCCAAGTCTTTGCTATTGTAAATAGTGCTGCAATAAACATATGT +GTACAGGTGTCTTTACAGTAGAATGATTTATAATCCTTTGGGTATGTACCCAGTAATGGG +ATCACTGGGTCAAATGGTATTTCTGGTTCTAGATCCTTGAGGAATCGCCACAATGTCTTC +CACAATGGCTGAACTAATTTACACTCCCACCAACAGTGTAAAAGCGTTCCTATTTCTCCA +CAGCCTTGCCAGCATCTGTTGTTTTCTGACTTTTTAATAATCGCCATTCTAACTGGTGTG +AGATGGTATCTCATTGTGGTTTTGATTTGCATGTCTCTAATGACCAGTGATGATGAGCGT +TTCTTCATATGTTTGTTGGCTACATAAATATCTTCTTTTGAGAAGTGTCTGTTCATATCC +TTTGCCCACTTTTTGATGGGGTTGTTTTTTTCTTGTAAATTTGTTTAAGTTCCTTGTAGA +TTCTGGATATTAGACCTTTGTCAGATGGGTAGCTTGCAAAAGTTTTCTCCCATTCTGTAG +GTTGCCTGTTCACTCTGATGATAGTTTCATTTGCTGTGCAGAAGCTCTTTAGTTTGATTA +GATTCCATGCGTCAATTTTGGCTTTTGTTGCCATTGCTTTTGGTGTTTTAGTCATGAAGT +CTTTGCCCATGCTTATGTTCTGAATGGTATTGCCTAGGTTTTCTTCTAGGGTTTTTACGG +TTTTTGGGTTTTACATTTAAGTCTTTAATCTATTTTGAGTTAATTTTTGTATAAGGTATA +AGGAAGGGGTCCAGTTTCTGTTTTCTGCTTGTGGCTAGCCGGTTTTCCCAGCACCATTAT +TAAACAGGGAATCCTTTTTCCATTTCTTGTTTTTGTCAGGTTTGTCAAAGATCAGATGGT +TGTAGATATGTGGTGTTATTTCTGAGGTCCCTGTTCTGTTCCATTGGTCTATATATCTGT +TTAAAAATATGGAACACTTTGGCTGGGTGCGGTGGCTCACACCTGTAATCCCAGCACTTT +GGGAGGCCGAGGCGGGTGGATCATGAGGTCAGGAGATCAAGACCATGGTGAAACCCCGTC +TCTACTAAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTAC +TCGGGAGGCTGAGGCAGGAGAATGGCATGACCCTCGGAGGCAGAGCTTGCAGTGAGCCGA +GATTGTGCTACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCCGTCTCAAAATAAATAA +ATAAATAAATAATAAAAAATAAAAATAAAAATATGGAACGCTTCACACATTTGCGTGTCA +CCCTTGGGCAGAGGCCATGAGAAACTTCTATCGTTCTGATTTTAGTATATGTGCTGCCGA 
+AGCGAGCACTATTTTAATCATTTTTAAGTGCACGGTTTAGTGACATTAAGTACATTCACT +GCCATCTATTTCTAGAATCTTTTCATCACCCGAAACTGAAACTCTGCATTCCTATTAAAC +ACTAATTCCCATACCTCCCAGCCCGTGGTAACCACGGTTCCACCTTCTGTTTCTACAGAA +TTTTTTTTCTTTTTTTTTTTTTTTTTTGAGATGGAGTCTTGCTCTGTCGCCCAGGCTGGA +GTGCAGTGGCGTGATCTCGGCTCACTGCAACCTCTGCCTCCCGGGTTCAAACGATTCTCC +TGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCACACATCACCATGCCCTGCTAATTTT +TTTGTTTTGTTTTGTTTTTGTTCATTTGTTTTCTTTGAGGCAAAGTTTTGCTCGTCACCC +AGGCTGGAGTGCAGTGCCGTGGCAGTGGCGCAATCTTGGTTCACCGCTACTCTGTCTCCC +GGGTCCAAGTGATTCTCTTCCCTCAGCCTCCCAAGTAGCTAGGATTACAGGCACACACGA +CCACATACGGCTAATTTTTGTATTTTTGGTAAATTTTGTATTTTTGGTATTTTTGCCAGG +CTGGTGTCAAACTCCTGACCTCAGGTGGTTCACTCACCTCCGCCTCCCAAAGTGCTGGGA +TTACAGGCGTGAGGCACCTCGCCCCGCCTGTCTCTACGAATTTGACTCCTCTAGATACTC +CACATACATGGAACTGCACAGTATTTGTCGCTGACTTCTGGCTGTCCTCACCTGGCTATG +CCTACTTTGGGGGATAATCTGGGCCTGGGACACCACATCCCCGTAGCAGGCCCCACCGGG +CCGCACAAACAGAACTGCGCCGAAGTTCCCCCAGGCTGTTACCCCCACTTGTTTAGGCTC +TTCAAAGATGCTACCGGCCGGGCGTGGTGGCTCATGCCTGTAATCCCAGCACTTCGGTGG +GCCGAGGTGGTCAGATCACTTGAGGTCAGGAGATCGAGACCAGCCTGGCCAACATGGTGA +AACCCCGTCTCTACTAAAAATACAAAAATTAGTCGGGCTTGGTGGTACGCGCTTGTAATC +CCAGCTACTCTGTAGGCTAAGGCAGGAGAATCGCTTGAACCTGGGAGGCAGAGGTTGCAG +TGAGCCAAGATTGCGCCACTGCACTCCAGCCTGGGCGATAAAGTGGATACTCTGTTTCAA +AAGAAAAAAAAAAGGTGGCACCGGCCCGGTGCGGTGGCTCACACCTGTAATCTCAGCACT +TTCGGAGGCTGAGATGGAAGGATGTCTTGGGCCCGGGGGATGGAGGCTGCAGTGAGCGGT +TATTGAGTGACTGCGTCCAGCCTGGGCCTCAGAGTGAGGCCGTGCCTCAAAAAGAAGAAA +AAAAAAAAAAAAAAAAGCAGCAGCTACCCATCCAGCCGCCAGATGGCAGCACGATTCCAT +GGCCCCCACTAGCCGCTGGCTGCCTGGTGCCGGATGCAGTCCGAAAGGCGGTTCTGAGCT +CCAAGGGCCGGCGCCGCGCGTGCCTGCGGTCCCAGCTGCTCGGGAGGCCCAGGCAGGAGA +ATCGCGTGACCGGGAGTTCTGCGCTGCAGTGCGCTGTGCCGATCCAGGTCCTCACTGAAG +CCGGCATCAGTATGGTGGCCTCTGCCACCAGGCCGCCTAGGGAGGGGCGAGGCAGCCCCA +GTTGGAAACGGAGCGCGTCAGTAGTGGCATCGCGCCTGGGAATAGCCTCTGCGCTCCAGC +CGGGTCGACCTGCCGAGACTCCGCCTCTAAAACCAGCAAATACCGGGAGGCTGAGGGACG +AGGACCGCTGGAGCCCGGGAGGCGGAGGTTGCAGTGAGTAGCTGGGATTACAGGCAGGCG +CCACCACGCCCGGGTAATTTTTGTGTTATTAGTAGAGACGGGGTTCCACCATGTTGACCA 
+GGCAGGTCTTGAACTCCTGACCTCAAGTGATCCACCCTCCTTGGCCTCCCAAAGTGCTGG +GATTACAGGCGTGAGCCATTGCGCCTGGCCAAATAAGTAATTTAAGCAATTTTTTTTGTT +TCAGAAAAAAAATTAACTCATAAAGGAGCCCCCCCGACACACACACACACACACACACAC +ACACACACACACACACACACACGGGAAGCAGCAGTGTAATTGGAATGCCCACTGCTGAGG +CTCTCGAATGAGTGTCATGTGTGCACATTTTGCCCCTCACAATGCACGTCAGACTATTAA +TAGGAAGCAGCCGCTGGAATACCTCTGGTGGGGTGTGTGCAGCAGGCAGCTTTGGAATTA +CGGTATCCCCAATGTGACAGCAGGACAATGGCTTCTTCACTGGCACTGTCCAACCACAGT +GCTGTGGTTATGTCTCGAGGTCCATTATTGGGCGAAGGTCACTGCAGACTCAGGGAGGCC +CAGGAGCTCACCCCAACCAGCTTCTTTACCAGGTGAGTGACCTCAGCCCTGCAGCTGGAC +TCATGGAGACAATGACCTCACCCGAAGATAGATGACCCCTGTCCACCGTCCACACAACAA +CTGAACTGGCCCCAGGTTGACTCCTCTACACCCTGCTTCCTACTGTACCCCTGTCAAAGG +CCTTCCTCCCACCCTTCCCACTCCTCTTCCTCCCTGGCAGCCCAACCACCACAAATTCAC +TAACTTGTTTTCAACTCTTACAGGCTGCCCTCCTAACCCTAGCCCTGACCTCTGCCCTCC +TAACCCCAGCCCTGACCCCACCGCCCTCTCAGGAGGCTTTACCAATCCTCTGCATGTTAG +CCCCACCCAAGGAACAGGCCTATTTTCAAGCATACTAACTGTTGGGTGTGTCGCTTACCT +ATGCATATGACAGCCAATCCAACCTACCTAAATGACAGCACAGCTCCATCCATGGATGTG +GCTGATCACATCTCCAAACCATCTTCCCACTGGGGACAAATTCTTGGATTTTTTAGAGAC +AGGTTCTCACTCTGTCACCCAGGCTGGAGTACAGTGGTGAGCCTCAGACTCCTGGGCTCA +AGGGATCCTCCCACCTCAACCTCTGGAGTAGCTGGGACTACAGGTGCACACCACCACCGT +GCCTGGCTAAATTTTTAATTTTTTTTTTTTTTTTTTTTTTTTTAGAGATGGGGGTTGCTA +TGTTTTCCAGGCTGGTTTTGAACTCCTATCCTCAAGTGATCCTCCCACCTCAGCCTCCCA +AAGCACTGGGATTATAGATGTGAGCCACCAATCTTAGCCCAAATTTTTAACTAAACTCAA +TTAAACAAATGAACAAACAGAAAAGACTCTCAAACCACACACTGTCGTCTCTCACCCAAC +AGAGACAAGACGTCTATAAACCACCAATCTGTTTAAAGAGACCACCAAATGTTCAGACCA +TGAAACTGAAAGAATCAAAATAAAGTTGTCACTATGCCACCAACGGACAAAAGCCAGGAG +CACACAACCAGGGGTCATACTCACTGAAGGTCCCCCACCCTGATGGCCAGTCAGGGGCGG +CTTGCGGGTCTCAGAGAGGGCTCACCACCTGCTTGCAGTCATTGGGGATGGGAAGGTAAG +TCCAACCCATGCCAGGTCAAGGCACAAAGACTGTGGGGGGAATGCACCAGACACATAAAG +AGATGACTTTATTCAGGCTACTGCTATGGGAAGAAAACTCACTAATGAGGAAGGAGGGGC +CTGGTAAATAAGAGACTCATGTGGAAGGCTGGAGGCGGGGTCTTAGAATGGACAAGGGCA +AAGTGCTCCTTCTGGTCTCACTATTTCTCAGAACGCAAAGGAAGAGGCAATTTCTCAGTG +AGCACCATTTCCCTGGAACCCAGGGCTGAGATGAATTTCAGTACTGCCAGCGGCCTCCGC 
+GTTCTCCTGTGTGTGTAGGAGTTTGTGTGTTCCAGTGTGCGTGTTCCCACTGTGTCTTGG +TGTTTCTGTGTGCCTGGTGGCATGTCTGTGTGTGTATCTGCAGGGTGAAAGACCACAGAT +ACTATCCGAATTCCCGTGCTCAAGTGCGCCTCCTGCCTTGGCTTTCCTGAGTGTGGGGAT +GACAGGCGTGAGCCACCACGCTGGGCCTCATGATCTGTTTCTAGTGCAGTGAGTGCATCT +TAGGGTGGGTGCCCGCCTCCATGCTTAGTAGGGCATGGATGCTGTGTATGCCATCTGTCG +GTAGGATGGTTACACTTCTGTGTGTTGGCTGATGTGTGTGTCTCAGTATTTAGTCGGCTC +TGGTGGCACATGTGTGTGTTGCAGGTTGTATAAGTTGCTGTTTCTCTGTGTCTGGTGGAA +GGTGTCTATGGTTGCAGAGTGAGGGTCTCTTGGTGTTTCTGTGTGTCAGAGGGTGTGTGT +GTGTGTGTGTGTGTGTGTTTTGCAGGATGAGGTGTGTTCTGTGAGTGTGATGTGGGGGGT +GCAGGGTGCATATCTGTGTCTACATTTCTTTTTTTTCTTCTTCTTCTTTTTTTTGAGACA +GAGTCTCGCTCTGTCGCCCAGGCTGGAGCGCAGTGATGCGATCTCCACTCACTGCAAACA +CTGCCTCCCAGGTTTAAGCAATTCTCCCAGCTCAGCCTCCCAAGTAGCTGAGATTACAGG +CGCATGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGATGCGGTCTCACCATG +TTGGCCAGGCTGGTCTTAAACTCCTGATCTCAAGTGATCTGCCTGCCTCGGCCTCCCAAA +GTGCTGGGATTAACAGGCGTGAGTCACTGCACCCAGCCTGTCTGCATTTCTGTCTGGTGG +TGAGCTATGTGCTGCAGGTAGTGTGTTTAATGTGTTTCTCTCATGTTTCCGTGTATCTGG +TGGAAGATGTGTGTCTGCAGGATGAGTGTCTCTTTTGTCGGTGTATCATGGATTGTGTGT +TTGTGTGTGTTGCAGGGAATGTGTTTGTCTCAGTATCTGAGTGTGTCGGGTCGTGTGCAG +GTGTGTGTTTTTTTGTGTGTTTGTTTGTTTGAAACAGAGTCTCACTCTGTCGCCAGGCTG +GAGTGCAGTGGTGTGATCTCAGCTCACCTGCAACCTCCGCCTCCCGGGTTCAATCGATTC +TTCTGCCTCAGCCTCCTGAGTGGCTGGGATTACAGGCGTCTGCCACCACACCCAGCTAAT +TTTTTGTATTTTTAGTGGAGACAGGGTTTCACCATGTTGGCCAGGCTAGTCTTGAACTCC +TGATCTCAAGTGAACCGCCTGCCTCGTCCTCCCAAAGTGCTAGAATTACAGGCGTGAGCC +ACCGTACCCGGCCGCACGTGTATTTTGTATGATATATATCTGCTTCTGACTTTGGTGAAG +TGTGTGTGTCGCAGGGTTGTGTCTATCTTGTGGTTTCTATGTGGCCTATGGTGTGTGTGC +TGCAATGAGTCTCTGTCTTGGTGTTTCTGTGTCTGGTGATACATGTTGTATGTTGCAGAG +TGAGTCTTTTTTTCCATTTTTATGTGCGTGGTGTGGGTTGAGTGTCCTCCATAGTAGTGG +GCTGGCTGTGTGTGTGTCACAGCGCGTCTGTCTCTGTGAGCGGTGGGTTGTGTGTGTGTG +TTGGAGAGTGAGTGTTTCTGTCTGATGGAGTGTCGTGTGTGTGTGTGTGTTGGCAGGTGA +GTGGCTGTGTTCCTCTGGGCCCAGTGCTGTGTGGGCGTCTGTTGCAGTGTGTGTGCCGTC +TCTGTGTTTCTGAGATTCTGCTGGCTTTGTGTGTGCATGTCTGCGTTTCGGGCTGTGTGT +CTGTGTTGGTGTTTCTGAGTCTAGAGGAGCGTGCATGCGGCGTAGGGTTAGTGTCTGCAG 
+GCCTCTGTGTGCTGTGGCCTGCGTCTGCTTGTAACCACCAGGCTGGGCCTGGTCCCCACA +TTCCTCCCACTCCATTGTGCTCTGTGCTACAAAAAATATTTGCGTTTTCCACCACTCATA +ATTTTTCCTCCTCAATCGCTACCAGATGTGGGTCCTGTGGTCCCCATCCCCATGGCAACG +GAGGTTCCAGCAGCCGCGGTTCCCTTTTGTGGCGCCTGCCCTGGGAGCTGGCACTGCTGT +GAGCCCCGAGGGGGTGGGGAGAAGCGGGTGGTGGGAGGCAGGAGGCTGGAGGGGTCCCAG +CAGCTCCCTTCCCAGCAGCCACTGGAACCCTCCTGGGGGGAAGGGAAGGCTGGTGGTAGG +TTGGGGGGGGTCCCCTGTTTTGTGAGGCACCCACAGGTAGAGTCTTCGACACCTAGGGGG +CTTCCCAGAGAAGGGCAAGGAGAGGTCCCCAGAAAGGCCTGGTCACTGGGACATCCCTGT +TCTCTCCAAATCGTGCTGGTCCAGGCTGGGGGAGGAGAAATAGCAAGAGCTGAGGGCTTG +GGGGTTCAGGGAACAGCAGTGTGGCATCCTCATCGTTCTGGGGGGTGGGCACCGCCTAGA +CAAGAATGTGGTCTGAGCCCCTGCACTGCCCACCCCTCACCCACACGCAGGCACTGCCCT +GCCCTCGCGGAGCCCGGCTGGGCATCTGCCCTCTTGGCCTTAACCCCAGCAGCCCGCATG +GCGCAGTGCCCAAACAGAGCAGAGGCTTTCCAGCCACAGGGCCATGGCTGGGAGGGTGCC +CAGCTGGCTCCAGGGAAGCAGTATGGCCCAGAGGCAGGGATCCCACATTCCACGTGCTGC +CTGTTTGTGTCCTGCCAGCTGCTCTGGACGGCCGACCCAGGGCAGGACGCAGTCTCCACT +CAGATGGCTGGGCCTTCTGAGATGGGTGACCCCTCTAAGATGGGTGACCAAAGTTGACCT +CCGTGGGGCATCTGGTCAGAGGATGAGAGTGTGCCCAGGACGAGAGCCCCAAGTTGCCAG +CGTGAGACCAATGAGTGCTGTGTGGCGGCTGCAGGGTGATCAGAAGATCCTGGCAGCAGG +GGAAGACTTTGAGCTGGGCATCAAAAGATGAGATGCAGGAGAACAAGAAGAGGCCTGCCG +GGCAGAGGCAGGAGGTGGGAGGAGAACAGACAGGCAGGAGCTGGGGGAACTTAGTGTGGG +GACAGCAGGGGCCATGGAAGGTGAGGGTCTGCGGCTTAGGAGGGTCACTTAGGAGGCCTG +CCCGGGAGGGTAGGCGGGTCAGGAGCAGCGAGGCCGTAGGGCACAGATGGTCACAGATAG +CCAGGCAGCCGGGTCTGGCTTGGGGGCAGGGGATCTGCCCCGGGCGCTCCCTCCGTCTCC +CTCAACCTTCCGCAGTAGCAGGCGGGGCTGGCAGGCAGGAGAGTTTCCGGCTGAGGACAA +GGAACCCGTTGAGGCAGACAGCAGGGCTCTGTGCAGCAGGAGACCGGCTTGTTTGTGCTG +GAGGAGGCCCGGCTGGAGTAAGCGCCCGCGGCCCCCTGGCCGCAGCCTCTGCACCCCCCG +CAAGCTGCCACCAACAGCTCTGAGGCCCAAGTTTAACAGCAGCGGCCAGGGGAAAAGGCC +GGGATCCGAGGCAGGCCCATCGCCTGTCCCTTTCAGTCAGGGACTCTGCCTGATCTGCCC +CCACCCAACCTTCCTCCCCGTGACCAGTCCAGAATGGGGCAGGCCAGATCGGGGCCGCAT +AAACTTGATAAACAAACGAAGCCTGTGGGTTGACACAGCCCACTTCCACCATGGTCCCGG +GAGACCCACGCCTGGGCTCCAGGGCTCCCCGCTGAGATAAGGGGTGGGGGAGAGTGCAGG +ACAGGTCGTCACATAAAGTTACTTCTGGGGGGCATCTTTGGCCGAAGCCTGCAAACAGGA 
+AGTGATCAGAGCATGAGGGGAAAGGCATTTTATTAAGAAAGCTTTGGCCAAGCCCCCGCC +GGGAGGAGCCCATCCTGGGGCACCGGCCGAGGGGGGAGCAGCCCTGCTGGGGGCCTATAA +ATACATCTCCTCAGGCCACTAGAGTCGCCCCTGGAGGTCCCGCGGCGTGTGGGGCAGTGG +GAGGCCATCAAAACCAGCTGACAGCTGAGGGGCCTGGGCCCGGGGCGCCAGGGAAGGGAA +GGGGTGGGACGGACTAAGCCCCTGAGGAGCGGAGACCAGCCCTCCTCCCCAGGCTGGGGT +CAACCAGATGCCCGGGAGCCTGGCCAGTGGAAGTAGCCCCGGGCTCTATTGGGACGGCCA +AGGGAGGGAGCCTCCAGCCAGAGGCACCAGGCCCTAGCGGGGGAGAGGCAGCCCGGGCAG +AAGTGCCTCTGGCTGCAACCAATGAGAAGGTAGCCCTAACCAAGTCCACTCCTGGCCAGT +AAGAGGCGGTCTTTAAGCGGAACCCTCCCATCTTTGGCCAATGAGACGCTGTCTGGTCGG +AGCGCTCCATAACTCGGCCAATGGGGAGGGAGTCGCCCGCTAAGCGCCTGTCAGGCCTCG +ACCAACGGGATGGGCCCTTCCCGCCAGAGCACACTGCTCCAATCCAGGAGCGGCTGCAGG +ACCTGAGCCAATGAGACGCAACCTCCGCTAGCCGCGCGGTGCCCGGCCAATAGGAGGCCG +CCCGTGCCCGGTAGCGTGGGAGGTGTGGGGTGGCGGGCGGCGCTGCGAAGCTGAGGGAGC +TGCGCGCGGACGAGCCACAGCCTGCTACAGGGTGGGTGCGCCCGCCCTGCCCAGCTCGCC +CGCCGCCTCCCGGCTCTTCTTGCGCGGCGGCTTCTGGATGGGGGTCTTCTTCCGGGGGGT +CTCAGGCGCGGGCAAGTCGGTCGCCGCCTTCTCAGGCCCCGGGGTTTCGGTCGCTGGGGG +CGCGCGGGCGGGCGAGGCTGGCGGCACGGAGCGCTTGGCTTTCTGTGGGGGCGCCGGCTT +TTCCCTGGGGCCCTGCGCGCCCAGGGCCACCTCGGCGCGGCCCAGGCTGCGAGTCTTGCC +GCGCACGTCAGCGGCCAACATGGAGGCAGCCTCGGCGCGCTTCGCGGGGGACCGCCGGCG +TCCGTGGGCTCCAAGAGGCCGGGGCGCGGCGCGCGCTCGGGCCCTGGGCGGCGAGGGCGC +AGCGAGGGCTGTCGCCTCCTCGGGCAGCCCCGGGGGGCGCGGCGTTGGGTCGCGGGTCCG +GGGGCTGCCGTGTTCGACCGTATGCGCCGATTTGTCGCTGGGCGTCCGTTTCCTCTTGCT +GGGGCTGGGCGCGGCCTCGGAGCCTGGCGGCGGTGGTGAGGGCGCACGGCCAGCTGCAGC +GGCGCTGTGCTTGCCGTGGATCCAGGACACCTTAGGCTTGGTGGCGGGGTCAGGTGGCGG +CGGTTTCCTGCGCTCGGGCGATGGCGACAGCGACAGGCCCCCAATCTCGCCCCGGGCCCG +GGCCGGCCGGGCCTCGCGTCGGGCCACGCGCGCGTACAGAGCCCCTCCGGGCCCCTCCAC +GCTGGACGCCGACCGCTCGCTGTCGGAGGACGCGGGGAGGGGTATCGCCTCCTCGGCGGA +GGCTGGCGTGGTCAAGGGCACAGGGGACGGCGCCGGCGCCTCGGCAGGGACAGTGGGGAC +TTCGGGGTCCCGGCTCTCCGCTGGTGCCTCTGGCAAGGGAAGAGCAGGGCGGTCACAGCC +TTCAGGAATACCTTTAGGGGCTCCAACCTCCAGGCGCAAACCTTCACCTTTCCTCCTCTA +CCCACGCCCTTGGCACTGTGTCACCCTGGCATCTAGACGTGCCTGGCACAGAGCACCAAC +CACACATGTGTACTTCAGAGCCTCAGGCTATGCGGCTGACTCTGAACTCAGGCTGTGCAA 
+GGAGTGATGGGCGCATTCCCGCGACTGAAGCCCAACCTTGGCCTGGCCTGGCAGCCCTGC +CCCAACACCCCTCCAGCCTTGGGCGCCTGCATCATCGAATTTGAGGCTGGAACATCCCCA +CAGGGGTTCCATTATCCTTCTTTAAGAGAGAAGGCTTCTAGCTGGGCGTGGTGGCTCACG +CCTGTAATCCCAGCACTTTGGGAGGCTGCGGTCACCTGAGGTCAGGAGTTTGAGACCAGC +CTGGCCAATATGGTGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTC +GTGGGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATTGCTTGAACCCAG +GAGGCAGAGGTTGCAGTGACTGGAGATCATGCCACTGCACTCCAGCCTGGGTGACAGAGG +CAGACTCCGTCTCAAAAAAAAAAAAAGAGCGATAAGGCTTCTGAGGCTCTGAGTTGGTCA +CCTGCCCGTGGCATCTGGGGGCACAGTCAAGGGAGGAGACAGGGCAGAATCCCAGACCAT +CAGGCTCCTTTCTGCTATCTGTGGATCAAAGGCTGTGTGTGGATCCAACTCCCATACTCT +GTGGATCCCTTCCTCCTGTCTCCTCTGCCACCCCCTACCTAAGCCCAGAAACTTCTCTCT +GCCTACTTTCCCCCAGGAGCAACCCTGGGCTGTGGCCTACCCATCTCCCCACACCTGTCT +CCGGTACTGAGCAGCAGAGTCCAGCTCTATGGCCTCAGCTCTGTGGGCAACCCTCAGGTA +CCAGCATGGCCAAGGCCTTCCTCTCCATGCTGCCTGCCCTTCTCCTGTGTCCCACCCAGA +ACACAGCTTTTCCATGGGAACATTTCACTGGGCTGGAGTGATGCCCAGGTAGGGCCTTAC +TTACCCTCATGGGGTACACAGTACACAGGGCCTTCATCAGTGGTGTCAAACGAGGAGAAG +GAGGCCCGAGAGGACCAGGATGGTGAGGGCTGCTCCAGCCCTGAGGGTGGCTCCAGGAAG +CTGCAGTTGAGTGTGTTATCCAGGTCGTGGTGGGCCACTGGGGAAGGATGAGGCAGAGCT +GCCAGGGGTCCACTCTGCCCCAGCCGGTGCCCTCATTAGCCCCTGCTGTGACAAATGTTG +ATGGGGTGACCAAGACCAGCCCTATTCTTGGACTGCAGGGGGCACAGGCAACTCAACAGG +TACAGTGACACTGGGCCAGGACTGGAGAATGGGTAGAGGTGAACCCAGAAAAGAAACATG +GGCACAACATGTGTTAAGGTGCAAGGCAAGTGTGTGCATCCAGGGAGCTGGAAAAGGTTG +GGCTGACGTGTTGAGAAAAGGCCAGAAAGGTGGGAGACGGGGCTGAAGAAGACACCTGGC +TGGGACAGGCCACCAAGGCCTCCAAGCTCTGTTTTAGGCCCTGCCTTGTGCGAATAAGGA +AGCCTTGGAGCACTTCTGAGCCCAAATGCGCAGTGGCAGGGCTGCCCTTTGGCTCAGCCC +ATCTCAGCAAGCTGCACCCTAGGCCTGGCCTCCAGAGATGCAGACCCCACAACAGGGGGC +CCCAGAAGCTGGCTAGCCTGGCCTCCATTCCACAGAGGGATGTGGGGTCCACTCCCCACA +CCTATGTCCCCAGATGCCATGCTGCTAGGTGGGCCAAGGGGTTTTCTCTCTCTTGCTTGC +TTGCTTTTTCTTTTCTTTCGACAGTTTTGGGGTTTCTCCTCCCTCCCTCCCTCCCTCCCT +CCCTTCCTTCTCTTTCTTCTTTTCTCTTCTCTTCTCTTCTCCTCTCCTCTCCTCTCTTCT +CTTCTCTTTTTTCTTGACAGTTTTGCTCTTGTTGCCCAGGCTGGAGTGCAATGGTGCGAT +CTCGGCTCACTGCAACCTCTGCCTCCCTGCAACCTCTGCCTCCCAGGTTCAAGTGATTCT 
+CTTGCCTCAGCCTCCAGAGTAGCTGGGATTATAGGCATGCGCCACCACACCCGGCTACTT +TCGTATTTTTAGTAGAGACCGGGTTTCTCCATGTTGGTCAGGCTGGTCTTGAACTCCAGG +CCTCAGGTGATCCACCTGCCTCGGCCTCCCAAAGTGCTGAGATTACAGGCATGAGCCACC +ATGCCCGGCTTGGCTAAGGGGTTTTCTCAGCTGTTTCCTTCATCTCCCAAGAGTCTGAGC +CCCCTTCCCCTCCATCTTTAGAAATAAGTGGGAAAGAGAGGAAGAGAGGGTCCACCCAGG +TGCAAGTCAGCATGGAGAGAAGCGGCACACCCCACCCCCCTATCCTGTTGGTGGGTGGGG +ACCAGCCTGCCCACTTTCCAGGAGGATCTCTGGGTACTGCTGGGGATGTAGGCAGGAGGG +TGGGTCCTCTCCGTGGGGCAGGGCACTCCCGTGGCCACGCATTCCCCCAGCAGCTCTGGC +CGGGATATACAGCATTCTCCTACAGACTGGGGGAGCCTGCGGGACCCAGATGTCCACACA +CCCCTGATGCGATCACTCTCAGGGCCAACGTTTCTGACATATGCCCACATCAGCCGGCGC +ACCTAGGACTCCTGCCTTCCTACCTTGCCTCGCCCTCAGCAGCTGCCACACCTCTTACCT +TCCCTCTGGTCGCACCTCCTCGCGCCCACGCACATCAACACTCAAGGTCCCCCATTTCCT +CACTGAGATCTGGACCCCCTCACACCCATTTCCCAGCAGCTTCCTGCGTCGAGAGGTGTT +TCTCCCAGATACCCCGCGCTGTCATCCTTACCTACGACTTTGGGTAGTTTCTGCCTCCGG +AGCGGGATCCGGGGCAGCTTCATGCTGATGCGACTGAAGCGCCCGCATAGTCGGTGCGGC +GCCTTCTTCCTCCCAAGCGAAAGCTCCCTGCGGGGGCGGGGTCTGAGCGGAGGGGCGGGG +CCGGGGCGGGGCCCAGGGGCGATTAGATCTCGGCCGGAGCCAAGCACAGAAGGGGCGGGG +CCACGTCCGGGGCAGGGGCGCGGAAGGGTTGGATCTGGGTCCGGTGGCACCCAGAGGGTG +CGGCCTGAACCCAGGCGAAAGCGGGGCAGTATCTGGGCTCACCGGCGCGTAGGGTCCTTG +CCGCGGCAAGCGCAGCAGCAGCCGAGCAGCGAGAGCAGCAGGCAGACGAGCAGGACGAGC +AGCGCGCCCGCGCCCATCACGCCCTTGCGCTGGTTGGTTTCTGTAGGGGGTTATGGGGTC +AGCGCGGTTTCTGGCACCCCCTGCATTCCTTAACGGGACGCCCCTCATCCACTTACCTAG +GTGGCAGGCACCAGTGACCGGGTCGCACGTGTCCTCGTGGCAGCTGCAGGCCTGAGCACA +GTCCGCGCCGTGGAGTCCGGGCGGGCACGTCACGTTACAGCTGCCGGGACATAGGGTCAA +GGCATGCCACAGCCGCCCCCCTAGGATGCCCCCTACCCTCACCCCTCACCCGCGGCCAGG +GCCCAGGGTCCAGGGTCCCAGAACCGGGCTCTCTCTCGCTGCATTCGTCGTCTAGGGATT +GAAGCCCCGCCCCGCCCGTGGCACATATTGGGTAGTGGATGTTTTGGAAACGAATGTTGC +ACAACACTAGTGGGGGGGCCTGGGGGATAGGACCCGTGGTGGCCGCGCCGCAGCGCCCAC +TGCTGGGGCTGACTCGGCTAGGTGGGAAGAGGGGCTGCTGAACCAGACCCTTGGCTGTAA +GTGGAGGTCCCTGGTCCTTCTGTACCAACTCGGGGTCACCTTAAGAGCCCACTGACACAG +GCTGGACTGGTCACCAGATCCTGCGCTGAACGTGAGCTCCCCGACATCCAGGCAGGAAAC +TGCCCCCAAAGAGCCTACTGTCACTCCCTAGCTGGGGTAAGGGACCAAGGCTGAGGTGAT 
+AACCCAGCTCAGGGTGGAGAGGCCACCTCGTCGGCTGGACCCCCCTCTGTGGCAGGTACA +AGCCCCCAACCCCCTCCCGGTCCCGGGGCACTCACTGGGGCCCGTGGACGCCAGGGCTGC +ACAGGCAGCGCCCCGACTGGAAGTCGCAGTGTCCGCTGCCGCAGTCGGCGCACACGAAGG +CGCAGTCCTCGCCGTAAGTGCCATTGCTACACTTGGTCTCGCACCTTGGAAAGAGGGGAG +GAGGCGTCAGCAGAAATGGAGGCAAACGGACCACAGCGCCCTCGACATTGGGGCCTTTGT +TAGGGAGGCAGGGCGGGGGACTGCGTGTCTGGGCCGACGCAGGCAGGGCTGCTCACCGGT +CGCCGATCCAGCCCGCGTTGCAGCGCGTACACTTGCCGGTGACATGGTTACAGGCATGCC +CGTCGCGGCATGGCGGACAGCGGTGGCTGCAGCCCTCGCCATAGAAACCGGTGGCGCAAG +GCTGGTCGCACTTGGTTCCGTTCCAGCCGGGCTCGCACGTCAAGCAGCGGCCCTCGGCCA +CCGTGCACGGCTGCTGGCCCTTGCACTGGCCACACCTGGGGGAGGGGTCGGAGGCTAGGG +AAGGCTGGGACCCGCCTCACCCCTGTCCCCATCGGCGGCCCGCCCTTCCACCTCCTCCCT +CCCTGGCCGAGAGACCGCGCTTACCGGCGGCGACAGCCCAAGCCGTAGAAGCCGGCGGGG +CACGGCTCGCGACAGTACTTGCCGCGGTAGCCCGGCTCGCAGGCACACGTGCCGTCCACA +GGGTGGCAGCGGCCGCGGAAGCACTGGCAGTAGCGATCGCAGCGCGCGCCGAACGTACGC +TCGCGGCACTGACAGCGGCCGCTCTGCTGCTCGCAGGGAGACGAGTTGCAGGCGCACTGG +TTGTTGCAGCTGCGGCCCCACCAGCCTGCGTGGCACAGGCAGGCGCCGGTCTGTGGGTCG +CAGCGCGACGTGGCGCTGCAGTAGCACGCGCTGGCGCACTGCGCGCCCCACCAGCCGGGC +TCACAGCGGCACGCGCCGCTCCGCGGGTGGCACGTGCCGTGCTGGCACTGGCACGCATGC +TCGCAGCGCGCGCCCCAGCGCCGCGCGTGACAAGTACACTGGCCTGTCACGTCCTCGCAC +TGCCCGTGTGGGTGGCAGCTACACAGCTCCTTGCAGTCGGGGCCCCAGAACTGGCGCGGG +CACTCTGCAGGGGAGGAGCGGAGGGGGTGGAAGGCCCCGGTGCTTGAGTAGGTGCCCCCA +GCCAGCCGGAACCTGCCCGCGTCCCGCACTCCAGCCGCCACCTCTGGGGACCCGCCCCGA +AGGCGGATCCGACCCCGCCCCACCTTGGACCCCGCCCCATCTCTCCAGGATTCCGCCCCA +CCGACCAATACCGGCCCGACCCCACGCTCACTGGTGTCGCAGTTGGCACCGAAGTAGCCG +TGGCGGCAGCGGCACTCGCCAGGCCTCACGCACACCTCGTTCTCTGAGCACGTGGAGTTG +CCTTCGCACACCGCTGTGGACGAGACAGGCCAGAGCTGCTGCGCGTCCTAGCCCCGCCCC +CTCCCCCGCCCCAGGTCCCCGGGATGACCCACTCACCAATCCCACACTCGTCCCCTTGCT +GCCTCCAGCCAGCGCAGCACGTGGGCACCTGGGAGCTGCGAGCAGAGGGAGGACATCTAA +GCCCGATGCCCCTCCCCCAGCCCCCATCTGCTCCGGGCTCCTCCGCAGCCTCCGCACAGC +CTCCCTGCCTCTGCAGTTGCGCTCCTGTCCTAGGGGGGTGGTTAATGAGGTCCTGCCAGG +CAGTCGTCTGGCGGTGGAGGATTGCCACAAACAGACCTCACCGCCAAGAACTTTCCAGCC +CAGAACCACCCACCCCATCTCACCTAACTCCTTTGTGTCCTCCTCCCACAGCCATTAGGC 
+CTGAGAAAAGTGGCCTTGGAGTCCCAGGAAGGACAGCAGTGAGGCAGCAGCCAGGGAGAA +GACCATGGGCCTCAGGACCTGCCCAAATGTGAGGGTCTAGGGCATCCCTCCAAGTGGAGG +GCCCAGCCTAATGGGTGTCATGGATCCAGATGTCATGTGATGTGACATGATGAGGACGGC +ACTAGGACTAGCTGGGCCACAGGGACACACCTAGATGCCCAGTTGGGGTTGAGCTCCAGT +GGGGAGGGGCAGCCTCTGGACAGGGGGACGGGGGGTGGCATGTAGACACAGTGAGGTGAC +CACTTGGCAGAGGGGCTTCTGGAGGGAGGTGGGGTGAGGCCCACCCAGCCACTAGGGAAG +GAGCAGGCACATGGGGAGTCCTCCACGTGCAGGAGGAAAGTCAAACTGTGGCAAAAGGGC +TCAGGGACCAGGGTGGAGGAAGGAGCTATTAGAGCATCTGTTTATGCCTTTGTTGTTGGT +TTTGAAACAGTCTCGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCACGATCTCGGCTCACT +GCAACTTCCACCTCCCAGGTTCAAGCAATTCTCCTGCCTCAAACCTCTCAAGTAGCTGGA +ATTACAGGCACGTGCCACCACACATGGCTAATTTTTGTATTTTTACTAGAGACAGGGTTT +CGCCATGTTGGCCAGGCTGGTCTTGAACTCTGACCTCAGGTGATCCGCCCGCCTCGGCCT +CCCAAAGTGCTGGGATTACAGGCATGAGCCAGCGTGCCCGGCCTGTTTATGCTGATTGTC +TTCCACTTGTTCCATTTGATTTGGCTTTACAGAGTACACAATGCTACCGCAGCACTTCAG +GAGAGCACAACCTCTAAAACCCTGGGTGGATGGGGTGTGTGAGCACAAAGGTTTGGAGGC +CCTGCCTGAGTGAAGCCATGATGGCTAGAGAGGGTTCGTAGGGGATGAGCTGGGGTTAGC +CCACATTCAGGGGCAGAGGAGGCTGAGAAGGAAGAGCCACCAAGGCTGCGGGATCCTAGG +TTCCCAGCCGACCCAGAACTTCGCCTCGATCCTAAAGGCATGTAACAGCCATTCGCACTT +TAGAGAAAAGGAGCACGCTTCTTCACAAGCAGGCTTGGATTCAGTATCTGAATCTAGAAC +CGGAAGGTGGTGGGGGGCGGAGTTTGGGAGGAAGCCTGAAGACCCTAGCGGGATGGGATA +GACTGGCCCTACACCATGCATTTCAGGATAGTACATGCCTGGCATGCTCCTGCCCAGGCT +TCCGAGGATGCCCCAGCCATTCCCCAGCTAGACAGAATACCACCCCTTGTACAGAGCACC +TAGGCCTCGAGGCCCCTCCTCCTGAACAAGGCTTGGTTCTCCTTCAGGCCAAAGATGGGA +CCTGGCGAACAGGACCATAAAATCTCAAACTTTGGGGAATTGGTCCAGTGTATTTTCCAC +AATAAAACTGGAAACCAGAGACAGGAATACAAAGGCCTGGCTTTTCCTGGCCCCTGCCCC +CACTGTCTCCACCCACCCAGCCCTGACCCAGGGGGCACCAAGGTGCCCATCTGCAGGGAG +ACCCAGGTCTGCTGGCCATCACCCACCCACTTGTGTCTGCTTCTGAACCATGTTCACTGG +ATGGAGAGCCTCTGCAGGTGTCCCTCCTGTGGGGAGCTTTGCCAGGTGGCCTGCTGGAAT +TGCCCCGGAAGCACTAACCCCCTTCTGCCCTGGACATAAGGCTCTGTGGCTAGGACCCTC +GTTTTAGGGCCACACTCAGCTGGGTTTGATGTCTGGGTTTGCCACTTGCTGTGTGACCTC +AGGCATGCAGCTTAAGCTCTTTGAGCTTCAGGCTCCTTAGCTATATAGTAGGGCTCTACT +GTAGAAAGATTAAGAGAGCCAAGATGTTTTTTTGCCGGGCGTGCTGGCATGTACCTATAA 
+TCCCAGCTACTCAGGAAGCTGAGGCAAGAGGATTGCTTGAGCCCAAGGAGTTTGAGGCTG +CAGTGAGCTGTGATGGCACCACTGCTCTCCAGCCTGGGCAACAGAGTGAAACCCTGTCTC +AAAAAGAGAGATAGAGATAGGAGAGAGAGAGAGACAGAGAAAGACGACAGAGAGAAAGAG +AGCGCGCGCCAAGACCTATGAAGTGGTTTGGCACATACTAAGCACAAAGGATCCTGAGCA +CAGGCTGCAGTGCCTCTGAGTGCACTCGGTCATCTCTGGTTTAGCGGGCACCCAGGCCTA +ACCCCAGCAGAGCTAATGCCCACCAGCAGGACAGAGTCCATTTGGGTTCACTGAGAACAA +GATGCTTTCCTTCTTGGAAGGGCTGGCAGAAAAGGCCGGGTGGTGGTGGCAAGCCCACTC +TCTGAAGATCTTACAAAGACGGGTGGGGAAAGATGGCCTGCTGCAGGGCTGCTCCCACGC +TTGCCCTGTAAGTCGGCAAGACCCTCAGAAGCCAGGCCATAGGCTGGGCCCAGGCCCCTC +AGCAAGCAAGACATGCCGTAGGCTTCTCCCTCGTAGAGCATTAGTTCTGTGATTCAAACC +CACTGTGAGTTCTTACATCTTCAAGTTGGCTCATGGGGTCCATACCTGTCCATGTTCTCC +CCCAAATCCAGGCAGAAAAGCTTCTCTGCCTGCCACACCCTTCAGACCCCACACCAGCCC +GACCCTCCTGCTGCCCCGCCTGGACACCTCTCCTCTGGTTTCTGTCCCCATCCTCAGATC +TCTCTTGTGTCCACACTAGCTCCCTCGGGAAGCTCACTGAACACTGACAACACCTTGGAC +CCTCCAGCAGGGCTCTCCCCATCAATGCTAGACTTGCAAATCCAACTGCCCAATAGATGA +TGTCACCAGAAGGAGACCTCATGGGCCTCATGCCCAACCAGCCCAAACCTGAACTCTTGA +CCTCCTTGCACCCTGAGTCTTCCCTGCCTCAGATAACCCTGCCTGTCCTTTCAGGCTGGG +AAGCTGAGACCTGGAGAAGCATCTCTGACTCTTCTTTCTTGTCTACACCAGATCCATTGA +GAAAAAAGACGCTGAGCTCTAGCTTCTAAACGATATCTAAAATGGGATCACTTCCTCCTT +CCCTATCCCAGCCTTGTGAGAGCCACCCTCCATCTCCCTGGCTTTTTGTAACTGGCCCCA +GCTTCCACCCCCTGCCCCCCAGGGTGCGAAGTGTTTGCCACCTGGCAGCCAATCCTGGTC +AGATCCCACCCTTTCTCTGTACAAAGCCCCCAGGGGGTCCCATGGCCCTCAGAAGAAAAG +CCCAAGTCTTCTGAGCAGTGGCAGCCCACGGTGGCCTGCCCACCCTCACTCTCTGGCAGC +TCCTCCGCCAGCCCCTCCTGGACTCTGCCCAGCCTCCTGGATCCTTCCTGTACTTCCACA +GGCCCTCTGCCTTGGGAACCTTGCCCCAGACACCCACAGGGCCTCCGTGCTCTTCTGCAG +GCCTGTGCTCACTCACCACCTTCTCAGCCGTTCTCCTCAGAACACCCTAAAGGAAACAGC +ACTCCCCAACCCAACACCTCCCAGCTCTTGCTAGATGTCTAGACACACTTGCTTTTCTGG +ACTGTGTCTCCTCTGCCACAGGGTCAGGGTCTCCACTGCAGCTTCCCTACACCAATACAG +GCCTGGCACAGTGTAGGTGCATAGGATTTTACTGGCTTTGCACTCTCCTCTCCCCATCCT +CCTGCCTCCCAGGAAAGGGAAGGGCTGATGGAAGCCTTGCAAGATGTGGGAGGACAACAA +AGACATGACCCAGACCCCAGGTGGGCAGTGCCCAGAGTGGCTGTCCTTCCCATCGACCCT +TGCCACTCATGACCACAGCGATCTCCACTTAGCCCCATGAGGGGGTTGTGATCACTATCC 
+CATTTTACAGAGGTGGAGACAGGCCCAGAGAGGTCAAATGACTTGCCCAGGTCCACACAG +CTGGGTAGGGTGGCACCAGGATGGGAACCCAAACCCAGGCTCGGTTCCTGGAGCAGGCGC +CTCCTGGGAGGTCTGCAGACCAGGGGAGCGGGAGCAAAGGGCTCTCAGGCTCCGGACAGC +TAGAGCCTGGCACCGGACCCCCATTCCTGGTCCCGGGCCTGTCTGGGAAAGAGGGCTGGA +GTGAGTAGACAGAGACCACGGCGATCGCGCCCTCCCCCTCTCCAGGCACCGCCCCGCCCG +CCGCCGCCGCTGCATTCCTGGGGCCGGGGAGGCCCGGGAGTCCGCGGCGGCGGCATCGAT +GCGAAGCAGATGGCGGGCCAGGCCGGCCCCCGCCCCGGAGGCGGGGCTCGCAGCGGGGAG +GGGTCTGCGCGCCTCCGGACCAGAGTTCGGAAACCCCGCGCTGCCCCCTCGCAGCCCCTT +GTCTCTGGCGCCGAGCCCCTGGACTCGGCTTGCCCCGGGTCCGTGGCTCAGGCGCCCCGA +CGCGACCCAGCTCGGAGCCCGGCGGCCGCCTCTGCGGCTGCCAGCCCGTGCGGCGTCTCC +AATCCGCGCCAAGGCGCCCCCACACGCCCGCTCCTCACCCCTGCGTGGAGATGCCCTTGA +CCTTGTGGCCTTTGGCCAGCCCACCGGCTCCCCCGACCCCAGCTCCTAACTCCTGGCGTC +CGGGGCTCACCTCTCTGCCCTCCGTGCAAACTCCCGGACTCGCCCCCCCACCGGCCGGGC +CTGTCGCCTGCCTGTCCCACACTGGCTTCCCCAGCCGGCGCCCCACATTTGCGCGGCCCC +TTCTTGCACCCAGAATCCCCACCTGGTTGTCGCCAGGGTTCCCACATCCTCCGGTCCCCT +GTGCCTGTCATAGTCCTTCCGCCTCCCGCTTCTGGTCCACTTCGAGCGCCGGCTCCTCGA +CGCTCGCCTGGGTCACCACCCTCCCACCCGCGCCCGCTCTAAGACGCGAATCTGTCTTTC +CAAGTCTGGCTTGCTCCAGGACCTCCCCGGCTAGCACCGCTCCTCCTCCCCTGACCTTCC +TCCTCACTCCCACTCCCGGCTCCAGTCGGACTAGTGCGGGGCTTTAGCACATGCAGTTCC +TTTGGCTTGAATGCCCTTCCCCAGCTTTTCCCCGTGGACTTCACAACTCGGTCAAGGGTC +CCTTTTTCCAGGTCTTTCCTGATATCCCGGACAACGCGTGTCATCCCCTGGGTGCCCACG +GCACTCCATCCTCCCCTCAGGTGACTTCTCGTCCAGGGTTCCCTGGGATGCACTGTCGTG +GCCCATGGCCGAGGCTGGGCCTGGAACGGAGCCGCGCTGGCTAGGGAGCCGAAGGGGCCC +ACACTTGGCGTAGGAAGGTTCCGGTAGCCCCCTCCCAATGCCCGGCGCCGCCACACCACT +GCCCAGAGCGTCCCTCTCGCCCCCTCCCCCAGCCAGGCCGGCTCCTACCCGGGAGCACGG +CACACGTTGCGGCCGCGAGGGTTCAGTTCCTGAGGCGCCACGGTGTCCGGCAGCATCCAG +AGCAGCAGCAGCAGCAGCAGCGACGGCAGCAGCGGTGACGGCGGCCCCCCGGCTCCCCGG +CGCCGCGCCGGCCCGGCCCCCCGGGGCCCTGCGCCCTCCATGAGGCGCGGGGCAGGCGCG +GGGCGGGCACGGGCGCGGGTGCGGCCGCAGCGAGAGCGGCCGGAAGCGGAGTGCGAGGCC +GGGCGGGGGGCGGCAGGAGGGGCGCCGAGCCGGGCAGGAAATTCCACATCGGCTCCCTGA +TCCCGGGCCAGCCGCGGCCGGCCAGGCGGCAGCGCCCGCCCCGCGCTCCAGACCCCACCC +TCCGGCCTAGCCACCGCCCCTCCGCGCAGCCCCCGCCCCCATTTGGCCGCTCGCAAACTT 
+GGGGGCGAACTTGGAGGAGGCCCGCCGAGCGCTGGTGGAGGGAGGGTCCGCCCGAGGAAT +CCCCGGTTCCCGCGCCTTTCCCTCCGCTCCCTAGACATCTACCGAAGCAGCCGCCCCTGC +GCGCCCACTGCGTAGGCCAGGCCCGGGACGGGAAGGCGGATCGGGGACACGGACGCGACT +CCGCAAGCCCACCCCCGCTCCCAGCGCCTCGGAACCGGCCAAGGGAGTGGGGCCCGCAGA +CAGCCCAGGTCGAGGCCGAAAGCCAACTCCACAGCACCCCACCGCGAAGTCCTTGTAGTG +CTGGGGTGTGCGGTCAGCTCCCCCTGCGCACTCCTGCGCACTCAGAGCTCGCAAGGGGCG +GACCGAGCAGGATGCTTGAGGCCTGTGGTAGACTCGGTCCGGACCAGAGGCCTGGGGGAA +GGGGTCTCCGTAGGGACGGATGGGAGAGATACAGAGGAAGTAGAATGGCCAGGCTGTGGA +CTGCGGTAGGAAGTAGAGGTAAAGACAGAAGGAGACCCCCGGGATGGAAACCCTGCAGTC +CTAGTTGAGGAGTGAAGGGGGCTGGGGGAGCCTGGGCGGTGGATTCTGCTGGCTGTCGGA +TTTCCAGGAGAGGCTGGGGCTAGAGAGTGGGAACTCCTGCATCAGGTAAGGTGGGTAACA +GCACAGAGCAGAAGGAAGGTGTGGGAAGGGAGCAAGACAGAGGGACAGGCCCTAGGGAGC +CAGCGGGGAAGCTGGACTTGGACTGGGAGGCAGCAGGGTGCGGGTCCCCGGGGGGCAACT +GGGGCAGACCGACTCTGGAAGTTGGGCAGGGAGCACAAATGTCCCGTGTGTAGCCAGGTC +CTAGTGAGGATGTTTGTCTTTTGGGGACTTGAGAGCTGGCAGAAGGCCTAAGCAGGGAGC +GGGCACCTCCGTTCTTTGGGGAGCAGGCCACATGCGGCCCACAGGCTCACTGAGGGAGGG +CAGGCAGACTCTGTCCAGGTGTGGCCAGATAGCCCCGAAGGGTGGGGCTGGAGGAGGGGG +CTTGGAGGAGGAGCCACTGAAAGGCCCAAGGCCATGGGCATCACCAGGCTGGCACAGGCA +GAGAGTGGGATGGGGAGGAGCAGGCCCCTCACTCAGCCATGCTGGTGCAGGAGGGAGGTG +GGGTGGGCTGAAAGAGAAGTGGAACTAGTGTGGACACGAATGCAACTACCTGGGGACAAA +TAAACAGGCCAACCACTTCTGGGCAGCTCAGCCTGGGCACAGCCCTGACCTCAGCAATGC +CCTCCCAGTGCTGGGGCCCCTCCCACGGAGGATCCGGATTGCGGCTCCCAATAGAGGCCA +GCAGAGGTCCCAGTGGCTGCAAGGTCTGTCCCAGCCCAGCTCCTGGCTTTAGAGGCACTG +ACCCCCTGGGCTGGAGGGGACACTCACCTCAGCTGGATGGAGGTCATGAATAGGCATATC +AAATTCCATCACCTTTCTCCTTCCCCCAGACCTGTCCAGGGAGGGCACCACCTGGATGGT +GCCCCCCAATCCCCCAGGACCTCCTAAATGTCTTAGATCTCTGCCCACCCCTGTCTGCCA +CCCAGAGTAGTCTTTTTAATCTCACATGTGGCTATCCACCACTGCTCTCAGGATGAGGAC +CATCCCCAGTCTCACCTCACGACCCGGTACATACATCCCTTGCAGTAGATGGCAACTTTG +GGTGTCATGGGCAGAGCCCTCATTTCTCCCAACCACCACTGCACCCCTAGGGGAAGACTG +AGAAGTATGGGAGGAATGAAGGATCATCCACATGGCACTAGGATGTAGAAGGAACCCACA +GACAATTGACCACCACGCAGGCAAACTGCTGGAGCCCGGAGCCCTGGCCTGCTGGTTTTG +GCCAGGATGGCTGCCAGAAGGAAGTGGACTTTGAAGGCTGAGGAGGGTTTTGCATGAAAG 
+CAGGGACCAAGAGCCTAGCCACACTGGAGAGCACAAGTTGGGCTGCCTCAGCTCCGGGGT +ATCAGGTGTAACCCTTAGAAACCCAGCTTAGCCCCAAGGCTGACAGTGGCAGGGCCAGGC +TGCGCCCAACTCCAAGGCACTCTCCACCCCTCCCCAGGCTGTGCAGGACCTCAGTGAGCC +CTTCCTGCCCCTGCAGACCAGAGGCCTCCTGCAAACCTGTGGCTGTGGCTGGTGGCTCAG +GTGAGATGTTGCCAGCCAGCATGTTGTCAGGACGCACTGGAGAGTGGTGGGGCATGTCTG +GTGCTCCTCCTTTCCAGAAACCTCTGAGGTCTGCAGGAGCTATTGCCCTGGCCTGCCAGG +TGGTAGCAGAGACACAGGAAGTGCTGGGTCTGAGACCCACAGAGTGGGGTTCCTACTCTG +AACCCATGCTCTTGCGCCTCACCCTCAGGAGAAAGGGAGATGACACTGGAGTGTATAGCC +ATCCCGCCCCACTAGGGCTCAACACTGGGTCTGCCACCCCAAGGCAATGTCATGTTTCCT +CAGGGGTACTCAGGGCCCAGGACAGTGAGGAAGGTCTTCAGGTTACACACACACCCACAT +ACCACAGCCCTGGGGGCACAGGCCAACTCTGAGGGCTTTGTCTCTACCCTCAAGAATCCT +TGGGGTATCCATTTAGGAGAGCCCAGAGTGACTGATAGTCCCCCGTCCCCATCCCCTCTC +CTCCCATCAAGGAGCACACACTGCCCCCTTCTCTTCCGGGACAGGATGCCAGCAGGAACA +CATGGCTGTCCCCATGCTCTGCCCAGTCAACTGGGAAAGGATGGGGCCTTGGCAAGTTAC +AGCCCTGTCTGCGGGATGGCTCTGGGCACAAGGCTCATAGTAGGAGTCTGCAGGTGCAGA +CCCTATGAGGGCATCAGGATGGAAGAAAGCTTCGTGTGGTGCAGCCCTGCAGTGAGGCTG +GTGTGGGCTCACAGCCCATCCATGCAGGGTGCAGGCTAAGCCTGGCAGTCTCAAGAGGGC +AAGAAAAGCATGGGGACAGGTGAACAGGGGAGGGTCCCAGCTGCTGTGCCAGTGTGGTCC +TTGAGACTGGGTGCCTAAGGGTTACCTGGCAAACCTGTAGCACATGCAGCTCCCCTCCCC +AGCACACTCAGCTTCAGGAGGTTGGGAGGAGCCGGGCACAGGCCAGAGGGGCCCCCCCCA +CCGACCCCCTGATGCTGCCCCGCTCCCAGGTGCTGAAGCTCTGGTTCCCTGACCCTCAGG +CTGAGAGACTCCTAACGAGCTGGGCTGAAACTCTGCGTGCTGCCCTGGTTGGGAGACAAG +TTCCACACCAGAGAAGACACTCAAGTGGCTGCACAACCACCTGTCACCAGACCAGGGCGA +GTTCCAGCTTTCCCACTGAGTGCCCTGTTAGCTTGTGGAGTTTACTGTGCTTCCCTCTCA +GGCAGGGTCTTCCCAGTGCCACTTGGCTTCTGGCCTGGGCTTCCACAGCCTCTCCCAGAT +GGCAGCTCTGACCCCAGAAGCCACGCTTCCTGGGGACGCAGCTTGGGTGCACTGTGGAAA +GGAACTGAGGCTGTGGCTACAGAAAAACAGTTAACACCTTCTGTAAGATGCTTTATTTCA +TTGACCAACAACATGGGGTCTGAAAACCCAGCGGGAGGGGTCTTTTTATCACAGAGCCAG +TCCCAGGCGAGCTGATGCATCTCTGCTCCTCTGCCCCTCAGGAGCTCTCATCCTCCAACC +CCAGCTGCCCCCACAGCCCCACCCCATTCACAGAAAGAGGGCTACCACGTGCCTCAGCCC +CCCTGCCCAGGCTGCCAGCTCCCAGGTCCTTTTGGAGAAGGACTGATCTAGGCAGGGAGG +AGAGAAGGCCAACCCCTCCAGGGCTCACTGAGGAAGGCCAAAGCCTTTCAGAAGCAGTTC 
+CTGCAGTGACGTAATCCACAGCCTGGGATCTGCATGGCCCTGAGATGCCTGCGGCAGGCT +GGCCAAGGGGCTGGTGTGAAGAAAGAGGGCAGGGCCCATAAGCTGTGGCCAACAGGGGCA +GGGGCCCTGCCTGGAGTAAAGTGCTCTGGCCTAGGCTGCGTGGGTTTCACTGCCCTGCAG +CCCCAGCCTCCCTTCCCTCTGATGCCAGGCACAGGGAGCCTAGTCCTCACTGGAGTTGTC +AAACTCCTCCCAGTCAGACACACTCATCACCTCAGAGGCAAAGTCCGGGTCGGCCTGGCT +GCGGTCAGGGGTCCCGCGGGGCGGCTCAAGGAGCAGGGAGCGGGGCAGGGTGAGCACACA +GGCCGCCAGGCCTGAGATGGAGTTGTCCAGCTGGGGCCCTTCCTCCCAGCAGTCCTTCTC +CACATCGTAAATGTGCACGTAGCCTGTGCGGCTGCCGCGGTTGTGTGAGCGGCCACCTAA +CACATAGATCCTGTTGTCCAGCACAGCAATGCCAGGCTCACCGTGCCCAGCAGGGAGTGG +GCAGACAGATGACCACTGTCCAGACGTGCAGCTGTAGCAGGCCACCTGCAAAGCCAAGGC +TAGAATAAAGCCCAGCACCCACTGGGACGAGGCTGCCAGCTCCCCCACAAGCCCACTGAC +CAAGGTGGCAACAGTTACCCACCATTCTGACAGGTCAGGCCCCCTGGAAGGATGGCACAG +GGCCAGTGTTGAAACATCCTGTCCATGGGCCAGGCTGCTGCTTTCTCACCCAGGGATGGC +CGAGTCTGCAGGAGGCTGTCTTTTGGGCCCTCCCCTAGAATGATCCTATTCTCTCTGGCT +TTGCTCCAGCCCAGCCCTTCCTCTAGGCCACCCGGCCCTAAGCCAGTCTCTGCCCCCGAG +TGTGTGGCTGTGCCCCCATCAGCATGTGGTCATTCCCCATGTCTGAGGGCACTGCTTCTC +TCCCCTCCAGGAAACGAAGCTGCCTGAGAGACATGGACTCCACTGAAATGTCTTCAACCT +CCTTCTGCAGCACTCAACACAGGCAGTAGGACAGGGGGAGGAGACAGAAAGTGGAAGTAA +TTCCTTGCGGAGTAGTCAACATCAATATTCTCTTGTCTTCAAAATTGTCCAAACAGTAAA +CAAAGACATGGAAAAATGTTCAACCTACTAGTAATTCAAGAAACGCAAAATCAAAACATC +AGAGACAAATTTTTCAACTAGCAACCACTGCAGAGAAGGAAAATCACAAAACCCAATGCT +GTTGAGGGTGTGGTAAAGCCATACTACTATGTATCTGTCTGACATGCCAGAAAAGGAATT +TGGCAATAAGTATTAAGAGCCATAAAAATCTTACCTTCTTGACTGATTAGCCGTTTTTGG +TTCTCTAGCTATGGTGGGCAGAATAGTGGCCCCCAAAGATGTCTAGTTCCTAATCTCCAG +AATATGCTACCTTAAGCAGCCAAAGGGGCTTTGCAGATAGGATCAAGGGTACAAGAATAC +AAAAAAAAAAAGGATTAAGGGTGCAGACCCTGAGATGGGGCAGTTTTCCTGGATTTTCCA +GGTGATCCATCCAATTGCATGAGCCCTTAAAAGTGGAAGGGGAGAGGCCGGGCGTGGTGG +CTCACGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGCTGATCACAAGGTCAAGAG +ATCAAGACCATCCTGGCCAACATGGTGAAACCTCGTCTCTACTAAAAATACAAAAATCAG +CTGGGCATGGTGGCACATGCCTGTAACCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATC +ACTTGAACCCAGGAGGCAAAGGGTGCAGTGAGCCAAGAGCACCACTGCACTCCAGCCTGG +CGACAGAGCGAGACTCTGTCTCAAACAAACAAACAAAAAAAGTGGAAGGGGAGGCCGGGG 
+GTGGTGGCTCACGCCTGTAATCCTAGCACTTTGGGAGGCTGAGGTGCGTGGATCACCTGA +GGTCAGGAGTTTGAGACCAGCCTGGCCGACATGGCAAAACCCCGTCTCTACTAAAAATAG +AAAAATTAGCTAGGTGTGGTGGCGAACACCTGTAGTCCTCCTAGCTACTCCGGAGGCTGA +AGCAGGAGAATCGCTTAAACCCAGGAGGCAGAGGTTGCAGTGAGCTGAGACTGCACCAGT +GCCCCCCAGCCTGGGTGACAAAGCAAGACTCCATCTCATAAAAAAAAAAAAAAGAAAAAA +AGGAAGGGTAAAGCCAGACAAGAGAGCTGAGAAGTGAGGAGGACTCAACCTGCCATTGCT +GGCTTTGAAGACGAAGTGTCCCCAAGCCAGGAAAAGTGGGGACCTCTAGAAGCCAAGAAG +GCCCTCAACTGACAGCTAGCAAGGAAATGGGACTGCAACTGCCAGGGACTGAATTCTTCT +AACCAGAGTGAGCAGGGAGGCAGATCCCCACTAGGGTCTCCACAAAGGAAAGATGCCCTG +ACCGCACCCTGGTTTTAGCCTGGCAAGGCCCATGTCAGAGGACTTATGAATGCAGAATAA +ATTTATCTTAAGTTGCTATGTTTGTGGTGATAGGTTACTGAAGCAATAGAAAACTAATGT +GCTGGCCCAGAGGATCTCACAAGCAGAGGACCTGTTAAAAGCCAGATCGCCAAGACCCAC +CCTCAGGGCTTCCGAATCAGTGGGTCTGAGGTGCCTCGTAACACAGCGTTCCCAGGTGAT +GCAGACTCTGTTAGTCTGAGAATCACACTTTGAGAACTGTTGCATGAACCTAAGGAAATA +ATCCAAAATACCTGAAAAGCTCAACAAAGCAACTATTACTTGTCATGTTATTTGGAATAG +TTAAAACTTAGACACAAGAGGGGATGATGAAGATTGAGTGTTAATCTGGAAAATGCCCAG +GACAAAAGTCACTGAGAGAAGCAGAATGGGATATGGGATGCTCTTTTTTGTTTGTTTGTT +TGTTTGTTTGAGACAAGGTCTCGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCACCATCAC +GGTTCACTGCAGCCTCAACCTCCTGGGCTCAGTTGATTCTCCCTCAGCCTCCCAAGTAGC +TGGGACCACAGGTGCAAGCCACCATGCCTGGCTAATTTTTTTTTGTATTTTTAGTAGAGA +TGGAGTTTCACCATGTTGCCCAGGCTGGTATCAGTTCCTGGACTCAAGCCATCCACCTGC +CTTGGCTTCCCAAAATGCAGGGATTACGGGTGTGAGCCACTGTGCCCAGCTCAGAATGGG +ATGCTCTTTGACTACAATCAGGCAGAAAACATACACGGTCAATGCTGAAAATGGTCATAC +TGAAAAACAGTACTGCAGGGGCCTGATTATGAATGCTTTTCCTCACCCTTCTCTATTTTT +TTTTTTTTTTTGAGATGGAGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTCGTGTGATC +TCAGCTCACTGCAACCTCTGCCTCCCGGGTTCCAGCGATTCTCCTGCCTCAGTCTTCCCA +AGTAACTGGGATTACAGGCGCCCATCACCGCACCTGGCTAATTTTTGTATTTTTAGTAGA +GATGGAGTTTCACCATCTTGGCCAAGCTGGTCTTGAACTCCTGACCTCGTGATCCACCTG +CCTCAACCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCACCCATCT +CTATTTTTTAAAGTTCCGGCAATGTGGTTATATTAACTTTATAATAAAAGTTTTGTTCTG +TTGTTTTGTTTTATTTTGTTTTGAATAGAGACAGGGTCTCATTCTGTCACCTAGGCTGGG +GAACAGCGGTGTGATCATAGCTCAGTGAAGCCTCCAACTCTGGGCTGAAACAATCCTCCC 
+ACTTCAGCCTCCTGAGTAGCTGGGACTACAGGTATGTGCTGCCAGCCTGGATCTAAAAGT +TTTGTTGTTGTTTTTTTTTGTTTTTTTTGAGACAAGGCCTGGCTCTGTCGCCCAGGCTGG +AGTGCAGTGGCGTGATCTCAGCTTACTGAAACCTCTGCCTCCTGGGCTCAAGCCATCCCC +CAACCTCAGCCTCCCAAGTAGCTGAGAGTACAGGCATGCAACCACACCTGGCTAATTTTT +GTATTTTTGTAGAGATGGGGTTTTGCCATGATGCCCAGGCTGGTCTCAAGCTCCTGAGCT +CAAGTGATCCTCTCGCCTTGGCTTCCCAAACTGCTTGGATTACAGGCATGTGCCACCACA +TCCGGCCTAAAAGTTTTTAAGAGTAATAAGCAAAGGTAGATGTGTATGTGTGTGATACTG +TCATGGTGACATTTGTCCAAACCTATAGAATGTGCCAAGAGTGAACACTGTGGACTCTGG +TTGATGGTGATGCATCAATGCAGTTTCAACAACTGTGACACATCCACCCCTCTGGAGCGA +GAGGTCTGCAGTGGGGAGGCTATATGTGTATGGGGGGAAAAGGGGGTGTATGGAAACTGT +ACCTTCCACTTAATTTTGCTGTGAACCTAAAACTGCTCTAAAAAATAGTCTATTTTTAAA +AGGCACATGATTCAATTACATTTTCCATCAATAACAACTGAGAGGCTTGGGAATGATGAC +GGGTGTGGACTGCCCCGGGCCCCACTCACCTGGTGCACGTCCCTCCTGTATCCGGCATCG +TTGTTGCTGCCCCCGATCACATACAGCTTGTTGAGGAGGGTTGCCATGCCGTGCCAGGCG +CGCCGCACAGGCCCATCAGCCAGTGTGTGCCAAGTGTTGCTGCCTGGATCGTAGCAGTGT +GTCTCTTTCAGGTAATCCTCCCCTCTGCGGCCGCAGGTGATATACATCTTCCCCTCCAGC +GTCGCGCCTGCGTGGGCATACACCTGTGTGGAGCCACCAGGAGAAATGGCGTGAGAGGGC +AGTGAGGAAGGGTGGCTGGGTACTCTGTCAAGGCCAATCACCACCCCACACCTGCCTGAC +AACGCTGTGAGGCTTCACCACTTCCCACTCACATACAGACAAGAATCCTCCCTCGATTCT +CATGGGACATACTGTCCTAGTCATTGACCTCTAACCTCTTCCCAATAGCGGATACATCTA +TAGACTCAGAAATGAAATGTAGCCTTTGTCTTTCCTTTGCTCCTAAAGTTAACATCTGCA +TGGGGGAACTCCAAGAAAATAAAAGCATTTTTTCCAAGCCAGTCTGGAAGAAGCCACAGA +ACCCTGGGGACCGCTGACCCACCCTCAGTGGCCTCCACCCCTCCCCCACTCCCACTGGAC +ATGCAATCCTCCGGGCCTGCTGACCCTGCTTTTAAAATGTTTGTAAATAATATCTGACAC +TAAACTCATGATGTGGGGAGAAAAATACACATGGTTCTTGATTCTGGTCCTGAACCTTCT +GTGGGCTTTTTACCCTCCCTCTGTTGGGCTATCTCCATAGGGCCCAGCCTGCCCCCATCA +TGGGCACCTCTTACCCTGGCCCTACCACTTCCGGAACCCACCCACTCACACCCAGCAGGC +CAACAGAAGGCCCCAAGGCAAGGCCCTGGGTGTTTTGAGGGTAGAGGAGCCCCTCAGGGT +GTGGAGGCTCCAGAGCTATGTTACTGGGCAAGTCACTAACACTGAGGCCATGCGGGTGGC +AAAAACACAAGAGTCTAGAGAGGTGGACGGAGCAGTGGGGAGGAAGAACACACACCAAGG +GTTTGGTTTGGGTCTTCTCCTATCACCAGTAGGGGTCTGGGGAGGTTTTTCTTTTTTTTT +TTTTTTTTGAGACGGAGTCTCGCTCTGTCACCAGGCCGGAGTGCAGTGGCACGATCTCGG 
+CTCACTGCAATCTCCGCCTCCCGGGTTCGAATGATTCTCCTGCCTCAGCTTCCCGAGTAG +CTGGGATTATAGGCGCCCGCCACCATACCCAGCTAACTTTTGTATTTTTAATAGAGACAG +GGTTTCACCATGTTGGCCAGGATGGTCTCGATCTCCTGACCTCGTGACCCACCTGCCTCG +GCCTTCCAAAGTGCTAGGATTATAGGCATGAGCCACCACGCCCAGCCGAGGTTTTTCTTT +TTAATTAAACTTTTCATTTTGAGATAAGTGTAGTCACGTGTAGTTGGAAGTAAAAATACA +GAGGTCTCACGTGCCTTCTTTCCCCCATGGTAGGAGCCTGTGTCACTATATCACAGGGTC +ACCAGGATGCTGACGTGGACACAGCCACAGAACTGTCCTATCACCACCAGGATCCCGAGG +GGCTGCCCTTTAGGGTGACATCCACTCACCTCCCTTACCCTAATCCCTTCCTTAAGCCCT +GGAAACCACCAATCTCTTCTCTATTTCTACAGTTTTGTAACTTCAAGAGTGTTGTCTGAA +TTAAATCAGTATACAACCTTTTGACATTGGCTTTCTACCCCAACCCAGGTTCTACCAGGT +TGTTGCATGTACAGGTTGAGTACCTCCAAAATCTGAGACTTTTTGAGCACAAACACGGCA +CTCAAAGGAAATGCATTTCAGACTTTTGGATAAGAAATGCTGAACCAGTAACTATAATGC +AAAGATTCCAAAATCTGAAAAAATCTGAAATCCAAAACACTTCTGGTCCCCAGCATTTCA +GATAAAGGGTACTCAACATGTACCAATAGTTGGTTCCTTTTCTATTGCTCAGTAGTATTC +CATGGGTATCTACCACAGCTTGTTTAACCATTCACTGTTGAAAACACCTGATTTGTTTCC +AGTTTCTGGCTATTACCAATAAAGCTACTATTAACATTCATGTACAGATAAGTTATTTTT +TATTTTTTTGAGACAGGGTCTTGGTCTATTGCCCATGCTGGAATATAGTGGCACAATCTT +GGCTCACTGCAACCTCTGCCTCCCGGGTTCAAGCAATTCTTGTGCCTCAGCCTCCGGAGT +AGCTGGGATTACAGGCACGCATCACCATGCCTGGCTAACGTATACAGAGTAGTTTTCATT +TCTCTGGGATAAATACCCAGGATGGTAACTGCCAGATCTGTTTTCCAGAGTGGCTGTACC +ATTTCACATTCCTATCAGCAATCCATGAGCATAGTTTCCCTGCACAGTCACCAGTATTTC +GTATTTTCACTACTTTTTATTTTAGTCATCCTGAGGTAGTTCTCTCACTGTGGCTTTAAT +TTGCATTTCCCTGATAGCTAATAATGTTACATATCTCCTTATATGCTTATTTGCCATCTA +TACATCCTCTTCAGTGAAATGTTTCTTCAACTCTTTTGCCTTCTGTTTTTTGTTTTGTTT +TGTTTTGTTTTGTTTGTTTTTTTTTGAGACAGAGTCTTGCCGTGTCGCCCAGGCTGGAGT +GCAGTGGCACAATCTCAGCTCACTGCACCCTCTGCCTCCTGGGTTCACACCATTCTCCTG +CCTCAGCCTCCCAAGTAGCTGGGACTATAGACGTCCGCCACCACATCCAGCTAATTTTTT +GTATTTTTAGTAGAGATGGGGTTTCACCGTGTTAGCCAGGATGGTCTCAATCTCCTGACC +TCGTGATCCACCCACCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCGTGC +CCGGCTTCTTTTGCCCTCTTTTTAATGGGGTTATTTGCAGTATTTTGCCAAGGTTTTTTG +TTTTTGAGATGGAGTCTCACTCTGTCGCCCAGGCTGGAGTGCAAAGGCACGGTCTCGGCT +CACTGCAACCTCTGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCCAAGTAGCT 
+GGGATTACAGGCGTGTGCCACCGCGCCCAACTAATTTTTGTAGTTTTAGTAGAGATGGGG +TTTCACCATGTTGGCCAGGCTGGTCTCTGGTCTCGAACTCCTGACCTCGTGATCCGCTCA +CTTCAGCCTTTCAAAGTGCTGGGATTACAGGTGTGAGCCACCACACCTGGCCGTTTATTG +AGTTTTAAGAGTTGTTTATATATGTATTTATTTAATACTAGTCCTTTGTCAGATAGGTGG +TTTGCAAATATTTTTTAATTTTGATGAAGTCCAGCTTATCAATTTTTCTTTTTATGGCTC +ATGCTTTTGGGGTCAAGTCTAAGAATTCCTTGCCTAGCCCTAGCTAGATCCTCAAGATTT +TCTTCTGGTCATCACGTCGCGTGGCCGCAGGGAGCAGACCCGGACAGCTCCAGAGCCTCC +GGGCCGGGGCGGCGGCGGCGACGCTTCGGCTCCTCCTGAGCCACCTGCTGGACCCGCACC +CCACTCCATCCCCACAGGCTGGGGACAGGCCCTGGCGCGGCTGTGTGGGATCAGAAGCAG +AGTTGCAGAATCCAAGGACCTATTTTTGTTCTTTCTCCGCACTGCTTTATGGGAGGCATT +ATGGCCCCCAAAGACATAATGACAAATACTCATGCTAAATCAATCCTCAGTTCAATGAAC +TCCGTTGGGAAGAGCAATACCTTCTGTGATGTGACATTGAGTAGAGCAGAAAGACTTTCC +TGCCCATGAGATTGTGCTGGCTGCCTGTAGTGATTACTTCTGTGCCATGTTCACTAGTGA +CCTTTATAGAAGGGGAAACCCTATGTTGACATCCAAGGTTTGACTGCCTCTACCATGGAA +ATTTTATTGGACTTTGTGTACACGGAAACAGTACATGTGACATGGAGAATGTACAGGAAC +TGCTTCCTGCAGCCTGTCTGCTTCAGTTGAAAGGTGTGAAACAAGCCTGCTGTGAGTTCT +TAGAAAGTCAGTTGGACCCTTCTAATTGCCTGGGTATTAGGGATTTTGCTGAAACCCACA +ATTGTGTTGACCTGACACAAGCAGCTGAGGTTTTTAGCCAGAAGAATTTTCCTGAAGTGG +TACAGCATGAAGAGTTCATTCTTCTGAGGCAAGGAGAGGTGGAAAAGCTAATCAAGTGCG +ATGAAATTCAGGTGGATCCTGAAGAACCAGTCTTTGAGGCTGTCATCAGTTGGGTGAAGC +ATGCCAAGAAAGAGCAGAAAGACTCCTTGCCCAGCCTGCTACAGTATATGCAGATGCCCC +TGCTAACCCTCAGGTATATCACAGATGTAATAGATGCTGAGCCTTTCATCCACTATGGTT +TACAATGCAGGGATCTGGTTGATGAAGCAAAGAAGTTTCATCTGAGGCCTGAACTTCAGA +GTCAGATGCAGGGACCCAGGACAAGGGCTCGCCTAGGAGCCAATGAAGTGCTTTTGGTGG +CTGGGGGCTTCGGAAGCCAGCAGTCTCCCATCGATGCAGTAGAGAAATATGACCCCAAGA +CTCAGGAGTGGAGCTTTTTGCCAAGCATCACTTGTAAGAGATGTTATGTGGCCTCAGTGT +CCCTACATGACCAGATCTACGTCATTGGTGGCTGTGATGGTCGTTCCAGCCTTAGTTCAG +TGGAATGTCTAGACTACACAGCAGATGAGGATGGGGTCTGGTATTCTGTGGCCCCTATGA +ATGTCTGATGAGGTCTTGCTGGAGCCACCACCCTGGGAGATTTGATCTATGTCTCTGGAG +GCTTTGATGGAAGCAGGCGTCACACCAGTATGGAGCGGTATGATCCAAACATTGACCAGA +GGAGCATGCTGGGAGATATGCAGACAGCCCGGGAAGGTGCCGGACTCGTAGTGGCCAGTG +GAGTGATCCACTGTCTAGGAGGATATGACGGCTTGAATATCTTAAATTCAGTTGAGAAAT 
+ATGACCCTCATACAGGACACTGGACTAATGTTACACCAATGGCCACCAAGCGTTCTGGTG +CAGGAGTAGCCCTGCTGAATGACCGTATTTATGTGGTGGGGAGATTTGATGGTACAGCCC +ACCTTTCTTCCGTTGAAGCATACAACATTCGCACTGATTCCCGGACAACTGCCACCAGTA +TGACCACTCCACGATGCTATGTAGGGGCCACAGTGCTTCGGGGGAGACTCTATGCAATTG +CAGGATATGAAGGTAAGTCCCTGCTAAGTAGCATTGAATGTTACAACCCTATCATCAACA +GCTGGGAAGTCGTGACATCCATGGGAACCCAGCGCTGTGATGCTGGTGTGTGTGTTCTCC +ACAAGAAGTGACCATTGTTGGAGCACCATCCAGAGCTAGTGACCAGTCCAGTGGACAGTT +AGTGGGAGAATCAAGAATCCTTTCTAGAATGTCTGTTTCTCACTATGTGCACAGGGTGAT +TACAGGCACCAGTGCAGTGATGATTGTACTTATTTGACACATACTCCCCCTCGTCCTGGT +TGTTGTTCCTGAGAAGGGTGGGTAACAGATACTCCAGGGAAAAGAATGCACATTGAATGG +ATGTGAGAGACCACATTACCTCTCCCACTACTTTGGGGAGCACTTTCCTGTCATTTCTAA +CTTACCACGTGCTTGGTGTACTATATGTATGTTGTGCCTCATATGTTGCAAAGAACTAAG +GTGAGTATAGCCTACTAGATGTGAGCAATATCCAGCCTAGATGATTGGAAAGATACCAAT +TTAAGTAAACTTGGTAAAATCCAAGTCTTTTTTTTTTCCAGGAACAAATACATTTTCTAA +TCTACAGGTAGCTAGGGGCAACACAGTTCCATTCTAAAGGGAAACAAAAGGGAGAGCCCC +ACAAAACTTTGGGGGCAAGGGAGAGATACTCATCTGACACTTCTTTTGGAGGTCAGGGTT +TGTATATCAGAATTGAAGTTAGAATCAGTGAATTAAACTGAATTTGATGGAATGTGAGTG +AACCTAGAACAGCACTGAAGTATTACATAACCTGGAAGACTGAGAAGGGTATATTCTTTG +AATGATCTTTTTATTTCCCCAAGGTCTTTCACACTGGAGACAGCATAAAAGAGTGAACCA +ATGTTGGGATGAGAGAAGATGACATAAATGTGGGAGTTCAGTATAACTGGGGATAAACTA +GAAGTACCTGTGATTTTACAGTCATCTTATTGCCTGCCAGGGGTCATCTAGCCATGGCAG +TGTTAACCTTGAATGGGGGTGAAAGCCTTTCTTTGTTGAATCAAATACTACTACACTATT +ACACTTCCACACTATTTATTTGGGGATGGACTGGGAGTGACAGTAGCCTAGTAGTTCAGC +TACCTGATTACTGCCCCATTCTTTTAGAAGCACACTTCTGCCAAGGAGTGGTTTGTACTG +CTGTGATTGGTACATTTAGTCTTTTTTCTGCTATAAGTTTTCCTTACCTGTCCTTTAGTG +TAGATTTTATTCATTACAGGACAGAATAATCAAGGACAACCAAAATCCTTTTGTTAGTTT +CAGTACCTCAGCTATCAACATTTCTGAGCTATCATTCAATGTTCCTCTGTGTCATGGAGT +GAAATTCTTGTTTTATGGGTATTGGGAGTGTGGGAATGTGATAACCTAAACAACCTTTGC +TCTGAAATTCCATTTTTCCCTCTTTCCCTGAAGTGTACTGACCTGTACTACAGAGTTAAT +TTCTTTTGTATTTTTTTAAGAAAATATTAAAAATCAATGGTCTCAAAAAAAAAAGATTTT +CTTCTTCTTTTTTTCTAGAAGTTTTATGGTTTTACATTTAACTCCATAAACCATTTTGAA +TTCATTTTGGTATATGGTATGAGACTTGGGTTGATATCCATTTTTTTGCCGACGCATGTC 
+TAATTGCTCCAGTACTATTTGCTGAAAGATATCTTTCCTCCACTCAATTGTTTCTGCACC +TTTGTCCAAAATCATGTGTAACATGACTGGGCATATTTACTCAGGTCTGTGGGTTTTCCA +TTCTGTTTCACTGATCTATGTGTCTATCACTGTATCCATACCACAGCCTTGATTACTGTA +GCTATGTAGTAATCTTGAAATTGGGCAGACTCCTCCCATCTTATCGTTCTTTTTCAAAAT +TATTTGAGCTATTCTAGTTCCTTTGCCTTTATATATACATTTCAGAATAATCTCATTTAC +ATCTAATGTCACAAGTAAAATCAAATCAAAATTAAAAAATTTATTTTATTTATTTTGAAA +TTTATTTTATTATTATTTTTTGAGACAGTCTCACTCTGTCGCCCAGGCTGGAGTGCAGTG +GCGCAATCTCGGCTCATTGCAACCTCTGCCTCCTGGGTTCAATCCATTCTTGTGCCTCAG +CCTCCCCAGTAGCTGGGATTACAGGTGTGTGCCACCACACCCAGCTAATATTTGTATTTT +TAGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAAGCG +ATACACCATCCTCGGCCTCCCAAAGTGCTGGGATTACAGACGTGAGCCACTGCGACCAGC +AATAAAAAAAAATTTTAGGCTGGATGTGATGGCTCACTTCTCTAATCCCTGCACTTTGGC +CGAGGCAGACAGATCCCTTGAGACCAGGGGTTCAAGAACAGCCTGGGCAATATGGTGAAA +CCTTCATGATACTAAAAATATAAATATTAGCTGGGCATGGTGGTGCTCACCTGTAATCCC +AGCTACTTGGGAGGCTGAGGCAGGAGGATCACTTGAACCAGGGAGGCAGAGGTTGCAGTG +AGCCGGGGTCATGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAA +AAATAAAAATAAAAAAATTTTTAAAAAGCTTATTGTGCATGTAAAAGAAGAGTTCATGAA +CCAGGAAACCCCAAACTGGAAGTGGTATGAAGCCCCAATTCACAGCAGTTACAACACAGT +TTATAAAGCATGAATGAGGAAGAATATTGTTCCCTACTGTGCATGTGGAGCTGCATAAGC +TTTCTTTGTAAAGCTATCACACTGAGGGAGGAAAGCTTAAGTTTCAGTTACATGGCTACA +GGCAGTTGGCCTAGGGGTATATCCAAACCTCAGCTTCAATTTTCATTTTCCCTTAATAGT +ACAAAAAACCTTCCTGAAATTTTGATAGGAATACTAGTATTCCTGTATTAATACTATATC +AATATGAGGAATTCTGTATTAATATATGTATTATTAATTGGGTTGGGAAAGAACTGACAT +TCTTACTATTCTTACTCTTCCAATCCATGGACTTGGTATGTCTCTCCATCTATTCACATC +TTCTTTTATTTCTTTCATCGGCTTTTTGTAATATTCAGTGTACCAATCCTGTGAGGTTTT +TTGTTTGTTTGTTTTGAGACGGAGTCTTGGCAGAGTCTTGCTCTGTTGCCCAGGCTGGAA +TGCAGTGGTGTGATCTCAGCTCACTACAACCTCTGCCTCCCGGGTTCACGCCATTCTCCT +GCCTCAGCCTCCTGAGTAGCTGGGACTATAGGCGCCTGCCACCATGCTCAGCTAATTTTT +GTATTTTTAGTAGAGATGGGGTTTTACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACC +TCAGGTGATCCCCCCCGCCTGGGCCTCCCAAAGTGCTGGGATTACAGACGTGAGCCACCG +CGCCCTGCCAACATGACCTATCCTTTTTCATTCCCCAACATCTGATCCATCAGCAAGCCT +GATCAGTTTACCTCCACGCTACACTCTGAACCCTCCCATCTCCCTCCAGCTCCACAGCTG 
+TGTGCCAGCACAGGCTACTGCTGTCTTCAGCCCAGACCACGCAATGACCCAGGGCCAGCC +TCGCTTGCTCTGCTCTTGATCCTTGGGCTTCCTTCTTCTACAAGCATGCCCCATGCTTCC +TCTCCTGCTCAGGAAAAGCTGTGAGCCCCTCTCCTGCCCTGGAAGGTCCTTCTGCTAGAC +TAGGCAGATGGTTAGATATGAGCAGGGAAGTGGACTCCAGGTTGGCCTCAGCCCCTAGCA +TGGCCAGAGGGACTATGACCTCTGAGACATGTTAACCACACCTGGGGTTTCAAAAGTCCC +AACCCCTCTTTCAGTTAAGCTTATCCAGCTCAAGTAGGAACTAACCCCACAAGGAACTTT +CCACCCCCGGGAGCCTGTGTTGTATAGTTTGCCCCAGTTTTGCTTGCTTCGAAAGTAACC +TTTTGTTCACTGCAGTGGTAAAAAACACACCTCCTGGGTGAAGATTTAAGATGCTAATGA +GACATGCGATAAATACACTAGCACGTAGAGCCACAGTGCATGCGCTGCTAGAAGACCACC +CAAACAAAACATGCTTATGAGTAACACCTCTTCCCGCCCATTTATGAATAATCATGTAAG +CCTCCTGTAAGGGAGGTTTCCCATGCCAGTTGGGGCGATCTTATTCTGGAGCAGCCTGCT +CTGACTCGGCCTTCAGAATGTACCTTCACTTTTGCAATAAACTGCCTTACACTTATCTTC +GTTAATGCATGTCTCTTGCTTAAAGTCTTAACCAAGAAGACAAGAACCGAGGACACATTC +CCAGTAACATTTCCTCCCTCTGTAAATGGCAGGTCTAATTGACATCTCCTCAGATGGACA +TTCCCCAAAAAGTCCCTCTAGATTCATCCTACCAGAGCCCCCTCCCTGGAAATCCTCCAG +TTCAGCCCCCTCTTCATTTCACTTACCATAATTGTAACCACTTTACTGATCTGTTTTCTG +GTTAAGAGTGGTTTCTCCTACAAGGCTGTAAACACCATGGAGTGGGGAGTCTTTCTCATT +TCTTGTCACCACTGGATTCCCAGGACTTGGTACATGGTGCTTAGTAGTTGCGGAATGAAT +GCGTGCATGAAGGAACACCGGCAGCCACGATGCAGCTGGTTGAAAGCCAAGGCTGCCCCT +CTGTGGCCAAGCTGGAGAACTGCTGCTTCCCCAGCTGCCGCAGACAAAGAAACTGCTAAG +CCCCTCCCACAACACCCACCTAGGAGGCTTCCCCCAACACCCTTCCTGGAACAGAACCCA +AGAGCCCTCCTTGTCCACTTGGTCTCAAGGACCTCCTGATCTTCTCCATGCAAAATCTGC +TCCTTTCCCCAGGATGGGGGTACGGAGTGTCCTTCCAAAGAGGCTTCCTTCTCCCCAAAG +AAGCTGAGAGCAAGGAGCAGGCCACAGCTTGCAGCTCACAGCTGCCCAAGAGACAGAAAT +CTGTGTGGGCTGTGTGAATGACCTGGATAAAGAAGAGCTGGATCCATTGAGGAGACACTA +GCCCTGACCCCTGAAGTCAAAGTCTAAGTCCCTGGGGGAATTGGGGGTCATGAGAGATAC +TGGAAAGAGGCTTACAAAGGCCAGAGCTCTTGCTAAACTCCCTGAGGCAGTGCAGGTAGA +ACCTGCCCCCCATACCATCCTCTGCCTCTCAGGTGCTCGGTTATCCCCAAAGGTGTACCT +GGCCCATAGCAGGTGCTCCACAAATGCTATGCCAGTACTAGCGGGGTGCCCTGCTAGGGT +ACCCCTAGTACTGCTAGGGTATGCCCTGTTCTCGGAGGCTGAGACCCAGGAAGTAGGGGG +CCAGCAAGGCCTGTTCCTTCCCTGGGATCCCATGTCTGGTTGTTACACCAGGCCTGAGTA +CCCCAAAGTCGTAAGATAAAAGAGAGGCACCTCCCTAAAACATCATTTTTCCATAATGAA 
+CATAATTTTAAACATCTTCAAATTGGAATAAACCAAGGTATTATATAGGATGCACGTGAT +TTTTTTTTCAAGTTGTCCCAATATTCAAACATAGGATTTAATATAAAAAATAAAACCTTA +GAGTGTTTCTTGGCTCTGGAAGGCGCCACCCTTCAGGGGACCTAGAGGGAAGAGGTCTGA +GGATAGGCCAGTGAGCTCTCAGGGTCCCTTCCTGTTTAACATCTCTGGAAGTGTCCCAGC +TCGTCTCAGCTGCACAAAGAGCAAATGTTCTATCCACATTTGAACTACAAGGTTTAAGCA +GCTTGCCAGGGCCAAGGTGGTGTGACAAGGTCTCAGCATGTAAGACTCACAAGTGGGGCA +CAATTCTGAGCAATGGGGTCTGGATCCTTGATGAGCCTCCCCAGGTGGACTGGCTGAACC +TCATGGGGCCTCATCAATGCATTCAATGCAGGGTCTCTGGAGAAGCATGGATTCTGCTCA +CTGAGCCTGTTTCTCTGACCTGGGACAGGCCTTGACACCTAGGCCTCTCACTCTTTGCAC +ATCCTGGGGAAGGAAATTAGGCAGGAGAAGGCCCCTCTTCGAGGGCTTCCTTACCTCCCT +CTTGAGTGGGGCCACGTATGCCCAGGAGTTGGTGGCAGGGTCGTAGCGCTCCACAGCATT +CAGGTCATTGTGGTAGTCACGGCCCGCCACAGCGTAGATGTACCTGCCTACAACACACAC +GGACAGGTCGGCGTGCTCCTGCTGCAGGGACTGGATCTGGAACCAGCGGTTGTGCCGTGG +GTCATACCTGTGCCGAGACATGAGGAAAGCACAGCACTGACTAGGCAGGGAGGCTGCTCC +GCAGGCTTGCACCTCTTGTTCTGGTCCCATTTCTGCTTTGCCTCAGCCCAGCCTGAACCC +TCCCCAACTACCCCACATACCAGTGCACACCGATCTACAGCCAGGCCCACAGGCTGTTAC +ACACCTGAGCACAGGTGCTCCCCACCACACAGCATGGAAGGAATGGCCAGGAGCCTCCTG +TGTTTCCACCTAACTGTCCACCTGGGCTGTCCAATGGCTCTTTTTTTTTTTTTTTTTTTT +TTTTTGTGGAGAGAGGTCTCTCTGTCCCCTAGGCTGAAGTGCAGTGGCACAATCATGGCT +CACTGTAGCCTCGACCTCCTGGGCTTAAGTGATCCTACCACCTCAGTCTCCTAAGTAGCT +GGGACTACAGGTGCGCACCACAACGCCCGCTATTTTTTTTTTTTTTTTTTTTTTTGATAG +AGACAAGGTCTTGACACCTGGGATATGGACTTAGGCAATCCTCCTGCCTCAGCCTCAATG +GCTGTTCGTACAGCCACTTCCATCTCACTTACCTCATCTCATCATCCACATAGCCTTGCT +CCCACTCAGGAGGGCCTCCAAATACAGCCTCTCCACTGGATGGGTGCCATCAGCCCCCTC +ATTGGCCTCCAAGCCTTGAACTTCCCCCAGCACTCCATCAGGAGTGGTCTTCTCAGGCAC +GCCCTGCACACAGCACCCATCATACCCTCAGCTTTGTCAACAGCCGCACACACAAGGCTC +TGGTCATCGTACTTTCACCTCCTCCCACCATGCTGCCAGCCCCGCGTTTCCTGACCTCCC +CTCCACTCAAGGTCTCCCTCTCTGGGAAAGTGGAAAGGGTTAACTCCCACCCTGGACTGC +AGGCTCTGTAAGGGCAGGGTGTGTGAGATTCCAGGGACCCCCATGGGAGAAAAGGCACTA +GACTGGTTGCGGCAGGCCCACGAGGGTGGAGGAGTCTGGCCAGGACCCAAGGAGGGCTCC +TGGGCTGAGCACCAACAGGGAGCATGGCCTGCAATGTGGAGCAGAGATGATCACAGAGCC +ATGAGGGGCCAGGACCCAGTGAGCTCAAAGCCAGTATCTGAAGTGTCCTTGCCCAACAGC 
+CCACAGATAGTTACAGCCTCTTTCAGACAAGTGTTTTTATTTCTGCAGAGGAAGACCTAT +TCCAAAGAGAACAAGTCTAACTTCAACTGGCTCACAAGAGAGTAACCAAAGTTTTCTTTG +GCCTGTGCCCTATGCTGGTTAGTGAAAAGATCAGTTTCATCCTTTGCTTCCTGCAGCCCC +TGCAGTGTGGCCAGCCCCTCCCCGCCGCCTACCAGGAATGCAGCAAACAGGAGCCTCAGA +AGGCAGGGCCAACAAGAGAGGGAACACACACCTGGGTCTCCCTGACCTGGTGTGAATCCT +GGGCTCTTCCCTGTCTAGCTGGGTGTGCCTGGGCATCAGTTTTAATCTCTTGGGATTCGC +TCAGTTCTGCACCTGTCCAATGAGGTGATATCACCTGCCTAGAAAAGCTATCAAGAAGAA +TCATAAAAGACGAGCATGAAAAGTGTCCTTGTTAACTTGGGGGTTCTGATGGATCATGGT +TCATTTCTATATTCCTAGACCTCAGTGTAGCCTCTGGCTCAGGACAGATTATGTTCTGTC +TGCTGAATGAATTAAACAAAGGAGTAAGATTACCAGCAATGAATCATTCATTCACTCTAC +CAACATTCCCAGGTCTCCCACTGTGGATGGGGCACAGTGCTCAGCACTACATCCAGAGGG +AGCCTCAAGAGGTCAGGGCCCTGCCTTCCCCTGCAGAATGGAAACAGGCTAACCAGAGCA +AATGAGTGCTTAATATAATGACAGACACTGTGAAAGAAAGAAAGGGAGGTAAGAAGATGA +GTGACCTGCTGGTCAGCGGGGCCTGGGGAGGGCAGAGCTCCCTTGCAGTGATCAGGAAAG +GCCTCCTGAGAAGGCAGGGTCTGAGACCTGCATGACCTGGGGCACAAGCCAGAAGAAGGT +CTGGCAGGGGACCAGCAGTCCAGGATTGGGGCACAGCACTCACAGGGGCCCCGTCAACTG +GCTGGGATGGTGCTGTTCACTGCCTTTCCTGTGCGCCTTCCTCCTTTATATACAAGTCTC +TCCTCTCTTGTGTTCAGTCTGGCTCAATCACCATCTGCACCCCAAGTGTCTGCAACCAGA +GTGAGATAGGACCCTCCATGTGCTCCTGTTTGCTCCTTCGAGTCTATTGTCCTCACAGCT +TTTTAAGCTGAGTTCATGTGGATTTGTGGCTTCAAATTTAAAGACACACACAGGGCCGGG +CGTGGTGGCTCACGCCTGTAATCCCAGAACTTTGGGAGGCTGAGGCGGGTGGATCACGAG +GTCAGGAGTTCAAGACCAGACTGTCCAATATGGTGAAACCCCGTCTCCACTAAAAATATA +AAAAAATTAGCCAGGCATGGTGGTGTGTGGCTGTAGTTCCAGCTACTCGGGAGGCTGAGG +CAGGAGAATCACTTGAACCCGGGAGGCAGAAGTTGCAGTGAGCTGAGATCATGCCATTGC +ACTCCAGCCTGGGCGACAGAGTGAAACTCCATCCCCCAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAGACACCAACAGTGATACCATATGTCAATAATTTAGACATAGCCAAA +GTTACACTGATTTCTTATTTCCCTGTTGTGTCCTGTATCCCTCTTCTTCTTTATTGAATT +AATTTTCTACTTTATGAGAATAAGTCCTCAAGTAATATTTTTCTAATAAAAATAAGCTTT +ATTTTTGCATATAAAGATATAAACAATTTTACTTATTGCTATGGACTAGATGTTTGGATT +CCCCCAAAACCCCTATGTTGAAGGCCTAACCCCCATTGTGATGGTAGTGGGAGGCATGGC +CTTTGGGAGGTGATCAGGTCATGAGGGTGGAGCCCCATGATGGGCTTAGTGCCCGAGGCT +ACAGCTCTTTCTCTCCACCATGTGAGAACACAGACGGAAAGGACCTTACCGGGACTAAAC 
+TGGCCAATACCATGATCTTGGACTTCCCAGTATCCAGACAGTGAGAAATAGAGGTATGTT +GTTTAAGTCACTCCATCTATAGTATTTGTTATAGCAGCTGAAACTGACTAGGAGAGTCAC +TTTAACTCAGATTTGTATCATCCTATTGTAAACTCAGACATGAACCCTAGAGTTGGTACA +AAAATATTCTGGAATTCTTGTTAAATTCATTAGACTGTGGCCAGGCACGGTGGCTCACGC +CTGTAATCCCAGCACTTTGGGAGGCTAAGGCGGGAGGATCATGAGGTCAGGAGATCAAGA +CCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCAGGC +GTGGTGGTGGGCGCTTGTAGTCCCACCTACTCGGGAGGCTGAGGCAGGAGAATGGTGTGA +ACCCAGGAGGTGGAGCTTGCAGTGAGCTGAGATCACGCCACTGCACTCCAGCCTGGGTGA +CAGAGAGACTCCAACTCAAAAAAAAAAAAAAAAAAAAAAAAATTCAGTAGACTGTTTTTC +TTCTATATTTCCATGACTTAAGTATTTTCATGACTATTAATGTTTTAATGTCTATTAAAT +TTTTTAAATAAATTAAATGTACTGACCGGGCACGGTGGGTGGCTCATGCCTGTAATCCCA +GCACTTTGGGAGGCTGAGGCAGGCGGATCACCTGAAGTCAGGAGTTCAAGACCAACTTGG +TCAACATGGTGAAACTCCATCTCTACTAAAAATACAAAAAAATGGCCGGGCGTGGTGGCT +CACACCTGTAATCCCAGCACTTTGGGAGGCCCAGGTGGGCAGATCACCTGAGGTCAGGAG +TTCAAGACCAGCTTGGCCAACATGGTGAAACCCTATCTCTACTAAAAATACAAAATTAGC +CAGGCATGGTGGCGTACACCGGTAATCTCAGCCACTCGGGAGGCTGAGGCAGGAGAATTG +CTTGAACCTCAGAGGCGGAGGTTGTGGTGAGCCAAGATTGCACCATTGCACTCCAGCCTG +GGCAGAAAACAGCGAGACTCCATCTCAAAACAAAAAAACAAAAAAACAAAAAAATTAGCT +GGGCGTGGTGGTCGGCGCCTGTAATCCCAGCTATTCAGGAGGCTGAGGCAGGAGAATGGC +TTGAACCCGGGAGGCAGAGGTTGCAGTGAGCCGAGATCGTGCCACTGCACTCCAGCCTGG +GCGACAGAACGAGACTCTGTCTCAAAAAAAAAAAAAAAAAAATTAAATTTACTTTAAAAT +ATTAGCAGCCATGAAATGCCGAAGGAAAAATTACATTATTATTTAAAAAATTTTTTTTAG +AGACAGGGTCTCACTCTGTTGCCCAGGCTACAGTACAGGGGCATGATCATAGTTCACCGA +AGTCTAAAACTCCTGGACTCAAGCGATCCTCCTGCCTCAGCCTTCCAAGTAGCCAGGGCT +GCAAGCACGTACCATCAGGCCCAGCTAATTTTTTTTTTTTTTTTTTGGTAGAGACGAGGT +CTCGCTTTGTTGCCCAGGCTGATCTTGAACTCCTGGGCTCAAAAGATCCTGCTGCCTCAG +CATCCCAAAGTGCTGGGATTATTGACATGATCTGCACATAGCCTAAATTATTATTTTAAG +TAGGGTCAGGGCACATAGCAACATGTAACCAGCTACTACTAGGTATACCATGTAGTTGGT +GTTCAGTAATCCACAGGGCTATGGCTATGGGAAAGGTACAAGGAATGATGCAGATAATAT +AGATGCAAAGAAAGAAGGAATGAAAGAGAAAAAGTAGTATATTGGAGACAAACACCATTT +GCTCCAAGAGAGGAAAGGTTTTTAAATATGCGGTTTCTGCACCAATGCTTTAAGATATAA +CCTGGTAAGTAATAACTCCTCAAAATGCAAAGCCTAATTTACTGCAAAATCTTGTTTCCT 
+TATATTCTAAAAGCCTATAGCTTTTAGAATTCACCAGTTCTAAGGCTGATGAAATGAGAA +AGCAGAGAAAGAACCAAATCTTAGAATATTCAAATTAAGGATTCATCTGGAAGTTTTGAG +TCCACTGGAAGGTTAATGATGCCTTCATTCTGGAGAGATGATACATCCCTGGGTTAACCA +GTAGGGCTGTGGTCCTTCCTGCCACCTCCACTCTGTGGATACCTACTCTCTCAAGGCAGG +TCTTGCCGACCCCACTTGCCATTGTGATCCTACTCTCAGACAGGAGTTTGGGACTGGTTA +TTGGTTACAGGGTCTAATATTAGCATTACCCAGAGTTCAGCTTTGGTCTCACACCCCTTC +ACAAGGGGAAATTCAGACTCCTACTTTAAGTGAAAGTGAGATCAATGGCCCATAGCAGGT +GGACAGAGGACAGGGGCAGGCAGAATCCCAAGCTGTCACCCCCCTAGGGACAGCTCTGTG +GAAACATCCAGCATTACTGCCAAGCACACAGCTGTCTCTACCTTCTGCTAAGGGCCAGAA +GCCAAAACACTCTGGAGAGATTGGCTTTGCAGGTCTGTAGGTTACTCTCAGCAGGGTGGT +CTGGCCTTGGCCCCCCAACTGGGAAACCGAGCCCCCTAAAAAAGAAGGTTTAAGGTTTGG +CCAAAGGGAAGTCTAAACCTAGTGTGGCCACACCCACTGGGGGCACAACTTTGTTTAGAG +AGGAAATGAAGGTAGCTGTTTCCTTTTTTCTGAAACCTGCCCTTCCTCATATTTAACCAC +AATGTGTTATTGCCAACCTCTGAGTCTCATCAGCTTAACAAAGGAGATCACTTCCTGGGG +CCAGCAGTAGAGAGGTCGCTGAGAAATGGCCTTCTCCTCCTTGGCCAGGATCACTATGCT +TGATTCCAAGTGGCATGGCCAGGCCAGGCCCTACCCCACCCTGACCCCAGAAAATAGAAA +CATGAACATGAACTCAGTCAGCATCTAGCTCATGGCTGCTGGAAATAGTTGGCAATCTCT +CTCTCTCCAACACAGCTCCATCTTTCAGCATGAAGGGTAGGGAGATTTCTCACATGCACC +AAGGTAAGGTGGAGGGGTCCCTGGAAATCTAATGGTAGCTGAGTCTTGTGGGCCTGAACC +ACTGTCCCTTGCTCTGCCCAGAAATGGATTTGTGAGATCACAGCCTAAGTGACTGATGGA +GCCAGGCGCATCCTACGGACCACAGCAGTCCACAAAACTCTCTCTTGGGAAAACTGCCTC +TTCCCAGGGGAAATTGTGCCAAGTGCGTGGCAGACTGCAGGGCAGAGCGGAAATGTAGCA +TGAGGCTCGGTGATGCTTGTTGGCCTTTACCACGCCCCTGGGCTGAAGACCGCATGCTGC +TCAGGCACAGCAGCAGGAAGCCTGTTCCACCATTAGCAAGCAGGTAAGAGAGAAGAGCAA +AGCTGACTGGTGAAGGGAAACTGAAACAACATGCATCAAAGCTCAGAGAGAGCATGATGG +AGGGCGAGGCTGCTCAGCTGGCACTGCTCCTAGGTCCTCTCCATCCCAGTCCGACTGCCT +TTCCTGCTGTGGGCTCCTCTCCCCTCCCGTCTCCTCCTGCACAATGCACTCGCAACCTAG +CCTACCCCAAAGAGGCTGAGCAAGGGTCCAGGGTAAGTGGGTTTCTGAGGAGTAGGGGAC +ATGCAGGAAAGAAGGCGATCCACATGTCCCCTTCTCTTATCTCACTGCTGTCCACCTTCT +CTGAATTCACCTAGACTTCATTCCACAATGGCAAGTGATGCTCATGTGAAAGGCTGGGGG +GAACCTGACCAGCTCTCTCAGCCCATGACTTCCACACCCTCTCATCACCTGAAGACAAAG +GGACCAGCTAGCACCCTGTCCTACCTCCAGCATCGGGACTCTGCTCGAAATCCTTGGACA 
+TTGTTGTCCCCTCCAATCAAGTATACGAAGTTGTTGAGCACCGCGATGCCCTGGTTGGAC +ATGCGGGGGGCCAGGGAGGCAGTGAAGTGCTTCCACTCTCCCAGTAAGGGGTTTAGATAC +TTGGCCTGGTCGCTGAGGACAGTGGACGGCGTGGAGTGAATGCCCCCGAAGCCCACAACG +CACTGGAAGTCCGACCGCAGCTCCGTTTGCGGGCTCTGCAGGCTGGGCTGTAGGCTCTCG +TTCCGGTGGTACATGAGGGCGCTGGCCACTGTGTCCCTCAAAGGGCTGGGGTCCAGCTTG +TCATGCAGCCGCTGCAGGACCTCAGCTTCCATCAGCGGAAACCGCACTGTCTCAAGGAGC +TTTGGGGGCTCGTGCAGCGAGATCTGGTCAGCCTGCACCTGCTCCAGGCTATAATGGTAG +AGAAGGGCCCCCTCATATACCTCGGTCTCGCAGGAGACCTCCAGGCGATTGCTGCTGAGG +AGGGAGTAGACCTTCTCCAATGGAAGCTGGCGGTACTTGTCAGTCCGAGAGAAGGCCACA +AAGTTTTTGAGGATATAGGTGTCCAGTTGCTCAGTCAGGCGGCTCAAGTCAAACAGCTCT +GCCAGCCGGTAGACATCGAGAATGTTCTCTTCGTCCACCCAGGACATGAGGAAATCACAG +CAGAAATGGATAATTTCTGGGATCTGCAGAGAGAATGACACCCATTCAAGCCTGGGCTGG +TGAACAGCATCTGGGGGTGGGAGAGAGAATGATGGCAGAAATACGGGCTGTTGTGGGCCA +GGCAAAGCAGAAGGGAAGTCCTCCTTAATTGTCCCCGACCTTTTCTGACACACTGGAGAC +TGGGGCTTACTGCAGACTAGGTTTAAGTCCTGGTTTGGAAACATACTGGGTGACAGGATA +TACAGCATGTTTTTTTTTGGTTTTGTTTTGCTTTTCTTTGAGACAAAGTTTCGCTCTTGT +TGCCCAGAATGGAGTGCAGTGGCAGAATCTCGGCTCACTAACACCTCCCCCTCCCGGGTT +GAAATGATTCTCCTGCCTCAGCCTCCCGTACAGAATGTTTTTAACCTCTCTAAGCCTCAG +TCTCCTCATCTATAAAACGGGGATCATTAAAGTGAAGAGGAACATGATGAAAATGAGAGA +GCACATAGGAGTGGACCCTTGATATATGTCAGTTCCCTCTTAGATCAAAAGTACTGGCTC +GGCCGGGTGCGGTGGCTCATGCCTATAATCCCAGCACTTTGGGAGGACAAGGTGGGTGTA +TCACGAGGTCAGGAGTTTGAGACCAGCCTGACCAACATGGTGAAACACCGTCTCTACTAG +AAATACAAAAATTAGCTGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCAGGAGGC +TGAGGCAGGAGAATCGCTGGAACCCAGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCC +ATTGCACTCCAACCTGGGTGACAGAGCGAGACTCCGTCTAAAAAAAAAAAAAAAAAAAAA +AAAAGAAGTACTGGCTCTCCAGAAAAGCAGGAAGCAGCACAAATGGTCACTGTGTCATAT +ATCTGTGGTTTGGAAAGGACCTAAGACCATCCCCATCCCTACCCTAGGATTCTCTGGAAG +AGCCTGCCTCTCCAAAAGCGCTGGGACCAGAAACAAACCTGGCTTTTTCCCCCGCCAGAA +CATCCACCCTGGTTCCCTAGAAGTGCTCAGAGCCTCTCAGACACCTAACCAATACCCACC +CAGGAACCTGATTTAGAATTATTTTTCTTTTTTCTTTTTAAAATTTGAGACAGGGTCTTG +CTCTGTTGCCCAGGCTGCAGTGCCATGGCAAGATCATAGCTCACCGAAGTCTCAAGCTTC +TGGGCTCAAGTGATCCCTGACTCAGGGTTCTTGAGGTTCCCAGCTGGGTCAAGACCCACT 
+TGACTCCAGTAAGTGCAGGGGCTCCCAGGTGGCCATATCAACCCTTCCCAGCCATGGGGA +AGCACTAGTGGTTTGAAAGGACGGCAAGAGAGAGAATGACACTGGAGGGAGGTGGGAACA +AGGCCACAACCAGCAGGCTGTTAAAGGCCACCCTGTGAACAGACAACCAAATCTTGGGAG +AATAGGAGACAAAGGGAAGGGCCCTAGGGAAAACTGCCTGGGAGAACAAACCAGGTGTTG +ATAAATTATTTAGGAAAGCAGGATTTTAGAGTTTCAGTGGGGCCCCCCAGTGCCCACCTC +CTTTCTTTCCATGTTCAGATTGAAAATGACGAGTACCTTGACCTCTCTGTGACCCAGACA +GCTGCAGGTTCTCCCAGCAGGCTTGAACCCAAACCAGGCCCTTGAACATTCCCAGGCACT +GATAAAGGCATCAAGGTTGTTACCCAAAACACTGAAGGAAAAACTGGTTCTGGCCCTGAG +ACAAATTCCTTAAACCCTCATAGAAACTCCATACACTAACCCACTCTGTGGGCAAGCCTG +GGTAAAGCCTCTCTTTTCCCTTGTCTGTCTTGAGGACATGCTACATCACTCTATACATAA +GTTCCCCTAATAAACGCTCTGGCCAGATCACCCTGGTGTTTGATGCTTCTTTCTTTGGAA +TCCCAAAACGGCTCCATTTCAGGATGCTGTGGGGTCCCCTCTTGTGGGAACCCCCTTGCT +ACCAACTCCAGCCATGTGTTCAATGCAATAAATCAGTTTCTCTAAAAATGGGAATACCTG +GCTTATCCAGGATCATCAATAAATTAACCATTACACATGGAATAATTCTGCTGGTATTTA +TGTATTTATTTATTTATTATTTTGAGATGGGGTCTCACTCTGTTGCCCAGGCTGGAGTGC +AGTGGCACGATCTCGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCCACC +TCAGCCTCCCAAGTAGCTGGGACAACAGACGCCCGCCACCACGCCCGGCTAATTTTTTGT +ATTTTTAGTAGAGACGGGGTTTCATCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTC +GTGATCTGCCCGCCTAGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACGGCGCCC +GGCCAAGGTATTTAAAGCTTATCAATAAACACTAACTATAACTTTTTTAAAATTCAAAAG +ACATCTAATATTGCTTGCAAGGTGACATGCTAAGAGCTGGATGAGACCTACCAAATGTCA +GGCAGGTATGTGCCCTACATGAGGTATCCCCATGGTGCTTTGCTAGGCCCTGACCAGCTT +CACTTGACCTTCTTTTTTCCCTTTTTGCCAGTCTTCTGCTTTGCCACTTCTTCCCAAACT +ACTAGGTTTTCCTAAAATTCTGGTTTGTCATGGGCCAGGGTCCTGAAGACCAAGTTTTGG +GCAGAATGTGAGCACAAACAGCCCTGAAGAAGCCTGGGCAGCAGCAGCTCCTTGAGAGGA +CCCTGCTGACTCTCCTTTCAGTCCTATTTTTAGAGCAGTGGGCCTTCTGTTCAGTGAAAT +CTCACAGAGGAACCAGCCTGGGCTCAGGATGTGCAAAAGCCCCATACGAGGCTCACAGAA +GGGACTGCATTTGCTTGGGGTAACCCAATAGGACAGTGTTACTGGGAATCAGACTCAGGC +CTCTGGGGACACCGTGAAGAATCTGGTCCCAGGCCTACTGCAGGGCACAACACATGGGGG +AAGCCTCCAAAAGTCACTAGCTATGTGAACATACATGATTCTACAGGCCTATGGACTTGA +CATGGTTATGCCAAGTAAAACCTGCTATAAAACAATATTGTGAAGAAGCCTGTTTTTTTT +TGTTTGTTTGTTTTTTGTTTTGAGATAAGCTGGCATGTAGTGGTGTGGTGATCACAGCTT 
+ACTGAAGCCTCCACCTCCTGGGCTCAAGTGATTCTCCCACCTCAGCCTCCCGAGTAGCTG +GAACTACAGGTGCATGCCACCATGCCCGGCTATTTTTAAAGTGCTTCTGTAGAGGTCTCG +CTTTGTTGCCCAGGCTGATCTCAAACTCCTGGCCTCAAGCAATCCTCTCACCTTGGCCTC +CAAAAGTGCTGGGATTACAGGCATGAACCTCCAAGCCTGGCCCAGAAGCCTGATATGTTA +AAAAACTAAGATAGGCTGCTGCCTCTGAGATGACCTCTGAGGCACTGGGGGAGATGACCC +AGGGCTGTCGGGAGCACTGGCCAAACCAACAGAGAGAGACTGCAGGCCTCCAAGCTGACT +GGTGAGAGAGGGAAAGTGATGTTAGAAAGATGATCTGGTTTCAGTGTGTGTGATCACTGC +AGGGAGAAGAAAGAAATGGCAGAAACATACAGATGGAGGCCAGGCAAAAGGGAAGGAAAT +GGAAACACAGAACAGAGATGGCTACAAGCAATACCGAGAAGGATGACCTGTCCTCACATG +GACGCAGGGAATTAAGAAGGTGGACGGGCTCAAGGAGCCACTGAGGTTGAGCCCAGGGAA +CTGGCAGGATGGTCACAGGACACACAGAGCAGGGGGACTGACAGGGGAGCTCCGGGAGAG +TGGCAGGTGTGGAGGAGGAGATAGGACAGCTGGACCTGAGCCATGCTGGGGGAGATGTTT +CAGAGGCAGCTGAATCAGGAGTTAACTGCTGCCAGGGTGGAGAATTGGATGAGGGTTCCC +AAAGCAACAATGACAGGGATGTGGGGACAGAAGAGATCTCTAAATAAAAGAACAGGGGGA +AATTCTTTTGTTGTAAAAGCCAACTCCATCTACTCACCATGCAGGTATGGTGAGACAAGT +CAGTCATCTCAGATCAACCCACCCAGCATCCAGGTCCTATCCACCAATCTGAGAACAGGG +CACAGGGAGGCTGCCAGGGCCCTGTCACATGGGGTGGCACCTCCACATGCCCCCGAACAG +GTTGGTTTTCCTGTGACTTGAGTTGTTTTTTTTTAATTGCGTCAAACCTGCTCTCATGGG +AAGAAAGGTCAGAATGATGAACTTAGCATAGCATTTTGTTTCACTTTTAAGAAATGTGGG +AAGAGGTGACTGTCAACTCCTATCAGGCCTCTGCCTGATAGCTCAAAGGCACCTGCAGGC +CCAGTCCCGTGTAACTGCACCCACACCACTGAGAATAATCATCCAAAACATAAGTCACAC +TGTCAAAGCCTCAGAAAACTTTTTGAATTCTGCAGGAAAACAAGTACTTAAAGTTCAATA +ACAGGTTGGGTACAGAGGCTCACACCTGTAATCCCAGAACTTTGGGAAGCCAAGGTGGGA +GTATCACCTGAGGCCAGGAGCTTGAGACCAGCCTGGGCAACATAACAAGGCCCCATTCCT +ATAAAAAAATATATATATATATAATATCAGCTAGGCACAATGGCACGCATCTTTAGTCCC +AGCTACTCAGAAGGCTGAGGCGGGAGGACCACTTGAGCCTAGGAGGTGGAGGCTACAGTG +AACTATGATCATGCCATTGCATTACAGCCTGGGTAACAGAGTGAGACCCTGTCTTTAAAA +AAAAAAAAAAAATAGGCCAGGCGCAGTGGCTCACACCTGTAATCGCAACACTTTGGAAGG +CTGAGGTGGATCACTTGAGGCCAGGGGTTCAAGACTAGCCTAAGCAACAAAGCAAGCCTC +TATCTCTATAAAAATAATTTAAAAATTAGCCAGGTGTAGTGGCACATGCCTATAGGTCCT +AGCTATTTGGGAGGCTGAGGCAGAAGGATTGCTGGAGCCCAGGAGGTTGAGGCTGCAGTG +AGCTGTGATCGTGCCACTGCACTCCAGCCTGGGCAAGAGAGAGAGAGCTTGTCTCCAATA 
+AATTGAATGAGTGGATGGATGGATGGGTGGATGGATGGATGGATGGATGGATGGATGGAT +GCATGCATGGATGGATGGATGGATGGATGGATGCCTGCCACCATGGATGGATGCATGCAT +GGATGGATGGATAGATGGATGGATGGATGGATGGATGGATGGATGGATGGATGGATGCAT +GCATGCATGGATGGATGGATGGATGCGTGCATGCATGGATGGATGGATGGATGGACAGAC +AAATAGATAGATAAAGTTCAGTAACAAAACACATAAGTTTTCAAGAGACTGAAGTGGCCC +AGTGAGGGGACAGGCAGGAAGGGCCCTGGCAGCAATGCTGGTGGATGCTCCCCATTGCCA +GACCCCAGAGCACACAGGACGTCCACATGCAGCCTCGGGTCAGCCTACCTGCTTATGTGC +AGGATGCTTTCAATGACAGGGAGACTCAGACAGTGGGGAAACAACCAGCAATAGAGAAGA +GGGCAAAGGCAGAGGCCTGGGGTGTCTAGAGCGGCTCTGCTTGAGCTCCAGGAACAACTC +CAGAGTCTTCCCCACTTCATGGCCTTCGTCACTGCTTGGTCTCCTCACATTCCTGACCCA +TCTTCAAGCCAATGCTAGTGCCCGGCAGGCATCATGGGCATCTCAGGGAACCCCACCTGG +GTGTGGGTCTGCCTTGCTAGATGAGACATGCCTCACCTGAAGCTGGCAGGCAGCCACCAG +TGTCTCTTGTACATTGCTCAGGCTGAGCTCCAGCTCGGAGGTGTATATGAAATGTAGGAT +TTGGCACATAGCATTGTAGGACACACCGTGGATCAGGACCTCTTCCTGTTCCATCTCCTT +CAATCCCCCAGCAAACATTCCTCTGCAAAGTGAAGACACAAGAAAATGGAGTTATACCAA +CAGGGACTTAAGCCCATGTTGCCAAGAGTGACAGCATTCAGAGAAGAACCTGGCGCTGGT +GGCAGGGCCCTGGTAGTGGGCTGACCCACTCTGTGCTCTGAGTGAACTGTGTGCATGCAT +GAGCCATCCCACAAAAGGATCCTGACGAAGTCTGGGCAGCCATCCTGGAAATTGTCCTGT +TTCCCTTTTCATGGGCCCAAGGAGTCCCATGTGAGCTTCGGGAGGTGAGGCCACAAGCAG +CTTGCTTCTGCTGTGTTCATCAGTGCAGTTCCTGGATCGGAGCAGTAGCAACTCCAGAGC +ACATTTCAAAGCAATGAATTAATTAATGGGAAACTAATAAAAGTCATTTCATTCAATGTA +TATTTATGGAGTTCCTACCAAATGCCAAGTATCATTCAAAGAGCAGAAGGAGGCCAGGCA +CGGTGACTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGCAAATCACTTCAG +GTCAGGAATTCGAGACCAGCCTGGCCCACATGGTGAAACCGCGTCTCTACTAAAAATACA +AATATTAGCCGGGCATGATGTTGTGCATCTGTAGTCCCAGCTACTTTGGTGGCTGCGGCA +CAAGAATTGATTGAACCTGGGAGACGGAGGGTGCCATGAGCCAAGATTGCACCACTGCAC +TCCAGCCTGGGCAACAAAGCAAGACTGTCTCAAAAAGCAAAACAAAACAAAAAACAAAGA +GCAAAAGGAGTTATAGTGAAGTAAACTGGTAAAATAGCTGGGCGCGGTGGCTCACGCCTC +TAATCCCAACCCTCTGGGAGGCCAAGGCAGGCAGATCCCTTGAGCCTAGGAGTTCAAGAC +CAGCCTAGGCAACACGGTGAAATCCTGTCTCTACAAAAATTACAAAAATTGGCCGGGCGT +GGTGGCTCACGCCTATAATCTCAGCACTTTGGGAGGGCAAGGCGGGCAGATCACCTGAGG +TCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAA 
+AAATTAGCTGGGTGTGGTGGCGGGCGCCTATCATCCCAGCTACTCAGGAGGCTGAGGCAG +GAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCACACCACTGCGAT +CCAGCCTGGGTGACACAGCAAGACTCTGTCTCAAAAAACATATATACATATATAAATAAA +TGCTAAGAAGGAACCCAGCAGGGCATCTGACCTGAGAGTCACTGGTTAGAAGAGGGGTAG +GTGGAATGATGAGAAGGAGCAGGGAAAGCCCCATCTGAGGAACTGAGACCTAAGGATAAG +GCTGAGAGGGTGTAGGCAGCCGGGGAGGGAGTTCCAGGTGAAGGAGACAGGTGTACAAGT +TCTGGGGCAAGGCAGAGCTTGGCATGCAAGGCAGCCAAGATGATGACGAAGTGAGCAGAA +AGTGGGATGGGCAGGCAGGCCTTGGAGCCAGGTCCCCCTACCTCTAGGGTAGGTACAGTC +CAGATTGAAATCTGAGCATTATGGGATGCTCTTGAGGAATTTACCCACAGAGTGATGTGA +ACTGACTGGCTTTCCGACTCAGGTGCTGGAGGAGGAGCCCCTGGAAGACCAATTAGGCCT +GAGATTCCTGTGGGCCTGGAGGCCAAGGGTGACCCTGGCCCTGGGGAGGGCATTGCAGCT +AGAAAGCAGTAGCTGAGACTACCCTGGGACACGGCACAGAGAACCTACTGACAGACTGAA +TCTAGGAGGACCTGGGAGGACTCAAGGCCCTCTGCTGGGCCGAAGAGGACCCTTTACTGA +TGGAAGAAAATCTGGACAGCTGGGAGTCCCGTGATTGTGTGATCAGTGTGAGATACCTGT +GAAACCTCCAAGTGTAGACACTGATTAGGTGTTTGGAGGTAAGAGGCTGGCAACTGGGGC +TGAAGTTTAAATATGACAGCAACATGAAGATTATAAGAGAAGCTATTTTTACATTTTATT +TATTTTTTTGACTAGGTAATACATATACATGCTACCAGATTTAAAATACAGTATAGGTTA +GGCCGGGCACAGTGGCTCACGCCTGTAATCCCAGGACTTTGGGAGGCCGAGGCAGGCCGA +TCATCTGAGGTTGGGAGTTCGACACCAGCCTGACCAACATGGAGAAACCCCATCTCTACC +AAAAATACAAAATTAGCTGGTCATGGTGGCACATGCCTGTAATCCCAGCTACTCAGGAGG +CTGAGGCAGGAGAATTGCTTGAACCCAAGAGGCGGAGGTTGCAGTGGGCCAAGATCGTGC +CATTGCACTCCAGCCTGGGCAACAAGAGTGAAACTCTGTCACACACACAAAAAAAATACA +GTATAGGTTGTGGTGAAAGTCCCCTTGCCTCTGCTCCCAGACCATCAGTCAGTTCCCCTC +CCAGGGCCACACTATCCTCAGGGCAGTCATATTCCACATCTCACTTGGCTTTTTTCTTTT +TTTTGAGACGAAGTCTCACTCTGTTGCCTGGGCTGGAGTGCAGTGGTGCGATCTTGGCTC +AGTGCAACCTCCGCCTCCTGAGTTCAACTGATTCTCCTGCCTTGGCCTCCCAAGTAGCTG +GGACTACAGGTGCCCCCCACCACGCCCAGCTAATTTTTTGTATTTTTAGTAGAGACCGGG +TTTCACCGTGTTAGCCAGAATGGTCTTGATCTCCTGACCTCGTGATCTGCCTGCCTCGGC +CTCCCAAAGGGCTGGGATTACAGGCGTGAGCCACTGCACCCAGCCCGGTTTTTACTTAAC +AATATAGCTTGGAGACTGATCCATGCCCATGTACGCAAGACTGCCTCACTCTTTGCAAAG +GCTGTCTCACTTTTTTTATTTTATTTTATTTTTTTAAGATGGAGTTTCGCTCTTGTCGCC +CAGGCTGGAGAGCAATGGTGCAATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAG 
+CAATTCTCCAAACTCAGCCTCCCGGGTAGCTGGGATTACAGTCCTGCGCCACCATGCCCG +GCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCACCACGTTGGCCAGGCTGGTCTCAA +ACTCCTGACCTCAGGTGATCCACCCGCCTCAGCCTCCCACAGTGCTGGGATTACAGGCAT +GAGCCACCGCGCCCAGCCAGCTGTCTCACTTCTTTAACCCATTTCTACAGGGGTATATTT +AGGTTACTTGTAATTTCCAGTTATTTCACATGATGAACATCCTCACCCTTACAACATGCT +GAGTATGCATATGCCAGGATTAAGTCCCACAAATACAGTTGCTGGGTCGAAGGGTTTACA +TACATTTATACTTTGATAGTATTGTCAAGTTGCTGTTTTTATATCATTCCTGATAGCAGG +GCCTGGTGCCTGCACCCTCTACCCCCAACCCAGGAAGACACCAGCTCTAACTCACCACCA +CAAAAGGGAAAAACCTTATCACATAAGGTACAGAAAATCTCCTTATATGTTAAAGAGCCA +TTTCCATTTTCTTTTCTGAACAGTTCAGATTCTTAGTCAGTTTTCTGTTAGACTGTCTTA +TCAATTTGAAGAAATCTGCCCTTTCTTTGATGAGGAGTTTCCCCGATCTTTTGATTTTGT +ATTGCTTTTTTGCCATGCAGAAATTTGTCTTTTTTTTTTTTTTTTTTAAACAAGTCTAGT +CAACTGCAGTAGTGAGAAGGTGAAAAAGTAGAACCTGGAGTTGGATCTGTGACTGTGAAC +AATCAACTGAGGTAACTCACACTACCTTCGGACCAACCTATCTTCGTTTTCAATGTTGTC +AAATTTACCATTTACTTGCCTCTGAGTTTTGTGTCAAACTTAGAAAGGCTTTTTCTTTCT +GAGATTATTCAGAAAACATTTCCGATATTTTCCTCTAGTGTTTTTATTATTGAATCTTTC +ACATTTAAAACTGTTGATCCACCTGTAATTTATTTTGGTATAAGGTATGAAGCAAGGGTC +CAACTTTGTTTTTCTAGATGGCAACTCTGTCGTCTTCCCTCCTTCCTCTAATTTTTTTTT +TTTTTTAAGACGAAGTCTCACTCTGTTGCCCAAGCTGGAGTGCAGCAGCGCGATCTCGGC +TCACCACAACCTCCACTTCCCGGGTTCAGGAGATTCTCCTGCCTCCCAGCCTCCCGAGTG +GCTGGGATTACAGGCATGTACCACCACACCCGGCTAATTTTGTATTTTCAGTAGAGATGG +GGTTATCACTATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTTGTAATCCGCCTGCCTT +GGGCTCCCAAAGTGCTGGGATTACAGGCGTGAGTCACCGCACCCAGCCCCTTCCTCTAAT +TTTTGAGGCTATCTTTATCACATAGGACAGGAATAGTCTCCTTTCGTTACTTTTGTTTTC +GACAATTTCCTGGCCACTGCTAGCTTTTTACATTTGAATTTTAGAATCAAGTTTCTGTAA +TTGCCCCAAATAAACCCTCTTGGTATTTTAGGGGGTTATGTTAAACATCTAGGTTAATGT +AGAGATAAATAGCATTTTTATAATGGACACAGTGGATCTTTTTAACTGTCCAGCCATCCT +CTGGGTCCATTTGGAAGTCTTATAAGTCATCTTCCTACAGATCTGGCACATATTTGTTAC +ATTTATTCCCAGACATTTCACTCCATGCATGGCCAAGCCCTCGGGGCACTTTCCCATGCT +CTTTACAGTGGTCTGTCTGGCTCTCCTGGACTTTCCAGGACTGTGCTCGTGCATGTACTT +CTGTCTGATTCTTCTGCTGGCACCTCTGGGACTATATTGGCAGCAGTGGAGATGGCAACA +TTTCAAGCACTGCCCGCCCTGAGCAGGGTGTGGGTGCTTGGATTGACACAGATTTTTTTT 
+TTTTTTTTTTTTTTTTTTTGAGACGAAGTCTTGCTGTTGCCCAGGCTGGAGTGCAGTGGC +GCGATCTCGGCTCACTGCAGGTTCCGCCCCCCGGGGTTCACACCATTCTCCTGCCTCAGC +CTCCCGAGTAGCTGGGACTACAGGTGCCCGCCACGTCGCCCGGCTAATTTTTTGTATTTT +TAGTAGAGACGGGGTTTCACCATGTTAGCCAGGATGGTCTCGATCTCCTGACCTCATGAT +CCGCCCGCCTCGGCCTCCCAAAGTGCTGGTATTACAGGCGTGAGCCACCACGCCCGGCCT +TTTTTTTTTTTTGTGACAGAGTCTCGCTCTTGTTGCCCAGGCTGGAGTGCAATGGCGCAA +TCTCGGCTCACTGCAACCTCCGCCTACCACGTTCAAGCGATTCTCCTGCCTCAGCCTCCC +AAGTAGCTGGAATTATAGGCATGTGCCACCACGCCTGGCTATTTTGTATTTTTAGTAGAG +ACAGGGTTTCACCATGTTGGTCAGACTGGTCTCGAACTCCTGACCTGATGTGATCCACCC +ACCTCGGCCTCCCATAGTGCTGGATTACAGGTGTGAGCCACCACGTCTGGCCGGACACAA +ACTTTTACCATTTTAAGAAAAAAAAAAATCTTATAATCCTTTCGGGGGCCAAGGCAGGAA +GACTGCTTGAGGCCAGGACTTTAAGATTAGACTGGGCAGCCAGGCACCATGGCTCACACC +TGTAATCCCAGCACTTTGGGAGGCCGAGGTGGGCAGATCACTTGAGGCCAAGAGTTCGAG +ACCAGTCTGGCCAACATGGTGAAACCTCATCTCTACTAAAAGTACAAAAATTAGCTGCAT +GGTGGTGCACACCTGTAATCTCAGCTACTTGGGTGGTTGAGGCATGAGAATCACTTGAAC +CTGGGAGGCGGAGGTTGCAGTGAGCTGATATCGCACCACTACTCTGTGACAGAGCAACAC +TGTGCCTCAAAAGAAAAAGAAAAAGAAGGGAAAAAAAAAGACCAGATGGGGCAACACAGA +AAGACCCCATCTCTTAAAAAAAATTAGCTGGGCATGGTGGCACGCATCCATAATTCCATC +AATTTGAAATGCTGAGGTGGGAGTGTTGCTTGAGCCCAGTATTTTGAGGCTGCAGTGAGC +TATGATGACACCATTGCCCAGCCTGGGCAAAAAAGTGAGACTCTGTCTCTTAAAAAACAA +AACAACACAAAAAAGTTCACTGCATGGATCATGAATGGAGGGAGGAGAAAGAAGATATGA +GACATTGTGGGTGCAATTAGGAAAATCTGAACGTGGACTCAATGTCAGATGAAATTATAG +AATGATTACACATTTCTTTTTCATTTTTATGTTTTTAGAGACAGGGTTGCTACGTTGCTC +AAGTTGGACTTAAACTCCTGTGCTCAAGTGATCCTCCCACCTCAGCCTCCCAAATAGCTG +GGACTAAAAGCACTCGCCATGGAACATGGCTCAATCATAAATTTTCTTATATCATGATAG +TATTACAGGTATGTCAAAGAATCTCCTCATCCTTAGAGAGGCGAACTGAAGTTGTTAGAG +ATAAATTGTCATGATATCTATAACTTACTCTGAAGTGAATATGCAAAATGTTAACTGATG +AATTTAAGCCAAGAGCACAGTTGTTCACTATACTTAACTTTCAACTTTTCTGTAGGATTT +AAATATTTCAAAATAAAAATCAGAGAGAAAAAAGAGCCCAAAAGAAAAATAAAATATATA +GTCACTCCTTTTTATTGTTTTTCTTTTTCACCAAAAATTTTACCAAATTTAATTTTTTTT +TTTTTTTCTGAGGCAGAGTTTCACTCTTGTTGCCCAGGCTGGAGTGCAATGGCGCAATCT +CGGCTCACCACAACCTCCAGCTCCCAGGTTCAAGCAATTATCCTACCTCAGTCTCCCGAG 
+TAGCTGGGATTACAGGCATGCGCCACCACACCCGGCTAATTTTGTATTTTTAGAAGAGAT +GGGGTTTCTCCATGTTGGCCAGGCTGGTCTTGAACTCCCGACCTCAGGTGATCCACCCTC +TTCAGCCTCCCAAAGTGCTGGGATTGAGCCACCAAGCCCGCCCCAAACTTAATTCTTTTT +TTTTTTTTTTTTTTTTTTTGAGATGGAGTCTTGCTTTGTCACCCAGGCTGGAGTGCAGTG +GCATGATCTCGGTTCACTGCAAGCTCCGCCTCCCAGGTTCACGCCTTTCTCCTGTCTCAG +CCTCCCGAGTAGCTGGGACTATAGGCGCCCGCCACCACGCCTAGCTAATTTTTTGTATTT +TTAGTAGAGACGGGGTTTCACAGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGA +TCCTCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCATGCCCAGCC +TCCCAAACTTAATTCTTTAAACATTCATTTTTGGGGCTGGGCGCGGTGGCTCATGCCTGT +ATCCCAGCACTTTGGGAGGCCGAGGTGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCA +ACTTGGCCAACATGGTGAAACCCCGTCTCTAGTAAAAATACAAAAATTAGCTGGGTGTGG +TGGCAGGCGCCTGTAATTCCAGCTACTTGGGAGGCTGAGGAACAAGAATTGCTTGAACCT +GGGAGGTGGAGGTTGCAGTGAGCCGAGATGATGCCACTGCACTCCAGCCTGGGCGACAGA +GTGAGATTCTGTCTCAAAAAAAAAAAATTCATTTTTGGCTCACGGCAGCCTCAACCTCCT +GGGCTCAAGTGATGCTTCTGCCTCAACCTCCCAAGTAGTTGGGACTATAGGCATGCACCA +TCATGCCTGGCCAATTTTTAAAAAAATATATTAAAAAAAAATTATTACTACTTTTCTGTA +AGTCAGGAATTATGTCAAAATAAAGACTTAAAAGTTTTAAAAAGTTGTTCACACCTCATA +TCCATTAGGATGACCACTATAAAAAAACAACAAAACAAACAAACAAAAAGTAGGCCGGAT +ATGGTGGTTCACATCTATAATCTCATGTCCATAATCCCAGCACTTTGGGAGGCTGAGGTG +GGAGGATTGCTTGAGCCCAGGAGTTTGAGACCAGCCTGGGAAACATAGTAAGATTCTGTC +TCTACAAATAATTTTTTTAAAAAACTGGCCAGGCATGGTGGTGCATGCCTATAGTCCCAG +CTACTTGGGAGGCTGAGGCAGAAGCATTGCTTGAGCCCAGGAGGTGGAGCCATGATGGTG +CCCCTGAACTCCAGCCTAGGTGACAGAGCAAAATCCTGTCTCAAAAAACAAAACAAACAA +AAAACCAGAAAATAACAAGCTTTAGCAAGGATGTGGAGAAACTATAACCCTTGTGCACTG +TTGGTGGGAATGAAAAGTGGTGCAACTGCTATGTTCCTCAGAAAATTGTCAATAGAATGA +TCTAGCAATTCCACTTCTGGGAATATAGCCAAAGAACTGAAAGCAGAGTCTCAGACATTT +GCACACCCATGTTCATAGCAGCATTATACACAATAGACAAGAGATGGAAGCAAACCAAGT +ATCCCTTGATAGGTGAATAGACAAACAAAATGTGGTATATCCATAAAATGGCACACTATT +CAGCCTTCAAAATGAAGGAAATCAGGTCACATGTTATAACACACATGAACCTTAAGGACA +CGATGCCAAATGAAATAAGCAAGTGATAAAGGGCTAATGCTGTATGATTCTACTTATGAG +GTACTTAGAATAGTTTAATTCATAGAGACAGAAAGTAGAATGGTAGCTGCCAGGGCCTGG +GGGTAAAGGAGAAAAGGGAGTTGTGGTTTAGTGGGTATAGAGTTTCAGATTTGTAAGATA 
+AAAAAGTTCTGGAGATCTGTTTCGTAATAATGTGAATAATCGTAATATTACTGAACTAGA +TGGTAAGTGTTATTTGTGTTTATTATTTTAGAACCACAATTTTAAAAAATTGTTCACCAA +GTACCCAGTAGCAGGGGAATGACCAAATAAATAGAGGACATCGACAGCACGGAACACTCC +ACAGCACAGAGAGTGAGGGCCATCTCTACTGCTGGTGCCTGGGACCTCCAGGATGTCACA +GGTACAGAATGGCAAACCAGTATGCTGCCTCTGGTGTGAGATGGGGGAGAAAAATATTTC +TTGGGTCAGCTTGTTCAGGTCCTCGATCCCTTGTCCAAACTCTTTAAATCAAGTGTATTT +TAGAATTCAGAAGGTTCCAACGTGAGAAATACTGTACACATGCCACATAGGATCTTATAT +CCCACGGGATCTGGGTCGGTGCCTCCTCTTCAAACACCTTAATATGCCCCCCAAAAAAGT +ATAAAGACTCACACTAGGATAAAAAATATAATTAGCCTCGAAAATAATTTTGAGTTTTGG +GTGGGTTCCGTGGCTTAATGAGTTTACGCCAAACTTTTTTTTTTTTTTTTTTTTTTGAGA +CGGAGTCTTGCTCTGTCGCCCAGGCTGGAATGCAGTGGCGCGATCTCGGCTCACTGCGAG +CTCCGCCTCCTGGGTTCACGCCATTCTCCTGCCTCAACCTCCCGAGTAGCTGGGACTACA +GGCACCCGCCACCATGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCA +TGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCTGCCTCGGCCTCCCAAA +GTACTGGGATTACACGCGTTGGCCACCGCGCCCGGCCTACGCCAAACATTTCTTTAAGAC +GTTTGGCTGGGCCAAAGTTAGCTGAGTTATTTCAGTTGATTGTTCATAGTCAGTAACAGA +TCAAACTCATTCGTCTCTGTCCCCCCTTCTCACTACTGCACTTAACTAGTCTAAAAAATA +ATAATAGATAAAAATAGGCCGGGCACGGTGGCTCACGCCTATAATCCCAGCACTTTGGGA +GGCCAAGGTGGGTGGATCACCTGTGGTCAGGAGTTCAAGACCAGCCTGGCCGACATGGTG +AAACCCTGTCTCTTTCAAAAAATACAAAAATTAGCCGGGTGTGGTGGCATGCACCTGTAC +TCACAGCTACTCAGGAAGCTGAGGTGGGAGAATCACTTGAACCTGGTAGGCGGAGGTTGC +AGTGAGCAGGGATCATGCCACTGCACTCCCGCCCGGGCGACAGAGAGAGACTCCATCTCA +AAATAAAAAAAAATTAAAAAAAAATTTTTTTTTCTTTTGTAAACGGAGTCTTGCTCTGTC +GCGCAAGCTGGACTGTAGTGGGACCATCTTGGCTTGGCTCGGCTCACTGCAGCTTCAACC +TCCAGGCTCAAGCAATCCTCTCACCTCAGCCTCCCAAGTAGTGGGGACTACAGGTGTGTG +CCACTGTGCCCAGCTAATATTTGTATTTTTTGTAGAGATAGGGTTTTGCCATGTTGCCTA +GGCTAATCTCAAACTCCAGGACTGAACTGATCTGCCCACCCCAGTCTCCCAAGGTGCTAG +GGTTATAGGTGTGAGCCACCCTGCCCAGCCCCAAAAGTTTTTTGGTTTTTGTTTGTTTTT +GAGAGATGGACCCTTGCTATGGTGTCCAGGCTGCAGTGCAGTGACATGATCCCACTACTC +AACAGCATGGGAGTTTTGACCTGGTCCATTTCCATCCTGGGCCGGTTCACACCTCTTTTT +AGGCAACCCTGCTCCAGGAAGGTCACCATATTGATGCCAAATTTAGTGTGGATACCCTGT +TGGCATGGCACACTGCAACCCAGAACTTCTGGGCTCAAGCAATCCTCCGGCCTATTCTCC 
+CAAGTAGCTGAGACTATAGGTTTGAGCCACCACGCCCTGCTCAAAAAACTTTTTAACTGT +GAGAATGGATGCTGAATGTTATCACATGCCTGCTCACCATGTCTGCAGATATGCTATATA +GACACGTATGCAGATATGTTGGGTTGTCATGAATTGATGGAATGCCATGATTCCCTGGCA +TGTAACTAGTAAGACAGATTCCCTAGCATGAAACCAATTTTTTTTTTTTGAGACAAGGTC +TTGCTGTGTCACCCAGGCTGGAGTGCAGTGGCACACTCACAACTCATGGCTCACTGTAGC +CTTGACCTCCCCAGGTTTAAGTGATCCTCCCACCTCGGCCTCCCCAGAAGCTGAGACTAC +AGGCATGCCTGGCTAATTTATGTTTTGTTTTGTTTGTTTGTTTTTCTTTTTTTTTTTGAG +ACGGAGTCTCACTCTGTCGCCCAGGCTGGTGTGCAGTGGTGCGATCTCGGCTCACTGCAA +CCTCCGCCTCCCAGGTTCAAGCGGTACTTCTGCCTCAGCCTCCCAAGTAGCTGGGATTAC +AGGCATGCACCACAACACCTGGCTAATTTTTGTATTTTTTTTTGACCTCTGAACTTTTTA +TTGGCCTCCTGCTCCCCAAAGGATACCCTGCTTCTGGTGGCTTAATGTCTCAGAACTTTG +GTGTCATTGGTCTCAGACACCACTTTGCCATCCATTATCTGGCAGGTGGTGGTCTTTTGG +ATGGTTTGCATGGAGTTGCTGCTGTCCAGGGCATCACCAAGACTGAAGTCCTCGCCATCT +TCCAGCAGGCGGCAGTAGGTGGTGATCTTAGCCTCCAGCTTGACCTTAACGTTCACCAGG +GCGTCCTACTCCTAGGCCTGGCACTGTCCCTCTGCCCGGATTTGTGCCACCTCTGACTCC +AGGTGCAGCAGGATCCCATTGAGCTGCTCCATCTACAGGGCATAGAAGGCCTCCACCTCC +CTGAGGCTGTTCTACAAGCTGGCCTTCAGATTTCTCAAGGAGTCCAGGTTGATCTCCAAG +GACTGGACGTACGTCTCAACTCCGTGATTGTCATCTCAGCAGTTCCAACCTCAGTGGACT +GCATGGTGACCACTGTGGTGCTCTCCTCAATCTGCTGAGACCAGTACTTGTCCAGCTCCT +GTCGTTTCTTCCGAGCCAGCTCATCATATTGAGCCTGGATGTCTGCCACGATCTTGGCAA +GGTCCTGAGATTTGGGGGCATCTACCTCCACAGTCAACCCAGAGCTGGCAATCTGGTCTT +GTAGGCCTTTTACTTCCTCTTCATGGTTCTTCTTCATGAAGAGCAGCTCCTCCTTGAGAG +CTTCGATCTCTGTCTCCAGCTACAGCTGAGGGACATTGGTGTCATCAATCACTTTGCGAA +GCCCATGTATGTCGCTCTCCACAGACTAGTGCATGGGCAGTTCTGTCTCATACTTGACTC +TAAAGTCATCAGCAGCAAGACGGGCATTGTCGATCTACAGAAAGATACGGGCGTTGTCCA +CAGTATTTGCAAAGATCTGAGCCCTCAGGTCCTCGATGGTCTTGAAGTAATGCCTCCAGT +CTCTGATCTGGGTCCCTTCTTCTCCAAGTGCTTCTGGATTTTGCTCTCCAGCTTCCGGTT +CTCGGTCTCCAGGCTCCTCGCTCTGTCCAGGTAGGAGGCCAGGCGGTCATTCAGGCTTTG +TATGGTCTCCTTCTCATTCTGGATGCTTCCCATTCCTGCCAGATCCCCAGCCATCCCCGC +GGCCAGGCCCCCAGACCCCATGCAGCCCTGGAAGCTGGTGGAGCGGGACACGAAGATCCA +GGAACCAGAGCCCTCGGTGCCTGCATAGACGCTGGCCACGCTGCTGACCAGCCAGGCGCC +ATAGCTGGGAGCCTGGACAGAGCCCAGGGACCAGTAGTTGGTGGAGAAGGTAGAGCGAGT 
+GGTGAAGCTCATGCTGTCCGGGGAGGAGAGCAAGAGGACAGGACTCAGGCTTTGCTGATG +ACCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCGCCATGTTGGCCAGGCTGGTCTCC +AACTCTGGACCTCAGGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTAGGATTACAGGCA +TGAGCCACCGTGCCCAGCTTTGTTTGTTTTTTGCTTTTTGAGGCAGAATCTCACTCTCTC +ATCCAGTCTGGAGTGCAGTGACGCGATCTCAGCTCACTACAACCTCCACCTCCCAGGCTC +AAGCGATTCTTGAGCCTCACCCTCCTGAGTAGCTAGAATTACAGGTGTGCACCACTACGC +CCAGCTAATTTTTGTATTTTTAGTAGAGATGGGTTTTTGCCATGTTGGCCACACTGGTCT +TGAACTCCTGGCCTCAAGTGATCTGCCGGCCTTGGGCACCCATTGTGCTGGGATTACATG +CATGAGCCACTGTGACCAGCTCTAATTTTTGTATTTTTTGTAAACATGGGGTTTCGTCAC +GTTGCCCAGGCTGGTCTCGAACTCTTGGGCTCAAACAATCTGCCCACCTTGGCCTTGCAA +AACGCTGGGATTATAGGTGTGAGCCACTGTGCTCAGCCTTTTTTTTTTTTTTTTTACGAG +AAAAGAGGTCTCTTACTTTGTCATCCAAGCGAGAGTGCAGTGGCATGATCATGGCTCACT +GCAGCCTCAACCTCTCAAGCTCAAGCTATCCTCCCTCCTCAGCCTCCCAAGTAGCTGGGA +CTATAGGCCACGCCACCATGCCCAGCTAATTAAAAAAAATTTTTTTTTCTTTTTCTGAAA +CTGCTCTGAAAAAATTTAAATGGCATTGGAGTCTTTTCATAAAAGGCACACCTACCCTGG +AAAACATCTGGGCTTTGTGTTTTGTGGGGAATACAACTTTCTCTCTCTTTTGGGGATAAA +GTGCTATTAATTTTTTAAAAATAAAATCATCAATTGTATCCAAACTTTTAAAGTAACAGA +CATTGAGTTCAGCTAAGCAGTCTTTTATAATTCACAGATGGAATGAAAACTAAGGGAGGG +ACGAGGTGACCCATGGAGGTTGTATAAAAAGGACAGAGCCTTTGGGCTCTTCAGACTATA +AGGGTCCAGGAGGAGAGGAGAAACCACCTCAGAAGCATACAAGGTTAAAGCATACAAGGT +GGGTGTAGGGTCAGGGGAGCCAAAGTAAGTGTGCAATGAAAAGTTATTAACAAAAGTCAA +AAAAAGAAAATGCAGTCTGGCCATCCAATTCCTAAACCATGTACAGAGTACACAGAAAGA +GCACAGGAGGCTTCCTGCAGCAGGGCAGTACAGCTCTCTGCCCTTCAGGAAAACTGAGAT +TCCTCCACCTGGGAAACAGCAAGGGACAATTAGGGAAAGCTCTCCAGAGGAGGAGGCATC +TCAGGTGGGCCAAAGAAGAAAAATTTCCACAGGACAGAAAAGGCAGAGAAGAGGCCAGGC +ATGGTGGCTCACGTCTGTAATCCGAACACTTTGGGAGGCCAAGGCAGGCGGATCACCTGA +GGTCAGGAGTTGGAGACCAGCCTGGCCAACATGGCAAAACCCCGCCTCTACTAAAAATAC +AAAAATTAGCCGGGTTTGGTGGCACACGTCTGTAATCCCAGCTACTCAGGAGGCTGAGGC +AGGAGAATCACTTGAACCCGGGAGGCAGAGGTTGCAGTGAGCCGAGATTGCCCCACTGCA +CTTCAGCCTGGGGGACAGGGCGAGACTCCATCTCAAAACAAACAAACAAAAGGCTGGGCG +TGGTGTGGCTCACACCTGTCATCCCAGCACTTTGGGAGGCCAAGGAGGGCAAATCACAAG +GTCAGGAGTTCGAGATCAGCCTGGCCAACATGGTGAAACCCTGTCTCTACTAAAAATACA 
+AAAAATTAGCCGGGCGTGGTGGCGCACACCTGTAGTCCCAGCTACTCGAGAGGCTGAGGC +AGGAGAATTGCTTGAACCCGGAGGCAGAGGTTGCAGTGAGCCGAGATCACACCACTGCAC +TCCAGCCTGGGTGACAGAGTGAGATTCCGTCTCAAAAAAAAAAAAAAAAAAAGAAATTTA +TGTTCTTTGTAAAATAATTCAGACAATGCAGTAAACAGAAGAAATTCAGCCTCCCTCTCC +CTTTGCCCTTTGCTGAGTCCCCACGGTGCAGCAAGATCCTTCCAGATCTTCATGGTTGGG +ATCATCTTCCACCAGGTCCTGAGGTTTTTCACTCCCTATCAGTAGACACACCACACTGCC +CAGTTTGCAAGTAGCTCCATTGGATAACACAGAAAGGCTCTGTCCTGCACCATGTCTCAG +CACTGGGTGCATCGTCCCACAGGAATGACACCTAGACACCCAACATACAACTGCAGGGCC +CTAAACTATGGGCCAGCCCACCCCTAAATGCCCTTCTCTCTACCCTTGCCCATATCAAAT +ATTACAAGCCTCTCACACATATTTACCAGTCATACTGGGAAAAAAATAGTCCTTTATCAT +TTTAACATTTCACAAGTAACTAAAACAAATAATAAACATGCTAACACTTGGGAATGGTCT +ATTTCTTTTCTTTTTTTTTTTTTTTGAGACGGAGTTTCATTCTTGTTGCCCAGGCTGGAA +TGCAGTGGCGCAATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCAATTCTCCT +GCTTCAGCCTCCTGAGTAGCTGGGATTACAGGCATGTGCTACCACGCCCAGCTAATTTTG +TATTTTTAGTAGAGACAGGGTATTGCCATGTGGGTCAGGCTGGTCTCCAACTCCTGACCT +CAAGTTATCCGCCCTCCTTGGCCTCCCGAAGTGCTGGAATTATAGGAGTGAGCCACCAAG +CCTGGCCTCTTTCTAGGCTCACTGCAACCTCCGCCTCCTGGGTTCAAGTGATTCTCCTGC +CTCAGCCTCCCAAGTAACTGGGATTACAGGCATGTGCCACCACGCCCCGCTAGTTTTTGT +ATTTTTAGCAGAGACGGAGTTTCACTATGTTGGCCAGGCTGGTCTCGAACTACTAATCTC +AGGTGATCCACCCGCCTCGGCCTCCCAAAGTTCTAGGATTACAGGCGTGAGCCACTACAT +CCAGCCTCTTTTTTGTTTTTTTGAAACAGGGTCTTGCTCTGTCACCCAGGCTGGAATGCA +GTGGCACAATCTCGGCTCACTGCAACCTCCGCATCCTGTGTTCAAGCAACTCTCCTGCCT +CAGCCTCCCAAGTAGCTGGGATTACAGGCGCCCACCACTACACCCAGCTAATTTTTTTTT +TTTTTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTAGTCTCAAACTCCTGGCC +TCAAGTGATCCACCTGCCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCGT +GACTGGCCTCTATTTCTTATTTATACATACACACACACACCCCCCTGTGAAATGACTCTG +GGCTCTTCCAATAATGTTGCCACTCTCCCGCTCCCCTTCCCAATGGAAGTGGAGGGATGC +AGGGGGTGTTAGTGATGCATTGTAATCATCCCGGAGAGCAGGACACCTCTCATAAGAGGT +TTACCCAAGGGGGGCTGTGGAAAGGGCTAGAAATTGCTCCTGGAGGACGTGGAAAGCCAC +ATAAGGTTTTTGACAGATTAGCGAAAGGCTGCCTACACTGTCATCTTCAGAAAGAGTGGT +ATGACACTAAATCAAGAAGGGCTGAGAGGGTGAGCCCAGGGGATCCCATAGGCTCCACAG +CCTCTGGCCATCCTCTCCTATACAGGTCCTAAGTCCCCTGCAGGGGGCCCTCAGCTCCTC 
+TACCCTACCCACTGTCCTGTATGTTATCTTAATGCTTATTAAAGGCCTCAGCTGAATTTC +CCTGGTGAACTGAAAGAATTCTGCTGCTCCTGGACACATCTGCACCAAGCCACCACTGGG +TCACATTCTCCAAAACTCCTTTTGGAGACAGAAGGCACTTAAACCATACATATTTTGCTT +GATAGTTCCTAATTAAAAAAATAGAGAGAGATAAAGAGAAAGAGAGAGAGAGAGAACTAC +AACTAAGCCTTGAAGACAAAATGATTGAACAGAATCAGAGCTAGAATGGAACACCTCAGG +CTTGGAGTCCCATGACAACCTTGCTGGGGACCACGAGTGGCCCTCAGTCCTTCTCAGGCT +CATGCCACCTATGTGAAATAAGGGTGGTGACACCCACCTCCTTGAGTGCAGAGCAGTGCA +GACAACAGGGCAAATGGTGTATTGATCTGCTATCTGTGTACATAAATTTATTCATTTAAG +CCATGGGTATTGTTTGAATATGTGAAAACCATGTCAGGTGCTAGGGATAAGAGACAGATT +CCAACCCAGTGGAAAAGACAGATAAGCCAGTGGTTCCTCACACTGTGCCAAGTGCTGGGA +TGGGGTCACGAAGGAGGCCATAAGAAACCCCAGAGGGGCCACAGAGCTCACTGGGAGGGC +CAGAGGCTAACACCTGAGGGATGAGAGGTGTGAGCCAGAAGGTGATGCATACACAGCAAT +AATTACTGACGGAGGGGAGGAATGGTAAGGGAGGGGAGGAGAGAAAGGAGGAAAGAAAAG +AGAAAAGAAAAGAGAGAGAAAAGGAAAAGAAAAGGAAGAGAAGAGAGGGAAGAGAAGAGG +AGGCAAAGCCCAAGAAACAGGATCACTGAATGAAAAATGAACTCTAAGACCCCCTTCTGT +CTCTCACACTTGTTGACTAAGTCATAGCCACAAGGAGTAGGGGAGGCTCTGAGGTGAGCA +GGAGACCAGCCACTGTCACCGCCAGTACCTTTACTAGCAGAGCCAGGATTACCTTTGTTA +TTCTTCTTACAAACAGCTCTAGTGAACTCACCTGAAGTAATCGCAGGACGCAGCCAGCAG +GATGCGATGGGCCTCGATGTGTCTGCCCTCCACCACCAGCACAACATCGAAGAGGATTCC +GCTGTCCCGGAGAGCCAGCAGCCCTCGGAGCAGAGCCTGGGAGTGCTGTGCGCTGCGGTA +GGTGTTGTTCACGCAGTGTGGGTGTGAGGGCTGTGCAGGCAACTTGCAGAGCTGGGTGAA +CTCCTGCTCCTCTGCCATCCTGACAGCCACAAGATCCCTTACAACTGTGGCCTGACAGTT +GGCAAAACAGGGGACAGGGGTGAGCAGGCAGGCATCAGCCCCACCACACACAGACTGGCC +AGCTCTGTTGCTCCCCTTGCTCCTGTGCCTCCCTCAGGCTCACCTGTTTTCCCTCTATTT +CCAAATCCCCTAATTCCTTCTCCCCTCCCACCTCTAGTCCAACTTCCTGATGGCCTTCTT +CCTGATGACCTGTGTGCCATCATGCCCAAGACTCTTCATCTCATTGTATCTTCTTCAGCG +GCAAAGTCAAACAGAAGGGTTTTGGGAAACAGACTGACCTGGGTTGGAGCCCAACTCCAC +AGGGACTTACAACATGACCAAACTGAGCCTCGAGCTCCCCAATTTTAAAAAGAGAGATAA +AAGTACCTACCCCATTGGGGTATTCTAAAATTGGTAAAACAATGAACATGAAGCATAAGC +ACACACCAGGTTCTCAAGAGCTCTTCCTCCCTCCCATTGAATGAACAAAATTGGGTTCTT +CCTCATAGTAATGGATGCATAAAAATGGCAAGACCTGAGTAGTGGTCAGGAGACACACAG +CCAGGAGTCTGGGAGGTGGGTGCCAAGGGGGTGGCATTTCCAGGGCCTACTTTCCTTTAC 
+TGAAATTCTGAGAGTCAAGCACAGTAACTGTGATTGAACCTGATGCATTTAAATTCATCT +GCATTGCCTTTTAAATTAAAGATTATCTTCTAATAATTGAGTCAGTACGTTCAAAAACCG +CAAACTGATTTGAATTGGAGCCTCTAAAACCTAATCAGTAAAATGAACCTACTGCAAATG +CACTTTTGCCATGGAGCAACCAAAAGTGACTGTGGATAGCCAGCTGTTCAGGTCAGGGAG +TGGCCAGTCCCACACTGTGAGGCATGGCCAAGCTGCAGGCTCTGTAGGCACAGCACAGTC +AGCTACAAGATGTGAACATCTGTCTTCTCCACCTCACTTCATCCTACTTTCCTCTCCCCA +AAATATGTTTATTCTCCAATTACCTCCAGTCATGCATCACTTAATGTTGGGGATACATTC +TGAGACTTATATCGCTGGAGGCAATTTCCTCAGTGTGTGAACATCAAAGGGTACACCTAC +ACAAACCTAGGTGGTAGAGACTACTACACATCTAGGCCATAAGGTACAGCCAATTGCTCC +TAAGCTACATACTTGTACAGCATGTTACTGTAGTTCTGAATGAAAAAAACACACTGTTTT +TCCTCTCCTCTTATACCACAACAACAAACACAGGAGACTTCTGTGACCAAATGTGTGAGG +GTTTTTCCCCATTAACAAGCAAGCAATCAATTCTGCCACAGATACCAGCTGGGTGTCTTC +TAATTCAGCAGTCCCCAATCTTTTTGGCACTAGGGACCACTTTCTTGGAAGACAATTTTT +CCACGGGGTCGGGGAATGGTTTCGGGATCAAACTGTTCCACCTCAGATTATCAGGCATTA +GCTAGATTCTCTTTCGGAGCACACAATCTGGACCCCTCACATGCATGGTTCACAACAGGG +TTTGTGCTCCTATGAGAATCTGATGCCACCTCTGATCTGACAGGAGGTGGAGCTCAGGTG +GTAATGCTGGATCACCCTCCACTCACCTCCTGTTGTGCCACCTGGTTCCTAACCAGCCAC +AGACTGATACAGGTCTGAGGCCTGGGGGTTGCGGACCCCTGCTCTAATTCAACTCTAACA +CTACCTGGAGACAGTGTCTGATCCCACAGGTTGAGGGCTGAGTCTCCCAGGTTGTTTCAC +CTGTGCTTCTGACCAATAGGCTATAAATTGGGTTCCCAAGATCCCCTTCTTGGGTTCAAC +TAATTTGTTAGAGCGACTCACAGAACTCAGGAAAATACTCATGTCTACTGGTTTGTTATA +AAGGGTATTGCAAAGGATACAGATGAAGAGATGCATGGGGCCAGACATGTGGGAAGTGGT +GCAGAGCTTCCAGGCCCTCTCCAGGAACATCACTCTTTAGGAGCCTCCATGTGTTCAACT +ATCCAGAAGTTTTCCAAACCCAGTCATTCAGAATTTTTATGGAAGCTTCATTATGTAGGC +ATGACTGATTAAATCACTGGCCATTGGTGATGAACTTAATCTTCAGCCTGTCTCCCCTCT +CCGGAGGTTGAGAGGTGAGGCTGAAAGTCCCAACCCTCTTATGCCTGCCTTGGCCTTTCC +TGTCACCAGCCCCCATCCTGAAGCTACCTAGGGGCTGCTGGCCAGCATACAAAAAGGCAT +CACTCTGGAAAGTCCAAAGATTTTAGAAGTTGTATGCCAAGAAATGAGACAAGACCAAAT +ATATATTTCACAATATCACAGTACTGAATACTATAGGCAACTGTAACACAATGGAATTTG +TGTATCTAAACACAGACAAGGTACAGTAAAAATATGGGACCACCATCCTTCATGTGGTCT +GGTCTGTCATTGACGGAAACATCACTGTGTGGCACATGATTATATATCTTTAAAAGAAAA +AATGAAAAATATATGCTGATATAAAACTTGCCAAAAAGAAGAGGTCTCTCTCTTCACTAC 
+TGAGGCTCAGGGCCACCCCCAACCCCATCCCCTGGATGCTCTACTGCTTCCCATCAAACA +CACTCAGTTATCTGTGCCTGCCCCTGAGTCTGCCTCCAGGACCTGGGACCTCTATCCAGC +CACCCACCCATTCCTTTCCCCCTCCTGGGCTGGCTCATCTTTCTCACCAAGGCCCCAGAG +ATGGCCACAGCCAAGGTGTATACCCCATGACCACCTCTCACTTAGGATTCTCTTGCCTGG +AAACTCTGCTCTATGATAATTCTCCCTTTTCTGACCTCTCTCTTTCATCCCACACACCCT +GCTCGAGCCCATCCTTCTTTCCCAGTGGTTCACATTCCCAGTTTCACCTACAGCTCAGTG +CCTAATCTAGCTAGTAGCCCTCCAACTTGTCTAATTTTACTATCAAAGCTACCATTTCCA +TCTTGGAAACCTCACTGTTTTAATATTTTTCCTTTCTAACCCATGTGCCCTATGAACCTA +CTAGTTCTGATTCCTTCACCTCTCCTCCATCTCAGACAGCCACCCTCCACTCCTCCAGGG +CCACCACTCTGGGTCAGACCACAGCAGTGATGCCCCTACCAGTTTCCCTACCTCTTGTCT +CATCCTCCCCAGGCTGCTCTCCTTCAAGCCTTACCTACTCAGCCCCATCAGCAGTCCTTA +GTCCAAAAGGGATACCTGTCATCCCCTGAACACCTTTAGTCACCCATCTAGGCCATTCTG +TCTCTCATAGTCCCATATTGGTCAAGGTCTAGCTCGATTACTTCTCCCTCTATCATTGTA +CAGAGAACATGACAAATATGCCCCACCATTCAAACCTGCCCTTTTTTTTTTTTTTAGACG +GAGTCTCGCTCTGTCACCCAGGCTAGAGTGCAGTGGTGTGATCTTGGCTCACTGCAACCT +CTGCCTCTCAGGTTCAAGTGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGGGATTACAGG +CGTGCACCACCACACCCAGCTAATTTTCTTGCATTTTTAGCAGAAATGGGGTTTCACCAT +GTTGGTCAGGCTGGTCTCGCATTCCTGACCTCAGGTGATCCATCTGCCTTGACCTCCCAA +AGTGCTGGGATTACAGGCATGAGCCACCGCGCCCGGCCCAAACCTGCCTATTTACCCACG +ACAAGGAGATTCCGGCTTCTTGAGGGGTGAGGTTTTATCTCACAAACATCTCAAAAGCCC +CCATGGCTGGCTGTACCACTTGCTAATGACAGACTCTGGGCACTTGTCTCATATACCCAC +GCCTCAGCTTTCTCATCACCACCATGACCACCGCAGAGTCAAGAGGGCCTCCTTCTCTGA +GGGTGACCTCCACAACTACCCCAATAGACATCAAGCCCCCAGGTGGATCCTCACCTTCCG +TGAGCATAGTAGTGCCTCTGCTCTCTTGGGAAGGGATAAATGGCAGCTACTCCCTAAACC +CATACTGCAAAAACAAGTACGGTAAAACAGAAAGACATCTTCCATGGGCAGGCAAAGCAC +ATTTACCAAGTCCAATTTAAGAAACTTAGCTAGGGGCTAAGGGGGATATGCTGAAAAATG +AATCAATTCTCTGCTGGGAAGGAGCACAGCACAGGGTGCAACATTTAAAACATTTAAAAC +CACATCTATACGGTTTTAAATCACCAAAACGTCCACCCTCAGGAGAACAGATGTCATGGC +ACAGTGATCTGACAGAACATTAGGCAACAGTCAAAATGCGCAAAGTGGCCGGGCGCAGTG +GCTCACGCCTGTAATCCCAACACTTCGGGAGGCTGAGGCGGATGAATCACGAGGTCAGGA +GTTCAAGACCAGCCTGGCCAACATAGTGAAACCCCATCTCTACTAAAAATACAAAAAAAT +TAGCCGGGCATGGTGGTAGGTGCCTATAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGA 
+ATCACGCCACTGCATTCCAGCCTGCATGACAGTGCGAGACTCCGTCTCAAAAACAAAAAA +ACAAACAAAAAAACAATGTGCAAAGTAGGCTGGGCACAGTGGCTCATGCCTGTAATCCCA +GCACTTTGGGAGGCCAACGTGGGAGGATCACCTGAGGTCAGGAGTTCAAGACCAGCCTGG +CCAACATGAGGAAGCCCCATCTCTACTAAAAATACAAAAATTAGTGGGGCGTGGTGGCGC +ATGCCTGTTATCCCAGCTACTGGGAAGGCTGAGGAAGGGGGAAGCTGAGGCAGGAGAATC +GATTGAACCCAGGAGGTGGAGGTTGCAGTGAGCCAAGATTGTGCCATTGCACTCCAGCCT +GGGCGACAGAGCCAGACTTTGCCCCCCCCCCAAAAAAAAAAGTGCAAAGTAGGCTGGGCA +TGGTGGCTCATGCCTGTAATCCCAGTGCCTTGGGAGGTCCAGGTAGGTGGAGTTCAAGAC +CACCTTGGGCAATATAGCGCGACCCTTTCTCTACAAAAAATTTAAAAATTAACTAGGCAT +GGTGGTGTACACCTGTAGTCCTGGCTATCTGGGGTGAGACAGCTTGGGGTTCACTGCAGT +CTCAAGCTCCCAGGTTCAAGTGATCCTCCCACCTCAGCTTCCTCAGAAGCTGGAACAACA +GGAGCTCACCACTGCGCCCGGCTAATTTTTCAAATTTTCTTTGTAGAGAAGAGGTTTCAC +TATATGGTGTAGGCTAGAGTGCAGTGGTAGGGTCTTGGCTCACTGCAGCCTTCATCTCCT +GGGTTCAAGCGATTCTCCTACCTCAGCCTCCCGAGTAGCTGGGATTACAAGCACGTGTCA +CCACACCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGC +TAGTCTCTCCTGACCTCAGGTGATCTGCCAGCCTTGGCCTCCCAAAGTGCTGGGATTACA +GGCATGAGCCACCGCGCCCGCCCCCAGTCTCATATCCCCAATTTTAGTCATCACAAGCCA +AGTTCCCCGTTTTGTACTGTTCGAAGTTTCAGTGGGGAGAAGAAAGGAAGCTTAATGTGC +CACATAGCTACAAACTGAAGGATACAAGATGAATTTTTATGAAGTGATGGATGGCAGAGT +CAAAATCATTTCAGAGGAACTTTCAGTTCGGTACGATTTTACTGAGTAGATACCCTCAGC +CTTCTGTAGGGCCCCCGAACGAGCAGTATCCCCTCAAAGAAACTCAATCCTTGGCAGCAT +GAAGCAAGGTCGGCCACAAAGGCAAAGGGGGCGGCTGCGGTAGAGGAGAGGGAGTGGGTG +CTGATTCCCAGCGGGGCAGCCTTCCAAAGGGTGCGCTCTGGAAGAATCTCCCAGGGAGCC +GTCTTGGAGGCACTCGGCCCCGCGGAGGGCCGGAGCGAGGGCGCCCAGTGAGGAGCCCAG +GCGGAAGGTGGCGGTGCCCCGCTGCCCGCCCCAGATCCACTCGGCTCGGCCCGCACCGGA +TCTAAAATGGTCAGAACTGGCAGCCGACGCTTCGCGCTCTGGTGATTTTGCACTGACAGC +GCGATCTCCGGCTCCTCTCAGGAAACCGGGAGAGCTGGCAAGGCTGGGCTCCTACGGCTG +CAGTCCCCGGGCCCGATCGAGGGAACTGAGGCTCGTCAGGCCGGCCCCAAATCGCCCGCC +TGTTGCAAGGCCGCCCGCTCCAGCGCGGAGGGTCGCGGCCAGGAAGGCCGCATTCGGACC +TGCCGCAGGCAACAGGGCCCGCCCGGGTGCCAGGAGGCCGGCGCGCCAGCAGCCGCGCTG +AGGAGGCCTCGCACCCTGGCCCCTGGCCGAGCGCCAGATCCCGGCCCCTACGCGCCCGCG +CCCGGGGATCCCGCCGGCCGCGTCCCCGCCACGTCAGGCCGCGGGCTCTCCTCAGGTCGC 
+CGCCCCCGAGCCTCCGGCCCGCGCCCGCCCCCGCGCCCCTCGGACTCCGCGCCGCTAGCT +TGTCCCCGGCCCAGCGATGCCTGCTTGCCGCCCGACCCCCGCGTTCGCTGCCGCCCGGCC +CGTTCGGCTCACGCCTCGCAGCTGACCTGGTGCCGCCGCCAGCCGAGGCCGCACTCGCAG +CTGGAGGGAGGCGGGAGGCGGGAGGCGGGAGGCGCGAGGCGGGAGGCTGGGCGAGAGGAG +CCGGCGGGGCGGGGGCCTGGGCCGTCACGTGGGTGCGCCGCGCAGGCGCCGGGCGGGCCA +GACGTAGGCCCGGGGGTTCCCGGCCTGGCCGGGTAGCAGCGCGGGGGCGGCCCTGGGGCG +GGGAGACAGGGGGCGGCTCCACTCCGGCGCTCTGCGGAGGACAGCAGCGATCGCCAAGGC +TCAGTGAGTCCCCGCACTAGCGCGGCACCTGGCACTTGGCGGGGCCGAACCAACGAATGA +GCGAATTTGCCGAGAGTCGCCCCGTCTGAATCCATTGACTCTTTCCTTGCCTCGGTCCTA +CCCTCATTCCTCGACTCACTTCCTCCTGCCCTTCCTCACTATCTCCCTCCCCGCTCATAT +CCTCCCTCTCCTTTCCTCATTCCCTTCTTCACTCTCTTCTCTCCTCTCTTACTCCCCCCC +TCTCATTCCCTCCCTCCTTCACTCTCTCATTCCTTCCCTTCCTATCTTACCCACCATTCC +CTTCCTCCCTCACTCATTCACTCCCTCCCTGTCTCCCCCTCTCATCCCCTCCCTCTCTCA +TTCCCTTTCCCTCCCTCCTTAACTCTCGTTCTCTCCTCCCTCTCTCATTCCTTCCCTCCC +TGTCTCATTTCCCCCCTTCTTTCCCTCCCTCCCTCTCCTCTTCTCCCATTGCCTCCCTCC +CTCTTTGATTCCCTTCTTTCCTCCTCTCCTCCACTTGTTCCCTTCGTCACGCGCTCATTC +CCTCCCTCCCGCGGCCTGCTCCTTCCCCGCTCCCTCCGCGGTCGGTGCCGCTGACCGTAG +CGCGCCCCCTGCGGGATATCGGGGAGAATCGCTTAACACCGCCAGAGTGAACAGGTGCTA +GTTTGTGTGCCCGTTCCTTTACTGAGATTTTGCAGTAAATCTAATTCCCAGTCTTAAAGC +CTGGTCCCAGAGGTCCGTGCGCGGGTCTTGGCATTCATTGTTCTCCACCTCACCTTCGAG +CAGGTCCCATCCTCCGAGGCGTATCCGTAATGGCTGCGCCCAGAGCTTGGAGCCACAGAG +CCTCCTGATGGGATAAGGTTGGAGTGTCCCACACTGTGGTTTTGAGAGGGACTGTGAAGG +GTGGAGAGAGTGGTAGGGGGAGGATGTGGCTGGAGATGGCTCATGGTTAAAAAGAAAGGT +GGGGCCGGGCTTGGTGGCTGGCGCCTGTAATCCCAACACTTTGGGAGGCCGAGACGGGCG +GATCACGAGGTCAGGAGATGGAAACCATCCTGGCCAACATGGTGAAACCCCATCTCTACC +AAAACACACACAAAAAAACTAGCCGGTCGTGGTGGCGCGCACCTGTAGTCCCAGCTACTC +GGGAGCCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCAGAGGTTGCAGTGAGCCGAGA +TCACGCGCCACTGCACTCCAGCCTGGTGACAGAGCGAGACTCCGTCTCAAAAAAAAAAAA +AAAAAAAAAAAAAAAAATTGGTGGAAGGGGAATGTAAAAGGAGGAAAATACACGGAAGAG +AATTGCTGTCCTGGCTGAGTCCAGAGAGATAACTGAGGGTCCCAGACAAGGATCAAGAGA +ACGGGATTGGCCTCCAGAGGCAGAGGTTCCAAATGGGAGTGGGCTTCCTCCTAGAAAGAC +TTTCTGGAGGAGACCCCCCTACTGTGTAACAGAGGAGGACTTTGGGATTAAGAAAAGCAT 
+TCCAGGAAGCCGACAGTGTCAGCAAACGTGGAGGTGAGATCCTTCAAAGTGAGTGGTGTG +GAGGTTTCCAGAATTTTCTGAGCCTGAAGGGAAGGTTGGAGAGCAGACCCTGCCCTTTGG +AGGCTTGACTTAGCCCTGAGGGCACCCTGTAGCCAGGGTGGGCAGATGCCAATATGGTAG +AGACGAAGACTGAGTAGGGAGCCAGCCACAGTGCTGTGGTCTCAGGCAGGGAGTGAAGAC +CAGAGTGGAGCAGGCTAGAAACCTGGGAAGGAAGCAGGTTCCCCAGTATAAGCCCAGTGA +TGTGTGAAGAATGAGCCCAGGAGATGGGTGGGGAAGTAGGCCCACCCTGCCTACAGGGGA +GCCAGAGCAAGAGCAGGTCTGGGGGAAGATGAGGCCCCCCTTGGCTCCAGGTAGGGGAAG +ACTGACCTCCAAGGCCCAATGGGACCAGGTGTACGGGTGGACCCAGAGAGGAGAGGAAAA +GGAATCCTGGGGCAGCAACAAGAAGGCCACTGTGGTCTGAATGTCGGTACCCCCAAAATT +CATATGTTGGAACTGAATCCTCAGTGCGATACTATTATAAAGTAGGGCCTTTGGGAGGGG +TTTAGGTCATGAGGGTGAAGTCCTCACCATGGGATCAGTGTATAAAAGAGGTTCGAAGGA +GCTCATCCCCCACTTTTGCCAGGTGAGGACACTGTAATAAGCACCATCTTCAAGGCAGAG +AATCGGCTGGGCGCAGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAAGCCGAGGTGGG +AGGATCATGAGGTCAGGAGATCAAGACCATCCTGGCTAACATGGTGAAACCCCATCTCTA +CTAAAAATACAAAAAATTAGCCAGGCATGGTGGTGGGCGCCTGTAGTCCCAGCTACTTGG +GAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCAGAGCTTGCAGTGAGCTGAGATC +GCGCCACTGCACTCCAGCCTGGGTGACAGAGCAAGACTCCGTCTCAAAAAAAAAAAGCAG +AGAACGGACCCCTCACTAGACACTGGGGCTTTTTTCTGAAGGCTACTGTGTCATATAAAA +TTTATATTAAATAAATTGATATGCTTTTCTCTTGTTAATCTGTCCTTTGTTAGAGGGGTG +TCAACCAGGAACCTTGTGATGCGTGAGGAAAAAATATTACCTTTTCCCCCTACTCTTTCT +GGCACCCAACAAACAAGTGGGGTGGCAAAGACACCCCACTTGGGACAACTGGCAAAAGGT +AAGCATTCTTACCAAGGTTAACTGTCCCATATCACTGCCTGTAGTACTCAGCTGAGAGTG +GAAGGTAAAAACTTCTTGTCCCTTCCTTTCCAAATTCAAATTAGCAGAAGAAAACATTGT +GTCTGGATTGTGACTCTTGCTTAAATTTGGTTGAGGGGTAACTGTTTGTTATTGATCCTT +TCCTCCCAGGCACAGCTACCTCTTTCCTGTTTGTTTTATTTGTGTCCTGAGAGCTTGGCT +TTATGACCAGTGAGAATATTCTCCCTGATCTCTGAATAGCCAGTGGGTGCAAGTGACAGC +TTGCTTTAGGACAGCTTGCGTTAGGGTTGCAAGTAACAGCTTGCATTATTACAAACTCCT +CTGTCCCGGTCAGAAAAAGAGAAAGTTTGGTTTTTGTTTGTTTGTTTGTTTTTTGTTTTG +TTTTAATAGTCTTGCTCTTTCGCCCAGGCTGGAGTGCAGTGGCATGGTGGCATGATCTTG +GCTCACTGCAACTGCTGCATCCTGGGCTCAACTGATTCTTCTGCCTCAGCCTCCTGAGTA +GCTGGGACTACAGGCACACACTACCACGTCCAGGTAATTTTTTTTGTATTTTTAGTAAAG +AGGGGGATTCATCATGTTGCGCAGGGTGATTTTTTTTTTTTTTTTTTAATTCTTTTCTGA 
+GACGGAGTCTTGCTCTGTCACCCAGGCTGGAGTGCAATGGCGGGATCTCGGCTCAGCCTC +CTGAGTAGCTGGGATTACAGGCACCTGCCACTACGCCAGCTAATTTCTGTATTTTTAGTA +GAGACAGGGTTTCACCATGTTAGTCAGGCTGGTCTCGAACTCCTGACCTCGGGTGATCTG +CCCACCTCGGCCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCGTGGCTGGCCTCTT +TTATTTTTAAGATACAGGGTCTCACCCAAGCTGGTCAGGAACTCCTGGCCTCAGTCCACC +TGTCTCAGCCTCCCAAAGTGCTAGGATTACAGGCATGAGGGACCACGCCCGGCCAAAGAG +AAGGTTCTGAACCTTCTCTTTTTCTGACCATGACAGAGGAGTTTGTAAGTGGGCCAAGTC +TATAAGGGGTTTTTGTTGTCTCAGTATTCATTGGTGTGATAGATGAGCCAGTCCATGACT +GGAGACGTGACAGTGTTCTGTACAAGATGACATCCTTAGTCACCTGTGTCAACAAAGGTG +CCTTCTGGCTAGGCACGGTGGCTCACGCCTATAATCCTAGCACTTTAGGAGGCTGAGCCA +GGCAAATCACGAGGTCAGGAGTTAGAGACCAGCCTGACCAACATGGTGAAACCCTGTCTC +TACTAAAAATACAAAAAAATTAGCCAGGTGTGGTGGCGCATGCCTGTAATCCTAGCTATT +CGGGAGGCTGAGGCAGGAGAATGGGTTGAATCTAGGAGGCAGAGGTTGCAGTGAGCCAAG +ATCGCGCCACTGTACTCTAGCCTGGGTGACAGAGCGAGACTCCGTCTCAAAAAAAAAAAA +AAAACCCAAAGGTGCCTTCTTTCTTTTGACTGTCTTTGCAAGGGGTCTGGATTTGAGAGT +GCTGAATCTTTGTGCACCCTCTTTGGAGTGTGTCTTTCTCTCTCTCTCTCTCTCTTTTTT +AAGAGACAGAGTCTCACCCAGACTGATCAGGAACTCCTGGCCTCAGGTGGTCCTCCCACC +TTGACTTCCCAGAGTGCTGAGATTACAGGCATGAGCCATTGTGCCCAGCCTTGAGGATGT +CTCTTTTGTCCATGGTTAAGTCATAAAATGCTTATTGGTCTTGGTTCTGAGTTGCTTGGT +AGGTACCTTTGGTTTACAAGAAGAAAAGTGGTGGGTGGGCAGGGTGGTAGTAGGGAAGAG +TGTTCAGTACTGCCAGGAATATGTACTATCTGTTCTGGCTGAAAACTGACAATGAGATAT +TTGAAAGGATTTTTTTTTTTTTTACAGCTCTATGATCAGAAGATGGCATAAGTGGGGCTG +GGCACAGTGGCTCAGTGCCCAGCTGGGATTACGCCTGTAATACCAGCACTTTGGGAGGCC +AAGGTGAGTGGATCACGAGGTCAGGAGTTGAAGACCAGCCTGGCCAACATGGAGAAACCC +CGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGCGGTGGCAGGTGCCTGTAATCCCAG +CTACTCGGGAGGCTGAGGCAGGAGAATAGCTTGAACTCAGAGGGCGGAGGTTGCAGTGAG +CCAAGGTTGCGCCACTGCACTCCAGCCTGGGCGACAGAGTGAGACTCCGTCTCTAAAAAA +CAAAAAAGAAGATGGCGTAATCAGAAGTTGATATTCTGTCTATTGGATCCTGCTTCTCCC +CTGGGGTCTTCTTGGTTCCCTGAAGCCCCTCTTCCGGAGTCCCTGCCCTCTATCGGCTCT +GCCTCCTGCTTCTTTTCTGTATTGCCATGATTTTTGCTAAGGATAAAGGACAACTTCATT +GGCTTCTTTGGGAAACTTGCAATCTCCCCAAACAGGCCCCTCTGAGATTTCTCCTTTCCA +TTTGCTTCTGCTCCCCCTTCCTCCTTTTGCCACCTTCGATATTTTCTCCAGTTTCCTTGA 
+TTCCTTTGATACGTCCCCATTCAGGCCACTCTGGCCCTCCATCCAACCTGCCAGAACTTT +TTCCACTTCAGCACTTCAGTCACTTAAACATTAGGGCTCCCTGCCCTAAAGGGACTCAAA +ATCCTGCAAAAACAACCACCTAAGGATAAAAAGAGAAAAAATGAAACTGTTTGGAAACTG +GGCAAATGAATACTCTTTTAAAAGTCTTTTTCTGGGGGCCAGGTGTGGTGGCTCACGCCT +GTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCAGATCACAAGGTCAGGAGTTCAAGACC +AGCCTGGCCAATATGGTGAAACCCCGTCTCTCCTAATGATACAAAAAAAATTAGCCAGGT +GTGGTGGCATTCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGAAGGAGAATCGCTTGA +ACCCGGGAGGCGGAGGTTGCAGTGAGCCAAGATTGCACCACTGCACCAGCCTGGCGACAG +AGCAAGAAGACTCCATCTCGAAAAATATATATGTATGTATATATACGTATTTACGTGTAT +ATATGTATGCATACATATATATATATATAGAGAGAGAGAGAAACCAGTGAGTTTGTATTA +CTGTGTTTTACTGACTCACGACTACCATTTTGGAATGAAAGCTATAAGATCTTTGTGTTT +TTACGTTTATGTGTTTATATTTGTGTATATCTATGTATGTATGTTTGAATATTATATATA +ATATTTTTTCTATCTCCATATTCAGTATTACTAACTGAATTTTTTTTTTTTTTTGAGATG +GAGTCTCACTCTGTCACCAGGCTGGAGTGCAGTGGCATGATCTCGGCTCACTGCAACCTC +CGCCTCCCAGGTTCAAACAATTCTCCTGCCTCAGTCTCCTGAGTAGCTGGGACTACAGGT +GCGAGCCACCATGCCCAGCTAATTTTTGTATTTTTAGTAGAGACGAGGTTTCACCATGTT +GGCCAGCATGGCCTCTATCTCTTGACCTAGTGATCCACCCACCTCGGCCTCCCAAAGTCC +TGGGATTACAGGCATGAGTCACCACGCCCAGCCTAAATTTTTTAAATCCCTTAAAGGAGT +TATATTCAAATTGTACCCTCCTTTGAAGATGTCTCTTTTGTCTGTGGTGTCTTAAAAGGA +TTATTGGTCTTAGAGGGAGGTAAATAAGCATTCATACAAGCTAAATATTACCAAAAATTT +AAGAAAAATAGAAACTAATCCAACTGCTTTTTAGTTTACATGATCTGGAATATTCTTTAG +TAAATAAAGCTAGTTTGAAAATTGATGGACTACGCCGGGTGCAGTGGCTCAGGCCTGTAG +TCCCAGCACTTTGGGAGGCCAAGGCGGGCGGATCACGAGGTCAGGAGATCAAGACCATCC +TGACTAACACGGTGAAACCCTGTCTCTACTAAAAATACAAAAAATTAGCCAGGTGTGGCA +GCATGGGCCTGTAGTCCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCA +GGAGGCGGAGCTTGCAGTGAGCTGAGATCACGCCACTGTACTCCAGCCTGGGCAACAAAG +CAAGACTCCGTTTCAAAAAAGAAAATTGATGGACTAGTCAAACAAGTTTTATGTTATCAC +TACTGAATGGTAAAGATAATAAAACTATCAATCCAACCTAAGAGCAAAATGTGCAATAAA +AATGAATTGCGGCTGGGCTCACCCCTGTAGTCCCAGCAGTTTGGGAGGCCGAGGCGGGCG +GATCACCTGAGGTCAGGAGTTTAAGACCAGCCTGGCCAATATGGTGAAACCTGGTCTCTA +CTAAAAATAAAAAGTAAGGCCAGGTGTGGTGGCGGGACCTACCTGTAATCCCAGCTACTT +GGGAGGCTGGGGCAGGAGAATCGCTTGAACCTGGGAGGTGGAGGTTGCAGTGAGCCGAGA 
+TCGTGCCACTGCACTCCAGCCTGGGAGACAGTGCGAGACTCCATCTCAAAAAAAAAAAAA +AAAATTGGGCACGGTGGTACAAGCCTGTAATCCCAGCTACTTGGGAGTCTGAGGCAGGAG +AATTGCTTCAACCTGGGAGGCGGAGGTTGCAATGAGCCAAGATCGTGCTGCTGCACTCCA +GCCCAGGCAACAGAGTGAGACTTTGTCTCAAAAAAAAAATGAATTGCATGGCTTGGCGCG +GTGGCCCATGCCTGTAATCCAGCACTTTGGGAGACTGAGGCAGGTGGATATTTGAGGTCA +GGAGTTCGAGACCAGCCTAGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAAAAA +ATTAGCCAAGTATGGTGGCGTGCGCCTGTAATCCCAGTTACCTGGGAGGCTGAGGCAGAA +TTGCTTGAACCCAGGAGCTGGAGGTTGCAATGAGCCAAGATTGCACCACTGCACTCCAGC +CTGGGCAACAGAGTGAGACTCCATCTCAAAAAAAAAAAAAAAAGAAACAATAACCACTCC +CTCCCTACCTCCACATACATAAGTGATTACAGGCTCCAGGCCAAACACACAGAGGAGTTA +CATCTGGCTGGTGCTAGGATGGAAGGTGGGGCAGGTCACCACCTCCCAGGGACTGAGTGC +CAAAGCAAGGAGGAAGATTGGAGGTGAGGCTGTATGGAGACAGCCACAGGCTCACAGAGC +TTGTGCTGAGCTCTCTAGGGGGAAGCTTGATCTGCCTGGGAAGCTTCCAGCTCCTACCTC +CTGCAGAGCCCAGAATGGTGTCCAGGAAATATGTGGCCATTGAGTGGAACCAGCCTTGGC +ATGGTAGCCTGCAGGAACCCGCAGGCTAGAGACATGGCCACATGAGCAGTGAGGATCTCC +CCCGAGATGAGGATCCCCAGAACATAGTGTGGGATGCAGGTCTGGCCTCTGCATACCCTC +GGCTTCCTCCATCACTTATTCTCAGACATCTAAACCACTAACATCCAGACTGGGGATCCC +AGGAGGGTCTACAGTGTCAAGAGAGGGCAAGTCCACTTCAGGATCAACCTTCCTGTGGCC +CCTTACTTTCCTTGGTCCACCTTTCCAGGTCACCCATCCAGACTGTCACCAGGGATCACT +GCCCTGGGCACCAACCCAATGCGGGGACAAGTAGAAAAAGAGCTCTGGGGAAAATACCAA +AGGGAATAGTCACTCAATTTTACTGTGATGATAAACTCCTGGCAAAGTTCCAGGCCTCTC +TCTGTCTGTGTCTGTGAATTATGTGTCTGTGTCTCCATCAACCATAGTGCAGAAGGGGGA +GGGTTGTCCTAAGTATCCTATTAACATGTTTGTTTGTTTGTTTGTTTGAGACAGAGTTTC +GCTCTTGTTGCCCAGGCTGCAGTGCAATGGCATAATCTCAGCTCACTGCAACCTCCACCT +CCTGGGTTCAAGTGATTCTCGTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCACACA +CCACCATGCCTGGCTACTTTTGTATTTTTAGTAGAGACGGGGTTTCACTATGTTGGCCAA +GCTGGTCTCGAACTCCTGACCTCAGGTGATCCACCTGCCTCAGCTTCCCAAAGTGCTGGG +ATTATAGGCGTGAGCCACCATGCCCAGCCAGAACACAGGACTTTCAAAACTTAAACCTGG +GGCTGGGCACGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCGAGGCAGGCGGA +TCACCTGTCAGGAGTTCAAGACCAGCCTGGGCAACATGGTGAAACCCCATCTTTACTAAA +AATAAAATAAAATAAAATACAAAAATGAGTCGGGCGTGGTGGCGCACTGTAGTCCCAGCT +ACTGGGGAGGCTGAGGCACGAGAATCACTTGAACCCCGGATGGCGGGGACTGCAGTAAGC 
+GATCACGCCACTGCACTCAAGCCTGGCAGACCTAGCGAGACTCTGTCTCAAAAAAAAAGA +AAACAAGGCTGGGCGTGGTGGCTCACGCCTGTAATCCCAACACTTTGGGAGGCCAAGGCG +GGTGGATCGCCAGGTCAGGAGATTGAGAACATCCTGGCCAACATGGTGAAACCCTGTCTC +CACCAAAAATACAAAAATTAGCCAGATGTGGTGCCAGGTGCTTGTAGTCCCAGCTACTCA +GGAGGCTGAGGCAGGAGAATCACTTGAACCTGAGAGGCGGAGGATGCAGTGAGCTGAGAT +TGCGCCACTGCACTCCAGCTTGGGCGACAGAGTGAGACTCTGTCTCAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAACCAAGGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCAAT +TTGGGAGGCCGAGGCAGGCGGATCATGAGGTCAGGAGATCGAGACCATCCTGGCTAACAC +GGTGAAACCCTGTCTCTACTAAAAATACAAAAAAATTAGCTGGGCGTGGTGGCGGGCGCC +TGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCGGAG +CTTGCAGTGAGCCAAGATTGTGCCACTGCACTCCAGCCTGGGCAACAGAGCAAGACTCCA +TCTCAAAAAACAAACAAACAAACAACAACAACAAAATAAAACAAGGAAAAAAACACTAAA +ACCTTGGATGTCCTGGGCAACCCCGGTCAAGATGATCACTCTAGATAGAGTTTTTAAAAA +TTAGTTTTCAGCCTGGCCAAGATGGTGAAACCCTGTCTCTACTAAGGATACAAAAAATTA +GCTGGGCTTCGTAGCAGGCACCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAAT +CACTTGAACCCGGGAGGCAGAGGTTGCAGTGAGCCGAGATGTCACCACAGCACTCCAGTC +TGGGCAACAGGGCGAGACTCCATCTCAAAAAAAAAAAATTAGTTTTGCCACCGGGCACGG +TGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGTAGGTGGATCACCTGAGGTC +AGGAGTTCGAGACCAGCCTGGCCAACGTGGTGAAACCCTGTCTCTACTAAAAATACAAAA +ATTAGCCAGGCATGGTGGGGTACGCTTGTAATCCCAGCTACTGGGGAAGCTGAGGTGGGA +GAATTGCGTGAACCCGGGAGACAGAGGTTGCAGTGAGCCGAGATGGCACCACTGCACTCC +AGCCTGGCCAACAAGGGCTAAATTCTATCCCCCCCCCCTCAAAAAAAACAATAACGATTT +TGGGAGTATTAGAGCAGAAAACGTTTGAAAACATGAACAGCTCTCACATCAACACAAGAC +ATGAAACTCTGTTGGCAAGAGAGGCCTCTTTGGGGAGAGTGCTCACTCCTCTCCCACCCA +CTCTCTACCACCTTCAACCTCCTGTCTGGGGCACAGTAACAAGGGGTGTAAGGGGTGTAC +AGTGCCTAGCAGGGGCCTGGCACTTAGGCTTCAGTAGGTGCCAGTCCACAGTCTCCTGAG +GCCCATGTTGGTTGGTGTCCACCTGGTGCTTATTGACCACTTGGGCTCCAGATTTTCCTC +ACGTCACCTCCCTCCCCAGGGTAGCAGATAAAGGCCCCATAGTTCCAAGGAGGCAGAACT +TTTACATTCCTAAAAGCATTCTCAGGCCCCCATAACCCCTCATCCCACCTTCAGCTCGCT +CACCTTCACCTATCAAATTCGTTTGACATCTGGCCTCCAGCAACTAGCTAGGCTGAGTTC +TCCATGAAGGGGACGTGGCTGCTGGTCAAAAGGGAAAGCCCATGTCTAAAAATTAATTAA +CGTGGTTTAGGGGCCTCACATTCCCAACCCCCAAATACAGCCATTGTGAGATGTGAGATT 
+CAAAAAATCGAAGAGTGAAGCGGTGATGAGAGTCCATCTGGTCGCTCTTCGGATGCGGCG +CTAGCAGGCAGTTGCATGAAAGCAGATCCCAGATCACGTTGCACTGGCACACAGTAGGTA +CCCACACATATTTGATAAATAAACACTCTCTCCCTAGCATTACTATAGATAGCTGGCAGC +TTGGCCTGGCATCCCGGAGGGGTCTGGATCTCAGGGACTAGATAAGGCCAGCACACTGAA +GAGTTGCAGAAAATGCAGGCGGTCAGGCGCAGGTCTGCTGCGGGAGCACCTGCCGTGTCC +GACAGCTGCACTGGAAGAGGGGCGTGTGTCTCCCAGCCGGGCGGAACCGAGACCGCTTCG +TCCCCGGCGGGGAACTTCATTTCCCACAAGCCTGTGCGCGAGACTCCATTTCCCACAAGC +CCTCCTTCACTTCCGGCTCGGCCGAGGCACTGCGGACGGCTTCCGGGTTTGGGCCTGGCT +CTGTGACTGAGGCGGCGGCGGTGGCGGCCAAGCGGGATACGGGCGGCGGGAGCTGGGGAA +CAGGCATGGACGTTTCCGGGCAAGAGACCGACTGGCGGAGCACCGCCTTCCGGCAGAAGC +TGGTCAGTCAAATGTGAGTAGTGGTCGGGGCAGGGGGCTGGATTGTGGGACCTTCCTCCT +TAGCGTGGCGGGCGAGGCCAGGGCCGGACCCGTGAGAAACCTACGGCGCCGGGAGACAGA +AGGCGCCGGGGACTTGCGTGGGTCCCAGGGCTCGGGCCCCCCCGTCTGTCCCCTCCGTTC +CCGACATGGACTGCTCGTTTGCTTTCCTGGGGCCGAGCTCTGCAAACCAACGAGCCCTGG +CTTATGAATCATCGTCTTGAGCTTTCTCATTCGTCATTTGTGTGCTTTGGGTGCAGACTT +TCCCCCGCTTTTTGAACCACACTGAATAGGGAAAGGGAGAGAAGCAGCCGGCTGTAGTGG +GTTTAGCAGGGCTGGGTGCTTCATGGTTTCTCTTCCAGAAAAGCGCATCAACGGTGAAAG +AAAGTGATGGGAGGAGGGTGGATGTAAGATGAGATTGGGAAGGTAGTTGTTTGGGGTGTG +GGCGGTGGGACGGAGGATCTGAGGGTGACCCCCCGAAGGAATGTGGACTTTGCCGAAGGA +TGGCAGGAAGCCGCTGTCAGGAAGCGATCGTCGTTTCTGGAGGAGAGCGTGAAGAGCTGG +GGTCAGTGGCCCCGCTCAGAGGAACTCTAGCCCCTCACCACCGACTGCTTGTTTCTGTTA +GCAGTTTTTTTCATGCGCGTCCGTGTGAAGAGACCACCAAACAGGCTTTGTGTGAGCAAT +AAAGCTGTTTATTTCACCTGGGTGCAGGTGGGCTGAGTCCGAAAAGAGAGTCAGCGAAGG +GAGATAGGGGTGGGGTCGTTTTATAGGATTTGGGAAGGTAATGGAAAATTAGTCAATTGT +TCTCTGGTGGGCAGGGATGGATCTCACAAAGTACATTCTCAAGGGTGGGGAGAATTACAA +AGAACCTTCTTAAGGGTGGGGGAGATTACAAAGTACATTGATCAGTTAGGGTGGGGCAGG +AACAAATCACAATGGTGGAATGTCATCAGTTACGGCTGTTTTTACTTCTTTTGTGGATCT +TCAGTTACTTCAGGCCATCTGGATGTATACGTGCAAGTCACAGGGGATGCGATGGCCTGG +CCTGGGCTCAGAGGCCTGACAGTTTTTAACTGAGCTTATTGGCGTGTTGTGCTCGCCACA +GATTTGCCTGAGACACAGCCCCTTTCCTGAGGGATGGTAAAGGAACCGTCACAGCGCAGT +CTTGTCAGTCTGTAATGTGTATACTGCAGGGGTTTCTTGCGAAGCGGCGGTTAACTTGAG +CAGACCAGGGGCAGTCTAGTGAGTAGGTGACGAATAAATTGAGTCCTAGAAAGACAAGTA 
+GGAGTGTGAATAGCCACCATTCCAAGCTGAGGAGTTAGGAAACATGAGCATTTACTGCGT +GGAGGGAGGATCTTGTAGGCTGAGAGTTCTGGGGCTCGAGGAGTAAGAGAGCCCAGGAGA +TGATGGAATGAGGGTGAGCAGGTACTAGCACTTTAGTGGGGGTGTTGTGCCAAGGAATTG +AGCCTGTGTTTGGCATGTGGCATTCTTTGGAGTAGTGAAATGCCAGTGTTGGGAATATGC +CTAGAGTCCCTGTCCCTGGAGATGGATGCAGGGCAGAGAAGAGAGACCAGTTAAGCTATT +AAGAGAGTTTGAACATTTGGAGGGAGTAGGGAACCTGTCCTTTTTCATCCTTATATTCTC +ACTGCTAGCTCACTGCCTGCCACATTGCTCCGTAAAATGATTGTGGCATGACCTAGTCGG +GCGGGAGATTATAAAGGCCTGCATTAGACAGTGGTAGAAACTGAAAGAGGAAGACAGATT +CCAGAGGAGGGGGAACGTAGACAATTCTTGATGACTAACTGGATGGAGGGGGGTGGGGTG +TGGGGTGAAGCATTGTGGAAGGAAGGAAGCCGGGCTGAATGCTGGGAAACCTTTCAGTAC +AGTGGAAAGAGTAGGGACTGTAGAGTTAGTAGCTATGGGCTTTGGGAAGAATAGCCTTGG +GATATTTCTGGGGATATTGCACAATCAGTGAACAGTGTTTTACTGATAAAATATAGTTAT +CTGTAGGTGAAAACTTGAGAGATATCCCACTTCAGGGTTCCTGCAAGAAAAGTCCCCAGG +GCAAGGGTTCTTCTCTGGGAAAGCCCAGAATTAAGAACTGGAAGGGTGAGTTGTCATCAA +AGAAAATGGGGGTAGGGGAAGTGTTAAGAGGGCTGGGAGGGTGATTGAAGAGTTTATGGG +TGTTGAGTGACTGGATACTTCATAACTCAGCTTTATTTTGCTTCCGGGAGAATTGAGTTC +TTTATTTTTTTCGATATTGCTGTTTGTGTGTTTTTCTGTTGCCGCTCTAACAAATTACCA +CAAGTTTAGCATCTTAAAACAACACACCTTTAGCCAGGTGTGGTGGCTCACGCCTGTAAT +CCCAGCACTTTGGGAGACTGAGGCAGGCGGATCACGAGGTCAAGAGACTGAGACCCATCC +TGGCCAACACGGTGAAACCCTATCTCTACTAAAAATCCAAAAATTAGCTGGGTGTGATGG +CGCACACCTGTAGTCCCAGCTACTCAAGAGTCTGAGGCAGGAGAATTGCTTGAACCCGGG +AGGCGGAGGTTGCAGTGAGCCGAGATCGCACCACTGCTATCCCGCCTAGTGACAGAGCGA +GACTCCATCTCAAAAACAAACAAACAAAAAACACACATTTATCATTTGGTAGTTTCTGTA +GGTCAGAGGTCCAAGTAGGTTCCACTAGGTCTCCTGCTATCAAGGCCTATCAAGGCTGAA +ATCAAGACGTTGGCAGGGTGCATTTCTTACTGAATGCCTTGGGGTGGAACTTGCTTGCAA +GCCCATTTATGCTGTTGGCCATATTCAGTTCCTTGAGCTTGTAGGACTGAGGTGCCCTCT +TTCCTTACTGGCTGTTAGCCAGGAGTCAGTCTTTGTTCCCAGAAGCTGCCCGAATTCCTT +CTCATGCTTTTTATGTGTCTCCCTCCAGCGAGGGCCAGTTCTTCTGCCACATCTCTCTGA +TTTCAGCTAGAGAAAATATCCTGCTTTTCAGGGCTCAGATTAGATTTAGTGCACACAGAG +AAGCCAGGATAATCTCTTTATTTTAAAGTTTGTAATTTTAATTACATCTGCAGAATTCCT +TTTGACATGTAATGTAGCATATTCACAAGTTCTAGGGATTAGGCACGAGATGGGCATTCT +CCTACCACAGCCGGGAAGAGCAGTTTCAGAGGTTGCCTTCCCTAGGTGATGTGGGTGTTC 
+TCAGTTCTCAAATTTGTCTGTTTTCTCTCTAGGGTTCACTTTCTCTCTAGGGTTCACGTT +GAAAGTCGTGATTCTTGGTGCCTTGATTTATTGTGCTCATAGGGTTAAAAAAAGATTTGC +ATTAGAATTCTAGACATACCTTTAACTCTTAACTTCCTCCCATGTTTTTACACTGTTGTT +TAAATTTGGCCATTTTGGGAAATGCTTTATACCCAAGTAATTTATAGTTGATGAAGGGAT +AGAGAAAGTGTCTTGCTTGTGGCACCGTGACTCACACCTGTAATCCCAGCACCCTGGGAG +GCTGAGATCTGGGACTACTTGAGTCCAGGAGTTTGAGACCAGCCTAGGTAACATAAACAA +ATACAACAATCAGCTGGGTGTGGTGGCGTGCACTCGTAGTCCCAGCTACTCTGGAGGCTG +AGGTGGGGGGATCGCTTGAGCCTGGGAGGTCGAGGCTGCAGTGAGCTACGTTCACACCAC +TGCACTCCAACCTGGGCCACAGAGCAAGACCCTTCTCAAAAAAAGAAAAAAAAAAAGCAT +TCTGAGTTTTGACAGAGAAGAAAACATTTGATGGTTACTTATCTGTCTCTACCAAGAGCC +TAGGGAACAATGCCCCAGAGCATGAGCTGGTATATAGTCTAGTCTCCTGGCTCCCACTGA +GAAAAAGGGATTTTGAGTTGAGGCTGGCTATGACTAGACAGCCATTGCAGGGACTCTCCA +GTTCAGCCTCTTAGCCCTCCACTGCCTCCACCTCATCTGGGCCCATTCTGGGCTGTGGTC +TCCTGCTCTCTTCCCAGTCCCATCCTTCTTCCCCCAACAGCACAAGTGAGGGTGAGGCCT +AAAGTGTAGCCACTGCCCTGCTGCAGAACAATCAGTGACTTTTCTTAGCATTAGAATAAA +ACTCCAGATGTCCCACCCTCCCAGCCCCTCACCCTGACCTGGTATAGTCATCTCTTCTCC +TACCCCCACCGTGTTCTGACCTCTCTGGTCTTTCAGCTTCTTGAACATTCTAAGCTTTTT +TTTTTTTTTTTTGGAGACAAGATCTTGCTCTGTGGCCCAGGTTAGAGTGCAGTGGTACGA +TGATAGCTCACTGCAGCCTTGACCTCCCAGGGTCAAACGATCCTCCCACTGTAGCCTTCT +GAGTAGCTGGGACTACAGGCACACACCACCACGCCTGACTGATTTTTATATTTTTTGTAG +AGATGAGGTTTCACCATGTTGCCCAGACTGGTCTTGAACTCCTGGGTTCAGATGATCCTC +CCTACTAGCCTTCCAAAGCACTGGGATTACAGATGTGAGCTACTATGCCTGGCCCCTGAG +CTCTTTCTTTTTAACCAATTTATTATCAAGTGTTTAAGGTATTGAAAAATGAGTAGACTC +CTCCCCACCACTGTATTCATGACTAGCTCCTTCTTTTAGGTGTCTGCTCAGTATCAGATA +CTCAGAGGATCCTTCCCTGCTTGATCTAAAGTTGGTCACCGCCTCTGATATGTTTTATCT +CAGCCCTTTTTAGTTTCCCATAGGACCTCATACCTATGAAGCTTGGTACCTGGTATACAA +AACACTTGTTGAGGAGTGGAATAGTATTTTCTTGGGGGCTGTGTGAGTCACCTCTTTTTT +TTTTTTTTTTTTTTTTGAGGTGGAGTCTCGCTCTGTTGCCCAGGCTGGAGTGCAGTGTCA +CAATCTCAGCTCACTGCAAGCTCCGCCTCCCAGGTTCACGCCATTCTCCTGCCTCAGCCT +CCTGAGTAGCTGGGACTACAGGCGCCCGCCACCACGCCCAGCTAATTTTTTTTTTTTTTG +AGACAGAGTCTCACTCTGGAGTGCAATGGTGTGATCTCGGTTCCTTCAACCTCTGCCTCC +CGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAATAGCTGGGGTTACAGGCATGCGCC 
+ACCAGGCCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGTCAGG +CTGGTCTCGAACTCCTGACCTCGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGAGATT +ACAGACATGAGCCACCACGCCCGGCCGGGTCACCTCTTGTATTACAGTGGTATTCTTTAT +TTTTTTATTTTTTTAAGAGACAGGGTCTTGCTTTGTCACTTGGGCTGGAGTGCAGTTGGT +ATGATCATAGCTCACTGCAGCCTCAAACTCCTGGACTCAAGTGATTCTCCAGTGCAGTGG +TATTCTTTACCTGAACCAGTGTACATTGTGAAGCTCTGGAGCCCCTGCTTTCTCCACTTG +TTACACTTGTTAGCCTTTCTTTTTCTTTCTTTTTTTTTTTTTTTGAGACGGAGTCTCGCC +CTGTGGCCCAGGCTGGAGTGCATTGGTGCGATCTCAGCTCACTGCAACCTCTGCCTCCCG +GGTTCAAGCAGTTCTCTGCCTCAGCCTCCAGAGTAGCTGGGATTACAGGCACCCGCCACC +ACTCCTGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATCTTGGTCAGGCTG +GTCTTGAACTCCCAACCTCATGACCCACCTGCCTCGGCCTCCCAAAGTGTTGGGATTACA +GGCGTGAGCCACTGCACCTGGCCTAGCCTTTCTTAATCGTGTCCCTCATCAATGAAATGG +TAGAAAGCCAATCTGTTTTACAGGTGTGGTAGAAGGGAAAAATGAAGAGACCTCTGTACA +AGCCTTTGCAGAGGTCTTGGGACTCAGTGGTGAGCTCCCTGCCACCACCTCCTGTGCCCT +TGCAGGAGTAGGAGATCTATAGTCTTGGAGAATAAGGGTGATACCCCAATACCCCAAGCC +TGGTTAGGGAAATTGATGACCCTTATTGGAGCCTGTGGGGAGGTAGTTGGGAAATTGTTA +GTAGCTTCATGTTGGCATTTTCTTTTTTGAAACAGAATCTTGCTCTCTCCTCCAGGTTGG +AGTGCAGTGGCATGATCTTGGCCCACTGCAACCTCTGGCTCCCAGGCTTAAGCGATCCTC +CCACCTCAGCATTCCCAAGTAGCTGGTACTACAGACGCATGCCACCAGGCCCAGCTAATT +TTTGTATTTTGTGTAAGAGACAGGGTTTCACCATGTCTCCCAGGCTGGTCTTGATCTCCT +GGGCTCAAGTGATCCACCTGCCTCAGCCTCCCAAAGGGCTAGGATTACAGGTGTGAGCCA +CCGTGCTTGCCTTTGTTGACTCTTTATTGAACATCTACTCTCCCTGCAGAAACTGCAGGA +TAAGCCACCCTTAGAACCAATTTAACTGGCCTTTTTGGGGCTTGGACCTTCCTCATGGGA +TTGCTGGGTTAATGTTGAGCCTGTCTCTGGGCTATAGGCTCTCAGAGAACAAGAGTGCAT +CTGCAATGACTAGTGCTGCACCTGTCATACAGGTTTGTAGAAAATATTTAAGCTGTGAAT +GAACAAAAGGACCTTATCAGCTATTTTCTTTCAATTCTTTTTGGTTTTTCTGTGATCCTA +TTCTTGAGACTTTTATCCCTCCCACCCCTCTGTACAGCAGATTGCACATATAAAGATGAA +ATAAATAGATGTCCATTGGGCAAAACAATACAGAGAGGAGGTGACACAGCTGGGGAAGGA +CAGTCCAGTATCTGATGAAGTGCCAGAAGGGGCTTAGGGGGAGCATGGAATGGAGCTGGA +ACTGCCAGGTCAGCCACCTCTTCTAGTGAAGCCTCCAGAGAGACGTGAGCTCCAAGTAAG +ACAACACAAGGAACAAAACATACAACTGAACAGCAACTTCCAAGGGTCTGGAGAGGTCTT +CAGAAGGATAGGGAGTGCCCTTAAACTTGCAAGCCCAGATTCTGCCTGTGGGTCATGGTC 
+GTGCCATAGGCAGAAGCCAGTGAGTCAGCCTTGGCGGAGCTTTTTCTGTGGCTTTTCCCA +CTCACAGCTCTGTGCCCTTCTGGTGCAGCTCCAGCCCTTGTTTTCCAGCTTCATAGCCTG +GTTGTCACATAAATGGCTGCACTGGCTTACAGGTGTCTGTGTATAACCTATTGCTAGAAT +GTCCATTCCTTGAGGCTAGGGGCCCTGTCTGTCCCTCTCTTCACTTTCATAGCCCCAGTG +CCCCATAGGGCTGCTCCTGATGGGCAAATAGGAAGTGGTTGTTGAGCCTCTAGGCCAGTC +GTGACAGTGACCTGAGGGGACTTGAGCCCTCAGTCATCTGCTCAGGAGTTCAGGGCTTTC +TCAACAGCAGCTGACGTGCGGCAGGCATGCTATGCCAAGCCCTGTTGCAGGAGTTGACAG +ATGTTGACTCATCCCCTCTTCATGCTTTCTTTGTCAGTAGATGTACATGTTACTCACATT +TATAAGATGAGGAAAATGAGACCTAGAGGAGAGGCCACTGCTGCTTAGAGTTACTCTACA +TGTTGTAAATCAGGGGTGTCCAGTCTTTTGGCTTCCCTGGGCCATGTAGGAAGAAGAATT +GTTTTGGGCCACACATAAGATACACTAACACTAATGATAGCTGACGAGCTAAAAAAAAAA +AATTGCCAAAAAATCTCATAATAAAGTTTACGAAGGCTGGGCGTAGTGGCTCACGCCTAT +AATCCTAGCACTTTGGGAGGCTAAGGTGGGTGGATTGCTTGAGGTCAGGAGTTTGAGACC +AGTCTGGGCAACATGGTGAAACCCCATTTCTACTAAAAATACAAAAATTAGCTGGGCATG +GTGGCGGGCACCTGTAGTCCCGGTGGGGAGGCTGAGGCAGGAGAATCGCTTAAACCCGGG +AGGCAGAGGTTGCAATGAGCCAAGGTCGCGCCACTGCACTCCAGCCTGGCGACAGAGTGA +GACTCAGTCTCAAAAAAAAAAAATATATAGACCAACCTGGCCAAGATAATAAAACCTCGT +CTCTACTAAAAATACAAAAATTAGCCAGGCATAGTGGTGCACACCTGTAGTCCCAGCTAC +TTGGGAGGCTGAGGTAGGAGAATCGCTTGAACCTGAGAGGCGGAGGTTGCAGTGAGTCAA +GATCACGCCACTTCACTCTAGCCGGTGACAGAGCAAGACTCTGTCTCAAAACAAACAAAA +AAAATTAAAAATAAATAAGTAGGTTGGGCGCGGTGGCTCACACCTGTAATCCCAGCACTT +TGGGAGGCTGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCTAGACCAGCCTCAACATGG +AGAAACCCTGTCTCTACTAAAAATACAAAATTAGCTGGGCATGGTGGTACATGCCTGTAA +TCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATTGCTTGAACCTGGGAGGCGGAGGTTGC +AGTGAGCCGAGATCACGCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCGTCTC +AATAAATAATAATAATAATAATAATAATAAATAAGTAAATAAATGAATGGTTGCTGAACC +ACAGCAGGACAAGACAGACAAACCCCTTTTATTTGTTTTTGTTTTTTGGACCCTAACAGC +AGAGGAAGAGCCAGCCAGCAAGCCCCTTTTGATCTTTGGCCTCTGCCAACCAGTACCCCC +CAGACCTTAACCCCCAGCCCTACCAGGTCCCTTTTGTAAGTGACCCCACATGCCTACCAC +AGTTGCAGGCCCTGACATCCACCCTGACCAGAGGTCTTCCAGCTATTACTGGTGTTTGTG +ATGCTGTGGCTGTGGCCAGTCAAGGACCCATGCTGGGCATCCCGAGGTAGGAAGGACCAT +GACCACGTGATTATCTCCCTGCCACTATCATGGCAGGGCTGTGGGGTACTAGCCTCCTTT 
+AAAGCATTCCTTAGGACAGTAATTTCTGTTAAATATCTGGAGGGCAAACATTTTAAGGAA +TTTTAATGAATTTTTAAAAATTCAAAATGAAAAATTTAAATCATAAAACCAAATAGAAGA +AAGACCCCTATGTCCATATACGCACCACTCTGCTTCAGTGATTATCAACTCTTGTCTCAT +GTACTTGACCTCCACACAATTATTATTACTATTTTTTGAGAAAGGGTCTTGTTATGTTGC +CCAGGCTGGAGTGCAGCAGTACAATCATAGCTCACTGCAGCCAACTCCTGGGCTTAAGTG +ATCTTCCTGCCTTAGCTTCCCAAGTAGCTGGGACTAAGGGTAGGCGCCACCATGCTGGCT +AATTTTCTTATTTTTTTTTTTTTTGGTAGAGACCAGGTCTTGGTTTGTTGCCCAGGCTGG +TCTTGAACTCTTGGCCTCAAGTGAGCCTCCTGCCTTTGCCTCCCAAAGTGTTGGATTACA +GGCATGAGCCACTACGCCTGGCCATCCCCAGTTATTTTGATGCCCAGACTGTTTTAGTGG +CTGCCCACTGTCTGTGGATTCTGTTCTTAGTCTTGATCCTTCGTGCAGTGTCTGCACCTG +CCTGCCAGTTCCTTGACCTGTGCTCCATCATACCAAGCTGGCAGTGGATCCTCAGACAAT +CTCAGTTCTTACTCCCTGCTCTGTGTCCAGTTTGTTTGCCTACCTAAAAGATCCTTCCTT +TTCGTGGCTGAGCAGTGCAGCCGTTCTCCAAGGAGACTGGGCCTTCTGCTGCCCCCACCA +CCCTGCTCTTTTCACTGCCCCCACCCCGCTGCTCTTTTCACAGGCTTGGTGAGGAGTTTC +CAGGTTGCACTTGGTTTGTTTGAAGGCTTATCTCCTCCTAGTCTGAACTCGTCAGGGCAG +GAGCTTGAATCAATCATTTCCTGCCTCTCAGGACTGCTGTAGCAGGGGGATTCCTAGATC +TGTCCAGTGCCAGTTTCCTGAGTACTGCTGCAGGCCCATTTCAGCTGCTATGGTGATGGG +GGCTGTCCTGAGGGGACATTCACCTCAGGTAGAGATGGCAGAAACACCACAGGAAGATGC +AACCAGTCAGCTCAGCTGGGGGCACTTAAATTAGATCCATTTTTTTTCTGGAAGGCCTTT +TGGCAATACTGTTTATCCCAGTACCTTAAAAATGTCGATTTCCTCCAAGGGCGCAATCAG +GAAAGTATTCAAAGCCTGATGGTGATGAGGGTGCTCATTGCCGCATCATTGTCTCAGGAG +TGGGATTTGTGAAACAGACCAACAGTGGGAGCTTGGTTAGAGTTGTAGCTGCAGGTGATG +GGCTCTGTGCTGCTGTTGACATTGACGTGAGGGACTGCCATAATGCGTCAACATCGACAA +GCGCTTGTTACCAAGCAGGATGCGTGGTACAGGCTGAGTTTTATTAAACAGATGTCTTTA +ACTGGTATGAATGATTTTAATTTTTTCTTTGTTTGAGATGGAGTCTCGCCCTGTCGCCCA +GGCTTGCGTGCCTGGTGACTTCTCTCACCTGGGCCCTCAATGCCAGACCAGCCTTCCCAG +GATGATTCCATCCTTTCCATCCCTGCAAGGGAGCTTGAGGCCCTTTCCCTTTCAGTTTGA +TACTGTCCTTCCAAGTTACCTTTCTTCATGTTGGCACGAGAGCTTTGTGCCACACCATGC +CCTTTCTGGATGCTGTAGTGTCTGGCTCTTCCTCTGTCTCTGTGACCCTCTTCCCTGGTG +AAGCTTTGACCTTGGCTTCATCCCCTCCAGATGACCCCTGATGGTTAGCTCAGCCTGTGG +TGGCTCCAGAGCCCTCATTGAGTGCATCCTGACACGAACTGAACAGATTAACTATTTTTT +GCCTTGCCAACAGCATTGGAAATTCCTTGAGTGTGGGACCTGGACTCATAGCTCTATGTC 
+TCTTATAGTTCTTAGCACAATATCTTGGCCTAGATGAAGTACATAATAATTATATGTAGG +GTTGTGGAAAGCAGTGCTGGCTTTAATTAAAGCCTGCCATGGTTTTGTCCATCAGTGCTG +CCTAAACCGTTCTGAACTTCTCATTTTGAACTTTTTTCCCTCACAGCTTGTGATGGTTGA +TTTTCTTTTTGAGACATTCATTTATTTGTTTGTTTATTTGGCAGGGTCTCACTCTGTCAC +TCAGGCTGAAGTGCAGTGGCGTGATTGTGGCTCACTGCAACCTCCACCTCCTGGGCTCAA +GCGATCCTCCCACGTCAGCCTCCCAAGTAGCTGGGACCACAGGTGTGTGCCTCCATGCCC +GGCTAATTTGTGTAATTTTTTGTAGAGTAAGGGTTTTGCCATGTTGTCCAGGCTGGTCTC +CAACTCCTGGGCTCAAGCGATCTGCTGCCTCCGCCTCCCAAAGTGCTGGGATTACAGGTG +TGAGCCACTGTGCCTGGCCCTGAGACATTTATTTAAAGACAAAGGGGAGTACAGGCTAGA +GAACAGAGGGGAATAGAATAGCAGCCAAGGCATAATTACCTCTAAACCATCACCCCATAA +GGGAGATGTTCGCCTCAACTTTAGTAATGACTCTCTTTCTAGTCTAGGTGCAGGGCTGTT +AATAGAGGCCCTGTGATTGGGTTAAGGGCTTGAGGTACAATAACTCTGTTATTTAGGGCA +GTGAGTCCTTGGAGAAGATGCTTTACCTTCTTTCCAAGCCCCATGTCTCCTTATTTTGAG +GGTGTTTTTTTTTTTTTTTCTTGAGACAGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGT +GGCCTGACCTCGGCTCACTGGCCTGACCTTGGCTCACTGCAACCTCCGCCTTCCGAGTTC +AAGTGATTCTTGTGCCTCAGCCTTCTGAGTGGCTGGGATTACAGGCCTGCACCACCACAC +CTGGCTAATTTTTGTATTTTTATTAGAGATGGGGCTTCACCATATTGGCCAAGCTGGTCT +CAAACTTCTGACCTCAGGTGATCTGCCTGCCGCATCCTCCCACAGTGCTGGGATTACAGG +CGTGAGCCATTGCATCCGGCCTGAGGTTTTTATTTTGAGAATATTTCAAATGTACAGAGA +GTTGCAAGGACAGTAAAAGCACTCAGGCATCCTCTTCCCTGGGATTCACCAGCTCATTCT +CACTTTTACCATCTTCCCATACTCCCTGGGGGCCTCCTCCTTCTGGGCTTCGTTTCCTTT +TATTTGTTTCTGTCTTTTTAATTTTAATTTTTTTTTAACTAGCCCTTCTCTTTGGCAGGA +TTTGTTCACTTTTAATCCTTGGCCCCAGCAGCTTCTCTGAGCTCTGCCTATGAATCTGGG +GGAGAGATAGGGGCTCACACAAGAGTGAAGTGTGAGCTGGAGGGCTGAAGGGGAATGAGG +GACATCTGGGCCAAAGGCAGGGGACCAGCAAGGCCCCCACCCCTGGAACTTCTGTTCTTC +CCAGCCTACGTACCCCTTGCTTCTTCTTATGTTGGTGGGCTCTGGGGGCTGGGTGAAAGC +AGTGCTTGTAGGTTAAGAAGTGACTCATCCCACGTGCCCGCTGCCCTTCTCTTCTGTTCT +TTGCTCTTTCCAAGCTTCCTTTCAGTCCTCCTCATCAGACATGACCTTTGAGGAAGTGAT +GAGTCTGCTATGTGTGAGACAGAGCCCCTGTTGTGAAGTAGAACTCAGGGTTCGTCCTCA +GAAGCCTCGGTGGGTCACCATACGTCCTGCAAGTGCTGTCAGTCTGCTTTGATTCGTGTG +AGGGAGAAGTCCCTGCCAAGTGCTCTGTAGTAGGCATTCAGGCCAACCAAAGAACAGGGC +AGGGTCCTGAGGCACAGGCTCAGAGCTCTTGCTTATTTCATGGGGTGTTTAAATTGAGGT 
+ATAATCCCTATCACAGTGAACGACACGCATACTTCCAATCTTGAGTGTGCCACCAGAGAA +CTGTTGATACTCCCCAGATCTGCACCCAGCCAGGGCAAACTGTGCACCACTCTTCTGACT +TCCATGGCCAGCCGAGACATTTGGAATCTCTGTGCAATCACACTCTGTGCCATCTTTTTT +TCGAGACAGGGTCTCGCCCTGTTGCCCAGGGTAGAGTGCAGTGGTACAATCTCGGCTAAC +TGCAGTCTCGACCTCCTGTGGGCTCAAGCAATCTTCCCACCTCAGCCTCCCAAGTAGCTG +GTACTACAGGTGCTCACCACCATGCCCTGCTAATTTTTGTGTTTTTTGTAGAGATGGGGT +TTTGCCATGTTTCCCAGGCTGGTCTCGAACTCCTGGGCTCAAGCAATCCACCTGCTTCAG +CCTCCCAGAGTGCTGGGATTACAGTCCTCTGTGCAATTGTGTGCATCATGTGTGAAATTT +ATCCTCATTTATCCTCACAGGCAGCAGTTGTTCTATTTTTTTTTTTTTTTTTTTTTTTTT +TGAGATGGAGTCTCACTCTGTTACCCAGGCTGGAGTGCAGTAGCACAATCTTGGCTCCCT +GCAACCTCTGCCTCCTAGGTTCCGGCAGTTCTCCTGCCTCAGCCTCCCGATTAGCTGGGA +TTACAGGCACGTACCACCATGCCTAGCTAATTTTTATATTTTTAGTAGAGACTGGGTTTT +GCCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGGGATCCACCCACCTCGGCCT +CCCAAAGTGCTGGGATTACAGGCATGAGCCACCATGCCCAGCCTGTGTTTTTGTTTTTTT +TGAGACTGAGTCTCACTCTGTCGCCTAGGCTGGAGTACAGTGGCGTGATCTTGGCTCACT +GCAAGCTCTGCCTCCCGGGTTCACACCATTCTCCTGAGTAGCTGGGACTACAGGCGCCCA +TCACCACACCCGGCTAATTTTTTTTTTTTTTTTGTATTTTTTTTAGTAGAGATGGGGTTT +CACCATGTTAGCCAGGATGGTCTCCATCTCCTGACCCCGTGATCCCCCTACCTCGGCCTC +CCAAAATGCTGGGATTACAGGCGTGAGCCACTGCGCCTGGCCTTATTTATTTATTTATTT +ATTTATTTATTTATTTATTTATGTATTTCTGAGACAGAGTCTTTCTCTGTTGCCTAGGCT +GGAGTGTGGTGGCTCAATCTCACCTCACTGCAGCCTCTGCCTCCAGGGTTCACATGATTC +TCCCACCTCGGCCTCCAGAGTAGCTGGGACTACAGGCGTGCACCACCATGCCTGGCTAAT +TTTTGTATTTTTAGTAGAGATGGGGTTTCGCCATGTTGGGCAGGCTGGTGTTGAACTCCT +GACCTGACCTCAGGTGATCCACCCGCCTTGGCCTCCCAAAATACTGGGATTATAGGCGTG +AGCCACCGTGACCAGCCTGTTCTTGTTTATGACGTGTTATTCCATGGAGGACTCTGTTCC +AGATAACTCTTTTATTTTGTGCCTAATGAACTTTGGTGTTAGTTCCAGATGTTGGCTACA +TGAAGGCAACCCCAGGAATGGCTTTGGTCATGTCTTCTGGTAAATATATGCACTTAATTC +TCTGGGCTGTGCACCTAGAAGTTAAATAAAGGACTGGGTCAGAGGATGTGTCTCTTAGCA +TGTTCTGCTGAACAGTGTTCCCAAGTGGTACCAGTTTACGTTCCCACCAGCAAAATGTGA +GGGAGTTCCACTTGCTGGACATCCTTGCCAACACCTGGTATTGTCAAGGAACCTGTCTGT +AACAGGCACTCTCGTTGATTTTGAATGCACGCTGAAATTTACTGGGTGCAGTGACAGTAG +GAAGGGAAAGGATGATTAAAAAAAGAAAAAAGGAATGGAAAGGCTGATCTAGGAGGGAGG 
+AAGCTCATAAGGAAATAAGTGGTAGGATTTGGTGATTCCATGTGTGAATGCTCGTATTCC +GTGTGCATGCGTATATGAGCTTGCAGGAGAAAGGGAGGAGGGAGGCACTGGTATGCACTC +CACCAGTTCTCCTGCCACAGCCTCTCTACTGATCTGAGGGCTGGGTGTAAGGTGATGATG +TGGGGAGTGGGTGTGGGTGAAATTTCCAACAGTCAGTGGGAGATGGGGGAAGCAAACCTG +GGAAGAGCAGCCTGCTGCAGGAGCAAGTGGGTGGGCCGTCCTGGAGGCTGGATGAAACCT +GTCTGCCTAGGTGGGGATGGCAGAAGTGGCAGAGCAGAGAACTCCAGAGCAGGGCTGTGG +TGGGCAGAGCAGATGTGAAGGAAGACTGTGGGATGAGAGGATGGGGCACGGTGGAGCCCC +ATCAGCCTCCAGTCACAGCAGCCCCGGTCCCGTCTCTCCCTCCTGTTTTGGTCAGTACCT +TCAGGACTTGTGGGTGTAGTACTTCGTTCCATTCCGGATAACATTGTAGAAGACTGAAAA +ACAGGAAGGTGTTTTCCATACTCTCTCCAGAGCTGGCTGTTACTGACATATTGATATATT +TCCACCAGTCTTGTGTGTGCTTATAGCTTGATATGTATGCATTTTAAATATGTAAATATA +TATACACATGTATTTTTAAAGTGAAACTAGGCTGAGACTGTGAGTTCAGTTTTGAGTTAT +AGGTTTCCCATGGCTTAGGTTGTGTTCTCTCCCTGGATGGAGTCTGGCTGGGACCTGCAT +GCTCACAGCCCAGGTTCACATGAGGTGTGGATCATGCACAATCATCCCACGGGCTTGAGA +TAAGCATTTGACTTCCTTTTTTTTTAAAAAAAAAGTTCATTATGGACTTTTTCAAGTGTT +CCCAGAAGTAGAGATAATAGAAAAGTGGTCTCCCAACTACTTTGAGAGTTAGCAACATAT +GGCCAGTGTTGCTTCACCCACCCTTCCTCATCACCTCCTGGACTATTTTAAATCACATCC +CAGACACCAGATGATTTCAACTGTGAGTACTTGAGTGCAGATCTCTAAAAGATCGAGCTT +CCCCACCCCCACCAAATACCACAGAGCCATGTGAGATCAGCATTTGGGTTGGCAGATCTC +CTCCTTGCCACATGCTTGCCTGGTGGTGAATGGGACACTTTCAGGCGAGACCTAGGGGAG +CCCCAGGCCAGCACCAGGACAGAGATCTCGAGGCCTTTCCAACTCTGCTGGTTCTTCAGA +AGTCAAACCAACAGATTGGAGTATGTGAAGGAAGCTCGTGTATTCTTCTGAACTAGCTTA +GACAACAGTACACAGTTTGAAGGGCAGCAGCCTTTTCTGCAGGTTAGTTCTTTCAATGAT +CTTGTTTACTGGGAGTTGAGAGACCTTGAGGATAGAGGCAAGAAAGCGTCATCAAGCAGC +CCAGAGGGGTGTACAGTGTTTGAAGTCTCCTGTATTGGTATGATTCGAGATTTTCATTCT +GCAGGACCAAAATACCCATTCCTACATGTGGGTGGAGAGTTAAATTCTGAAGTATATATT +TTTATCTTGTTAAAAAAACACTTTTATGTTTCTTTAAAAAGTAAAAGAACCAGAATGTAG +AAGTTTCAGTAAAGGAAAAAGTTACAGTTTGTTTGGGATACAGTTTGACAAATGTGCTCC +CAGCTGCCTGAGAGATAGAGAGGCCAGATGTTTGCTTTGGTTACATTGATGACTGAACAA +TCGGTATCTCATCATTTCCTGCCTTAGAGATGAAAGCACCTGGGCACTGGGGCAGTAAAT +TGGTTGCAGGATTGGAAGAAGTCCAAGGACAGTGTGGGATGCCACCCTGTAGCTGTCTAG +AGACCTAGTGAAGGTCAGAAGGCTGGGTGAGAACAAGCCCTGCACTGATGGAGGACAGGG 
+AAGGGGCTTTTCTGGCTGGGAGTGTGGTGAGGCATTTGAGCTGCAGCCAGAAGGCACCTC +TCAGATTGGTGGGGCTTGGGGTGGGGGTGGGAGGAACTGCTTTGCCTGTGCCGTTGGCCT +CACTCATCCTGAGGTCCCTTCAAACTCCATCCATACTCTGACTGTGGGGCCACTTGCAGA +AGGAATATGTGGCAGGTTTTTTTTGTTTTGTTTTGTTTTTTGTTTTGAGACGGAGTCTAG +CTCTGTCACCCAGGCTGGAGTGCAGTGGCACGATCTCGGCTGACTGCAACCTCCGCCTCC +CGGGTTCAGGCGATTCTCCTGCCTCAGCTTTTGGAGTCGCTGGGCTTACAGGCGCCTGCC +ACCATGCCCAGCTAACTTTTGTAATTTTAGTAGAGACGGGATTTAGTAGAGACGGGATTT +CGAACTCCTGACCTCGTGATCCGCCTGCCTTGGCCTCCCAAAGTCCTGGGATTATAAGTG +TGAGCCACTGTTCCCTGGCCTGTGGCAATTTTTTGAGACGCTCCTGGGGGTCCAAATTGC +TAATAGAGGAGAAACTTACTGTCCCTGTGTTGACTGAGACAGGAAGGTTTAGATAGGTGA +AAGTGGTCTGGGCGCAGTTGCTCACACCTATAATCCAGCACTTTGGGAGGCCAAGGTGGG +AGAATCGCTTGATGCCAGGAGTTCAAGACTAGCTTGGGCAACATAGTGAGATCCTGTCTC +TATAAAAAATAATTTTAAAAAATTAGCTAGGCCTGTAGTCCCAGCTACTCGGGAGACTGA +GGAGGGAGGATTGCTTGAGCCTAGGAGTCGGAGGCTGCAGTGAGCTGATTGCGCCACCAT +ACTCCACCCTGGGCAACACAGAGAGCCTGTCTCAAAAACAAAAGAAAGGTGAACATGAGT +TTTTTTGTTTTTTTTTTGACATAGTTTTGCCCTGTCCCACAGGCGTGGGCCACCACGCCC +AGCTAATTTTTGTATTTTTAGTGGAGATGGGGTTTCGCCATGTTGGCCAGGCTGGTCTTG +AACTCCTGGCCTCTCAAGTGATCCACCCACCTCGGTCTCCCAAAGTTTTGGGATTACAGA +CATGAGCCATTGAGCCTGGCCAAACATGACCTTTCTCTTTTTTTTTTTTTTTTTTTTTTT +GAGACGGAGTTTCACTCTTGTTGCCCAGGCTGGAGTACAGTGGCACGATCTCAGCTCACT +GCAAACTCCACCTCCCGGGTTCAAGTGATTCTCCTGCCTCAGCCTCCTGAGTAGCAGAGA +TTACAGGCATGCACCACCACGCCTGGCTAATTTTGTATTTTTAGTAGAGACGGGGTTTCT +CCATGTTGGTCAGGGTGGTCTCAAACTCCCCACCTCAGGTGATCCACCTATCTCAGCCTC +CCGAAGTGTTGGGATTACAGGTGTGAGCCCCTGCACCCGGCCACGTGACCCTTCTTAATA +GCTATTGTTCGCTGAGTATTTCTCTACGACACTCTTTATGTGCATTTTGTCATTTAATTT +TAATTTTAATTTTCTTTTTTTTTTTGAGACAGGGTCTTACTCTTTCACCCAGGCTGTAGT +GTAGTGGCACGATTTTGGCTCGCTGCAGCTTCCGCCTCCTGGGCTCAAGAAATCCTCCCA +CCTCAGCCCCCGAGTTTCTGGGACTACAGGCATGCACCACTATGACTGGCTAAGTTTTTG +TATTTTTTGTAGAGATGGGGTTTCACTATATTGCCCAGGCCAATCTTGAATCTTGAATTC +CTGGGCTCAAGCATGAGCCACCACACTCGGCCTCATTTAATTTTTAATTACTTTAAATTT +TATCAACATAATATATCACATAAACTGTAAAAAGACAAGAAACTACTGTGGTTCCCATTC +TGCCAGTTCTCCCCAGAGGTAGCAACTGCCTTTACATCTGAGCTTTCTTCCGGCATATAC 
+CTCTGATTCTAGGTAATAGGCACATGCTGCTTTTGCAACTTGGCGTTTTACAAATTATGT +TGGCTTGCTCTGTAGCTCCCTTCCATCGCCATTATCTCTGCTTCTCCTCCCTCATTCTTC +CAGTATTCTACTTTCTGCATAAATCGGTAATGAATGTTTACATTTTCTGCCTACACAAAT +GATATTCACAGCTGAGCATAGTGGTGTACTGCAATTGCTTGTTATTTTTTTCCTAGGAAA +GAATTGCCTTTTCAACACTATGCCCAGTTTTCTGTGTATCTATTCTTATTCTTCCTTTAT +TTTCCGGTTGCCTCTCAGTACAGTTATCTTCAGGCTCAGTGCTAGTGAGCTGTCAGTTTC +AGTTCAAACCCCTGGCCTCAAGTGATCCATCCACCTCGGCCTCCCAAAGGAGGTGGGCGG +TGACCCTCTTGGACCTCCCTTCTGGAACCCTCTAGGCAGTGGCCGCAGGTGTGGTGTGAC +AGCCCTCAGCGTGCTTGTCTGCCCAGACTGGGACTGTTGTCTTCCTTTGTTGGGTCCCTT +GTTTTCTATATCCTGTGTCTCATCCTTTGTGATTTATTCCCTCATTCTGCCAGCTTCCTG +AGAAGGGACTCCTGGGAATTAACTTTCTGGAGACCATGCCAGTTTGAGTGCATCTAGGCC +CCCTGGTGGGACATGCTAACTGGGGAATTTAAAATGATGTTACACTGCGCTGCCCTTGTG +CTGGCTTGTGCTTCTGTCTCCTGAGGTGCTAGCTCTCAGCCTCATATGACTTGTCTTGAT +CCCTAAACCCAAGTCCTGCTCTACTTGGGAAGTTCCTTTGAGTCATTTCAGGGGAATCTC +ACTCCCTACACTTCCTGTGCTGGAGTGTTTAATGTTCACTCTTTGGGCCTCTAGTTTTAT +CTTTTATCTCCTGTTTTCTATCTCTTTTACTTTTAGCTCTCCTCTCTAAGTGATTTCCTC +AATTTTTTCTTTGACCTTTCACTGCTAACATGCTTTTATTTCCAAGAACTCCTTCTTTTT +CCTTTTTTGTAATCTGACTTTTCCCTTTTTATAGCACCCATTCCTGTTAAATGGATACAA +TGTAGTATAATATTTATATATATTATAATAAATTCTACCGTTTTTTTTGATAAATTTTCA +CTTTATTGCCCAGGCTGGAATGCAGTGATGTGTTCATGGCTCACTATGGCCTCGAACTCC +TGGGCTCAAGTGATCCTCCCACCTCAGCCTCCCAAGTAGCTGGGACTGCAGGCATGCACC +ACCACTCCTGGCAAATATTTTTGTTTTTCGTAGAGATGGGATCTTGTGATGTTGCCCAGG +CTGGTCTCAAACTCCTGACTTCAGCTGGACGCAGTGGCTCACACCTGTAATCCTAGCACT +TTTGGGAGGCCGAGGGCAGGCGGATCACAAGGTCAGGAGTTCGAGACTAGCCTGGTCAAT +ATGGTGAAACCCCATCTCTACTAAAAATCAAAAATTAGCTGGGCATGGTGGCGCGCGCCT +GTAGTCCCAGCTACTCGGGAGGCTGAGGCAGAAGAATCACTTGAACGTGGGAGGCAGAGG +TTGCAGTGAGCCGAGATCGTGCCACTGGACTCCAGCCTGGGCGACAGAGCTAGACTCCGT +CTCAAAAAAAAAAAAAAAAAAAATCAAACTCCTGACCTCAAGTCATCCTCTCGCCTCAGC +CTCTGAAAGTACTGAGATTACAGGTGCGAACCACTGCACCTGGCCTAATAAATTCTACAT +TTCCTTCTTCTTGCTGAATTTCTGTTACCGCTAAATTACTCTTTTCTTGTGGGGTCTTTA +TTGGCCATGTTATGGCGTCTGGCTGCCCTTGGCTGGTTGCTCACTCTGAGAGGGACCCTC +CAAAGCCTAGGAGTCTCTTCCAGCACGTGGAGCTTGTCGACTGGGAACTTCATTGAAGGC 
+TCATCTGGCTGCAGATTCTGAGTCCTTCCTCCAGAACAATGGTCCCCAACCTTTTTAGCA +CCAGGGACCGGTTTCACAGAAGACAATTTTTCTACAGACCGGCAGATGAGGATGGTTTCA +GGATGAAATTGTTCCACCTCAGATCATCAGGCATTAGATTCTCATAAGGAGCATGCGACC +TGGATCCCTCACATGCACAGTTTACAGTGGGGTTCGTGCTCCTATGAGAATCTGATGCCA +CCACTTATCTGACAGGAGGCAGAGCTCAGGCAGTAATGCTTGCACACCCTCTGCTCACCT +ACTGCTGTGAGGCCTGGTTCCTAACAGCCCACAGACCTGTACTGGTCTGTGGCCCAGGGC +TTGGGGACCCTGCCCTAGAGCATGGTCTTCCAGACACTGGGCTGTGGGTGAAGACCTGGC +TGCCAGCATGCTGCTCACTGCGATGGAGGAGGCTGATGGGCCAGTGGGTCTCGGCAGTTA +GCGTGCATCGATCTAGTCACTTAGACCAGAGCACAGATGACCACACAACCCACCTCTCTC +AGTGTCCTACCTGTGCCTTTTCAACTGGGTGGGTTGGCCCCCTAGTCCAGTGACTATCCG +TTTCATGCTTTTCCAGAGAGTAAACCTCCTGTGGTCACCTGGTGCTGGAGTAGCTACCCA +CAGTGTAAAGTTGGGGAGTGAGGGATGCAGGGATATCTTTCACCCTCTGTCCCAGAGCAC +TTGGTGCAGCCTGGCCTCAACCTGGCAAAGACCTAGGGCTGTCACTGGGTGGGCGCACCC +CCTCCTCACTGTTACTTAGGACTCTGGTTTCTTGGTCCTCTTGCTGTGTACGTTACCACT +GGGCCATCAGCTTTCCAGCTTCTAAAAGGCTGTTGAGGCCACCATTTCCATTCTTCCTGG +TTTTTTTTTCTGTGGTTTTAGTCAGGTTTTAAGATGGAAGGGAATTAAATGTGCAGGCTT +TGTTCCTCATTTCAACTTGGAGGTTCATATCTGTTATTTATTTATTTATTTAGAGACAGA +GTCTCACTCTGTTACCCAGGCTGGAGTGCAGTGGTGCTACCTTGGCTCACTGCAACCTCC +ACCTCCCTGGTTCAAGCGATTCTCTTGCCTCAGCTTCCCAAGTAGCTGGGATTATGGCCG +CACACCACCATGCCCAGCTAATTTTTATATTTACGTATATATTTTTTTGAGACAGAGTTT +TACTCTTGTCACCCAGGCTGGAGTGCAATGGCTTGATCTTGGCTCACTGCAACCTTCACC +TCCCAGGTTCAAGTGATTCTCCTGCCTCGGCCTCCCGAGTAACTGGGATTATAATGCACG +CCACCATGCCTGGCTAATTTTTGTATTTTTATTTTTTATTTTTGAGACAGAGTTTCACTC +TTGTTGCTCAGGCTGGAGTGCAGTGGTGGGATCTTGGCTCATTGCAACCTTTGCCTCCCG +GGTTCAAGCCATCCTCCTACCTCAGCCTCCCGAGTAGCTGGGATTGCAGCGCCCACCACC +TTGCCCTTTTGTATTTTTAGTAGGGATGGGGTTTCACCATGTTGGCCAGTCTGGTCTCGA +ACTCCGGACCTCAGGTGATCCACCTGCCTTGGCCTCCCAAAGTGCTGGGATTACAGGCGT +GAGCCACTGTGCTCAGCTAATTTTTGTATTTTTAGTAGAGACGTTTCACCATGTTGGCCA +GGCTGGTCTCGAACTCCTGACCTCAAGTGATCCACCTACCTCCGCCTCCCAGAGTGCTGG +GATTACAGGCGTGAACCACCACACCTAGTCCAAATCTTTTTTTTAATATGGGAAAATCTT +TTTTCCTTTATAGTGTCTGCTTCTGGTGACATGCTTAGGAAACCATCATCCTGATATTGT +ATAACTCTCCTTCTGTTGTCTTGTGGTCCTCTGTGATTTAGGTTTTTAAACACATAGGGT 
+AAAGGGTCCCACATACAAGCTGTGGGCTCCTGTCCCAGGTGGGCAAGCAGGCTGCACACA +CAGGCAGCAATTTACTGGGAGGCTTCTTGGGTGGGGTGAAGTGGAGCTGCCAAAAATTTA +TTCTGCCCTATGAGAGTTCACTTGATTGATAGATGTTGTGTTTCTCAAAATTAGAAAATT +AGAAGAAACTCACAGAAGTTTCCTTCGAGCAGAACATAAATCGGCTGAGAGGAATCATCC +TGCCCCCCTGAGGTCAGGAGGGTGAATCTGTGCCCCAGGGAGGAACGCACCTTCCTGCTT +TAGACACCAGCAGAGGACAGAGAAGCTGGCTGTCCTGGGAATTCATGCCCTCATTCAGCA +ATGGGAAAGCAGTTGGACCAGGGAAGGGGGCCTGGCAGTGTGGCAGGGGGAGCAGTGGTA +AAGAGGGTGGGGTCAGGGGAGAGGCCTCACTGAGAGGGGAGAGGGTCCTTGGTGGGAACC +AGGGCGGTTGTGGAAGGAAGAGTCAGGGCGTGGAGCAGAGCAGAGCGGGAGTTGGGGTGG +GGAGGCCAGTTCATGCCTCATCAACATGCCTGTGTGAGTGTCCTTCTTGCGGGCGGTGGA +GCTGGCCTCAGGGTACACTGCAGGTGACTCAGAGCTCCAATGGGAGGGTGGCTCTGCCTG +AGGGAGCTGCACATGCTCATTGGGCCATCGTCTGCTGAGGAGGGGGCAACAATAACCTCA +ACACATGGCGGAGGTGTTTTGGGATGCATCCTATTGCACATTTGGGCAGGTTGTTGCCTG +ACTTATTTCTTTAACGCATACCTAGCACTTCCTGTATGTCCAGTGCCTTATAATATTAAC +TACTATCATCCTCAGGGCTACCCGTGAGGTCGGCACTGTCCCCACCCTTTGTGCAAAGGA +AGGATAGGGAGGCGCTGCCAGATGGGTTGAGGGCAGACCGGGGCAGCTTGCCTCAGGCCT +CCCTGGCCTTAGCAGGCTTTAAGTTTGATGCCCTGTGCTCCTTCCCTAGGAAGCTCAGTT +TGCCAGTTTTTTGCTTTATCTTAGTCTCATTCAGATTACAAGGACTGGGACTGGACTCCT +GTTGCCAGTTTCTCAATTTGGAATAGGAAGGAGGTGAGGAACAACAGGCCCATGCTGGGC +AGAAGCCCTGCAGACCAGGCCAACAGTGGGCCTCTCCTCACTGCCTGGCCCTGTCTGTGC +ACCTTCGTCCTGGCCACAGCCTCTCAGAGCCCCTCCTTTATGAAGAGGTTACGCTTTAGG +GGAGGGAGTGCCTCTCCCAAGACCCAGAGCTGGTGAGTGGCTAGGTTGGAATTTGAGCCC +AGGTCCCTCCTCCCCTGTCTGCTTCCTGCCAGGCCCAGCTGCCTGCCCCAGCACACGCCT +TCTCCCAGGCACCCTGGCCTCCACTGCCAGGACGTGCCTGTTTGAGGCCTGGATTCCCCC +ATGGCTGCCACGCTGATTCTCCGTTCTGCACGTCGTGTCCCATCAAAATGCATGACAGTA +CCACTCCTTGTGTTAAAAAGAGCCAGTTATTGTGCCTGGCAGTGATGACTGCCCACTGCA +CACAAAAGTGGACGCCGTTGTCTCTGCTCTCAGGGTGTTCTTAGTATAGTGGGGCTGGCA +CACAGAAACCAGAGATTCCTCCAGGAAGTGCCACAGACATGGGTGAGGTGGCTACAGGGT +TTCTGTGCAGCCGACAGCAGGGGTGCCTGGGTGGAAGCAGGGTGGGAGGCCCTAACAGGC +CTGATGGGATAGTCAGGCAATGTGCCAGGTAGTAGGTCATCATCCCTGGGGCCCTGGGGA +GAGTGTGTGGGGCAGGCGCAGGAGCAGGCTCCTCAGTGGGCTCCTGGACAGGCTGCCTGC +TCTGCTGGTGAGGAGCGAGGCTCAAAGCAAGCAGGGGCCTGGGTTGCTTCCCACACTCAG 
+TGGCAGTTGCTCACTGGCAGTCATGTGACCTGGGGAGCTGGCGAGGGCGTGGGGGTAGGA +ATGGGAACGGGAGACAGAGGCCCTTTGGGAGGAATTGCCACAGATGCAGTGGGAGTTGGG +GCAAAGATTTGAACTTTGGAGAATTAAAGGTGGCAGTCAGAAAGTGAAGGGAGTTCCCAA +GTTTCTGGGATGGGCACTTGGTGCTGGGAGATGAAGACACGAAGAGCTTTTCTTGATGTA +TCCCTGCTGGCTGCTCAGGCAGGGTCGGGCAATGCCATTAAGACACACTGTGTTCTGGGA +ACAGTTCTGGACAGTAAGGACATCCTCAGACCCCTTAGTGGGGGAGCTCCAAGGGCCTGT +AATGATGAATCGGTCCCCTGGGAGGCTTCAGTCTAAGCTAGATACCAGTTGAACCCTGAC +TGAGTACACAGACATAGCTATGGCAACCAGCTAGACCATGGCCAAAGGCAGAGCATTGGA +GCCCAGTTCACAGCAAGGGACTGTGTGCTCTGAGCCGGTAACAGTCCTTGTGTTCAGAGG +TCTGGCTGGTCGCAACCAGGGAGTGGCCCCCAGCCCCCTTCCTCAGCTAGAGCGAGCACC +ACCCAGGCAGCCTTGGGTTCTTTGGTGGCCGCCTCATAGGCCTGTACTGTTTGTGAACCT +GTTGCTTATGTTGGACAGGACTGTGGCAGGACAGCCATCAAGGGTCTTCCCTTGTGAGCT +GACACCAGTCTGCTCTTTGTCCTCCTCTGGCTGGACAGGCCCTCGTCTTCCCGGCTTGCT +TGTGTGGGGGAAGAGCTCACTGTGACATGTGGGTTCTCTGTTCTTTGTCTCAGGCTGTGC +TCCTTACTCCATGTTGGGGGCTGTCTCCTTGTGTGATAGTTTCCTTTCATTTCGTGACAG +TGGCTCGAAGGGTTTTGTTTTCAGGAGACAGAGACTGAGGAGTAGCCATTCGGGTCTGGC +TGCTCAGGAAGGCATGCTGGGGATGGCAGCAGAGCCCAGCTCAGGGAGATCAGAAGGGGG +CTTTGGGAGCAGCCAGTGCTGTTCTGAAGGTGGGCCTGGGTCCTGCCTGCAGCCTCCCGC +AGCGGGTGTGTTAGGTCTAAGGGAGGAAGGAAGGGGACTTGACCTCAGCAGGAGTGTGAA +ATGGGTACCCTCCTGGAGTCAATGTCCAGGCAGGGGATGCCTCTGAAAAGCGCCAGGCCA +GCTGGAGAGGAAAGGGCAGGCATCTGCAGGGAAGTGTGGGGGATTAGCTGCAGAGTTCCT +GGGTTCTTTTTCGCTTTGTCTCTATTTTGTGACACTTGATCTTCAAACTAGAAACAGGAG +CAAAGACATTTTCAAAAATACAGTTGTCAGAACTGCAAAACCACACAGGGTCTGTTACTG +GCCACCAAACCTGCTGGGCCTCCCTGGGTGGATGCTGTGGGAGAGATGTGGGAGTGACAT +GGCACAGTGCTGCTGTTCTGAGCGGTAGCCTTGCTGGAGGAGCGCCTTGCCGGAGGAGCA +CCTTGCTCCTTGGCCCCACTCTGACCCTCCGAAGCTCATGTGGAACTGGACATTTACCCC +AACTCGGTTTCCTGGTGTTGGCCTGTCAAGACTTGCCTGTTTCTGGCCTCCAGCTTGGTC +CATGCTCCTCAGGCTCTGAGTAGCTGACGGTGCCCACAGTGTCCATGTCATCTGTGACAT +TACCCAGGACAGGACAGGGCCCGTCTGCCAGTACATCCCCTGGTGTATGGTGCCACCTTG +CCTTCAGCTCACCAAGCATATTTCTTCAACCTGGTCATCCCACCACCTCAGAGCTATTGC +ACTAAGCTGTGTTTCTAGATCTTGTCTCTCTGACTGTGCCAGGTGTCTCTGCTGCACCAA +GACAGTGTCTGTCTGGGGCCTGCCAGTCTGCTGACACTCTCAAAAACGGCCAGTAGCAGC 
+TCAAAAGGACTGCTTTTGTATCAAAGCCTGGCTTTGCTCTCTGATCGCTGCTGGTGGGCA +CAGAGCCCATCTGTGTAACTGGTTGTCACTCCCCAGACTCTTCTGCACCCTTCTTCCTCT +CTGACTCCTGGCCACATCTGCCACCCACAGCCTCCCCACTCCTTACACAGCCAGCCTCCA +GGCCGTCCTGCTTGGCCTGCAAATGCTCCCCTGCTGTTTCTACTGCTCTTCCCTGAGTCT +CCCTATTGGTCCCAGTGCTTTGGGAGGCCAAGGTGGAGGATCACTTAAGGCCAGGAGTTT +GAGACCAGCCTGGGCAACAAAGCAAGACCCTGTCTCTACAAAAAAAGTAAAAAATAAAAA +AATTAGCCAGGCGTGGTGGCGCATGCCTGTAGTCCCAGCTATTTGGGAGGCTGAGGTGAG +AGGATACTTGAGCCCAGGAGTTTGAAGCTGCAGTGAGCTGTGTTTGCCCCACTGCGCTGC +ACCTGTGAGAGAGTGAGACATTGTCTCAAAAAACAAAAGAAGAAAAAGAAAAAAGGAAGG +TGGGAAGAACAAGTATGCCTTTGAAGGGCATACCTGCCTTGAGGGGCACAGATGAGGCAA +CCTTCAGAAAACAGGCCTGTGCAGCCACTTCTATCAGAAATGCTCACTGACTTCTTGGGG +CCTAGCAGCTATCGTCTCAGCCTCTGGGTGTGGAGACCCTGCCCCATGCTAACCTGAGAG +CATGAGTACAGGGTCCTCCCAGCCCACCCATGCCTCGCTTCCCCAGGACTCTGACCCAGT +GCCGCGCGTGAGTTGGTTGTCATTATCCTCCTGGATTGTTAACTCTCTCATCTTTTGTGG +CCCTGGTGACCTTTCTTACTTGTCTGTTCCACACCTGGATGCCTTGTGAGCAGAAACCCC +TTGCTGCCTCCTCGCCTCCCTTCAGGCTTCAGGAAGCAGACAGGGAGGTAGGAACTCCAA +GAGTGCAGGAGTCTCATTCTGTCGCCCAGGCTGGAGTGCAGTAGTGCCATCTTGGCTCAC +TGCAACCTCTGCCTCCAGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGG +ACCACAGGTGCCTGCCACCACACCCGACTAATTTTTGTATTTTTAGTGGAGACAGGGTTT +CACCACGTTAGCCAGGCTGGTCTCAAACTCCTGACCTCAGGTGATCCACCTGCCTCGGCC +TCCCAAAGTGCTGGGATTACAGACCTGAGCCATCGTGCCCAGCCCAAGAGTGCTTTTTGA +ATCTAGCAGGGTGGTAGGTCAATGTCATTTTGTTTTTCTTTTTACATTAAGACAGATGTA +TGCACCTCTCAGCATTAAACCTCCTTTCTGTGATGTCTTTAAGAATCACAGTCATATTGC +TGTAGCCCCGTCTGTACATTCCTTCAGTTTCCTGAGGTTGGCTGGTTGCTGTTTGGAGTG +GTCAGAATGCTTTCTAGCACGAGCTGGACTGGGCCTATTCAATTCACTCAGAGGCAAGGC +CCCAGAGCCCTGACATGTTTGCTTGTTTCTTTTTTCTTTTGCTTTCTTTCTTTTTTTTTT +TTTTGAGACGGAGTCTCGCACTGTTGCCCAGGCTGAAGTGCAGTGGCGCAATCTCCGCTC +ACTGCCAGCTCCGCCTCCCGGGTTCATGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTG +GGACTACAGGCGCCTGCCACCATGCCCAGCTAATTTTTTGTATTTTTAGTAGAGACAGGG +TTTCACTGTGTTAGCCAGGATGGTCGTGCTCGCCTGACCTCGTGATCTGCCCGCCTTGGC +CTCCCAAAGTGCTGGGACTACAGGCGTGAGCCACCGTGCTCAGCCTGCTTCTTTCTTTCT +TTTTTTTTTTTTTTTTTTTTTTGAGATGGAGTTTCGCTCTGTCACCCAGGCTGGAGTGCA 
+GTGGCGCAATCTCGGCTCACTGCAACCTCCACCTCCCGGTTCAAGCGATTCTCCTGCCTC +AGCCTCCTTTGTAGCTGAGATTACAGGCATGCGCAACCACACCCTGCTAATTTTGTATTT +TTAGTAGAGACAGGGTTTCACCATGTTGGTCAGGTTGGTCTTGAACTCCTGACCTCAGAT +GATCCACCCGCCTCGGCCTCCCAAAGTGTTGGGATTACAGGCGTGTGGCACTGCACCTGG +CCACTTGTTTCTTTATCAGCAAGCCCAGGGTGGGAGGCCAGGCAAGGAAGAGACCCATCC +CACTGGCAAGAAGGCAGGCAGTGAGCACACCAGAGCAGTCCACCCCTGGGAGCCACAGGC +CAGGCCTTTCTAACAGATTCACACTTGTTTTTGTCTTTCTGTGTGTGAGTGTGGGAGGTG +GGGGTGGGGGTGGGTGGGGCAGCATGTGCTGCATTCGAGGGGTCTCCACCTTAACTGATG +TACATGGCTCTTTATCCCCGTGGAGCAGCAGTGTGATGAAAAACTCTTCATAGGATTCTA +TCTCTTAGTACAAAATTGTTGGTAAATGGGTTTTTAAAGATATTTTTCACATTTATGTGG +TCTTTCTCTTCTTGTCTCTAGCTAGAAGTGAAGTTGGAGCTTGTCTCTTTGATCCCACAT +ATAGGACCTGCATTGTCCCACATAGGGACAGACCCTTGCCTTGGGAGGGACTGAGAGATG +ACTTCATGTGGCCCCTTCATTTTATAGGAGTAATCGAGTCTAGAACCAGGGTGCTACCAT +CTGGCAGAGTTCATGGGGAACGCAGGCCACTCAGAGAGCAGGAAATAGAGGCCAGCTGGC +CACCCAGTAGTCTAATGCGTACTTCCTTGATCCTGAACTGGTTGTGAGCCATCCCTAGGG +GCAACCCCTCCCCCGCCCCGTCCCTTCATGCCAGGGCTCTCTCTTTGCTTCATGGCGACC +ACCTTTCCTGTCCCTTTTTTCTGATCACCCTGCTCAGTGGCCTGTCATTGCTCTCTGTTG +AGTGCCCCTTCCCATATGCCTCTCCTGGACCTCAGCCAGGCTGGTGTGCTGGCGATAGCA +CCTTGTCACCAGGGGCAGCTGCTATGCAGGGCCTGTTGGCCAGGGCCCTGCAGCGGTGGA +GTCACTGGTGTGTGCAAACGTCTCTTCTCCTTTTGTGTTTCAGCGAGGATGCCATGAGGA +AAGCTGGTGTGGCACACAGTAAATCCAGCAAGGATATGGAGAGCCATGTTTTCCTGAAGG +CCAAGACCCGGGTAAGGCCCTAGTAAGTGGAGCCACTCTGGCAGAGAGACTTGGAGAGGA +GAGCATCACAGACTCTGGAACCCCTCTGTTGGGTGTCCTAGGGTGTAGGGGTGGGGTGGT +TGCTCATCGATTGATCACAGGCACAGTTGGTTTTGCCTGCAGCAGTCTGGGTCCTCCTGT +GTGTCCATGGTACAGGGAGGGCACTTGGGTTGCAGGCCCAGCCTCTGGCCCCAGACCTCA +CTGCCGTCACCTTCCATCTGTGGTCAGCCCCTGTGAATCCTACAGGAAGCTTGCTCAGGC +CCCTAGCTGGTCCTGTGGAAGGGATCAATGCCGTGAGCAAGAACCTGGTTTGTCCTCAGC +CCCTCCTAGGGGTCCTTACCTAAGCCCTACAGGGCCCTGAGGCATGGGCCCAGAAGCATT +CCTGTCCCCTTTTGCTAAGTGAAGAAGGTGATGCTTATCTAGAACTCGCCACAGGTGGAG +TGGAGAAGAAGGATTTCCACTATATGTGGGCTTTCTCCCTGGGTGGGGCACCACACTGAG +GGCTTTATGTGACCACCTCCTTTAATCTGTGTGTGCTCCCTGGGACAGAACCGTTTTACT +AGAAATGAACAAAACACATATTCACTTGTGTGCACGTGTGTGTGCAAGGCAGAGCTGCAA 
+GAGCACAGGGACTGAGGTGGGGTAGAGTGGGCTCTGTACTATGGACACGTGTGACTGAAG +TGGGATAGAGTGGGCTCTGTATTATGGATACGTGTCACTGAGGTGGGGTAGAGTGGGCTC +TGTACTACGGATACGTTTGATAAGCTTTTTCTTTTTCTTTAAGAGCTTTATACGGTTCAC +CCTTTAAAGTTTACATTTCAGTGGTCTTTAGGATATTCACAGAGTTGTGTGGCCATCACC +AAAATCTAATTTTGGAACATTCATTAATAAATAAAAAGAGGTGCAGCTTCCCTGTGTTGC +CCAGGCTGGTTTCAAACTTTTGGCCCTACGCCATCCTCTTGCCTAGGCCTCCCAAAGTGC +TGGGGTCACAGGTGCGAGCCACCAAGCCTGGCCTAATTTTAGAACTTTTTTTTTTGAGAC +AGGGTCTCACTCTGTACCCAGGCTGGAGTGCAGTGGCGTGATCTCAGCCTGCTGCAGCCT +CAATCTCCCGGGCTTAGGCGATCCTCCTTTCTCAGTCTTCTGAGTAGCTGGGACTACAGG +CATGTGCCACCACACCCGGCTAAGTTTTTTATTTTTTGTGGAGACGAGGTCCCACTATGT +TGCCCAGGCTAGTCTTGAACTCCTGATCTCAAGCTATCCCCACGCCTTGCCCTCCCTAAA +TGCTGGGATTACAAGTGTGAGCCACCATGCCCAGCCTTCAACATTTTTTAAAGAAACTTT +ATACCTGTTAGCAGTCACACTCCCTTTTCCTTCAACCCTCAGCTACCACTTATACTTTCT +ACTGCTGTAGATTTGCCTATTCGGAATTTTCCATATAAATATAATTGTACAACATATGGG +TTTTTTTTGTTTGTGCGTGTGTGTGTGTGACAGTTTTCCTCTTGTGCCCAGGTTGGAGTG +CAGTGGCACAATCTCAGCTCACTGCAACCTCCGCCTACCAGGTTCAAGCGATTCTTCCGC +CTCAGCCTCCCAAGTAGCCAGGATTACAGGTGCGCACCACCACACCCGACTAATTTTTTG +TATTTTTAGTAGAGACTGGGTTTCATCATGTTGGCCAGGCTGGTCTCAAACTCCTGAGCT +CAGGTGATCCGCCCGCCTCAGCCTCCCAAAGTGCAGGGATTACAGGCATGAGCCACCGCA +CCCAGCCACGTACAATATATGTTTTTATTTTGCAGCTGGCTTATTTCACTTAGCATAATA +TTTTCAAGGTTCAAGGTCCATGAACCATGTTTGGTATATACTTTATTCCTCTTTTGTTTT +TTTTAGATGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAATGGCACGATCTCGGCTCA +CTGCAACCTCCACCCACTGGGTTCAAGCAAGTCTCCTGTCTCAGCCTCCTTAGTAGCTGG +GATTACAGGTGTGTGCCACCATGCCTGGCTAATTTTCGTATTTTTAGTAGAGATGGGTTT +TGCATTGGTCTGGGGCTATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAGGTGATCC +ACCTGCCTTGGCCTCCCAAAGTACTGGGATTACAGACGTGAGCCACCGTGCCTGGCCAAC +TTTATTCCTTTTTATTGTCAAATATTCCATTTCTGGATGGCCATATCACATATTTTATAG +CCATTCGTTAGTTGGTGGACATTTACATAGTTTCTACTTTTTGGTTATTATGAGTAAGCA +GAACAATAGTTAATGTATAAATTTTTGTGTGGGCTGTGTTTTCATTTCTCTTGGGTATAT +ACCTAGGGGTAGAGTTGCTGGGTCATACCCTACCTCTGTATTTGACCTTTTGAGGAATTG +CCAGATGTTTCCCAAAGCAACTGTACTATTTTAAATTCTCACCAGCAGCTTAAATGAGGT +TTCAATTTCTCCTCATCCTCTCCAATACTTGTTATTGTCTATTTTAATTGTAGCCATTAT 
+AATGGGTGTGATTTGCATTGTTTTGATCTGCATTTGCCTAGTGACTAATGATGTTGAGCA +TCTTTTTCATGTGCTTTTTGGCCATTTGTATATTTTCTTTGGAGTAATGTCTATTCAGAT +CCTCTGTCCATTTTTAAATTGGGTTGTTAGTCTTTTTATTGTTGGGTTGTAATAGTTCTT +TATGCATTTGGATATAAGTCTGTTATTAAATATATAATTTGCAAATATATAGGGGGGATT +TTATTTATTTATTTTTATTTTTGTTTTTTTGAGACAGTCTCGCTCTGTCGGCCAGGCTGG +AGTGCAGTGGAAGTGGAGGGATGCAGGGGGTGTTAGTGATGCATTGTAATCATCCCGGAG +AGCAGGACACCTCTCATAAGAGGTTTACCCAAGGGGGGCTGTGGAAAGGGCTAGAAATTG +CTCCTGGAGGACGTGGAAAGCCACATAAGGTTTTTGACAGATTAGGGAAAAGGCTGTGCC +ACTCTTTCTGAAGATGACAGTGTAGGCAGCCTTTCCATTTTGTGCCAAAAATAAAATTTT +AAATACATAAATCACAGTAGTTTCTAGGACAACTGGGTAACTACATACAAAATATTGAGG +TTGAGCCCCAACTTCATACCCTATACAAAACTTAACTCAAAATAAAAAACACCAGCCAGG +CATGGTGGCGCGTGCCTGTAGTCCTGCCTAATCGACAGGCTGAGGCTGGAGGGTCACTTG +ATCCCAGGAGTTTGAGATTACAGTAAGCTATGATTGTACCAACATGTTCCAGCCTGGGTG +ACAGAGTGAGACTCTGTCTCTAAAAAACTTTTTAAAAAATTAACTCAAAATGGGTCAAAA +ACCTAAATTTACAAGCTAAAACTATAAATATCTTAGAAGAAAGCAGAGGCGTACATCTTC +ATGACCTTGGATTAGGCAGTAGTTTCTTAGATATGATGCCAAAAAAGCACAAGCTACAAA +AAACGTAGATAAATTTGACTTCATCAAAATTAAAAACTTGTACATCATAGGACACTGTCA +TAAGAGGGAAAAACAACCCATGGGATGGGAGAAAATATTTGCAAATCACATATCAGGTAA +GGGTTTAATATCCAGAATACATAAAGAACTCCTACAACTCAACAACAACAACAACAACAA +CAACAAAAAGCAACTAAAAAATAGGCAAAATACAGCCAGGCGCGGTGGCTCACACCTGTA +ATCCCAGCACTTTGGGAGGCCGAGGTGGGCAGATCACGAGGTCAGGAGATTGAGACCATC +CTGGCTAACATGGTGAAACCCTGTCTCTACTTAAAATACAAAAAAAGGAAAAAAAATTAG +CCAGGCGTGGTGGTGGGCGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATG +GCGTGAACCCGGGAGGTGGAGCTTGCAGTGAGCCAAGATCATGCCACTGCACTCCAGACT +GGGTGACAGAGTGAGACTCCGTCTCAAAAAATAAATACATAAATAAAATAAATAAAAATA +AATGAAATAAAAAATAGGCAAAATACTTGAATAAACCTATCTCCAAAGAAGAGATACAAA +TGGCCAACAACCATGTGAAAAGATGCTCAACATCACTAGTCATTAGGGGATTGCAAATAA +AAACCACAATGAGATGCCACTTCATTTCCATTATAATGGCTATGATTTTTAAAAAAGACA +ATAATAAGCGTTGGCAAAGATGTAGAGAAATTGGAATTCTCTTTCTTTTTGAGACAGTCT +CGCTCTGTCGCCCAGGCTAGAGTGCAGTGGCACAATCTCGTCTCACTACAGCCTTCGCCT +CCCAAATTCCGGTGATTTTCCTGCCTCAGCCTCCTGGGTAGCTGGGACTATAGGCACACA +CCATCACACCCAGCTAATTTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCGCCA 
+GGCTAGAGTGCAGTGGCGGGATCTCGGCTCAACGCAACCTCCGCCTCCCAGGTTCAAACA +ATTACCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCATGTGCCACCATGCCCAGC +TAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGATGGTCTCGATC +TCTTGACTTTGTGATCCACCCACCTAGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGC +CGCTGCACCCAGCCAATGCCCGGCTAACTTTTGTATTTTTAGTAGAGACAGGGTCTCATC +ATGTTGGCCATGCTGGTCTTGAACTCCTGACCTCAGGTGGTCTACCCATCTTGGCCTCCC +CAAAGTGCTGGGATTACAGGGGTGAGCCACCGTGCCTGGCAGAGAAATTAGAATTCTCAT +ACATTGCTGGAGAGAATGTGAAATGGTGCTGTGGAAAACAGTATGACAGTTCATCAAAAA +ATTGAATGTAGAGTTACCATACGACCCAGAAGTTTCACTCCCAGGTATATATCCAAGAGA +AAAGAAAACATGTCCACACAAAAATATATACACAAATGTTTATAGCATCATTATAACATC +ATAAATAGCCAAAAATGGAAACAACCCAAATGTCTACCAGCTGATGAATGGATAAACAGA +ATGTAGAATATCCATACAATGAATATCATGTGGCCACACGAAGCAATGAAGGGCTGATAC +ATGCTACAACATGGATGAAGCTAAAAACATTACATAAATGAAGCAGTCACAAAGGATGAC +ATGTTGCATTTTTATAAAATGTCCAGAGAATACGAAGATCTATAGAGATAGTGTATTAGT +TTCCCATGGCTGCCATAACAAGTTACCATAAAGTTGGAGGCTCCTATCTCTGGGTCCATG +GCTCTGACTTCTGAGCCCATGTCTCTGCCCTCACTTTCTTAAAGGGTAGCACATGTTTGA +GGCTGCGTAGTTTTATCAGCCTATTCCTGCTTCTAAAATTTTGGGAGTTCTTCTTTGATT +TTGCCTAGTCTATAAACTTTCTACTCCAAGCTGGCAGTGTTTCTGCTGGTAGAACATTCT +CAAAATCCTTGTGGAACTTCTACATATATGATGGAATTCATAGTATTAGATGAGGTTTCT +CCACAGATCTTTCCTAGATAATTTCATCTCTCTTCCTCACTTCTGTGAGATGATTGAGGG +GATTCATGAGTCCCATGTTTAATCTCTTTGGCACTAGAAAAATTTGGAAGATATGCCACA +CCCTTGGCATTCTCTACAGAACACACTTTCCTAACAGTAAATCTCCTAGTTTTAGCATTT +TGGTTTTTTCCAATCTAAATAGCCTGAGAATTTCCCAAATCATTAAGTCTAGATTCCTTT +CTGCTTAACAGTTCTTCCCTCAATTTCTCTCTCTTCTTTGTGTGTGTGTGTGTGTGTGTG +TGTGTGTGTGTGTGTGTGTATTTTTAAAATTCTTTTTATTGTACCCTTAATTTATCTCTT +TCATGTTGCTTTTTTTTTTTTTTTTTTTTTTTTTTGGAGACGGAGTCTCACTCTCTCGCC +CAGGCTGGAGTGCAGTGGCGTGATCTCAGGTCATTGCAACCTCCGCCTCCTGGGTTCAGG +TGATTATCCTGCCTCAGCCTCCCAAGTGGCTGGGATTATAGGTGCCCACCACCACACCCA +GCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTGGTCTTGA +GCTCCTGACCTCAAGTGATCTGCCTGCCTTGGCCTCCCAAAGTGCTGGGATTATAGACGT +GAGCCACCATGCCCGGCCTCCTCTTGCATTTTACTACAAGCAGAAAGAAGAAACCAAGCT +GAGTCCTTTTTTTTATTTTTTTGAGACTGAGTCTCACTCTGTCGCCCAGGCTGGAGTGCA 
+GTAGCGCGATCTCGGCTCATTGCAACCCCCACCTCCCAGGTTCAAGTGATTCTCCTGCCT +CAGCCTCCCAAGTAGCTGGGATTACAGGTGCCCACCACCACGCCCGGCTAATTTTTGTAT +TTTTAGTAAAGATGGGATTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAG +GTGATCTGCCTGCCTCAGCCTCCCAAAGTGCTGGGATTATAGGCATGAGTCACTGTGCCC +GGCCCAAGCTGAGTCTTGAATGCTTTGATTGGAAGTCTGCTCAGCTAAATGTGCAAACTT +ATTGCTTATGATTCTGCCTTCCACACAGCTACAGGACACAATCCAGCTAAACTTTTTGCC +ATTATATTACAAAGATCACCTTTGCTCTAGTTTCCAGTAACATGTTCCTTGTTTCCTTTG +TTTCTTTAGAATCACCTTTAATTTTCATGTTTCTAACAACATTCTGTTTTTGATAGTATA +TGTATTCTTGAAGATAATACAGACTTTCTGTATCAAGTTCGTCATTTCCTTTAAGGCAGA +GTCTGGGTGACAGTCTTTAACATCCGCATTTCTACCCACAGCCTGTTCAAGGAAACCTAG +GCCTTTTCTATCATGCTTCTCACAATTCTACTAGCCCTCCTTTGTTATTGAGTTACAAAG +CCACTGCCATATTTTTAGGCACCCTACTCCTGGTAACAAAATTCGTATGTTTCCTGTGGC +TGCTATAACACGTTACTACAAGCTTGGTGGCTTAAAAGAACAGAAATTTAGGCCGGGTGC +GGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGCAGATCACAAGATC +AGGAGATCGCAACCATCCTGGCTAACATGGTGAAACCCCCGTCTCTACTAAAAATACAAA +ACAACTAGCCAGGCGTGGTGGCGGGCGCCTGGAGTCCCAGCTACTCGGGAGGCTGAGGCA +GGAGAATGGCTGAACCCGGGAGGTGGAGCTTGCAGTGAGCCGAGATCGCGGCACTGCACA +CCAGCCTGGGTGACAGAGTGAGACTCTGTCTCAAAAAAACAAACAAACAAACAAATTTAA +TCTCACAGTTTTCAAGGCCAGATGTTCAAAGTCAGTACCATGGGTAAAAATCGCAGTGGC +AGCAGGGAGGCTATAGGAGAGGAACCTGTTTCTTGCATCTTTTAGCTTTTGGTCACTGCT +GGTACTCCTTGGCTTGTGGCTGTATCACTCCAGTCTTCAAGTCCAGCATCACGAGGTAAA +TACATTCTCTGTATTCCATCTTCACGTCGCCTTCTCTGTGTGTGTGAACTTTCCTTCTGC +CTCCCTCTTAGGAGGATACATGCAATTGCACTTGGGGTCCACCCGCATAACCCAGGATAG +CCCTCCATCTCAAGATGTTTAACTTCATCATATCTGCAAAGACCTTTTTCTCAAATAAGG +TAACATTTACAGGTTCCAGGGAATAGGACCTGATTTAAGGACTGTTATTTAGCCTGCCAC +AGACAGAAAGTTGATTATTGATTGCCAAGGGTGCTGTAGGGAAATGGAGTTTTCATTTGG +AGTGAAGATGATGTTCTAAAATTAGGCCAGGCACGGTGGCCCATGCCTGTAATCCCAACA +CTTTGGAGGCCAAGGCCAGTGGATCACTTGAGGCCAAGAGTTTGAGACCAGCCTGGCCAA +CATGGTGAAATCCTGCCTCTACTAAAAATACAAAAATCAGTAGGGGGTGACAGCGCATAC +CTGTAGTCCCAGCTACTTGGGAGGGTGAGGCAGGAGAATCACTTGAACCCAGGAAGTGGA +GGTTGCAGTGAGCTGAGATGGTGCCACTTCAGCCTGTGTGACAGAGCGAGACTCCACCTC +AAAAAAAAAAAAAAAAAAAAGAAACAAAATGCTTTAAAATTGATTTTGGTAATGCTGCAG 
+AACTCTGAATGTAGTACCAAAAACCATTGAATGGCATGTTTTAAATGGGTGAATTGTATC +TCAGCCAGCCTATTACATATATTTTTAATGGAACGAGTGAAAATGATGAATAACAGGTCT +GTTTTCCCTTGGTGGCCAGGTCAGGGCCGTGTGGTTTCCTTGGGTGGGGGGCACTGCTGG +GGCTGCCTTGTCCCTACTTGGTTTCTGATGGTGTGTCCTGTGGGCTGGGGCCAAGGTCCC +ATGGGGCTATGGATACTATCATGGAACCACTCTTTCTGGAATTTGATGGTATTTCAAAAT +CTATTAGGAAAACTCTGAGACATGAAAGCACAGAGGCGTCATATTCCTCTAATCTGGAGT +CAGCAGTTGCTGTGGTTTGGTGACTTTTCCCTTGGCTGTCTCTGTGTCCCTCCTGAAGTG +TCGAGGGCACATCCCCACCCTCAGGGTTGTATCTCTGCTGAGCATCTCCACACTGGCCTG +TGCTAGGGCCCCACCTGACAATCACATGGAGTCACTATTGTCATCAATGGGAAGGCCTCT +TTCAAGTTTCAACGCTTTTTTCCTAATAGTTGGCTTCTTTGAATCAGGATCCAAACGAGG +GTCTTTTCTCCCCTCTGGCCCCTCCCAGCCTGGTGAAGGAGCAGCCCAGCTGTCCTGCAG +CCCTCCCCATGGGCTCTGATGTCTCCCAAGCTGCTGATTCATCCTGCCTGCCCCCTCCTT +CTTATGAACTGGGTGCTAAATCGGAGAGCAGAGAGGGTGCCGTGGTCTTTTAGCAAAAAT +ATATCCGAAGTATTGCCTTAATTTGTATTTCTGGGCATCTTGATATTCCCATAAGACCAC +AATTTGGAGGTTTGGAGAGCAACAACAGCCTGACCCTTGTAAAGCTCCTATTATCTTTCC +GATGGTTTCATCATTTAATTAGGGCTTGTAGACTAATTCTGGCTTTTCTCCCACATTCGT +TACATGGAGTTTTTTTTGTTTTTTTTTTTTTGTCAAGAAAGGTTTTCTTTCGTCAGCCAG +GGCTAGATGATTGTTTGAAACCCTGTGTGTTCAGGACCTCCAGGAATGCTGTTTTTCTGA +TCTTTCCAGTCTAAGGAGTCCCCCCAAGGGCCACAGATGGGTTCTCTTTGAGTGACATTT +TGAGCTCAGGGCTGTGTATCTGGGGTGTTCTGGTCCATTGCTGTCATTCATTTTGATGCT +TAGATGGTCCCATCCTCATCTAGCAGGAGCTCCTGTGTCTCTTGGGGCCCTCACACAACC +TCTTTAGTTTTTCTTAGTCTGCTGGCATTTAGACCCAGATTGGCTGCTCCTCTAGGGAGT +GAACCCTGGGGACTTCTGCCGGTAAAGTGGCATTTGGAGCATGCTACGTGGGCGCTGGTC +TGTTCATTGCTTTGGGCCTTTCAGTGAACAGAACTAAGAAAAAACATGTTTTTGAAATAA +CAGCAACAACAAAAACCCATAATTTTACACTAATATTTCTAATTCAAATTTAAACTTATA +TATAGGGTTTTTACTTCTTTCATTTTAAACATGTGTCTTTTCTGTTACTATGAATATCTT +GGTTTCTAATATTATTAGCATACTTGGTTGCTTTACTTTAAAATCATTTCAGAATAAACA +GTTTATATTACCACTGATAACAAAAGGTAGACTAAACATTTAAGATTACCTTGAAGTTCT +AATTGTCCTTAAAATGTATCCCTTTTATTGAGAACATTTTGCTTTTTGTTTCATTTTTTT +AAAAAGCCTCTCAAAAGATTTCTTTTCTCTATATGATTCTGCCACCAACTTAATATTCAG +CTCATTTGTTTTAGTTTATTTGGAATGTTTAGGAATTGGACCTTTTTCTTTAGATTTGCT +ATACTTTTCATTTTAGAAAAATATTTTAGGCTGGCTACAGTGACTCACACCTGTAATCCC 
+AACACTTTTGGAGGTTGGGGACTGAGCAAAAGACAAGCCGGCCAACATGGCAAAACCGTG +TCTCTCCAAAAAATACAAAAAATTGGCCAGGCGTGGTGGCTCACGCTTGTAATGTCAGCA +CTTTGGGAGGCTGAGGTGGGCGGATCAGAAGGTCAGGAGATCGAGACCATCCTGGCTAAC +ACAGTGAAACCCCGTCTCCACTAAAAAATACAAAAAATTAGCCAGACGTGGTGGCAGGCG +CCTGTAGTGCCAGCTACTTGGGAGGCTGAGGCAGGAGAATAGCTTGCACCACCCGGGAGG +CGGAGCTTGCAGTGAACTGAGATTGCACCACTGCACTCCAGCTTGGGCCACAGAGTGAGA +CTCCATCTAAAAAATAAAAATAAATAAATAAATAAATTAGCTGGACATGGTACTGCATGC +ATGTAGTCCCAGCTGCTCAGGAGGCTGAGGTGGGGAGGCTCACTTGAGCCCAGGAAGTTG +AAACTGCAGTGAGCGATGATCGCGTCACTGCACTCCAGCCAGGGCGACAGAGTGAGACCC +CATCTCAAAAAATATATATATTTTTTAGAGAAATAAAATTCACTGAGTTGCCCCTGAGTT +GCCCAGGCTGAACTCCTGGGTGTAAGCAATACTCCTACTTTATCCTTCCAAGTAGCTGGG +ATTACAAGACTGTGCCACTGTGCCTAGCTTTGATTTGTTTTTTTTGTTTTTTGTTTTTTG +TTTTGAGACAGAGTTTTCACTCTTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCGCGGC +TCACTGCAACCTCTGCCTCCTGGGTTCAAGCAATTCTCCTGCCTCAGCCTCCCAAGTAGC +TGGGATTACCGGCATGCGCCACCAGGCCTGGCTAATTTTTTTTGTATTTTTAGTAGAGAT +GGAGTTTCACCATGTTGGCCAGGCTGGTCTTGAATTCCTGTCCTCAGGTGATCCTCCTGT +CTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCAAGGTGCCCGACTTGATTTGGC +TTTATCTTCTCAATAGTAAAAGATTTATGTGCTTCTGAAGTGAAAACCCATAACACTGTG +GATTTAGAGAGGGCACATTTCCAGCCCAGTTCCTGATACTTGCTGGGTTGTCGATTTTCA +ATTTTGGTGTATCTTTCCAGTGTTTCTCATGGCAGATTTAAGCAAATATGTGTGTTTACG +TTCCACAGGTCTTATATAAAAGGTGTATGTCATGCAGTGTATGTGGCACGGAGCACGTGT +TTCTATCCTTTCTCCTTCCTTGTGTTCACTCCACAATATGTTGTGAGGCACTTGTGCAAG +ATTTCCACATGTCAAGCAAGTGTGGAAATTCTATGAAGCTTTTAACTTTTTGAGGTGGGG +TCTCACTCTGTCGCCCAAGCTGGAGTGCAGTGGAGTGGTCACAGCTCATTGCAGCATTGA +TCTCCCAGGCTCAAGCCTCAGCCTCCCACTTTGAGGCCACTGGCGTGTGCTACCACACCA +GGGTAATGTTTGTTTGTTTGTTTGTTTTGTAGAGATGGTGTCTTACTATGTTGCCCAGGC +TGGTCTTAAACTCCTGGCCTCAAGGGACCCTCCTGCCTTGGCTTCCCAAAATGTTGGAAT +AATAGATGTGAGCCTCCACGATTGGCCTCTATGGAGTTTTTTAAGGTTTCTGTTTTCTTA +TCTGTAAACTGGATGTGTTAGTAGTACCTGCCTCTGGGTGGTTGGGAGAAATCAAGGCGA +TAATGCATGTGAATTGATGACTCCTGTGTCTGGCACTGTAAAGTGCCAAGTGCTAGCTGT +TACTGTCATGATGTTCATTGTTTATGGTTTGTAATACTCAGCCTCAGGTACAGCTGGACA +CTTATATTTTGATAGCCAGCAGGGGAGCTGGAGCAGACCAAGTGAAGATAGGGAGTTTCC 
+TGACTCTTTAGCCCTTTTTTCTGCAGCAGTTCTAGGGGAGTTGGTGTGCTTTGGAGCAGA +AAGGGGCATAATAATCTGTCCATCTGGATGGGGGTGGGGGTGGATGGTGAGGAGTCAGGA +GTGAAAGAGCCCAGCCAGGGGGATCAGAGGCGCAGTTTCAGATCCAGGGCTTTCTCCCCT +GGAAAGGAGTTGTTGTGGTTGACCCATTGCGTTCAGCCCCTAGATGTTCCTAGATAAACC +TGCCTTTGGGGGAATGTAGGAAGTATCATCGCCCACAAGAAAGCTCCTTGGCTGGCAGCA +TGACAGCTTCCTGTGCTTGTTTCTCACATCAGGCGTCCCCTCCCTCCCCGCTCCCCACTA +AGCCTCCTCACGCTGGTCTGGTAGCAGAGCTGGAGCAGATGGCAGTGGCTGCGCCTACTG +TGGGTCAGGAGTGGTGCTGGGCACTGCAGGATGATGATGCCTGCCCACTGCATTCCAGGT +GCTGGGCTGGCAGTGGGTCTTGCTCAGCCCTGGTGTTATGCAGAAGGGCTTTGGTGACAT +GGTCCTGAGCCTTGTTTGAGTGTCTTATGTCTAAGTAAAAGTATCTTGTTGCTTTTGAAA +CCTACTTCCCCTTGTTTTACCTTTTATGGATGTGGAGAAAACAAATTCATTTAAAGTATC +GCAGACCTAAAAAATGGTCCAGGGCCTTAGGAGCTGTGAGGTGTTGACCTGGCTGGCCCC +CAGGCCCTTCTCCCCTCATACATGGCCACCTTGCTTCTGTTCCTGAGTGTCCTCAGGTTC +CCACACCTCCATTGCCCCCATTCTCATACCACTGTCCTTCAGGGACAGGCTGCCTGTGGG +CCCCCTTCACACGCACATCAGCTTTCCTATCAGTAATTGATGGGGATAGCCGGAAGCTGG +GCGTGTTGACAGAACTGCCTTGGTGTCCCCTTGTTCTGGCCAGGCCTCGCTGCTTATCCC +AGCTTTTCCTCTCCCCTGGGCCTTTCTTCTTCATGTTGTCCTGTGCTAATTCCCCGAGCT +GAAAAAAAGAAGTGGGGTAGAGTTTGGAGATAGATGTGGGATGATAAAGATTTCAAAAGT +GTCTTTGTATTTTTACACTCATCTGAGACCACTTTTCATCATCCTTCCCTCTGCCCTGCC +ATCTTATGAGTGTAGTGGATGGACTCTGAGTGTCACAGTGACTCAGGAGGAAGGTTGGGT +GAGCAGGGCACGGAGCCGGGGCTTTGCCCCTTCCAGGCTGCCCTGCCAGAAGGATGGGCA +CTTCAGCAGAAGACAAGGATGCTTCTCTGAAATAAGACTTTACCTCAGACATCCGTGTAT +TGACCACAAGACCAACTCACAGTAGTGATTTCCCTGAAACACTTGGGAGTGAGTGTCCTT +GGGGCCAAAGAGGACAATGGGTGGGAGGCAGCCCCTGCCAGACAATCTCAGAAACACAGC +AGCTTGCATATAGCGGGCCTTGGCAGCCAGTGGCCTTCACCTCACCTGAGAGTGGGCTTC +ACCCTCTGCCTTGCCTGTGCGTAAATGGAGATGAACACAGATTCTCCCGGAAAGCGCAGC +CAGGGGAGGGCCGCCCAGATTTTCAGCAGATACTGCGTCATCAGTGAAAAGGCAGCAACA +GGCTGGGCTGAGAAATGAAAGCCCTTCTTTACAGATAGTGCAAAGACACAGCGGCCCACC +CACGGCTGCCTCTGACAGGCCTTGTGCCCTCAGGCCAGCGCCTCATCACCCCCAACCTGC +CTGTCCCCTTTGTGATGAAGAGAGGGCACTCAAGATGGCTCCCAGCTCTCTCAGCTGAGT +GCCTTTCCATCAGGGGAGAGGAGTGGCCGTTGGAGGCTCTCTGTCCCCTTCTTGTTTCTT +TGTGCCTGTTATTGGCCAGCCTAGCTGAGCACTGACGGCTCTCTTCCGAGAGGCGGATTC 
+CAGAGGAGGCCGAGCAAGCGTCTGAATCTGCCTTGCAGGATGGGCTTCAGCTCGGTGCCA +GCAGGCGAGGGGGCGGGAAGGGGGAGTCCGATGACTGCGCTTCTTCATCTGGTATTAAAC +TGCTTCCTGTTTTTACTTTTAGGACGAATACCTTTCTCTCGTGGCCAGGCTCATTATCCA +TTTTCGAGACATTCGTAAGTAAGATTTTGCATTTCTGGGTTGTTGAGATCTCTGTGAGAG +GCCAGCCCTGACGCTGCCTCGGCAGAGCTTTCTGGGCAGGCCGTGGTGCCTGAGTCAGAA +GGTCTGTGTCACCTCACTTGCTTCTGGGAAGTTCTTCACTGCCTTGCATTTGACTCCAGA +TCCCTCCATCCTCCCAGAGCCTTGGCCTCAAAAATGCTGATTCTAGCATCATGGAAATGC +TGTCCTCAAAGTGGTCTAAACGGGTTGCTGCTTCACTTGCTCACTTAATCTCCCTTTTCA +TAGGGCTGTTGTTTTTACTTCTGGGAAGTTCTGTTTACCCTGGAACAGAAACTCTCTTCC +CTAAAAGTTGATTTTATTGACCCATGGAGGCCAGAGACACTTAGGCATATTTTCCCTCCA +GACTAGAAGCTTCTGAGGAGGACCTCCTGAGTCTGCACCCTGGCTCCCTGCTGTGCTGAG +GGCCCCCGTGTTAACCTCACGTTGTGCCTCCTCTGATTCAGAGGGCCCAGTGTGGTTCTG +TCAGCCAGGCAGTGGCCCCAGCTCTACAGAAATGAGTTGTCATTGCATCCTAGGGCCAGG +GTCTTCGTGCTTGTGTGTGTTACGTGGAAGTATGTGGACACCAAGTGTTCCTGGATGGCC +ACAGCCTGCGAAGGAAACTGGGGCCAGCAGCTGCTCTGTGTTTTCAGCCAACAATGGCTC +CTGCCCACTGCCGCTGCATAACCACCAGAGGCAGGCTTCTCTTGACACAGGCCTGTCGTT +GGAGCATGTGCCTGGCGAGTCCTATTTCTATTCCCCTGTGGGTTAGGGACAGGCAGCTGT +ACCTTCAGTGTGTTGCTGGGTCAAAGGGAGACATTGAGCACTCCAGAGCCTGGTAGGTTT +TGCTTCTTTCCACACTAACTTTTCACCAGAATGGCCAGCAAAGTGAACTTCACTGATCTC +ACTCTCTGTGGTGTTGGAAAGTCACTGGGATGTGTAAGAGGAGAAGGAGCAGCCCAGATG +ATCTGGCCCAGATGCGCTGGCAGACTCCGCATGTTCTGCTTTCAGGGACAGTGGCCTGGC +TGGTGCACACAGTCCTGTGGGGCTCATGAGGTCATGGGAAGCAGCCGAGCAGGAGGCTCA +GGGCCCCATGACTGCTACAGAAGTTCTCCGCTGGGCACTGGGGTCAGCTGGACTGGCTTC +CAGCTGGCCGTGGACAGCCTCAGTTTCCTCATCTAGGTAACAGAGACACTGCCTCACCAG +AGTGGGCTGCAAAGGGAGGCAGTGTCAAGTCTTGGCACCACTCCAGCCCCGCCGTGGGTT +CCTGTAGTGTTCTATTATTTGTTTGCTCAGGGCTGAGCTGAGACTCTGGACCTGGGAAAA +AGGAAAGCCAGAGTAGGCCCAGGGTCCCCAACCCCAGAGTGCTTCAGCCTTGGGGGCAGC +TCCCTAAGGCCCCAGGTTGACCCTGGAGCTACTCTGCACCCTCTGCCCCCAGGTCTCTTG +TGGGGTCAGCGCTGCCAAGAGTAGAAACACTTCCCCCAGTAGTGGGGAGTACTAGGAGCT +CCGTGGGGATTAAGGCTTGGGCAAATGCCTACAGAACTGACCTACCCATGGAAATATGTG +GTTATATAAAACTATGTTTACTTTCGTTTTTTTGTTTGTGTTTTCATATAGATAACAAGA +AATCTCAAGCTTCCGTCAGTGGTAAGAGTTTTCCCTTTTGAGTTAAAGTTTCTCCCAACT 
+TTGGATTTGAGGTGGCAAGAATGACATAGTGCATGCCTCATGTGCCTGGGTTAACCTGTC +ACTAGAGGAGAATGCTTGCGGAGAGAACTCTGGAGGGAGGAGGCTGACCAGCTGTGCCCT +GACCTCTCCAGCCTTCGTCTTCCTGCAGACTGTGAAGTCGCATGATCCTCCCCATCCCGT +TTCACACAAATGCTGCCTTTGCCCAGCAAAAAGATAAACACTCACAAAGCTATTAAGACA +TTATCCAAAAGTGTTGAGCTCTCTGGTAGAGAATTATTATGCTGTTTTCAGTGTAATAAT +ATGTAGTAGCATTTCTGAGAAAATGCTGTGAATTCACTTTTTTAAGTTACAAAGTTTCTT +TGTAAATTTAGAAATCTGGCTGGGTGCTGTGGCTCATGCCTGTAATCCCAACACTGTGGG +AGGCTGAGGCAGGCAGATCAGTTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGT +AAAACCCCATCTCTACTAAAAATACAAAAATAGCCGGGTGTGGTGGCATGCGTTGTAGTC +CCAACTACTCAGGAGGCTGAGGCATGAGAATCGCTTGAACCTGGGAGGCTGAGGTTGCAG +TGAGTTGAGATCGTGCCACTGCACTCCAACCTGGATGACTGAGTGAGACTCCATCTCAAA +AAAAAAGTTTAGAAATCCATATTTTTAAATTTCAGCGTTTATCACATGAGGGGAATTCTG +TCAATTCAGCCTTGGTCTCTGCTGACTAAGCTGCAAGGGAAGGGGTTTAACAGATCCAGG +CTGGCCAACTGCATTCTGCCATTGGGAAGGGAGTGCACTGACCTCACAGGCTGCACGCCA +CTGTCTGCCCTGCCCTAGACAGTAGAGCTGTACATGCCAGCTGTCTCTGCCCATGTGGGT +CTCACCTCTCAGCCAGGACAGGGATTCTTGGCCAACTGGGGCTGAGCGGCAGCAAGGGCA +TGTGGCCAGAACATCTGGGCCCCTTGGCATTGGAGCATGGGGTCCTGGGTACCACCCAGT +TGTGTGGCATGGCGGTCCTGCCAGGACATACCTGTCTGTGGGTAGCTGTTTGCTGTGAAG +TCCACACTGTTGTGACAATGGCATCCTTGTCCTTGGTTGTGGCATTGCTCACTGAGCTGC +TGACCTGGTGGGCTTGGGACATTTCTCCTCAGTGCTCTGTGGAGCCCTCCTCTGCACCCC +TAAGCTGTTCTGGCATGGTGGCCCTGCACACAGGGGCCCAGGCTGAGTTGGACTCTGCAA +CAGCACGAGTGGAGCTGTGTGTGCCTGTGGACTTGTGCCCTCCCTGGGAGAGCGTCCCCT +GGCCACTGTGTTACCGCTTGCTCAGAAGGGCCCATCGTGCTTTGTACGCTCACCCAGCAG +GAGGGCTGGACAGCCAGGAGAGGCAGGGGTTGCCACCTGCCCTCAAGGCCTCAGCCCATC +TTTAGTGTATCTGCAGGCATCAGAGAGGTCATTTGTCCCTTAACATTAGGACTCTGGTCC +AGGCCAGGCTAGAGGTATGGGTCATGCAGTGACCAACACACCTGGCGTCCTAGCCATTCA +TATTTGGGAGTCTCCAGGAGCCTAGTCTCTTACTGCTTGGGGCTGTGAGGGGATTGAGCC +TGTAGGTAGGCGAGATCTGTGCTCTGTGAGCCTTACGCCCTTTGAGCCATGGTCAGTCTG +GTAGGCCCTTTCCTGAGAAGCTCTGCCCTTGTGTTCCCACAGATCCTATGAATGCACTCC +AGAGCCTGACTGGCGGACCTGCTGCGGGAGCCGCTGGAATTGGCATGCCTCCTCGGGGCC +CGGGACAGTCTCTGGGCGGGATGGGTAGCCTTGGTGCCATGGGACAGCCAATGTCTCTCT +CAGGGCAGCCGCCTCCTGGGACCTCGGGGATGGCCCCTCACAGCATGGCTGTCGTGTCTA 
+CGGCAACTCCACAGAGTGAGTACCACACTTCTTGGAGGATTTGCCGCTTTCCTTGCAAAC +GACAACACATTTTATTCCTGTGCTTATGATGGTATCAAGAAAAGCATGGAGAAGCTCCAA +ATCGCTCTGGTTTTTTTGTTTTTTAAATCTTTGTTGTATGTGGAGAGAAAAGGCTTCCAA +TTTTCGTTGAGCTCCCAGAAGGGTTAAGTGATGCTGGGCCCTCCTCCTCCTCCTAGCCCT +GGGCAGGAGGGTGGGGATGGCCTGAGACTCCATCCAAAGGTTTGTGTTTTGTGGCTGAAA +TAGATACAGTGTGTTTCAGAGGGTTCTCAGCTACAAGGCAGGGCTGAGTGCCAGCAACCA +CAACTCCACTGTCTGCCCGAGCTCTGGGCACCATCTCTTCTAGTGCTGCCCTTGCCCCTT +GGTCACCATGGGTGTGGCTTGCAGCCATATTATCTTCTGGCTTCCTTTCTGTGAAGGAGA +CTCTGTGTCCTCTCCACACGGCTGCTGTGGTGGGAAACTGGGCTGTGATCAGGTGTTATA +GGTGAACCAAAGCTGCTCTAAAGGGGAGGCAAAGAACCTCCCTCTTCTTTCCAGGACCTG +TCTTTTGAGACGGGGTCTCGCTCTGTGGGCCACATTGGAGTGCAGTGGCACAATCACGGC +TCACTGCAGCCTGGACATCCTGGGCTCAGGTGATCCTCACACCACAGCCTCCAAAGTAGC +TGGGACTATAGGCTCACAGCACCGCGCCCGGCTAATATTTTGTATTTTTTGTAGAGATGG +GACTTCACCATGTTGCCCAGGCTTATCTTGAACTCCTGGGCTCAAGCGATCTGCTCACCT +CGGCCTCCCAAAGTGTTGGGATTACAGGCGTGAGCCAACGCGCCTGCCCTTAGGACCTAT +CTTGATGTATCCTTGAGTCCCATCAACATTTCTTAGAGTGTGCTGTGCCCACTGCAAAGT +GCTTTAGTGTTTTTTTCCCCTTTCATGTAACTTTTTATTTTTAAAATAGGAGTCACAGGA +AGTTGTGGAATAGCATAGAGAGTTCCCACATGCCCTTTACCCAGCTCTCCCCAATAATAG +TATCTTACATAGAACATTGTCAAAACCAAGAAATTAATGTTGAGGCAGTGTAATCACCTC +AGTTAGAGACTTAGCTTGGACTTCATCAGTTTTTTTTTTTTTTTTGAGACAGAGTTTCAC +TCTTGTTGCCCAGGCTGGAGTGCAGTGGTGTGATCTCAGCTCACTGCAACTTCTGCCTTC +CGAGTTCAAGCAGTTCTCTTGCCTCAGCCTCCGACTACCTGGGACTACAGGCGCCTGCCA +CCATGCCCAGCTAATTTTATATTTTTAGTGGAGATGGGGTTTCACCATGTTGGCCAGGCT +GGTCTCAAACTCCTGACCTCAAGTGATCTTCTCTCCTGAGCCTCCCAAAGTGCTGGGATT +ACAGACGTGAGCCATCACGCCCAGCCGACTTCATCAGTTTTTTTACACTCATTCTTGGGC +TACATATATGGGTATGTGTAGGTCTGTGAGATTTTACCACGTGTCAAATCAAGTAGCCAT +CACCACAATTGGCATGCGGAGCCGTTCCATTACCACCAACCAACTCTCTTGTGTTACCTC +TCAGAGATCGCGCCCTCCCTCCAGCCTTAAGCAGTGACAATTGCTGATCTGTTGTGTGTC +TCTGTAGTTCTGTCCCTTTGTGAATGTTGTATAAATGGAATCATACCATATGTGACTTTT +TGAGATTGGCTTTTTAAATTCAATCTGATACCCTTGAGACCCAGCCAGGTTTTATCAGTA +ATTCATTCCTTTTCATCACTAAGTAGTGTTCCATGGAACGGATTTTCCACAGTTTGCTTA +TTCATTCACCCATGAAGGACATTTGGGTTGTTTCCTGCTTTTGCTTACTATTATGCAAAC 
+AAAGCTGCTGTGGATATCTGTGTACAGGCTTCAGTGCTTCAATATATTTTTGTCACACAT +CCCTCATGATAACCCTATGAAGAGGGCAGCATCGCCCCCTCTGCTTTGTCAGTGTAGCAG +CTGCTAAGGAGAACTGGAGTCACACCCCAGGCCAAGAGCTGGGATCGAAGCTGCTGCCTG +ACCCCATCCTCACACTCCTACCCTATCCTGTGTCAGGTTTCCTCTTCTTTGGGGCCATAG +GCATCCTCATTTCTACCCACTCCTGCTCTGGAATCTTCCTCCATCCCAAGCCACCTTCCC +TTCTCATACCTTAATTCTCCTCTTACTTCCTGTTGTCTAACTCTCAGCACGTGCTGGTTT +CCTCATCCTAGAACAATCTGCCTGGTTCTTCGTCCCAAGCTCTCATTACCTCACTATTTT +TAGCACATCAGATAAGTACACCAGCTGGGTTTAGGGAGGACCCACAGAAGTGCCGGCTCA +AGTTCCTTGTCACGTTCTACTCTTGTGTACTTGGTGCATTTTTTTTTTAATCATGAGCTT +ATACTTGATAATAAATTGTTTTTAAATGTATGAGTTTAGAATGCTCACATGAGTTGCCTT +GTCATCCCACTTCAGAGACTTTGTCTTAAACCAGTTCTTTGTCAGAAGTTCAGTGGTGAA +GCTTGATATCCTGCCCACGCCCTCCCCAGTCTGGCCTTGGCCCCCACATCCTGTTTTTAG +CTTCCAAGGGGACTGGTCTTCTGTGTGGCACTAGTTGAAAGTTACAGTGGGGGCTGGGCA +CGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGCGGATCACGAGGT +CAGGAGATCGAGACCATCCTGGCTAACACGGTGAGACCCCGTCTCTACTAAAAATACAAA +AAATTAGCCGGGCGTAGTGGTGGGTGCCTGTAGTCCCAGCTACTCGGGAGACTAAGGCAG +GAGAATGGCATGAACCTGGGAGGCGGAGCTTGCAGGGGGCGAGATCGTGCCACTGCACTC +CAGCCTGGGCAACAGAGCGAGACTCTGTCTCAAAAGAAAAAAGAAAAAGTTACAGTGGAT +TCTCTACTTCAGGATCTTGTTACCACCTTTGTCAGAGGCCAACTCTCCCTGCCCTTCACC +ACTTGCCTCCCTGGTCTCCCTGAAACTAGTAGTGTTGTCTGATGGGCATTTGATACCCCT +TCCTGACACCTTTCCCAAAGACTACTGCATGATTAAGGCCCTTTCAGATCTTCATCTAGT +GCCACCAAGAGGACAGAGCCTGGGTCAGAGGTTTTGCCAGGGACTCTCAAAGGGTAGGCC +AGGGGACATTGTGATAGTTTTCAGGTGACCCAGGTTAGTCGGAAAAGGCTGAAACTGCTG +GTTGTTCCTATGGCTGGGGCTGCAGTACAGAGTTGGAAGCAAAGCTTTTCACTCTGCGCA +TTTTTGTTTCTCATGAGTTTTATATGACGTGCATGTAAAGCCGACTTTAAAAATAATTTC +TACTGCCAGTTGTGTGATGGACTGGGGTGCTGAGGACATGTTCTCCTCCTGGATAACTAG +GTTCTGGTTAGGATGCACTTTGGAATGCTGGCCTGCAGCAGTTGGGGAGGCTTTGTCCTC +CTGAGCCTCAGCTGCTGCAAGGTGGGGGAAGAAAGCTTGGGATTCCTGTAGTCAGCCAGG +ATTTGTGCTGGGTCCTGTGTCACCCCCCATACCCCCCAGAAGCAGGTGCAGATTGGAGCA +GGTGCGGATTGGAGGAACATGTCCCCAGTGTAAGACCTGTGACAGTTGCACATTAAGGTC +AAGCAATCAGCATACAAAGATCACCAGCCTTGAAGGGAAGGAGAGACACCATGAAGGTGA +ACAGAAAAAATAGACACATGTAGGCATGCCCAGTGACCACACACACAGGAACTGTTAGTT 
+ATGGGCCATGGGGTATATAAAGAAATAAAGTGGAGAGCCGGGCACAGTGGCTCACGACTA +CACTCCTGGCTACTCGGGAGGCTAAGGCAGGGGGATTGCTTGAGGCCAGGAGTTTGAGGC +TATGGTGAACTATCATTGCACCACTGCACTCCAGCCTGGGGGATAGAACGAGACACCATC +TCTGAAAAAATAGAAAACAAAAAGGAAAAGAAAGAGCAGAAATTCAAATGAAAAAGCATC +ATCAGGAGCAATGAAGCAGATTTGAAACGGAACCAAATGAAAGCTGTAGAAATGAGCAAT +ATAATTATTGAACGAAGAAAGTGGATGAATGAACAGCGCATTAGATACAGCTGAAGAAAG +AATTTGTGTAATAGAAGCTACATTTGAAGAAATTATCTAGAATGTAGCACAGAGAAGAGC +ACATGGAAAATGTGAAAGCAAGGGTAAGAAATGAGGATAAGATCAGAAATTCTAATTGGC +ATCCAAGCAAAGCCCCGGAAGGATCAAATGGAGAGAATGGAAGAAAACAATATGTGAGGG +GATAACTACTGAGAAAAGCCCATCTGGCCCCAACCAAGTTAGCCATCTCATCTGCTGTGA +GACTGCAGAGCACCAAAGGCCAGAGGAATGTCTTAGAAATAGGCAGTTCCTGCAAAGGAG +CAATGAGTACACTCAGCAGTAGAGTCAGAAAGGTAATAGAATATTGTTAAAGCATTGAGA +ATAGAGCTGCCAGTCACATTTGTATGCCAGCAAGACTCAAGGATGTGGAAAGCAAGCCAT +TTTTAGATAAACCTAGCCTTAGAGACCATCACTAAAAGATTTTCTAAAGGATATATCTTT +TTTTATCCTTTTTCCAATAAGCCTTTTGCACTCTTAGATATATTCTTTTGTTTATCCTTT +CCTCTTCCCTCTCAGGTGATACAAGGTAAAATGGGAAAAGAAACGTTGAGCATAGGTGAT +AGTAAATGTGTATAAATCTAAAGCAGTGATTCTCACCCAGGGTGACATTCCACCACAGGG +GATATTTGACAATGTCTGGAGACATTTTGGGATGTCACTACTGTGGGGGTGCTGCTGGTG +CGTAATGGGTGGAGACCAGAGGTGCTGCTAACCATCTTACAAATACAGGTCAGTCCCCCC +AACAACATAGAATCATCTGGGTCAAGTGTCAGTAGTGAAAAGGTTGAAAATCGCTGGTGT +AAATAAAAATTTACCTCTGTAAAATAAAACAGTCAGGCTAACCTCCATACAGACTTGCCA +CATTCCAAGCACTATTTCATTTCTTTTCTATTATTATTATTATTATTATTAAGATGGAAT +CTCGCTCTGTCAACAGGCTGGAGTGCAGTGGTGCAATGTCGGCCCACTGCAACCTCGACC +TCCCAGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCGCGC +GCCACTACGCCCAGCTAATTTTTGTATCTTTAGTAGAGACGGGGTTTCACCATGTTGGCC +AGGATGTTCTCGATCTCCAGACCTTGTGATCTGCCCTCCTTGGCCTCCCAAAGTGCTGGG +ATTACAGGCATGAGCCTCCGCGCCTGGCCTATTATTATTATTTATAGAGCTAGGGTCTCC +TTATGTTGCCCAGGCTGGTCTTGAATTGCTGGGCTCAGGCGATCTGCTCTGCCTTGGCTA +TCCAAAGTGTGAGATTACGGGCATGAGCCACAGCACCCAACCCCAGGGACTATTTTGAGT +GTTTTACTTACATAATAGATTATTTAATTCTTAGTCCAGTGTGTGGAGTAGGTACTATTA +TTGCATTTTCTAACAGGGAAAACTAAGGCAATGAGCAGTGACAGAGTTGAGGTTTTGAGT +CCAGGCAGCCTAGCTTGGAGGCTGTATCCCTAGCCTCAAATCCATCCAGCCTCTTAATAG 
+AATGTCTGCTGTGAAGAGTTAAAATAGATCAACAAAATATGAAATAAATGCAAGTCAGCA +GGGAGATAGAGCGTGAATATCGGTTCTGTCGGGTACATATAAAACCTACTTTAAAAAGTA +GTAAATTTGGGAAATGCTAAAATATTGATGAGGTTTATACTTTAAGTTAAATATGCATGA +TAAAATTGAAAAATTATTGCTAAAACAATGGAAATAATTCATAAAATTTAAGATCAATAG +AAAAAATAGGATAAGGCAGAAATCCAGATAATACAAAGTTCAATCCAATCATCCAACAAA +AATGCAATCAAAAAAGACAAAGAAGCTTAGAAAAGGAAAGACAAAGGCCGGGCGCGGTGG +CTCAGGCCTGTAATCGCAGCACTTTGGGAGGCTCAGTCAGGCAGATCACGAGGTCAGGAG +ATTGAGGCGATCCTGGCCAACATGGTGAAACGCCGTCTCTCCTAAAAATACAAAAAGTTA +GCTGGGCATGGTGGCGCATGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAAT +CGCTAGAACCCGGGAGGCGGAGGTTGCAGTGAACCGAGATCACGCCACTGCACTCCAGTC +TGGCGACAGAGTGAGATTCCATCTCAAAAAAAAAAAGGAAAGGAAAGACAAATATTAATA +GAAAATAAGTTTAAAAATATAAGTAATCATAATAAATGTGAGTGGACTAAACTTTTCTGC +TGAAAGATAGGAAATGTGAGAACAAATGGAAATGAAACAGCACAGTTGTTTTATTGTTAG +AAGAAACACACACCTACAATAAAACATGCGGAAGGATTTAAAAATAAAAGATGAAGAAAG +GCCAAGTGCAGTGGCTCACGTCTGTAATCCCAGCACTTGGAGAAGCTGAGGTGGGAGGAT +CACTTGAGCCCAGGAGTTCAAGACCAACCTCGGCAAAATAGGAGACCCCCATCTCTACAT +AAAATTTAAAAATTACCTGTAGGTACACTTGAGATGCTGAGGTAAGAGGATCACTTGAGC +CCAGGAGGTCAAGGCTGCAGTGAGCCATGATTGCACTATTGCATTCCAGCCTGGGCTGCA +GTGAGACCCAATCTCAATCAATCAGTCAATAAAATTAGCTGAGTGTGGTGGTACACACCT +GTAGTCCCATCTACTTGTGAGGCTGAGGTGAGAGGATCGCTTGCTTGAGCCTGGAACGCT +GAGTCTGCAATGAGCCACGATGGAGACACTGCATTCCAGCCTGGGTGACAGAGCAAAATC +CCGTCTCAAAAAAGAAACAAACAAAAAAAAGGATGAAGAAAGATGTATTGGGCAATTACT +TCAATTACATCTCTTTGTTATTGATAAACAGACCAAAAAATTCATTTACAGAAAATTTGA +ACAATGTAATGAACAACTTAATTTAAGCATATGATGGAACAATATGTTTATAAAAATAAA +TCATAAAGCAAGCTTCAACAATTTCAGAAAATTTAAAAGTATTGTCATACTTTTTGTTGT +TGTTGTTTTGAGACAGAGTCTCACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCGATCTC +GGCTCACTGCAACCTCTGCCTCCCGGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGT +AGCTGGGATTACAGGCACGTGCCACCATGCCCGCTAATTTTTGTATTTTTAGTAGAGATG +GGTTTTTGCCATGTTGGCCAAGCTGGTCTCAAACTCGACCTCAAGTGATCCTCCTGCCTG +AGCCTCCCGAAGTGCTGGGATTAGAGGTGAGAACCACCGCGACCCGCCGGTCATACATGT +TTTATGAATGTCAAATAAAGTTAGAAATCTGTCACAAAATAAATTAAAAATTTTTTACAC +TTGGCCAGGCACAGTGACCCACGCCTATAATCTCAACACTTTGTGAGGTCGAGGCAGGAG 
+GATCACTTGAACCCAGGAGTTTGAGATCAGCCTGGGCAACAAAGTGAGACTGTCTCTATA +AAAATTTTACAAATTAGTTGGGTGTGGTGGCATGCACCTGTAGTTCCAGCTACTCGGGAG +GCTGAGGCAGGAGGGTCACTTAAGCCCAGGAGGTCAAGGGTGCAGTGAGCCACGATGGTA +CCACTTCAGTCCAGACTGGGCAACAGAGTGAGACCCCACCTCAAAAAAAAAAAATCTTAC +ACCCATCAGAATTGCTAAAATGAATAGTGACAGCCTCAAGTGTTGGTGACAGAGAAACTG +AATCACTCATATATTGCTGGTGAGAATGTAAAGTGGTTTTACCTTCTTTCAAAATGAAAT +ATGCAACTCAGCAGTTACACTCTTGGGTATTTTTCCTAGAGAAATGAAAACCTCGCATAA +AAACCCATGCACGAATTTTAACAGCAGCTTTATTTGTGGTAGCCCAAATCTACAGTTACC +CAAATGTCCTTCATGTAGACGGTTAAACACACTGTGAGGTGCTTGTACCATGGAATACCC +TGGCAGTACTGTTGCTAGAACTACAACCTGTATGCTGAATGTAAAAAGCCGATCCCAAAA +AGGTTCATACTGTATGGTTCCATTTTTGAAATATTATGTATAATTTTGAAATGACAAAAT +TTTAGAAATGGAGGATGCTGCAGTGGTTGCCAGAGTTTGCGGGTCAGGGTTGCTGAGAGA +GGTGGCAACACGAGGGATCCTTGTGGTGTGGGGACTGTGTAGTGTATTGACTGGTGGTGA +CTGAACCCACCCAGGTGGTTAAATTGTCCAGAACTTAGTACACACACCCACACAAGTGAG +CACACATCGTGGTTGTGATACCTTGTAAGTTTGTAGAATGTTACCATTGGAGGAAACTGG +GCAAAGTATACAAAGGATCTCTCTATATTTCTTTAATACAGCTTTATTGAGACATAATTC +TCATTAGCATACAATTCACCCATCTGAAGTGCTCCACTCGGTAGGTTTTAGTGTAATCCA +CAGAGTTGTGCAGCCATCAGCACAATTTTAGAACATTTTTATGTTCACCCCAAAAGAAAC +CTCACCTCAGAAAATCTTGTAATTGACTGTTCATATAAGCAGTTTTCATAATAGCCACAA +AGTAGAAGCAGCGCAAATGTTCATCACAGGAAATCGGTGTAGTAGAATATTATTTGGCGA +TAAAAATAACACAGAAAAACTGATAACATGCCACGACATGGGCGAACTCGTAAATATTAT +GCTAAAGAAGCTAGTCACAAAAGAGCACATGTTCGATGCCATTCGTACAAAGAGTCCGGA +AGTGGCCAATCCATAGAGACAGAGAGGAGATTACTGATTGCCAGAGTCTAGGCTTCTTAT +TTATCCAAAAGACTTAGTTGTCCCTTTTCTTTTGTCTTTGGTTATTATAGAGTAACTCAT +GATAGGAAATCCCAAAATCAACACAAATGCTACTTCGTATTCTATCTTTCTGTCTGTGGT +AAATGGAACGTTCAGATTCCAGCGGCAGCCGTGGCAGTGGGGCTTTTGCTGGCTGTTTTG +TCCCTTGCTGTGCAGCCCTGCAGCGTTTCTGGGAATCTGCCCTGTGGACTGACTGGCGAC +TCTGGTCTTTTCTCAGCCCAGCTGCAGCTCCAGCAGGTGGCGCTGCAGCAGCAGCAGCAA +CAGCAGCAGTTCCAGCAGCAGCAGCAGGCGGCGCTACAGCAGCAGCAGCAGCAGCAGCAA +CAGCAGCAGTTCCAGGCTCAGCAGAGTGCCATGCAGCAGCAGTTCCAAGCAGTAGTGCAG +CAGCAGCAGCAGCTCCAGCAGCAGCAGCAGCAGCAGCAGCATCTAATTAAATTGCATCAT +CAAAATCAGCAACAGGTACCAGGTCCCCTGTTCCTGCTCTTGGCCTCCCTCCTGCAGCGT 
+GAGCCCTGGGCTGGCATCAGCCACAATGCTGGGTGCAGTGCCCGGAGCCAGCCGAGGGTT +CTCTTGACACGTGTGCCTGCCCTTTTCCATGGGCTTCTCAAAAAGTCTGGGACAAGGCCG +GGCCCAGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGCGGATCATC +AGGAGTTCAAGACCAGCCTGGCCAGCATGGTGAAACCCCATCTCTACTAAAAATACAAAA +AATTAGCTGGACATGGTGGCGTACACCTATAATCCCAGCTATTTGAGAGGCTGAGGCAGG +AGAATCGCTAGAACCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCGCTGCGCTC +CAGCCTGGGCAGCAGTGAGACTCCGTCTCAAAAAAAATAAAAATAAAAAAGGAAAACAAA +AGTCTGGGACAGGCAGGAGAAACAGGCCCTGACTCACTAGGAAGGTTCTGTGATCGAGCC +CTTCCATTTGGGCTTGGGCATCTGGCGGTGGGCCTCCTGTGTAGTTCAGGGGCTGCGAGG +GATTCAAACAAGAGTGAGCTGAAGCAAGAAGTGAGACGAACACTCTGAGGCTTGGGACTA +CTTGAGCCATTGTGGGCCATTTTGGCTGAGGACACATGTGCCTCCTGTTTACCTCTGTGT +CTTGGGTTCTGGGCTCAGTGTTAGTCGACCTCCAGCTGGTGTTGCATGAGGTGATGGTGA +TGTCATGTTAGCAGCACTGCTTCTCAGAAGTTGTTCAGCTTCCCATGGTTTCCATAGAGA +GTCTAGACGTGTTAAACATCTGCCCTCACAGCAGGTCTTGGTTGGGAGAGGAAAGCCCCA +AAGGGCCCCACCACCCTCCCACCCCTTCAGGCCTCAGCTAGCTGGGCTTCTCTGGATCTT +GGCACCCCTCTCCCAGAGCTGACAGTGCATCTGCAGCCACACTTTTCCTTCTCCTGCTGA +CAAGCAGACCTTGACCGTCACCGTGAGGCTCATGCCACTGACCTATAGGGGCAAAGCAAG +CATCGTTGCAGAGGCTTCAGAAATCTGCAGTTGCCCCAGCTTTTATCCTAGCTTCCATGG +AGCTGTACCTATTCCTTGTCATCTTTTGCTGAAGTTTCTAGGTATGCCCTTTGAACCATC +CTAGTGGAATGGTAGACCAGAGCCCAGGCACACACTTGGTGCCTCCCCGGTGTCAGGAAG +GCAGCTGGTGTTGGCCTCGGGGGGCCCAGAGCCTTTCTGTGGGAGATGTGCCCCTCCCCG +TGTGAGTGAGAGGTCAGCCTGTCAGCCCAGGAAGGCCTTTTTTTTTTTTTTTTTTTTGAG +ACTTTTTTTTGCCTCTTACTCTGTTGCCCAGGCTGGAGTGCAGTGGTGTGATCTCGGCTC +ACTGCAACCTCTGCCCCACCAGTTTAAGTGATTCTCCTGCCTCAGCCTCCCAAGTAGCTG +GGATTACAGGTGTGAGCCACCATGCCTGGCTAATTTTTGTATTATTAGTAGAGACGGGGT +TTCGCCAGGTTGTCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCTGCCCGCCTCGG +CCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACTGCACCCAGCCAGGAAGGCTTTCTT +GATGGCTCTGCAGATGGCCACCTGGGCTTCCTGAGGCCCAGACTGTCACAGGGAGGAGCT +GGTGCCACAGAGGCAGATGAGTGATAACCGAGTGCTGCTTGTTCTGTCTCTAGATACAGC +AGCAGCAACAGCAGCTGCAGCGAATAGCACAGCTGCAGCTCCAACAACAGCAACAGCAGC +AGCAGCAGCAGCAGCAGCAGCAGCAGCAGGCTTTGCAGGCCCAGCCACCAATTCAGCAGC +CACCGATGCAGCAGCCACAGCCTCCGCCCTCCCAGGCTCTGCCCCAGCAGCTGCAGCAGA 
+TGCATCACACACAGCACCACCAGCCGCCACCACAGCCCCAGCAGCCTCCAGTTGCTCAGA +ACCAACCATCACAACTCCCGCCACAGTCGCAGACCCAGCCTTTGGTGTCACAGGCGCAAG +CTCTCCCTGGACAAATGTTGTATACCCAACCACCACTGAAATTTGTGAGTACCTGTGGCC +CACAGTGGAGCACATGCAGCCCGTGGCTCTGTCAGCAGTAGTTTCTAGGCTCTTTGGCTA +GAGATAGCATATCCTATTCTTCAAGTGCTGAGACTTCAGGCAGCCCCCACCCCTTGCCAG +CCCTGCCGACTCTAGCATGGCTCAGCAGGGAAGCATGTGTTTTGCATTTTGTGTTCTGAG +AGCTGCTTCTGCACCCCGCACAGGGTACTGCTGCTGCTACTGGGGCACCAAGTCGCATTC +CAAGCAGTGTGGCTGATTGCAGCAGTTCTGTGGGCCCTTTCAGGGTGTGAGCACTTCGAC +TGATGAGCACCACATGAAAGGATGTGCCAGCTCCGTCAGTGTCTGGTGGTTTGCCAACTA +CTCAGAGTTAATGTTCTAAGCAAATATCTTTTTTTTTCCTTTACACTCTGCCAAGTCTCA +GGAGAACAAAGTTCATTACTATATTTTTGAGAGGAGTATTAATATAAAATACTGTTATTC +CTAGTTTCATTTTGTATCTCTGCACAAAGCCAGTCTACTCAGTGACAGGCACAGAGGACA +GAGTATGATCACCAGGATGAAGGTAGCTGCATCAAGGGGCCATTTGGACAACTCTTTGGC +TCCCTTGGTTTTTGTGGCCTTTTCTGTTTGTGATTCATGAGTGGAATGGGGCCTGAGGCA +AGTGTGGGGGTGAACAAGAGGCCTCATAGGCAGACTGGAAGTCTAGGAGAGTTGTGAGGA +GCAGATATGCTCACTCATCTGTCACCTGGGAGTCGACACACTGCCTGACCCCGGGGACTG +TGCAGACTGAAACCCTTGTGTGCTGCATGGTGGAATGTGCCCTGGCAGGGTTTGCCCTAG +TGGCTTGGAGATGTCTAAGGAAGAGCCCGCTGCATGTTGGAATGTGCCCTGGCAGGGTTT +GCCCTGGTGACTTGGAGATGTCTAAGGAAGAGCCCCCTGCATGGTGGCGCGGGCCTGTCT +CGTCCTTCTGGAGGCCCCTCCATGCAGTGGCTCCACAGTTGCCCCTTTTGCCCAAGTTTG +CCCCCCACCCCCACTTTTTTCTTTTGAAGACAGGGTCTCTGTTTTCCAGGCTGGAGTGCA +CTGATGTGATCAGACCTCCCTGCTCAAGCGATTCTCCCACCTCAGCCTCCCAAGTAGCTG +GGGCTGCAGGTGTGCATCCCCACACCTAGCCAAATTTTTGTATATTTTGTAGATATGGGG +TTTGGCCGTGTTGCCCAGGCTAGTCTTGATCTCCTGGGCTTAAGCGATCCACTGGCCTTG +GCCTCCCAGAATGCTGAGAATACAGACAGGAGCTACTACGCCCTGCCCTCTGCTGGCCGG +CTAACCCAGCTTTCCTTCAAGGAATCAGGCTGAGTGAAGCTCTTGTGGACCATCAGCATG +CTTGGCCCAGAGGATGAGCAGGCTAGGCTGTAAGCTCTGCATTGGAAGTCGTCAGTGCTT +GTGATAGCTGGTGCTGAGGGATGTGTGGAGACATCCCAGTGGGAGGGCCACATTGCCACT +TGCCAGAATTGGATTTGAGCTAATGGGATCCTAGATGGCCAGTAAGGTGACATCACAGCA +CATGAGGTCATGAAAGTCCCATTCCTCACCTCAAGAAAGGCTCCATTTCCTAAGCTTCCA +CAGGAAGACTAGGCTCTGCTCTCTGACCCAGGTGGTTCCAAATCACATTCTCTCTTTCTC +CCTCAAGGTCCGAGCTCCGATGGTGGTGCAGCAGCCCCCAGTGCAGCCCCAGGTGCAGCA 
+GCAGCAGACAGCAGTACAGACAGCTCAGGCTGCCCAGATGGTGGCTCCCGGAGTCCAGGT +GAGGGCCTGGGGGTGGAGGGCTCCATAGTCATCAGCAGGTGCATGTTCACCTGCGCATGT +AGTGCTACAGTGAGGGTTCTGGTTGGATTAGGGGCTGGGGGCTAAGTTGGTAAAGCAGGG +GCTTCTCTCCCCCTTGCACTCTGCAAGACACCCCTGGAGTCCTTGGGGTGGGCAGCTGTG +CTTGGTCATGTCTAGGACCCACCAGCCGCATAGTGCGTGTCGTTTCCAGCTTTGCTCGGG +CATCCTTCATTATCCCTCTTCCTTGCCACTCTGAGTGACTCTGGGAACATGTATCAGAGA +GTAGAAGGCTGCAGTTTTCTCCTCACCTCTCATCTTCCATTTGAGAGTCTGAGTACAGAG +CTGACCAGGGTGGAGGGCAGCTTTCTAGACACTCGGAAGCAGATGGGAGCCACATGCAGC +AGGAGCGTGGGAGAAGGCATCCTGGTGCTCGCCTGCACTCAGGGACTCCGGCGCTGTGTG +GTGAGAAGCGTGCCACTCCCACATTGCTGAGCTTGCTGGTGTGTCACATGTGTGGAATGT +GTGTGTGTCTCTGTGTGTCACGTGTTAGTGGAATGCAGATGGCCTTGTCTGGCCTCCTAG +TGTCCTGGGATTGGAGGCCCAGGGTTTCTGTACCTTTTCCTATAACCCCTCCTCTCCTTC +TTGTGGGGAGACGTACAGTCTGGGGCTTCAGTTTTTCCTCTTTTCACATGATGGTGGAAA +CACTGTTGGTCCTGACTCGGAAGGGGCGTTGGCTTTAGAAAGAGTCAGAATGTGCTAGTT +GATGTGGTAAGACTGTGGCTGGGTCCCAGCCCACAGCTGGCAGTGCGCCTGCACTTTGCT +GTCGTCTTAAGGGTACCTGGCCTTGTGGTTTTCTGCAGGCCTGCTGGGCGCAGCGCCTGG +GTGCACAGTGCCTCGTGGCAGGCTTCATCTCACGCTTGGGGGGTGCTCAGTAATGTCCTT +CATGGTGCCTGCTACCTCTTTTAAGTCCCTGGGAAGCTGCCTGAGGAGGGAATCTGAGGT +ATTTGGCTGCAGCTGAGGTTAGCATCCTACCTTGGCAGGGCACCAAATGACTTGGGGCCA +GGGTCCCCATGTCACATTCTTACTTAGATGGAGCCCTGTATGAAACACACCACAGAACTC +ACATTTACTTACAGCTGGGGGAGGCTGACCACTTCGTGTGATGAGGTTGGAGGTCTCTGC +CACCTCCCTGTGTGTCTCGGCATGTCCTCAGGCCTCGTGGGGGGGCTCCCTTGTCCTGGT +CTCATGCAGAGGAAGGACGTGGTGTGTTCAGAGTCAGCTTAGTGGTGGCACATAGTGACT +GCCCACCCAGTGGAAGCTGCTACCTTTGCCTCTTTGGACCATCGATCCTCTGTTCAGGAC +CTCACTTCCTGGAAACTTTTTTTATTTTTTGAGATGGAGTCTTGCTCTGTTGCCAGGCTG +GAGTACAGTGGCACGATCTTGGCTCACTGCAACCTCTGCCTCCTGGGTTCAAGTGATTCT +CCTGCCTCAGGTTCCCGAGTAGCTGGAACTACAGGCGCGTGCCATCACACCTGGCTAATT +TTTGTATTTTTAGTAGAGACATGGTTTCATCATGTTGGCCAGGATGGTCTCCATCTCTTG +ACCTCGTGATCTGCCGGCCTTGGCCTCCCAAAGTGCTGAGATTACAGGCATGAGCCAATG +CGCCCGGCTGGAAACTTCTGTACCGCAGGTCAAGCCCAGCAGGAAAGTGAGAACTGCTCT +TTTCTTTATTTCTTTCTGTCTTTTTTTTTTTTTTTTGAGACAGAGTCTGACTCTGTCATC +CAGGCTGGATTGCAGTTTTGCGATCTCGGCTCACTGCAACTTCTGCCTCCTGGGTTCAGG 
+CGATTCTTGTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGTTGCACACCACCACACCTG +GCTAATTTTTGTATTTTTAGTAGAGATAGGGTTTCACCATGTTGGCCAGGCTGGTTCGAA +CTCCTGACCTCAAGTGATTCACCCGCCTCAGCCTCCCAAAGTGCTGGGATTATAGGCATG +AGCCATCGCACCCAGCCTAGAACTTCTCTTCTCTTCTCTTTTTTCTCTTTTCTTTTCTTT +CTTTCTTTCTTTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGACAGAGTCTTGCTCTC +TCACCTAGGCTAGAGTGGCTGGAGTGCAGTGGTGCAATCTTGGCTCACTGCAACCTCTGT +CTCCCAGGTTCAAGTGATTGTCCTGCCTTTGCTCCCAAGTAGCTGGGACTACAGGCATGC +GCCACTATATCCAGCTAATTTTTTTTTCCTCTTGTATTTTTAGTAGAGATGGGCCAGGCT +GGTCTTAAGCTCCTGACTTCAAGTGATCCACCTGCCTCAGCCTCCCTAAGTGTTGGGATT +ACAGGCGTGAGCCACTGTGCCCAGCCGAACTTTCCTTTTCTTTCCATTTCTTGGTATTTG +GAAAGTGAATAGCCATGGAGCAGCCAGTGTTTTCAGCTGGTGCAGGCCCAGCTGGTATGT +TCGTTAGGCTTCTAGCTCAAGGAAGTGTCCACTCTGCAGTCTCACTGGCATCCCATCACC +GTCCAGAAGCAGGATGCCAGCTGTGTGTGCTCATGGCCCATGCTGAGCCTGTCCTTGGGC +CACTGGTCTTCCTGTCGGTTTCTCCACCCCTTGTTTCAGGTGGAGGTCCCCGCCCGACTG +GCACAGGAAGGAGGATGGAGAGTGGTTACCACTGTGGTTCGTTGCTGAGTGAGTTTGATG +TTTTAAGGATCATTGCCCACCTGCCCTGTGGCACCATGTGCTGCCATGCAGGGGATTCCC +AAGTGCTAGTGGCAGAGGAGCCTTGTAGTTTCTCAAAGTCCAGCAGTGGGTAGGGCGTAT +CTATGAGGTGCTTCTCAGTCCAGATGTGGCACCTTGAAATTCTGTAACCCGCATTCTGCT +GGTTTTCTCTCAGAGCTTGGTCTTGCCTTGTTGCAAGCAGTGCAGTTTCTTGGGGGCTAA +TTGGGCACCCTCACTGCCTTGTTCCACCAATAGCAGCGCCCCCTGCTGGGGGATGGGGAG +GGTTGGGCACAGCAAGCCCTCTGAGGGGCCATCTGGATTCATGATCTTCTTTTGAAACTT +GAATTGGCACAGTGACCAGCTGAGGCTACGGTACTCTGATCTAAAAACTTTAAAAGTATG +TGTGGTAACTGTAGAACCCAGAAAGTGGAGAGGAACAGAAGTAAGAGAACCATTCCATGA +GAGAGAAGGTCTGGTGTCCTTTCTGGTTGTTTCCCATAGTCAAGGTCATGCTATCCTGGG +GTTTTTTGTGCAGCATCTTCCACATCAATAGGGGATGAAATGTTCTCCTTTAATTAGAAA +GCCCTGGTCGGCACGAAGGAACTCTTGTCATACCCCTCTGTGAATGTCTCTTGCCATCCT +CAGTTACCATCTTGTTCTTGGACCTTGTAATAATTTCCGATTTTTTGCTATTACCAGTGA +CACATATCTTGATGTCTTAGATTGTTCCTGGAGCTGATGGAGGTGAGTGAATGGTTCACT +TTGAGGGCCTTCAGATCTGATCAGCCACCCTCTAACCAGGAGGTCAGAGATGCAGATTGG +AGCTCCCCTTGTGTTGTTCACACAGTGGACATCAGGACGTGGTGACCACGTGTTGCAGCT +TTGTATGTATGAGCTGCCGCGTGCTGCGGAAACAGGCCTCTTGAGTTCATGAATGAAATT +CTGGGATTGGGGGCCCCCATGCAGGCAGCCCTCCATGCCCTGTCTCTCAAGCCCATCTTA 
+GAGCCGAAGAGAGGCGCCCTGTCAGGCCCTTGCAGTACTGTGGTGTTGGATGTGGTAAAG +GGTAGCCTGGGACTGAGTATTCTAGAAAAATTGCTCAGCCTTGTGCTGTGCACACCTGGT +TCTCTTTCCAGTGTTATTTATGTAGCTGAGAAGGGCAATGTCACCCACAAGGGGATTTAG +CAGGACACAGCAGACTTCTTTTTCTCCCCTATAAAACCAAGACATGCAGGCACAGTGGCT +CATGCCTGTAATCCCAACGTTTTGGGAAGCCAAGGTGAGAGGATCACTTGAGACCAGGAG +TTCAAAACTATCCTGGGCAACACAGCGAGACCCTGTCTCTACAAAAAAATAAAAATAAAA +AGTTAGCTGGGCGGAGTGGTGCGTGTCTGCAGTCCCTGCTACTCGGGAGGCCGAGTTGGG +AGGATCGTGCAAGCCCAGGGGTTTGAGGCTGCAGTGAGCTGTGATGACAGCATTACACTC +TAGCCTGGGAGACAGAGCAAAACTCTTGTCTCTTTAAAAAAGCAAGAAACAGGACAGAAT +GCATCTCATTTTCTTTTTTTTTTTTTTTTTAAAGACAGAGTCTCTTGCTCTGTCACCCAG +GCTGGAGTGCAATGGTACAGTCTCAGCTCACTGCAGTCTCCACCTCCTGGGTTCAAGGGA +TTCTCCTGCCTCAGCCTCCCAAGTACCTGGGATTACAGGCATGTGCCACCATGCCTGGCT +AATTTTGTATTTTTAGTAGAGACGGGGTTTTGTCATGTTGGCCAGGCTGGTCTTGAACTC +CTCGCCTTAAGAGATGTGCCCGCTTTGGCCTCCCAAAGTGCTGGAATTACAGGCGTAAGC +CACTGTGCCTGGCCATGCATCTCATTTTCAAACAGTCCCTATACCTTTGTCATTTAACGT +TCTATTCTTTTAGTATGTGAAAACCAAACTCTAATTTACAAGTCACAAAGAATTATCCAA +GACAGACTCCCATTGTGGACATGTATGTGGCATTGCTGATTCACAGCTACTGTACTGAAG +CCTTGGCCACCCTGGAGGACCCTCTCTCTTTTTGCTGGACCCTTCTGCACAGTGACTGCA +GTAGGGAGAGTCAGACCTGGGGGTGGGGGAGAGGTTTTTTGGATTCCTCTCTGGGTAGAT +GTGTGTTGGGAAGTAGTGAAATGTTGATAGCTGCCTGGCTCATCTGATCCAGTAAACATA +ACTGGTGCGTTTGGGTTGAAAGAAATTGGGCTTTCCTTATCAGTTCTCAGTTATAAGAAC +ATCAGTCAGCAGTACATTAGACTTAAGCTTTGCATTCCTTGCGTTTTTTTGTTTGTTTTT +CTCTTCCTGGAAAAAAGTTTGCTTCTCTCATACCATCTGACTTACTTCCAGGCTTTTCTC +CCTTGTGGAACGAGTGCCGTTGAGCCCTGCTGCACTCTCAGACGGGCTCCTCCGAAGTGC +CGCAGGTGGTGGTAAATCGACTCTCACCCACTGGGGTCGCTCCTTCGTGTCTCCCCCCGG +TCGGTTCATCTGTTGCTCTGGCTGCAGGAGGAACGAGTGAGCTTCTGGTCGGCGTCTGCC +ATGCCGTGTCACCCCGGCTTCTGGCACCTCCTGTGCGTGCCCAGGATTGTGAATGTGGGC +CGTGTGTGTGAGGCCACGGGTCTCCCTGCAGCCACTCTCCTGCTGGAGCTCTGTTACTGG +CACCTGTCGCTGCCTGCACCGAAGGCTGGCAGCACCTCCTGGAGCTTGGGACCCAGAGCA +CAGCCTCCCACCATGAGATGTGTTGTTTTTCTGTGGATCAGTCCTCCTTTCTTTCTGAGC +CTGGCGTGTTTTGTTCTAGTTTGTTACCGTCCTAAGTGCCTGTAGGCCCTGCTCTCCAGG +GACGAGACTCGGGCTCTACCCCCAACTCAGAACCCAGAGCAAGAGTGGTCGGGCCCGGGC 
+CCACAACAGTGCTCAGCTGTCCTGCTGCCTTTGTAGTTCAAGAAGTGTCCATTGATGAGG +GGAATGGTCCTGGCTCATGCTGGAGTTCCTGACTCGCATCCCTGTGGAGATGAACTTCCT +CGTCAGGGCGGAGGCCTGCCAAGCAGTCCCCCCAGGCTTCTCTTGCTCACCTTTGCCCAT +TTTTATTACGAAAGAAAACCAGTTCCTTGATAGATACCAGGACCATCAGCCTCAGGCCTG +GAGGAGGAGAGGAGGATGATTTGGGTTCGGGCTGTAAGAGGTGTGCCACTGAGAAGGAGG +GATGCTGTGAGCAGGCTTAACTGAGCTCATGGTTCAGTGGGAGTTGAGTGTTCTCATCAC +AGGCTTTGGTGGAATGTACTCTTGACATCTGTCCCCAGGAGCCTGGTCTCCAGAAACACC +AGCTCAGGCCCTCAAGGTCTGGCTCTGATGGTTCTGTGGGCTATAGGATTCTGATCTGTT +AGCGAGGTGTGTTCAGAAGTGTGTTGAGGACACCAGTGCAGGAGAGCAACCAGTAGAACA +GAAAGGTCTGGAAGCAGCATTCTTGGCAAATCTTCTAGATTCCCAATGCCCAGACAGACC +TGGAGGTGCTGTGGGCTTGAACATGTGGGTGGCCTCCCCTCCCAGGCTGCCCCGAGCTGC +CCAAGCTTTCCTTGCCCTGGTGCTCCTTCTTGCAGAGGCTACACGTGCCCTCTCCACCTG +CCCAGGCACTGAGTTTCTTTGTTGCGATCACCTTGTCTGTTGTCCCTCTGTCCTCAAAGA +TGATCACGGAAGCCTTGGCCCAAGGTGGGATGCACATAAGAGCCCGGTTCCCGCCTACCA +CCGCTGTGTCCGCCATCCCGTCAAGCTCCATCCCTTTGGGCAGACAGCCCATGGCACAGG +TATTTACGGGGCAGCGCCCAGGGCACGGCTGGGAGCTCGGGGCGTCCTCTGAGCGCCTTT +GTAAAGCGCACCTGTCATTATCATCGCCGGTGACTTCTTTTATGGTGGCTTTCAGAGTGC +CAAACCCACAGTCCCTTTTTTTTTTTTTTTAAATGAAGCCAAGCCCAGCTTGTGCTTGTT +GAGAGGATATGAATAAATATCTGGGTCTTTTCAGCTTAAAATAGTTATACTGTACGCATT +TTCATGGTAAAAATAAAACAGCAGCTAAGCAAAAGTTTTGCTTTCTAGAGCTGTGCTATC +TAATAGTGTAGCCACTAAGCATGTGTGGCTGTTTACATTTAAATTGACTAAAATTAGGCC +GGGCACAGGGGCTTATGGCTATAATCTCAACACCTCAGGAGGCCGAGATGGGAGGATCAC +TTGAGGCCAGGAGTTTGAGACCAGCCTGGTTCACATAGCGACATACTATCTCCAATTAAT +TATATACATTTTTTAATGGAAAATATATATATAAATGTATATATATTTTAAAAACAAAAA +AAAAACCTAAAAGTCAACTAAAATTAAATAAAATGAAAAAATGCAGTTCTTCAGTCGCAC +CAAATAAATTTTAAGCCCTCATTTGCCCACAGGGAAGCTAGTGGCTGCTGCAGTGGACAG +CACAGACTCCAGAACGTGGCCAATGTGGCCAAAGTGGTATTGGGCCATGCTGTTGTAGAG +TTTGATGTGTAGAGTGTGTAATTTGTAACACAGATCCCCTTTCTAGGTAGAGGTAGACTT +CTGTGTACATTATGCTTTTCAGAGACAGGAATTCCCGGCTTCTTTTATTGTATGAAATTT +GGAATTACTCCATGGCATAGTCATTTTTTTAACAAATAGTTACCACATGCATTCTGTGTG +CCAGGCACTATGCTAAGATTCTGAGAATAAAATTATGCTGAAGTTCTCTCATCATACAGG +CTTTCCATTTTAGTGGGTTTGTCGTGAAAACATCAAATTTGCCAAAAGAACTTTACAATT 
+CAGTTTCTGTTTTAGGGGAAATGCCTCTATAATCCTAGCAGACAGTGGTCCCTGTCCATC +CCTGAACGCCCAGGTGCAGGCACTGCCACCACCCTGGCTTGTGCAGTGGCGTCTCTGCAG +CTCTGTTCTACACAGGAAGAAGTGGAGGCTGAGCCAGGATGGCCCTTGGTCCATGTGGCC +CCAAGCCTGGTGCCACACACCACACACTGCCAAGAAAGTGGCCTGGGCCCCCACTGGGGC +ATGTTTAGCTTTTGCCTTCCCTGCATACTAACCCCCTGGGTTACCCCAGATGTGCCATTC +ACATAGTTACTGACTGGAACTTAGTTTCTTTGGCAGGGACTTCCTTTTTAGTAGTTGTTG +AATTACAGCGGGAGGAAGTGCCTGGAATCTGCTGGTAGAAAGGCTTTGGTGAAAGAGCTC +TCCAGAGGAGCGGAAGTTGGGGGTCAGGAATTAGGTTGTGTGGCTGTGGTGGAAAGGAGG +CCAGTGTGGGCAGAAGCCAGGGTGCAGTTCCCCATGGCCTGGAGGCTGTGAGTTCCACGA +CCTGCTCACATGCTGCTGGTGTCTCCTTGACTAGACCCCTTTGGGTTCTGCACTCTTGTC +TGCTAAAACTGCTTCTCCTCCTTTCCCATAGCCTCTGGCACAGTATCCACTGGGTTCACA +GAATCAGGTTGGGTTGCTGTATGTGGAGGAATGACTTTAGTAGCAGCATGTAGGACTGAT +TTTTATAAACACAAAAAGTCCCCAATGTTTTCAGTGTGCGATATGTGCCAGGCCTTGAGC +TAAGCTCTTTGTGTACAGAGCCTCACTGTTTCTTGTTGTAGCAGGTCATAGTTTGAGCAT +TTAGCCCCTTTTACAGACTAAGAGTGCCCCCAGGGAGCCACCAAGTCAGGATCGAGGGCT +CAAGTGAAACCCTTATGCCTTGGCCTCTCAGCAGGATTCTTCTGGAAACTCCTCACTCCC +CTTCCTGCTGGGCTGATGACCTGGCTTCTCCTTCTTGTTTACTGGGCACTCCTGGGTGGG +CTCCCTCCATGCCCCCACAAGACCAGGCTCTGTCCTGGGCTTTGGCTTGCCCATACTCTG +TTGTGGGCTGGCCACTCCCCACACCCAGCTGCTGGTGTTTCCACCTAGACTTCTCACAGG +CAGCCCCTCCCTCCCTCCCTCCCTCCTTCCCACCAGTTCCCAAACCCACTCAGTCTTTCC +CTCCCCAGCCTTCCCTGGCCCTTAGATTAAGTCTGCCCTGAGTTATGCCAGCTAATAGGT +CTCTCTCTCTCTAGAAGCTTCCAACCCCCAAAGGCAAGGAGGTAAGGTGGAGCAGAGGAA +GGATAGAGAAATATATGATGAAGCTGAAAAAGTTATTGCCTTGGAATAAATGCCATGGAG +AATTCTGGTTTGTCCTCATTGCACTGTGTGGGGTGAGAGCGCTCTCATGGGAAGATGCTG +GCTAACTGCACACAGATTTAATCACCAGGCCCCGCCCATTCGCACTGGCAGTGAGAGGCA +CACCCCTGAACCTTTGGGGTTGTTCACTGAGGACAAACGCGGCAGCATTTAGAAAGTATT +CCTCGTCCCGGCAGTAGACAAAAGCCACAGGATTTATGGAAAGAGGAAGGAAGGCACAGA +ACTGGGGCAAGGTTCTGGTTTTGTTCTGTTATTTTGTTGTCATTGTTACTGTTTGTTTTT +CTTTTTTTGAGACAGAGTCTCGCACTTGTCCCCCAGGCAGGAGTGCAATGGCGCACTCCT +GGCTCACTGCAACCTCCACCTCCCAGCTTCAAGCGATTCTCCTGCCTCAGCCTCTCGAGT +AGCTGGGACTACAGGCGCCTGCCACCACATCTGGCTAATTTTTGTATTTTTAGTAGAGAT +GGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTAACCTTAGGTGATCCGCCCGC 
+CTCGGTCTCCAAAACTGTTGAGATTACAGGTGTGAACCACTGCGCCCAGCCTTGTACTGT +GTTTTTAAAGCAGGTTTGTTCATGCTTGTTAGCTGCAAGAAAGGAAACTGGAAGGAGTTG +AGGGAGGGAGAAAAGCGAATAGAGGGACACCCTGGAGTGGAAGCCTCCCTACTGCCCAGG +CAGCTGCTCTGCAGCCCTCTCTGGTCCTGGCCTTCCCCGCAGAGGGCCCTCCCCAGCTCA +GCCCTCTACGTCCGGCTGCAGTACCAGGCTCTTGGTACAGTCTCCCACACCTGGCTGTGG +GCCAGCCTTGGGGACGTGAGGCATGCTGGGCCCCGTGTACCAAGGGCATTCACCTGACCA +CACCAGGTGGCTCCAGAGGTGTGCCTCTGCTTTTGTGTCAGTATCTTTCAATCCAGTCCC +TTCCGAGAAAAAGAGAGCTTATCTCAGGAACGTATCTGTTGATCTCACCAGGAATTGCCC +CACTGTGTCCCATCTGAGCTCTCAGGGGCCAGAACCTGCTGTCTCCAGTGTGTCCAGATG +GCCACTCTGTGGCCACTGTCCCTCCAGCCACTTGCCCTCAGCAGTCAGCAGGACACAGTC +TGGAGACTACACTGCCCATTTGCTCTTGCTCTTTCCTTGTGGCAACAAATTAGGTCCAAG +AAAACAGGTGGAGCTGAAAGAGGGAATGTACGTGCCCCATGGGCTCTTCTCCAGAGGGCA +GGGCCCAGGCTCCCTCCCTCTGTCTTGACGGCTCATGGCTGCATCTTCCTGTTACGGTGG +GCATGCCTCACGGACTCAGGCTACCAGGTCAAGCAGGATTCTGTGCTGGGTGCATTCTGG +ACACAGTGCTCAGCACGGGGGCCCAGGGGGTGGCATGGCCCATGCCACAATCCTCCCTTG +GCAGATGTAAGCAGGCAGTGTCTTTCTCACTAGGGGCTTGTCCTTTGACTAGAGAGGCAC +TGTTGCCTCCCTGTTCCCTGCTCCAATCCCAGACCTGACATGAGACCAAGGACTGGGAGG +TTTCTTTCTCGCCTCTCTGGCTGCCTCCCCAGAGCCTAGAGCCTGTCTTTGGATGAGTCT +TTGCAGGGGCTGCTTCTCATCCCAGGTGCTGGACTTGGACTGCCCACCACTGACACCTGG +GCACAGGGCCTGGCTTCCTGCTCTCGTGCTCTTGGAAATGAATGGGTGCCCTTCATTTGT +TTTTCTTCCCACTAGAAATCTCCCTGCCTCCCATCCTCTGGGCTCTGCTGGCTGCCAGCT +GCCAGGGTGAGACCCGTGTTCCACTGCACCCTGGGGAGTGCCTCCAGGGGACACACTGAG +TGCCGGAGCCAGGAGCAAGGAACGGTTCCCAGTATGTGGGGGCGGGGGTGGGGATCGCGC +AGTGCCGCCAGCCTTGCCTGAGGGTTCCTCTGTGAGATGGAGCTCCCTGGACCAGCCTTT +TTGCCCTCCCACCTCCTCTCTGGCCCACTCGGTGTCCACCTGCCTTCAGCTCCAGGCTGC +TGTGTGGGTCCCTTTCCCAGAAATGCAGTTCAGGCCATTGTTGCTGGGGTGGTGGTGTTG +CTGGGGTGTTGATGTTTAGCGTGCCTGAGCTCCCTTCATCACTAGGATGACCTTGAGCAA +GTCACTTCCCTCTGAGCTCCCTGTTCCCAACCTGTGACACACAGGTGACAGGTGGTGTGC +ACTCAGAAACTCCTTCCCTAGGATGTGTGACATTCTTGTCCTGGGGCCTTTGCTAGCTGT +GACACACCGGCGCCCGACCTCTCTGGCTGCCTCCCTAATGCCAGTTGTGTCTTCAGATGA +GTCTTTGGGGGGCTGCTTCTCATCCCAGGTGCTGGACTTGGACTGTCCACCACTGACACC +TGGGCACAGGGCCTGGCTTTCTGCTCTTGTGCTCTTGGAAATGAAAGGGTGCCCCTCGTT 
+TGTTTTTCTTCCCACTAGAAATCTCCCTGTCTCCCATCCTGTAGGCTCTGCTGGCCACCA +GCTGCCCTGACCTGGGGCAGCTGAAGAACTGAGATAGACCTAAGTTCTGGGTGGGTGCTT +GGAGGCCCCACGGGCGTGGCTGGTGAGATTGCCGCTTCAGGGCCAAAGTGGGTGCCGGGC +TTCCTGAGGTTCGAGAGCAGATGAGAACTCTTGAGTGGCAATGAGGGCTGTGTGGTGGCC +TTCTCTGCTTGGCAGCCTCTGCCCAGCCTGCAGCTGCCTTGCTCCTCAGAGAAGCGCTGA +GCTTTAGGCCACAGAAAAGAGCTGACCATCTTCTAGAAGGGACAGCTGCTCAGGCACAGG +CGCCTCTCTGGAAGAGGCAGTCAGGTAGAGGCAAGGGGAGGGGCCCTGACCTTCAAGCAG +GAAGAGAGGTGAGCTTGTGAGACAGCTTTCTGGGGTGCAGGTAGAGAGTAGCAGCAAGTC +AAGCATGGGTCAGAAAAGGCAGAGTTGGGCTGAGCATGGGGCCCAGAGTCTTTGCTTCCC +ACTCCTCGGTGCCGTCTGTGGTTCTCTGTATGGAATTTTCTGGCCTTGGGATTTTGTTGA +CACCTGGATAAGTCTAGAACATGTGTTCCGGTGTCTGGGACATGCCCTTACTCCTCTTCA +GCTAGGGCTCTGTGCACCCCAGCACCGTGAATGCACAGGACTTTGGTGGGAGAAATCTCT +GTACTTGTTCAACTCAGCATTTAGCAAATGAAGTCACATCAGTCCCCGTTTCTGACCTTG +CTAACCTTTGCGAGAATCACCGCGAGCCATTTTGCTCCATTAGGTAGAGTGATGAGGCCC +GGCAGGCCGAGGCTGGGGCTCCTGCACTCTGTCCCTCCTCTCCCAGGCAGAGGTGTCTTG +ACCACAAAGCAGTGGGAACAGTCCATTCCATCGGTTGATGTCCCACTGGGCCAGTGGCTT +TGGATAGGGACCTCTGACCCAGCTGTGCTGGAAGTACCCTGTTGTGGTCACGCTCTGTGC +CTTAGCCCCCCACCCCCTGGTCTCCCAGCACACGTGGTTCAGGAACGGCCTGCACCCAGC +ATCCCTTGCAGCACTGTCAGGCACAGCCCATGGAACAGGCAGGACAGAAAGAGCATTTCG +CTCCAGGAGCTTGGTCCACGTGGTGCTGAAAGGGCAGTGCCACCCTGCCCTCTGCTGCCT +GGTGGGGTGGTGAGGAGGGCAGCATTCTGTTTGTGGGCTCTGGGCCTGGTCCCCAATTTA +AGGAGCTTAGAAAGCAGCTGTCCTATCCCAGTACTCACAGCACTGTGGTGGAAGTGCTTG +GGGGGTGGTGTGAGCCGTGGCCCGAGACAGGCGTTCATGGCCAGAGTGCCCCAGGAGAAG +CACTGAGTTGGTCTACAGAAATGGATGGTCCAGGGGCCTGGCTCCTAGGCAGACTGTGAG +GGGATGCTTGGCATGCAGGGGGTTGACTAGGGTGTGGGAAATGTAGCAGGGCCTTCAGGG +TTCCCCCAGTGTGACAAGGCATCCCTCTTTGCTGAGAGTCCCTCTAGGGGAGGCCAGACG +CCCAGGGCTGGCAGGCAGCTGTTCTCCCCATCAAAGCAGCCATCTGTGCTCCACCCCAGG +AAGGTGTGGGTGTTTTACATATGCTTATCCCTGATGCCTGGGTGGTGTGGTTTCATCCCT +AGAGTTTACGGATGAAGACAGTGGTGTCCCTGGTGCAGACACTGGGGTGATTGATCTTAT +TTGTGTCGTGGCCAGCAAGGCCTGGGGAGCAGAGCAACAGCCTTGCTTTGCATACACACA +GGGCCTCAGAGGACTCAGGTGCACACCTGCATGGCAGAACCAGGCTCTGGGCCCAGGTGT +CTGACACTGAGTAGCTCGCCTCTCCCAGGGGCGCTGGTGTGGGCCGATGCAAGTGCCACC 
+ATGGGTGTGTGTGTGGCTCCCTGGGCCACCTCACTCCAATTAAGCAGGTCCAGGTGGGAC +AGCCTGGGACCAGCCCTGCAGCAGCATGTGTTAACCACAACAGATGGCTGTAGCCACCAG +GCTTGTCCTTGGTGCAGTCAGCCTGGGGGCTGCGGGGGCTTGTGGAAGCATGTGGGAAGA +CGGTCCCACTGGTCAGCAGAGCAGGGTCCTCACCGACCCACAGAGGAGCATGTCACAGGA +CAGGCAGGCGTGTCAGGGAAGGCCAGGCTAGTGCCAGCCAAGGGCATTGCCTCCCAGACT +GCCCTTTTCCTCACACTCCCACGACACAGCCCTCAAAGCCTGTGCCCCTGTGGGTCCCTG +GCCAAGGAGCAAGGGCTGCCCTGCCTGGGCCCCCCCCGCCAGCCCCTTGGTCACCTCATG +CTGTGTGCCAGGCTGGGGGAGTGTGCCTGTGTGTATGTCCAGGTGGCATTTGGATGAAGA +CACAGGTGTGTGGTGAGGCTGTGCGGGAGGATGGGCCTATGCGTGCGGTGGGCAGGTGGT +GTGGAGGCAGGCGGGCGGGCGTGTGGCAGCGCGCCGGCTGAGCCCCTCCACTTCCAGGTC +AGCCAGAGCAGCCTCCCCATGCTGTCCTCGCCGTCACCGGGCCAGCAGGTGCAGACCCCG +CAGTCGATGCCCCCTCCCCCCCAGCCGTCCCCGCAGCCCGGCCAGCCCAGCTCACAGCCC +AACTCCAACGTCAGGTAGGCCTGGCCTGGGGTGCCCCTCCCCACCTGGCCCTCGAGGCTG +GCCCTGCCTGGACCCCGGCTCACATGTTTCTCTCACTTGGCTGCAGCTCTGGCCCTGCCC +CATCTCCCAGTAGCTTCCTGCCCAGCCCCTCACCGCAGCCCTCCCAGAGCCCAGTGACGG +CGCGGACCCCACAGAACTTCAGTGTCCCCTCACCTGGACCTTTAAACACACCTGGTAAGT +TGGGCCTGAGGTGCTAAGGTCACTCCTCACCTTTATGAGGCCTCAGCTCATACTGGGTGT +GCGAGCTCTGGGGCCCTCAGAGCTCAAGTTCCCCACCCGAGGGTCGAGGGCTGTGGCCTC +ACCCGCCTGTGTCCTGCAGTGAACCCCAGCTCTGTCATGAGCCCAGCTGGCTCCAGCCAG +GCTGAGGAGCAGCAGTACCTGGACAAGCTGAAGCAGCTGTCGAAGTACATCGAGCCCCTG +CGCCGCATGATCAACAAGATCGACAAGAACGAAGGTAGGCTGCAGCCAGGGCAGGGGCCT +GCACCCTGGGGACACCACCAGGCTTGTGTCTTAGTGTGTACCCTCTTCTGTCCCAGACAG +AAAAAAGGACCTGAGTAAGATGAAGAGCCTTCTGGACATTCTGACAGACCCCTCGAAGCG +GTGAGCTTTGCCCACAGCCCACGGAGGGTCCACAAGGGCACAGATAGCCCAGCCATGGAT +GGGCACTTGGTGATGATGTGGGTTTAACAAAGGCACCAGGCAGCTCTTTGGACCCTGGCC +AGAGGCCTCCAAGGCTCCACTCTGGTGTGTGCTGGGGCTTCAAGCCCAGGCTTCATCTTG +GCCCATGCCCAGCCTTTGCCCTATTCCTGGCTGCTGCTGTGGCCCTCATGCTGGGCCATC +ACAGCCAGACCTCATCCAGTCAGCAGCCAGGGGCCCAGCTTGCAGTGGCCTGACCATCAG +CTGGCCCAGATGGGCCTGAGCCTGACCTGGAGTTCTGCCCCTGACTTGCTGGTGACCATG +GGCAGGCGACTGCATCTCAGTTACCCCATTTGTAGCCCACGTGTGTTGTCATGAAAATGA +AACAAGGTGGCGCTGGTGAGAAGCAGGTGGAAGGCAGGGCTGCTGGCCACAGGCTGCTGT +GAGGATCAGAGCTCGGGCAGTACCTCCAAACTGCATGGGCATTGGCATATACCTCCCTTT 
+CACCTCAACTCAGGCTATCCCCCAACCCCTGTCTAGAAGGGTCTCTCTGTTCTGCTTGTC +TGGATAGAATGCCAGTCACTATTGGGTGGTCCTCCAGGTCTTCATGGGTTGATCCTTTGT +TAGGGTTATAGGGATGAGTGGGGCAGGGACTGGCCTACATGACAATGAGGCATTCACAGC +CTGATCAGGGGCCTGGCCACTGTCCCTTCTCTTGACCCATCACCTCACTCTGCCAACCAG +GGACAAGCAGCTCCTGGGGTTGAGGGCTGCCTGCCTTCAACTCTGGGTCTGGGAACAGCT +GGTCAACTGGTAGGAGCTCCTGCAGAGGTTTCTGATGGCTGAGGCCCAGTGGCAGTAGTT +GGGATCCTGAGCCTGAGAACTGCCATGTCTTCCACTCTTTCAGCCCAAGTCCTCTCTCTC +TGCAGGTGTCCCCTGAAGACCTTGCAAAAGTGTGAGATCGCCCTGGAGAAACTCAAGAAT +GACATGGCGGTGGTGAGTGGGATGCCAGACACCCCTAGGGGAACCAGGGCTCTCCTAAGA +GCTCCTGGGAGTGCTGCTGAGAGGGCCTTCAAGGTCAGGGCATCTGGGCGGGGGCCGGGC +CTGACCTTGGACCCTGCCCACGAGGCTTCCTGGCCAGGTCTGCTGTGCTGGGTGGGAACA +TGGGAGAAGTCACCCTCTGTCTACGGCCCCCGTGGGTGCCTCCTTCACCCATTTCAGAGG +AGGCAGGGACTGTAGGGAGGATAGGCTGTGGGGGTGGGGGATTATTCCCAGGATCAGCCA +ACATTGTCTGCACAAGGGTGGAGGCTGTGAGAGAGGCCTCGGGAATCTGACTGTGAGTGA +CCATGGGCCTGGGGTGTGAAGGCCCCCTAAATGGGGAACCCTCGGGTCCCGGGCTGCTGA +CCGTGCCCATCCTGTCTCCAGCCCACTCCCCCACCGCCCCCGGTGCCACCGACCAAACAG +CAGTACCTATGCCAGCCGCTCCTGGATGCCGTCCTGGCCAACATCCGCTCACCTGTCTTC +AACCATTCCCTGTACCGCACATTCGTTCCAGCCATGACCGCCATTCACGGCCCACCCATC +ACGTATGTCCAGCTGGGCTGGGCTTTGCGGAGGGCGGCCAGCCCTGGGCCGCGTGTGCCA +GGTGTGGTCACCATGCCGCCTCCCCAGGGCCCCAGTGGTGTGCACCCGGAAGCGCAGGCT +TGAGGATGATGAGCGGCAGAGCATCCCCAGTGTGCTCCAGGGTGAGGTGGCCAGGCTGGA +CCCCAAGTTCCTGGTAAACCTGGACCCTTCTCACTGCAGCAACAATGGCACTGTCCACCT +GATCTGCAAGCTGGGTGAGTGTCCAGAGGGCCGGGACTGGTGTGGGAAAGCAGGCCCTGA +CCGCAGCCCCAGGACTCTGCCATCCAAGCCAGATGGCACAGCGCCCAGAACCCACCCTGT +GTTCACGCCCCGCCAGGCTGTCTGCTCTATCCTCACACACACCGGGCTCCAGACAGCCTG +GCCATGCCTCAGTCCCCACTGCCAGAGCCCGCACACTCTCACCTCCCCAACACAGATGTG +GCTTTGCTTGCCTGGGCCCTAGGGAGGTGGTAGGCAGGACCTCTGGGCACCCATCAATGC +AGAGCACTCCAGGCTTCACGTTTGGAAATCTGAGAGTCAGAGAGACCTCCTCCAGGCTTT +GCCACAAGCTTCACCAGAACCTCCCTGGGTGTGGAGTCCTGTTCCAGAGCAGGGCTGTGA +GGCAGGGCAGGCCGGGGCTTGTCCAGGTCACAGATAGAGCCTTCTGTGTTGCAGATGACA +AGGACCTCCCAAGTGTGCCACCACTGGAGCTCAGTGTGCCCGCTGACTATCCTGCCCAAA +GCCCGCTGTGGATAGACCGGCAGTGGCAGTACGGTAGGTAGACCCAGAGGAGCTGTCTGG 
+GGACCCAGGGCAAGCAGGGGTCATTGTGGAAAACCAGGCTTCCACTGCAGCAGGGACAGC +CTCTGTGTGCTCCTGCCCCTCCCCAGCTCAGGCCCCTCCCTCCCCTCTTCTGGCTCCTCC +TGCACAGACAGCAGCGGGTTTTCCAGTGGTCGCCTGAGAGAAAAGGGTCTCTGCTCCCAC +TGTGTCCCTTCGCCCCTTCAACTTGTGTCACGTCAGCTGTCCAGGGTGGTTGTGTTCAGT +GGGTGAGGGGTGAGTAAGCCCATGGCTCAGGGACAGTCCCCGCTCTTCCCAGAGGCCACT +AGCTTCCCAGAGGCCACTACCTTGGCAGGGCTGCCAGGTTCCACTGGTAGCTTGGGCACA +CAGGGCCCAATCTGGGTGCTGTGGCTCCAGAGAGAACCACGGTGGCAGGGGGCAGTCTCT +GAGCCCAGAGCTGGCACACAGCAGAGCTGGCACACAGCAGAGCTGGCACACAGTAGAGCC +AGGCTGGGTCCCCAAGGTTGTTAGAGGCAGAGCCCTAGATCCTGTTCAGATTTGGTTCTC +ACTGACGATGAGGCTCTGGGAGGAGAGCCCAAAGGCCGGGCAGCGTGCAGGACACATCAC +CTCCTGGTGCTTCGGCCCGCGCCTGGGGCTACCCCTTCCCAGAGCACTGCCGGGTGTGCC +AGGAGCGAGCGCAGCGCCGGCCGCCTTAGGTTCACGCCCACTGCTCTGTTGCAGACGCCA +ACCCCTTCCTCCAGTCGGTGCACCGCTGCATGACCTCCAGGCTGCTGCAGCTCCCGGACA +AGCACTCGGTCACCGCCTTGCTCAACACCTGGGCCCAGAGCGTCCACCAGGCCTGCCTCT +CAGCCGCCTAGCCAAGACTGCAGGGATGGCCCGCAGCCTCATCGGGGCCAAGGACACACG +CCTCCTGTCAGACACTTCTAGGTGTTGGCTTCCTTAGAGAGCCTGGGGTTAGGTTAGCTT +TCCTGCTTTTATCTTCTGCCTTGGGGACCTGCCAAACGAAATCCCACACCTGTACAGAAC +TGGGATAGGCGCAGTGGAGCGGGTTGCTTGGGGGGCGTTGGCCGACTTCTTAGAGAAGGC +CCTCCATGTGACTTCCTCCCAGGAGCCAGATGCGATCCTCAGGCTGCTCTCACCGTGGCC +TGTCCACGGTCCAGGTCCATCTCAGCAGCGTGAGGGTGCACTCAGGGTGTTGTTAGAGCG +TCTCGTGTGTGCTAGACGCACCCCTACTCGTTCCTATAGAACACAGAGGACATAGGAAAC +CCTTAAAACACACATGGGATTCTCTGGTCACAGTTTTGGGTTCAGGCTATGCTGCTTTGG +GCAGGTGGAGCACCCCCCGAGGAAGCCTGCAAGTCCAGGGCACAGGCTGCCTTTTGGAGG +GAGGGCTGGCCCATAGGTGCTGCTGGCTCCCCGCCACCAGCTGGGCCTCAGCCCTCACGG +CATTCCTGCTGAGCACCGTGGGGCACCCAGGGAGCAGGGGCGTCAGGGATCCTGCTGCCG +GCACCCCTGTGCCGCTGGCATGAGGGCCGTGTCCCCACTGTGAAGGATGAAGAGCAAGGC +CCTCAGGACCCGTGTCCTCAGAGCACCACACACTGAGCACCCAGAGACAGCGGGCCTGGC +AGCGGGCCGGGCCATGCAGGGAGCGCCTCCCTATGTTGCCTGCCACTCTGGGCACCGGCC +AGCACCCTCTGGTGAGAAGAGGTCCCCCCTTTTTATGTGCACTACCCCACCATCTGTGAT +TATAATAAATTTATTATTCCTGTGTTTGTCAGTTGTTCATCACTGTGTCCCCTTCTCCAT +GAATCTGGGCTCGCTGTCCTGTCAGGATGCTGTCAGCCTTGGGGCTGCCTGGGGCCTCTG +CCTCCCTTCCAGGAGGGACTTGAGGTGCTGCAGCTGAAAGGCAGGGGCAGGGGTGTGGTG 
+GGGCCTGCGGTGGCACCAGGCTGCCCAGGGCCATCCCCACCACCCAGGGCCTCACCTCTT +GGGCCTGCAGAAGGTTGGTATTCCACAGCTGGTGGATGACCACTTACACTGCTGAAGTGT +GGTGGGCTTTCGCAGGTGTGGGGACAGGATCATCGGGGAGTGCCATCATCTGCCTGTCCC +TGTGCTGGCTGCCCACCCAACAGAAAGCCCTGGCGTTAGAGGCCTCTGCTTCCCTGTCTC +TGAAAGTCATTACAGTAGCCAGAAATGACCTGCCCAGGTGGGTCTGAGCATCCAGGCAGC +ATACTTCCCCCAGGCAGTGCCCAGGCCAGTGTGGGGAACAGGGCCTGGCTCCCTTCTCCC +ACTGACCAGAGTGCTCTGGGTGCCAGATGGGGGTCTCGGACTGGGGTCTGTCCCTTCCTC +CCCTCTGAAGGTGCCCTGAGGGTCAGATGTGCTTCAGTGGGTGACCAGCTAGCTGCCAGG +CTGCCCTTAGTCAGGACCCTATACCCAGTGGGGCTTCTTGCTTCAGGGCTGGTCTCAGGG +GACGTGCAGAGATGCAGGAGCCTGTCCTGCTGGTTGCACCGCTCAAAATGTGCACTCTGC +AATGTCTCAGGGCAGCCCGGCAACTCGGGGAGTACCAGAGCCTGGTCCTGCTCTCCTAGT +CCCCTCCAAAGGCAGGCTGTGGCCAGGGGCAGCAGGCACTGACCAGCTCCATTCTAATCA +GGCCCAGGAACACAACTCACTTTTCTTGGTAAAAAGATTACTTTGTTTTCTTTTCTCTGA +GAAAGTGGTTTAGGGGCTGTTGCTGCCCCCTCTTGTGGGCTCAGGTCAGCACAGGTAATC +TGCAGGACTCCAGGGGAGGGGAGGAAGACAGCGCCCACCCAATGGACAGAGTGTGGACAG +AGCGTGGCCATTGTCTGCTTTCATGAGCCCTGGAGACAGGATCCTAGAGCAAGGGACACA +GCAGTTTGCTGACTGCAGTGTGGGAGCCTAGTAAATGTGGTCCTGGGGCCCTTAGGGTGC +AGGCAAACCTTACTTGGACTGCCCTTGCACCCCAGGAACTTTGTCCAGCTTGCTGTTGTG +AAGCAGTGGCTCCTCCTCAAGGTCTGCCTTCTGGGGTACCTTAGAGTACCCCAGTGCTTT +AGAGTCTTGTTTCTTGAAGGAACCGGCTGGGGCCTGTCCTTTCCTTGAGAGTTGGCTGCA +GGTCAGACCCGACAGGGCCAGTCACTGCAGGCACCAACAGCCGCTGCCCAAGTCTTTCAG +GATCAGCTGTGTGTGCCCAAGAAGCCCCTGCCTCAGTTGCCTCCTCTGTAAAGGGAAGCC +TGCCTTGGAGGGTTCTAGGAGGAATAAATGAGGTTCTGAGGATGAAGCCCATGGGTGGTG +CCTGTGAACATTCTAGAACAGTCAATATAGTGGAAGCTGGCTCTGTTTTGGGTAAAATAA +AACTGCATGCGAGGCCTGGGACTAAGCCTGCATGGGTTTCCTCTGCAAGGACAACGCCCA +TGGCACGCCTGGGCTCATCACAGGAACAATAGGGGCCAGGGTTGGGGCCAAGGAAGATGC +CTGGTCTCGAGCCACTCCCCGCTCACTCTGCAGAAGATGCCATGAGCCCAGGGCTCCGTG +GGCAGTGCCATCCCTCCTGTATCCTGTCTGCTTGTGGAGGGGGACGTGGGCCAAGCAGGG +CCTCTGCCAGGCAGCCAGTGTACGTCCTTCCCCCTAGAAGGCCACCCAAGGAGGCCCCTT +CTGACCGCTTGCTGTGCTGGCTTGGGGTGCCTGCCATGAGGCCTTGCTGCCCTGCGTTGA +CAGCCCCTGTGACTGGCCCCCAGTCCTGCCCACCCCACCCTAGGCCCCTCCTGGTGGTTT +CAGCTTACAGGGCTCCCTTCAGCTGCCCGTTCACCCCCAAATCCTTGAGGTTGGCCCAGC 
+AGAGGACTTTCAGCTCCCTGGTCAGCACCTCTAGTGACCACCCAGGCTGGGGTCCCTGTG +GGGCACTTGCCCCTGTGAGCACATGGAGGACACCAGCCTGCTCACCTGACACCATTTAAT +TCGAGGTGGAGTTGACAGGCTGGAAGTTGCCACCCGCACACCTGCCTGAGGGAGAAGCCC +TCCGTCTGCCTGCAGGCCTGTGTCCTCCCTGCTCTACCCACACTCCACTGAGGCAGCCCC +AGACCCACCGCGCGCCCAAGGGAGCAGAGGCCCCGGAAAGGTTGTTTCCTGAGGGGCCAT +CCAGAAAGCGGGAGGGCTTGGGACCCATGGAGCAGCAGGTCCGGAGGGAGAGCATGGCCT +CTTCCCGTGTCCTGTCAGGGTCCCCAGCACTGCAGGGCCACACTTTGCCCAGAACAGCTC +ACTGGAGTTGCCACCGCAGCTAGGGAAGGGCCTTCACTCAATCCCATTCTCTGTGTCTCC +CTAGAAGGGAGAGCCCTGGACAATGCCCCAGCTGGGATGTTCCCAGGCTGGTCCCATTCA +CCAGTCAGATGTCCTCCTGATGTCAGCCCCGGCCCCAAAGGTAATAGAGTGAGCTCAAGA +ACTCAACCAGAATCCTGGGCTGGGTGGGGGCCACCATTGGGATGGGGAAGTGCCTTGGAC +AAGTGGCAGGGAAGCAGGACTCTGGTTACTCCCCCCACCCCCATCCCCACAGTTGTTGCT +GAGATGGCAGGAAGGAATCCCTGGATGGGGTGTGCCCGGAGCCCTTCTCCCAGCGTATCA +GGAATCAGAGCCACAAGGCTGGAGATCCTAGGCCCAGCCACAGCAGCAATCTGAGGCAAA +GACCCAGCAGGGTAGGGGCTGTCCCAGAGTCCCCTCCCATTTCAGGAGCTCTGCTTGGAG +GAGGGAAGGTCTGGGAAGCACCCTGTGCCCACCATCACCACTGCACTGACCACAGCCCTT +GGGCTTCCTGAGAGCAGCACGTGCTGTGCTTGGCTGTATAACATCGTCCAGCCATGGCTA +GGGGCAGCAGCATCTGTCCAGGTCCCTCAGAACCTGTCCTCACTGGGTTGGCATACAGAG +GCCAGTGCAGCCAGGTGGGCAAGGACAGTCTGGAGGGGCTCCCCGGGGTGGCAGGAGCTT +CCCATCAGGCACGAAAGGGCCCAAGTCTCACCTTTCTCCTGTGACTCATTCATTATGAGC +TTGTAATGGGGATCTGAAATCAAGCGCATGCTCTTCTCATCACTTGTGAAGCCACAGCCA +TTCTAAGAGAAGCAGCTTGGGCTGTCCACTACCCCCTGCTGAACCCCGAGAGTCCTGGCT +CGCCACTGGCTGACTGGGCCCTGCCCCACAGGTCCCGCTTTCCACCACACCTTTGCTGTG +GGCATGTGAGGGCAGGAATGACCCTGCCTGTTCCCTAGGCAGAGCCCACCCAGCACATCA +GCCTCATCCTCCCATCTGACTACAAGACGCCCATGGTGCCAGGCAGGACCTGGGCCCCAG +GGGTACAGGGGTGAACTCTGGGAGCCCCTGGCCATGGATGAGCATGGTCAGTGTCATGAT +CCTGTGGGCATCAGGGACTGGGGAGAGGCTCAAGAAGACATGGGGAAGGGGGATCCTGAC +ATAACAGATGCCTGGTGATGGAATACTTAGCACCTAGGTAAGGCAGCACCAGGGAGCGGG +GAGGGAGATGAGGCTGCAGGCACTGGTGCCCTCGCTGGAGCCTCTGCTGAAGCAGGGAGG +GAGGAGCGGAGCCACATCCTGGCGGGCCCTCTTTCTGCTCTGGTGGGCAGTGGGGGTGAG +GAGCTGCTAAGGGTGCAGTTAGGAAGGCCCAGCCCGAGGACAGTGTGGGCACAGGCGAAG +CCGGGGGCATGCCTGCTCCTGATCCGGCATGTTTAGTTGGAGAAAATTCACTCAGTTGTG 
+CACCGGCACTGGGCATGTTTTCCTTCTACGAAGTTTTTAACAGATGTGTACCTGACCCTG +CCTGGAGAGGGGAGGGCTGGCAGGGTCAGCGTTTGAAATGATGCTGCCCCCGGCGGGACA +GGCAGACAAGGAGGGCACCGCCCCGGGCCTAGGCGGGTCTCCGTCCTGGGGCAGGGGAAG +AGCTCAGCATGGCAGGACCTAGCGTGGGCTTGCTGCCCTGTGTCCTCCCTGAGCCCAGGC +AGGTGCTGGAGTTCATGTGCTGCGTTTCTGCTCTGCGGGCCGCCGCCGTGTTCAGGCGCT +TGGGGAGGCAGGGCAGGCCTAGGGCCCTGTGGCTGGGCTCACCCTTGTTCTCCCACTTCA +GATGCTCGATCTTCTAGTGGAGCTTGGCCAGCAACTCCAAGGACTGCTGCTGTAGGAACT +GCAGGCCCAGGTCCAAGTCCAGGATCTGCCTCTGCTGGTCGATCTGGACACCCACAGACC +GGCGGCACTGCGGGGGGTGCGACGTAACCTGAGATCCCGGGGACCAGCTGGGAGGCTAGC +TCGCACGGCTCATGTCGCCACCCCCAACTACACAGCCGCTGCCACTGCACCCTGGCTCAG +TGGCCATGCTGCCCGTCACAATGGAAAGGGGCGTGGCTGAGGAGCAGGGTGGGCAGCTGG +GGCAGGGCCTGTGGCTGGCTCTTCCTGGGTGAGTACAGTCTGATTTAAAAGGCACCCAGG +GCTGGGCACGGTGGCTCATGCCTATAATCCCAGCACTTTGGGAGGCCCAGGTGGGCGGAT +TACTTGAGGTCAGGAGTTTGAGACCAGCCTGGCTAACATGGTGAAACCCCTTCTCTACTA +AAAATACAAAAATTAGCTGAACGTGGTGGCACGTGCCTGTAGTCCAAGCTACTTGGGAGG +CTGAGGCCGGAGAATGGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCAGAGATCGCGC +CACTGCACTCCAGCCTGGGTGACAGAGGGAGACTCTGTCTCAAAAAAGAAAAAATAAAAT +AATAAAACGCATTCAGGCATTCTCTACAGCCTGGGCAACATAGTGAGACCCTGTCTCTAT +AAAAAGTAAAAGAAAAAAAATTAGCCGGGCTTGGTGCTGTGCTCCTGTAGTCCCAGCTAC +TTGGGAGGCTGAGGCAGGAGGACTGCTGGAATCCCGGAGGTCAAGGCCGCAGTGAGCTAT +GATGGCACCACTGAACTCCAGGCTAGGTGACACAGTGAGATGCTGCCTAAAAGAAAAAAA +AAAAAAGAAATATGTCTTTCTTTACAAGGCCTGCCCTAAAAAAAAAAAAAAAAAAAAATG +TTCGCCAGAGCCAGGCCAACTGTGATTTTACCAGATGCCAACATACCCAACCCAAGGGAG +GAAGGGAACATACCCAATCCCAGCCTGTTCTAGCTCCTCTTCCCCACATCTTCCTGCCAC +ATCAATAGGGCTCCTGTATAATAACAGGGGATACAAGTGAAAGAACAGCCCATCTCAGAA +CTTATTTGAGAAATGTCTAGGAAACCCTAAAGACAACAGGGAGACAAAAACTAAGACACC +AGAGGAAGTTTATGCCTCTGACACTTACAGTTAGAACAAACAGCACACATGGCCCAACCC +CTAACCAGAGCAAAATAAAACCTCACACAGGCCGGGCGCTGTGGCTCACCCCTGTAATCC +CAGCACTTTGGGAGGCCGAGGCGGGTGGATCACGAGGTCAGGAGTTCAAGACCACCCTGG +CCAAGATGGTGAAACCCCATCTACTAAAAATACAAAAATTAGCGGGCGTGGTGGCAGGCA +CCTGTAATCCCAGCTACTTGGGAGGGTGAGGCAGAGAATTGCTTGAAGCCAGGAGGTAGA +GCTTGCAGTGAGCGGAGATTGCGGCACCGCACTCCAGCCTGGGTGACAAAGTGAGACTCT 
+GTCTCAAAAAACAAAAACAAACAAACAAAAACTTCACACAGAAAGCCCATTTATCACGTA +CCCCGTACAGCATGTCCAGCTCCCCACAAAAAATTTCAAGGCATAATAAAAACCAAAAAA +TGCAGTTTGAAGAGACAGCAAGCATCAGAGCCAGACTCAAATATGGCAGAGATGTCGGAA +TTATCAGACCAAGAAATGGAATACAACTATGATTAATATGCTAAGGGCTCTAATGGAAAA +GTGGACAACATGAGATTACAGACAGAAAATGGAAACAGGGATAGACACTCTGAGAAAGAA +TCTAAAGAATATGTGAAGGCCGGGCACAGTGGCTTATGCCTGTAATCCCAGCACTTTGGG +AGGCTGAGGCAGCGGATCACTTGAAGTCAGGAGTTCGAAGACCAGCCCAGTTCGAGACCA +GATGGCAAAACCCCATCTCTACTAAAAAATACAAAAATTAGCCAGGTGTGGTGGCATGAA +CCTGTAATTCCAGCTATTCAGGAGGCTGAGGTGGGAGGACTGCTTGGACCCAGGAGGTGG +AGGTTGCAGTGAGCTGAGATTGTGCCACTGCCCTCAAATCTGGGTGACAGAGTGAGACAC +CACCAAAAAAAAGAAAAGAAAAGAAAGGAAACAAGGAGAGGAAAGAAAGGAAAGAAAGAA +AGAAAGAAAAGAAAGAAAGAAAGAGAAAGAGAAAATAAAGAAAAAGAAAGAAAGAAGGAG +AATATGTGAGAAGTCAGCCAGGTAGTGACTCATGACTGTAATCCCAACACTTTGGGAGGC +TGAGGTGGGTGGATCACTTGAGGTCAGCAGTTCAAGACCAGCCTGGGCAACATGGTGATA +ACCCATCTCTACTAAAAATAAAAAAATTAGCTGGATGTGGTGTCATGCACCTGTAATCCC +AGCTACTCAGGAGGCTGAGGCAGGAGAATTGCTTGAACCTGGGAGGTGGAGGTTGCAGTA +AGCTGAGATTATGCCACTGCACTCCAGCCTGGGCAGTAAGACTCCGTCTCAAAAAACAAA +AAGAAAATATAAGAAGTCAAAAACACAACAGAATGAAAGATCAGGGCTGGCAGCAGTGGC +TCATGCCTATAATCCCAGCACTTTGGGAGGCCAAGTTGGGTGGATCACCTGAGGTTAGGA +GTTGGAGACCAGCCTGGCCAACATGGAGAAACCCCGTCTCTACTCAAAATACAAAAATTA +GCTGGGTGTGGTGGCGCATGCCTGTAATCCCAACTACACAGAAGGATGAGGCAGGAGAAT +TGCTTGAACCTGGGAGGTGGAGGATGTAGTGAGCCGAGATCATGCCACTGCACTCCAGCC +TGGGTGACAGAAGGAGACTCTGTCTCAAAAAAGAAAGAAAGAAAGGTCAGGTGCATGGCT +CATGCCTGTAATCCCAGCACTTTGAGAGGCCAAAGTAGAAGGATCGTTTGAGCCCAGGAG +TTGGAGACCAGCCTGGGCAATATAGCCAGACCCTGTCTCTACAAAAAATTAAAAAAATTG +CCAGCATGGTGGTGCACATCTGCAGTCCCTGCTACTCAGGAGGCCAAGGCTGCAGTGAGC +TGTGATTGTACTACTGCACTCTGGCCTGGGTAACAGAGAGAGACCCTGTCTCAAAAATGG +GGTAAAATGGTATAGGAAATTTGGATCTAAAAAAAAAAAAAAGGAAGTGCATTAGATAAG +AAATAAATGAAGATAAAATAGAATCTTTTTTTTGGTGGGGGTGGGGAACGGAGTTTCACT +CTTGTTGCCCAGGCTGGAGTGCAATGGCACGATCTTGGCTCACCACAACATCCGCCTACT +GGGTTCAGGCGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCATGTGCCA +CCACGTCCAGCTAATTTTGTATTTTTATTAGAGTCGGGGGTTACTCCACGTTGGTCAGGC 
+TGGTCTTGAACTCCTAACCTCAGGCGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGAAT +TACAGGCATGAGCCACTGCACCTGGCCAGAATCTTGTATTGTTCTTATTTTTAGTTGAAC +AGATGGCAGGTTATTCAAAATAATTATATCAACATGTATTCAGTAATTATAGCTTATGGA +TAGCTGGAATGAATGGCAGCAATGTTATAAGGGATGGGAAGGAGGATATCCTGAGTGGAA +AGAAAGTCACCCAGGCTGGAGTGAAGTGGCACAATTATAGCCCACTTGCAGCCTGCAACT +CCTGGGCTCAAGCCATCCTTCTGCTTCAGCCTCCCTGGTAACTGGGATTACAGACACACA +CCACCATGCCTGGCTAATTTTTAAATTTTTTGTCTCGCTGTGTTGCCCAGGCTGGTCTCA +AACTCCTGGGCTCAAGAAATCCTCCTGCCTTAGCCTCCCAAAGTGTTGAGATTACAGGCG +TGAGCCACTGTGCCTGGTTTATGGTTGCTTTTGAATGTCCTAGTCCTACATAAGTGGCTT +CCAAGAAGGGAAAAAGAAAAATGCAGGCAGGAAAAGGTGCTGATCCATAAAAACCTCTGG +AAGGTATGTCTCTAGGGAGGGGGTGAGTGTTGAGGAGGGATGTTGCAAACATGGTAGTGG +AGATGGCAAGAGTGGCTGCCTGCCTCTTCGTCTGCACTTCCACAATCTGAAGCAGCCATC +ATCAATCGGAACACAGATCCCCAGTATTTGGAGGATGGGGCTCTTATTGCCTGCTGTGGC +TCCTGCAAGCTATAGGGAAGCTACCCCAGAAGTATAGGCATGGCTGCTTGCACCACCAAT +TTAAATGCTAATTACTTCTGAAAACACCCTCACACACATGCCCAGAAATAATGTGTAACC +AGATATGTGTGCATCCAGTGGCCCAGACAAGTTGACATATAGAATTAACCATCACAAGTC +CACCCCTTGTCAACCTGGTACCTACACACATCTCCTTAAACCATACTTAACCTCCAAATA +AACACAATAACAAAGTCAGTGTTAGCAGTGGAAGGTGTCTGAGTTAGTGGCAGCGAATCC +GTATGGGTCTGCAGCAACCTCAATTCTTGCCTCCTCAGAAAAAAGAATTCAACTGGCCGG +GCATGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGCAGACCACGA +GGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCTCGTCTTTACTAAAAATAC +AAAAATTAGCCGGGCATGGTGGTGGGTGCCTGTTGTCCCAGCTACATGAGAGACTGAGGC +AGGAGAATGGCAGGAACCCAGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCA +CTCCAGCCTGGGCGACAGAGCAAGACTCCGTCTCAAAAAAAAAAAGAATTCAACTGAGGG +GCATAAGGCAGAAAAAGAGACCGAGGCAAGTTTTGGAGGAGGAGTGAAAGTTTATTTAAA +AAAGCTTTACAACAGGAAAGAAAGTATTCTTGGAAGAGACCTAAACAGGCACATAAAGGT +CAAGTGCGGTGTTTAACCTTGATTCTAGGACTTTATAGACTGACCCCTTTCCCAAGATTC +TTCCCCTAGGGTGGGCTGCCTACATGCACAGTGTCCTCCTTACCCTTGGGAGATGAGCAC +ACGCAGTGTGTTTAGGAAACTGTACGCATGCCCATCTGAAGATTTCTTCCGTCCCAGAAG +GTCATACTCTGCCATTGTCTCTTAATGCACATGGCCGGGGAAATTGCTTCTCCCTGGTGC +CTGCATTCAATTAACACTTTAATGAAACAGGTGTGACCCATCAGGAACTGGCCTCTCCCT +GATGCCAGCTGCCAATTTATCACTTTCATTTTTAATTTATTTTTTAGAAGGAGTCTCACT 
+CTGTCACCCAGACTGGAGAGCAGTGGCATGATCTCGGCTCACTGTAACCTCTGCCTCCCA +GGTTCAAGTGATTCTCCTGCCTCAGCCTCCCGAGTGGCTGGGATTAGAGGCATGCACCAA +CATGCCTGGCTAATTTTTGTATTTTTAGTAGAGATGGGCTTTCACCATGTTGGCCAGGCT +GGTCTCGAACCCCTGATGTCAAGTGACCCACCCACCTTGACCTCACAAAGTGCTAGGATT +ACAGGCATGAGACACCATGCCCAGCCCTGGCTTTTTTTTTTTTTTTTTTTTTTTTTGAGA +CGGAGTCTCGCTCTGTGGCCCAGGCTGTAGTGCAGTGGCATGATCTCGCCTCGCTGCAAG +CTCCGCCTCCTGGGTTCACGCCATTCTCCTGCCTCAGCCTCCAGAGCAGCTGGGACCACA +GGCGCCCACCACCACGCCCAGCTAATTTTTTGTATTTTTAGTAGAGACGGGATTTCACTG +TGTTAGCCAGGATGGTCTCGATCTGACCTCATGATCCGCCCGCCTTGGCCTCCCAAAGTG +CTGGGATTACAGGCGTGAGCCACTGTGCCCAGCCGCTAATTTTTTATTTTTTATTGTTTG +TAGAGAGGGGGTCTCCCTATGTTGCCTAGGCTGTTTTGTTTTTTTTTTTAAACAGAGACA +GGGTCTTGCTATGTTGCCCAGGCTGGTCTTGAACTCCTGGGCTCAAGCAGTCCTCCCATC +TTGGTTTTCCAAAGTGCTAGGATTACAGGCATGAACCACCAGGCCCGGTCTCCACTGTGA +ATCTTTTTTTTTTTTTTTTGAGACTGAGTTTCACTCTTGTTGCCCAGGCTGGAGTGCAGT +GGCACCATCTCGGCTCACCGCAACCTCCGCCTCCTGGGTTCAAGCAATTCTGCTGCCTCA +GCCTCCTGAGTAGCTCAGATTACAGGCATGCACCACTACGCCCAGCTAATTTTTTGTATT +TTTAATAGATACAGGTTTTCACCATGCTACCCAGGATAGTCTTGATCTCCTGACCTCATG +ATCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGATGTGAGCCACCGCACCCGGC +CGCTAATGTTTTATTTTTTATTGTTTGTAGAGACAGGGTCTCCCCATGTTGCCTAGGCTG +TTTTTTGTTTTGTTTTGTTTTGTTTAACAGAGACAGGATCTTGCTATGTTGCCAAGGCTG +GTCTTGAACTCCTGGGTTCAAGCAGTCCTCCCATCTTGGTCTCCCACCCAAAATGCTAGG +ATAGGCGTGAACCGCCAGGCCCGGTCTCCATTGTGAATCTTTTTTTTTTCTTTTTTTTTT +TTTGAGACTGTGTTTTGCTCTTGTTGCCCAGGCGGGAGTACAGTGGTGCGATCTCGGCTC +ACCGCAACCTCCGTCCGCCTCCTGGGTTCAAGCGATTCTCCTGCCTCAGCCTTCCTGAGT +AGCTGGGATTACAGGCATGCGCCACCATGCCTGGCTAATTTTGTATTTTTAGTAGAGATG +GGGTTTCCCCATGTTGGTCAAGCTGATCACGAACTCCCGACCTCAGATGATCCACCCGCC +TCGGCCTCCCAAAGTGCTGGGATTACAGGCATGAGCTACCGCGCCCGGTTTACCATTTTG +AATCTTAAGAGGACAAAGTCTGGTTCTCTAGAAGGCCCGAGTAGCTTTTCCCCTGAGCAT +CTTGAGAAAAAAGTGCTCTGAGCACTCCTTGGGAAGTCCGAGACAGCACATGCAGCCAGG +TGCTCCCTGGCTGCTCTCCCAGAGGTCAGCACAGCTGGAGGCAGCTGGAGGGAGGAGGCT +GCAGAGGCGCTGAAGCCAGGAAGACCCAACAGTGGAGAGAGAAGTTGCCTGGTGACTGAC +CCTCGGCTTGACCCTGTGCTCCCCTGGGCTGAGCTGGGTGTCAAAAGGCCTCCACCCTCC 
+AAAGTGCCAGCCTCCTGCTGTGGCTGGAAAGCATTTGCTCCTCCTCCTCTTCTCACTTCT +TTGCATTCTTCTGATCTCTTAAGGGCGTACAAAGATTCACATGGATATTAAAGTCAATGG +TGATTTTCATAGCTGGAATACTCGTTGGGAGAAAGTAGGAATTCTTTTTTTTTTTTTTTT +CTCCCAAGCTCAAGCGATCCTCCCACCTCAGCCTTCTGAGTAGCTGGGACCACAGGTGTG +AGCCTCCATGATGGCCTGGCTAATTTTTTATATTTTTAGTAGAGATGGGGTTTCACCATG +TTGCTCAGGCTGGTCTCAAACTCCTGAGCTCAGGCGATCCACCTGCCTCGGCTTCCCAAA +GTGCTGGGATTACAGGCGTGAGCCACTGTGCCTGGCCAAAAGTAGAAATTCTTGATTGGG +AAATAATTTTATATATTTCGGCCCCTTCTCACTTGGGAAAATTCTGCTAAGAATTCTGGT +TTTGAGAGTCAAGGGAAGCTAGCTTAATTTTTAAAGTGAAGGCTACATAAGAGTCCTCTC +TCCAGTGTGGTGGCTCACACCTATAAACCCAGCGCTTCGGGCAAATCACTTGAGCCCAGG +AGTTCAAGACCAGCCTGGCAATGTAGTGAGACCCCTATCTCTACAAAAAAATTTAAAAAT +TAGCAGGGTGTGGTGGTGCGTGCCTGTAGTCCTGGCTACTTGGGTGGGATGCTGATGTGG +GAGGATTGCTTGAGCCCAGGATTTGGAGGTTGCAGCGAGCCATGATGGTGCATTGCACTC +CAGCCTGGGCAACATGGCAAGACCTCGTCTCAAAAAAATACAAAGAGTCCTCTCTATGTT +GGAAATGTCAGAGTTATTGGCCATTTAGGGGATCTAACTTGCTCTTTGATGTCCAAGAGG +AAAGGTATAGAGTTCTCCTGATTACACAGGGCCTCTGCTTGTGGGTCGCCTGCATGAGGC +GATCAGAATCTGAGTCACCTGGGATCATGTGGGTGCTGGTACATCCTAGAGATAGGATGT +GTTGGTCATGGAAGAGAGTGGGCCACAAATGGGTTCCAGGGGAATAGCGTTGCCTGGTGC +AAGGAGGGGGATGTGGAGGTGCAGGGAGCAGGGGTGGTGTCTGGAGAGCCTGAAGCTGTG +CTGCCAGAGGCTCCAGGCCATGAGGTCTGAGAAGCATCTAGATTTGGCCTGTTCACAGCA +GGGAAGTGTGGTCCAGTGGCAAAAAGTTGAGGGCATCCTAGGAGTTGAGGAGGTGGTGGC +GGTTAGAGTAAACCACTCTGGAGGCGTCCGGGGTGGAGAGACGAGAATGTGCAAGAACTC +CTGCCTCTAGCCTGGCCTCCTCAGGACCCCGGCCACCAGCCTCCCTCCTAGAAGTCCCCA +GAGCTTCCTCCTCCTTGTGCCCCAGAACTGGTTCCTGGGTTGCTCCAACCCCAGACCTTC +CCTGGAAGTCCCCCCCAGTGCCTCCTGGCTGGGAACCCCCAACTGCATTTTGCTTCTGCT +CTCGTGCCAGTAGTCCCCTTTTCTCTCCAGTGTCCAGGCCCCAACCACCAAGACCCTGCC +TCCTCACTTGGCTTTGCTGTTCTTAGTTCTCTACTCAGTAGGTTGTGTCTGCCTCCATTC +CTGGCCAGACGCTGCCCTGGGGCTGAGTGCACAGTCCCCAGCCACTCCCGAGATCCTCAG +AGCTTCTGGAAGCCACAGACATGGTGGCATATGTTTCAGGTCGTCAGCTTGTTTTTTTTT +GGAGACAGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCGGTGACGTGATCATTCACTGCA +GCCTTGACCTTCTGAGCCCGGGTGATCCTCCCACCTCAGCCTGTCAAGTAGCTGAGACCA +CATGTGTGAGCCACCACACCTAGCTTTTTGAATCTTTTTTTTTTTTTTTTTTGAGACGGA 
+GTCTCACTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTTGGCTTACTGCAACCTCC +AGCTCCCAGGTTCAAGTGATTCTCCTGCCTCTCCCGAGTAGCTGGGATTACAGGTAGGTG +GCACCACACCTAGCTAATTTTTATGTCTTTAGTAGAGACAAAGTTTCACCATGTTGGCCA +GGCTGATCTTGAACTCCTGACCTCAGGTGATCTGCCTGTCTTGACCTCCCAAAGTGCTGG +GATTACAGGCGTGGGCCACGCGCCCAGCCAACTTTTTGAATCTTTTGTAGGGATGGGGTC +TTGCTGTGTTGCCCAGGCTGGGCTTAAACTCCTGGACTCAAGTGATCCTCCCTGCCTTGG +TTTCTCAAACTTCTGGGATTACAGGCCAATCTACTCAACGTTGATATTGAGTAGAAGGCA +AAATTCACATGGCTCCGGCCCAGCCCCTTTCTGTCTCAGGCACTTCAGTGTTAATATGCG +CCTTGGTTTTGCTGCTGCCGAGTCTACTGTGCCCCTGGGCAAGGAAAGCAAAGGAAACCA +GAGGGCCAGGCCCGTGGACACTGGGGTGAGGGGCCAACAGGGTGCCCAGCAGTTGTTTCC +ACAGCCCCCACGTGGTCAGGGACCCTGTATGGGTGAACCGCCTGGATACTGAGCCTAGGC +GGCGGAAAACTCAAAAGCAAGGGCAGGTTTATTCAGATCCCTGGTGGAGTTACAAGCTTG +GTTTGGTTTTTGGTCACAGGCCCTAAAATACCTGTATTCTGAGTTTACATAAAAATACAA +AGCACTGGCCGGGCGCGCTGGCTCACACCTGTAAGCCCAGCACTTTGGGAGGCTGAGGTG +GGTGGCTAACATGAGGTCAGGAATTTGAAACCAGCCTGACTAACATGGTGAAACCCCATC +TCTACTAAAAATACAAAAATTAGCTGGGCATGGTGGCGCATGCCTGTAATCCCAGCTATT +CGGGAGGCTGAGGCAGGAGAATCACTTGAACCTGGGAGGCGGAGGTTGCAGTGAGCCATG +ATCGCACCGCCGCACTGCAGCCTGGGCTACAGAGCAAGACTCTGTCTCAAAAAAAAAAAA +AAAAAAATCACCACCGAGACGCTGTTCTCCCTGGTGTGTCATCTGTGGGGTGAATGAAGG +TGTGAGATACCCCGTGGTGGTTTCTGTCAGTGAATCTGTAGTTGTCATATATTTTGAATA +CTTCAATATCATCGAACAGTTGCTGTGTCTTTAAGCACTAGATTTTATTGAAGCTACCTT +CTTCCCTTTTTAGTCCCATTTCCCCTAGATGCATCCACGACTGCAGGAACCCCCCTGCCC +CCACCAGGAGCCCAAAGATAACCATGTTCCCACCTGGAGTGGCCTCCCACCTCCAATGCC +ATCAGGGCTGGTGCAGCTTCCCCACCTGGTCAGCAACATCATGGCCAAGGAGTAGAGGGC +GTAGCCCAGGGTGAACAGCAGTGCACAGGTGAGGGGCCTGATGCAGAACAGAGAGGCCAT +GAAGAAAGCCAGCAGTAGCCAGGGCAGTGTCAGGTAGCTGCCTAGGAAGTGCAGGTTGAG +CCTGTGGCCGCTGGCCAGCGTGGGCTCTGTGAGGTGGTAGCAACAGAAGCTGGCAAGGAA +GTCATCTCAGGAACACAGAGCCGCCATCTGGTCTGCAAACTGAAGCGCAGGCAGTGGCAC +GGGGGCATGGGGTGGTGGCACCACAGGCCTCAGGACAGCTGCACCCCACCCCCACAGGCC +ACCCTGAGACTGCACCCCAGGCAGGAGCACCCAGAACCCTCTCTGAAGAGGCCAGGCCAG +CCCTCCTGCCCACTCACTCTGTGTTTGATGGCAGATGACAGCCGGAAGGATGCTCAGACC +AAGCTGGTAATCTCATAACTCCAGAGGGGCTGCAGCTCCGGGTCCCCCTGGTACTCAATT 
+TCAAACCTTTGCAGCCTGTTGATGATCTAGAAACCCAGGCTATAGGGTTGGGCCAGAAAA +CATGGCCCTGGCCAGGTGCAGTGGCTCATGCCTGTAATCCCAGCACTCCGGGAGGCCGAG +GCGGGCGGATCACAAGGTCGGGAGTTAGAGACCAGCCTGATCGATATGATGAAACCCCGT +CTCTATTAAAAATACAAAAATTGGCCAGGTGCAGTGGCTCACACCTGTAATCCCAGCACT +CTGGGAGGCTGAGGCAGGTGGATCACGAGGTCGGGAGATCAGGACCATCCTGGCTAACAC +TGCGAAACCCCGTCTCTACTAAAAAAAATACAAAAAAAAATTAGCTGGGCTTGGTGGCGG +GCACCTGTAGTTCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAGCGTGAACCCAGGAGG +TGGAGCTTGCAGTGAGCCGAGATTGCGCCACTGCACTCCAGCCTGGGCTACAGAGCAAGA +CTCCGTCTCAAAAAAAAAAAAAAGTTAGCCAGGCATGGTGGCATGTGCCTGTAATCCCAG +CTATTCAGGAGGCTGAGGTGGGAGAATCGCTTGAACCCAGGAGGTGGAAGTTTCAGTGAG +CCAAGATCATGCCACTGCACTCCAGCCTGGGCAAGAGAGGGAAACCTTGTTTCAAAAAAA +AGAAAGAAGAGAAAAGGAAACATGGCCCCTGCACACAACTCAGAAGAGCCAAGCCATCTC +TGCTCACAGGGGAGGAGGGAGCTGGGGACACGTCAGGGCCCAACCCATGCACCTCTGGGA +CTTGGGCCTCTCACCTGGTACTGGCCCAGGGGCGTGGGGATGAGTCTGTCCTCACCCGCG +ATGCAGTCCGGAGCTGCTTCTTTCCATTCTCATCCTGGATGGTGTCCAAGGCAAGTGTGA +GAGACTCTGCGTGAGCTGCACTTCACTGAGCTACCAGAGAGAGAGAAGCTCCTCTGCCCT +CAGTGTCAAGGCAAAGGAACACCAGTCTTCACCCCACCAGCCTCTAACAGTGGCAACGAG +AGCACCCGGACATGACACTCGGGGTGTGGGCCTGCCTCCCACAACACCGGGTTCATCTGC +ATGGGGCCTGGGACGGATGCATGCTGACCATGGGCAAAGGGGGAGTGGATCCTTCGGGGA +AGGGGACCCTAACGGCCGTGGAACAGGCCCCATTGCTCATACTCGGAATATCTGGCGCAG +GTACGCCAGGGCCTCCTCCAGGCACTCGTCCATCTTCTGGACGCTGTCCTGCCCCATCTC +GCCCAGGTCATGGCTGAATAGGAGCCGTTGGTGTCTGTGGGGCCAAAGCCCAGCCACAAC +AGGAAGGAGTGGCCAGCTGGAGTCTCCACACACTGGTCCGAGATGGACCTGGCCGTGTGC +TTGGCCTGCGTGATGAGCTGAGTAAGGTGCAGGACCTGCAAGGGAGGCGAGGGCAGGTCA +CCGGCAGGATACGCCTGTACTGCCACCCAGGCAGGCTGGCTGGGGGCCCTAAGATGGGTG +CACAGCCAGATCACTGAGTGACGGGAGGAGCAGCTTCTGCTGGAGGACTTTTAAAGACTA +AAGAATGATCTTCTAAATGTGAAATTTCCCAACACTGCCTTCGTTATCAGAAACAAATCA +GACTTTACTGAAACGGGGAAAGGAGATGTGCGTCAACTCTCCGCCCAACCTCCCTCCCTC +CCGCCAGAACACAGCACTTCCCTGCCTCTGCTCAGCCTTGTGGCTCCAGGGGTCCTGTGA +AGGGGCACCGCCAACCCACTCACCAGGGTGTAAGCCTCGGGCCCGAACATCGGGTGTACT +TGCAGTCCTGGCCCTCCAGGCCATAGATGTGACTCTTCACATTGAAGGAGGCATCAGTGA +CCACTGGTGGCCACTATGACAGGATGCTGCTGGCGAATGTGCAGGCTGTGAAGAGTCGGT 
+GCTGGCGGTAGGGGGATGACGAGCTCCGGCTCCGGGAACAGCTGCTCGAACTGGAAGGCA +GGAGACGAACCTAGCGCCCACTTCCGGGGGCACAGAGAACCTGGTTGCTGCCCACAGGTT +TTCAGGATGCCTCCGAGGTATCTGGGGAGCCATGACCCAGGGGTGTCCTCCTGAGGTCCT +GCCCAGCAGTGTCTCCCATGTAGCAGGTGGCCGACATGGGGAGGGCAAATGAGGGAGCTG +GTGGCTCACAGAGCCCCCGACAGTGTCTGCAGGAGGGCAAACACCAGGAGGAGTGACAGA +AGCCAATGTCCAACCCAGCCAAGACAAACGCATGCCAGAGACCAAGACCGCCTCCCAAGG +ATGCTGCCACGAGTTGCCAGGGAGAAACTGTGGTGCCATGGGCCCGCAGCTTGGTCTGTG +CAGACCCCCTCTGTGGGTATACCTGCGGTCTTCCACCCAGTGCCTGGTGCCTGGTGGCTG +CAGGGCAGGCAGGGTCCAGCCGGTCTCAGCCTTAACTGTGCAGTGTTATGGATGACAGCT +CAGGAGACTGTTGGCAGGCAGGGAGGGCAGGACACAGGCAGGTGAGACCCAGGCCCTCTG +AGGTTGCTGCATCCTTAGCAGGCCTGAGGCCCCTTTCCAAGAGGACTGAAGCCACCATGG +CAGAGGCCAGCCCAGAGCCTGAGGGGGCTGAGGGAGGATCCCCCAGAGGGTCCAAAGGGG +AAGGCCTCGGGCAAGGAAAGGAGAGGTCAGTGGCCACGCCAGGTGCCATGGAAGCCTGGG +GGGGTCTGCTGTGAGGATCAAGCAGGAGATGAGGGGAAGGTGGCCACTGGTCTGCTCTGT +GCTGGTAAGGAAGGGTTAAGGAACAGTTTGGGGGTCACCATGAAAATTTGTCTGGGCCAG +AAGGGAGAGGGGACAGGTCACCCACTGCTTCCATGAGAAACCCATTCTAGGGCCTGGCGA +AGTTGAGCCTAGGAAGGGCTGGTCTGTGCTCCACTCAGCAGCAGCTGGGAGAGGGCCAGG +GCCAGGCTGACCCCAGCACAGGCCGCTGTCTTGGCCTCCACCCACTGGAGCAACATTTGC +CCAGGGGAGCCAGTGGCATGTGAGGAGAGGGGGCCAGGGCTCACCATTTCTCAGACACAC +TGGGGCTGGGAGTTGCTGCTGGCAGCCTGCTTGTCAGCTGCATACCACCATGGCTGCAGG +TAGCTCAGCCACACCTCCAGGACCTGTGGGGGAGGTGTGTGCTGAAGGCCCAGTGGCTGG +GGCCAAATCCTCTGATGGCCTGGAACCCAGGTACCCAGGTGGCAGCTAAGTTGGCTCTTC +CCACGTCCCCCAGAGGGCGCAGGAGAGACGCGCACAGCCCTGTGAGATTTGGTTCTCTCA +CTGCTCTTCAGAAGCCAGGGTCATGGGCGGGCTCTTCCCGGCCCACGGGACCCAGTGCTA +CCTGGTTGCGTAAGAGTAGAAACCTGTCTGGCACCAGACAGGTGGGATGGGGCAGGGCTT +TGCTGTGCCTGAGGGTTGCCTTTGGAAACTGAAACAGAAAGAACAACGTGGCCCTCCAAG +GCCCCCTCCTCTCCAAGGTCTCTGTGACAACACTTGTTCTCCTTCATCCACCCCAGGGGA +CCTCCCTCATTGGGGCACACAGAAGACACAGGCACACATGTGATGGGGCCGACACTCCTA +GCTCTGAACAATGCGTCCAAGGGCCAGTGGCCAAAGCAATGCCGCAGGAAGAGGTAGAGT +TTCTGCTGGACTAACCTCGGGACAGCGGCCTGCAGAGGAGGGGAGGGTCACCAGCCGCTT +GCACAGTAAGGTGGCCCTGCTCACAGATGAGCCCGGGGTTCCCATTAGTGAGGGTTGGCT +CTGCTGGGAGAGAGCACTAGGCACTTCTGAGTGGTAAGAGCTGGTTTCAGCCCCATTCTT 
+CAGAGGAAGAAACCGAGGTTAGGGAGCATCCCGAGGACACAGCATGCAGGCGGCCAAGCC +ATTGCATCTCCTGCACACACCTCACCTCAGCCCCACACAAAACAGCCCACTCTGCCAGCC +CCGATGGGCCTGGGAGTCAGGACCCGCCCCAACCCTTCTCCTGACGGGTGGGGAGGAGCA +AGAGACGGCACCGTGATGGGGCCTTTTTGGCCAGTCTGGAGAGAGAGGCTCTGGGAGCCA +GAGATCAGCCGGCAGCCCCACCAGGCTAGCTGAGAAGGCACTTGCGTAGCCCTCCAGAGG +CCTCACGTGCTCTGTGAGCAGGAAGGGATTCCCCTCACCCTGCCCCCCATGCCCCGGGGA +GAGGTCGCCGTGGGCAACACCGGAGGAGGGGAACGAGGAGGAGACAGCAGCGGCTGGAAG +CCGCAAGGGCCACCCACTGTTTGAACTCCCCCAGCGGGCTGGTGGTGTGGGAGTGGACGA +AAGGTGAGACCTGCTCTGGCTTCAGGCTGTCGGCAAAGGCGTGCAGGTGCTTCAGTGGCA +GGCGCACCACCAGCATGTGCCCCTTGGTGGGCATGAACGACTCCTAGGTGGAGGAAGAGG +AGTCAGGGCCTGGGAAGGGCTGCAGCACGTGGCCTCCAGGTCTGACCACAGCCCGCCCTG +GGACAGCTGATCTGCCTCCACACAGGAGGCACGAGCCAGGCAAAGAGTGGGGCCACCACT +GTGCCTCACACCCAGCGGGTGTGCCTTGCACTGCTTGGTCTGAACTGCGACCAGCTCGTG +GTAGGGAAGTGCTTGGCCCAGCGCCCACACAGCCTACCTGGGGAGAACTCGCTGAGACAG +ACGCATCTGCTTCTGCACCGCTGCACACCCGCGGTGGGTGGCACAGGGCTATGGCCCGCC +AGACTGTGCCCTTGGCCATTGCTGCCTATCTTGATCCTCATGGCAGGAGGAGGCCGATGT +CACCCCCAAGCTCTGGATCCAGGGTCTCCCAGGGAAGATGGCTAAGGCCAGGAATTCGAG +ATTCGCTTGGGCAACATAGGGAGGCCCTGTCTCTACAAAAATTTTAAAAAGTTAGCTGGG +CATGGTGGTGTGCACCTGTAGTCCTAGTGACTCAGGAGGCTAGGGTGGGAAGATCGCTTG +AGCCCAGGTGTTCTTGGTGCAGTGAGCTAGGATCGTGCCACTGCACTCCAGCCTGGGTGA +CAGAGTAAGACCCTGTTCAAAAAATAAGAACCTTCACTTTTCTCCCCAATTCCTTCCATC +TGCTCCTCATTCTGGACACAGCTGGATGAGGGCTCACCCTGGGAGAAGCGCTGGGCATCA +ATTCCTCTGTTGCCCACAGACCTGTGGTCAGGAAAACCCCAAGACAAGAGCACAGTCAAG +CTAACAGGCCAAGCTTTTGATTTACAATGTACAAGCAGGATCCTGGAGACCTACACCCAA +GAGGAAAGCCTTTTTGCTGGCTGTGGGCTTTCTCTGGTTGCTCAGCATTGAGAGGGCAGG +GGGGAGCATCAAGAGTGGATGAGGGCAGGAGAAGGGGGCACTGAGGGTGGGCCCCTGAGT +CCGAGTGGACATGCTCGCCTCTCCATGCTTACCCCCAGCCCTTTGGGCCTCAACACATCT +GTGATGCTGAGACTGGTGCAGGGCCCGGCCTCTGGGTATATCCTGGGATGCTGCAGAACC +CACATGGACCACTGCTCACATCACTGCCCACTCAAGTCTGCCCGTCAGGAAAATGTCCCC +ACAGGTCCAGGAGACACACCCCACAGATCAGTGGCTGAGGCTGGGAATGGAGGTATTTTC +TTCTTCTTCTTTTTCTTTTTTTTTTTTTTTTTGAGACAGGGTCTCACTCTGTCACCCAGG +CTGGAGTGCAGCAGCACAATGTCGGTTCACTGTAACCTCTGCCTCCTGGGCTCAAGGGAT 
+CCTCCCACCAAAGCCTCCCGAGTAGCGGAGACCAGAGGCACATACCAACATACCCAGATA +ATTTTTCTATTTTTTTGTAGAGGCAGGGGTCTCATCATGTTTCCCAGGCTGGTCTCAAAC +TCCTGAGCTCAAGCAATCTGCCTTCCTTGGCCTCCCAAAGTTCTGGGATTCCAAGTTTGA +GCCACCGTACCTGGCTATTTTCTTCTATCTCTATCTTTAAGTATTTAGGCAATGAATTTG +CATTGCTTTGGAAGGAAGGAGAAAACCAGTACCTATACGGCCCGACTAGAATGGTACTAG +TGGGGTGGGAAGTTAGACCAGGGTCCTTCTCCTCCAGTCTCACCCCTGGCCCTCCCTCTG +ATCCTGGCCTGGGATCCTGAGCCATGACATCCTCCCTGCAGCTCACGGGAGGGGACAGCA +TCCACATAGGAACTGGGAACCTGCCTTCCCCTGCAGGTTCACTTACCCAACATCACCTGG +TCCTTCCAGAGGGTCTGGAGGGGCCAGTGTCTGCCCCTGCCCCCAGCAGTCACATGAAAG +CCTCTCAGGCTATTTTCCCACAGATTTGCTGAGCCAAACTTTTCCCTCAGCTCTGTACCC +CTCCTGGGCCACATTCCCATGTGTAGTCCCTGGCCACAGTCACACCTGCTTGCTTCTCAA +AGATGTGTACCCAAAGCCCCGCATTTCCCACTGGGCCCAGCCTCACAGCACCTCAGCTCA +CTCCTCCAGGGTCTCCCTGGTCACCTGTCCTGCCATGGGGGTTGCTAGATGCTTCCTGGA +CACTGCCCTGGCCCAAGCCCTCATGTCCCTCTTCTCCAGTGGCTCCTGCGGAACAGGGAC +GCCATCACATGCAGTGGTTGCCATCCCAACCCACCGGCACTTGGGGCCCACATGTTTTTT +TCATCTTGCGCTCAAATTCCCATTCAATCACGTGTGCTCCTTCGCAGGGTGAACGGGACA +GACCCCTGGCTGTGGGCACAGCTCGTCTCCCCGGGCTCTCACAGACTCACTAACTCTGTC +GTCCCCGAAATCCCCCATGGGCCTGAACTCTGCTTCACTGACCTCAAATCCATCCATACG +CTTGTCATTTGTGCCTAGGCAGACCACAACCTCTATGAGGTCACTACCTATTCTCATCCA +CCTCCACAGGTGTCTACGAGTGGCCAGGGCTCCCTAGGTGAAGGACTGACAGCCTCTGTG +AGCTATAGGGCCAAGCCCCAGGTCCTCCCCAACCCCCTTGCCTCACACACAGTCTGGGTC +ATGGAACCTCCAAGCTGGATGACAGTTCTGAACAGGGCAGCACCCACATTACCGTAAGTC +CCAGGCCTCTGTGTCTTCAGCTCCTTCCTTAGCACTGGGCCCTGCAGAGTGGGGGTGAAC +CGGGCCACTGCAGAGGCCTCACGACTCCAGCTCTTCAAACCAGGGCCGAGCACAGAGCCT +CCAACACCAGAGAACTTGTGACACCTCCACCACTGGTGGGGAGTCTGGCCAGCCCAGGCC +ACATGCCTCATTCTGACACACAGCCACACCCCACTCCCACAAGCCAGGCTAGTCTGCAAA +GCCAGGGGCCAGGCCAGGCTGGAGGCTGATCTCTGCCTCCCTCAACACAGGGGCTTCACT +CTGCCTGAAACTAGCCTCTGAGTCACCTGCAGCTGGGCCCAGCCAGCCCCTTGCTTTGCC +AGGCGCAGACACGCACTCCGGGCAGGGGAAGGGAATGAACGAGCACAGCCAGTACTTGGT +AGGTGTGGAGGGCCTGGAGGTTGGGCTGCCGGGGGCTGTGGTGGGCACTGGAGACACAGA +GTCGGTAGTGCAGCACCTCCAGCTGAGAGAGTGAACAGAGAGAAGCCGGAAGAGGCAGAA +GAGAAAAGCATGAGAGGACAGGGTGGTGAAGGAGAGACATGGTACCGGACATGGGAGGGG 
+GAAGTGGAGAAAGAAAGCAATGAGAACGAGCCCAGAAGTGAGGAGAAAAAAACAACAAAA +GCATGGTCAGTGACCATCCAAACAACTGCAGTCCCAGCAGCTGTGACCAGCGCAGCACCC +TCCTCGGCAGCCAGGCCAGAGCTGTGACAGCACTCTCCTCAGCCTTGGAGGCTGGGGGCG +GATAGACCCTCCAGGTCACCTGGGTGGCCTCTCAATGTCGCTGGGGCCTAAAGGCCTCTC +CAGCTCATTGGAAGGGCCCAACTCACTGCCCTTCCAGATGGGACCCGCTCTCCCCAGTGA +CTCTGAGGCCACACCGTGTGTGCCATGTGCACACCACAGCCCTTCAAGTACCCAGGGCCT +TCTCTCTGGGACATGCTGTCAGCCACCAAGGCCTCAAGATCCTGAGGCCCAGAAAATAAA +CTGGCAAAGCATGTTCTAGGAGGAGGACTCTGCCTAGAGAAGACACATATGTAATTGACT +GGATTTCCCTGGGTTGTTTTGAGAAATTTATCAAATTTGTATATGACATGATCAATTCCT +ATGTCTAAAACCCAATCAATGTCCCCTCCATGAGTGGATCTCTCTGAGAGCTGAGGATGA +GGACAGGTCCCCAAGGGTAGCACAGGCTCAGGCCCCCAGGCCCTGCCCTCCGCACTCCCC +CAGCCTGGAGGTCAGCCGAGGTGGTGTGTGCATCAGCTCTGACTCCAGGAGGCAACCTCT +CCTACCGCATCAGGAACTCCCACCCTCCAGATATGGAGGGCTCCCAAGGGCAGGAGCCCC +CGAGGACCTGGCCACAGCCACACACACCCAGAGGCAACAGGAAACTGAGGCTAAGCTGGC +CCCCAGCCAGGACCCTCCAATCTACCACCCTGCAGCGCTGCAGCCTCCGAAGCACCCCTT +CGGCCCAGCATAGCCAGAGCTGGCCTGGTGCATATCCTGCCCCGTGAGCGCCATGCCTCC +TGCCCAAGTAGCACGACCACTGGGCTCCACACAGGAGACATCAGAACACACCTGCTAGAT +GTCACAACACGCAGAGCAGGGCGGGAAGGAGCAGGGCAGAGAGATGTTGCCCTTCACAGC +CTTCCTCTGTGCCAGACCTAGGGCTCAGGCAGGAAACAGACACACAGTTCTCCAGTGCGT +CTCTGCCCTTGCCTAAGGAACACGGTGGCTTGTGGGGCCATTGGCCATCTTTATGGCCCA +CGGGGCCACCAGGGAGCAAGCATGCACCTCGGATTCAACCAGGGTGTGGAGGGCAACCCT +GGGAGCCTCACTTCCTGCATCTGAAAATAAGAGCTAGCAGAGACAGGCAGGGATTTCTGT +ATGGCAGGAGCAGCATAGGCCTGGGCAAGCGGCAAAGGGCCAGGGGCCTTGGGGGCCCCG +ACCTCCACAGCTGCCTTTCTCTGACCCCCACCCACCCAGTTTTCTCACGTTTTGTTTTTT +GCCTCAACTGGAAGAGGCAAAAAGTGGAGCATCTCTGGGCACTTCGTCCCACCACTGCAA +GTCATTCTGAGCAGGACTCACGCAACCCAAGGTCTGGCCAAGAGGAAGCGAACATTGAGA +AGAGGGGAAAGGCACCTCCTGCCCTCTGCAGGGACGACGCCTTGCCAGTTCCCCTGACCC +ATGAGGACACAGAATCCTCACAGGTGACAGCCCAGAAGAGCCTTTCTGGCTTGCCCTTGA +AAGTAGGTGTGTTTCCACTCACTCAGTAGGCAGAACCAGCAGATAGGAAGGTGACACAGT +GCAGCGGCCCCTGCCCCTGCCAGGTCAAGCCACATGCAAGAGCGCTGAGCAAGTGTTCTA +GGGCGCCAGGGAGAAGCGGGGCCCTAGGAACCAACCAGCCCCCCAGGCACCAGCCAGGCA +TATGCAGTAAGTCTGCTCTGGAAGCGGGAAGGGCACTGCAGGCTCTTTGTGACTCAGGAG 
+GCCCAAGATGGGGTCCCCAACTTTTCCTTGTGCTCCTTCCAGGTGATGCTGAGGGTGAGG +ATTTGGGGGAACCTATGCTCTGAGAGAGCCTTCACATGGCAAAGCATGGGGCCACAGAAC +TGGCTGCAGAGGCTGTGGCAGAAACAGACCCACAGCTCTTCTCCAGAACCGGAGGAAAAG +TGACAGGAGGCTCTTCTGAAGTTTAACTCCATCACCCACAGATCCTTCATTCTGGATGAA +GCCTTCTGGAACTTTGTCAGCAATAAAGGCAAAGTCTCCCATGCATAGACGCCACGGGCA +CCGGACTACAGGATGTGTGCCCAGGGCATGCTGCCTAGAGAGAGAGTGGGGAGAGGCGAG +GCGGAGGGGGAGGAAGCAGGCCAGGCTGCTTGTAGGAGGGCGCCAGGCCCCACACACCAT +GAGTATGGCGGCTTGCTCATGCTCTGGAGGAGGTGGGGGAGTGGGGGGCAGACAAAGGAG +GCTAACTATGGTGCCACTATGGTGGGCATGGGTCTGCCCTGGGCCATCACTTCATGGGAA +AGCACAAAGGCACTGCTGATGGTGGAGGAGACAGACGAGAACGCGGCTCCCAGAGGCATT +GCAGCTCCCCCACCCCCACAGCAAAAGTGCCACAACCTCATCTGTTCCTGTTGCCAGGGA +GGGGGCTCCGGAAGCTGCACAGGAAAGGCCCTGGAGAGGCCAGACCTCCAGGCCTCAGTG +TCCACATATCAGGGTGCAATGCCACCTGCTCCTCATCGGGAACTAGAGAACTATGCCCAC +TTCTTATTCAGCTTCTGAATGAAGGGTGGGTGGTGGATTTCTGCTTCCCACCATCCGGTC +TGTGAAAACTCACTCAAATGCAGGATAAGAAGCAGCAGGCTGAGCATGGTCAGAAGCAGG +AGGTGGGAAGAAAAACTATGGAAACCTACCTTGGCATGAGGGGAACTGCATTTTTTGGTA +CATCTCCAGAGAATAGTGTTGAAGCCACATTTCGACAAAAACCTGCAAAAGAGCATTACG +TAGTCAAATCATTCTCGATAACTGCCAAGACCATTCAGTGGGGAAAGGACAGTGTCTCCC +ATAAGCAGAGCTGGGAACACAGAACACCCTCATGCAGAACAGTGAAGCTGGGCCCTGCCC +TCACACCATCGACAAAAACTAACTCAAAGTGGGTAAGCGGCCTGAGGATAAGAACTAAGA +CCATAAACTTACAGAAGAAAACATGGGAGAAAGCTTCATAACACTGGATTTGGCAATGAT +TTATTAGATATGATGCCCAAATCACAAGCAACAAGCAAAAAAATACAGCATCAAAAGATA +CTACTGATAGAGTGAAAAGAACCAGCGGGCATGATGGCTCAGGCCTGTCATCCCAGTGCT +TTGGGAGGCCGAGGCAGGTGGATGGCTTGAGGCCGGGAGTTCAAGACCAGCCTGGGCAAC +ATAGCAAAAAGTTGTCTCCACAAAAAGCACAAAAATTTGCTGGGTGTGGTGGGGCACAGC +CATAGTCCCAGCTACTGGGAGTCTGAGGTGGGAGGATCGCCTGAGCCCAGGAATTTGAGC +TGTAGTGAGCCAAGATCATGCCACTGCACTCCAGCCTGGGAAACAGAGCGAGACCCTGTC +TCAAAATAAATAAATAAATAAATAATAAAATAAAAATAGAAGCAACTCCAGTGTTCACTG +AGAGATGAATAGATAAACACAACATGGTACATCCGCACAAGGGAACACTATGCAGCCTTA +AAAAGGAAGCAAATTCTGACACATGCTGTAACATGGATTAATCTCGAATCCATTATGCTA +AGTGAAGCATGCCAGTCACACACAAAGAAGTACTGTGTGCTACAGAAAAATGCTTTAGAT +ATGAGATCTAGGAGTCAAACATATAGAAACAGAAAGCAGAATAGTGGTTGCTAGGGGCTG 
+CGTGGGGAATTAATTGTTCAGTTTCTTTTTTTTTATTAAAAGAGATGGGGAGCCAGGCAT +GGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGCATCATGAAGTC +AGAAATTCAAGACCAGCCTGGCCAAGATGGTGAAACCCCGTCTTTACTAAAAATATAAAA +ATTAGCTAGGCACCGTGGCAGGTGCCTGTAATCCCAGCTACTCAAGAGGCTGAGGCAGGA +GAATCACTTGAACCCGGGAGGCAGGGATTGCAGTGAGCTGAGATCGCACCACTGCACTCC +AGCCTAAGCGACAGAGTGAGACTCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGA +GAGAGAGAGACGGGGTGTCACTATGTTGGCGAGGCTGGTCTCAAACTCCTGGCCTCAAGC +AATCTTTCCCACCTTGGTCTCCCAAAGTGCTAGGATTACAGGAGTGAGCTCTCATGCCTG +GTCGGTTCAGTTTCAGTTTTGCAAGATTCTGGAGATTGGAAAGGAAGCAAATTCTGACAC +ATGCTGTAACACGGATTAATCTTGAATCCATTATGCTAACTGAAGCATGCCAGTCACACA +CAAAGAAGTACTGTGTGCTACAGATAACAACAACGTGAATGTATTTAATACTACTGGCTG +TACACTTGGAAATGGTTAAGACAGTAAATTTTATGTTATGTATATTTTACAATTACAGTT +TTTTTCTTAATTAAAAAAACGAAAAGCATTAATGCTTTGGTCCAGGAAACAGCAGAACCT +TCCAGTTCTGTCCTGTTGTAAGCTTGTGTCATGGTCTCCTGCCCTCTTCACAGCCAGCAC +ATCCCATGGTCCACACAGCCAAGAGCCATCTGGGTATCATGGTCCCCTCTGGGGACCATG +CCCCTCCCAGGGACAATGGTCCCTATCATTCTAGCACCCGAGGTTGTTGTGAAATGGTGT +GGAGGCTTCAAAAAGCACGGCACCCCACATATTAGGAGTTATGTACAAGGTCGGAGACAG +AATCCTATCTTATCTACTTCCCAGGTGTCAACGGCGAGGCTGTGGCTACCAAGTCAGTCC +TAGGATCCAGTGTCAGGAGCAGCACCTGCCAAATCTCTCCAAGTTATGCTCAGAAGACAC +AGCCAGGACCTGGTGGATAGCACTCAGGGAGTGAGCCAGTCCACACCATCGTGAGAGGCT +GGAGCCCCCCAGGACAGACAAGGGACTCTGTCACCTACTATTTGGATCCCCTGAATCAAC +AGAAGGCCTGTGGAGGCTGCACAAACCCCTCACCATTCCGTGTCACAAAAGTGCTACTGG +GGCAACGCTTAATATCCACATGATGGTAATCATAGGAAGCATCTCCAGCAACACGAGAGA +CGGACACCAAGAGACCGCAACAATTATATCACAATGGGCAAATGAAACGACTCCGATGGA +CAGGGGATCCTGGGATGAAAAACACCATGAATGACAAGTAAGTTAGGTGGGTGGGCAGAC +TTATATTCATGTTTTCTAAATGTCATACCAGGAAAATGCCAACAATTTAGAATCAGCCCT +GGGCTAAGCGGCCGCCTCTGGGACTCTGACCCATATAGTGGAAGATGGCTGACCAGTTTC +TTTTGAAACAGACGTTGAGACCAGCTCAGCTCTCACCCGGAGCAGAGTTTCTGACCTCCA +GATCTCCTGGGAGGCGGGGTCTGCATTCACAGACATCTGATGAGAGAAGTGTCGCTTCAG +GGGGCTAGTGTGCTGGAGGCTACAGGAAGCAAAGGGCATGGCTGGTGTCCTGAGGGAGAC +ACGGAAACAGGCCCAAGGTAGGAGCTCCTGCTGGCCAGGCCAGGGGCGATCTGGACATCA +AAATCAATGACAGCAGGGACGTATTACAACTTGTTCAATTGCACAATGCACAAACCTCAC 
+CCCTGCCCCAAATCCACACTGATACACCTGAATAAACAAATAGCAAAGAGAAAAGTCTTC +CTTCCCGCAGAGCGACCACTGACCCAATACAGGAAGAATGAAGTTAGGGAACCACCACTT +GGCAACTGCCCTATTAATAGCTTATTTGGTCAAGAATCAAGTGAGGCCAGGCATGGGACC +TGCGTGAGACGCCTGAAATCTCAGCACTTTGGGAAACCAAGGCAGGAGGATCTCTTGAGG +CCAGGCACTTGAGACCAGCCTGGGCAACACAGTGAGACCCCGTCTCTACAAAAACATTAA +AACATGAAATAGCTGGGCACGGTGGGGTGTGCCTGTAGTCCTAACTACTTAGAAGGCTGA +GAAGGGAAAATGGCTCCCTCCTTGGGCCTAGGAGTTGAGGCTGCAGTGACCTATGATGGC +ACCAACTGCACTCCAGCCTGGGGTGACAGAGCGAGACTCTGTTTCTAAAAAAAATAAGAT +GTCCAGCTTGGGCAACATGGGCAAAACCCATCTCTATTAAAAAAAAAAAAAAGCATTCAG +ATGTGGTGATGCATGCCTGTAGTCCCAGATACTTGGGAGGCTGAGGCAGGAGGATCACTT +GAGCCTAAGAGATTGAGGCTGCCGTGAGCCTAGATCATGCCACTGTACTTCAGCCTGGGG +TGAGAGAACAAGATCCTGTCTTAAAGTAAAATAAAATCAATTTGAAAAAAGGAAAGAATC +GACTTCTCTGTGTATGGAGTAGCCCTTCTTCTATTCCTTTACTTTCTTAATAAACTTGCT +TTCACTTAAAAAAAAAGAGAAAAGAATCAACAGTTGATGTTAATACTAGTGATAATACAT +AGTCTCTATACATCTTCCCGGTAAATCATCATTACCAATTTTTCTTTTTTTTGAGACAGG +GTCTCATTCTGTTGCCTGGGCTGGAGTGCAGTGCTGTGATCACAGCTCACTGCAACCTCA +ACCTCCTAGACCCGGGCCTCCCAAGTAGCTGGGACCACAGTGCACACCACCATGCCTGCA +TGCCTGGCTAATGTTTTTCTTTCTTTGTTTTTTGTTTTTGTTTTTTTTTTGAGACAGACT +CTCACTCTGTTACCAGGCTGGAGTGCAGTGGCTCCATCTCGGCTTACTGCAATCTCTGCC +TCCCAAGTTCAAGCGATTCTCCTGCCTCAGCCTCGTTAGTAGCTGGGACTACAGGCATGC +ACCACCACGCCTGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTGTCACCATGTTTCT +CAGCCTGGTCTTGAACTCCTGACCTCATGATCTGCCCGCCTCTGCCTCCTAAAGTGCTGG +GATTACAGGCGTGCACCACCACACCCAGCTAATTTTTGTATTTTTAGTAGAGATGGGGTT +TCACCGTGTTGACCAGGATGGTCTCGATCTCCTGACATCGTGATATGCCTGCCTTGGCCT +CCCAAAGTGCTGGGATTACAGGCATGAGCCATTGTGCCTGGCCAATATTTTTCTTTTTAG +TACAAACAAGGTCTTGCTATGTTGTCCAGGCACTCATTACTAATTTTTTTTTTTTTGAGA +TGGGCTCTCACTCTGTAGCCCAGGCTGGAGTGCAACAGCATGATCTTGGCTCAATGCAAC +CTCTGCCTCCTGGGCTTGAGTGATCCCCCCACCTCAGCCTCTAAGTAGCTGGGATTACAG +GCGCATGCCACCACGCCAGGCTAATTTTAAAATTTTTGGTAGAGATAGGATTTTGCTATG +TTGCCCAGGCTGGTCTCAAACTCCTGAGCTCAGGTGATCCACCCACCTTGGCCTCCCTAA +GTGCTGGGATTCCAGGCGTGAACCACTGCACTCAGCCTCATTACTATTTAATCTGGGCAC +GAAGTACTTATTTTACAGTGGACAAACCTGGCCGACACCCTTTCCACCCAGTGATAAAGC 
+TAGCATCACCAGTGATGGGGCAAAGGGGCACTGCAGGCCTCCTGATGAGACACCTGGCAA +GGACCAGGTGTCCCCGCTGCAGTGCTCCTGCCAACAAAGCCCAAGCTATGCCTAATCACC +AGCAAATACAAGACAGCCCAAGGGGTGGCCACATGCTTTGGAGGGGTCAAGGTCAGGAAA +GACAGACAGGCTGCAGAACCATTCCGGACTGAAGGGGACTAGAAACATCACTGCATGCAG +TGGGTGAGCCTGTGCTGAAACCCAGCCAGGTAACGGGCATGAGTGGGGTAGCTGGTGGTG +AACCCCGAGGTCTGATGACTAGGTGTAGCCTGGTGGCAATACTAACTTCCTGGTGTGGTG +GCATGTGCTGGCTTGCACATGCCAGCAGATCTGGGGTGGGCCAGCATCTGCAGAAACGGG +CCTGTATGCAGCCTCCTGGGGAAGGCAATGGGGCAGCAAGATTAAGGAGGGTCAGAGGAG +CACACGCACTGTGACTATATAAGATAATGCATTCACTTTTAGGAAATACACTGACATATT +AAGAGGCCAAGGGGTGTCCTACCTGTAGTTTATTCTTATTGGCTTAGGGGAGAAAAAACA +TATCATAGAAAGGGGATAAAGCAGATGCAGCAAAGTGTTAATTTGGAAATCTGGATGAAG +GGTCTATGAACTTTTTTTTTTTTTCTGAAACAGGGTCTTCTTGTGTCACCCCAGGCTAGA +GTGCAGTGGCGCAATCATAGCTAACTATAGCCTTGAAGAGTGCACAAGAGATCCTCCCAC +CTCAGTCTCCTGAGTAGCTTGGTCCACAGTTGCACGTCACCATACTCAGCTGATTTTATT +TTTTGTAGAAATGAGGTCTCCCTATGTTGCCCACACTGGTCTCAAATTCCTGGCCTCAAG +TGATCTTTCTGCCTTGGCCTCCCTAAGTGTTGGGATTACAGGAGTAAGCCACCATGCCCA +GTCTGCAAGTTCTTTTTATTATGCGTGGAAACTTTTCTGTAAGTCTGAAGTTATTTCAAA +ATAAAAAGTTAGCTAGGTGTGGTGGCCTGCGCCTGTAATCCCAGCTACTTGGGAAGCTGA +GGCAGGTTCACCCTCCCAATCCGTGCAGTATGGCATCAGGCTCAGCCTGTTTGGTCTGGC +ACAACAGGCTCTTTGGCCCAGGGCCTCCCCCTCTCAAGACCTGCTCCTGGGGCCAATAAA +GGATTGTGGAAGCTAGGAGTTCAAGACCCACCAGGGCAACACAGCAAGCCTCCCCTGTCA +TATCTCTACAAAAAAAAAATGTTTTAAAATTATCTGGCTGTGGTGATGCGTGCCTGTAGT +CACTGAGGTGAGAGGATGACTTGAGCCCAGGAATTTAAGGCTGCAGTGAGTGACGACTGT +GCCGCTGCACTCCAGCCTGTGTGACAGAGTAAGACCCTGTCTTTAAAAAGACATAATGAC +TGAATGTGGTGGCTCATGCCTGCAATCCCTGCACCTTGGGAGGCTGAGGCAGGAGGATCA +TTTGAGCCCAGGAGTTCTAGACCAGCCTGGGCAACATAGCGAGACCCATCTTACAAAAAA +TACAAGCTGGATGTGGTGTTGCACACCTGTAATCCCAGCTACTCAGGAGACTGAGACAGG +AACATCACTTGAACCTGGGAGGTGGAGGTTGCAGTGAGCTGTGATCACAATACTGCCCTC +CAGCCTGGGCAATTGAGCAAGACTCTGTTTCCAATATATATATGTGTGTGTATATATACA +CATATATATATTATACACACACATATATATAACATAAATGTGTATATATACACACATTAT +ATATATATAATTTCTAAAAGAAAAAATTAAATTACAAAAAAACTCAAAAGCTGGGCATAA +AAACCACTTGGCAACTGAATTGTACATCTGAGAGGGTGTGAATCCACCTCACCTCCAAGT 
+CCTCCACAGCATCAGCAACATTTCAGGGTCACTGACCACTGCCGCCACCCAGGCAGCCTC +TCCTCCTTGCTGCTCCCTTTCTCCCCTCCACCCTCTGGGGCATCTGTCCACACATGTGAA +TTGTAGGTAGAGGCCTGTCCTGGCTTGCACATGCCAGCAGATCTGGGGTGGGCCAGCATC +TGCAGGAAGGGGCCTGCACGCAGCCTCCCAGGGAAGGCGATGGGGCAGCAAGATTAAGGA +GGGTCAGAGGAGCACAGGCAGCTGCTGCCTGTGCAGAGGGTCTCCTGGGAGGTCCCCAGA +GGGTCTCCTGGGAAGTCAGCCAAGAGGCCCTGGGTTAGGGCCCACTGAGACCCCAGCACA +GCTAGGCACAAGAGCCCCTGGTTCACACTTCCCACCTGTGCAGTCACCAACAGGCTCAGC +TTGTTTGGACTGACACACAGGCTCTTTGGCCCAGGGCCTCCCCTTCAAGACCTGGTCCTG +GGGCCCAGAGAGATTTCCATTTTTTTTTCCCCCATAGAACAAGCCTTCTGCCATGCTCAC +CACCTCAGACACTGCATGTGGGGAGGGCTGCTTCATGCAGAACACAGCCCATTCCCATGC +AGGCCGTGGAGGCCTCCAGAGACCTGATAGCTTCAGTGATGGCCACAGACATCGAAAACT +AAAGGACAGATCACCCTCCTCACCCCCACCCTCCAGTCTGTGCTAAGAAAGCTGGAGGTG +GGGAATGAGGCATAAGACAAATTTGGGAACCCTGGACAAAGGGTGGCACCATTGTCCTGG +TGGGTGATTCAACTGACCAGGCCACTCCCTTATGGAAGGGCCAGGTCTTCACAATCAGGG +TAGCCTTACCTGGGAGCAGGTGAGGGACTGGCCCATCCTGGGCTGGAGGACAGTGAGGGT +GGTGGCAGGCTGCCTTTGGTGGGCAGGAACCATGACAGGTACCTGTCCACCAGGAGGAAA +TAGGTACAGTCTGAAATATGGACATGGAGAGACACAGGGAGTGGCTGGAAAACCAAGCTA +GTTCTTAGCATGGGGTCTGGGCACTGCCTGGCCCCAGAGCACTGGCAAGTGCAGGGAAGC +CCCGGGCCATGCCCCCACCCCTCCCAGCAAGAGGCATCCCTTCCCTACCTTTAGTGTGTT +GAGGCTCAAGGCAAAGAAGCATATGTAATACTCGAACGGATCTGATGGCAGCGTCAAGGG +CAAATACACAGGGTTGCTGGGAAACCCCCAGTGTTGAGAGCTGAGCCACCCACAACCTTC +CTACCTCAAGGCCACCAAAGGATACTCAAGGCCAGGTTCAGGCCAAGGCCCTTAGTTGGG +CGGAACTGGATCTTGTGTTACAGAGGACTGTCAAGGACGCACTCCTGGATGGATGCCTTC +ACGAGACCCTGCAGAGAGAGGCAGCAAGGCTTGTGGGCAGGCACTGGCGTGTAGTGCCCG +GTACGCAGTGCCCCACATGCACAGCAGCCCTTTGGGAAAGCTGCTGGGGAAGAGACCCGC +ACACAGAACTCACCTGTGGGGACAGTGCCCTGCTCCTGAGCAGTGCCTCAACAAGAGTCT +TTGCTCAGAACTTCCACCATCTCATAAAAAGCTGCAGACAGGCCAGGTATGGTGGCTCAC +GCCTGTAATCCCAGCACTTTGGGAGGCCGAGGTGGGCAGATCACAAGGTCAGGAGTTCCA +GACCAACTTGGCCAATATAGTGAAACCCCATCTCTACTAAAAAAAAAATACAAAAATTAG +CCGGGCATGGTGGCATGCATCTGTAGTCCCAGTTACTTGGGAGGCTGAGGCTGAAGAATT +GCTTGAACCCAGGAGGCGGAGGTTGTGGTGAGCCGAGATCGTGCCACTGCACTCCAGCCT +GGCAACAGAGTGAGACTTTGCCTAAAAAAAAAAAAAAGCTGCAGATGGCAAGGGTAGCAG 
+CCATGCAATACCCATCTCCTTTGCCTCTCATACCCCCGGAGCAAATCACTCCTTAATTTG +CCCTCAGAGGACCGGAGGAGACTGCCCGTCACCACTGGGCTGTGCAAGGTCCACTGCCCG +AGCACCACTCTCACCACTGTGGTTTACTTATGGGCAGGTAGGAGATGGGAAAGTCAAACT +TAAGTCTTCAGCTTGAAGCTTATAAACCAACTTCATCATTGGGCCGCTGAACATGAAATT +GAACAAACAAAAACAGGGTATATGAACTGCAAATTATCTTGCTTTGATTGTATCTTAACC +TTAAGAAATTGAGAGTGACTTAAAAGTGGAATCACCATGAGGAAGCACCCTGCCTCTCTC +GGGTGTCAAACCTACCTTCCCAGGGTCGAGAAATTCCATCACCATGCTGTACTTCACAGG +ATTCACGCGCCTCTGTAAGCAGCGCAGGTTCCAGCCCACAAGGACACCATCCAGGCTGCC +GAAAATGCTACCAGCCATGGGGAGATGGCGTGCAGCTCCTGAAACAGTGTGCGGTGAGGC +TGCAGGCAGGACCAGCACCCACTCCTGCCCCAGCTCCAGGCCTGCACCACCTGCTGCCCA +GCTCCCTCTGAGGAGGCCACTCCCTGGCTGAGACCCATGGGATCAGCCCTGCGCTCAAAG +GAACCCTGGTGCGGTGATGCCAATGAGAAGAGCAGCTTTGGGTTCTTTCACTGGGGTACG +TGGCTTACCAAGCAGTGCTGCTTTAGACGTGGGCCAGTTAAGTAAGTCAGCCTCTGATCA +AAATACATTTTTTGTGTGTGATAAAAGCCTAAATTAAGGGAAAAAAAAAACAAAAAAATG +AAAAAATGAAAAATAAAATAAAATAAACAAAATACCTATTCTTTTGAGACAGGGTCTCAT +TCTGTTGCCCTGTCTGGAGTACACTGGCGCAATGCACAGCTCACTGCAGCCTTGGCCTCC +TGGCCTCAAGCAATCCTCCTGCCTTAGCCTCCTGAGTAGCTGGGACTGTGGGCACACACC +ACCACGCTCAGCTAATTTTTAGATTTCTTGTAGAGATAGGGTCCCAGTATGTTGCCCAGA +CTGGTCTCAAACTCCTGGCCTTAAGTGATCCTCCCACCTCAGCCTCCCAAAGTGTTGAGA +TTACAGGCATGAGCCACCACACCAAAAGATTTTTTTTTTAATGTCAGAAACTTCTGATGG +ATGCGAAGGATGCTCTATTAGAAAGGAAAGCAACACCCTTCCCAGAGTGAAGCCCTTAAA +ATCTAACTGACCTGTTGTCATTTGGTATTAACTCCTTGTCTAGGCCAGCTTCCTGGGCTT +GACAATTCCAGGTGTGATTTAAATACATATTTTTAGCATTTACATCAGGGTTCCTCAACG +TTGATGCTATTGACATTTTGGGCTGGATTCTTCTTTGTTGCAGGGAGCTGTCCTGTGCTT +TGCAGGATGTTTCCAGCATCCCTGGCCCCTACTTACTAGATGCCAGTAGCACACACTCCC +CATCACCCCCAGTTGTGATAATCAAAAATGTCTCTGATATTGCCAAGTGTCCCCTGAGGG +ACAGAGCACCTGTGGTTAAGAACTCCTGATCTGCAAGTTCACATATTTAAATGTGTCCAT +AAACATGACGGTCATGATAATGGGCCAGGACTCTGTGCCAGGCAGTGCTTGGAGTGGGGG +TGGTGGGGGGAATTAGGAATTATAATAAAGTTTTTAAAGGAATGTGTAAGACATGCTCCA +AAGAGAGATGTCAGCAGAGCTATACCTCTGCTGGAAAATCCTCAATGACTTTAACCAAGT +CTTGGCATCACTGTGCAAAGGGCTTATTTACAGAGTCAGCTTTCAGGGTAGCCTAGAAGG +TAGAACAAAGTGAAAAAATCATGAGGACATTCACTGCAGCTTTTAGTGATACTCTGGCTT 
+GAGTCAAACAGAAATCCAAGTGATAGGTTATGAGTCAGACACATAACCACTGGCGTGATT +CCCCAGCCCCTCATGTGTGTCAGTGGCCCAAAGGATGCTACTAGGAGAGGCTGTGGTTCT +ATGCACTCTGGACCAGGAACTCTCAGGCTTGCTTCCCAGTACCAACTCAGCGTGCTGCTC +ATCATCAGAACAGGACTGACTTCCACCAATCCATAAGTAGGGGTTGGCCCCAGGGTCAGC +CCCAGAGCCAGAAGAGGATCTCGATTGTGTAAGGGCACCTCTCACAGACCACAGAGTACT +ACTGAGTTCGTACGCATCAAATGGAAATTTCCCACCTGATGGACCATCATGATTACACAG +TTGTGATCATCAGTAAGAATGCCCAACAAAATCAGGGCTACAGCATCCTCTCTTAACTGA +GCTGTCTTTTATTCAGCAGACAGTGAACCTACTAAACCAATGACCTTAAAACACAGGGCA +GGGCCGGGCGTGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCAAAGTGGGCAA +ATCACAAGGTCAGGAGTTCAAGACTAGCCTGGCCAAAATGGTGAAACCTTGTCTCTATTA +AAAAAACACAAAAAAATTAGCCGGGCTTGGTGGTGCATGCCTGTAATTCCAGCTACTCTG +GAGGCTGAGGCAGGAGAATCGCTTGAACCTGAGAGGCAGAGGTTGCAGTGAACTGAGATC +GCACCACTGCACTCCAGCCTGGGCGACAGAGAGAAACTCCGTCTCAAAAAAAAAAAAAAT +AAAACAAAGCAAAACAACAACAACAAAACACACACACACACACACAAGGCAGGTAAAAAT +TATAGATGGCTTGCACCAAGTAAACACTAAACATAAAGGCAATGGAGAGGCCAGAAAAGA +CTCAGGACAAGGACTAATAAAATCTACCCACAGTCGTGGGATATACGGAAAGCATCCTGA +CTACAAGGATAAAAATAGCGTCTGTGCAAATGGACTCACATCAGGAGAAACTGCTAGAAT +CTGTCTACCTCCAAATGCCTTGTCTACCTCCAAATGAGGGTGGATTTCACAGGATTTGAG +ACAAGAGATGGAGACCTGAGTTGTCTGGCCATGTGGCAGACCTGGTGGCTCTAGTGGGGT +GCCCTGGGGAGGAACGTCCTTCCACATACATATTTACAAAAATACCTCCTAAGAACTAAA +ATTAGGAACTTTCATCACCAATACTTGCTCATACCTAACTTCTCGCTTCTCAGTATCATT +TTTTCTTTTTTTTTTTTTTGAGTCTTACTGTGTTGCTCAGGCTGGTCTTAAAACTCCTGG +GCTCAAGTAATCCTCCTTCCTCGGTGTCCTTAGTAGTTAGAACTACAGATACGGACCATT +ACACCCAGTTCTCAGCACCATGTTTAATAGTTAAAGTGCTAACCTAGCACCCCCTCAGAA +CATAAGAGTGTCTCCATTTTTCACTAATGACAATTATGATTATCAGAATGTTTCATGTCT +GTTTAAGAACTTACAGATCATGAACACTCTGATGTACATCATTCTCATTTGCTCCTCCCC +CTCCTCCTCCTCATACCCCCTTGGGAAGTAGAGAAAATAGAACTTGAATGAATTTGTCTT +GTATGGGAAAACCCAGAGCCCAGAGCCCAGCGGTTGAGTAACCCCAAGGCCCTGTAGTAA +TAAGGGGGCACAGTCAAGCTTAAGGAGTCTGGCATTGCTTCCATCACACGCTCCTAAAAG +ACAACCACTAGAGAATTCTTCAAGTCAATGGGAAGAACATTCAAGTCCCACAGCTACAGA +GCTGTGGCCACACTGACACTCATCCAGACAGCTGATATTTACCAGTAGAAAGCTGGGCCG +CTGCAGGTGAGGGAACGCCATAAAAGCCTCCAGTGGAGAGTTGAAAATGGCCTTCTTAGC 
+AAACCACTGTGGAAAAACAGAGACAAAATCTCAACATCTGCAGCACAGCAGAATCTTTTT +ATTTTTCTAGAGACAGGGTCTCATTCTGTTGCCCAGGAGAGAATGCAATAGCACGATCAT +AGCTCACTGCAACCTCAAACTCCTGGGTTCAAGCAATCCTTCCCCCTGAGCCTCCTGAGT +AGCTGGAACTACAGGCACACACCACCATGCCTGGCTAATTTTCTTCTTATTTTCTTTTTT +GTAGAGACAGGGTCTCGCTATGTTGCCCAGGCTAGTTTCAAATTCCTGGCCTCCACTGGC +CCTCCTGCCTTGGCCTCTTAAGTGCTGGGATTACAGGTGTAAACCACTGCACCTGGCCCT +AACAAAATATTACATAGTTTCACAATGCCCAGGAGAACATTTGTGTAACTCAACATGCAT +TACATTATCGATTCTCATGTCTTCTTTATTTATTTGTAGAGACAGGGTCTTGCTTTCCCA +CTCAGGCTGGAGTGCAGTGGTATGATCATAGCTCACTGCAACCTCAGACTCCTAAGCTCA +AGCGATCCTCCTGCTTCAGCCTCCCAAATAGCTGGGACTACAGGCACACACCACCAGGCC +CTGCTAATTTTTATATTTGTTGTAGAGATGTGGTTACATCATGTTGCCCAGGCTGGTCTT +CAGCTCCAGGGCTCAAGTGATCTACTTGCCTCAGCCTCCCAAAGTACTGGGATTACAGGC +GTGAGCCACCACGCCCGGCCAGCAATTCCCACATTTTACAGAAACACCATTCGCATGCAG +CCCACAGTGGCCAGCTTCAGAGCCAAGTCTGTGGTACTGAATCCTTGGTAATGCTTGTAC +AGCACTTGCTGTATGCCAGGACTTGCTCTATACACCACACACACTGTCAGTCCCCAGGAG +GGGAATGTGGTATGTGATGCTTCATAAACACATTGAACTCTTTCTGCCAAGGAACAGCCA +TTAACACCTTGGGGTTACTTGCAACTCACCTGTGGACTGTTGGTCTGAGCTATTGTCCAT +CACTTGCAACTCAGTGTACACAGGTTCATCAGAGCTGGTCCCAAAAGCAAGTCCAAGCGC +TGGAGAGCATGAGTGGGAAATCTCACTTCCACCTCGGATGGAGAAAAACTTCAGCAAGAT +TGTCATTTTATCTTTCCGACCCAGATGGACTCCATCTTGACTCAAAGACAACCAGAACAT +CACCAAGCATCTCCTACAGGGCAGGCTTGGTACTAGGTGGTTTTATTTAGATTATTTCAT +GGAATGTTCAGGAGAATTGCTCCCTCTTCACTCCTGAATTCTTGATGCAGTTCTTCCACT +TTCGCCCTTTCTTCTCGTCTCTGTCTCATTCAGCTCCCTCATTACAAACCTTCCAACTGG +CTGGATGGCGCAGTTGCTCAACCAGCAGAGAGCACTGGGGTAGCCCCAAATAAATGTTCA +TGTCATCTCCAAATTACCAGCACTATTCCCAGGTCCCAATTCAAGACAAAACCTTATCAG +CCTCATCAGTGAGAGCACGTTTTCACCGAGCTGAGTAAAAACTTAGGTACAAGACCATTC +AGTGGGGAAAACACTCATTTCTTTGAAGAATGGTGCTGGGAGAACTGGATATCCACATGC +AAAAGGATGAAGCTGGACCCCTACATCACACCATATACAAAAATTAAGTCACAATAGGTC +AAAGCCCTAAGTATAAACACTAAAACTGTTAAACTCTAAACAAAAAACATGGAATTACAT +TTTCAAGATCTTGGATTTGGCAATGGATTCATTCTTATTTGGTTTTTAACTTCATTGTGA +GCCAATTTAGCATCCAAAATTCCAGTAGGTTTGGTAAAAATTGACAAGCTGATTCTAAAA +TTATAGCAAAAATGTAAAGGGCAGGCCGGGTGCAGTGGCTCACACCTGTAATCCCAGCAC 
+TTTGGGAGGCTGAAGCAGGTGGATCACTTGAGGTCAGTAGTTTGAGACCAGCCTGGCTAA +CATGGTGAAACCCCATCTCTGCTAAAAAAAAAAAGAAAACACCAAAAATTATCCGGGCAT +GGTGGAGTGTGCTGTACCTGTAATCTCAACTACTTGGGAGACTCAGGCAGGAGAATCACT +TGAAGCCGGGAGGCGGAAGTTGCAGTGGGCCAAGATCATGATACTGCACTCCAGCCTGGG +TGATAGAGTGACTCTGTCGAAAGAAAGAAAGAAGGAAAGAAAAAAAAGAAAAAGAAGAAA +GAGAGAGAGAGAGAGAGACAGACAGACAGACAGAAAGAAAGAAAGAAAGAAAGAAAGAAA +AGAAAGAAAGAGAAAGAGAAAGAGAGAAAGAAAGAGAGAGGGAGGGAGGGAGGAAGGAAG +GAGAAAAAAATGTAAAGGACAAAGAAAATCTAGACTCTTCTTAAAGGACAACTCACCTCA +TCCTGGTCATCTTGGCGTCTTCATGATTATGAGCATGAGGACTTCTAGAATTCTCTCTCT +CTGTCCCCTGTCCTCTCTAGGATTCCACAGTCTTCCCAGGTGACCAATACTTGGGATCCA +CCTTTCAGGCAAGGTCCTCTTCTTACCCATGGGGCTTACATATCTAATCCAGTGGTTCTT +ACCCAGAGCTGTGCACTGGCACCAGGACACCCCAGGCACACTTGGGGCAGCCCCTAGAGA +TTCGGTTGGTCTAGTTTGGAGTCTTCTTCCACAAACGCGACTGTGATTGATTTACATCTG +TGACTGAATCCCTAGTGAAGATGCCTAATTGCAGCACAGTGACCTCAGTGACGCTAGCCC +TAAGCATAGCCACCCAGAAGAACAGGGCATTTGGCCAAGCGAGAGTGGTGTTCTGAGGAG +GGTCCTGGTCACACACCTCCATCTGTCTCCACACCTGTCTGGCCATCTGGGCCCAGCTAA +TCGGAGTCCAGTACCTCTTTTCTTGGTCTCAGGCCCACACAGCTTCACTCTTCGCATGTA +AGCCATGCCCCTCTGAATTTCTGATGCCACTATCTTATCTCCAGTGCTGGAGTTACTACT +TCTTTCAGGGAACATCACAATTCTTTCCGCATTTGAAACTTCCCACTTTCAAAAGTTTTG +TCTTTCTCAGTGTCACCTCCTCTGTCTAAGGGTAGGGCCCAGGTGTGAGAAGCAGGGAAC +TAAGAGAACCATCCTGACTCTCGGGGAGCTGTTCTGCCTTGGCCCGTTGCTGTGAGGACA +CACGTCCATGGTACTGAGACAGCCAACAGCCCTGGGAACCAGAAACCTCACCCTCCTATG +AGCAGGTCATGGCAGGTAGACACAAGCCTTGTTCTGGGGGCCAGGCTAAGAATGTCCCAT +GGATTGAACAGTAGAGACATTGACCCTGTTTGAGGTGTGAATTCTAGATTTGAGCTCCAG +CAGTGCAGTTAGAAGCAGCCGACCTGCCTCTAGTTCCTAAGCCCTCGTACCCATCACACT +GTTCTACAGAAGTGGCCACTTGATCCCCTAAAGCTGTGCCCTCAATGGACACTTTGTTCA +CATTTAAGTGAATATGATCCTTTTAAAAATTAAAAATACATATTTTAAAGATAGTCTTAC +CCTGTCACCCAAGACGGAGTGAAGTGGGATGAACACAGGTCACTGCAGCCTCAATTCCTG +AAGTTGAGGAGTCAACTGCCTCTGTCTCCAAAGTAGCTGAGACTACAGGCGTACACCACA +CCTGGTTTCATTCAGGTGAATATAATCTTTGCTGCCAGCATCTCACCCTTTGATAGTAAT +CACATCCTCTTAAGAGGGCTTTGGAAACAATTGGCCTTGGCTAGCCAATCCCAGGTTTTC +GCTTCCATGAGGGTCTCCAGCTTCACACAATATTGTGCCCATATCTGTAGCGGCCTTGGT 
+TTTTAATTGCAAACAACAGCATCCACCCACATCAGTGTTGGTAAAAAATAAATTGCTTTT +TTTTTTTTTTTTGAGACAGACTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGTGTGATCT +CGGCTCACTGCAAGCTCTGCCTCCCAGGTTCACGACATTCTCCTGCCTCAGCCTCCCAAG +TAGCTGGGACTACAGGTGCCCACCACCATGCCCGGCTAATTTTTTGTATTTTTAGTAGAG +ACGGGGCTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTTGTGATCCACCCGC +CTCAGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCACGCCCGGCCAAGGTATTG +CTTCTTTAAGAATGGAGCTGTTAATACTGCTGGTACCAGAAACATTAAAAAAGAAAAATC +CTGCATGAGGGCCAGAGGAGGAGTACAGGAGAGTGAGATGGAGTGGAATCCTCCAGTGGG +TGATAGATGGATGGACAGACAGATGAATAGACAGACGAACAGATGAACAGACGGACAGAC +ATGAATAAACAGGTGGACAGACAGATGAATAGATGAACAGACAGATGGACAACTGGATAG +ATGGACAGATGAAACTAATAATCTGAGAAATCAGAAAAGCTTCATGAAAAAAGTGGGACT +GAGCTGTGTCTCCACGGATAGATATAAAAGCAGAGGACTCTCCACTTGAGTCGAGAATGA +CCCAGTGTCCTGATCCAGAGAGGAAGCCAGCCTGGCTTGACTGGGAAATTTGTGGGAGGA +CTCAGAGGCCCTTAAAATGAGGCCAGGTGAGGTTGGGCTGATCCGAGCCAGCTCAGGACT +CCTCTGCCACACAGCACAGCTGCCTTAGGGGACACATTACTCAGGGAGTTGCTGGGACCT +ACTGGGCCCAGCATTGCCACCAGCACCAACAGCTTCAGAGAGGGGGGACACACACTTGGG +GTGACTCCAAGACTGTGGGTGGCACCTGCCTCAAACAGGGGACAGGCACAGGGACACCTC +TCGGGGTCTGGCACCCCCACGCACTGTGCCTAGGTCCCAACAACGCCCACTGCAGCCCTG +TGCCCATCATGCCCAGAAGGTTTCCGCTTCAGCCTGGCCCCTGTACTGGCCCCAGGAATT +TGGACCCAAGCCTCAGTCACTGGGTAAACAGCAGTGGGAACCAGCTCATTACTCTAGGTA +AGTGGCTCTTACAACCTTCCCCAGCCAGTTCCACCCTCTGTTGTCTCTGGAAAATATGTT +TTCTCTCCCTGGGGTGGCTTCTCCTCTGCCCTCCCAGCCTTAATCACTGACCCCTACCTT +TCTCTATGGGTCCTGGGGGAGGTGGGTTAGTCTTGAGGTAACCAGCAGAAGGGCCCCAGG +TTCCAACAGCCAGACGCAGCCTGGTCCCGGGGCCTGGGCTGGGTTTAGGCAAGGTCAGAG +TTCCTTCACCTCTTTCAGGGCAGGCACCCGAGGTGCGGGGCAAAGGCCAGTTCTGACTGG +CACACTGCAGTAGCATCAGAGACACCCCCCGGACCCCAGGGTCTAGGCTGATGGCTGGAT +GCCCATCCAGCCTGGGAAGGCCACACGGGGGCCTGGGGACAAAGGGGTCACCATGAGGTG +ACATCAATGCAGGTGCAGAGAGGGCTCTGGGTCTAGGCTGCAGCTCTCTGGCCTCTGCTG +GGTCATGAGGACACAGGGACAAAAAAAGAAGATGGGTCAGATGGGGCAAGATGGCCAGAG +CCCAGCCCTCCCAGAATAGTCATCAGAGAGGAGCAGATCCCTTAGGGCAGAGACATATTT +GTCCCTGGAGCCCCTTCACCCCTGGGGCCTGGCGTCTCACTGTCCATGGGTCAGTCTCCC +ACCTTCCTCAAAGGGCACGTTAGACTCAGGAGGTGACAAGAGGGGAGCGAATGGGGGGTG 
+CAGAGGACTCTACGGCAGCCAGCTGAAGTCTAGAGTTGTCAGAGTCCGTGGAGGCAGGCA +TGGGGGGCTGCTGTGTCCCGTGGTCCAGGGGAGCAGCCCCAACACCACAGTGGAGGTGAA +GGGTCCTATGGTTGGGGTGGTGGGGACAAGGGAGGTGAAGAGCGGTGGAGGAGCCCCGGG +GCTTGTCTGGGTGCAGCCCACCCTTCATCAGGAAAGCTGAATGGGATGGGCTGGGGCAAA +GCCTGGTGCCCCAGGGGACAGGAAGCTCCAGGCCCCACCAGGCTTGGGCCTTTCCACACT +CTGCCAGGATAGTCCTGTGGGCTGGGCGGGGACGTGCAAATTCCAAACTCAGACTCCAGA +GACCAGAGAGGAGGGAGCACAGCCTGCCCTGGGTACACACAGGGAAACAGAGGCTGCAGA +GGAGGGCTGGGCCAGGGCTCCTAGAAAAGGTGACTTGGGAAGAGCTCCTAGGAAGGTGCG +GGCTGGCTGCTCTGCAGAGGTCTTGAGTGAAAAGGAGGGGAATGAGGAGGGAAGAGGCAG +CCCCGGGTGGACTGGACAGCCATGCCGTGAACCTCACAGAGACTTTAGACAGAGAGGGGG +CTCTACAACACCCCAGTACTCCCTCTGCCTCTCTTGCCCCCTCCTCTGTCCACACAGGTC +TGCCCAAGGCCGCCCCTTTGGACACTCTGAGGAACTCCAAGCCAACAAGATGCCTCTCAT +TAGTGACTTCTACCCTGATGCCGTGATGGTGGCCTGGAAGGCAGAGGGCACCACCATCAC +CCTGGGCATGAAGACTGCCACACCCTCCAAACAGAGAAACACAAGTACACGGTTAGCAGC +TACCTGAACCTGACGTCTGACCAGTGGAGGTCCCCCAGCAGCTATGTTCTTAGGCCCCTC +ACCCCACCCACAGCAGCCAGGAGCTGCAGGATCCCAGGGCAGGGGTCTCCCCTCCCACCC +CAAGGCATCCAGCCCTTCTCCTTGCACCCAATAAACCCTCAGTAAATATCCTCATTGTTA +ATCAGAAATTCTGCTCCCTGTCTTCATTTCTTACCTTTCATATAATTTGACACTTCCCCC +AGGTTCTCAGTGGGGGATGGGGGAATCCTGACACTCAGTGGGAAAATAGCTTGTGGGAGA +GGCTCCCAGGCTCCCAGGGGCATCTGCTGGAGAAACAGGCCAGGCAAGGAGCAATCTGAT +CACTGAAGACCAGTCCCTCTGCCCTCTCCCTCCTCCAATTCCCCGCTGCAGCACTCCCTC +CCCCAACCCCCCGCCACTCCCTGCCTCCTTTCTGGATGGAGCTGTCCCTGGCTGGGCCTT +CAGATATGCCCTCTGTCCTTGCCCTAGTAAAGACACTCTTCCCACCTACGACCTCTTTTT +CCTCAGCCTGGAAGTAACACTCTGGGCCTGGAGTTCCTCTGCCCGTGGCCCTGGCCCCTG +GAATCCCCTCCTCCCTCTCTGCCCAACTCCCCACCCCTGAGAGCTGGACTGTCCAGAATA +CTCCAGCACCTTCAAATTCATGAGCTGTTAAATTTGGGGCCCACCTCGATTCTTCACCTG +CCAGCAGGTTCACAGGTGTAAGAAAAATCGGAGGAAGGCAAGAAGGAAACACACACAAAA +GGCTTGCAGGAGGCTCAGGACACTTGCCAAAGATAGGCTCTAAGCTCCCCAGGAACCAGA +GCAAGAAGGGTAACAGGAGCTCCATCATTTCTATGAGATTTAACAAAGCCTTTGCTTGAG +GGAAGTTGGTGTTTTCAATGCTGAGGCCAGGCATCTCCTCAGAACTAACAGTGGGGATGT +GTTTTTTGTCTGTTTGTTTTGTTTCGTTTTGTTTTGAGACGGAGTCTCACTCTGTCGCCC +AGGCTGGAGTGCACTGGTGCAATCTCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCACGC 
+CATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACCACAGGCGCCCACCACCATGTCCGG +CTAATTTTTTGTATTTTTTTTTAGTAGAGGTGGGGTTTCACCGTGTTAGCCAGGATGGTC +TTGATCTCTTGACCTCGTGATCTGCCCACCTTGGCCTCCCAAAGTGCTGGGATTACAGGT +GTGAACCACCACTCCAGGCAGTGGGGATGTGTTTTACCCAATCCCTTAGTATTGATTGGA +TTTGGGCTCCGACACCTGCACAGGTGACTGCCTGGCAGCCCCAGAACACGGAGATGTTCT +CAAGTGTGTGGCACAGAAAGTCACTGGCTGCCTTTGGGGGGCATTCTTTTTGGGAAACAC +CTCTCCATTGGATCCTGGGCCTCAGGTGGAAAACCAGGTCCTGCCTCGGGACTTTCTCTG +TGAGGCTCCCTGGGTCTGGGGCTCCAGCCATTCTCCGAGAGGAAGGTGCCAGGAGAGAGA +GCAAGGCCCAGCCCCACCTAGTCTTTCCTGCATGGTCCCTGCTCTGGTAGAGACAGGGTT +TGGCCTGGCTGGGTTCCACACCTGCTGAAGGATGCACAGATGGTAGATGCCCAGTCTGAC +CCTGGGTCTACCCAGGGCTAACCCCCTTCCCTGAGGCCAGACTGAGGAGGCTGAGCCTGC +CCAGGTCCTGCAGGTGTTCAGGGCAGCACAGACAGTGGGTGCCCAGGCCCCTGACCCAAG +GTCACCCTTCCCTCTTGAGGCTGTGCTAGGCAGGGCCAGGAGACAGTACTGAACACACCA +ATCCCCTTGTGGCTCTGTGGGGTTGGGGACCTACACTCCTCAGTCTACATCGAGCCGGTG +TAGGAGACAGGCAGAAGCAGGAGGGTCCTCGGAGTCCACAAAGCAGAGCAGGGGCAGGTG +CTCGTGTGCCCATTTCACCGAAGAGAGAACTGAGGCTCCACGGGAAGGGAAGTGACCTGC +CCGGGTCAACATGGTGAGCTGCTGCCTGACCCACCCCCAGCCTCTTTCCACAGCTCCTCA +GTCCTTCATTCATTCACTCACACACTCATTCAGCAAACCCTCAGTGAGCACAGCTGTGGG +CCAGGTGCTGGTGCAAGACCAGGACCACCTGCCCTGGAGAAGCTCATGAGCTGGAGGACA +ACAGGGAGACACCACTGCACCCCCCTGTCCTCAAGTCCCCAAGGTTGGAGCTTGGCCCAG +GGAGCGCCTGGGGCAGCAGGGGCATCTCCCCCTACTTAGCAGAAGATTTAACTGAAAGAA +CCATCTGACGGAGCCCCAGCTGTCATCCCTGGGCCACAGTGGGAACATCACAGTCCCTCA +TCAACCCTCACAGTGACCCTGAGGCCCAGGGCAGCCATGGTCCACCTCACATAGGGCAGG +CCGACCATGAGGAATGCACCACGGTTGCCTCATAGCGACCCTGAGCTCTAGAGCTGCCAT +TGCCCACCTTATAGAGGGGATGCTGACCAGGAGGGAGGCACCAACCAGGGCCACCCAGGC +TCCGGCCAGCCTGAGAGTGGCCCAGCTGGCCCTGTGCCTGCACTGCCCTGCACCTGCTGC +CCTGCCCAGCTCAGACCCTGGTCATATCCTCAGGAGTGATGGCCCCACAGCACCAGGGCA +GGGTAGGTCCACACTGGGAGAGCCCCTAAAGGTACAGAGAAGAGCACTGGAGTCCAGGGA +GGAGGGATTGTCCCGGCCCTGGTCCTGCCTCGCTGTGACCAGGGCTGAGGCATTTGCTCT +AAATCAGATGGGGGAGGGGCATCTGTGGCTTCCTCTACAGCCAAAACCAGAGCCCCAAAT +AGCCCCGGTGGGTATGGAGGGGCCTTGGGGGAGGGGAGGAAATGGGAGGGGAGGTGGCTG +CTTGGGGCTCTGGGTGAGGGCTAGGAGCTCCTACTGAGCTAAGCGGATGACTCTCCTATC 
+TGACCTGGCCCCCCATGGGGCCTGCAAAAGCACCAAGAGAGCATCCCCAGCCAGGCCTGT +GGAGGGGCCCTTGACCCCTCATACTCACTGAGCCCAGTGGTGGGAAGAGGGGGCTGGTCC +CCGGCCAGTATTCTCCTGAGGACACCGACACTCACTGGTAACAAGCAAACCAGACCCAGA +TGCTCCAAAAAGAGGCCACTGGATCCCAGCAAGCACCCACTCCCCCCAACACGAAGGTCC +AAAACCAAGCTTCAGCCAACTAGAATGTGGGAACTTCCCGGGAGGTAGAGGAAACAAGTC +ACTGTCCACTTGCAATGCCCACCGGTTAGTGACGCAGCACAATGACAGTCACACCTCTCA +GAGGGGGCTCTCCCTCGACTCTCCTTCCTGACACATCAACAAGGACAGCCTTACCAAGGG +CCACACCTGGGCAGTCACCATGGGGTTCGTGGTCTCACGTGGGCCCTGCTTAAAGCCCCT +GGGGATAAGCTCACTCCAGCTCATGAGAGGCCGGGGGTGACCCTGACCCCATTTTCCGAT +CAGCGATAGCACAGCACATGTGGGGACCAATGAGAAATGAGTCTGGGGAGACCACGGGCT +TCAGGGAGAATGTTTCGATGAGGCAAGTACAAGAGCCAGTGAGGGACTGGGTGACTTCTG +TGGCCTGTGTCACAGAGCTGGGGCCATCAGATGCCCCATGTCCACTCTGAGGACAGAGTA +GCCCATGGTTAGGATGACCCACCAGCCAGGCCTGGCCAGGGCACCCACAGGAGACAGGAA +GGCTGTGAGGGCCCGGGGGCACGGAGCAGCTCTGAAAGGAAGGGGTGCTGGGTCCCGGGG +CTCCTGGGCAGAGTGGTTGAAGAAGGTGAGACCCCAGACCAGCCAGATGGGCCCACTTCT +CAGGGGACCCTGGTCAGACAAGGGAGCACCTCCTGCTCATGGAACACAGCAAGGGGCAAG +GTAGGCACAGCCAGGCTCGGAGAACTCGTATTTGGGGGAGATGTAAATCAACCCAATAGA +CAAGGGAGCGATGTGGTGTAGGAGATGGGGTTTGTGCTGGGGAGAAATAAGGCAGAGAGT +GGGGAGTAGAGAGCACCTGTCTGGGGTCTCAGGAGGAGCGATGTCCCAAGACTGGAGGCG +GCCAGGGGCTGAGGAGCCCTGACTGGGCCAGGCCTGTGGCTGGAGCTGGAGGCTCGGGTT +CAGCCTCAGTTTCCCCTCTGTGAAGCTAGGTTCATAATAATGGAAATTGACTTTCTGGGT +GGCTGGAGAGACATTCAGATGGAAAATGGATATTCTATTTTGTCTCTGTCCTTGGTTGTT +GTCATCTCAGGCTGTTCCAGCCCCAGTTAGCCTCGGGTCAGCCCCCTGCCCATGTTGGTC +TCCCCATGAGCTCAGGCACCTCATCCCCAGCACAGCTGGCCTGGAGAAACCAAGATTCCC +TGAGGCTTAAAATAGCTGGAAAGCCAGCCCAGCTGCAGGTGCCTCCCTGTAGGATAAAAT +GTAGGAGAAGATCTTCAAGAGCTAGGACTAGGCAAAATTTTCTTAGACTTGACACCAAAA +ACATAGTCTACAAAAGGAAAAATTGCTAAGCTAAGTTTCATCAAAATTTAAAAATTTGTT +CTGCAAAACACCTTGTTAAAAAGATGACAAGTTACAGACTAGGAGAAAATATTTTCAAAC +AACATATATGAACAAAGCACTGTTATCTTGAGTATATAAGGAACTTGCAAACTCCACACA +AAAAATCAAACAATCCAATTAGAAGATGGGCAAAAGACACGAGCAGACATTTCACCAGAG +ATGATACACAATGGACAAATAAGCCCATGAAAAGATCTTCAACTTCACTAGCCATTTGGG +AAATGCAAATTAAAACCACAATGAAATATAATTTACACACATCAGAGTGGCTAAAATAAA 
+AAATAGTGACAAGACAAAACTCTGCTGAAGATATAAGTGTGAATCACTCATATATTGCTG +GTGGGCATATAAAATACCACAGCCACTGCAAAAGTGTGGCAGTCTCTTTTTTCTCTTTAG +AGACAGGGTCTCCCTATATTGCCCAGGCTGGACTCAAATTCCTGGGCTAAAGTGATCCTC +TCGCCTCAGCCTCCAGAGTAGCTGGGACTAAAGGCACATGCTACTGTTGATGTTTGGCAG +TTTCTTATACGACTGAACATGGAACTACCGTATGACCTAGCAATTACATTCTGACTCATC +TATCCCAGAGAAATGACAACCTGTGCTCATACAAACAACCGTACGTGAATGTTCATTACA +GCTTTAATAGCAATAGCCAAAAATCTAGAAATAACTCAGCCATCCATCAGCAGGTGAACA +GTTAAACTGTGCTACATCCATCTTATGGAATAGCACTCAGCCATCCAAAGGAATCAACTC +TTGATACACTCAACCAGCTCAACCCTCCCCAGAACGCACCTGGGTGGGAAGGTGCACAGC +AGATTGGGCCACTAGATTTACCCAGTGTGGAGGAAACAGGTACCTCCTCCTAGATTTTGT +TCTTGGCAGAGATAAACCAGTGCTTCTCAGTCATTTTTGCTTTTTACTAGCCATGGGAGG +TATTTTCCAGTAAAGGCTTCTATGACCAAATGCATTTGAAGGAATGTTCCATATGCTCTC +CCACACGCCTTCAATTTCCTGAGAGTGACAAGAATACCAGAATAGCCAAGGTTCTAAGAT +ATCCTTCAGGATACCTGTTTAACTTAAACTTCCCCAAACTATTTGATTATGAGACTCTTT +TCAAGCATAAACATTTTCCTTCCAGGAACATTTGTATTCTTATAAGAAAATATTAGAAAG +CATTCCAGGTGACTTACCTAATGGGAGAACCTCAGGCGCCAAGCCAGATGAGATAAAAGT +TTCAAATAGTGCTGATAAGGTTTTCTTGGTTCAGCTGCCTCCCTCCAACTCAGCTGCTGT +CACACTTCTAGGCAGTTTTGACTGGACTGTAAATTCCTTCTGGCTGGGGCCATGTCTCAG +CACCTTGTGCAGGAGTGATACAAACTAATGCTTCATAATAAATACTTACTCACTGACTTT +CTCTTTATTCATCCCAGGTGAGCATGTTTTCCATCCCCTTCAATCTCCACACTCACGGGC +AAAAGTGAGTTGTGGGTCTTGGAACTCTCAAGAAGAGGAGGAGATCTTCAATTTGCATAT +GCAGAATCCAAACAAGTCATCTTGAGTCCCCACAAGTAGGTGGGCTTATTTCTCACATAA +CAAAATCGCCCCTCCTTTGCAGCCTCTACACCACTGCAGATCAAGCCCAGCTAAACCTTG +CTGTGCTATCCTGAAGGTATTGCTCCCAGATCCCGGCTGTCAGCTCCTGTCCAGCATCAC +AGGGCAAGTCCAGAGACATGATCATGTAATTGACCAGGGATGACAAGTGGGTCCTCTCAG +TGGACAGGTGTCTGCAGTAGGTAAGCCCTGTCAGGAGAGGAAAGAAAAGCATCATCATCT +ACCACCTGATACCCTTTGCCCTGTCTTATGGAGTCCCCAAAGTGCTGTCCCACACTCCCA +TCTTTATTTTTTTATTTTTCTGAGACGGTCTCGCTGTCACCCAGGCTGGAGTGCAGTGGC +GTGATCATGGCTCACTTCAGCCTAAGCCTCCCGTGCTCGAGCAATCCTCCCATCTCAGCC +TCCTGAGTAGCTGGGACTACAGGCATGTGCCATCATGCTCAGCTACTTTTTAACCCCTCC +TTTTCTTCACAAGGAAGGCAAGGCTTGATGTCACTTGCTGGCTGAATGGCTTAATAGCTC +TACAACCCTAGGGAAGTCTTATCACCTTGCTCAGCCACAGTCAGGGTGATGCCATCCTCC 
+TCTGCTGGAAGCCAAAACATTTAATTGATAGGCTGGAGGCAGAGAAGGGACAGGAAAGAA +GGTGGCCAATGACTCTGTGGCCTTGCTGTGCCTTTGTCTGCCTCCAATCCATACACAGAG +AAAGACACTGTGAGATGTGGACAACTTTCTTTCAAATATCAGCTGAATACAGGTATTCAT +CCAAAAGAAAGGAAATCAGTACATCAAACAGTTATCTGCATGCCCATGTTTACTGCAGCA +CCATTAACGATAGTACAAAATCAACCTATGTGTGGAGGAATAGTACAGAATCAACCTAAG +TGTCCATCAATGAATTGACAGTTAAAGGAAATGTGGTATATGTATGCAATGGAATATTAC +TCAGCCATGAAAAAGGATGAAATCCTGTCACGTGCAGTAACATGAATGGAACTGAAAATC +ATTATGTTAAGTAAAAAAAGTGAGGCACAGAAAGACAAATATCACATGTTCTCAGTCATA +CATGGAAGCTAAAAAAGTATTTCTCATGAATGCGGAGAGTAGACTGGTGGTTACCAGTGG +CTGGGAAGGGAAGAGTAGGAGGGGAATGAAGAGAAGTTGGTTAATGGTACAAAAATCCAG +TTAGATGGAATTAAGTTCTTGTATATTATAGTAGGAAAAATATAATTAACAGTAATTTAT +TGTATATCTCAAAATAGCTAGAAGAACTGTAAAGTTCCCAACATGAAGAAAACATAAATG +TTTGAGGTGATGGATGTTCCAATCACCTCAAACGTGATAGATCAATACGCAGTGGATCTA +TCAAAATACTACATGCACCCCCAAAATATGTACAACTACAGTATATCTATTACAAATTTT +TTTAAAAAGAGGGCCGGCCGGGCACAGTGGCTCACGCCTGTAATCCCAGTGCTTTAAGAG +GCTGAGGTGGGTGGATTGCTTGAACCCAGGAGTTCTGGACTAGCCTGGGCAACACAGTGA +AACCCAGACTCTACAAAAAATAAGCAAACTTAGCTGGGTGTGGTGGCGTGTGCCTGCAGT +CTCAGCTACTTGGGAGGCTAAGGTGGGAGGATTGCTTAAGCCTAGGAGGTCAAGGCTGCA +GTGAGCCAAGATCTTGTCTCTAAAAAATAAAATCGGCTGGGCGTGGTGGCTCACGCCTGT +AATCCCAGCACTTTGGGAGGCCGAGGCCGGTGGATTGCCTGAGGTCAGCCATCCTGGCCA +ACATGGTGAAACCCCGTCTCTACCAAAAACACAAAAATTAGCCGGGCATGGTGGCACACC +CCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATTGCTTGAGCCTGGGAGGCAG +AGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGATTGACAGAATGAAACTC +TGTCTCAAAATAAATAAATAAATAAATAAAACAAAATCAGATAAAAATAAAACAAAATTG +CAGCTGGATTAAAAAGGCCCCAGTAAGTCAGGAACCAGGGAGAGAGATACTGAGGGGGCA +GCTTGCTTTCAATGATGGTCCTGCCTCAAAAGCACACCTACCAATGTGCCACCCCTTATC +TGTCACACCCTGTCACCTGGAGGGTGTTAGGCAAAGTGCCTCAGCAAACGCCAGCCCGTG +CTGTTTGCTCTGGCTGTGCCCTTTCCTTCTGGCGTTCCCCAGCCATTTCATCCTCAATTC +CCAGTGAGAACCCTGGACATTTTGCAGCCTGGGGACTAAATTCTAACAGGAGGGTCAATC +ATAGCAATGATGATGATGATGATGACCACGATGATCTAATAGCTAATACTTACACAGTAC +ACACTATGTGCCAGGGAGCTGTTAGCAGAGTTTGCATAACTCAGTCTCGCCACAACCCAA +GAAAGCAGGCGCTACTGTTATTTCCATCTTATGGATGGGGTAAGTTGAGGCCCAGACTGG 
+TTATGTCACTTGCACAAGTTACATAACTGGAATCTGAAACCAGATCACCTGGCTCCAAAA +CCTGGGCTCTCAACCACCATCAGCCATGAGCCACATCATCTGATGACATTATTCCAGACC +CGTATCAACCCCCAACCCTTCCTGCAGGAAGATATTTCATTTGGGTCCAAAAATACTTGC +CCAATGCCTGCTCCACGCCAGCCCCTGCATTAGGCAAAAACGTGGATTGTAACGGAACAG +GCAAAACCTTTGTCCTAGTAGAGTCACGCTCTCCCCGCCTCTGCAGCACTCAGGTGAGCT +GCACTGTGGTTTCACAGGCCCATGCAGGTGTGTGTATAGCTGCAGGTTTTATTTCCCTAG +CCTCACAGCCTCAAGAGGAGAGATAAAATGTGTATATTCAGGGATGGAAAACACACACAC +ATTTTCTCTCTCCCATCAAGGCTGTGAGGCTACGGAAATAAAACCTGCTAAGAATAAATA +TACGGAGAAGAAAAAACAAAGCCTTGGAGTCAGGCCAGAGTCTGAGTCCCAGCTCTGCTG +CTTGGTAGCTGTGTGACCTTGGGCAAGTCACTTTGTCTCTCTGTGCCTTAGTTTCCTCAT +CTGTTAAGTGGGAACAATCAGAATACAGATACTAGGAATTTAATTTTTAAGTTTTGGGGG +AGGCATAACGTCACTTTACTCTTCTGCTTCTGCCCATGACAGAAGGAAACTGACATTTAC +TGAGCACCTACAGTATGCAAGACTAGCTGCCAGGGGCTTTCTACATGCTCCTCACTTAAT +CCTGAAAACAAACCCTCCACTGTGGCACCTGGGGGTACAATGGTGATTTAATGAAACCTC +TGTCCCCAGGGGTCCCCATTGTTCACTGCCCTCTCTTAGGACTCAGGCATAAAATTTATC +CCCAATCTGTAGATGATGAAACTGAGGCTCAGAACGGTTCATGTAAATTCACTGAAACCA +GGCATTCAGGTAATAGAGGAGGAGAGGTTCAAGCTCCAAAGCCAGTATCTCCTGGTATCG +CTGACATAGAGTAGGCACTGATAACTGCCCCTCGGAGACTACACGGCAACAAAGACCTAT +GTACATCCATAGAAGGCTCTGCAGAGGATCTGGTACTTCATATTGTCACAAGAGAAGTTT +CTGGGGAGCCCTGCAGGCAGAGAAGGAGGGAGATGCTTAGAATAGATTCAAACACAGCCC +AGGTTTTTTGGAGAGGAGCACAACTTGTTATCTCTAGAGGGTAACTGAAATCAGCCAGTC +ATGAGGAGTGCAGGGAGGGTGCTATTACCTCCACCTTTACAACATCACCTCTTCCACTAC +CAGTGACCCAGTTTGCCCTTGAGAAATCACTCCGTCCACAACTTTCTGTCTCCGGTTTCT +AAAGGGATGACTCTACCCATTTATAGGACCAAAGCTGGAGTCATAGTAATGGAAGCGGAG +ATTGTCATATGACCCAACCTGAGCCAATGAGAATTAGGCTTGGGACTTTCATTGTAACTA +CCAGGAAAAAGTCTTTTTCTGCTTCAGTTTCTAAACCAGTGGGATACAGGACAGTAGCTC +CTAGGAGCTATCTTTGCCACAACATAAAAAGGGCCTGCCTGTGGATGGAACCATCACAGA +GGGAAGCAGAGCCAAGAGCTGAAGAAACAGAGATTCCCGTCTCGAGCATCTGAGCCTGAA +GCCACTATGACTCTAAGCTTCTGTTATGCCAACAATTTTTTTTACCTGAACCAGTCTGAA +GTGGGTTTCTATCACTTGCAGAAAGAGTTCTGATTTCTTTTTTTTTTTTTTTTTACACAG +AGCCTCGCTCTGTCGCCCAGGCTGAAGGGCAGTGGCGCGATCTCGGCTCACTGCAAGCTC +CACCTCCCAGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGT 
+GCCCGCCACCACGCCTGGCTAATGTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGT +TAGCCAGGATGGTCTCGATCTCTGACCTTATGATCTGCCTGCCTCGGCCTCCCAAAGTGC +TGGGATTACAGGCATGAGCCACTGCACCCGGCCGAAAGAGTTCTGATTTCTAAAGTGACC +CAGCTGAACCAAGAGCCAAACTGGGATTCAGTATCTGGCTGTTTGCTAAGTAGGTGATGC +CTCAAGGATTCTCCCTGCATGGGAATTTGTGGCAACTTTTTTGGCTGAAAGAGGCAAGAG +GAGTAAGGTACCAGCATTGCTGGATAAATCCAGGACTGCTGAAGGGAGTGTTGTACGCTC +TTGCAATGAACAGAATGGAAGAATATCGGCATATTTATTCTCAGCACATCCCAAAGGACA +TCCAGAAAAATCCAGGGAGACTGGGGTGGGGGAAACATAGCCGAAGTCCTCTAGTTAGAC +ATGTGAACTCCAAATATGAGAGACAGGTCTCAGTTAATTTAGAAAGTTTATTTTGCCAAG +GTTGAGGATGTGCGCCCACGGTACCGCCTCAGGAGGCTCTGACGACATGTGCCCACGGTG +GTCAGAGCAGTTTGGTTTTATACATTTTAGGGAGATATGAGACACCAATCACCGATCAAT +GTATGTAAGATGAGCATTGGTACTGTTCAGAAAGGCAAGACAACTTGAAGTGGGGAGGAG +GCTTCCAGGTCATAGGTAGATAAGAGACAAACAGTTGCATTCTTTTGAGTTTCTGATGAG +CGTCTCCAAAGGGGGCAATCAGATATGCATTTATCTCAGTGAGCAGAGGGGAGACTTTCA +ATAGAATGGGAGGCAGGTTTGCCTTAAGCTGTTCCCAGCTTGACTTTTCCCTTTAGCTTA +GTGGTTTGGGGCCCTGAGATTTACTTTCCTTTTACAGGCACATGGAGGCATTGAATACTG +AAGTGTCCCACTTTGTGCCCTCAGGTTGGTGAGGCTTACAGTTGCTCAGGTTACATGAGA +ATAGGGTCACATAAACAGCTGTCCCACTGTGGGCTTGGTTTTCTGCAAGGTACTGAGTTC +CCCATTATGCCAGGTGTGTAAGCAGACGCTGTAACAGGAGAGAGGGCAGTAAGTTATGGC +AGCATTTTTTTTCTTTTTTCTTTTCTTTCTTTTTTTTCTTTTTGAGATGGAGTTTTGCTG +TTGTTGCCCAGGCTGAAGTGCAATGGCGTGACCTTGGCTCACCACAACCTCCGCCTCCCA +GGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGTATTACAGGCATGCGCCAC +CACACCCAGCTAATTTTGTAGGTTACAGCAGCATTTCTCACTGGTCCATGGACCATCAGC +CTAACAGTTACTTGCAGTTGCTCCAACATGTGTATTCCTGGGCCCTGCACAGACTTAATC +AGAACTTCTATGAAGTAGTCTAGGAGCTGGCATGTTTTGCTACCTAGCCCAGGGGATTCT +GATGCACAATAAGAGAATATCCGGTAACATTAGCACTTCTCAATCTTAAAAGTACACACA +TGGTCTTGGGACTACGTTAAACACAAATTCTGATTCAGAAGGTTTGGTATAAGAGACTGA +CCATCTGCCCGGTGCTGATGCTGGTTCACAGGCCACAGTCTGAGGAGCCAGGAGCTTAGA +TGATGACTTCAACAATGTAATGCCAGCCCTCACACCGTACACTCCTATGGCTGAAGACTG +GACCACACACTGGTACAGAAGACCAGCAAGGACAATAGGGTTCCATTCTACCAGGCAAGG +CCAGGAGGAAACAAAACCAGTCTCATGAACTGGCTGAATAGTTCCAAAGAGACAAAAGCT +CTATGGGCAAAGAGAGAGATGCGCCTGAGATTTAAGTTTACATGATAAAATGGTAACTGT 
+TATTTACTGAACAGTACTGTGTGCAGGTGGCATATTCTGGCCACTTTAAACCATTATCTG +AGCTGGCATTTACCATAGCCATATAAAGTAAGCACCAGAATCTCCACTGTGTGGAGGAGC +AAACTGAGATGAATGTGGTTGACATTAAATAATATTTGAGATTATTCTGCCTTCTAGGCT +CATGGTAGGGCTGCACTTCCTGGCTCCTTGTGGATGGGGCCAATGAGCTGTGGGTGGGTA +TGACATGCTAATTTTGGGTGGGGCCCTTTTATTGTGGGTTCAAGCCCCTCCAATGCTACC +TTTCCCTGTCACTGTGACTGGCAACATTCAAATGATGGCT diff --git a/example/reference/22_20-21M.snp b/example/reference/22_20-21M.snp new file mode 100644 index 0000000..68abb59 --- /dev/null +++ b/example/reference/22_20-21M.snp @@ -0,0 +1,3502 @@ +rs73387790 single 22:20000001-21000000 145 A +rs55902548 single 22:20000001-21000000 427 T +rs114690707 single 22:20000001-21000000 770 C +rs147349046 single 22:20000001-21000000 809 T +rs1978233 single 22:20000001-21000000 949 G +rs2079702 single 22:20000001-21000000 1005 A +rs139570132 single 22:20000001-21000000 1158 T +rs5993894 single 22:20000001-21000000 1332 T +rs71788814 deletion 22:20000001-21000000 1587 2 +rs111598545 single 22:20000001-21000000 1821 G +rs73877109 single 22:20000001-21000000 2230 T +rs9605047 single 22:20000001-21000000 2847 T +rs62223726 single 22:20000001-21000000 3058 T +rs75213299 single 22:20000001-21000000 3151 T +rs2531714 single 22:20000001-21000000 3265 G +rs2531698 single 22:20000001-21000000 3333 A +rs10678141 insertion 22:20000001-21000000 3490 AG +rs2531715 single 22:20000001-21000000 3730 T +rs2531699 single 22:20000001-21000000 3785 A +rs113101099 single 22:20000001-21000000 4162 A +rs551721384 deletion 22:20000001-21000000 4216 5 +rs2286480 single 22:20000001-21000000 4310 T +rs141779732 single 22:20000001-21000000 4429 T +rs2531716 single 22:20000001-21000000 4703 T +rs2531717 single 22:20000001-21000000 4936 G +rs9606210 single 22:20000001-21000000 5257 T +rs3788322 single 22:20000001-21000000 6039 T +rs2531700 single 22:20000001-21000000 6143 C +rs5748501 single 22:20000001-21000000 6353 C +rs5748502 single 22:20000001-21000000 6549 C +rs2531701 single 22:20000001-21000000 6573 A 
+rs74731343 single 22:20000001-21000000 6710 T +rs1858770 single 22:20000001-21000000 6991 C +rs115149055 single 22:20000001-21000000 7391 T +rs7288996 single 22:20000001-21000000 7598 A +rs182897799 single 22:20000001-21000000 7969 G +rs73387800 single 22:20000001-21000000 8172 C +rs10427828 single 22:20000001-21000000 8227 A +rs73387801 single 22:20000001-21000000 8487 T +rs737986 single 22:20000001-21000000 8710 T +rs111664354 single 22:20000001-21000000 8834 C +rs56270168 single 22:20000001-21000000 8887 T +rs737985 single 22:20000001-21000000 9077 T +rs4819854 single 22:20000001-21000000 9927 C +rs2518827 single 22:20000001-21000000 10729 G +rs56345620 single 22:20000001-21000000 10818 C +rs4819855 single 22:20000001-21000000 10849 A +rs55725292 single 22:20000001-21000000 10922 A +rs9605049 single 22:20000001-21000000 11158 T +rs145679795 single 22:20000001-21000000 11317 G +rs1476771 single 22:20000001-21000000 11473 G +rs5746852 single 22:20000001-21000000 11836 C +rs2518828 single 22:20000001-21000000 11988 A +rs113192851 single 22:20000001-21000000 12021 T +rs2518829 single 22:20000001-21000000 12285 G +rs189209957 single 22:20000001-21000000 12863 T +rs9754500 single 22:20000001-21000000 12956 A +rs114176759 single 22:20000001-21000000 12998 A +rs2531719 single 22:20000001-21000000 13221 G +rs8142163 single 22:20000001-21000000 13288 A +rs112567206 single 22:20000001-21000000 13681 T +rs12165568 single 22:20000001-21000000 13776 T +rs2531720 single 22:20000001-21000000 13802 C +rs9606211 single 22:20000001-21000000 14031 C +rs41326345 single 22:20000001-21000000 14060 G +rs5748504 single 22:20000001-21000000 14151 G +rs80254054 single 22:20000001-21000000 14197 G +rs2531705 single 22:20000001-21000000 14403 C +rs2531706 single 22:20000001-21000000 14472 C +rs5748505 single 22:20000001-21000000 15015 T +rs73389717 single 22:20000001-21000000 15227 C +rs4629255 single 22:20000001-21000000 15390 A +rs2520732 single 22:20000001-21000000 15510 T +rs9606212 
single 22:20000001-21000000 15614 A +rs5748507 single 22:20000001-21000000 15740 T +rs2531707 single 22:20000001-21000000 15973 C +rs7290592 single 22:20000001-21000000 16011 G +rs5748508 single 22:20000001-21000000 16491 A +rs9606213 single 22:20000001-21000000 16970 A +rs2531721 single 22:20000001-21000000 17267 G +rs73389721 single 22:20000001-21000000 17349 C +rs2531708 single 22:20000001-21000000 17444 C +rs2531722 single 22:20000001-21000000 17552 A +rs556782134 deletion 22:20000001-21000000 17673 1 +rs72195121 deletion 22:20000001-21000000 17857 4 +rs9605050 single 22:20000001-21000000 18102 T +rs200431361 deletion 22:20000001-21000000 18131 3 +rs143984612 single 22:20000001-21000000 18691 A +rs572490569 insertion 22:20000001-21000000 18720 A +rs9606219 single 22:20000001-21000000 19177 T +rs2078749 single 22:20000001-21000000 19767 G +rs887205 single 22:20000001-21000000 20072 G +rs2008591 single 22:20000001-21000000 20228 T +rs76656759 single 22:20000001-21000000 20408 G +rs117194710 single 22:20000001-21000000 20600 A +rs117194710 single 22:20000001-21000000 20600 T +rs112637156 single 22:20000001-21000000 20767 G +rs79885628 single 22:20000001-21000000 20882 C +rs114783249 single 22:20000001-21000000 21229 T +rs142921548 single 22:20000001-21000000 21659 A +rs2531709 single 22:20000001-21000000 21695 G +rs72211264 insertion 22:20000001-21000000 21730 AC +rs6518597 single 22:20000001-21000000 21862 T +rs6518598 single 22:20000001-21000000 21937 T +rs75566222 single 22:20000001-21000000 21984 A +rs78880579 single 22:20000001-21000000 22093 A +rs2520733 single 22:20000001-21000000 22497 G +rs62222174 single 22:20000001-21000000 22745 T +rs73150855 single 22:20000001-21000000 22839 T +rs4819858 single 22:20000001-21000000 23009 C +rs2531710 single 22:20000001-21000000 23086 T +rs2531711 single 22:20000001-21000000 23302 G +rs2531712 single 22:20000001-21000000 23443 T +rs5748510 single 22:20000001-21000000 23631 A +rs2520734 single 22:20000001-21000000 23730 
T +rs2531713 single 22:20000001-21000000 24173 G +rs74628130 single 22:20000001-21000000 24900 T +rs112148654 deletion 22:20000001-21000000 25176 1 +rs9617858 single 22:20000001-21000000 25310 T +rs73150856 single 22:20000001-21000000 25651 T +rs9606221 single 22:20000001-21000000 25766 T +rs117607297 single 22:20000001-21000000 25852 A +rs11703292 single 22:20000001-21000000 25950 T +rs62222175 single 22:20000001-21000000 26206 A +rs71691029 deletion 22:20000001-21000000 26448 1 +rs7288316 single 22:20000001-21000000 26524 G +rs116356549 single 22:20000001-21000000 26557 A +rs11361191 deletion 22:20000001-21000000 26623 1 +rs115650955 single 22:20000001-21000000 26676 A +rs114089161 single 22:20000001-21000000 26757 T +rs393947 single 22:20000001-21000000 26916 C +rs16982922 single 22:20000001-21000000 27269 A +rs114114568 single 22:20000001-21000000 27321 T +rs148174824 single 22:20000001-21000000 27456 C +rs143084532 single 22:20000001-21000000 27603 C +rs389471 single 22:20000001-21000000 28540 A +rs7364127 single 22:20000001-21000000 29082 T +rs695326 single 22:20000001-21000000 29259 C +rs79343076 deletion 22:20000001-21000000 29411 1 +rs58838155 single 22:20000001-21000000 29538 G +rs374225 single 22:20000001-21000000 29877 C +rs114456898 single 22:20000001-21000000 29953 T +rs77937532 single 22:20000001-21000000 30040 A +rs148393048 single 22:20000001-21000000 30173 C +rs113697305 single 22:20000001-21000000 30656 T +rs409519 single 22:20000001-21000000 30748 A +rs446388 single 22:20000001-21000000 30979 T +rs45548038 single 22:20000001-21000000 31015 C +rs12627769 single 22:20000001-21000000 31101 A +rs17817665 single 22:20000001-21000000 31663 A +rs77046301 single 22:20000001-21000000 31849 G +rs140487675 single 22:20000001-21000000 31908 T +rs455049 single 22:20000001-21000000 31963 T +rs62222177 single 22:20000001-21000000 32006 G +rs138004901 deletion 22:20000001-21000000 32054 4 +rs397701 single 22:20000001-21000000 32326 C +rs17210273 single 
22:20000001-21000000 32575 A +rs62222178 single 22:20000001-21000000 32679 G +rs375897671 deletion 22:20000001-21000000 32795 1 +rs111315345 single 22:20000001-21000000 32833 C +rs570570040 deletion 22:20000001-21000000 33022 2 +rs143413803 single 22:20000001-21000000 33734 A +rs114166464 single 22:20000001-21000000 33756 G +rs74752779 single 22:20000001-21000000 34313 C +rs4819860 single 22:20000001-21000000 34390 T +rs148446823 single 22:20000001-21000000 34818 T +rs118008587 single 22:20000001-21000000 35027 T +rs76173340 single 22:20000001-21000000 35150 T +rs79375034 single 22:20000001-21000000 35263 A +rs368880 single 22:20000001-21000000 35689 T +rs78882292 single 22:20000001-21000000 36513 A +rs80146538 single 22:20000001-21000000 36694 G +rs138276244 single 22:20000001-21000000 37107 A +rs405342 single 22:20000001-21000000 37314 A +rs115571782 single 22:20000001-21000000 37506 T +rs147134283 single 22:20000001-21000000 37697 T +rs111797664 single 22:20000001-21000000 37819 T +rs695500 single 22:20000001-21000000 37890 G +rs114758499 single 22:20000001-21000000 38296 A +rs114758499 single 22:20000001-21000000 38296 T +rs79393192 single 22:20000001-21000000 38491 G +rs76309774 single 22:20000001-21000000 38721 G +rs114226275 single 22:20000001-21000000 38755 C +rs174875 single 22:20000001-21000000 39154 A +rs189306965 single 22:20000001-21000000 39634 T +rs115084231 single 22:20000001-21000000 39717 A +rs3804046 single 22:20000001-21000000 39866 T +rs77399579 single 22:20000001-21000000 40219 T +rs28635282 single 22:20000001-21000000 40539 A +rs17817689 single 22:20000001-21000000 40601 T +rs695727 insertion 22:20000001-21000000 40821 G +rs16982925 single 22:20000001-21000000 41231 G +rs366148 single 22:20000001-21000000 41608 T +rs115208925 single 22:20000001-21000000 41703 C +rs73877127 single 22:20000001-21000000 41735 C +rs429965 single 22:20000001-21000000 42074 G +rs57180287 single 22:20000001-21000000 42113 A +rs758376 single 22:20000001-21000000 
42562 A +rs61371564 single 22:20000001-21000000 42847 G +rs75170603 single 22:20000001-21000000 43100 G +rs16982930 single 22:20000001-21000000 43358 C +rs113449283 single 22:20000001-21000000 43699 G +rs75011475 single 22:20000001-21000000 44040 A +rs62222179 single 22:20000001-21000000 44932 A +rs73877128 single 22:20000001-21000000 45382 A +rs440042 single 22:20000001-21000000 45410 G +rs201660291 insertion 22:20000001-21000000 45552 TA +rs5748514 single 22:20000001-21000000 45738 A +rs59609897 single 22:20000001-21000000 45882 C +rs61095784 single 22:20000001-21000000 45966 A +rs8140658 single 22:20000001-21000000 46041 G +rs148529668 single 22:20000001-21000000 46149 A +rs5748515 single 22:20000001-21000000 46343 G +rs399162 single 22:20000001-21000000 46686 A +rs140134708 single 22:20000001-21000000 47097 A +rs58308907 single 22:20000001-21000000 47586 C +rs142549904 single 22:20000001-21000000 47765 A +rs111276581 single 22:20000001-21000000 47814 C +rs111323873 single 22:20000001-21000000 47850 T +rs76518635 single 22:20000001-21000000 47972 T +rs75418691 single 22:20000001-21000000 48082 C +rs113509411 single 22:20000001-21000000 48446 A +rs73877129 single 22:20000001-21000000 48646 C +rs73389740 single 22:20000001-21000000 48862 T +rs61737483 single 22:20000001-21000000 49149 A +rs61737483 single 22:20000001-21000000 49149 C +rs2286481 single 22:20000001-21000000 49243 T +rs2286482 single 22:20000001-21000000 49284 A +rs410655 single 22:20000001-21000000 49339 C +rs149412137 insertion 22:20000001-21000000 49414 GGCCCA +rs426894 single 22:20000001-21000000 49453 G +rs2286484 single 22:20000001-21000000 49494 A +rs417981 single 22:20000001-21000000 49934 C +rs74276823 single 22:20000001-21000000 50018 T +rs419135 single 22:20000001-21000000 50400 C +rs111669330 single 22:20000001-21000000 50699 T +rs73389745 single 22:20000001-21000000 50956 T +rs73389746 single 22:20000001-21000000 50978 A +rs432909 single 22:20000001-21000000 51465 A +rs115297588 single 
22:20000001-21000000 51927 T +rs75305197 single 22:20000001-21000000 52049 T +rs16982614 single 22:20000001-21000000 52088 G +rs34082990 single 22:20000001-21000000 52142 T +rs3804045 single 22:20000001-21000000 52301 A +rs17818431 single 22:20000001-21000000 52419 A +rs3804044 single 22:20000001-21000000 52441 G +rs3804043 single 22:20000001-21000000 52480 A +rs415520 single 22:20000001-21000000 52531 G +rs442277 single 22:20000001-21000000 52565 A +rs367450 single 22:20000001-21000000 52781 T +rs140840130 single 22:20000001-21000000 53008 T +rs11555967 single 22:20000001-21000000 53153 T +rs1054215 single 22:20000001-21000000 53553 T +rs115561292 single 22:20000001-21000000 53579 T +rs73389755 single 22:20000001-21000000 54176 G +rs34919729 deletion 22:20000001-21000000 54208 1 +rs57497163 single 22:20000001-21000000 54457 C +rs74938744 single 22:20000001-21000000 54479 G +rs60598731 single 22:20000001-21000000 54506 A +rs11913070 single 22:20000001-21000000 54945 C +rs391310 single 22:20000001-21000000 55081 T +rs695800 insertion 22:20000001-21000000 55245 A +rs75563814 single 22:20000001-21000000 55569 A +rs116724863 single 22:20000001-21000000 56111 A +rs114497925 single 22:20000001-21000000 56193 A +rs61046561 single 22:20000001-21000000 56924 T +rs116496431 single 22:20000001-21000000 56976 C +rs7284822 single 22:20000001-21000000 57077 T +rs7285397 single 22:20000001-21000000 57142 G +rs5992507 single 22:20000001-21000000 57457 G +rs143585153 deletion 22:20000001-21000000 57659 5 +rs406086 single 22:20000001-21000000 57805 T +rs11913908 single 22:20000001-21000000 58273 A +rs75651845 single 22:20000001-21000000 58356 A +rs145089673 single 22:20000001-21000000 58680 T +rs450977 single 22:20000001-21000000 58862 A +rs73389760 single 22:20000001-21000000 59217 T +rs446059 single 22:20000001-21000000 59762 A +rs546604629 insertion 22:20000001-21000000 59849 T +rs377793 single 22:20000001-21000000 59979 T +rs453352 single 22:20000001-21000000 60022 A +rs2110139 
single 22:20000001-21000000 60098 T +rs442479 single 22:20000001-21000000 60135 C +rs5748522 single 22:20000001-21000000 60723 A +rs146199336 single 22:20000001-21000000 60757 C +rs139142965 single 22:20000001-21000000 60816 T +rs4425182 single 22:20000001-21000000 61414 C +rs75504350 single 22:20000001-21000000 61947 A +rs553587287 deletion 22:20000001-21000000 62022 1 +rs560994073 deletion 22:20000001-21000000 62053 1 +rs147073705 single 22:20000001-21000000 62293 T +rs5992510 single 22:20000001-21000000 63423 A +rs12628100 single 22:20000001-21000000 63726 G +rs111268543 insertion 22:20000001-21000000 64143 AGGAG +rs60877362 single 22:20000001-21000000 64381 T +rs5746857 single 22:20000001-21000000 64718 T +rs9605055 single 22:20000001-21000000 64750 C +rs174884 single 22:20000001-21000000 64777 C +rs60807043 single 22:20000001-21000000 64805 A +rs5993913 single 22:20000001-21000000 64904 G +rs5748526 single 22:20000001-21000000 65085 A +rs9606231 single 22:20000001-21000000 65275 C +rs73877134 single 22:20000001-21000000 65313 T +rs9606232 single 22:20000001-21000000 65369 A +rs112475914 single 22:20000001-21000000 65750 G +rs145098347 single 22:20000001-21000000 65792 A +rs145098347 single 22:20000001-21000000 65792 T +rs150648253 insertion 22:20000001-21000000 65982 ACA +rs148839941 single 22:20000001-21000000 66135 G +rs111275592 single 22:20000001-21000000 66240 A +rs77669633 single 22:20000001-21000000 66421 C +rs9606233 single 22:20000001-21000000 66610 C +rs117313664 single 22:20000001-21000000 66664 A +rs41281421 single 22:20000001-21000000 66797 G +rs61555679 single 22:20000001-21000000 66901 C +rs367842687 deletion 22:20000001-21000000 67082 1 +rs75798129 single 22:20000001-21000000 67231 C +rs9606234 single 22:20000001-21000000 67516 G +rs174886 single 22:20000001-21000000 67658 G +rs174887 single 22:20000001-21000000 67694 C +rs57117909 single 22:20000001-21000000 67979 T +rs61174903 single 22:20000001-21000000 68085 C +rs34310575 deletion 
22:20000001-21000000 68111 1 +rs2269723 single 22:20000001-21000000 68406 C +rs75967927 single 22:20000001-21000000 68495 T +rs2269724 single 22:20000001-21000000 68555 A +rs117653823 single 22:20000001-21000000 68722 T +rs62222181 single 22:20000001-21000000 69113 T +rs9606235 single 22:20000001-21000000 69171 G +rs73877135 single 22:20000001-21000000 69296 C +rs566411699 single 22:20000001-21000000 69589 T +rs11704078 single 22:20000001-21000000 69724 A +rs4819865 single 22:20000001-21000000 69843 A +rs9605057 single 22:20000001-21000000 69912 G +rs9606238 single 22:20000001-21000000 70170 C +rs9606239 single 22:20000001-21000000 70284 A +rs1008101 single 22:20000001-21000000 70353 G +rs75051956 deletion 22:20000001-21000000 70407 1 +rs142817386 insertion 22:20000001-21000000 70889 GTGT +rs76889809 single 22:20000001-21000000 70955 A +rs28610569 single 22:20000001-21000000 71350 G +rs9606240 single 22:20000001-21000000 71422 C +rs17817767 single 22:20000001-21000000 71629 A +rs1558495 single 22:20000001-21000000 71651 G +rs1558496 single 22:20000001-21000000 71736 C +rs1558497 single 22:20000001-21000000 71898 T +rs3827292 single 22:20000001-21000000 72415 C +rs2302497 single 22:20000001-21000000 73146 C +rs61746264 single 22:20000001-21000000 73517 T +rs35987994 single 22:20000001-21000000 74005 G +rs34301880 single 22:20000001-21000000 74154 G +rs2073778 single 22:20000001-21000000 74574 T +rs7291552 single 22:20000001-21000000 74698 T +rs11703058 single 22:20000001-21000000 75047 A +rs9606241 single 22:20000001-21000000 75857 G +rs67625586 deletion 22:20000001-21000000 76125 2 +rs142558416 deletion 22:20000001-21000000 76763 1 +rs34204101 single 22:20000001-21000000 77719 C +rs2073779 single 22:20000001-21000000 77862 C +rs79279088 single 22:20000001-21000000 79581 T +rs11089328 single 22:20000001-21000000 79602 G +rs112629076 single 22:20000001-21000000 79687 C +rs74576059 single 22:20000001-21000000 79972 A +rs60133865 single 22:20000001-21000000 80021 G 
+rs2286925 single 22:20000001-21000000 80446 A +rs138402987 single 22:20000001-21000000 80796 A +rs9606243 single 22:20000001-21000000 80926 G +rs116662725 single 22:20000001-21000000 80959 G +rs77989302 single 22:20000001-21000000 81517 A +rs7291773 single 22:20000001-21000000 81675 C +rs1640297 single 22:20000001-21000000 81851 C +rs2286926 single 22:20000001-21000000 82073 T +rs35569747 single 22:20000001-21000000 82292 G +rs55929151 single 22:20000001-21000000 82565 T +rs1807527 single 22:20000001-21000000 82861 T +rs8141289 single 22:20000001-21000000 82921 T +rs76639574 single 22:20000001-21000000 82945 C +rs116089709 single 22:20000001-21000000 83426 T +rs139814453 deletion 22:20000001-21000000 83500 3 +rs9606245 single 22:20000001-21000000 84680 A +rs59528277 single 22:20000001-21000000 84820 C +rs62894660 single 22:20000001-21000000 85354 A +rs113504177 single 22:20000001-21000000 85572 G +rs174890 single 22:20000001-21000000 85678 A +rs74858445 single 22:20000001-21000000 85804 T +rs7288396 single 22:20000001-21000000 86570 T +rs7284918 single 22:20000001-21000000 86736 C +rs79374459 single 22:20000001-21000000 87187 A +rs79061783 single 22:20000001-21000000 87257 T +rs73389779 single 22:20000001-21000000 87374 T +rs78119820 single 22:20000001-21000000 87399 T +rs9606247 single 22:20000001-21000000 87472 A +rs9606248 single 22:20000001-21000000 87538 G +rs1628967 single 22:20000001-21000000 87644 G +rs4276104 single 22:20000001-21000000 87691 A +rs9606249 single 22:20000001-21000000 87749 T +rs77437399 single 22:20000001-21000000 88323 T +rs12170420 single 22:20000001-21000000 88469 T +rs34220917 single 22:20000001-21000000 88662 G +rs78743119 single 22:20000001-21000000 88802 C +rs112297714 single 22:20000001-21000000 89082 A +rs113307754 single 22:20000001-21000000 89270 G +rs73389783 single 22:20000001-21000000 89523 T +rs174891 single 22:20000001-21000000 89750 C +rs9605062 single 22:20000001-21000000 89879 C +rs9605063 single 22:20000001-21000000 
90037 T +rs9606250 single 22:20000001-21000000 90191 T +rs144836942 single 22:20000001-21000000 90612 G +rs112346953 deletion 22:20000001-21000000 90865 4 +rs75810475 single 22:20000001-21000000 90944 C +rs75728310 single 22:20000001-21000000 91192 C +rs2904565 single 22:20000001-21000000 91627 T +rs1633418 single 22:20000001-21000000 91755 C +rs112367533 single 22:20000001-21000000 91832 A +rs11705462 single 22:20000001-21000000 92411 A +rs9606251 single 22:20000001-21000000 92776 A +rs9605065 single 22:20000001-21000000 92962 G +rs9606252 single 22:20000001-21000000 92995 T +rs443678 single 22:20000001-21000000 93125 C +rs8139107 single 22:20000001-21000000 93287 T +rs8139591 single 22:20000001-21000000 93316 G +rs8139374 single 22:20000001-21000000 93521 T +rs113088659 single 22:20000001-21000000 93966 A +rs113088659 single 22:20000001-21000000 93966 C +rs113088659 single 22:20000001-21000000 93966 T +rs4819529 single 22:20000001-21000000 94262 T +rs149617395 deletion 22:20000001-21000000 94569 3 +rs74689531 single 22:20000001-21000000 95442 A +rs56689361 single 22:20000001-21000000 95681 C +rs146106255 insertion 22:20000001-21000000 96168 G +rs1633442 single 22:20000001-21000000 96313 A +rs77725161 single 22:20000001-21000000 96394 G +rs79989360 single 22:20000001-21000000 97244 G +rs73389786 single 22:20000001-21000000 97335 G +rs41281423 single 22:20000001-21000000 97729 T +rs41281425 single 22:20000001-21000000 98167 C +rs1640299 single 22:20000001-21000000 98358 G +rs2286928 single 22:20000001-21000000 98520 A +rs417309 single 22:20000001-21000000 98543 A +rs142317498 deletion 22:20000001-21000000 98574 4 +rs73389791 single 22:20000001-21000000 98795 T +rs720014 single 22:20000001-21000000 98881 C +rs1063286 single 22:20000001-21000000 99245 C +rs114225267 single 22:20000001-21000000 99298 A +rs3757 single 22:20000001-21000000 99330 A +rs145947632 deletion 22:20000001-21000000 99579 2 +rs9605066 single 22:20000001-21000000 99653 T +rs73389792 single 
22:20000001-21000000 99751 T +rs1048659 single 22:20000001-21000000 99890 G +rs113958995 single 22:20000001-21000000 99940 A +rs447017 single 22:20000001-21000000 100157 C +rs56997592 single 22:20000001-21000000 100386 A +rs9606254 single 22:20000001-21000000 100408 T +rs1633445 single 22:20000001-21000000 100595 C +rs73877139 single 22:20000001-21000000 100910 T +rs117918233 single 22:20000001-21000000 101342 T +rs111617686 insertion 22:20000001-21000000 101392 CT +rs8142540 single 22:20000001-21000000 101433 A +rs78218055 single 22:20000001-21000000 101630 T +rs59276810 single 22:20000001-21000000 102045 T +rs885980 single 22:20000001-21000000 102089 C +rs2073777 single 22:20000001-21000000 102749 T +rs77201758 single 22:20000001-21000000 103011 A +rs79886149 single 22:20000001-21000000 103188 C +rs9605067 single 22:20000001-21000000 103262 T +rs59177373 single 22:20000001-21000000 103461 T +rs2008701 single 22:20000001-21000000 104234 T +rs114221480 single 22:20000001-21000000 104267 G +rs395446 single 22:20000001-21000000 104300 C +rs713982 single 22:20000001-21000000 104818 C +rs2286929 single 22:20000001-21000000 104852 G +rs555759275 single 22:20000001-21000000 105227 A +rs737871 single 22:20000001-21000000 105640 T +rs4589855 single 22:20000001-21000000 106068 A +rs2871050 single 22:20000001-21000000 106103 C +rs28496942 single 22:20000001-21000000 106304 T +rs140554045 single 22:20000001-21000000 107063 A +rs60558247 single 22:20000001-21000000 107112 T +rs78885732 single 22:20000001-21000000 107399 C +rs73877140 single 22:20000001-21000000 107463 T +rs2238798 single 22:20000001-21000000 107728 A +rs78632306 deletion 22:20000001-21000000 107855 2 +rs5992511 single 22:20000001-21000000 107914 C +rs75229755 single 22:20000001-21000000 107956 C +rs73877141 single 22:20000001-21000000 108061 C +rs9606256 single 22:20000001-21000000 108499 A +rs61127939 single 22:20000001-21000000 108550 G +rs111522241 single 22:20000001-21000000 108735 G +rs2871051 single 
22:20000001-21000000 108939 G +rs1640407 single 22:20000001-21000000 109087 T +rs2238799 single 22:20000001-21000000 109324 G +rs139120568 insertion 22:20000001-21000000 109557 CC +rs72490649 single 22:20000001-21000000 109669 T +rs548036766 single 22:20000001-21000000 110312 G +rs572135609 deletion 22:20000001-21000000 110422 17 +rs117243758 single 22:20000001-21000000 110660 A +rs144916982 insertion 22:20000001-21000000 110770 A +rs143558410 deletion 22:20000001-21000000 110825 1 +rs200793138 deletion 22:20000001-21000000 110939 19 +rs76318258 single 22:20000001-21000000 111675 T +rs100798 single 22:20000001-21000000 111728 T +rs114855038 single 22:20000001-21000000 111857 C +rs140198500 deletion 22:20000001-21000000 111970 1 +rs73391904 single 22:20000001-21000000 112228 T +rs5844407 insertion 22:20000001-21000000 112343 C +rs111795946 insertion 22:20000001-21000000 112513 G +rs73391907 single 22:20000001-21000000 112621 C +rs114654023 single 22:20000001-21000000 112751 T +rs175162 single 22:20000001-21000000 113252 G +rs73391909 single 22:20000001-21000000 113335 T +rs175163 single 22:20000001-21000000 113776 C +rs147429452 single 22:20000001-21000000 113808 T +rs143470990 deletion 22:20000001-21000000 114717 1 +rs58050290 single 22:20000001-21000000 114877 A +rs79918302 single 22:20000001-21000000 114949 C +rs145735203 deletion 22:20000001-21000000 115258 4 +rs77648957 single 22:20000001-21000000 115412 A +rs77856381 single 22:20000001-21000000 115468 A +rs79100373 single 22:20000001-21000000 115509 C +rs175164 single 22:20000001-21000000 115561 T +rs28415889 single 22:20000001-21000000 115675 A +rs74276833 single 22:20000001-21000000 115871 A +rs175165 single 22:20000001-21000000 116014 G +rs76243800 single 22:20000001-21000000 116095 A +rs175166 single 22:20000001-21000000 116181 G +rs62219901 single 22:20000001-21000000 116593 A +rs175167 single 22:20000001-21000000 116669 T +rs175168 single 22:20000001-21000000 116905 T +rs2871052 single 
22:20000001-21000000 116971 C +rs2871053 single 22:20000001-21000000 117045 C +rs80098812 single 22:20000001-21000000 117083 T +rs117718692 single 22:20000001-21000000 117343 T +rs117422471 single 22:20000001-21000000 117644 G +rs139564597 single 22:20000001-21000000 118309 A +rs41281437 single 22:20000001-21000000 118351 C +rs41281439 single 22:20000001-21000000 118449 C +rs533822857 deletion 22:20000001-21000000 118621 1 +rs146644744 single 22:20000001-21000000 118777 A +rs113118838 single 22:20000001-21000000 118913 C +rs76972128 single 22:20000001-21000000 119001 G +rs191584744 single 22:20000001-21000000 119176 C +rs187564771 single 22:20000001-21000000 119207 A +rs8135702 single 22:20000001-21000000 119244 C +rs8137316 single 22:20000001-21000000 119301 C +rs545014283 insertion 22:20000001-21000000 119343 CCG +rs9606258 single 22:20000001-21000000 119711 G +rs77304385 single 22:20000001-21000000 119883 A +rs62219902 single 22:20000001-21000000 119929 G +rs142441474 single 22:20000001-21000000 119983 T +rs151120156 single 22:20000001-21000000 120978 T +rs175170 single 22:20000001-21000000 121079 T +rs113514212 single 22:20000001-21000000 121210 T +rs60407076 single 22:20000001-21000000 121644 A +rs383129 single 22:20000001-21000000 121841 T +rs75031525 single 22:20000001-21000000 122090 G +rs201045886 insertion 22:20000001-21000000 122644 T +rs116955623 single 22:20000001-21000000 123011 G +rs175173 single 22:20000001-21000000 123234 T +rs60292854 single 22:20000001-21000000 123504 C +rs116320840 single 22:20000001-21000000 124312 C +rs114111317 single 22:20000001-21000000 124669 G +rs149005029 deletion 22:20000001-21000000 124762 3 +rs117746982 single 22:20000001-21000000 125199 T +rs116846796 single 22:20000001-21000000 125701 C +rs73391922 single 22:20000001-21000000 126005 G +rs117318384 single 22:20000001-21000000 126467 T +rs45450699 single 22:20000001-21000000 126701 T +rs61737216 single 22:20000001-21000000 126740 T +rs175174 single 
22:20000001-21000000 127553 G +rs175175 single 22:20000001-21000000 128650 G +rs79425519 single 22:20000001-21000000 128673 C +rs2286930 single 22:20000001-21000000 128879 A +rs77480122 single 22:20000001-21000000 129105 C +rs79627501 single 22:20000001-21000000 129489 T +rs175177 single 22:20000001-21000000 129537 A +rs11704034 single 22:20000001-21000000 129741 C +rs74491992 single 22:20000001-21000000 129967 A +rs117183915 single 22:20000001-21000000 130147 G +rs150179091 single 22:20000001-21000000 130625 T +rs537210848 single 22:20000001-21000000 130699 C +rs115574320 single 22:20000001-21000000 131054 T +rs9605069 single 22:20000001-21000000 131114 T +rs3747072 single 22:20000001-21000000 131426 C +rs117888991 single 22:20000001-21000000 132298 C +rs4819866 single 22:20000001-21000000 132459 T +rs175178 single 22:20000001-21000000 132637 A +rs175179 single 22:20000001-21000000 132971 G +rs114492336 single 22:20000001-21000000 133242 T +rs11556521 single 22:20000001-21000000 133289 C +rs1974652 single 22:20000001-21000000 134146 G +rs1974653 single 22:20000001-21000000 134349 A +rs9606261 single 22:20000001-21000000 134441 C +rs11705346 single 22:20000001-21000000 134524 T +rs9605070 single 22:20000001-21000000 134738 T +rs60730595 single 22:20000001-21000000 134973 T +rs527450612 single 22:20000001-21000000 135096 T +rs7290959 single 22:20000001-21000000 135210 C +rs2292570 single 22:20000001-21000000 135339 C +rs7286804 single 22:20000001-21000000 135382 T +rs9606262 single 22:20000001-21000000 135584 C +rs8137258 single 22:20000001-21000000 135960 C +rs175180 single 22:20000001-21000000 136107 T +rs175181 single 22:20000001-21000000 136262 A +rs60603127 single 22:20000001-21000000 136330 T +rs4819867 single 22:20000001-21000000 136378 A +rs56959114 single 22:20000001-21000000 136477 G +rs73877146 single 22:20000001-21000000 136605 C +rs75274389 single 22:20000001-21000000 136638 T +rs7291332 single 22:20000001-21000000 136665 A +rs7291332 single 
22:20000001-21000000 136665 C +rs548968027 insertion 22:20000001-21000000 136804 C +rs9606264 single 22:20000001-21000000 137045 A +rs8142411 single 22:20000001-21000000 137408 C +rs368639630 deletion 22:20000001-21000000 137433 1 +rs116805844 single 22:20000001-21000000 137616 T +rs139050179 single 22:20000001-21000000 137722 T +rs3887519 single 22:20000001-21000000 137821 G +rs560586592 insertion 22:20000001-21000000 137948 TGGGGC +rs560586592 insertion 22:20000001-21000000 137948 TGGGGCTGGGGC +rs73391926 single 22:20000001-21000000 138104 C +rs77065155 single 22:20000001-21000000 138764 T +rs76091326 single 22:20000001-21000000 138785 A +rs3885541 single 22:20000001-21000000 138892 A +rs114106572 single 22:20000001-21000000 138976 T +rs59634879 single 22:20000001-21000000 139164 A +rs9605071 single 22:20000001-21000000 139203 C +rs175182 single 22:20000001-21000000 139298 T +rs175183 single 22:20000001-21000000 139339 T +rs138336705 single 22:20000001-21000000 139539 C +rs186157328 single 22:20000001-21000000 139606 T +rs73877149 single 22:20000001-21000000 140153 T +rs80308524 insertion 22:20000001-21000000 140234 C +rs16982973 single 22:20000001-21000000 140294 T +rs4819869 single 22:20000001-21000000 141061 A +rs595224 single 22:20000001-21000000 141991 G +rs619015 single 22:20000001-21000000 142378 G +rs11913445 single 22:20000001-21000000 142512 A +rs625686 single 22:20000001-21000000 142931 T +rs117085592 single 22:20000001-21000000 143398 T +rs638956 single 22:20000001-21000000 143625 T +rs639516 single 22:20000001-21000000 143781 T +rs8136462 single 22:20000001-21000000 143860 A +rs73150878 single 22:20000001-21000000 143903 T +rs1860943 single 22:20000001-21000000 144239 C +rs568827214 deletion 22:20000001-21000000 144527 1 +rs653448 single 22:20000001-21000000 144559 T +rs649737 single 22:20000001-21000000 144662 G +rs9605073 single 22:20000001-21000000 144752 A +rs78426131 single 22:20000001-21000000 144795 A +rs738078 single 22:20000001-21000000 
144856 A +rs175184 single 22:20000001-21000000 144901 C +rs60321341 single 22:20000001-21000000 145034 T +rs175185 single 22:20000001-21000000 145120 G +rs9606269 single 22:20000001-21000000 145257 T +rs114715156 single 22:20000001-21000000 145331 G +rs17817803 single 22:20000001-21000000 145525 T +rs666346 single 22:20000001-21000000 146084 C +rs670708 single 22:20000001-21000000 146148 A +rs62218076 single 22:20000001-21000000 146497 A +rs9605074 single 22:20000001-21000000 146532 A +rs175187 single 22:20000001-21000000 146794 C +rs2106148 single 22:20000001-21000000 146879 A +rs680725 single 22:20000001-21000000 146953 C +rs594652 single 22:20000001-21000000 147206 A +rs584567 single 22:20000001-21000000 147279 G +rs5992515 single 22:20000001-21000000 147391 A +rs117643825 single 22:20000001-21000000 147904 A +rs588536 single 22:20000001-21000000 148117 G +rs9618744 single 22:20000001-21000000 148157 G +rs9618745 single 22:20000001-21000000 148181 C +rs599380 single 22:20000001-21000000 148242 C +rs599434 single 22:20000001-21000000 148282 C +rs1640282 single 22:20000001-21000000 148366 T +rs1640284 single 22:20000001-21000000 148587 G +rs701450 single 22:20000001-21000000 148682 C +rs701449 single 22:20000001-21000000 148740 T +rs8138714 single 22:20000001-21000000 149169 G +rs148485532 single 22:20000001-21000000 149204 T +rs60242968 single 22:20000001-21000000 149470 T +rs73391941 single 22:20000001-21000000 149514 G +rs57629764 single 22:20000001-21000000 149658 G +rs12171242 single 22:20000001-21000000 149807 C +rs146217113 single 22:20000001-21000000 149860 C +rs115554508 single 22:20000001-21000000 149919 A +rs550118721 insertion 22:20000001-21000000 149986 ACT +rs371812250 single 22:20000001-21000000 151056 C +rs111213231 single 22:20000001-21000000 151153 T +rs531796616 deletion 22:20000001-21000000 151554 3 +rs190277665 single 22:20000001-21000000 151600 C +rs571803356 insertion 22:20000001-21000000 151725 ACC +rs374075810 single 22:20000001-21000000 
151772 A +rs139580779 single 22:20000001-21000000 151798 C +rs549131213 insertion 22:20000001-21000000 151890 ACCATC +rs539377113 insertion 22:20000001-21000000 151992 ATCACT +rs453205 single 22:20000001-21000000 152558 C +rs62218079 single 22:20000001-21000000 152861 A +rs629665 single 22:20000001-21000000 152907 C +rs682114 single 22:20000001-21000000 152931 A +rs112329860 single 22:20000001-21000000 153458 C +rs454656 single 22:20000001-21000000 153623 C +rs449360 single 22:20000001-21000000 153656 A +rs112413245 single 22:20000001-21000000 153738 A +rs111691053 single 22:20000001-21000000 153889 G +rs415796 single 22:20000001-21000000 154320 A +rs422213 single 22:20000001-21000000 154726 G +rs603084 single 22:20000001-21000000 154767 G +rs373747 single 22:20000001-21000000 155191 C +rs605321 single 22:20000001-21000000 155273 T +rs423543 single 22:20000001-21000000 155297 A +rs175191 single 22:20000001-21000000 155665 A +rs77886622 single 22:20000001-21000000 155742 A +rs384110 single 22:20000001-21000000 155982 C +rs74670780 single 22:20000001-21000000 156057 T +rs429994 single 22:20000001-21000000 156110 G +rs28384 single 22:20000001-21000000 156414 C +rs73391947 single 22:20000001-21000000 156598 A +rs105273 single 22:20000001-21000000 156743 A +rs74898296 single 22:20000001-21000000 156852 C +rs5992517 single 22:20000001-21000000 156885 A +rs117487307 single 22:20000001-21000000 157217 T +rs9605078 single 22:20000001-21000000 157298 T +rs2010063 single 22:20000001-21000000 157964 A +rs139231218 single 22:20000001-21000000 158242 T +rs607556 single 22:20000001-21000000 158297 C +rs738026 single 22:20000001-21000000 158332 A +rs1153415 single 22:20000001-21000000 159048 G +rs74550883 single 22:20000001-21000000 159167 C +rs622741 single 22:20000001-21000000 159309 A +rs1153416 single 22:20000001-21000000 159346 T +rs111837408 single 22:20000001-21000000 159421 C +rs201558243 single 22:20000001-21000000 159460 A +rs624146 single 22:20000001-21000000 159698 G 
+rs625467 single 22:20000001-21000000 159980 C +rs625467 single 22:20000001-21000000 159980 T +rs77178912 single 22:20000001-21000000 160080 A +rs455127 single 22:20000001-21000000 160204 T +rs5992518 single 22:20000001-21000000 160288 G +rs144519743 deletion 22:20000001-21000000 160329 10 +rs72236103 deletion 22:20000001-21000000 160527 4 +rs67222192 single 22:20000001-21000000 160585 G +rs117988645 single 22:20000001-21000000 160671 T +rs116550458 single 22:20000001-21000000 160729 A +rs175192 single 22:20000001-21000000 160770 T +rs1210776 single 22:20000001-21000000 160971 C +rs9617862 single 22:20000001-21000000 161029 T +rs9605079 single 22:20000001-21000000 161294 A +rs1153417 single 22:20000001-21000000 161316 C +rs1153418 single 22:20000001-21000000 161423 C +rs8136093 single 22:20000001-21000000 161604 A +rs615103 single 22:20000001-21000000 161737 C +rs737826 single 22:20000001-21000000 162620 A +rs175193 single 22:20000001-21000000 162722 C +rs201608214 insertion 22:20000001-21000000 162758 CT +rs201608214 insertion 22:20000001-21000000 162758 CTGCC +rs175194 single 22:20000001-21000000 163069 A +rs8137965 single 22:20000001-21000000 163461 G +rs75442801 single 22:20000001-21000000 163832 T +rs11464394 insertion 22:20000001-21000000 164318 G +rs404060 single 22:20000001-21000000 164359 C +rs5748545 single 22:20000001-21000000 164579 T +rs405011 single 22:20000001-21000000 164695 T +rs412160 single 22:20000001-21000000 164801 T +rs175195 single 22:20000001-21000000 164948 A +rs79507494 single 22:20000001-21000000 165007 T +rs4474965 single 22:20000001-21000000 165265 A +rs79312272 single 22:20000001-21000000 165363 C +rs175196 single 22:20000001-21000000 165438 A +rs175197 single 22:20000001-21000000 165631 T +rs1153419 single 22:20000001-21000000 165830 A +rs58319702 single 22:20000001-21000000 165863 T +rs175198 single 22:20000001-21000000 166012 T +rs80093097 single 22:20000001-21000000 166183 T +rs1153421 single 22:20000001-21000000 166221 T 
+rs74815951 single 22:20000001-21000000 166685 A +rs419438 single 22:20000001-21000000 166722 G +rs451455 single 22:20000001-21000000 166910 A +rs117888625 single 22:20000001-21000000 167009 T +rs79525085 single 22:20000001-21000000 167405 T +rs75267189 single 22:20000001-21000000 167784 A +rs116804714 single 22:20000001-21000000 167852 A +rs175199 single 22:20000001-21000000 168294 T +rs175200 single 22:20000001-21000000 168508 A +rs390906 single 22:20000001-21000000 168603 T +rs111441035 single 22:20000001-21000000 168624 C +rs117700651 single 22:20000001-21000000 168900 T +rs150450872 deletion 22:20000001-21000000 168982 9 +rs175203 single 22:20000001-21000000 169036 C +rs175204 single 22:20000001-21000000 169093 T +rs393174 single 22:20000001-21000000 169366 T +rs73150891 single 22:20000001-21000000 169421 A +rs12484240 single 22:20000001-21000000 169456 A +rs438798 single 22:20000001-21000000 169498 G +rs78429682 single 22:20000001-21000000 169895 T +rs9306236 single 22:20000001-21000000 170092 A +rs73391970 single 22:20000001-21000000 170776 T +rs658073 single 22:20000001-21000000 170995 A +rs658583 single 22:20000001-21000000 171145 G +rs10212087 single 22:20000001-21000000 171250 T +rs10212092 single 22:20000001-21000000 171348 G +rs74880766 single 22:20000001-21000000 171374 G +rs659918 single 22:20000001-21000000 171448 G +rs9618747 single 22:20000001-21000000 171567 T +rs618773 single 22:20000001-21000000 171652 T +rs671840 single 22:20000001-21000000 171813 C +rs117780955 single 22:20000001-21000000 171875 G +rs75275392 single 22:20000001-21000000 172047 A +rs114977667 single 22:20000001-21000000 172141 A +rs701447 single 22:20000001-21000000 172312 T +rs632194 single 22:20000001-21000000 172405 T +rs10212004 single 22:20000001-21000000 172818 T +rs687264 single 22:20000001-21000000 172900 C +rs5844408 insertion 22:20000001-21000000 173497 G +rs175206 single 22:20000001-21000000 173525 C +rs5993927 single 22:20000001-21000000 173576 A +rs79270025 single 
22:20000001-21000000 174099 G +rs118095966 single 22:20000001-21000000 174269 C +rs74931122 single 22:20000001-21000000 174497 T +rs55911007 deletion 22:20000001-21000000 174561 1 +rs663353 single 22:20000001-21000000 174711 A +rs75766 single 22:20000001-21000000 174852 A +rs75765 single 22:20000001-21000000 174958 T +rs9618749 single 22:20000001-21000000 175222 C +rs117565467 single 22:20000001-21000000 175367 T +rs7290062 single 22:20000001-21000000 175472 C +rs116622544 single 22:20000001-21000000 175571 C +rs79714974 single 22:20000001-21000000 175618 A +rs139587601 single 22:20000001-21000000 176518 A +rs201946414 deletion 22:20000001-21000000 176558 4 +rs149723481 single 22:20000001-21000000 176704 T +rs175207 single 22:20000001-21000000 177060 T +rs597859 single 22:20000001-21000000 177218 A +rs175208 single 22:20000001-21000000 177319 T +rs667987 single 22:20000001-21000000 177594 G +rs5992519 single 22:20000001-21000000 177644 T +rs599687 single 22:20000001-21000000 177668 G +rs600545 single 22:20000001-21000000 177847 G +rs600608 single 22:20000001-21000000 177890 G +rs601014 single 22:20000001-21000000 177942 A +rs670147 single 22:20000001-21000000 178022 C +rs601541 single 22:20000001-21000000 178094 A +rs2871058 single 22:20000001-21000000 178146 G +rs2871059 single 22:20000001-21000000 178174 G +rs4819874 single 22:20000001-21000000 178204 G +rs684186 single 22:20000001-21000000 178798 G +rs175210 single 22:20000001-21000000 178914 T +rs4819876 single 22:20000001-21000000 178988 T +rs4819877 single 22:20000001-21000000 179038 C +rs1640409 single 22:20000001-21000000 179095 C +rs137898282 single 22:20000001-21000000 179132 A +rs78736437 single 22:20000001-21000000 179172 A +rs1642138 single 22:20000001-21000000 179222 T +rs701446 single 22:20000001-21000000 179254 C +rs701445 single 22:20000001-21000000 179285 A +rs627833 single 22:20000001-21000000 179338 A +rs627891 single 22:20000001-21000000 179378 A +rs5992520 single 22:20000001-21000000 179439 T 
+rs589754 single 22:20000001-21000000 179502 C +rs76414747 single 22:20000001-21000000 180070 T +rs56252913 single 22:20000001-21000000 180103 A +rs701444 single 22:20000001-21000000 180454 A +rs116924716 single 22:20000001-21000000 181042 A +rs646002 single 22:20000001-21000000 181125 T +rs917837 single 22:20000001-21000000 181210 A +rs147687370 single 22:20000001-21000000 181487 T +rs676524 single 22:20000001-21000000 181983 T +rs58263193 single 22:20000001-21000000 182068 T +rs689135 single 22:20000001-21000000 182455 T +rs34529390 deletion 22:20000001-21000000 182566 1 +rs634473 single 22:20000001-21000000 182636 A +rs635394 single 22:20000001-21000000 182863 G +rs583898 single 22:20000001-21000000 183029 A +rs637599 single 22:20000001-21000000 183344 C +rs696884 single 22:20000001-21000000 183508 G +rs11089331 single 22:20000001-21000000 184001 A +rs652976 single 22:20000001-21000000 184444 C +rs78412557 single 22:20000001-21000000 184475 G +rs611299 single 22:20000001-21000000 184576 A +rs612140 single 22:20000001-21000000 184709 G +rs10222234 single 22:20000001-21000000 184832 A +rs73394005 single 22:20000001-21000000 184936 T +rs80115931 single 22:20000001-21000000 184960 A +rs665780 single 22:20000001-21000000 185010 C +rs613930 single 22:20000001-21000000 185118 A +rs666349 single 22:20000001-21000000 185190 C +rs625704 single 22:20000001-21000000 185456 A +rs625749 single 22:20000001-21000000 185486 T +rs3081798 insertion 22:20000001-21000000 185699 TCT +rs138361379 single 22:20000001-21000000 185886 T +rs627919 single 22:20000001-21000000 185965 C +rs680652 single 22:20000001-21000000 185998 G +rs628355 single 22:20000001-21000000 186073 C +rs681112 single 22:20000001-21000000 186101 C +rs7292285 single 22:20000001-21000000 186549 C +rs77264923 single 22:20000001-21000000 186635 A +rs558869332 deletion 22:20000001-21000000 186986 4 +rs701442 single 22:20000001-21000000 187130 G +rs655618 single 22:20000001-21000000 187542 C +rs655656 single 
22:20000001-21000000 187574 T +rs73394008 single 22:20000001-21000000 187922 G +rs658298 single 22:20000001-21000000 188159 C +rs658353 single 22:20000001-21000000 188198 C +rs114252382 single 22:20000001-21000000 188344 C +rs670665 single 22:20000001-21000000 188608 T +rs144211478 single 22:20000001-21000000 188745 T +rs574475587 insertion 22:20000001-21000000 188856 A +rs143552218 deletion 22:20000001-21000000 188919 12 +rs672570 single 22:20000001-21000000 189076 T +rs672966 single 22:20000001-21000000 189119 T +rs185065728 single 22:20000001-21000000 189151 A +rs673062 single 22:20000001-21000000 189201 A +rs684423 single 22:20000001-21000000 189377 T +rs62218156 single 22:20000001-21000000 189407 A +rs685667 single 22:20000001-21000000 189621 T +rs686104 single 22:20000001-21000000 189721 C +rs2098257 single 22:20000001-21000000 190054 T +rs687965 single 22:20000001-21000000 190173 T +rs114137281 single 22:20000001-21000000 190378 A +rs738079 single 22:20000001-21000000 190412 A +rs592681 single 22:20000001-21000000 190696 A +rs661900 single 22:20000001-21000000 190759 C +rs741414 single 22:20000001-21000000 191545 G +rs117701200 single 22:20000001-21000000 191590 A +rs117182259 single 22:20000001-21000000 192146 G +rs9605084 single 22:20000001-21000000 192330 G +rs12485013 single 22:20000001-21000000 192543 A +rs638150 single 22:20000001-21000000 192585 C +rs41282451 single 22:20000001-21000000 193098 T +rs585450 single 22:20000001-21000000 193158 A +rs585450 single 22:20000001-21000000 193158 G +rs114911618 single 22:20000001-21000000 193818 T +rs654389 single 22:20000001-21000000 193860 C +rs144829478 single 22:20000001-21000000 195038 T +rs75367243 single 22:20000001-21000000 195485 A +rs627235 single 22:20000001-21000000 195590 T +rs17818762 single 22:20000001-21000000 195729 T +rs35630489 single 22:20000001-21000000 195777 C +rs13056865 single 22:20000001-21000000 195927 A +rs17746105 single 22:20000001-21000000 195998 C +rs17746115 single 
22:20000001-21000000 196045 A +rs75313062 single 22:20000001-21000000 196181 C +rs77763468 single 22:20000001-21000000 196281 G +rs67477750 single 22:20000001-21000000 196449 A +rs35306520 single 22:20000001-21000000 196565 A +rs9605085 single 22:20000001-21000000 196675 G +rs62219680 single 22:20000001-21000000 196735 G +rs9606276 single 22:20000001-21000000 196771 G +rs17818792 single 22:20000001-21000000 197334 A +rs111479272 single 22:20000001-21000000 197456 A +rs11912140 single 22:20000001-21000000 198134 A +rs9606277 single 22:20000001-21000000 198203 T +rs9606279 single 22:20000001-21000000 198238 G +rs9606280 single 22:20000001-21000000 198340 C +rs17746140 single 22:20000001-21000000 198531 A +rs9606281 single 22:20000001-21000000 198583 T +rs78864465 single 22:20000001-21000000 198724 T +rs145389230 insertion 22:20000001-21000000 198805 T +rs115483801 single 22:20000001-21000000 199232 T +rs9605086 single 22:20000001-21000000 199578 A +rs9605086 single 22:20000001-21000000 199578 T +rs116036640 single 22:20000001-21000000 200128 C +rs79330709 single 22:20000001-21000000 200358 A +rs9606282 single 22:20000001-21000000 200512 G +rs73394013 single 22:20000001-21000000 200616 T +rs79714928 single 22:20000001-21000000 200653 A +rs116485434 single 22:20000001-21000000 201052 T +rs9606283 single 22:20000001-21000000 201429 T +rs9605087 single 22:20000001-21000000 201736 A +rs730669 single 22:20000001-21000000 201953 T +rs75747087 single 22:20000001-21000000 202002 T +rs9606284 single 22:20000001-21000000 202113 C +rs9606285 single 22:20000001-21000000 202159 A +rs9606286 single 22:20000001-21000000 202421 T +rs35945458 single 22:20000001-21000000 202504 G +rs587317 single 22:20000001-21000000 202666 C +rs71312739 single 22:20000001-21000000 202711 C +rs640207 single 22:20000001-21000000 202788 A +rs77527928 single 22:20000001-21000000 202851 A +rs5748552 single 22:20000001-21000000 202958 T +rs9606287 single 22:20000001-21000000 203157 C +rs9606288 single 
22:20000001-21000000 203364 C +rs35410261 single 22:20000001-21000000 203389 T +rs9605088 single 22:20000001-21000000 203448 G +rs17746176 single 22:20000001-21000000 203667 A +rs17746176 single 22:20000001-21000000 203667 G +rs17746176 single 22:20000001-21000000 203667 T +rs66790491 single 22:20000001-21000000 203936 T +rs4819530 single 22:20000001-21000000 203996 A +rs2192152 single 22:20000001-21000000 204035 T +rs2192153 single 22:20000001-21000000 204158 T +rs146513613 single 22:20000001-21000000 204233 A +rs542849601 insertion 22:20000001-21000000 204302 A +rs9618751 single 22:20000001-21000000 204602 G +rs5993932 single 22:20000001-21000000 204747 A +rs5993933 single 22:20000001-21000000 204790 T +rs9606289 single 22:20000001-21000000 204922 T +rs77703137 single 22:20000001-21000000 205065 A +rs7293153 single 22:20000001-21000000 205262 A +rs672297 single 22:20000001-21000000 205332 T +rs150406620 insertion 22:20000001-21000000 205758 G +rs1642170 single 22:20000001-21000000 205779 A +rs855066 single 22:20000001-21000000 205831 C +rs184586910 single 22:20000001-21000000 205925 T +rs1642171 single 22:20000001-21000000 206051 C +rs16983093 single 22:20000001-21000000 206263 A +rs16983095 single 22:20000001-21000000 206383 T +rs11403191 insertion 22:20000001-21000000 206493 G +rs76335459 single 22:20000001-21000000 206603 T +rs73394025 single 22:20000001-21000000 206808 T +rs34702408 single 22:20000001-21000000 207005 A +rs16983098 single 22:20000001-21000000 207140 C +rs17818857 single 22:20000001-21000000 207327 C +rs79041058 single 22:20000001-21000000 207439 T +rs1640426 single 22:20000001-21000000 207824 T +rs535028580 insertion 22:20000001-21000000 207877 CCT +rs855067 single 22:20000001-21000000 208059 C +rs117866503 single 22:20000001-21000000 208134 C +rs58095234 single 22:20000001-21000000 208259 T +rs117251787 single 22:20000001-21000000 208307 A +rs11382517 insertion 22:20000001-21000000 208347 C +rs16983102 single 22:20000001-21000000 208416 T 
+rs79718986 single 22:20000001-21000000 208498 T +rs138984011 deletion 22:20000001-21000000 208603 1 +rs140668404 deletion 22:20000001-21000000 208652 1 +rs5748554 single 22:20000001-21000000 209029 C +rs145930670 single 22:20000001-21000000 209121 T +rs1642172 single 22:20000001-21000000 209424 A +rs77300267 single 22:20000001-21000000 209448 A +rs5748555 single 22:20000001-21000000 209486 A +rs145585391 single 22:20000001-21000000 209706 T +rs1640348 single 22:20000001-21000000 209774 G +rs1640349 single 22:20000001-21000000 210054 A +rs1640349 single 22:20000001-21000000 210054 T +rs1642173 single 22:20000001-21000000 210118 A +rs117027931 single 22:20000001-21000000 210183 A +rs1640350 single 22:20000001-21000000 210362 T +rs12166707 single 22:20000001-21000000 210429 C +rs117725357 single 22:20000001-21000000 210630 A +rs8142885 single 22:20000001-21000000 211138 C +rs1640351 single 22:20000001-21000000 211327 G +rs1642174 single 22:20000001-21000000 211348 G +rs60356256 deletion 22:20000001-21000000 211529 1 +rs1640352 single 22:20000001-21000000 211560 G +rs56350811 single 22:20000001-21000000 211689 T +rs141901933 single 22:20000001-21000000 211790 C +rs111163807 single 22:20000001-21000000 212539 G +rs71718621 deletion 22:20000001-21000000 212584 2 +rs536300675 single 22:20000001-21000000 212660 G +rs143894206 insertion 22:20000001-21000000 212700 AA +rs62651450 single 22:20000001-21000000 212736 C +rs368437419 single 22:20000001-21000000 212911 A +rs78870595 deletion 22:20000001-21000000 212947 2 +rs201670381 deletion 22:20000001-21000000 212976 2 +rs111066904 single 22:20000001-21000000 213003 T +rs183700374 single 22:20000001-21000000 213176 T +rs529446461 insertion 22:20000001-21000000 213249 CA +rs140384568 deletion 22:20000001-21000000 213308 4 +rs116821952 single 22:20000001-21000000 213367 C +rs142260528 insertion 22:20000001-21000000 213478 CA +rs4389405 single 22:20000001-21000000 213634 A +rs854962 single 22:20000001-21000000 213677 C +rs1962811 
single 22:20000001-21000000 213717 T +rs854964 single 22:20000001-21000000 213757 C +rs701436 single 22:20000001-21000000 213810 G +rs140866047 deletion 22:20000001-21000000 213926 21 +rs114523918 single 22:20000001-21000000 214087 T +rs696883 single 22:20000001-21000000 214224 T +rs114211813 single 22:20000001-21000000 214384 G +rs57810421 single 22:20000001-21000000 214412 G +rs2110150 single 22:20000001-21000000 214484 T +rs77599853 single 22:20000001-21000000 214621 C +rs117156885 single 22:20000001-21000000 214752 T +rs116142588 single 22:20000001-21000000 214781 T +rs701434 single 22:20000001-21000000 214898 T +rs67698406 single 22:20000001-21000000 215190 A +rs80027834 single 22:20000001-21000000 215271 T +rs113169311 insertion 22:20000001-21000000 215376 TGTC +rs75859816 single 22:20000001-21000000 215488 C +rs854965 single 22:20000001-21000000 215569 G +rs701433 single 22:20000001-21000000 215939 C +rs8137809 single 22:20000001-21000000 215980 C +rs8141593 single 22:20000001-21000000 216155 C +rs78061232 single 22:20000001-21000000 216386 T +rs74841242 single 22:20000001-21000000 216455 C +rs150727039 deletion 22:20000001-21000000 216665 1 +rs115687216 single 22:20000001-21000000 216714 A +rs114970991 single 22:20000001-21000000 216783 T +rs77569499 single 22:20000001-21000000 216858 G +rs60594683 single 22:20000001-21000000 217202 A +rs854966 single 22:20000001-21000000 217339 A +rs854967 single 22:20000001-21000000 217627 C +rs62219686 single 22:20000001-21000000 217679 T +rs73877189 single 22:20000001-21000000 217797 T +rs113635983 single 22:20000001-21000000 217867 A +rs77924361 single 22:20000001-21000000 217938 A +rs55974715 single 22:20000001-21000000 218133 A +rs9606290 single 22:20000001-21000000 218236 A +rs56310064 single 22:20000001-21000000 218259 A +rs59497188 single 22:20000001-21000000 218375 A +rs73877191 single 22:20000001-21000000 218517 A +rs73877191 single 22:20000001-21000000 218517 C +rs9618754 single 22:20000001-21000000 218606 T 
+rs73152989 single 22:20000001-21000000 218751 A +rs854968 single 22:20000001-21000000 218996 A +rs1210818 single 22:20000001-21000000 219171 G +rs62219688 single 22:20000001-21000000 219479 C +rs73394049 single 22:20000001-21000000 219509 T +rs74404487 single 22:20000001-21000000 219570 A +rs117703390 single 22:20000001-21000000 219640 A +rs79892278 single 22:20000001-21000000 219914 A +rs1076449 single 22:20000001-21000000 220110 G +rs2871060 single 22:20000001-21000000 220165 C +rs696882 single 22:20000001-21000000 220336 C +rs113263410 single 22:20000001-21000000 220419 C +rs73877195 single 22:20000001-21000000 220480 T +rs5993935 single 22:20000001-21000000 220763 C +rs854969 single 22:20000001-21000000 220988 A +rs854970 single 22:20000001-21000000 221556 A +rs117600433 single 22:20000001-21000000 221797 A +rs13340092 single 22:20000001-21000000 221826 C +rs9618755 single 22:20000001-21000000 221855 T +rs855073 single 22:20000001-21000000 221962 T +rs10212079 single 22:20000001-21000000 222104 G +rs77601027 single 22:20000001-21000000 222475 G +rs9618756 single 22:20000001-21000000 222622 G +rs113434912 single 22:20000001-21000000 222914 G +rs855075 single 22:20000001-21000000 223024 T +rs114024471 single 22:20000001-21000000 223207 T +rs370354715 insertion 22:20000001-21000000 223341 CCGGCAGGCTACCAA +rs56729142 single 22:20000001-21000000 223468 T +rs78851436 single 22:20000001-21000000 223507 T +rs2871061 single 22:20000001-21000000 224121 T +rs138074275 insertion 22:20000001-21000000 224682 AA +rs116399860 single 22:20000001-21000000 224822 T +rs114153637 single 22:20000001-21000000 224895 A +rs16983133 single 22:20000001-21000000 224980 C +rs78658262 single 22:20000001-21000000 225043 G +rs35400391 insertion 22:20000001-21000000 225143 G +rs112428913 single 22:20000001-21000000 225275 A +rs112428913 single 22:20000001-21000000 225275 T +rs759577 single 22:20000001-21000000 225535 G +rs11913543 single 22:20000001-21000000 225625 C +rs73394077 single 
22:20000001-21000000 225748 A +rs73152993 single 22:20000001-21000000 225781 A +rs113186711 single 22:20000001-21000000 226003 A +rs13433573 single 22:20000001-21000000 226075 C +rs115769370 single 22:20000001-21000000 226111 T +rs701431 single 22:20000001-21000000 226454 C +rs115157974 single 22:20000001-21000000 226530 C +rs117480194 single 22:20000001-21000000 226682 T +rs117775417 single 22:20000001-21000000 226788 T +rs80024082 single 22:20000001-21000000 226948 A +rs147191054 insertion 22:20000001-21000000 227090 A +rs13433575 single 22:20000001-21000000 227383 C +rs701430 single 22:20000001-21000000 227550 G +rs73877201 single 22:20000001-21000000 227594 A +rs5993937 single 22:20000001-21000000 227954 G +rs5993937 single 22:20000001-21000000 227954 T +rs701429 single 22:20000001-21000000 228464 T +rs887764 single 22:20000001-21000000 228486 A +rs701428 single 22:20000001-21000000 228541 G +rs114995292 single 22:20000001-21000000 228593 A +rs34031696 single 22:20000001-21000000 228763 A +rs114960593 single 22:20000001-21000000 228826 A +rs74315508 single 22:20000001-21000000 230300 A +rs115511482 single 22:20000001-21000000 231077 T +rs114444096 single 22:20000001-21000000 231544 G +rs149364449 single 22:20000001-21000000 231859 T +rs80121779 single 22:20000001-21000000 232603 A +rs9618759 single 22:20000001-21000000 232662 G +rs34909867 single 22:20000001-21000000 233230 T +rs701427 single 22:20000001-21000000 233267 C +rs854971 single 22:20000001-21000000 233428 C +rs79189372 single 22:20000001-21000000 233538 T +rs117611205 single 22:20000001-21000000 233745 A +rs9606293 single 22:20000001-21000000 233864 A +rs189652039 single 22:20000001-21000000 234090 C +rs74868504 single 22:20000001-21000000 234111 A +rs9606294 single 22:20000001-21000000 234524 T +rs143133500 single 22:20000001-21000000 234617 T +rs77225097 single 22:20000001-21000000 234640 T +rs116703716 single 22:20000001-21000000 234751 C +rs696881 single 22:20000001-21000000 234812 G +rs696880 
single 22:20000001-21000000 235199 A +rs140464581 insertion 22:20000001-21000000 235221 A +rs114946480 single 22:20000001-21000000 235498 T +rs150607319 single 22:20000001-21000000 235671 A +rs149419362 single 22:20000001-21000000 235786 A +rs17818126 single 22:20000001-21000000 235972 A +rs117664735 single 22:20000001-21000000 236086 C +rs7290335 single 22:20000001-21000000 236436 A +rs9618761 single 22:20000001-21000000 236631 C +rs111745046 single 22:20000001-21000000 236829 A +rs145052327 single 22:20000001-21000000 236913 G +rs116786539 single 22:20000001-21000000 237050 A +rs79002492 single 22:20000001-21000000 237104 C +rs117333152 single 22:20000001-21000000 237238 C +rs117725102 single 22:20000001-21000000 237329 T +rs145579679 single 22:20000001-21000000 237533 C +rs144718685 insertion 22:20000001-21000000 238771 A +rs112525813 single 22:20000001-21000000 238917 A +rs855077 single 22:20000001-21000000 239080 T +rs200367744 single 22:20000001-21000000 239107 T +rs566415111 insertion 22:20000001-21000000 239429 CAT +rs78143546 single 22:20000001-21000000 239805 G +rs7292462 single 22:20000001-21000000 240188 G +rs701424 single 22:20000001-21000000 240230 T +rs116729469 single 22:20000001-21000000 240841 A +rs1005863 single 22:20000001-21000000 240998 A +rs1567871 single 22:20000001-21000000 241159 T +rs11333422 deletion 22:20000001-21000000 241367 1 +rs7287730 single 22:20000001-21000000 241435 A +rs370890149 single 22:20000001-21000000 241478 T +rs117998532 single 22:20000001-21000000 241619 A +rs142238138 single 22:20000001-21000000 241922 A +rs76826266 single 22:20000001-21000000 242484 C +rs116561438 single 22:20000001-21000000 242521 G +rs117877353 single 22:20000001-21000000 242647 A +rs112291347 single 22:20000001-21000000 242983 G +rs701423 single 22:20000001-21000000 243304 T +rs115304055 single 22:20000001-21000000 243404 A +rs116881746 single 22:20000001-21000000 243651 A +rs117285556 single 22:20000001-21000000 243756 C +rs78055866 single 
22:20000001-21000000 244258 T +rs115714416 single 22:20000001-21000000 244550 A +rs9606296 single 22:20000001-21000000 244615 T +rs9606297 single 22:20000001-21000000 244722 T +rs8136268 single 22:20000001-21000000 244814 A +rs4819879 single 22:20000001-21000000 245173 C +rs117060868 single 22:20000001-21000000 245539 G +rs76149327 single 22:20000001-21000000 245733 C +rs701421 single 22:20000001-21000000 246080 A +rs115321896 single 22:20000001-21000000 246199 C +rs58754659 single 22:20000001-21000000 246446 T +rs79810059 single 22:20000001-21000000 246485 T +rs115015860 single 22:20000001-21000000 246618 T +rs115307663 single 22:20000001-21000000 246683 G +rs74933079 single 22:20000001-21000000 247271 C +rs854927 single 22:20000001-21000000 247857 C +rs117839889 single 22:20000001-21000000 247996 T +rs73879307 single 22:20000001-21000000 248172 G +rs73879307 single 22:20000001-21000000 248172 T +rs855050 single 22:20000001-21000000 248390 A +rs855051 single 22:20000001-21000000 248712 G +rs114406286 single 22:20000001-21000000 249037 A +rs79829870 single 22:20000001-21000000 249099 A +rs855052 single 22:20000001-21000000 249420 A +rs855053 single 22:20000001-21000000 249550 C +rs148220813 single 22:20000001-21000000 249580 G +rs148220813 single 22:20000001-21000000 249580 T +rs115273511 single 22:20000001-21000000 249685 G +rs9617866 single 22:20000001-21000000 249716 T +rs146194573 single 22:20000001-21000000 249775 C +rs855054 single 22:20000001-21000000 249806 A +rs1807466 single 22:20000001-21000000 249917 C +rs142689515 single 22:20000001-21000000 250043 C +rs12170447 single 22:20000001-21000000 250546 A +rs12171082 single 22:20000001-21000000 250745 G +rs855055 single 22:20000001-21000000 250918 T +rs12166302 single 22:20000001-21000000 251015 T +rs3970485 single 22:20000001-21000000 251062 C +rs701420 single 22:20000001-21000000 251085 T +rs5748568 single 22:20000001-21000000 251311 C +rs573917653 insertion 22:20000001-21000000 251540 AAAG +rs12166236 
single 22:20000001-21000000 251611 A +rs12166774 single 22:20000001-21000000 251632 C +rs73153000 single 22:20000001-21000000 251924 A +rs854928 single 22:20000001-21000000 251953 A +rs116957492 single 22:20000001-21000000 252116 A +rs8143104 single 22:20000001-21000000 252337 G +rs112535005 single 22:20000001-21000000 252398 G +rs855056 single 22:20000001-21000000 252511 A +rs113463158 single 22:20000001-21000000 253199 T +rs141339830 single 22:20000001-21000000 253459 T +rs145089296 single 22:20000001-21000000 253552 A +rs114446548 single 22:20000001-21000000 254194 G +rs7293034 single 22:20000001-21000000 254514 A +rs9617867 single 22:20000001-21000000 254539 G +rs854929 single 22:20000001-21000000 255019 C +rs112035786 single 22:20000001-21000000 255096 T +rs701417 single 22:20000001-21000000 255159 A +rs117475224 single 22:20000001-21000000 255247 A +rs367897714 single 22:20000001-21000000 255354 T +rs117774209 single 22:20000001-21000000 255980 A +rs9618765 single 22:20000001-21000000 256309 T +rs2871065 single 22:20000001-21000000 256803 G +rs3960058 single 22:20000001-21000000 256975 C +rs5993943 single 22:20000001-21000000 257028 A +rs187924471 single 22:20000001-21000000 257235 A +rs12330099 single 22:20000001-21000000 257349 T +rs150288217 single 22:20000001-21000000 258002 T +rs2102667 single 22:20000001-21000000 258144 T +rs34141746 single 22:20000001-21000000 258193 T +rs854930 single 22:20000001-21000000 258410 G +rs116978178 single 22:20000001-21000000 258518 C +rs111641237 single 22:20000001-21000000 258579 T +rs854931 single 22:20000001-21000000 258622 A +rs9617868 single 22:20000001-21000000 258889 T +rs117781464 single 22:20000001-21000000 258969 T +rs113277925 single 22:20000001-21000000 259325 G +rs3091371 insertion 22:20000001-21000000 260111 C +rs3091371 insertion 22:20000001-21000000 260111 T +rs150603492 single 22:20000001-21000000 260244 A +rs71776343 deletion 22:20000001-21000000 260558 9 +rs11350657 deletion 22:20000001-21000000 260731 
1 +rs854936 single 22:20000001-21000000 260813 T +rs854937 single 22:20000001-21000000 261080 G +rs62219713 single 22:20000001-21000000 261302 T +rs113297045 single 22:20000001-21000000 261585 T +rs192910773 single 22:20000001-21000000 261686 T +rs854939 single 22:20000001-21000000 261822 G +rs855060 single 22:20000001-21000000 262165 A +rs189886717 single 22:20000001-21000000 262218 T +rs13433533 single 22:20000001-21000000 262338 C +rs565237655 deletion 22:20000001-21000000 262786 1 +rs854940 single 22:20000001-21000000 262852 C +rs36076240 single 22:20000001-21000000 263017 G +rs182008760 single 22:20000001-21000000 263099 T +rs73879318 single 22:20000001-21000000 263212 A +rs150341931 single 22:20000001-21000000 263243 A +rs113436252 deletion 22:20000001-21000000 263332 4 +rs854941 single 22:20000001-21000000 263506 A +rs114029464 single 22:20000001-21000000 263547 T +rs11297569 deletion 22:20000001-21000000 263624 1 +rs187916289 single 22:20000001-21000000 263964 G +rs5993945 single 22:20000001-21000000 263986 C +rs117523061 single 22:20000001-21000000 264055 C +rs854942 single 22:20000001-21000000 264100 A +rs114123407 single 22:20000001-21000000 264151 A +rs114123407 single 22:20000001-21000000 264151 C +rs113787821 single 22:20000001-21000000 264295 A +rs113072881 single 22:20000001-21000000 264325 T +rs151078850 single 22:20000001-21000000 264349 C +rs149903682 single 22:20000001-21000000 264474 A +rs71186647 insertion 22:20000001-21000000 264547 GGTGAGAGGTGTG +rs854944 single 22:20000001-21000000 264936 A +rs112732937 single 22:20000001-21000000 264962 A +rs13433673 single 22:20000001-21000000 265762 G +rs854946 single 22:20000001-21000000 265869 A +rs200714027 single 22:20000001-21000000 265917 A +rs2110151 single 22:20000001-21000000 266002 A +rs566429444 single 22:20000001-21000000 266170 A +rs854947 single 22:20000001-21000000 266353 A +rs854948 single 22:20000001-21000000 266408 T +rs9618767 single 22:20000001-21000000 266471 A +rs701416 single 
22:20000001-21000000 266527 C +rs854949 single 22:20000001-21000000 266796 C +rs56211463 insertion 22:20000001-21000000 267023 C +rs855061 single 22:20000001-21000000 267212 A +rs854950 single 22:20000001-21000000 267270 A +rs142729824 single 22:20000001-21000000 267387 A +rs854951 single 22:20000001-21000000 267556 T +rs701415 single 22:20000001-21000000 267688 A +rs191141480 single 22:20000001-21000000 267868 T +rs9618769 single 22:20000001-21000000 268133 C +rs9618770 single 22:20000001-21000000 268211 A +rs11705685 single 22:20000001-21000000 268296 T +rs701414 single 22:20000001-21000000 268482 T +rs143271913 single 22:20000001-21000000 268543 C +rs11705580 single 22:20000001-21000000 268611 A +rs854953 single 22:20000001-21000000 268775 A +rs854954 single 22:20000001-21000000 269134 A +rs201157197 insertion 22:20000001-21000000 269198 TA +rs151068286 single 22:20000001-21000000 269702 T +rs854955 single 22:20000001-21000000 269741 T +rs67659900 single 22:20000001-21000000 270249 C +rs9606308 single 22:20000001-21000000 270366 T +rs5993947 single 22:20000001-21000000 270849 T +rs147109067 single 22:20000001-21000000 271208 T +rs5993948 single 22:20000001-21000000 271286 A +rs113581498 single 22:20000001-21000000 272062 A +rs112262759 single 22:20000001-21000000 272252 C +rs701413 single 22:20000001-21000000 272792 G +rs9617870 single 22:20000001-21000000 272894 T +rs854957 single 22:20000001-21000000 273125 A +rs701412 single 22:20000001-21000000 273403 G +rs12167477 single 22:20000001-21000000 273440 A +rs12167520 single 22:20000001-21000000 273476 A +rs148753980 single 22:20000001-21000000 273519 C +rs854958 single 22:20000001-21000000 273576 C +rs854958 single 22:20000001-21000000 273576 G +rs115983387 single 22:20000001-21000000 273957 T +rs116389213 single 22:20000001-21000000 274082 A +rs150248016 single 22:20000001-21000000 274187 A +rs34726340 single 22:20000001-21000000 274497 A +rs701411 single 22:20000001-21000000 274610 G +rs115713971 single 
22:20000001-21000000 274788 T +rs184316207 single 22:20000001-21000000 274847 T +rs369960903 single 22:20000001-21000000 274868 A +rs369960903 single 22:20000001-21000000 274868 G +rs854959 single 22:20000001-21000000 275266 A +rs12152120 single 22:20000001-21000000 275367 T +rs9606311 single 22:20000001-21000000 275409 C +rs1640384 single 22:20000001-21000000 275455 A +rs55851392 single 22:20000001-21000000 275635 A +rs142456498 single 22:20000001-21000000 275661 G +rs9606313 single 22:20000001-21000000 275684 A +rs35275474 deletion 22:20000001-21000000 275728 1 +rs140558888 single 22:20000001-21000000 275752 C +rs855062 single 22:20000001-21000000 276094 A +rs854960 single 22:20000001-21000000 276383 G +rs147392519 single 22:20000001-21000000 276527 T +rs117897640 single 22:20000001-21000000 276600 G +rs145440008 single 22:20000001-21000000 276882 T +rs9606315 single 22:20000001-21000000 276947 A +rs9606315 single 22:20000001-21000000 276947 T +rs112744194 single 22:20000001-21000000 277053 T +rs855063 single 22:20000001-21000000 277112 C +rs145876893 single 22:20000001-21000000 277324 T +rs146603621 single 22:20000001-21000000 277482 A +rs854961 single 22:20000001-21000000 278003 G +rs118187598 single 22:20000001-21000000 278035 A +rs138647324 single 22:20000001-21000000 278071 G +rs79026442 single 22:20000001-21000000 278378 G +rs5748573 single 22:20000001-21000000 278470 G +rs855064 single 22:20000001-21000000 278528 A +rs5993951 single 22:20000001-21000000 278734 T +rs79510620 single 22:20000001-21000000 278915 A +rs9617872 single 22:20000001-21000000 278952 T +rs9618775 single 22:20000001-21000000 279127 C +rs12628408 single 22:20000001-21000000 279158 A +rs12168751 single 22:20000001-21000000 279532 A +rs12169202 single 22:20000001-21000000 279717 T +rs116156996 single 22:20000001-21000000 279814 C +rs113821366 single 22:20000001-21000000 280438 A +rs75341343 single 22:20000001-21000000 280633 T +rs5993952 single 22:20000001-21000000 280781 G +rs532736304 
single 22:20000001-21000000 281054 C +rs57059797 single 22:20000001-21000000 281260 T +rs112314714 single 22:20000001-21000000 281340 T +rs9618776 single 22:20000001-21000000 281852 C +rs7287659 single 22:20000001-21000000 282242 C +rs5993953 single 22:20000001-21000000 282494 T +rs28483521 single 22:20000001-21000000 282728 C +rs144106158 deletion 22:20000001-21000000 282774 2 +rs5993954 single 22:20000001-21000000 282885 T +rs75220018 single 22:20000001-21000000 282926 C +rs5993955 single 22:20000001-21000000 283227 T +rs141153949 insertion 22:20000001-21000000 283373 T +rs11913556 single 22:20000001-21000000 283417 A +rs17757179 single 22:20000001-21000000 283559 C +rs570173676 deletion 22:20000001-21000000 283700 1 +rs12628656 single 22:20000001-21000000 283731 A +rs55936357 single 22:20000001-21000000 283995 A +rs111740045 single 22:20000001-21000000 284026 A +rs9617874 single 22:20000001-21000000 284604 A +rs114718020 single 22:20000001-21000000 286069 T +rs741413 single 22:20000001-21000000 286098 G +rs141109831 single 22:20000001-21000000 286423 T +rs202027003 deletion 22:20000001-21000000 286543 1 +rs712977 single 22:20000001-21000000 287089 G +rs184027097 single 22:20000001-21000000 287282 G +rs566802242 single 22:20000001-21000000 287873 G +rs375965193 single 22:20000001-21000000 288096 A +rs187389825 single 22:20000001-21000000 288767 A +rs117680284 single 22:20000001-21000000 289158 T +rs111245247 single 22:20000001-21000000 289197 T +rs73155116 single 22:20000001-21000000 289237 T +rs545001354 insertion 22:20000001-21000000 289541 T +rs138285111 single 22:20000001-21000000 289781 T +rs71683800 deletion 22:20000001-21000000 290385 5 +rs56185412 single 22:20000001-21000000 290948 A +rs56306832 single 22:20000001-21000000 291029 C +rs3970483 single 22:20000001-21000000 291258 T +rs548744927 single 22:20000001-21000000 291352 G +rs548744927 single 22:20000001-21000000 291352 T +rs9618779 single 22:20000001-21000000 292392 A +rs113465869 single 
22:20000001-21000000 293303 C +rs2904577 single 22:20000001-21000000 293624 A +rs2241306 single 22:20000001-21000000 294618 A +rs7286515 single 22:20000001-21000000 294866 G +rs199755922 single 22:20000001-21000000 295378 T +rs139500366 single 22:20000001-21000000 295413 C +rs144421249 single 22:20000001-21000000 295453 C +rs148492600 single 22:20000001-21000000 295525 A +rs5993962 single 22:20000001-21000000 295577 G +rs738076 single 22:20000001-21000000 296320 A +rs4995261 single 22:20000001-21000000 296539 C +rs12166558 single 22:20000001-21000000 297309 T +rs9605096 single 22:20000001-21000000 298057 T +rs373572922 single 22:20000001-21000000 298190 A +rs373572922 single 22:20000001-21000000 298190 C +rs112074972 single 22:20000001-21000000 298514 G +rs112086538 single 22:20000001-21000000 299117 G +rs113848355 single 22:20000001-21000000 299414 A +rs111505542 single 22:20000001-21000000 300664 A +rs111505542 single 22:20000001-21000000 300664 T +rs145639142 single 22:20000001-21000000 300737 C +rs141053532 single 22:20000001-21000000 301345 G +rs3890996 single 22:20000001-21000000 301799 G +rs1056804 single 22:20000001-21000000 302275 C +rs1056804 single 22:20000001-21000000 302275 T +rs183700583 single 22:20000001-21000000 302403 T +rs113468394 single 22:20000001-21000000 303663 A +rs5993964 single 22:20000001-21000000 303853 A +rs111772448 single 22:20000001-21000000 304105 A +rs117526563 single 22:20000001-21000000 304706 T +rs397844026 single 22:20000001-21000000 304819 A +rs12627754 single 22:20000001-21000000 305356 G +rs36033784 deletion 22:20000001-21000000 305986 2 +rs397788332 single 22:20000001-21000000 306277 G +rs2280116 single 22:20000001-21000000 306715 T +rs2280117 single 22:20000001-21000000 306992 T +rs7235 single 22:20000001-21000000 307255 A +rs142800099 insertion 22:20000001-21000000 307346 C +rs190235761 single 22:20000001-21000000 307519 T +rs113667770 single 22:20000001-21000000 307602 A +rs61587716 single 22:20000001-21000000 308278 A 
+rs1210828 single 22:20000001-21000000 308750 A +rs1210829 single 22:20000001-21000000 308799 A +rs1210830 single 22:20000001-21000000 308928 G +rs55828031 single 22:20000001-21000000 309072 A +rs752689 single 22:20000001-21000000 309392 G +rs146050228 deletion 22:20000001-21000000 309870 4 +rs7410411 single 22:20000001-21000000 311135 T +rs901789 single 22:20000001-21000000 311424 A +rs901790 single 22:20000001-21000000 311452 T +rs370525522 single 22:20000001-21000000 312435 C +rs370378383 deletion 22:20000001-21000000 312722 1 +rs550697009 single 22:20000001-21000000 312944 C +rs56407284 single 22:20000001-21000000 313298 G +rs200183474 insertion 22:20000001-21000000 314124 G +rs539415100 single 22:20000001-21000000 314156 C +rs35488903 single 22:20000001-21000000 315928 T +rs535072475 deletion 22:20000001-21000000 316072 1 +rs557642247 insertion 22:20000001-21000000 316336 T +rs9618787 single 22:20000001-21000000 317093 A +rs201255184 insertion 22:20000001-21000000 317798 TATT +rs28595448 single 22:20000001-21000000 318161 A +rs558372232 deletion 22:20000001-21000000 320012 4 +rs375452397 single 22:20000001-21000000 320228 A +rs138497664 single 22:20000001-21000000 320730 G +rs9605099 single 22:20000001-21000000 320762 G +rs367872441 single 22:20000001-21000000 320937 G +rs200930121 single 22:20000001-21000000 321015 C +rs200325927 single 22:20000001-21000000 321214 G +rs5742509 single 22:20000001-21000000 321400 C +rs5742536 single 22:20000001-21000000 321578 A +rs8139497 single 22:20000001-21000000 321888 G +rs369341018 single 22:20000001-21000000 322873 C +rs368454934 deletion 22:20000001-21000000 322911 1 +rs536650213 single 22:20000001-21000000 323146 A +rs573996925 deletion 22:20000001-21000000 323186 1 +rs6518604 single 22:20000001-21000000 323210 A +rs62217987 single 22:20000001-21000000 323741 T +rs199719896 insertion 22:20000001-21000000 323978 TATATGATAGAGATTATATACAATA +rs191287568 single 22:20000001-21000000 324162 T +rs111220727 single 
22:20000001-21000000 324415 A +rs111220727 single 22:20000001-21000000 324415 T +rs62651454 single 22:20000001-21000000 324437 C +rs376032434 deletion 22:20000001-21000000 324547 2 +rs374108049 insertion 22:20000001-21000000 324681 TATATC +rs56651470 single 22:20000001-21000000 324731 A +rs143971653 single 22:20000001-21000000 324890 C +rs143502836 deletion 22:20000001-21000000 324916 5 +rs372011139 deletion 22:20000001-21000000 324962 17 +rs7511281 single 22:20000001-21000000 324991 G +rs7511299 single 22:20000001-21000000 325094 A +rs201125528 single 22:20000001-21000000 325376 C +rs28411685 single 22:20000001-21000000 325424 A +rs9618790 single 22:20000001-21000000 325848 A +rs9618793 single 22:20000001-21000000 325905 A +rs9606328 single 22:20000001-21000000 326524 G +rs533988831 single 22:20000001-21000000 326856 C +rs577032639 single 22:20000001-21000000 326969 A +rs9606332 single 22:20000001-21000000 328086 A +rs542506597 single 22:20000001-21000000 328784 T +rs572559485 single 22:20000001-21000000 328841 G +rs9618801 single 22:20000001-21000000 329382 G +rs201353123 single 22:20000001-21000000 329425 T +rs60443210 single 22:20000001-21000000 330512 G +rs60443210 single 22:20000001-21000000 330512 T +rs9617876 single 22:20000001-21000000 330743 T +rs529255526 single 22:20000001-21000000 332452 G +rs370283224 single 22:20000001-21000000 333875 C +rs62218051 single 22:20000001-21000000 334969 G +rs62218052 single 22:20000001-21000000 335224 G +rs377551426 single 22:20000001-21000000 335814 T +rs201990883 single 22:20000001-21000000 339616 T +rs62218058 single 22:20000001-21000000 339653 G +rs571632636 single 22:20000001-21000000 340608 A +rs200279950 single 22:20000001-21000000 341137 A +rs553506061 single 22:20000001-21000000 341429 A +rs62218577 single 22:20000001-21000000 341960 T +rs12167504 single 22:20000001-21000000 342117 C +rs12168181 single 22:20000001-21000000 342287 C +rs371394196 single 22:20000001-21000000 342335 C +rs543306940 single 
22:20000001-21000000 342363 A +rs373507807 single 22:20000001-21000000 342573 T +rs199634801 single 22:20000001-21000000 343214 T +rs542550911 single 22:20000001-21000000 345875 A +rs532764825 single 22:20000001-21000000 349567 T +rs201169033 single 22:20000001-21000000 350074 T +rs575276568 single 22:20000001-21000000 351051 A +rs560936846 single 22:20000001-21000000 351328 T +rs200564038 single 22:20000001-21000000 351703 C +rs199730484 single 22:20000001-21000000 351791 G +rs113612733 single 22:20000001-21000000 351818 T +rs535538762 single 22:20000001-21000000 351941 G +rs377011547 single 22:20000001-21000000 353257 A +rs77014131 single 22:20000001-21000000 353415 A +rs77014131 single 22:20000001-21000000 353415 C +rs77014131 single 22:20000001-21000000 353415 T +rs554377674 single 22:20000001-21000000 353474 A +rs572346121 single 22:20000001-21000000 353795 T +rs140562723 single 22:20000001-21000000 353878 G +rs555841649 insertion 22:20000001-21000000 357933 CACTTT +rs200470960 deletion 22:20000001-21000000 358724 1 +rs3962691 single 22:20000001-21000000 360855 A +rs143901841 single 22:20000001-21000000 360880 A +rs556586210 single 22:20000001-21000000 360927 C +rs576544144 single 22:20000001-21000000 360973 T +rs112091896 single 22:20000001-21000000 361253 T +rs184623332 single 22:20000001-21000000 361276 A +rs116539977 single 22:20000001-21000000 361374 A +rs542543351 single 22:20000001-21000000 361454 G +rs140439240 single 22:20000001-21000000 361620 C +rs140439240 single 22:20000001-21000000 361620 G +rs527489294 single 22:20000001-21000000 362071 G +rs569673765 single 22:20000001-21000000 362877 G +rs572271810 single 22:20000001-21000000 363672 A +rs578023590 single 22:20000001-21000000 363875 T +rs201054810 single 22:20000001-21000000 366366 C +rs199549402 single 22:20000001-21000000 366460 A +rs527456804 single 22:20000001-21000000 366489 A +rs201511466 single 22:20000001-21000000 367250 G +rs5997420 single 22:20000001-21000000 367780 C +rs62218637 
single 22:20000001-21000000 368019 A +rs202041794 single 22:20000001-21000000 368168 A +rs372514025 single 22:20000001-21000000 368263 T +rs75792497 single 22:20000001-21000000 368386 G +rs373752216 single 22:20000001-21000000 368441 G +rs111275698 single 22:20000001-21000000 369106 A +rs552051382 single 22:20000001-21000000 369981 A +rs537663619 single 22:20000001-21000000 370619 C +rs2542835 single 22:20000001-21000000 371296 G +rs116228135 single 22:20000001-21000000 374714 T +rs2845468 single 22:20000001-21000000 375541 C +rs375818636 single 22:20000001-21000000 375603 C +rs2845469 single 22:20000001-21000000 375665 A +rs144356941 single 22:20000001-21000000 375743 G +rs2542838 single 22:20000001-21000000 376407 G +rs2542839 single 22:20000001-21000000 376435 G +rs564004426 insertion 22:20000001-21000000 376498 A +rs531229763 single 22:20000001-21000000 376706 A +rs383405 single 22:20000001-21000000 377591 A +rs422893 single 22:20000001-21000000 377760 T +rs2909302 single 22:20000001-21000000 378592 G +rs372269496 single 22:20000001-21000000 378807 C +rs376605814 single 22:20000001-21000000 378834 C +rs547549857 insertion 22:20000001-21000000 378890 CTA +rs55698413 single 22:20000001-21000000 378964 C +rs56006201 single 22:20000001-21000000 379065 T +rs199961704 insertion 22:20000001-21000000 380315 T +rs201171045 insertion 22:20000001-21000000 380615 AT +rs200663001 deletion 22:20000001-21000000 381347 1 +rs192854033 single 22:20000001-21000000 381639 T +rs577338017 single 22:20000001-21000000 382385 C +rs567789504 insertion 22:20000001-21000000 382665 A +rs554712972 single 22:20000001-21000000 383711 A +rs546518638 single 22:20000001-21000000 383800 T +rs576233803 single 22:20000001-21000000 383862 A +rs572081719 single 22:20000001-21000000 384063 G +rs2629347 single 22:20000001-21000000 384360 C +rs556166906 deletion 22:20000001-21000000 384521 2 +rs371888151 single 22:20000001-21000000 385201 G +rs189445532 single 22:20000001-21000000 385440 T +rs146242498 
deletion 22:20000001-21000000 385739 4 +rs200418297 single 22:20000001-21000000 385844 T +rs2261264 single 22:20000001-21000000 386499 C +rs569353582 single 22:20000001-21000000 386699 C +rs114530820 single 22:20000001-21000000 386906 A +rs202117096 single 22:20000001-21000000 387159 A +rs202117096 single 22:20000001-21000000 387159 T +rs574087712 single 22:20000001-21000000 387275 G +rs533039262 single 22:20000001-21000000 388261 A +rs9606381 single 22:20000001-21000000 388690 C +rs201856114 deletion 22:20000001-21000000 389997 1 +rs9618819 single 22:20000001-21000000 390062 C +rs1892849 single 22:20000001-21000000 390125 T +rs534652005 single 22:20000001-21000000 390291 A +rs557930232 single 22:20000001-21000000 390327 C +rs180958865 single 22:20000001-21000000 390416 A +rs1892850 single 22:20000001-21000000 390443 G +rs528776917 single 22:20000001-21000000 390489 G +rs2629355 single 22:20000001-21000000 390720 G +rs536778745 single 22:20000001-21000000 390983 T +rs534872361 single 22:20000001-21000000 391760 A +rs2629356 single 22:20000001-21000000 391787 A +rs2629357 single 22:20000001-21000000 391936 C +rs543992948 insertion 22:20000001-21000000 393004 A +rs375588630 single 22:20000001-21000000 393203 C +rs375966002 single 22:20000001-21000000 393296 A +rs369000751 single 22:20000001-21000000 393474 A +rs62218654 single 22:20000001-21000000 393584 C +rs575999723 single 22:20000001-21000000 393786 T +rs200092583 single 22:20000001-21000000 394493 A +rs457751 single 22:20000001-21000000 394525 T +rs2508168 single 22:20000001-21000000 394629 A +rs185513629 single 22:20000001-21000000 394784 C +rs561004990 single 22:20000001-21000000 394993 G +rs147643879 single 22:20000001-21000000 395127 C +rs554639305 single 22:20000001-21000000 396583 T +rs66752705 single 22:20000001-21000000 396613 A +rs560814968 deletion 22:20000001-21000000 397513 1 +rs562662261 single 22:20000001-21000000 397926 A +rs576159895 single 22:20000001-21000000 398210 G +rs2629364 single 
22:20000001-21000000 398237 G +rs376186645 single 22:20000001-21000000 399410 G +rs9605122 single 22:20000001-21000000 399867 G +rs9606389 single 22:20000001-21000000 399958 C +rs188751969 single 22:20000001-21000000 400015 A +rs62219187 single 22:20000001-21000000 400163 A +rs9605123 single 22:20000001-21000000 400370 A +rs567926851 single 22:20000001-21000000 400909 C +rs143231396 single 22:20000001-21000000 400953 A +rs28473258 single 22:20000001-21000000 401065 C +rs2629366 single 22:20000001-21000000 401089 T +rs9606391 single 22:20000001-21000000 401418 T +rs3016141 single 22:20000001-21000000 401718 T +rs9617890 single 22:20000001-21000000 402304 A +rs470943 single 22:20000001-21000000 402340 G +rs9617891 single 22:20000001-21000000 402386 A +rs73382204 single 22:20000001-21000000 402649 G +rs558135405 single 22:20000001-21000000 402933 A +rs369085249 single 22:20000001-21000000 403012 T +rs2101297 single 22:20000001-21000000 406215 C +rs532784199 single 22:20000001-21000000 406576 A +rs2019782 single 22:20000001-21000000 406629 G +rs529596108 single 22:20000001-21000000 406793 G +rs2020423 single 22:20000001-21000000 407332 G +rs571639553 single 22:20000001-21000000 409610 G +rs182525195 single 22:20000001-21000000 412485 C +rs113561534 single 22:20000001-21000000 412902 G +rs62219190 single 22:20000001-21000000 412981 G +rs4820055 single 22:20000001-21000000 413859 G +rs552031623 single 22:20000001-21000000 414040 T +rs187945649 single 22:20000001-21000000 414558 T +rs149889172 single 22:20000001-21000000 414823 A +rs559773908 single 22:20000001-21000000 415426 A +rs2629370 single 22:20000001-21000000 418875 G +rs187688475 single 22:20000001-21000000 419418 T +rs9618826 single 22:20000001-21000000 421155 C +rs5998191 single 22:20000001-21000000 421529 T +rs201827947 single 22:20000001-21000000 421612 T +rs531144600 single 22:20000001-21000000 421896 C +rs140987377 single 22:20000001-21000000 423567 G +rs536392073 single 22:20000001-21000000 426623 G 
+rs539306598 single 22:20000001-21000000 430562 C +rs559314693 single 22:20000001-21000000 430624 A +rs9618830 single 22:20000001-21000000 431155 T +rs138803844 single 22:20000001-21000000 431657 C +rs141920510 single 22:20000001-21000000 432172 C +rs564821190 single 22:20000001-21000000 432450 G +rs189544432 single 22:20000001-21000000 432732 T +rs201838577 single 22:20000001-21000000 433012 A +rs2629371 single 22:20000001-21000000 433149 T +rs536612136 deletion 22:20000001-21000000 434061 1 +rs201046218 single 22:20000001-21000000 436776 T +rs181600012 single 22:20000001-21000000 437003 A +rs529922417 single 22:20000001-21000000 438732 C +rs538781968 single 22:20000001-21000000 439548 T +rs186367178 single 22:20000001-21000000 440172 A +rs561472877 deletion 22:20000001-21000000 442231 1 +rs556097218 single 22:20000001-21000000 445213 T +rs468131 single 22:20000001-21000000 447832 A +rs9617893 single 22:20000001-21000000 447985 C +rs9618833 single 22:20000001-21000000 448061 A +rs9618834 single 22:20000001-21000000 448123 G +rs7410700 single 22:20000001-21000000 448457 A +rs183002658 single 22:20000001-21000000 448806 G +rs139180081 single 22:20000001-21000000 449099 G +rs4051030 single 22:20000001-21000000 449383 G +rs532538945 deletion 22:20000001-21000000 450125 3 +rs112040170 single 22:20000001-21000000 450216 G +rs188048949 single 22:20000001-21000000 454581 T +rs374395444 single 22:20000001-21000000 456764 A +rs9617896 single 22:20000001-21000000 457169 T +rs199673858 single 22:20000001-21000000 457382 T +rs201532727 single 22:20000001-21000000 458028 C +rs374606390 insertion 22:20000001-21000000 458100 CACCTG +rs200600994 single 22:20000001-21000000 458332 G +rs12169145 single 22:20000001-21000000 462489 T +rs375703269 single 22:20000001-21000000 462548 T +rs142672399 single 22:20000001-21000000 463315 C +rs77517009 single 22:20000001-21000000 463374 T +rs201871245 single 22:20000001-21000000 464664 A +rs2629307 single 22:20000001-21000000 464947 C 
+rs2629308 single 22:20000001-21000000 465129 C +rs2553081 single 22:20000001-21000000 465274 G +rs2930745 single 22:20000001-21000000 465808 C +rs201056222 single 22:20000001-21000000 466460 T +rs76133228 single 22:20000001-21000000 466861 A +rs561093882 single 22:20000001-21000000 466889 C +rs187704075 single 22:20000001-21000000 466938 T +rs536711359 single 22:20000001-21000000 466992 T +rs199636404 deletion 22:20000001-21000000 467039 3 +rs536238319 single 22:20000001-21000000 467323 T +rs55764652 single 22:20000001-21000000 467392 A +rs201768123 single 22:20000001-21000000 467530 A +rs560115061 single 22:20000001-21000000 467779 C +rs375812371 single 22:20000001-21000000 468501 C +rs373903545 single 22:20000001-21000000 468976 T +rs529863292 single 22:20000001-21000000 469496 T +rs2629318 single 22:20000001-21000000 471235 C +rs2845431 single 22:20000001-21000000 471503 G +rs2629319 single 22:20000001-21000000 471783 C +rs62219206 single 22:20000001-21000000 471977 A +rs57602388 single 22:20000001-21000000 472040 T +rs5994322 single 22:20000001-21000000 472134 T +rs62219207 single 22:20000001-21000000 472215 C +rs2629323 single 22:20000001-21000000 472300 C +rs62219209 single 22:20000001-21000000 472630 A +rs546552263 single 22:20000001-21000000 473051 T +rs2845427 single 22:20000001-21000000 473417 G +rs62219232 single 22:20000001-21000000 474592 C +rs9606411 single 22:20000001-21000000 474991 C +rs2542873 single 22:20000001-21000000 475020 A +rs3016125 single 22:20000001-21000000 475049 C +rs62219233 single 22:20000001-21000000 475111 A +rs550030019 single 22:20000001-21000000 475718 T +rs5997671 single 22:20000001-21000000 477377 T +rs576983814 single 22:20000001-21000000 477544 T +rs200034603 single 22:20000001-21000000 477834 G +rs541420811 single 22:20000001-21000000 478132 C +rs143051943 single 22:20000001-21000000 478748 T +rs2909305 single 22:20000001-21000000 479021 C +rs112022569 single 22:20000001-21000000 479174 T +rs539220275 single 
22:20000001-21000000 480562 A +rs575730762 single 22:20000001-21000000 480593 A +rs540645874 single 22:20000001-21000000 481178 G +rs543604580 single 22:20000001-21000000 481209 G +rs566201976 single 22:20000001-21000000 481341 A +rs570610441 single 22:20000001-21000000 481409 T +rs539769640 single 22:20000001-21000000 481431 C +rs578183428 single 22:20000001-21000000 482445 C +rs558825079 insertion 22:20000001-21000000 484308 GCC +rs200918691 single 22:20000001-21000000 484805 A +rs9606413 single 22:20000001-21000000 486325 A +rs369235919 single 22:20000001-21000000 486640 G +rs79270954 single 22:20000001-21000000 486835 G +rs371437107 single 22:20000001-21000000 486913 G +rs71317874 single 22:20000001-21000000 487030 T +rs138155637 single 22:20000001-21000000 487085 A +rs9618845 single 22:20000001-21000000 487366 T +rs9617903 single 22:20000001-21000000 488777 C +rs363894 single 22:20000001-21000000 488924 C +rs5753125 single 22:20000001-21000000 488961 C +rs200076771 single 22:20000001-21000000 489029 T +rs9606414 single 22:20000001-21000000 489120 T +rs9606417 single 22:20000001-21000000 489259 G +rs558886792 single 22:20000001-21000000 489342 A +rs5753121 single 22:20000001-21000000 489578 C +rs528381246 single 22:20000001-21000000 489711 T +rs936774 single 22:20000001-21000000 490446 G +rs73155195 single 22:20000001-21000000 490506 T +rs4820841 single 22:20000001-21000000 490660 C +rs553926870 single 22:20000001-21000000 491059 G +rs62219247 single 22:20000001-21000000 491455 T +rs559931919 single 22:20000001-21000000 491805 G +rs532822831 single 22:20000001-21000000 492201 T +rs370938883 single 22:20000001-21000000 493175 G +rs542330989 insertion 22:20000001-21000000 494052 A +rs188855964 single 22:20000001-21000000 499047 G +rs577428873 single 22:20000001-21000000 499295 C +rs368372863 single 22:20000001-21000000 499487 C +rs540442379 single 22:20000001-21000000 499838 T +rs576878656 single 22:20000001-21000000 499913 A +rs562068139 single 
22:20000001-21000000 500041 T +rs568355827 single 22:20000001-21000000 502475 A +rs1659630 single 22:20000001-21000000 502770 G +rs111955416 single 22:20000001-21000000 502805 T +rs564740928 single 22:20000001-21000000 502869 T +rs5992222 single 22:20000001-21000000 503436 G +rs142043904 single 22:20000001-21000000 503487 A +rs376970331 single 22:20000001-21000000 505467 T +rs5747617 single 22:20000001-21000000 507900 C +rs368729527 single 22:20000001-21000000 508291 A +rs5746553 single 22:20000001-21000000 508424 C +rs9306237 single 22:20000001-21000000 508496 G +rs2930752 single 22:20000001-21000000 508530 T +rs62219967 single 22:20000001-21000000 509125 C +rs202184662 single 22:20000001-21000000 610426 C +rs113919133 insertion 22:20000001-21000000 612109 A +rs2260886 single 22:20000001-21000000 613309 A +rs573853982 single 22:20000001-21000000 614838 G +rs201295428 single 22:20000001-21000000 614899 T +rs547158110 single 22:20000001-21000000 617632 G +rs9618813 single 22:20000001-21000000 623121 T +rs76427616 single 22:20000001-21000000 623151 G +rs559622704 single 22:20000001-21000000 623185 G +rs369139964 single 22:20000001-21000000 624131 G +rs564451226 deletion 22:20000001-21000000 624380 1 +rs530398743 single 22:20000001-21000000 625022 T +rs567168109 single 22:20000001-21000000 625428 C +rs552355790 single 22:20000001-21000000 626105 G +rs374497719 single 22:20000001-21000000 626206 A +rs12484743 single 22:20000001-21000000 626659 G +rs75830540 deletion 22:20000001-21000000 626770 1 +rs2629382 single 22:20000001-21000000 627264 G +rs569181015 single 22:20000001-21000000 627494 A +rs372781346 single 22:20000001-21000000 627873 A +rs451661 single 22:20000001-21000000 628358 G +rs2570606 single 22:20000001-21000000 629049 C +rs9611247 single 22:20000001-21000000 629156 T +rs572567411 single 22:20000001-21000000 629224 T +rs9611301 single 22:20000001-21000000 629386 A +rs531484780 single 22:20000001-21000000 630499 A +rs568279229 single 22:20000001-21000000 
630753 A +rs373865441 single 22:20000001-21000000 638849 A +rs555986999 single 22:20000001-21000000 639001 C +rs375699301 single 22:20000001-21000000 639051 T +rs144932868 single 22:20000001-21000000 639229 G +rs570343827 single 22:20000001-21000000 639270 C +rs368130601 single 22:20000001-21000000 639323 A +rs566351057 single 22:20000001-21000000 639367 A +rs544044522 single 22:20000001-21000000 639448 A +rs542999762 single 22:20000001-21000000 639527 T +rs62220045 single 22:20000001-21000000 639570 T +rs62220046 single 22:20000001-21000000 639628 T +rs552068448 single 22:20000001-21000000 639781 T +rs375158475 single 22:20000001-21000000 639873 C +rs369382441 single 22:20000001-21000000 639985 T +rs8139291 single 22:20000001-21000000 640043 T +rs371060663 single 22:20000001-21000000 640073 T +rs140752632 single 22:20000001-21000000 640153 G +rs376587890 single 22:20000001-21000000 640200 G +rs370859808 single 22:20000001-21000000 640229 A +rs62220047 single 22:20000001-21000000 640374 G +rs62220048 single 22:20000001-21000000 640410 G +rs62220049 single 22:20000001-21000000 640461 C +rs71232734 single 22:20000001-21000000 640547 C +rs12157795 single 22:20000001-21000000 640611 C +rs201246106 single 22:20000001-21000000 640634 T +rs199788739 single 22:20000001-21000000 640668 A +rs12159681 single 22:20000001-21000000 640731 G +rs568415142 single 22:20000001-21000000 640867 A +rs566777369 single 22:20000001-21000000 640900 C +rs575236172 single 22:20000001-21000000 640932 A +rs1210208 single 22:20000001-21000000 641050 G +rs1210207 single 22:20000001-21000000 641089 A +rs201337867 single 22:20000001-21000000 641781 A +rs199992440 single 22:20000001-21000000 643677 C +rs576540233 single 22:20000001-21000000 646864 C +rs541424142 single 22:20000001-21000000 647133 G +rs9624391 single 22:20000001-21000000 647782 T +rs71263644 single 22:20000001-21000000 647886 C +rs565432 single 22:20000001-21000000 650830 G +rs572552379 single 22:20000001-21000000 651354 G 
+rs564584408 single 22:20000001-21000000 651570 T +rs533322206 single 22:20000001-21000000 651752 A +rs556120606 single 22:20000001-21000000 653138 A +rs200546022 single 22:20000001-21000000 653708 A +rs145880464 deletion 22:20000001-21000000 655613 5 +rs12158734 single 22:20000001-21000000 655791 T +rs3931714 single 22:20000001-21000000 655852 G +rs113912182 single 22:20000001-21000000 656504 T +rs549044110 single 22:20000001-21000000 657098 A +rs375148435 single 22:20000001-21000000 657261 C +rs111383011 single 22:20000001-21000000 657335 A +rs536237400 single 22:20000001-21000000 657380 T +rs147969392 single 22:20000001-21000000 657401 C +rs201300574 single 22:20000001-21000000 657562 A +rs79480102 single 22:20000001-21000000 657610 T +rs71313954 single 22:20000001-21000000 657641 C +rs567640061 single 22:20000001-21000000 658149 A +rs531678622 insertion 22:20000001-21000000 658349 ATA +rs369534471 single 22:20000001-21000000 658374 A +rs566497608 single 22:20000001-21000000 658434 T +rs28969632 single 22:20000001-21000000 658795 T +rs549833549 deletion 22:20000001-21000000 658854 2 +rs143307432 single 22:20000001-21000000 659320 G +rs10689549 insertion 22:20000001-21000000 659350 AA +rs28482902 single 22:20000001-21000000 659405 T +rs2570607 single 22:20000001-21000000 659521 G +rs11090371 single 22:20000001-21000000 659556 A +rs12160710 single 22:20000001-21000000 659640 G +rs12160710 single 22:20000001-21000000 659640 T +rs2570609 single 22:20000001-21000000 659776 G +rs141766341 single 22:20000001-21000000 659991 T +rs71313956 single 22:20000001-21000000 660079 G +rs546242842 single 22:20000001-21000000 660487 C +rs562851011 single 22:20000001-21000000 660516 T +rs376154 single 22:20000001-21000000 660590 T +rs200333335 single 22:20000001-21000000 663143 A +rs442561 single 22:20000001-21000000 663164 A +rs540495340 single 22:20000001-21000000 663249 A +rs5993150 single 22:20000001-21000000 667847 G +rs537575869 single 22:20000001-21000000 667900 C 
+rs553997796 single 22:20000001-21000000 668423 C +rs577513610 single 22:20000001-21000000 668498 T +rs541487565 single 22:20000001-21000000 671528 G +rs560389629 single 22:20000001-21000000 672457 T +rs423039 single 22:20000001-21000000 673233 A +rs423039 single 22:20000001-21000000 673233 C +rs201029695 insertion 22:20000001-21000000 677258 T +rs199641621 deletion 22:20000001-21000000 679373 1 +rs531358249 single 22:20000001-21000000 690785 A +rs561438348 single 22:20000001-21000000 691057 A +rs113293990 single 22:20000001-21000000 691926 G +rs191176164 single 22:20000001-21000000 693179 A +rs62221374 single 22:20000001-21000000 699590 T +rs62221376 single 22:20000001-21000000 704318 T +rs470210 single 22:20000001-21000000 705901 C +rs3888410 single 22:20000001-21000000 706868 C +rs554887243 deletion 22:20000001-21000000 707191 2 +rs9754433 single 22:20000001-21000000 708640 T +rs5753493 single 22:20000001-21000000 708684 C +rs113229144 single 22:20000001-21000000 708751 A +rs114369088 single 22:20000001-21000000 708789 T +rs113701813 single 22:20000001-21000000 708868 A +rs538372579 deletion 22:20000001-21000000 708949 27 +rs144843298 single 22:20000001-21000000 708993 T +rs35733491 single 22:20000001-21000000 709065 C +rs72617204 single 22:20000001-21000000 709227 A +rs72617204 single 22:20000001-21000000 709227 T +rs77252703 single 22:20000001-21000000 709249 G +rs5013585 single 22:20000001-21000000 709306 G +rs146423980 single 22:20000001-21000000 709386 T +rs371497993 single 22:20000001-21000000 709411 G +rs11089437 single 22:20000001-21000000 709561 A +rs369591645 single 22:20000001-21000000 709646 C +rs11089440 single 22:20000001-21000000 709734 C +rs10212022 single 22:20000001-21000000 709839 C +rs187881685 single 22:20000001-21000000 709953 A +rs375606213 single 22:20000001-21000000 710051 T +rs138536179 single 22:20000001-21000000 710078 T +rs62218177 single 22:20000001-21000000 710218 T +rs11089443 single 22:20000001-21000000 710385 T +rs11089444 
single 22:20000001-21000000 710558 G +rs558827029 single 22:20000001-21000000 710636 C +rs199806732 single 22:20000001-21000000 710820 A +rs12160675 single 22:20000001-21000000 710984 C +rs556571211 single 22:20000001-21000000 711166 G +rs568475296 single 22:20000001-21000000 711239 T +rs368174987 single 22:20000001-21000000 711434 C +rs12172593 single 22:20000001-21000000 711476 A +rs12172593 single 22:20000001-21000000 711476 C +rs35704269 single 22:20000001-21000000 711696 G +rs369305236 single 22:20000001-21000000 711737 T +rs371180846 single 22:20000001-21000000 711791 G +rs370233176 single 22:20000001-21000000 711836 C +rs62218197 single 22:20000001-21000000 711895 T +rs62218198 single 22:20000001-21000000 711956 T +rs62218200 single 22:20000001-21000000 711984 A +rs371807106 single 22:20000001-21000000 712007 T +rs71317839 single 22:20000001-21000000 712046 C +rs71317841 single 22:20000001-21000000 712068 G +rs142119189 single 22:20000001-21000000 712130 T +rs146591865 single 22:20000001-21000000 712153 C +rs377460378 single 22:20000001-21000000 712289 A +rs144875927 single 22:20000001-21000000 712401 A +rs200035577 single 22:20000001-21000000 712491 A +rs559723354 single 22:20000001-21000000 712530 A +rs371738345 single 22:20000001-21000000 712554 A +rs138633425 single 22:20000001-21000000 712608 A +rs566144792 single 22:20000001-21000000 712692 G +rs116799229 single 22:20000001-21000000 712724 T +rs62218239 single 22:20000001-21000000 712788 G +rs151337892 single 22:20000001-21000000 712951 T +rs138862071 single 22:20000001-21000000 713063 A +rs62218245 single 22:20000001-21000000 713117 G +rs200774933 single 22:20000001-21000000 713377 T +rs541497429 single 22:20000001-21000000 713760 A +rs12168608 single 22:20000001-21000000 713975 A +rs541753746 single 22:20000001-21000000 714157 C +rs62218251 single 22:20000001-21000000 714235 T +rs12168773 single 22:20000001-21000000 714328 A +rs202066321 single 22:20000001-21000000 714730 C +rs146925076 single 
22:20000001-21000000 714776 A +rs201759295 single 22:20000001-21000000 714835 G +rs180935332 single 22:20000001-21000000 714865 T +rs71317857 single 22:20000001-21000000 714901 T +rs62218961 single 22:20000001-21000000 714926 G +rs199653697 single 22:20000001-21000000 714956 A +rs202170294 single 22:20000001-21000000 714983 A +rs201421607 single 22:20000001-21000000 715018 T +rs200303972 single 22:20000001-21000000 715060 T +rs187089255 single 22:20000001-21000000 715167 T +rs371049047 single 22:20000001-21000000 715205 A +rs190757663 single 22:20000001-21000000 715280 C +rs542848105 single 22:20000001-21000000 715414 T +rs150186951 single 22:20000001-21000000 715507 A +rs190505990 single 22:20000001-21000000 715672 G +rs373236512 single 22:20000001-21000000 715736 A +rs201779658 single 22:20000001-21000000 715821 G +rs199549488 single 22:20000001-21000000 715885 G +rs200674668 single 22:20000001-21000000 715939 A +rs201507643 single 22:20000001-21000000 715962 C +rs145854951 single 22:20000001-21000000 716054 G +rs62218969 single 22:20000001-21000000 716109 T +rs74830498 single 22:20000001-21000000 716153 G +rs62218970 single 22:20000001-21000000 716224 G +rs111878801 single 22:20000001-21000000 716288 G +rs112628579 single 22:20000001-21000000 716838 T +rs113311941 single 22:20000001-21000000 717197 A +rs190361552 single 22:20000001-21000000 717384 A +rs190361552 single 22:20000001-21000000 717384 T +rs2543700 single 22:20000001-21000000 717802 G +rs3827282 single 22:20000001-21000000 718115 A +rs2277833 single 22:20000001-21000000 718524 C +rs2530576 single 22:20000001-21000000 718677 C +rs28540966 single 22:20000001-21000000 718725 C +rs45444397 single 22:20000001-21000000 718765 C +rs737973 single 22:20000001-21000000 718919 T +rs547587826 deletion 22:20000001-21000000 718956 1 +rs117070904 single 22:20000001-21000000 719016 G +rs142680475 single 22:20000001-21000000 719247 G +rs147440632 single 22:20000001-21000000 719324 G +rs2075298 single 
22:20000001-21000000 721136 C +rs2075297 single 22:20000001-21000000 721159 C +rs140748893 single 22:20000001-21000000 721486 G +rs148842077 single 22:20000001-21000000 721782 G +rs60080017 single 22:20000001-21000000 722009 G +rs362116 single 22:20000001-21000000 722219 G +rs12167355 single 22:20000001-21000000 722522 T +rs145532549 single 22:20000001-21000000 723277 A +rs62218975 single 22:20000001-21000000 723493 G +rs147981494 single 22:20000001-21000000 723535 T +rs61741937 single 22:20000001-21000000 723761 G +rs61741937 single 22:20000001-21000000 723761 T +rs75178771 single 22:20000001-21000000 723831 C +rs191610943 single 22:20000001-21000000 724173 A +rs191610943 single 22:20000001-21000000 724173 T +rs137981924 single 22:20000001-21000000 724385 C +rs79737013 single 22:20000001-21000000 724621 T +rs377364815 single 22:20000001-21000000 724805 G +rs201047142 single 22:20000001-21000000 724854 A +rs572637793 single 22:20000001-21000000 724962 A +rs143014385 single 22:20000001-21000000 725292 A +rs201882021 single 22:20000001-21000000 725383 G +rs543519631 single 22:20000001-21000000 725640 A +rs17757293 single 22:20000001-21000000 725719 C +rs149363840 single 22:20000001-21000000 726047 A +rs549941186 single 22:20000001-21000000 726233 T +rs200383105 single 22:20000001-21000000 726318 A +rs142395307 single 22:20000001-21000000 727025 T +rs201685089 single 22:20000001-21000000 727183 T +rs199637954 single 22:20000001-21000000 727247 A +rs559480049 insertion 22:20000001-21000000 727296 ACA +rs2542122 single 22:20000001-21000000 727331 G +rs60469696 single 22:20000001-21000000 727674 T +rs73879334 single 22:20000001-21000000 727974 T +rs138313869 single 22:20000001-21000000 728589 T +rs376641802 single 22:20000001-21000000 728903 T +rs114525765 single 22:20000001-21000000 729417 T +rs2542134 single 22:20000001-21000000 729790 C +rs2542135 single 22:20000001-21000000 729824 G +rs148868491 single 22:20000001-21000000 729884 T +rs377472356 single 
22:20000001-21000000 729940 G +rs371098813 single 22:20000001-21000000 730073 A +rs111648638 single 22:20000001-21000000 730859 A +rs375357548 single 22:20000001-21000000 731022 A +rs138280542 single 22:20000001-21000000 731280 T +rs142942616 single 22:20000001-21000000 731303 A +rs61752242 single 22:20000001-21000000 731565 G +rs143879121 single 22:20000001-21000000 731667 T +rs6003874 single 22:20000001-21000000 731789 C +rs78043622 single 22:20000001-21000000 731938 G +rs362117 insertion 22:20000001-21000000 732014 A +rs34771823 deletion 22:20000001-21000000 732536 1 +rs148920262 single 22:20000001-21000000 732576 A +rs362064 single 22:20000001-21000000 732808 T +rs79609167 single 22:20000001-21000000 733170 T +rs6003971 single 22:20000001-21000000 733494 G +rs362243 single 22:20000001-21000000 733666 T +rs9609613 single 22:20000001-21000000 733903 T +rs77021806 single 22:20000001-21000000 735223 G +rs362106 deletion 22:20000001-21000000 735627 4 +rs75545215 single 22:20000001-21000000 735795 G +rs147617380 single 22:20000001-21000000 735958 A +rs6004065 single 22:20000001-21000000 735993 T +rs112357213 single 22:20000001-21000000 736262 C +rs535233152 deletion 22:20000001-21000000 736300 1 +rs376864875 insertion 22:20000001-21000000 736830 A +rs7290852 single 22:20000001-21000000 737315 T +rs61614631 single 22:20000001-21000000 737571 G +rs6004125 single 22:20000001-21000000 737796 A +rs112724111 single 22:20000001-21000000 737878 A +rs9609699 single 22:20000001-21000000 737902 A +rs113492390 single 22:20000001-21000000 738470 A +rs78418667 single 22:20000001-21000000 738602 C +rs13055191 single 22:20000001-21000000 738688 A +rs13055393 single 22:20000001-21000000 738789 T +rs7286096 single 22:20000001-21000000 738878 C +rs13056016 single 22:20000001-21000000 739058 C +rs74556925 single 22:20000001-21000000 739334 A +rs7292186 single 22:20000001-21000000 739374 A +rs147549438 insertion 22:20000001-21000000 739422 T +rs112578295 single 22:20000001-21000000 
739499 C +rs7287029 single 22:20000001-21000000 739625 T +rs4822505 single 22:20000001-21000000 740036 C +rs7285976 single 22:20000001-21000000 740777 C +rs5760515 single 22:20000001-21000000 741059 A +rs7288007 single 22:20000001-21000000 741232 C +rs8137370 single 22:20000001-21000000 741286 C +rs2108744 single 22:20000001-21000000 741889 C +rs361689 single 22:20000001-21000000 742449 A +rs60935923 single 22:20000001-21000000 742491 A +rs115459716 single 22:20000001-21000000 742641 C +rs577624713 single 22:20000001-21000000 742667 T +rs56303282 single 22:20000001-21000000 743105 A +rs9609829 single 22:20000001-21000000 743175 T +rs62219003 single 22:20000001-21000000 743511 T +rs2041969 single 22:20000001-21000000 743535 C +rs113582314 single 22:20000001-21000000 743621 T +rs2041970 single 22:20000001-21000000 743717 G +rs361766 single 22:20000001-21000000 743770 T +rs362215 single 22:20000001-21000000 744110 G +rs361885 single 22:20000001-21000000 744467 G +rs78154936 single 22:20000001-21000000 744976 G +rs362125 single 22:20000001-21000000 745200 G +rs5760821 single 22:20000001-21000000 745379 A +rs5760843 single 22:20000001-21000000 745615 C +rs76618551 single 22:20000001-21000000 745963 C +rs137990895 single 22:20000001-21000000 746351 C +rs6004458 single 22:20000001-21000000 746401 C +rs8137787 single 22:20000001-21000000 746974 T +rs138614277 single 22:20000001-21000000 747346 T +rs887021 single 22:20000001-21000000 747762 G +rs61261475 single 22:20000001-21000000 747862 T +rs2266945 single 22:20000001-21000000 748052 A +rs887022 single 22:20000001-21000000 748252 C +rs35012563 single 22:20000001-21000000 749041 G +rs12233360 single 22:20000001-21000000 749334 C +rs113662065 single 22:20000001-21000000 750171 A +rs361927 single 22:20000001-21000000 750202 G +rs113020357 single 22:20000001-21000000 750283 A +rs362199 single 22:20000001-21000000 750428 C +rs362080 single 22:20000001-21000000 750457 T +rs12627942 single 22:20000001-21000000 750593 C +rs361877 
single 22:20000001-21000000 751135 G +rs6004729 single 22:20000001-21000000 751166 A +rs361591 single 22:20000001-21000000 751254 T +rs75106793 single 22:20000001-21000000 751279 T +rs362075 single 22:20000001-21000000 751338 C +rs361657 insertion 22:20000001-21000000 751361 TTTCTT +rs5752197 single 22:20000001-21000000 751665 A +rs361749 single 22:20000001-21000000 752118 G +rs8135306 single 22:20000001-21000000 752353 T +rs5761169 single 22:20000001-21000000 752475 G +rs362244 single 22:20000001-21000000 752646 G +rs362103 single 22:20000001-21000000 752686 G +rs362062 single 22:20000001-21000000 753040 A +rs362030 single 22:20000001-21000000 753064 T +rs362164 single 22:20000001-21000000 753100 C +rs57342969 single 22:20000001-21000000 753134 C +rs362165 deletion 22:20000001-21000000 753200 15 +rs361613 single 22:20000001-21000000 753270 T +rs57137868 single 22:20000001-21000000 753304 G +rs361640 single 22:20000001-21000000 753614 A +rs362222 deletion 22:20000001-21000000 753670 1 +rs362181 single 22:20000001-21000000 753770 T +rs7289261 single 22:20000001-21000000 753794 G +rs362136 insertion 22:20000001-21000000 753861 T +rs361860 single 22:20000001-21000000 754038 A +rs5761326 single 22:20000001-21000000 754238 G +rs12330004 single 22:20000001-21000000 754287 C +rs2329377 single 22:20000001-21000000 754505 G +rs4485654 single 22:20000001-21000000 754549 A +rs73879341 single 22:20000001-21000000 754596 G +rs4546089 single 22:20000001-21000000 754654 T +rs4546090 single 22:20000001-21000000 754688 T +rs73879342 single 22:20000001-21000000 754843 C +rs1990249 single 22:20000001-21000000 755097 G +rs55651125 single 22:20000001-21000000 755131 T +rs4581986 single 22:20000001-21000000 755225 G +rs7290661 single 22:20000001-21000000 755282 G +rs6004920 single 22:20000001-21000000 755339 G +rs117562299 single 22:20000001-21000000 755512 A +rs117562299 single 22:20000001-21000000 755512 T +rs12484067 single 22:20000001-21000000 755753 T +rs76196980 single 
22:20000001-21000000 755864 G +rs73382250 single 22:20000001-21000000 756298 T +rs73879346 single 22:20000001-21000000 756406 A +rs9610170 single 22:20000001-21000000 756475 A +rs5761392 single 22:20000001-21000000 756511 G +rs12628614 single 22:20000001-21000000 756727 A +rs56834439 insertion 22:20000001-21000000 756793 TT +rs5761416 single 22:20000001-21000000 756967 A +rs6004975 single 22:20000001-21000000 757254 G +rs73879348 single 22:20000001-21000000 757628 G +rs77930355 single 22:20000001-21000000 758108 T +rs2108745 single 22:20000001-21000000 758135 A +rs141707686 single 22:20000001-21000000 758730 T +rs737811 single 22:20000001-21000000 758953 T +rs73879349 single 22:20000001-21000000 759245 G +rs77462388 single 22:20000001-21000000 759404 T +rs5761555 single 22:20000001-21000000 759533 G +rs3747076 single 22:20000001-21000000 759671 A +rs2229316 single 22:20000001-21000000 760105 T +rs200156770 single 22:20000001-21000000 760514 C +rs2228236 single 22:20000001-21000000 761062 A +rs117467773 single 22:20000001-21000000 761191 T +rs4020 single 22:20000001-21000000 761383 A +rs117506051 single 22:20000001-21000000 761565 A +rs887023 single 22:20000001-21000000 761898 T +rs13433540 single 22:20000001-21000000 761936 C +rs9610309 single 22:20000001-21000000 761975 G +rs2266946 single 22:20000001-21000000 762053 A +rs3088200 single 22:20000001-21000000 762399 G +rs12628313 single 22:20000001-21000000 762422 A +rs140749953 deletion 22:20000001-21000000 762879 3 +rs11704853 single 22:20000001-21000000 762954 T +rs12628644 single 22:20000001-21000000 763031 G +rs377391797 insertion 22:20000001-21000000 763084 CATGTGTTCT +rs6005202 single 22:20000001-21000000 763120 G +rs62219007 single 22:20000001-21000000 763229 C +rs73382266 single 22:20000001-21000000 763314 G +rs6005214 single 22:20000001-21000000 763397 T +rs74626013 single 22:20000001-21000000 763427 T +rs75599766 single 22:20000001-21000000 763537 C +rs61507812 deletion 22:20000001-21000000 763807 2 
+rs73156918 single 22:20000001-21000000 763999 A +rs59857822 single 22:20000001-21000000 764219 G +rs141735774 deletion 22:20000001-21000000 764456 2 +rs147542853 single 22:20000001-21000000 764537 G +rs1013633 single 22:20000001-21000000 764558 C +rs1013634 single 22:20000001-21000000 764594 G +rs6005267 single 22:20000001-21000000 764993 C +rs5761913 single 22:20000001-21000000 765020 C +rs56660586 deletion 22:20000001-21000000 765136 1 +rs112254027 single 22:20000001-21000000 765168 G +rs111314022 single 22:20000001-21000000 765200 C +rs11704282 single 22:20000001-21000000 765239 A +rs368651913 deletion 22:20000001-21000000 765362 3 +rs60349480 single 22:20000001-21000000 765417 A +rs68039353 single 22:20000001-21000000 765537 C +rs5761947 single 22:20000001-21000000 765581 C +rs5761951 single 22:20000001-21000000 765654 G +rs28558075 single 22:20000001-21000000 765813 G +rs55992499 single 22:20000001-21000000 765976 T +rs5761998 single 22:20000001-21000000 766083 C +rs5762002 single 22:20000001-21000000 766149 C +rs5762009 single 22:20000001-21000000 766193 A +rs5762012 single 22:20000001-21000000 766272 A +rs12159543 single 22:20000001-21000000 766374 T +rs12157280 single 22:20000001-21000000 766455 A +rs5762024 single 22:20000001-21000000 766729 A +rs5762031 single 22:20000001-21000000 766956 G +rs5762039 single 22:20000001-21000000 767102 T +rs73382284 single 22:20000001-21000000 767164 A +rs5997205 single 22:20000001-21000000 767212 G +rs73879356 single 22:20000001-21000000 767302 A +rs141432776 deletion 22:20000001-21000000 767569 3 +rs549852927 deletion 22:20000001-21000000 767591 1 +rs200236731 insertion 22:20000001-21000000 767618 AG +rs5762058 single 22:20000001-21000000 767716 A +rs17819170 single 22:20000001-21000000 767790 T +rs2041971 single 22:20000001-21000000 768029 A +rs139864261 single 22:20000001-21000000 768094 T +rs116015090 single 22:20000001-21000000 768265 G +rs5762084 single 22:20000001-21000000 768295 G +rs73382290 single 
22:20000001-21000000 768406 A +rs55999317 single 22:20000001-21000000 768474 C +rs9610418 single 22:20000001-21000000 768517 A +rs362220 single 22:20000001-21000000 768634 C +rs75163427 single 22:20000001-21000000 768701 G +rs9610447 single 22:20000001-21000000 768890 T +rs917409 single 22:20000001-21000000 769003 A +rs12330023 single 22:20000001-21000000 769054 T +rs35359276 insertion 22:20000001-21000000 769186 T +rs147913459 single 22:20000001-21000000 769416 C +rs77424257 single 22:20000001-21000000 769728 G +rs58771127 single 22:20000001-21000000 769965 T +rs149129621 single 22:20000001-21000000 770053 G +rs9607342 single 22:20000001-21000000 770789 T +rs362023 single 22:20000001-21000000 770889 C +rs5762277 single 22:20000001-21000000 770967 C +rs11912225 single 22:20000001-21000000 771514 G +rs138656412 insertion 22:20000001-21000000 771539 TTTTG +rs1073829 single 22:20000001-21000000 771589 A +rs1073828 single 22:20000001-21000000 771618 A +rs74647853 single 22:20000001-21000000 771979 C +rs112094838 single 22:20000001-21000000 772002 T +rs35317976 insertion 22:20000001-21000000 772047 C +rs9610540 single 22:20000001-21000000 772098 A +rs9610543 single 22:20000001-21000000 772152 G +rs116551437 single 22:20000001-21000000 772364 C +rs148687416 single 22:20000001-21000000 772419 G +rs738089 single 22:20000001-21000000 772597 A +rs76881461 single 22:20000001-21000000 772772 A +rs6005636 single 22:20000001-21000000 772909 C +rs5762370 single 22:20000001-21000000 772986 A +rs5752645 single 22:20000001-21000000 773166 G +rs367565697 deletion 22:20000001-21000000 773209 1 +rs58803203 single 22:20000001-21000000 773479 C +rs738088 single 22:20000001-21000000 773520 C +rs8139756 single 22:20000001-21000000 773710 T +rs67939127 deletion 22:20000001-21000000 773844 27 +rs5752660 single 22:20000001-21000000 774065 C +rs6005680 single 22:20000001-21000000 774453 G +rs362031 single 22:20000001-21000000 774478 G +rs5752676 single 22:20000001-21000000 774613 A +rs9808840 
single 22:20000001-21000000 774753 G +rs5752681 single 22:20000001-21000000 774807 A +rs361638 insertion 22:20000001-21000000 775034 GTA +rs1638038 single 22:20000001-21000000 775081 C +rs1771145 single 22:20000001-21000000 775166 T +rs1638039 single 22:20000001-21000000 775199 C +rs1771146 single 22:20000001-21000000 775246 T +rs1771147 single 22:20000001-21000000 775273 T +rs73384210 single 22:20000001-21000000 775404 T +rs738087 single 22:20000001-21000000 775549 G +rs200601342 deletion 22:20000001-21000000 775594 3 +rs149007797 single 22:20000001-21000000 775635 A +rs5762477 single 22:20000001-21000000 775781 C +rs11705342 single 22:20000001-21000000 775851 T +rs1638041 single 22:20000001-21000000 776198 A +rs1638042 single 22:20000001-21000000 776265 G +rs1638043 single 22:20000001-21000000 776405 G +rs59618208 single 22:20000001-21000000 776796 A +rs111787024 single 22:20000001-21000000 776818 C +rs6005740 single 22:20000001-21000000 776842 A +rs10606665 deletion 22:20000001-21000000 776973 3 +rs58298509 single 22:20000001-21000000 777288 A +rs73879364 single 22:20000001-21000000 777312 T +rs61318380 single 22:20000001-21000000 777468 A +rs117815810 single 22:20000001-21000000 777532 T +rs738085 single 22:20000001-21000000 777671 G +rs5762554 single 22:20000001-21000000 777940 T +rs738084 single 22:20000001-21000000 778065 G +rs5762563 single 22:20000001-21000000 778150 A +rs10212080 single 22:20000001-21000000 778446 A +rs1076486 single 22:20000001-21000000 778769 T +rs968456 single 22:20000001-21000000 779510 T +rs1319199 single 22:20000001-21000000 779562 T +rs7286419 single 22:20000001-21000000 779603 G +rs874101 single 22:20000001-21000000 779767 C +rs874100 single 22:20000001-21000000 779821 C +rs759612 single 22:20000001-21000000 779945 G +rs587606297 insertion 22:20000001-21000000 779973 G +rs5844420 insertion 22:20000001-21000000 780024 C +rs185252842 single 22:20000001-21000000 780078 T +rs9680797 single 22:20000001-21000000 780295 A +rs111631797 
single 22:20000001-21000000 781005 T +rs71746154 deletion 22:20000001-21000000 781225 2 +rs12152201 single 22:20000001-21000000 781370 T +rs77025632 single 22:20000001-21000000 781444 G +rs57456207 insertion 22:20000001-21000000 781492 TG +rs57590287 single 22:20000001-21000000 781669 C +rs2241231 single 22:20000001-21000000 782101 C +rs116142926 single 22:20000001-21000000 782138 C +rs148075695 deletion 22:20000001-21000000 782497 4 +rs373753666 single 22:20000001-21000000 782524 C +rs144200018 single 22:20000001-21000000 782570 T +rs11267636 deletion 22:20000001-21000000 782649 17 +rs5997411 single 22:20000001-21000000 783484 A +rs8137004 single 22:20000001-21000000 783720 T +rs2241230 single 22:20000001-21000000 784049 A +rs35216297 single 22:20000001-21000000 784101 G +rs35216297 single 22:20000001-21000000 784101 T +rs882745 single 22:20000001-21000000 784216 T +rs7289390 single 22:20000001-21000000 784549 G +rs73156937 single 22:20000001-21000000 784629 A +rs879193 single 22:20000001-21000000 784907 A +rs879193 single 22:20000001-21000000 784907 C +rs879193 single 22:20000001-21000000 784907 G +rs361566 single 22:20000001-21000000 785638 A +rs5763025 single 22:20000001-21000000 786487 C +rs4823020 single 22:20000001-21000000 786599 G +rs4823021 single 22:20000001-21000000 786809 A +rs57291235 single 22:20000001-21000000 787331 T +rs75366319 single 22:20000001-21000000 787476 T +rs76753324 single 22:20000001-21000000 787773 G +rs76753324 single 22:20000001-21000000 787773 T +rs1155419 single 22:20000001-21000000 787849 C +rs9622744 single 22:20000001-21000000 788129 G +rs5763152 single 22:20000001-21000000 788230 A +rs12483784 single 22:20000001-21000000 788435 G +rs1123521 single 22:20000001-21000000 788589 A +rs115253544 single 22:20000001-21000000 788869 T +rs2108746 single 22:20000001-21000000 788968 G +rs9610925 single 22:20000001-21000000 789045 A +rs1005640 single 22:20000001-21000000 789073 C +rs5763244 single 22:20000001-21000000 789189 C +rs5763252 
single 22:20000001-21000000 789293 G +rs139207108 deletion 22:20000001-21000000 789459 1 +rs114366243 single 22:20000001-21000000 789528 T +rs1477177 single 22:20000001-21000000 789573 C +rs1477178 single 22:20000001-21000000 789687 A +rs9610946 single 22:20000001-21000000 789914 A +rs9622791 single 22:20000001-21000000 789959 A +rs5752939 single 22:20000001-21000000 790086 C +rs9610955 single 22:20000001-21000000 790722 G +rs12628193 single 22:20000001-21000000 791437 A +rs9619753 single 22:20000001-21000000 791820 A +rs113021966 single 22:20000001-21000000 792629 T +rs62219027 single 22:20000001-21000000 793066 T +rs1035239 single 22:20000001-21000000 793913 C +rs62219028 single 22:20000001-21000000 794074 C +rs75336684 single 22:20000001-21000000 794118 G +rs5763655 single 22:20000001-21000000 794483 G +rs62219029 single 22:20000001-21000000 794725 C +rs56163007 insertion 22:20000001-21000000 794804 AC +rs59848556 single 22:20000001-21000000 794923 T +rs62219031 single 22:20000001-21000000 795125 G +rs112153408 deletion 22:20000001-21000000 795295 2 +rs60574840 single 22:20000001-21000000 795419 G +rs62219032 single 22:20000001-21000000 795453 C +rs1153427 single 22:20000001-21000000 795756 T +rs2287256 single 22:20000001-21000000 795925 T +rs8353 single 22:20000001-21000000 796116 T +rs9997 single 22:20000001-21000000 796174 C +rs8142474 single 22:20000001-21000000 796866 C +rs5763911 single 22:20000001-21000000 797288 C +rs741436 single 22:20000001-21000000 797409 A +rs77467662 single 22:20000001-21000000 797459 G +rs553553094 deletion 22:20000001-21000000 797640 1 +rs741435 single 22:20000001-21000000 797789 A +rs1153428 single 22:20000001-21000000 798353 A +rs73156947 single 22:20000001-21000000 798779 T +rs150439544 insertion 22:20000001-21000000 799066 TTTG +rs150439544 insertion 22:20000001-21000000 799066 TTTGTTTGTTTG +rs139519810 single 22:20000001-21000000 799152 A +rs114552752 single 22:20000001-21000000 799455 T +rs5753167 single 22:20000001-21000000 
799639 A +rs62219033 single 22:20000001-21000000 799826 A +rs114051492 single 22:20000001-21000000 799885 C +rs5997675 single 22:20000001-21000000 799971 G +rs144812370 deletion 22:20000001-21000000 800041 3 +rs62219034 single 22:20000001-21000000 800255 T +rs78186591 single 22:20000001-21000000 800354 G +rs34110805 single 22:20000001-21000000 800780 G +rs8748 single 22:20000001-21000000 800834 G +rs1058990 single 22:20000001-21000000 800982 A +rs75249336 single 22:20000001-21000000 801053 T +rs361727 single 22:20000001-21000000 801220 C +rs77101095 single 22:20000001-21000000 801572 A +rs79820558 single 22:20000001-21000000 801874 A +rs73384246 single 22:20000001-21000000 802177 A +rs62219035 single 22:20000001-21000000 802698 G +rs146343607 deletion 22:20000001-21000000 803327 10 +rs12159159 single 22:20000001-21000000 803582 A +rs12160833 single 22:20000001-21000000 803610 C +rs112983520 single 22:20000001-21000000 803843 T +rs12168610 single 22:20000001-21000000 804245 A +rs12167329 single 22:20000001-21000000 804294 T +rs3810601 single 22:20000001-21000000 804349 T +rs16988440 single 22:20000001-21000000 804496 G +rs2329356 single 22:20000001-21000000 805368 G +rs2003591 single 22:20000001-21000000 805685 G +rs76649694 single 22:20000001-21000000 806396 A +rs62219036 single 22:20000001-21000000 806957 G +rs5997972 single 22:20000001-21000000 808712 T +rs12157521 single 22:20000001-21000000 809458 T +rs5753651 single 22:20000001-21000000 809619 T +rs5753658 single 22:20000001-21000000 809717 A +rs361988 single 22:20000001-21000000 810053 T +rs75125763 single 22:20000001-21000000 810751 T +rs73879379 single 22:20000001-21000000 811104 A +rs5749316 single 22:20000001-21000000 811230 G +rs111314590 insertion 22:20000001-21000000 811324 CG +rs362071 single 22:20000001-21000000 811644 C +rs361573 single 22:20000001-21000000 811673 T +rs34030327 deletion 22:20000001-21000000 811853 1 +rs362087 single 22:20000001-21000000 811929 A +rs73384254 single 
22:20000001-21000000 812479 G +rs5753787 single 22:20000001-21000000 812727 A +rs28478721 single 22:20000001-21000000 812941 C +rs5753802 single 22:20000001-21000000 813246 C +rs79873328 single 22:20000001-21000000 813397 G +rs148076715 deletion 22:20000001-21000000 814310 3 +rs139831649 single 22:20000001-21000000 814666 A +rs4820064 single 22:20000001-21000000 814759 A +rs192682105 single 22:20000001-21000000 814936 T +rs73384257 single 22:20000001-21000000 815265 G +rs73384259 single 22:20000001-21000000 815314 T +rs8140159 single 22:20000001-21000000 816110 T +rs5753874 single 22:20000001-21000000 816192 C +rs62217858 single 22:20000001-21000000 816271 A +rs4626022 single 22:20000001-21000000 816310 G +rs113178597 single 22:20000001-21000000 816421 A +rs165658 single 22:20000001-21000000 816549 T +rs7285244 single 22:20000001-21000000 816675 G +rs558617516 deletion 22:20000001-21000000 816808 1 +rs17819290 single 22:20000001-21000000 817182 G +rs165809 single 22:20000001-21000000 817807 T +rs98399 single 22:20000001-21000000 818103 A +rs165807 single 22:20000001-21000000 818266 G +rs165630 single 22:20000001-21000000 818420 C +rs75351052 deletion 22:20000001-21000000 818504 2 +rs77097917 single 22:20000001-21000000 820131 T +rs62219860 single 22:20000001-21000000 820532 T +rs73384264 single 22:20000001-21000000 820850 T +rs73384265 single 22:20000001-21000000 821025 A +rs116129811 single 22:20000001-21000000 821131 C +rs5998587 single 22:20000001-21000000 821696 G +rs165889 single 22:20000001-21000000 821873 T +rs5998600 single 22:20000001-21000000 822004 A +rs62219862 single 22:20000001-21000000 822136 A +rs165864 single 22:20000001-21000000 822365 T +rs78391699 single 22:20000001-21000000 822706 T +rs177343 single 22:20000001-21000000 822944 T +rs76783915 single 22:20000001-21000000 823241 T +rs114505951 single 22:20000001-21000000 824279 T +rs62219863 single 22:20000001-21000000 824372 A +rs9611669 single 22:20000001-21000000 824469 A +rs5994653 single 
22:20000001-21000000 824815 T +rs5749549 single 22:20000001-21000000 824901 T +rs538204093 insertion 22:20000001-21000000 824967 TGGA +rs566014622 insertion 22:20000001-21000000 825006 ATGG +rs4820097 single 22:20000001-21000000 825059 C +rs57089443 single 22:20000001-21000000 825129 C +rs144278149 single 22:20000001-21000000 825153 G +rs5754388 single 22:20000001-21000000 825188 G +rs115766542 single 22:20000001-21000000 825278 G +rs73384271 single 22:20000001-21000000 825409 G +rs114700187 single 22:20000001-21000000 825871 T +rs78490140 single 22:20000001-21000000 827098 C +rs5749587 single 22:20000001-21000000 827263 C +rs62219865 single 22:20000001-21000000 827509 C +rs77539215 single 22:20000001-21000000 827620 T +rs78498710 deletion 22:20000001-21000000 828221 1 +rs181310028 single 22:20000001-21000000 828348 A +rs62219866 single 22:20000001-21000000 828537 A +rs9623544 single 22:20000001-21000000 828637 G +rs165828 single 22:20000001-21000000 828866 C +rs112756688 single 22:20000001-21000000 828925 T +rs57516660 single 22:20000001-21000000 829664 T +rs11914248 single 22:20000001-21000000 832036 C +rs5749656 single 22:20000001-21000000 832189 C +rs5754661 single 22:20000001-21000000 832335 T +rs68060846 deletion 22:20000001-21000000 832492 1 +rs200673843 single 22:20000001-21000000 832522 C +rs5749659 single 22:20000001-21000000 832579 A +rs5749659 single 22:20000001-21000000 832579 C +rs9623646 single 22:20000001-21000000 832661 C +rs112585570 single 22:20000001-21000000 832718 T +rs181474359 single 22:20000001-21000000 833048 T +rs62219868 single 22:20000001-21000000 833080 A +rs201980940 deletion 22:20000001-21000000 833552 1 +rs74335326 single 22:20000001-21000000 833696 A +rs5999159 single 22:20000001-21000000 834269 G +rs62219869 single 22:20000001-21000000 834312 C +rs4401303 single 22:20000001-21000000 834563 A +rs62219870 single 22:20000001-21000000 834709 T +rs73384283 single 22:20000001-21000000 834939 G +rs5749718 single 22:20000001-21000000 
835088 T +rs7288014 single 22:20000001-21000000 835242 G +rs183361424 single 22:20000001-21000000 835267 A +rs75482140 single 22:20000001-21000000 835433 G +rs113106249 single 22:20000001-21000000 835795 A +rs147017596 single 22:20000001-21000000 835821 C +rs111410527 single 22:20000001-21000000 835883 T +rs116454307 single 22:20000001-21000000 835922 A +rs75115156 single 22:20000001-21000000 836188 G +rs76041972 single 22:20000001-21000000 836300 T +rs361967 insertion 22:20000001-21000000 837034 T +rs5749793 single 22:20000001-21000000 838054 C +rs5749798 single 22:20000001-21000000 838198 A +rs5749800 single 22:20000001-21000000 838228 C +rs5749808 single 22:20000001-21000000 838435 C +rs5749818 single 22:20000001-21000000 838592 T +rs12159968 single 22:20000001-21000000 838742 T +rs5749847 single 22:20000001-21000000 838861 G +rs576583986 deletion 22:20000001-21000000 839018 1 +rs62219872 single 22:20000001-21000000 839620 G +rs75179603 single 22:20000001-21000000 839809 T +rs4564890 single 22:20000001-21000000 839874 A +rs1970990 single 22:20000001-21000000 839911 C +rs559391443 deletion 22:20000001-21000000 839979 1 +rs112326480 single 22:20000001-21000000 841054 A +rs73384285 single 22:20000001-21000000 841268 T +rs144900889 single 22:20000001-21000000 841486 A +rs35035626 deletion 22:20000001-21000000 841671 1 +rs5755236 single 22:20000001-21000000 841840 A +rs5999507 single 22:20000001-21000000 841919 T +rs1572876 single 22:20000001-21000000 842156 G +rs1771144 single 22:20000001-21000000 842361 C +rs1572877 single 22:20000001-21000000 842383 G +rs5755286 single 22:20000001-21000000 842668 C +rs528013093 single 22:20000001-21000000 842961 G +rs9623846 single 22:20000001-21000000 843377 A +rs165613 single 22:20000001-21000000 843884 G +rs9620147 single 22:20000001-21000000 844335 C +rs12484813 single 22:20000001-21000000 844662 A +rs12169691 single 22:20000001-21000000 844943 A +rs12169691 single 22:20000001-21000000 844943 C +rs78138393 single 
22:20000001-21000000 845954 A +rs112668245 single 22:20000001-21000000 846547 C +rs4821372 single 22:20000001-21000000 846640 G +rs200685174 deletion 22:20000001-21000000 846746 1 +rs5755677 single 22:20000001-21000000 847471 T +rs144135811 single 22:20000001-21000000 848166 A +rs144807249 insertion 22:20000001-21000000 848360 G +rs115394808 single 22:20000001-21000000 848451 A +rs12160574 single 22:20000001-21000000 848856 C +rs114867051 single 22:20000001-21000000 849422 A +rs115435672 single 22:20000001-21000000 849602 A +rs554510520 single 22:20000001-21000000 849744 C +rs562091946 single 22:20000001-21000000 850207 A +rs2079099 single 22:20000001-21000000 850439 G +rs7287307 single 22:20000001-21000000 850465 G +rs7287307 single 22:20000001-21000000 850465 T +rs1860127 single 22:20000001-21000000 850576 T +rs9608041 single 22:20000001-21000000 851423 T +rs138249536 single 22:20000001-21000000 851449 T +rs5750181 single 22:20000001-21000000 852483 T +rs115998938 single 22:20000001-21000000 852835 G +rs79092664 single 22:20000001-21000000 853156 C +rs371461908 single 22:20000001-21000000 853479 T +rs6519391 single 22:20000001-21000000 853520 T +rs62219897 single 22:20000001-21000000 853606 T +rs562639782 deletion 22:20000001-21000000 853709 1 +rs148640315 single 22:20000001-21000000 853867 A +rs5756019 single 22:20000001-21000000 853910 A +rs4820216 single 22:20000001-21000000 854160 C +rs111499360 single 22:20000001-21000000 854305 A +rs539471333 deletion 22:20000001-21000000 854678 4 +rs35094345 deletion 22:20000001-21000000 855009 1 +rs139058763 single 22:20000001-21000000 855183 A +rs113733472 single 22:20000001-21000000 855213 T +rs165798 single 22:20000001-21000000 855285 A +rs1682 single 22:20000001-21000000 855538 C +rs115649345 single 22:20000001-21000000 855615 A +rs61740440 single 22:20000001-21000000 855637 C +rs9612142 single 22:20000001-21000000 855696 T +rs61747404 single 22:20000001-21000000 855728 T +rs5995230 single 22:20000001-21000000 855763 
T +rs74368349 single 22:20000001-21000000 855850 A +rs138574421 deletion 22:20000001-21000000 856214 2 +rs201449145 deletion 22:20000001-21000000 856419 1 +rs113152604 single 22:20000001-21000000 856620 A +rs78379949 single 22:20000001-21000000 856857 C +rs4820226 single 22:20000001-21000000 857049 T +rs6000237 single 22:20000001-21000000 857390 A +rs34592890 single 22:20000001-21000000 857644 A +rs144189613 single 22:20000001-21000000 857680 C +rs76788398 single 22:20000001-21000000 857794 G +rs557848953 insertion 22:20000001-21000000 857838 C +rs79785707 single 22:20000001-21000000 858094 G +rs373739552 single 22:20000001-21000000 858328 A +rs75702695 single 22:20000001-21000000 858742 A +rs6000285 single 22:20000001-21000000 858802 T +rs5995304 single 22:20000001-21000000 859291 A +rs145396820 single 22:20000001-21000000 859562 G +rs2876824 single 22:20000001-21000000 859628 A +rs2876825 single 22:20000001-21000000 859800 C +rs530924853 single 22:20000001-21000000 859939 A +rs79538179 single 22:20000001-21000000 859988 A +rs375433191 deletion 22:20000001-21000000 860070 3 +rs1807511 single 22:20000001-21000000 860378 C +rs112097936 deletion 22:20000001-21000000 860486 1 +rs35816559 insertion 22:20000001-21000000 860794 G +rs2010947 single 22:20000001-21000000 860880 G +rs142518536 insertion 22:20000001-21000000 860908 T +rs738092 single 22:20000001-21000000 860930 T +rs74754583 single 22:20000001-21000000 861289 T +rs5844426 insertion 22:20000001-21000000 861405 G +rs76278738 single 22:20000001-21000000 861505 G +rs386486036 single 22:20000001-21000000 861642 C +rs12159991 single 22:20000001-21000000 861721 G +rs917408 single 22:20000001-21000000 862148 T +rs5750324 single 22:20000001-21000000 862645 G +rs118084062 single 22:20000001-21000000 863436 T +rs73156970 single 22:20000001-21000000 863884 A +rs75633876 single 22:20000001-21000000 864043 C +rs76651446 single 22:20000001-21000000 864341 G +rs111354787 single 22:20000001-21000000 864391 G +rs141370998 
single 22:20000001-21000000 864420 T +rs57046550 single 22:20000001-21000000 864600 C +rs76188872 single 22:20000001-21000000 864666 T +rs8137729 single 22:20000001-21000000 864843 T +rs6000569 single 22:20000001-21000000 864939 T +rs5756526 single 22:20000001-21000000 865144 C +rs11704564 single 22:20000001-21000000 865290 C +rs35887140 insertion 22:20000001-21000000 865785 A +rs6000620 single 22:20000001-21000000 866230 C +rs997734 single 22:20000001-21000000 866292 T +rs997144 single 22:20000001-21000000 866377 T +rs997145 single 22:20000001-21000000 866657 T +rs5756597 single 22:20000001-21000000 866833 A +rs5756605 single 22:20000001-21000000 867072 T +rs62238762 single 22:20000001-21000000 867119 T +rs165687 single 22:20000001-21000000 867264 A +rs5756675 single 22:20000001-21000000 868222 A +rs115442611 single 22:20000001-21000000 868251 A +rs74637305 single 22:20000001-21000000 868673 C +rs78093216 single 22:20000001-21000000 868705 G +rs11703747 single 22:20000001-21000000 868963 T +rs13053946 single 22:20000001-21000000 869135 T +rs28438593 single 22:20000001-21000000 869474 T +rs28676928 single 22:20000001-21000000 869508 T +rs73384298 single 22:20000001-21000000 869566 G +rs361781 deletion 22:20000001-21000000 869750 1 +rs11703134 single 22:20000001-21000000 869960 T +rs150125958 single 22:20000001-21000000 869987 A +rs145506338 single 22:20000001-21000000 870027 A +rs148839032 single 22:20000001-21000000 870153 A +rs536348998 insertion 22:20000001-21000000 870286 CAAAACAAACAAA +rs569935957 insertion 22:20000001-21000000 870604 AAT +rs114048337 single 22:20000001-21000000 870684 T +rs5756801 single 22:20000001-21000000 871337 C +rs548844927 insertion 22:20000001-21000000 871390 T +rs76713085 single 22:20000001-21000000 871742 T +rs75516987 single 22:20000001-21000000 871963 G +rs77931322 single 22:20000001-21000000 872109 C +rs112448672 insertion 22:20000001-21000000 872139 T +rs112448672 insertion 22:20000001-21000000 872139 TT +rs112448672 insertion 
22:20000001-21000000 872139 TTT +rs11090191 single 22:20000001-21000000 872506 T +rs75439996 single 22:20000001-21000000 872720 G +rs2014654 single 22:20000001-21000000 872854 T +rs11090193 single 22:20000001-21000000 873392 A +rs11913315 single 22:20000001-21000000 873557 G +rs11912924 single 22:20000001-21000000 873874 A +rs73879393 single 22:20000001-21000000 874447 G +rs114516538 single 22:20000001-21000000 874539 A +rs11912433 single 22:20000001-21000000 875405 C +rs114056587 single 22:20000001-21000000 875484 T +rs5750555 single 22:20000001-21000000 875692 T +rs148027533 single 22:20000001-21000000 875713 T +rs141901165 single 22:20000001-21000000 875776 A +rs557681487 deletion 22:20000001-21000000 875837 1 +rs200999942 deletion 22:20000001-21000000 875982 20 +rs8140155 single 22:20000001-21000000 876274 A +rs7291930 single 22:20000001-21000000 876508 G +rs114191051 single 22:20000001-21000000 876840 A +rs9612171 single 22:20000001-21000000 877224 T +rs74548973 single 22:20000001-21000000 877583 A +rs61541782 single 22:20000001-21000000 877644 A +rs75902369 single 22:20000001-21000000 877766 G +rs199594432 insertion 22:20000001-21000000 878900 TTTTG +rs8137443 single 22:20000001-21000000 879126 T +rs113246670 insertion 22:20000001-21000000 879607 T +rs141994484 single 22:20000001-21000000 880011 A +rs114335926 single 22:20000001-21000000 880118 A +rs117141812 single 22:20000001-21000000 880709 T +rs79962566 single 22:20000001-21000000 880897 T +rs1076348 single 22:20000001-21000000 881110 A +rs1110462 single 22:20000001-21000000 881384 G +rs11090206 single 22:20000001-21000000 881677 A +rs148371154 deletion 22:20000001-21000000 881739 1 +rs12165407 single 22:20000001-21000000 882326 T +rs6001257 single 22:20000001-21000000 882388 T +rs114819925 single 22:20000001-21000000 882608 G +rs114819925 single 22:20000001-21000000 882608 T +rs144682660 deletion 22:20000001-21000000 882735 2 +rs11913514 single 22:20000001-21000000 882951 T +rs78944331 single 
22:20000001-21000000 883025 T +rs79052816 single 22:20000001-21000000 883376 T +rs1771149 single 22:20000001-21000000 883448 C +rs1638036 single 22:20000001-21000000 883637 A +rs149561599 single 22:20000001-21000000 883739 G +rs6001392 single 22:20000001-21000000 884236 T +rs12484631 single 22:20000001-21000000 884299 T +rs6519408 single 22:20000001-21000000 884909 T +rs141160674 insertion 22:20000001-21000000 885037 G +rs74434132 single 22:20000001-21000000 885306 T +rs115226093 single 22:20000001-21000000 885417 T +rs144537609 deletion 22:20000001-21000000 885474 8 +rs75648513 single 22:20000001-21000000 885688 A +rs73156978 single 22:20000001-21000000 886277 A +rs5995708 single 22:20000001-21000000 887079 G +rs77182190 single 22:20000001-21000000 887191 A +rs5995717 single 22:20000001-21000000 887532 G +rs74875546 single 22:20000001-21000000 887603 G +rs73879395 single 22:20000001-21000000 887971 T +rs115972182 single 22:20000001-21000000 888166 T +rs5995739 single 22:20000001-21000000 888737 C +rs115678536 single 22:20000001-21000000 888774 G +rs115678536 single 22:20000001-21000000 888774 T +rs73156979 single 22:20000001-21000000 888827 A +rs77372198 single 22:20000001-21000000 888925 T +rs11703918 single 22:20000001-21000000 889101 G +rs5757702 single 22:20000001-21000000 889270 A +rs5757702 single 22:20000001-21000000 889270 C +rs148429575 single 22:20000001-21000000 889368 G +rs79089854 single 22:20000001-21000000 889685 T +rs6001632 single 22:20000001-21000000 889907 A +rs6001633 single 22:20000001-21000000 889932 A +rs139346388 single 22:20000001-21000000 889994 C +rs5995758 single 22:20000001-21000000 890026 A +rs4528873 single 22:20000001-21000000 890117 T +rs556617943 deletion 22:20000001-21000000 890159 6 +rs5757755 single 22:20000001-21000000 890266 A +rs147886513 single 22:20000001-21000000 890656 A +rs77859693 single 22:20000001-21000000 891139 A +rs2269830 single 22:20000001-21000000 891258 A +rs5995779 single 22:20000001-21000000 891365 C 
+rs148534902 insertion 22:20000001-21000000 891498 CCTA +rs4821915 single 22:20000001-21000000 891544 G +rs2269831 single 22:20000001-21000000 891567 A +rs75878005 single 22:20000001-21000000 891724 T +rs2269832 single 22:20000001-21000000 891841 T +rs2269833 single 22:20000001-21000000 891927 T +rs78413892 single 22:20000001-21000000 892831 T +rs142664482 single 22:20000001-21000000 893193 T +rs139562158 single 22:20000001-21000000 893267 G +rs140034483 insertion 22:20000001-21000000 893574 G +rs1638037 single 22:20000001-21000000 893768 C +rs79860789 single 22:20000001-21000000 894170 T +rs140548641 single 22:20000001-21000000 895023 A +rs2329363 single 22:20000001-21000000 895157 C +rs144714885 single 22:20000001-21000000 895371 C +rs76852703 single 22:20000001-21000000 895772 C +rs113878252 single 22:20000001-21000000 895926 T +rs137870605 single 22:20000001-21000000 896220 T +rs11914092 single 22:20000001-21000000 896692 A +rs73156987 single 22:20000001-21000000 896899 C +rs7292126 single 22:20000001-21000000 896925 A +rs56167190 single 22:20000001-21000000 897231 C +rs147394731 single 22:20000001-21000000 897558 T +rs5750937 single 22:20000001-21000000 897845 T +rs140000135 single 22:20000001-21000000 897882 T +rs9608082 single 22:20000001-21000000 897913 A +rs5750938 single 22:20000001-21000000 898004 T +rs4821945 single 22:20000001-21000000 898119 C +rs115166844 single 22:20000001-21000000 898156 G +rs4821947 single 22:20000001-21000000 898322 C +rs75472746 single 22:20000001-21000000 898401 A +rs77937663 single 22:20000001-21000000 898507 T +rs147989304 single 22:20000001-21000000 898814 A +rs5757972 single 22:20000001-21000000 898955 C +rs150376781 deletion 22:20000001-21000000 899157 1 +rs5757984 single 22:20000001-21000000 899185 A +rs5757992 single 22:20000001-21000000 899253 A +rs5757994 single 22:20000001-21000000 899315 A +rs114481046 single 22:20000001-21000000 899362 G +rs12483842 single 22:20000001-21000000 899633 T +rs12484159 single 
22:20000001-21000000 899704 A +rs56307139 deletion 22:20000001-21000000 899760 1 +rs4821961 single 22:20000001-21000000 899970 A +rs4821963 single 22:20000001-21000000 900073 C +rs4821967 single 22:20000001-21000000 900576 T +rs4821970 single 22:20000001-21000000 900797 T +rs16988498 single 22:20000001-21000000 900943 T +rs35724035 single 22:20000001-21000000 901075 T +rs16988501 single 22:20000001-21000000 901619 A +rs16988502 single 22:20000001-21000000 901649 C +rs9637342 single 22:20000001-21000000 901695 C +rs11913912 single 22:20000001-21000000 901797 A +rs60395699 single 22:20000001-21000000 902116 T +rs140738445 deletion 22:20000001-21000000 902439 7 +rs76279337 single 22:20000001-21000000 902871 T +rs886319 single 22:20000001-21000000 903409 T +rs12484992 single 22:20000001-21000000 903438 C +rs886320 single 22:20000001-21000000 903516 C +rs73879402 single 22:20000001-21000000 903695 A +rs17211374 single 22:20000001-21000000 903862 G +rs116345961 single 22:20000001-21000000 904030 T +rs5751024 single 22:20000001-21000000 904072 C +rs75195136 single 22:20000001-21000000 904263 G +rs114815122 single 22:20000001-21000000 905061 G +rs1209259 single 22:20000001-21000000 905087 C +rs115227665 single 22:20000001-21000000 905204 C +rs116430800 single 22:20000001-21000000 905619 G +rs77357764 single 22:20000001-21000000 905645 T +rs1075339 single 22:20000001-21000000 906366 A +rs28681662 single 22:20000001-21000000 906453 T +rs165829 single 22:20000001-21000000 907302 C +rs71725860 deletion 22:20000001-21000000 907650 2 +rs177419 single 22:20000001-21000000 908114 A +rs5758363 single 22:20000001-21000000 908514 T +rs165749 single 22:20000001-21000000 908701 C +rs634622 single 22:20000001-21000000 908731 C +rs165609 single 22:20000001-21000000 908992 C +rs145029830 single 22:20000001-21000000 909205 T +rs117046295 single 22:20000001-21000000 909447 T +rs73157002 single 22:20000001-21000000 909543 A +rs73159105 single 22:20000001-21000000 909640 T +rs165804 single 
22:20000001-21000000 909673 C +rs79160294 single 22:20000001-21000000 910030 T +rs117176662 single 22:20000001-21000000 910090 G +rs73159109 single 22:20000001-21000000 910192 G +rs116891401 single 22:20000001-21000000 910219 C +rs148876050 single 22:20000001-21000000 910263 T +rs115069799 single 22:20000001-21000000 910481 T +rs361842 deletion 22:20000001-21000000 910589 1 +rs165752 single 22:20000001-21000000 910682 G +rs5758417 single 22:20000001-21000000 910832 T +rs138331367 single 22:20000001-21000000 910865 T +rs5758468 single 22:20000001-21000000 911755 C +rs1807730 single 22:20000001-21000000 912208 T +rs165855 single 22:20000001-21000000 912252 T +rs139965797 deletion 22:20000001-21000000 912403 2 +rs4560234 single 22:20000001-21000000 912841 G +rs145095141 single 22:20000001-21000000 912878 C +rs165833 single 22:20000001-21000000 912969 G +rs16988513 single 22:20000001-21000000 912994 A +rs165671 single 22:20000001-21000000 913509 G +rs177421 single 22:20000001-21000000 913756 A +rs177422 single 22:20000001-21000000 913877 A +rs141805630 insertion 22:20000001-21000000 914548 TATTAT +rs141805630 insertion 22:20000001-21000000 914548 TATTATTAT +rs148167389 single 22:20000001-21000000 914626 T +rs147303132 single 22:20000001-21000000 914688 T +rs165779 single 22:20000001-21000000 914756 A +rs165672 single 22:20000001-21000000 914957 T +rs112729229 single 22:20000001-21000000 915534 T +rs5758631 single 22:20000001-21000000 915616 G +rs4822096 single 22:20000001-21000000 915674 T +rs150408219 single 22:20000001-21000000 915791 C +rs5996131 single 22:20000001-21000000 915988 G +rs5751254 single 22:20000001-21000000 916332 T +rs73159117 single 22:20000001-21000000 916619 G +rs74972844 single 22:20000001-21000000 916666 T +rs362137 deletion 22:20000001-21000000 916694 2 +rs148221516 single 22:20000001-21000000 916831 T +rs165884 single 22:20000001-21000000 917066 G +rs165788 single 22:20000001-21000000 917303 T +rs8137236 single 22:20000001-21000000 917526 G 
+rs77047145 single 22:20000001-21000000 917683 G +rs143377454 single 22:20000001-21000000 917830 G +rs143377454 single 22:20000001-21000000 917830 T +rs532571713 single 22:20000001-21000000 918637 G +rs165874 single 22:20000001-21000000 919305 G +rs177423 single 22:20000001-21000000 919376 A +rs3791192 single 22:20000001-21000000 919445 A +rs165685 single 22:20000001-21000000 919754 T +rs73159120 single 22:20000001-21000000 920043 T +rs165900 single 22:20000001-21000000 920198 C +rs555771579 deletion 22:20000001-21000000 920317 4 +rs116294672 single 22:20000001-21000000 920701 A +rs539945336 insertion 22:20000001-21000000 920831 CAG +rs165664 single 22:20000001-21000000 921338 G +rs77616353 single 22:20000001-21000000 921454 A +rs116892353 single 22:20000001-21000000 921722 G +rs165727 single 22:20000001-21000000 921892 T +rs79106741 single 22:20000001-21000000 922155 G +rs12484645 single 22:20000001-21000000 922315 C +rs17819434 single 22:20000001-21000000 922469 T +rs55636848 single 22:20000001-21000000 923360 T +rs72200590 deletion 22:20000001-21000000 923922 3 +rs165846 single 22:20000001-21000000 924027 G +rs165721 single 22:20000001-21000000 924590 G +rs362206 deletion 22:20000001-21000000 924680 1 +rs143351279 single 22:20000001-21000000 924780 A +rs146839104 single 22:20000001-21000000 924812 A +rs574570557 deletion 22:20000001-21000000 925033 16 +rs2079096 single 22:20000001-21000000 925100 A +rs595044 single 22:20000001-21000000 925203 G +rs138548433 single 22:20000001-21000000 925353 T +rs57039063 single 22:20000001-21000000 925623 G +rs5751448 single 22:20000001-21000000 925759 T +rs165620 single 22:20000001-21000000 926294 C +rs79995359 single 22:20000001-21000000 926558 A +rs165918 single 22:20000001-21000000 926624 A +rs5759268 single 22:20000001-21000000 926812 T +rs362058 insertion 22:20000001-21000000 926926 C +rs531789186 insertion 22:20000001-21000000 927256 CTT +rs531789186 insertion 22:20000001-21000000 927256 TTC +rs177425 single 
22:20000001-21000000 927542 T +rs5759311 single 22:20000001-21000000 927674 A +rs5759314 single 22:20000001-21000000 927715 C +rs165666 single 22:20000001-21000000 927801 A +rs11704626 single 22:20000001-21000000 928129 T +rs6003200 single 22:20000001-21000000 928233 T +rs165772 single 22:20000001-21000000 928265 A +rs165674 single 22:20000001-21000000 928808 T +rs12484576 single 22:20000001-21000000 929083 C +rs165842 single 22:20000001-21000000 929285 G +rs177426 single 22:20000001-21000000 929781 A +rs165684 single 22:20000001-21000000 929913 T +rs11365089 deletion 22:20000001-21000000 930008 1 +rs61090860 deletion 22:20000001-21000000 930055 1 +rs199804769 deletion 22:20000001-21000000 930342 1 +rs361812 deletion 22:20000001-21000000 930978 3 +rs12484690 single 22:20000001-21000000 931093 C +rs12484039 single 22:20000001-21000000 931122 C +rs114152992 single 22:20000001-21000000 931489 G +rs17819470 single 22:20000001-21000000 931543 A +rs582898 single 22:20000001-21000000 932166 A +rs2079097 single 22:20000001-21000000 932237 G +rs94001 single 22:20000001-21000000 932278 T +rs2079098 single 22:20000001-21000000 932457 T +rs116277470 single 22:20000001-21000000 932679 A +rs78146318 single 22:20000001-21000000 932711 T +rs66846976 deletion 22:20000001-21000000 932963 2 +rs56024156 deletion 22:20000001-21000000 933197 2 +rs17757688 single 22:20000001-21000000 933734 T +rs6003243 single 22:20000001-21000000 933783 T +rs5751537 single 22:20000001-21000000 933900 C +rs73877705 single 22:20000001-21000000 934039 T +rs5759416 single 22:20000001-21000000 934238 A +rs78095529 single 22:20000001-21000000 934410 T +rs79876592 single 22:20000001-21000000 934507 T +rs200362928 deletion 22:20000001-21000000 934763 1 +rs5751540 single 22:20000001-21000000 934971 G +rs73159139 single 22:20000001-21000000 935008 T +rs165838 single 22:20000001-21000000 935375 G +rs165614 single 22:20000001-21000000 935672 C +rs633788 single 22:20000001-21000000 935716 A +rs73159140 single 
22:20000001-21000000 935945 A +rs646164 single 22:20000001-21000000 936333 T +rs111672536 single 22:20000001-21000000 936819 C +rs116529718 single 22:20000001-21000000 936872 A +rs165643 single 22:20000001-21000000 937600 T +rs5759455 single 22:20000001-21000000 938179 A +rs165690 single 22:20000001-21000000 938376 G +rs1572878 single 22:20000001-21000000 938771 C +rs165835 single 22:20000001-21000000 939122 A +rs61741073 single 22:20000001-21000000 939399 A +rs61748930 single 22:20000001-21000000 939445 T +rs2285698 single 22:20000001-21000000 939607 A +rs2285699 single 22:20000001-21000000 940084 A +rs165615 single 22:20000001-21000000 940207 C +rs165695 single 22:20000001-21000000 940265 G +rs734740 single 22:20000001-21000000 940290 T +rs116623013 single 22:20000001-21000000 940330 T +rs73877706 single 22:20000001-21000000 940476 A +rs93173 single 22:20000001-21000000 940793 A +rs148327276 single 22:20000001-21000000 940821 T +rs12484193 single 22:20000001-21000000 941011 T +rs648963 single 22:20000001-21000000 941194 A +rs648963 single 22:20000001-21000000 941194 C +rs648963 single 22:20000001-21000000 941194 T +rs741191 single 22:20000001-21000000 941449 C +rs150690002 single 22:20000001-21000000 941550 T +rs73877708 single 22:20000001-21000000 942157 A +rs11913826 single 22:20000001-21000000 942191 A +rs2240029 single 22:20000001-21000000 942216 A +rs73159144 single 22:20000001-21000000 942640 A +rs9620292 single 22:20000001-21000000 942962 T +rs1076456 single 22:20000001-21000000 943201 A +rs56023828 single 22:20000001-21000000 943589 C +rs77784036 single 22:20000001-21000000 943618 C +rs77214221 single 22:20000001-21000000 943671 C +rs6003315 single 22:20000001-21000000 943948 A +rs165600 single 22:20000001-21000000 944154 G +rs73877709 single 22:20000001-21000000 944404 C +rs165765 single 22:20000001-21000000 944832 C +rs361989 single 22:20000001-21000000 945318 A +rs11912746 single 22:20000001-21000000 945760 C +rs78227773 single 22:20000001-21000000 
946197 A +rs77700530 single 22:20000001-21000000 946266 T +rs165591 single 22:20000001-21000000 946301 T +rs595272 single 22:20000001-21000000 946402 T +rs12160935 single 22:20000001-21000000 946445 T +rs139511310 single 22:20000001-21000000 946675 T +rs165638 single 22:20000001-21000000 946862 T +rs165870 single 22:20000001-21000000 946907 T +rs165904 single 22:20000001-21000000 947016 G +rs165716 single 22:20000001-21000000 947082 G +rs361646 single 22:20000001-21000000 947273 G +rs177427 single 22:20000001-21000000 947366 A +rs77272549 single 22:20000001-21000000 947480 G +rs150549289 deletion 22:20000001-21000000 947834 4 +rs577762 single 22:20000001-21000000 947902 G +rs165708 single 22:20000001-21000000 947995 T +rs165723 single 22:20000001-21000000 948457 A +rs77737922 single 22:20000001-21000000 948489 G +rs147796987 insertion 22:20000001-21000000 948516 AAAG +rs4820534 single 22:20000001-21000000 948552 A +rs12160184 single 22:20000001-21000000 948761 A +rs5759496 single 22:20000001-21000000 949162 G +rs59219870 single 22:20000001-21000000 949242 G +rs361690 insertion 22:20000001-21000000 949302 A +rs59461209 single 22:20000001-21000000 949364 T +rs372403230 insertion 22:20000001-21000000 949468 AAAAA +rs58792806 single 22:20000001-21000000 949548 T +rs12160007 single 22:20000001-21000000 949577 G +rs140659230 deletion 22:20000001-21000000 949715 1 +rs658396 single 22:20000001-21000000 949856 T +rs76211167 single 22:20000001-21000000 950021 T +rs607893 single 22:20000001-21000000 950078 A +rs9620311 single 22:20000001-21000000 950116 C +rs4822328 single 22:20000001-21000000 950327 C +rs4822329 single 22:20000001-21000000 950429 A +rs362089 single 22:20000001-21000000 950472 G +rs554733 single 22:20000001-21000000 950563 T +rs117887561 single 22:20000001-21000000 950820 A +rs7410267 single 22:20000001-21000000 950858 C +rs139424286 single 22:20000001-21000000 950883 C +rs139565162 single 22:20000001-21000000 951011 A +rs181876047 single 22:20000001-21000000 
951110 T +rs28576992 single 22:20000001-21000000 951218 A +rs8135752 single 22:20000001-21000000 951260 G +rs115767262 single 22:20000001-21000000 951319 G +rs689145 single 22:20000001-21000000 951442 C +rs853122 single 22:20000001-21000000 951772 C +rs9624294 single 22:20000001-21000000 951846 A +rs11090281 single 22:20000001-21000000 951987 G +rs12170799 single 22:20000001-21000000 952038 T +rs7511235 single 22:20000001-21000000 952131 G +rs187454438 single 22:20000001-21000000 952170 G +rs685627 single 22:20000001-21000000 952225 C +rs115803514 single 22:20000001-21000000 952284 A +rs144972331 single 22:20000001-21000000 952518 T +rs853125 single 22:20000001-21000000 952635 G +rs138298773 deletion 22:20000001-21000000 952802 5 +rs114688043 single 22:20000001-21000000 952941 T +rs56075775 deletion 22:20000001-21000000 952969 1 +rs12166518 single 22:20000001-21000000 953028 T +rs142358110 single 22:20000001-21000000 953056 A +rs117381628 single 22:20000001-21000000 953187 G +rs34814924 deletion 22:20000001-21000000 953519 3 +rs545280536 deletion 22:20000001-21000000 953744 1 +rs80296680 single 22:20000001-21000000 953849 G +rs138098180 single 22:20000001-21000000 953876 T +rs684057 single 22:20000001-21000000 954189 T +rs34443648 single 22:20000001-21000000 954213 T +rs12166099 single 22:20000001-21000000 954686 G +rs12166147 single 22:20000001-21000000 954751 C +rs165785 single 22:20000001-21000000 954865 A +rs527831543 deletion 22:20000001-21000000 955474 1 +rs165919 single 22:20000001-21000000 955521 C +rs147181459 single 22:20000001-21000000 955750 A +rs74466416 single 22:20000001-21000000 955906 C +rs12165536 single 22:20000001-21000000 955935 T +rs143814858 single 22:20000001-21000000 955999 A +rs626568 single 22:20000001-21000000 956162 T +rs546419 single 22:20000001-21000000 956241 T +rs165703 single 22:20000001-21000000 956440 C +rs566704625 deletion 22:20000001-21000000 956568 2 +rs5759527 single 22:20000001-21000000 956599 T +rs144048639 deletion 
22:20000001-21000000 956662 2 +rs552823 single 22:20000001-21000000 956905 T +rs111348451 single 22:20000001-21000000 956992 C +rs9612414 single 22:20000001-21000000 957036 A +rs117727383 single 22:20000001-21000000 957111 A +rs60128632 single 22:20000001-21000000 957342 T +rs58090831 single 22:20000001-21000000 957401 A +rs5759533 single 22:20000001-21000000 957495 A +rs74756466 single 22:20000001-21000000 957598 A +rs111822094 single 22:20000001-21000000 957833 T +rs9612416 single 22:20000001-21000000 957866 A +rs35725919 insertion 22:20000001-21000000 957909 A +rs9612417 single 22:20000001-21000000 958043 T +rs12170292 single 22:20000001-21000000 958141 A +rs79337706 single 22:20000001-21000000 958312 C +rs74476273 single 22:20000001-21000000 958399 G +rs5751562 single 22:20000001-21000000 958460 A +rs5751562 single 22:20000001-21000000 958460 T +rs9620323 single 22:20000001-21000000 958487 T +rs9624321 single 22:20000001-21000000 958573 A +rs165897 single 22:20000001-21000000 958624 T +rs78264877 single 22:20000001-21000000 958706 A +rs5751564 single 22:20000001-21000000 958778 T +rs8141468 single 22:20000001-21000000 959070 T +rs13340094 single 22:20000001-21000000 959099 T +rs75679744 single 22:20000001-21000000 959167 T +rs526942 single 22:20000001-21000000 959251 G +rs34877965 deletion 22:20000001-21000000 959349 1 +rs528634 single 22:20000001-21000000 959406 A +rs731272 single 22:20000001-21000000 959690 T +rs3747078 single 22:20000001-21000000 959801 A +rs73877712 single 22:20000001-21000000 959914 G +rs731273 single 22:20000001-21000000 960185 A +rs76481083 single 22:20000001-21000000 960452 A +rs34944089 insertion 22:20000001-21000000 960567 G +rs879953 single 22:20000001-21000000 960624 G +rs115786135 single 22:20000001-21000000 960663 T +rs561595 single 22:20000001-21000000 960706 G +rs879955 single 22:20000001-21000000 960936 C +rs13340098 single 22:20000001-21000000 961108 T +rs13433634 single 22:20000001-21000000 961188 C +rs73386169 single 
22:20000001-21000000 961780 A +rs73386169 single 22:20000001-21000000 961780 T +rs17819593 single 22:20000001-21000000 962027 T +rs73386171 single 22:20000001-21000000 962210 C +rs67888103 single 22:20000001-21000000 962378 A +rs67695329 single 22:20000001-21000000 962418 A +rs510270 single 22:20000001-21000000 962560 A +rs73159154 single 22:20000001-21000000 962688 G +rs143565951 single 22:20000001-21000000 962771 G +rs5759551 single 22:20000001-21000000 962804 A +rs17819599 single 22:20000001-21000000 962977 T +rs514648 single 22:20000001-21000000 963039 A +rs556083381 deletion 22:20000001-21000000 963075 1 +rs6003435 single 22:20000001-21000000 963206 A +rs6003437 single 22:20000001-21000000 963267 A +rs1557798 single 22:20000001-21000000 963372 A +rs602808 single 22:20000001-21000000 963474 C +rs544887 single 22:20000001-21000000 964055 A +rs79101340 single 22:20000001-21000000 964103 C +rs58396621 single 22:20000001-21000000 964465 A +rs59007955 single 22:20000001-21000000 964755 T +rs680548 single 22:20000001-21000000 965554 C +rs112940211 single 22:20000001-21000000 965655 A +rs493316 single 22:20000001-21000000 965854 A +rs5759559 single 22:20000001-21000000 965898 T +rs5759560 single 22:20000001-21000000 966098 G +rs5759562 single 22:20000001-21000000 966124 A +rs115257028 single 22:20000001-21000000 966216 G +rs650538 single 22:20000001-21000000 966269 C +rs1108099 single 22:20000001-21000000 966395 G +rs1108101 single 22:20000001-21000000 966456 A +rs738040 single 22:20000001-21000000 966546 G +rs6003459 single 22:20000001-21000000 966726 G +rs6003460 single 22:20000001-21000000 966823 C +rs7285254 single 22:20000001-21000000 967163 G +rs7287256 single 22:20000001-21000000 967189 T +rs16988585 single 22:20000001-21000000 967286 G +rs472409 single 22:20000001-21000000 967332 T +rs473304 single 22:20000001-21000000 967449 A +rs12484196 single 22:20000001-21000000 967611 C +rs5759567 single 22:20000001-21000000 967755 A +rs145036582 insertion 
22:20000001-21000000 967970 T +rs853127 single 22:20000001-21000000 968291 A +rs619770 single 22:20000001-21000000 968582 C +rs58136388 single 22:20000001-21000000 968684 G +rs76937208 single 22:20000001-21000000 968705 G +rs114671525 single 22:20000001-21000000 968733 T +rs7285851 single 22:20000001-21000000 968763 C +rs78344658 single 22:20000001-21000000 968876 G +rs112669588 single 22:20000001-21000000 968932 G +rs617867 single 22:20000001-21000000 969009 C +rs148735102 single 22:20000001-21000000 969043 A +rs142285441 single 22:20000001-21000000 969184 T +rs141897641 single 22:20000001-21000000 969305 C +rs80189286 single 22:20000001-21000000 969578 G +rs147731752 single 22:20000001-21000000 969679 T +rs602738 single 22:20000001-21000000 970071 G +rs11090296 single 22:20000001-21000000 970174 T +rs9624340 single 22:20000001-21000000 970260 T +rs562536 single 22:20000001-21000000 970287 G +rs113244786 deletion 22:20000001-21000000 970321 12 +rs853131 single 22:20000001-21000000 970884 A +rs853131 single 22:20000001-21000000 970884 G +rs853131 single 22:20000001-21000000 970884 T +rs569817 single 22:20000001-21000000 971050 A +rs570795 single 22:20000001-21000000 971163 T +rs482165 single 22:20000001-21000000 971221 G +rs483273 single 22:20000001-21000000 971369 G +rs112133338 single 22:20000001-21000000 971452 A +rs543463705 insertion 22:20000001-21000000 971647 A +rs75156636 single 22:20000001-21000000 971738 C +rs5759575 single 22:20000001-21000000 971833 A +rs143269077 insertion 22:20000001-21000000 971920 T +rs143269077 insertion 22:20000001-21000000 971920 TT +rs143269077 insertion 22:20000001-21000000 971920 TTTTTG +rs143269077 insertion 22:20000001-21000000 971920 TTTTTT +rs118139208 single 22:20000001-21000000 971980 A +rs112372337 single 22:20000001-21000000 972101 A +rs490749 single 22:20000001-21000000 972175 T +rs5759576 single 22:20000001-21000000 972208 A +rs512705 single 22:20000001-21000000 972276 C +rs79438615 deletion 22:20000001-21000000 
972403 1 +rs79882895 single 22:20000001-21000000 972758 T +rs11912543 single 22:20000001-21000000 972845 T +rs654526 single 22:20000001-21000000 973385 C +rs67589781 deletion 22:20000001-21000000 973451 1 +rs8138867 single 22:20000001-21000000 973483 C +rs653181 single 22:20000001-21000000 973687 C +rs114547898 single 22:20000001-21000000 973905 A +rs2027369 single 22:20000001-21000000 973950 T +rs8141122 single 22:20000001-21000000 974460 T +rs199593637 insertion 22:20000001-21000000 974510 TA +rs201122671 deletion 22:20000001-21000000 974542 2 +rs116432563 single 22:20000001-21000000 974823 C +rs202212126 deletion 22:20000001-21000000 975199 1 +rs11090300 single 22:20000001-21000000 975591 T +rs623679 single 22:20000001-21000000 975689 G +rs111319236 deletion 22:20000001-21000000 975866 4 +rs9620335 single 22:20000001-21000000 975891 G +rs9620335 single 22:20000001-21000000 975891 T +rs73386179 single 22:20000001-21000000 975924 T +rs73386180 single 22:20000001-21000000 976037 A +rs78003513 single 22:20000001-21000000 976087 A +rs111455684 single 22:20000001-21000000 976133 A +rs557958 single 22:20000001-21000000 976357 G +rs143901038 single 22:20000001-21000000 976534 A +rs148714004 single 22:20000001-21000000 976605 C +rs77793416 single 22:20000001-21000000 977047 T +rs564493 single 22:20000001-21000000 977076 A +rs34404739 single 22:20000001-21000000 977266 T +rs687164 single 22:20000001-21000000 977291 T +rs687106 single 22:20000001-21000000 977338 T +rs531775638 single 22:20000001-21000000 977440 G +rs578134684 deletion 22:20000001-21000000 977498 1 +rs115769480 single 22:20000001-21000000 977553 A +rs144037599 single 22:20000001-21000000 977604 C +rs480471 single 22:20000001-21000000 977645 A +rs116434024 single 22:20000001-21000000 977676 C +rs685380 single 22:20000001-21000000 977718 G +rs142051118 insertion 22:20000001-21000000 977789 T +rs74607071 single 22:20000001-21000000 978213 T +rs79591662 single 22:20000001-21000000 978260 C +rs73159169 single 
22:20000001-21000000 978387 G +rs16988610 single 22:20000001-21000000 978496 C +rs28690416 single 22:20000001-21000000 978535 A +rs17818796 single 22:20000001-21000000 978671 G +rs670380 single 22:20000001-21000000 978718 C +rs8135803 single 22:20000001-21000000 978976 T +rs8135829 single 22:20000001-21000000 979028 A +rs8142362 single 22:20000001-21000000 979078 G +rs72050218 deletion 22:20000001-21000000 979145 2 +rs115556005 single 22:20000001-21000000 979319 A +rs115556005 single 22:20000001-21000000 979319 C +rs554397435 insertion 22:20000001-21000000 979805 T +rs10427922 single 22:20000001-21000000 979979 G +rs112030875 single 22:20000001-21000000 980004 C +rs1861078 single 22:20000001-21000000 980307 G +rs4822364 single 22:20000001-21000000 980356 T +rs73159173 single 22:20000001-21000000 980436 G +rs4822365 single 22:20000001-21000000 980558 T +rs150412261 single 22:20000001-21000000 980771 A +rs111445826 single 22:20000001-21000000 980796 A +rs2539916 single 22:20000001-21000000 980961 G +rs149404181 deletion 22:20000001-21000000 981661 1 +rs623999 single 22:20000001-21000000 982127 C +rs117536765 single 22:20000001-21000000 982391 A +rs77465539 deletion 22:20000001-21000000 982463 1 +rs9620348 single 22:20000001-21000000 982574 A +rs112467558 single 22:20000001-21000000 982698 G +rs376886796 insertion 22:20000001-21000000 982733 A +rs5759632 single 22:20000001-21000000 982875 T +rs7285862 single 22:20000001-21000000 983227 G +rs597996 single 22:20000001-21000000 983259 A +rs597996 single 22:20000001-21000000 983259 G +rs597996 single 22:20000001-21000000 983259 T +rs597617 single 22:20000001-21000000 983308 G +rs596235 single 22:20000001-21000000 983591 C +rs7286749 single 22:20000001-21000000 983615 T +rs115287577 single 22:20000001-21000000 983692 G +rs150886737 single 22:20000001-21000000 983752 C +rs117082775 single 22:20000001-21000000 983793 C +rs12483894 single 22:20000001-21000000 983829 G +rs555454 single 22:20000001-21000000 984032 T 
+rs115952739 single 22:20000001-21000000 984184 A +rs578356 single 22:20000001-21000000 984247 C +rs375348388 deletion 22:20000001-21000000 984356 1 +rs78263078 single 22:20000001-21000000 984390 T +rs2039822 single 22:20000001-21000000 984724 C +rs61417886 deletion 22:20000001-21000000 984768 3 +rs5759640 single 22:20000001-21000000 984877 T +rs16988629 single 22:20000001-21000000 984999 A +rs673440 single 22:20000001-21000000 985782 G +rs114160819 single 22:20000001-21000000 985927 C +rs741192 single 22:20000001-21000000 986009 G +rs602269 single 22:20000001-21000000 986070 A +rs73159179 single 22:20000001-21000000 986254 C +rs6519496 single 22:20000001-21000000 986298 T +rs12627833 single 22:20000001-21000000 986382 T +rs565048 single 22:20000001-21000000 986546 T +rs7364324 single 22:20000001-21000000 986656 A +rs11090304 insertion 22:20000001-21000000 986699 G +rs41277303 single 22:20000001-21000000 986933 G +rs7364305 single 22:20000001-21000000 986963 G +rs10854776 single 22:20000001-21000000 987463 T +rs585467 single 22:20000001-21000000 987525 T +rs9624384 single 22:20000001-21000000 987856 T +rs583184 single 22:20000001-21000000 988044 T +rs1974310 single 22:20000001-21000000 988172 A +rs1974309 single 22:20000001-21000000 988232 A +rs853108 single 22:20000001-21000000 988371 G +rs4822376 single 22:20000001-21000000 988748 C +rs664193 single 22:20000001-21000000 989381 A +rs5759659 single 22:20000001-21000000 989408 T +rs113275556 single 22:20000001-21000000 989498 C +rs149210767 single 22:20000001-21000000 989541 T +rs662758 single 22:20000001-21000000 989748 G +rs1003718 single 22:20000001-21000000 989897 A +rs139226225 single 22:20000001-21000000 989962 T +rs661440 single 22:20000001-21000000 990056 C +rs79261470 single 22:20000001-21000000 990134 T +rs57517020 single 22:20000001-21000000 990255 T +rs75042821 single 22:20000001-21000000 990292 A +rs648936 single 22:20000001-21000000 990518 A +rs145875329 single 22:20000001-21000000 990569 A +rs17757910 
single 22:20000001-21000000 990661 T +rs12166248 single 22:20000001-21000000 990854 A +rs580204 single 22:20000001-21000000 991061 C +rs5751615 single 22:20000001-21000000 991128 C +rs5751616 single 22:20000001-21000000 991234 C +rs633773 single 22:20000001-21000000 991577 G +rs9612546 single 22:20000001-21000000 991636 A +rs2080195 single 22:20000001-21000000 991770 G +rs78488852 single 22:20000001-21000000 992012 G +rs5996510 single 22:20000001-21000000 993307 T +rs741193 single 22:20000001-21000000 993466 A +rs741194 single 22:20000001-21000000 993518 A +rs73877727 single 22:20000001-21000000 993549 C +rs5996512 single 22:20000001-21000000 993621 A +rs741195 single 22:20000001-21000000 993765 G +rs2016016 single 22:20000001-21000000 993796 A +rs9612548 single 22:20000001-21000000 993960 T +rs117692845 single 22:20000001-21000000 993989 T +rs113977853 single 22:20000001-21000000 994079 C +rs2539918 single 22:20000001-21000000 994185 T +rs78464105 single 22:20000001-21000000 994219 G +rs9624394 single 22:20000001-21000000 994343 C +rs7292155 single 22:20000001-21000000 994541 G +rs56772365 single 22:20000001-21000000 994643 T +rs78817229 single 22:20000001-21000000 994722 G +rs9754375 single 22:20000001-21000000 994748 A +rs117437821 single 22:20000001-21000000 994770 T +rs7288217 single 22:20000001-21000000 994951 C +rs12484849 single 22:20000001-21000000 994996 C +rs113982879 single 22:20000001-21000000 995097 A +rs10522714 insertion 22:20000001-21000000 995287 AAATAAAT +rs505603 single 22:20000001-21000000 995479 C +rs759226 single 22:20000001-21000000 995606 C +rs510954 single 22:20000001-21000000 996025 C +rs201135342 deletion 22:20000001-21000000 996055 1 +rs141761092 single 22:20000001-21000000 996403 A +rs658793 single 22:20000001-21000000 996594 T +rs61351538 deletion 22:20000001-21000000 997219 1 +rs9624401 single 22:20000001-21000000 997320 A +rs673017 single 22:20000001-21000000 997482 T +rs35333818 deletion 22:20000001-21000000 997597 1 +rs491264 
single 22:20000001-21000000 997644 G +rs674393 single 22:20000001-21000000 997810 A +rs759228 single 22:20000001-21000000 998185 A +rs115900295 single 22:20000001-21000000 998450 G +rs5759729 single 22:20000001-21000000 998534 A +rs520609 single 22:20000001-21000000 998583 A +rs11090308 single 22:20000001-21000000 998884 A +rs2003574 single 22:20000001-21000000 998964 A +rs113572801 single 22:20000001-21000000 999020 T +rs526119 single 22:20000001-21000000 999189 G diff --git a/extract_exons.py b/extract_exons.py new file mode 100644 index 0000000..42f705d --- /dev/null +++ b/extract_exons.py @@ -0,0 +1 @@ +hisat2_extract_exons.py \ No newline at end of file diff --git a/extract_splice_sites.py b/extract_splice_sites.py new file mode 100644 index 0000000..7b09145 --- /dev/null +++ b/extract_splice_sites.py @@ -0,0 +1 @@ +hisat2_extract_splice_sites.py \ No newline at end of file diff --git a/fast_mutex.h b/fast_mutex.h new file mode 100644 index 0000000..fbd7846 --- /dev/null +++ b/fast_mutex.h @@ -0,0 +1,294 @@ +/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; -*- +Copyright (c) 2010-2012 Marcus Geelnard + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. 
+ +***************************************************************************** +Copyright (c) 2016 Nigel Dyer +This version has been modified from the original, whose original Copyright notice +is reproduced above. The permissions continue as above. + +The software has been modified in three ways: + + a) mHandle is now a mutable pointer variable which is passed to the + copy in the new =operator. This allows it to be pushed into a vector or equivalent + It is not intended to allow multiple copies of a mutex to be created + + b) The code for 64bit MSC builds uses InterlockedExchange which provides more + efficient access to the xchg assembler instruction than implementing the code as + a function in a separate .asm file. MSC does not support in line assembler for 64 bit builds. + + c) The NO_FAST_MUTEX_ASM option has been introduced + +*/ + +#ifndef _FAST_MUTEX_H_ +#define _FAST_MUTEX_H_ + +/// @file + +// Which platform are we on? +#if !defined(_TTHREAD_PLATFORM_DEFINED_) + #if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__) + #define _TTHREAD_WIN32_ + #else + #define _TTHREAD_POSIX_ + #endif + #define _TTHREAD_PLATFORM_DEFINED_ +#endif + +// Check if we can support the assembly language level implementation (otherwise +// revert to the system API) +#if !defined NO_FAST_MUTEX_ASM && ((defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || \ + (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || \ + (defined(__GNUC__) && (defined(__ppc__)))) + #define _FAST_MUTEX_ASM_ +#else + #define _FAST_MUTEX_SYS_ +#endif + +#if defined(_TTHREAD_WIN32_) + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #define __UNDEF_LEAN_AND_MEAN + #endif + #include + #ifdef __UNDEF_LEAN_AND_MEAN + #undef WIN32_LEAN_AND_MEAN + #undef __UNDEF_LEAN_AND_MEAN + #endif +#else + #ifdef _FAST_MUTEX_ASM_ + #include + #else + #include + #endif +#endif + +namespace tthread { + +/// Fast mutex class. 
+/// This is a mutual exclusion object for synchronizing access to shared +/// memory areas for several threads. It is similar to the tthread::mutex class, +/// but instead of using system level functions, it is implemented as an atomic +/// spin lock with very low CPU overhead. +/// +/// The \c fast_mutex class is NOT compatible with the \c condition_variable +/// class (however, it IS compatible with the \c lock_guard class). It should +/// also be noted that the \c fast_mutex class typically does not provide +/// as accurate thread scheduling as a the standard \c mutex class does. +/// +/// Because of the limitations of the class, it should only be used in +/// situations where the mutex needs to be locked/unlocked very frequently. +/// +/// @note The "fast" version of this class relies on inline assembler language, +/// which is currently only supported for 32/64-bit Intel x86/AMD64 and +/// PowerPC architectures on a limited number of compilers (GNU g++ and MS +/// Visual C++). +/// For other architectures/compilers, system functions are used instead. +class fast_mutex { + public: + /// Constructor. +#if defined(_FAST_MUTEX_ASM_) + fast_mutex() : mLock(0) {} +#else + fast_mutex() + { + #if defined(_TTHREAD_WIN32_) + mHandle = new CRITICAL_SECTION(); + InitializeCriticalSection(mHandle); + #elif defined(_TTHREAD_POSIX_) + mHandle = new pthread_mutex_t(); + pthread_mutex_init(mHandle, NULL); + #endif + } +#endif + +#if !defined(_FAST_MUTEX_ASM_) + /// Destructor. 
+ ~fast_mutex() + { + if (mHandle) + { + #if defined(_TTHREAD_WIN32_) + DeleteCriticalSection(mHandle); +#elif defined(_TTHREAD_POSIX_) + pthread_mutex_destroy(mHandle); +#endif + delete mHandle; + mHandle = 0; + } + } + /// The handle is passed from the source to the desitination + /// Used primarily when mutexes are pushed onto a List either on their + /// own or as a member variable of another classs + fast_mutex & operator = (const fast_mutex& fm) + { + mHandle = fm.mHandle; + fm.mHandle = 0; + return *this; + } +#endif + + /// Lock the mutex. + /// The method will block the calling thread until a lock on the mutex can + /// be obtained. The mutex remains locked until \c unlock() is called. + /// @see lock_guard + inline void lock() + { +#if defined(_FAST_MUTEX_ASM_) + bool gotLock; + do { + gotLock = try_lock(); + if(!gotLock) + { +#if defined(_TTHREAD_WIN32_) + Sleep(0); +#elif defined(_TTHREAD_POSIX_) + sched_yield(); +#endif + } + } while(!gotLock); +#else + #if defined(_TTHREAD_WIN32_) + EnterCriticalSection(mHandle); + #elif defined(_TTHREAD_POSIX_) + pthread_mutex_lock(mHandle); + #endif +#endif + } + + /// Try to lock the mutex. + /// The method will try to lock the mutex. If it fails, the function will + /// return immediately (non-blocking). + /// @return \c true if the lock was acquired, or \c false if the lock could + /// not be acquired. 
+ inline bool try_lock() + { +#if defined(_FAST_MUTEX_ASM_) + int oldLock; + #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + asm volatile ( + "movl $1,%%eax\n\t" + "xchg %%eax,%0\n\t" + "movl %%eax,%1\n\t" + : "=m" (mLock), "=m" (oldLock) + : + : "%eax", "memory" + ); + #elif defined(_MSC_VER) + #if defined(_M_IX86) + int *ptrLock = &mLock; + __asm { + mov eax, 1 + mov ecx, ptrLock + xchg eax, [ecx] + mov oldLock, eax + } + #elif defined(_M_X64) + oldLock = InterlockedExchange(&mLock, 1); + #endif + #elif defined(__GNUC__) && (defined(__ppc__)) + int newLock = 1; + asm volatile ( + "\n1:\n\t" + "lwarx %0,0,%1\n\t" + "cmpwi 0,%0,0\n\t" + "bne- 2f\n\t" + "stwcx. %2,0,%1\n\t" + "bne- 1b\n\t" + "isync\n" + "2:\n\t" + : "=&r" (oldLock) + : "r" (&mLock), "r" (newLock) + : "cr0", "memory" + ); + #endif + return (oldLock == 0); +#else + #if defined(_TTHREAD_WIN32_) + return TryEnterCriticalSection(mHandle) ? true : false; + #elif defined(_TTHREAD_POSIX_) + return (pthread_mutex_trylock(mHandle) == 0) ? true : false; + #endif +#endif + } + + /// Unlock the mutex. + /// If any threads are waiting for the lock on this mutex, one of them will + /// be unblocked. + inline void unlock() + { +#if defined(_FAST_MUTEX_ASM_) + #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + asm volatile ( + "movl $0,%%eax\n\t" + "xchg %%eax,%0\n\t" + : "=m" (mLock) + : + : "%eax", "memory" + ); + #elif defined(_MSC_VER) + #if defined(_M_IX86) + int *ptrLock = &mLock; + __asm { + mov eax,0 + mov ecx,ptrLock + xchg eax,[ecx] + } + #elif defined(_M_X64) + InterlockedExchange(&mLock, 0); + #endif + #elif defined(__GNUC__) && (defined(__ppc__)) + asm volatile ( + "sync\n\t" // Replace with lwsync where possible? 
+ : : : "memory" + ); + mLock = 0; + #endif +#else + #if defined(_TTHREAD_WIN32_) + LeaveCriticalSection(mHandle); + #elif defined(_TTHREAD_POSIX_) + pthread_mutex_unlock(mHandle); + #endif +#endif + } + + private: +#if defined(_FAST_MUTEX_ASM_) +#if defined(_M_X64) && defined (_MSC_VER) + long mLock; +#else + int mLock; +#endif +#else + #if defined(_TTHREAD_WIN32_) + mutable CRITICAL_SECTION * mHandle; + #elif defined(_TTHREAD_POSIX_) + mutable pthread_mutex_t * mHandle; + #endif +#endif +}; + +} + +#endif // _FAST_MUTEX_H_ + diff --git a/filebuf.h b/filebuf.h new file mode 100644 index 0000000..66dffb4 --- /dev/null +++ b/filebuf.h @@ -0,0 +1,718 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef FILEBUF_H_ +#define FILEBUF_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "assert_helpers.h" + +/** + * Simple, fast helper for determining if a character is a newline. + */ +static inline bool isnewline(int c) { + return c == '\r' || c == '\n'; +} + +/** + * Simple, fast helper for determining if a character is a non-newline + * whitespace character. + */ +static inline bool isspace_notnl(int c) { + return isspace(c) && !isnewline(c); +} + +/** + * Simple wrapper for a FILE*, istream or ifstream that reads it in chunks + * using fread and keeps those chunks in a buffer. 
It also services calls to + * get(), peek() and gets() from the buffer, reading in additional chunks when + * necessary. + * + * Helper functions do things like parse strings, numbers, and FASTA records. + * + * + */ +class FileBuf { +public: + FileBuf() { + init(); + } + + FileBuf(FILE *in) { + init(); + _in = in; + assert(_in != NULL); + } + + FileBuf(std::ifstream *inf) { + init(); + _inf = inf; + assert(_inf != NULL); + } + + FileBuf(std::istream *ins) { + init(); + _ins = ins; + assert(_ins != NULL); + } + + /** + * Return true iff there is a stream ready to read. + */ + bool isOpen() { + return _in != NULL || _inf != NULL || _ins != NULL; + } + + /** + * Close the input stream (if that's possible) + */ + void close() { + if(_in != NULL && _in != stdin) { + fclose(_in); + } else if(_inf != NULL) { + _inf->close(); + } else { + // can't close _ins + } + } + + /** + * Get the next character of input and advance. + */ + int get() { + assert(_in != NULL || _inf != NULL || _ins != NULL); + int c = peek(); + if(c != -1) { + _cur++; + if(_lastn_cur < LASTN_BUF_SZ) _lastn_buf[_lastn_cur++] = c; + } + return c; + } + + /** + * Return true iff all input is exhausted. + */ + bool eof() { + return (_cur == _buf_sz) && _done; + } + + /** + * Initialize the buffer with a new C-style file. + */ + void newFile(FILE *in) { + _in = in; + _inf = NULL; + _ins = NULL; + _cur = BUF_SZ; + _buf_sz = BUF_SZ; + _done = false; + } + + /** + * Initialize the buffer with a new ifstream. + */ + void newFile(std::ifstream *__inf) { + _in = NULL; + _inf = __inf; + _ins = NULL; + _cur = BUF_SZ; + _buf_sz = BUF_SZ; + _done = false; + } + + /** + * Initialize the buffer with a new istream. + */ + void newFile(std::istream *__ins) { + _in = NULL; + _inf = NULL; + _ins = __ins; + _cur = BUF_SZ; + _buf_sz = BUF_SZ; + _done = false; + } + + /** + * Restore state as though we just started reading the input + * stream. 
+ */ + void reset() { + if(_inf != NULL) { + _inf->clear(); + _inf->seekg(0, std::ios::beg); + } else if(_ins != NULL) { + _ins->clear(); + _ins->seekg(0, std::ios::beg); + } else { + rewind(_in); + } + _cur = BUF_SZ; + _buf_sz = BUF_SZ; + _done = false; + } + + /** + * Peek at the next character of the input stream without + * advancing. Typically we can simple read it from the buffer. + * Occasionally we'll need to read in a new buffer's worth of data. + */ + int peek() { + assert(_in != NULL || _inf != NULL || _ins != NULL); + assert_leq(_cur, _buf_sz); + if(_cur == _buf_sz) { + if(_done) { + // We already exhausted the input stream + return -1; + } + // Read a new buffer's worth of data + else { + // Get the next chunk + if(_inf != NULL) { + _inf->read((char*)_buf, BUF_SZ); + _buf_sz = _inf->gcount(); + } else if(_ins != NULL) { + _ins->read((char*)_buf, BUF_SZ); + _buf_sz = _ins->gcount(); + } else { + assert(_in != NULL); + _buf_sz = fread(_buf, 1, BUF_SZ, _in); + } + _cur = 0; + if(_buf_sz == 0) { + // Exhausted, and we have nothing to return to the + // caller + _done = true; + return -1; + } else if(_buf_sz < BUF_SZ) { + // Exhausted + _done = true; + } + } + } + return (int)_buf[_cur]; + } + + /** + * Store a string of characters from the input file into 'buf', + * until we see a newline, EOF, or until 'len' characters have been + * read. 
+ */ + size_t gets(char *buf, size_t len) { + size_t stored = 0; + while(true) { + int c = get(); + if(c == -1) { + // End-of-file + buf[stored] = '\0'; + return stored; + } + if(stored == len-1 || isnewline(c)) { + // End of string + buf[stored] = '\0'; + // Skip over all end-of-line characters + int pc = peek(); + while(isnewline(pc)) { + get(); // discard + pc = peek(); + } + // Next get() will be after all newline characters + return stored; + } + buf[stored++] = (char)c; + } + } + + /** + * Store a string of characters from the input file into 'buf', + * until we see a newline, EOF, or until 'len' characters have been + * read. + */ + size_t get(char *buf, size_t len) { + size_t stored = 0; + for(size_t i = 0; i < len; i++) { + int c = get(); + if(c == -1) return i; + buf[stored++] = (char)c; + } + return len; + } + + static const size_t LASTN_BUF_SZ = 8 * 1024; + + /** + * Keep get()ing characters until a non-whitespace character (or + * -1) is reached, and return it. + */ + int getPastWhitespace() { + int c; + while(isspace(c = get()) && c != -1); + return c; + } + + /** + * Keep get()ing characters until a we've passed over the next + * string of newline characters (\r's and \n's) or -1 is reached, + * and return it. + */ + int getPastNewline() { + int c = get(); + while(!isnewline(c) && c != -1) c = get(); + while(isnewline(c)) c = get(); + assert_neq(c, '\r'); + assert_neq(c, '\n'); + return c; + } + + /** + * Keep get()ing characters until a we've passed over the next + * string of newline characters (\r's and \n's) or -1 is reached, + * and return it. + */ + int peekPastNewline() { + int c = peek(); + while(!isnewline(c) && c != -1) c = get(); + while(isnewline(c)) c = get(); + assert_neq(c, '\r'); + assert_neq(c, '\n'); + return c; + } + + /** + * Keep peek()ing then get()ing characters until the next return + * from peek() is just after the last newline of the line. 
+ */ + int peekUptoNewline() { + int c = peek(); + while(!isnewline(c) && c != -1) { + get(); c = peek(); + } + while(isnewline(c)) { + get(); + c = peek(); + } + assert_neq(c, '\r'); + assert_neq(c, '\n'); + return c; + } + + /** + * Parse a FASTA record. Append name characters to 'name' and and append + * all sequence characters to 'seq'. If gotCaret is true, assuming the + * file cursor has already moved just past the starting '>' character. + */ + template + void parseFastaRecord( + TNameStr& name, + TSeqStr& seq, + bool gotCaret = false) + { + int c; + if(!gotCaret) { + // Skip over caret and non-newline whitespace + c = peek(); + while(isspace_notnl(c) || c == '>') { get(); c = peek(); } + } else { + // Skip over non-newline whitespace + c = peek(); + while(isspace_notnl(c)) { get(); c = peek(); } + } + size_t namecur = 0, seqcur = 0; + // c is the first character of the fasta name record, or is the first + // newline character if the name record is empty + while(!isnewline(c) && c != -1) { + name[namecur++] = c; get(); c = peek(); + } + // sequence consists of all the non-whitespace characters between here + // and the next caret + while(true) { + // skip over whitespace + while(isspace(c)) { get(); c = peek(); } + // if we see caret or EOF, break + if(c == '>' || c == -1) break; + // append and continue + seq[seqcur++] = c; + get(); c = peek(); + } + } + + /** + * Parse a FASTA record and return its length. If gotCaret is true, + * assuming the file cursor has already moved just past the starting '>' + * character. 
+ */ + void parseFastaRecordLength( + size_t& nameLen, + size_t& seqLen, + bool gotCaret = false) + { + int c; + nameLen = seqLen = 0; + if(!gotCaret) { + // Skip over caret and non-newline whitespace + c = peek(); + while(isspace_notnl(c) || c == '>') { get(); c = peek(); } + } else { + // Skip over non-newline whitespace + c = peek(); + while(isspace_notnl(c)) { get(); c = peek(); } + } + // c is the first character of the fasta name record, or is the first + // newline character if the name record is empty + while(!isnewline(c) && c != -1) { + nameLen++; get(); c = peek(); + } + // sequence consists of all the non-whitespace characters between here + // and the next caret + while(true) { + // skip over whitespace + while(isspace(c)) { get(); c = peek(); } + // if we see caret or EOF, break + if(c == '>' || c == -1) break; + // append and continue + seqLen++; + get(); c = peek(); + } + } + + /** + * Reset to the beginning of the last-N-chars buffer. + */ + void resetLastN() { + _lastn_cur = 0; + } + + /** + * Copy the last several characters in the last-N-chars buffer + * (since the last reset) into the provided buffer. + */ + size_t copyLastN(char *buf) { + memcpy(buf, _lastn_buf, _lastn_cur); + return _lastn_cur; + } + + /** + * Get const pointer to the last-N-chars buffer. + */ + const char *lastN() const { + return _lastn_buf; + } + + /** + * Get current size of the last-N-chars buffer. 
+ */ + size_t lastNLen() const { + return _lastn_cur; + } + +private: + + void init() { + _in = NULL; + _inf = NULL; + _ins = NULL; + _cur = _buf_sz = BUF_SZ; + _done = false; + _lastn_cur = 0; + // no need to clear _buf[] + } + + static const size_t BUF_SZ = 256 * 1024; + FILE *_in; + std::ifstream *_inf; + std::istream *_ins; + size_t _cur; + size_t _buf_sz; + bool _done; + uint8_t _buf[BUF_SZ]; // (large) input buffer + size_t _lastn_cur; + char _lastn_buf[LASTN_BUF_SZ]; // buffer of the last N chars dispensed +}; + +/** + * Wrapper for a buffered output stream that writes bitpairs. + */ +class BitpairOutFileBuf { +public: + /** + * Open a new output stream to a file with given name. + */ + BitpairOutFileBuf(const char *in) : bpPtr_(0), cur_(0) { + assert(in != NULL); + out_ = fopen(in, "wb"); + if(out_ == NULL) { + std::cerr << "Error: Could not open bitpair-output file " << in << std::endl; + throw 1; + } + memset(buf_, 0, BUF_SZ); + } + + /** + * Write a single bitpair into the buf. Flush the buffer if it's + * full. 
+ */ + void write(int bp) { + assert_lt(bp, 4); + assert_geq(bp, 0); + buf_[cur_] |= (bp << bpPtr_); + if(bpPtr_ == 6) { + bpPtr_ = 0; + cur_++; + if(cur_ == BUF_SZ) { + // Flush the buffer + if(!fwrite((const void *)buf_, BUF_SZ, 1, out_)) { + std::cerr << "Error writing to the reference index file (.4.ebwt)" << std::endl; + throw 1; + } + // Reset to beginning of the buffer + cur_ = 0; + } + // Initialize next octet to 0 + buf_[cur_] = 0; + } else { + bpPtr_ += 2; + } + } + + /** + * Write any remaining bitpairs and then close the input + */ + void close() { + if(cur_ > 0 || bpPtr_ > 0) { + if(bpPtr_ == 0) cur_--; + if(!fwrite((const void *)buf_, cur_ + 1, 1, out_)) { + std::cerr << "Error writing to the reference index file (.4.ebwt)" << std::endl; + throw 1; + } + } + fclose(out_); + } +private: + static const size_t BUF_SZ = 128 * 1024; + FILE *out_; + int bpPtr_; + size_t cur_; + char buf_[BUF_SZ]; // (large) input buffer +}; + +/** + * Wrapper for a buffered output stream that writes characters and + * other data types. This class is *not* synchronized; the caller is + * responsible for synchronization. + */ +class OutFileBuf { + +public: + + /** + * Open a new output stream to a file with given name. + */ + OutFileBuf(const std::string& out, bool binary = false) : + name_(out.c_str()), cur_(0), closed_(false) + { + out_ = fopen(out.c_str(), binary ? "wb" : "w"); + if(out_ == NULL) { + std::cerr << "Error: Could not open alignment output file " << out.c_str() << std::endl; + throw 1; + } + if(setvbuf(out_, NULL, _IOFBF, 10* 1024* 1024)) + std::cerr << "Warning: Could not allocate the proper buffer size for output file stream. " << std::endl; + } + + /** + * Open a new output stream to a file with given name. + */ + OutFileBuf(const char *out, bool binary = false) : + name_(out), cur_(0), closed_(false) + { + assert(out != NULL); + out_ = fopen(out, binary ? 
"wb" : "w"); + if(out_ == NULL) { + std::cerr << "Error: Could not open alignment output file " << out << std::endl; + throw 1; + } + } + + /** + * Open a new output stream to standard out. + */ + OutFileBuf() : name_("cout"), cur_(0), closed_(false) { + out_ = stdout; + } + + /** + * Close buffer when object is destroyed. + */ + ~OutFileBuf() { close(); } + + /** + * Open a new output stream to a file with given name. + */ + void setFile(const char *out, bool binary = false) { + assert(out != NULL); + out_ = fopen(out, binary ? "wb" : "w"); + if(out_ == NULL) { + std::cerr << "Error: Could not open alignment output file " << out << std::endl; + throw 1; + } + reset(); + } + + /** + * Write a single character into the write buffer and, if + * necessary, flush. + */ + void write(char c) { + assert(!closed_); + if(cur_ == BUF_SZ) flush(); + buf_[cur_++] = c; + } + + /** + * Write a c++ string to the write buffer and, if necessary, flush. + */ + void writeString(const std::string& s) { + assert(!closed_); + size_t slen = s.length(); + if(cur_ + slen > BUF_SZ) { + if(cur_ > 0) flush(); + if(slen >= BUF_SZ) { + fwrite(s.c_str(), slen, 1, out_); + } else { + memcpy(&buf_[cur_], s.data(), slen); + assert_eq(0, cur_); + cur_ = slen; + } + } else { + memcpy(&buf_[cur_], s.data(), slen); + cur_ += slen; + } + assert_leq(cur_, BUF_SZ); + } + + /** + * Write a c++ string to the write buffer and, if necessary, flush. + */ + template + void writeString(const T& s) { + assert(!closed_); + size_t slen = s.length(); + if(cur_ + slen > BUF_SZ) { + if(cur_ > 0) flush(); + if(slen >= BUF_SZ) { + fwrite(s.toZBuf(), slen, 1, out_); + } else { + memcpy(&buf_[cur_], s.toZBuf(), slen); + assert_eq(0, cur_); + cur_ = slen; + } + } else { + memcpy(&buf_[cur_], s.toZBuf(), slen); + cur_ += slen; + } + assert_leq(cur_, BUF_SZ); + } + + /** + * Write a c++ string to the write buffer and, if necessary, flush. 
+ */ + void writeChars(const char * s, size_t len) { + assert(!closed_); + if(cur_ + len > BUF_SZ) { + if(cur_ > 0) flush(); + if(len >= BUF_SZ) { + fwrite(s, len, 1, out_); + } else { + memcpy(&buf_[cur_], s, len); + assert_eq(0, cur_); + cur_ = len; + } + } else { + memcpy(&buf_[cur_], s, len); + cur_ += len; + } + assert_leq(cur_, BUF_SZ); + } + + /** + * Write a 0-terminated C string to the output stream. + */ + void writeChars(const char * s) { + writeChars(s, strlen(s)); + } + + /** + * Write any remaining bitpairs and then close the input + */ + void close() { + if(closed_) return; + if(cur_ > 0) flush(); + closed_ = true; + if(out_ != stdout) { + fclose(out_); + } + } + + /** + * Reset so that the next write is as though it's the first. + */ + void reset() { + cur_ = 0; + closed_ = false; + } + + void flush() { + if(!fwrite((const void *)buf_, cur_, 1, out_)) { + std::cerr << "Error while flushing and closing output" << std::endl; + throw 1; + } + cur_ = 0; + } + + /** + * Return true iff this stream is closed. + */ + bool closed() const { + return closed_; + } + + /** + * Return the filename. + */ + const char *name() { + return name_; + } + +private: + + static const size_t BUF_SZ = 16 * 1024; + + const char *name_; + FILE *out_; + size_t cur_; + char buf_[BUF_SZ]; // (large) input buffer + bool closed_; +}; + +#endif /*ndef FILEBUF_H_*/ diff --git a/formats.h b/formats.h new file mode 100644 index 0000000..05ee679 --- /dev/null +++ b/formats.h @@ -0,0 +1,57 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef FORMATS_H_ +#define FORMATS_H_ + +#include + +/** + * File-format constants and names + */ + +enum file_format { + FASTA = 1, + FASTA_CONT, + FASTQ, + TAB_MATE5, + TAB_MATE6, + RAW, + CMDLINE, + QSEQ, + SRA_FASTA, + SRA_FASTQ +}; + +static const std::string file_format_names[] = { + "Invalid!", + "FASTA", + "FASTA sampling", + "FASTQ", + "Tabbed mated", + "Raw", + "Command line", + "Chain file", + "Random", + "Qseq", + "SRA_FASTA", + "SRA_FASTQ" +}; + +#endif /*FORMATS_H_*/ diff --git a/gbwt_graph.h b/gbwt_graph.h new file mode 100644 index 0000000..0ab86f0 --- /dev/null +++ b/gbwt_graph.h @@ -0,0 +1,2797 @@ +/* + * Copyright 2015, Daehwan Kim , Joe Paggi + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . 
+ */ + +#ifndef GBWT_GRAPH_H_ +#define GBWT_GRAPH_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "alt.h" +#include "radix_sort.h" + +// Reference: +// Jouni Sirén, Niko Välimäki, and Veli Mäkinen: Indexing Graphs for Path Queries with Applications in Genome Research. +// IEEE/ACM Transactions on Computational Biology and Bioinformatics 11(2):375-388, 2014. +// http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6698337 + +//-------------------------------------------------------------------------- + +struct NongraphException : public exception +{ + const char* what () const throw () + { + return "Nongraph exception"; + } +}; + +struct ExplosionException : public exception +{ + const char* what () const throw () + { + return "Explosion exception"; + } +}; + +template class PathGraph; + +// Note: I wrote the following codes based on Siren's work, gcsa (see the reference above). +template +class RefGraph { + friend class PathGraph; +public: + struct Node { + char label; // ACGT + Y(head) + Z(tail) + index_t value; // location in a whole genome + + Node() { reset(); } + Node(char label_, index_t value_) : label(label_), value(value_) {} + void reset() { label = 0; value = 0; } + + bool write(ofstream& f_out, bool bigEndian) const { + writeIndex(f_out, value, bigEndian); + writeU16(f_out, label, bigEndian); + return true; + } + + bool read(ifstream& f_in, bool bigEndian) { + value = readIndex(f_in, bigEndian); + label = (char)readU16(f_in, bigEndian); + return true; + } + + bool operator== (const Node& o) const { + if(value != o.value) return false; + if(label != o.label) return false; + return true; + } + + bool operator< (const Node& o) const { + if(value != o.value) return value < o.value; + return label < o.label; + } + }; + + struct Edge { + index_t from; // from Node + index_t to; // to Node + + Edge() {} + Edge(index_t from_, index_t to_) : from(from_), to(to_) {} + + bool write(ofstream& f_out, bool 
bigEndian) const { + writeIndex(f_out, from, bigEndian); + writeIndex(f_out, to, bigEndian); + return true; + } + + bool read(ifstream& f_in, bool bigEndian) { + from = readIndex(f_in, bigEndian); + to = readIndex(f_in, bigEndian); + return true; + } + + bool operator< (const Edge& o) const { + if(from != o.from) return from < o.from; + return to < o.to; + } + }; + + static index_t EdgeTo (Edge& a) { + return a.to; + } + + struct EdgeFromCmp { + bool operator() (const Edge& a, const Edge& b) const { + return a.from < b.from; + } + }; + + struct EdgeToCmp { + bool operator() (const Edge& a, const Edge& b) const { + return a.to < b.to; + }; + }; + +public: + RefGraph(const SString& s, + const EList& szs, + const EList >& alts, + const EList >& haplotypes, + const string& out_fname, + int nthreads_, + bool verbose); + + bool repOk() { return true; } + + void write(const string& fname, bool bigEndian) { + ofstream rg_file(fname.c_str(), ios::binary); + if(!rg_file.good()) { + cerr << "Could not open file for writing a reference graph: \"" << fname << "\"" << endl; + throw 1; + } + writeIndex(rg_file, (index_t)nodes.size(), bigEndian); + for(index_t i = 0; i < nodes.size(); i++) { + nodes[i].write(rg_file, bigEndian); + } + writeIndex(rg_file, (index_t)edges.size(), bigEndian); + for(index_t i = 0; i < edges.size(); i++) { + edges[i].write(rg_file, bigEndian); + } + rg_file.close(); + } + + void nullify() { + nodes.nullify(); + edges.nullify(); + } + + void read(const string& fname, bool bigEndian) { + ifstream rg_file(fname.c_str(), ios::binary); + if(!rg_file.good()) { + cerr << "Could not open file for reading a reference graph: \"" << fname << "\"" << endl; + throw 1; + } + index_t num_nodes = readIndex(rg_file, bigEndian); + nodes.resizeNoCopyExact(num_nodes); + for(index_t i = 0; i < num_nodes; i++) { + nodes[i].read(rg_file, bigEndian); + } + index_t num_edges = readIndex(rg_file, bigEndian); + edges.resizeNoCopyExact(num_edges); + for(index_t i = 0; i < 
num_edges; i++) { + edges[i].read(rg_file, bigEndian); + } + rg_file.close(); + } + +private: + static bool isReverseDeterministic(EList& nodes, EList& edges); + static void reverseDeterminize(EList& nodes, EList& edges, index_t& lastNode, index_t lastNode_add = 0); + + static void sortEdgesFrom(EList& edges) { + std::sort(edges.begin(), edges.end(), EdgeFromCmp()); + } + static void sortEdgesTo(EList& edges) { + std::sort(edges.begin(), edges.end(), EdgeToCmp()); + } + + // Return edge ranges [begin, end) + static pair findEdges(const EList& edges, index_t node, bool from); + static pair findEdgesFrom(const EList& edges, index_t node) { + return findEdges(edges, node, true); + } + static pair findEdgesTo(const EList& edges, index_t node) { + return findEdges(edges, node, false); + } + static pair getNextEdgeRange(const EList& sep_edges, pair range, bool from) { + if(range.second >= sep_edges.size()) { + return pair(0, 0); + } + range.first = range.second; range.second++; + + if(from) { + while(range.second < sep_edges.size() && sep_edges[range.second].from == sep_edges[range.first].from) { + range.second++; + } + } else { + while(range.second < sep_edges.size() && sep_edges[range.second].to == sep_edges[range.first].to) { + range.second++; + } + } + return range; + } + +private: + struct ThreadParam { + // input + index_t thread_id; + RefGraph* refGraph; + const SString* s; + const EList >* alts; + const EList >* haplotypes; + string out_fname; + bool bigEndian; + + // output + index_t num_nodes; + index_t num_edges; + index_t lastNode; + bool multipleHeadNodes; + }; + static void buildGraph_worker(void* vp); + +private: + EList szs; + EList tmp_szs; + + EList nodes; + EList edges; + index_t lastNode; // Z + + int nthreads; + +#ifndef NDEBUG + bool debug; +#endif + +private: + // Following composite nodes and edges are used to reverse-determinize an automaton. 
+ struct CompositeNodeIDs { + index_t id; + EList add_ids; + + CompositeNodeIDs() { + id = (index_t)INDEX_MAX; + } + + bool operator<(const CompositeNodeIDs& o) const { + if(id != o.id) return id < o.id; + if(add_ids.size() != o.add_ids.size()) return add_ids.size() < o.add_ids.size(); + for(index_t i = 0; i < add_ids.size(); i++) { + assert_lt(i, o.add_ids.size()); + if(add_ids[i] != o.add_ids[i]) return add_ids[i] < o.add_ids[i]; + } + return false; + } + + index_t size() const { + if(id == (index_t)INDEX_MAX) return 0; + return (index_t)add_ids.size() + 1; + } + index_t getID(index_t i) const { + if(i == 0) return id; + else { + i -= 1; + assert_lt(i, add_ids.size()); + return add_ids[i]; + } + } + void push_back(index_t node_id) { + if(id == (index_t)INDEX_MAX) id = node_id; + else add_ids.push_back(node_id); + } + }; + + struct CompositeNode { + CompositeNodeIDs nodes; + index_t id; + char label; + index_t value; + + CompositeNode() { reset(); } + + CompositeNode(char label_, index_t node_id) : + id(0), label(label_) + { + nodes.push_back(node_id); + } + + Node getNode() const { + return Node(label, value); + } + + void reset() { + nodes.id = (index_t)INDEX_MAX; + nodes.add_ids.clear(); + id = 0; + label = 0; + value = 0; + } + }; + + struct CompositeEdge { + index_t from; + index_t to; + + CompositeEdge() : from(0), to(0) {} + CompositeEdge(index_t from_, index_t to_) : from(from_), to(to_) {} + + Edge getEdge(const EList& nodes) const + { + assert_lt(from, nodes.size()); + const CompositeNode& from_node = nodes[from]; + assert_lt(to, nodes.size()); + const CompositeNode& to_node = nodes[to]; + return Edge(from_node.id, to_node.id); + } + + bool operator < (const CompositeEdge& o) const + { + return from < o.from; + } + }; + + struct TempNodeLabelCmp { + TempNodeLabelCmp(const EList& nodes_) : nodes(nodes_) {} + bool operator() (index_t a, index_t b) const { + assert_lt(a, nodes.size()); + assert_lt(b, nodes.size()); + return nodes[a].label < nodes[b].label; 
+ } + + const EList& nodes; + }; +}; + +/** + * Load reference sequence file and alt information. + * Construct a reference graph + */ +template +RefGraph::RefGraph(const SString& s, + const EList& szs, + const EList >& alts, + const EList >& haplotypes, + const string& out_fname, + int nthreads_, + bool verbose) +: lastNode(0), nthreads(nthreads_) +{ + const bool bigEndian = false; + + assert_gt(nthreads, 0); + assert_gt(szs.size(), 0); + index_t jlen = (index_t)s.length(); + +#ifndef NDEBUG + debug = (jlen <= 20); +#endif + + // a memory-efficient way to create a population graph with known ALTs + bool frag_automaton = jlen >= (1 << 16); + if(frag_automaton) { + { + EList > alt_ranges; // each range inclusive + for(index_t i = 0; i < alts.size(); i++) { + const ALT& alt = alts[i]; + index_t left_relax = 128, right_relax = 128; + pair range; + range.first = alt.pos > left_relax ? alt.pos - left_relax - 1 : 0; + if(alt.type == ALT_SNP_SGL) { + range.second = alt.pos + 1; + } else if(alt.type == ALT_SNP_DEL) { + assert_gt(alt.len, 0); + range.second = alt.pos + alt.len; + } else if(alt.type == ALT_SNP_INS) { + assert_gt(alt.len, 0); + range.second = alt.pos; + } else if (alt.type == ALT_SPLICESITE) { + assert_lt(alt.left, alt.right); + range.second = alt.right + 1; + } else { + assert(alt.exon()); + continue; + } + range.second += right_relax; + + if(alt_ranges.empty() || alt_ranges.back().second + 1 < range.first) { + alt_ranges.push_back(range); + } else { + assert_leq(alt_ranges.back().first, range.first); + if(alt_ranges.back().second < range.second) { + alt_ranges.back().second = range.second; + } + } + } + + index_t chunk_size = 1 << 20; + index_t pos = 0, range_idx = 0; + for(index_t i = 0; i < szs.size(); i++) { + if(szs[i].len == 0) continue; + if(szs[i].len <= chunk_size) { + tmp_szs.push_back(szs[i]); + pos += szs[i].len; + } else { + index_t num_chunks = (szs[i].len + chunk_size - 1) / chunk_size; + assert_gt(num_chunks, 1); + index_t modified_chunk_size 
= szs[i].len / num_chunks; + index_t after_pos = pos + szs[i].len; + ASSERT_ONLY(index_t sum_len = 0); + while(pos < after_pos) { + index_t target_pos = pos + modified_chunk_size; + if(target_pos < after_pos) { + for(; range_idx < alt_ranges.size(); range_idx++) { + if(target_pos < alt_ranges[range_idx].first) break; + } + pair alt_free_range; + if(range_idx == 0) { + alt_free_range.first = 0; + } else { + alt_free_range.first = alt_ranges[range_idx - 1].second + 1; + if(alt_free_range.first >= jlen) { + alt_free_range.first = jlen - 1; + } + } + + if(range_idx == alt_ranges.size()) { + alt_free_range.second = jlen - 1; + } else { + alt_free_range.second = alt_ranges[range_idx].first - 1; + } + + assert_leq(alt_free_range.first, alt_free_range.second); + if(target_pos < alt_free_range.first) target_pos = alt_free_range.first; + if(target_pos > after_pos) target_pos = after_pos; + } else { + target_pos = after_pos; + } + + tmp_szs.expand(); + tmp_szs.back().len = target_pos - pos; + tmp_szs.back().off = 0; + pos = target_pos; + ASSERT_ONLY(sum_len += tmp_szs.back().len); + } + assert_eq(pos, after_pos); + assert_eq(sum_len, szs[i].len); + } + } +#ifndef NDEBUG + index_t modified_jlen = 0; + for(index_t i = 0; i < tmp_szs.size(); i++) { + modified_jlen += tmp_szs[i].len; + } + assert_eq(modified_jlen, jlen); +#endif + } + + if(nthreads > (int)tmp_szs.size()) { + nthreads = (int)tmp_szs.size(); + } + assert_gt(nthreads, 0); + AutoArray threads(nthreads); + EList threadParams; + for(index_t i = 0; i < (index_t)nthreads; i++) { + threadParams.expand(); + threadParams.back().thread_id = i; + threadParams.back().refGraph = this; + threadParams.back().s = &s; + threadParams.back().alts = &alts; + threadParams.back().haplotypes = &haplotypes; + threadParams.back().out_fname = out_fname; + threadParams.back().bigEndian = bigEndian; + threadParams.back().num_nodes = 0; + threadParams.back().num_edges = 0; + threadParams.back().lastNode = 0; + 
threadParams.back().multipleHeadNodes = false; + if(nthreads == 1) { + buildGraph_worker((void*)&threadParams.back()); + } else { + threads[i] = new tthread::thread(buildGraph_worker, (void*)&threadParams.back()); + } + } + + if(nthreads > 1) { + for(index_t i = 0; i < (index_t)nthreads; i++) + threads[i]->join(); + } + + index_t num_nodes = 0, num_edges = 0; + for(index_t i = 0; i < threadParams.size(); i++) { + num_nodes += threadParams[i].num_nodes; + num_edges += threadParams[i].num_edges; + // Make room for edges spanning graphs by different threads + if(i > 0) { + num_edges += 16; + } + } + nodes.resizeExact(num_nodes); nodes.clear(); + edges.resizeExact(num_edges); edges.clear(); + + // Read all the nodes and edges + EList tail_nodes; + bool multipleHeadNodes = false; + for(index_t i = 0; i < threadParams.size(); i++) { + if(threadParams[i].multipleHeadNodes) multipleHeadNodes = true; + std::ostringstream number; number << i; + const string rg_fname = out_fname + "." + number.str() + ".rf"; + ifstream rg_in_file(rg_fname.c_str(), ios::binary); + if(!rg_in_file.good()) { + cerr << "Could not open file for reading a reference graph: \"" << rg_fname << "\"" << endl; + throw 1; + } + index_t curr_num_nodes = (index_t)nodes.size(); + ASSERT_ONLY(index_t curr_num_edges = (index_t)edges.size()); + ASSERT_ONLY(index_t num_spanning_edges = 0); + // Read nodes to be connected to last nodes in a previous thread + if(i > 0) { + assert_gt(tail_nodes.size(), 0) + index_t num_head_nodes = readIndex(rg_in_file, bigEndian); + for(index_t j = 0; j < num_head_nodes; j++) { + index_t head_node = readIndex(rg_in_file, bigEndian); + for(index_t k = 0; k < tail_nodes.size(); k++) { + edges.expand(); + edges.back().from = tail_nodes[k]; + edges.back().to = head_node + curr_num_nodes; + ASSERT_ONLY(num_spanning_edges++); + } + } + } + while(!rg_in_file.eof()) { + index_t tmp_num_nodes = readIndex(rg_in_file, bigEndian); + for(index_t j = 0; j < tmp_num_nodes; j++) { + 
nodes.expand(); + nodes.back().read(rg_in_file, bigEndian); + } + index_t tmp_num_edges = readIndex(rg_in_file, bigEndian); + for(index_t j = 0; j < tmp_num_edges; j++) { + edges.expand(); + edges.back().read(rg_in_file, bigEndian); + edges.back().from += curr_num_nodes; + edges.back().to += curr_num_nodes; + } + + if(nodes.size() >= curr_num_nodes + threadParams[i].num_nodes) { + assert_eq(nodes.size(), curr_num_nodes + threadParams[i].num_nodes); + assert_eq(edges.size(), curr_num_edges + num_spanning_edges + threadParams[i].num_edges); + // Read last nodes in this thread + tail_nodes.clear(); + if(i + 1 < (index_t)nthreads) { + index_t num_tail_nodes = readIndex(rg_in_file, bigEndian); + for(index_t j = 0; j < num_tail_nodes; j++) { + index_t tail_node = readIndex(rg_in_file, bigEndian); + tail_nodes.push_back(tail_node + curr_num_nodes); + } + } + break; + } + } + rg_in_file.close(); + std::remove(rg_fname.c_str()); + if(i + 1 == (index_t)nthreads) { + lastNode = threadParams.back().lastNode + curr_num_nodes; + assert_lt(lastNode, nodes.size()); + assert_eq(nodes[lastNode].label, 'Z'); + } + } + + if(s.length() + 2 == nodes.size() && nodes.size() == edges.size() + 1) { + cerr << "Warning: no variants or splice sites in this graph" << endl; + throw NongraphException(); + } + + if(multipleHeadNodes) { + if(!isReverseDeterministic(nodes, edges)) { + if(verbose) cerr << "\tis not reverse-deterministic, so reverse-determinize..." 
<< endl; + reverseDeterminize(nodes, edges, lastNode); + } + } + assert(isReverseDeterministic(nodes, edges)); + } else { // this is memory-consuming, but simple to implement + index_t num_predicted_nodes = (index_t)(jlen * 1.2); + nodes.reserveExact(num_predicted_nodes); + edges.reserveExact(num_predicted_nodes); + + // Created head node + nodes.expand(); + nodes.back().label = 'Y'; + nodes.back().value = 0; + // Create nodes and edges corresponding to a reference genome + for(size_t i = 0; i < s.length(); i++) { + nodes.expand(); + nodes.back().label = "ACGT"[(int)s[i]]; + nodes.back().value = (index_t)i; + + assert_geq(nodes.size(), 2); + edges.expand(); + edges.back().from = (index_t)nodes.size() - 2; + edges.back().to = (index_t)nodes.size() - 1; + } + + // Create tail node + nodes.expand(); + nodes.back().label = 'Z'; + nodes.back().value = (index_t)s.length(); + lastNode = (index_t)nodes.size() - 1; + edges.expand(); + edges.back().from = (index_t)nodes.size() - 2; + edges.back().to = (index_t)nodes.size() - 1; + + // Create nodes and edges for haplotypes + for(index_t i = 0; i < haplotypes.size(); i++) { + const Haplotype& haplotype = haplotypes[i]; + const EList& snpIDs = haplotype.alts; + assert_gt(snpIDs.size(), 0); + assert_lt(haplotype.right, s.length()); + bool pass = true; + for(index_t s = 0; s < snpIDs.size(); s++) { + index_t snpID = snpIDs[s]; + assert_lt(snpID, alts.size()); + const ALT& snp = alts[snpID]; + assert(snp.snp()); + if(s + 1 >= snpIDs.size()) break; + index_t snpID2 = snpIDs[s+1]; + assert_lt(snpID2, alts.size()); + const ALT& snp2 = alts[snpID2]; + assert(snp2.snp()); + if(snp.type == ALT_SNP_INS) { + if(snp.pos > snp2.pos) { + pass = false; + break; + } + } else if(snp.type == ALT_SNP_DEL) { + if(snp2.type == ALT_SNP_DEL) { + if(snp.pos + snp.len >= snp2.pos) { + pass = false; + break; + } + } else { + if(snp.pos + snp.len - 1 >= snp2.pos) { + pass = false; + break; + } + } + } else { + if(snp.pos >= snp2.pos) { + pass = false; + 
break; + } + } + } + + if(!pass) continue; + + index_t prev_ALT_type = ALT_NONE; + index_t ID_i = 0; + for(index_t j = haplotype.left; j <= haplotype.right; j++) { + if(prev_ALT_type == ALT_SNP_INS) j--; + const ALT* altp = (ID_i < snpIDs.size() ? &(alts[snpIDs[ID_i]]) : NULL); + assert(altp == NULL || altp->pos >= j); + if(altp != NULL && altp->pos == j) { + const ALT& alt = *altp; + assert_lt(alt.pos, s.length()); + assert(alt.snp()); + if(alt.type == ALT_SNP_SGL) { + assert_eq(alt.len, 1); + nodes.expand(); + assert_lt(alt.seq, 4); + assert_neq(alt.seq & 0x3, s[alt.pos]); + nodes.back().label = "ACGT"[alt.seq]; + nodes.back().value = alt.pos; + if(prev_ALT_type != ALT_SNP_DEL) { + edges.expand(); + if(j == haplotype.left) { + edges.back().from = alt.pos; + } else { + assert_gt(nodes.size(), 2); + edges.back().from = (index_t)nodes.size() - 2; + } + edges.back().to = (index_t)nodes.size() - 1; + } + if(j == haplotype.right) { + edges.expand(); + edges.back().from = (index_t)nodes.size() - 1; + edges.back().to = alt.pos + 2; + } + } + else if(alt.type == ALT_SNP_DEL) { + assert_gt(alt.len, 0); + assert_leq(alt.pos + alt.len, s.length()); + edges.expand(); + if(j == haplotype.left) { + edges.back().from = alt.pos; + } else { + edges.back().from = (index_t)nodes.size() - 1; + } + j += (alt.len - 1); + assert_leq(j, haplotype.right); + if(j == haplotype.right) { + edges.back().to = alt.pos + alt.len + 1; + } else { + edges.back().to = (index_t)nodes.size(); + } + } else { + assert_eq(alt.type, ALT_SNP_INS) + assert_gt(alt.len, 0); + for(size_t k = 0; k < alt.len; k++) { + uint64_t bp = alt.seq >> ((alt.len - k - 1) << 1); + bp &= 0x3; + char ch = "ACGT"[bp]; + nodes.expand(); + nodes.back().label = ch; + nodes.back().value = (index_t)INDEX_MAX; + if(prev_ALT_type == ALT_SNP_DEL && k == 0) continue; + edges.expand(); + edges.back().from = ((k == 0 && j == haplotype.left) ? 
alt.pos : (index_t)nodes.size() - 2); + edges.back().to = (index_t)nodes.size() - 1; + } + if(j == haplotype.right) { + edges.expand(); + edges.back().from = (index_t)nodes.size() - 1; + edges.back().to = alt.pos + 1; + } + } + ID_i++; + prev_ALT_type = alt.type; + } else { + int nt = s[j]; + assert_lt(nt, 4); + nodes.expand(); + nodes.back().label = "ACGT"[nt]; + nodes.back().value = j; + if(prev_ALT_type != ALT_SNP_DEL) { + edges.expand(); + if(j == haplotype.left && prev_ALT_type == ALT_NONE) { + edges.back().from = j; + } else { + edges.back().from = (index_t)nodes.size() - 2; + } + edges.back().to = (index_t)nodes.size() - 1; + } + if(j == haplotype.right) { + edges.expand(); + edges.back().from = (index_t)nodes.size() - 1; + edges.back().to = j + 2; + } + prev_ALT_type = ALT_SNP_SGL; + } + } + } + + // Create nodes and edges for splice sites + for(size_t i = 0; i < alts.size(); i++) { + const ALT& alt = alts[i]; + if(alt.pos >= s.length()) break; + if(alt.type != ALT_SPLICESITE) continue; + if(alt.excluded) continue; + assert_lt(alt.left, alt.right); + edges.expand(); + edges.back().from = alt.left; + edges.back().to = alt.right + 2; + } + + if(s.length() + 2 == nodes.size() && nodes.size() == edges.size() + 1) { + throw NongraphException(); + } + + if(!isReverseDeterministic(nodes, edges)) { + if(verbose) cerr << "\tis not reverse-deterministic, so reverse-determinize..." 
<< endl; + reverseDeterminize(nodes, edges, lastNode); + assert(isReverseDeterministic(nodes, edges)); + } + } + +#ifndef NDEBUG + if(debug) { + cout << "num nodes: " << nodes.size() << endl; + for(index_t i = 0; i < nodes.size(); i++) { + const Node& n = nodes[i]; + cout << i << "\t" << n.label << "\t" << n.value << endl; + } + + sort(edges.begin(), edges.end()); + cout << "num edges: " << edges.size() << endl; + for(index_t i = 0; i < edges.size(); i++) { + const Edge& e = edges[i]; + cout << i << "\t" << e.from << " --> " << e.to << endl; + } + } +#endif +} + +template +pair RefGraph::findEdges(const EList& edges, index_t node, bool from) { + pair range(0, 0); + assert_gt(edges.size(), 0); + + // Find lower bound + index_t low = 0, high = (index_t)edges.size() - 1; + index_t temp; + while(low < high) { + index_t mid = low + (high - low) / 2; + temp = (from ? edges[mid].from : edges[mid].to); + if(node == temp) { + high = mid; + } else if(node < temp) { + if(mid == 0) { + return pair(0, 0); + } + + high = mid - 1; + } else { + low = mid + 1; + } + } + temp = (from ? edges[low].from : edges[low].to); + if(node == temp) { + range.first = low; + } else { + return range; + } + + // Find upper bound + high = (index_t)edges.size() - 1; + while(low < high) + { + index_t mid = low + (high - low + 1) / 2; + temp = (from ? edges[mid].from : edges[mid].to); + if(node == temp) { + low = mid; + } else { + assert_lt(node, temp); + high = mid - 1; + } + } +#ifndef NDEBUG + temp = (from ? 
edges[high].from : edges[high].to); + assert_eq(node, temp); +#endif + range.second = high + 1; + return range; +} + +template +void RefGraph::buildGraph_worker(void* vp) { + ThreadParam* threadParam = (ThreadParam*)vp; + RefGraph& refGraph = *(threadParam->refGraph); + + const SString& s = *(threadParam->s); + index_t jlen = (index_t)s.length(); + + const EList >& alts = *(threadParam->alts); + const EList >& haplotypes = *(threadParam->haplotypes); + + EList nodes; EList edges; + const EList& tmp_szs = refGraph.tmp_szs; + + index_t thread_id = threadParam->thread_id; + index_t nthreads = refGraph.nthreads; + std::ostringstream number; number << thread_id; + const string rg_fname = threadParam->out_fname + "." + number.str() + ".rf"; + ofstream rg_out_file(rg_fname.c_str(), ios::binary); + if(!rg_out_file.good()) { + cerr << "Could not open file for writing a reference graph: \"" << rg_fname << "\"" << endl; + throw 1; + } + +#ifndef NDEBUG + set snp_set; +#endif + + const bool bigEndian = threadParam->bigEndian; + + index_t& lastNode = threadParam->lastNode; + + index_t& num_nodes = threadParam->num_nodes; + index_t& num_edges = threadParam->num_edges; + index_t szs_idx = 0, szs_idx_end = (index_t)tmp_szs.size(); + if(threadParam->thread_id != 0) { + szs_idx = (index_t)((tmp_szs.size() / nthreads) * thread_id); + } + if(thread_id + 1 < nthreads) { + szs_idx_end = (index_t)((tmp_szs.size() / nthreads) * (thread_id + 1)); + } + + index_t curr_pos = 0; + for(index_t i = 0; i < szs_idx; i++) { + curr_pos += tmp_szs[i].len; + } + EList prev_tail_nodes; + index_t alt_idx = 0, haplotype_idx = 0; + for(; szs_idx < szs_idx_end; szs_idx++) { + index_t curr_len = tmp_szs[szs_idx].len; + if(curr_len <= 0) continue; + index_t num_predicted_nodes = (index_t)(curr_len * 1.2); + nodes.resizeExact(num_predicted_nodes); nodes.clear(); + edges.resizeExact(num_predicted_nodes); edges.clear(); + + // Created head node + nodes.expand(); + nodes.back().label = 'Y'; + nodes.back().value 
= 0; + + // Create nodes and edges corresponding to a reference genome + assert_leq(curr_pos + curr_len, s.length()); + for(size_t i = curr_pos; i < curr_pos + curr_len; i++) { + nodes.expand(); + nodes.back().label = "ACGT"[(int)s[i]]; + nodes.back().value = (index_t)i; + assert_geq(nodes.size(), 2); + edges.expand(); + edges.back().from = (index_t)nodes.size() - 2; + edges.back().to = (index_t)nodes.size() - 1; + } + + // Create tail node + nodes.expand(); + nodes.back().label = 'Z'; + nodes.back().value = (index_t)s.length(); + lastNode = (index_t)nodes.size() - 1; + edges.expand(); + edges.back().from = (index_t)nodes.size() - 2; + edges.back().to = (index_t)nodes.size() - 1; + ASSERT_ONLY(index_t backbone_nodes = (index_t)nodes.size()); + + // Create nodes and edges for haplotypes + for(; haplotype_idx < haplotypes.size(); haplotype_idx++) { + const Haplotype& haplotype = haplotypes[haplotype_idx]; + if(haplotype.left < curr_pos) continue; + if(haplotype.right >= curr_pos + curr_len) break; + const EList& snpIDs = haplotype.alts; + assert_gt(snpIDs.size(), 0); + bool pass = true; + for(index_t s = 0; s < snpIDs.size(); s++) { + index_t snpID = snpIDs[s]; + assert_lt(snpID, alts.size()); + const ALT& snp = alts[snpID]; + assert(snp.snp()); + if(s + 1 >= snpIDs.size()) break; + index_t snpID2 = snpIDs[s+1]; + assert_lt(snpID2, alts.size()); + const ALT& snp2 = alts[snpID2]; + assert(snp2.snp()); + if(snp.type == ALT_SNP_INS) { + if(snp.pos > snp2.pos) { + pass = false; + break; + } + } else if(snp.type == ALT_SNP_DEL) { + if(snp2.type == ALT_SNP_DEL) { + if(snp.pos + snp.len >= snp2.pos) { + pass = false; + break; + } + } else { + if(snp.pos + snp.len - 1 >= snp2.pos) { + pass = false; + break; + } + } + } else { + if(snp.pos >= snp2.pos) { + pass = false; + break; + } + } + } + + if(!pass) continue; + + index_t prev_ALT_type = ALT_NONE; + index_t ID_i = 0; + for(index_t j = haplotype.left; j <= haplotype.right; j++) { + if(prev_ALT_type == ALT_SNP_INS) j--; + 
const ALT* altp = (ID_i < snpIDs.size() ? &(alts[snpIDs[ID_i]]) : NULL); + assert(altp == NULL || altp->pos >= j); + if(altp != NULL && altp->pos == j) { + const ALT& alt = *altp; + assert_lt(alt.pos, s.length()); + assert(alt.snp()); + if(alt.type == ALT_SNP_SGL) { + assert_eq(alt.len, 1); + nodes.expand(); + assert_lt(alt.seq, 4); + assert_neq(alt.seq & 0x3, s[alt.pos]); + nodes.back().label = "ACGT"[alt.seq]; + nodes.back().value = alt.pos; + if(prev_ALT_type != ALT_SNP_DEL) { + edges.expand(); + if(j == haplotype.left) { + edges.back().from = alt.pos - curr_pos; + assert_lt(edges.back().from, backbone_nodes); + } else { + assert_gt(nodes.size(), 2); + edges.back().from = (index_t)nodes.size() - 2; + } + edges.back().to = (index_t)nodes.size() - 1; + } + if(j == haplotype.right) { + edges.expand(); + edges.back().from = (index_t)nodes.size() - 1; + edges.back().to = alt.pos - curr_pos + 2; + assert_lt(edges.back().to, backbone_nodes); + } + } + else if(alt.type == ALT_SNP_DEL) { + assert_gt(alt.len, 0); + assert_leq(alt.pos - curr_pos + alt.len, s.length()); + edges.expand(); + if(j == haplotype.left) { + edges.back().from = alt.pos - curr_pos; + assert_lt(edges.back().from, backbone_nodes); + } else { + edges.back().from = (index_t)nodes.size() - 1; + } + j += (alt.len - 1); + assert_leq(j, haplotype.right); + if(j == haplotype.right) { + edges.back().to = alt.pos - curr_pos + alt.len + 1; + assert_lt(edges.back().to, backbone_nodes); + } else { + edges.back().to = (index_t)nodes.size(); + } + } else { + assert_eq(alt.type, ALT_SNP_INS) + assert_gt(alt.len, 0); + for(size_t k = 0; k < alt.len; k++) { + uint64_t bp = alt.seq >> ((alt.len - k - 1) << 1); + bp &= 0x3; + char ch = "ACGT"[bp]; + nodes.expand(); + nodes.back().label = ch; + nodes.back().value = (index_t)INDEX_MAX; + if(prev_ALT_type == ALT_SNP_DEL && k == 0) continue; + edges.expand(); + edges.back().from = ((k == 0 && j == haplotype.left) ? 
alt.pos - curr_pos : (index_t)nodes.size() - 2); + edges.back().to = (index_t)nodes.size() - 1; + } + if(j == haplotype.right) { + edges.expand(); + edges.back().from = (index_t)nodes.size() - 1; + edges.back().to = alt.pos - curr_pos + 1; + } + } +#ifndef NDEBUG + snp_set.insert(snpIDs[ID_i]); +#endif + ID_i++; + prev_ALT_type = alt.type; + } else { + int nt = s[j]; + assert_lt(nt, 4); + nodes.expand(); + nodes.back().label = "ACGT"[nt]; + nodes.back().value = j; + if(prev_ALT_type != ALT_SNP_DEL) { + edges.expand(); + if(j == haplotype.left && prev_ALT_type == ALT_NONE) { + edges.back().from = j - curr_pos; + assert_lt(edges.back().from, backbone_nodes); + } else { + edges.back().from = (index_t)nodes.size() - 2; + } + edges.back().to = (index_t)nodes.size() - 1; + } + if(j == haplotype.right) { + edges.expand(); + edges.back().from = (index_t)nodes.size() - 1; + edges.back().to = j - curr_pos + 2; + assert_lt(edges.back().to, backbone_nodes); + } + prev_ALT_type = ALT_SNP_SGL; + } + } + } + + // Create nodes and edges for splice sites + for(; alt_idx < alts.size(); alt_idx++) { + const ALT& alt = alts[alt_idx]; + if(alt.pos < curr_pos) continue; + if(alt.pos >= curr_pos + curr_len) break; + if(!alt.splicesite()) continue; + if(alt.excluded) continue; + assert_lt(alt.left, alt.right); + edges.expand(); + edges.back().from = alt.left - curr_pos; + edges.back().to = alt.right - curr_pos + 2; + assert_lt(edges.back().from, backbone_nodes); + assert_lt(edges.back().to, backbone_nodes); + } + +#ifndef NDEBUG + if(refGraph.debug) { + cerr << "Nodes:" << endl; + for(size_t i = 0; i < nodes.size(); i++) { + const Node& node = nodes[i]; + cerr << "\t" << i << "\t" << node.label << "\t" << node.value << endl; + } + cerr << endl; + cerr << "Edges: " << endl; + for(size_t i = 0; i < edges.size(); i++) { + const Edge& edge = edges[i]; + cerr << "\t" << i << "\t" << edge.from << " --> " << edge.to << endl; + } + cerr << endl; + } +#endif + + if(!isReverseDeterministic(nodes, 
edges)) { + reverseDeterminize(nodes, edges, lastNode, curr_pos > 0 ? curr_pos + 1 : 0); + assert(isReverseDeterministic(nodes, edges)); + } + + // Identify head + index_t head_node = (index_t)nodes.size(); + for(index_t i = 0; i < nodes.size(); i++) { + if(nodes[i].label == 'Y') { + head_node = i; + break; + } + } + assert_lt(head_node, nodes.size()); + index_t tail_node = lastNode; assert_lt(tail_node, nodes.size()); + + // Update edges + const index_t invalid = (index_t)INDEX_MAX; + bool head_off = curr_pos > 0, tail_off = curr_pos + curr_len < jlen; + for(index_t i = 0; i < edges.size(); i++) { + index_t from = edges[i].from; + from = from + num_nodes; + if(head_off && edges[i].from > head_node) from -= 1; + if(tail_off && edges[i].from > tail_node) from -= 1; + if(head_off && edges[i].from == head_node) { + edges[i].from = invalid; + } else { + edges[i].from = from; + } + + index_t to = edges[i].to; + to = to + num_nodes; + if(head_off && edges[i].to > head_node) to -= 1; + if(tail_off && edges[i].to > tail_node) to -= 1; + if(tail_off && edges[i].to == tail_node) { + edges[i].to = invalid; + } else { + edges[i].to = to; + } + } + head_node = tail_node = invalid; + // Also update lastNode + if(!tail_off) { + lastNode += num_nodes; + if(head_off) lastNode -= 1; + } + + // Connect head nodes with tail nodes in the previous automaton + index_t num_head_nodes = 0; + index_t tmp_num_edges = (index_t)edges.size(); + if(head_off) { + EList nodes_to_head; + for(index_t i = 0; i < tmp_num_edges; i++) { + if(edges[i].from == head_node) { + num_head_nodes++; + if(prev_tail_nodes.size() > 0) { + for(index_t j = 0; j < prev_tail_nodes.size(); j++) { + edges.expand(); + edges.back().from = prev_tail_nodes[j]; + edges.back().to = edges[i].to; + assert_lt(edges.back().from, edges.back().to); + } + } else { + nodes_to_head.push_back(edges[i].to); + } + } + } + + if(nodes_to_head.size() > 0) { + assert_gt(thread_id, 0); + assert_eq(prev_tail_nodes.size(), 0); + 
writeIndex(rg_out_file, (index_t)nodes_to_head.size(), bigEndian); + for(index_t i = 0; i < nodes_to_head.size(); i++) { + writeIndex(rg_out_file, nodes_to_head[i], bigEndian); + } + } + } + + // Need to check if it's reverse-deterministic + if(num_head_nodes > 1) { + threadParam->multipleHeadNodes = true; + } + + // List tail nodes + prev_tail_nodes.clear(); + if(tail_off) { + for(index_t i = 0; i < tmp_num_edges; i++) { + if(edges[i].to == tail_node) { + prev_tail_nodes.push_back(edges[i].from); + } + } + } + + // Write nodes and edges + index_t tmp_num_nodes = (index_t)nodes.size(); + assert_gt(tmp_num_nodes, 2); + if(head_off) tmp_num_nodes--; + if(tail_off) tmp_num_nodes--; + writeIndex(rg_out_file, tmp_num_nodes, bigEndian); + ASSERT_ONLY(index_t num_nodes_written = 0); + for(index_t i = 0; i < nodes.size(); i++) { + if(head_off && nodes[i].label == 'Y') continue; + if(tail_off && nodes[i].label == 'Z') continue; + nodes[i].write(rg_out_file, bigEndian); + ASSERT_ONLY(num_nodes_written++); + } + assert_eq(tmp_num_nodes, num_nodes_written); + tmp_num_edges = (index_t)edges.size(); + assert_geq(tmp_num_edges, num_head_nodes + prev_tail_nodes.size()); + if(head_off) tmp_num_edges -= num_head_nodes; + if(tail_off) tmp_num_edges -= prev_tail_nodes.size(); + writeIndex(rg_out_file, tmp_num_edges, bigEndian); + ASSERT_ONLY(index_t num_edges_written = 0); + for(index_t i = 0; i < edges.size(); i++) { + if(head_off && edges[i].from == head_node) continue; + if(tail_off && edges[i].to == tail_node) continue; + edges[i].write(rg_out_file, bigEndian); + ASSERT_ONLY(num_edges_written++); + } + assert_eq(tmp_num_edges, num_edges_written); + + // Clear nodes and edges + nodes.clear(); edges.clear(); + + curr_pos += curr_len; + num_nodes += tmp_num_nodes; + num_edges += tmp_num_edges; + } + + if(nthreads > 1 && thread_id + 1 < (index_t)nthreads && prev_tail_nodes.size() > 0) { + writeIndex(rg_out_file, (index_t)prev_tail_nodes.size(), bigEndian); + for(index_t i = 0; i < 
prev_tail_nodes.size(); i++) { + writeIndex(rg_out_file, prev_tail_nodes[i], bigEndian); + } + } + + // Close out file handle + rg_out_file.close(); +} + +template +bool RefGraph::isReverseDeterministic(EList& nodes, EList& edges) +{ + if(edges.size() <= 0) return true; + + // Sort edges by "to" nodes + sortEdgesTo(edges); + + index_t curr_to = (index_t)INDEX_MAX; + EList seen; seen.resize(5); seen.fillZero(); + for(index_t i = 0; i < edges.size(); i++) { + index_t from = edges[i].from; + assert_lt(from, nodes.size()); + char nt = nodes[from].label; + assert(nt == 'A' || nt == 'C' || nt == 'G' || nt == 'T' || nt == 'Y'); + if(nt == 'Y') nt = 4; + else nt = asc2dna[(int)nt]; + assert_lt(nt, seen.size()); + if(curr_to != edges[i].to) { + curr_to = edges[i].to; + seen.fillZero(); + seen[nt] = true; + } else { + if(seen[nt]) { + return false; + } + seen[nt] = true; + } + } + + return true; +} + + +template +void RefGraph::reverseDeterminize(EList& nodes, EList& edges, index_t& lastNode, index_t lastNode_add) +{ + EList cnodes; cnodes.ensure(nodes.size()); + map cnode_map; + deque active_cnodes; + EList cedges; cedges.ensure(edges.size()); + + // Start from the final node ('Z') + assert_lt(lastNode, nodes.size()); + const Node& last_node = nodes[lastNode]; + cnodes.expand(); + cnodes.back().reset(); + cnodes.back().label = last_node.label; + cnodes.back().value = last_node.value; + cnodes.back().nodes.push_back(lastNode); + active_cnodes.push_back(0); + cnode_map[cnodes.back().nodes] = 0; + sortEdgesTo(edges); + + index_t firstNode = 0; // Y -> ... 
-> Z + EList predecessors; + while(!active_cnodes.empty()) { + index_t cnode_id = active_cnodes.front(); active_cnodes.pop_front(); + assert_lt(cnode_id, cnodes.size()); + + // Find predecessors of this composite node + predecessors.clear(); + for(size_t i = 0; i < cnodes[cnode_id].nodes.size(); i++) { + index_t node_id = cnodes[cnode_id].nodes.getID((index_t)i); + pair edge_range = findEdgesTo(edges, node_id); + assert_leq(edge_range.first, edge_range.second); + assert_leq(edge_range.second, edges.size()); + for(index_t j = edge_range.first; j < edge_range.second; j++) { + assert_eq(edges[j].to, node_id); + predecessors.push_back(edges[j].from); + } + } + + if(predecessors.size() >= 2) { + // Remove redundant nodes + predecessors.sort(); + index_t new_size = (index_t)(unique(predecessors.begin(), predecessors.end()) - predecessors.begin()); + predecessors.resize(new_size); + + // Create composite nodes by labels + stable_sort(predecessors.begin(), predecessors.end(), TempNodeLabelCmp(nodes)); + } + + for(size_t i = 0; i < predecessors.size();) { + index_t node_id = predecessors[i]; + assert_lt(node_id, nodes.size()); + const Node& node = nodes[node_id]; i++; + + cnodes.expand(); + cnodes.back().reset(); + cnodes.back().label = node.label; + cnodes.back().value = node.value; + cnodes.back().nodes.push_back(node_id); + + if(node.label == 'Y' && firstNode == 0) { + firstNode = (index_t)cnodes.size() - 1; + } + + while(i < predecessors.size()) { + index_t next_node_id = predecessors[i]; + assert_lt(next_node_id, nodes.size()); + const Node& next_node = nodes[next_node_id]; + if(next_node.label != node.label) break; + cnodes.back().nodes.push_back(next_node_id); + if(next_node.value != (index_t)INDEX_MAX) { + if(cnodes.back().value == (index_t)INDEX_MAX) { + cnodes.back().value = next_node.value; + } else { + cnodes.back().value = max(cnodes.back().value, next_node.value); + } + } + i++; + } + + // Create edges from this new composite node to current composite node + 
typename map::iterator existing = cnode_map.find(cnodes.back().nodes); + if(existing == cnode_map.end()) { + cnode_map[cnodes.back().nodes] = (index_t)cnodes.size() - 1; + active_cnodes.push_back((index_t)cnodes.size() - 1); + cedges.push_back(CompositeEdge((index_t)cnodes.size() - 1, cnode_id)); + } else { + cnodes.pop_back(); + cedges.push_back(CompositeEdge((*existing).second, cnode_id)); + } + + // Increment indegree + cnodes[cnode_id].id++; + } + } + + // Interchange from and to + for(index_t i = 0; i < cedges.size(); i++) { + index_t tmp = cedges[i].from; + cedges[i].from = cedges[i].to; + cedges[i].to = tmp; + } + sort(cedges.begin(), cedges.end()); + active_cnodes.push_back(0); + while(!active_cnodes.empty()) { + index_t cnode_id = active_cnodes.front(); active_cnodes.pop_front(); + assert_lt(cnode_id, cnodes.size()); + const CompositeNode& cnode = cnodes[cnode_id]; + index_t i = (index_t)cedges.bsearchLoBound(CompositeEdge(cnode_id, 0)); + while(i < cedges.size()) { + assert_geq(cedges[i].from, cnode_id); + if(cedges[i].from != cnode_id) break; + index_t predecessor_cnode_id = cedges[i].to; + assert_lt(predecessor_cnode_id, cnodes.size()); + CompositeNode& predecessor_cnode = cnodes[predecessor_cnode_id]; + if(cnode.value == predecessor_cnode.value + 1) { + active_cnodes.push_back(predecessor_cnode_id); + break; + } + i++; + } + } + // Restore from and to by interchanging them + for(index_t i = 0; i < cedges.size(); i++) { + index_t tmp = cedges[i].from; + cedges[i].from = cedges[i].to; + cedges[i].to = tmp; + } + + // Create new nodes + lastNode = 0; // Invalidate lastNode + nodes.resizeExact(cnodes.size()); nodes.clear(); + assert_neq(firstNode, 0); + assert_lt(firstNode, cnodes.size()); + CompositeNode& first_node = cnodes[firstNode]; + first_node.id = 0; + nodes.expand(); + nodes.back() = first_node.getNode(); + active_cnodes.push_back(firstNode); + sort(cedges.begin(), cedges.end()); + while(!active_cnodes.empty()) { + index_t cnode_id = 
active_cnodes.front(); active_cnodes.pop_front(); + assert_lt(cnode_id, cnodes.size()); + index_t i = (index_t)cedges.bsearchLoBound(CompositeEdge(cnode_id, 0)); + while(i < cedges.size()) { + assert_geq(cedges[i].from, cnode_id); + if(cedges[i].from != cnode_id) break; + index_t successor_cnode_id = cedges[i].to; + assert_lt(successor_cnode_id, cnodes.size()); + CompositeNode& successor_cnode = cnodes[successor_cnode_id]; + assert_gt(successor_cnode.id, 0); + successor_cnode.id--; + if(successor_cnode.id == 0) { + active_cnodes.push_back(successor_cnode_id); + successor_cnode.id = (index_t)nodes.size(); + nodes.expand(); + nodes.back() = successor_cnode.getNode(); + if(nodes.back().label == 'Z') { + assert_eq(lastNode, 0); + assert_gt(nodes.size(), 1); + lastNode = (index_t)nodes.size() - 1; + } + } + i++; + } + } + + // Create new edges + edges.resizeExact(cedges.size()); edges.clear(); + for(index_t i = 0; i < cedges.size(); i++) { + const CompositeEdge& edge = cedges[i]; + edges.expand(); + edges.back() = edge.getEdge(cnodes); + } + sortEdgesFrom(edges); + +#if 0 +#ifndef NDEBUG + if(debug) { + cerr << "Nodes:" << endl; + for(size_t i = 0; i < nodes.size(); i++) { + const Node& node = nodes[i]; + cerr << "\t" << i << "\t" << node.label << "\t" << node.value << (node.backbone ? 
"\tbackbone" : "") << endl; + } + cerr << endl; + cerr << "Edges: " << endl; + for(size_t i = 0; i < edges.size(); i++) { + const Edge& edge = edges[i]; + cerr << "\t" << i << "\t" << edge.from << " --> " << edge.to << endl; + } + cerr << endl; + } +#endif +#endif +} + +template +class PathGraph { +public: + struct PathNode { + index_t from; + index_t to; + pair key; + + void setSorted() { to = (index_t)INDEX_MAX; } + bool isSorted() const { return to == (index_t)INDEX_MAX; } + + index_t value() const { return to; } + index_t outdegree() const { return key.first; } + + bool operator< (const PathNode& o) const { + return key < o.key; + }; + }; + + static inline index_t PathNodeFrom (PathNode& a) { + return a.from; + } + + static inline index_t PathNodeKey (PathNode& a) { + return a.key.first; + } + + struct PathNodeKeySecondCmp { + bool operator() (const PathNode& a, const PathNode& b) const { + return a.key.second < b.key.second; + } + }; + + struct PathNodeFromCmp { + bool operator() (const PathNode& a, const PathNode& b) const { + return a.from < b.from; + } + }; + + struct PathNodeToCmp { + bool operator() (const PathNode& a, const PathNode& b) const { + return a.to < b.to; + } + }; + + struct PathEdge { + index_t from; + union { + index_t to; + index_t ranking; + }; + char label; + + PathEdge() { reset(); } + + PathEdge(index_t from_, index_t ranking_, char label_) : from(from_), ranking(ranking_), label(label_) {} + + void reset() { + from = 0; + ranking = 0; + label = 0; + } + + bool operator< (const PathEdge& o) const { + return label < o.label || (label == o.label && ranking < o.ranking); + }; + + }; + + static inline index_t PathEdgeTo (PathEdge& a) { + return a.to; + } + + struct PathEdgeFromCmp { + bool operator() (const PathEdge& a, const PathEdge& b) const { + return a.from < b.from || (a.from == b.from && a.to < b.to); + } + }; + + struct PathEdgeToCmp { + bool operator() (const PathEdge& a, const PathEdge& b) const { + return a.to < b.to || (a.to == 
b.to && a.from < b.from); + } + }; + +public: + // Create a new graph in which paths are represented using nodes + PathGraph( + RefGraph& parent, + const string& base_fname, + size_t max_num_nodes_ = std::numeric_limits::max(), + int nthreads_ = 1, + bool verbose_ = false); + + ~PathGraph() {} + + void printInfo(); + + bool generateEdges(RefGraph& parent); + + index_t getNumNodes() const { return (index_t)nodes.size(); } + index_t getNumEdges() const { return (index_t)edges.size(); } + + bool isSorted() const { return sorted; } + + bool nextRow(int& gbwtChar, int& F, int& M, index_t& pos) { + if(report_node_idx >= nodes.size()) return false; + bool firstOutEdge = false; + if(report_edge_range.first >= report_edge_range.second) { + report_edge_range = getEdges(report_node_idx, false /* from? */); + firstOutEdge = true; + if(report_node_idx == 0) { + report_M = pair(0, 0); + } + } + assert_lt(report_edge_range.first, report_edge_range.second); + assert_lt(report_edge_range.first, edges.size()); + const PathEdge& edge = edges[report_edge_range.first]; + gbwtChar = edge.label; + assert_lt(report_node_idx, nodes.size()); + F = (firstOutEdge ? 1 : 0); + + report_edge_range.first++; + if(report_edge_range.first >= report_edge_range.second) { + report_node_idx++; + } + assert_lt(report_M.first, nodes.size()); + pos = nodes[report_M.first].to; + M = (report_M.second == 0 ? 1 : 0); + report_M.second++; + if(report_M.second >= nodes[report_M.first].key.first) { + report_M.first++; + report_M.second = 0; + } + return true; + } + + index_t nextFLocation() { + if(report_F_node_idx >= nodes.size()) return (index_t)INDEX_MAX; + index_t ret = report_F_location; + pair edge_range = getEdges(report_F_node_idx, false /* from? 
*/); + report_F_node_idx++; + assert_lt(edge_range.first, edge_range.second); + report_F_location += (edge_range.second - edge_range.first); + return ret; + } + +private: + void makeFromRef(RefGraph& base); + void generationOne(); + void earlyGeneration(); + void firstPruneGeneration(); + void lateGeneration(); + + void mergeUpdateRank(); + pair nextMaximalSet(pair range); + pair getEdges(index_t node, bool by_from); // Create index first. + + struct CreateNewNodesParams { + PathNode* st; + PathNode* en; + PathNode* curr; + index_t* sub_temp_nodes; + PathGraph* graph; + }; + static void createNewNodesCounter(void* vp); + static void createNewNodesMaker(void* vp); + void createNewNodes(); + + struct GenEdgesParams { + typename RefGraph::Edge* st; + typename RefGraph::Edge* en; + EList* label_index; + EList* nodes; + EList* edges; + EList::Node>* ref_nodes; + }; + static void generateEdgesCounter(void* vp); + static void generateEdgesMaker(void* vp); + +private: + int nthreads; + bool verbose; + EList from_table; + EList past_nodes; + EList nodes; + EList edges; + index_t ranks; + index_t max_from; //number of nodes in RefGraph + index_t temp_nodes; // Total number of nodes created before sorting. + index_t generation; // Sorted by paths of length 2^generation. 
+ bool sorted; + + // For reporting GBWT char, F, and M values + index_t report_node_idx; + pair report_edge_range; + pair report_M; + // For reporting location in F corresponding to 1 bit in M + index_t report_F_node_idx; + index_t report_F_location; + + size_t max_num_nodes; + + // following variables are for debugging purposes +#ifndef NDEBUG + bool debug; +#endif + + EList bwt_string; + EList F_array; + EList M_array; + EList bwt_counts; + + // brute-force implementations + index_t select(const EList& array, index_t p, char c) { + if(p <= 0) return 0; + for(index_t i = 0; i < array.size(); i++) { + if(array[i] == c) { + assert_gt(p, 0); + p--; + if(p == 0) + return i; + } + } + return (index_t)array.size(); + } + + index_t select1(const EList& array, index_t p) { + return select(array, p, 1); + } + + index_t rank(const EList& array, index_t p, char c) { + index_t count = 0; + assert_lt(p, array.size()); + for(index_t i = 0; i <= p; i++) { + if(array[i] == c) + count++; + } + return count; + } + + index_t rank1(const EList& array, index_t p) { + return rank(array, p, 1); + } + + // for debugging purposes +#ifndef NDEBUG +public: EList > ftab; +#endif +}; + +//creates prefix-sorted PathGraph Nodes given a reverse determinized RefGraph +//outputs nodes sorted by their from attribute +template +PathGraph::PathGraph( + RefGraph& base, + const string& base_fname, + size_t max_num_nodes_, + int nthreads_, + bool verbose_) : +nthreads(nthreads_), verbose(verbose_), +ranks(0), temp_nodes(0), generation(0), sorted(false), +report_node_idx(0), report_edge_range(pair(0, 0)), report_M(pair(0, 0)), +report_F_node_idx(0), report_F_location(0), +max_num_nodes(max_num_nodes_) +{ +#ifndef NDEBUG + debug = base.nodes.size() <= 20; +#endif + // Fill nodes with a PathNode for each edge in base.edges. + // Set max_from. 
+ makeFromRef(base); + + // Write RefGraph into a file + const bool file_rf = base.nodes.size() > (1 << 22); + const bool bigEndian = false; + const string rf_fname = base_fname + ".rf"; + if(file_rf) { + base.write(rf_fname, bigEndian); + base.nullify(); + } + + // In the first generation the nodes enter, not quite sorted by from. + // We use a counting sort to sort the nodes, otherwise same as early generation. + generationOne(); + // In early generations no nodes become sorted. + // Therefore, we skip the pruning step and leave the + // nodes sorted by from. + while(generation < 3) { + earlyGeneration(); + } + // On the first generation we perform a pruning step, + // we are forced to sort the entire list of nodes by rank + // in order to perform pruning step. + firstPruneGeneration(); + // In later generations, most nodes are already sorted, so we + // perform a more expensive random access join with nodes in rank order + // in return for avoiding having to sort by rank in order to prune nodes. + while(!isSorted()) { + lateGeneration(); + } + // In the generateEdges method it is convenient to begin with nodes sorted by from. + // We perform this action here, while we still have past_nodes allocated to avoid + // an in-place sort. 
+ nodes.resizeNoCopyExact(past_nodes.size()); + radix_sort_copy(past_nodes.begin(), past_nodes.end(), nodes.ptr(), + &PathNodeFrom, max_from, nthreads); + past_nodes.nullify(); + from_table.nullify(); + + if(file_rf) { + base.read(rf_fname, bigEndian); + std::remove(rf_fname.c_str()); + } +} + +//make original unsorted PathNodes given a RefGraph +template +void PathGraph::makeFromRef(RefGraph& base) { + // Create a path node per edge with a key set to from node's label + temp_nodes = (index_t)base.edges.size() + 1; + max_from = 0; + nodes.reserveExact(temp_nodes); + for(index_t i = 0; i < base.edges.size(); i++) { + const typename RefGraph::Edge& e = base.edges[i]; + nodes.expand(); + nodes.back().from = e.from; + if(e.from > max_from) max_from = e.from; + nodes.back().to = e.to; + + switch(base.nodes[e.from].label) { + case 'A': + nodes.back().key = pair(0, 0); + break; + case 'C': + nodes.back().key = pair(1, 0); + break; + case 'G': + nodes.back().key = pair(2, 0); + break; + case 'T': + nodes.back().key = pair(3, 0); + break; + case 'Y': + nodes.back().key = pair(4, 0); + break; + default: + assert(false); + throw 1; + } + } + // Final node. 
+ assert_lt(base.lastNode, base.nodes.size()); + assert_eq(base.nodes[base.lastNode].label, 'Z'); + nodes.expand(); + nodes.back().from = nodes.back().to = base.lastNode; + if(base.lastNode > max_from) max_from = base.lastNode; + nodes.back().key = pair(5, 0); + printInfo(); +} + +template +void PathGraph::generationOne() { + //nodes enter almost sorted by from + //this is only generation method that whose + // incoming nodes are in the nodes EList + generation++; + //Sort nodes by from using counting sort + //Copy into past_nodes in the process + //first count number with each from value + for(PathNode* node = nodes.begin(); node != nodes.end(); node++) { + nodes[node->from].key.second++; + } + //convert into an index + index_t tot = nodes[0].key.second; + nodes[0].key.second = 0; + for(index_t i = 1; i < max_from + 2; i++) { + tot += nodes[i].key.second; + nodes[i].key.second = tot - nodes[i].key.second; + } + // use past_nodes as from_table + past_nodes.resizeExact(nodes.size()); + for(PathNode* node = nodes.begin(); node != nodes.end(); node++) { + past_nodes[nodes[node->from].key.second++] = *node; + } + //reset index + for(index_t i = max_from + 1; i > 0; i--) { + past_nodes[i].key.second = nodes[i - 1].key.second; + } + past_nodes[0].key.second = 0; + //Now query direct-access table + createNewNodes(); + printInfo(); + past_nodes.swap(nodes); +} + +template +void PathGraph::earlyGeneration() { + //past_nodes enter sorted by from + //do not yet need to perform pruning step + generation++; + for(index_t i = 0; i < past_nodes.size(); i++) { + past_nodes[past_nodes[i].from + 1].key.second = i + 1; + } + createNewNodes(); + printInfo(); + past_nodes.swap(nodes); +} + +template +void PathGraph::firstPruneGeneration() { + //past_nodes enter sorted by from + //first generation where we need to perform pruning step + // results in us needing to sort entirety of nodes after they are made + generation++; + //here past_nodes is already sorted by .from + // first count 
where to start each from value + time_t start = time(0); + //Build from_index + for(index_t i = 0; i < past_nodes.size(); i++) { + past_nodes[past_nodes[i].from + 1].key.second = i + 1; + } + if(verbose) cerr << "BUILT FROM_INDEX: " << time(0) - start << endl; + start = time(0); + // Now query against direct-access table + createNewNodes(); + past_nodes.resizeNoCopyExact(nodes.size()); + + if(verbose) cerr << "RESIZE NODES: " << time(0) - start << endl; + start = time(0); + + //max_rank always corresponds to repeated Z's + // Z is mapped to 0x101 + // therefore max rank = 101101101101101101101101 = (101) 8 times + index_t max_rank = 11983725; + radix_sort_copy, index_t>(nodes.begin(), nodes.end(), past_nodes.ptr(), + &PathNodeKey, max_rank, nthreads); + + if(verbose) cerr << "SORT NODES: " << time(0) - start << endl; + start = time(0); + + nodes.swap(past_nodes); + mergeUpdateRank(); + + if(verbose) cerr << "MERGE, UPDATE RANK: " << time(0) - start << endl; + start = time(0); + + printInfo(); + past_nodes.swap(nodes); +} + +template +void PathGraph::lateGeneration() { + //past_nodes enter sorted by rank + //build direct-access table sorted by from, + //but query with original nodes sorted by rank + //since nodes we query with are sorted by rank, + // the nodes produced are automatically sorted by key.first + // therefore we only need to sort clusters with same key.first + generation++; + time_t overall = time(0); + time_t indiv = time(0); + assert_gt(nthreads, 0); + assert_neq(past_nodes.size(), ranks); + from_table.resizeNoCopy(past_nodes.size()); + + if(verbose) cerr << "ALLOCATE FROM_TABLE: " << time(0) - indiv << endl; + indiv = time(0); + + radix_sort_copy(past_nodes.begin(), past_nodes.end(), from_table.ptr(), + &PathNodeFrom, max_from, nthreads); + + if(verbose) cerr << "BUILD TABLE: " << time(0) - indiv << endl; + indiv = time(0); + + //Build from_index + index_t from_table_size = from_table.size(); + for(index_t i = 0; i < from_table_size; i++) { + 
if(from_table[i].from + 1 >= from_table.size()) { + from_table.resize(from_table[i].from + 2); + } + from_table[from_table[i].from + 1].key.second = i + 1; + } + + if(verbose) cerr << "BUILD INDEX: " << time(0) - indiv << endl; + + createNewNodes(); + + indiv = time(0); + + mergeUpdateRank(); + + if(from_table_size != from_table.size()) { + assert_lt(from_table_size, from_table.size()); + from_table.resize(from_table_size); + } + + if(verbose) cerr << "MERGEUPDATERANK: " << time(0) - indiv << endl; + if(verbose) cerr << "TOTAL TIME: " << time(0) - overall << endl; + + if(ranks >= (index_t)max_num_nodes) { + throw ExplosionException(); + } + + printInfo(); + past_nodes.swap(nodes); +} + +//----------------------------------------------------------------------------------------------- + +template +void PathGraph::createNewNodesCounter(void* vp) { + CreateNewNodesParams* params = (CreateNewNodesParams*)vp; + PathNode* st = params->st; + PathNode* en = params->en; + PathGraph& graph = *(params->graph); + + size_t count = 0; + if(graph.generation > 4) { + for(PathNode* node = st; node != en; node++) { + if(node->isSorted()) { + count++; + } else { + count += graph.from_table[node->to + 1].key.second - graph.from_table[node->to].key.second; + } + } + } else { + for(PathNode* node = st; node != en; node++) { + count += graph.past_nodes[node->to + 1].key.second - graph.past_nodes[node->to].key.second; + } + } + *(params->sub_temp_nodes) = (index_t)count; + + //check for overflow + if(count > (index_t)-1) { + cerr << "exceeded integer bounds, remove adjacent SNPs, use haplotypes, or switch to a large index (--large-index)" << endl; + throw 1; + } +} +template +void PathGraph::createNewNodesMaker(void* vp) { + CreateNewNodesParams* params = (CreateNewNodesParams*)vp; + PathNode* st = params->st; + PathNode* en = params->en; + PathNode* curr = params->curr; + PathGraph& graph = *(params->graph); + if(graph.generation > 4) { + for(PathNode* node = st; node != en; node++) { + 
if(node->isSorted()) { + *curr++ = *node; + } else { + for(index_t j = graph.from_table[node->to].key.second; j < graph.from_table[node->to + 1].key.second; j++) { + curr->from = node->from; + curr->to = graph.from_table[j].to; + (curr++)->key = pair(node->key.first, graph.from_table[j].key.first); + } + } + } + } else if(graph.generation == 4) { + for(PathNode* node = st; node != en; node++) { + for(index_t j = graph.past_nodes[node->to].key.second; j < graph.past_nodes[node->to + 1].key.second; j++) { + curr->from = node->from; + curr->to = graph.past_nodes[j].to; + (curr++)->key = pair(node->key.first, graph.past_nodes[j].key.first); + } + } + } else { + for(PathNode* node = st; node != en; node++) { + for(index_t j = graph.past_nodes[node->to].key.second; j < graph.past_nodes[node->to + 1].key.second; j++) { + curr->from = node->from; + curr->to = graph.past_nodes[j].to; + index_t bit_shift = 1 << (graph.generation - 1); + bit_shift = (bit_shift << 1) + bit_shift; + (curr++)->key = pair((node->key.first << bit_shift) + graph.past_nodes[j].key.first, 0); + } + } + } +} + +template +void PathGraph::createNewNodes() { + time_t indiv = time(0); + AutoArray threads(nthreads); + EList params; params.resizeExact(nthreads); + EList sub_temp_nodes; sub_temp_nodes.resizeExact(nthreads); sub_temp_nodes.fillZero(); + PathNode* st = past_nodes.begin(); + PathNode* en = st + past_nodes.size() / nthreads; + for(int i = 0; i < nthreads; i++) { + params[i].sub_temp_nodes = &sub_temp_nodes[i]; + params[i].st = st; + params[i].en = en; + params[i].graph = this; + if(nthreads == 1) { + createNewNodesCounter((void*)¶ms[0]); + } else { + threads[i] = new tthread::thread(&createNewNodesCounter, (void*)¶ms[i]); + } + st = en; + if(i + 2 == nthreads) { + en = past_nodes.end(); + } else { + en = st + past_nodes.size() / nthreads; + } + } + + if(nthreads > 1) { + for(int i = 0; i < nthreads; i++) + threads[i]->join(); + } + if(verbose) cerr << "COUNTED NEW NODES: " << time(0) - indiv << 
endl; + indiv = time(0); + //update all label indexes + temp_nodes = 0; + for(int i = 0; i < nthreads; i++) { + // done to check if we exceed index_t range + size_t val = (size_t)temp_nodes + (size_t)sub_temp_nodes[i]; + if(val > (index_t)-1) { + cerr << "exceeded integer bounds, remove adjacent SNPs, use haplotypes, or switch to a large index (--large-index)" << endl; + throw 1; + } + temp_nodes = (index_t)val; + } + if(verbose) cerr << "COUNTED TEMP NODES: " << time(0) - indiv << endl; + indiv = time(0); + nodes.resizeNoCopyExact(temp_nodes); + if(verbose) cerr << "RESIZED NODES: " << time(0) - indiv << endl; + indiv = time(0); + temp_nodes = 0; + for(int i = 0; i < nthreads; i++) { + params[i].curr = nodes.begin() + temp_nodes; + temp_nodes += sub_temp_nodes[i]; + } + if(verbose) cerr << "RESIZED NODES: " << time(0) - indiv << endl; + indiv = time(0); + //make new nodes + for(int i = 0; i < nthreads; i++) { + if(nthreads == 1) { + createNewNodesMaker((void*)¶ms[0]); + } else { + threads[i] = new tthread::thread(&createNewNodesMaker, (void*)¶ms[i]); + } + } + + if(nthreads > 1) { + for(int i = 0; i < nthreads; i++) + threads[i]->join(); + } + if(verbose) cerr << "MADE NEW NODES: " << time(0) - indiv << endl; + indiv = time(0); +} + +//------------------------------------------------------------------------------------ + +template +void PathGraph::mergeUpdateRank() +{ + if(generation == 4) { + // Merge equivalent nodes + index_t curr = 0; + pair range(0, 0); // Empty range + while(true) { + range = nextMaximalSet(range); + if(range.first >= range.second) + break; + nodes[curr] = nodes[range.first]; curr++; + } + nodes.resize(curr); + + // Set nodes that become sorted as sorted + PathNode* candidate = &nodes.front(); + pair key = candidate->key; ranks = 1; + for(index_t i = 1; i < nodes.size(); i++) { + if(nodes[i].key != key) { + if(candidate != NULL) { + candidate->setSorted(); + } + candidate = &nodes[i]; + key = candidate->key; ranks++; + } else { + candidate = 
NULL; + } + } + if(candidate != NULL) { + candidate->setSorted(); + } + ranks = 0; + key = nodes.front().key; + for(index_t i = 0; i < nodes.size(); i++) { + PathNode& node = nodes[i]; + if(node.key != key) { + key = node.key; + ranks++; + } + node.key = pair(ranks, 0); + } + ranks++; + } else { + PathNode* block_start = nodes.begin(); + PathNode* curr = nodes.begin(); + PathNode* node = nodes.begin(); + ranks = 0; + do { + node++; + if(node == nodes.end() || node->key.first != block_start->key.first) { + if(node - block_start == 1) { + block_start->key.first = ranks++; + *curr++ = *block_start; + } else { + sort(block_start, node, PathNodeKeySecondCmp()); + while(block_start != node) { + //extend shift while share same key + index_t shift = 1; + while(block_start + shift != node && block_start->key == (block_start + shift)->key) { + shift++; + } + //check if all share .from + //if they share same from, then they are a mergable set + bool merge = true; + for(PathNode* n = block_start; n != (block_start + shift); n++) { + if(n->from != block_start->from) { + merge = false; + break; + } + } + //if not mergable, just write all to array + if(!merge) { + for(PathNode* n = block_start; n != (block_start + shift); n++) { + n->key.first = ranks; + *curr++ = *n; + } + ranks++; + } else if(curr == nodes.begin() || !(curr - 1)->isSorted() || (curr - 1)->from != block_start->from) { + block_start->setSorted(); + block_start->key.first = ranks++; + *curr++ = *block_start; + } + block_start += shift; + } + // if we are at the last node or the last node is mergable into the previous node, we are done + if(node == nodes.end()) break; + if(node + 1 == nodes.end()) { + assert(curr >= nodes.begin() + 1); + if((curr - 1)->isSorted() && node->from == (curr - 1)->from) + break; + } + // check if we can safely merge the node immediately following the unsorted cluster into the previous node + // must be that: + // 1) node is not itself part of an unsorted cluster + // 2) the previous node 
is sorted + // 3) the nodes share the same from attribute + assert(node + 1 < nodes.end()); + if(node->key.first != (node + 1)->key.first) { + assert(curr >= nodes.begin() + 1); + if((curr - 1)->isSorted() && node->from == (curr - 1)->from) + node++; + } + } + block_start = node; + } + } while(node != nodes.end()); + nodes.resizeExact((index_t)(curr - nodes.begin())); + } + // if all nodes have unique rank we are done! + if(ranks == nodes.size()) sorted = true; +} + + +// Returns the next maximal mergeable set of PathNodes. +// A set of PathNodes sharing adjacent keys is mergeable, if each of the +// PathNodes begins in the same GraphNode, and no other PathNode shares +// the key. If the maximal set is empty, returns the next PathNode. +template +pair PathGraph::nextMaximalSet(pair range) { + if(range.second >= nodes.size()) { + return pair(0, 0); + } + range.first = range.second; + range.second = range.first + 1; + if(range.first > 0 && nodes[range.first - 1].key == nodes[range.first].key) { + return range; + } + + for(index_t i = range.second; i < nodes.size(); i++) { + if(nodes[i - 1].key != nodes[i].key) { + range.second = i; + } + if(nodes[i].from != nodes[range.first].from) { + return range; + } + } + range.second = (index_t)nodes.size(); + return range; +} + +//----------------------------------------------------------------------------------------- + +template +void PathGraph::printInfo() +{ + if(verbose) { + cerr << "Generation " << generation + << " (" << temp_nodes << " -> " << nodes.size() << " nodes, " + << ranks << " ranks)" << endl; + } +} + +//------------------------------------------------------------------------------------------ + +template +void PathGraph::generateEdgesCounter(void* vp) { + GenEdgesParams* params = (GenEdgesParams*)vp; + typename RefGraph::Edge* st = params->st; + typename RefGraph::Edge* en = params->en; + EList& label_index = *(params->label_index); + EList::Node>& ref_nodes = *(params->ref_nodes); + EList& nodes = 
*(params->nodes); + //first count edges, fill out label_index + for(typename RefGraph::Edge* edge = st; edge != en; edge++) { + char curr_label = ref_nodes[edge->from].label; + int curr_label_index; + switch(curr_label) { + case 'A': curr_label_index = 0; break; + case 'C': curr_label_index = 1; break; + case 'G': curr_label_index = 2; break; + case 'T': curr_label_index = 3; break; + case 'Y': curr_label_index = 4; break; + case 'Z': curr_label_index = 5; break; + default: assert(false); throw 1; + } + assert_lt(edge->to + 1, nodes.size()); + assert_lt(nodes[edge->to].key.second, nodes[edge->to + 1].key.second); + label_index[curr_label_index] += nodes[edge->to + 1].key.second - nodes[edge->to].key.second; + } +} + +template +void PathGraph::generateEdgesMaker(void* vp) { + GenEdgesParams* params = (GenEdgesParams*)vp; + typename RefGraph::Edge* st = params->st; + typename RefGraph::Edge* en = params->en; + EList& label_index = *(params->label_index); + EList::Node>& ref_nodes = *(params->ref_nodes); + EList& edges = *(params->edges); + EList& nodes = *(params->nodes); + for(typename RefGraph::Edge* edge = st; edge != en; edge++) { + char curr_label = ref_nodes[edge->from].label; + int curr_label_index; + switch(curr_label) { + case 'A': curr_label_index = 0; break; + case 'C': curr_label_index = 1; break; + case 'G': curr_label_index = 2; break; + case 'T': curr_label_index = 3; break; + case 'Y': curr_label_index = 4; break; + case 'Z': curr_label_index = 5; break; + default: assert(false); throw 1; + } + for(index_t j = nodes[edge->to].key.second; j < nodes[edge->to + 1].key.second; j++) { + edges[label_index[curr_label_index]++] = PathEdge(edge->from, nodes[j].key.first, curr_label); + } + } +} + +template +bool PathGraph::generateEdges(RefGraph& base) +{ + //entering we have: + // nodes - sorted by from + // edges - empty + // base.nodes - almost sorted by from/to + // base.edges - almost sorted by from/to + + //need to join: + // nodes.from -> base.nodes[] + 
// nodes.from -> base.edges.to + // nodes.from -> edges.from + + if(!sorted) return false; + + time_t indiv = time(0); + time_t overall = time(0); + + //replace nodes.to with genomic position + //fast because both roughly ordered by from + for(PathNode* node = nodes.begin(); node != nodes.end(); node++) { + node->to = base.nodes[node->from].value; + } + + if(verbose) cerr << "NODE.TO -> GENOME POS: " << time(0) - indiv << endl; + indiv = time(0); + + // build an index for nodes + index_t node_size = nodes.size(); + for(index_t i = 0; i < node_size; i++) { + // very rare case where the number of prefix-sorted nodes is smaller than the number of the initial nodes + // , which could happen with a very small graph and a variant as follows + // ATAGAGCAGTTCTGAAAAACACTTTTTGTTGAATCTGCAAG(T)GGACATTTGGATAGATTTGAAGATTTCGTTGGAAACGGGAATATCTTCATATCAAATG + // (G) + // where G(T) and G(G) will be combined as there is no other node that intervene those two path nodes. + if(nodes[i].from + 1 >= nodes.size()) { + nodes.resize(nodes[i].from + 2); + } + nodes[nodes[i].from + 1].key.second = i + 1; + } + + if(verbose) cerr << "BUILD FROM_INDEX " << time(0) - indiv << endl; + indiv = time(0); + + // Now join nodes.from to edges.to + // fast because base.edges roughly sorted by to + + //count number of edges + AutoArray threads(nthreads); + EList params; params.resizeExact(nthreads); + ELList label_index; label_index.resize(nthreads); + typename RefGraph::Edge* st = base.edges.begin(); + typename RefGraph::Edge* en = st + base.edges.size() / nthreads; + for(int i = 0; i < nthreads; i++) { + label_index[i].resizeExact(6); + label_index[i].fillZero(); + params[i].label_index = &label_index[i]; + params[i].st = st; + params[i].en = en; + params[i].nodes = &nodes; + params[i].edges = &edges; + params[i].ref_nodes = &base.nodes; + if(nthreads == 1) { + generateEdgesCounter((void*)¶ms[0]); + } else { + threads[i] = new tthread::thread(&generateEdgesCounter, (void*)¶ms[i]); + } + st = en; + 
if(i + 2 == nthreads) { + en = base.edges.end(); + } else { + en = st + base.edges.size() / nthreads; + } + } + + if(nthreads > 1) { + for(int i = 0; i < nthreads; i++) + threads[i]->join(); + } + + if(verbose) cerr << "COUNTED NEW EDGES: " << time(0) - indiv << endl; + indiv = time(0); + //update all label indexes + index_t tot = label_index[0][0]; + label_index[0][0] = 0; + for(int i = 1; i < nthreads; i++) { + tot += label_index[i][0]; + label_index[i][0] = tot - label_index[i][0]; + } + for(int j = 1; j < 6; j++) { + for(int i = 0; i < nthreads; i++) { + tot += label_index[i][j]; + label_index[i][j] = tot - label_index[i][j]; + } + } + edges.resizeExact(tot); + //make new edges + for(int i = 0; i < nthreads; i++) { + if(nthreads == 1) { + generateEdgesMaker((void*)¶ms[0]); + } else { + threads[i] = new tthread::thread(&generateEdgesMaker, (void*)¶ms[i]); + } + } + + if(nthreads > 1) { + for(int i = 0; i < nthreads; i++) { + threads[i]->join(); + } + } + base.nullify(); + + // delete unused nodes + if(node_size != nodes.size()) { + assert_lt(node_size, nodes.size()); + nodes.resize(node_size); + } + + if(verbose) cerr << "MADE NEW EDGES: " << time(0) - indiv << endl; + indiv = time(0); + + EList& index = label_index[nthreads - 1]; + + EList temp_edges; temp_edges.resizeExact(edges.size()); + + radix_sort_copy, index_t>(edges.begin() , edges.begin() + index[0], temp_edges.ptr(), + &PathEdgeTo, (index_t)nodes.size(), nthreads); + radix_sort_copy, index_t>(edges.begin() + index[0], edges.begin() + index[1], temp_edges.ptr() + index[0], + &PathEdgeTo, (index_t)nodes.size(), nthreads); + radix_sort_copy, index_t>(edges.begin() + index[1], edges.begin() + index[2], temp_edges.ptr() + index[1], + &PathEdgeTo, (index_t)nodes.size(), nthreads); + radix_sort_copy, index_t>(edges.begin() + index[2], edges.begin() + index[3], temp_edges.ptr() + index[2], + &PathEdgeTo, (index_t)nodes.size(), nthreads); + for(index_t i = index[3]; i < edges.size(); i++) { + temp_edges[i] = 
edges[i]; + } + sort(temp_edges.begin() + index[3], temp_edges.begin() + index[4]); + sort(temp_edges.begin() + index[4], temp_edges.begin() + index[5]); + edges.xfer(temp_edges); + + if(verbose) cerr << "SORTED NEW EDGES: " << time(0) - indiv << endl; + indiv = time(0); + + EList past_nodes; past_nodes.resizeExact(nodes.size()); + radix_sort_copy, index_t>(nodes.begin(), nodes.end(), past_nodes.ptr(), &PathNodeKey, ranks, nthreads); + nodes.xfer(past_nodes); + + if(verbose) cerr << "RE-SORTED NODES: " << time(0) - indiv << endl; + indiv = time(0); + +#ifndef NDEBUG + if(debug) { + cerr << "just after creating path edges" << endl; + cerr << "Ref edges" << endl; + for(size_t i = 0; i < base.edges.size(); i++) { + const typename RefGraph::Edge& edge = base.edges[i]; + cerr << "\t" << i << "\t" << edge.from << " --> " << edge.to << endl; + } + + cerr << "Path nodes" << endl; + for(size_t i = 0; i < nodes.size(); i++) { + const PathNode& node = nodes[i]; + cerr << "\t" << i << "\t(" << node.key.first << ", " << node.key.second << ")\t" + << node.from << " --> " << node.to << endl; + } + + cerr << "Path edges" << endl; + for(size_t i = 0; i < edges.size(); i++) { + const PathEdge& edge = edges[i]; + cerr << "\t" << i << "\tfrom: " << edge.from << "\tranking: " << edge.ranking << "\t" << edge.label << endl; + } + } +#endif + +#ifndef NDEBUG + + // Switch char array[x][y]; to char** array; + if(debug) { + cerr << "after sorting nodes by ranking and edges by label and ranking" << endl; + cerr << "Path nodes" << endl; + for(size_t i = 0; i < nodes.size(); i++) { + const PathNode& node = nodes[i]; + cerr << "\t" << i << "\t(" << node.key.first << ", " << node.key.second << ")\t" + << node.from << " --> " << node.to << endl; + } + + cerr << "Path edges" << endl; + for(size_t i = 0; i < edges.size(); i++) { + const PathEdge& edge = edges[i]; + cerr << "\t" << i << "\tfrom: " << edge.from << "\tranking: " << edge.ranking << "\t" << edge.label << endl; + } + } +#endif + + // 
Sets PathNode.to = GraphNode.value and PathNode.key.first to outdegree + // Replaces (from.from, to) with (from, to) + { + PathNode* node = nodes.begin(); node->key.first = 0; + PathEdge* edge = edges.begin(); + while(node != nodes.end() && edge != edges.end()) { + if(edge->from == node->from) { + edge->from = (index_t)(node - nodes.begin()); edge++; + node->key.first++; + } else { + node++; node->key.first = 0; + } + } + } + + if(verbose) cerr << "PROCESS EDGES: " << time(0) - indiv << endl; + indiv = time(0); + + // Remove 'Y' node + assert_gt(nodes.size(), 2); + nodes.back().key.first = nodes[nodes.size() - 2].key.first; + nodes[nodes.size() - 2] = nodes.back(); + nodes.pop_back(); + // Adjust edges accordingly + for(size_t i = 0; i < edges.size(); i++) { + PathEdge& edge = edges[i]; + if(edge.label == 'Y') { + edge.label = 'Z'; + } else if(edge.ranking >= nodes.size()) { + assert_eq(edge.ranking, nodes.size()); + edge.ranking -= 1; + } + } + if(verbose) cerr << "REMOVE Y: " << time(0) - indiv << endl; + indiv = time(0); + +#ifndef NDEBUG + if(debug) { + cerr << "Path nodes" << endl; + for(size_t i = 0; i < nodes.size(); i++) { + const PathNode& node = nodes[i]; + cerr << "\t" << i << "\t(" << node.key.first << ", " << node.key.second << ")\t" + << node.from << " --> " << node.to << endl; + } + + cerr << "Path edges" << endl; + for(size_t i = 0; i < edges.size(); i++) { + const PathEdge& edge = edges[i]; + cerr << "\t" << i << "\tfrom: " << edge.from << "\tranking: " << edge.ranking << "\t" << edge.label << endl; + } + } +#endif + temp_edges.resizeExact(edges.size()); + radix_sort_copy(edges.begin(), edges.end(), temp_edges.ptr(), &PathEdgeTo, (index_t)nodes.size(), nthreads); + edges.xfer(temp_edges); + for(index_t i = 0; i < edges.size(); i++) { + nodes[edges[i].ranking].key.second = i + 1; + } + + if(verbose) cerr << "SORT, Make index: " << time(0) - indiv << endl; + if(verbose) cerr << "TOTAL: " << time(0) - overall << endl; + + return true; + 
+//----------------------------------------------------------------------------------------------------- + bwt_string.clear(); + F_array.clear(); + M_array.clear(); + bwt_counts.resizeExact(5); bwt_counts.fillZero(); + for(index_t node = 0; node < nodes.size(); node++) { + pair edge_range = getEdges(node, false /* from? */); + for(index_t i = edge_range.first; i < edge_range.second; i++) { + assert_lt(i, edges.size()); + char label = edges[i].label; + if(label == 'Y') { + label = 'Z'; + } + bwt_string.push_back(label); + F_array.push_back(i == edge_range.first ? 1 : 0); + + if(label != 'Z') { + char nt = asc2dna[(int)label]; + assert_lt(nt + 1, bwt_counts.size()); + bwt_counts[nt + 1]++; + } + } + for(index_t i = 0; i < nodes[node].key.first; i++) { + M_array.push_back(i == 0 ? 1 : 0); + } + } + assert_gt(bwt_string.size(), 0); + assert_eq(bwt_string.size(), F_array.size()); + assert_eq(bwt_string.size(), M_array.size()); + + for(size_t i = 0; i < bwt_counts.size(); i++) { + if(i > 0) bwt_counts[i] += bwt_counts[i - 1]; + } + +#ifndef NDEBUG + if(debug) { + cerr << "Path nodes (final)" << endl; + for(size_t i = 0; i < nodes.size(); i++) { + const PathNode& node = nodes[i]; + cerr << "\t" << i << "\t(" << node.key.first << ", " << node.key.second << ")\t" + << node.from << " --> " << node.to << endl; + } + + cerr << "Path edges (final)" << endl; + for(size_t i = 0; i < edges.size(); i++) { + const PathEdge& edge = edges[i]; + cerr << "\t" << i << "\tfrom: " << edge.from << "\tranking: " << edge.ranking << "\t" << edge.label << endl; + } + + cerr << "i\tBWT\tF\tM" << endl; + for(index_t i = 0; i < bwt_string.size(); i++) { + cerr << i << "\t" << bwt_string[i] << "\t" // BWT char + << (int)F_array[i] << "\t" // F bit value + << (int)M_array[i] << endl; // M bit value + } + + for(size_t i = 0; i < bwt_counts.size(); i++) { + cerr << i << "\t" << bwt_counts[i] << endl; + } + } +#endif + + // Test searches, based on paper_example +#if 1 + EList queries; EList answers; +# 
if 1 +# if 1 + queries.push_back("GACGT"); answers.push_back(9); + queries.push_back("GATGT"); answers.push_back(9); + queries.push_back("GACT"); answers.push_back(9); + queries.push_back("ATGT"); answers.push_back(4); + queries.push_back("GTAC"); answers.push_back(10); + queries.push_back("ACTG"); answers.push_back(3); +# else + // rs55902548, at 402, ref, alt, unknown alt + queries.push_back("GGCAGCTCCCATGGGTACACACTGGGCCCAGAACTGGGATGGAGGATGCA"); + // queries.push_back("GGCAGCTCCCATGGGTACACACTGGTCCCAGAACTGGGATGGAGGATGCA"); + // queries.push_back("GGCAGCTCCCATGGGTACACACTGGACCCAGAACTGGGATGGAGGATGCA"); + + // rs5759268, at 926787, ref, alt, unknown alt + // queries.push_back("AAATTGCTCAGCCTTGTGCTGTGCACACCTGGTTCTCTTTCCAGTGTTAT"); + // queries.push_back("AAATTGCTCAGCCTTGTGCTGTGCATACCTGGTTCTCTTTCCAGTGTTAT"); + // queries.push_back("AAATTGCTCAGCCTTGTGCTGTGCAGACCTGGTTCTCTTTCCAGTGTTAT"); +# endif + + for(size_t q = 0; q < queries.size(); q++) { + const string& query = queries[q]; + assert_gt(query.length(), 0); + index_t top = 0, bot = edges.size(); + index_t node_top = 0, node_bot = 0; + cerr << "Aligning " << query << endl; + index_t i = 0; + for(; i < query.length(); i++) { + if(top >= bot) break; + int nt = query[query.length() - i - 1]; + nt = asc2dna[nt]; + assert_lt(nt, 4); + + cerr << "\t" << i << "\tBWT range: [" << top << ", " << bot << ")" << endl; + + top = bwt_counts[(int)nt] + (top <= 0 ? 0 : rank(bwt_string, top - 1, "ACGT"[nt])); + bot = bwt_counts[(int)nt] + rank(bwt_string, bot - 1, "ACGT"[nt]); + cerr << "\t\tLF BWT range: [" << top << ", " << bot << ")" << endl; + + node_top = rank1(M_array, top) - 1; + node_bot = rank1(M_array, bot - 1); + cerr << "\t\tnode range: [" << node_top << ", " << node_bot << ")" << endl; + + top = select1(F_array, node_top + 1); + bot = select1(F_array, node_bot + 1); + } + cerr << "\t" << i << "\tBWT range: [" << top << ", " << bot << ")" << endl; + // assert_eq(top, answers[q]); + cerr << "finished... 
"; + if(node_top < node_bot && node_top < nodes.size()) { + index_t pos = nodes[node_top].to; + index_t gpos = pos; + const EList& szs = base.szs; + for(index_t i = 0; i < szs.size(); i++) { + gpos += szs[i].off; + if(pos < szs[i].len) break; + pos -= szs[i].len; + } + + cerr << "being aligned at " << gpos; + } + cerr << endl << endl; + } +# endif + + // See inconsistencies between F and M arraystimy thread +# if 0 + cerr << endl << endl; + EList tmp_F; + for(index_t i = 0; i < F_array.size(); i++) { + if(F_array[i] == 1) tmp_F.push_back(i); + } + + EList tmp_M; + for(index_t i = 0; i < M_array.size(); i++) { + if(M_array[i] == 1) tmp_M.push_back(i); + } + + index_t max_diff = 0; + assert_eq(tmp_F.size(), tmp_M.size()); + for(index_t i = 0; i < tmp_F.size(); i++) { + index_t diff = (tmp_F[i] >= tmp_M[i] ? tmp_F[i] - tmp_M[i] : tmp_M[i] - tmp_F[i]); + if(diff > max_diff) { + max_diff = diff; + cerr << i << "\tdiff: " << max_diff << "\t" << (tmp_F[i] >= tmp_M[i] ? "+" : "-") << endl; + } + } + cerr << "Final: " << tmp_F.back() << " vs. " << tmp_M.back() << endl; +# endif + +#endif + return true; +} + +//-------------------------------------------------------------------------- + +template +pair PathGraph::getEdges(index_t node, bool by_from) { + if(node >= nodes.size()) { + cerr << "Error: Trying to get edges " << (by_from ? "from " : "to ") << node << endl; + } + if(nodes[node].key.second == 0) { + return pair(0, 0); + } + if(node == 0) { + return pair(0, nodes[node].key.second); + } else { + return pair(nodes[node - 1].key.second, nodes[node].key.second); + } +} + +#endif /*GBWT_GRAPH_H_*/ diff --git a/gfm.cpp b/gfm.cpp new file mode 100644 index 0000000..9637020 --- /dev/null +++ b/gfm.cpp @@ -0,0 +1,72 @@ +/* + * Copyright 2015, Daehwan Kim + * + * This file is part of HISAT 2. 
+ *
+ * HISAT 2 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HISAT 2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HISAT 2. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// NOTE(review): the header name of the next #include was lost when this
+// text was extracted -- restore it from the upstream file.
+#include
+#include "gfm.h"
+
+using namespace std;
+
+// File-name extension of the index files; the 64-bit ("large") index
+// build uses a different extension from the default build.
+#ifdef BOWTIE_64BIT_INDEX
+
+const std::string gfm_ext("ht2l");
+
+#else
+
+const std::string gfm_ext("ht2");
+
+#endif // BOWTIE_64BIT_INDEX
+
+string gLastIOErrMsg;
+
+/**
+ * Locate the HISAT2 index specified by the user and return the basename
+ * that worked.
+ *
+ * Two candidates are probed, in order, by trying to open
+ * "<candidate>.1.<gfm_ext>":
+ *   1. gfmFileBase exactly as given;
+ *   2. "$HISAT2_INDEXES/" + gfmFileBase, only if the HISAT2_INDEXES
+ *      environment variable is set.
+ *
+ * No other location is searched.  The cmdline argument is not used.
+ * When verbose is true each attempt is reported on stdout.
+ *
+ * If neither candidate can be opened, an error is printed to stderr and
+ * the integer 1 is thrown.
+ */
+string adjustEbwtBase(const string& cmdline,
+                      const string& gfmFileBase,
+                      bool verbose)
+{
+    string str = gfmFileBase;
+    ifstream in;
+    if(verbose) cout << "Trying " << str.c_str() << endl;
+    // First candidate: the basename as given.
+    in.open((str + ".1." + gfm_ext).c_str(), ios_base::in | ios::binary);
+    if(!in.is_open()) {
+        if(verbose) cout << " didn't work" << endl;
+        in.close();
+        // Second candidate: the basename below $HISAT2_INDEXES.
+        if(getenv("HISAT2_INDEXES") != NULL) {
+            str = string(getenv("HISAT2_INDEXES")) + "/" + gfmFileBase;
+            if(verbose) cout << "Trying " << str.c_str() << endl;
+            in.open((str + ".1." + gfm_ext).c_str(), ios_base::in | ios::binary);
+            if(!in.is_open()) {
+                if(verbose) cout << " didn't work" << endl;
+                in.close();
+            } else {
+                if(verbose) cout << " worked" << endl;
+            }
+        }
+    }
+    // The stream is only a probe; it is closed by its destructor on return.
+    if(!in.is_open()) {
+        cerr << "Could not locate a HISAT2 index corresponding to basename \"" << gfmFileBase.c_str() << "\"" << endl;
+        throw 1;
+    }
+    return str;
+}
diff --git a/gfm.h b/gfm.h
new file mode 100644
index 0000000..3d8fd2e
--- /dev/null
+++ b/gfm.h
@@ -0,0 +1,6980 @@
+/*
+ * Copyright 2015, Daehwan Kim
+ *
+ * This file is part of HISAT 2.
+ *
+ * HISAT 2 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HISAT 2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HISAT 2. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GFM_H_
+#define GFM_H_
+
+// NOTE(review): the header names of the bare #include lines below were
+// lost when this text was extracted -- restore them from the upstream file.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef BOWTIE_MM
+#include
+#include
+#endif
+#include "shmem.h"
+#include "alphabet.h"
+#include "assert_helpers.h"
+#include "bitpack.h"
+#include "blockwise_sa.h"
+#include "endian_swap.h"
+#include "word_io.h"
+#include "random_source.h"
+#include "ref_read.h"
+#include "threading.h"
+#include "str_util.h"
+#include "mm.h"
+#include "timer.h"
+#include "reference.h"
+#include "search_globals.h"
+#include "ds.h"
+#include "random_source.h"
+#include "mem_ids.h"
+#include "btypes.h"
+#include "tokenize.h"
+#include "repeat.h"
+#include "repeat_kmer.h"
+
+#ifdef POPCNT_CAPABILITY
+#include "processor_support.h"
+#endif
+
+#include "gbwt_graph.h"
+
+using namespace std;
+
+// From ccnt_lut.cpp, automatically generated by gen_lookup_tables.pl
+extern uint8_t cCntLUT_4[4][4][256];
+extern uint8_t cCntLUT_4_rev[4][4][256];
+extern uint8_t cCntBIT[8][256];
+
+extern bool threeN;
+
+// XOR masks indexed by a 2-bit character code c.  XOR-ing a packed 64-bit
+// word with c_table[c] turns every bit-pair equal to c into 11
+// (00^11, 01^10, 10^01, 11^00), which is what countInU64() then counts.
+static const uint64_t c_table[4] = {
+    0xffffffffffffffff,
+    0xaaaaaaaaaaaaaaaa,
+    0x5555555555555555,
+    0x0000000000000000
+};
+
+// VMSG_NL / VMSG: if this->verbose() is true, stream the arguments into a
+// temporary stringstream and hand the text to this->verbose(string).
+// VMSG_NL appends endl; VMSG does not.  Both use `this`, so they can only
+// be used inside member functions of a class providing those overloads.
+#ifndef VMSG_NL
+#define VMSG_NL(...) \
+if(this->verbose()) { \
+    stringstream tmp; \
+    tmp << __VA_ARGS__ << endl; \
+    this->verbose(tmp.str()); \
+}
+#endif
+
+#ifndef VMSG
+#define VMSG(...) \
+if(this->verbose()) { \
+    stringstream tmp; \
+    tmp << __VA_ARGS__; \
+    this->verbose(tmp.str()); \
+}
+#endif
+
+/**
+ * Flags describing type of Ebwt.
+ */
+enum GFM_FLAGS {
+    GFM_ENTIRE_REV = 4 // true -> reverse Ebwt is the whole
+                       // concatenated string reversed, rather than
+                       // each stretch reversed
+};
+
+/**
+ * Extended Burrows-Wheeler transform header. This together with the
+ * actual data arrays and other text-specific parameters defined in
+ * class Ebwt constitute the entire Ebwt.
+ */
+template <typename index_t>
+class GFMParams {
+
+public:
+    /// Default constructor; leaves every field unset until init() is called.
+    GFMParams() { }
+
+    /// Construct and derive all dependent fields; see init().
+    GFMParams(
+        index_t len,
+        index_t gbwtLen,
+        index_t numNodes,
+        int32_t lineRate,
+        int32_t offRate,
+        int32_t ftabChars,
+        index_t eftabLen,
+        bool entireReverse)
+    {
+        init(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireReverse);
+    }
+
+    /// Copy by re-deriving everything from the other object's primary
+    /// fields (its current _offRate becomes this object's _origOffRate).
+    GFMParams(const GFMParams& gh) {
+        init(gh._len, gh._gbwtLen, gh._numNodes, gh._lineRate, gh._offRate,
+             gh._ftabChars, gh._eftabLen, gh._entireReverse);
+    }
+
+    /**
+     * Store the primary parameters and compute every derived size.
+     *
+     * A gbwtLen of 0 means "len + 1", and a numNodes of 0 means "len + 1".
+     * The index is treated as a linear FM index when gbwtLen is 0 or
+     * equals len + 1; the linear and graph layouts use different packing
+     * densities and per-side overheads (see the two _linearFM branches).
+     */
+    void init(
+        index_t len,
+        index_t gbwtLen,
+        index_t numNodes,
+        int32_t lineRate,
+        int32_t offRate,
+        int32_t ftabChars,
+        index_t eftabLen,
+        bool entireReverse)
+    {
+        _entireReverse = entireReverse;
+        _linearFM = (len + 1 == gbwtLen || gbwtLen == 0);
+        _len = len;
+        _gbwtLen = (gbwtLen == 0 ? len + 1 : gbwtLen);
+        _numNodes = (numNodes == 0 ? len + 1 : numNodes);
+        if(_linearFM) {
+            _sz = (len+3)/4;
+            _gbwtSz = _gbwtLen/4 + 1;
+        } else {
+            _sz = (len+1)/2;
+            _gbwtSz = _gbwtLen/2 + 1;
+        }
+        _lineRate = lineRate;
+        _origOffRate = offRate;
+        _offRate = offRate;
+        _offMask = std::numeric_limits<index_t>::max() << _offRate;
+        _ftabChars = ftabChars;
+        _eftabLen = eftabLen;
+        _eftabSz = _eftabLen*sizeof(index_t);
+        _ftabLen = (1 << (_ftabChars*2))+1;
+        _ftabSz = _ftabLen*sizeof(index_t);
+        // One sample every 2^_offRate nodes, rounded up.
+        _offsLen = (_numNodes + (1 << _offRate) - 1) >> _offRate;
+        _offsSz = _offsLen*sizeof(index_t);
+        _lineSz = 1 << _lineRate;
+        _sideSz = _lineSz * 1 /* lines per side */;
+        if(_linearFM) {
+            // Each side reserves 4 index_t words; 4 characters per byte.
+            _sideGbwtSz = _sideSz - (sizeof(index_t) * 4);
+            _sideGbwtLen = _sideGbwtSz << 2;
+        } else {
+            // Each side reserves 6 index_t words; 2 characters per byte.
+            _sideGbwtSz = _sideSz - (sizeof(index_t) * 6);
+            _sideGbwtLen = _sideGbwtSz << 1;
+        }
+        _numSides = (_gbwtSz+(_sideGbwtSz)-1)/(_sideGbwtSz);
+        _numLines = _numSides * 1 /* lines per side */;
+        _gbwtTotLen = _numSides * _sideSz;
+        _gbwtTotSz = _gbwtTotLen;
+        assert(repOk());
+    }
+
+    index_t len() const { return _len; }
+    index_t lenNucs() const { return _len; }
+    index_t gbwtLen() const { return _gbwtLen; }
+    index_t sz() const { return _sz; }
+    index_t gbwtSz() const { return _gbwtSz; }
+    int32_t lineRate() const { return _lineRate; }
+    int32_t origOffRate() const { return _origOffRate; }
+    int32_t offRate() const { return _offRate; }
+    index_t offMask() const { return _offMask; }
+    int32_t ftabChars() const { return _ftabChars; }
+    index_t eftabLen() const { return _eftabLen; }
+    index_t eftabSz() const { return _eftabSz; }
+    index_t ftabLen() const { return _ftabLen; }
+    index_t ftabSz() const { return _ftabSz; }
+    index_t offsLen() const { return _offsLen; }
+    index_t offsSz() const { return _offsSz; }
+    index_t lineSz() const { return _lineSz; }
+    index_t sideSz() const { return _sideSz; }
+    index_t sideGbwtSz() const { return _sideGbwtSz; }
+    /// Misspelled historical name of sideGbwtSz(); kept for existing callers.
+    index_t sideGbtSz() const { return _sideGbwtSz; }
+    index_t sideGbwtLen() const { return _sideGbwtLen; }
+    index_t numSides() const { return _numSides; }
+    index_t numLines() const { return _numLines; }
+    index_t gbwtTotLen() const { return _gbwtTotLen; }
+    index_t gbwtTotSz() const { return _gbwtTotSz; }
+    bool entireReverse() const { return _entireReverse; }
+    bool linearFM() const { return _linearFM; }
+    index_t numNodes() const { return _numNodes; }
+
+    /**
+     * Set a new suffix-array sampling rate, which involves updating
+     * rate, mask, sample length, and sample size.  _origOffRate is left
+     * untouched.
+     */
+    void setOffRate(int newOffRate) {
+        _offRate = newOffRate;
+        _offMask = std::numeric_limits<index_t>::max() << _offRate;
+        // NOTE(review): init() derives _offsLen from _numNodes, but this
+        // uses _gbwtLen; the two differ for graph indexes -- confirm which
+        // is intended before relying on offsLen() after an override.
+        _offsLen = (_gbwtLen + (1 << _offRate) - 1) >> _offRate;
+        _offsSz = _offsLen * sizeof(index_t);
+    }
+
+#ifndef NDEBUG
+    /// Check that this GFMParams is internally consistent
+    bool repOk() const {
+        // assert_gt(_len, 0);
+        assert_gt(_lineRate, 3);
+        assert_geq(_offRate, 0);
+        assert_leq(_ftabChars, 16);
+        assert_geq(_ftabChars, 1);
+        assert_lt(_lineRate, 32);
+        assert_lt(_ftabChars, 32);
+        assert_eq(0, _gbwtTotSz % _lineSz);
+        return true;
+    }
+#endif
+
+    /**
+     * Pretty-print the header contents to the given output stream.
+     */
+    void print(ostream& out) const {
+        out << "Headers:" << endl
+            << " len: " << _len << endl
+            << " gbwtLen: " << _gbwtLen << endl
+            << " nodes: " << _numNodes << endl
+            << " sz: " << _sz << endl
+            << " gbwtSz: " << _gbwtSz << endl
+            << " lineRate: " << _lineRate << endl
+            << " offRate: " << _offRate << endl
+            << " offMask: 0x" << hex << _offMask << dec << endl
+            << " ftabChars: " << _ftabChars << endl
+            << " eftabLen: " << _eftabLen << endl
+            << " eftabSz: " << _eftabSz << endl
+            << " ftabLen: " << _ftabLen << endl
+            << " ftabSz: " << _ftabSz << endl
+            << " offsLen: " << _offsLen << endl
+            << " offsSz: " << _offsSz << endl
+            << " lineSz: " << _lineSz << endl
+            << " sideSz: " << _sideSz << endl
+            << " sideGbwtSz: " << _sideGbwtSz << endl
+            << " sideGbwtLen: " << _sideGbwtLen << endl
+            << " numSides: " << _numSides << endl
+            << " numLines: " << _numLines << endl
+            << " gbwtTotLen: " << _gbwtTotLen << endl
+            << " gbwtTotSz: " << _gbwtTotSz << endl
+            << " reverse: " << _entireReverse << endl
+            << " linearFM: " << (_linearFM ? "Yes" : "No") << endl;
+    }
+
+    index_t _len;
+    index_t _gbwtLen;
+    index_t _sz;
+    index_t _gbwtSz;
+    int32_t _lineRate;
+    int32_t _origOffRate;
+    int32_t _offRate;
+    index_t _offMask;
+    int32_t _ftabChars;
+    index_t _eftabLen;
+    index_t _eftabSz;
+    index_t _ftabLen;
+    index_t _ftabSz;
+    index_t _offsLen;
+    index_t _offsSz;
+    index_t _lineSz;
+    index_t _sideSz;
+    index_t _sideGbwtSz;
+    index_t _sideGbwtLen;
+    index_t _numSides;
+    index_t _numLines;
+    index_t _gbwtTotLen;
+    index_t _gbwtTotSz;
+    bool _entireReverse;
+    bool _linearFM;
+    index_t _numNodes;
+};
+
+/**
+ * Exception to throw when a file-related error occurs.
+ */
+class GFMFileOpenException : public std::runtime_error {
+public:
+    GFMFileOpenException(const std::string& msg = "") :
+    std::runtime_error(msg) { }
+};
+
+/**
+ * Calculate size of file with given name.
+ */ +static inline int64_t fileSize(const char* name) { + std::ifstream f; + f.open(name, std::ios_base::binary | std::ios_base::in); + if (!f.good() || f.eof() || !f.is_open()) { return 0; } + f.seekg(0, std::ios_base::beg); + std::ifstream::pos_type begin_pos = f.tellg(); + f.seekg(0, std::ios_base::end); + return static_cast(f.tellg() - begin_pos); +} + +/** + * Encapsulates a location in the gbwt text in terms of the side it + * occurs in and its offset within the side. + */ +template +struct SideLocus { + SideLocus() : + _sideByteOff(0), + _sideNum(0), + _charOff(0), + _by(-1), + _bp(-1) { } + + /** + * Construct from row and other relevant information about the Ebwt. + */ + SideLocus(index_t row, const GFMParams& ep, const uint8_t* ebwt) { + initFromRow(row, ep, ebwt); + } + + /** + * Init two SideLocus objects from a top/bot pair, using the result + * from one call to initFromRow to possibly avoid a second call. + */ + static void initFromTopBot( + index_t top, + index_t bot, + const GFMParams& gp, + const uint8_t* gfm, + SideLocus& ltop, + SideLocus& lbot) + { + const index_t sideGbwtLen = gp._sideGbwtLen; + assert_gt(bot, top); + ltop.initFromRow(top, gp, gfm); + index_t spread = bot - top; + // Many cache misses on the following lines + if(ltop._charOff + spread < sideGbwtLen) { + lbot._charOff = ltop._charOff + spread; + lbot._sideNum = ltop._sideNum; + lbot._sideByteOff = ltop._sideByteOff; + lbot._by = lbot._charOff >> 2; + assert_lt(lbot._by, (int)gp._sideGbwtSz); + lbot._bp = lbot._charOff & 0x3; + } else { + lbot.initFromRow(bot, gp, gfm); + } + } + + /** + * Calculate SideLocus based on a row and other relevant + * information about the shape of the Ebwt. + */ + void initFromRow( + index_t row, + const GFMParams& gp, + const uint8_t* gfm) { + const index_t sideSz = gp._sideSz; + // Side length is hard-coded for now; this allows the compiler + // to do clever things to accelerate / and %. 
+ _sideNum = row / gp._sideGbwtLen; + assert_lt(_sideNum, gp._numSides); + _charOff = row % gp._sideGbwtLen; + _sideByteOff = _sideNum * sideSz; + assert_leq(row, gp._gbwtLen); + assert_leq(_sideByteOff + sideSz, gp._gbwtTotSz); + // Tons of cache misses on the next line + _by = _charOff >> 2; // byte within side + assert_lt(_by, (int)gp._sideGbwtSz); + _bp = _charOff & 0x3; // bit-pair within byte + } + + /** + * Init two SideLocus objects from a top/bot pair, using the result + * from one call to initFromRow to possibly avoid a second call. + */ + static void initFromTopBot_bit( + index_t top, + index_t bot, + const GFMParams& gp, + const uint8_t* gfm, + SideLocus& ltop, + SideLocus& lbot) + { + const index_t sideGbwtLen = gp._sideGbwtLen; + // assert_gt(bot, top); + ltop.initFromRow_bit(top, gp, gfm); + index_t spread = bot - top; + // Many cache misses on the following lines + if(ltop._charOff + spread < sideGbwtLen) { + lbot._charOff = ltop._charOff + spread; + lbot._sideNum = ltop._sideNum; + lbot._sideByteOff = ltop._sideByteOff; + lbot._by = lbot._charOff >> 3; + assert_lt(lbot._by, (int)gp._sideGbwtSz); + lbot._bp = lbot._charOff & 0x7; + } else { + lbot.initFromRow_bit(bot, gp, gfm); + } + } + + /** + * Calculate SideLocus based on a row and other relevant + * information about the shape of the Ebwt. + */ + void initFromRow_bit( + index_t row, + const GFMParams& gp, + const uint8_t* gfm) { + const index_t sideSz = gp._sideSz; + // Side length is hard-coded for now; this allows the compiler + // to do clever things to accelerate / and %. 
+ _sideNum = row / gp._sideGbwtLen; + assert_lt(_sideNum, gp._numSides); + _charOff = row % gp._sideGbwtLen; + _sideByteOff = _sideNum * sideSz; + assert_lt(row, gp._gbwtLen); + assert_leq(_sideByteOff + sideSz, gp._gbwtTotSz); + // Tons of cache misses on the next line + _by = _charOff >> 3; // byte within side + assert_lt(_by, (int)gp._sideGbwtSz); + _bp = _charOff & 0x7; // bit-pair within byte + } + + /** + * Transform this SideLocus to refer to the next side (i.e. the one + * corresponding to the next side downstream). Set all cursors to + * point to the beginning of the side. + */ + void nextSide(const GFMParams& gp) { + assert(valid()); + _sideByteOff += gp.sideSz(); + _sideNum++; + _by = _bp = _charOff = 0; + assert(valid()); + } + + /** + * Return true iff this is an initialized SideLocus + */ + bool valid() const { + if(_bp != -1) { + return true; + } + return false; + } + + /** + * Convert locus to BW row it corresponds to. + */ + index_t toBWRow(const GFMParams& gp) const; + +#ifndef NDEBUG + /** + * Check that SideLocus is internally consistent and consistent + * with the (provided) EbwtParams. + */ + bool repOk(const GFMParams& gp) const { + ASSERT_ONLY(index_t row = toBWRow(gp)); + assert_leq(row, gp._gbwtLen); + assert_range(-1, 3, _bp); + assert_range(0, (int)gp._sideGbwtSz, _by); + return true; + } +#endif + + /// Make this look like an invalid SideLocus + void invalidate() { + _bp = -1; + } + + /** + * Return a read-only pointer to the beginning of the top side. + */ + const uint8_t *side(const uint8_t* gbwt) const { + return gbwt + _sideByteOff; + } + + /** + * Return a read-only pointer to the beginning of the top side. 
+ */ + const uint8_t *next_side(const GFMParams& gp, const uint8_t* gbwt) const { + if(_sideByteOff + gp._sideSz < gp._ebwtTotSz) { + return gbwt + _sideByteOff + gp._sideSz; + } else { + return NULL; + } + } + + index_t _sideByteOff; // offset of top side within ebwt[] + index_t _sideNum; // index of side + index_t _charOff; // character offset within side + int32_t _by; // byte within side (not adjusted for bw sides) + int32_t _bp; // bitpair within byte (not adjusted for bw sides) +}; + +/** + * Convert locus to BW row it corresponds to. + */ +template +inline index_t SideLocus::toBWRow(const GFMParams& gp) const { + return _sideNum * (gp._sideGbwtSz << (gp.linearFM() ? 2 : 1)) + _charOff; +} + +#ifdef POPCNT_CAPABILITY // wrapping of "struct" +struct USE_POPCNT_GENERIC { +#endif + // Use this standard bit-bashing population count + inline static int pop64(uint64_t x) { + // Lots of cache misses on following lines (>10K) + x = x - ((x >> 1) & 0x5555555555555555llu); + x = (x & 0x3333333333333333llu) + ((x >> 2) & 0x3333333333333333llu); + x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0Fllu; + x = x + (x >> 8); + x = x + (x >> 16); + x = x + (x >> 32); + return (int)(x & 0x3Fllu); + } +#ifdef POPCNT_CAPABILITY // wrapping a "struct" +}; +#endif + +#ifdef POPCNT_CAPABILITY +struct USE_POPCNT_INSTRUCTION { + inline static int pop64(uint64_t x) { + int64_t count; +#ifdef USING_MSC_COMPILER + count = __popcnt64(x); +#else + asm ("popcntq %[x],%[count]\n": [count] "=&r" (count): [x] "r" (x)); +#endif + return (int)count; + } +}; +#endif + +/** + * Tricky-bit-bashing bitpair counting for given two-bit value (0-3) + * within a 64-bit argument. 
+ */ +#ifdef POPCNT_CAPABILITY +template +#endif +inline static int countInU64(int c, uint64_t dw) { + uint64_t c0 = c_table[c]; + uint64_t x0 = dw ^ c0; + uint64_t x1 = (x0 >> 1); + uint64_t x2 = x1 & (0x5555555555555555); + uint64_t x3 = x0 & x2; +#ifdef POPCNT_CAPABILITY + uint64_t tmp = Operation().pop64(x3); +#else + uint64_t tmp = pop64(x3); +#endif + return (int) tmp; +} + +#ifdef POPCNT_CAPABILITY // wrapping of "struct" +struct USE_POPCNT_GENERIC_BITS { + // Use this standard bit-bashing population count + inline static uint64_t pop64(uint64_t x) { +#else +// Use this standard bit-bashing population count + inline static uint64_t pop6464(uint64_t x) { +#endif + x -= (x >> 1) & 0x5555555555555555ULL; + x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); + x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL; + return int((x * 0x0101010101010101ULL) >> 56); + } +#ifdef POPCNT_CAPABILITY // wrapping a "struct" +}; +#endif + +/** + * Tricky-bit-bashing bitpair counting for given two-bit value (0-3) + * within a 64-bit argument. + */ +#ifdef POPCNT_CAPABILITY +template +#endif +inline static int countInU64_bits(uint64_t dw) { +#ifdef POPCNT_CAPABILITY + uint64_t tmp = Operation().pop64(dw); +#else + uint64_t tmp = pop6464(dw); +#endif + return (int) tmp; +} + +// Forward declarations for Ebwt class +class GFMSearchParams; + +/** + * Extended Burrows-Wheeler transform data. + * + * An Ebwt may be transferred to and from RAM with calls to + * evictFromMemory() and loadIntoMemory(). By default, a newly-created + * Ebwt is not loaded into memory; if the user would like to use a + * newly-created Ebwt to answer queries, they must first call + * loadIntoMemory(). 
*/
// NOTE(review): the parameter list after 'template' is missing in this copy,
// as are the template arguments on EList, ALTDB, RepeatDB, ALT, Haplotype,
// pair, readIndex and numeric_limits further down -- they look like they were
// stripped during extraction; confirm against the upstream file.
template
class GFM {
public:
    // Member-initializer list shared by the constructors below.  It refers to
    // the constructor parameters overrideOffRate, verbose, passMemExc,
    // sanityCheck and fw by name, so every constructor that uses it must
    // declare parameters with exactly those names.
    #define GFM_INITS \
    _toBigEndian(currentlyBigEndian()), \
    _overrideOffRate(overrideOffRate), \
    _verbose(verbose), \
    _passMemExc(passMemExc), \
    _sanity(sanityCheck), \
    fw_(fw), \
    _in1(NULL), \
    _in2(NULL), \
    _nPat(0), \
    _nFrag(0), \
    _plen(EBWT_CAT), \
    _rstarts(EBWT_CAT), \
    _fchr(EBWT_CAT), \
    _ftab(EBWT_CAT), \
    _eftab(EBWT_CAT), \
    _offs(EBWT_CAT), \
    _gfm(EBWT_CAT), \
    _useMm(false), \
    useShmem_(false), \
    _refnames(EBWT_CAT), \
    mmFile1_(NULL), \
    mmFile2_(NULL), \
    _nthreads(1)

    /// Default constructor; does not use GFM_INITS, so no member is
    /// explicitly initialized here.
    GFM() {}
    /// Construct a GFM from the given input file
    ///
    /// Sets the index file names to "<in>.1.<gfm_ext>" and "<in>.2.<gfm_ext>"
    /// and, unless skipLoading is true, reads the ALT records from
    /// "<in>.7.<gfm_ext>" and their names from "<in>.8.<gfm_ext>" into altdb.
    /// When repeatdb is NULL the header is loaded through readIntoMemory();
    /// when it is non-NULL the repeat groups and k-mer tables stored after
    /// the haplotypes in the ".7." file are read as well.
    ///
    /// altdb is dereferenced unconditionally once skipLoading has been
    /// checked, so it must be non-NULL whenever skipLoading is false.
    /// readLens, when non-NULL and non-empty, selects a subset of the repeat
    /// groups (see the "Load subset of repeat groups" branch).
    GFM(const string& in,
        ALTDB* altdb,
        RepeatDB* repeatdb,
        EList* readLens,
        int needEntireReverse,
        bool fw,
        int32_t overrideOffRate, // = -1,
        int32_t offRatePlus, // = -1,
        bool useMm, // = false,
        bool useShmem, // = false,
        bool mmSweep, // = false,
        bool loadNames, // = false,
        bool loadSASamp, // = true,
        bool loadFtab, // = true,
        bool loadRstarts, // = true,
        bool loadSpliceSites, // = true,
        bool verbose, // = false,
        bool startVerbose, // = false,
        bool passMemExc, // = false,
        bool sanityCheck, // = false)
        bool useHaplotype, // = false
        bool skipLoading = false) :
    GFM_INITS
    {
        // Memory-mapping and shared memory are mutually exclusive.
        assert(!useMm || !useShmem);

#ifdef POPCNT_CAPABILITY
        ProcessorSupport ps;
        _usePOPCNTinstruction = ps.POPCNTenabled();
#endif

        packed_ = false;
        _useMm = useMm;
        useShmem_ = useShmem;
        _in1Str = in + ".1." + gfm_ext;
        _in2Str = in + ".2." + gfm_ext;

        // Caller only wants the file names and flags set up.
        if(skipLoading) return;

        if(repeatdb == NULL) {
            readIntoMemory(
                fw ? -1 : needEntireReverse, // need REF_READ_REVERSE
                loadSASamp,  // load the SA sample portion?
                loadFtab,    // load the ftab & eftab?
                loadRstarts, // load the rstarts array?
                true,        // stop after loading the header portion?
                &_gh,        // params
                mmSweep,     // mmSweep
                loadNames,   // loadNames
                startVerbose); // startVerbose
            // If the offRate has been overridden, reflect that in the
            // _eh._offRate field
            if(offRatePlus > 0 && _overrideOffRate == -1) {
                _overrideOffRate = _gh._offRate + offRatePlus;
            }
            if(_overrideOffRate > _gh._offRate) {
                _gh.setOffRate(_overrideOffRate);
                assert_eq(_overrideOffRate, _gh._offRate);
            }
        }

        // Read ALTs
        EList >& alts = altdb->alts();
        EList >& haplotypes = altdb->haplotypes();
        EList& altnames = altdb->altnames();
        alts.clear(); altnames.clear();
        string in7Str = in + ".7." + gfm_ext;
        string in8Str = in + ".8." + gfm_ext;

        // open alts
        if(verbose || startVerbose) cerr << "Opening \"" << in7Str.c_str() << "\"" << endl;
        ifstream in7(in7Str.c_str(), ios::binary);
        // NOTE(review): a failed open is only reported; execution continues
        // and the reads below operate on a stream that is not good().
        if(!in7.good()) {
            cerr << "Could not open index file " << in7Str.c_str() << endl;
        }

        // to_alti maps the position of an ALT in the file to its index in
        // 'alts' (the two differ once splice sites are skipped below).
        EList to_alti;
        index_t to_alti_far = 0;
        // First 32-bit word is read and discarded (the writer side in this
        // file labels it an endianness sentinel).
        readI32(in7, this->toBe());
        index_t numAlts = readIndex(in7, this->toBe());


        // open altnames
        if(verbose || startVerbose) cerr << "Opening \"" << in8Str.c_str() << "\"" << endl;
        ifstream in8(in8Str.c_str(), ios::binary);
        // NOTE(review): same as above -- the failure is reported but not fatal.
        if(!in8.good()) {
            cerr << "Could not open index file " << in8Str.c_str() << endl;
        }

        readI32(in8, this->toBe());
        index_t numAltnames = readIndex(in8, this->toBe());

        assert_eq(numAlts, numAltnames);

        if(numAlts > 0) {
            // resizeExact() followed by clear(): presumably reserves capacity
            // while leaving the list empty -- confirm against EList.
            alts.resizeExact(numAlts); alts.clear();
            to_alti.resizeExact(numAlts); to_alti.clear();
            while(!in7.eof() && !in8.eof()) {
                alts.expand();
                alts.back().read(in7, this->toBe());
                to_alti.push_back(to_alti_far);
                to_alti_far++;

                altnames.expand();
                in8 >> altnames.back();

                if(!loadSpliceSites) {
                    if(alts.back().splicesite()) {
                        // Drop the splice site just read; shrink the expected
                        // counts and mark its file position as unmapped.
                        alts.pop_back();
                        assert_gt(numAlts, 0);
                        altnames.pop_back();
                        assert_gt(numAltnames, 0);
                        numAlts--;
                        numAltnames--;
                        to_alti.back() = std::numeric_limits::max();
                        to_alti_far--;
                    }
                }
                // Stop once the (possibly reduced) expected count is reached.
                if(alts.size() == numAlts) break;
            }
        }
        assert_eq(alts.size(), numAlts);
        assert_eq(to_alti_far, numAlts);
        assert_eq(alts.size(), altnames.size());
        // Check if it hits the end of file, and this routine is needed for backward compatibility
        if(in7.peek() != std::ifstream::traits_type::eof()) {
            index_t numHaplotypes = readIndex(in7, this->toBe());
            if(numHaplotypes > 0) {
                haplotypes.resizeExact(numHaplotypes);
                haplotypes.clear();
                while(!in7.eof()) {
                    haplotypes.expand();
                    haplotypes.back().read(in7, this->toBe());
                    Haplotype& ht = haplotypes.back();
                    // Translate file positions of ALTs into indexes in 'alts'.
                    for(index_t h = 0; h < ht.alts.size(); h++) {
                        ht.alts[h] = to_alti[ht.alts[h]];
                    }
                    if(haplotypes.size() == numHaplotypes) break;
                }
            }
            if(!useHaplotype) {
                haplotypes.nullify();
            }
        }

        // Read repeats
        _repeat = false;
        if(repeatdb != NULL) {
            _repeat = true;

            // Number of repeat groups in the index
            index_t numRepeatIndex = readIndex(in7, this->toBe());
            assert_gt(numRepeatIndex, 0);
            EList > repeatLens; repeatLens.resizeExact(numRepeatIndex);

            for(size_t k = 0; k < numRepeatIndex; k++) {
                repeatLens[k].first = readIndex(in7, this->toBe());
                repeatLens[k].second = readIndex(in7, this->toBe());
            }

            if (readLens != NULL && !readLens->empty()) {
                // Load subset of repeat groups.
                size_t k = 0;
                size_t k2 = 0;

                _repeatIncluded.resizeExact(numRepeatIndex);
                _repeatIncluded.fillZero();

                // Two-cursor walk: for each requested read length, mark the
                // first group at or after the cursor whose .first is >= it.
                // (Presumably both lists are in ascending order -- confirm.)
                while(k < numRepeatIndex && k2 < readLens->size()) {
                    if (repeatLens[k].first >= (*readLens)[k2]) {
                        _repeatIncluded[k] = true;
                        k2++;
                    } else {
                        k++;
                    }
                }

                // at least last repeat group is included
                _repeatIncluded[numRepeatIndex - 1] = true;

                _repeatLens.clear();
                for(size_t i = 0; i < numRepeatIndex; i++) {
                    if (_repeatIncluded[i]) {
                        _repeatLens.push_back(repeatLens[i]);
                    }
                }
            } else {
                // Load all repeat groups
                _repeatLens = repeatLens;
                _repeatIncluded.resizeExact(numRepeatIndex);
                _repeatIncluded.fill(true);
            }

            repeatdb->read(in7, this->toBe(), _repeatIncluded);
            index_t numKmertables = readIndex(in7, this->toBe());
            EList filePos; filePos.resizeExact(numKmertables);
            for(size_t k = 0; k < numKmertables; k++) {
                filePos[k] = readIndex(in7, this->toBe());
            }
            for(size_t k = 0; k < numKmertables; k++) {
                if(!_repeatIncluded[k])
                    continue;
                // Table k is read from filePos[k-1]; table 0 is read from the
                // current stream position, so filePos[k] looks like the
                // offset just past table k.
                if(k > 0) {
                    in7.seekg(filePos[k-1]);
                }
                _repeat_kmertables.expand();
                _repeat_kmertables.back().read(in7, this->toBe());
            }
            // NOTE(review): filePos.back() is evaluated even when
            // numKmertables is 0 -- confirm that case cannot occur.
            in7.seekg(filePos.back());
        }

        in7.close();
        in8.close();

        // Sort SNPs and Splice Sites based on positions
        // Before sorting, append a mirrored copy of every splice site
        // (left/right swapped, named "ssr") and a copy of every deletion
        // positioned at its last base with 'reversed' set.  Only the first
        // nalts entries are visited, so the copies are not re-processed.
        index_t nalts = (index_t)alts.size();
        for(index_t s = 0; s < nalts; s++) {
            ALT alt = alts[s];
            if(alt.snp()) altdb->setSNPs(true);
            if(alt.exon()) altdb->setExons(true);
            if(alt.splicesite()) {
                altdb->setSpliceSites(true);
                alts.push_back(alt);
                alts.back().left = alt.right;
                alts.back().right = alt.left;
                altnames.push_back("ssr");
            } else if(alt.deletion()) {
                alts.push_back(alt);
                alts.back().pos = alt.pos + alt.len - 1;
                alts.back().reversed = true;
                string altname = altnames[s];
                altnames.push_back(altname);
            }
        }
        // Re-sort only when copies were appended above.
        if(alts.size() > 1 && alts.size() > nalts) {
            assert_eq(alts.size(), altnames.size());
            // buf pairs each ALT with its pre-sort index so that the names
            // (buf2) and the to_alti mapping can follow the sort.
            EList, index_t> > buf; buf.resize(alts.size());
            EList buf2; buf2.resize(alts.size());
            for(size_t i = 0; i < alts.size(); i++) {
                buf[i].first = alts[i];
                buf[i].second = (index_t)i;
                buf2[i] = altnames[i];
            }
            buf.sort();
            for(size_t i = 0; i < alts.size(); i++) {
                alts[i] = buf[i].first;
                altnames[i] = buf2[buf[i].second];
                // NOTE(review): indexes to_alti by a position in 'alts',
                // whereas it was filled by position in the file; the two
                // coincide only if no splice site was skipped -- confirm.
                if(buf[i].second < numAlts) {
                    to_alti[buf[i].second] = i;
                }
            }
        }

        if(useHaplotype) {
            // haplotype_maxrights[h] = largest 'right' among haplotypes 0..h.
            EList& haplotype_maxrights = altdb->haplotype_maxrights();
            haplotype_maxrights.resizeExact(haplotypes.size());
            for(index_t h = 0; h < haplotypes.size(); h++) {
                Haplotype& ht = haplotypes[h];
                // NOTE(review): ht.alts was already passed through to_alti
                // when the haplotypes were read; this applies the (possibly
                // updated) mapping a second time -- confirm this is intended.
                for(index_t h2 = 0; h2 < ht.alts.size(); h2++) {
                    ht.alts[h2] = to_alti[ht.alts[h2]];
                }
                if(h == 0) {
                    haplotype_maxrights[h] = ht.right;
                } else {
                    haplotype_maxrights[h] = std::max(haplotype_maxrights[h - 1], ht.right);
                }
            }
        }

        assert(repeatdb != NULL || repOk());
    }

    /// Construct an Ebwt from the given header parameters and string
    /// vector, optionally using a blockwise suffix sorter with the
    /// given 'bmax' and 'dcv' parameters.  The string vector is
    /// ultimately joined and the joined string is passed to buildToDisk().
    ///
    /// NOTE(review): the body visible here only initializes members (via
    /// GFM_INITS and _gh) and sets packed_; it does not join or build
    /// anything, and needEntireReverse, file, dcv, sztot and seed are not
    /// referenced in it.
    GFM(
        bool packed,
        int needEntireReverse,
        int32_t lineRate,
        int32_t offRate,
        int32_t ftabChars,
        const string& file,   // base filename for GFM files
        bool fw,
        int dcv,
        EList& szs,
        index_t sztot,
        const RefReadInParams& refparams,
        uint32_t seed,
        int32_t overrideOffRate = -1,
        bool verbose = false,
        bool passMemExc = false,
        bool sanityCheck = false) :
    GFM_INITS,
    _gh(
        joinedLen(szs),
        0,
        0,
        lineRate,
        offRate,
        ftabChars,
        0,
        refparams.reverse == REF_READ_REVERSE)
    {
#ifdef POPCNT_CAPABILITY
        ProcessorSupport ps;
        _usePOPCNTinstruction = ps.POPCNTenabled();
#endif
        packed_ = packed;
    }

    /// Construct an Ebwt from the given header parameters and string
    /// vector, optionally using a blockwise suffix sorter with the
    /// given 'bmax' and 'dcv' parameters.
/// The string vector is
    /// ultimately joined and the joined string is passed to buildToDisk().
    ///
    /// Opens "<outfile>.1.<gfm_ext>" and "<outfile>.2.<gfm_ext>" for writing,
    /// delegates the build to initFromVector(), then verifies that each file
    /// on disk is at least as large as the number of bytes written.  Throws
    /// the int 1 when a file cannot be opened or a size check fails.  When
    /// sanityCheck is set, the index is read back in and evicted again.
    ///
    /// NOTE(review): the template parameter list after 'template' and the
    /// template arguments on EList are missing in this copy; TStr is
    /// evidently the template parameter.  Confirm against the upstream file.
    template
    GFM(
        TStr& s,
        bool packed,
        int needEntireReverse,
        int32_t lineRate,
        int32_t offRate,
        int32_t ftabChars,
        int nthreads,
        const string& snpfile,
        const string& htfile,
        const string& ssfile,
        const string& exonfile,
        const string& svfile,
        const string& repeatfile,
        const string& outfile,   // base filename for GFM files
        bool fw,
        bool useBlockwise,
        index_t bmax,
        index_t bmaxSqrtMult,
        index_t bmaxDivN,
        int dcv,
        EList& is,
        EList& szs,
        index_t sztot,
        const RefReadInParams& refparams,
        EList* parent_szs,
        EList* parent_refnames,
        uint32_t seed,
        int32_t overrideOffRate = -1,
        bool verbose = false,
        bool passMemExc = false,
        bool sanityCheck = false) :
    GFM_INITS,
    _gh(
        joinedLen(szs),
        0,
        0,
        lineRate,
        offRate,
        ftabChars,
        0,
        refparams.reverse == REF_READ_REVERSE)
    {
        assert_gt(nthreads, 0);
        _nthreads = nthreads;
#ifdef POPCNT_CAPABILITY
        ProcessorSupport ps;
        _usePOPCNTinstruction = ps.POPCNTenabled();
#endif
        _in1Str = outfile + ".1." + gfm_ext;
        _in2Str = outfile + ".2." + gfm_ext;
        packed_ = packed;
        // Open output files
        ofstream fout1(_in1Str.c_str(), ios::binary);
        if(!fout1.good()) {
            cerr << "Could not open index file for writing: \"" << _in1Str.c_str() << "\"" << endl
                 << "Please make sure the directory exists and that permissions allow writing by" << endl
                 << "HISAT2." << endl;
            throw 1;
        }
        ofstream fout2(_in2Str.c_str(), ios::binary);
        if(!fout2.good()) {
            cerr << "Could not open index file for writing: \"" << _in2Str.c_str() << "\"" << endl
                 << "Please make sure the directory exists and that permissions allow writing by" << endl
                 << "HISAT2." << endl;
            throw 1;
        }

        // Build
        initFromVector(
            s,
            snpfile,
            htfile,
            ssfile,
            exonfile,
            svfile,
            repeatfile,
            is,
            szs,
            sztot,
            refparams,
            fout1,
            fout2,
            outfile,
            useBlockwise,
            bmax,
            bmaxSqrtMult,
            bmaxDivN,
            dcv,
            parent_szs,
            parent_refnames,
            seed,
            verbose);
        // Close output files
        fout1.flush();
        // Remember how many bytes the stream reports as written so it can be
        // compared with the size of the file on disk after closing.
        int64_t tellpSz1 = (int64_t)fout1.tellp();
        VMSG_NL("Wrote " << fout1.tellp() << " bytes to primary GFM file: " << _in1Str.c_str());
        fout1.close();
        bool err = false;
        // A file smaller than the write position means data was lost.
        if(tellpSz1 > fileSize(_in1Str.c_str())) {
            err = true;
            cerr << "Index is corrupt: File size for " << _in1Str.c_str() << " should have been " << tellpSz1
                 << " but is actually " << fileSize(_in1Str.c_str()) << "." << endl;
        }
        fout2.flush();
        int64_t tellpSz2 = (int64_t)fout2.tellp();
        VMSG_NL("Wrote " << fout2.tellp() << " bytes to secondary GFM file: " << _in2Str.c_str());
        fout2.close();
        if(tellpSz2 > fileSize(_in2Str.c_str())) {
            err = true;
            cerr << "Index is corrupt: File size for " << _in2Str.c_str() << " should have been " << tellpSz2
                 << " but is actually " << fileSize(_in2Str.c_str()) << "." << endl;
        }
        // Both files are checked before failing so that both problems get
        // reported.
        if(err) {
            cerr << "Please check if there is a problem with the disk or if disk is full." << endl;
            throw 1;
        }
        // Reopen as input streams
        VMSG_NL("Re-opening _in1 and _in2 as input streams");
        if(_sanity) {
            VMSG_NL("Sanity-checking Bt2");
            assert(!isInMemory());
            readIntoMemory(
                fw ? -1 : needEntireReverse, // 1 -> need the reverse to be reverse-of-concat
                true,                        // load SA sample (_offs[])?
                true,                        // load ftab (_ftab[] & _eftab[])?
                true,                        // load r-starts (_rstarts[])?
                false,                       // just load header?
                NULL,                        // Params object to fill
                false,                       // mm sweep?
                true,                        // load names?
                false);                      // verbose startup?
            // sanityCheckAll(refparams.reverse);
            evictFromMemory();
            assert(!isInMemory());
        }
        VMSG_NL("Returning from GFM constructor");
    }

    /**
     * Static constructor for a pair of forward/reverse indexes for the
     * given reference string.
     *
     * Wraps str in a one-element list and forwards every argument to
     * fromStrings().
     */
    template
    static pair
    fromString(
        const char* str,
        bool packed,
        int reverse,
        bool bigEndian,
        int32_t lineRate,
        int32_t offRate,
        int32_t ftabChars,
        const string& file,
        bool useBlockwise,
        index_t bmax,
        index_t bmaxSqrtMult,
        index_t bmaxDivN,
        int dcv,
        uint32_t seed,
        bool verbose,
        bool autoMem,
        bool sanity)
    {
        EList strs(EBWT_CAT);
        strs.push_back(std::string(str));
        return fromStrings(
            strs,
            packed,
            reverse,
            bigEndian,
            lineRate,
            offRate,
            ftabChars,
            file,
            useBlockwise,
            bmax,
            bmaxSqrtMult,
            bmaxDivN,
            dcv,
            seed,
            verbose,
            autoMem,
            sanity);
    }

    /**
     * Static constructor for a pair of forward/reverse indexes for the
     * given list of reference strings.
     *
     * The strings are serialized as FASTA records (">i" header followed by
     * the sequence) into an in-memory stream, which is then used as the
     * input for both the forward index (written under 'file') and the
     * second index (written under file + ".rev").  The two returned objects
     * are allocated with new; nothing in this function frees them.
     *
     * NOTE(review): the argument lists passed to 'new GFM(...)' below do not
     * line up with either build constructor visible in this section (a
     * TStr() temporary is bound to 'TStr&', and no nthreads or
     * snpfile/htfile/... arguments are supplied).  Presumably this template
     * is never instantiated -- confirm before relying on it.
     */
    template
    static pair
    fromStrings(
        const EList& strs,
        bool packed,
        int reverse,
        bool bigEndian,
        int32_t lineRate,
        int32_t offRate,
        int32_t ftabChars,
        const string& file,
        bool useBlockwise,
        index_t bmax,
        index_t bmaxSqrtMult,
        index_t bmaxDivN,
        int dcv,
        uint32_t seed,
        bool verbose,
        bool autoMem,
        bool sanity)
    {
        assert(!strs.empty());
        EList is(EBWT_CAT);
        RefReadInParams refparams(false /* color */, REF_READ_FORWARD, false, false);
        // Adapt sequence strings to stringstreams open for input
        // NOTE(review): std::auto_ptr was deprecated in C++11 and removed in
        // C++17; this only builds with older language standards.
        auto_ptr ss(new stringstream());
        for(index_t i = 0; i < strs.size(); i++) {
            (*ss) << ">" << i << endl << strs[i] << endl;
        }
        auto_ptr fb(new FileBuf(ss.get()));
        assert(!fb->eof());
        // The get() below lives inside assert(), so the character is consumed
        // only in assert-enabled builds; ASSERT_ONLY(reset()) undoes it there.
        assert(fb->get() == '>');
        ASSERT_ONLY(fb->reset());
        assert(!fb->eof());
        // 'is' stores the raw pointer; fb keeps ownership for the duration
        // of this function.
        is.push_back(fb.get());
        // Vector for the ordered list of "records" comprising the input
        // sequences.  A record represents a stretch of unambiguous
        // characters in one of the input sequences.
        EList szs(EBWT_CAT);
        std::pair sztot;
        sztot = BitPairReference::szsFromFasta(is,
                                               file,
                                               bigEndian,
                                               refparams,
                                               szs,
                                               sanity);
        // Construct Ebwt from input strings and parameters
        GFM *gfmFw = new GFM(
            TStr(),
            packed,
            -1,           // fw
            lineRate,
            offRate,      // suffix-array sampling rate
            ftabChars,    // number of chars in initial arrow-pair calc
            file,         // basename for .?.ebwt files
            true,         // fw?
            useBlockwise, // useBlockwise
            bmax,         // block size for blockwise SA builder
            bmaxSqrtMult, // block size as multiplier of sqrt(len)
            bmaxDivN,     // block size as divisor of len
            dcv,          // difference-cover period
            is,           // list of input streams
            szs,          // list of reference sizes
            sztot.first,  // total size of all unambiguous ref chars
            refparams,    // reference read-in parameters
            seed,         // pseudo-random number generator seed
            -1,           // override offRate
            verbose,      // be talkative
            autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
            sanity);      // verify results and internal consistency
        // Second pass over the same input, now with the caller's 'reverse'
        // mode, to build the companion index.
        refparams.reverse = reverse;
        szs.clear();
        sztot = BitPairReference::szsFromFasta(is,
                                               file,
                                               bigEndian,
                                               refparams,
                                               szs,
                                               sanity);
        // Construct Ebwt from input strings and parameters
        GFM *gfmBw = new GFM(
            TStr(),
            packed,
            reverse == REF_READ_REVERSE,
            lineRate,
            offRate,      // suffix-array sampling rate
            ftabChars,    // number of chars in initial arrow-pair calc
            file + ".rev",// basename for .?.ebwt files
            false,        // fw?
            useBlockwise, // useBlockwise
            bmax,         // block size for blockwise SA builder
            bmaxSqrtMult, // block size as multiplier of sqrt(len)
            bmaxDivN,     // block size as divisor of len
            dcv,          // difference-cover period
            is,           // list of input streams
            szs,          // list of reference sizes
            sztot.first,  // total size of all unambiguous ref chars
            refparams,    // reference read-in parameters
            seed,         // pseudo-random number generator seed
            -1,           // override offRate
            verbose,      // be talkative
            autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
            sanity);      // verify results and internal consistency
        return make_pair(gfmFw, gfmBw);
    }

    /// Return true iff the Ebwt is packed
    bool isPacked() { return packed_; }

    /**
     * Write the rstarts array given the szs array for the reference.
     */
    void szsToDisk(const EList& szs, ostream& os, int reverse);

    /**
     * Check whether 'pos', an offset counted from the start of the sequence
     * whose first record is szs[start_idx], lands inside a record's run of
     * 'len' characters.
     *
     * Walks the records from start_idx, consuming each record's 'off' and
     * then its 'len' from pos.  Returns false if pos falls inside an 'off'
     * stretch or if the walk reaches the first record of the next sequence
     * (szs[i].first set); returns true as soon as pos falls inside a 'len'
     * stretch.  ('off' is presumably the gap of ambiguous characters that
     * precedes the record -- confirm against RefRecord.)
     *
     * @param szs       reference records
     * @param start_idx index of the first record of the sequence; asserted
     *                  to have 'first' set
     * @param pos       offset within that sequence
     */
    bool checkPosToSzs(const EList& szs, index_t start_idx, index_t pos)
    {
        assert(szs[start_idx].first);
        for(index_t i = start_idx; i < szs.size(); i++) {
            if((i != start_idx) && (szs[i].first)) {
                // span to next chr
                return false;
            }

            if(pos < szs[i].off) {
                return false;
            } else {
                pos -= szs[i].off;
                if(pos < szs[i].len) {
                    return true;
                }
                pos -= szs[i].len;
            }
        }
        // Reached only when pos lies beyond the final record in szs.
        assert(false);
        return false;
    }

    /**
     * Helper for the constructors above.  Takes a vector of text
     * strings and joins them into a single string with a call to
     * joinToDisk, which does a join (with padding) and writes some of
     * the resulting data directly to disk rather than keep it in
     * memory.  It then constructs a suffix-array producer (what kind
     * depends on 'useBlockwise') for the resulting sequence.  The
     * suffix-array producer can then be used to obtain chunks of the
     * joined string's suffix array.
+ */ + template + void initFromVector(TStr& s, + const string& snpfile, + const string& htfile, + const string& ssfile, + const string& exonfile, + const string& svfile, + const string& repeatfile, + EList& is, + EList& szs, + index_t sztot, + const RefReadInParams& refparams, + ofstream& out1, + ofstream& out2, + const string& outfile, + bool useBlockwise, + index_t bmax, + index_t bmaxSqrtMult, + index_t bmaxDivN, + int dcv, + EList* parent_szs, + EList* parent_refnames, + uint32_t seed, + bool verbose) + { + // Compose text strings into single string + VMSG_NL("Calculating joined length"); + index_t jlen; + jlen = joinedLen(szs); + _repeat = (parent_szs != NULL); + assert_geq(jlen, sztot); + VMSG_NL("Writing header"); + writeFromMemory(true, out1, out2); + try { + VMSG_NL("Reserving space for joined string"); + s.resize(jlen); + VMSG_NL("Joining reference sequences"); + if(refparams.reverse == REF_READ_REVERSE) { + { + Timer timer(cerr, " Time to join reference sequences: ", _verbose); + joinToDisk(is, szs, sztot, refparams, s, out1, out2); + } { + Timer timer(cerr, " Time to reverse reference sequence: ", _verbose); + EList tmp(EBWT_CAT); + s.reverse(); + reverseRefRecords(szs, tmp, false, verbose); + szsToDisk(tmp, out1, refparams.reverse); + } + } else { + Timer timer(cerr, " Time to join reference sequences: ", _verbose); + joinToDisk(is, szs, sztot, refparams, s, out1, out2); + szsToDisk(szs, out1, refparams.reverse); + } + + { + Timer timer(cerr, " Time to read SNPs and splice sites: ", _verbose); + _alts.clear(); + _altnames.clear(); + EList > chr_szs; + index_t tmp_len = 0; + for(index_t i = 0; i < szs.size(); i++) { + if(szs[i].first) { + chr_szs.expand(); + chr_szs.back().first = tmp_len; + chr_szs.back().second = i; + } + tmp_len += (index_t)szs[i].len; + } + + // Write SNPs into 7.ht2 and 8.ht2 + string file7 = outfile + ".7." + gfm_ext; + string file8 = outfile + ".8." 
+ gfm_ext; + + // Open output stream for the '.7.gfm_ext' file which will + // hold SNPs (except IDs). + ofstream fout7(file7.c_str(), ios::binary); + if(!fout7.good()) { + cerr << "Could not open index file for writing: \"" << file7.c_str() << "\"" << endl + << "Please make sure the directory exists and that permissions allow writing by" << endl + << "HISAT2." << endl; + throw 1; + } + // Open output stream for the '.8.gfm_ext' file which will + // hold SNP IDs. + ofstream fout8(file8.c_str(), ios::binary); + if(!fout8.good()) { + cerr << "Could not open index file for writing: \"" << file8.c_str() << "\"" << endl + << "Please make sure the directory exists and that permissions allow writing by" << endl + << "HISAT2." << endl; + throw 1; + } + writeIndex(fout7, 1, this->toBe()); // endianness sentinel + writeIndex(fout8, 1, this->toBe()); // endianness sentinel + + for(index_t i = 0; i < _refnames.size(); i++) { + _refnames_nospace.push_back(""); + for(index_t j = 0; j < _refnames[i].size(); j++) { + char c = _refnames[i][j]; + if(c == ' ') break; + _refnames_nospace.back().push_back(c); + } + } + + map snpID2num; + if(snpfile != "") { + ifstream snp_file(snpfile.c_str(), ios::in); + if(!snp_file.is_open()) { + cerr << "Error: could not open " << snpfile.c_str() << endl; + throw 1; + } + + while(!snp_file.eof()) { + // rs73387790 single 22:20000001-21000000 145 A + string snp_id; + snp_file >> snp_id; + if(snp_id.empty() || snp_id[0] == '#') { + string line; + getline(snp_file, line); + continue; + } + string type, chr; + index_t genome_pos; + char snp_ch = '\0'; + string ins_seq; + index_t del_len = 0; + snp_file >> type >> chr >> genome_pos; + if(type == "single") { + snp_file >> snp_ch; + } else if(type == "deletion") { + snp_file >> del_len; + } else if(type == "insertion") { + snp_file >> ins_seq; + } + index_t chr_idx = 0; + for(; chr_idx < _refnames_nospace.size(); chr_idx++) { + if(chr == _refnames_nospace[chr_idx]) + break; + } + if(chr_idx >= 
_refnames_nospace.size()) { + continue; + } + assert_eq(chr_szs.size(), _refnames_nospace.size()); + assert_lt(chr_idx, chr_szs.size()); + pair tmp_pair = chr_szs[chr_idx]; + const index_t sofar_len = tmp_pair.first; + const index_t szs_idx = tmp_pair.second; + bool involve_Ns = false; + index_t pos = genome_pos; + index_t add_pos = 0; + assert(szs[szs_idx].first); + for(index_t i = szs_idx; i < szs.size(); i++) { + if(i != szs_idx && szs[i].first) { + break; + } + if(pos < szs[i].off) { + involve_Ns = true; + break; + } else { + pos -= szs[i].off; + if(pos == 0) { + if(type == "deletion" || type == "insertion") { + involve_Ns = true; + break; + } + } + if(pos < szs[i].len) { + break; + } else { + pos -= szs[i].len; + add_pos += szs[i].len; + } + } + } + + if(involve_Ns) { + continue; + } + pos = sofar_len + add_pos + pos; + if(chr_idx + 1 < chr_szs.size()) { + if(pos >= chr_szs[chr_idx + 1].first) { + continue; + } + } else { + if(pos >= jlen){ + continue; + } + } + + _alts.expand(); + ALT& snp = _alts.back(); + snp.pos = pos; + if(type == "single") { + snp.type = ALT_SNP_SGL; + snp_ch = toupper(snp_ch); + if(snp_ch != 'A' && snp_ch != 'C' && snp_ch != 'G' && snp_ch != 'T') { + _alts.pop_back(); + continue; + } + uint64_t bp = asc2dna[(int)snp_ch]; + assert_lt(bp, 4); + if((int)bp == s[pos]) { + if (!threeN) { + cerr << "Warning: single type should have a different base than " << "ACGTN"[(int)s[pos]] + << " (" << snp_id << ") at " << genome_pos << " on " << chr << endl; + } + _alts.pop_back(); + continue; + // throw 1; + } + snp.len = 1; + snp.seq = bp; + } else if(type == "deletion") { + snp.type = ALT_SNP_DEL; + snp.len = del_len; + snp.seq = 0; + snp.reversed = false; + } else if(type == "insertion") { + snp.type = ALT_SNP_INS; + snp.len = (index_t)ins_seq.size(); + if(snp.len > sizeof(snp.seq) * 4) { + _alts.pop_back(); + continue; + } + snp.seq = 0; + bool failed = false; + for(size_t i = 0; i < ins_seq.size(); i++) { + char ch = toupper(ins_seq[i]); + if(ch 
!= 'A' && ch != 'C' && ch != 'G' && ch != 'T') { + failed = true; + break; + } + uint64_t bp = asc2dna[(int)ch]; + assert_lt(bp, 4); + snp.seq = (snp.seq << 2) | bp; + } + if(failed) { + _alts.pop_back(); + continue; + } + } else { + cerr << "Error: unknown snp type " << type << endl; + throw 1; + } + _altnames.push_back(snp_id); + assert_eq(_alts.size(), _altnames.size()); + snpID2num[snp_id] = (index_t)_alts.size() - 1; + } + snp_file.close(); + assert_eq(_alts.size(), _altnames.size()); + } + + _haplotypes.clear(); + if(_alts.size() > 0 && htfile != "") { + ifstream ht_file(htfile.c_str(), ios::in); + if(!ht_file.is_open()) { + cerr << "Error: could not open "<< htfile.c_str() << endl; + throw 1; + } + + while(!ht_file.eof()) { + // ht66 A*01:01:01:01 371 533 66,69,72,75,76,77,84,88,90,92,95 + string ht_id; + ht_file >> ht_id; + if(ht_id.empty() || ht_id[0] == '#') { + string line; + getline(ht_file, line); + continue; + } + string chr, alt_list; + index_t left, right; // inclusive [left, right] + ht_file >> chr >> left >> right >> alt_list; + assert_leq(left, right); + index_t chr_idx = 0; + for(; chr_idx < _refnames_nospace.size(); chr_idx++) { + if(chr == _refnames_nospace[chr_idx]) + break; + } + if(chr_idx >= _refnames_nospace.size()) { + continue; + } + assert_eq(chr_szs.size(), _refnames_nospace.size()); + assert_lt(chr_idx, chr_szs.size()); + pair tmp_pair = chr_szs[chr_idx]; + const index_t sofar_len = tmp_pair.first; + const index_t szs_idx = tmp_pair.second; + bool inside_Ns = false; + index_t add_pos = 0; + assert(szs[szs_idx].first); + for(index_t i = szs_idx; i < szs.size(); i++) { + if(i != szs_idx && szs[i].first) break; + if(left < szs[i].off) { + inside_Ns = true; + break; + } else { + left -= szs[i].off; + right -= szs[i].off; + if(left < szs[i].len) { + if(right >= szs[i].len) { + inside_Ns = true; + } + break; + } else { + left -= szs[i].len; + right -= szs[i].len; + add_pos += szs[i].len; + } + } + } + if(inside_Ns) { + continue; + } + left 
= sofar_len + add_pos + left; + right = sofar_len + add_pos + right; + if(chr_idx + 1 < chr_szs.size()) { + if(right >= chr_szs[chr_idx + 1].first) { + continue; + } + } else { + if(right >= jlen) { + continue; + } + } + _haplotypes.expand(); + _haplotypes.back().left = left; + _haplotypes.back().right = right; + EList alts; + tokenize(alt_list, ",", alts); + assert_gt(alts.size(), 0); + _haplotypes.back().alts.clear(); + for(size_t i = 0; i < alts.size(); i++) { + const string& alt = alts[i]; + if(snpID2num.find(alt) != snpID2num.end()) { + _haplotypes.back().alts.push_back(snpID2num[alt]); + } + } + if(_haplotypes.back().alts.size() <= 0) { + _haplotypes.pop_back(); + } + } + _haplotypes.sort(); + ht_file.close(); + } else { + for(index_t a = 0; a < _alts.size(); a++) { + const ALT& alt = _alts[a]; + if(!alt.snp()) continue; + _haplotypes.expand(); + _haplotypes.back().left = alt.pos; + if(alt.deletion()) { + _haplotypes.back().right = alt.pos + alt.len - 1; + } else { + _haplotypes.back().right = alt.pos; + } + _haplotypes.back().alts.clear(); + _haplotypes.back().alts.push_back(a); + } + } + + if(ssfile != "") { + ifstream ss_file(ssfile.c_str(), ios::in); + if(!ss_file.is_open()) { + cerr << "Error: could not open " << ssfile.c_str() << endl; + throw 1; + } + map ss_seq; + while(!ss_file.eof()) { + // 22 16062315 16062810 + + string chr; + ss_file >> chr; + if(chr.empty() || chr[0] == '#') { + string line; + getline(ss_file, line); + continue; + } + index_t left, right; + char strand; + ss_file >> left >> right >> strand; + // Convert exonic position to intronic position + left += 1; right -= 1; + if(left >= right) continue; + index_t chr_idx = 0; + for(; chr_idx < _refnames_nospace.size(); chr_idx++) { + if(chr == _refnames_nospace[chr_idx]) + break; + } + if(chr_idx >= _refnames_nospace.size()) continue; + assert_eq(chr_szs.size(), _refnames_nospace.size()); + assert_lt(chr_idx, chr_szs.size()); + pair tmp_pair = chr_szs[chr_idx]; + const index_t sofar_len = 
tmp_pair.first; + const index_t szs_idx = tmp_pair.second; + + // check whether ambiguous base is in exon's last and first base + if(!checkPosToSzs(szs, szs_idx, left - 1) + || !checkPosToSzs(szs, szs_idx, right + 1)) { + //cerr << "Skip ss. " << chr << ", " << left - 1 << ", " << right + 1 << endl; + continue; + } + + bool inside_Ns = false; + index_t add_pos = 0; + assert(szs[szs_idx].first); + for(index_t i = szs_idx; i < szs.size(); i++) { + if(i != szs_idx && szs[i].first) break; + if(left < szs[i].off) { + inside_Ns = true; + break; + } else { + left -= szs[i].off; + right -= szs[i].off; + if(left < szs[i].len) { + if(right >= szs[i].len) { + inside_Ns = true; + } + break; + } else { + left -= szs[i].len; + right -= szs[i].len; + add_pos += szs[i].len; + } + } + } + if(inside_Ns) continue; + left = sofar_len + add_pos + left; + right = sofar_len + add_pos + right; + if(chr_idx + 1 < chr_szs.size()) { + if(right >= chr_szs[chr_idx + 1].first) continue; + } else { + if(right >= jlen) continue; + } + + // Avoid splice sites in repetitive sequences + // Otherwise, it will likely explode due to an exponential number of combinations + index_t seqlen = 16; assert_leq(seqlen, 16); + if(left >= seqlen && right + 1 + seqlen <= s.length()) { + uint64_t seq = 0; + for(index_t si = left - seqlen; si < left; si++) { + seq = seq << 2 | s[si]; + } + for(index_t si = right + 1; si < right + 1 + seqlen; si++) { + seq = seq << 2 | s[si]; + } + if(_alts.size() > 0) { + if(_alts.back().left == left && + _alts.back().right == right) continue; + } + if(ss_seq.find(seq) == ss_seq.end()) ss_seq[seq] = 1; + else ss_seq[seq]++; + } + + _alts.expand(); + ALT& alt = _alts.back(); + alt.type = ALT_SPLICESITE; + alt.left = left; + alt.right = right; + alt.fw = (strand == '+' ? 
true : false); + alt.excluded = false; + _altnames.push_back("ss"); + } + ss_file.close(); + assert_eq(_alts.size(), _altnames.size()); + + for(size_t i = 0; i < _alts.size(); i++) { + ALT& alt = _alts[i]; + if(!alt.splicesite()) continue; + index_t seqlen = 16; assert_leq(seqlen, 16); + if(alt.left >= seqlen && alt.right + 1 + seqlen <= s.length()) { + uint64_t seq = 0; + for(index_t si = alt.left - seqlen; si < alt.left; si++) { + seq = seq << 2 | s[si]; + } + for(index_t si = alt.right + 1; si < alt.right + 1 + seqlen; si++) { + seq = seq << 2 | s[si]; + } + assert(ss_seq.find(seq) != ss_seq.end()); + alt.excluded = ss_seq[seq] > 1; + } + } + } + + if(exonfile != "") { + ifstream exon_file(exonfile.c_str(), ios::in); + if(!exon_file.is_open()) { + cerr << "Error: could not open " << ssfile.c_str() << endl; + throw 1; + } + while(!exon_file.eof()) { + // 22 16062156 16062315 + + string chr; + exon_file >> chr; + if(chr.empty() || chr[0] == '#') { + string line; + getline(exon_file, line); + continue; + } + index_t left, right; + char strand; + exon_file >> left >> right >> strand; + // Convert exonic position to intronic position + left += 1; right -= 1; + if(left >= right) continue; + index_t chr_idx = 0; + for(; chr_idx < _refnames_nospace.size(); chr_idx++) { + if(chr == _refnames_nospace[chr_idx]) + break; + } + if(chr_idx >= _refnames_nospace.size()) continue; + assert_eq(chr_szs.size(), _refnames_nospace.size()); + assert_lt(chr_idx, chr_szs.size()); + pair tmp_pair = chr_szs[chr_idx]; + const index_t sofar_len = tmp_pair.first; + const index_t szs_idx = tmp_pair.second; + bool inside_Ns = false; + index_t add_pos = 0; + assert(szs[szs_idx].first); + for(index_t i = szs_idx; i < szs.size(); i++) { + if(i != szs_idx && szs[i].first) break; + if(left < szs[i].off) { + inside_Ns = true; + break; + } else { + left -= szs[i].off; + right -= szs[i].off; + if(left < szs[i].len) { + if(right >= szs[i].len) { + inside_Ns = true; + } + break; + } else { + left -= 
szs[i].len; + right -= szs[i].len; + add_pos += szs[i].len; + } + } + } + if(inside_Ns) continue; + left = sofar_len + add_pos + left; + right = sofar_len + add_pos + right; + if(chr_idx + 1 < chr_szs.size()) { + if(right >= chr_szs[chr_idx + 1].first) continue; + } else { + if(right >= jlen) continue; + } + + _alts.expand(); + ALT& alt = _alts.back(); + alt.type = ALT_EXON; + alt.left = left; + alt.right = right; + alt.fw = (strand == '+' ? true : false); + _altnames.push_back("exon"); + } + exon_file.close(); + } + + // Todo - implement structural variations + if(svfile != "") { + cerr << "Warning: SV option is not implemented " << svfile.c_str() << endl; + } + + // Sort SNPs and Splice Sites based on positions + if(_alts.size() > 1) { + assert_eq(_alts.size(), _altnames.size()); + EList, index_t> > buf; buf.resize(_alts.size()); + EList buf2; buf2.resize(_alts.size()); + for(size_t i = 0; i < _alts.size(); i++) { + buf[i].first = _alts[i]; + buf[i].second = (index_t)i; + buf2[i] = _altnames[i]; + } + buf.sort(); + for(size_t i = 0; i < _alts.size(); i++) { + _alts[i] = buf[i].first; + _altnames[i] = buf2[buf[i].second]; + } + + EList buf3; buf3.resize(_alts.size()); + for(size_t i = 0; i < buf3.size(); i++) { + index_t before = buf[i].second; + assert_lt(before, buf3.size()); + buf3[before] = (index_t)i; + } + for(size_t h = 0; h < _haplotypes.size(); h++) { + EList& alts = _haplotypes[h].alts; + for(size_t a = 0; a < alts.size(); a++) { + index_t before = alts[a]; + assert_lt(before, buf3.size()); + alts[a] = buf3[before]; + } + } +#ifndef NDEBUG + for(size_t i = 0; i < _alts.size(); i++) { + if(i + 1 < _alts.size()) { + assert(_alts[i] < _alts[i+1]); + } + const ALT& alt = _alts[i]; + if(alt.snp()) { + assert(_altnames[i] != ""); + } else if(alt.splicesite()) { + assert(_altnames[i] == "ss"); + } else if(alt.exon()) { + assert(_altnames[i] == "exon"); + } else { + assert(false); + } + } +#endif + } + + writeIndex(fout7, (index_t)_alts.size(), this->toBe()); + 
writeIndex(fout8, (index_t)_alts.size(), this->toBe()); + for(index_t i = 0; i < _alts.size(); i++) { + _alts[i].write(fout7, this->toBe()); + fout8 << _altnames[i] << endl; + } + + writeIndex(fout7, (index_t)_haplotypes.size(), this->toBe()); + for(index_t i = 0; i < _haplotypes.size(); i++) { + _haplotypes[i].write(fout7, this->toBe()); + } + + EList >& repeats = _repeatdb.repeats(); + if(_repeat) { + ifstream repeat_file(repeatfile.c_str(), ios::in); + if(!repeat_file.is_open()) { + cerr << "Error: could not open " << ssfile.c_str() << endl; + throw 1; + } + if(parent_szs == NULL) { + throw 1; + } + if(parent_refnames == NULL) { + throw 1; + } + + EList > parent_chr_szs; + index_t tmp_len = 0; + for(index_t i = 0; i < parent_szs->size(); i++) { + if((*parent_szs)[i].first) { + parent_chr_szs.expand(); + parent_chr_szs.back().first = tmp_len; + parent_chr_szs.back().second = i; + } + tmp_len += (index_t)(*parent_szs)[i].len; + } + index_t parent_jlen = joinedLen(*parent_szs); + + string prev_repName = ""; + while(!repeat_file.eof()) { + // >rep1*0 rep 0 100 470 0 + // 20_rep:26622650:+ 20_rep:26628088:+ 20_rep:26632508:+ 20_rep:26635636:+ + // 20_rep:26669936:+ 20_rep:26672654:+ 20_rep:26675373:+ 20_rep:26678095:+ + string repName, repAlleleName; + repeat_file >> repAlleleName; + if(repAlleleName.empty()) // Reached the end of file + break; + if(repAlleleName[0] != '>') { + cerr << "Error: the file format is not correct" << endl; + throw 1; + } + repAlleleName = repAlleleName.substr(1); // Remove '>' + index_t alleleID = 0; + size_t star_pos = repAlleleName.find('*'); + if(star_pos >= repAlleleName.length()) { + repName = repAlleleName; + } else { + repName = repAlleleName.substr(0, star_pos); + string strID = repAlleleName.substr(star_pos + 1); + istringstream(strID) >> alleleID; + } + + string refRepName; + index_t repPos, repLen; + repeat_file >> refRepName >> repPos >> repLen; + index_t rep_idx = 0; + for(; rep_idx < _refnames_nospace.size(); rep_idx++) { + 
if(refRepName == _refnames_nospace[rep_idx]) + break; + } + if(rep_idx >= _refnames_nospace.size()) { + cerr << "Error: " << refRepName << " is not found in " << endl; + throw 1; + } + + if(repeats.size() == 0 || + repeats.back().repID != rep_idx || + repeats.back().repName != repName) { + if(repeats.size() > 0) { + repeats.back().positions.sort(); + } + repeats.expand(); + repeats.back().init(repName, rep_idx, repPos, repLen); + } + + // update repPos and repLen + if(repPos < repeats.back().repPos) { + repeats.back().repLen += (repeats.back().repPos - repPos); + repeats.back().repPos = repPos; + } + if(repPos + repLen > repeats.back().repPos + repeats.back().repLen) { + repeats.back().repLen = repPos + repLen - repeats.back().repPos; + } + + size_t baseOff = 0; + if(repeats.size() > 1 && repeats[repeats.size() - 2].repID == rep_idx) { + baseOff = repeats[repeats.size() - 2].repPos + repeats[repeats.size() - 2].repLen; + } + + index_t numCoords, numAlts; + repeat_file >> numCoords >> numAlts; + EList snpIDs; + EList snpStrIDs; + if(numAlts > 0) { + string snpStrID; + repeat_file >> snpStrID; + tokenize(snpStrID, ",", snpStrIDs); + if(snpStrIDs.size() != numAlts) { + assert(false); + cerr << "Error: the number of SNPs (" << snpIDs.size() << ", " << snpStrID << ") does not equal to " << numAlts << endl; + throw 1; + } + for(index_t i = 0; i < snpStrIDs.size(); i++) { + if(snpID2num.find(snpStrIDs[i]) == snpID2num.end()) { + cerr << "Error: " << snpStrIDs[i] << " is not found" << endl; + throw 1; + } + index_t numID = snpID2num[snpStrIDs[i]]; + snpIDs.push_back(numID); + } + } + + EList >& positions = repeats.back().positions; + size_t sofar_numCoords = positions.size(); + while(positions.size() - sofar_numCoords < numCoords) { + string chr_pos; + repeat_file >> chr_pos; + size_t colon_pos = chr_pos.find(':'); + if(colon_pos + 1 >= chr_pos.length()) { + cerr << "Error: : is not found in " << chr_pos << endl; + throw 1; + } + string chr = chr_pos.substr(0, colon_pos); 
+ string strPos = chr_pos.substr(colon_pos + 1, chr_pos.length() - colon_pos - 3); + bool repfw = (chr_pos[chr_pos.length() - 1] == '+'); + index_t pos = 0; + istringstream(strPos) >> pos; + index_t chr_idx = 0; + for(; chr_idx < parent_refnames->size(); chr_idx++) { + if(chr == (*parent_refnames)[chr_idx]) + break; + } + if(chr_idx >= parent_refnames->size()) { + cerr << "Error: " << chr << " is not found in " << endl; + throw 1; + } + assert_eq(parent_chr_szs.size(), parent_refnames->size()); + assert_lt(chr_idx, parent_chr_szs.size()); + + positions.expand(); + positions.back().tid = chr_idx; + positions.back().toff = pos; + positions.back().fw = repfw; + positions.back().alleleID = alleleID; + + pair tmp_pair = parent_chr_szs[chr_idx]; + const index_t sofar_len = tmp_pair.first; + const index_t szs_idx = tmp_pair.second; + bool involve_Ns = false; + index_t add_pos = 0; + assert((*parent_szs)[szs_idx].first); + for(index_t i = szs_idx; i < parent_szs->size(); i++) { + if(i != szs_idx && (*parent_szs)[i].first) { + break; + } + if(pos < (*parent_szs)[i].off) { + involve_Ns = true; + break; + } else { + pos -= (*parent_szs)[i].off; + if(pos < (*parent_szs)[i].len) { + break; + } else { + pos -= (*parent_szs)[i].len; + add_pos += (*parent_szs)[i].len; + } + } + } + if(involve_Ns) { + assert(false); + throw 1; + } + pos = sofar_len + add_pos + pos; + if(chr_idx + 1 < parent_chr_szs.size()) { + if(pos >= parent_chr_szs[chr_idx + 1].first) { + assert(false); + throw 1; + } + } else { + if(pos >= parent_jlen){ + assert(false); + throw 1; + } + } + + positions.back().joinedOff = pos; + } + repeats.back().alleles.expand(); + assert_geq(repPos, baseOff); + repeats.back().alleles.back().init(repPos - baseOff, repLen); + + } + if(repeats.size() > 0) { + repeats.back().positions.sort(); + } + repeat_file.close(); + + index_t total_repeat_len = 0; + for(size_t r = 0; r + 1 < repeats.size(); r++) { + if(repeats[r].repID != repeats[r+1].repID) { + index_t repeat_len = 
repeats[r].repPos + repeats[r].repLen; + total_repeat_len += repeat_len; + } + } + index_t repeat_len = repeats.back().repPos + repeats.back().repLen; + total_repeat_len += repeat_len; + if(total_repeat_len != s.length()) { + cerr << "Error: repeat length (" << repeats.back().repPos + repeats.back().repLen; + cerr << ") does not match sequence length (" << s.length() << ")" << endl; + throw 1; + } + + _repeatLens.resizeExact(szs.size()); + for(size_t i = 0; i < _repeatLens.size(); i++) { + _repeatLens[i].first = numeric_limits::max(); + _repeatLens[i].second = 0; + } + for(size_t i = 0; i < repeats.size(); i++) { + index_t id = repeats[i].repID; + index_t len = repeats[i].repLen; + assert_lt(id, _repeatLens.size()); + if(_repeatLens[id].first > len) { + _repeatLens[id].first = len; + } + if(_repeatLens[id].second < len) { + _repeatLens[id].second = len; + } + } + + writeIndex(fout7, _repeatLens.size(), this->toBe()); + for(size_t i = 0; i < _repeatLens.size(); i++) { + writeIndex(fout7, _repeatLens[i].first, this->toBe()); + writeIndex(fout7, _repeatLens[i].second, this->toBe()); + } + _repeatdb.write(fout7, this->toBe()); + writeIndex(fout7, chr_szs.size(), this->toBe()); // number of repeat indexes + EList seqs; + EList tableFilePos; + streampos filepos = fout7.tellp(); + for(size_t i = 0; i < chr_szs.size(); i++) { + writeIndex(fout7, 0, this->toBe()); + } + for(size_t i = 0; i < repeats.size(); i++) { + const Repeat& repeat = repeats[i]; + assert_lt(repeat.repID, chr_szs.size()); + index_t template_len = 0; + if(repeat.repID + 1 < chr_szs.size()) { + template_len = chr_szs[repeat.repID + 1].first - chr_szs[repeat.repID].first; + } else { + template_len = s.length() - chr_szs[repeat.repID].first; + } + assert_leq(repeat.repPos + repeat.repLen, template_len); + index_t pos = chr_szs[repeat.repID].first + repeat.repPos; + assert_leq(pos + repeat.repLen, s.length()); + seqs.expand(); + seqs.back().clear(); + for(index_t j = 0; j < repeat.repLen; j++) { + int c = 
s[pos + j]; + assert_range(0, 3, c); + seqs.back().push_back("ACGT"[c]); + } + + if(i + 1 == repeats.size() || repeats[i].repID != repeats[i+1].repID) { + const size_t w = RB_Minimizer::default_w, k = RB_Minimizer::default_k; + RB_KmerTable kmer_table; + kmer_table.build(seqs, w, k); + kmer_table.write(fout7, this->toBe()); + seqs.clear(); + tableFilePos.push_back(fout7.tellp()); + } + } + assert_eq(tableFilePos.size(), chr_szs.size()); + streampos origpos = fout7.tellp(); + fout7.seekp(filepos); + for(size_t i = 0; i < tableFilePos.size(); i++) { + writeIndex(fout7, tableFilePos[i], this->toBe()); + } + fout7.seekp(origpos); + } + + fout7.close(); + fout8.close(); + } + // Joined reference sequence now in 's' + } catch(bad_alloc& e) { + // If we throw an allocation exception in the try block, + // that means that the joined version of the reference + // string itself is too larger to fit in memory. The only + // alternatives are to tell the user to give us more memory + // or to try again with a packed representation of the + // reference (if we haven't tried that already). + cerr << "Could not allocate space for a joined string of " << jlen << " elements." << endl; + if(!isPacked() && _passMemExc) { + // Pass the exception up so that we can retry using a + // packed string representation + throw e; + } + // There's no point passing this exception on. The fact + // that we couldn't allocate the joined string means that + // --bmax is irrelevant - the user should re-run with + // ebwt-build-packed + if(isPacked()) { + cerr << "Please try running bowtie-build on a computer with more memory." << endl; + } else { + cerr << "Please try running bowtie-build in packed mode (-p/--packed) or in automatic" << endl + << "mode (-a/--auto), or try again on a computer with more memory." << endl; + } + if(sizeof(void*) == 4) { + cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl + << "this executable is 32-bit." 
<< endl; + } + throw 1; + } + + // Succesfully obtained joined reference string + assert_geq(s.length(), jlen); + if(bmax != (index_t)OFF_MASK) { + // VMSG_NL("bmax according to bmax setting: " << bmax); + } + else if(bmaxSqrtMult != (index_t)OFF_MASK) { + bmax *= bmaxSqrtMult; + // VMSG_NL("bmax according to bmaxSqrtMult setting: " << bmax); + } + else if(bmaxDivN != (index_t)OFF_MASK) { + bmax = max(jlen / (bmaxDivN * _nthreads), 1); + // VMSG_NL("bmax according to bmaxDivN setting: " << bmax); + } + else { + bmax = (uint32_t)sqrt(s.length()); + // VMSG_NL("bmax defaulted to: " << bmax); + } + + int iter = 0; + bool first = true; + streampos out1pos = out1.tellp(); + streampos out2pos = out2.tellp(); + + if(!_repeat) { + // Look for bmax/dcv parameters that work. + while(true) { + if(!first && bmax < 40 && _passMemExc) { + cerr << "Could not find approrpiate bmax/dcv settings for building this index." << endl; + if(!isPacked()) { + // Throw an exception exception so that we can + // retry using a packed string representation + throw bad_alloc(); + } else { + cerr << "Already tried a packed string representation." << endl; + } + cerr << "Please try indexing this reference on a computer with more memory." << endl; + if(sizeof(void*) == 4) { + cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl + << "this executable is 32-bit." 
<< endl; + } + throw 1; + } + if(!first) { + out1.seekp(out1pos); + out2.seekp(out2pos); + } + if(dcv > 4096) dcv = 4096; + if((iter % 6) == 5 && dcv < 4096 && dcv != 0) { + dcv <<= 1; // double difference-cover period + } else { + bmax -= (bmax >> 2); // reduce by 25% + } + iter++; + try { + if(_alts.empty()) { + VMSG("Using parameters --bmax " << bmax); + if(dcv == 0) { + VMSG_NL(" and *no difference cover*"); + } else { + VMSG_NL(" --dcv " << dcv); + } + { + VMSG_NL(" Doing ahead-of-time memory usage test"); + // Make a quick-and-dirty attempt to force a bad_alloc iff + // we would have thrown one eventually as part of + // constructing the DifferenceCoverSample + dcv <<= 1; + index_t sz = (index_t)DifferenceCoverSample::simulateAllocs(s, dcv >> 1); + if(_nthreads > 1) sz *= (_nthreads + 1); + AutoArray tmp(sz, EBWT_CAT); + dcv >>= 1; + // Likewise with the KarkkainenBlockwiseSA + sz = (index_t)KarkkainenBlockwiseSA::simulateAllocs(s, bmax); + AutoArray tmp2(sz, EBWT_CAT); + // Now throw in the 'ftab' and 'isaSample' structures + // that we'll eventually allocate in buildToDisk + AutoArray ftab(_gh._ftabLen * 2, EBWT_CAT); + AutoArray side(_gh._sideSz, EBWT_CAT); + // Grab another 20 MB out of caution + AutoArray extra(20*1024*1024, EBWT_CAT); + // If we made it here without throwing bad_alloc, then we + // passed the memory-usage stress test + VMSG(" Passed! 
Constructing with these parameters: --bmax " << bmax << " --dcv " << dcv); + if(isPacked()) { + VMSG(" --packed"); + } + VMSG_NL(""); + } + VMSG_NL("Constructing suffix-array element generator"); + KarkkainenBlockwiseSA bsa(s, bmax, _nthreads, dcv, seed, _sanity, _passMemExc, _verbose, outfile); + assert(bsa.suffixItrIsReset()); + assert_eq(bsa.size(), s.length()+1); + VMSG_NL("Converting suffix-array elements to index image"); + buildToDisk(bsa, s, out1, out2); + } else { + RefGraph* graph = new RefGraph( + s, + szs, + _alts, + _haplotypes, + outfile, + _nthreads, + verbose); + PathGraph* pg = new PathGraph( + *graph, + outfile, + std::numeric_limits::max(), + _nthreads, + verbose); + + if(verbose) { cerr << "Generating edges... " << endl; } + if(!pg->generateEdges(*graph)) { return; } + // Re-initialize GFM parameters to reflect real number of edges (gbwt string) + _gh.init( + _gh.len(), + pg->getNumEdges(), + pg->getNumNodes(), + _gh.lineRate(), + _gh.offRate(), + _gh.ftabChars(), + 0, + _gh.entireReverse()); + buildToDisk(*pg, s, out1, out2); + delete pg; pg = NULL; + delete graph; graph = NULL; + } + out1.flush(); out2.flush(); + if(out1.fail() || out2.fail()) { + cerr << "An error occurred writing the index to disk. Please check if the disk is full." << endl; + throw 1; + } + break; + } catch(bad_alloc& e) { + if(_passMemExc) { + VMSG_NL(" Ran out of memory; automatically trying more memory-economical parameters."); + } else { + cerr << "Out of memory while constructing suffix array. 
Please try using a smaller" << endl
                         << "number of blocks by specifying a smaller --bmax or a larger --bmaxdivn" << endl;
                    throw 1;
                }
            }
            first = false;
        }
        assert(repOk());

        // Now write reference sequence names on the end
        assert_eq(this->_refnames.size(), this->_nPat);
        for(index_t i = 0; i < this->_refnames.size(); i++) {
            out1 << this->_refnames[i].c_str() << endl;
        }
        out1 << '\0';
        out1.flush(); out2.flush();
        if(out1.fail() || out2.fail()) {
            cerr << "An error occurred writing the index to disk. Please check if the disk is full." << endl;
            throw 1;
        }
    }
    VMSG_NL("Returning from initFromVector");
}

/**
 * Return the length that the joined string of the given string
 * list will have. Note that this is indifferent to how the text
 * fragments correspond to input sequences - it just cares about
 * the lengths of the fragments.
 *
 * @param szs list of fragment records; only each record's 'len'
 *            field is read
 * @return    sum of all 'len' fields, accumulated as index_t
 */
index_t joinedLen(EList& szs) {
    index_t ret = 0;
    for(unsigned int i = 0; i < szs.size(); i++) {
        // Each length is narrowed to index_t before being added
        ret += (index_t)szs[i].len;
    }
    return ret;
}

/// Destruct an Ebwt: reset the buffer wrappers, release shared-memory
/// blocks if any are still reported, and close the two input handles.
~GFM() {
    _fchr.reset();
    _ftab.reset();
    _eftab.reset();
    _plen.reset();
    _rstarts.reset();
    _offs.reset();
    _gfm.reset();
    // NOTE(review): offs() and gfm() are read after _offs/_gfm were
    // reset above; whether they can still be non-NULL here depends on
    // what the wrapper's reset() does - confirm.
    if(offs() != NULL && useShmem_) {
        FREE_SHARED(offs());
    }
    if(gfm() != NULL && useShmem_) {
        FREE_SHARED(gfm());
    }
    // Close the index input handles only if they were opened
    if (_in1 != NULL) fclose(_in1);
    if (_in2 != NULL) fclose(_in2);
}

/// Accessors
// Index parameters
inline const GFMParams& gh() const { return _gh; }
// Per-entry views of the _zOffs / _zGbwtByteOffs / _zGbwtBpOffs lists;
// the index is bounds-checked by assertion only
index_t numZOffs() const { return _zOffs.size(); }
index_t zOff(index_t i) const { assert_lt(i, _zOffs.size()); return _zOffs[i]; }
index_t zGbwtByteOff(index_t i) const { assert_lt(i, _zGbwtByteOffs.size()); return _zGbwtByteOffs[i]; }
int zGbwtBpOff(index_t i) const { assert_lt(i, _zGbwtBpOffs.size()); return _zGbwtBpOffs[i]; }
index_t nPat() const { return _nPat; }
index_t nFrag() const { return _nFrag; }
// Mutable raw pointers obtained from the buffer wrappers
inline index_t* fchr() { return _fchr.get(); }
inline index_t* ftab() { return _ftab.get(); }
inline index_t* eftab() { return _eftab.get(); }
inline index_t* offs() { return _offs.get(); }
inline index_t* plen() { return _plen.get(); }
inline index_t* rstarts() { return _rstarts.get(); }
inline uint8_t* gfm() { return _gfm.get(); }
// Read-only counterparts of the pointers above
inline const index_t* fchr() const { return _fchr.get(); }
inline const index_t* ftab() const { return _ftab.get(); }
inline const index_t* eftab() const { return _eftab.get(); }
inline const index_t* offs() const { return _offs.get(); }
inline const index_t* plen() const { return _plen.get(); }
inline const index_t* rstarts() const { return _rstarts.get(); }
inline const uint8_t* gfm() const { return _gfm.get(); }
// Alternative alleles / splice sites / exons and their names
inline const EList >& alts() const { return _alts; }
inline const EList& altnames() const { return _altnames; }
// Simple flag and list getters
bool toBe() const { return _toBigEndian; }
bool verbose() const { return _verbose; }
bool sanityCheck() const { return _sanity; }
EList& refnames() { return _refnames; }
bool fw() const { return fw_; }
bool repeat() const { return _repeat; }
const EList& getRepeatIncluded() const { return _repeatIncluded; }

// This member exists only when POPCNT_CAPABILITY is defined
#ifdef POPCNT_CAPABILITY
bool _usePOPCNTinstruction;
#endif

/**
 * Returns true iff the index contains the given string (exactly). The
 * given string must contain only unambiguous characters. TODO:
 * support skipping of ambiguous characters.
 *
 * 'top' and 'bot' are optional out-parameters and default to NULL.
 */
bool contains(
    const BTDnaString& str,
    index_t *top = NULL,
    index_t *bot = NULL) const;

/**
 * Returns true iff the index contains the given string (exactly). The
 * given string must contain only unambiguous characters. TODO:
 * support skipping of ambiguous characters.
+ */ + bool contains( + const char *str, + index_t *top = NULL, + index_t *bot = NULL) const + { + return contains(BTDnaString(str, true), top, bot); + } + + /// Return true iff the Ebwt is currently in memory + bool isInMemory() const { + if(gfm() != NULL) { + // Note: We might have skipped loading _offs, _ftab, + // _eftab, and _rstarts depending on whether this is the + // reverse index and what algorithm is being used. + assert(_gh.repOk()); + //assert(_ftab != NULL); + //assert(_eftab != NULL); + assert(fchr() != NULL); + //assert(_offs != NULL); + //assert(_rstarts != NULL); + // assert_neq(_zGbwtByteOff, INDEX_MAX); + // assert_neq(_zGbwtBpOff, -1); + return true; + } else { + assert(ftab() == NULL); + assert(eftab() == NULL); + assert(fchr() == NULL); + assert(offs() == NULL); + assert(rstarts() == NULL); + assert_eq(_zOffs.size(), 0); + assert_eq(_zGbwtByteOffs.size(), 0); + assert_eq(_zGbwtBpOffs.size(), 0); + return false; + } + } + + /// Return true iff the Ebwt is currently stored on disk + bool isEvicted() const { + return !isInMemory(); + } + + /** + * Load this Ebwt into memory by reading it in from the _in1 and + * _in2 streams. + */ + void loadIntoMemory( + int needEntireReverse, + bool loadSASamp, + bool loadFtab, + bool loadRstarts, + bool loadNames, + bool verbose) + { + readIntoMemory( + needEntireReverse, // require reverse index to be concatenated reference reversed + loadSASamp, // load the SA sample portion? + loadFtab, // load the ftab (_ftab[] and _eftab[])? + loadRstarts, // load the r-starts (_rstarts[])? + false, // stop after loading the header portion? + NULL, // params + false, // mmSweep + loadNames, // loadNames + verbose); // startVerbose + } + + /** + * Frees memory associated with the Ebwt. 
+ */ + void evictFromMemory() { + assert(isInMemory()); + _fchr.free(); + _ftab.free(); + _eftab.free(); + _rstarts.free(); + _offs.free(); // might not be under control of APtrWrap + _gfm.free(); // might not be under control of APtrWrap + // Keep plen; it's small and the client may want to seq it + // even when the others are evicted. + //_plen = NULL; + _zOffs.clear(); + _zGbwtByteOffs.clear(); + _zGbwtBpOffs.clear(); + } + + /** + * Turn a substring of 'seq' starting at offset 'off' and having + * length equal to the index's 'ftabChars' into an int that can be + * used to index into the ftab array. + */ + index_t ftabSeqToInt( + const BTDnaString& seq, + index_t off, + bool rev) const + { + int fc = _gh._ftabChars; + index_t lo = off, hi = lo + fc; + assert_leq(hi, seq.length()); + index_t ftabOff = 0; + for(int i = 0; i < fc; i++) { + bool fwex = fw(); + if(rev) fwex = !fwex; + // We add characters to the ftabOff in the order they would + // have been consumed in a normal search. For BWT, this + // means right-to-left order; for BWT' it's left-to-right. + int c = (fwex ? seq[lo + i] : seq[hi - i - 1]); + if(c > 3) { + return std::numeric_limits::max(); + } + assert_range(0, 3, c); + ftabOff <<= 2; + ftabOff |= c; + } + return ftabOff; + } + + /** + * Non-static facade for static function ftabHi. + */ + index_t ftabHi(index_t i) const { + return GFM::ftabHi( + ftab(), + eftab(), + _gh.linearFM() ? _gh._len : _gh._gbwtLen, + _gh._ftabLen, + _gh._eftabLen, + i); + } + + /** + * Get "high interpretation" of ftab entry at index i. The high + * interpretation of a regular ftab entry is just the entry + * itself. The high interpretation of an extended entry is the + * second correpsonding ui32 in the eftab. + * + * It's a static member because it's convenient to ask this + * question before the Ebwt is fully initialized. 
+ */ + static index_t ftabHi( + const index_t *ftab, + const index_t *eftab, + index_t gbwtLen, + index_t ftabLen, + index_t eftabLen, + index_t i) + { + assert_lt(i, ftabLen); + if(ftab[i] <= gbwtLen) { + return ftab[i]; + } else { + index_t efIdx = ftab[i] ^ (index_t)INDEX_MAX; + assert_lt(efIdx*2+1, eftabLen); + return eftab[efIdx*2+1]; + } + } + + /** + * Non-static facade for static function ftabLo. + */ + index_t ftabLo(index_t i) const { + return GFM::ftabLo( + ftab(), + eftab(), + _gh.linearFM() ? _gh._len : _gh._gbwtLen, + _gh._ftabLen, + _gh._eftabLen, + i); + } + + /** + * Get low bound of ftab range. + */ + index_t ftabLo(const BTDnaString& seq, index_t off) const { + return ftabLo(ftabSeqToInt(seq, off, false)); + } + + /** + * Get high bound of ftab range. + */ + index_t ftabHi(const BTDnaString& seq, index_t off) const { + return ftabHi(ftabSeqToInt(seq, off, false)); + } + + /** + * Extract characters from seq starting at offset 'off' and going either + * forward or backward, depending on 'rev'. Order matters when compiling + * the integer that gets looked up in the ftab. Each successive character + * is ORed into the least significant bit-pair, and characters are + * integrated in the direction of the search. + */ + bool + ftabLoHi( + const BTDnaString& seq, // sequence to extract from + index_t off, // offset into seq to begin extracting + bool rev, // reverse while extracting + index_t& top, + index_t& bot) const + { + index_t fi = ftabSeqToInt(seq, off, rev); + if(fi == std::numeric_limits::max()) { + return false; + } + top = ftabHi(fi); + bot = ftabLo(fi+1); + assert_geq(bot, top); + return true; + } + + /** + * Get "low interpretation" of ftab entry at index i. The low + * interpretation of a regular ftab entry is just the entry + * itself. The low interpretation of an extended entry is the + * first correpsonding ui32 in the eftab. 
+ * + * It's a static member because it's convenient to ask this + * question before the Ebwt is fully initialized. + */ + static index_t ftabLo( + const index_t *ftab, + const index_t *eftab, + index_t gbwtLen, + index_t ftabLen, + index_t eftabLen, + index_t i) + { + assert_lt(i, ftabLen); + if(ftab[i] <= gbwtLen) { + return ftab[i]; + } else { + index_t efIdx = ftab[i] ^ (index_t)INDEX_MAX; + assert_lt(efIdx*2+1, eftabLen); + return eftab[efIdx*2]; + } + } + + /** + * Try to resolve the reference offset of the BW element 'elt'. If + * it can be resolved immediately, return the reference offset. If + * it cannot be resolved immediately, return 0xffffffff. + */ + index_t tryOffset(index_t elt, index_t node) const { + assert(offs() != NULL); + for(index_t i = 0; i < _zOffs.size(); i++) { + if(elt == _zOffs[i]) return 0; + } + if((node & _gh._offMask) == node) { + index_t nodeOff = node >> _gh._offRate; + assert_lt(nodeOff, _gh._offsLen); + index_t off = offs()[nodeOff]; + return off; + } else { + // Try looking at zoff + return (index_t)INDEX_MAX; + } + } + + /** + * Try to resolve the reference offset of the BW element 'elt' such + * that the offset returned is at the right-hand side of the + * forward reference substring involved in the hit. + */ + index_t tryOffset( + index_t elt, + bool fw, + index_t hitlen) const + { + index_t off = tryOffset(elt); + if(off != (index_t)INDEX_MAX && !fw) { + assert_lt(off, _gh._len); + off = _gh._len - off - 1; + assert_geq(off, hitlen-1); + off -= (hitlen-1); + assert_lt(off, _gh._len); + } + return off; + } + + /** + * Walk 'steps' steps to the left and return the row arrived at. + */ + index_t walkLeft(index_t row, index_t steps) const; + + /** + * Resolve the reference offset of the BW element 'elt'. 
+ */ + index_t getOffset(index_t row, index_t node = 0) const; + + /** + * Resolve the reference offset of the BW element 'elt' such that + * the offset returned is at the right-hand side of the forward + * reference substring involved in the hit. + */ + index_t getOffset( + index_t elt, + bool fw, + index_t hitlen) const; + + /** + * When using read() to create an Ebwt, we have to set a couple of + * additional fields in the Ebwt object that aren't part of the + * parameter list and are not stored explicitly in the file. Right + * now, this just involves initializing _zEbwtByteOff and + * _zEbwtBpOff from _zOff. + */ + void postReadInit(const GFMParams& gh) { + _zGbwtByteOffs.resizeExact(_zOffs.size()); + _zGbwtBpOffs.resizeExact(_zOffs.size()); + for(index_t i = 0; i < _zOffs.size(); i++) { + index_t sideNum = _zOffs[i] / gh._sideGbwtLen; + index_t sideCharOff = _zOffs[i] % gh._sideGbwtLen; + index_t sideByteOff = sideNum * gh._sideSz; + _zGbwtByteOffs[i] = sideCharOff >> 2; + assert_lt(_zGbwtByteOffs[i], gh._sideGbwtSz); + _zGbwtBpOffs[i] = sideCharOff & 3; + assert_lt(_zGbwtBpOffs[i], 4); + _zGbwtByteOffs[i] += sideByteOff; + } + assert(repOk(gh)); // Ebwt should be fully initialized now + } + + /** + * Given basename of an Ebwt index, read and return its flag. 
+ */ + static int32_t readVersionFlags(const string& instr, int& major, int& minor, string& extra_version); + + static void readProgramVersion(int& major_version, int& minor_version, string& extra_version) { + char extra[256] = {0,}; + int second_version; + sscanf(HISAT2_VERSION, "%d.%d.%d-%s", + &second_version, + &major_version, + &minor_version, + extra); + extra_version = extra; + } + + static void readIndexVersion(int index_version, int& major_version, int& minor_version, string& extra_version) { + major_version = (index_version >> 16) & 0xff; + minor_version = (index_version >> 8) & 0xff; + if((index_version & 0xff) == 1) { + extra_version = "alpha"; + } else if((index_version & 0xff) == 2) { + extra_version = "beta"; + } else { + extra_version = ""; + } + } + + static int getIndexVersion() { + int major_version = 0, minor_version = 0; + string extra_version; + readProgramVersion(major_version, minor_version, extra_version); + int version = 2; // HISAT2 + version = (version << 8) | (major_version & 0xff); + version = (version << 8) | (minor_version & 0xff); + version = version << 8; + if(extra_version == "alpha") { + version |= 0x1; + } else if(extra_version == "beta") { + version |= 0x2; + } + return version; + } + + /** + * Pretty-print the Ebwt to the given output stream. + */ + void print(ostream& out) const { + print(out, _gh); + } + + /** + * Pretty-print the Ebwt and given EbwtParams to the given output + * stream. + */ + void print(ostream& out, const GFMParams& gh) const { + gh.print(out); // print params + return; + out << "Ebwt (" << (isInMemory()? 
"memory" : "disk") << "):" << endl; + for(index_t i = 0; i < _zOffs.size(); i++) { + out << " " << (i+1) << " zOffs: " << _zOffs[i] << endl + << " " << (i+1) << " zGbwtByteOff: " << _zGbwtByteOffs[i] << endl + << " " << (i+1) << " zGbwtBpOff: " << _zGbwtBpOffs[i] << endl; + } + out << " nPat: " << _nPat << endl + << " plen: "; + if(plen() == NULL) { + out << "NULL" << endl; + } else { + out << "non-NULL, [0] = " << plen()[0] << endl; + } + out << " rstarts: "; + if(rstarts() == NULL) { + out << "NULL" << endl; + } else { + out << "non-NULL, [0] = " << rstarts()[0] << endl; + } + out << " ebwt: "; + if(gfm() == NULL) { + out << "NULL" << endl; + } else { + out << "non-NULL, [0] = " << gfm()[0] << endl; + } + out << " fchr: "; + if(fchr() == NULL) { + out << "NULL" << endl; + } else { + out << "non-NULL, [0] = " << fchr()[0] << endl; + } + out << " ftab: "; + if(ftab() == NULL) { + out << "NULL" << endl; + } else { + out << "non-NULL, [0] = " << ftab()[0] << endl; + } + out << " eftab: "; + if(eftab() == NULL) { + out << "NULL" << endl; + } else { + out << "non-NULL, [0] = " << eftab()[0] << endl; + } + out << " offs: "; + if(offs() == NULL) { + out << "NULL" << endl; + } else { + out << "non-NULL, [0] = " << offs()[0] << endl; + } + } + + // Building + template static TStr join(EList& l, uint32_t seed); + template static void join(EList& l, EList& szs, index_t sztot, const RefReadInParams& refparams, uint32_t seed, TStr& s, bool include_rc = false, bool CGtoTG = false); + template void joinToDisk(EList& l, EList& szs, index_t sztot, const RefReadInParams& refparams, TStr& ret, ostream& out1, ostream& out2); + template void buildToDisk(PathGraph& gbwt, const TStr& s, ostream& out1, ostream& out2, streampos headerPos = -1); + template void buildToDisk(InorderBlockwiseSA& sa, const TStr& s, ostream& out1, ostream& out2, streampos headerPos = -1); + + // I/O + void readIntoMemory(int needEntireRev, bool loadSASamp, bool loadFtab, bool loadRstarts, bool justHeader, 
GFMParams *params, bool mmSweep, bool loadNames, bool startVerbose, bool subIndex = false); + void writeFromMemory(bool justHeader, ostream& out1, ostream& out2) const; + void writeFromMemory(bool justHeader, const string& out1, const string& out2) const; + + // Sanity checking + void sanityCheckUpToSide(int upToSide) const; + void sanityCheckAll(int reverse) const; + void restore(SString& s) const; + void checkOrigs(const EList >& os, bool mirror) const; + + // Searching and reporting + bool joinedToTextOff(index_t qlen, index_t off, index_t& tidx, index_t& textoff, index_t& tlen, bool rejectStraddle, bool& straddled) const; + bool textOffToJoined(index_t tid, index_t tlen, index_t& off) const; + +#define WITHIN_BWT_LEN(x) \ + assert_leq(x[0], this->_gh._sideGbwtLen); \ + assert_leq(x[1], this->_gh._sideGbwtLen); \ + assert_leq(x[2], this->_gh._sideGbwtLen); \ + assert_leq(x[3], this->_gh._sideGbwtLen) + +#define WITHIN_FCHR(x) \ + assert_leq(x[0], this->fchr()[1]); \ + assert_leq(x[1], this->fchr()[2]); \ + assert_leq(x[2], this->fchr()[3]); \ + assert_leq(x[3], this->fchr()[4]) + +#define WITHIN_FCHR_DOLLARA(x) \ + assert_leq(x[0], this->fchr()[1]+1); \ + assert_leq(x[1], this->fchr()[2]); \ + assert_leq(x[2], this->fchr()[3]); \ + assert_leq(x[3], this->fchr()[4]) + + /** + * Count all occurrences of character c from the beginning of the + * forward side to and add in the occ[] count up to the side + * break just prior to the side. 
+ * + * A Bowtie 2 side is shaped like: + * + * XXXXXXXXXXXXXXXX [A] [C] [G] [T] + * --------48------ -4- -4- -4- -4- (numbers in bytes) + */ + inline index_t countBt2Side(const SideLocus& l, int c) const { + assert_range(0, 3, c); + assert_range(0, (int)this->_gh._sideGbwtSz-1, (int)l._by); + assert_range(0, 3, (int)l._bp); + const uint8_t *side = l.side(this->gfm()); + index_t cCnt = countUpTo(l, c); + assert_leq(cCnt, l.toBWRow(_gh)); + assert_leq(cCnt, this->_gh._sideGbwtLen); + assert_eq(_zGbwtByteOffs.size(), _zGbwtBpOffs.size()); + for(index_t i = 0; i < _zGbwtByteOffs.size(); i++) { + index_t zGbwtByteOff = _zGbwtByteOffs[i]; + if(c == 0 && l._sideByteOff <= zGbwtByteOff && l._sideByteOff + l._by >= zGbwtByteOff) { + // Adjust for the fact that we represented $ with an 'A', but + // shouldn't count it as an 'A' here + int zGbwtBpOff = _zGbwtBpOffs[i]; + if((l._sideByteOff + l._by > zGbwtByteOff) || + (l._sideByteOff + l._by == zGbwtByteOff && l._bp > zGbwtBpOff)) + { + cCnt--; // Adjust for '$' looking like an 'A' + } + } + } + index_t ret; + // Now factor in the occ[] count at the side break + const uint8_t *acgt8 = side + _gh._sideGbwtSz; + if(!_gh._linearFM) { + acgt8 += (sizeof(index_t) << 1); + } + const index_t *acgt = reinterpret_cast(acgt8); + assert_leq(acgt[0], this->_gh._numSides * this->_gh._sideGbwtLen); // b/c it's used as padding + assert_lt(acgt[1], this->_gh._gbwtLen); + assert_lt(acgt[2], this->_gh._gbwtLen); + assert_lt(acgt[3], this->_gh._gbwtLen); + ret = acgt[c] + cCnt + this->fchr()[c]; +#ifndef NDEBUG + assert_leq(ret, this->fchr()[c+1]); // can't have jumpded into next char's section + if(c == 0) { + assert_leq(cCnt, this->_gh._sideGbwtLen); + } else { + assert_leq(ret, this->_gh._gbwtLen); + } +#endif + return ret; + } + + /** + * Count all occurrences of all four nucleotides up to the starting + * point (which must be in a forward side) given by 'l' storing the + * result in 'cntsUpto', then count nucleotide occurrences within the 
+ * range of length 'num' storing the result in 'cntsIn'. Also, keep + * track of the characters occurring within the range by setting + * 'masks' accordingly (masks[1][10] == true -> 11th character is a + * 'C', and masks[0][10] == masks[2][10] == masks[3][10] == false. + */ + inline void countBt2SideRange( + const SideLocus& l, // top locus + index_t num, // number of elts in range to tall + index_t* cntsUpto, // A/C/G/T counts up to top + index_t* cntsIn, // A/C/G/T counts within range + EList *masks) const // masks indicating which range elts = A/C/G/T + { + assert_gt(num, 0); + assert_range(0, (int)this->_gh._sideGbwtSz-1, (int)l._by); + assert_range(0, 3, (int)l._bp); + countUpToEx(l, cntsUpto); + WITHIN_FCHR_DOLLARA(cntsUpto); + WITHIN_BWT_LEN(cntsUpto); + const uint8_t *side = l.side(this->gfm()); + assert_eq(_zGbwtByteOffs.size(), _zGbwtBpOffs.size()); + for(index_t i = 0; i < _zGbwtByteOffs.size(); i++) { + index_t zGbwtByteOff = _zGbwtByteOffs[i]; + if(l._sideByteOff <= zGbwtByteOff && l._sideByteOff + l._by >= zGbwtByteOff) { + // Adjust for the fact that we represented $ with an 'A', but + // shouldn't count it as an 'A' here + int zGbwtBpOff = _zGbwtBpOffs[i]; + if((l._sideByteOff + l._by > zGbwtByteOff) || + (l._sideByteOff + l._by == zGbwtByteOff && l._bp > zGbwtBpOff)) + { + cntsUpto[0]--; // Adjust for '$' looking like an 'A' + } + } + } + // Now factor in the occ[] count at the side break + const index_t *acgt = reinterpret_cast(side + _gh._sideGbwtSz); + if(!this->_gh.linearFM()) acgt += 2; + assert_leq(acgt[0], this->fchr()[1] + this->_gh.sideGbwtLen()); + assert_leq(acgt[1], this->fchr()[2]-this->fchr()[1]); + assert_leq(acgt[2], this->fchr()[3]-this->fchr()[2]); + assert_leq(acgt[3], this->fchr()[4]-this->fchr()[3]); + assert_leq(acgt[0], this->_gh._gbwtLen + this->_gh.sideGbwtLen()); + assert_leq(acgt[1], this->_gh._gbwtLen); + assert_leq(acgt[2], this->_gh._gbwtLen); + assert_leq(acgt[3], this->_gh._gbwtLen); + cntsUpto[0] += (acgt[0] + 
this->fchr()[0]); + cntsUpto[1] += (acgt[1] + this->fchr()[1]); + cntsUpto[2] += (acgt[2] + this->fchr()[2]); + cntsUpto[3] += (acgt[3] + this->fchr()[3]); + masks[0].resize(num); + masks[1].resize(num); + masks[2].resize(num); + masks[3].resize(num); + WITHIN_FCHR_DOLLARA(cntsUpto); + WITHIN_FCHR_DOLLARA(cntsIn); + // 'cntsUpto' is complete now. + // Walk forward until we've tallied the entire 'In' range + index_t nm = 0; + // Rest of this side + nm += countBt2SideRange2(l, true, num - nm, cntsIn, masks, nm); + assert_eq(nm, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]); + assert_leq(nm, num); + SideLocus lcopy = l; + while(nm < num) { + // Subsequent sides, if necessary + lcopy.nextSide(this->_gh); + nm += countBt2SideRange2(lcopy, false, num - nm, cntsIn, masks, nm); + WITHIN_FCHR_DOLLARA(cntsIn); + assert_leq(nm, num); + assert_eq(nm, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]); + } + assert_eq(num, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]); + WITHIN_FCHR_DOLLARA(cntsIn); + } + + /** + * Count all occurrences of character c from the beginning of the + * forward side to and add in the occ[] count up to the side + * break just prior to the side. 
+ * + * A forward side is shaped like: + * + * [A] [C] XXXXXXXXXXXXXXXX + * -4- -4- --------56------ (numbers in bytes) + * ^ + * Side ptr (result from SideLocus.side()) + * + * And following it is a reverse side shaped like: + * + * [G] [T] XXXXXXXXXXXXXXXX + * -4- -4- --------56------ (numbers in bytes) + * ^ + * Side ptr (result from SideLocus.side()) + * + */ + inline void countBt2SideEx(const SideLocus& l, index_t* arrs) const { + assert_range(0, (int)this->_gh._sideGbwtSz-1, (int)l._by); + assert_range(0, 3, (int)l._bp); + countUpToEx(l, arrs); + assert_eq(_zGbwtByteOffs.size(), _zGbwtBpOffs.size()); + for(index_t i = 0; i < _zGbwtByteOffs.size(); i++) { + index_t zGbwtByteOff = _zGbwtByteOffs[i]; + if(l._sideByteOff <= zGbwtByteOff && l._sideByteOff + l._by >= zGbwtByteOff) { + // Adjust for the fact that we represented $ with an 'A', but + // shouldn't count it as an 'A' here + int zGbwtBpOff = _zGbwtBpOffs[i]; + if((l._sideByteOff + l._by > zGbwtByteOff) || + (l._sideByteOff + l._by == zGbwtByteOff && l._bp > zGbwtBpOff)) + { + arrs[0]--; // Adjust for '$' looking like an 'A' + } + } + } + WITHIN_FCHR(arrs); + WITHIN_BWT_LEN(arrs); + // Now factor in the occ[] count at the side break + const uint8_t *side = l.side(this->gfm()); + const uint8_t *acgt16 = side + this->_gh._sideSz - sizeof(index_t) * 4; + const index_t *acgt = reinterpret_cast(acgt16); + assert_leq(acgt[0], this->fchr()[1] + this->_gh.sideGbwtLen()); + assert_leq(acgt[1], this->fchr()[2]-this->fchr()[1]); + assert_leq(acgt[2], this->fchr()[3]-this->fchr()[2]); + assert_leq(acgt[3], this->fchr()[4]-this->fchr()[3]); + assert_leq(acgt[0], this->_gh._len + this->_gh.sideGbwtLen()); + assert_leq(acgt[1], this->_gh._len); + assert_leq(acgt[2], this->_gh._len); + assert_leq(acgt[3], this->_gh._len); + arrs[0] += (acgt[0] + this->fchr()[0]); + arrs[1] += (acgt[1] + this->fchr()[1]); + arrs[2] += (acgt[2] + this->fchr()[2]); + arrs[3] += (acgt[3] + this->fchr()[3]); + WITHIN_FCHR(arrs); + } + + /** + 
* Count all occurrences of character 1 from the beginning of the + * forward side to and add in the occ[] count up to the side + * break just prior to the side. + * + */ + inline index_t countMSide(const SideLocus& l) const { + assert_range(0, (int)this->_gh._sideGbwtSz-1, (int)l._by); + assert_range(0, 7, (int)l._bp); + index_t cCnt = countUpTo_bits(l, false /* F? */); + const uint8_t *side = l.side(this->gfm()); + cCnt += *(index_t*)(side + _gh._sideGbwtSz + sizeof(index_t)); + assert_leq(cCnt, l.toBWRow(_gh)); + assert_leq(cCnt, this->_gh._numNodes); + return cCnt; + } + + /** + * Counts the number of occurrences of character 'c' in the given Ebwt + * side up to (but not including) the given byte/bitpair (by/bp). + * + * This is a performance-critical function. This is the top search- + * related hit in the time profile. + * + * Function gets 11.09% in profile + */ + inline index_t countUpTo(const SideLocus& l, int c) const { + // Count occurrences of c in each 64-bit (using bit trickery); + // Someday countInU64() and pop() functions should be + // vectorized/SSE-ized in case that helps. + bool usePOPCNT = false; + index_t cCnt = 0; + const uint8_t *side = l.side(this->gfm()); + int i = 0; +#ifdef POPCNT_CAPABILITY + if(_usePOPCNTinstruction) { + usePOPCNT = true; + int by = l._by + (l._bp > 0 ? 1 : 0); + for(; i < by; i += 8) { + if(i + 8 < by) { + cCnt += countInU64(c, *(uint64_t*)&side[i]); + } else { + index_t by_shift = 8 - (by - i); + index_t bp_shift = (l._bp > 0 ? 4 - l._bp : 0); + index_t shift = (by_shift << 3) + (bp_shift << 1); + uint64_t side_i = *(uint64_t*)&side[i]; + side_i = (_toBigEndian ? 
side_i >> shift : side_i << shift); + index_t cCnt_add = countInU64(c, side_i); + if(c == 0) cCnt_add -= (shift >> 1); +#ifndef NDEBUG + index_t cCnt_temp = 0; + for(int j = i; j < l._by; j++) { + cCnt_temp += cCntLUT_4[0][c][side[j]]; + } + if(l._bp > 0) { + cCnt_temp += cCntLUT_4[(int)l._bp][c][side[l._by]]; + } + assert_eq(cCnt_add, cCnt_temp); +#endif + cCnt += cCnt_add; + break; + } + } + } else { + for(; i + 7 < l._by; i += 8) { + cCnt += countInU64(c, *(uint64_t*)&side[i]); + } + } +#else + for(; i + 7 < l._by; i += 8) { + cCnt += countInU64(c, *(uint64_t*)&side[i]); + } +#endif + + if(!usePOPCNT) { + // Count occurences of c in the rest of the side (using LUT) + for(; i < l._by; i++) { + cCnt += cCntLUT_4[0][c][side[i]]; + } + + // Count occurences of c in the rest of the byte + if(l._bp > 0) { + cCnt += cCntLUT_4[(int)l._bp][c][side[i]]; + } + } + + return cCnt; + } + + /** + * Counts the number of occurrences of character 'c' in the given Ebwt + * side down to the given byte/bitpair (by/bp). + * + */ + inline index_t countDownTo(const SideLocus& l, int c) const { + // Count occurrences of c in each 64-bit (using bit trickery); + // Someday countInU64() and pop() functions should be + // vectorized/SSE-ized in case that helps. 
+ index_t cCnt = 0; + const uint8_t *side = l.side(this->gfm()); + int i = 64 - 4 * sizeof(index_t) - 1; +#ifdef POPCNT_CAPABILITY + if ( _usePOPCNTinstruction) { + for(; i - 7 > l._by; i -= 8) { + cCnt += countInU64(c, *(uint64_t*)&side[i-7]); + } + } + else { + for(; i + 7 > l._by; i -= 8) { + cCnt += countInU64(c, *(uint64_t*)&side[i-7]); + } + } +#else + for(; i + 7 > l._by; i -= 8) { + cCnt += countInU64(c, *(uint64_t*)&side[i-7]); + } +#endif + // Count occurences of c in the rest of the side (using LUT) + for(; i > l._by; i--) { + cCnt += cCntLUT_4_rev[0][c][side[i]]; + } + // Count occurences of c in the rest of the byte + if(l._bp > 0) { + cCnt += cCntLUT_4_rev[4-(int)l._bp][c][side[i]]; + } else { + cCnt += cCntLUT_4_rev[0][c][side[i]]; + } + return cCnt; + } + + /** + * Tricky-bit-bashing bitpair counting for given two-bit value (0-3) + * within a 64-bit argument. + * + * Function gets 2.32% in profile + */ +#ifdef POPCNT_CAPABILITY + template +#endif + inline static void countInU64Ex(uint64_t dw, index_t* arrs) { + uint64_t c0 = c_table[0]; + uint64_t x0 = dw ^ c0; + uint64_t x1 = (x0 >> 1); + uint64_t x2 = x1 & (0x5555555555555555llu); + uint64_t x3 = x0 & x2; +#ifdef POPCNT_CAPABILITY + uint64_t tmp = Operation().pop64(x3); +#else + uint64_t tmp = pop64(x3); +#endif + arrs[0] += (uint32_t) tmp; + + c0 = c_table[1]; + x0 = dw ^ c0; + x1 = (x0 >> 1); + x2 = x1 & (0x5555555555555555llu); + x3 = x0 & x2; +#ifdef POPCNT_CAPABILITY + tmp = Operation().pop64(x3); +#else + tmp = pop64(x3); +#endif + arrs[1] += (uint32_t) tmp; + + c0 = c_table[2]; + x0 = dw ^ c0; + x1 = (x0 >> 1); + x2 = x1 & (0x5555555555555555llu); + x3 = x0 & x2; +#ifdef POPCNT_CAPABILITY + tmp = Operation().pop64(x3); +#else + tmp = pop64(x3); +#endif + arrs[2] += (uint32_t) tmp; + + c0 = c_table[3]; + x0 = dw ^ c0; + x1 = (x0 >> 1); + x2 = x1 & (0x5555555555555555llu); + x3 = x0 & x2; +#ifdef POPCNT_CAPABILITY + tmp = Operation().pop64(x3); +#else + tmp = pop64(x3); +#endif + arrs[3] += 
(uint32_t) tmp; + } + + /** + * Counts the number of occurrences of all four nucleotides in the + * given side up to (but not including) the given byte/bitpair (by/bp). + * Count for 'a' goes in arrs[0], 'c' in arrs[1], etc. + */ + inline void countUpToEx(const SideLocus& l, index_t* arrs) const { + int i = 0; + // Count occurrences of each nucleotide in each 64-bit word using + // bit trickery; note: this seems does not seem to lend a + // significant boost to performance in practice. If you comment + // out this whole loop (which won't affect correctness - it will + // just cause the following loop to take up the slack) then runtime + // does not change noticeably. Someday the countInU64() and pop() + // functions should be vectorized/SSE-ized in case that helps. + const uint8_t *side = l.side(this->gfm()); +#ifdef POPCNT_CAPABILITY + if (_usePOPCNTinstruction) { + for(; i+7 < l._by; i += 8) { + countInU64Ex(*(uint64_t*)&side[i], arrs); + } + } + else { + for(; i+7 < l._by; i += 8) { + countInU64Ex(*(uint64_t*)&side[i], arrs); + } + } +#else + for(; i+7 < l._by; i += 8) { + countInU64Ex(*(uint64_t*)&side[i], arrs); + } +#endif + // Count occurences of nucleotides in the rest of the side (using LUT) + // Many cache misses on following lines (~20K) + for(; i < l._by; i++) { + arrs[0] += cCntLUT_4[0][0][side[i]]; + arrs[1] += cCntLUT_4[0][1][side[i]]; + arrs[2] += cCntLUT_4[0][2][side[i]]; + arrs[3] += cCntLUT_4[0][3][side[i]]; + } + // Count occurences of c in the rest of the byte + if(l._bp > 0) { + arrs[0] += cCntLUT_4[(int)l._bp][0][side[i]]; + arrs[1] += cCntLUT_4[(int)l._bp][1][side[i]]; + arrs[2] += cCntLUT_4[(int)l._bp][2][side[i]]; + arrs[3] += cCntLUT_4[(int)l._bp][3][side[i]]; + } + } + + /** + * Counts the number of occurrences of character 'c' in the given Ebwt + * side up to (but not including) the given byte/bitpair (by/bp). + * + * This is a performance-critical function. This is the top search- + * related hit in the time profile. 
     */
    // NOTE(review): the header comment above mentions a character 'c', but
    // this function takes none: it counts set bits in one of two bit arrays
    // of the side, up to (not including) byte l._by / bit l._bp.  'F'
    // selects the array that starts at _sideGbwtSz/2; otherwise the array
    // that starts at _sideGbwtSz - _sideGbwtSz/4 is used.
    inline index_t countUpTo_bits(const SideLocus& l, bool F) const {
        // Count set bits in each 64-bit word (using bit trickery);
        // Someday countInU64() and pop() functions should be
        // vectorized/SSE-ized in case that helps.
        bool usePOPCNT = false;
        index_t cCnt = 0;
        const uint8_t *side = l.side(this->gfm());
        if(F) {
            side += (_gh._sideGbwtSz >> 1);
        } else {
            side += (_gh._sideGbwtSz - (_gh._sideGbwtSz >> 2));
        }
        int i = 0;
#ifdef POPCNT_CAPABILITY
        if(_usePOPCNTinstruction) {
            usePOPCNT = true;
            // Number of bytes touched, counting a partially used last byte
            int by = l._by + (l._bp > 0 ? 1 : 0);
            for(; i < by; i += 8) {
                if(i + 8 < by) {
                    cCnt += countInU64_bits(*(uint64_t*)&side[i]);
                } else {
                    // Last word: shift out the bits at or past the locus
                    index_t by_shift = 8 - (by - i);
                    index_t bp_shift = (l._bp > 0 ? 8 - l._bp : 0);
                    index_t shift = (by_shift << 3) + bp_shift;
                    uint64_t side_i = *(uint64_t*)&side[i];
                    side_i = (_toBigEndian ? side_i >> shift : side_i << shift);
                    index_t cCnt_add = countInU64_bits(side_i);
#ifndef NDEBUG
                    // Cross-check the word count against the per-byte LUT
                    index_t cCnt_temp = 0;
                    for(int j = i; j < l._by; j++) {
                        cCnt_temp += cCntBIT[0][side[j]];
                    }
                    if(l._bp > 0) {
                        cCnt_temp += cCntBIT[(int)l._bp][side[l._by]];
                    }
                    assert_eq(cCnt_add, cCnt_temp);
#endif
                    cCnt += cCnt_add;
                    break;
                }
            }
        } else {
            for(; i + 7 < l._by; i += 8) {
                cCnt += countInU64_bits(*(uint64_t*)&side[i]);
            }
        }
#else
        for(; i + 7 < l._by; i += 8) {
            cCnt += countInU64_bits(*(uint64_t*)&side[i]);
        }
#endif

        if(!usePOPCNT) {
            // Count set bits in the rest of the side (using LUT)
            for(; i < l._by; i++) {
                cCnt += cCntBIT[0][side[i]];
            }

            // Count set bits in the rest of the byte
            if(l._bp > 0) {
                cCnt += cCntBIT[(int)l._bp][side[i]];
            }
        }

        return cCnt;
    }


#ifndef NDEBUG
    /**
     * Given top and bot loci, calculate counts of all four DNA chars up to
     * those loci.  Used for more advanced backtracking-search.
     */
    // Single-locus variant (debug builds only): fills arrs[0..3] via
    // countBt2SideEx.  'arrs' must be zeroed by the caller.
    inline void mapLFEx(
        const SideLocus& l,
        index_t *arrs
        ASSERT_ONLY(, bool overrideSanity = false)
        ) const
    {
        assert_eq(0, arrs[0]);
        assert_eq(0, arrs[1]);
        assert_eq(0, arrs[2]);
        assert_eq(0, arrs[3]);
        countBt2SideEx(l, arrs);
        if(_sanity && !overrideSanity) {
            // Make sure results match up with individual calls to mapLF;
            // be sure to override sanity-checking in the callee, or we'll
            // have infinite recursion
            assert_eq(mapLF(l, 0, true), arrs[0]);
            assert_eq(mapLF(l, 1, true), arrs[1]);
            assert_eq(mapLF(l, 2, true), arrs[2]);
            assert_eq(mapLF(l, 3, true), arrs[3]);
        }
    }
#endif

    /**
     * Given top and bot rows, calculate counts of all four DNA chars up to
     * those loci.
     *
     * Builds the two loci with SideLocus::initFromTopBot and forwards to
     * the locus-based overload; 'tops' and 'bots' each receive four counts.
     */
    inline void mapLFEx(
        index_t top,
        index_t bot,
        index_t *tops,
        index_t *bots
        ASSERT_ONLY(, bool overrideSanity = false)
        ) const
    {
        SideLocus ltop, lbot;
        SideLocus::initFromTopBot(top, bot, _gh, gfm(), ltop, lbot);
        mapLFEx(ltop, lbot, tops, bots ASSERT_ONLY(, overrideSanity));
    }

    /**
     * Given top and bot loci, calculate counts of all four DNA chars up to
     * those loci.  Used for more advanced backtracking-search.
     */
    // Locus-pair variant: 'tops' and 'bots' must be zeroed by the caller and
    // are filled by one countBt2SideEx call each.
    inline void mapLFEx(
        const SideLocus& ltop,
        const SideLocus& lbot,
        index_t *tops,
        index_t *bots
        ASSERT_ONLY(, bool overrideSanity = false)
        ) const
    {
        assert(ltop.repOk(this->gh()));
        assert(lbot.repOk(this->gh()));
        assert_eq(0, tops[0]); assert_eq(0, bots[0]);
        assert_eq(0, tops[1]); assert_eq(0, bots[1]);
        assert_eq(0, tops[2]); assert_eq(0, bots[2]);
        assert_eq(0, tops[3]); assert_eq(0, bots[3]);
        countBt2SideEx(ltop, tops);
        countBt2SideEx(lbot, bots);
#ifndef NDEBUG
        if(_sanity && !overrideSanity) {
            // Make sure results match up with individual calls to mapLF;
            // be sure to override sanity-checking in the callee, or we'll
            // have infinite recursion
            assert_eq(mapLF(ltop, 0, true), tops[0]);
            assert_eq(mapLF(ltop, 1, true), tops[1]);
            assert_eq(mapLF(ltop, 2, true), tops[2]);
            assert_eq(mapLF(ltop, 3, true), tops[3]);
            assert_eq(mapLF(lbot, 0, true), bots[0]);
            assert_eq(mapLF(lbot, 1, true), bots[1]);
            assert_eq(mapLF(lbot, 2, true), bots[2]);
            assert_eq(mapLF(lbot, 3, true), bots[3]);
        }
#endif
    }

    /**
     * Counts the number of occurrences of all four nucleotides in the
     * given side from the given byte/bitpair (l->_by/l->_bp) (or the
     * beginning of the side if l == 0).  Count for 'a' goes in arrs[0],
     * 'c' in arrs[1], etc.
     *
     * Note: must account for $.
+ * + * Must fill in masks + */ + inline index_t countBt2SideRange2( + const SideLocus& l, + bool startAtLocus, + index_t num, + index_t* arrs, + EList *masks, + index_t maskOff) const + { + assert(!masks[0].empty()); + assert_eq(masks[0].size(), masks[1].size()); + assert_eq(masks[0].size(), masks[2].size()); + assert_eq(masks[0].size(), masks[3].size()); + ASSERT_ONLY(index_t myarrs[4] = {0, 0, 0, 0}); + index_t nm = 0; // number of nucleotides tallied so far + int iby = 0; // initial byte offset + int ibp = 0; // initial base-pair offset + if(startAtLocus) { + iby = l._by; + ibp = l._bp; + } else { + // Start at beginning + } + int by = iby, bp = ibp; + assert_lt(bp, 4); + index_t sideGbwtSz = this->_gh._sideGbwtSz >> (this->_gh.linearFM() ? 0 : 1); + assert_lt(by, (int)sideGbwtSz); + const uint8_t *side = l.side(this->gfm()); + while(nm < num) { + int c = (side[by] >> (bp * 2)) & 3; + assert_lt(maskOff + nm, masks[c].size()); + masks[0][maskOff + nm] = masks[1][maskOff + nm] = + masks[2][maskOff + nm] = masks[3][maskOff + nm] = false; + assert_range(0, 3, c); + // Note: we tally $ just like an A + arrs[c]++; // tally it + ASSERT_ONLY(myarrs[c]++); + masks[c][maskOff + nm] = true; // not dead + nm++; + if(++bp == 4) { + bp = 0; + by++; + assert_leq(by, (int)sideGbwtSz); + if(by == (int)sideGbwtSz) { + // Fell off the end of the side + break; + } + } + } + WITHIN_FCHR_DOLLARA(arrs); +#ifndef NDEBUG + if(_sanity) { + // Make sure results match up with a call to mapLFEx. + index_t tops[4] = {0, 0, 0, 0}; + index_t bots[4] = {0, 0, 0, 0}; + index_t top = l.toBWRow(gh()); + index_t bot = top + nm; + mapLFEx(top, bot, tops, bots, false); + assert(myarrs[0] == (bots[0] - tops[0]) || myarrs[0] == (bots[0] - tops[0])+1); + assert_eq(myarrs[1], bots[1] - tops[1]); + assert_eq(myarrs[2], bots[2] - tops[2]); + assert_eq(myarrs[3], bots[3] - tops[3]); + } +#endif + return nm; + } + + /** + * Return the final character in row i (i.e. 
the i'th character in the + * BWT transform). Note that the 'L' in the name of the function + * stands for 'last', as in the literature. + */ + inline int rowL(const SideLocus& l) const { + // Extract and return appropriate bit-pair + return unpack_2b_from_8b(l.side(this->gfm())[l._by], l._bp); + } + + /** + * Return the final character in row i (i.e. the i'th character in the + * BWT transform). Note that the 'L' in the name of the function + * stands for 'last', as in the literature. + */ + inline int rowL(index_t i) const { + // Extract and return appropriate bit-pair + SideLocus l; + l.initFromRow(i, _gh, gfm()); + return rowL(l); + } + + /** + * Given top and bot loci, calculate counts of all four DNA chars up to + * those loci. Used for more advanced backtracking-search. + */ + inline void mapLFRange( + const SideLocus& ltop, + const SideLocus& lbot, + index_t num, // Number of elts + index_t* cntsUpto, // A/C/G/T counts up to top + index_t* cntsIn, // A/C/G/T counts within range + EList *masks + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + assert(ltop.repOk(this->gh())); + assert(lbot.repOk(this->gh())); + assert_eq(num, lbot.toBWRow(this->gh()) - ltop.toBWRow(this->gh())); + assert_eq(0, cntsUpto[0]); assert_eq(0, cntsIn[0]); + assert_eq(0, cntsUpto[1]); assert_eq(0, cntsIn[1]); + assert_eq(0, cntsUpto[2]); assert_eq(0, cntsIn[2]); + assert_eq(0, cntsUpto[3]); assert_eq(0, cntsIn[3]); + countBt2SideRange(ltop, num, cntsUpto, cntsIn, masks); + assert_eq(num, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]); +#ifndef NDEBUG + if(_sanity && !overrideSanity) { + // Make sure results match up with individual calls to mapLF; + // be sure to override sanity-checking in the callee, or we'll + // have infinite recursion + index_t tops[4] = {0, 0, 0, 0}; + index_t bots[4] = {0, 0, 0, 0}; + assert(ltop.repOk(this->gh())); + assert(lbot.repOk(this->gh())); + mapLFEx(ltop, lbot, tops, bots, false); + for(int i = 0; i < 4; i++) { + assert(cntsUpto[i] == 
tops[i] || tops[i] == bots[i]); + if(i == 0) { + assert(cntsIn[i] == bots[i]-tops[i] || + cntsIn[i] == bots[i]-tops[i]+1); + } else { + assert_eq(cntsIn[i], bots[i]-tops[i]); + } + } + } +#endif + } + + /** + * Given row i, return the row that the LF mapping maps i to. + */ + inline index_t mapLF( + const SideLocus& l + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + ASSERT_ONLY(index_t srcrow = l.toBWRow(_gh)); + index_t ret; + assert(l.side(this->gfm()) != NULL); + int c = rowL(l); + assert_lt(c, 4); + assert_geq(c, 0); + ret = countBt2Side(l, c); + assert_lt(ret, this->_gh._gbwtLen); + assert_neq(srcrow, ret); +#ifndef NDEBUG + if(_sanity && !overrideSanity) { + // Make sure results match up with results from mapLFEx; + // be sure to override sanity-checking in the callee, or we'll + // have infinite recursion + index_t arrs[] = { 0, 0, 0, 0 }; + mapLFEx(l, arrs, true); + assert_eq(arrs[c], ret); + } +#endif + return ret; + } + + /** + * Given row i and character c, return the row that the LF mapping maps + * i to on character c. + */ + inline index_t mapLF( + const SideLocus& l, int c + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + index_t ret; + assert_lt(c, 4); + assert_geq(c, 0); + ret = countBt2Side(l, c); + assert_lt(ret, this->_gh._gbwtLen); +#ifndef NDEBUG + if(_sanity && !overrideSanity) { + // Make sure results match up with results from mapLFEx; + // be sure to override sanity-checking in the callee, or we'll + // have infinite recursion + index_t arrs[] = { 0, 0, 0, 0 }; + mapLFEx(l, arrs, true); + assert_eq(arrs[c], ret); + } +#endif + return ret; + } + + /** + * Given row i and character c, return the row that the GLF mapping maps + * i to on character c. 
+ */ + inline pair mapLF( + SideLocus& tloc, SideLocus& bloc, int c, + pair* node_range = NULL + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + assert_lt(c, 4); + assert_geq(c, 0); + index_t top = mapLF(tloc, c); + index_t bot = mapLF(bloc, c); + if(node_range != NULL) { + node_range->first = top; node_range->second = bot; + } + return pair(top, bot); + } + + /** + * Given row i and character c, return the row that the GLF mapping maps + * i to on character c. + */ + inline pair mapGLF( + SideLocus& tloc, SideLocus& bloc, int c, + pair* node_range = NULL, + EList >* node_iedges = NULL, + index_t k = 5 + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + assert_lt(c, 4); + assert_geq(c, 0); + index_t top = mapLF(tloc, c); + index_t bot = mapLF(bloc, c); + if(gh().linearFM()) { + if(node_range != NULL) { + node_range->first = top; node_range->second = bot; + } + if(node_iedges != NULL) { + node_iedges->clear(); + } + return pair(top, bot); + } + if(top + 1 >= gh()._gbwtLen || top >= bot) { + assert_eq(top, bot); + return pair(0, 0); + } + + tloc.initFromRow_bit(top + 1, gh(), gfm()); + index_t node_top = rank_M(tloc) - 1; + + index_t top_F_loc = 0, top_M_occ = 0; + size_t iter = 0; + while(true) { + const uint8_t *side = tloc.side(gfm()) + gh()._sideGbwtSz - gh()._sideSz * iter; + top_F_loc = *((index_t*)side); + side += sizeof(index_t); + top_M_occ = *((index_t*)side); + assert_leq(top_M_occ, node_top + 1); + if(top_M_occ <= node_top) break; + iter++; + } + if(top_M_occ > 0) top_F_loc++; + + tloc.initFromRow_bit(top_F_loc, gh(), gfm()); + + if(node_top + 1 > top_M_occ) { + top = select_F(tloc, node_top + 1 - top_M_occ); + } else { + top = top_F_loc; + } + + bloc.initFromRow_bit(bot, gh(), gfm()); + index_t node_bot = rank_M(bloc); + + const uint8_t *side = bloc.side(gfm()) + gh()._sideGbwtSz; + index_t bot_F_loc = *((index_t*)side); + side += sizeof(index_t); + index_t bot_M_occ = *((index_t*)side); + assert_leq(bot_M_occ, node_bot + 1); + 
if(bot_M_occ > 0) bot_F_loc++; + + bloc.initFromRow_bit(bot_F_loc, gh(), gfm()); + + if(node_bot + 1 > bot_M_occ) { + bot = select_F(bloc, node_bot + 1 - bot_M_occ); + } else { + bot = bot_F_loc; + } + + if(node_range != NULL) { + (*node_range).first = node_top; + (*node_range).second = node_bot; + } + assert_leq(node_bot - node_top, bot - top); + if(node_iedges != NULL && node_bot - node_top <= k && node_bot - node_top < bot - top) { + getInEdgeCount(top, bot, *node_iedges); + } + + return pair(top, bot); + } + + /** + * Given top and bot loci, calculate counts of all four DNA chars up to + * those loci. Also, update a set of tops and bots for the reverse + * index/direction using the idea from the bi-directional BWT paper. + */ + inline void mapBiLFEx( + const SideLocus& ltop, + const SideLocus& lbot, + index_t *tops, + index_t *bots, + index_t *topsP, // topsP[0] = top + index_t *botsP + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { +#ifndef NDEBUG + for(int i = 0; i < 4; i++) { + assert_eq(0, tops[0]); assert_eq(0, bots[0]); + } +#endif + countBt2SideEx(ltop, tops); + countBt2SideEx(lbot, bots); +#ifndef NDEBUG + if(_sanity && !overrideSanity) { + // Make sure results match up with individual calls to mapLF; + // be sure to override sanity-checking in the callee, or we'll + // have infinite recursion + assert_eq(mapLF(ltop, 0, true), tops[0]); + assert_eq(mapLF(ltop, 1, true), tops[1]); + assert_eq(mapLF(ltop, 2, true), tops[2]); + assert_eq(mapLF(ltop, 3, true), tops[3]); + assert_eq(mapLF(lbot, 0, true), bots[0]); + assert_eq(mapLF(lbot, 1, true), bots[1]); + assert_eq(mapLF(lbot, 2, true), bots[2]); + assert_eq(mapLF(lbot, 3, true), bots[3]); + } +#endif + // bots[0..3] - tops[0..3] = # of ways to extend the suffix with an + // A, C, G, T + botsP[0] = topsP[0] + (bots[0] - tops[0]); + topsP[1] = botsP[0]; + botsP[1] = topsP[1] + (bots[1] - tops[1]); + topsP[2] = botsP[1]; + botsP[2] = topsP[2] + (bots[2] - tops[2]); + topsP[3] = botsP[2]; + 
botsP[3] = topsP[3] + (bots[3] - tops[3]); + } + + /** + * Given row and its locus information, proceed on the given character + * and return the next row, or all-fs if we can't proceed on that + * character. Returns 0xffffffff if this row ends in $. + */ + inline index_t mapLF1( + index_t row, // starting row + const SideLocus& l, // locus for starting row + int c // character to proceed on + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + if(rowL(l) != c) return (index_t)INDEX_MAX; + for(index_t i = 0; i < _zOffs.size(); i++) { + if(row == _zOffs[i]) return (index_t)INDEX_MAX; + } + index_t ret; + assert_lt(c, 4); + assert_geq(c, 0); + ret = countBt2Side(l, c); + assert_lt(ret, this->_gh._gbwtLen); +#ifndef NDEBUG + if(_sanity && !overrideSanity) { + // Make sure results match up with results from mapLFEx; + // be sure to override sanity-checking in the callee, or we'll + // have infinite recursion + index_t arrs[] = { 0, 0, 0, 0 }; + mapLFEx(l, arrs, true); + assert_eq(arrs[c], ret); + } +#endif + return ret; + } + + + /** + * Given row and its locus information, set the row to LF(row) and + * return the character that was in the final column. 
+ */ + inline int mapLF1( + index_t& row, // starting row + const SideLocus& l // locus for starting row + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + for(index_t i = 0; i < _zOffs.size(); i++) { + if(row == _zOffs[i]) return -1; + } + int c = rowL(l); + assert_range(0, 3, c); + row = countBt2Side(l, c); + assert_lt(row, this->_gh._gbwtLen); +#ifndef NDEBUG + if(_sanity && !overrideSanity) { + // Make sure results match up with results from mapLFEx; + // be sure to override sanity-checking in the callee, or we'll + // have infinite recursion + index_t arrs[] = { 0, 0, 0, 0 }; + mapLFEx(l, arrs, true); + assert_eq(arrs[c], row); + } +#endif + return c; + } + + /** + * Given row and its locus information, proceed on the given character + * and return the next row, or all-fs if we can't proceed on that + * character. Returns 0xffffffff if this row ends in $. + */ + inline pair mapGLF1( + index_t row, // starting row + SideLocus& l, // locus for starting row + int c, // character to proceed + pair* node_range = NULL + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + assert_lt(c, 4); + assert_geq(c, 0); + index_t top = mapLF1(row, l, c); + if(top == (index_t)INDEX_MAX) return pair(0, 0); + if(gh().linearFM()) { + if(node_range != NULL) { + node_range->first = top; node_range->second = top + 1; + } + return pair(top, top + 1); + } + index_t bot = top; + + l.initFromRow_bit(top + 1, gh(), gfm()); + index_t node_top = rank_M(l) - 1; + + index_t F_loc = 0, M_occ = 0; + size_t iter = 0; + while(true) { + const uint8_t *side = l.side(gfm()) + gh()._sideGbwtSz - gh()._sideSz * iter; + F_loc = *((index_t*)side); + side += sizeof(index_t); + M_occ = *((index_t*)side); + assert_leq(M_occ, node_top + 1); + if(M_occ <= node_top) break; + iter++; + } + if(M_occ > 0) F_loc++; + + l.initFromRow_bit(F_loc, gh(), gfm()); + + if(node_top + 1 > M_occ) { + top = select_F(l, node_top + 1 - M_occ); + } else { + top = F_loc; + } + + index_t node_bot = node_top + 1; + 
if(node_bot + 1 > M_occ) { + SideLocus l2; +#if 0 + l2.initFromRow_bit(top + 1, gh(), gfm()); + bot = select_F(l2, 1); + ASSERT_ONLY(index_t bot2 = select_F(l, node_bot + 1 - M_occ)); + assert_eq(bot, bot2); +#else + bot = select_F(l, node_bot + 1 - M_occ); +#endif + } else { + bot = F_loc; + } + + if(node_range != NULL) { + (*node_range).first = node_top; + (*node_range).second = node_bot; + } + + return pair(top, bot); + } + + /** + * Given row and its locus information, proceed on the given character + * and return the next row, or all-fs if we can't proceed on that + * character. Returns 0xffffffff if this row ends in $. + */ + inline pair mapGLF1( + index_t row, // starting row + SideLocus& l, // locus for starting row + pair* node_range = NULL + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + for(index_t i = 0; i < _zOffs.size(); i++) { + if(row == _zOffs[i]) return pair((index_t)INDEX_MAX, (index_t)INDEX_MAX); + } + + mapLF1(row, l); + index_t top = row; + if(top == (index_t)INDEX_MAX) return pair(0, 0); + if(gh().linearFM()) { + if(node_range != NULL) { + node_range->first = top; node_range->second = top + 1; + } + return pair(top, top + 1); + } + index_t bot = top; + + l.initFromRow_bit(top + 1, gh(), gfm()); + index_t node_top = rank_M(l) - 1; + + index_t F_loc = 0, M_occ = 0; + size_t iter = 0; + while(true) { + const uint8_t *side = l.side(gfm()) + gh()._sideGbwtSz - gh()._sideSz * iter; + F_loc = *((index_t*)side); + side += sizeof(index_t); + M_occ = *((index_t*)side); + assert_leq(M_occ, node_top + 1); + if(M_occ <= node_top) break; + iter++; + } + if(M_occ > 0) F_loc++; + + l.initFromRow_bit(F_loc, gh(), gfm()); + + if(node_top + 1 > M_occ) { + top = select_F(l, node_top + 1 - M_occ); + } else { + top = F_loc; + } + + index_t node_bot = node_top + 1; + if(node_bot + 1 > M_occ) { +#if 0 + l2.initFromRow_bit(top + 1, gh(), gfm()); + bot = select_F(l2, 1); + ASSERT_ONLY(index_t bot2 = select_F(l, node_bot + 1 - M_occ)); + assert_eq(bot, 
bot2); +#else + bot = select_F(l, node_bot + 1 - M_occ); +#endif + } else { + bot = F_loc; + } + + if(node_range != NULL) { + (*node_range).first = node_top; + (*node_range).second = node_bot; + } + + return pair(top, bot); + } + + /** + * Given row i, return rank + */ + inline index_t rank_M( + const SideLocus& l + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + index_t ret = countMSide(l); + assert_leq(ret, this->_gh._numNodes); + return ret; + } + + /** + * Given row i, return select + */ + inline index_t select_F( + SideLocus l, + index_t count + ASSERT_ONLY(, bool overrideSanity = false) + ) const + { + assert_gt(count, 0); + const uint8_t *side = l.side(this->gfm()) + (_gh._sideGbwtSz >> 1); + while(true) { + index_t remainingBitsSide = (_gh._sideGbwtSz << 1) - l._charOff; + assert_gt(remainingBitsSide, 0); + index_t minSide = (count < remainingBitsSide ? count : remainingBitsSide); + uint64_t bits = *(uint64_t*)&side[l._by]; + uint8_t advance = 64; + if(l._bp > 0) { + bits >>= l._bp; + advance -= l._bp; + } + if(minSide < advance) { + advance = minSide; + bits <<= (64 - minSide); + } + uint8_t tmp_count = 0; +#ifdef POPCNT_CAPABILITY + if(_usePOPCNTinstruction) { + tmp_count = countInU64_bits(bits); + } else { + tmp_count = countInU64_bits(bits); + } +#else + tmp_count = countInU64_bits(bits); +#endif + assert_leq(tmp_count, count); + count -= tmp_count; + if(count == 0) { + assert_gt(advance, 0); + l._charOff += (advance - 1); + assert_lt(l._charOff, _gh._sideGbwtSz << 1); + l._by = l._charOff >> 3; + l._bp = l._charOff & 0x7; + break; + } + + assert_leq(l._charOff + advance, (_gh._sideGbwtSz << 1)); + if(l._charOff + advance == (_gh._sideGbwtSz << 1)) { + l.nextSide(_gh); + side = l.side(this->gfm()) + (_gh._sideGbwtSz >> 1); + } else { + l._charOff += advance; + l._by = l._charOff >> 3; + l._bp = l._charOff & 0x7; + } + } + return l.toBWRow(_gh); + } + + /** + * + */ + inline void getInEdgeCount( + index_t top, + index_t bot, + EList >& 
node_iedges) const + { + assert_lt(top, bot); + node_iedges.clear(); + SideLocus l; l.initFromRow_bit(top, _gh, gfm()); + const uint8_t *side = l.side(this->gfm()) + (_gh._sideGbwtSz >> 1); + assert_lt(l._by, (_gh._sideGbwtSz >> 2)); + assert_eq((side[l._by] >> l._bp) & 0x1, 0x1); + bool first = true; + index_t curr_node = 0; + index_t num0s = 0; + while(top < bot) { + if(first) { + first = false; + } else { + int bit = (side[l._by] >> l._bp) & 0x1; + if(bit == 0x1) { + curr_node++; + num0s = 0; + } else { + num0s++; + if(num0s == 1) { + node_iedges.expand(); + node_iedges.back().first = curr_node; + } + node_iedges.back().second = num0s; + } + } + if(l._charOff + 1 == (_gh._sideGbwtSz << 1)) { + l.nextSide(_gh); + side = l.side(this->gfm()) + (_gh._sideGbwtSz >> 1); + } else { + l._charOff++; + l._by = l._charOff >> 3; + l._bp = l._charOff & 0x7; + } + top++; + } + } + + +#ifndef NDEBUG + /// Check that in-memory Ebwt is internally consistent with respect + /// to given EbwtParams; assert if not + bool inMemoryRepOk(const GFMParams& gh) const { + assert_eq(_zOffs.size(), _zGbwtByteOffs.size()); + assert_eq(_zOffs.size(), _zGbwtBpOffs.size()); + for(index_t i = 0; i < _zOffs.size(); i++) { + assert_geq(_zGbwtBpOffs[i], 0); + assert_lt(_zGbwtBpOffs[i], 4); + assert_lt(_zGbwtByteOffs[i], gh._gbwtTotSz); + assert_lt(_zOffs[i], gh._gbwtLen); + } + assert_geq(_nFrag, _nPat); + assert_eq(_alts.size(), _altnames.size()); + return true; + } + + /// Check that in-memory Ebwt is internally consistent; assert if + /// not + bool inMemoryRepOk() const { + return repOk(_gh); + } + + /// Check that Ebwt is internally consistent with respect to given + /// EbwtParams; assert if not + bool repOk(const GFMParams& gh) const { + assert(_gh.repOk()); + if(isInMemory()) { + return inMemoryRepOk(gh); + } + return true; + } + + /// Check that Ebwt is internally consistent; assert if not + bool repOk() const { + return repOk(_gh); + } +#endif + + bool _toBigEndian; + int32_t 
_overrideOffRate; + bool _verbose; + bool _passMemExc; + bool _sanity; + bool fw_; // true iff this is a forward index + FILE *_in1; // input fd for primary index file + FILE *_in2; // input fd for secondary index file + string _in1Str; // filename for primary index file + string _in2Str; // filename for secondary index file + EList _zOffs; + EList _zGbwtByteOffs; + EList _zGbwtBpOffs; + index_t _nPat; /// number of reference texts + index_t _nFrag; /// number of fragments + APtrWrap _plen; + APtrWrap _rstarts; // starting offset of fragments / text indexes + // _fchr, _ftab and _eftab are expected to be relatively small + // (usually < 1MB, perhaps a few MB if _fchr is particularly large + // - like, say, 11). For this reason, we don't bother with writing + // them to disk through separate output streams; we + APtrWrap _fchr; + APtrWrap _ftab; + APtrWrap _eftab; // "extended" entries for _ftab + // _offs may be extremely large. E.g. for DNA w/ offRate=4 (one + // offset every 16 rows), the total size of _offs is the same as + // the total size of the input sequence + APtrWrap _offs; + + // _ebwt is the Extended Burrows-Wheeler Transform itself, and thus + // is at least as large as the input sequence. 
+ APtrWrap _gfm; + bool _useMm; /// use memory-mapped files to hold the index + bool useShmem_; /// use shared memory to hold large parts of the index + EList _refnames; /// names of the reference sequences + EList _refnames_nospace; // names of the reference sequences (names stop at space) + char *mmFile1_; + char *mmFile2_; + int _nthreads; + GFMParams _gh; + bool packed_; + + static const uint64_t default_bmax = INDEX_MAX; + static const uint64_t default_bmaxMultSqrt = INDEX_MAX; + static const uint64_t default_bmaxDivN = 4; + static const int default_dcv = 1024; + static const bool default_noDc = false; + static const bool default_useBlockwise = true; + static const uint32_t default_seed = 0; +#ifdef BOWTIE_64BIT_INDEX + static const int default_lineRate_gfm = 8; + static const int default_lineRate_fm = 7; +#else + static const int default_lineRate_gfm = 7; + static const int default_lineRate_fm = 6; +#endif + static const int default_offRate = 5; + static const int default_offRatePlus = 0; + static const int default_ftabChars = 10; + static const bool default_bigEndian = false; + + // data used to build an index + EList > _alts; + EList _altnames; + EList > _haplotypes; + RepeatDB _repeatdb; + EList _repeat_kmertables; + + bool _repeat; + EList > _repeatLens; + EList _repeatIncluded; + +protected: + + ostream& log() const { + return cerr; // TODO: turn this into a parameter + } + + /// Print a verbose message and flush (flushing is helpful for + /// debugging) + void verbose(const string& s) const { + if(this->verbose()) { + this->log() << s.c_str(); + this->log().flush(); + } + } +}; + +/** + * Read reference names from an input stream 'in' for an Ebwt primary + * file and store them in 'refnames'. + */ +template +void readEbwtRefnames(istream& in, EList& refnames) { + // _in1 must already be open with the get cursor at the + // beginning and no error flags set. 
+ assert(in.good()); + assert_eq((streamoff)in.tellg(), ios::beg); + + // Read endianness hints from both streams + bool switchEndian = false; + uint32_t one = readU32(in, switchEndian); // 1st word of primary stream + if(one != 1) { + assert_eq((1u<<24), one); + switchEndian = true; + } + readU32(in, switchEndian); // version + + // Reads header entries one by one from primary stream + index_t len = readIndex(in, switchEndian); + index_t gbwtLen = readIndex(in, switchEndian); + index_t numNodes = readIndex(in, switchEndian); + int32_t lineRate = readI32(in, switchEndian); + /*int32_t linesPerSide =*/ readI32(in, switchEndian); + int32_t offRate = readI32(in, switchEndian); + int32_t ftabChars = readI32(in, switchEndian); + index_t eftabLen = readIndex(in, switchEndian); + // BTL: chunkRate is now deprecated + int32_t flags = readI32(in, switchEndian); + bool entireReverse = false; + if(flags < 0) { + entireReverse = (((-flags) & GFM_ENTIRE_REV) != 0); + } + + // Create a new EbwtParams from the entries read from primary stream + GFMParams gh(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireReverse); + + index_t nPat = readIndex(in, switchEndian); // nPat + in.seekg(nPat*sizeof(index_t), ios_base::cur); // skip plen + + // Skip rstarts + index_t nFrag = readIndex(in, switchEndian); + in.seekg(nFrag*sizeof(index_t)*3, ios_base::cur); + + // Skip ebwt + in.seekg(gh._gbwtTotLen, ios_base::cur); + + // Skip zOff from primary stream + index_t numZOffs = readIndex(in, switchEndian); + in.seekg(numZOffs * sizeof(index_t), ios_base::cur); + + // Skip fchr + in.seekg(5 * sizeof(index_t), ios_base::cur); + + // Skip ftab + in.seekg(gh._ftabLen*sizeof(index_t), ios_base::cur); + + // Skip eftab + in.seekg(gh._eftabLen*sizeof(index_t), ios_base::cur); + + // Read reference sequence names from primary index file + while(true) { + char c = '\0'; + in.read(&c, 1); + if(in.eof()) break; + if(c == '\0') break; + else if(c == '\n') { + refnames.push_back(""); + 
} else { + if(refnames.size() == 0) { + refnames.push_back(""); + } + refnames.back().push_back(c); + } + } + if(refnames.back().empty()) { + refnames.pop_back(); + } + + // Be kind + in.clear(); in.seekg(0, ios::beg); + assert(in.good()); +} + +/** + * Read reference names from the index with basename 'in' and store + * them in 'refnames'. + */ +template +void readEbwtRefnames(const string& instr, EList& refnames) { + ifstream in; + // Initialize our primary and secondary input-stream fields + in.open((instr + ".1." + gfm_ext).c_str(), ios_base::in | ios::binary); + if(!in.is_open()) { + throw GFMFileOpenException("Cannot open file " + instr); + } + assert(in.is_open()); + assert(in.good()); + assert_eq((streamoff)in.tellg(), ios::beg); + readEbwtRefnames(in, refnames); +} + +/////////////////////////////////////////////////////////////////////// +// +// Functions for building Ebwts +// +/////////////////////////////////////////////////////////////////////// + +/** + * Join several text strings together in a way that's compatible with + * the text-chunking scheme dictated by chunkRate parameter. + * + * The non-static member Ebwt::join additionally builds auxilliary + * arrays that maintain a mapping between chunks in the joined string + * and the original text strings. + */ +template +template +TStr GFM::join(EList& l, uint32_t seed) { + RandomSource rand; // reproducible given same seed + rand.init(seed); + TStr ret; + index_t guessLen = 0; + for(index_t i = 0; i < l.size(); i++) { + guessLen += length(l[i]); + } + ret.resize(guessLen); + index_t off = 0; + for(size_t i = 0; i < l.size(); i++) { + TStr& s = l[i]; + assert_gt(s.length(), 0); + for(size_t j = 0; j < s.size(); j++) { + ret.set(s[j], off++); + } + } + return ret; +} + +/** + * Join several text strings together in a way that's compatible with + * the text-chunking scheme dictated by chunkRate parameter. 
+ * + * The non-static member Ebwt::join additionally builds auxilliary + * arrays that maintain a mapping between chunks in the joined string + * and the original text strings. + */ +template +template +void GFM::join(EList& l, + EList& szs, + index_t sztot, + const RefReadInParams& refparams, + uint32_t seed, + TStr& s, + bool include_rc, + bool CGtoTG) +{ + RandomSource rand; // reproducible given same seed + rand.init(seed); + RefReadInParams rpcp = refparams; + index_t guessLen = sztot; + if(include_rc) { + s.resize(guessLen << 1); + } else { + s.resize(guessLen); + } + ASSERT_ONLY(index_t szsi = 0); + TIndexOffU dstoff = 0; + for(index_t i = 0; i < l.size(); i++) { + // For each sequence we can pull out of istream l[i]... + assert(!l[i]->eof()); + bool first = true; + while(!l[i]->eof()) { + RefRecord rec = fastaRefReadAppend(*l[i], first, s, dstoff, rpcp); + first = false; + index_t bases = (index_t)rec.len; + assert_eq(rec.off, szs[szsi].off); + assert_eq(rec.len, szs[szsi].len); + assert_eq(rec.first, szs[szsi].first); + ASSERT_ONLY(szsi++); + if(bases == 0) continue; + } + } + + // Change 'C' in CG to 'T' so that CG becomes TG + if(CGtoTG) { + for(TIndexOffU i = 0; i + 1 < guessLen; i++) { + int nt1 = s[i], nt2 = s[i+1]; + if(nt1 == 1 && nt2 == 2) { + s[i] = 3; + } + } + } + + // Append reverse complement + if(include_rc) { + for (TIndexOffU i = 0; i < guessLen; i++) { + int nt = s[guessLen - i - 1]; + assert_range(0, 3, nt); + s[guessLen + i] = dnacomp[nt]; + } + } +} + +/** + * Join several text strings together according to the text-chunking + * scheme specified in the EbwtParams. Ebwt fields calculated in this + * function are written directly to disk. + * + * It is assumed, but not required, that the header values have already + * been written to 'out1' before this function is called. + * + * The static member Ebwt::join just returns a joined version of a + * list of strings without building any of the auxilliary arrays. 
+ */ +template +template +void GFM::joinToDisk( + EList& l, + EList& szs, + index_t sztot, + const RefReadInParams& refparams, + TStr& ret, + ostream& out1, + ostream& out2) +{ + RefReadInParams rpcp = refparams; + assert_gt(szs.size(), 0); + assert_gt(l.size(), 0); + assert_gt(sztot, 0); + // Not every fragment represents a distinct sequence - many + // fragments may correspond to a single sequence. Count the + // number of sequences here by counting the number of "first" + // fragments. + this->_nPat = 0; + this->_nFrag = 0; + for(index_t i = 0; i < szs.size(); i++) { + if(szs[i].len > 0) this->_nFrag++; + if(szs[i].first && szs[i].len > 0) this->_nPat++; + } + assert_gt(this->_nPat, 0); + assert_geq(this->_nFrag, this->_nPat); + _rstarts.reset(); + writeIndex(out1, this->_nPat, this->toBe()); + // Allocate plen[] + try { + this->_plen.init(new index_t[this->_nPat], this->_nPat); + } catch(bad_alloc& e) { + cerr << "Out of memory allocating plen[] in Ebwt::join()" + << " at " << __FILE__ << ":" << __LINE__ << endl; + throw e; + } + // For each pattern, set plen + int npat = -1; + for(index_t i = 0; i < szs.size(); i++) { + if(szs[i].first && szs[i].len > 0) { + if(npat >= 0) { + writeIndex(out1, this->plen()[npat], this->toBe()); + } + npat++; + this->plen()[npat] = (szs[i].len + szs[i].off); + } else { + this->plen()[npat] += (szs[i].len + szs[i].off); + } + } + assert_eq((index_t)npat, this->_nPat-1); + writeIndex(out1, this->plen()[npat], this->toBe()); + // Write the number of fragments + writeIndex(out1, this->_nFrag, this->toBe()); + index_t seqsRead = 0; + ASSERT_ONLY(index_t szsi = 0); + ASSERT_ONLY(index_t entsWritten = 0); + index_t dstoff = 0; + // For each filebuf + for(unsigned int i = 0; i < l.size(); i++) { + assert(!l[i]->eof()); + bool first = true; + index_t patoff = 0; + // For each *fragment* (not necessary an entire sequence) we + // can pull out of istream l[i]... 
+ while(!l[i]->eof()) { + string name; + // Push a new name onto our vector + _refnames.push_back(""); + RefRecord rec = fastaRefReadAppend( + *l[i], first, ret, dstoff, rpcp, &_refnames.back()); + first = false; + index_t bases = rec.len; + if(rec.first && rec.len > 0) { + if(_refnames.back().length() == 0) { + // If name was empty, replace with an index + ostringstream stm; + stm << seqsRead; + _refnames.back() = stm.str(); + } + } else { + // This record didn't actually start a new sequence so + // no need to add a name + //assert_eq(0, _refnames.back().length()); + _refnames.pop_back(); + } + // Increment seqsRead if this is the first fragment + if(rec.first && rec.len > 0) seqsRead++; + assert_lt(szsi, szs.size()); + assert_eq(rec.off, szs[szsi].off); + assert_eq(rec.len, szs[szsi].len); + assert_eq(rec.first, szs[szsi].first); + assert(rec.first || rec.off > 0); + ASSERT_ONLY(szsi++); + assert_leq(bases, this->plen()[seqsRead-1]); + // Reset the patoff if this is the first fragment + if(rec.first) patoff = 0; + patoff += rec.off; // add fragment's offset from end of last frag. + // Adjust rpcps + //index_t seq = seqsRead-1; +#ifndef NDEBUG + if(bases > 0) { + ASSERT_ONLY(entsWritten++); + } +#endif + // This is where rstarts elements are written to the output stream + //writeU32(out1, oldRetLen, this->toBe()); // offset from beginning of joined string + //writeU32(out1, seq, this->toBe()); // sequence id + //writeU32(out1, patoff, this->toBe()); // offset into sequence + patoff += (index_t)bases; + } + assert_gt(szsi, 0); + l[i]->reset(); + assert(!l[i]->eof()); +#ifndef NDEBUG + int c = l[i]->get(); + assert_eq('>', c); + assert(!l[i]->eof()); + l[i]->reset(); + assert(!l[i]->eof()); +#endif + } + assert_eq(entsWritten, this->_nFrag); +} + +/** + * Build an Ebwt from a string 's' and its suffix array 'sa' (which + * might actually be a suffix array *builder* that builds blocks of the + * array on demand). The bulk of the Ebwt, i.e. 
 * the ebwt and offs
 * arrays, is written directly to disk.  This is by design: keeping
 * those arrays in memory needlessly increases the footprint of the
 * building process.  Instead, we prefer to build the Ebwt directly
 * "to disk" and then read it back into memory later as necessary.
 *
 * It is assumed that the header values and join-related values (nPat,
 * plen) have already been written to 'out1' before this function
 * is called.  When this function is finished, it will have
 * additionally written ebwt, zOff, fchr, ftab and eftab to the primary
 * file and offs to the secondary file.
 *
 * Assume DNA/RNA/any alphabet with 4 or fewer elements.
 * Assume occ array entries are 32 bits each.
 *
 * This overload takes its rows from a path graph rather than from a
 * suffix array (the text above is shared with the suffix-array
 * overload).
 *
 * @param gbwt       source of the rows: gbwt.nextRow() yields the
 *                   character, the F and M bits and the position for
 *                   each row, gbwt.nextFLocation() the F location for
 *                   each row whose M bit is set
 * @param s          the original string (only its length is checked)
 * @param out1       primary output stream
 * @param out2       secondary output stream (receives the offsets)
 * @param headerPos  position in out1 where gbwtLen/numNodes are
 *                   patched in; if negative, fixed offsets from the
 *                   start of the stream are used instead
 *
 * NOTE(review): the template parameter/argument lists ("<...>") appear
 * to be missing in this copy -- TODO restore from upstream.
 */
template
template
void GFM::buildToDisk(
    PathGraph& gbwt,
    const TStr& s,
    ostream& out1,
    ostream& out2,
    streampos headerPos)
{
    const GFMParams& gh = this->_gh;
    assert(gh.repOk());
    assert_lt(s.length(), gh.gbwtLen());
    assert_eq(s.length(), gh._len);
    assert_gt(gh._lineRate, 3);

    // Patch gbwtLen and numNodes into the header, then return to where
    // we were in the stream
    index_t gbwtLen = gh._gbwtLen;
    streampos out1pos = out1.tellp();
    if(headerPos < 0) {
        out1.seekp(8 + sizeof(index_t));
    } else {
        out1.seekp(headerPos);
    }
    writeIndex(out1, gbwtLen, this->toBe());
    writeIndex(out1, gh._numNodes, this->toBe());
    out1.seekp(out1pos);

    index_t ftabLen = gh._ftabLen;
    index_t sideSz = gh._sideSz;
    index_t gbwtTotSz = gh._gbwtTotSz;
    index_t fchr[] = {0, 0, 0, 0, 0};
    EList ftab(EBWT_CAT);
    EList zOffs;

    // Save # of occurrences of each character as we walk along the bwt
    index_t occ[4] = {0, 0, 0, 0};
    index_t occSave[4] = {0, 0, 0, 0};
    // # of occurrences of 1 in M arrays
    index_t M_occ = 0, M_occSave = 0;
    // Location in F that corresponds to 1 in M
    index_t F_loc = 0, F_locSave = 0;

    // Allocate and zero the ftab (no "absorb" table is used in this
    // overload)
    try {
        VMSG_NL("Allocating ftab, absorbFtab");
        ftab.resize(ftabLen);
        ftab.fillZero();
    } catch(bad_alloc &e) {
        cerr << "Out of memory allocating ftab[] "
             << "in GFM::buildToDisk() at " << __FILE__ << ":"
             << __LINE__ << endl;
        throw e;
    }

    // Allocate the side buffer; holds a single side as its being
    // constructed and then written to disk.  Reused across all sides.
#ifdef SIXTY4_FORMAT
    EList gfmSide(EBWT_CAT);
#else
    EList gfmSide(EBWT_CAT);
#endif
    try {
        // Used to calculate ftab and eftab, but having gfm costs a lot of memory
        _gfm.init(new uint8_t[gh._gbwtTotLen], gh._gbwtTotLen, true);
#ifdef SIXTY4_FORMAT
        gfmSide.resize(sideSz >> 3);
#else
        gfmSide.resize(sideSz);
#endif
    } catch(bad_alloc &e) {
        cerr << "Out of memory allocating ebwtSide[] in "
             << "GFM::buildToDisk() at " << __FILE__ << ":"
             << __LINE__ << endl;
        throw e;
    }

    // Points to the base offset within ebwt for the side currently
    // being written
    index_t side = 0;

    // Whether we're assembling a forward or a reverse bucket
    // (never changed below, so only the forward branch is taken)
    bool fw = true;
    int sideCur = 0;

    index_t si = 0;   // string offset (chars)
    ASSERT_ONLY(bool inSA = true); // true iff saI still points inside suffix
                                   // array (as opposed to the padding at the
                                   // end)
    // Iterate over packed bwt bytes
    VMSG_NL("Entering GFM loop");
    ASSERT_ONLY(index_t beforeGbwtOff = (index_t)out1.tellp());
    while(side < gbwtTotSz) {
        // Sanity-check our cursor into the side buffer
        assert_geq(sideCur, 0);
        assert_lt(sideCur, (int)gh._sideGbwtSz);
        assert_eq(0, side % sideSz); // 'side' must be on side boundary
        if(sideCur == 0) {
            // Starting a new side: zero the whole packed area, since
            // the F and M bits are OR-ed into bytes beyond sideCur
            memset(gfmSide.ptr(), 0, gh._sideGbwtSz);
            gfmSide[sideCur] = 0; // clear
        }
        assert_lt(side + sideCur, gbwtTotSz);
        // Iterate over bit-pairs in the si'th character of the BWT
#ifdef SIXTY4_FORMAT
        for(int bpi = 0; bpi < 32; bpi++, si++)
#else
        for(int bpi = 0; bpi < 4; bpi++, si++)
#endif
        {
            int gbwtChar = 0; // one of A, C, G, T, and Z
            int F= 0, M = 0; // either 0 or 1
            index_t pos = 0; // pos on joined string
            bool count = true;
            if(si < gbwtLen) {
                gbwt.nextRow(gbwtChar, F, M, pos);

                // (that might have triggered sa to calc next suf block)
                if(gbwtChar == 'Z') {
                    // Don't add the 'Z' in the last column to the BWT
                    // transform; we can't encode a $ (only A C T or G)
                    // and counting it as, say, an A, will mess up the
                    // LF mapping
                    gbwtChar = 0; count = false;
#ifndef NDEBUG
                    if(zOffs.size() > 0) {
                        assert_gt(si, zOffs.back());
                    }
#endif
                    zOffs.push_back(si); // remember GBWT row that corresponds to the 0th suffix
                } else {
                    gbwtChar = asc2dna[gbwtChar];
                    assert_lt(gbwtChar, 4);
                    // Update the fchr
                    fchr[gbwtChar]++;
                }

                assert_lt(F, 2);
                assert_lt(M, 2);
                if(M == 1) {
                    assert_neq(F_loc, numeric_limits::max());
                    F_loc = gbwt.nextFLocation();
#ifndef NDEBUG
                    if(F_loc > 0) {
                        assert_gt(F_loc, F_locSave);
                    }
#endif
                }
                // Suffix array offset boundary? - update offset array
                // (M_occ here is the number of M bits seen before this
                // row; it is incremented further below.  The test holds
                // only when M_occ has no bits set outside gh._offMask.)
                if(M == 1 && (M_occ & gh._offMask) == M_occ) {
                    assert_lt((M_occ >> gh._offRate), gh._offsLen);
                    // Write offsets directly to the secondary output
                    // stream, thereby avoiding keeping them in memory
                    writeIndex(out2, pos, this->toBe());
                }
            } else {
                // Strayed off the end of the SA, now we're just
                // padding out a bucket
#ifndef NDEBUG
                if(inSA) {
                    // Assert that we wrote all the characters in the
                    // string before now
                    assert_eq(si, gbwtLen);
                    inSA = false;
                }
#endif
                // 'A' used for padding; important that padding be
                // counted in the occ[] array
                gbwtChar = 0;
                F = M = 0;
            }
            if(count) occ[gbwtChar]++;
            if(M) M_occ++;
            // Append BWT char to bwt section of current side
            if(fw) {
                // Forward bucket: fill from least to most
#ifdef SIXTY4_FORMAT
                gfmSide[sideCur] |= ((uint64_t)gbwtChar << (bpi << 1));
                if(gbwtChar > 0) assert_gt(gfmSide[sideCur], 0);
                // To be implemented ...
                assert(false);
                cerr << "Not implemented" << endl;
                exit(1);
#else
                pack_2b_in_8b(gbwtChar, gfmSide[sideCur], bpi);
                assert_eq((gfmSide[sideCur] >> (bpi*2)) & 3, gbwtChar);

                // The F bits start half-way into the packed area: each
                // byte of 2-bit characters (4 rows) needs 4 F bits, so
                // two character bytes share one F byte -- even sideCur
                // uses bits 0-3, odd sideCur bits 4-7.
                int F_sideCur = (gh._sideGbwtSz + sideCur) >> 1;
                int F_bpi = bpi + ((sideCur & 0x1) << 2); // Can be used as M_bpi as well
                pack_1b_in_8b(F, gfmSide[F_sideCur], F_bpi);
                assert_eq((gfmSide[F_sideCur] >> F_bpi) & 1, F);

                // The M bits follow a quarter of the packed area after
                // the F bits, at the same bit position
                int M_sideCur = F_sideCur + (gh._sideGbwtSz >> 2);
                pack_1b_in_8b(M, gfmSide[M_sideCur], F_bpi);
                assert_eq((gfmSide[M_sideCur] >> F_bpi) & 1, M);
#endif
            } else {
                // Backward bucket: fill from most to least
#ifdef SIXTY4_FORMAT
                gfmSide[sideCur] |= ((uint64_t)gbwtChar << ((31 - bpi) << 1));
                if(gbwtChar > 0) assert_gt(gfmSide[sideCur], 0);
                // To be implemented ...
                assert(false);
                cerr << "Not implemented" << endl;
                exit(1);
#else
                pack_2b_in_8b(gbwtChar, gfmSide[sideCur], 3-bpi);
                assert_eq((gfmSide[sideCur] >> ((3-bpi)*2)) & 3, gbwtChar);
                // To be implemented ...
                assert(false);
                cerr << "Not implemented" << endl;
                exit(1);
#endif
            }
        } // end loop over bit-pairs
        assert_eq(0, (occ[0] + occ[1] + occ[2] + occ[3] + zOffs.size()) & 3);
#ifdef SIXTY4_FORMAT
        assert_eq(0, si & 31);
#else
        assert_eq(0, si & 3);
#endif

        sideCur++;
        // The 2-bit characters occupy the first half of the packed
        // area; once that half is full the side is complete
        if((sideCur << 1) == (int)gh._sideGbwtSz) {
            sideCur = 0;
            index_t *uside = reinterpret_cast(gfmSide.ptr());
            // Write 'A', 'C', 'G', 'T', and '1' in M tallies
            // into the last six index_t slots of the side.  The saved
            // values are those from before this side's rows were
            // processed; they are refreshed right after being written.
            side += sideSz;
            assert_leq(side, gh._gbwtTotSz);
            uside[(sideSz / sizeof(index_t))-6] = endianizeIndex(F_locSave, this->toBe());
            uside[(sideSz / sizeof(index_t))-5] = endianizeIndex(M_occSave, this->toBe());
            uside[(sideSz / sizeof(index_t))-4] = endianizeIndex(occSave[0], this->toBe());
            uside[(sideSz / sizeof(index_t))-3] = endianizeIndex(occSave[1], this->toBe());
            uside[(sideSz / sizeof(index_t))-2] = endianizeIndex(occSave[2], this->toBe());
            uside[(sideSz / sizeof(index_t))-1] = endianizeIndex(occSave[3], this->toBe());
            F_locSave = F_loc;
            M_occSave = M_occ;
            occSave[0] = occ[0];
            occSave[1] = occ[1];
            occSave[2] = occ[2];
            occSave[3] = occ[3];
            // Write the completed side to primary file
            out1.write((const char *)gfmSide.ptr(), sideSz);

            // Keep an in-memory copy of the side as well; it is used
            // below to compute ftab and eftab
            memcpy(((char*)_gfm.get()) + side - sideSz, (const char *)gfmSide.ptr(), sideSz);
        }
    }
    VMSG_NL("Exited GFM loop");
    // Assert that our loop counter got incremented right to the end
    assert_eq(side, gh._gbwtTotSz);
    // Assert that we wrote the expected amount to out1
    assert_eq(((index_t)out1.tellp() - beforeGbwtOff), gh._gbwtTotSz);
    // assert that the last thing we did was write a forward bucket

    //
    // Write zOffs to primary stream
    //
    assert_gt(zOffs.size(), 0);
    writeIndex(out1, (index_t)zOffs.size(), this->toBe());
    for(size_t i = 0; i < zOffs.size(); i++) {
        writeIndex(out1, zOffs[i], this->toBe());
    }

    //
    // Finish building fchr
    //
    // Running (inclusive) sum over the per-character counts; the shift
    // below turns it into an exclusive prefix sum
    for(int i = 1; i < 4; i++) {
        fchr[i] += fchr[i-1];
    }
    assert_lt(fchr[3], gbwtLen);
    // Shift everybody up by one
    for(int i = 4; i >= 1; i--) {
        fchr[i] = fchr[i-1];
    }
    fchr[0] = 0;
    if(_verbose) {
        for(int i = 0; i < 5; i++)
            cerr << "fchr[" << "ACGT$"[i] << "]: " << fchr[i] << endl;
    }
    // Write fchr to primary file
    for(int i = 0; i < 5; i++) {
        writeIndex(out1, fchr[i], this->toBe());
    }
    // Also install fchr in this object; the mapGLF/mapGLF1 calls below
    // run against the in-memory index
    _fchr.init(new index_t[5], 5, true);
    memcpy(_fchr.get(), fchr, sizeof(index_t) * 5);


    // Initialize _zGbwtByteOffs and _zGbwtBpOffs
    _zOffs = zOffs;
    postReadInit(gh);

    // Build ftab and eftab
    // For every possible ftabChars-long key, walk the in-memory index
    // one character at a time to find the row range it maps to
    EList > tFtab;
    tFtab.resizeExact(ftabLen - 1);
    for(index_t i = 0; i + 1 < ftabLen; i++) {
        index_t q = i;
        pair range(0, gh._gbwtLen);
        SideLocus tloc, bloc;
        SideLocus::initFromTopBot(range.first, range.second, gh, gfm(), tloc, bloc);
        index_t j = 0;
        for(; j < (index_t)gh._ftabChars; j++) {
            // Consume the key two bits at a time, lowest bits first
            int nt = q & 0x3; q >>= 2;
            if(bloc.valid()) {
                range = mapGLF(tloc, bloc, nt);
            } else {
                range = mapGLF1(range.first, tloc, nt);
            }
            if(range.first == (index_t)INDEX_MAX || range.first >= range.second) {
                break;
            }
            if(range.first + 1 == range.second) {
                // Single-row range: only the top locus is needed
                tloc.initFromRow(range.first, gh, gfm());
                bloc.invalidate();
            } else {
                SideLocus::initFromTopBot(range.first, range.second, gh, gfm(), tloc, bloc);
            }
        }

        if(range.first >= range.second || j < (index_t)gh._ftabChars) {
            // Key not fully matched: record an empty range positioned
            // at the end of the previous entry
            if(i == 0) {
                tFtab[i].first = tFtab[i].second = 0;
            } else {
                tFtab[i].first = tFtab[i].second = tFtab[i-1].second;
            }
        } else {
            tFtab[i].first = range.first;
            tFtab[i].second = range.second;
        }

#ifndef NDEBUG
        if(gbwt.ftab.size() > i) {
            assert_eq(tFtab[i].first, gbwt.ftab[i].first);
            assert_eq(tFtab[i].second, gbwt.ftab[i].second);
        }
#endif
    }

    // Clear memory
    _gfm.reset();
    _fchr.reset();
    _zOffs.clear();
    _zGbwtByteOffs.clear();
    _zGbwtBpOffs.clear();

    //
    // Finish building ftab and build eftab
    //
    // Count the eftab entries needed: two for every entry whose start
    // differs from the previous entry's end
    index_t eftabLen = 0;
    for(index_t i = 1; i + 1 < ftabLen; i++) {
        if(tFtab[i-1].second != tFtab[i].first) {
            eftabLen += 2;
        }
    }

    // Unsigned wrap-around check on gbwtLen + number of eftab pairs
    if(gh._gbwtLen + (eftabLen >> 1) < gh._gbwtLen) {
        cerr << "Too many eftab entries: "
             << gh._gbwtLen << " + " << (eftabLen >> 1)
             << " > " << (index_t)INDEX_MAX << endl;
        throw 1;
    }

    EList eftab(EBWT_CAT);
    try {
        eftab.resize(eftabLen);
        eftab.fillZero();
    } catch(bad_alloc &e) {
        cerr << "Out of memory allocating eftab[] "
             << "in GFM::buildToDisk() at " << __FILE__ << ":"
             << __LINE__ << endl;
        throw e;
    }
    index_t eftabCur = 0;
    ftab[0] = tFtab[0].first;
    ftab[1] = tFtab[0].second;
    for(index_t i = 1; i + 1 < ftabLen; i++) {
        if(ftab[i] != tFtab[i].first) {
            // The previous entry's end (already stored in ftab[i]) and
            // this entry's start disagree: keep both in eftab and
            // store an encoded eftab index in ftab[i] instead
            index_t lo = ftab[i];
            index_t hi = tFtab[i].first;
            assert_lt(eftabCur*2+1, eftabLen);
            eftab[eftabCur*2] = lo;
            eftab[eftabCur*2+1] = hi;
            // one node can be shared, and one node can have at most four incoming edges
            assert_leq(lo, hi + 4);
            ftab[i] = (eftabCur++) ^ (index_t)INDEX_MAX; // insert pointer into eftab
            assert_eq(lo, GFM::ftabLo(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i));
            assert_eq(hi, GFM::ftabHi(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i));
        }
        ftab[i+1] = tFtab[i].second;
    }
#ifndef NDEBUG
    for(index_t i = 0; i + 1 < ftabLen; i++ ){
        assert_eq(tFtab[i].first, GFM::ftabHi(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i));
        assert_eq(tFtab[i].second, GFM::ftabLo(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i+1));
    }
#endif
    // Write ftab to primary file
    for(index_t i = 0; i < ftabLen; i++) {
        writeIndex(out1, ftab[i], this->toBe());
    }
    // Write eftab to primary file
    // First patch eftabLen into the header, then return to the end of
    // the stream and append the entries themselves
    out1pos = out1.tellp();
    if(headerPos < 0) {
        out1.seekp(24 + sizeof(index_t) * 3);
    } else {
        out1.seekp((int)headerPos + 16 + sizeof(index_t) * 2);
    }
    writeIndex(out1, eftabLen, this->toBe());
    out1.seekp(out1pos);
    for(index_t i = 0; i < eftabLen; i++) {
        writeIndex(out1, eftab[i], this->toBe());
    }
    // Note: if you'd like to sanity-check the Ebwt, you'll have to
    // read it back into memory first!
    assert(!isInMemory());
    VMSG_NL("Exiting GFM::buildToDisk()");
}

/**
 * Build an Ebwt from a string 's' and its suffix array 'sa' (which
 * might actually be a suffix array *builder* that builds blocks of the
 * array on demand).  The bulk of the Ebwt, i.e. the ebwt and offs
 * arrays, is written directly to disk.  This is by design: keeping
 * those arrays in memory needlessly increases the footprint of the
 * building process.  Instead, we prefer to build the Ebwt directly
 * "to disk" and then read it back into memory later as necessary.
 *
 * It is assumed that the header values and join-related values (nPat,
 * plen) have already been written to 'out1' before this function
 * is called.  When this function is finished, it will have
 * additionally written ebwt, zOff, fchr, ftab and eftab to the primary
 * file and offs to the secondary file.
 *
 * Assume DNA/RNA/any alphabet with 4 or fewer elements.
 * Assume occ array entries are 32 bits each.
+ * + * @param sa the suffix array to convert to a Ebwt + * @param s the original string + * @param out + */ +template +template +void GFM::buildToDisk( + InorderBlockwiseSA& sa, + const TStr& s, + ostream& out1, + ostream& out2, + streampos headerPos) +{ + const GFMParams& gh = this->_gh; + assert(gh.repOk()); + assert(gh.linearFM()); + assert_lt(s.length(), gh.gbwtLen()); + assert_eq(s.length(), gh._len); + assert_gt(gh._lineRate, 3); + + index_t len = gh._len; + index_t gbwtLen = gh._gbwtLen; + assert_eq(len + 1, gbwtLen); + streampos out1pos = out1.tellp(); + if(headerPos < 0) { + out1.seekp(8 + sizeof(index_t)); + } else { + out1.seekp(headerPos); + } + writeIndex(out1, gbwtLen, this->toBe()); + writeIndex(out1, gh._numNodes, this->toBe()); + out1.seekp(out1pos); + + index_t ftabLen = gh._ftabLen; + index_t sideSz = gh._sideSz; + index_t gbwtTotSz = gh._gbwtTotSz; + index_t fchr[] = {0, 0, 0, 0, 0}; + EList ftab(EBWT_CAT); + EList zOffs; + + // Save # of occurrences of each character as we walk along the bwt + index_t occ[4] = {0, 0, 0, 0}; + index_t occSave[4] = {0, 0, 0, 0}; + + // Record rows that should "absorb" adjacent rows in the ftab. + // The absorbed rows represent suffixes shorter than the ftabChars + // cutoff. + uint8_t absorbCnt = 0; + EList absorbFtab(EBWT_CAT); + try { + VMSG_NL("Allocating ftab, absorbFtab"); + ftab.resize(ftabLen); + ftab.fillZero(); + absorbFtab.resize(ftabLen); + absorbFtab.fillZero(); + } catch(bad_alloc &e) { + cerr << "Out of memory allocating ftab[] or absorbFtab[] " + << "in GFM::buildToDisk() at " << __FILE__ << ":" + << __LINE__ << endl; + throw e; + } + + // Allocate the side buffer; holds a single side as its being + // constructed and then written to disk. Reused across all sides. 
+#ifdef SIXTY4_FORMAT + EList gfmSide(EBWT_CAT); +#else + EList gfmSide(EBWT_CAT); +#endif + try { +#ifdef SIXTY4_FORMAT + gfmSide.resize(sideSz >> 3); +#else + gfmSide.resize(sideSz); +#endif + } catch(bad_alloc &e) { + cerr << "Out of memory allocating gfmSide[] in " + << "GFM::buildToDisk() at " << __FILE__ << ":" + << __LINE__ << endl; + throw e; + } + + // Points to the base offset within ebwt for the side currently + // being written + index_t side = 0; + + // Whether we're assembling a forward or a reverse bucket + bool fw = true; + int sideCur = 0; + + // Have we skipped the '$' in the last column yet? + ASSERT_ONLY(bool dollarSkipped = false); + + index_t si = 0; // string offset (chars) + ASSERT_ONLY(index_t lastSufInt = 0); + ASSERT_ONLY(bool inSA = true); // true iff saI still points inside suffix + // array (as opposed to the padding at the + // end) + // Iterate over packed bwt bytes + VMSG_NL("Entering GFM loop"); + ASSERT_ONLY(index_t beforeGbwtOff = (index_t)out1.tellp()); + while(side < gbwtTotSz) { + // Sanity-check our cursor into the side buffer + assert_geq(sideCur, 0); + assert_lt(sideCur, (int)gh._sideGbwtSz); + assert_eq(0, side % sideSz); // 'side' must be on side boundary + gfmSide[sideCur] = 0; // clear + assert_lt(side + sideCur, gbwtTotSz); + // Iterate over bit-pairs in the si'th character of the BWT +#ifdef SIXTY4_FORMAT + for(int bpi = 0; bpi < 32; bpi++, si++) +#else + for(int bpi = 0; bpi < 4; bpi++, si++) +#endif + { + int bwtChar; + bool count = true; + if(si <= len) { + // Still in the SA; extract the bwtChar + index_t saElt = sa.nextSuffix(); + // (that might have triggered sa to calc next suf block) + if(saElt == 0) { + // Don't add the '$' in the last column to the BWT + // transform; we can't encode a $ (only A C T or G) + // and counting it as, say, an A, will mess up the + // LR mapping + bwtChar = 0; count = false; + ASSERT_ONLY(dollarSkipped = true); + zOffs.push_back(si); // remember the SA row that + // corresponds to 
the 0th suffix + } else { + bwtChar = (int)(s[saElt-1]); + assert_lt(bwtChar, 4); + // Update the fchr + fchr[bwtChar]++; + } + // Update ftab + if((len-saElt) >= (index_t)gh._ftabChars) { + // Turn the first ftabChars characters of the + // suffix into an integer index into ftab. The + // leftmost (lowest index) character of the suffix + // goes in the most significant bit pair if the + // integer. + index_t sufInt = 0; + for(int i = 0; i < gh._ftabChars; i++) { + sufInt <<= 2; + assert_lt((index_t)i, len-saElt); + sufInt |= (unsigned char)(s[saElt+i]); + } + // Assert that this prefix-of-suffix is greater + // than or equal to the last one (true b/c the + // suffix array is sorted) +#ifndef NDEBUG + if(lastSufInt > 0) assert_geq(sufInt, lastSufInt); + lastSufInt = sufInt; +#endif + // Update ftab + assert_lt(sufInt+1, ftabLen); + ftab[sufInt+1]++; + if(absorbCnt > 0) { + // Absorb all short suffixes since the last + // transition into this transition + absorbFtab[sufInt] = absorbCnt; + absorbCnt = 0; + } + } else { + // Otherwise if suffix is fewer than ftabChars + // characters long, then add it to the 'absorbCnt'; + // it will be absorbed into the next transition + assert_lt(absorbCnt, 255); + absorbCnt++; + } + // Suffix array offset boundary? 
- update offset array + if((si & gh._offMask) == si) { + assert_lt((si >> gh._offRate), gh._offsLen); + // Write offsets directly to the secondary output + // stream, thereby avoiding keeping them in memory + writeIndex(out2, saElt, this->toBe()); + } + } else { + // Strayed off the end of the SA, now we're just + // padding out a bucket +#ifndef NDEBUG + if(inSA) { + // Assert that we wrote all the characters in the + // string before now + assert_eq(si, len+1); + inSA = false; + } +#endif + // 'A' used for padding; important that padding be + // counted in the occ[] array + bwtChar = 0; + } + if(count) occ[bwtChar]++; + // Append BWT char to bwt section of current side + if(fw) { + // Forward bucket: fill from least to most +#ifdef SIXTY4_FORMAT + ebwtSide[sideCur] |= ((uint64_t)bwtChar << (bpi << 1)); + if(bwtChar > 0) assert_gt(ebwtSide[sideCur], 0); +#else + pack_2b_in_8b(bwtChar, gfmSide[sideCur], bpi); + assert_eq((gfmSide[sideCur] >> (bpi*2)) & 3, bwtChar); +#endif + } else { + // Backward bucket: fill from most to least +#ifdef SIXTY4_FORMAT + ebwtSide[sideCur] |= ((uint64_t)bwtChar << ((31 - bpi) << 1)); + if(bwtChar > 0) assert_gt(ebwtSide[sideCur], 0); +#else + pack_2b_in_8b(bwtChar, gfmSide[sideCur], 3-bpi); + assert_eq((gfmSide[sideCur] >> ((3-bpi)*2)) & 3, bwtChar); +#endif + } + } // end loop over bit-pairs + assert_eq(dollarSkipped ? 
3 : 0, (occ[0] + occ[1] + occ[2] + occ[3]) & 3); +#ifdef SIXTY4_FORMAT + assert_eq(0, si & 31); +#else + assert_eq(0, si & 3); +#endif + + sideCur++; + if(sideCur == (int)gh._sideGbwtSz) { + sideCur = 0; + index_t *uside = reinterpret_cast(gfmSide.ptr()); + // Write 'A', 'C', 'G', 'T', and '1' in M tallies + side += sideSz; + assert_leq(side, gh._gbwtTotSz); + uside[(sideSz / sizeof(index_t))-4] = endianizeIndex(occSave[0], this->toBe()); + uside[(sideSz / sizeof(index_t))-3] = endianizeIndex(occSave[1], this->toBe()); + uside[(sideSz / sizeof(index_t))-2] = endianizeIndex(occSave[2], this->toBe()); + uside[(sideSz / sizeof(index_t))-1] = endianizeIndex(occSave[3], this->toBe()); + occSave[0] = occ[0]; + occSave[1] = occ[1]; + occSave[2] = occ[2]; + occSave[3] = occ[3]; + // Write backward side to primary file + out1.write((const char *)gfmSide.ptr(), sideSz); + } + } + VMSG_NL("Exited GFM loop"); + if(absorbCnt > 0) { + // Absorb any trailing, as-yet-unabsorbed short suffixes into + // the last element of ftab + absorbFtab[ftabLen-1] = absorbCnt; + } + + // Assert that our loop counter got incremented right to the end + assert_eq(side, gh._gbwtTotSz); + // Assert that we wrote the expected amount to out1 + assert_eq(((index_t)out1.tellp() - beforeGbwtOff), gh._gbwtTotSz); + // assert that the last thing we did was write a forward bucket + + // + // Write zOffs to primary stream + // + assert_eq(zOffs.size(), 1); + writeIndex(out1, (index_t)zOffs.size(), this->toBe()); + for(size_t i = 0; i < zOffs.size(); i++) { + assert_neq(zOffs[i], (index_t)OFF_MASK); + writeIndex(out1, zOffs[i], this->toBe()); + } + + // + // Finish building fchr + // + // Exclusive prefix sum on fchr + for(int i = 1; i < 4; i++) { + fchr[i] += fchr[i-1]; + } + assert_lt(fchr[3], gbwtLen); + // Shift everybody up by one + for(int i = 4; i >= 1; i--) { + fchr[i] = fchr[i-1]; + } + fchr[0] = 0; + if(_verbose) { + for(int i = 0; i < 5; i++) + cerr << "fchr[" << "ACGT$"[i] << "]: " << fchr[i] << 
endl; + } + // Write fchr to primary file + for(int i = 0; i < 5; i++) { + writeIndex(out1, fchr[i], this->toBe()); + } + + // + // Finish building ftab and build eftab + // + // Prefix sum on ftable + index_t eftabLen = 0; + assert_eq(0, absorbFtab[0]); + for(index_t i = 1; i < ftabLen; i++) { + if(absorbFtab[i] > 0) eftabLen += 2; + } + assert_leq(eftabLen, (index_t)gh._ftabChars*2); + eftabLen = gh._ftabChars*2; + EList eftab(EBWT_CAT); + try { + eftab.resize(eftabLen); + eftab.fillZero(); + } catch(bad_alloc &e) { + cerr << "Out of memory allocating eftab[] " + << "in GFM::buildToDisk() at " << __FILE__ << ":" + << __LINE__ << endl; + throw e; + } + index_t eftabCur = 0; + for(index_t i = 1; i < ftabLen; i++) { + index_t lo = ftab[i] + GFM::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i-1); + if(absorbFtab[i] > 0) { + // Skip a number of short pattern indicated by absorbFtab[i] + index_t hi = lo + absorbFtab[i]; + assert_lt(eftabCur*2+1, eftabLen); + eftab[eftabCur*2] = lo; + eftab[eftabCur*2+1] = hi; + ftab[i] = (eftabCur++) ^ (index_t)OFF_MASK; // insert pointer into eftab + assert_eq(lo, GFM::ftabLo(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i)); + assert_eq(hi, GFM::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i)); + } else { + ftab[i] = lo; + } + } + assert_eq(GFM::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, ftabLen-1), len+1); + // Write ftab to primary file + for(index_t i = 0; i < ftabLen; i++) { + writeIndex(out1, ftab[i], this->toBe()); + } + // Write eftab to primary file + out1pos = out1.tellp(); + if(headerPos < 0) { + out1.seekp(24 + sizeof(index_t) * 3); + } else { + out1.seekp((int)headerPos + 16 + sizeof(index_t) * 2); + } + writeIndex(out1, eftabLen, this->toBe()); + out1.seekp(out1pos); + for(index_t i = 0; i < eftabLen; i++) { + writeIndex(out1, eftab[i], this->toBe()); + } + + // Note: if you'd like to sanity-check the Ebwt, you'll have to + // read it back into memory first! 
+ assert(!isInMemory()); + VMSG_NL("Exiting GFM::buildToDisk()"); +} + +extern string gLastIOErrMsg; + +/* Checks whether a call to read() failed or not. */ +inline bool is_read_err(int fdesc, ssize_t ret, size_t count) { + if (ret < 0) { + std::stringstream sstm; + sstm << "ERRNO: " << errno << " ERR Msg:" << strerror(errno) << std::endl; + gLastIOErrMsg = sstm.str(); + return true; + } + return false; +} + +/* Checks whether a call to fread() failed or not. */ +inline bool is_fread_err(FILE* file_hd, size_t ret, size_t count) { + if (ferror(file_hd)) { + gLastIOErrMsg = "Error Reading File!"; + return true; + } + return false; +} + + +/////////////////////////////////////////////////////////////////////// +// +// Functions for searching Ebwts +// (But most of them are defined in the header) +// +/////////////////////////////////////////////////////////////////////// + +/** + * Take an offset into the joined text and translate it into the + * reference of the index it falls on, the offset into the reference, + * and the length of the reference. 
Use a binary search through the + * sorted list of reference fragment ranges t + */ +template +bool GFM::joinedToTextOff( + index_t qlen, + index_t off, + index_t& tidx, + index_t& textoff, + index_t& tlen, + bool rejectStraddle, + bool& straddled) const +{ + assert(rstarts() != NULL); // must have loaded rstarts + index_t top = 0; + index_t bot = _nFrag; // 1 greater than largest addressable element + index_t elt = (index_t)INDEX_MAX; + // Begin binary search + while(true) { + index_t oldelt = elt; + elt = top + ((bot - top) >> 1); + if(oldelt == elt) { + tidx = (index_t)INDEX_MAX; + return false; + } + index_t lower = rstarts()[elt*3]; + index_t upper; + if(elt == _nFrag-1) { + upper = _gh._len; + } else { + upper = rstarts()[((elt+1)*3)]; + } + assert_gt(upper, lower); + index_t fraglen = upper - lower; + if(lower <= off) { + if(upper > off) { // not last element, but it's within + // off is in this range; check if it falls off + if(off + qlen > upper) { + straddled = true; + if(rejectStraddle) { + // it falls off; signal no-go and return + tidx = (index_t)INDEX_MAX; + return false; + } + } + // This is the correct text idx whether the index is + // forward or reverse + tidx = rstarts()[(elt*3)+1]; + assert_lt(tidx, this->_nPat); + assert_leq(fraglen, this->plen()[tidx]); + // it doesn't fall off; now calculate textoff. 
+ // Initially it's the number of characters that precede + // the alignment in the fragment + index_t fragoff = off - rstarts()[(elt*3)]; + if(!this->fw_) { + fragoff = fraglen - fragoff - 1; + fragoff -= (qlen-1); + } + // Add the alignment's offset into the fragment + // ('fragoff') to the fragment's offset within the text + textoff = fragoff + rstarts()[(elt*3)+2]; + assert_lt(textoff, this->plen()[tidx]); + break; // done with binary search + } else { + // 'off' belongs somewhere in the region between elt + // and bot + top = elt; + } + } else { + // 'off' belongs somewhere in the region between top and + // elt + bot = elt; + } + // continue with binary search + } + tlen = this->plen()[tidx]; + return true; +} + +template +bool GFM::textOffToJoined( + index_t tid, + index_t textoff, + index_t& off) const +{ + assert(rstarts() != NULL); // must have loaded rstarts + index_t top = 0; + index_t bot = _nFrag; // 1 greater than largest addressable element + index_t elt = (index_t)INDEX_MAX; + // Begin binary search + while(true) { + ASSERT_ONLY(index_t oldelt = elt); + elt = top + ((bot - top) >> 1); + assert_neq(oldelt, elt); // must have made progress + index_t elt_tid = rstarts()[elt*3 + 1]; + if(elt_tid == tid) { + while(true) { + if(tid != rstarts()[elt*3+1]) { + return false; + } + if(rstarts()[elt*3 + 2] <= textoff) break; + if(elt == 0) return false; + elt--; + } + while(true) { + assert_leq(rstarts()[elt*3+2], textoff); + if(elt + 1 == _nFrag || + tid + 1 == rstarts()[(elt+1)*3 + 1] || + textoff < rstarts()[(elt+1)*3 + 2]) { + off = rstarts()[elt*3] + (textoff - rstarts()[elt*3 + 2]); + if(elt + 1 < _nFrag && + tid == rstarts()[(elt+1)*3 + 1] && + off >= rstarts()[(elt+1)*3]) { + return false; + } + break; + } + elt++; + } + break; // done with binary search + } else if(elt_tid < tid) { + top = elt; + } else { + bot = elt; + } + // continue with binary search + } + return true; +} + +/** + * Walk 'steps' steps to the left and return the row arrived at. 
If we + * walk through the dollar sign, return 0xffffffff. + */ +template +index_t GFM::walkLeft(index_t row, index_t steps) const { + assert(offs() != NULL); + assert_neq((index_t)INDEX_MAX, row); + SideLocus l; + if(steps > 0) l.initFromRow(row, _gh, gfm()); + while(steps > 0) { + for(index_t i = 0; i < _zOffs.size(); i++) { + if(row == _zOffs[i]) return (index_t)INDEX_MAX; + } + pair range = this->mapGLF1(row, l, (pair *)NULL ASSERT_ONLY(, false)); + index_t newrow = range.first; + assert_neq((index_t)INDEX_MAX, newrow); + assert_neq(newrow, row); + row = newrow; + steps--; + if(steps > 0) l.initFromRow(row, _gh, gfm()); + } + return row; +} + +/** + * Resolve the reference offset of the BW element 'elt'. + */ +template +index_t GFM::getOffset(index_t row, index_t node) const { + assert(offs() != NULL); + assert_neq((index_t)INDEX_MAX, row); + for(index_t i = 0; i < _zOffs.size(); i++) { + if(row == _zOffs[i]) return 0; + } + if((node & _gh._offMask) == node) { + index_t off = this->offs()[node >> _gh._offRate]; + if(off != (index_t)INDEX_MAX) + return off; + } + index_t jumps = 0; + SideLocus l; + l.initFromRow(row, _gh, gfm()); + while(true) { + pair node_range(0, 0); + pair range = this->mapGLF1(row, l, &node_range ASSERT_ONLY(, false)); + index_t newrow = range.first; + jumps++; + assert_neq((index_t)INDEX_MAX, newrow); + assert_neq(newrow, row); + row = newrow; + for(index_t i = 0; i < _zOffs.size(); i++) { + if(row == _zOffs[i]) return jumps; + } + + if((node_range.first & _gh._offMask) == node_range.first) { + index_t off = this->offs()[node_range.first >> _gh._offRate]; + if(off != (index_t)INDEX_MAX) + return jumps + off; + } + l.initFromRow(row, _gh, gfm()); + } +} + +/** + * Resolve the reference offset of the BW element 'elt' such that + * the offset returned is at the right-hand side of the forward + * reference substring involved in the hit. 
+ */ +template +index_t GFM::getOffset( + index_t elt, + bool fw, + index_t hitlen) const +{ + index_t off = getOffset(elt); + assert_neq((index_t)INDEX_MAX, off); + if(!fw) { + assert_lt(off, _gh._len); + off = _gh._len - off - 1; + assert_geq(off, hitlen-1); + off -= (hitlen-1); + assert_lt(off, _gh._len); + } + return off; +} + +/** + * Returns true iff the index contains the given string (exactly). The given + * string must contain only unambiguous characters. TODO: support ambiguous + * characters in 'str'. + */ +template +bool GFM::contains( + const BTDnaString& str, + index_t *otop, + index_t *obot) const +{ + assert(isInMemory()); + SideLocus tloc, bloc; + if(str.empty()) { + if(otop != NULL && obot != NULL) *otop = *obot = 0; + return true; + } + int c = str[str.length()-1]; + assert_range(0, 4, c); + index_t top = 0, bot = 0; + if(c < 4) { + top = fchr()[c]; + bot = fchr()[c+1]; + } else { + bool set = false; + for(int i = 0; i < 4; i++) { + if(fchr()[c] < fchr()[c+1]) { + if(set) { + return false; + } else { + set = true; + top = fchr()[c]; + bot = fchr()[c+1]; + } + } + } + } + assert_geq(bot, top); + tloc.initFromRow(top, gh(), gfm()); + bloc.initFromRow(bot, gh(), gfm()); + ASSERT_ONLY(index_t lastDiff = bot - top); + for(int64_t i = (int64_t)str.length()-2; i >= 0; i--) { + c = str[i]; + assert_range(0, 4, c); + if(c <= 3) { + top = mapLF(tloc, c); + bot = mapLF(bloc, c); + } else { + index_t sz = bot - top; + int c1 = mapLF1(top, tloc ASSERT_ONLY(, false)); + bot = mapLF(bloc, c1); + assert_leq(bot - top, sz); + if(bot - top < sz) { + // Encountered an N and could not proceed through it because + // there was more than one possible nucleotide we could replace + // it with + return false; + } + } + assert_geq(bot, top); + assert_leq(bot-top, lastDiff); + ASSERT_ONLY(lastDiff = bot-top); + if(i > 0) { + tloc.initFromRow(top, gh(), gfm()); + bloc.initFromRow(bot, gh(), gfm()); + } + } + if(otop != NULL && obot != NULL) { + *otop = top; *obot = bot; + } 
+ return bot > top; +} + +/////////////////////////////////////////////////////////////////////// +// +// Functions for reading and writing Ebwts +// +/////////////////////////////////////////////////////////////////////// + +/** + * Read an Ebwt from file with given filename. + */ +template +void GFM::readIntoMemory( + int needEntireRev, + bool loadSASamp, + bool loadFtab, + bool loadRstarts, + bool justHeader, + GFMParams *params, + bool mmSweep, + bool loadNames, + bool startVerbose, + bool subIndex) +{ + bool switchEndian; // dummy; caller doesn't care +#ifdef BOWTIE_MM + char *mmFile[] = { NULL, NULL }; +#endif + if(_in1Str.length() > 0 && !subIndex) { + if(_verbose || startVerbose) { + cerr << " About to open input files: "; + logTime(cerr); + } + // Initialize our primary and secondary input-stream fields + if(_in1 != NULL) fclose(_in1); + if(_verbose || startVerbose) cerr << "Opening \"" << _in1Str.c_str() << "\"" << endl; + if((_in1 = fopen(_in1Str.c_str(), "rb")) == NULL) { + cerr << "Could not open index file " << _in1Str.c_str() << endl; + } + if(loadSASamp) { + if(_in2 != NULL) fclose(_in2); + if(_verbose || startVerbose) cerr << "Opening \"" << _in2Str.c_str() << "\"" << endl; + if((_in2 = fopen(_in2Str.c_str(), "rb")) == NULL) { + cerr << "Could not open index file " << _in2Str.c_str() << endl; + } + } + if(_verbose || startVerbose) { + cerr << " Finished opening input files: "; + logTime(cerr); + } + +#ifdef BOWTIE_MM + if(_useMm /*&& !justHeader*/) { + const char *names[] = {_in1Str.c_str(), _in2Str.c_str()}; + int fds[] = { fileno(_in1), fileno(_in2) }; + for(int i = 0; i < (loadSASamp ? 
2 : 1); i++) { + if(_verbose || startVerbose) { + cerr << " Memory-mapping input file " << (i+1) << ": "; + logTime(cerr); + } + struct stat sbuf; + if (stat(names[i], &sbuf) == -1) { + perror("stat"); + cerr << "Error: Could not stat index file " << names[i] << " prior to memory-mapping" << endl; + throw 1; + } + mmFile[i] = (char*)mmap((void *)0, (size_t)sbuf.st_size, + PROT_READ, MAP_SHARED, fds[(size_t)i], 0); + if(mmFile[i] == (void *)(-1)) { + perror("mmap"); + cerr << "Error: Could not memory-map the index file " << names[i] << endl; + throw 1; + } + if(mmSweep) { + int sum = 0; + for(off_t j = 0; j < sbuf.st_size; j += 1024) { + sum += (int) mmFile[i][j]; + } + if(startVerbose) { + cerr << " Swept the memory-mapped ebwt index file 1; checksum: " << sum << ": "; + logTime(cerr); + } + } + } + mmFile1_ = mmFile[0]; + mmFile2_ = loadSASamp ? mmFile[1] : NULL; + } +#endif + } +#ifdef BOWTIE_MM + else if(_useMm && !justHeader) { + mmFile[0] = mmFile1_; + mmFile[1] = mmFile2_; + } + if(_useMm && !justHeader) { + assert(mmFile[0] == mmFile1_); + assert(mmFile[1] == mmFile2_); + } +#endif + + if(_verbose || startVerbose) { + cerr << " Reading header: "; + logTime(cerr); + } + + // Read endianness hints from both streams + size_t bytesRead = 0; + if(!subIndex) { + switchEndian = false; + uint32_t one = readU32(_in1, switchEndian); // 1st word of primary stream + bytesRead += 4; + if(loadSASamp) { +#ifndef NDEBUG + assert_eq(one, readU32(_in2, switchEndian)); // should match! +#else + readU32(_in2, switchEndian); +#endif + } + if(one != 1) { + assert_eq((1u<<24), one); + assert_eq(1, endianSwapU32(one)); + switchEndian = true; + } + + _toBigEndian = switchEndian; + + // Can't switch endianness and use memory-mapped files; in order to + // support this, someone has to modify the file to switch + // endiannesses appropriately, and we can't do this inside Bowtie + // or we might be setting up a race condition with other processes. 
+ if(switchEndian && _useMm) { + cerr << "Error: Can't use memory-mapped files when the index is the opposite endianness" << endl; + throw 1; + } + + // Reads header entries one by one from primary stream + int index_version = (int)readU32(_in1, switchEndian); bytesRead += 4; + int major_index_version, minor_index_version; + string index_version_extra; + readIndexVersion(index_version, major_index_version, minor_index_version, index_version_extra); + int major_program_version, minor_program_version; + string program_version_extra; + readProgramVersion(major_program_version, minor_program_version, program_version_extra); + if(major_program_version < major_index_version || + (major_program_version == major_index_version && minor_program_version < minor_index_version)) { + cerr << "Warning: the current version of HISAT2 (" << HISAT2_VERSION << ") is older than the version (2." + << major_index_version << "." << minor_index_version; + if(index_version_extra.length() > 0) { + cerr << "-" << index_version_extra; + } + cerr << ") used to build the index." << endl; + cerr << " Users are strongly recommended to update HISAT2 to the latest version." << endl; + } + } else { + switchEndian = _toBigEndian; + } + + index_t len = readIndex(_in1, switchEndian); + bytesRead += sizeof(index_t); + index_t gbwtLen = readIndex(_in1, switchEndian); + bytesRead += sizeof(index_t); + assert_lt(len, gbwtLen); + index_t numNodes = readIndex(_in1, switchEndian); + bytesRead += sizeof(index_t); + int32_t lineRate = readI32(_in1, switchEndian); + bytesRead += 4; + /*int32_t linesPerSide =*/ readI32(_in1, switchEndian); + bytesRead += 4; + int32_t offRate = readI32(_in1, switchEndian); + bytesRead += 4; + // TODO: add isaRate to the actual file format (right now, the + // user has to tell us whether there's an ISA sample and what the + // sampling rate is. 
+ int32_t ftabChars = readI32(_in1, switchEndian); + bytesRead += 4; + index_t eftabLen = readIndex(_in1, switchEndian); + bytesRead += sizeof(index_t); + // chunkRate was deprecated in an earlier version of Bowtie; now + // we use it to hold flags. + int32_t flags = readI32(_in1, switchEndian); + bool entireRev = false; + if(flags < 0 && (((-flags) & GFM_ENTIRE_REV) == 0)) { + if(needEntireRev != -1 && needEntireRev != 0) { + cerr << "Error: This index is compatible with 0.* versions of Bowtie, but not with 2.*" << endl + << "versions. Please build or download a version of the index that is compitble" << endl + << "with Bowtie 2.* (i.e. built with bowtie-build 2.* or later)" << endl; + throw 1; + } + } else entireRev = true; + bytesRead += 4; + + // Create a new EbwtParams from the entries read from primary stream + GFMParams *gh; + bool deleteGh = false; + if(params != NULL) { + params->init(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireRev); + if(_verbose || startVerbose) params->print(cerr); + gh = params; + } else { + gh = new GFMParams(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireRev); + deleteGh = true; + } + + // Set up overridden suffix-array-sample parameters + index_t offsLen = gh->_offsLen; + index_t offRateDiff = 0; + index_t offsLenSampled = offsLen; + if(_overrideOffRate > offRate) { + offRateDiff = _overrideOffRate - offRate; + } + if(offRateDiff > 0) { + offsLenSampled >>= offRateDiff; + if((offsLen & ~(((index_t)INDEX_MAX) << offRateDiff)) != 0) { + offsLenSampled++; + } + } + + // Can't override the offrate or isarate and use memory-mapped + // files; ultimately, all processes need to copy the sparser sample + // into their own memory spaces. 
+#if 0 + if(_useMm && (offRateDiff)) { + cerr << "Error: Can't use memory-mapped files when the offrate is overridden" << endl; + throw 1; + } +#endif + + // Read nPat from primary stream + this->_nPat = readIndex(_in1, switchEndian); + bytesRead += sizeof(index_t); + _plen.reset(); + // Read plen from primary stream + if(_useMm) { +#ifdef BOWTIE_MM + _plen.init((index_t*)(mmFile[0] + bytesRead), _nPat, false); + bytesRead += _nPat*sizeof(index_t); + fseek(_in1, _nPat*sizeof(index_t), SEEK_CUR); +#endif + } else { + try { + if(_verbose || startVerbose) { + cerr << "Reading plen (" << this->_nPat << "): "; + logTime(cerr); + } + _plen.init(new index_t[_nPat], _nPat, true); + if(switchEndian) { + for(index_t i = 0; i < this->_nPat; i++) { + plen()[i] = readIndex(_in1, switchEndian); + } + } else { + size_t r = MM_READ(_in1, (void*)(plen()), _nPat*sizeof(index_t)); + if(r != (size_t)(_nPat*sizeof(index_t))) { + cerr << "Error reading _plen[] array: " << r << ", " << _nPat*sizeof(index_t) << endl; + throw 1; + } + } + } catch(bad_alloc& e) { + cerr << "Out of memory allocating plen[] in Ebwt::read()" + << " at " << __FILE__ << ":" << __LINE__ << endl; + throw e; + } + } + + // TODO: I'm not consistent on what "header" means. Here I'm using + // "header" to mean everything that would exist in memory if we + // started to build the Ebwt but stopped short of the build*() step + // (i.e. everything up to and including join()). 
+ if(justHeader) { + // Be kind + if(deleteGh) delete gh; +#ifdef BOWTIE_MM + fseek(_in1, 0, SEEK_SET); + if(loadSASamp) fseek(_in2, 0, SEEK_SET); +#else + rewind(_in1); + if(loadSASamp) rewind(_in2); +#endif + return; + } + + bool shmemLeader; + + this->_nFrag = readIndex(_in1, switchEndian); + bytesRead += sizeof(index_t); + if(_verbose || startVerbose) { + cerr << "Reading rstarts (" << this->_nFrag*3 << "): "; + logTime(cerr); + } + assert_geq(this->_nFrag, this->_nPat); + _rstarts.reset(); + if(loadRstarts) { + if(_useMm) { +#ifdef BOWTIE_MM + _rstarts.init((index_t*)(mmFile[0] + bytesRead), _nFrag*3, false); + bytesRead += this->_nFrag*sizeof(index_t)*3; + fseek(_in1, this->_nFrag*sizeof(index_t)*3, SEEK_CUR); +#endif + } else { + _rstarts.init(new index_t[_nFrag*3], _nFrag*3, true); + if(switchEndian) { + for(size_t i = 0; i < (size_t)(this->_nFrag*3); i += 3) { + // fragment starting position in joined reference + // string, text id, and fragment offset within text + this->rstarts()[i] = readIndex(_in1, switchEndian); + this->rstarts()[i+1] = readIndex(_in1, switchEndian); + this->rstarts()[i+2] = readIndex(_in1, switchEndian); + } + } else { + size_t r = MM_READ(_in1, (void *)rstarts(), this->_nFrag*sizeof(index_t)*3); + if(r != (size_t)(this->_nFrag*sizeof(index_t)*3)) { + cerr << "Error reading _rstarts[] array: " << r << ", " << (this->_nFrag*sizeof(index_t)*3) << endl; + throw 1; + } + } + } + } else { + // Skip em + assert(rstarts() == NULL); + bytesRead += this->_nFrag*sizeof(index_t)*3; + fseek(_in1, this->_nFrag*sizeof(index_t)*3, SEEK_CUR); + } + + _gfm.reset(); + if(_useMm) { +#ifdef BOWTIE_MM + _gfm.init((uint8_t*)(mmFile[0] + bytesRead), gh->_gbwtTotLen, false); + bytesRead += gh->_gbwtTotLen; + fseek(_in1, gh->_gbwtTotLen, SEEK_CUR); +#endif + } else { + // Allocate ebwt (big allocation) + if(_verbose || startVerbose) { + cerr << "Reading ebwt (" << gh->_gbwtTotLen << "): "; + logTime(cerr); + } + bool shmemLeader = true; + if(useShmem_) { + 
uint8_t *tmp = NULL; + shmemLeader = ALLOC_SHARED_U8( + (_in1Str + "[ebwt]"), gh->_gbwtTotLen, &tmp, + "gfm[]", (_verbose || startVerbose)); + assert(tmp != NULL); + _gfm.init(tmp, gh->_gbwtTotLen, false); + if(_verbose || startVerbose) { + cerr << " shared-mem " << (shmemLeader ? "leader" : "follower") << endl; + } + } else { + try { + _gfm.init(new uint8_t[gh->_gbwtTotLen], gh->_gbwtTotLen, true); + } catch(bad_alloc& e) { + cerr << "Out of memory allocating the gfm[] array for the Bowtie index. Please try" << endl + << "again on a computer with more memory." << endl; + throw 1; + } + } + if(shmemLeader) { + // Read ebwt from primary stream + uint64_t bytesLeft = gh->_gbwtTotLen; + char *pgbwt = (char*)this->gfm(); + while (bytesLeft>0){ + size_t r = MM_READ(this->_in1, (void *)pgbwt, bytesLeft); + if(MM_IS_IO_ERR(this->_in1, r, bytesLeft)) { + cerr << "Error reading _ebwt[] array: " << r << ", " + << bytesLeft << endl; + throw 1; + } + pgbwt += r; + bytesLeft -= r; + } + if(switchEndian) { + uint8_t *side = this->gfm(); + for(size_t i = 0; i < gh->_numSides; i++) { + index_t *cums = reinterpret_cast(side + gh->_sideSz - sizeof(index_t)*2); + cums[0] = endianSwapIndex(cums[0]); + cums[1] = endianSwapIndex(cums[1]); + side += this->_gh._sideSz; + } + } +#ifdef BOWTIE_SHARED_MEM + if(useShmem_) NOTIFY_SHARED(gfm(), gh->_gbwtTotLen); +#endif + } else { + // Seek past the data and wait until master is finished + fseek(_in1, gh->_gbwtTotLen, SEEK_CUR); +#ifdef BOWTIE_SHARED_MEM + if(useShmem_) WAIT_SHARED(gfm(), gh->_gbwtTotLen); +#endif + } + } + + // Read zOff from primary stream + _zOffs.clear(); + index_t num_zOffs = readIndex(_in1, switchEndian); + bytesRead += sizeof(index_t); + for(index_t i = 0; i < num_zOffs; i++) { + index_t zOff = readIndex(_in1, switchEndian); + bytesRead += sizeof(index_t); + assert_lt(zOff, gbwtLen); + _zOffs.push_back(zOff); + } + + try { + // Read fchr from primary stream + if(_verbose || startVerbose) cerr << "Reading fchr (5)" << 
endl; + _fchr.reset(); + if(_useMm) { +#ifdef BOWTIE_MM + _fchr.init((index_t*)(mmFile[0] + bytesRead), 5, false); + bytesRead += 5*sizeof(index_t); + fseek(_in1, 5*sizeof(index_t), SEEK_CUR); +#endif + } else { + _fchr.init(new index_t[5], 5, true); + for(int i = 0; i < 5; i++) { + this->fchr()[i] = readIndex(_in1, switchEndian); + assert_leq(this->fchr()[i], gbwtLen); + assert(i <= 0 || this->fchr()[i] >= this->fchr()[i-1]); + } + } + assert_gt(this->fchr()[4], this->fchr()[0]); + // Read ftab from primary stream + if(_verbose || startVerbose) { + if(loadFtab) { + cerr << "Reading ftab (" << gh->_ftabLen << "): "; + logTime(cerr); + } else { + cerr << "Skipping ftab (" << gh->_ftabLen << "): "; + } + } + _ftab.reset(); + if(loadFtab) { + if(_useMm) { +#ifdef BOWTIE_MM + _ftab.init((index_t*)(mmFile[0] + bytesRead), gh->_ftabLen, false); + bytesRead += gh->_ftabLen*sizeof(index_t); + fseek(_in1, gh->_ftabLen*sizeof(index_t), SEEK_CUR); +#endif + } else { + _ftab.init(new index_t[gh->_ftabLen], gh->_ftabLen, true); + if(switchEndian) { + for(size_t i = 0; i < gh->_ftabLen; i++) + this->ftab()[i] = readIndex(_in1, switchEndian); + } else { + size_t r = MM_READ(_in1, (void *)ftab(), gh->_ftabLen*sizeof(index_t)); + if(r != (size_t)(gh->_ftabLen*sizeof(index_t))) { + cerr << "Error reading _ftab[] array: " << r << ", " << (gh->_ftabLen*sizeof(index_t)) << endl; + throw 1; + } + } + } + // Read etab from primary stream + if(_verbose || startVerbose) { + if(loadFtab) { + cerr << "Reading eftab (" << gh->_eftabLen << "): "; + logTime(cerr); + } else { + cerr << "Skipping eftab (" << gh->_eftabLen << "): "; + } + + } + _eftab.reset(); + if(_useMm) { +#ifdef BOWTIE_MM + _eftab.init((index_t*)(mmFile[0] + bytesRead), gh->_eftabLen, false); + bytesRead += gh->_eftabLen*sizeof(index_t); + fseek(_in1, gh->_eftabLen*sizeof(index_t), SEEK_CUR); +#endif + } else { + _eftab.init(new index_t[gh->_eftabLen], gh->_eftabLen, true); + if(switchEndian) { + for(size_t i = 0; i < 
gh->_eftabLen; i++) + this->eftab()[i] = readIndex(_in1, switchEndian); + } else { + size_t r = MM_READ(_in1, (void *)this->eftab(), gh->_eftabLen*sizeof(index_t)); + if(r != (size_t)(gh->_eftabLen*sizeof(index_t))) { + cerr << "Error reading _eftab[] array: " << r << ", " << (gh->_eftabLen*sizeof(index_t)) << endl; + throw 1; + } + } + } + for(index_t i = 0; i < gh->_eftabLen; i++) { + if(i > 0 && this->eftab()[i] > 0) { + assert_geq(this->eftab()[i] + 4, this->eftab()[i-1]); + } else if(i > 0 && this->eftab()[i-1] == 0) { + assert_eq(0, this->eftab()[i]); + } + } + } else { + assert(ftab() == NULL); + assert(eftab() == NULL); + // Skip ftab + bytesRead += gh->_ftabLen*sizeof(index_t); + fseek(_in1, gh->_ftabLen*sizeof(index_t), SEEK_CUR); + // Skip eftab + bytesRead += sizeof(index_t); + bytesRead += gh->_eftabLen*sizeof(index_t); + fseek(_in1, gh->_eftabLen*sizeof(index_t), SEEK_CUR); + } + } catch(bad_alloc& e) { + cerr << "Out of memory allocating fchr[], ftab[] or eftab[] arrays for the Bowtie index." << endl + << "Please try again on a computer with more memory." 
<< endl; + throw 1; + } + + // Read reference sequence names from primary index file (or not, + // if --refidx is specified) + if(loadNames) { + while(true) { + char c = '\0'; + if(MM_READ(_in1, (void *)(&c), (size_t)1) != (size_t)1) break; + bytesRead++; + if(c == '\0') break; + else if(c == '\n') { + this->_refnames.push_back(""); + } else { + if(this->_refnames.size() == 0) { + this->_refnames.push_back(""); + } + this->_refnames.back().push_back(c); + } + } + } + + _offs.reset(); + if(loadSASamp) { + bytesRead = 4; // reset for secondary index file (already read 1-sentinel) + + shmemLeader = true; + if(_verbose || startVerbose) { + cerr << "Reading offs (" << offsLenSampled << " " << std::setw(2) << sizeof(index_t)*8 << "-bit words): "; + logTime(cerr); + } + + if(!_useMm) { + if(!useShmem_) { + // Allocate offs_ + try { + _offs.init(new index_t[offsLenSampled], offsLenSampled, true); + } catch(bad_alloc& e) { + cerr << "Out of memory allocating the offs[] array for the Bowtie index." << endl + << "Please try again on a computer with more memory." 
<< endl; + throw 1; + } + } else { + index_t *tmp = NULL; + shmemLeader = ALLOC_SHARED_U32( + (_in2Str + "[offs]"), offsLenSampled*sizeof(index_t), &tmp, + "offs", (_verbose || startVerbose)); + _offs.init((index_t*)tmp, offsLenSampled, false); + } + } + + if(_overrideOffRate < 32) { + if(shmemLeader) { + // Allocate offs (big allocation) + if(switchEndian || offRateDiff > 0) { + assert(!_useMm); + const index_t blockMaxSz = (index_t)(2 * 1024 * 1024); // 2 MB block size + const index_t blockMaxSzU = (blockMaxSz / sizeof(index_t)); // # U32s per block + char *buf; + try { + buf = new char[blockMaxSz]; + } catch(std::bad_alloc& e) { + cerr << "Error: Out of memory allocating part of _offs array: '" << e.what() << "'" << endl; + throw e; + } + for(index_t i = 0; i < offsLen; i += blockMaxSzU) { + index_t block = min((index_t)blockMaxSzU, (index_t)(offsLen - i)); + size_t r = MM_READ(_in2, (void *)buf, block * sizeof(index_t)); + if(r != (size_t)(block * sizeof(index_t))) { + cerr << "Error reading block of _offs[] array: " << r << ", " << (block * sizeof(index_t)) << endl; + throw 1; + } + index_t idx = i >> offRateDiff; + for(index_t j = 0; j < block; j += (1 << offRateDiff)) { + assert_lt(idx, offsLenSampled); + this->offs()[idx] = ((index_t*)buf)[j]; + if(switchEndian) { + this->offs()[idx] = endianSwapIndex(this->offs()[idx]); + } + idx++; + } + } + delete[] buf; + } else { + if(_useMm) { +#ifdef BOWTIE_MM + _offs.init((index_t*)(mmFile[1] + bytesRead), offsLen, false); + bytesRead += (offsLen * sizeof(index_t)); + fseek(_in2, (offsLen * sizeof(index_t)), SEEK_CUR); +#endif + } else { + // Workaround for small-index mode where MM_READ may + // not be able to handle read amounts greater than 2^32 + // bytes. 
+ uint64_t bytesLeft = (offsLen * sizeof(index_t)); + char *offs = (char *)this->offs(); + + while(bytesLeft > 0) { + size_t r = MM_READ(_in2, (void*)offs, bytesLeft); + if(MM_IS_IO_ERR(_in2,r,bytesLeft)) { + cerr << "Error reading block of _offs[] array: " + << r << ", " << bytesLeft << gLastIOErrMsg << endl; + throw 1; + } + offs += r; + bytesLeft -= r; + } + } + } +#ifdef BOWTIE_SHARED_MEM + if(useShmem_) NOTIFY_SHARED(offs(), offsLenSampled*sizeof(index_t)); +#endif + } else { + // Not the shmem leader + fseek(_in2, offsLenSampled*sizeof(index_t), SEEK_CUR); +#ifdef BOWTIE_SHARED_MEM + if(useShmem_) WAIT_SHARED(offs(), offsLenSampled*sizeof(index_t)); +#endif + } + } + } + + this->postReadInit(*gh); // Initialize fields of Ebwt not read from file + if(_verbose || startVerbose) print(cerr, *gh); + + // The fact that _ebwt and friends actually point to something + // (other than NULL) now signals to other member functions that the + // Ebwt is loaded into memory. + + // Be kind + if(deleteGh) delete gh; + + if(!subIndex) { +#ifdef BOWTIE_MM + fseek(_in1, 0, SEEK_SET); + if(loadSASamp) fseek(_in2, 0, SEEK_SET); +#else + rewind(_in1); + if(loadSASamp) rewind(_in2); +#endif + } +} + +/** + * Read reference names from an input stream 'in' for an Ebwt primary + * file and store them in 'refnames'. + */ +template +void readGFMRefnames(istream& in, EList& refnames) { + // _in1 must already be open with the get cursor at the + // beginning and no error flags set. 
+ assert(in.good()); + assert_eq((streamoff)in.tellg(), ios::beg); + + // Read endianness hints from both streams + bool switchEndian = false; + uint32_t one = readU32(in, switchEndian); // 1st word of primary stream + if(one != 1) { + assert_eq((1u<<24), one); + switchEndian = true; + } + + // Reads header entries one by one from primary stream + readU32(in, switchEndian); // version + index_t len = readIndex(in, switchEndian); + index_t gbwtLen = readIndex(in, switchEndian); + index_t numNodes = readIndex(in, switchEndian); + int32_t lineRate = readI32(in, switchEndian); + /*int32_t linesPerSide =*/ readI32(in, switchEndian); + int32_t offRate = readI32(in, switchEndian); + int32_t ftabChars = readI32(in, switchEndian); + index_t eftabLen = readIndex(in, switchEndian); + // BTL: chunkRate is now deprecated + int32_t flags = readI32(in, switchEndian); + bool entireReverse = false; + if(flags < 0) { + entireReverse = (((-flags) & GFM_ENTIRE_REV) != 0); + } + + // Create a new EbwtParams from the entries read from primary stream + GFM gh(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireReverse); + + index_t nPat = readIndex(in, switchEndian); // nPat + in.seekg(nPat*sizeof(index_t), ios_base::cur); // skip plen + + // Skip rstarts + index_t nFrag = readIndex(in, switchEndian); + in.seekg(nFrag*sizeof(index_t)*3, ios_base::cur); + + // Skip ebwt + in.seekg(gh._ebwtTotLen, ios_base::cur); + + // Skip zOff from primary stream + index_t numZOffs = readIndex(in, switchEndian); + in.seekg(numZOffs * sizeof(index_t), ios_base::cur); + + // Skip fchr + in.seekg(5 * sizeof(index_t), ios_base::cur); + + // Skip ftab + in.seekg(gh._ftabLen*sizeof(index_t), ios_base::cur); + + // Skip eftab + in.seekg(gh._eftabLen*sizeof(index_t), ios_base::cur); + + // Read reference sequence names from primary index file + while(true) { + char c = '\0'; + in.read(&c, 1); + if(in.eof()) break; + if(c == '\0') break; + else if(c == '\n') { + refnames.push_back(""); + } else 
{ + if(refnames.size() == 0) { + refnames.push_back(""); + } + refnames.back().push_back(c); + } + } + if(refnames.size() > 0 && refnames.back().empty()) { // the list is still empty if no name byte was read, and back() needs an element + refnames.pop_back(); + } + + // Be kind + in.clear(); in.seekg(0, ios::beg); + assert(in.good()); +} + +/** + * Read reference names from the index with basename 'instr' and store + * them in 'refnames'. + */ +template +void readGFMRefnames(const string& instr, EList& refnames) { + ifstream in; + // Open the primary index file (instr + ".1." + gfm_ext) + in.open((instr + ".1." + gfm_ext).c_str(), ios_base::in | ios::binary); + if(!in.is_open()) { + throw GFMFileOpenException("Cannot open file " + instr); + } + assert(in.is_open()); + assert(in.good()); + assert_eq((streamoff)in.tellg(), ios::beg); + readGFMRefnames(in, refnames); +} + +/** + * Read just enough of the Ebwt's header to get its flags + */ +template +int32_t GFM::readVersionFlags(const string& instr, int& major, int& minor, string& extra_version) { + ifstream in; + // Open the primary index file (instr + ".1." + gfm_ext) + in.open((instr + ".1." + gfm_ext).c_str(), ios_base::in | ios::binary); + if(!in.is_open()) { + throw GFMFileOpenException("Cannot open file " + instr); + } + assert(in.is_open()); + assert(in.good()); + bool switchEndian = false; + uint32_t one = readU32(in, switchEndian); // 1st word of primary stream + if(one != 1) { + assert_eq((1u<<24), one); + assert_eq(1, endianSwapU32(one)); + switchEndian = true; + } + index_t version = readU32(in, switchEndian); + readIndexVersion(version, major, minor, extra_version); + + readIndex(in, switchEndian); + readIndex(in, switchEndian); + readIndex(in, switchEndian); + readI32(in, switchEndian); + readI32(in, switchEndian); + readI32(in, switchEndian); + readI32(in, switchEndian); + readIndex(in, switchEndian); + int32_t flags = readI32(in, switchEndian); + return flags; +} + + +/** + * Write an extended Burrows-Wheeler transform to a pair of output + * streams.
+ * + * @param out1 output stream to primary file + * @param out2 output stream to secondary file + * @param be write in big endian? + */ +template +void GFM::writeFromMemory(bool justHeader, + ostream& out1, + ostream& out2) const +{ + const GFMParams& gh = this->_gh; + assert(gh.repOk()); + uint32_t be = this->toBe(); + assert(out1.good()); + assert(out2.good()); + + // When building an Ebwt, these header parameters are known + // "up-front", i.e., they can be written to disk immediately, + // before we join() or buildToDisk() + writeI32(out1, 1, be); // endian hint for primary stream + writeI32(out2, 1, be); // endian hint for secondary stream + int version = getIndexVersion(); + writeI32(out1, version, be); // version + writeIndex(out1, gh._len, be); // length of string (and bwt and suffix array) + writeIndex(out1, 0, be); // dummy for gbwt len + writeIndex(out1, 0, be); // dummy for number of nodes + writeI32(out1, gh._lineRate, be); // 2^lineRate = size in bytes of 1 line + writeI32(out1, 2, be); // not used + writeI32(out1, gh._offRate, be); // every 2^offRate chars is "marked" + writeI32(out1, gh._ftabChars, be); // number of 2-bit chars used to address ftab + writeIndex(out1, 0, be); // eftab length (always written as 0 here; NOTE(review): confirm this agrees with the gh._eftabLen entries written further down) + int32_t flags = 1; + if(gh._entireReverse) flags |= GFM_ENTIRE_REV; + writeI32(out1, -flags, be); // BTL: chunkRate is now deprecated + + if(!justHeader) { + assert(rstarts() != NULL); + assert(offs() != NULL); + assert(ftab() != NULL); + assert(eftab() != NULL); + assert(isInMemory()); + // These Ebwt parameters are known after the input strings have + // been joined() but before they have been built(). These can be
+ writeIndex(out1, this->_nPat, be); + for(index_t i = 0; i < this->_nPat; i++) + writeIndex(out1, this->plen()[i], be); + assert_geq(this->_nFrag, this->_nPat); + writeIndex(out1, this->_nFrag, be); + for(size_t i = 0; i < this->_nFrag*3; i++) + writeIndex(out1, this->rstarts()[i], be); + + // These Ebwt parameters are discovered only as the Ebwt is being + // built (in buildToDisk()). Of these, only 'offs' and 'ebwt' are + // terribly large. 'ebwt' is written to the primary file and then + // discarded from memory as it is built; 'offs' is similarly + // written to the secondary file and discarded. + writeIndex(out1, gh._gbwtTotLen, be); + out1.write((const char *)this->gfm(), gh._gbwtTotLen); + writeIndex(out1, (index_t)_zOffs.size(), be); + for(index_t i = 0; i < _zOffs.size(); i++) + writeIndex(out1, _zOffs[i], be); + index_t offsLen = gh._offsLen; + for(index_t i = 0; i < offsLen; i++) + writeIndex(out2, this->offs()[i], be); + + // 'fchr', 'ftab' and 'eftab' are not fully determined until the + // loop is finished, so they are written to the primary file after + // all of 'ebwt' has already been written and only then discarded + // from memory. + for(int i = 0; i < 5; i++) + writeIndex(out1, this->fchr()[i], be); + for(index_t i = 0; i < gh._ftabLen; i++) + writeIndex(out1, this->ftab()[i], be); + for(index_t i = 0; i < gh._eftabLen; i++) + writeIndex(out1, this->eftab()[i], be); + } +} + +/** + * Given a pair of strings representing output filenames, and assuming + * this Ebwt object is currently in memory, write out this Ebwt to the + * specified files. + * + * If sanity-checking is enabled, then once the streams have been + * fully written and closed, we reopen them and read them into a + * (hopefully) exact copy of this Ebwt. We then assert that the + * current Ebwt and the copy match in all of their fields. 
+ */ +template +void GFM::writeFromMemory(bool justHeader, + const string& out1, + const string& out2) const +{ + ASSERT_ONLY(const GFMParams& gh = this->_gh); + assert(isInMemory()); + assert(gh.repOk()); + + ofstream fout1(out1.c_str(), ios::binary); + ofstream fout2(out2.c_str(), ios::binary); + writeFromMemory(justHeader, fout1, fout2); + fout1.close(); + fout2.close(); + + // Read the file back in and assert that all components match + if(_sanity) { +#if 0 + if(_verbose) + cout << "Re-reading \"" << out1 << "\"/\"" << out2 << "\" for sanity check" << endl; + Ebwt copy(out1, out2, _verbose, _sanity); + assert(!isInMemory()); + copy.loadIntoMemory(eh._color ? 1 : 0, true, false, false); + assert(isInMemory()); + assert_eq(eh._lineRate, copy.eh()._lineRate); + assert_eq(eh._offRate, copy.eh()._offRate); + assert_eq(eh._ftabChars, copy.eh()._ftabChars); + assert_eq(eh._len, copy.eh()._len); + assert_eq(_zOff, copy.zOff()); + assert_eq(_zEbwtBpOff, copy.zEbwtBpOff()); + assert_eq(_zEbwtByteOff, copy.zEbwtByteOff()); + assert_eq(_nPat, copy.nPat()); + for(index_t i = 0; i < _nPat; i++) + assert_eq(this->_plen[i], copy.plen()[i]); + assert_eq(this->_nFrag, copy.nFrag()); + for(size_t i = 0; i < this->nFrag*3; i++) { + assert_eq(this->_rstarts[i], copy.rstarts()[i]); + } + for(index_t i = 0; i < 5; i++) + assert_eq(this->_fchr[i], copy.fchr()[i]); + for(size_t i = 0; i < eh._ftabLen; i++) + assert_eq(this->ftab()[i], copy.ftab()[i]); + for(size_t i = 0; i < eh._eftabLen; i++) + assert_eq(this->eftab()[i], copy.eftab()[i]); + for(index_t i = 0; i < eh._offsLen; i++) + assert_eq(this->_offs[i], copy.offs()[i]); + for(index_t i = 0; i < eh._ebwtTotLen; i++) + assert_eq(this->ebwt()[i], copy.ebwt()[i]); + copy.sanityCheckAll(); + if(_verbose) + cout << "Read-in check passed for \"" << out1 << "\"/\"" << out2 << "\"" << endl; +#endif + } +} + +/** + * Write the rstarts array given the szs array for the reference. 
+ */ +template +void GFM::szsToDisk(const EList& szs, ostream& os, int reverse) { + size_t seq = 0; + index_t off = 0; + index_t totlen = 0; + for(size_t i = 0; i < szs.size(); i++) { + if(szs[i].len == 0) continue; + if(szs[i].first) off = 0; + off += szs[i].off; + if(szs[i].first && szs[i].len > 0) seq++; + index_t seqm1 = (index_t)(seq-1); + assert_lt(seqm1, _nPat); + index_t fwoff = off; + if(reverse == REF_READ_REVERSE) { + // Invert pattern idxs + seqm1 = _nPat - seqm1 - 1; + // Invert pattern idxs + assert_leq(off + szs[i].len, plen()[seqm1]); + fwoff = plen()[seqm1] - (off + szs[i].len); + } + writeIndex(os, totlen, this->toBe()); // offset from beginning of joined string + writeIndex(os, (index_t)seqm1, this->toBe()); // sequence id + writeIndex(os, (index_t)fwoff, this->toBe()); // offset into sequence + totlen += szs[i].len; + off += szs[i].len; + } +} + + +/////////////////////////////////////////////////////////////////////// +// +// Functions for printing and sanity-checking Ebwts +// +/////////////////////////////////////////////////////////////////////// + +/** + * Check that the ebwt array is internally consistent up to (and not + * including) the given side index by re-counting the chars and + * comparing against the embedded occ[] arrays. + */ +template +void GFM::sanityCheckUpToSide(int upToSide) const { + assert(isInMemory()); + index_t occ[] = {0, 0, 0, 0}; + ASSERT_ONLY(index_t occ_save[] = {0, 0, 0, 0}); + index_t cur = 0; // byte pointer + const GFMParams& gh = this->_gh; + bool fw = false; + while(cur < (upToSide * gh._sideSz)) { + assert_leq(cur + gh._sideSz, gh._gbwtTotLen); + for(index_t i = 0; i < gh._sideGbwtSz; i++) { + uint8_t by = this->gfm()[cur + (fw ? i : gh._sideGbwtSz-i-1)]; + for(int j = 0; j < 4; j++) { + // Unpack from lowest to highest bit pair + int twoBit = unpack_2b_from_8b(by, fw ? 
j : 3-j); + occ[twoBit]++; + } + assert_eq(0, (occ[0] + occ[1] + occ[2] + occ[3]) % 4); + } + assert_eq(0, (occ[0] + occ[1] + occ[2] + occ[3]) % gh._sideGbwtLen); + // Finished forward bucket; check saved [A], [C], [G] and [T] + // against the index_ts encoded here + ASSERT_ONLY(const index_t *ugbwt = reinterpret_cast(&gfm()[cur + gh._sideGbwtSz])); + ASSERT_ONLY(index_t as = ugbwt[0]); + ASSERT_ONLY(index_t cs = ugbwt[1]); + ASSERT_ONLY(index_t gs = ugbwt[2]); + ASSERT_ONLY(index_t ts = ugbwt[3]); + assert(as == occ_save[0] || as == occ_save[0]-1); + assert_eq(cs, occ_save[1]); + assert_eq(gs, occ_save[2]); + assert_eq(ts, occ_save[3]); +#ifndef NDEBUG + occ_save[0] = occ[0]; + occ_save[1] = occ[1]; + occ_save[2] = occ[2]; + occ_save[3] = occ[3]; +#endif + cur += gh._sideSz; + } +} + +/** + * Sanity-check various pieces of the Ebwt + */ +template +void GFM::sanityCheckAll(int reverse) const { + const GFMParams& gh = this->_gh; + assert(isInMemory()); + // Check ftab + for(index_t i = 1; i < gh._ftabLen; i++) { + assert_geq(this->ftabHi(i), this->ftabLo(i-1)); + assert_geq(this->ftabLo(i), this->ftabHi(i-1)); + assert_leq(this->ftabHi(i), gh._gbwtLen); + } + assert_eq(this->ftabHi(gh._ftabLen-1), gh._gbwtLen); + + // Check offs + int seenLen = (gh._gbwtLen + 31) >> ((index_t)5); + uint32_t *seen; + try { + seen = new uint32_t[seenLen]; // bitvector marking seen offsets + } catch(bad_alloc& e) { + cerr << "Out of memory allocating seen[] at " << __FILE__ << ":" << __LINE__ << endl; + throw e; + } + memset(seen, 0, 4 * seenLen); + index_t offsLen = gh._offsLen; + for(index_t i = 0; i < offsLen; i++) { + assert_lt(this->offs()[i], gh._gbwtLen); + int w = this->offs()[i] >> 5; + int r = this->offs()[i] & 31; + assert_eq(0, (seen[w] >> r) & 1); // shouldn't have been seen before + seen[w] |= (1 << r); + } + delete[] seen; + + // Check nPat + assert_gt(this->_nPat, 0); + + // Check plen, flen + for(index_t i = 0; i < this->_nPat; i++) { + assert_geq(this->plen()[i], 0); 
+ } + + // Check rstarts + if(this->rstarts() != NULL) { + for(index_t i = 0; i < this->_nFrag-1; i++) { + assert_gt(this->rstarts()[(i+1)*3], this->rstarts()[i*3]); + if(reverse == REF_READ_REVERSE) { + assert(this->rstarts()[(i*3)+1] >= this->rstarts()[((i+1)*3)+1]); + } else { + assert(this->rstarts()[(i*3)+1] <= this->rstarts()[((i+1)*3)+1]); + } + } + } + + // Check ebwt + sanityCheckUpToSide(gh._numSides); + VMSG_NL("Ebwt::sanityCheck passed"); +} + +/** + * Transform this Ebwt into the original string in linear time by using + * the LF mapping to walk backwards starting at the row corresponding + * to the end of the string. The result is written to s. The Ebwt + * must be in memory. The walk ends at the first row that is listed in _zOffs. + */ +template +void GFM::restore(SString& s) const { + assert(isInMemory()); + s.resize(this->_gh._len); + index_t jumps = 0; + index_t i = this->_gh._len; // should point to final SA elt (starting with '$') + SideLocus l(i, this->_gh, this->gfm()); + while(true) { + index_t j = 0; // position of row i within _zOffs, or _zOffs.size() if absent + while(j < _zOffs.size() && i != _zOffs[j]) j++; + if(j < _zOffs.size()) break; // row i is listed in _zOffs: leave the outer walk, not only the scan + assert_lt(jumps, this->_gh._len); + //if(_verbose) cout << "restore: i: " << i << endl; + // Not a marked row; go back a char in the original string + index_t newi = mapLF(l ASSERT_ONLY(, false)); + assert_neq(newi, i); + s[this->_gh._len - jumps - 1] = rowL(l); + i = newi; + l.initFromRow(i, this->_gh, this->gfm()); + jumps++; + } + assert_eq(jumps, this->_gh._len); +} + +/** + * Check that this Ebwt, when restored via restore(), matches up with + * the given array of reference sequences. For sanity checking.
+ */ +template +void GFM::checkOrigs( + const EList >& os, + bool mirror) const +{ + SString rest; + restore(rest); + index_t restOff = 0; + size_t i = 0, j = 0; + if(mirror) { + // TODO: FIXME + return; + } + while(i < os.size()) { + size_t olen = os[i].length(); + int lastorig = -1; + for(; j < olen; j++) { + size_t joff = j; + if(mirror) joff = olen - j - 1; + if((int)os[i][joff] == 4) { + // Skip over Ns + lastorig = -1; + if(!mirror) { + while(j < olen && (int)os[i][j] == 4) j++; + } else { + while(j < olen && (int)os[i][olen-j-1] == 4) j++; + } + j--; + continue; + } + assert_eq(os[i][joff], rest[restOff]); + lastorig = (int)os[i][joff]; + restOff++; + } + if(j == os[i].length()) { + // Moved to next sequence + i++; + j = 0; + } else { + // Just jumped over a gap + } + } +} + +/** + * Try to find the Bowtie index specified by the user. First try the + * exact path given by the user. Then try the user-provided string + * appended onto the path of the "indexes" subdirectory below this + * executable, then try the provided string appended onto + * "$HISAT2_INDEXES/". + */ +string adjustEbwtBase(const string& cmdline, + const string& ebwtFileBase, + bool verbose = false); + +#endif /*GFM_H_*/ diff --git a/gp.h b/gp.h new file mode 100644 index 0000000..02744fa --- /dev/null +++ b/gp.h @@ -0,0 +1,83 @@ +/* + * Copyright 2016, Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. 
If not, see . + */ + +/* + * gp.h + * + */ + +#ifndef GP_H_ +#define GP_H_ + +#include +#include + +/** + * Encapsulates alignment policy for graph + */ +class GraphPolicy { + +public: + + GraphPolicy() { reset(); } + + GraphPolicy(size_t maxAltsTried, + bool useHaplotype, + bool haplotypeOnly, + bool enableCODIS) + { + init(maxAltsTried, + useHaplotype, + haplotypeOnly, + enableCODIS); + } + + /** Restore the defaults: maxAltsTried of 0 and all three flags false (calls init(0)). + */ + void reset() { + init(0); + } + + /** Overwrite every policy field with the given values; each flag defaults to false when omitted. + */ + void init(size_t maxAltsTried, + bool useHaplotype = false, + bool haplotypeOnly = false, + bool enableCODIS = false) + { + maxAltsTried_ = maxAltsTried; + useHaplotype_ = useHaplotype; + haplotypeOnly_ = haplotypeOnly; + enableCODIS_ = enableCODIS; + } + + size_t maxAltsTried() const { return maxAltsTried_; } + bool useHaplotype() const { return useHaplotype_; } + bool haplotypeOnly() const { return haplotypeOnly_; } + bool enableCODIS() const { return enableCODIS_; } + + +private: + size_t maxAltsTried_; + bool useHaplotype_; + bool haplotypeOnly_; + bool enableCODIS_; +}; + +#endif /*ndef GP_H_*/ diff --git a/group_walk.cpp b/group_walk.cpp new file mode 100644 index 0000000..4abb1de --- /dev/null +++ b/group_walk.cpp @@ -0,0 +1,20 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2.
+ */ + +#include "group_walk.h" diff --git a/group_walk.h b/group_walk.h new file mode 100644 index 0000000..2a8c6d2 --- /dev/null +++ b/group_walk.h @@ -0,0 +1,1624 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +/* + * group_walk.h + * + * Classes and routines for walking a set of BW ranges backwards from the edge + * of a seed hit with the goal of resolving the offset of each row in each + * range. Here "offset" means offset into the concatenated string of all + * references. The main class is 'GroupWalk' and an important helper is + * 'GWState'. + * + * For each combination of seed offset and orientation, there is an associated + * QVal. Each QVal describes a (possibly empty) set of suffix array ranges. + * Call these "seed range sets." Each range in the set is "backed" by a range + * of the salist, represented as a PListSlice. Such a range is the origin of a + * walk. + * + * When an offset is resolved, it is entered into the salist via the + * PListSlice. Note that other routines in this same thread might also be + * setting elements of the salist, so routines here should expect that elements + * can go from unresolved to resolved at any time. + * + * What bookkeeping do we have to do as we walk? 
Before the first step, we + * convert the initial QVal into a list of SATuples; the SATuples are our link + * to the corresponding ranges in the suffix array. The list of SATuples is + * then converted to a list of GWState objects; these keep track of where we + * are in our walk (e.g. what 'top' and 'bot' are, how many steps we have taken, + * etc) as well as how the elements in the current range correspond to elements + * from the original range. + * + * The user asks the GroupWalk to resolve another offset by calling advance(). + * advance() can be called in various ways: + * + * (a) The user can request that the GroupWalk proceed until a + * *particular* element is resolved, then return that resolved + * element. Other elements may be resolved along the way, but + * those results are buffered and may be dispensed in future calls + * to advance(). + * + * (b) The user can request that the GroupWalk select an as-yet- + * unreported element at random and proceed until that element + * is resolved and report it. Again, other elements may be + * resolved along the way but they are buffered. + * + * (c) The user can request that the GroupWalk resolve elements in a + * particular BW range (with a particular offset and orientation) + * in an order of its choosing. The GroupWalk in this case + * attempts to resolve as many offsets as possible as quickly as + * possible, and returns them as soon as they're found. The res_ + * buffer is used in this case. + * + * (d) Like (c) but resolving elements at a particular offset and + * orientation instead of at a specific BW range. The res_ buffer + * is used in this case, since there's a chance that the + * + * There are simple ways to heuristically reduce the problem size while + * maintaining randomness. For instance, the user can put a ceiling on the + * number of elements that we walk from any given seed offset or range. + * We can then trim away random subranges to reduce the size of the + * problem.
There is no need for the caller to do this for us. + */ + +#ifndef GROUP_WALK_H_ +#define GROUP_WALK_H_ + +#include +#include +#include "ds.h" +#include "gfm.h" +#include "read.h" +#include "reference.h" +#include "mem_ids.h" + +/** + * Encapsulate an SA range and an associated list of slots where the resolved + * offsets can be placed. + */ +template +class SARangeWithOffs { + +public: + + SARangeWithOffs() { reset(); }; + + SARangeWithOffs( + index_t tf, + index_t bf, + index_t ntf, + index_t nbf, + const EList >& n_iedge_count, + size_t len, + const T& o) { + init(tf, bf, ntf, nbf, n_iedge_count, len, o); + } + + void init( + index_t tf, + index_t bf, + index_t ntf, + index_t nbf, + const EList >& n_iedge_count, + size_t len_, + const T& o) { + topf = tf; + botf = bf; + assert_lt(topf, botf); + node_top = ntf; + node_bot = nbf; + assert_leq(node_bot - node_top, botf - topf); + node_iedge_count = n_iedge_count; + len = len_, + offs = o; + } + + /** + * Reset to uninitialized state. + */ + void reset() { topf = (index_t)INDEX_MAX; } + + /** + * Return true if this is initialized. + */ + bool inited() const { + return topf != (index_t)INDEX_MAX; + } + + /** + * Return the number of times this reference substring occurs in the + * reference, which is also the size of the 'offs' TSlice. + */ + size_t size() const { return offs.size(); } + + index_t topf; // top in GBWT index + index_t botf; + index_t node_top; // top node + index_t node_bot; + EList > node_iedge_count; + size_t len; // length of the reference sequence involved + T offs; // offsets +}; + +/** + * A group of per-thread state that can be shared between all the GroupWalks + * used in that thread. 
+ */ +template +struct GroupWalkState { + + GroupWalkState(int cat) : map(cat) { + masks[0].setCat(cat); + masks[1].setCat(cat); + masks[2].setCat(cat); + masks[3].setCat(cat); + } + + EList masks[4]; // temporary list for masks; used in GWState + EList map; // temporary list of GWState maps +}; + +/** + * Encapsulates counters that encode how much work the walk-left logic + * has done. + */ +struct WalkMetrics { + + WalkMetrics() { + reset(); + } + + /** + * Sum each across this object and 'm'. This is the only safe way + * to update a WalkMetrics shared by many threads. + */ + void merge(const WalkMetrics& m, bool getLock = false) { + ThreadSafe ts(&mutex_m, getLock); + bwops += m.bwops; + branches += m.branches; + resolves += m.resolves; + refresolves += m.refresolves; + reports += m.reports; + } + + /** + * Set all to 0. + */ + void reset() { + bwops = branches = resolves = refresolves = reports = 0; + } + + uint64_t bwops; // Burrows-Wheeler operations + uint64_t branches; // BW range branch-offs + uint64_t resolves; // # offs resolved with BW walk-left + uint64_t refresolves; // # resolutions caused by reference scanning + uint64_t reports; // # offs reported (1 can be reported many times) + MUTEX_T mutex_m; +}; + +/** + * Coordinates for a BW element that the GroupWalk might resolve. + */ +template +struct GWElt { + + GWElt() { reset(); } + + /** + * Reset GWElt to uninitialized state. + */ + void reset() { + offidx = range = elt = len = (index_t)OFF_MASK; + fw = false; + } + + /** + * Initialize this WalkResult. + */ + void init( + index_t oi, + bool f, + index_t r, + index_t e, + index_t l) + { + offidx = oi; + fw = f; + range = r; + elt = e; + len = l; + } + + /** + * Return true iff this GWElt and the given GWElt refer to the same + * element. 
+ */ + bool operator==(const GWElt& o) const { + return offidx == o.offidx && + fw == o.fw && + range == o.range && + elt == o.elt && + len == o.len; + } + + /** + * Return true iff this GWElt and the given GWElt refer to + * different elements. + */ + bool operator!=(const GWElt& o) const { + return !(*this == o); + } + + index_t offidx; // seed offset index + bool fw; // strand + index_t range; // range + index_t elt; // element + index_t len; // length +}; + +/** + * A record encapsulating the result of looking up one BW element in + * the Bowtie index. + */ +template +struct WalkResult { + + WalkResult() { reset(); } + + /** + * Reset GWElt to uninitialized state. + */ + void reset() { + elt.reset(); + bwrow = toff = (index_t)OFF_MASK; + } + + /** + * Initialize this WalkResult. + */ + void init( + index_t oi, // seed offset index + bool f, // strand + index_t r, // range + index_t e, // element + index_t bwr, // BW row + index_t len, // length + index_t to) // text offset + { + elt.init(oi, f, r, e, len); + bwrow = bwr; + toff = to; + } + + GWElt elt; // element resolved + index_t bwrow; // SA row resolved + index_t toff; // resolved offset from SA sample +}; + +/** + * A GW hit encapsulates an SATuple describing a reference substring + * in the cache, along with a bool indicating whether each element of + * the hit has been reported yet. + */ +template +class GWHit { + +public: + GWHit() : + fmap(0, GW_CAT), + offidx((index_t)OFF_MASK), + fw(false), + range((index_t)OFF_MASK), + len((index_t)OFF_MASK), + reported_(0, GW_CAT), + nrep_(0) + { + assert(repOkBasic()); + } + + /** + * Initialize with a new SA range. Resolve the done vector so that + * there's one bool per suffix array element. 
+ */ + void init( + SARangeWithOffs& sa, + index_t oi, + bool f, + index_t r) + { + nrep_ = 0; + offidx = oi; + fw = f; + range = r; + len = (index_t)sa.len; + reported_.resize(sa.offs.size()); + reported_.fill(false); + fmap.resize(sa.offs.size()); + fmap.fill(make_pair((index_t)OFF_MASK, (index_t)OFF_MASK)); + } + + /** + * Clear contents of sat and done. + */ + void reset() { + reported_.clear(); + fmap.clear(); + nrep_ = 0; + offidx = (index_t)OFF_MASK; + fw = false; + range = (index_t)OFF_MASK; + len = (index_t)OFF_MASK; + } + +#ifndef NDEBUG + /** + * Check that GWHit is internally consistent. If a pointer to an + * EList of GWStates is given, we assume that it is the EList + * corresponding to this GWHit and check whether the forward and + * reverse mappings match up for the as-yet-unresolved elements. + */ + bool repOk(const SARangeWithOffs& sa) const { + assert_eq(reported_.size(), sa.offs.size()); + assert_eq(fmap.size(), sa.offs.size()); + // Shouldn't be any repeats among as-yet-unresolveds + size_t nrep = 0; + for(size_t i = 0; i < fmap.size(); i++) { + if(reported_[i]) nrep++; + if(sa.offs[i] != (index_t)OFF_MASK) { + continue; + } + for(size_t j = i+1; j < fmap.size(); j++) { + if(sa.offs[j] != (index_t)OFF_MASK) { + continue; + } + assert(fmap[i] != fmap[j]); + } + } + assert_eq(nrep_, nrep); + return true; + } + + /** + * Return true iff this GWHit is not obviously corrupt. + */ + bool repOkBasic() { + return true; + } +#endif + + /** + * Set the ith element to be reported. + */ + void setReported(index_t i) { + assert(!reported_[i]); + assert_lt(i, reported_.size()); + reported_[i] = true; + nrep_++; + } + + /** + * Return true iff element i has been reported. + */ + bool reported(index_t i) const { + assert_lt(i, reported_.size()); + return reported_[i]; + } + + /** + * Return true iff all elements have been reported. 
+ */ + bool done() const { + assert_leq(nrep_, reported_.size()); + return nrep_ == reported_.size(); + } + + EList, 16> fmap; // forward map; to GWState & elt + index_t offidx; // offset idx + bool fw; // orientation + index_t range; // original range index + index_t len; // length of hit + +protected: + + EList reported_; // per-elt bool indicating whether it's been reported + index_t nrep_; +}; + +/** + * Encapsulates the progress made along a particular path from the original + * range. + */ +template +class GWState { + +public: + + GWState() : map_(0, GW_CAT) { + reset(); assert(repOkBasic()); + } + + /** + * Initialize this GWState with new gfm, top, bot, step, and sat. + * + * We assume map is already set up. + * + * Returns true iff at least one elt was resolved. + */ + template + pair init( + const GFM& gfm, // index to walk left in + const BitPairReference& ref, // bitpair-encoded reference + SARangeWithOffs& sa, // SA range with offsets + EList& sts, // EList of GWStates for range being advanced + GWHit& hit, // Corresponding hit structure + index_t range, // which range is this? + bool reportList, // if true, "report" resolved offsets immediately by adding them to 'res' list + EList, 16>* res, // EList where resolved offsets should be appended + index_t tp, // top of range at this step + index_t bt, // bot of range at this step + index_t n_tp, // node at top + index_t n_bt, // node at bot + const EList >& n_iedge_count, + index_t st, // # steps taken to get to this step + WalkMetrics& met) + { + assert_gt(bt, tp); + assert_gt(n_bt, n_tp); + assert_geq(bt - tp, n_bt - n_tp); + assert_lt(range, sts.size()); + top = tp; + bot = bt; + node_top = n_tp; + node_bot = n_bt; + node_iedge_count = n_iedge_count; + step = st; + assert(!inited_); + ASSERT_ONLY(inited_ = true); + ASSERT_ONLY(lastStep_ = step-1); + return init(gfm, ref, sa, sts, hit, range, reportList, res, met); + } + + /** + * Initialize this GWState. 
+ * + * We assume map is already set up, and that 'step' is equal to the + * number of steps taken to get to the new top/bot pair *currently* + * in the top and bot fields. + * + * Returns a pair of numbers, the first being the number of + * resolved but unreported offsets found during this advance, the + * second being the number of as-yet-unresolved offsets. + */ + template + pair init( + const GFM& gfm, // forward Bowtie index + const BitPairReference& ref, // bitpair-encoded reference + SARangeWithOffs& sa, // SA range with offsets + EList& st, // EList of GWStates for advancing range + GWHit& hit, // Corresponding hit structure + index_t range, // range being inited + bool reportList, // report resolutions, adding to 'res' list? + EList, 16>* res, // EList to append resolutions + WalkMetrics& met) // update these metrics + { + assert(inited_); + assert_eq(step, lastStep_+1); + ASSERT_ONLY(lastStep_++); + assert_leq((index_t)step, gfm.gh().len()); + assert_lt(range, st.size()); + pair ret = make_pair(0, 0); + index_t trimBegin = 0, trimEnd = 0; + bool empty = true; // assume all resolved until proven otherwise + // Commit new information, if any, to the PListSlide. Also, + // trim and check if we're done. 
+ assert_eq(node_bot - node_top, map_.size()); + ASSERT_ONLY(index_t num_orig_iedges = 0, orig_e = 0); + index_t num_iedges = 0, e = 0; + for(size_t i = mapi_; i < map_.size(); i++) { + bool resolved = (off((index_t)i, sa) != (index_t)OFF_MASK); + if(!resolved) { +#ifndef NDEBUG + while(orig_e < sa.node_iedge_count.size()) { + if(map((index_t)i) <= sa.node_iedge_count[orig_e].first) { + break; + } + num_orig_iedges += sa.node_iedge_count[orig_e].second; + orig_e++; + } +#endif + while(e < node_iedge_count.size()) { + if(i <= node_iedge_count[e].first) { + break; + } + num_iedges += node_iedge_count[e].second; + e++; + } + // Elt not resolved yet; try to resolve it now + index_t bwrow = (index_t)(top + i + num_iedges); + index_t node = (index_t)(node_top + i); + index_t toff = gfm.tryOffset(bwrow, node); + ASSERT_ONLY(index_t origBwRow = sa.topf + map((index_t)i) + num_orig_iedges); + ASSERT_ONLY(index_t origNode = sa.node_top + map((index_t)i)); + assert_eq(bwrow, gfm.walkLeft(origBwRow, step)); + if(toff != (index_t)OFF_MASK) { + // Yes, toff was resolvable + assert_eq(toff, gfm.getOffset(bwrow, node)); + met.resolves++; + toff += step; + assert_eq(toff, gfm.getOffset(origBwRow, origNode)); + setOff((index_t)i, toff, sa, met); + if(!reportList) ret.first++; +#if 0 +// used to be #ifndef NDEBUG, but since we no longer require that the reference +// string info be included, this is no longer relevant. + + // Sanity check that the reference characters under this + // hit match the seed characters in hit.satup->key.seq. + // This is NOT a check that we associated the exact right + // text offset with the BW row. This is an important + // distinction because when resolved offsets are filled in + // via refernce scanning, they are not necessarily the + // exact right text offsets to associate with the + // respective BW rows but they WILL all be correct w/r/t + // the reference sequence underneath, which is what really + // matters here. 
+ index_t tidx = (index_t)OFF_MASK, tof, tlen; + bool straddled = false; + gfm.joinedToTextOff( + hit.len, // length of seed + toff, // offset in joined reference string + tidx, // reference sequence id + tof, // offset in reference coordinates + tlen, // length of reference sequence + true, // don't reject straddlers + straddled); + if(tidx != (index_t)OFF_MASK && + hit.satup->key.seq != std::numeric_limits::max()) + { + // key: 2-bit characters packed into a 64-bit word with + // the least significant bitpair corresponding to the + // rightmost character on the Watson reference strand. + uint64_t key = hit.satup->key.seq; + for(int64_t j = tof + hit.len-1; j >= tof; j--) { + // Get next reference base to the left + int c = ref.getBase(tidx, j); + assert_range(0, 3, c); + // Must equal least significant bitpair of key + if(c != (int)(key & 3)) { + // Oops; when we jump to the piece of the + // reference where the seed hit is, it doesn't + // match the seed hit. Before dying, check + // whether we have the right spot in the joined + // reference string + SString jref; + gfm.restore(jref); + uint64_t key2 = hit.satup->key.seq; + for(int64_t k = toff + hit.len-1; k >= toff; k--) { + int c = jref[k]; + assert_range(0, 3, c); + assert_eq(c, (int)(key2 & 3)); + key2 >>= 2; + } + assert(false); + } + key >>= 2; + } + } +#endif + } + } + // Is the element resolved? We ask this regardless of how it was + // resolved (whether this function did it just now, whether it did + // it a while ago, or whether some other function outside GroupWalk + // did it). 
+ if(off((index_t)i, sa) != (index_t)OFF_MASK) { + if(reportList && !hit.reported(map((index_t)i))) { + // Report it + index_t toff = off((index_t)i, sa); + assert(res != NULL); + res->expand(); + index_t origBwRow = sa.topf + map((index_t)i); + res->back().init( + hit.offidx, // offset idx + hit.fw, // orientation + hit.range, // original range index + map((index_t)i), // original element offset + origBwRow, // BW row resolved + hit.len, // hit length + toff); // text offset + hit.setReported(map((index_t)i)); + met.reports++; + } + // Offset resolved + if(empty) { + // Haven't seen a non-empty entry yet, so we + // can trim this from the beginning. + trimBegin++; + } else { + trimEnd++; + } + } else { + // Offset not yet resolved + ret.second++; + trimEnd = 0; + empty = false; + // Set the forward map in the corresponding GWHit + // object to point to the appropriate element of our + // range + assert_geq(i, mapi_); + index_t bmap = map((index_t)i); + hit.fmap[bmap].first = range; + hit.fmap[bmap].second = (index_t)i; +#ifndef NDEBUG + for(size_t j = 0; j < bmap; j++) { + if(sa.offs[j] == (index_t)OFF_MASK && + hit.fmap[j].first == range) + { + assert_neq(i, hit.fmap[j].second); + } + } +#endif + } + } + + // Trim from beginning + assert_geq(trimBegin, 0); + mapi_ += trimBegin; + if(trimBegin > 0) { + top += trimBegin; + index_t e = 0; + for(; e < node_iedge_count.size(); e++) { + if(node_iedge_count[e].first >= trimBegin) break; + assert_geq(top, node_iedge_count[e].second); + top += node_iedge_count[e].second; + } + if(e > 0) node_iedge_count.erase(0, e); + for(e = 0; e < node_iedge_count.size(); e++) { + assert_geq(node_iedge_count[e].first, trimBegin); + node_iedge_count[e].first -= trimBegin; + } + } + + node_top += trimBegin; + if(trimEnd > 0) { + // Trim from end + map_.resize(map_.size() - trimEnd); + bot -= trimEnd; + index_t node_range = node_bot - node_top; + while(node_iedge_count.size() > 0) { + if(node_iedge_count.back().first < (node_range - 
trimEnd)) break; + assert_geq(bot, node_iedge_count.back().second); + bot -= node_iedge_count.back().second; + node_iedge_count.pop_back(); + } + } + node_bot -= trimEnd; +#ifndef NDEBUG + assert_leq(node_top, node_bot); + index_t num_nodes = node_bot - node_top; + index_t add = 0; + for(index_t e = 0; e < node_iedge_count.size(); e++) { + assert_lt(node_iedge_count[e].first, num_nodes); + add += node_iedge_count[e].second; + } + assert_eq(bot - top, num_nodes + add); + +#endif + if(empty) { + assert(done()); +#ifndef NDEBUG + // If range is done, all elements from map should be + // resolved + for(size_t i = mapi_; i < map_.size(); i++) { + assert_neq((index_t)OFF_MASK, off((index_t)i, sa)); + } + // If this range is done, then it should be the case that + // all elements in the corresponding GWHit that point to + // this range are resolved. + for(size_t i = 0; i < hit.fmap.size(); i++) { + if(sa.offs[i] == (index_t)OFF_MASK) { + assert_neq(range, hit.fmap[i].first); + } + } +#endif + return ret; + } else { + assert(!done()); + } + // Is there a dollar sign in the middle of the range? + tmp_zOffs.clear(); + for(index_t i = 0; i < gfm._zOffs.size(); i++) { +#ifndef NDEBUG + if(i > 0) { + assert_lt(gfm._zOffs[i-1], gfm._zOffs[i]); + } +#endif + assert_neq(top, gfm._zOffs[i]); + // assert_neq(bot-1, gfm._zOffs[i]); + if(gfm._zOffs[i] > top && gfm._zOffs[i] < bot) { + tmp_zOffs.push_back(gfm._zOffs[i]); + } + } + + // Yes, the dollar sign is in the middle of this range. We + // must split it into the two ranges on either side of the + // dollar. Let 'bot' and 'top' delimit the portion of the + // range prior to the dollar. 
+ if(tmp_zOffs.size() > 0) { + tmp_gbwt_to_node.clear(); + index_t n = 0, e = 0; + for(index_t r = 0; r < (bot - top); r++) { + tmp_gbwt_to_node.push_back(n); + if(e < node_iedge_count.size()) { + assert_leq(n, node_iedge_count[e].first); + if(n == node_iedge_count[e].first) { + for(index_t a = 0; a < node_iedge_count[e].second; a++) { + tmp_gbwt_to_node.push_back(n); + r++; + } + e++; + } + } + n++; + } + assert_eq(bot - top, tmp_gbwt_to_node.size()); + for(index_t i = 0; i < tmp_zOffs.size(); i++) { + // Note: might be able to do additional trimming off the end. + // Create a new range for the portion after the dollar. + index_t new_top = tmp_zOffs[i] + 1; + if(i + 1 < tmp_zOffs.size() && new_top == tmp_zOffs[i+1]) { + continue; + } + assert_leq(new_top - top, tmp_gbwt_to_node.size()); + if(new_top - top == tmp_gbwt_to_node.size()) { + break; + } + index_t new_node_top = tmp_gbwt_to_node[new_top - top] + node_top; + assert_lt(new_node_top, node_bot); + index_t new_bot; + if(i + 1 < tmp_zOffs.size()) { + new_bot = tmp_zOffs[i+1]; + } else { + new_bot = bot; + } + index_t new_node_bot = node_bot; + if(new_bot - top < tmp_gbwt_to_node.size()) { + new_node_bot = node_top + tmp_gbwt_to_node[new_bot - top]; + if(new_bot - top > 0 && + tmp_gbwt_to_node[new_bot - top] == tmp_gbwt_to_node[new_bot - top - 1]) { + new_node_bot++; + } + } + tmp_node_iedge_count.clear(); + if(new_top >= new_bot) continue; + for(index_t j = new_top - top; j + 1 < new_bot - top;) { + index_t n = tmp_gbwt_to_node[j]; + index_t j2 = j + 1; + while(j2 < new_bot - top) { + if(n != tmp_gbwt_to_node[j2]) { + break; + } + j2++; + } + if(j + 1 < j2) { + tmp_node_iedge_count.expand(); + assert_lt(node_top, new_node_top); + tmp_node_iedge_count.back().first = n - (new_node_top - node_top); + tmp_node_iedge_count.back().second = j2 - j - 1; + } + j = j2; + } + st.expand(); + st.back().reset(); + st.back().initMap(new_node_bot - new_node_top); + for(index_t j = new_node_top; j < new_node_bot; j++) { + 
st.back().map_[j - new_node_top] = map(j - node_top + mapi_); + } + st.back().init( + gfm, + ref, + sa, + st, + hit, + (index_t)st.size()-1, + reportList, + res, + new_top, + new_bot, + new_node_top, + new_node_bot, + tmp_node_iedge_count, + step, + met); + } + assert_eq((index_t)map_.size(), node_bot - node_top + mapi_); + bot = tmp_zOffs[0]; + assert_lt(bot - top, tmp_gbwt_to_node.size()); + node_bot = tmp_gbwt_to_node[bot - top - 1] + node_top + 1; + map_.resize(node_bot - node_top + mapi_); + index_t width = node_bot - node_top; + for(index_t e = 0; e < node_iedge_count.size(); e++) { + if(node_iedge_count[e].first >= node_bot - node_top) { + node_iedge_count.resize(e); + break; + } + width += node_iedge_count[e].second; + } + if(width != bot - top) { + assert_eq(width, bot - top + 1); + assert_gt(node_iedge_count.size(), 0); + assert_gt(node_iedge_count.back().second, 0); + node_iedge_count.back().second -= 1; + if(node_iedge_count.back().second == 0) { + node_iedge_count.resize(node_iedge_count.size()- 1); + } + } + } + assert_gt(bot, top); + // Prepare SideLocus's for next step + if(bot-top > 1) { + SideLocus::initFromTopBot(top, bot, gfm.gh(), gfm.gfm(), tloc, bloc); + assert(tloc.valid()); assert(tloc.repOk(gfm.gh())); + assert(bloc.valid()); assert(bloc.repOk(gfm.gh())); + } else { + tloc.initFromRow(top, gfm.gh(), gfm.gfm()); + assert(tloc.valid()); assert(tloc.repOk(gfm.gh())); + bloc.invalidate(); + } + return ret; + } + +#ifndef NDEBUG + /** + * Check if this GWP is internally consistent. + */ + bool repOk( + const GFM& gfm, + GWHit& hit, + index_t range) const + { + assert(done() || bot > top); + assert(doneResolving(hit) || (tloc.valid() && tloc.repOk(gfm.gh()))); + assert(doneResolving(hit) || bot == top+1 || (bloc.valid() && bloc.repOk(gfm.gh()))); + assert_eq(map_.size()-mapi_, bot-top); + // Make sure that 'done' is compatible with whether we have >= + // 1 elements left to resolve. 
+ int left = 0; + for(size_t i = mapi_; i < map_.size(); i++) { + ASSERT_ONLY(index_t row = (index_t)(top + i - mapi_)); + ASSERT_ONLY(index_t origRow = hit.satup->topf + map(i)); + assert(step == 0 || row != origRow); + assert_eq(row, gfm.walkLeft(origRow, step)); + assert_lt(map_[i], hit.satup->offs.size()); + if(off(i, hit) == (index_t)OFF_MASK) left++; + } + assert(repOkMapRepeats()); + assert(repOkMapInclusive(hit, range)); + return true; + } + + /** + * Return true iff this GWState is not obviously corrupt. + */ + bool repOkBasic() { + assert_geq(bot, top); + return true; + } + + /** + * Check that the fmap elements pointed to by our map_ include all + * of the fmap elements that point to this range. + */ + bool repOkMapInclusive(GWHit& hit, index_t range) const { + for(size_t i = 0; i < hit.fmap.size(); i++) { + if(hit.satup->offs[i] == (index_t)OFF_MASK) { + if(range == hit.fmap[i].first) { + ASSERT_ONLY(bool found = false); + for(size_t j = mapi_; j < map_.size(); j++) { + if(map(j) == i) { + ASSERT_ONLY(found = true); + break; + } + } + assert(found); + } + } + } + return true; + } + + /** + * Check that no two elements in map_ are the same. + */ + bool repOkMapRepeats() const { + for(size_t i = mapi_; i < map_.size(); i++) { + for(size_t j = i+1; j < map_.size(); j++) { + assert_neq(map_[i], map_[j]); + } + } + return true; + } +#endif + + /** + * Return the offset currently assigned to the ith element. If it + * has not yet been resolved, return 0xffffffff. + */ + index_t off( + index_t i, + const SARangeWithOffs& sa) + { + assert_geq(i, mapi_); + assert_lt(i, map_.size()); + assert_lt(map_[i], sa.offs.size()); + return sa.offs.get(map_[i]); + } + + /** + * Return the offset of the element within the original range's + * PListSlice that the ith element of this range corresponds to. 
+ */ + index_t map(index_t i) const { + assert_geq(i, mapi_); + assert_lt(i, map_.size()); + return map_[i]; + } + + /** + * Return the index of the first untrimmed element of the map. + */ + index_t mapi() const { + return mapi_; + } + + /** + * Return number of active elements in the range being tracked by + * this GWState, i.e. map_.size() minus mapi_. + */ + index_t size() const { + return (index_t)(map_.size() - mapi_); + } + + /** + * Return true iff no active elements remain in this range, i.e. + * size() is zero. + */ + bool done() const { + return size() == 0; + } + + /** + * Set sa.offs[map_[i + mapi_]] to 'off'; 'met' is not used here. NOTE(review): i is added + * to mapi_ here, whereas off() and map() index map_ with i directly - confirm which is intended. + */ + void setOff( + index_t i, + index_t off, + SARangeWithOffs& sa, + WalkMetrics& met) + { + assert_lt(i + mapi_, map_.size()); + assert_lt(map_[i + mapi_], sa.offs.size()); + size_t saoff = map_[i + mapi_]; + sa.offs[saoff] = off; + assert_eq(off, sa.offs[saoff]); + } + + /** + * Advance this GWState by one step (i.e. one BW operation). In + * the event of a "split", more elements are added to the EList + * 'st', which must have room for at least 3 more elements without + * needing another expansion. If an expansion of 'st' is + * triggered, this GWState object becomes invalid. + * + * Returns a pair of numbers, the first being the number of + * resolved but unreported offsets found during this advance, the + * second being the number of as-yet-unresolved offsets. + */ + template + pair advance( + const GFM& gfm, // the forward Bowtie index, for stepping left + const BitPairReference& ref, // bitpair-encoded reference + SARangeWithOffs& sa, // SA range with offsets + GWHit& hit, // the associated GWHit object + index_t range, // which range is this?
+ bool reportList, // if true, "report" resolved offsets immediately by adding them to 'res' list + EList, 16>* res, // EList where resolved offsets should be appended + EList& st, // EList of GWStates for range being advanced + GroupWalkState& gws, // temporary storage for masks + WalkMetrics& met, + PerReadMetrics& prm) + { + ASSERT_ONLY(index_t origTop = top); + ASSERT_ONLY(index_t origBot = bot); + assert_geq(step, 0); + assert_eq(step, lastStep_); + // assert_geq(st.capacity(), st.size() + 4); + assert(tloc.valid()); assert(tloc.repOk(gfm.gh())); + assert_eq(node_bot-node_top, (index_t)(map_.size()-mapi_)); + pair ret = make_pair(0, 0); + assert_eq(top, tloc.toBWRow(gfm.gh())); + if(bot - top > 1) { + bool first = true; + ASSERT_ONLY(index_t sum = 0); + index_t newtop = 0, newbot = 0; + index_t new_node_top = 0, new_node_bot = 0; + gws.map.clear(); + // Still multiple elements being tracked + index_t curtop = top, curbot = bot; + index_t cur_node_top = node_top, cur_node_bot = node_bot; + for(index_t e = 0; e < node_iedge_count.size() + 1; e++) { + if(e >= node_iedge_count.size()) { + if(e > 0) { + curtop = curbot + node_iedge_count[e-1].second; + curbot = bot; + if(curtop >= curbot) { + assert_eq(curtop, curbot); + break; + } + cur_node_top = cur_node_bot; + cur_node_bot = node_bot; + } + } else { + if(e > 0) { + curtop = curbot + node_iedge_count[e-1].second; + assert_lt(node_iedge_count[e-1].first, node_iedge_count[e].first); + curbot = curtop + (node_iedge_count[e].first - node_iedge_count[e-1].first); + cur_node_top = cur_node_bot; + } else { + curbot = curtop + node_iedge_count[e].first + 1; + } + cur_node_bot = node_top + node_iedge_count[e].first + 1; + } + assert_lt(curtop, curbot); + index_t upto[4], in[4]; + upto[0] = in[0] = upto[1] = in[1] = + upto[2] = in[2] = upto[3] = in[3] = 0; + // assert_eq(bot, bloc.toBWRow(gfm.gh())); + met.bwops++; + prm.nExFmops++; + // Assert that there's not a dollar sign in the middle of + // this range +#ifndef 
NDEBUG + for(index_t i = 0; i < gfm._zOffs.size(); i++) { + assert(curbot <= gfm._zOffs[i] || curtop > gfm._zOffs[i]); + } +#endif + SideLocus curtloc, curbloc; + SideLocus::initFromTopBot(curtop, curbot, gfm.gh(), gfm.gfm(), curtloc, curbloc); + gfm.mapLFRange(curtloc, curbloc, curbot-curtop, upto, in, gws.masks); +#ifndef NDEBUG + for(int i = 0; i < 4; i++) { + assert_eq(curbot-curtop, (index_t)(gws.masks[i].size())); + } +#endif + + for(int i = 0; i < 4; i++) { + if(in[i] > 0) { + // Non-empty range resulted + if(first) { + // For the first one, + first = false; + pair range, node_range; + backup_node_iedge_count.clear(); + SideLocus::initFromTopBot(curtop, curbot, gfm.gh(), gfm.gfm(), curtloc, curbloc); + range = gfm.mapGLF(curtloc, curbloc, i, &node_range, &backup_node_iedge_count, cur_node_bot - cur_node_top); + newtop = range.first; + newbot = range.second; + new_node_top = node_range.first; + new_node_bot = node_range.second; + // Range narrowed so we have to look at the masks + for(size_t j = 0; j < gws.masks[i].size(); j++) { + assert_lt(j+mapi_+(cur_node_top - node_top), map_.size()); + if(gws.masks[i][j]) { + gws.map.push_back(map_[j+mapi_+(cur_node_top - node_top)]); + assert(gws.map.size() <= 1 || gws.map.back() != gws.map[gws.map.size()-2]); +#if 0 + // If this element is not yet resolved, + // then check that it really is the + // expected number of steps to the left + // of the corresponding element in the + // root range + assert_lt(gws.map.back(), sa.size()); + if(sa.offs[gws.map.back()] == (index_t)OFF_MASK) { + assert_eq(newtop + gws.map.size() - 1, + gfm.walkLeft(sa.topf + gws.map.back(), step+1)); + } +#endif + } + } + assert_lt(new_node_top, new_node_bot); + if(new_node_bot - new_node_top < gws.map.size()) { + assert_eq(curbot - curtop, cur_node_bot - cur_node_top); + SideLocus tmptloc, tmpbloc; + pair tmp_node_range; + index_t j1 = 0, j2 = 0; + for(index_t c = 0; c < gws.masks[i].size(); c++) { + if(gws.masks[i][c]) { + j1 = c; + break; + } 
+ } + for(index_t j = 0; j + 1 < gws.map.size(); j++) { + for(index_t c = j1 + 1; c < gws.masks[i].size(); c++) { + if(gws.masks[i][c]) { + j2 = c; + break; + } + } + assert_lt(j1, j2); + SideLocus::initFromTopBot(curtop + j1, curtop + j2 + 1, gfm.gh(), gfm.gfm(), tmptloc, tmpbloc); + gfm.mapGLF(tmptloc, tmpbloc, i, &tmp_node_range); + assert_gt(tmp_node_range.second - tmp_node_range.first, 0); + if(tmp_node_range.second - tmp_node_range.first == 1) { + index_t jmap = gws.map[j]; + assert_lt(jmap, sa.offs.size()); + sa.offs[jmap] = gws.map[j]; + gws.map[j] = (index_t)OFF_MASK; + } + j1 = j2; + j2 = 0; + } + for(index_t j = 0; j < gws.map.size();) { + if(gws.map[j] == (index_t)OFF_MASK) { + gws.map.erase(j); + } else j++; + } +#ifndef NDEBUG + for(index_t j = 0; j < gws.map.size(); j++) { + assert_neq(gws.map[j], (index_t)OFF_MASK); + } +#endif + } + assert_eq(new_node_bot - new_node_top, (index_t)(gws.map.size())); + } else { + // For each beyond the first, create a new + // GWState and add it to the GWState list. + // NOTE: this can cause the underlying list to + // be expanded which in turn might leave 'st' + // pointing to bad memory. 
+ st.expand(); + st.back().reset(); + tmp_node_iedge_count.clear(); + pair range, node_range; + SideLocus::initFromTopBot(curtop, curbot, gfm.gh(), gfm.gfm(), curtloc, curbloc); + range = gfm.mapGLF(curtloc, curbloc, i, &node_range, &tmp_node_iedge_count, cur_node_bot - cur_node_top); + assert_geq(range.second - range.first, node_range.second - node_range.first); + index_t ntop = range.first; + index_t nbot = range.second; + st.back().mapi_ = 0; + st.back().map_.clear(); + met.branches++; + // Range narrowed so we have to look at the masks + for(size_t j = 0; j < gws.masks[i].size(); j++) { + if(gws.masks[i][j]) st.back().map_.push_back(map_[j+mapi_+(cur_node_top - node_top)]); + } + assert_lt(node_range.first, node_range.second); + if(node_range.second - node_range.first < st.back().map_.size()) { + assert_eq(curbot - curtop, cur_node_bot - cur_node_top); + SideLocus tmptloc, tmpbloc; + pair tmp_node_range; + index_t j1 = 0, j2 = 0; + for(index_t c = 0; c < gws.masks[i].size(); c++) { + if(gws.masks[i][c]) { + j1 = c; + break; + } + } + for(index_t j = 0; j + 1 < st.back().map_.size(); j++) { + for(index_t c = j1 + 1; c < gws.masks[i].size(); c++) { + if(gws.masks[i][c]) { + j2 = c; + break; + } + } + assert_lt(j1, j2); + SideLocus::initFromTopBot(curtop + j1, curtop + j2 + 1, gfm.gh(), gfm.gfm(), tmptloc, tmpbloc); + gfm.mapGLF(tmptloc, tmpbloc, i, &tmp_node_range); + assert_gt(tmp_node_range.second - tmp_node_range.first, 0); + if(tmp_node_range.second - tmp_node_range.first == 1) { + index_t jmap = st.back().map_[j]; + assert_lt(jmap, sa.offs.size()); + sa.offs[jmap] = st.back().map_[j]; + st.back().map_[j] = (index_t)OFF_MASK; + } + j1 = j2; + j2 = 0; + } + for(index_t j = 0; j < st.back().map_.size();) { + if(st.back().map_[j] == (index_t)OFF_MASK) { + st.back().map_.erase(j); + } else j++; + } +#ifndef NDEBUG + for(index_t j = 0; j < st.back().map_.size(); j++) { + assert_neq(st.back().map_[j], (index_t)OFF_MASK); + } +#endif + } + 
assert_eq(node_range.second - node_range.first, st.back().map_.size()); + pair rret = + st.back().init( + gfm, // forward Bowtie index + ref, // bitpair-encodede reference + sa, // SA range with offsets + st, // EList of all GWStates associated with original range + hit, // associated GWHit object + (index_t)st.size()-1, // range offset + reportList, // if true, report hits to 'res' list + res, // report hits here if reportList is true + ntop, // BW top of new range + nbot, // BW bot of new range + node_range.first, + node_range.second, + tmp_node_iedge_count, + step+1, // # steps taken to get to this new range + met); // update these metrics + ret.first += rret.first; + ret.second += rret.second; + } + ASSERT_ONLY(sum += in[i]); + } + } + } + mapi_ = 0; + // assert_eq(new_node_bot-new_node_top, sum); + assert_gt(newbot, newtop); + assert(top != newtop || bot != newbot); + //assert(!(newtop < top && newbot > top)); + top = newtop; + bot = newbot; + node_top = new_node_top; + node_bot = new_node_bot; + node_iedge_count = backup_node_iedge_count; + backup_node_iedge_count.clear(); + if(!gws.map.empty()) { + map_ = gws.map; + } + //assert(repOkMapRepeats()); + //assert(repOkMapInclusive(hit, range)); + assert_eq(node_bot-node_top, (index_t)map_.size()); + } else { + // Down to one element + assert_eq(bot, top+1); + assert_eq(1, map_.size()-mapi_); + // Sets top, returns char walked through (which we ignore) + ASSERT_ONLY(index_t oldtop = top); + met.bwops++; + prm.nExFmops++; + pair node_range(0, 0); + pair range = gfm.mapGLF1(top, tloc, &node_range); + top = range.first; + assert_neq(top, oldtop); + bot = top+1; + node_top = node_range.first; + node_bot = node_range.second; + if(mapi_ > 0) { + map_[0] = map_[mapi_]; + mapi_ = 0; + } + map_.resize(1); + } + assert(top != origTop || bot != origBot); + step++; + assert_gt(step, 0); + assert_leq((index_t)step, gfm.gh().len()); + pair rret = + init( + gfm, // forward GFM index + ref, // bitpair-encodede reference + sa, // 
SA range with offsets + st, // EList of all GWStates associated with original range + hit, // associated GWHit object + range, // range offset + reportList, // if true, report hits to 'res' list + res, // report hits here if reportList is true + met); // update these metrics + ret.first += rret.first; + ret.second += rret.second; + return ret; + } + + /** + * Clear all state in preparation for the next walk. + */ + void reset() { + top = bot = node_top = node_bot = step = mapi_ = 0; + ASSERT_ONLY(lastStep_ = -1); + ASSERT_ONLY(inited_ = false); + tloc.invalidate(); + bloc.invalidate(); + map_.clear(); + node_iedge_count.clear(); + backup_node_iedge_count.clear(); + tmp_node_iedge_count.clear(); + } + + /** + * Resize the map_ field to the given size. + */ + void initMap(size_t newsz) { + mapi_ = 0; + map_.resize(newsz); + for(size_t i = 0; i < newsz; i++) { + map_[i] = (index_t)i; + } + } + + /** + * Return true iff all rows corresponding to this GWState have been + * resolved and reported. + */ + bool doneReporting(const GWHit& hit) const { + for(size_t i = mapi_; i < map_.size(); i++) { + if(!hit.reported(map(i))) return false; + } + return true; + } + + /** + * Return true iff all rows corresponding to this GWState have been + * resolved (but not necessarily reported). 
+ */ + bool doneResolving(const SARangeWithOffs& sa) const { + for(size_t i = mapi_; i < map_.size(); i++) { + if(sa.offs[map((index_t)i)] == (index_t)OFF_MASK) return false; + } + return true; + } + + SideLocus tloc; // SideLocus for top + SideLocus bloc; // SideLocus for bottom + index_t top; // top elt of range in GBWT + index_t bot; // bot elt of range in GBWT + index_t node_top; + index_t node_bot; + EList > node_iedge_count; + int step; // how many steps have we walked to the left so far + + // temporary + EList > backup_node_iedge_count; + EList > tmp_node_iedge_count; + + EList tmp_zOffs; + EList tmp_gbwt_to_node; + +protected: + + ASSERT_ONLY(bool inited_); + ASSERT_ONLY(int lastStep_); + EList map_; // which elts in range 'range' we're tracking + index_t mapi_; // first untrimmed element of map +}; + +template +class GroupWalk2S { +public: + typedef EList, S> TStateV; + + GroupWalk2S() : st_(8, GW_CAT) { + reset(); + } + + /** + * Reset the GroupWalk in preparation for the next SeedResults. + */ + void reset() { + elt_ = rep_ = 0; + ASSERT_ONLY(inited_ = false); + } + + /** + * Initialize a new group walk w/r/t a QVal object. 
+ */ + void init( + const GFM& gfmFw, // forward Bowtie index for walking left + const BitPairReference& ref, // bitpair-encoded reference + SARangeWithOffs& sa, // SA range with offsets + RandomSource& rnd, // pseudo-random generator for sampling rows (not referenced in this body) + WalkMetrics& met) // update metrics here + { + reset(); +#ifndef NDEBUG + inited_ = true; +#endif + // Init GWHit + hit_.init(sa, 0, false, 0); + // Init corresponding GWState + st_.resize(1); + st_.back().reset(); + assert(st_.back().repOkBasic()); + index_t top = sa.topf; + index_t bot = sa.botf; + index_t node_top = sa.node_top; + index_t node_bot = (index_t)(node_top + sa.size()); + st_.back().initMap(sa.size()); + st_.ensure(4); + st_.back().init( + gfmFw, // Bowtie index + ref, // bitpair-encoded reference + sa, // SA range with offsets + st_, // EList + hit_, // GWHit + 0, // range 0 + false, // put resolved elements into res_? + NULL, // put resolved elements here + top, // GBW row at top + bot, // GBW row at bot + node_top, // node at top + node_bot, // node at bot + sa.node_iedge_count, + 0, // # steps taken + met); // update metrics here + elt_ += sa.size(); + assert(hit_.repOk(sa)); + } + + // + // ELEMENT-BASED + // + + /** + * Advance the GroupWalk until all elements have been resolved. NOTE(review): this passes four arguments, but advanceElement in this class declares eight - confirm whether this member is still used. + */ + void resolveAll(WalkMetrics& met, PerReadMetrics& prm) { + WalkResult res; // ignore results for now + for(size_t i = 0; i < elt_; i++) { + advanceElement((index_t)i, res, met, prm); + } + } + + /** + * Advance the GroupWalk until the specified element has been + * resolved.
+ */ + bool advanceElement( + index_t elt, // element within the range + const GFM& gfmFw, // forward Bowtie index for walking left + const BitPairReference& ref, // bitpair-encoded reference + SARangeWithOffs& sa, // SA range with offsets + GroupWalkState& gws, // GroupWalk state; scratch space + WalkResult& res, // put the result here + WalkMetrics& met, // metrics + PerReadMetrics& prm) // per-read metrics + { + assert(inited_); + assert(!done()); + assert(hit_.repOk(sa)); + assert_lt(elt, sa.size()); // elt must fall within range + // Until we've resolved our element of interest... + while(sa.offs[elt] == (index_t)OFF_MASK) { + // Get the GWState that contains our element of interest + size_t range = hit_.fmap[elt].first; + assert_lt(range, st_.size()); + st_.ensure(st_[range].node_bot - st_[range].node_top); + // st_.ensure(4); + GWState& st = st_[range]; + assert(!st.doneResolving(sa)); + // Returns a pair of numbers, the first being the number of + // resolved but unreported offsets found during this advance, the + // second being the number of as-yet-unresolved offsets. + st.advance( + gfmFw, + ref, + sa, + hit_, + (index_t)range, + false, + NULL, + st_, + gws, + met, + prm); + assert(sa.offs[elt] != (index_t)OFF_MASK || + !st_[hit_.fmap[elt].first].doneResolving(sa)); + } + assert_neq((index_t)OFF_MASK, sa.offs[elt]); + // Report it! + if(!hit_.reported(elt)) { + hit_.setReported(elt); + } + met.reports++; + res.init( + 0, // seed offset + false, // orientation + 0, // range + elt, // element + sa.topf + elt, // bw row + (index_t)sa.len, // length of hit + sa.offs[elt]); // resolved text offset + rep_++; + return true; + } + + /** + * Return true iff all elements have been resolved and reported. + */ + bool done() const { return rep_ == elt_; } + +#ifndef NDEBUG + /** + * Check that GroupWalk is internally consistent. 
+ */ + bool repOk(const SARangeWithOffs& sa) const { + assert(hit_.repOk(sa)); + assert_leq(rep_, elt_); + // This is a lot of work + size_t resolved = 0, reported = 0; + // For each element + const size_t sz = sa.size(); + for(size_t m = 0; m < sz; m++) { + // Is it resolved? + if(sa.offs[m] != (index_t)OFF_MASK) { + resolved++; + } else { + assert(!hit_.reported(m)); + } + // Is it reported? + if(hit_.reported(m)) { + reported++; + } + assert_geq(resolved, reported); + } + assert_geq(resolved, reported); + assert_eq(rep_, reported); + assert_eq(elt_, sz); + return true; + } +#endif + + /** + * Return the number of BW elements that we can resolve. + */ + index_t numElts() const { return elt_; } + + /** + * Return the size occupied by this GroupWalk and all its constituent + * objects. + */ + size_t totalSizeBytes() const { + return 2 * sizeof(size_t) + st_.totalSizeBytes() + sizeof(GWHit); + } + /** + * Return the capacity of this GroupWalk and all its constituent objects. + */ + size_t totalCapacityBytes() const { + return 2 * sizeof(size_t) + st_.totalCapacityBytes() + sizeof(GWHit); + } + +#ifndef NDEBUG + bool initialized() const { return inited_; } +#endif + +protected: + + ASSERT_ONLY(bool inited_); // initialized? + + index_t elt_; // # BW elements under the control of the GroupWalk + index_t rep_; // # BW elements reported + + // List of GWState objects, indexed by range, that + // hold the state of the walk so far. + TStateV st_; + + // The GWHit for the SA range being walked; tracks which elements were reported. + GWHit hit_; +}; + +#endif /*GROUP_WALK_H_*/ diff --git a/hgfm.h b/hgfm.h new file mode 100644 index 0000000..89cb7aa --- /dev/null +++ b/hgfm.h @@ -0,0 +1,2655 @@ +/* + * Copyright 2015, Daehwan Kim + * + * This file is part of HISAT 2.
+ * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HGFM_H_ +#define HGFM_H_ + +#include "hier_idx_common.h" +#include "gfm.h" + +/** + * Extended Burrows-Wheeler transform data. + * LocalEbwt is a specialized Ebwt index that represents ~64K bps + * and therefore uses two bytes as offsets within 64K bps. + * This class has only two additional member variables to denote the genomic sequence it represents: + * (1) the contig index and (2) the offset within the contig.
+ * + */ +template +class LocalGFM : public GFM { + typedef GFM PARENT_CLASS; +public: + /// Construct an Ebwt from the given input file + LocalGFM(const string& in, + ALTDB* altdb, + FILE *in5, + FILE *in6, + char *mmFile5, + char *mmFile6, + full_index_t& tidx, + full_index_t& localOffset, + full_index_t& joinedOffset, + bool switchEndian, + size_t& bytesRead, + size_t& bytesRead2, + int needEntireReverse, + bool fw, + int32_t overrideOffRate, // = -1, + int32_t offRatePlus, // = -1, + uint32_t lineRate, + uint32_t offRate, + uint32_t ftabChars, + bool useMm, // = false, + bool useShmem, // = false, + bool mmSweep, // = false, + bool loadNames, // = false, + bool loadSASamp, // = true, + bool loadFtab, // = true, + bool loadRstarts, // = true, + bool verbose, // = false, + bool startVerbose, // = false, + bool passMemExc, // = false, + bool sanityCheck, // = false) + bool useHaplotype) : // = false + GFM(in, + altdb, + NULL, + NULL, + needEntireReverse, + fw, + overrideOffRate, + offRatePlus, + useMm, + useShmem, + mmSweep, + loadNames, + loadSASamp, + loadFtab, + loadRstarts, + true, // load Splice Sites + verbose, + startVerbose, + passMemExc, + sanityCheck, + useHaplotype, + true) + { + this->_in1Str = in + ".5." + gfm_ext; + this->_in2Str = in + ".5." 
+ gfm_ext; + readIntoMemory( + in5, + in6, + mmFile5, + mmFile6, + tidx, + localOffset, + joinedOffset, + switchEndian, + bytesRead, + bytesRead2, + needEntireReverse, + loadSASamp, + loadFtab, + loadRstarts, + false, //justHeader + lineRate, + offRate, + ftabChars, + mmSweep, + loadNames, + startVerbose); + + _tidx = tidx; + _localOffset = localOffset; + _joinedOffset = joinedOffset; + + // If the offRate has been overridden, reflect that in the + // _eh._offRate field + if(offRatePlus > 0 && this->_overrideOffRate == -1) { + this->_overrideOffRate = this->_gh._offRate + offRatePlus; + } + if(this->_overrideOffRate > this->_gh._offRate) { + this->_gh.setOffRate(this->_overrideOffRate); + assert_eq(this->_overrideOffRate, this->_gh._offRate); + } + assert(this->repOk()); + } + + + /// Construct an Ebwt from the given header parameters and string + /// vector, optionally using a blockwise suffix sorter with the + /// given 'bmax' and 'dcv' parameters. The string vector is + /// ultimately joined and the joined string is passed to buildToDisk(). 
+ template + LocalGFM( + TStr& s, + const EList& sa, + PathGraph* pg, + full_index_t tidx, + full_index_t localOffset, + full_index_t joinedOffset, + EList >& alts, + index_t local_size, + bool packed, + int needEntireReverse, + int32_t lineRate, + int32_t offRate, + int32_t ftabChars, + const string& file, // base filename for EBWT files + bool fw, + int dcv, + EList& szs, + index_t sztot, + const RefReadInParams& refparams, + uint32_t seed, + ostream& out5, + ostream& out6, + int32_t overrideOffRate = -1, + bool verbose = false, + bool passMemExc = false, + bool sanityCheck = false) : + GFM(packed, + needEntireReverse, + lineRate, + offRate, + ftabChars, + file, + fw, + dcv, + szs, + sztot, + refparams, + seed, + overrideOffRate, + verbose, + passMemExc, + sanityCheck) + { + const GFMParams& gh = this->_gh; + assert(gh.repOk()); + uint32_t be = this->toBe(); + assert(out5.good()); + assert(out6.good()); + _tidx = tidx; + _localOffset = localOffset; + _joinedOffset = joinedOffset; + writeIndex(out5, tidx, be); + writeIndex(out5, localOffset, be); + writeIndex(out5, joinedOffset, be); + writeIndex(out5, gh._len, be); // length of string (and bwt and suffix array) + streampos headerPos = out5.tellp(); + writeIndex(out5, 0, be); // gbwtLen + writeIndex(out5, 0, be); // num of nodes + writeIndex(out5, 0, be); // eftabLen + if(gh._len > 0) { + assert_gt(szs.size(), 0); + assert_gt(sztot, 0); + // Not every fragment represents a distinct sequence - many + // fragments may correspond to a single sequence. Count the + // number of sequences here by counting the number of "first" + // fragments. 
+ this->_nPat = 0; + this->_nFrag = 0; + for(size_t i = 0; i < szs.size(); i++) { + if(szs[i].len > 0) this->_nFrag++; + if(szs[i].first && szs[i].len > 0) this->_nPat++; + } + assert_eq(this->_nPat, 1); + assert_geq(this->_nFrag, this->_nPat); + this->_rstarts.reset(); + writeIndex(out5, this->_nPat, be); + assert_eq(this->_nPat, 1); + this->_plen.init(new index_t[this->_nPat], this->_nPat); + // For each pattern, set plen + int npat = -1; + for(size_t i = 0; i < szs.size(); i++) { + if(szs[i].first && szs[i].len > 0) { + if(npat >= 0) { + writeIndex(out5, this->plen()[npat], be); + } + npat++; + this->plen()[npat] = (szs[i].len + szs[i].off); + } else { + this->plen()[npat] += (szs[i].len + szs[i].off); + } + } + assert_eq((index_t)npat, this->_nPat-1); + writeIndex(out5, this->plen()[npat], be); + // Write the number of fragments + writeIndex(out5, this->_nFrag, be); + + if(refparams.reverse == REF_READ_REVERSE) { + EList tmp(EBWT_CAT); + reverseRefRecords(szs, tmp, false, verbose); + this->szsToDisk(tmp, out5, refparams.reverse); + } else { + this->szsToDisk(szs, out5, refparams.reverse); + } + + if(alts.empty()) { + assert(pg == NULL); + buildToDisk(sa, s, out5, out6, headerPos); + } else { + assert(pg != NULL); + // Re-initialize GFM parameters to reflect real number of edges (gbwt string) + this->_gh.init( + this->_gh.len(), + pg->getNumEdges(), + pg->getNumNodes(), + this->_gh.lineRate(), + this->_gh.offRate(), + this->_gh.ftabChars(), + 0, + this->_gh.entireReverse()); + buildToDisk(*pg, s, out5, out6, headerPos); + } + } + + out5.flush(); out6.flush(); + if(out5.fail() || out6.fail()) { + cerr << "An error occurred writing the index to disk. Please check if the disk is full." 
<< endl; + throw 1; + } + } + + template void buildToDisk( + PathGraph& gbwt, + const TStr& s, + ostream& out1, + ostream& out2, + streampos headerPos); + + template void buildToDisk( + const EList& sa, + const TStr& s, + ostream& out1, + ostream& out2, + streampos headerPos); + + // I/O + void readIntoMemory( + FILE *in5, + FILE *in6, + char *mmFile5, + char *mmFile6, + full_index_t& tidx, + full_index_t& localOffset, + full_index_t& joinedOffset, + bool switchEndian, + size_t& bytesRead, + size_t& bytesRead2, + int needEntireRev, + bool loadSASamp, + bool loadFtab, + bool loadRstarts, + bool justHeader, + int32_t lineRate, + int32_t offRate, + int32_t ftabChars, + bool mmSweep, + bool loadNames, + bool startVerbose); + + /** + * Sanity-check various pieces of the Ebwt + */ + void sanityCheckAll(int reverse) const { + if(this->_gh._len > 0) { + PARENT_CLASS::sanityCheckAll(reverse); + } + } + + bool empty() const { return this->_gh._len == 0; } + +public: + full_index_t _tidx; + full_index_t _localOffset; + full_index_t _joinedOffset; +}; + +/** + * Build an Ebwt from a string 's' and its suffix array 'sa' (which + * might actually be a suffix array *builder* that builds blocks of the + * array on demand). The bulk of the Ebwt, i.e. the ebwt and offs + * arrays, is written directly to disk. This is by design: keeping + * those arrays in memory needlessly increases the footprint of the + * building process. Instead, we prefer to build the Ebwt directly + * "to disk" and then read it back into memory later as necessary. + * + * It is assumed that the header values and join-related values (nPat, + * plen) have already been written to 'out1' before this function + * is called. When this function is finished, it will have + * additionally written ebwt, zOff, fchr, ftab and eftab to the primary + * file and offs to the secondary file. + * + * Assume DNA/RNA/any alphabet with 4 or fewer elements. + * Assume occ array entries are 32 bits each. 
+ * + * @param sa the suffix array to convert to a Ebwt + * @param s the original string + * @param out + */ +template +template +void LocalGFM::buildToDisk( + PathGraph& gbwt, + const TStr& s, + ostream& out5, + ostream& out6, + streampos headerPos) +{ + assert_leq(s.length(), std::numeric_limits::max()); + const GFMParams& gh = this->_gh; + + assert(gh.repOk()); + assert_lt(s.length(), gh.gbwtLen()); + assert_eq(s.length(), gh._len); + assert_gt(gh._lineRate, 3); + + index_t gbwtLen = gh._gbwtLen; + streampos out5pos = out5.tellp(); + out5.seekp(headerPos); + writeIndex(out5, gbwtLen, this->toBe()); + writeIndex(out5, gh._numNodes, this->toBe()); + headerPos = out5.tellp(); + out5.seekp(out5pos); + index_t ftabLen = gh._ftabLen; + index_t sideSz = gh._sideSz; + index_t gbwtTotSz = gh._gbwtTotSz; + index_t fchr[] = {0, 0, 0, 0, 0}; + EList ftab(EBWT_CAT); + EList zOffs; + + // Save # of occurrences of each character as we walk along the bwt + index_t occ[4] = {0, 0, 0, 0}; + index_t occSave[4] = {0, 0, 0, 0}; + // # of occurrences of 1 in M arrays + index_t M_occ = 0, M_occSave = 0; + // Location in F that corresponds to 1 in M + index_t F_loc = 0, F_locSave = 0; + + try { + VMSG_NL("Allocating ftab, absorbFtab"); + ftab.resize(ftabLen); + ftab.fillZero(); + } catch(bad_alloc &e) { + cerr << "Out of memory allocating ftab[] or absorbFtab[] " + << "in LocalGFM::buildToDisk() at " << __FILE__ << ":" + << __LINE__ << endl; + throw e; + } + + // Allocate the side buffer; holds a single side as its being + // constructed and then written to disk. Reused across all sides. 
+#ifdef SIXTY4_FORMAT + EList gfmSide(EBWT_CAT); +#else + EList gfmSide(EBWT_CAT); +#endif + try { + // Used to calculate ftab and eftab, but having gfm costs a lot of memory + this->_gfm.init(new uint8_t[gh._gbwtTotLen], gh._gbwtTotLen, true); +#ifdef SIXTY4_FORMAT + gfmSide.resize(sideSz >> 3); +#else + gfmSide.resize(sideSz); +#endif + } catch(bad_alloc &e) { + cerr << "Out of memory allocating ebwtSide[] in " + << "LocalGFM::buildToDisk() at " << __FILE__ << ":" + << __LINE__ << endl; + throw e; + } + + // Points to the base offset within ebwt for the side currently + // being written + index_t side = 0; + + // Whether we're assembling a forward or a reverse bucket + bool fw = true; + int sideCur = 0; + + index_t si = 0; // string offset (chars) + ASSERT_ONLY(bool inSA = true); // true iff saI still points inside suffix + // array (as opposed to the padding at the + // end) + // Iterate over packed bwt bytes + VMSG_NL("Entering LocalGFM loop"); + ASSERT_ONLY(uint32_t beforeGbwtOff = (uint32_t)out5.tellp()); + while(side < gbwtTotSz) { + // Sanity-check our cursor into the side buffer + assert_geq(sideCur, 0); + assert_lt(sideCur, (int)gh._sideGbwtSz); + assert_eq(0, side % sideSz); // 'side' must be on side boundary + gfmSide[sideCur] = 0; // clear + if(sideCur == 0) { + memset(gfmSide.ptr(), 0, gh._sideGbwtSz); + gfmSide[sideCur] = 0; // clear + } + assert_lt(side + sideCur, gbwtTotSz); + // Iterate over bit-pairs in the si'th character of the BWT +#ifdef SIXTY4_FORMAT + for(int bpi = 0; bpi < 32; bpi++, si++) { +#else + for(int bpi = 0; bpi < 4; bpi++, si++) { +#endif + int gbwtChar = 0; + int F = 0, M = 0; + full_index_t pos = 0; + bool count = true; + if(si < gbwtLen) { + gbwt.nextRow(gbwtChar, F, M, pos); + + // (that might have triggered sa to calc next suf block) + if(gbwtChar == 'Z') { + // Don't add the '$' in the last column to the BWT + // transform; we can't encode a $ (only A C T or G) + // and counting it as, say, an A, will mess up the + // LR 
mapping + gbwtChar = 0; count = false; +#ifndef NDEBUG + if(zOffs.size() > 0) { + assert_gt(si, zOffs.back()); + } +#endif + zOffs.push_back(si); // remember GBWT row that corresponds to the 0th suffix + } else { + gbwtChar = asc2dna[gbwtChar]; + assert_lt(gbwtChar, 4); + // Update the fchr + fchr[gbwtChar]++; + } + assert_lt(F, 2); + assert_lt(M, 2); + if(M == 1) { + assert_neq(F_loc, numeric_limits::max()); + F_loc = gbwt.nextFLocation(); +#ifndef NDEBUG + if(F_loc > 0) { + assert_gt(F_loc, F_locSave); + } +#endif + } + // Suffix array offset boundary? - update offset array + if(M == 1 && (M_occ & gh._offMask) == M_occ) { + assert_lt((M_occ >> gh._offRate), gh._offsLen); + // Write offsets directly to the secondary output + // stream, thereby avoiding keeping them in memory + writeIndex(out6, pos, this->toBe()); + } + } else { + // Strayed off the end of the SA, now we're just + // padding out a bucket +#ifndef NDEBUG + if(inSA) { + // Assert that we wrote all the characters in the + // string before now + assert_eq(si, gbwtLen); + inSA = false; + } +#endif + // 'A' used for padding; important that padding be + // counted in the occ[] array + gbwtChar = 0; + F = M = 0; + } + if(count) occ[gbwtChar]++; + if(M) M_occ++; + // Append BWT char to bwt section of current side + if(fw) { + // Forward bucket: fill from least to most +#ifdef SIXTY4_FORMAT + gfmSide[sideCur] |= ((uint64_t)gbwtChar << (bpi << 1)); + if(gbwtChar > 0) assert_gt(gfmSide[sideCur], 0); + assert(false); + cerr << "Not implemented" << endl; + exit(1); +#else + pack_2b_in_8b(gbwtChar, gfmSide[sideCur], bpi); + assert_eq((gfmSide[sideCur] >> (bpi*2)) & 3, gbwtChar); + + int F_sideCur = (gh._sideGbwtSz + sideCur) >> 1; + int F_bpi = bpi + ((sideCur & 0x1) << 2); // Can be used as M_bpi as well + pack_1b_in_8b(F, gfmSide[F_sideCur], F_bpi); + assert_eq((gfmSide[F_sideCur] >> F_bpi) & 1, F); + + int M_sideCur = F_sideCur + (gh._sideGbwtSz >> 2); + pack_1b_in_8b(M, gfmSide[M_sideCur], F_bpi); + 
assert_eq((gfmSide[M_sideCur] >> F_bpi) & 1, M); +#endif + } else { + // Backward bucket: fill from most to least +#ifdef SIXTY4_FORMAT + gfmSide[sideCur] |= ((uint64_t)gbwtChar << ((31 - bpi) << 1)); + if(gbwtChar > 0) assert_gt(gfmSide[sideCur], 0); + // To be implemented ... + assert(false); + cerr << "Not implemented" << endl; + exit(1); +#else + pack_2b_in_8b(gbwtChar, gfmSide[sideCur], 3-bpi); + assert_eq((gfmSide[sideCur] >> ((3-bpi)*2)) & 3, gbwtChar); + // To be implemented ... + assert(false); + cerr << "Not implemented" << endl; + exit(1); +#endif + } + } // end loop over bit-pairs + assert_eq(0, (occ[0] + occ[1] + occ[2] + occ[3] + zOffs.size()) & 3); +#ifdef SIXTY4_FORMAT + assert_eq(0, si & 31); +#else + assert_eq(0, si & 3); +#endif + + sideCur++; + if((sideCur << 1) == (int)gh._sideGbwtSz) { + sideCur = 0; + index_t *uside = reinterpret_cast(gfmSide.ptr()); + // Write 'A', 'C', 'G' and 'T' tallies + side += sideSz; + assert_leq(side, gh._gbwtTotSz); + uside[(sideSz / sizeof(index_t))-6] = endianizeIndex(F_locSave, this->toBe()); + uside[(sideSz / sizeof(index_t))-5] = endianizeIndex(M_occSave, this->toBe()); + uside[(sideSz / sizeof(index_t))-4] = endianizeIndex(occSave[0], this->toBe()); + uside[(sideSz / sizeof(index_t))-3] = endianizeIndex(occSave[1], this->toBe()); + uside[(sideSz / sizeof(index_t))-2] = endianizeIndex(occSave[2], this->toBe()); + uside[(sideSz / sizeof(index_t))-1] = endianizeIndex(occSave[3], this->toBe()); + F_locSave = F_loc; + M_occSave = M_occ; + occSave[0] = occ[0]; + occSave[1] = occ[1]; + occSave[2] = occ[2]; + occSave[3] = occ[3]; + // Write backward side to primary file + out5.write((const char *)gfmSide.ptr(), sideSz); + + // + memcpy(((char*)this->_gfm.get()) + side - sideSz, (const char *)gfmSide.ptr(), sideSz); + } + } + VMSG_NL("Exited LocalGFM loop"); + // Assert that our loop counter got incremented right to the end + assert_eq(side, gh._gbwtTotSz); + // Assert that we wrote the expected amount to out5 + 
assert_eq(((uint32_t)out5.tellp() - beforeGbwtOff), gh._gbwtTotSz); + // assert that the last thing we did was write a forward bucket + + // + // Write zOffs to primary stream + // + assert_gt(zOffs.size(), 0); + writeIndex(out5, zOffs.size(), this->toBe()); + for(size_t i = 0; i < zOffs.size(); i++) { + writeIndex(out5, zOffs[i], this->toBe()); + } + + // + // Finish building fchr + // + // Exclusive prefix sum on fchr + for(int i = 1; i < 4; i++) { + fchr[i] += fchr[i-1]; + } + assert_lt(fchr[3], gbwtLen); + // Shift everybody up by one + for(int i = 4; i >= 1; i--) { + fchr[i] = fchr[i-1]; + } + fchr[0] = 0; + // Write fchr to primary file + for(int i = 0; i < 5; i++) { + writeIndex(out5, fchr[i], this->toBe()); + } + this->_fchr.init(new index_t[5], 5, true); + memcpy(this->_fchr.get(), fchr, sizeof(index_t) * 5); + + // Initialize _zGbwtByteOffs and _zGbwtBpOffs + this->_zOffs = zOffs; + this->postReadInit(gh); + + // Build ftab and eftab + EList > tFtab; + tFtab.resizeExact(ftabLen - 1); + for(index_t i = 0; i + 1 < ftabLen; i++) { + index_t q = i; + pair range(0, gh._gbwtLen); + SideLocus tloc, bloc; + SideLocus::initFromTopBot(range.first, range.second, gh, this->gfm(), tloc, bloc); + index_t j = 0; + for(; j < gh._ftabChars; j++) { + int nt = q & 0x3; q >>= 2; + if(bloc.valid()) { + range = this->mapGLF(tloc, bloc, nt); + } else { + range = this->mapGLF1(range.first, tloc, nt); + } + if(range.first == (index_t)INDEX_MAX || range.first >= range.second) { + break; + } + if(range.first + 1 == range.second) { + tloc.initFromRow(range.first, gh, this->gfm()); + bloc.invalidate(); + } else { + SideLocus::initFromTopBot(range.first, range.second, gh, this->gfm(), tloc, bloc); + } + } + + if(range.first >= range.second || j < gh._ftabChars) { + if(i == 0) { + tFtab[i].first = tFtab[i].second = 0; + } else { + tFtab[i].first = tFtab[i].second = tFtab[i-1].second; + } + } else { + tFtab[i].first = range.first; + tFtab[i].second = range.second; + } + +#ifndef NDEBUG 
+ if(gbwt.ftab.size() > i) { + assert_eq(tFtab[i].first, gbwt.ftab[i].first); + assert_eq(tFtab[i].second, gbwt.ftab[i].second); + } +#endif + } + + // Clear memory + this->_gfm.reset(); + this->_fchr.reset(); + this->_zOffs.clear(); + this->_zGbwtByteOffs.clear(); + this->_zGbwtBpOffs.clear(); + + // + // Finish building ftab and build eftab + // + // Prefix sum on ftable + index_t eftabLen = 0; + for(index_t i = 1; i + 1 < ftabLen; i++) { + if(tFtab[i-1].second != tFtab[i].first) { + eftabLen += 2; + } + } + if(gh._gbwtLen + (eftabLen >> 1) < gh._gbwtLen) { + cerr << "Too many eftab entries: " + << gh._gbwtLen << " + " << (eftabLen >> 1) + << " > " << (index_t)INDEX_MAX << endl; + throw 1; + } + EList eftab(EBWT_CAT); + try { + eftab.resize(eftabLen); + eftab.fillZero(); + } catch(bad_alloc &e) { + cerr << "Out of memory allocating eftab[] " + << "in LocalGFM::buildToDisk() at " << __FILE__ << ":" + << __LINE__ << endl; + throw e; + } + index_t eftabCur = 0; + ftab[0] = tFtab[0].first; + ftab[1] = tFtab[0].second; + for(index_t i = 1; i + 1 < ftabLen; i++) { + if(ftab[i] != tFtab[i].first) { + index_t lo = ftab[i]; + index_t hi = tFtab[i].first; + assert_lt(eftabCur*2+1, eftabLen); + eftab[eftabCur*2] = lo; + eftab[eftabCur*2+1] = hi; + assert_leq(lo, hi + 4); + ftab[i] = (eftabCur++) ^ (index_t)INDEX_MAX; // insert pointer into eftab + assert_eq(lo, GFM::ftabLo(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i)); + assert_eq(hi, GFM::ftabHi(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i)); + } + ftab[i+1] = tFtab[i].second; + } +#ifndef NDEBUG + for(index_t i = 0; i + 1 < ftabLen; i++ ){ + assert_eq(tFtab[i].first, GFM::ftabHi(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i)); + assert_eq(tFtab[i].second, GFM::ftabLo(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i+1)); + } +#endif + // Write ftab to primary file + for(index_t i = 0; i < ftabLen; i++) { + writeIndex(out5, ftab[i], this->toBe()); + } + // Write eftab to primary file + 
out5pos = out5.tellp(); + out5.seekp(headerPos); + writeIndex(out5, eftabLen, this->toBe()); + out5.seekp(out5pos); + for(index_t i = 0; i < eftabLen; i++) { + writeIndex(out5, eftab[i], this->toBe()); + } + // Note: if you'd like to sanity-check the Ebwt, you'll have to + // read it back into memory first! + assert(!this->isInMemory()); + VMSG_NL("Exiting LocalGFM::buildToDisk()"); +} + +/** + * Build an Ebwt from a string 's' and its suffix array 'sa' (which + * might actually be a suffix array *builder* that builds blocks of the + * array on demand). The bulk of the Ebwt, i.e. the ebwt and offs + * arrays, is written directly to disk. This is by design: keeping + * those arrays in memory needlessly increases the footprint of the + * building process. Instead, we prefer to build the Ebwt directly + * "to disk" and then read it back into memory later as necessary. + * + * It is assumed that the header values and join-related values (nPat, + * plen) have already been written to 'out1' before this function + * is called. When this function is finished, it will have + * additionally written ebwt, zOff, fchr, ftab and eftab to the primary + * file and offs to the secondary file. + * + * Assume DNA/RNA/any alphabet with 4 or fewer elements. + * Assume occ array entries are 32 bits each. 
+ * + * @param sa the suffix array to convert to a Ebwt + * @param s the original string + * @param out + */ +template +template +void LocalGFM::buildToDisk( + const EList& sa, + const TStr& s, + ostream& out5, + ostream& out6, + streampos headerPos) +{ + assert_leq(s.length(), std::numeric_limits::max()); + const GFMParams& gh = this->_gh; + assert(gh.repOk()); + assert(gh.linearFM()); + assert_lt(s.length(), gh.gbwtLen()); + assert_eq(s.length(), gh._len); + assert_gt(gh._lineRate, 3); + + index_t len = gh._len; + index_t gbwtLen = gh._gbwtLen; + assert_eq(len + 1, gbwtLen); + streampos out5pos = out5.tellp(); + out5.seekp(headerPos); + writeIndex(out5, gbwtLen, this->toBe()); + writeIndex(out5, gh._numNodes, this->toBe()); + headerPos = out5.tellp(); + out5.seekp(out5pos); + + index_t ftabLen = gh._ftabLen; + index_t sideSz = gh._sideSz; + index_t gbwtTotSz = gh._gbwtTotSz; + index_t fchr[] = {0, 0, 0, 0, 0}; + EList ftab(EBWT_CAT); + EList zOffs; + + // Save # of occurrences of each character as we walk along the bwt + index_t occ[4] = {0, 0, 0, 0}; + index_t occSave[4] = {0, 0, 0, 0}; + + // Record rows that should "absorb" adjacent rows in the ftab. + // The absorbed rows represent suffixes shorter than the ftabChars + // cutoff. + uint8_t absorbCnt = 0; + EList absorbFtab(EBWT_CAT); + try { + VMSG_NL("Allocating ftab, absorbFtab"); + ftab.resize(ftabLen); + ftab.fillZero(); + absorbFtab.resize(ftabLen); + absorbFtab.fillZero(); + } catch(bad_alloc &e) { + cerr << "Out of memory allocating ftab[] or absorbFtab[] " + << "in LocalGFM::buildToDisk() at " << __FILE__ << ":" + << __LINE__ << endl; + throw e; + } + + // Allocate the side buffer; holds a single side as its being + // constructed and then written to disk. Reused across all sides. 
+#ifdef SIXTY4_FORMAT + EList gfmSide(EBWT_CAT); +#else + EList gfmSide(EBWT_CAT); +#endif + try { +#ifdef SIXTY4_FORMAT + gfmSide.resize(sideSz >> 3); +#else + gfmSide.resize(sideSz); +#endif + } catch(bad_alloc &e) { + cerr << "Out of memory allocating gfmSide[] in " + << "LocalGFM::buildToDisk() at " << __FILE__ << ":" + << __LINE__ << endl; + throw e; + } + + // Points to the base offset within ebwt for the side currently + // being written + index_t side = 0; + + // Whether we're assembling a forward or a reverse bucket + bool fw = true; + int sideCur = 0; + + // Have we skipped the '$' in the last column yet? + ASSERT_ONLY(bool dollarSkipped = false); + + index_t si = 0; // string offset (chars) + ASSERT_ONLY(uint32_t lastSufInt = 0); + ASSERT_ONLY(bool inSA = true); // true iff saI still points inside suffix + // array (as opposed to the padding at the + // end) + // Iterate over packed bwt bytes + VMSG_NL("Entering LocalGFM loop"); + ASSERT_ONLY(uint32_t beforeGbwtOff = (uint32_t)out5.tellp()); + while(side < gbwtTotSz) { + // Sanity-check our cursor into the side buffer + assert_geq(sideCur, 0); + assert_lt(sideCur, (int)gh._sideGbwtSz); + assert_eq(0, side % sideSz); // 'side' must be on side boundary + gfmSide[sideCur] = 0; // clear + assert_lt(side + sideCur, gbwtTotSz); + // Iterate over bit-pairs in the si'th character of the BWT +#ifdef SIXTY4_FORMAT + for(int bpi = 0; bpi < 32; bpi++, si++) { +#else + for(int bpi = 0; bpi < 4; bpi++, si++) { +#endif + int bwtChar; + bool count = true; + if(si <= len) { + // Still in the SA; extract the bwtChar + index_t saElt = (index_t)sa[si]; + // (that might have triggered sa to calc next suf block) + if(saElt == 0) { + // Don't add the '$' in the last column to the BWT + // transform; we can't encode a $ (only A C T or G) + // and counting it as, say, an A, will mess up the + // LR mapping + bwtChar = 0; count = false; + ASSERT_ONLY(dollarSkipped = true); + zOffs.push_back(si); // remember the SA row that + // 
corresponds to the 0th suffix + } else { + bwtChar = (int)(s[saElt-1]); + assert_lt(bwtChar, 4); + // Update the fchr + fchr[bwtChar]++; + } + // Update ftab + if((len-saElt) >= (index_t)gh._ftabChars) { + // Turn the first ftabChars characters of the + // suffix into an integer index into ftab. The + // leftmost (lowest index) character of the suffix + // goes in the most significant bit pair if the + // integer. + uint32_t sufInt = 0; + for(int i = 0; i < gh._ftabChars; i++) { + sufInt <<= 2; + assert_lt((index_t)i, len-saElt); + sufInt |= (unsigned char)(s[saElt+i]); + } + // Assert that this prefix-of-suffix is greater + // than or equal to the last one (true b/c the + // suffix array is sorted) +#ifndef NDEBUG + if(lastSufInt > 0) assert_geq(sufInt, lastSufInt); + lastSufInt = sufInt; +#endif + // Update ftab + assert_lt(sufInt+1, ftabLen); + ftab[sufInt+1]++; + if(absorbCnt > 0) { + // Absorb all short suffixes since the last + // transition into this transition + absorbFtab[sufInt] = absorbCnt; + absorbCnt = 0; + } + } else { + // Otherwise if suffix is fewer than ftabChars + // characters long, then add it to the 'absorbCnt'; + // it will be absorbed into the next transition + assert_lt(absorbCnt, 255); + absorbCnt++; + } + // Suffix array offset boundary? 
- update offset array + if((si & gh._offMask) == si) { + assert_lt((si >> gh._offRate), gh._offsLen); + // Write offsets directly to the secondary output + // stream, thereby avoiding keeping them in memory + writeIndex(out6, saElt, this->toBe()); + } + } else { + // Strayed off the end of the SA, now we're just + // padding out a bucket +#ifndef NDEBUG + if(inSA) { + // Assert that we wrote all the characters in the + // string before now + assert_eq(si, len+1); + inSA = false; + } +#endif + // 'A' used for padding; important that padding be + // counted in the occ[] array + bwtChar = 0; + } + if(count) occ[bwtChar]++; + // Append BWT char to bwt section of current side + if(fw) { + // Forward bucket: fill from least to most +#ifdef SIXTY4_FORMAT + gfmSide[sideCur] |= ((uint64_t)bwtChar << (bpi << 1)); + if(bwtChar > 0) assert_gt(gfmSide[sideCur], 0); +#else + pack_2b_in_8b(bwtChar, gfmSide[sideCur], bpi); + assert_eq((gfmSide[sideCur] >> (bpi*2)) & 3, bwtChar); +#endif + } else { + // Backward bucket: fill from most to least +#ifdef SIXTY4_FORMAT + gfmSide[sideCur] |= ((uint64_t)bwtChar << ((31 - bpi) << 1)); + if(bwtChar > 0) assert_gt(gfmSide[sideCur], 0); +#else + pack_2b_in_8b(bwtChar, gfmSide[sideCur], 3-bpi); + assert_eq((gfmSide[sideCur] >> ((3-bpi)*2)) & 3, bwtChar); +#endif + } + } // end loop over bit-pairs + assert_eq(dollarSkipped ? 
3 : 0, (occ[0] + occ[1] + occ[2] + occ[3]) & 3); +#ifdef SIXTY4_FORMAT + assert_eq(0, si & 31); +#else + assert_eq(0, si & 3); +#endif + + sideCur++; + if(sideCur == (int)gh._sideGbwtSz) { + sideCur = 0; + index_t *uside = reinterpret_cast(gfmSide.ptr()); + // Write 'A', 'C', 'G' and 'T' tallies + side += sideSz; + assert_leq(side, gh._gbwtTotSz); + uside[(sideSz / sizeof(index_t))-4] = endianizeIndex(occSave[0], this->toBe()); + uside[(sideSz / sizeof(index_t))-3] = endianizeIndex(occSave[1], this->toBe()); + uside[(sideSz / sizeof(index_t))-2] = endianizeIndex(occSave[2], this->toBe()); + uside[(sideSz / sizeof(index_t))-1] = endianizeIndex(occSave[3], this->toBe()); + occSave[0] = occ[0]; + occSave[1] = occ[1]; + occSave[2] = occ[2]; + occSave[3] = occ[3]; + // Write backward side to primary file + out5.write((const char *)gfmSide.ptr(), sideSz); + } + } + VMSG_NL("Exited LocalGFM loop"); + if(absorbCnt > 0) { + // Absorb any trailing, as-yet-unabsorbed short suffixes into + // the last element of ftab + absorbFtab[ftabLen-1] = absorbCnt; + } + // Assert that our loop counter got incremented right to the end + assert_eq(side, gh._gbwtTotSz); + // Assert that we wrote the expected amount to out5 + assert_eq(((uint32_t)out5.tellp() - beforeGbwtOff), gh._gbwtTotSz); + // assert that the last thing we did was write a forward bucket + + // + // Write zOffs to primary stream + // + assert_eq(zOffs.size(), 1); + writeIndex(out5, zOffs.size(), this->toBe()); + for(size_t i = 0; i < zOffs.size(); i++) { + assert_neq(zOffs[i], (index_t)OFF_MASK); + writeIndex(out5, zOffs[i], this->toBe()); + } + + // + // Finish building fchr + // + // Exclusive prefix sum on fchr + for(int i = 1; i < 4; i++) { + fchr[i] += fchr[i-1]; + } + assert_lt(fchr[3], gbwtLen); + // Shift everybody up by one + for(int i = 4; i >= 1; i--) { + fchr[i] = fchr[i-1]; + } + fchr[0] = 0; + // Write fchr to primary file + for(int i = 0; i < 5; i++) { + writeIndex(out5, fchr[i], this->toBe()); + } + + // + 
// Finish building ftab and build eftab + // + // Prefix sum on ftable + index_t eftabLen = 0; + assert_eq(0, absorbFtab[0]); + for(index_t i = 1; i < ftabLen; i++) { + if(absorbFtab[i] > 0) eftabLen += 2; + } + assert_leq(eftabLen, (index_t)gh._ftabChars*2); + eftabLen = gh._ftabChars*2; + EList eftab(EBWT_CAT); + try { + eftab.resize(eftabLen); + eftab.fillZero(); + } catch(bad_alloc &e) { + cerr << "Out of memory allocating eftab[] " + << "in LocalGFM::buildToDisk() at " << __FILE__ << ":" + << __LINE__ << endl; + throw e; + } + index_t eftabCur = 0; + for(index_t i = 1; i < ftabLen; i++) { + index_t lo = ftab[i] + GFM::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i-1); + if(absorbFtab[i] > 0) { + // Skip a number of short pattern indicated by absorbFtab[i] + index_t hi = lo + absorbFtab[i]; + assert_lt(eftabCur*2+1, eftabLen); + eftab[eftabCur*2] = lo; + eftab[eftabCur*2+1] = hi; + ftab[i] = (eftabCur++) ^ (index_t)OFF_MASK; // insert pointer into eftab + assert_eq(lo, GFM::ftabLo(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i)); + assert_eq(hi, GFM::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i)); + } else { + ftab[i] = lo; + } + } + assert_eq(GFM::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, ftabLen-1), len+1); + // Write ftab to primary file + for(index_t i = 0; i < ftabLen; i++) { + writeIndex(out5, ftab[i], this->toBe()); + } + // Write eftab to primary file + out5pos = out5.tellp(); + out5.seekp(headerPos); + writeIndex(out5, eftabLen, this->toBe()); + out5.seekp(out5pos); + for(index_t i = 0; i < eftabLen; i++) { + writeIndex(out5, eftab[i], this->toBe()); + } + + // Note: if you'd like to sanity-check the Ebwt, you'll have to + // read it back into memory first! + assert(!this->isInMemory()); + VMSG_NL("Exiting LocalGFM::buildToDisk()"); +} + +/** + * Read an Ebwt from file with given filename. 
+ */ +template +void LocalGFM::readIntoMemory( + FILE *in5, + FILE *in6, + char *mmFile5, + char *mmFile6, + full_index_t& tidx, + full_index_t& localOffset, + full_index_t& joinedOffset, + bool switchEndian, + size_t& bytesRead, + size_t& bytesRead2, + int entireRev, + bool loadSASamp, + bool loadFtab, + bool loadRstarts, + bool justHeader, + int32_t lineRate, + int32_t offRate, + int32_t ftabChars, + bool mmSweep, + bool loadNames, + bool startVerbose) + { +#ifdef BOWTIE_MM + char *mmFile[] = { mmFile5, mmFile6 }; +#endif + + // Reads header entries one by one from primary stream + tidx = readIndex(in5, switchEndian); bytesRead += sizeof(full_index_t); + localOffset = readIndex(in5, switchEndian); bytesRead += sizeof(full_index_t); + joinedOffset = readIndex(in5, switchEndian); bytesRead += sizeof(full_index_t); + index_t len = readIndex(in5, switchEndian); bytesRead += sizeof(index_t); + index_t gbwtLen = readIndex(in5, switchEndian); bytesRead += sizeof(index_t); + index_t numNodes = readIndex(in5, switchEndian); bytesRead += sizeof(index_t); + index_t eftabLen = readIndex(in5, switchEndian); bytesRead += sizeof(index_t); + + // Create a new EbwtParams from the entries read from primary stream + this->_gh.init(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireRev); + + if(len <= 0) { + return; + } + + // Set up overridden suffix-array-sample parameters + uint32_t offsLen = this->_gh._offsLen; + uint32_t offRateDiff = 0; + uint32_t offsLenSampled = offsLen; + if(this->_overrideOffRate > offRate) { + offRateDiff = this->_overrideOffRate - offRate; + } + if(offRateDiff > 0) { + offsLenSampled >>= offRateDiff; + if((offsLen & ~((index_t)OFF_MASK << offRateDiff)) != 0) { + offsLenSampled++; + } + } + + // Can't override the offrate or isarate and use memory-mapped + // files; ultimately, all processes need to copy the sparser sample + // into their own memory spaces. 
+ if(this->_useMm && (offRateDiff)) { + cerr << "Error: Can't use memory-mapped files when the offrate is overridden" << endl; + throw 1; + } + + // Read nPat from primary stream + this->_nPat = readIndex(in5, switchEndian); + assert_eq(this->_nPat, 1); + bytesRead += sizeof(index_t); + this->_plen.reset(); + + // Read plen from primary stream + if(this->_useMm) { +#ifdef BOWTIE_MM + this->_plen.init((index_t*)(mmFile[0] + bytesRead), this->_nPat, false); + bytesRead += this->_nPat*sizeof(index_t); + fseek(in5, this->_nPat*sizeof(index_t), SEEK_CUR); +#endif + } else { + try { + if(this->_verbose || startVerbose) { + cerr << "Reading plen (" << this->_nPat << "): "; + logTime(cerr); + } + this->_plen.init(new index_t[this->_nPat], this->_nPat, true); + if(switchEndian) { + for(index_t i = 0; i < this->_nPat; i++) { + this->plen()[i] = readIndex(in5, switchEndian); + } + } else { + size_t r = MM_READ(in5, (void*)(this->plen()), this->_nPat*sizeof(index_t)); + if(r != (size_t)(this->_nPat*sizeof(index_t))) { + cerr << "Error reading _plen[] array: " << r << ", " << this->_nPat*sizeof(index_t) << endl; + throw 1; + } + } + } catch(bad_alloc& e) { + cerr << "Out of memory allocating plen[] in Ebwt::read()" + << " at " << __FILE__ << ":" << __LINE__ << endl; + throw e; + } + } + + bool shmemLeader; + + // TODO: I'm not consistent on what "header" means. Here I'm using + // "header" to mean everything that would exist in memory if we + // started to build the Ebwt but stopped short of the build*() step + // (i.e. everything up to and including join()). 
+ if(justHeader) return; + + this->_nFrag = readIndex(in5, switchEndian); + bytesRead += sizeof(index_t); + if(this->_verbose || startVerbose) { + cerr << "Reading rstarts (" << this->_nFrag*3 << "): "; + logTime(cerr); + } + assert_geq(this->_nFrag, this->_nPat); + this->_rstarts.reset(); + if(loadRstarts) { + if(this->_useMm) { +#ifdef BOWTIE_MM + this->_rstarts.init((index_t*)(mmFile[0] + bytesRead), this->_nFrag*3, false); + bytesRead += this->_nFrag*sizeof(index_t)*3; + fseek(in5, this->_nFrag*sizeof(index_t)*3, SEEK_CUR); +#endif + } else { + this->_rstarts.init(new index_t[this->_nFrag*3], this->_nFrag*3, true); + if(switchEndian) { + for(index_t i = 0; i < this->_nFrag*3; i += 3) { + // fragment starting position in joined reference + // string, text id, and fragment offset within text + this->rstarts()[i] = readIndex(in5, switchEndian); + this->rstarts()[i+1] = readIndex(in5, switchEndian); + this->rstarts()[i+2] = readIndex(in5, switchEndian); + } + } else { + size_t r = MM_READ(in5, (void *)this->rstarts(), this->_nFrag*sizeof(index_t)*3); + if(r != (size_t)(this->_nFrag*sizeof(index_t)*3)) { + cerr << "Error reading _rstarts[] array: " << r << ", " << (this->_nFrag*sizeof(index_t)*3) << endl; + throw 1; + } + } + } + } else { + // Skip em + assert(this->rstarts() == NULL); + bytesRead += this->_nFrag*sizeof(index_t)*3; + fseek(in5, this->_nFrag*sizeof(index_t)*3, SEEK_CUR); + } + + this->_gfm.reset(); + if(this->_useMm) { +#ifdef BOWTIE_MM + this->_gfm.init((uint8_t*)(mmFile[0] + bytesRead), this->_gh._gbwtTotLen, false); + bytesRead += this->_gh._gbwtTotLen; + fseek(in5, this->_gh._gbwtTotLen, SEEK_CUR); +#endif + } else { + // Allocate ebwt (big allocation) + if(this->_verbose || startVerbose) { + cerr << "Reading ebwt (" << this->_gh._gbwtTotLen << "): "; + logTime(cerr); + } + bool shmemLeader = true; + if(this->useShmem_) { + uint8_t *tmp = NULL; + shmemLeader = ALLOC_SHARED_U8( + (this->_in1Str + "[gfm]"), this->_gh._gbwtTotLen, &tmp, + "gfm[]", 
(this->_verbose || startVerbose)); + assert(tmp != NULL); + this->_gfm.init(tmp, this->_gh._gbwtTotLen, false); + if(this->_verbose || startVerbose) { + cerr << " shared-mem " << (shmemLeader ? "leader" : "follower") << endl; + } + } else { + try { + this->_gfm.init(new uint8_t[this->_gh._gbwtTotLen], this->_gh._gbwtTotLen, true); + } catch(bad_alloc& e) { + cerr << "Out of memory allocating the ebwt[] array for the Bowtie index. Please try" << endl + << "again on a computer with more memory." << endl; + throw 1; + } + } + if(shmemLeader) { + // Read ebwt from primary stream + uint64_t bytesLeft = this->_gh._gbwtTotLen; + char *pgbwt = (char*)this->gfm(); + + while (bytesLeft>0){ + size_t r = MM_READ(in5, (void *)pgbwt, bytesLeft); + if(MM_IS_IO_ERR(in5, r, bytesLeft)) { + cerr << "Error reading _ebwt[] array: " << r << ", " + << bytesLeft << endl; + throw 1; + } + pgbwt += r; + bytesLeft -= r; + } + if(switchEndian) { + uint8_t *side = this->gfm(); + for(size_t i = 0; i < this->_gh._numSides; i++) { + index_t *cums = reinterpret_cast(side + this->_gh._sideSz - sizeof(index_t)*2); + cums[0] = endianSwapIndex(cums[0]); + cums[1] = endianSwapIndex(cums[1]); + side += this->_gh._sideSz; + } + } +#ifdef BOWTIE_SHARED_MEM + if(useShmem_) NOTIFY_SHARED(this->gfm(), this->_gh._gbwtTotLen); +#endif + } else { + // Seek past the data and wait until master is finished + fseek(in5, this->_gh._gbwtTotLen, SEEK_CUR); +#ifdef BOWTIE_SHARED_MEM + if(useShmem_) WAIT_SHARED(this->gfm(), this->_gh._gbwtTotLen); +#endif + } + } + + // Read zOff from primary stream + this->_zOffs.clear(); + index_t num_zOffs = readIndex(in5, switchEndian); + bytesRead += sizeof(index_t); + for(index_t i = 0; i < num_zOffs; i++) { + index_t zOff = readIndex(in5, switchEndian); + bytesRead += sizeof(index_t); + assert_lt(zOff, gbwtLen); + this->_zOffs.push_back(zOff); + } + + try { + // Read fchr from primary stream + if(this->_verbose || startVerbose) cerr << "Reading fchr (5)" << endl; + 
this->_fchr.reset(); + if(this->_useMm) { +#ifdef BOWTIE_MM + this->_fchr.init((index_t*)(mmFile[0] + bytesRead), 5, false); + bytesRead += 5*sizeof(index_t); + fseek(in5, 5*sizeof(index_t), SEEK_CUR); +#endif + } else { + this->_fchr.init(new index_t[5], 5, true); + for(index_t i = 0; i < 5; i++) { + this->fchr()[i] = readIndex(in5, switchEndian); + assert_leq(this->fchr()[i], gbwtLen); + assert(i <= 0 || this->fchr()[i] >= this->fchr()[i-1]); + } + } + assert_gt(this->fchr()[4], this->fchr()[0]); + // Read ftab from primary stream + if(this->_verbose || startVerbose) { + if(loadFtab) { + cerr << "Reading ftab (" << this->_gh._ftabLen << "): "; + logTime(cerr); + } else { + cerr << "Skipping ftab (" << this->_gh._ftabLen << "): "; + } + } + this->_ftab.reset(); + if(loadFtab) { + if(this->_useMm) { +#ifdef BOWTIE_MM + this->_ftab.init((index_t*)(mmFile[0] + bytesRead), this->_gh._ftabLen, false); + bytesRead += this->_gh._ftabLen*sizeof(index_t); + fseek(in5, this->_gh._ftabLen*sizeof(index_t), SEEK_CUR); +#endif + } else { + this->_ftab.init(new index_t[this->_gh._ftabLen], this->_gh._ftabLen, true); + if(switchEndian) { + for(uint32_t i = 0; i < this->_gh._ftabLen; i++) + this->ftab()[i] = readIndex(in5, switchEndian); + } else { + size_t r = MM_READ(in5, (void *)this->ftab(), this->_gh._ftabLen*sizeof(index_t)); + if(r != (size_t)(this->_gh._ftabLen*sizeof(index_t))) { + cerr << "Error reading _ftab[] array: " << r << ", " << (this->_gh._ftabLen*sizeof(index_t)) << endl; + throw 1; + } + } + } + // Read etab from primary stream + if(this->_verbose || startVerbose) { + if(loadFtab) { + cerr << "Reading eftab (" << this->_gh._eftabLen << "): "; + logTime(cerr); + } else { + cerr << "Skipping eftab (" << this->_gh._eftabLen << "): "; + } + + } + this->_eftab.reset(); + if(this->_useMm) { +#ifdef BOWTIE_MM + this->_eftab.init((index_t*)(mmFile[0] + bytesRead), this->_gh._eftabLen, false); + bytesRead += this->_gh._eftabLen*sizeof(index_t); + fseek(in5, 
this->_gh._eftabLen*sizeof(index_t), SEEK_CUR); +#endif + } else { + this->_eftab.init(new index_t[this->_gh._eftabLen], this->_gh._eftabLen, true); + if(switchEndian) { + for(uint32_t i = 0; i < this->_gh._eftabLen; i++) + this->eftab()[i] = readIndex(in5, switchEndian); + } else { + size_t r = MM_READ(in5, (void *)this->eftab(), this->_gh._eftabLen*sizeof(index_t)); + if(r != (size_t)(this->_gh._eftabLen*sizeof(index_t))) { + cerr << "Error reading _eftab[] array: " << r << ", " << (this->_gh._eftabLen*sizeof(index_t)) << endl; + throw 1; + } + } + } + for(uint32_t i = 0; i < this->_gh._eftabLen; i++) { + if(i > 0 && this->eftab()[i] > 0) { + assert_geq(this->eftab()[i] + 4, this->eftab()[i-1]); + } else if(i > 0 && this->eftab()[i-1] == 0) { + assert_eq(0, this->eftab()[i]); + } + } + } else { + assert(this->ftab() == NULL); + assert(this->eftab() == NULL); + // Skip ftab + bytesRead += this->_gh._ftabLen*sizeof(index_t); + fseek(in5, this->_gh._ftabLen*sizeof(index_t), SEEK_CUR); + // Skip eftab + bytesRead += this->_gh._eftabLen*sizeof(index_t); + fseek(in5, this->_gh._eftabLen*sizeof(index_t), SEEK_CUR); + } + } catch(bad_alloc& e) { + cerr << "Out of memory allocating fchr[], ftab[] or eftab[] arrays for the Bowtie index." << endl + << "Please try again on a computer with more memory." << endl; + throw 1; + } + + this->_offs.reset(); + if(loadSASamp) { + shmemLeader = true; + if(this->_verbose || startVerbose) { + cerr << "Reading offs (" << offsLenSampled << " " << std::setw(2) << sizeof(index_t)*8 << "-bit words): "; + logTime(cerr); + } + + if(!this->_useMm) { + if(!this->useShmem_) { + // Allocate offs_ + try { + this->_offs.init(new index_t[offsLenSampled], offsLenSampled, true); + } catch(bad_alloc& e) { + cerr << "Out of memory allocating the offs[] array for the Bowtie index." << endl + << "Please try again on a computer with more memory." 
<< endl; + throw 1; + } + } else { + index_t *tmp = NULL; + shmemLeader = ALLOC_SHARED_U32( + (this->_in2Str + "[offs]"), offsLenSampled*2, &tmp, + "offs", (this->_verbose || startVerbose)); + this->_offs.init((index_t*)tmp, offsLenSampled, false); + } + } + + if(this->_overrideOffRate < 32) { + if(shmemLeader) { + // Allocate offs (big allocation) + if(switchEndian || offRateDiff > 0) { + assert(!this->_useMm); + const uint32_t blockMaxSz = (2 * 1024 * 1024); // 2 MB block size + const uint32_t blockMaxSzUIndex = (blockMaxSz / sizeof(index_t)); // # UIndexs per block + char *buf; + try { + buf = new char[blockMaxSz]; + } catch(std::bad_alloc& e) { + cerr << "Error: Out of memory allocating part of _offs array: '" << e.what() << "'" << endl; + throw e; + } + for(index_t i = 0; i < offsLen; i += blockMaxSzUIndex) { + index_t block = min((index_t)blockMaxSzUIndex, (index_t)(offsLen - i)); + size_t r = MM_READ(in6, (void *)buf, block * sizeof(index_t)); + if(r != (size_t)(block * sizeof(index_t))) { + cerr << "Error reading block of _offs[] array: " << r << ", " << (block * sizeof(index_t)) << endl; + throw 1; + } + index_t idx = i >> offRateDiff; + for(index_t j = 0; j < block; j += (1 << offRateDiff)) { + assert_lt(idx, offsLenSampled); + this->offs()[idx] = ((index_t*)buf)[j]; + if(switchEndian) { + this->offs()[idx] = endianSwapIndex(this->offs()[idx]); + } + idx++; + } + } + delete[] buf; + } else { + if(this->_useMm) { +#ifdef BOWTIE_MM + this->_offs.init((index_t*)(mmFile[1] + bytesRead2), offsLen, false); + bytesRead2 += (offsLen * sizeof(index_t)); + fseek(in6, (offsLen * sizeof(index_t)), SEEK_CUR); +#endif + } else { + // If any of the high two bits are set + if((offsLen & 0xc0000000) != 0) { + if(sizeof(char *) <= 4) { + cerr << "Sanity error: sizeof(char *) <= 4 but offsLen is " << hex << offsLen << endl; + throw 1; + } + // offsLen << 2 overflows, so do it in four reads + char *offs = (char *)this->offs(); + for(size_t i = 0; i < sizeof(index_t); i++) { 
+ size_t r = MM_READ(in6, (void*)offs, offsLen); + if(r != (size_t)(offsLen)) { + cerr << "Error reading block of _offs[] array: " << r << ", " << offsLen << endl; + throw 1; + } + offs += offsLen; + } + } else { + // Do it all in one read + size_t r = MM_READ(in6, (void*)this->offs(), offsLen * sizeof(index_t)); + if(r != (size_t)(offsLen * sizeof(index_t))) { + cerr << "Error reading _offs[] array: " << r << ", " << (offsLen * sizeof(index_t)) << endl; + throw 1; + } + } + } + } +#ifdef BOWTIE_SHARED_MEM + if(this->useShmem_) NOTIFY_SHARED(this->offs(), offsLenSampled*sizeof(index_t)); +#endif + } else { + // Not the shmem leader + fseek(in6, offsLenSampled*sizeof(index_t), SEEK_CUR); +#ifdef BOWTIE_SHARED_MEM + if(this->useShmem_) WAIT_SHARED(this->offs(), offsLenSampled*sizeof(index_t)); +#endif + } + } + } + + this->postReadInit(this->_gh); // Initialize fields of Ebwt not read from file + if(this->_verbose || startVerbose) this->print(cerr, this->_gh); +} + +/** + * Extended Burrows-Wheeler transform data. + * HierEbwt is a specialized Ebwt index that represents one global index and a large set of local indexes. 
+ * + */ +template +class HGFM : public GFM { + typedef GFM PARENT_CLASS; +public: + /// Construct a GFM from the given input file + HGFM(const string& in, + ALTDB* altdb, + RepeatDB* repeatdb, + EList* readLens, + int needEntireReverse, + bool fw, + int32_t overrideOffRate, // = -1, + int32_t offRatePlus, // = -1, + bool useMm, // = false, + bool useShmem, // = false, + bool mmSweep, // = false, + bool loadNames, // = false, + bool loadSASamp, // = true, + bool loadFtab, // = true, + bool loadRstarts, // = true, + bool loadSpliceSites, // = true, + bool verbose, // = false, + bool startVerbose, // = false, + bool passMemExc, // = false, + bool sanityCheck, // = false + bool useHaplotype, // = false + bool skipLoading = false) : + GFM(in, + altdb, + repeatdb, + readLens, + needEntireReverse, + fw, + overrideOffRate, + offRatePlus, + useMm, + useShmem, + mmSweep, + loadNames, + loadSASamp, + loadFtab, + loadRstarts, + loadSpliceSites, + verbose, + startVerbose, + passMemExc, + sanityCheck, + useHaplotype, + skipLoading), + _in5(NULL), + _in6(NULL) + { + _in5Str = in + ".5." + gfm_ext; + _in6Str = in + ".6." + gfm_ext; + } + + /// Construct a HGFM from the given header parameters and string + /// vector, optionally using a blockwise suffix sorter with the + /// given 'bmax' and 'dcv' parameters. The string vector is + /// ultimately joined and the joined string is passed to buildToDisk(). 
+ template + HGFM( + TStr& s, + bool packed, + int needEntireReverse, + int32_t lineRate, + int32_t offRate, + int32_t ftabChars, + int32_t localOffRate, + int32_t localFtabChars, + int nthreads, + const string& snpfile, + const string& htfile, + const string& ssfile, + const string& exonfile, + const string& svfile, + const string& repeatfile, + const string& outfile, // base filename for GFM files + bool fw, + bool useBlockwise, + TIndexOffU bmax, + TIndexOffU bmaxSqrtMult, + TIndexOffU bmaxDivN, + int dcv, + EList& is, + EList& szs, + index_t sztot, + const RefReadInParams& refparams, + bool localIndex, // create local indexes? + EList* parent_szs, + EList* parent_refnames, + uint32_t seed, + int32_t overrideOffRate = -1, + bool verbose = false, + bool passMemExc = false, + bool sanityCheck = false); + + HGFM() {} + + ~HGFM() { + clearLocalGFMs(); + } + + /** + * Load this Ebwt into memory by reading it in from the _in1 and + * _in2 streams. + */ + void loadIntoMemory( + int needEntireReverse, + bool loadSASamp, + bool loadFtab, + bool loadRstarts, + bool loadNames, + bool verbose) + { + readIntoMemory( + needEntireReverse, // require reverse index to be concatenated reference reversed + loadSASamp, // load the SA sample portion? + loadFtab, // load the ftab (_ftab[] and _eftab[])? + loadRstarts, // load the r-starts (_rstarts[])? + false, // stop after loading the header portion? + NULL, // params + false, // mmSweep + loadNames, // loadNames + verbose); // startVerbose + } + + // I/O + void readIntoMemory( + int needEntireRev, + bool loadSASamp, + bool loadFtab, + bool loadRstarts, + bool justHeader, + GFMParams *params, + bool mmSweep, + bool loadNames, + bool startVerbose); + + /** + * Frees memory associated with the Ebwt. 
+ */ + void evictFromMemory() { + assert(PARENT_CLASS::isInMemory()); + clearLocalGFMs(); + PARENT_CLASS::evictFromMemory(); + } + + /** + * Sanity-check various pieces of the Ebwt + */ + void sanityCheckAll(int reverse) const { + PARENT_CLASS::sanityCheckAll(reverse); + for(size_t tidx = 0; tidx < _localGFMs.size(); tidx++) { + for(size_t local_idx = 0; local_idx < _localGFMs[tidx].size(); local_idx++) { + assert(_localGFMs[tidx][local_idx] != NULL); + _localGFMs[tidx][local_idx]->sanityCheckAll(reverse); + } + } + } + + const LocalGFM* getLocalGFM(index_t tidx, index_t offset) const { + assert_lt(tidx, _localGFMs.size()); + const EList*>& localGFMs = _localGFMs[tidx]; + index_t offsetidx = offset / local_index_interval; + if(offsetidx >= localGFMs.size()) { + return NULL; + } else { + return localGFMs[offsetidx]; + } + } + + const LocalGFM* prevLocalGFM(const LocalGFM* currLocalGFM) const { + assert(currLocalGFM != NULL); + index_t tidx = currLocalGFM->_tidx; + index_t offset = currLocalGFM->_localOffset; + if(offset < local_index_interval) { + return NULL; + } else { + return getLocalGFM(tidx, offset - local_index_interval); + } + } + + const LocalGFM* nextLocalGFM(const LocalGFM* currLocalGFM) const { + assert(currLocalGFM != NULL); + index_t tidx = currLocalGFM->_tidx; + index_t offset = currLocalGFM->_localOffset; + return getLocalGFM(tidx, offset + local_index_interval); + } + + void clearLocalGFMs() { + for(size_t tidx = 0; tidx < _localGFMs.size(); tidx++) { + for(size_t local_idx = 0; local_idx < _localGFMs[tidx].size(); local_idx++) { + assert(_localGFMs[tidx][local_idx] != NULL); + delete _localGFMs[tidx][local_idx]; + } + + _localGFMs[tidx].clear(); + } + + _localGFMs.clear(); + } + + +public: + index_t _nrefs; /// the number of reference sequences + EList _refLens; /// approx lens of ref seqs (excludes trailing ambig chars) + + EList*> > _localGFMs; + index_t _nlocalGFMs; + //index_t _local_index_interval; + + FILE *_in5; // input fd for primary index 
file + FILE *_in6; // input fd for secondary index file + string _in5Str; + string _in6Str; + + char *mmFile5_; + char *mmFile6_; + +private: + struct ThreadParam { + // input + SString s; + EList > alts; + EList > haplotypes; + bool bigEndian; + index_t local_offset; + index_t curr_sztot; + EList conv_local_szs; + index_t local_sztot; + index_t index_size; + string file; + EList sa; + index_t dcv; + index_t seed; + + // output + RefGraph* rg; + PathGraph* pg; + + // communication + bool done; + bool last; + bool mainThread; + }; + static void gbwt_worker(void* vp); +}; + + +template +void HGFM::gbwt_worker(void* vp) +{ + ThreadParam& tParam = *(ThreadParam*)vp; + while(!tParam.last) { + if(tParam.mainThread) { + assert(!tParam.done); + if(tParam.s.length() <= 0) { + tParam.done = true; + return; + } + } else { + while(tParam.done) { + if(tParam.last) return; +#if defined(_TTHREAD_WIN32_) + Sleep(1); +#elif defined(_TTHREAD_POSIX_) + const static timespec ts = {0, 1000000}; // 1 millisecond + nanosleep(&ts, NULL); +#endif + } + if(tParam.s.length() <= 0) { + tParam.done = true; + continue; + } + } + while(true) { + if(tParam.alts.empty()) { + KarkkainenBlockwiseSA > bsa( + tParam.s, + (index_t)(tParam.s.length()+1), + 1, + tParam.dcv, + tParam.seed, + false, /* this->_sanity */ + false, /* this->_passMemExc */ + false); /* this->_verbose */ + assert(bsa.suffixItrIsReset()); + assert_eq(bsa.size(), tParam.s.length()+1); + tParam.sa.clear(); + for(index_t i = 0; i < bsa.size(); i++) { + tParam.sa.push_back(bsa.nextSuffix()); + } + } else { + tParam.rg = NULL, tParam.pg = NULL; + bool exploded = false; + try { + tParam.rg = new RefGraph( + tParam.s, + tParam.conv_local_szs, + tParam.alts, + tParam.haplotypes, + tParam.file, + 1, /* num threads */ + false); /* verbose? */ + tParam.pg = new PathGraph( + *tParam.rg, + tParam.file, + local_max_gbwt, + 1, /* num threads */ + false); /* verbose? 
*/ + } catch (const NongraphException& err) { + cerr << "Warning: no variants or splice sites in this graph (" << tParam.curr_sztot << ")" << endl; + delete tParam.rg; + delete tParam.pg; + tParam.alts.clear(); + continue; + } catch (const ExplosionException& err) { + exploded = true; + } + if(!exploded) { + if(!tParam.pg->generateEdges(*tParam.rg)) { + cerr << "An error occurred - generateEdges" << endl; + throw 1; + } + exploded = tParam.pg->getNumEdges() > local_max_gbwt; + } + if(exploded) { + cerr << "Warning: a local graph exploded (offset: " << tParam.curr_sztot << ", length: " << tParam.local_sztot << ")" << endl; + + delete tParam.pg; tParam.pg = NULL; + delete tParam.rg; tParam.rg = NULL; + if(tParam.alts.size() <= 1) { + tParam.alts.clear(); + } else { + for(index_t s = 2; s < tParam.alts.size(); s += 2) { + tParam.alts[s >> 1] = tParam.alts[s]; + } + tParam.alts.resize(tParam.alts.size() >> 1); + tParam.haplotypes.clear(); + for(index_t a = 0; a < tParam.alts.size(); a++) { + const ALT& alt = tParam.alts[a]; + if(!alt.snp()) continue; + tParam.haplotypes.expand(); + tParam.haplotypes.back().left = alt.pos; + if(alt.deletion()) { + tParam.haplotypes.back().right = alt.pos + alt.len - 1; + } else { + tParam.haplotypes.back().right = alt.pos; + } + tParam.haplotypes.back().alts.clear(); + tParam.haplotypes.back().alts.push_back(a); + } + } + continue; + } + } + break; + } + tParam.done = true; + if(tParam.mainThread) break; + } +} + +/// Construct a GFM from the given header parameters and string +/// vector, optionally using a blockwise suffix sorter with the +/// given 'bmax' and 'dcv' parameters. The string vector is +/// ultimately joined and the joined string is passed to buildToDisk(). 
+template +template +HGFM::HGFM( + TStr& s, + bool packed, + int needEntireReverse, + int32_t lineRate, + int32_t offRate, + int32_t ftabChars, + int32_t localOffRate, + int32_t localFtabChars, + int nthreads, + const string& snpfile, + const string& htfile, + const string& ssfile, + const string& exonfile, + const string& svfile, + const string& repeatfile, + const string& outfile, // base filename for EBWT files + bool fw, + bool useBlockwise, + TIndexOffU bmax, + TIndexOffU bmaxSqrtMult, + TIndexOffU bmaxDivN, + int dcv, + EList& is, + EList& szs, + index_t sztot, + const RefReadInParams& refparams, + bool localIndex, + EList* parent_szs, + EList* parent_refnames, + uint32_t seed, + int32_t overrideOffRate, + bool verbose, + bool passMemExc, + bool sanityCheck) : + GFM(s, + packed, + needEntireReverse, + lineRate, + offRate, + ftabChars, + nthreads, + snpfile, + htfile, + ssfile, + exonfile, + svfile, + repeatfile, + outfile, + fw, + useBlockwise, + bmax, + bmaxSqrtMult, + bmaxDivN, + dcv, + is, + szs, + sztot, + refparams, + parent_szs, + parent_refnames, + seed, + overrideOffRate, + verbose, + passMemExc, + sanityCheck), + _in5(NULL), + _in6(NULL) +{ + _in5Str = outfile + ".5." + gfm_ext; + _in6Str = outfile + ".6." + gfm_ext; + + // const bool repeat_index = (parent_szs != NULL); + + int32_t local_lineRate; + if(snpfile == "" && ssfile == "" && exonfile == "") { + local_lineRate = local_lineRate_fm; + } else { + local_lineRate = local_lineRate_gfm; + } + + // Open output files + ofstream fout5(_in5Str.c_str(), ios::binary); + if(!fout5.good()) { + cerr << "Could not open index file for writing: \"" << _in5Str.c_str() << "\"" << endl + << "Please make sure the directory exists and that permissions allow writing by" << endl + << "HISAT2." 
<< endl; + throw 1; + } + ofstream fout6(_in6Str.c_str(), ios::binary); + if(!fout6.good()) { + cerr << "Could not open index file for writing: \"" << _in6Str.c_str() << "\"" << endl + << "Please make sure the directory exists and that permissions allow writing by" << endl + << "HISAT2." << endl; + throw 1; + } + + // Split the whole genome into a set of local indexes + _nrefs = 0; + _nlocalGFMs = 0; + + index_t cumlen = 0; + typedef EList EList_RefRecord; + ELList all_local_recs; + + if(localIndex) { + // For each unambiguous stretch... + for(index_t i = 0; i < szs.size(); i++) { + const RefRecord& rec = szs[i]; + if(rec.first) { + if(_nrefs > 0) { + // refLens_ links each reference sequence with the total number + // of ambiguous and unambiguous characters in it. + _refLens.push_back(cumlen); + } + cumlen = 0; + _nrefs++; + all_local_recs.expand(); + assert_eq(_nrefs, all_local_recs.size()); + } else if(i == 0) { + cerr << "First record in reference index file was not marked as " + << "'first'" << endl; + throw 1; + } + + assert_gt(_nrefs, 0); + assert_eq(_nrefs, all_local_recs.size()); + EList& ref_local_recs = all_local_recs[_nrefs-1]; + index_t next_cumlen = cumlen + rec.off + rec.len; + index_t local_off = (cumlen / local_index_interval) * local_index_interval; + if(local_off >= local_index_interval) { + local_off -= local_index_interval; + } + for(;local_off < next_cumlen; local_off += local_index_interval) { + if(local_off + local_index_size <= cumlen) { + continue; + } + index_t local_idx = local_off / local_index_interval; + + if(local_idx >= ref_local_recs.size()) { + assert_eq(local_idx, ref_local_recs.size()); + ref_local_recs.expand(); + _nlocalGFMs++; + } + assert_lt(local_idx, ref_local_recs.size()); + EList_RefRecord& local_recs = ref_local_recs[local_idx]; + assert_gt(local_off + local_index_size, cumlen); + local_recs.expand(); + if(local_off + local_index_size < cumlen + rec.off) { + local_recs.back().off = local_off + local_index_size - 
std::max(local_off, cumlen); + local_recs.back().len = 0; + } else { + if(local_off < cumlen + rec.off) { + local_recs.back().off = rec.off - (local_off > cumlen ? local_off - cumlen : 0); + } else { + local_recs.back().off = 0; + } + local_recs.back().len = std::min(next_cumlen, local_off + local_index_size) - std::max(local_off, cumlen + rec.off); + } + local_recs.back().first = (local_recs.size() == 1); + } + cumlen = next_cumlen; + } + + // Store a cap entry for the end of the last reference seq + _refLens.push_back(cumlen); + +#ifndef NDEBUG + EList temp_szs; + index_t temp_sztot = 0; + index_t temp_nlocalGFMs = 0; + for(size_t tidx = 0; tidx < all_local_recs.size(); tidx++) { + assert_lt(tidx, _refLens.size()); + EList& ref_local_recs = all_local_recs[tidx]; + assert_eq((_refLens[tidx] + local_index_interval - 1) / local_index_interval, ref_local_recs.size()); + temp_szs.expand(); + temp_szs.back().off = 0; + temp_szs.back().len = 0; + temp_szs.back().first = true; + index_t temp_ref_len = 0; + index_t temp_ref_sztot = 0; + temp_nlocalGFMs += ref_local_recs.size(); + for(size_t i = 0; i < ref_local_recs.size(); i++) { + EList_RefRecord& local_recs = ref_local_recs[i]; + index_t local_len = 0; + for(size_t j = 0; j < local_recs.size(); j++) { + assert(local_recs[j].off != 0 || local_recs[j].len != 0); + assert(j != 0 || local_recs[j].first); + RefRecord local_rec = local_recs[j]; + if(local_len < local_index_interval && local_recs[j].off > 0){ + if(local_len + local_recs[j].off > local_index_interval) { + temp_ref_len += (local_index_interval - local_len); + local_rec.off = local_index_interval - local_len; + } else { + temp_ref_len += local_recs[j].off; + } + } else { + local_rec.off = 0; + } + local_len += local_recs[j].off; + if(local_len < local_index_interval && local_recs[j].len > 0) { + if(local_len + local_recs[j].len > local_index_interval) { + temp_ref_len += (local_index_interval - local_len); + temp_ref_sztot += (local_index_interval - local_len); 
+ local_rec.len = local_index_interval - local_len; + } else { + temp_ref_len += local_recs[j].len; + temp_ref_sztot += local_recs[j].len; + } + } else { + local_rec.len = 0; + } + local_len += local_recs[j].len; + if(local_rec.off > 0) { + if(temp_szs.back().len > 0) { + temp_szs.expand(); + temp_szs.back().off = local_rec.off; + temp_szs.back().len = local_rec.len; + temp_szs.back().first = false; + } else { + temp_szs.back().off += local_rec.off; + temp_szs.back().len = local_rec.len; + } + } else if(local_rec.len > 0) { + temp_szs.back().len += local_rec.len; + } + } + if(i + 2 < ref_local_recs.size()) { + assert_eq(local_len, local_index_size); + assert_eq(temp_ref_len % local_index_interval, 0); + } else if (i + 1 < ref_local_recs.size()) { + assert_leq(local_len, local_index_size); + assert_geq(local_len, local_index_interval); + } else { + assert_eq(local_len % local_index_interval, _refLens[tidx] % local_index_interval); + } + } + assert_eq(temp_ref_len, _refLens[tidx]); + temp_sztot += temp_ref_sztot; + } + assert_eq(temp_sztot, sztot); + for(size_t i = 0; i < temp_szs.size(); i++) { + assert_lt(i, szs.size()); + assert_eq(temp_szs[i].off, szs[i].off); + assert_eq(temp_szs[i].len, szs[i].len); + assert_eq(temp_szs[i].first, szs[i].first); + } + assert_eq(temp_szs.size(), szs.size()); + assert_eq(_nlocalGFMs, temp_nlocalGFMs); +#endif + } + + uint32_t be = this->toBe(); + assert(fout5.good()); + assert(fout6.good()); + + // const local_index_t new_localFtabChars = (repeat_index ? 
4 : localFtabChars); + const local_index_t new_localFtabChars = localFtabChars; + + // When building an Ebwt, these header parameters are known + // "up-front", i.e., they can be written to disk immediately, + // before we join() or buildToDisk() + writeI32(fout5, 1, be); // endian hint for priamry stream + writeI32(fout6, 1, be); // endian hint for secondary stream + writeIndex(fout5, _nlocalGFMs, be); // number of local Ebwts + writeI32(fout5, local_lineRate, be); // 2^lineRate = size in bytes of 1 line + writeI32(fout5, 2, be); // not used + writeI32(fout5, (int32_t)localOffRate, be); // every 2^offRate chars is "marked" + writeI32(fout5, (int32_t)new_localFtabChars, be); // number of 2-bit chars used to address ftab + int32_t flags = 1; + if(this->_gh._entireReverse) flags |= GFM_ENTIRE_REV; + writeI32(fout5, -flags, be); // BTL: chunkRate is now deprecated + + if(localIndex) { + assert_gt(this->_nthreads, 0); + AutoArray threads(this->_nthreads - 1); + EList tParams; tParams.reserveExact((size_t)this->_nthreads); + for(index_t t = 0; t < (index_t)this->_nthreads; t++) { + tParams.expand(); + tParams.back().s.clear(); + tParams.back().rg = NULL; + tParams.back().pg = NULL; + tParams.back().file = outfile; + tParams.back().done = true; + tParams.back().last = false; + tParams.back().dcv = 1024; + tParams.back().seed = seed; + if(t + 1 < (index_t)this->_nthreads) { + tParams.back().mainThread = false; + threads[t] = new tthread::thread(gbwt_worker, (void*)&tParams.back()); + } else { + tParams.back().mainThread = true; + } + } + + // build local FM indexes + index_t curr_sztot = 0; + EList > alts; + for(size_t tidx = 0; tidx < _refLens.size(); tidx++) { + index_t refLen = _refLens[tidx]; + index_t local_offset = 0; + _localGFMs.expand(); + assert_lt(tidx, _localGFMs.size()); + while(local_offset < refLen) { + index_t t = 0; + while(local_offset < refLen && t < (index_t)this->_nthreads) { + assert_lt(t, tParams.size()); + ThreadParam& tParam = tParams[t]; + + 
tParam.index_size = std::min(refLen - local_offset, local_index_size); + assert_lt(tidx, all_local_recs.size()); + assert_lt(local_offset / local_index_interval, all_local_recs[tidx].size()); + EList_RefRecord& local_szs = all_local_recs[tidx][local_offset / local_index_interval]; + + tParam.conv_local_szs.clear(); + index_t local_len = 0, local_sztot = 0, local_sztot_interval = 0; + for(size_t i = 0; i < local_szs.size(); i++) { + assert(local_szs[i].off != 0 || local_szs[i].len != 0); + assert(i != 0 || local_szs[i].first); + tParam.conv_local_szs.push_back(local_szs[i]); + local_len += local_szs[i].off; + if(local_len < local_index_interval && local_szs[i].len > 0) { + if(local_len + local_szs[i].len > local_index_interval) { + local_sztot_interval += (local_index_interval - local_len); + } else { + local_sztot_interval += local_szs[i].len; + } + } + local_sztot += local_szs[i].len; + local_len += local_szs[i].len; + } + + // Extract sequence corresponding to this local index + tParam.s.resize(local_sztot); + if(refparams.reverse == REF_READ_REVERSE) { + tParam.s.install(s.buf() + s.length() - curr_sztot - local_sztot, local_sztot); + } else { + tParam.s.install(s.buf() + curr_sztot, local_sztot); + } + + // Extract ALTs corresponding to this local index + map alt_map; + tParam.alts.clear(); + ALT alt; + alt.pos = curr_sztot; + index_t alt_i = (index_t)this->_alts.bsearchLoBound(alt); + for(; alt_i < this->_alts.size(); alt_i++) { + const ALT& alt = this->_alts[alt_i]; + if(alt.snp()) { + if(alt.mismatch()) { + if(curr_sztot + local_sztot <= alt.pos) break; + } else if(alt.insertion()) { + if(curr_sztot + local_sztot < alt.pos) break; + } else { + assert(alt.deletion()); + if(curr_sztot + local_sztot < alt.pos + alt.len) break; + } + if(curr_sztot <= alt.pos) { + alt_map[alt_i] = (index_t)tParam.alts.size(); + tParam.alts.push_back(alt); + tParam.alts.back().pos -= curr_sztot; + } + } else if(alt.splicesite()) { + if(alt.excluded) continue; + if(curr_sztot + 
local_sztot <= alt.right + 1) continue; + if(curr_sztot <= alt.left) { + tParam.alts.push_back(alt); + tParam.alts.back().left -= curr_sztot; + tParam.alts.back().right -= curr_sztot; + } + } else { + assert(alt.exon()); + } + } + + // Extract haplotypes + tParam.haplotypes.clear(); + Haplotype haplotype; + haplotype.left = curr_sztot; + index_t haplotpye_i = (index_t)this->_haplotypes.bsearchLoBound(haplotype); + for(; haplotpye_i < this->_haplotypes.size(); haplotpye_i++) { + const Haplotype& haplotype = this->_haplotypes[haplotpye_i]; + if(curr_sztot + local_sztot <= haplotype.right) continue; + if(curr_sztot <= haplotype.left) { + tParam.haplotypes.push_back(haplotype); + tParam.haplotypes.back().left -= curr_sztot; + tParam.haplotypes.back().right -= curr_sztot; + for(index_t a = 0; a < tParam.haplotypes.back().alts.size(); a++) { + index_t alt_i = tParam.haplotypes.back().alts[a]; + if(alt_map.find(alt_i) == alt_map.end()) { + assert(false); + tParam.haplotypes.pop_back(); + break; + } + tParam.haplotypes.back().alts[a] = alt_map[alt_i]; + } + } + } + + tParam.local_offset = local_offset; + tParam.curr_sztot = curr_sztot; + tParam.local_sztot = local_sztot; + + assert(tParam.rg == NULL); + assert(tParam.pg == NULL); + tParam.done = false; + curr_sztot += local_sztot_interval; + local_offset += local_index_interval; + + t++; + } + + if(!tParams.back().done) { + gbwt_worker((void*)&tParams.back()); + } + + for(index_t t2 = 0; t2 < t; t2++) { + ThreadParam& tParam = tParams[t2]; + while(!tParam.done) { +#if defined(_TTHREAD_WIN32_) + Sleep(1); +#elif defined(_TTHREAD_POSIX_) + const static timespec ts = {0, 1000000}; // 1 millisecond + nanosleep(&ts, NULL); +#endif + } + + LocalGFM( + tParam.s, + tParam.sa, + tParam.pg, + (index_t)tidx, + tParam.local_offset, + tParam.curr_sztot, + tParam.alts, + tParam.index_size, + packed, + needEntireReverse, + local_lineRate, + localOffRate, // suffix-array sampling rate + new_localFtabChars, // number of chars in initial 
arrow-pair calc + outfile, // basename for .?.ebwt files + fw, // fw + dcv, // difference-cover period + tParam.conv_local_szs, // list of reference sizes + tParam.local_sztot, // total size of all unambiguous ref chars + refparams, // reference read-in parameters + seed, // pseudo-random number generator seed + fout5, + fout6, + -1, // override offRate + false, // be silent + passMemExc, // pass exceptions up to the toplevel so that we can adjust memory settings automatically + sanityCheck); // verify results and internal consistency + tParam.s.clear(); + if(tParam.rg != NULL) { + assert(tParam.pg != NULL); + delete tParam.rg; tParam.rg = NULL; + delete tParam.pg; tParam.pg = NULL; + } + } + } + } + assert_eq(curr_sztot, sztot); + if(this->_nthreads > 1) { + for(index_t i = 0; i + 1 < (index_t)this->_nthreads; i++) { + tParams[i].last = true; + threads[i]->join(); + } + } + } + + fout5 << '\0'; + fout5.flush(); fout6.flush(); + if(fout5.fail() || fout6.fail()) { + cerr << "An error occurred writing the index to disk. Please check if the disk is full." << endl; + throw 1; + } + VMSG_NL("Returning from initFromVector"); + + // Close output files + fout5.flush(); + int64_t tellpSz5 = (int64_t)fout5.tellp(); + VMSG_NL("Wrote " << fout5.tellp() << " bytes to primary GFM file: " << _in5Str.c_str()); + fout5.close(); + bool err = false; + if(tellpSz5 > fileSize(_in5Str.c_str())) { + err = true; + cerr << "Index is corrupt: File size for " << _in5Str.c_str() << " should have been " << tellpSz5 + << " but is actually " << fileSize(_in5Str.c_str()) << "." << endl; + } + fout6.flush(); + int64_t tellpSz6 = (int64_t)fout6.tellp(); + VMSG_NL("Wrote " << fout6.tellp() << " bytes to secondary GFM file: " << _in6Str.c_str()); + fout6.close(); + if(tellpSz6 > fileSize(_in6Str.c_str())) { + err = true; + cerr << "Index is corrupt: File size for " << _in6Str.c_str() << " should have been " << tellpSz6 + << " but is actually " << fileSize(_in6Str.c_str()) << "." 
<< endl; + } + if(err) { + cerr << "Please check if there is a problem with the disk or if disk is full." << endl; + throw 1; + } + // Reopen as input streams + VMSG_NL("Re-opening _in5 and _in5 as input streams"); + if(this->_sanity) { + VMSG_NL("Sanity-checking ht2"); + assert(!this->isInMemory()); + readIntoMemory( + fw ? -1 : needEntireReverse, // 1 -> need the reverse to be reverse-of-concat + true, // load SA sample (_offs[])? + true, // load ftab (_ftab[] & _eftab[])? + true, // load r-starts (_rstarts[])? + false, // just load header? + NULL, // Params object to fill + false, // mm sweep? + true, // load names? + false); // verbose startup? + sanityCheckAll(refparams.reverse); + evictFromMemory(); + assert(!this->isInMemory()); + } + VMSG_NL("Returning from HGFM constructor"); +} + + +/** + * Read an Ebwt from file with given filename. + */ +template +void HGFM::readIntoMemory( + int needEntireRev, + bool loadSASamp, + bool loadFtab, + bool loadRstarts, + bool justHeader, + GFMParams *params, + bool mmSweep, + bool loadNames, + bool startVerbose) +{ + PARENT_CLASS::readIntoMemory(needEntireRev, + loadSASamp, + loadFtab, + loadRstarts, + justHeader || needEntireRev == 1, + params, + mmSweep, + loadNames, + startVerbose); + + bool switchEndian; // dummy; caller doesn't care +#ifdef BOWTIE_MM + char *mmFile[] = { NULL, NULL }; +#endif + if(_in5Str.length() > 0) { + if(this->_verbose || startVerbose) { + cerr << " About to open input files: "; + logTime(cerr); + } + // Initialize our primary and secondary input-stream fields + if(_in5 != NULL) fclose(_in5); + if(this->_verbose || startVerbose) cerr << "Opening \"" << _in5Str.c_str() << "\"" << endl; + if((_in5 = fopen(_in5Str.c_str(), "rb")) == NULL) { + cerr << "Could not open index file " << _in5Str.c_str() << endl; + } + if(loadSASamp) { + if(_in6 != NULL) fclose(_in6); + if(this->_verbose || startVerbose) cerr << "Opening \"" << _in6Str.c_str() << "\"" << endl; + if((_in6 = fopen(_in6Str.c_str(), "rb")) == 
NULL) { + cerr << "Could not open index file " << _in6Str.c_str() << endl; + } + } + if(this->_verbose || startVerbose) { + cerr << " Finished opening input files: "; + logTime(cerr); + } + +#ifdef BOWTIE_MM + if(this->_useMm /*&& !justHeader*/) { + const char *names[] = {_in5Str.c_str(), _in6Str.c_str()}; + int fds[] = { fileno(_in5), fileno(_in6) }; + for(int i = 0; i < (loadSASamp ? 2 : 1); i++) { + if(this->_verbose || startVerbose) { + cerr << " ¯ " << (i+1) << ": "; + logTime(cerr); + } + struct stat sbuf; + if (stat(names[i], &sbuf) == -1) { + perror("stat"); + cerr << "Error: Could not stat index file " << names[i] << " prior to memory-mapping" << endl; + throw 1; + } + mmFile[i] = (char*)mmap((void *)0, (size_t)sbuf.st_size, + PROT_READ, MAP_SHARED, fds[(size_t)i], 0); + if(mmFile[i] == (void *)(-1)) { + perror("mmap"); + cerr << "Error: Could not memory-map the index file " << names[i] << endl; + throw 1; + } + if(mmSweep) { + int sum = 0; + for(off_t j = 0; j < sbuf.st_size; j += 1024) { + sum += (int) mmFile[i][j]; + } + if(startVerbose) { + cerr << " Swept the memory-mapped ebwt index file 1; checksum: " << sum << ": "; + logTime(cerr); + } + } + } + mmFile5_ = mmFile[0]; + mmFile6_ = loadSASamp ? mmFile[1] : NULL; + } +#endif + } +#ifdef BOWTIE_MM + else if(this->_useMm && !justHeader) { + mmFile[0] = mmFile5_; + mmFile[1] = mmFile6_; + } + if(this->_useMm && !justHeader) { + assert(mmFile[0] == mmFile5_); + assert(mmFile[1] == mmFile6_); + } +#endif + + if(this->_verbose || startVerbose) { + cerr << " Reading header: "; + logTime(cerr); + } + + // Read endianness hints from both streams + size_t bytesRead = 0, bytesRead2 = 4; + switchEndian = false; + uint32_t one = readU32(_in5, switchEndian); // 1st word of primary stream + bytesRead += 4; + if(loadSASamp) { +#ifndef NDEBUG + assert_eq(one, readU32(_in6, switchEndian)); // should match! 
+#else + readU32(_in6, switchEndian); +#endif + } + if(one != 1) { + assert_eq((1u<<24), one); + assert_eq(1, endianSwapU32(one)); + switchEndian = true; + } + + // Can't switch endianness and use memory-mapped files; in order to + // support this, someone has to modify the file to switch + // endiannesses appropriately, and we can't do this inside Bowtie + // or we might be setting up a race condition with other processes. + if(switchEndian && this->_useMm) { + cerr << "Error: Can't use memory-mapped files when the index is the opposite endianness" << endl; + throw 1; + } + + _nlocalGFMs = readIndex(_in5, switchEndian); bytesRead += sizeof(index_t); + int32_t lineRate = readI32(_in5, switchEndian); bytesRead += 4; + readI32(_in5, switchEndian); bytesRead += 4; + int32_t offRate = readI32(_in5, switchEndian); bytesRead += 4; + // TODO: add isaRate to the actual file format (right now, the + // user has to tell us whether there's an ISA sample and what the + // sampling rate is. + int32_t ftabChars = readI32(_in5, switchEndian); bytesRead += 4; + /*int32_t flag =*/ readI32(_in5, switchEndian); bytesRead += 4; + + if(this->_verbose || startVerbose) { + cerr << " number of local indexes: " << _nlocalGFMs << endl + << " local offRate: " << offRate << endl + << " local ftabLen: " << (1 << (2 * ftabChars)) << endl + << " local ftabSz: " << (2 << (2 * ftabChars)) << endl + ; + } + + clearLocalGFMs(); + + index_t tidx = 0, localOffset = 0, joinedOffset = 0; + string base = ""; + for(size_t i = 0; i < _nlocalGFMs; i++) { + LocalGFM *localGFM = new LocalGFM(base, + NULL, + _in5, + _in6, + mmFile5_, + mmFile6_, + tidx, + localOffset, + joinedOffset, + switchEndian, + bytesRead, + bytesRead2, + needEntireRev, + this->fw_, + -1, // overrideOffRate + -1, // offRatePlus + (uint32_t)lineRate, + (uint32_t)offRate, + (uint32_t)ftabChars, + this->_useMm, + this->useShmem_, + mmSweep, + loadNames, + loadSASamp, + loadFtab, + loadRstarts, + false, // _verbose + false, + 
this->_passMemExc, + this->_sanity, + false); // use haplotypes? + + if(tidx >= _localGFMs.size()) { + assert_eq(tidx, _localGFMs.size()); + _localGFMs.expand(); + } + assert_eq(tidx + 1, _localGFMs.size()); + _localGFMs.back().push_back(localGFM); + } + +#ifdef BOWTIE_MM + fseek(_in5, 0, SEEK_SET); + fseek(_in6, 0, SEEK_SET); +#else + rewind(_in5); rewind(_in6); +#endif +} + +#endif /*HGFM_H_*/ diff --git a/hi_aligner.h b/hi_aligner.h new file mode 100644 index 0000000..ef2c735 --- /dev/null +++ b/hi_aligner.h @@ -0,0 +1,7006 @@ +/* + * Copyright 2015, Daehwan Kim + * + * This file is part of HISAT 2. + * This file is edited by Yun (Leo) Zhang for HISAT-3N. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#ifndef HI_ALIGNER_H_ +#define HI_ALIGNER_H_ + +#include +#include +#include +#include "qual.h" +#include "ds.h" +#include "sstring.h" +#include "alphabet.h" +#include "edit.h" +#include "read.h" +// Threading is necessary to synchronize the classes that dump +// intermediate alignment results to files. Otherwise, all data herein +// is constant and shared, or per-thread. 
+#include "threading.h" +#include "aligner_result.h" +#include "aligner_cache.h" +#include "scoring.h" +#include "mem_ids.h" +#include "simple_func.h" +#include "aligner_driver.h" +#include "aligner_sw_driver.h" +#include "group_walk.h" +#include "tp.h" +#include "gp.h" + +// Allow longer introns for long anchored reads involving canonical splice sites +inline uint32_t MaxIntronLen(uint32_t anchor, uint32_t minAnchorLen) { + uint32_t intronLen = 0; + if(anchor >= minAnchorLen) { + if(anchor < 2) anchor = 2; + uint32_t shift = (anchor << 1) - 4; + shift = min(max(shift, 13), 30); + intronLen = 1 << shift; + } + return intronLen; +} + +inline float intronLen_prob(uint32_t anchor, uint32_t intronLen, uint32_t maxIntronLen) { + uint32_t expected_intron_len = maxIntronLen; + if(anchor < 14) expected_intron_len = 1 << ((anchor << 1) + 4); + if(expected_intron_len > maxIntronLen) expected_intron_len = maxIntronLen; + assert_gt(expected_intron_len, 0); + float result = ((float)intronLen) / ((float)expected_intron_len); + if(result > 1.0f) result = 1.0f; + return result; +} + +// Allow longer introns for long anchored reads involving non-canonical splice sites +inline uint32_t MaxIntronLen_noncan(uint32_t anchor, uint32_t minAnchorLen_noncan) { + uint32_t intronLen = 0; + if(anchor >= minAnchorLen_noncan) { + if(anchor < 5) anchor = 5; + uint32_t shift = (anchor << 1) - 10; + shift = min(shift, 30); + intronLen = 1 << shift; + } + return intronLen; +} + +inline float intronLen_prob_noncan(uint32_t anchor, uint32_t intronLen, uint32_t maxIntronLen) { + uint32_t expected_intron_len = maxIntronLen; + if(anchor < 16) expected_intron_len = 1 << (anchor << 1); + if(expected_intron_len > maxIntronLen) expected_intron_len = maxIntronLen; + assert_gt(expected_intron_len, 0); + float result = ((float)intronLen) / ((float)expected_intron_len); + if(result > 1.0f) result = 1.0f; + return result; +} + +/** + * Hit types for BWTHit class below + * Three hit types to anchor a read on the 
genome + * + */ +enum { + CANDIDATE_HIT = 1, + PSEUDOGENE_HIT, + ANCHOR_HIT, +}; + +/** + * Simple struct for holding a partial alignment for the read + * The alignment locations are represented by FM offsets [top, bot), + * and later genomic offsets are calculated when necessary + */ +template +struct BWTHit { + + BWTHit() { reset(); } + + void reset() { + _top = _bot = 0; + _node_top = _node_bot = 0; + _node_iedge_count.clear(); + _fw = true; + _bwoff = (index_t)INDEX_MAX; + _len = 0; + _coords.clear(); + _anchor_examined = false; + _hit_type = CANDIDATE_HIT; + } + + void init( + index_t top, + index_t bot, + index_t node_top, + index_t node_bot, + const EList >& node_iedge_count, + bool fw, + uint32_t bwoff, + uint32_t len, + index_t hit_type = CANDIDATE_HIT) + { + assert_leq(node_bot - node_top, bot - top); +#ifndef NDEBUG + if(node_bot - node_top < bot - top) { + assert_gt(node_iedge_count.size(), 0); + } +#endif + _top = top; + _bot = bot; + _node_top = node_top; + _node_bot = node_bot; + _node_iedge_count = node_iedge_count; + _fw = fw; + _bwoff = bwoff; + _len = len; + _coords.clear(); + _anchor_examined = false; + _hit_type = hit_type; + } + + bool hasGenomeCoords() const { return !_coords.empty(); } + + /** + * Return true iff there is no hit. + */ + bool empty() const { + return _bot <= _top; + } + + /** + * Higher score = higher priority. + */ + bool operator<(const BWTHit& o) const { + return _len > o._len; + } + + /** + * Return the size of the alignments SA ranges. + */ + index_t size() const { + assert_leq(_top, _bot); + return _bot - _top; + } + + index_t len() const { + assert_gt(_len, 0); + return _len; + } + +#ifndef NDEBUG + /** + * Check that hit is sane w/r/t read. 
+ */ + bool repOk(const Read& rd) const { + assert_gt(_bot, _top); + assert_neq(_bwoff, (index_t)INDEX_MAX); + assert_gt(_len, 0); + return true; + } +#endif + + index_t _top; // start of the range in the FM index + index_t _bot; // end of the range in the FM index + index_t _node_top; + index_t _node_bot; + EList > _node_iedge_count; + bool _fw; // whether read is forward or reverse complemented + index_t _bwoff; // current base of a read to search from the right end + index_t _len; // read length + + EList _coords; // genomic offsets corresponding to [_top, _bot) + + bool _anchor_examined; // whether or not this hit is examined + index_t _hit_type; // hit type (anchor hit, pseudogene hit, or candidate hit) +}; + + +/** + * Simple struct for holding alignments for the read + * The alignments are represented by chains of BWTHits + */ +template +struct ReadBWTHit { + + ReadBWTHit() { reset(); } + + void reset() { + _fw = true; + _len = 0; + _cur = 0; + _done = false; + _numPartialSearch = 0; + _numUniqueSearch = 0; + _repeat = false; + _partialHits.clear(); + } + + void init( + bool fw, + index_t len) + { + _fw = fw; + assert_gt(len, 0); + _len = len; + _cur = 0; + _done = false; + _numPartialSearch = 0; + _numUniqueSearch = 0; + _repeat = false; + _partialHits.clear(); + } + + bool done() { +#ifndef NDEBUG + assert_gt(_len, 0); + if(_cur >= _len) { + assert(_done); + } +#endif + return _done; + } + + void done(bool done) { + assert(!_done); + assert(done); + _done = done; + } + + index_t len() const { return _len; } + index_t cur() const { return _cur; } + bool repeat() const { return _repeat; } + + index_t offsetSize() { return (index_t)_partialHits.size(); } + size_t numPartialSearch() { return _numPartialSearch; } + index_t numActualPartialSearch() + { + assert_leq(_numUniqueSearch, _numPartialSearch); + return (index_t)(_numPartialSearch - _numUniqueSearch); + } + + bool width(index_t offset_) { + assert_lt(offset_, _partialHits.size()); + return 
_partialHits[offset_].size(); + } + + bool hasGenomeCoords(index_t offset_) { + assert_lt(offset_, _partialHits.size()); + index_t width_ = width(offset_); + if(width_ == 0) { + return true; + } else { + return _partialHits[offset_].hasGenomeCoords(); + } + } + + bool hasAllGenomeCoords() { + if(_cur < _len) return false; + if(_partialHits.size() <= 0) return false; + for(size_t oi = 0; oi < _partialHits.size(); oi++) { + if(!_partialHits[oi].hasGenomeCoords()) + return false; + } + return true; + } + + /** + * + */ + index_t minWidth(index_t& offset) const { + index_t minWidth_ = (index_t)INDEX_MAX; + index_t minWidthLen_ = 0; + for(size_t oi = 0; oi < _partialHits.size(); oi++) { + const BWTHit& hit = _partialHits[oi]; + if(hit.empty()) continue; + // if(!hit.hasGenomeCoords()) continue; + assert_gt(hit.size(), 0); + if((minWidth_ > hit.size()) || + (minWidth_ == hit.size() && minWidthLen_ < hit.len())) { + minWidth_ = hit.size(); + minWidthLen_ = hit.len(); + offset = (index_t)oi; + } + } + return minWidth_; + } + + // add policy for calculating a search score + int64_t searchScore(index_t minK) { + int64_t score = 0; + const int64_t penaltyPerOffset = minK * minK; + for(size_t i = 0; i < _partialHits.size(); i++) { + index_t len = _partialHits[i]._len; + score += (len * len); + } + + assert_geq(_numPartialSearch, _partialHits.size()); + index_t actualPartialSearch = numActualPartialSearch(); + score -= (actualPartialSearch * penaltyPerOffset); + score -= (1 << (actualPartialSearch << 1)); + return score; + } + + BWTHit& getPartialHit(index_t offset_) { + assert_lt(offset_, _partialHits.size()); + return _partialHits[offset_]; + } + + bool adjustOffset(index_t minK) { + assert_gt(_partialHits.size(), 0); + const BWTHit& hit = _partialHits.back(); + if(hit.len() >= minK + 3) { + return false; + } + assert_geq(_cur, hit.len()); + index_t origCur = _cur - hit.len(); + _cur = origCur + max(hit.len(), minK + 1) - minK; + _partialHits.pop_back(); + return true; + } + 
+ void setOffset(index_t offset) { + assert_lt(offset, _len); + _cur = offset; + } + +#ifndef NDEBUG + /** + */ + bool repOk() const { + for(size_t i = 0; i < _partialHits.size(); i++) { + if(i == 0) { + assert_geq(_partialHits[i]._bwoff, 0); + } + + if(i + 1 < _partialHits.size()) { + assert_leq(_partialHits[i]._bwoff + _partialHits[i]._len, _partialHits[i+1]._bwoff); + } else { + assert_eq(i+1, _partialHits.size()); + assert_eq(_partialHits[i]._bwoff + _partialHits[i]._len, _cur); + } + } + return true; + } +#endif + + bool _fw; + index_t _len; + index_t _cur; + bool _done; + index_t _numPartialSearch; + index_t _numUniqueSearch; + index_t _cur_local; + bool _repeat; + + EList > _partialHits; +}; + + +/** + * this is per-thread data, which are shared by GenomeHit classes + * the main purpose of this struct is to avoid extensive use of memory related functions + * such as new and delete - those are really slow and lock based + */ +template +struct SharedTempVars { + SStringExpandable raw_refbuf; + SStringExpandable raw_refbuf2; + EList temp_scores; + EList temp_scores2; + + // Align with alternatives + EList > ssOffs; + EList > offDiffs; + EList > raw_refbufs; + EList alt_edits; + ELList candidate_edits; + ELList > ht_llist; + Haplotype cmp_ht; + + ASSERT_ONLY(SStringExpandable destU32); + + ASSERT_ONLY(BTDnaString editstr); + ASSERT_ONLY(BTDnaString partialseq); + ASSERT_ONLY(BTDnaString refstr); + ASSERT_ONLY(EList reflens); + ASSERT_ONLY(EList refoffs); + + LinkedEList > raw_edits; + LinkedEList > > raw_ht_lists; +}; + +/** + * GenomeHit represents read alignment or alignment of a part of a read + * Two GenomeHits that represents alignments of different parts of a read + * can be combined together. Also, GenomeHit can be extended in both directions. 
+ */ +template +struct GenomeHit { + GenomeHit() : + _fw(false), + _rdoff((index_t)INDEX_MAX), + _len((index_t)INDEX_MAX), + _trim5(0), + _trim3(0), + _tidx((index_t)INDEX_MAX), + _toff((index_t)INDEX_MAX), + _joinedOff((index_t)INDEX_MAX), + _repeat(false), + _edits(NULL), + _ht_list(NULL), + _score(MIN_I64), + _localscore(MIN_I64), + _hitcount(1), + _edits_node(NULL), + _ht_list_node(NULL), + _sharedVars(NULL) + { + } + + GenomeHit(const GenomeHit& otherHit) : + _edits(NULL), + _ht_list(NULL), + _hitcount(1), + _edits_node(NULL), + _ht_list_node(NULL), + _sharedVars(NULL) + { + init(otherHit._fw, + otherHit._rdoff, + otherHit._len, + otherHit._trim5, + otherHit._trim3, + otherHit._tidx, + otherHit._toff, + otherHit._joinedOff, + *(otherHit._sharedVars), + otherHit._repeat, + otherHit._edits, + otherHit._ht_list, + otherHit._score, + otherHit._localscore, + otherHit._splicescore); + } + + GenomeHit& operator=(const GenomeHit& otherHit) { + if(this == &otherHit) return *this; + init(otherHit._fw, + otherHit._rdoff, + otherHit._len, + otherHit._trim5, + otherHit._trim3, + otherHit._tidx, + otherHit._toff, + otherHit._joinedOff, + *(otherHit._sharedVars), + otherHit._repeat, + otherHit._edits, + otherHit._ht_list, + otherHit._score, + otherHit._localscore, + otherHit._splicescore); + + return *this; + } + + ~GenomeHit() { + if(_edits_node != NULL) { + assert(_edits != NULL); + assert(_sharedVars != NULL); + _sharedVars->raw_edits.delete_node(_edits_node); + _edits = NULL; + _edits_node = NULL; + } + if(_ht_list_node != NULL) { + assert(_ht_list != NULL); + assert(_sharedVars != NULL); + _sharedVars->raw_ht_lists.delete_node(_ht_list_node); + _ht_list = NULL; + _ht_list_node = NULL; + } + _sharedVars = NULL; + } + + void init( + bool fw, + index_t rdoff, + index_t len, + index_t trim5, + index_t trim3, + index_t tidx, + index_t toff, + index_t joinedOff, + SharedTempVars& sharedVars, + bool repeat = false, + EList* edits = NULL, + EList >* ht_list = NULL, + int64_t 
score = 0, + int64_t localscore = 0, + double splicescore = 0.0) + { + _fw = fw; + _rdoff = rdoff; + _len = len; + _trim5 = trim5; + _trim3 = trim3; + _tidx = tidx; + _toff = toff; + _joinedOff = joinedOff; + _repeat = repeat; + _score = score; + _localscore = localscore; + _splicescore = splicescore; + + assert(_sharedVars == NULL || _sharedVars == &sharedVars); + _sharedVars = &sharedVars; + if(_edits == NULL) { + assert(_edits_node == NULL); + _edits_node = _sharedVars->raw_edits.new_node(); + assert(_edits_node != NULL); + _edits = &(_edits_node->payload); + } + assert(_edits != NULL); + _edits->clear(); + if(edits != NULL) *_edits = *edits; + + if(_ht_list == NULL) { + assert(_ht_list_node == NULL); + _ht_list_node = _sharedVars->raw_ht_lists.new_node(); + assert(_ht_list_node != NULL); + _ht_list = &(_ht_list_node->payload); + } + assert(_ht_list != NULL); + _ht_list->clear(); + if(ht_list != NULL) *_ht_list = *ht_list; + + _hitcount = 1; + } + + + bool inited() const { + return _len >= 0 && _len < (index_t)INDEX_MAX; + } + + /** + * Check if it is compatible with another GenomeHit with respect to indels or introns + */ + bool compatibleWith( + const GenomeHit& otherHit, + index_t minIntronLen, + index_t maxIntronLen, + bool no_spliced_alignment = false) const; + + /** + * Combine itself with another GenomeHit + */ + bool combineWith( + const GenomeHit& otherHit, + const Read& rd, + const GFM& gfm, + const BitPairReference& ref, + const ALTDB& altdb, + const RepeatDB& repeatdb, + SpliceSiteDB& ssdb, + SwAligner& swa, + SwMetrics& swm, + const Scoring& sc, + TAlScore minsc, + RandomSource& rnd, // pseudo-random source + index_t minK_local, + index_t minIntronLen, + index_t maxIntronLen, + index_t minAnchorLen, // minimum anchor length for canonical splice site + index_t minAnchorLen_noncan, // minimum anchor length for non-canonical splice site + const index_t maxAltsTried, + const SpliceSite* spliceSite = NULL, // penalty for splice site + bool 
no_spliced_alignment = false); + + /** + * Extend the partial alignment (GenomeHit) bidirectionally + */ + bool extend( + const Read& rd, + const GFM& gfm, + const BitPairReference& ref, + const ALTDB& altdb, + const RepeatDB& repeatdb, + SpliceSiteDB& ssdb, + SwAligner& swa, + SwMetrics& swm, + PerReadMetrics& prm, + const Scoring& sc, + TAlScore minsc, + RandomSource& rnd, // pseudo-random source + index_t minK_local, + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + index_t& leftext, + index_t& rightext, + index_t mm = 0); + + /** + * Adjust alignment with respect to SNPs, usually updating Edits + * + */ + static bool adjustWithALT( + index_t rdoff, + index_t len, + const Coord& coord, + SharedTempVars& sharedVars, + EList >& genomeHits, + const Read& rd, + const GFM& gfm, + const ALTDB& altdb, + const BitPairReference& ref, + const GraphPolicy& gpol); + + /** + * Adjust alignment with respect to SNPs, usually updating Edits + * + */ + bool adjustWithALT( + const Read& rd, + const GFM& gfm, + const ALTDB& altdb, + const BitPairReference& ref, + const GraphPolicy& gpol); + + /* + * + */ + static void findSSOffs( + const GFM& gfm, + const ALTDB& altdb, + index_t start, + index_t end, + EList >& ssOffs); + + /* + * Find offset differences due to deletions + */ + static index_t findOffDiffs( + const GFM& gfm, + const ALTDB& altdb, + index_t start, + index_t end, + EList >& offDiffs); + + /* + * + */ + static index_t alignWithALTs( + const EList >& alts, + const EList >& haplotypes, + const EList& haplotype_maxrights, + index_t joinedOff, + const BTDnaString& rdseq, + index_t base_rdoff, + index_t rdoff, + index_t rdlen, + const BitPairReference& ref, + SharedTempVars& sharedVar, + index_t tidx, + int rfoff, + index_t rflen, + bool left, + const GraphPolicy& gpol, + EList& edits, + ELList >& ht_llist, + EList >& ht_list, + Haplotype& cmp_ht, + int cycle_3N, + ELList* candidate_edits = NULL, + index_t mm = 0, + index_t* numNs = NULL) + { + int best_rdoff 
= (int)rdoff; + if(numNs != NULL) *numNs = 0; + index_t numALTsTried = 0; + EList& alt_edits = sharedVar.alt_edits; + alt_edits = edits; + index_t nedits = (index_t)edits.size(); + if(candidate_edits != NULL) candidate_edits->clear(); + ht_llist.clear(); + // ht_llist.expand(); + // ht_llist[0] = ht_list; + alignWithALTs_recur( + alts, + haplotypes, + haplotype_maxrights, + joinedOff, + rdseq, + rdoff - base_rdoff, + rdoff, + rdlen, + ref, + sharedVar.raw_refbufs, + ASSERT_ONLY(sharedVar.destU32,) + alt_edits, + best_rdoff, + NULL, /* rfseq */ + tidx, + rfoff, + rflen, + left, + edits, + mm, + ht_llist, + cmp_ht, + candidate_edits, + 0, /* tmp_numNs */ + numNs, + 0, /* dep */ + gpol, + numALTsTried, + cycle_3N); + index_t extlen = 0; + if(left) { + assert_geq(best_rdoff, -1); + assert_leq(best_rdoff, (int)rdoff); + extlen = rdoff - best_rdoff; + } else { + assert_leq(best_rdoff, (int)(rdoff + rdlen)); + assert_geq(best_rdoff, (int)rdoff); + extlen = best_rdoff - rdoff; + } + if(extlen > 0 && edits.size() > 0) { + const Edit& f = edits.front(); + if(f.pos + extlen == base_rdoff + 1) { + if(f.type == EDIT_TYPE_READ_GAP || + f.type == EDIT_TYPE_REF_GAP || + f.type == EDIT_TYPE_SPL) { + extlen = 0; + } + if(f.type == EDIT_TYPE_MM && f.chr == 'N') { + extlen = 0; + } + } + const Edit& b = edits.back(); + if(extlen > 0 && b.pos == rdoff - base_rdoff + extlen - 1) { + if(b.type == EDIT_TYPE_READ_GAP || + b.type == EDIT_TYPE_REF_GAP) { + extlen = 0; + } + } + if(extlen == 0 && edits.size() > nedits) { + if(left) { + edits.erase(0, edits.size() - nedits); + } else { + edits.resize(nedits); + } + } + } + return extlen; + } + + /* + * + */ + static index_t alignWithALTs_recur( + const EList >& alts, + const EList >& haplotypes, + const EList& haplotype_maxrights, + index_t joinedOff, + const BTDnaString& rdseq, + index_t rdoff_add, + index_t rdoff, + index_t rdlen, + const BitPairReference& ref, + EList >& raw_refbufs, + ASSERT_ONLY(SStringExpandable destU32,) + EList& 
tmp_edits, + int& best_rdoff, + const char* rfseq, + index_t tidx, + int rfoff, + index_t rflen, + bool left, + EList& edits, + index_t mm, + ELList >& ht_llist, + Haplotype& cmp_ht, + ELList* candidate_edits, + index_t tmp_numNs, + index_t* numNs, + index_t dep, + const GraphPolicy& gpol, + index_t& numALTsTried, + int cycle_3N, + ALT_TYPE prev_alt_type = ALT_NONE); + + /** + * For alignment involving indel, move the indels + * to the left most possible position + */ + void leftAlign(const Read& rd); + + index_t rdoff() const { return _rdoff; } + index_t len() const { return _len; } + index_t trim5() const { return _trim5; } + index_t trim3() const { return _trim3; } + + void trim5(index_t trim5, + const Read& rd, + SpliceSiteDB& ssdb, + const Scoring& sc, + index_t minK_local, + index_t minIntronLen, + index_t maxIntronLen, + index_t minAnchorLen, + index_t minAnchorLen_noncan, + const BitPairReference& ref) + { + assert_eq(_rdoff, trim5); + assert_eq(_trim5, 0); + _trim5 = trim5; + calculateScore(rd, + ssdb, + sc, + minK_local, + minIntronLen, + maxIntronLen, + minAnchorLen, + minAnchorLen_noncan, + ref); + } + void trim3(index_t trim3, + const Read& rd, + SpliceSiteDB& ssdb, + const Scoring& sc, + index_t minK_local, + index_t minIntronLen, + index_t maxIntronLen, + index_t minAnchorLen, + index_t minAnchorLen_noncan, + const BitPairReference& ref) + { + _trim3 = trim3; + calculateScore(rd, + ssdb, + sc, + minK_local, + minIntronLen, + maxIntronLen, + minAnchorLen, + minAnchorLen_noncan, + ref); + } + + // for repeat alignments + // reverse fw + void reverse(const Read& rd) + { + _fw = !_fw; + index_t end = _trim5 + _rdoff + _len; + assert_leq(end, rd.length()); + _rdoff = rd.length() - end; + index_t tmp_trim = _trim5; + _trim5 = _trim3; + _trim3 = tmp_trim; + Edit::invertPoss(*_edits, rd.length()); + // complements + Edit::complement(*_edits); + } + + index_t ref() const { return _tidx; } + index_t refoff() const { return _toff; } + index_t fw() const { 
return _fw; } + + bool repeat() const { return _repeat; } + void repeat(bool repeat) { _repeat = repeat;} + + index_t hitcount() const { return _hitcount; } + + /** + * Leftmost coordinate + */ + Coord coord() const { + return Coord(_tidx, _toff, _fw); + } + + int64_t score() const { return _score; } + int64_t localscore() const { return _localscore; } + double splicescore() const { return _splicescore; } + + const EList& edits() const { return *_edits; } + + /** + * Retrieve the partial alignment from the left until indel or intron + */ + void getLeft(index_t& rdoff, + index_t& len, + index_t& toff, + int64_t* score = NULL, + const Read* rd = NULL, + const Scoring* sc = NULL) const + { + assert(inited()); + toff = _toff, rdoff = _rdoff, len = _len; + const BTString* qual = NULL; + if(score != NULL) { + assert(rd != NULL); + assert(sc != NULL); + *score = 0; + qual = &(_fw ? rd->qual : rd->qualRev); + } + for(index_t i = 0; i < _edits->size(); i++) { + const Edit& edit = (*_edits)[i]; + if(edit.type == EDIT_TYPE_SPL || + edit.type == EDIT_TYPE_READ_GAP || + edit.type == EDIT_TYPE_REF_GAP || + (edit.type == EDIT_TYPE_MM && edit.snpID != (index_t)INDEX_MAX)) { + len = edit.pos; + break; + } + if(score != NULL) { + if(edit.type == EDIT_TYPE_MM) { + assert(qual != NULL); + if(edit.snpID == (index_t)INDEX_MAX) { + *score += sc->score( + dna2col[edit.qchr] - '0', + asc2dnamask[edit.chr], + (*qual)[this->_rdoff + edit.pos] - 33); + } + } + } + } + assert_geq(len, 0); + } + + /** + * Retrieve the partial alignment from the right until indel or intron + */ + void getRight(index_t& rdoff, + index_t& len, + index_t& toff, + int64_t* score = NULL, + const Read* rd = NULL, + const Scoring* sc = NULL) const + { + assert(inited()); + toff = _toff, rdoff = _rdoff, len = _len; + const BTString* qual = NULL; + if(score != NULL) { + assert(rd != NULL); + assert(sc != NULL); + *score = 0; + qual = &(_fw ? 
rd->qual : rd->qualRev); + } + if(_edits->size() == 0) return; + for(int i = (int)_edits->size() - 1; i >= 0; i--) { + const Edit& edit = (*_edits)[i]; + if(edit.type == EDIT_TYPE_SPL || + edit.type == EDIT_TYPE_READ_GAP || + edit.type == EDIT_TYPE_REF_GAP || + (edit.type == EDIT_TYPE_MM && edit.snpID != (index_t)INDEX_MAX)) { + rdoff = _rdoff + edit.pos; + assert_lt(edit.pos, _len); + len = _len - edit.pos; + if(edit.type == EDIT_TYPE_REF_GAP) { + assert_lt(edit.pos + 1, _len); + assert_gt(len, 1); + rdoff++; + len--; + } else if(edit.type == EDIT_TYPE_MM) { + assert_leq(edit.pos + 1, _len); + assert_geq(len, 1); + rdoff++; + len--; + } + toff = getRightOff() - len; + break; + } + if(score != NULL) { + if(edit.type == EDIT_TYPE_MM) { + assert(qual != NULL); + if(edit.snpID == (index_t)INDEX_MAX) { + *score += sc->score( + dna2col[edit.qchr] - '0', + asc2dnamask[edit.chr], + (*qual)[this->_rdoff + edit.pos] - 33); + } + } + } + } + assert_geq(len, 0); + } + + /** + * Retrieve the genomic offset of the right end + */ + index_t getRightOff() const { + assert(inited()); + index_t toff = _toff + _len; + for(index_t i = 0; i < _edits->size(); i++) { + const Edit& ed = (*_edits)[i]; + if(ed.type == EDIT_TYPE_SPL) { + toff += ed.splLen; + } else if(ed.type == EDIT_TYPE_READ_GAP) { + toff++; + } else if(ed.type == EDIT_TYPE_REF_GAP) { + assert_gt(toff, 0); + toff--; + } + } + return toff; + } + + /** + * Retrieve left anchor length and number of edits in the anchor + */ + void getLeftAnchor(index_t& leftanchor, + index_t& nedits) const + { + assert(inited()); + leftanchor = _len; + nedits = 0; + for(index_t i = 0; i < _edits->size(); i++) { + const Edit& edit = (*_edits)[i]; + if(edit.type == EDIT_TYPE_SPL) { + leftanchor = edit.pos; + break; + } else if(edit.type == EDIT_TYPE_MM || + edit.type == EDIT_TYPE_READ_GAP || + edit.type == EDIT_TYPE_REF_GAP) { + nedits++; + } + } + } + + /** + * Retrieve right anchor length and number of edits in the anchor + */ + void 
getRightAnchor(index_t& rightanchor, + index_t& nedits) const + { + rightanchor = _len; + nedits = 0; + if(_edits->size() == 0) return; + for(int i = (int)_edits->size() - 1; i >= 0; i--) { + const Edit& edit = (*_edits)[i]; + if(edit.type == EDIT_TYPE_SPL) { + rightanchor = _len - edit.pos - 1; + break; + } else if(edit.type == EDIT_TYPE_MM || + edit.type == EDIT_TYPE_READ_GAP || + edit.type == EDIT_TYPE_REF_GAP) { + nedits++; + } + } + } + + + /** + * Is it spliced alignment? + * Return: first is spliced-alignment, second is spliced-alignment to known transcripts + */ + pair spliced() const { + pair result(false, true); + for(index_t i = 0; i < _edits->size(); i++) { + const Edit& e = (*_edits)[i]; + if(e.type == EDIT_TYPE_SPL) { + result.first = true; + result.second &= e.knownSpl; + } + } + result.second &= result.first; + return result; + } + + /** + * + */ + bool spliced_consistently() const { + int splDir = SPL_UNKNOWN; + for(index_t i = 0; i < _edits->size(); i++) { + const Edit& edit = (*_edits)[i]; + if(edit.type != EDIT_TYPE_SPL) continue; + if(splDir != SPL_UNKNOWN) { + if(edit.splDir != SPL_UNKNOWN) { + if(splDir == SPL_FW || splDir == SPL_SEMI_FW) { + if(edit.splDir != SPL_FW && edit.splDir != SPL_SEMI_FW) + return false; + } + if(splDir == SPL_RC || splDir == SPL_SEMI_RC) { + if(edit.splDir != SPL_RC && edit.splDir != SPL_SEMI_RC) + return false; + } + } + } else { + splDir = edit.splDir; + } + } + return true; + } + + /** + * return one of EDIT_SPL_FW, EDIT_SPL_RC, EDIT_SPL_UNKNOWN + */ + int splicing_dir() const { + int splDir = SPL_UNKNOWN; + for(index_t i = 0; i < _edits->size(); i++) { + const Edit& edit = (*_edits)[i]; + if(edit.type != EDIT_TYPE_SPL) continue; + if(splDir != SPL_UNKNOWN) { + if(edit.splDir != SPL_UNKNOWN) { + if(splDir == SPL_FW || splDir == SPL_SEMI_FW) { + if(edit.splDir != SPL_FW && edit.splDir != SPL_SEMI_FW) + return SPL_UNKNOWN; + } + if(splDir == SPL_RC || splDir == SPL_SEMI_RC) { + if(edit.splDir != SPL_RC && 
edit.splDir != SPL_SEMI_RC) + return SPL_UNKNOWN; + } + } + } else { + splDir = edit.splDir; + } + } + if(splDir == SPL_FW || splDir == SPL_SEMI_FW) + return SPL_FW; + else if(splDir == SPL_RC || splDir == SPL_SEMI_RC) + return SPL_RC; + else + return SPL_UNKNOWN; + } + + bool operator== (const GenomeHit& other) const { + if(_fw != other._fw || + _rdoff != other._rdoff || + _len != other._len || + _tidx != other._tidx || + _toff != other._toff || + _trim5 != other._trim5 || + _trim3 != other._trim3) { + return false; + } + if(_edits->size() != other._edits->size()) return false; + for(index_t i = 0; i < _edits->size(); i++) { + const Edit& e = (*_edits)[i]; + const Edit& oe = (*other._edits)[i]; + if(e.isReadGap()) { + if(!oe.isReadGap()) return false; + } else if(e.isRefGap()) { + if(!oe.isRefGap()) return false; + } else { + if(!(e == oe)) { + return false; + } + } + } + // daehwan - this may not be true when some splice sites are provided from outside + // assert_eq(_score, other._score); + return true; + } + + bool contains(const GenomeHit& other) const { + return (*this) == other; + } + + /** + * Return number of mismatches in the alignment. + */ + int mms() const { +#if 0 + if (_e2.inited()) return 2; + else if(_e1.inited()) return 1; + else return 0; +#endif + return 0; + } + + /** + * Return the number of Ns involved in the alignment. + */ + int ns() const { +#if 0 + int ns = 0; + if(_e1.inited() && _e1.hasN()) { + ns++; + if(_e2.inited() && _e2.hasN()) { + ns++; + } + } + return ns; +#endif + return 0; + } + + int ngaps() const { + return 0; + } + +#ifndef NDEBUG + /** + * Check that hit is sane w/r/t read. 
+ */ + bool repOk(const Read& rd, const BitPairReference& ref); +#endif + + void replace_edits_with_alts(const Read& rd, + const EList >& alts, + SpliceSiteDB& ssdb, + const Scoring& sc, + index_t minK_local, + index_t minIntronLen, + index_t maxIntronLen, + index_t minAnchorLen, + index_t minAnchorLen_noncan, + const BitPairReference& ref) { + assert(inited()); + if(alts.size() <= 0) + return; + if(_edits->size() <= 0) + return; + + index_t joinedOff = _joinedOff; + int offset = 0; + size_t i = 0, next_i; + while(i < _edits->size()) { + next_i = i + 1; + Edit& ed = (*_edits)[i]; + if(ed.type == EDIT_TYPE_SPL) { + assert(false); + } else if(ed.type == EDIT_TYPE_READ_GAP || ed.type == EDIT_TYPE_REF_GAP) { + for(; next_i < _edits->size(); next_i++) { + Edit& next_ed = (*_edits)[next_i]; + if(ed.type != next_ed.type) break; + } + } + + if(ed.snpID == (index_t)INDEX_MAX) { + ALT cmp_alt; + cmp_alt.pos = joinedOff + ed.pos + offset; + index_t alt_i = (index_t)alts.bsearchLoBound(cmp_alt); + for(; alt_i < alts.size(); alt_i++) { + const ALT& alt = alts[alt_i]; + if(alt.left > cmp_alt.pos) break; + if(ed.type == EDIT_TYPE_MM) { + if(alt.type != ALT_SNP_SGL) continue; + if("ACGT"[alt.seq] == ed.qchr) { + ed.snpID = alt_i; + break; + } + } else { + size_t gap = next_i - i; + if(ed.type == EDIT_TYPE_READ_GAP) { + if(alt.type != ALT_SNP_DEL) continue; + if(alt.len == gap) { + for(size_t ii = i; ii < next_i; ii++) { + Edit& ii_ed = (*_edits)[ii]; + ii_ed.snpID = alt_i; + } + break; + } + } else { + assert_eq(ed.type, EDIT_TYPE_REF_GAP); + if(alt.type != ALT_SNP_INS) continue; + if(alt.len == gap) { + uint64_t seq = 0; + for(size_t ii = i; ii < next_i; ii++) { + Edit& ii_ed = (*_edits)[ii]; + seq = (seq << 2) | asc2dna[ii_ed.qchr]; + } + if(alt.seq == seq) { + for(size_t ii = i; ii < next_i; ii++) { + Edit& ii_ed = (*_edits)[ii]; + ii_ed.snpID = alt_i; + } + break; + } + } + } + } + } + } + + if(ed.type == EDIT_TYPE_SPL) { + offset += ed.splLen; + } else if(ed.type == 
EDIT_TYPE_READ_GAP || ed.type == EDIT_TYPE_REF_GAP) { + size_t gap = next_i - i; + if(ed.type == EDIT_TYPE_READ_GAP) { + assert_gt(joinedOff, gap); + offset += gap; + } else { + offset -= gap; + } + } + + i = next_i; + } + + calculateScore(rd, + ssdb, + sc, + minK_local, + minIntronLen, + maxIntronLen, + minAnchorLen, + minAnchorLen_noncan, + ref); + } + +private: + /** + * Calculate alignment score + */ + int64_t calculateScore( + const Read& rd, + SpliceSiteDB& ssdb, + const Scoring& sc, + index_t minK_local, + index_t minIntronLen, + index_t maxIntronLen, + index_t minAnchorLen, + index_t minAnchorLen_noncan, + const BitPairReference& ref); + +public: + bool _fw; + index_t _rdoff; + index_t _len; + index_t _trim5; + index_t _trim3; + + index_t _tidx; + index_t _toff; + index_t _joinedOff; + bool _repeat; + EList* _edits; + EList >* _ht_list; + int64_t _score; + int64_t _localscore; + double _splicescore; + + index_t _hitcount; // for selection purposes + + LinkedEListNode >* _edits_node; + LinkedEListNode > >* _ht_list_node; + SharedTempVars* _sharedVars; +}; + +/** + * Check if it is compatible with another GenomeHit with respect to indels or introns + */ +template +bool GenomeHit::compatibleWith( + const GenomeHit& otherHit, + index_t minIntronLen, + index_t maxIntronLen, + bool no_spliced_alignment) const +{ + if(this == &otherHit) return false; + // check if they are on the same strand and on the same contig + if(_fw != otherHit._fw || _tidx != otherHit._tidx) return false; + // make sure itself is closer to the left end of read than otherHit + if(_rdoff > otherHit._rdoff) return false; + // do not consider a case itself (read portion) includes otherHit + if(_rdoff + _len > otherHit._rdoff + otherHit._len) return false; + // make sure itself comes before otherHit wrt. 
genomic positions + if(_toff > otherHit._toff) return false; + + index_t this_rdoff, this_len, this_toff; + this->getRight(this_rdoff, this_len, this_toff); + assert_geq(this_len, 0); + index_t other_rdoff, other_len, other_toff; + otherHit.getLeft(other_rdoff, other_len, other_toff); + assert_geq(other_len, 0); + + if(this_rdoff > other_rdoff) return false; + if(this_rdoff + this_len > other_rdoff + other_len) return false; + if(this_toff > other_toff) return false; + + index_t refdif = other_toff - this_toff; + index_t rddif = other_rdoff - this_rdoff; + + // check if there is a deletion, an insertion, or a potential intron + // between the two partial alignments + if(!no_spliced_alignment) { + if(refdif > rddif + maxIntronLen) { + return false; + } + } + return true; +} + +static inline char get_ref_base(int threeN, int* mapping, char base) +{ + return threeN ? mapping[base] : base; +} + +/** + * Combine itself with another GenomeHit + * while allowing mismatches, an insertion, a deletion, or an intron + */ +template +bool GenomeHit::combineWith( + const GenomeHit& otherHit, + const Read& rd, + const GFM& gfm, + const BitPairReference& ref, + const ALTDB& altdb, + const RepeatDB& repeatdb, + SpliceSiteDB& ssdb, + SwAligner& swa, + SwMetrics& swm, + const Scoring& sc, + TAlScore minsc, + RandomSource& rnd, // pseudo-random source + index_t minK_local, + index_t minIntronLen, + index_t maxIntronLen, + index_t minAnchorLen, // minimum anchor length for canonical splice site + index_t minAnchorLen_noncan, // minimum anchor length for non-canonical splice site + const index_t maxAltsTried, + const SpliceSite* spliceSite, // penalty for splice site + bool no_spliced_alignment) +{ + if(this == &otherHit) return false; + assert(compatibleWith(otherHit, minIntronLen, maxIntronLen, no_spliced_alignment)); + assert_eq(this->_tidx, otherHit._tidx); + assert_lt(this->_tidx, ref.numRefs()); + + // get the partial part of the alignment from the right + // until an indel or 
splice sites + index_t this_rdoff, this_len, this_toff; + int64_t this_score; + this->getRight(this_rdoff, this_len, this_toff, &this_score, &rd, &sc); + assert_geq(this_len, 0); + assert_leq(this_score, 0); + assert_geq(this_score, this->_score); + + // get the partial part of the other alignment from the left + // until an indel or splice sites + index_t other_rdoff, other_len, other_toff; + int64_t other_score; + otherHit.getLeft(other_rdoff, other_len, other_toff, &other_score, &rd, &sc); + assert_geq(other_len, 0); + assert_leq(other_score, 0); + assert_geq(other_score, otherHit._score); + + assert_leq(this_rdoff, other_rdoff); + if(this_len != 0 && + other_len != 0 && + this_rdoff + this_len > other_rdoff + other_len) return false; + assert_leq(this_rdoff + this_len, other_rdoff + other_len); + index_t len = other_rdoff - this_rdoff + other_len; + const index_t reflen = ref.approxLen(_tidx); + if(this_toff + len > reflen) return false; + assert_leq(this_toff + len, reflen); + + // check if an indel or an intron is necessary + index_t refdif = other_toff - this_toff; + index_t rddif = other_rdoff - this_rdoff; + bool spliced = false, ins = false, del = false; + if(refdif != rddif) { + if(refdif > rddif) { + if(!no_spliced_alignment && refdif - rddif >= minIntronLen) { + assert_leq(refdif - rddif, maxIntronLen); + spliced = true; + } else { + del = true; + } + } else { + ins = true; + } + } +#ifndef NDEBUG + if(ins) { + assert(!spliced && !del); + } else { + if(spliced) assert(!del); + else assert(!spliced); + } +#endif + + if(no_spliced_alignment) { + if(spliced) return false; + } + + // if the combination of the two alignments does not involve an indel or an intron, + // then simply combine them and return + if(!spliced && !ins && !del && this_rdoff + this_len == other_rdoff) { + index_t addoff = otherHit._rdoff - this->_rdoff; + for(index_t i = 0; i < otherHit._edits->size(); i++) { + _edits->push_back((*otherHit._edits)[i]); + _edits->back().pos += addoff; 
+ } + _len += otherHit._len; + calculateScore( + rd, + ssdb, + sc, + minK_local, + minIntronLen, + maxIntronLen, + minAnchorLen, + minAnchorLen_noncan, + ref); + assert(repOk(rd, ref)); + return true; + } + + // calculate the maximum gap lengths based on the current score and the mimumimu alignment score to be reported + const BTDnaString& seq = this->_fw ? rd.patFw : rd.patRc; + const BTString& qual = this->_fw ? rd.qual : rd.qualRev; + index_t rdlen = (index_t)seq.length(); + int64_t remainsc = minsc - (_score - this_score) - (otherHit._score - other_score); + if(remainsc > 0) remainsc = 0; + int read_gaps = 0, ref_gaps = 0; + if(!spliced) { + read_gaps = sc.maxReadGaps(remainsc + sc.canSpl(), rdlen); + ref_gaps = sc.maxRefGaps(remainsc + sc.canSpl(), rdlen); + } + if(ins) { + if(refdif + ref_gaps < rddif) { + return false; + } + } else if(del) { + if(rddif + read_gaps < refdif) { + return false; + } + } + int this_ref_ext = read_gaps; + if(spliced) this_ref_ext += (int)intronic_len; + if(this_toff + len > reflen) return false; + if(this_toff + len + this_ref_ext > reflen) this_ref_ext = reflen - (this_toff + len); + assert(_sharedVars != NULL); + SStringExpandable& raw_refbuf = _sharedVars->raw_refbuf; + EList& temp_scores = _sharedVars->temp_scores; + EList& temp_scores2 = _sharedVars->temp_scores2; + ASSERT_ONLY(SStringExpandable& destU32 = _sharedVars->destU32); + raw_refbuf.resize(len + this_ref_ext + 16); + int off = ref.getStretch( + reinterpret_cast(raw_refbuf.wbuf()), + (size_t)this->_tidx, + (size_t)this_toff, + len + this_ref_ext + ASSERT_ONLY(, destU32)); + assert_lt(off, 16); + char *refbuf = raw_refbuf.wbuf() + off, *refbuf2 = NULL; + + // discover a splice site, an insertion, or a deletion + index_t maxscorei = (index_t)INDEX_MAX; + int64_t maxscore = MIN_I64; + uint32_t maxspldir = SPL_UNKNOWN; + float maxsplscore = 0.0f; + // allow an indel near a splice site + index_t splice_gap_maxscorei = (index_t)INDEX_MAX; + int64_t donor_seq = 0, 
acceptor_seq = 0; + int splice_gap_off = 0; + + int refConversion_3N[5] = {0, 1, 2, 3, 4}; + if (threeN){ + if (((rd.threeN_cycle == threeN_type1conversion_FW || rd.threeN_cycle == threeN_type2conversion_RC) && !rd.oppositeConversion_3N) || + ((rd.threeN_cycle == threeN_type1conversion_RC || rd.threeN_cycle == threeN_type2conversion_FW) && rd.oppositeConversion_3N)) { + // type 1 conversion + refConversion_3N[asc2dna[hs3N_convertedFrom]] = asc2dna[hs3N_convertedTo]; + } else { + // type 2 conversion + refConversion_3N[asc2dna[hs3N_convertedFromComplement]] = asc2dna[hs3N_convertedToComplement]; + } + } + + if(spliced || ins || del) { + int other_ref_ext = min(read_gaps + (int)intronic_len, other_toff + other_len - len); + SStringExpandable& raw_refbuf2 = _sharedVars->raw_refbuf2; + raw_refbuf2.resize(len + other_ref_ext + 16); + int off2 = ref.getStretch( + reinterpret_cast(raw_refbuf2.wbuf()), + (size_t)otherHit._tidx, + (size_t)(other_toff + other_len - len - other_ref_ext), + len + other_ref_ext + ASSERT_ONLY(, destU32)); + refbuf2 = raw_refbuf2.wbuf() + off2 + other_ref_ext; + temp_scores.resize(len); + temp_scores2.resize(len); + if(spliced) { + static const char GT = 0x23, AG = 0x02; + static const char GTrc = 0x01, AGrc = 0x13; + static const char GC = 0x21, GCrc = 0x21; + static const char AT = 0x03, AC = 0x01; + static const char ATrc = 0x03, ACrc = 0x20; + static const char AA = 0x00, AArc = 0x33; + int i; + for(i = 0; i < (int)len; i++) { + int rdc = seq[this_rdoff + i], rfc = get_ref_base(threeN, refConversion_3N, refbuf[i]); + if(i > 0) { + temp_scores[i] = temp_scores[i-1]; + } else { + temp_scores[i] = 0; + } + if(rdc != rfc) { + temp_scores[i] += sc.score(rdc, 1 << rfc, qual[this_rdoff + i] - 33); + } + if(temp_scores[i] < remainsc) { + break; + } + } + int i_limit = min(i, len); + int i2; + for(i2 = len - 1; i2 >= 0; i2--) { + int rdc = seq[this_rdoff + i2], rfc = get_ref_base(threeN, refConversion_3N, refbuf2[i2]); + if((index_t)(i2 + 1) < len) { 
+ temp_scores2[i2] = temp_scores2[i2+1]; + } else { + temp_scores2[i2] = 0; + } + if(rdc != rfc) { + temp_scores2[i2] += sc.score(rdc, 1 << rfc, qual[this_rdoff + i2] - 33); + } + if(temp_scores2[i2] < remainsc) { + break; + } + } + int i2_limit = max(i2, 0); + if(spliceSite != NULL){ + assert_leq(this_toff, (int)spliceSite->left()); + if(i2_limit <= (int)(spliceSite->left() - this_toff)) { + i2_limit = (int)(spliceSite->left() - this_toff); + i_limit = i2_limit + 1; + } else { + i_limit = i2_limit; + } + } + for(i = i2_limit, i2 = i2_limit + 1; + i < i_limit && i2 < (int)len; + i++, i2++) { + int64_t tempscore = temp_scores[i] + temp_scores2[i2]; + char donor = 0xff, acceptor = 0xff; + if((index_t)(i + 2) < len + this_ref_ext) { + donor = refbuf[i + 1]; + donor = (donor << 4) | refbuf[i + 2]; + } + if(i2 - 2 >= -other_ref_ext) { + acceptor = refbuf2[i2 - 2]; + acceptor = (acceptor << 4) | refbuf2[i2 - 1]; + } + bool canonical = false, semi_canonical = false; + uint32_t spldir = SPL_UNKNOWN; + if((donor == GT && acceptor == AG) /* || (donor == AT && acceptor == AC) */) { + spldir = SPL_FW; + canonical = true; + } else if((donor == AGrc && acceptor == GTrc) /* || (donor == ACrc && acceptor == ATrc) */) { + spldir = SPL_RC; + canonical = true; + } else if((donor == GC && acceptor == AG) || (donor == AT && acceptor == AC)) { + spldir = SPL_SEMI_FW; + semi_canonical = true; + } else if((donor == AGrc && acceptor == GCrc) || (donor == ACrc && acceptor == ATrc)) { + spldir = SPL_SEMI_RC; + semi_canonical = true; + } + tempscore -= (canonical ? sc.canSpl() : sc.noncanSpl()); + int64_t temp_donor_seq = 0, temp_acceptor_seq = 0; + float splscore = 0.0f; + if(canonical) { + // in case of canonical splice site, extract donor side sequence and acceptor side sequence + // to calculate a score of the splicing event. 
+ if(spldir == SPL_FW) { + if(i + 1 >= (int)donor_exonic_len && + (int)(len + this_ref_ext) > i + (int)donor_intronic_len && + i2 + (int)other_ref_ext >= (int)acceptor_intronic_len && + (int)len > i2 + (int)acceptor_exonic_len - 1) { + int from = i + 1 - (int)donor_exonic_len; + int to = i + (int)donor_intronic_len; + for(int j = from; j <= to; j++) { + assert_geq(j, 0); + assert_lt(j, (int)(len + this_ref_ext)); + int base = refbuf[j]; + if(base > 3) base = 0; + temp_donor_seq = temp_donor_seq << 2 | base; + } + from = i2 - acceptor_intronic_len; + to = i2 + acceptor_exonic_len - 1; + for(int j = from; j <= to; j++) { + assert_geq(j, -(int)other_ref_ext); + assert_lt(j, (int)len); + int base = refbuf2[j]; + if(base > 3) base = 0; + temp_acceptor_seq = temp_acceptor_seq << 2 | base; + } + } + } else if(spldir == SPL_RC) { + if(i + 1 >= (int)acceptor_exonic_len && + (int)(len + this_ref_ext) > i + (int)acceptor_intronic_len && + i2 + (int)other_ref_ext >= (int)donor_intronic_len && + (int)len > i2 + (int)donor_exonic_len - 1) { + int from = i + 1 - (int)acceptor_exonic_len; + int to = i + (int)acceptor_intronic_len; + for(int j = to; j >= from; j--) { + assert_geq(j, 0); + assert_lt(j, (int)(len + this_ref_ext)); + int base = refbuf[j]; + if(base > 3) base = 0; + temp_acceptor_seq = temp_acceptor_seq << 2 | (base ^ 0x3); + } + from = i2 - donor_intronic_len; + to = i2 + donor_exonic_len - 1; + for(int j = to; j >= from; j--) { + assert_geq(j, -(int)other_ref_ext); + assert_lt(j, (int)len); + int base = refbuf2[j]; + if(base > 3) base = 0; + temp_donor_seq = temp_donor_seq << 2 | (base ^ 0x3); + } + } + } + + splscore = SpliceSiteDB::probscore(temp_donor_seq, temp_acceptor_seq); + } + // daehwan - for debugging purposes + // choose a splice site with the better score + if((maxspldir == SPL_UNKNOWN && spldir == SPL_UNKNOWN && maxscore < tempscore) || + (maxspldir == SPL_UNKNOWN && spldir == SPL_UNKNOWN && maxscore == tempscore && semi_canonical) || + (maxspldir != 
SPL_UNKNOWN && spldir != SPL_UNKNOWN && (maxscore < tempscore || (maxscore == tempscore && maxsplscore < splscore))) || + (maxspldir == SPL_UNKNOWN && spldir != SPL_UNKNOWN)) { + maxscore = tempscore; + maxscorei = i; + maxspldir = spldir; + maxsplscore = splscore; + if(maxspldir != SPL_UNKNOWN) { + donor_seq = temp_donor_seq; + acceptor_seq = temp_acceptor_seq; + } else { + donor_seq = 0; + acceptor_seq = 0; + } + } + } + } else { + // discover an insertion or a deletion + assert(ins || del); + int inslen = (ins ? rddif - refdif : 0); + int dellen = (del ? refdif - rddif : 0); + int64_t gap_penalty; + if(ins) { + gap_penalty = -(sc.refGapOpen() + sc.refGapExtend() * (inslen - 1)); + } else { + assert(del); + gap_penalty = -(sc.readGapOpen() + sc.readGapExtend() * (dellen - 1)); + } + if(gap_penalty < remainsc) return false; + int i; + for(i = 0; i < (int)len; i++) { + int rdc = seq[this_rdoff + i], rfc = get_ref_base(threeN, refConversion_3N, refbuf[i]); + if(i > 0) { + temp_scores[i] = temp_scores[i-1]; + } else { + temp_scores[i] = 0; + } + if(rdc != rfc) { + temp_scores[i] += sc.score(rdc, 1 << rfc, qual[this_rdoff + i] - 33); + } + if(temp_scores[i] + gap_penalty < remainsc) { + break; + } + } + int i_limit = min(i, len); + int i2; + for(i2 = len - 1; i2 >= 0; i2--) { + int rdc = seq[this_rdoff + i2], rfc = get_ref_base(threeN, refConversion_3N, refbuf2[i2]); + if((index_t)(i2 + 1) < len) { + temp_scores2[i2] = temp_scores2[i2+1]; + } else { + temp_scores2[i2] = 0; + } + if(rdc != rfc) { + temp_scores2[i2] += sc.score(rdc, 1 << rfc, qual[this_rdoff + i2] - 33); + } + if(temp_scores2[i2] + gap_penalty < remainsc) { + break; + } + } + int i2_limit = (i2 < inslen ? 
0 : i2 - inslen); + for(i = i2_limit, i2 = i2_limit + 1 + inslen; + i < i_limit && i2 < (int)len; + i++, i2++) { + int64_t tempscore = temp_scores[i] + temp_scores2[i2] + gap_penalty; + if(maxscore < tempscore) { + maxscore = tempscore; + maxscorei = i; + } + } + } + if(maxscore == MIN_I64) return false; + assert_lt(maxscorei, len); + if(spliced && spliceSite == NULL) { + uint32_t shorter_anchor_len = min(maxscorei + 1, len - maxscorei - 1); + assert_leq(this_toff, other_toff); + if(maxspldir == SPL_SEMI_FW || maxspldir == SPL_SEMI_RC || maxspldir == SPL_UNKNOWN) { + if(shorter_anchor_len < minAnchorLen_noncan) { + float intronLenProb = intronLen_prob_noncan(shorter_anchor_len, other_toff - this_toff, maxIntronLen); + if(intronLenProb > 0.01f) + return false; + } + } else { + if(shorter_anchor_len < minAnchorLen) { + float intronLenProb = intronLen_prob(shorter_anchor_len, other_toff - this_toff, maxIntronLen); + if(intronLenProb > 0.01f) + return false; + } + } + } + if(maxscore < remainsc) + return false; + } + + bool clear = true; + for(int i = (int)_edits->size() - 1; i >= 0; i--) { + const Edit& edit = (*_edits)[i]; + if(edit.type == EDIT_TYPE_SPL || + edit.type == EDIT_TYPE_READ_GAP || + edit.type == EDIT_TYPE_REF_GAP || + (edit.type == EDIT_TYPE_MM && edit.snpID != (index_t)INDEX_MAX)) { + _edits->resize(i+1); + clear = false; + break; + } + } + if(clear) this->_edits->clear(); + // combine two alignments while updating edits + if(spliced) { + assert_geq(this_rdoff, this->_rdoff); + index_t addoff = this_rdoff - this->_rdoff; + int rd_gap_off = -min(splice_gap_off, 0); + int ref_gap_off = max(splice_gap_off, 0); + for(int i = 0; i < (int)len; i++) { + assert_lt(this_rdoff + i, rdlen); + int rdc = seq[this_rdoff + i]; + assert_range(0, 4, rdc); + int rfc; + if(splice_gap_maxscorei <= maxscorei) { + if(i <= (int)splice_gap_maxscorei) { + rfc = get_ref_base(threeN, refConversion_3N, refbuf[i]); + } else if(i <= (int)maxscorei) { + rfc = get_ref_base(threeN, 
refConversion_3N, refbuf[i - ref_gap_off + rd_gap_off]); + } else { + rfc = get_ref_base(threeN, refConversion_3N, refbuf2[i]); + } + } else { + if(i <= (int)maxscorei) { + rfc = get_ref_base(threeN, refConversion_3N, refbuf[i]); + } else if(i <= (int)splice_gap_maxscorei) { + rfc = get_ref_base(threeN, refConversion_3N, refbuf2[i + ref_gap_off - rd_gap_off]); + } else { + rfc = get_ref_base(threeN, refConversion_3N, refbuf2[i]); + } + } + assert_range(0, 4, rfc); + if(rdc != rfc) { + Edit e((uint32_t)(i + addoff), rfc, rdc, EDIT_TYPE_MM, false); + _edits->push_back(e); + } + if(i == (int)maxscorei) { + index_t left = this_toff + i + 1; + if(splice_gap_maxscorei <= maxscorei) { + left = left - ref_gap_off + rd_gap_off; + } + index_t right = other_toff + other_len - (len - i - 1); + if(splice_gap_maxscorei > maxscorei) { + right = right + ref_gap_off - rd_gap_off; + } + index_t skipLen = 0; + assert_lt(left, right); + skipLen = right - left; + Edit e((uint32_t)(i + 1 + addoff), 0, 0, EDIT_TYPE_SPL, skipLen, maxspldir, spliceSite != NULL, false); + e.donor_seq = donor_seq; + e.acceptor_seq = acceptor_seq; + _edits->push_back(e); + } + if(i == (int)splice_gap_maxscorei && splice_gap_off != 0) { + if(rd_gap_off > 0) { + assert_lt(left, right); + for(index_t j = 0; j < (index_t)rd_gap_off; j++) { + int temp_rfc_off = i + 1 + j; + int temp_rfc; + if(i < (int)maxscorei) { + temp_rfc = get_ref_base(threeN, refConversion_3N, refbuf[temp_rfc_off]); + } else { + temp_rfc = get_ref_base(threeN, refConversion_3N, refbuf2[temp_rfc_off - rd_gap_off]); + } + assert_range(0, 4, temp_rfc); + Edit e((uint32_t)(i + 1 + addoff), "ACGTN"[temp_rfc], '-', EDIT_TYPE_READ_GAP); + _edits->push_back(e); + } + } else { + assert_gt(ref_gap_off, 0); + for(index_t j = 0; j < (index_t)ref_gap_off; j++) { + assert_lt(this_rdoff + i + 1 + j, rdlen); + int temp_rdc = seq[this_rdoff + i + 1 + j]; + assert_range(0, 4, temp_rdc); + Edit e((uint32_t)(i + 1 + j + addoff), '-', "ACGTN"[temp_rdc], 
EDIT_TYPE_REF_GAP); + _edits->push_back(e); + } + i += ref_gap_off; + } + } + } + } else { + index_t ins_len = 0; + for(index_t i = 0; i < len; i++) { + char rdc = seq[this_rdoff + i]; + char rfc = (i <= maxscorei ? get_ref_base(threeN, refConversion_3N, refbuf[i]) : get_ref_base(threeN, refConversion_3N, refbuf2[i])); + assert_geq(this_rdoff, this->_rdoff); + index_t addoff = this_rdoff - this->_rdoff; + if(rdc != rfc) { + ALT cmp_alt; + assert_geq(this_toff, this->_toff); + cmp_alt.pos = this->_joinedOff + i + (this_toff - this->_toff) - ins_len; + index_t alt_i = (index_t)altdb.alts().bsearchLoBound(cmp_alt); + index_t add_alt_i = std::numeric_limits::max(); + for(; alt_i < altdb.alts().size(); alt_i++) { + const ALT& alt = altdb.alts()[alt_i]; + if(alt.left > cmp_alt.pos) break; + if(alt.type != ALT_SNP_SGL) continue; + if(alt.seq == rdc) { + add_alt_i = alt_i; + break; + } + } + + Edit e((uint32_t)(i + addoff), rfc, rdc, EDIT_TYPE_MM, false, add_alt_i); + _edits->push_back(e); + } + if(i == maxscorei) { + index_t left = this_toff + i + 1; + if(other_toff + other_len < len - i - 1) + return false; + index_t right = other_toff + other_len - (len - i - 1); + index_t skipLen = 0; + if(del) { + assert_lt(left, right); + skipLen = right - left; + for(index_t j = 0; j < skipLen; j++) { + int temp_rfc; + if(i + 1 + j < len) temp_rfc = get_ref_base(threeN, refConversion_3N, refbuf[i + 1 + j]); + else temp_rfc = get_ref_base(threeN, refConversion_3N, ref.getBase(this->_tidx, this_toff + i + 1 + j)); + assert_range(0, 4, temp_rfc); + Edit e((uint32_t)(i + 1 + addoff), "ACGTN"[temp_rfc], '-', EDIT_TYPE_READ_GAP); + _edits->push_back(e); + } + } else { + assert(ins); + assert_lt(right, left); + skipLen = left - right; + for(index_t j = 0; j < skipLen; j++) { + assert_lt(this_rdoff + i + 1 + j, seq.length()); + int temp_rdc = seq[this_rdoff + i + 1 + j]; + assert_range(0, 4, temp_rdc); + Edit e((uint32_t)(i + 1 + j + addoff), '-', "ACGTN"[temp_rdc], EDIT_TYPE_REF_GAP); + 
_edits->push_back(e); + } + i += skipLen; + ins_len += skipLen; + } + } + } + } + index_t fsi = (index_t)otherHit._edits->size(); + for(index_t i = 0; i < otherHit._edits->size(); i++) { + const Edit& edit = (*otherHit._edits)[i]; + if(edit.type == EDIT_TYPE_SPL || + edit.type == EDIT_TYPE_READ_GAP || + edit.type == EDIT_TYPE_REF_GAP || + (edit.type == EDIT_TYPE_MM && edit.snpID != (index_t)INDEX_MAX)) { + fsi = i; + break; + } + } + assert_leq(this->_rdoff, otherHit._rdoff); + index_t addoff = otherHit._rdoff - this->_rdoff; + for(index_t i = fsi; i < otherHit._edits->size(); i++) { + _edits->push_back((*otherHit._edits)[i]); + _edits->back().pos += addoff; + } + // for alignment involving indel, left align so that + // indels go to the left most of the combined alignment + if(ins || del || (spliced && splice_gap_off != 0)) { + leftAlign(rd); + } + + // update alignment score, trims + assert_leq(this->_rdoff + this->_len, otherHit._rdoff + otherHit._len); + _len = otherHit._rdoff + otherHit._len - this->_rdoff; + assert_eq(_trim3, 0); + _trim3 += otherHit._trim3; + calculateScore( + rd, + ssdb, + sc, + minK_local, + minIntronLen, + maxIntronLen, + minAnchorLen, + minAnchorLen_noncan, + ref); +#ifndef NDEBUG + if(_joinedOff != (index_t)INDEX_MAX) { + ASSERT_ONLY(bool straddled = false); + ASSERT_ONLY(index_t tmp_tidx = 0, tmp_toff = 0, tmp_tlen = 0); + gfm.joinedToTextOff( + 0, + _joinedOff, + tmp_tidx, + tmp_toff, + tmp_tlen, + true, // reject straddlers? + straddled); // straddled? 
+ assert_eq(tmp_tidx, _tidx); + assert_eq(tmp_toff, _toff); + } +#endif + assert(repOk(rd, ref)); + return true; +} + +/** + * Extend the partial alignment (GenomeHit) bidirectionally + */ +template +bool GenomeHit::extend( + const Read& rd, + const GFM& gfm, + const BitPairReference& ref, + const ALTDB& altdb, + const RepeatDB& repeatdb, + SpliceSiteDB& ssdb, + SwAligner& swa, + SwMetrics& swm, + PerReadMetrics& prm, + const Scoring& sc, + TAlScore minsc, + RandomSource& rnd, // pseudo-random source + index_t minK_local, + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + index_t& leftext, + index_t& rightext, + index_t mm) +{ + assert_lt(this->_tidx, ref.numRefs()); + index_t max_leftext = leftext, max_rightext = rightext; + assert(max_leftext > 0 || max_rightext > 0); + leftext = 0, rightext = 0; + index_t rdlen = (index_t)rd.length(); + bool doLeftAlign = false; + assert(_sharedVars != NULL); + + const index_t minIntronLen = tpol.minIntronLen(); + const index_t maxIntronLen = tpol.maxIntronLen(); + const index_t minAnchorLen = tpol.minAnchorLen(); + const index_t minAnchorLen_noncan = tpol.minAnchorLen_noncan(); + + // extend the alignment further in the left direction + // with 'mm' mismatches allowed + const BTDnaString& seq = _fw ? 
rd.patFw : rd.patRc; + if(max_leftext > 0 && _rdoff > 0) { + assert_gt(_rdoff, 0); + index_t left_rdoff, left_len, left_toff; + this->getLeft(left_rdoff, left_len, left_toff); + assert_eq(left_rdoff, _rdoff); + assert_eq(left_toff, _toff); + if(_toff <= 0) return false; + int rl = (int)_toff - (int)_rdoff; + assert_geq(_score, minsc); + index_t reflen = _rdoff + 10; + rl -= (reflen - _rdoff); + if(rl < 0) { + reflen += rl; + rl = 0; + } + index_t numNs = 0; + index_t num_prev_edits = (index_t)_edits->size(); + index_t best_ext = alignWithALTs( + altdb.alts(), + altdb.haplotypes(), + altdb.haplotype_maxrights(), + this->_joinedOff, + seq, + this->_rdoff - 1, + this->_rdoff - 1, + this->_rdoff, + ref, + *_sharedVars, + _tidx, + rl, + reflen, + true, /* left? */ + gpol, + *this->_edits, + _sharedVars->ht_llist, + *this->_ht_list, + _sharedVars->cmp_ht, + rd.threeN_cycle, + NULL, + mm, + &numNs); + // Do not allow for any edits including known snps and splice sites when extending zero-length hit + if(_len == 0 && mm == 0 && _edits->size() > 0) { + _edits->clear(); + return false; + } + if(best_ext > 0) { + leftext = best_ext; + assert_leq(num_prev_edits, _edits->size()); + index_t added_edits = (index_t)_edits->size() - num_prev_edits; + int ref_ext = (int)best_ext; + for(index_t i = 0; i < added_edits; i++) { + const Edit& edit = (*_edits)[i]; + if(edit.type == EDIT_TYPE_REF_GAP) ref_ext--; + else if(edit.type == EDIT_TYPE_READ_GAP) ref_ext++; + else if(edit.type == EDIT_TYPE_SPL) ref_ext += edit.splLen; + } + assert_leq(best_ext, _rdoff); + _rdoff -= best_ext; + assert_leq(ref_ext, _toff); + _toff -= ref_ext; + _len += best_ext; + assert_leq(_len, rdlen); + assert_leq((int)numNs, ref_ext); + assert_leq(ref_ext - (int)numNs, _joinedOff); + _joinedOff -= (ref_ext - (int)numNs); + for(index_t i = 0; i < _edits->size(); i++) { + if(i < added_edits) { + assert_geq((*_edits)[i].pos, _rdoff); + (*_edits)[i].pos -= _rdoff; + } else { + (*_edits)[i].pos += best_ext; + } + } + 
} + } + + // extend the alignment further in the right direction + // with 'mm' mismatches allowed + if(max_rightext > 0 && _rdoff + _len < rdlen) { + index_t right_rdoff, right_len, right_toff; + this->getRight(right_rdoff, right_len, right_toff); + index_t rl = right_toff + right_len; + assert_eq(_rdoff + _len, right_rdoff + right_len); + index_t rr = rdlen - (right_rdoff + right_len); + index_t tlen = ref.approxLen(_tidx); + if(rl < tlen) { + index_t reflen = rr + 10; + if(rl + reflen > tlen) { + reflen = tlen - rl; + } + int ref_ext = (int)_len; + for(index_t ei = 0; ei < _edits->size(); ei++) { + const Edit& e = (*_edits)[ei]; + if(e.type == EDIT_TYPE_REF_GAP) ref_ext--; + else if(e.type == EDIT_TYPE_READ_GAP) ref_ext++; + else if(e.type == EDIT_TYPE_SPL) ref_ext += e.splLen; + else if(e.type == EDIT_TYPE_MM && e.chr == 'N') ref_ext--; + } + + index_t best_ext = alignWithALTs( + altdb.alts(), + altdb.haplotypes(), + altdb.haplotype_maxrights(), + this->_joinedOff + ref_ext, + seq, + this->_rdoff, + this->_rdoff + this->_len, + rdlen - (this->_rdoff + this->_len), + ref, + *_sharedVars, + _tidx, + (int)rl, + reflen, + false, + gpol, + *this->_edits, + _sharedVars->ht_llist, + *this->_ht_list, + _sharedVars->cmp_ht, + rd.threeN_cycle, + NULL, + mm); + // Do not allow for any edits including known snps and splice sites when extending zero-length hit + if(_len == 0 && mm == 0 && _edits->size() > 0) { + _edits->clear(); + return false; + } + if(best_ext > 0) { + rightext = best_ext; + _len += best_ext; + } + } + } + +#ifndef NDEBUG + if(_joinedOff != (index_t)INDEX_MAX && seq[_rdoff] < 4) { + ASSERT_ONLY(bool straddled = false); + ASSERT_ONLY(index_t tmp_tidx = 0, tmp_toff = 0, tmp_tlen = 0); + gfm.joinedToTextOff( + 0, + _joinedOff, + tmp_tidx, + tmp_toff, + tmp_tlen, + true, // reject straddlers? + straddled); // straddled? 
+ if(!gfm.repeat()) { + assert_eq(tmp_tidx, _tidx); + } + assert_eq(tmp_toff, _toff); + } +#endif + + if(doLeftAlign) leftAlign(rd); + assert_leq(_rdoff + _len, rdlen); + calculateScore( + rd, + ssdb, + sc, + minK_local, + minIntronLen, + maxIntronLen, + minAnchorLen, + minAnchorLen_noncan, + ref); + assert(repOk(rd, ref)); + return leftext > 0 || rightext > 0; +} + +/** + * Adjust alignment with respect to SNPs, usually updating Edits + * + */ +template +bool GenomeHit::adjustWithALT( + index_t rdoff, + index_t len, + const Coord& coord, + SharedTempVars& sharedVars, + EList >& genomeHits, + const Read& rd, + const GFM& gfm, + const ALTDB& altdb, + const BitPairReference& ref, + const GraphPolicy& gpol) +{ + if(gfm.gh().linearFM()) { + genomeHits.expand(); + genomeHits.back().init( + coord.orient(), + rdoff, + len, + 0, // trim5 + 0, // trim3 + (index_t)coord.ref(), + (index_t)coord.off(), + (index_t)coord.joinedOff(), + sharedVars); + return true; + } + index_t width = 1 << (gfm.gh()._offRate + 2); + EList >& ssOffs = sharedVars.ssOffs; + findSSOffs(gfm, altdb, (coord.joinedOff() >= width ? 
(index_t)(coord.joinedOff() - width) : 0), (index_t)(coord.joinedOff() + width), ssOffs); + assert_gt(ssOffs.size(), 0); + bool found = false; + for(index_t s = 0; s < ssOffs.size(); s++) { + index_t off = (index_t)coord.off(); + index_t joinedOff = (index_t)coord.joinedOff(); + pair& ssOff = ssOffs[s]; + if(ssOff.first > 0) { + assert_neq(ssOff.second, 0); + if(ssOff.second > 0) { + off += ssOff.first; + joinedOff += ssOff.first; + } else { + off -= ssOff.first; + joinedOff -= ssOff.first; + } + } + size_t numGenomeHits = genomeHits.size(); + genomeHits.expand(); + genomeHits.back().init( + coord.orient(), + rdoff, + len, + 0, // trim5 + 0, // trim3 + (index_t)coord.ref(), + off, + joinedOff, + sharedVars); + GenomeHit& genomeHit = genomeHits.back(); + EList >& offDiffs = sharedVars.offDiffs; + const index_t single_offDiffs_size = findOffDiffs(gfm, + altdb, + (genomeHit._joinedOff >= width ? genomeHit._joinedOff - width : 0), + genomeHit._joinedOff + width, + offDiffs); + assert_leq(single_offDiffs_size, offDiffs.size()); + + const BTDnaString& seq = genomeHit._fw ? 
rd.patFw : rd.patRc; + const EList >& alts = altdb.alts(); + + index_t orig_joinedOff = genomeHit._joinedOff; + index_t orig_toff = genomeHit._toff; + bool found2 = false; + // maxAltsTried is not directly related to the size of offDiffs, + // but let's make the size of offDiffs is determined by maxAltsTried + const index_t max_offDiffs_size = max(4, gpol.maxAltsTried() / 4); + if(offDiffs.size() - single_offDiffs_size > max_offDiffs_size) offDiffs.resize(single_offDiffs_size + max_offDiffs_size); + for(index_t o = 0; o < offDiffs.size() && !found2; o++) { + const pair& offDiff = offDiffs[o]; +#ifndef NDEBUG + if(o == 0) { + assert_eq(offDiff.first, 0); + assert_eq(offDiff.second, 0); + } +#endif + if(offDiff.second >= 0) { + genomeHit._joinedOff = orig_joinedOff + offDiff.first; + genomeHit._toff = orig_toff + offDiff.first; + } else { + if(orig_toff < offDiff.first) continue; + assert_geq(orig_joinedOff, offDiff.first); + genomeHit._joinedOff = orig_joinedOff - offDiff.first; + genomeHit._toff = orig_toff - offDiff.first; + } + + genomeHit._edits->clear(); + ELList& candidate_edits = sharedVars.candidate_edits; + candidate_edits.clear(); + index_t reflen = genomeHit._len + 10; + index_t alignedLen = alignWithALTs( + alts, + altdb.haplotypes(), + altdb.haplotype_maxrights(), + genomeHit._joinedOff, + seq, + genomeHit._rdoff, + genomeHit._rdoff, + genomeHit._len, + ref, + sharedVars, + genomeHit._tidx, + (int)genomeHit._toff, + reflen, + false, /* left? 
*/ + gpol, + *genomeHit._edits, + sharedVars.ht_llist, + *genomeHit._ht_list, + sharedVars.cmp_ht, + rd.threeN_cycle, + &candidate_edits); + if(alignedLen == genomeHit._len) { + found2 = true; + assert(genomeHit.repOk(rd, ref)); + for(index_t i = 0; i < genomeHits.size() - 1; i++) { + if(genomeHits[i] == genomeHits.back()) { + found2 = false; + } + } + if(found2) { + for(index_t e = 0; e < candidate_edits.size(); e++) { + genomeHits.expand(); + genomeHits.back() = genomeHits[genomeHits.size() - 2]; + *(genomeHits.back()._edits) = candidate_edits[e]; + assert(genomeHits.back().repOk(rd, ref)); + for(size_t i = 0; i < genomeHits.size() - 1; i++) { + if(genomeHits[i] == genomeHits.back()) { + genomeHits.pop_back(); + break; + } + } + } + } + } else { + genomeHit._edits->clear(); + } + } + if(!found2) genomeHits.pop_back(); + found = genomeHits.size() > numGenomeHits; + } + return found; +} + +/** + * Adjust alignment with respect to SNPs, usually updating Edits + * + */ +template +bool GenomeHit::adjustWithALT( + const Read& rd, + const GFM& gfm, + const ALTDB& altdb, + const BitPairReference& ref, + const GraphPolicy& gpol) +{ + if(gfm.gh().linearFM()) return true; + assert_lt(this->_tidx, ref.numRefs()); + + assert(_sharedVars != NULL); + EList >& offDiffs = _sharedVars->offDiffs; + index_t width = 1 << (gfm.gh()._offRate + 2); + const index_t single_offDiffs_size = findOffDiffs(gfm, + altdb, + (this->_joinedOff >= width ? this->_joinedOff - width : 0), + this->_joinedOff + width, + offDiffs); + assert_leq(single_offDiffs_size, offDiffs.size()); + + const BTDnaString& seq = _fw ? 
rd.patFw : rd.patRc; + const EList >& alts = altdb.alts(); + + index_t orig_joinedOff = this->_joinedOff; + index_t orig_toff = this->_toff; + bool found = false; + // maxAltsTried is not directly related to the size of offDiffs, + // but let's make the size of offDiffs is determined by maxAltsTried + const index_t max_offDiffs_size = max(4, gpol.maxAltsTried() / 4); + if(offDiffs.size() - single_offDiffs_size > max_offDiffs_size) offDiffs.resize(single_offDiffs_size + max_offDiffs_size); + for(index_t o = 0; o < offDiffs.size() && !found; o++) { + const pair& offDiff = offDiffs[o]; +#ifndef NDEBUG + if(o == 0) { + assert_eq(offDiff.first, 0); + assert_eq(offDiff.second, 0); + } +#endif + if(offDiff.second >= 0) { + this->_joinedOff = orig_joinedOff + offDiff.first; + this->_toff = orig_toff + offDiff.first; + } else { + if(orig_toff < offDiff.first) continue; + assert_geq(orig_joinedOff, offDiff.first); + this->_joinedOff = orig_joinedOff - offDiff.first; + this->_toff = orig_toff - offDiff.first; + } + index_t reflen = this->_len + 10; + index_t alignedLen = alignWithALTs( + alts, + altdb.haplotypes(), + altdb.haplotype_maxrights(), + this->_joinedOff, + seq, + this->_rdoff, + this->_rdoff, + this->_len, + ref, + *_sharedVars, + this->_tidx, + (int)this->_toff, + reflen, + false, /* left? 
*/ + gpol, + *this->_edits, + _sharedVars->ht_llist, + *this->_ht_list, + _sharedVars->cmp_ht, + rd.threeN_cycle, + &_sharedVars->candidate_edits); + if(alignedLen == this->_len) { + found = true; + } else { + this->_edits->clear(); + } + } +#ifndef NDEBUG + if(found) { + assert(repOk(rd, ref)); + } +#endif + return found; +} + +/* + * Find offset differences due to splice sites + */ +template +void GenomeHit::findSSOffs( + const GFM& gfm, + const ALTDB& altdb, + index_t start, + index_t end, + EList >& ssOffs) +{ + ssOffs.clear(); + ssOffs.expand(); + ssOffs.back().first = ssOffs.back().second = 0; + if(gfm.gh().linearFM() || !altdb.hasSpliceSites()) return; + const EList >& alts = altdb.alts(); + + // Find splice sites included in this region + ALT alt_search; + alt_search.left = start; + for(index_t i = (index_t)alts.bsearchLoBound(alt_search); i < alts.size(); i++) { + const ALT& alt = alts[i]; + if(alt.left >= end) break; + if(!alt.splicesite()) continue; + // + if(alt.left < alt.right) { + ssOffs.expand(); + ssOffs.back().first = alt.right - alt.left + 1; + ssOffs.back().second = 1; + + const index_t relax = 5; + if(alt.right > relax) alt_search.left = alt.right - relax; + else alt_search.left = 0; + for(index_t j = (index_t)alts.bsearchLoBound(alt_search); j < alts.size(); j++) { + const ALT& alt2 = alts[j]; + if(!alt2.splicesite()) continue; + if(alt2.left < alt2.right) continue; + if(alt2.left + alt2.right == alt.left + alt.right) continue; + if(alt2.left > alt.right + relax) break; + ssOffs.expand(); + if(alt2.right < alt.left) { + ssOffs.back().first = alt.left - alt2.right; + ssOffs.back().second = -1; + } else { + ssOffs.back().first = alt2.right - alt.left; + ssOffs.back().second = 1; + } + } + } else { + ssOffs.expand(); + ssOffs.back().first = alt.left - alt.right + 1; + ssOffs.back().second = -1; + } + } + + if(ssOffs.size() > 1) { + ssOffs.sort(); + index_t new_size = (index_t)(unique(ssOffs.begin(), ssOffs.end()) - ssOffs.begin()); + 
ssOffs.resize(new_size); + } +} + + +/* + * Find offset differences due to indels + */ +template +index_t GenomeHit::findOffDiffs( + const GFM& gfm, + const ALTDB& altdb, + index_t start, + index_t end, + EList >& offDiffs) +{ + offDiffs.clear(); + offDiffs.expand(); + offDiffs.back().first = offDiffs.back().second = 0; + if(gfm.gh().linearFM()) return offDiffs.size(); + const EList >& alts = altdb.alts(); + pair alt_range; + + // Find SNPs included in this region + { + ALT alt_search; + alt_search.pos = start; + alt_range.first = alt_range.second = (index_t)alts.bsearchLoBound(alt_search); + for(alt_range.second = alt_range.first; alt_range.second < alts.size(); alt_range.second++) { + const ALT& alt = alts[alt_range.second]; + if(alt.splicesite() && alt.left > alt.right) continue; + if(alt.deletion() && alt.reversed) continue; + if(alt.pos >= end) break; + } + } + if(alt_range.first >= alt_range.second) return offDiffs.size(); + + for(index_t second = alt_range.second; second > alt_range.first; second--) { + assert_leq(second, alts.size()); + const ALT& alt = alts[second - 1]; + if(!alt.gap() || alt.splicesite() || (alt.deletion() && alt.reversed)) + continue; + int off = 0; + if(alt.type == ALT_SNP_DEL) { + off = alt.len; + } else { + assert_eq(alt.type, ALT_SNP_INS); + off = -alt.len; + } + assert_neq(off, 0); + offDiffs.expand(); + offDiffs.back().first = abs(off); + offDiffs.back().second = (off > 0 ? 
1 : -1); + } + + if(offDiffs.size() > 1) { + offDiffs.sort(); + index_t new_size = (index_t)(unique(offDiffs.begin(), offDiffs.end()) - offDiffs.begin()); + offDiffs.resize(new_size); + } + + const index_t single_offDiffs_size = offDiffs.size(); + for(index_t second = alt_range.second; second > alt_range.first; second--) { + assert_leq(alt_range.second, alts.size()); + const ALT& alt = alts[second - 1]; + if(!alt.gap() || alt.splicesite() || (alt.deletion() && alt.reversed)) + continue; + int off = 0; + if(alt.type == ALT_SNP_DEL) { + off = alt.len; + } else { + assert_eq(alt.type, ALT_SNP_INS); + off = -alt.len; + } + for(index_t second2 = second - 1; second2 > alt_range.first; second2--) { + const ALT& alt2 = alts[second2 - 1]; + if(!alt2.gap() || alt2.splicesite() || (alt2.deletion() && alt2.reversed)) + continue; + if(alt2.type == ALT_SNP_DEL) { + if(alt2.pos + alt2.len >= alt.pos) + continue; + off += alt2.len; + } else { + assert_eq(alt2.type, ALT_SNP_INS); + if(alt2.pos >= alt.pos) + continue; + off -= alt2.len; + } + bool found = false; + for(index_t i = 0; i < offDiffs.size(); i++) { + int off_cmp = offDiffs[i].first * offDiffs[i].second; + if(off == off_cmp) { + found = true; + break; + } + } + if(!found) { + offDiffs.expand(); + offDiffs.back().first = abs(off); + offDiffs.back().second = (off > 0 ? 
1 : -1); + } + } + } + + return single_offDiffs_size; +} + + +/* + * + */ +template +void add_haplotypes( + const EList >& alts, + const EList >& haplotypes, + const EList& haplotype_maxrights, + Haplotype& cmp_ht, + EList >& ht_list, + index_t rdlen, + bool left_ext = true, + bool initial = false) +{ + pair ht_range; + ht_range.first = ht_range.second = (int)haplotypes.bsearchLoBound(cmp_ht); + if(ht_range.first >= haplotypes.size()) + return; + + if(left_ext) { + for(; ht_range.first >= 0; ht_range.first--) { + const Haplotype& ht = haplotypes[ht_range.first]; + if(!initial) { + if(ht.right >= cmp_ht.left) continue; + } + index_t ht_maxright = haplotype_maxrights[ht_range.first]; + assert_geq(ht_maxright, ht.right); + if(ht_maxright + rdlen - 1 < cmp_ht.left) break; + if(ht.alts.size() <= 0) continue; + bool added = false; + for(index_t h = 0; h < ht_list.size(); h++) { + if(ht_list[h].first == ht_range.first) { + added = true; + break; + } + } + if(added) continue; + ht_list.expand(); + ht_list.back().first = ht_range.first; + assert_gt(ht.alts.size(), 0); + if(ht.right < cmp_ht.left) { + ht_list.back().second = ht.alts.size() - 1; + } else { + assert(initial); + ht_list.back().second = ht.alts.size(); + for(int a = (int)ht.alts.size() - 1; a >= 0; a--) { + index_t alti = ht.alts[a]; + assert_lt(alti, alts.size()); + const ALT& alt = alts[alti]; + assert(alt.snp()); + ht_list.back().second = (index_t)a; + if(cmp_ht.left > alt.pos) break; + } + if(ht_list.back().second == ht.alts.size()) { + ht_list.pop_back(); + } + } + } + } else { + if(initial) { + for(; ht_range.first >= 0; ht_range.first--) { + const Haplotype& ht = haplotypes[ht_range.first]; + index_t ht_maxright = haplotype_maxrights[ht_range.first]; + assert_geq(ht_maxright, ht.right); + if(ht_maxright < cmp_ht.left) break; + if(ht.right < cmp_ht.left || ht.left > cmp_ht.left) continue; + if(ht.alts.size() <= 0) continue; + bool added = false; + for(index_t h = 0; h < ht_list.size(); h++) { + 
if(ht_list[h].first == ht_range.first) { + added = true; + break; + } + } + if(added) continue; + ht_list.expand(); + ht_list.back().first = ht_range.first; + assert_gt(ht.alts.size(), 0); + ht_list.back().second = ht.alts.size(); + for(index_t a = 0; a < ht.alts.size(); a++) { + index_t alti = ht.alts[a]; + assert_lt(alti, alts.size()); + const ALT& alt = alts[alti]; + assert(alt.snp()); + ht_list.back().second = a; + if(cmp_ht.left <= alt.pos) break; + } + if(ht_list.back().second == ht.alts.size()) { + ht_list.pop_back(); + } + } + } + + for(; ht_range.second < haplotypes.size(); ht_range.second++) { + const Haplotype& ht = haplotypes[ht_range.second]; + if(ht.left < cmp_ht.right) continue; + if(ht.left >= cmp_ht.right + rdlen) break; + if(ht.alts.size() <= 0) continue; + bool added = false; + for(index_t h = 0; h < ht_list.size(); h++) { + if(ht_list[h].first == ht_range.second) { + added = true; + break; + } + } + if(added) continue; + ht_list.expand(); + ht_list.back().first = ht_range.second; + assert_gt(ht.alts.size(), 0); + ht_list.back().second = 0; + } + } +} + +/* + * + */ +template +index_t GenomeHit::alignWithALTs_recur( + const EList >& alts, + const EList >& haplotypes, + const EList& haplotype_maxrights, + index_t joinedOff, + const BTDnaString& rdseq, + index_t rdoff_add, + index_t rdoff, + index_t rdlen, + const BitPairReference& ref, + EList >& raw_refbufs, + ASSERT_ONLY(SStringExpandable destU32,) + EList& tmp_edits, + int& best_rdoff, + const char* rfseq, + index_t tidx, + int rfoff, + index_t rflen, + bool left, + EList& edits, + index_t mm, + ELList >& ht_llist, + Haplotype& cmp_ht, + ELList* candidate_edits, + index_t tmp_numNs, + index_t* numNs, + index_t dep, + const GraphPolicy& gpol, + index_t& numALTsTried, + int cycle_3N, + ALT_TYPE prev_alt_type) +{ + if(numALTsTried > gpol.maxAltsTried() + dep) return 0; + assert_gt(rdlen, 0); + assert_gt(rflen, 0); + if(ht_llist.size() <= dep) ht_llist.expand(); + if(raw_refbufs.size() <= dep) 
raw_refbufs.expand(); + if(rfoff < -16) return 0; + size_t contig_len = ref.approxLen(tidx); + if(rfoff >= contig_len) return 0; + if(rfoff >= 0 && rfoff + rflen > contig_len) { + rflen = contig_len - rfoff; + } else if(rfoff < 0 && rflen > contig_len) { + rflen = contig_len; + } + if(rflen == 0) return 0; + if(rfseq == NULL) { + SStringExpandable& raw_refbuf = raw_refbufs[dep]; + raw_refbuf.resize(rflen + 16 + 16); + raw_refbuf.fill(0x4); + int off = ref.getStretch( + reinterpret_cast(raw_refbuf.wbuf() + 16), + tidx, + max(rfoff, 0), + rfoff > 0 ? rflen : rflen + rfoff + ASSERT_ONLY(, destU32)); + assert_lt(off, 16); + rfseq = raw_refbuf.wbuf() + 16 + off + min(rfoff, 0); + } + + int refConversion_3N[5] = {0, 1, 2, 3, 4}; + if (threeN){ + if (cycle_3N == 0 || cycle_3N == 3) { + // C to T conversion + refConversion_3N[asc2dna[hs3N_convertedFrom]] = asc2dna[hs3N_convertedTo]; + } else { + //G to A conversion + refConversion_3N[asc2dna[hs3N_convertedFromComplement]] = asc2dna[hs3N_convertedToComplement]; + + } + } + + if(left) { + index_t tmp_mm = 0; + int min_rd_i = (int)rdoff; + int mm_min_rd_i = (int)rdoff; + index_t mm_tmp_numNs = 0; + for(int rf_i = (int)rflen - 1; rf_i >= 0 && mm_min_rd_i >= 0; rf_i--, mm_min_rd_i--) { + int rf_bp = get_ref_base(threeN, refConversion_3N, rfseq[rf_i]); + int rd_bp = rdseq[mm_min_rd_i]; + if(rf_bp != rd_bp || rd_bp == 4) { + if(tmp_mm == 0) { + min_rd_i = mm_min_rd_i; + } + if(tmp_mm >= mm) break; + tmp_mm++; + Edit e( + mm_min_rd_i, + "ACGTN"[rf_bp], + "ACGTN"[rd_bp], + EDIT_TYPE_MM); + tmp_edits.insert(e, 0); + } + if(rf_bp == 4) { + if(tmp_mm == 0) tmp_numNs++; + mm_tmp_numNs++; + } + } + if(tmp_mm == 0) { + min_rd_i = mm_min_rd_i; + } + if(mm_min_rd_i < best_rdoff) { + best_rdoff = mm_min_rd_i; + edits = tmp_edits; + if(numNs != NULL) *numNs = mm_tmp_numNs; + } + if(mm_min_rd_i < 0) return rdlen; + if(tmp_mm > 0) { + tmp_edits.erase(0, tmp_mm); + tmp_mm = 0; + } + + // Find SNPs included in this region + pair alt_range(0, 0); 
+ if(alts.size() > 0) { + ALT cmp_alt; + const index_t minK = 16; + assert_leq(mm_min_rd_i, rdoff); + index_t rd_diff = rdoff - mm_min_rd_i; + rd_diff = (rd_diff > minK ? rd_diff - minK : 0); + if(gpol.enableCODIS()) { + rd_diff = 0; + } + if(rd_diff >= joinedOff) { + cmp_alt.pos = joinedOff; + } else { + cmp_alt.pos = joinedOff - rd_diff; + } + alt_range.first = alt_range.second = (int)alts.bsearchLoBound(cmp_alt); + if(alt_range.first >= alts.size()) { + assert_gt(alts.size(), 0); + alt_range.first = alt_range.second = alt_range.second - 1; + } + for(; alt_range.first >= 0; alt_range.first--) { + const ALT& alt = alts[alt_range.first]; + if(alt.snp()) { + if(alt.deletion() && !alt.reversed) continue; + if(alt.pos + rdlen < joinedOff) break; + } else if(alt.splicesite()) { + if(alt.left < alt.right) continue; + if(alt.left + rdlen - 1 < joinedOff) break; + } else { + assert(alt.exon()); + continue; + } + } + } + + // Update and find Haplotypes + EList >& ht_list = ht_llist[dep]; + ht_list.clear(); + if(gpol.useHaplotype() && haplotypes.size() > 0) { + if(dep > 0) { + EList >& ht_prev_list = ht_llist[dep-1]; + for(index_t p = 0; p < ht_prev_list.size(); p++) { + const pair& ht_ref = ht_prev_list[p]; + const Haplotype& ht = haplotypes[ht_ref.first]; + assert_lt(ht_ref.second, ht.alts.size()); + index_t alt_id = ht.alts[ht_ref.second]; + assert_gt(tmp_edits.size(), 0); + const ALT& alt = alts[tmp_edits[0].snpID]; + const ALT& ht_alt = alts[alt_id]; + if(!alt.isSame(ht_alt)) continue; + if(ht_ref.second == 0) { + cmp_ht.left = cmp_ht.right = joinedOff; + add_haplotypes(alts, + haplotypes, + haplotype_maxrights, + cmp_ht, + ht_list, + rdlen); + } else { + ht_list.push_back(ht_ref); + ht_list.back().second--; + } + } + } + if(ht_list.size() <= 0) { + cmp_ht.left = cmp_ht.right = joinedOff; + add_haplotypes(alts, + haplotypes, + haplotype_maxrights, + cmp_ht, + ht_list, + rdlen, + true, // left_ext? + dep == 0); // initial? 
+ } + } + + assert_geq(rdoff, 0); + const index_t orig_nedits = (index_t)tmp_edits.size(); + for(; alt_range.second > alt_range.first; alt_range.second--) { + ALT alt = alts[alt_range.second]; + if(alt.pos >= joinedOff) continue; + if(alt.splicesite()) { + if(alt.left < alt.right) continue; + index_t tmp = alt.left; + alt.left = alt.right; + alt.right = tmp; + } + if(alt.deletion()) { + if(!alt.reversed) continue; + alt.pos = alt.pos - alt.len + 1; + } + if(alt.exon()) continue; + bool alt_compatible = false; + int rf_i = (int)rflen - 1, rd_i = (int)rdoff; + int diff = 0; + if(alt.type == ALT_SNP_SGL) { + diff = joinedOff - alt.pos - 1; + } else if(alt.type == ALT_SNP_DEL) { + if(alt.pos + alt.len >= joinedOff) continue; + diff = joinedOff - (alt.pos + alt.len); + } else if(alt.type == ALT_SNP_INS) { + diff = joinedOff - alt.pos; + } else { + assert(alt.splicesite()); + diff = joinedOff - (alt.right + 1); + } + if(rf_i < diff || rd_i < diff) continue; + rf_i -= diff; + rd_i -= diff; + int rd_bp = rdseq[rd_i]; + if(rd_i < min_rd_i) { + if(alt.type == ALT_SNP_INS) { + if(rd_i + 1 >= min_rd_i) continue; + } + break; + } + + // Check to see if there is a haplotype that supports this alt + if(ht_list.size() > 0 && alt.snp()) { + bool ht_found = false; + for(index_t h = 0; h < ht_list.size(); h++) { + const pair& ht_ref = ht_list[h]; + const Haplotype& ht = haplotypes[ht_ref.first]; + assert_lt(ht_ref.second, ht.alts.size()); + index_t ht_alti = ht.alts[ht_ref.second]; + const ALT& ht_alt = alts[ht_alti]; + if(alts[alt_range.second].isSame(ht_alt)) { + ht_found = true; + break; + } + } + if(!ht_found) continue; + } + + if(alt.type == ALT_SNP_SGL) { + if(rd_bp == (int)alt.seq) { + int rf_bp = get_ref_base(threeN, refConversion_3N, rfseq[rf_i]); + Edit e( + rd_i, + "ACGTN"[rf_bp], + "ACGTN"[rd_bp], + EDIT_TYPE_MM, + true, /* chars? 
*/ + alt_range.second); + tmp_edits.insert(e, 0); + rd_i--; + rf_i--; + alt_compatible = true; + } + } else if(alt.type == ALT_SNP_DEL) { + if(rfoff + rf_i > (int)alt.len) { + if(rf_i > (int)alt.len) { + for(index_t i = 0; i < alt.len; i++) { + int rf_bp = get_ref_base(threeN, refConversion_3N, rfseq[rf_i - i]); + Edit e( + rd_i + 1, + "ACGTN"[rf_bp], + '-', + EDIT_TYPE_READ_GAP, + true, /* chars? */ + alt_range.second); + tmp_edits.insert(e, 0); + } + + } else { + // long deletions + int new_rfoff = rfoff - alt.len; + index_t new_rflen = rf_i + alt.len + 10; + if(raw_refbufs.size() <= dep + 1) raw_refbufs.expand(); + SStringExpandable& raw_refbuf = raw_refbufs[dep + 1]; + raw_refbuf.resize(new_rflen + 16 + 16); + raw_refbuf.fill(0x4); + int off = ref.getStretch( + reinterpret_cast(raw_refbuf.wbuf() + 16), + tidx, + max(new_rfoff, 0), + new_rfoff > 0 ? new_rflen : new_rflen + new_rfoff + ASSERT_ONLY(, destU32)); + assert_lt(off, 16); + const char* new_rfseq = raw_refbuf.wbuf() + 16 + off + min(new_rfoff, 0); + for(int i = 0; i < alt.len; i++) { + int rf_bp = get_ref_base(threeN, refConversion_3N, new_rfseq[rf_i - i + alt.len]); + Edit e( + rd_i + 1, + "ACGTN"[rf_bp], + '-', + EDIT_TYPE_READ_GAP, + true, /* chars? */ + alt_range.second); + tmp_edits.insert(e, 0); + } + } + rf_i -= (int)alt.len; + alt_compatible = true; + } + } else if(alt.type == ALT_SNP_INS) { + if(rd_i > (int)alt.len) { + bool same_seq = true; + for(index_t i = 0; i < alt.len; i++) { + rd_bp = rdseq[rd_i - i]; + int snp_bp = (alt.seq >> (i << 1)) & 0x3; + if(rd_bp != snp_bp) { + same_seq = false; + break; + } + Edit e( + rd_i - i, + '-', + "ACGTN"[rd_bp], + EDIT_TYPE_REF_GAP, + true, /* chars? 
*/ + alt_range.second); + tmp_edits.insert(e, 0); + } + if(same_seq) { + rd_i -= (int)alt.len; + alt_compatible = true; + } + } + } else if(alt.type == ALT_SPLICESITE) { + bool add_splicesite = true; + if(rd_i == rdoff && prev_alt_type == ALT_SPLICESITE) { + add_splicesite = false; + } + if(add_splicesite) { + assert_lt(rd_i, rflen); + assert_lt(alt.left, alt.right); + index_t intronLen = alt.right - alt.left + 1; + Edit e(rd_i + 1, + 0, + 0, + EDIT_TYPE_SPL, + intronLen, + alt.fw ? SPL_FW : SPL_RC, + true, /* known splice site? */ + false); /* chrs? */ + tmp_edits.insert(e, 0); + alt_compatible = true; + } + } + if(alt_compatible) { + numALTsTried++; + assert_leq(rd_i, (int)rdoff); + if(rd_i < 0) { + best_rdoff = rd_i; + edits = tmp_edits; + return rdlen; + } + index_t next_joinedOff = alt.pos; + int next_rfoff = rfoff, next_rdoff = rd_i; + const char* next_rfseq = rfseq; + int next_rflen = rf_i + 1, next_rdlen = rd_i + 1; + if(alt.splicesite()) { + assert_lt(alt.left, alt.right); + next_joinedOff = alt.left; + index_t intronLen = alt.right - alt.left + 1; + assert_geq(next_rfoff, intronLen); + next_rfoff -= intronLen; + next_rfseq = NULL; + } + if(next_rflen < next_rdlen) { + int add_len = next_rdlen + 10 - next_rflen; + if(next_rfoff < add_len) add_len = next_rfoff; + next_rfoff -= add_len; + next_rflen += add_len; + next_rfseq = NULL; + } + index_t alignedLen = alignWithALTs_recur( + alts, + haplotypes, + haplotype_maxrights, + next_joinedOff, + rdseq, + rdoff_add, + next_rdoff, + next_rdlen, + ref, + raw_refbufs, + ASSERT_ONLY(destU32,) + tmp_edits, + best_rdoff, + next_rfseq, + tidx, + next_rfoff, + next_rflen, + left, + edits, + mm, + ht_llist, + cmp_ht, + candidate_edits, + tmp_numNs, + numNs, + dep + 1, + gpol, + numALTsTried, + cycle_3N, + alt.type); + if(alignedLen == next_rdlen) return rdlen; + } + // Restore to the earlier state + assert_leq(orig_nedits, tmp_edits.size()); + if(orig_nedits < tmp_edits.size()) tmp_edits.erase(0, tmp_edits.size() - 
orig_nedits); + } + return 0; + } else { + index_t tmp_mm = 0; + index_t max_rd_i = 0; + index_t mm_max_rd_i = 0; + index_t mm_tmp_numNs = 0; + for(index_t rf_i = 0; rf_i < rflen && mm_max_rd_i < rdlen; rf_i++, mm_max_rd_i++) { + int rf_bp = get_ref_base(threeN, refConversion_3N, rfseq[rf_i]); + int rd_bp = rdseq[rdoff + mm_max_rd_i]; + if(rf_bp != rd_bp || rd_bp == 4) { + if(tmp_mm == 0) { + max_rd_i = mm_max_rd_i; + } + if(tmp_mm >= mm) break; + tmp_mm++; + Edit e( + mm_max_rd_i + rdoff_add, + "ACGTN"[rf_bp], + "ACGTN"[rd_bp], + EDIT_TYPE_MM); + tmp_edits.push_back(e); + } + if(rf_bp == 4) { + if(tmp_mm == 0) tmp_numNs++; + mm_tmp_numNs++; + } + } + if(tmp_mm == 0) { + max_rd_i = mm_max_rd_i; + } + if(mm_max_rd_i + rdoff > best_rdoff) { + best_rdoff = mm_max_rd_i + rdoff; + edits = tmp_edits; + if(numNs != NULL) *numNs = mm_tmp_numNs; + if(candidate_edits != NULL) candidate_edits->clear(); + } else if(mm_max_rd_i + rdoff == best_rdoff) { + if(candidate_edits != NULL) { + candidate_edits->expand(); + candidate_edits->back() = tmp_edits; + } + } + if(mm_max_rd_i == rflen) { + return mm_max_rd_i; + } + + // Find SNPs included in this region + pair alt_range; + { + ALT cmp_alt; + const index_t minK = 16; + index_t rd_diff = (max_rd_i > minK ? 
max_rd_i - minK : 0); + if(gpol.enableCODIS()) { + rd_diff = 0; + } + cmp_alt.pos = joinedOff + rd_diff; + alt_range.first = alt_range.second = (index_t)alts.bsearchLoBound(cmp_alt); + if(alt_range.first >= alts.size()) return 0; + for(; alt_range.second < alts.size(); alt_range.second++) { + const ALT& alt = alts[alt_range.second]; + if(alt.splicesite()) { + if(alt.left > alt.right) continue; + } + if(alt.deletion()) { + if(alt.reversed) continue; + } + if(alt.left > joinedOff + max_rd_i) break; + } + } + if(mm_max_rd_i == rdlen) { + bool further_search = false; + for(index_t s = alt_range.first; s < alt_range.second; s++) { + const ALT& alt = alts[s]; + if(alt.splicesite() && alt.left < alt.right) { + further_search = true; + break; + } + } + if(!further_search) return mm_max_rd_i; + } + if(tmp_mm > 0) { + tmp_edits.resize(tmp_edits.size() - tmp_mm); + tmp_mm = 0; + } + + // Update and find Haplotypes + EList >& ht_list = ht_llist[dep]; + ht_list.clear(); + if(gpol.useHaplotype() && haplotypes.size() > 0) { + if(dep > 0) { + EList >& ht_prev_list = ht_llist[dep-1]; + for(index_t p = 0; p < ht_prev_list.size(); p++) { + const pair& ht_ref = ht_prev_list[p]; + const Haplotype& ht = haplotypes[ht_ref.first]; + if(ht_ref.second < ht.alts.size()) { + index_t alt_id = ht.alts[ht_ref.second]; + assert_gt(tmp_edits.size(), 0); + const ALT& alt = alts[tmp_edits.back().snpID]; + const ALT& ht_alt = alts[alt_id]; + if(!alt.isSame(ht_alt)) continue; + } + if(ht_ref.second + 1 >= ht.alts.size() && joinedOff > ht.right) { + cmp_ht.left = cmp_ht.right = joinedOff; + add_haplotypes(alts, + haplotypes, + haplotype_maxrights, + cmp_ht, + ht_list, + rdlen, + false); // left_ext? + } else { + ht_list.push_back(ht_ref); + ht_list.back().second++; + } + } + } + if(ht_list.size() <= 0) { + cmp_ht.left = cmp_ht.right = joinedOff; + add_haplotypes(alts, + haplotypes, + haplotype_maxrights, + cmp_ht, + ht_list, + rdlen, + false, // left_ext? + dep == 0 && rdoff_add == 0); // initial? 
+ } + } + + const index_t orig_nedits = (index_t)tmp_edits.size(); + for(; alt_range.first < alt_range.second; alt_range.first++) { + const ALT& alt = alts[alt_range.first]; + if(alt.splicesite()) { + if(alt.left > alt.right) continue; + } + if(alt.exon()) continue; + if(alt.deletion()) { + if(alt.reversed) continue; + } + bool alt_compatible = false; + assert_leq(joinedOff, alt.pos); + index_t rf_i, rd_i; + rf_i = rd_i = alt.pos - joinedOff; + if(rd_i >= rdlen) continue; + assert_leq(rd_i, max_rd_i); + int rf_bp = get_ref_base(threeN, refConversion_3N, rfseq[rf_i]); + int rd_bp = rdseq[rdoff + rd_i]; + + // Check to see if there is a haplotype that supports this alt + if(ht_list.size() > 0 && alt.snp()) { + bool ht_found = false; + for(index_t h = 0; h < ht_list.size(); h++) { + const pair& ht_ref = ht_list[h]; + const Haplotype& ht = haplotypes[ht_ref.first]; + if(ht_ref.second >= ht.alts.size()) + continue; + index_t ht_alti = ht.alts[ht_ref.second]; + const ALT& ht_alt = alts[ht_alti]; + if(alts[alt_range.first].isSame(ht_alt)) { + ht_found = true; + break; + } + } + if(!ht_found) continue; + } + + if(alt.type == ALT_SNP_SGL) { + if(rd_bp == (int)alt.seq) { + Edit e( + rd_i + rdoff_add, + "ACGTN"[rf_bp], + "ACGTN"[rd_bp], + EDIT_TYPE_MM, + true, /* chars? */ + alt_range.first); + tmp_edits.push_back(e); + rd_i++; + rf_i++; + alt_compatible = true; + } + } else if(alt.type == ALT_SNP_DEL) { + bool try_del = rd_i > 0; + if(rd_i == 0 && dep > 0) { + // Avoid consecutive deletions + assert_gt(tmp_edits.size(), 0); + const Edit& e = tmp_edits.back(); + if(e.type != EDIT_TYPE_READ_GAP) { + try_del = true; + } + } + if(try_del) { + if(rf_i + alt.len <= rflen) { + for(index_t i = 0; i < alt.len; i++) { + rf_bp = get_ref_base(threeN, refConversion_3N, rfseq[rf_i + i]); + Edit e( + rd_i + rdoff_add, + "ACGTN"[rf_bp], + '-', + EDIT_TYPE_READ_GAP, + true, /* chars? 
*/ + alt_range.first); + tmp_edits.push_back(e); + } + rf_i += alt.len; + alt_compatible = true; + } else { + // long deletions + index_t new_rflen = rf_i + alt.len + 10; + if(raw_refbufs.size() <= dep + 1) raw_refbufs.expand(); + SStringExpandable& raw_refbuf = raw_refbufs[dep + 1]; + raw_refbuf.resize(new_rflen + 16 + 16); + raw_refbuf.fill(0x4); + int off = ref.getStretch( + reinterpret_cast(raw_refbuf.wbuf() + 16), + tidx, + max(rfoff, 0), + rfoff > 0 ? new_rflen : new_rflen + rfoff + ASSERT_ONLY(, destU32)); + assert_lt(off, 16); + const char* new_rfseq = raw_refbuf.wbuf() + 16 + off + min(rfoff, 0); + for(index_t i = 0; i < alt.len; i++) { + rf_bp = get_ref_base(threeN, refConversion_3N, new_rfseq[rf_i + i]); + Edit e( + rd_i + rdoff_add, + "ACGTN"[rf_bp], + '-', + EDIT_TYPE_READ_GAP, + true, /* chars? */ + alt_range.first); + tmp_edits.push_back(e); + } + rf_i += alt.len; + alt_compatible = true; + } + } + } else if(alt.type == ALT_SNP_INS) { + if(rd_i + alt.len <= rdlen && rf_i > 0) { + bool same_seq = true; + for(index_t i = 0; i < alt.len; i++) { + rd_bp = rdseq[rdoff + rd_i + i]; + int snp_bp = (alt.seq >> ((alt.len - i - 1) << 1)) & 0x3; + if(rd_bp != snp_bp) { + same_seq = false; + break; + } + Edit e( + rd_i + i + rdoff_add, + '-', + "ACGTN"[rd_bp], + EDIT_TYPE_REF_GAP, + true, /* chars? */ + alt_range.first); + tmp_edits.push_back(e); + } + if(same_seq) { + rd_i += alt.len; + alt_compatible = true; + } + } + } else if(alt.type == ALT_SPLICESITE) { + bool try_splice = rd_i > 0; + if(rd_i == 0 && dep > 0) { + // Avoid consecutive introns + assert_gt(tmp_edits.size(), 0); + const Edit& e = tmp_edits.back(); + if(e.type != EDIT_TYPE_SPL) { + try_splice = true; + } + } + if(try_splice) { + assert_lt(rd_i, rflen); + index_t intronLen = alt.right - alt.left + 1; + Edit e(rd_i + rdoff_add, + 0, + 0, + EDIT_TYPE_SPL, + intronLen, + alt.fw ? SPL_FW : SPL_RC, + true, /* known splice site? */ + false); /* chrs? 
*/ + tmp_edits.push_back(e); + alt_compatible = true; + } + } + if(alt_compatible) { + numALTsTried++; + if(rd_i == rdlen) { + assert_leq(best_rdoff, rdoff + rd_i); + if(best_rdoff < rdoff + rd_i) { + if(candidate_edits != NULL) candidate_edits->clear(); + } + if(candidate_edits != NULL) { + candidate_edits->expand(); + candidate_edits->back() = tmp_edits; + } + best_rdoff = rdoff + rd_i; + edits = tmp_edits; + return rd_i; + } + index_t next_joinedOff = 0; + int next_rfoff = rfoff + rf_i, next_rdoff = rdoff + rd_i; + const char* next_rfseq = rfseq + rf_i; + index_t next_rflen = rflen - rf_i, next_rdlen = rdlen - rd_i; + if(alt.type == ALT_SNP_SGL) { + next_joinedOff = alt.pos + 1; + } else if(alt.type == ALT_SNP_DEL) { + next_joinedOff = alt.pos + alt.len; + if(rflen <= rf_i) { + next_rflen = 0; // Will reset next_rfseq and next_rflen below + } + } else if(alt.type == ALT_SNP_INS) { + next_joinedOff = alt.pos; + } else if(alt.type == ALT_SPLICESITE) { + next_joinedOff = alt.right + 1; + index_t intronLen = alt.right - alt.left + 1; + next_rfoff += intronLen; + next_rfseq = NULL; + } else { + assert(false); + } + if(next_rflen < next_rdlen) { + next_rflen = next_rdlen + 10; + next_rfseq = NULL; + } + index_t alignedLen = alignWithALTs_recur( + alts, + haplotypes, + haplotype_maxrights, + next_joinedOff, + rdseq, + rdoff_add + rd_i, + next_rdoff, + next_rdlen, + ref, + raw_refbufs, + ASSERT_ONLY(destU32,) + tmp_edits, + best_rdoff, + next_rfseq, + tidx, + next_rfoff, + next_rflen, + left, + edits, + mm, + ht_llist, + cmp_ht, + candidate_edits, + tmp_numNs, + numNs, + dep + 1, + gpol, + numALTsTried, + cycle_3N, + alt.type); + if(alignedLen > 0) { + assert_leq(rdoff + rd_i + alignedLen, best_rdoff); + bool search_further = false; + if(alt.splicesite()) { + for(index_t sf = alt_range.first + 1; sf < alt_range.second; sf++) { + const ALT& alt2 = alts[sf]; + if(alt2.splicesite() && alt2.left < alt2.right) { + search_further = true; + break; + } + } + } + 
if(!search_further) { + if(rd_i + alignedLen == rdlen) { + return rd_i + alignedLen; + } + } + } + } + + // Restore to the earlier state + assert_leq(orig_nedits, tmp_edits.size()); + if(orig_nedits < tmp_edits.size()) tmp_edits.resize(orig_nedits); + } + return 0; + } +} + + +/** + * For alignment involving indel, move the indels + * to the left most possible position + */ +template +void GenomeHit::leftAlign(const Read& rd) +{ + ASSERT_ONLY(const index_t rdlen = (index_t)rd.length()); + const BTDnaString& seq = _fw ? rd.patFw : rd.patRc; + for(index_t ei = 0; ei < _edits->size(); ei++) { + Edit& edit = (*_edits)[ei]; + if(edit.type != EDIT_TYPE_READ_GAP && edit.type != EDIT_TYPE_REF_GAP) + continue; + if(edit.snpID != (index_t)INDEX_MAX) + continue; + index_t ei2 = ei + 1; + for(; ei2 < _edits->size(); ei2++) { + const Edit& edit2 = (*_edits)[ei2]; + if(edit2.type != edit.type) break; + if(edit.type == EDIT_TYPE_READ_GAP) { + if(edit.pos != edit2.pos) break; + } else { + assert_eq(edit.type, EDIT_TYPE_REF_GAP); + if(edit.pos + ei2 - ei != edit2.pos) break; + } + } + assert_gt(ei2, 0); + ei2 -= 1; + Edit& edit2 = (*_edits)[ei2]; + int b = 0; + if(ei > 0) { + const Edit& prev_edit = (*_edits)[ei - 1]; + b = prev_edit.pos; + } + int l = edit.pos - 1; + while(l > b) { + assert_lt(l, (int)rdlen); + int rdc = seq[_rdoff + l]; + assert_range(0, 4, rdc); + char rfc = (edit.type == EDIT_TYPE_READ_GAP ? edit2.chr : edit2.qchr); + if(rfc != "ACGTN"[rdc]) break; + for(int ei3 = ei2; ei3 > (int)ei; ei3--) { + if(edit.type == EDIT_TYPE_READ_GAP) { + (*_edits)[ei3].chr = (*_edits)[ei3 - 1].chr; + } else { + (*_edits)[ei3].qchr = (*_edits)[ei3 - 1].qchr; + } + (*_edits)[ei3].pos -= 1; + } + rdc = seq[_rdoff + l]; + assert_range(0, 4, rdc); + if(edit.type == EDIT_TYPE_READ_GAP) { + edit.chr = "ACGTN"[rdc]; + } else { + edit.qchr = "ACGTN"[rdc]; + } + edit.pos -= 1; + l--; + } + ei = ei2; + } +} + +#ifndef NDEBUG +/** + * Check that hit is sane w/r/t read. 
+ */ +template +bool GenomeHit::repOk(const Read& rd, const BitPairReference& ref) +{ + if(_len <= 0) return true; + assert(_sharedVars != NULL); + SStringExpandable& raw_refbuf = _sharedVars->raw_refbuf; + SStringExpandable& destU32 = _sharedVars->destU32; + + BTDnaString& editstr = _sharedVars->editstr; + BTDnaString& partialseq = _sharedVars->partialseq; + BTDnaString& refstr = _sharedVars->refstr; + EList& reflens = _sharedVars->reflens; + EList& refoffs = _sharedVars->refoffs; + + editstr.clear(); partialseq.clear(); refstr.clear(); + reflens.clear(); refoffs.clear(); + + const BTDnaString& seq = _fw ? rd.patFw : rd.patRc; + partialseq.install(seq.buf() + this->_rdoff, (size_t)this->_len); + Edit::toRef(partialseq, *_edits, editstr); + + index_t refallen = 0; + int64_t reflen = 0; + int64_t refoff = this->_toff; + refoffs.push_back((index_t)refoff); + size_t eidx = 0; + for(size_t i = 0; i < _len; i++, reflen++, refoff++) { + while(eidx < _edits->size() && (*_edits)[eidx].pos == i) { + const Edit& edit = (*_edits)[eidx]; + if(edit.isReadGap()) { + reflen++; + refoff++; + } else if(edit.isRefGap()) { + reflen--; + refoff--; + } + if(edit.isSpliced()) { + assert_gt(reflen, 0); + refallen += reflen; + reflens.push_back((index_t)reflen); + reflen = 0; + refoff += edit.splLen; + assert_gt(refoff, 0); + refoffs.push_back((index_t)refoff); + } + eidx++; + } + } + assert_gt(reflen, 0); + refallen += (index_t)reflen; + reflens.push_back((index_t)reflen); + assert_gt(reflens.size(), 0); + assert_gt(refoffs.size(), 0); + assert_eq(reflens.size(), refoffs.size()); + refstr.clear(); + + int refConversion_3N[5] = {0, 1, 2, 3, 4}; + if (threeN){ + if (((rd.threeN_cycle == threeN_type1conversion_FW || rd.threeN_cycle == threeN_type2conversion_RC) && !rd.oppositeConversion_3N) || + ((rd.threeN_cycle == threeN_type1conversion_RC || rd.threeN_cycle == threeN_type2conversion_FW) && rd.oppositeConversion_3N)) { + // type 1 conversion + refConversion_3N[asc2dna[hs3N_convertedFrom]] 
= asc2dna[hs3N_convertedTo]; + } else { + // type 2 conversion + refConversion_3N[asc2dna[hs3N_convertedFromComplement]] = asc2dna[hs3N_convertedToComplement]; + } + } + + for(index_t i = 0; i < reflens.size(); i++) { + assert_gt(reflens[i], 0); + if(i > 0) { + assert_gt(refoffs[i], refoffs[i-1]); + } + raw_refbuf.resize(reflens[i] + 16); + raw_refbuf.clear(); + int off = ref.getStretch( + reinterpret_cast(raw_refbuf.wbuf()), + (size_t)this->_tidx, + (size_t)max(refoffs[i], 0), + reflens[i], + destU32); + assert_leq(off, 16); + for(index_t j = 0; j < reflens[i]; j++) { + char rfc = refConversion_3N[*(raw_refbuf.buf()+off+j)]; + refstr.append(rfc); + } + char* bufA = raw_refbuf.wbuf() + off; + string test_string = ""; + string bases = "ACGTN"; + for (int k = 0; k < reflens[i]; k++) { + int a = bufA[k]; + test_string += bases[a]; + } + } + if(refstr != editstr) { + cerr << "Decoded nucleotides and edits don't match reference:" << endl; + //cerr << " score: " << score.score() + //<< " (" << gaps << " gaps)" << endl; + cerr << " edits: "; + Edit::print(cerr, *_edits); + cerr << endl; + cerr << " decoded nucs: " << partialseq << endl; + cerr << " edited nucs: " << editstr << endl; + cerr << " reference nucs: " << refstr << endl; + assert(0); + } + + return true; +} +#endif + +/** + * Calculate alignment score + */ +template +int64_t GenomeHit::calculateScore( + const Read& rd, + SpliceSiteDB& ssdb, + const Scoring& sc, + index_t minK_local, + index_t minIntronLen, + index_t maxIntronLen, + index_t minAnchorLen, + index_t minAnchorLen_noncan, + const BitPairReference& ref) +{ + int64_t score = 0; + double splicescore = 0; + int64_t localscore = 0; + index_t numsplices = 0; + index_t mm = 0; + const BTDnaString& seq = _fw ? rd.patFw : rd.patRc; + const BTString& qual = _fw ? 
rd.qual : rd.qualRev; + index_t rdlen = (index_t)seq.length(); + int64_t toff_base = _toff; + bool conflict_splicesites = false; + uint8_t whichsense = SPL_UNKNOWN; + for(index_t i = 0; i < _edits->size(); i++) { + const Edit& edit = (*_edits)[i]; + assert_lt(edit.pos, _len); + if(edit.type == EDIT_TYPE_MM) { + if(edit.snpID == std::numeric_limits::max()) { + int pen = sc.score( + dna2col[edit.qchr] - '0', + asc2dnamask[edit.chr], + qual[this->_rdoff + edit.pos] - 33); + score += pen; + mm++; + } + } else if(edit.type == EDIT_TYPE_SPL) { + // int left = toff_base + edit.pos - 1; + // assert_geq(left, 0); + // int right = left + edit.splLen + 1; + // assert_geq(right, 0); + if(!edit.knownSpl) { + int left_anchor_len = _rdoff + edit.pos; + assert_gt(left_anchor_len, 0); + assert_lt(left_anchor_len, (int)rdlen); + int right_anchor_len = rdlen - left_anchor_len; + index_t mm2 = 0; + for(index_t j = i + 1; j < _edits->size(); j++) { + const Edit& edit2 = (*_edits)[j]; + if(edit2.type == EDIT_TYPE_MM || + edit2.type == EDIT_TYPE_READ_GAP || + edit2.type == EDIT_TYPE_REF_GAP) mm2++; + } + left_anchor_len -= (mm * 2); + right_anchor_len -= (mm2 * 2); + int shorter_anchor_len = min(left_anchor_len, right_anchor_len); + if(shorter_anchor_len <= 0) shorter_anchor_len = 1; + assert_gt(shorter_anchor_len, 0); + uint32_t intronLen_thresh = ((edit.splDir == SPL_FW || edit.splDir == SPL_RC) ? 
+ MaxIntronLen(shorter_anchor_len, minAnchorLen) : + MaxIntronLen_noncan(shorter_anchor_len, minAnchorLen_noncan)); + if(intronLen_thresh < maxIntronLen) { + if(edit.splLen > intronLen_thresh) { + score += MIN_I32; + } + + if(edit.splDir == SPL_FW || edit.splDir == SPL_RC) { + float probscore = ssdb.probscore(edit.donor_seq, edit.acceptor_seq); + + float probscore_thresh = 0.8f; + if(edit.splLen >> 16) probscore_thresh = 0.99f; + else if(edit.splLen >> 15) probscore_thresh = 0.97f; + else if(edit.splLen >> 14) probscore_thresh = 0.94f; + else if(edit.splLen >> 13) probscore_thresh = 0.91f; + else if(edit.splLen >> 12) probscore_thresh = 0.88f; + if(probscore < probscore_thresh) score += MIN_I32; + } + if(shorter_anchor_len == left_anchor_len) { + if(_trim5 > 0) score += MIN_I32; + for(int j = (int)i - 1; j >= 0; j--) { + if((*_edits)[j].type == EDIT_TYPE_MM || + (*_edits)[j].type == EDIT_TYPE_READ_GAP || + (*_edits)[j].type == EDIT_TYPE_REF_GAP) + score += MIN_I32; + } + } else { + if(_trim3 > 0) score += MIN_I32; + for(index_t j = i + 1; j < _edits->size(); j++) { + if((*_edits)[j].type == EDIT_TYPE_MM || + (*_edits)[j].type == EDIT_TYPE_READ_GAP || + (*_edits)[j].type == EDIT_TYPE_REF_GAP) + score += MIN_I32; + } + } + } + + if(edit.snpID == std::numeric_limits::max()) { + if(edit.splDir == SPL_FW || edit.splDir == SPL_RC) { + score -= sc.canSpl((int)edit.splLen); + } else { + score -= sc.noncanSpl((int)edit.splLen); + } + } + + // daehwan - for debugging purposes + if(shorter_anchor_len <= 15) { + numsplices += 1; + splicescore += (double)edit.splLen; + } + } + + if(!conflict_splicesites) { + if(whichsense == SPL_UNKNOWN) { + whichsense = edit.splDir; + } else if(edit.splDir != SPL_UNKNOWN) { + assert_neq(whichsense, SPL_UNKNOWN); + if(edit.splDir == SPL_FW || edit.splDir == SPL_SEMI_FW) { + if(whichsense != SPL_FW && whichsense != SPL_SEMI_FW) { + conflict_splicesites = true; + } + } + if(edit.splDir == SPL_RC || edit.splDir == SPL_SEMI_RC) { + if(whichsense != 
SPL_RC && whichsense != SPL_SEMI_RC) { + conflict_splicesites = true; + } + } + } + } + + toff_base += edit.splLen; + } else if(edit.type == EDIT_TYPE_READ_GAP) { + bool open = true; + if(i > 0 && + (*_edits)[i-1].type == EDIT_TYPE_READ_GAP && + (*_edits)[i-1].pos == edit.pos) { + open = false; + } + if(edit.snpID == std::numeric_limits::max()) { + if(open) score -= sc.readGapOpen(); + else score -= sc.readGapExtend(); + } + toff_base++; + } else if(edit.type == EDIT_TYPE_REF_GAP) { + bool open = true; + if(i > 0 && + (*_edits)[i-1].type == EDIT_TYPE_REF_GAP && + (*_edits)[i-1].pos + 1 == edit.pos) { + open = false; + } + if(edit.snpID == std::numeric_limits::max()) { + if(open) score -= sc.refGapOpen(); + else score -= sc.refGapExtend(); + } + toff_base--; + } +#ifndef NDEBUG + else { + assert(false); + } +#endif + } + + // Penalty for soft-clipping + for(index_t i = 0; i < _trim5; i++) { + score -= sc.sc(qual[i]); + } + + for(index_t i = 0; i < _trim3; i++) { + score -= sc.sc(qual[i]); + } + + if(conflict_splicesites) { + score -= sc.conflictSpl(); + } + + if (numsplices > 1) splicescore /= (double)numsplices; + score += (_len - mm) * sc.match(); + _score = score; + _splicescore = splicescore; + _localscore = localscore; + + return score; +} + +/** + * Encapsulates counters that measure how much work has been done by + * hierarchical indexing + */ +struct HIMetrics { + + HIMetrics() : mutex_m() { + reset(); + } + + void reset() { + anchoratts = 0; + localatts = 0; + localindexatts = 0; + localextatts = 0; + localsearchrecur = 0; + globalgenomecoords = 0; + localgenomecoords = 0; + } + + void init( + uint64_t localatts_, + uint64_t anchoratts_, + uint64_t localindexatts_, + uint64_t localextatts_, + uint64_t localsearchrecur_, + uint64_t globalgenomecoords_, + uint64_t localgenomecoords_) + { + localatts = localatts_; + anchoratts = anchoratts_; + localindexatts = localindexatts_; + localextatts = localextatts_; + localsearchrecur = localsearchrecur_; + 
globalgenomecoords = globalgenomecoords_; + localgenomecoords = localgenomecoords_; + } + + /** + * Merge (add) the counters in the given HIMetrics object into this + * object. This is the only safe way to update a HIMetrics shared + * by multiple threads. + */ + void merge(const HIMetrics& r, bool getLock = false) { + ThreadSafe ts(&mutex_m, getLock); + localatts += r.localatts; + anchoratts += r.anchoratts; + localindexatts += r.localindexatts; + localextatts += r.localextatts; + localsearchrecur += r.localsearchrecur; + globalgenomecoords += r.globalgenomecoords; + localgenomecoords += r.localgenomecoords; + } + + uint64_t localatts; // # attempts of local search + uint64_t anchoratts; // # attempts of anchor search + uint64_t localindexatts; // # attempts of local index search + uint64_t localextatts; // # attempts of extension search + uint64_t localsearchrecur; + uint64_t globalgenomecoords; + uint64_t localgenomecoords; + + MUTEX_T mutex_m; +}; + +/** + * With a hierarchical indexing, SplicedAligner provides several alignment strategies + * , which enable effective alignment of RNA-seq reads + */ +template +class HI_Aligner { + +public: + + /** + * Initialize with index. 
+ */ + HI_Aligner( + const GFM& gfm, + bool anchorStop = true, + uint64_t threads_rids_mindist = 0) : + _anchorStop(anchorStop), + _gwstate(GW_CAT), + _gwstate_local(GW_CAT), + _thread_rids_mindist(threads_rids_mindist) + { + index_t genomeLen = gfm.gh().len(); + _minK = 0; + while(genomeLen > 0) { + genomeLen >>= 2; + _minK++; + } + _minK_local = 8; + + } + + + HI_Aligner() { + } + + /** + */ + void initRead(Read *rd, bool nofw, bool norc, TAlScore minsc, TAlScore maxpen, bool rightendonly = false) { + assert(rd != NULL); + _rds[0] = rd; + _rds[1] = NULL; + _paired = false; + _rightendonly = rightendonly; + _nofw[0] = nofw; + _nofw[1] = true; + _norc[0] = norc; + _norc[1] = true; + _minsc[0] = minsc; + _minsc[1] = INDEX_MAX; + _maxpen[0] = maxpen; + _maxpen[1] = INDEX_MAX; + for(size_t fwi = 0; fwi < 2; fwi++) { + bool fw = (fwi == 0); + _hits[0][fwi].init(fw, (index_t)_rds[0]->length()); + } + _genomeHits.clear(); + _genomeHits_rep[0].clear(); + _hits_searched[0].clear(); + assert(!_paired); + } + + /** + */ + void initReads(Read *rds[2], bool nofw[2], bool norc[2], TAlScore minsc[2], TAlScore maxpen[2]) { + assert(rds[0] != NULL && rds[1] != NULL); + _paired = true; + _rightendonly = false; + for(size_t rdi = 0; rdi < 2; rdi++) { + _rds[rdi] = rds[rdi]; + _nofw[rdi] = nofw[rdi]; + _norc[rdi] = norc[rdi]; + _minsc[rdi] = minsc[rdi]; + _maxpen[rdi] = maxpen[rdi]; + for(size_t fwi = 0; fwi < 2; fwi++) { + bool fw = (fwi == 0); + _hits[rdi][fwi].init(fw, (index_t)_rds[rdi]->length()); + } + _hits_searched[rdi].clear(); + } + _genomeHits.clear(); + _genomeHits_rep[0].clear(); + _genomeHits_rep[1].clear(); + _concordantIdxInspected.first = _concordantIdxInspected.second = 0; + assert(_paired); + assert(!_rightendonly); + } + + /** + * Aligns a read or a pair + * This funcion is called per read or pair + */ + virtual + int go( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const 
GFM& gfm, + const GFM* rgfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const ALTDB& raltdb, + const BitPairReference& ref, + const BitPairReference* rref, + SwAligner& swa, + SpliceSiteDB& ssdb, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink) + { + const ReportingParams& rp = sink.reportingParams(); + + index_t rdi; + bool fw; + bool found[2][2] = {{true, true}, {this->_paired, this->_paired}}; + // given read and its reverse complement + // (and mate and the reverse complement of mate in case of pair alignment), + // pick up one with best partial alignment + while(nextBWT(sc, pepol, tpol, gpol, gfm, altdb, ref, rdi, fw, wlm, prm, him, rnd, sink)) { + // given the partial alignment, try to extend it to full alignments + index_t fwi = (fw == true ? 0 : 1); + found[rdi][fwi] = align(sc, pepol, tpol, gpol, gfm, altdb, repeatdb, ref, swa, ssdb, rdi, fw, wlm, prm, swm, him, rnd, sink); + if(!found[0][0] && !found[0][1] && !found[1][0] && !found[1][1]) { + break; + } + + // try to combine this alignment with some of mate alignments + // to produce pair alignment + if(this->_paired) { + pairReads(sc, pepol, tpol, gpol, gfm, altdb, repeatdb, ref, wlm, prm, him, rnd, sink); + // if(sink.bestPair() >= _minsc[0] + _minsc[1]) break; + } + } + + // if no concordant pair is found, try to use alignment of one-end + // as an anchor to align the other-end + if(this->_paired) { + if(sink.numPair() == 0 && + (sink.bestUnp1() >= _minsc[0] || sink.bestUnp2() >= _minsc[1])) { + bool mate_found = false; + const EList *rs[2] = {NULL, NULL}; + sink.getUnp1(rs[0]); assert(rs[0] != NULL); + sink.getUnp2(rs[1]); assert(rs[1] != NULL); + index_t rs_size[2] = {(index_t)rs[0]->size(), (index_t)rs[1]->size()}; + for(index_t i = 0; i < 2; i++) { + for(index_t j = 0; j < rs_size[i]; j++) { + const AlnRes& res = (*rs[i])[j]; + bool fw = (res.orient() == 1); + mate_found |= alignMate( + sc, + pepol, + tpol, + gpol, + 
gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + i, + fw, + wlm, + prm, + swm, + him, + rnd, + sink, + (index_t)res.refid(), + (index_t)res.refoff()); + } + } + + if(mate_found) { + pairReads( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + wlm, + prm, + him, + rnd, + sink); + } + } + } + + + // Determine whether reads map to repetitive sequences + bool repeat[2][2] = {{false, false}, {false, false}}; + bool perform_repeat_alignment = false; + + index_t indexIdx[2] = {0, 0}; + +#if 1 + if(rgfm != NULL && !((RFM*)rgfm)->empty()) { + + // use repeat index to decide whether a read or a pair is from repetitive sequences + indexIdx[0] = ((RFM*)rgfm)->getLocalRFM_idx((*_rds)[0].length()); + if(_paired) { + indexIdx[1] = ((RFM*)rgfm)->getLocalRFM_idx((*_rds)[1].length()); + } + LocalRFM& rfm = ((RFM*)rgfm)->getLocalRFM(indexIdx[0]); + bool skip_repeat[2][2] = {{false, false}, {false, false}}; + if(_paired) { + const EList *rs[2] = {NULL, NULL}; + sink.getPair(rs[0], rs[1]); + assert_eq(rs[0]->size(), rs[1]->size()); + TAlScore bestScore[2][2] = {{_minsc[rdi], _minsc[rdi]}, {_minsc[rdi], _minsc[rdi]}}; + for(size_t r = 0; r < rs[0]->size(); r++) { + const AlnRes& rs1 = (*rs[0])[r]; + const AlnRes& rs2 = (*rs[1])[r]; + TAlScore score = rs1.score().score() + rs2.score().score(); + int fwi[2] = {rs1.fw() ? 0 : 1, rs2.fw() ? 
0 : 1}; + if(score > bestScore[fwi[0]][fwi[1]]) { + bestScore[fwi[0]][fwi[1]] = score; + } + } + for(index_t fwi = 0; fwi < 2; fwi++) { + for(index_t fwi2 = 0; fwi2 < 2; fwi2++) { + if(bestScore[fwi][fwi2] < 0) + continue; + + ReadBWTHit& hit = _hits[0][fwi]; + bool unique = false; + for(size_t hi = 0; hi < hit.offsetSize(); hi++) { + BWTHit& partialHit = hit.getPartialHit(hi); + if(partialHit.len() >= _minK + 8 && partialHit.size() == 1) { + unique = true; + break; + } + } + if(!unique) + continue; + + bool unique2 = false; + ReadBWTHit& hit2 = _hits[1][fwi2]; + for(size_t hi = 0; hi < hit2.offsetSize(); hi++) { + BWTHit& partialHit = hit2.getPartialHit(hi); + if(partialHit.len() >= _minK + 8 && partialHit.size() == 1) { + unique2 = true; + break; + } + } + if(!unique2) + continue; + + skip_repeat[0][fwi] = skip_repeat[1][fwi2] = true; + } + } + } else { + const EList *rs = NULL; + if(rdi == 0) sink.getUnp1(rs); + else sink.getUnp2(rs); + + TAlScore bestScore[2] = {_minsc[rdi], _minsc[rdi]}; + for(index_t r = 0; r < rs->size(); r++) { + TAlScore score = (*rs)[r].score().score(); + if((*rs)[r].fw()) { + if(score > bestScore[0]) { + bestScore[0] = score; + } + } else { + if(score > bestScore[1]) { + bestScore[1] = score; + } + } + } + for(index_t fwi = 0; fwi < 2; fwi++) { + if(bestScore[fwi] < 0) + continue; + + ReadBWTHit& hit = _hits[rdi][fwi]; + index_t offsetSize = hit.offsetSize(); + for(size_t hi = 0; hi < offsetSize; hi++) { + BWTHit& partialHit = hit.getPartialHit(hi); + if(partialHit.len() >= _minK + 8 && partialHit.size() == 1) { + skip_repeat[rdi][fwi] = true; + break; + } + } + if(skip_repeat[rdi][fwi]) break; + } + } + + for(size_t rdi = 0; rdi < (_paired ? 
2 : 1); rdi++) { + for(size_t fwi = 0; fwi < 2; fwi++) { + if(skip_repeat[rdi][fwi]) continue; + bool fw = (fwi == 0); + _hits[rdi][fwi].init(fw, (index_t)_rds[rdi]->length()); + } + } + + while(nextBWT(sc, pepol, tpol, gpol, rfm, altdb, *rref, rdi, fw, wlm, prm, him, rnd, sink)); + for(size_t rdi = 0; rdi < (_paired ? 2 : 1); rdi++) { + for(size_t fwi = 0; fwi < 2; fwi++) { + if(skip_repeat[rdi][fwi]) continue; + ReadBWTHit& hit = _hits[rdi][fwi]; + index_t offsetSize = hit.offsetSize(); + //assert_gt(offsetSize, 0); + for(size_t hi = 0; hi < offsetSize; hi++) { + BWTHit& partialHit = hit.getPartialHit(hi); + if(partialHit.len() >= (rref->getMinK() << 1)) { + repeat[rdi][fwi] = true; + perform_repeat_alignment = true; + break; + } + } + } + } + } +#else + // use minimizer to decide whether a read or a pair is from repetitive sequences + perform_repeat_alignment = false; + for(size_t rdi = 0; rdi < (_paired ? 2 : 1); rdi++) { + Read& read = *_rds[rdi]; + for(size_t fwi = 0; fwi < 2; fwi++) { + const BTDnaString& seq = (fwi == 0 ? read.patFw : read.patRc); + repeat[rdi][fwi] = repeat_kmertable.isRepeat(seq, _tmp_minimizers); + perform_repeat_alignment |= repeat[rdi][fwi]; + } + } +#endif + + // Handle alignment to repetitive regions + if(rgfm != NULL && + perform_repeat_alignment) { + LocalRFM& rfm = ((RFM*)rgfm)->getLocalRFM(indexIdx[0]); + RB_KmerTable& repeatKmertable = ((RFM*)rgfm)->getKmertable(indexIdx[0]); + + _repeatConcordant.clear(); + index_t prev_align_size[2] = {0, 0}; + for(size_t rdi = 0; rdi < (_paired ? 2 : 1); rdi++) { + const EList *rs = NULL; + if(rdi == 0) sink.getUnp1(rs); + else sink.getUnp2(rs); + prev_align_size[rdi] = rs->size(); + } + + for(size_t rdi = 0; rdi < (_paired ? 
2 : 1); rdi++) { + for(size_t fwi = 0; fwi < 2; fwi++) { + if(!repeat[rdi][fwi]) continue; + + // choose candidate partial alignments for further alignment + index_t maxsize = max(rp.khits, rp.kseeds); + +#if 0 + ReadBWTHit& hit = _hits[rdi][fwi]; + if(!hit.done()) continue; + getAnchorHits(rfm, + pepol, + tpol, + gpol, + altdb, + repeatdb, + *rref, + rnd, + rdi, + fwi == 0, // fw + _genomeHits_rep[rdi], + _genomeHits_rep[rdi].size() + maxsize, + _sharedVars, + wlm, + prm, + him, + true); // repeat? +#else + + getRepeatHits(rfm, + pepol, + tpol, + gpol, + raltdb, + repeatdb, + repeatKmertable, + *rref, + rnd, + rdi, + fwi == 0, // fw + indexIdx[0], + _genomeHits_rep[rdi], + _genomeHits_rep[rdi].size() + maxsize, + _sharedVars, + sc, + swa, + ssdb, + swm, + wlm, + prm, + him, + sink); +#endif + } + } + + EList, RepeatCoord > >& positions = _positions; + for(size_t rdi = 0; rdi < (_paired ? 2 : 1); rdi++) { + for(size_t i = 0; i < _genomeHits_rep[rdi].size(); i++) { + if(_genomeHits_rep[rdi][i].len() < (_minK << 1)) continue; + + // DK - debugging purposes +#if 0 + positions.clear(); + repeatdb.getCoords(_genomeHits_rep[rdi][i]._tidx, + _genomeHits_rep[rdi][i]._joinedOff, + _genomeHits_rep[rdi][i]._joinedOff + _genomeHits_rep[rdi][i].len(), + _snpIDs, + raltdb, + positions, + rp.khits * 1000); +#endif + + const EList *rs = NULL; + if(rdi == 0) sink.getUnp2(rs); + else sink.getUnp1(rs); + assert(rs != NULL); + + bool candidate_found = false; + for(size_t j = 0; j < prev_align_size[1-rdi]; j++) { + const AlnRes& res = (*rs)[j]; + if(res.repeat()) + continue; + + TAlScore estScore = res.score().score() + _genomeHits_rep[rdi][i].score(); + if(sink.bestPair() >= estScore && sink.numBestPair().first > rp.khits) + break; + + positions.clear(); + index_t joinedOff = 0; + gfm.textOffToJoined(res.refid(), res.refoff(), joinedOff); + repeatdb.findCoords(joinedOff, + joinedOff + res.refExtent(), + _genomeHits_rep[rdi][i]._tidx, + _genomeHits_rep[rdi][i]._joinedOff, + 
_genomeHits_rep[rdi][i]._joinedOff + _genomeHits_rep[rdi][i].len(), + _snpIDs, + raltdb, + positions, + rp.khits * 10); + if(positions.size() <= 0) + continue; + + for(size_t p = 0; p < positions.size(); p++) { + if(positions[p].first.tid != res.refid()) continue; + if(positions[p].first.toff + 1000 < res.refoff() || + res.refoff() + 1000 < positions[p].first.toff) continue; + if(sink.bestPair() >= estScore && sink.numBestPair().first > rp.khits) + break; + + candidate_found = true; + + _genomeHits.clear(); + _genomeHits.expand(); + _genomeHits.back() = _genomeHits_rep[rdi][i]; + _genomeHits.back()._tidx = positions[p].first.tid; + _genomeHits.back()._toff = positions[p].first.toff; + _genomeHits.back()._joinedOff = positions[p].first.joinedOff; + if(!positions[p].first.fw) { + _genomeHits.back().reverse(*_rds[rdi]); + _rds[rdi]->oppositeConversion_3N = true; + } else { + _rds[rdi]->oppositeConversion_3N = false; + } + + // extend the partial alignments bidirectionally using + // local search, extension, and (less often) global search + hybridSearch(sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + _genomeHits.back()._fw, + wlm, + prm, + swm, + him, + rnd, + sink); + } + + if(candidate_found) { + pairReads( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + wlm, + prm, + him, + rnd, + sink); + } + } + + if(rdi == 0 && _paired) { + for(size_t j = 0; j < _genomeHits_rep[1].size(); j++) { + if(_genomeHits_rep[1][j].len() < (_minK << 1)) continue; + + TAlScore estScore = _genomeHits_rep[0][i].score() + _genomeHits_rep[1][j].score(); + // if(sink.bestPair() >= estScore && sink.numBestPair().first > rp.khits) + // break; + + positions.clear(); + repeatdb.findCommonCoords(_genomeHits_rep[0][i]._tidx, + _genomeHits_rep[0][i]._joinedOff, + _genomeHits_rep[0][i]._joinedOff + _genomeHits_rep[0][i].len(), + _snpIDs, + _genomeHits_rep[1][j]._tidx, + _genomeHits_rep[1][j]._joinedOff, + _genomeHits_rep[1][j]._joinedOff + 
_genomeHits_rep[1][j].len(), + _snpIDs2, + raltdb, + positions, + rp.khits * 10); + if(positions.size() <= 0) continue; + + _repeatConcordant.expand(); + _repeatConcordant.back().first = _genomeHits_rep[0][i]._joinedOff; + _repeatConcordant.back().second = _genomeHits_rep[1][j]._joinedOff; + + for(size_t p = 0; p < positions.size(); p++) { + if(sink.bestPair() >= estScore && sink.numBestPair().first > rp.khits) + break; + + _genomeHits.clear(); + _genomeHits.expand(); + _genomeHits.back() = _genomeHits_rep[0][i]; + _genomeHits.back()._tidx = positions[p].first.tid; + _genomeHits.back()._toff = positions[p].first.toff; + _genomeHits.back()._joinedOff = positions[p].first.joinedOff; + if(!positions[p].first.fw) { + _genomeHits.back().reverse(*_rds[0]); + _rds[0]->oppositeConversion_3N = true; + } else { + _rds[0]->oppositeConversion_3N = false; + } + + // extend the partial alignments bidirectionally using + // local search, extension, and (less often) global search + hybridSearch(sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + 0, + _genomeHits.back()._fw, + wlm, + prm, + swm, + him, + rnd, + sink); + + _genomeHits.clear(); + _genomeHits.expand(); + _genomeHits.back() = _genomeHits_rep[1][j]; + _genomeHits.back()._tidx = positions[p].second.tid; + _genomeHits.back()._toff = positions[p].second.toff; + _genomeHits.back()._joinedOff = positions[p].second.joinedOff; + if(!positions[p].second.fw) { + _genomeHits.back().reverse(*_rds[1]); + _rds[1]->oppositeConversion_3N = true; + } else { + _rds[1]->oppositeConversion_3N = false; + } + + // extend the partial alignments bidirectionally using + // local search, extension, and (less often) global search + hybridSearch(sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + 1, + _genomeHits.back()._fw, + wlm, + prm, + swm, + him, + rnd, + sink); + } + + if(positions.size() > 0) { + pairReads( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + wlm, + 
prm, + him, + rnd, + sink); + } + } + } // if(rdi == 0) + } // for(size_t i = 0; i < _genomeHits_rep[rdi].size() + + bool align2repeat = false; + if(_paired) { + index_t numBestPair = sink.numBestPair().first; + align2repeat = (numBestPair == 0 || numBestPair > rp.khits); + } else { + const EList *rs = NULL; + if(rdi == 0) sink.getUnp1(rs); + else sink.getUnp2(rs); + assert(rs != NULL); + align2repeat = (rs->size() == 0 || sink.numBestUnp(rdi).first > rp.khits); + } + + _rds[0]->oppositeConversion_3N = false; + if (_paired) { + _rds[1]->oppositeConversion_3N = false; + } + + if(align2repeat) { + for(size_t i = 0; i < _genomeHits_rep[rdi].size(); i++) { + _genomeHits.clear(); + _genomeHits.expand(); + _genomeHits.back() = _genomeHits_rep[rdi][i]; + _genomeHits.back()._repeat = true; + hybridSearch(sc, + pepol, + tpol, + gpol, + rfm, + altdb, + repeatdb, + *rref, + swa, + ssdb, + rdi, + _genomeHits.back()._fw, + wlm, + prm, + swm, + him, + rnd, + sink); + } + + if(_paired && rdi == 1) { + if(sink.numBestUnp(rdi).first > rp.khits) { + pairReads( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + wlm, + prm, + him, + rnd, + sink); + } + } + } + + } // for(size_t rdi = 0 + } // repeat + + return EXTEND_POLICY_FULFILLED; + } + + /** + * Given a read or its reverse complement (or mate), + * align the unmapped portion using the global FM index + */ + virtual + bool nextBWT( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const BitPairReference& ref, + index_t& rdi, + bool& fw, + WalkMetrics& wlm, + PerReadMetrics& prm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink) + { + const ReportingParams& rp = sink.reportingParams(); + + // Pick up a candidate from a read or its reverse complement + // (for pair, also consider mate and its reverse complement) + while(pickNextReadToSearch(rdi, fw)) { + size_t mineFw = 0, mineRc 
= 0; + index_t fwi = (fw ? 0 : 1); + ReadBWTHit& hit = _hits[rdi][fwi]; + assert(!hit.done()); + bool pseudogeneStop = gfm.gh().linearFM() && !tpol.no_spliced_alignment(); + bool anchorStop = _anchorStop && !gfm.repeat(); + if(!rp.secondary) { + index_t numSearched = hit.numActualPartialSearch(); + int64_t bestScore = 0; + if(rdi == 0) { + bestScore = sink.bestUnp1(); + if(bestScore >= _minsc[rdi]) { + // do not further align this candidate + // unless it may be at least as good as the alignment of its reverse complement + index_t maxmm = (index_t)((-bestScore + sc.mmpMax - 1) / sc.mmpMax); + if(numSearched > maxmm + sink.bestSplicedUnp1() + 1) { + hit.done(true); + if(_paired) { + if(sink.bestUnp2() >= _minsc[1-rdi] && + sink.numPair() > 0) return false; + else continue; + } else { + return false; + } + } + } + } else { + assert(_paired); + assert_eq(rdi, 1); + bestScore = sink.bestUnp2(); + if(bestScore >= _minsc[rdi]) { + // Do not further extend this alignment + // unless it may be at least as good as the previous alignemnt + index_t maxmm = (index_t)((-bestScore + sc.mmpMax - 1) / sc.mmpMax); + if(numSearched > maxmm + sink.bestSplicedUnp2() + 1) { + hit.done(true); + if(_paired) { + if(sink.bestUnp1() >= _minsc[1-rdi] && + sink.numPair() > 0) return false; + else continue; + } else { + return false; + } + } + } + } + + ReadBWTHit& rchit = _hits[rdi][1-fwi]; + if(rchit.done() && bestScore < _minsc[rdi]) { + if(numSearched > rchit.numActualPartialSearch() + (anchorStop ? 
1 : 0)) { + hit.done(true); + return false; + } + } + } + + // Align this read beginning from previously stopped base + // stops when it is uniquelly mapped with at least 28bp or + // it may involve processed pseudogene + partialSearch( + gfm, + *_rds[rdi], + sc, + sink.reportingParams(), + fw, + 0, + mineFw, + mineRc, + hit, + rnd, + pseudogeneStop, + anchorStop); + + assert(hit.repOk()); + if(hit.done()) return true; + // Advance hit._cur by 1 + if(!pseudogeneStop) { + if(hit._cur + 1 < hit._len) hit._cur++; + } + if(anchorStop) { + hit.done(true); + return true; + } + // hit.adjustOffset(_minK); + } + + return false; + } + + /** + * Given partial alignments of a read, try to further extend + * the alignment bidirectionally + */ + virtual + bool align( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + bool fw, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink); + + /** + * Given the alignment of its mate as an anchor, + * align the read + */ + virtual + bool alignMate( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + bool fw, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink, + index_t tidx, + index_t toff); + + /** + * Given a partial alignment of a read, try to further extend + * the alignment bidirectionally using a combination of + * local search, extension, and global search + */ + virtual + void hybridSearch( + const Scoring& sc, + const 
PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + bool fw, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink) + {} + + /** + * Given a partial alignment of a read, try to further extend + * the alignment bidirectionally using a combination of + * local search, extension, and global search + */ + virtual + int64_t hybridSearch_recur( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + const GenomeHit& hit, + index_t hitoff, + index_t hitlen, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink, + bool alignMate = false, + index_t dep = 0) + { return numeric_limits::min(); } + + /** + * Choose a candidate for alignment from a read or its reverse complement + * (also from a mate or its reverse complement for pair) + */ + bool pickNextReadToSearch(index_t& rdi, bool& fw) { + rdi = 0; fw = true; + bool picked = false; + int64_t maxScore = std::numeric_limits::min(); + for(index_t rdi2 = 0; rdi2 < (_paired ? 
2 : 1); rdi2++) { + assert(_rds[rdi2] != NULL); + for(index_t fwi = 0; fwi < 2; fwi++) { + if (fwi == 0 && _nofw[rdi2]) continue; + else if(fwi == 1 && _norc[rdi2]) continue; + + if(_hits[rdi2][fwi].done()) continue; + int64_t curScore = _hits[rdi2][fwi].searchScore((index_t)_minK); + if(_hits[rdi2][fwi].cur() == 0) { + curScore = std::numeric_limits::max(); + } + assert_gt(curScore, std::numeric_limits::min()); + if(curScore > maxScore) { + maxScore = curScore; + rdi = rdi2; + fw = (fwi == 0); + picked = true; + } + } + } + + return picked; + } + + /** + * Align a part of a read without any edits + */ + index_t partialSearch( + const GFM& gfm, // GFM index + const Read& read, // read to align + const Scoring& sc, // scoring scheme + const ReportingParams& rp, + bool fw, // don't align forward read + size_t mineMax, // don't care about edit bounds > this + size_t& mineFw, // minimum # edits for forward read + size_t& mineRc, // minimum # edits for revcomp read + ReadBWTHit& hit, // holds all the seed hits (and exact hit) + RandomSource& rnd, + bool& pseudogeneStop, // stop if mapped to multiple locations due to processed pseudogenes + bool& anchorStop, + index_t maxHitLen = (index_t)INDEX_MAX); + + /** + * Global FM index search + */ + index_t globalGFMSearch( + const GFM& gfm, // GFM index + const Read& read, // read to align + const Scoring& sc, // scoring scheme + const ReportingParams& rp, + bool fw, + index_t hitoff, + index_t& hitlen, + index_t& top, + index_t& bot, + index_t& node_top, + index_t& node_bot, + EList >& node_iedge_count, + RandomSource& rnd, + bool& uniqueStop, + index_t maxHitLen = (index_t)INDEX_MAX); + + /** + * Local FM index search + */ + index_t localGFMSearch( + const LocalGFM& gfm, // GFM index + const Read& read, // read to align + const Scoring& sc, // scoring scheme + const ReportingParams& rp, + bool fw, + index_t rdoff, + index_t& hitlen, + local_index_t& top, + local_index_t& bot, + local_index_t& node_top, + local_index_t& 
node_bot, + EList >& local_node_iedge_count, + RandomSource& rnd, + bool& uniqueStop, + local_index_t minUniqueLen, + local_index_t maxHitLen = (local_index_t)INDEX_MAX, + local_index_t maxHits = 0); + + /** + * Convert FM offsets to the corresponding genomic offset (chromosome id, offset) + **/ + bool getGenomeCoords( + const GFM& gfm, + const ALTDB& altdb, + const BitPairReference& ref, + RandomSource& rnd, + index_t top, + index_t bot, + index_t node_top, + index_t node_bot, + const EList >& node_iedge_count, + bool fw, + index_t maxelt, + index_t rdoff, + index_t rdlen, + EList& coords, + WalkMetrics& met, + PerReadMetrics& prm, + HIMetrics& him, + bool rejectStraddle, + bool& straddled); + + /** + * Convert FM offsets to the corresponding genomic offset (chromosome id, offset) + **/ + bool getGenomeCoords_local( + const GFM& gfm, + const ALTDB& altdb, + const BitPairReference& ref, + RandomSource& rnd, + local_index_t top, + local_index_t bot, + local_index_t node_top, + local_index_t node_bot, + const EList >& node_iedge_count, + bool fw, + index_t rdoff, + index_t rdlen, + EList& coords, + WalkMetrics& met, + PerReadMetrics& prm, + HIMetrics& him, + bool rejectStraddle, + bool& straddled); + + /** + * Given a set of partial alignments for a read, + * choose some that are longer and mapped to fewer places + */ + index_t getAnchorHits( + const GFM& gfm, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + RandomSource& rnd, + index_t rdi, + bool fw, + EList >& genomeHits, + index_t maxGenomeHitSize, + SharedTempVars& sharedVars, + WalkMetrics& wlm, + PerReadMetrics& prm, + HIMetrics& him, + bool repeat = false) + { + index_t fwi = (fw ? 
0 : 1); + assert_lt(rdi, 2); + assert(_rds[rdi] != NULL); + ReadBWTHit& hit = _hits[rdi][fwi]; + assert(hit.done()); + index_t offsetSize = hit.offsetSize(); + assert_gt(offsetSize, 0); + for(size_t hi = 0; hi < offsetSize; hi++) { + index_t hj = 0; + for(; hj < offsetSize; hj++) { + BWTHit& partialHit_j = hit.getPartialHit(hj); + if(partialHit_j.empty() || + partialHit_j.hasGenomeCoords() || + partialHit_j.len() <= _minK + 2) continue; + else break; + } + if(hj >= offsetSize) break; + for(index_t hk = hj + 1; hk < offsetSize; hk++) { + BWTHit& partialHit_j = hit.getPartialHit(hj); + BWTHit& partialHit_k = hit.getPartialHit(hk); + if(partialHit_k.empty() || + partialHit_k.hasGenomeCoords() || + partialHit_k.len() <= _minK + 2) continue; + + if(partialHit_j._hit_type == partialHit_k._hit_type) { + if((partialHit_j.size() > partialHit_k.size()) || + (partialHit_j.size() == partialHit_k.size() && partialHit_j.len() < partialHit_k.len())) { + hj = hk; + } + } else { + if(partialHit_k._hit_type > partialHit_j._hit_type) { + hj = hk; + } + } + } + BWTHit& partialHit = hit.getPartialHit(hj); + assert(!partialHit.hasGenomeCoords()); + + // Retrieve genomic coordinates + // If there are too many genomic coordinates to get, + // then we randomly choose and retrieve a small set of them + assert_leq(genomeHits.size(), maxGenomeHitSize); + index_t remainedGenomeHitSize = maxGenomeHitSize - genomeHits.size(); + if(remainedGenomeHitSize <= 0) + break; + index_t expectedNumCoords = partialHit._node_bot - partialHit._node_top; + bool straddled = false; + if(expectedNumCoords <= remainedGenomeHitSize) { + getGenomeCoords( + gfm, + altdb, + ref, + rnd, + partialHit._top, + partialHit._bot, + partialHit._node_top, + partialHit._node_bot, + partialHit._node_iedge_count, + fw, + partialHit._bot - partialHit._top, + hit._len - partialHit._bwoff - partialHit._len, + partialHit._len, + partialHit._coords, + wlm, + prm, + him, + false, // reject straddled + straddled); + } else { + index_t 
edgeIdx = 0; + index_t top = partialHit._top; + index_t added = 0; + for(index_t node = partialHit._node_top; node < partialHit._node_bot; node++, expectedNumCoords--) { + index_t bot = top + 1; + _tmp_node_iedge_count.clear(); + if(edgeIdx < partialHit._node_iedge_count.size()) { + assert_leq(node - partialHit._node_top, partialHit._node_iedge_count[edgeIdx].first); + if(node - partialHit._node_top == partialHit._node_iedge_count[edgeIdx].first) { + bot += partialHit._node_iedge_count[edgeIdx].second; + _tmp_node_iedge_count.expand(); + _tmp_node_iedge_count.back().first = 0; + _tmp_node_iedge_count.back().second = partialHit._node_iedge_count[edgeIdx].second; + edgeIdx++; + } + } + assert_lt(added, remainedGenomeHitSize); + uint32_t rndi = rnd.nextU32() % expectedNumCoords; + if(rndi < remainedGenomeHitSize - added) { + getGenomeCoords( + gfm, + altdb, + ref, + rnd, + top, + bot, + node, + node + 1, + _tmp_node_iedge_count, + fw, + partialHit._bot - partialHit._top, + hit._len - partialHit._bwoff - partialHit._len, + partialHit._len, + partialHit._coords, + wlm, + prm, + him, + false, // reject straddled + straddled); + added++; + if(added >= remainedGenomeHitSize) break; + } + top = bot; + } + } + + if(!partialHit.hasGenomeCoords()) continue; + EList& coords = partialHit._coords; + assert_gt(coords.size(), 0); + const index_t genomeHit_size = (index_t)genomeHits.size(); + if(genomeHit_size + coords.size() > maxGenomeHitSize) { + coords.shufflePortion(0, coords.size(), rnd); + } + for(index_t k = 0; k < coords.size(); k++) { + const Coord& coord = coords[k]; + if(coord.ref() == numeric_limits::max()) + continue; + index_t len = partialHit._len; + index_t rdoff = hit._len - partialHit._bwoff - len; + bool overlapped = false; + for(index_t l = 0; l < genomeHit_size; l++) { + GenomeHit& genomeHit = genomeHits[l]; + if(genomeHit.ref() != (index_t)coord.ref() || genomeHit.fw() != coord.fw()) continue; + assert_lt(genomeHit.rdoff(), hit._len); + assert_lt(rdoff, 
hit._len); + index_t hitoff = genomeHit.refoff() + hit._len - genomeHit.rdoff(); + index_t hitoff2 = (index_t)coord.off() + hit._len - rdoff; + int64_t hitoff_diff = (tpol.no_spliced_alignment() ? 0 : tpol.maxIntronLen()); + if(abs((int64_t)hitoff - (int64_t)hitoff2) <= hitoff_diff) { + overlapped = true; + genomeHit._hitcount++; + break; + } + } + if(repeat) { + if(!repeatdb.repeatExist(coord.ref(), coord.off(), coord.off() + len)) { + continue; + } + } + if(!overlapped) { + GenomeHit::adjustWithALT( + rdoff, + len, + coord, + _sharedVars, + genomeHits, + *_rds[rdi], + gfm, + altdb, + ref, + gpol); + } + if(partialHit._hit_type == CANDIDATE_HIT && genomeHits.size() >= maxGenomeHitSize) break; + } + if(partialHit._hit_type == CANDIDATE_HIT && genomeHits.size() >= maxGenomeHitSize) break; + } + + return (index_t)genomeHits.size(); + } + + /** + * + */ + index_t getRepeatHits( + const GFM& gfm, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const RB_KmerTable& repeat_kmertable, + const BitPairReference& ref, + RandomSource& rnd, + index_t rdi, + bool fw, + index_t repID, + EList >& genomeHits, + index_t maxGenomeHitSize, + SharedTempVars& sharedVars, + const Scoring& sc, + SwAligner& swa, + SpliceSiteDB& ssdb, + SwMetrics& swm, + WalkMetrics& wlm, + PerReadMetrics& prm, + HIMetrics& him, + AlnSinkWrap& sink) + { + assert_lt(rdi, 2); + assert(_rds[rdi] != NULL); + Read& rd = *_rds[rdi]; + const BTDnaString& seq = fw ? 
rd.patFw : rd.patRc; + repeat_kmertable.findAlignments(seq, + _tmp_minimizers, + _tmp_position2D, + _tmp_alignments); + + const TAlScore cushion = sc.mmpMax; + + TAlScore bestScore = _minsc[rdi]; + size_t prev_numHits = genomeHits.size(); + for(index_t i = 0; i < _tmp_alignments.size(); i++) { + const RB_Alignment& coord = _tmp_alignments[i]; + index_t len = seq.length(); + index_t rdoff = 0; + if(!repeatdb.repeatExist(repID, coord.pos, coord.pos + len)) { + continue; + } + + genomeHits.expand(); + GenomeHit& genomeHit = genomeHits.back(); + genomeHit.init(fw, + rdoff, + 0, + 0, // trim5 + 0, // trim3 + repID, // ref, + coord.pos, + coord.pos, + this->_sharedVars); + + index_t maxmm = (index_t)(-bestScore / sc.mmpMax); + index_t leftext = 0, rightext = len; + genomeHit.extend(rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext, + maxmm); + + if(genomeHit.len() < len) { + genomeHits.pop_back(); + continue; + } + + if(genomeHit.score() > bestScore) { + bestScore = genomeHit.score(); + size_t remove_count = 0; + size_t k = prev_numHits; + for(size_t j = prev_numHits; j < genomeHits.size(); j++) { + if(genomeHits[j].score() >= max(_minsc[rdi], bestScore - cushion)) { + assert_leq(k, j); + if(k < j) { + genomeHits[k] = genomeHits[j]; + } + k++; + } else { + remove_count++; + } + } + assert_eq(k + remove_count, genomeHits.size()); + assert_leq(prev_numHits + remove_count, genomeHits.size()); + if(remove_count > 0) { + genomeHits.resize(genomeHits.size() - remove_count); + } + } else if(genomeHit.score() < max(_minsc[rdi], bestScore - cushion)) { + genomeHits.pop_back(); + } + } + + return (index_t)genomeHits.size(); + } + + bool pairReads( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const 
BitPairReference& ref, + WalkMetrics& wlm, + PerReadMetrics& prm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink); + + /** + * + **/ + bool reportHit( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + const SpliceSiteDB& ssdb, + AlnSinkWrap& sink, + index_t rdi, + const GenomeHit& hit, + bool alignMate = false, + const GenomeHit* ohit = NULL); + + /** + * check this alignment is already examined + **/ + bool redundant( + AlnSinkWrap& sink, + index_t rdi, + index_t tidx, + index_t toff); + + /** + * check this alignment is already examined + **/ + bool redundant( + AlnSinkWrap& sink, + index_t rdi, + const GenomeHit& hit); + + + /** + * + **/ + bool isSearched( + const GenomeHit& hit, + index_t rdi); + + /** + * + **/ + void addSearched(const GenomeHit& hit, + index_t rdi); + + +protected: + + Read * _rds[2]; + bool _paired; + bool _rightendonly; + bool _nofw[2]; + bool _norc[2]; + TAlScore _minsc[2]; + TAlScore _maxpen[2]; + + bool _anchorStop; + + ReadBWTHit _hits[2][2]; + + EList _offs; + SARangeWithOffs, index_t> _sas; + GroupWalk2S, 16> _gws; + GroupWalkState _gwstate; + + EList _offs_local; + SARangeWithOffs, local_index_t> _sas_local; + GroupWalk2S, 16> _gws_local; + GroupWalkState _gwstate_local; + + // temporary and shared variables used for GenomeHit + // this should be defined before _genomeHits and _hits_searched + SharedTempVars _sharedVars; + + // temporary and shared variables for AlnRes + LinkedEList > _rawEdits; + + // temporary + EList > _genomeHits; + EList > _genomeHits_rep[2]; + EList _snpIDs; + EList _snpIDs2; + EList _genomeHits_done; + ELList _coords; + EList, RepeatCoord > > _positions; + ELList _spliceSites; + + pair _concordantIdxInspected; + EList > _repeatConcordant; + + size_t _minK; // log4 of the size of a genome + size_t _minK_local; // 
log4 of the size of a local index (8) + + ELList > _local_genomeHits; + EList _anchors_added; + uint64_t max_localindexatts; + + uint64_t bwops_; // Burrows-Wheeler operations + uint64_t bwedits_; // Burrows-Wheeler edits + + // + EList > _hits_searched[2]; + + uint64_t _thread_rids_mindist; + + // + EList > _node_iedge_count; + EList > _tmp_node_iedge_count; + + EList > _local_node_iedge_count; + EList > _tmp_local_node_iedge_count; + + EList > _tmp_minimizers; + ELList _tmp_position2D; + EList _tmp_alignments; + + // For AlnRes::matchesRef + ASSERT_ONLY(EList raw_matches_); + ASSERT_ONLY(BTDnaString tmp_rf_); + ASSERT_ONLY(BTDnaString tmp_rdseq_); + ASSERT_ONLY(BTString tmp_qseq_); +}; + +#define HIER_INIT_LOCS(top, bot, tloc, bloc, e) { \ + if(bot - top == 1) { \ + tloc.initFromRow(top, (e).gh(), (e).gfm()); \ + bloc.invalidate(); \ + } else { \ + SideLocus::initFromTopBot(top, bot, (e).gh(), (e).gfm(), tloc, bloc); \ + assert(bloc.valid()); \ + } \ +} + +#define HIER_SANITY_CHECK_4TUP(t, b, tp, bp) { \ + ASSERT_ONLY(cur_index_t tot = (b[0]-t[0])+(b[1]-t[1])+(b[2]-t[2])+(b[3]-t[3])); \ + ASSERT_ONLY(cur_index_t totp = (bp[0]-tp[0])+(bp[1]-tp[1])+(bp[2]-tp[2])+(bp[3]-tp[3])); \ + assert_eq(tot, totp); \ +} + +#define LOCAL_INIT_LOCS(top, bot, tloc, bloc, e) { \ + if(bot - top == 1) { \ + tloc.initFromRow(top, (e).gh(), (e).gfm()); \ + bloc.invalidate(); \ + } else { \ + SideLocus::initFromTopBot(top, bot, (e).gh(), (e).gfm(), tloc, bloc); \ + assert(bloc.valid()); \ + } \ +} + +/** + * Given partial alignments of a read, try to further extend + * the alignment bidirectionally + */ +template +bool HI_Aligner::align( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + bool fw, + WalkMetrics& wlm, + PerReadMetrics& prm, + 
SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink) +{ + + const ReportingParams& rp = sink.reportingParams(); + index_t fwi = (fw ? 0 : 1); + assert_lt(rdi, 2); + assert(_rds[rdi] != NULL); + ReadBWTHit& hit = _hits[rdi][fwi]; + assert(hit.done()); + index_t minOff = 0; + if(hit.minWidth(minOff) == std::numeric_limits::max()) return false; + + // Don't try to align if the potential alignment for this read might be + // worse than the best alignment of its reverse complement + int64_t bestScore = (rdi == 0 ? sink.bestUnp1() : sink.bestUnp2()); + index_t num_spliced = (rdi == 0 ? sink.bestSplicedUnp1() : sink.bestSplicedUnp2()); + if(bestScore < _minsc[rdi]) bestScore = _minsc[rdi]; + index_t maxmm = (index_t)((-bestScore + sc.mmpMax - 1) / sc.mmpMax); + index_t numActualPartialSearch = hit.numActualPartialSearch(); + if(!rp.secondary && numActualPartialSearch > maxmm + num_spliced + 1) return true; + + // choose candidate partial alignments for further alignment + const index_t maxsize = max(rp.khits, rp.kseeds); + _genomeHits.clear(); + index_t numHits = getAnchorHits(gfm, + pepol, + tpol, + gpol, + altdb, + repeatdb, + ref, + rnd, + rdi, + fw, + _genomeHits, + maxsize, + _sharedVars, + wlm, + prm, + him, + gfm.repeat()); + if(numHits <= 0) return false; + + // limit the number of local index searches used for alignment of the read + uint64_t add = 0; + if(rp.secondary) add = (-_minsc[rdi] / sc.mmpMax) * numHits * 2; + else add = (-_minsc[rdi] / sc.mmpMax) * numHits; + max_localindexatts = him.localindexatts + max(10, add); + // extend the partial alignments bidirectionally using + // local search, extension, and (less often) global search + hybridSearch(sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + fw, + wlm, + prm, + swm, + him, + rnd, + sink); + + return true; +} + + +/** + * Given the alignment of its mate as an anchor, + * align the read + */ +template +bool HI_Aligner::alignMate( + const 
Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + bool fw, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink, + index_t tidx, + index_t toff) +{ + const ReportingParams& rp = sink.reportingParams(); + + assert_lt(rdi, 2); + index_t ordi = 1 - rdi; + bool ofw = (fw == gMate2fw ? gMate1fw : gMate2fw); + assert(_rds[ordi] != NULL); + const Read& ord = *_rds[ordi]; + index_t rdlen = (index_t)ord.length(); + assert_gt(rdlen, 0); + + _genomeHits.clear(); + if(_coords.size() == 0) { + _coords.expand(); + } + EList& coords = _coords.front(); + + // local search to find anchors + const HGFM* hGFM = (const HGFM*)(&gfm); + const LocalGFM* lGFM = hGFM->getLocalGFM(tidx, toff); + bool first = true; + index_t count = 0; + index_t max_hitlen = 0; + while(count++ < 2) { + if(first) { + first = false; + } else { + if(_genomeHits.size() > 0) break; + if(fw) { + lGFM = hGFM->nextLocalGFM(lGFM); + } else { + lGFM = hGFM->prevLocalGFM(lGFM); + } + if(lGFM == NULL || lGFM->empty()) break; + } + index_t hitoff = rdlen - 1; + while(hitoff >= _minK_local - 1) { + index_t hitlen = 0; + local_index_t top = (local_index_t)INDEX_MAX, bot = (local_index_t)INDEX_MAX; + local_index_t node_top = (local_index_t)INDEX_MAX, node_bot = (local_index_t)INDEX_MAX; + _local_node_iedge_count.clear(); + bool uniqueStop = false; + index_t nelt = localGFMSearch( + *lGFM, // GFM index + ord, // read to align + sc, // scoring scheme + sink.reportingParams(), + ofw, + hitoff, + hitlen, + top, + bot, + node_top, + node_bot, + _local_node_iedge_count, + rnd, + uniqueStop, + _minK_local); + assert_leq(top, bot); + assert_eq(nelt, (index_t)(node_bot - node_top)); + assert_leq(hitlen, hitoff + 1); + if(nelt > 0 && nelt 
<= rp.kseeds && hitlen > max_hitlen) { + coords.clear(); + bool straddled = false; + getGenomeCoords_local( + *lGFM, + altdb, + ref, + rnd, + top, + bot, + node_top, + node_bot, + _local_node_iedge_count, + ofw, + hitoff - hitlen + 1, + hitlen, + coords, + wlm, + prm, + him, + true, // reject straddled? + straddled); + assert_leq(coords.size(), nelt); + _genomeHits.clear(); + for(index_t ri = 0; ri < coords.size(); ri++) { + const Coord& coord = coords[ri]; + if(tpol.no_spliced_alignment()) { + if(coord.off() + pepol.maxFragLen() * 2 < toff || toff + pepol.maxFragLen() * 2 < coord.off()) + continue; + } + + GenomeHit::adjustWithALT( + hitoff - hitlen + 1, + hitlen, + coord, + _sharedVars, + _genomeHits, + *this->_rds[ordi], + gfm, + altdb, + ref, + gpol); + } + max_hitlen = hitlen; + } + assert_leq(hitlen, hitoff + 1); + if(hitlen > 0) hitoff -= (hitlen - 1); + if(hitoff > 0) hitoff -= 1; + } // while(hitoff >= minHitLen - 1) + } // while(count++ < 2) + + // randomly select + const index_t maxsize = rp.kseeds; + if(_genomeHits.size() > maxsize) { + _genomeHits.shufflePortion(0, _genomeHits.size(), rnd); + _genomeHits.resize(maxsize); + } + + // local search using the anchor + for(index_t hi = 0; hi < _genomeHits.size(); hi++) { + him.anchoratts++; + GenomeHit& genomeHit = _genomeHits[hi]; + index_t leftext = (index_t)INDEX_MAX, rightext = (index_t)INDEX_MAX; + genomeHit.extend( + ord, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + _minsc[ordi], + rnd, + (index_t)_minK_local, + tpol, + gpol, + leftext, + rightext); + hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + ordi, + genomeHit, + genomeHit.rdoff(), + genomeHit.len(), + wlm, + prm, + swm, + him, + rnd, + sink, + true); // alignMate? 
+ } + + return true; +} + + +/** + * convert FM offsets to the corresponding genomic offset (chromosome id, offset) + **/ +template +bool HI_Aligner::getGenomeCoords( + const GFM& gfm, + const ALTDB& altdb, + const BitPairReference& ref, + RandomSource& rnd, + index_t top, + index_t bot, + index_t node_top, + index_t node_bot, + const EList >& node_iedge_count, + bool fw, + index_t maxelt, + index_t rdoff, + index_t rdlen, + EList& coords, + WalkMetrics& met, + PerReadMetrics& prm, + HIMetrics& him, + bool rejectStraddle, + bool& straddled) +{ + straddled = false; + assert_gt(bot, top); + assert_leq(node_bot - node_top, bot - top); + index_t nelt = node_bot - node_top; + nelt = min(nelt, maxelt); + him.globalgenomecoords += nelt; + _offs.resize(nelt); + _offs.fill((index_t)INDEX_MAX); + _sas.init( + top, + bot, + node_top, + node_bot, + node_iedge_count, + rdlen, + EListSlice(_offs, 0, nelt)); + _gws.init(gfm, ref, _sas, rnd, met); + + for(index_t off = 0; off < nelt; off++) { + WalkResult wr; + index_t tidx = 0, toff = 0, tlen = 0; + _gws.advanceElement( + off, + gfm, // forward Bowtie index for walking left + ref, // bitpair-encoded reference + _sas, // SA range with offsets + _gwstate, // GroupWalk state; scratch space + wr, // put the result here + met, // metrics + prm); // per-read metrics + assert_neq(wr.toff, (index_t)INDEX_MAX); + bool straddled2 = false; + gfm.joinedToTextOff( + wr.elt.len, + wr.toff, + tidx, + toff, + tlen, + rejectStraddle, // reject straddlers? + straddled2); // straddled? 
+ + straddled |= straddled2; + + if(tidx == (index_t)INDEX_MAX) { + // The seed hit straddled a reference boundary so the seed + // hit isn't valid + return false; + } + index_t global_toff = toff, global_tidx = tidx; + + // Coordinate of the seed hit w/r/t the pasted reference string + coords.expand(); + if(!straddled2) { + coords.back().init(global_tidx, (int64_t)global_toff, fw, wr.toff); + } else { + coords.back().init(numeric_limits::max(), (int64_t)global_toff, fw, wr.toff); + } + } + + return true; +} + +/** + * convert FM offsets to the corresponding genomic offset (chromosome id, offset) + **/ +template +bool HI_Aligner::getGenomeCoords_local( + const GFM& gfm, + const ALTDB& altdb, + const BitPairReference& ref, + RandomSource& rnd, + local_index_t top, + local_index_t bot, + local_index_t node_top, + local_index_t node_bot, + const EList >& node_iedge_count, + bool fw, + index_t rdoff, + index_t rdlen, + EList& coords, + WalkMetrics& met, + PerReadMetrics& prm, + HIMetrics& him, + bool rejectStraddle, + bool& straddled) +{ + straddled = false; + assert_gt(bot, top); + assert_leq(node_bot - node_top, bot - top); + index_t nelt = node_bot - node_top; + him.localgenomecoords += nelt; + _offs_local.resize(nelt); + _offs_local.fill((local_index_t)INDEX_MAX); + _sas_local.init( + top, + bot, + node_top, + node_bot, + node_iedge_count, + rdlen, + EListSlice(_offs_local, 0, nelt)); + _gws_local.init(gfm, ref, _sas_local, rnd, met); + + for(local_index_t off = 0; off < nelt; off++) { + WalkResult wr; + local_index_t tidx = 0, toff = 0, tlen = 0; + _gws_local.advanceElement( + off, + gfm, // forward Bowtie index for walking left + ref, // bitpair-encoded reference + _sas_local, // SA range with offsets + _gwstate_local, // GroupWalk state; scratch space + wr, // put the result here + met, // metrics + prm); // per-read metrics + assert_neq(wr.toff, (local_index_t)INDEX_MAX); + bool straddled2 = false; + bool result = gfm.joinedToTextOff( + wr.elt.len, + wr.toff, + 
tidx, + toff, + tlen, + rejectStraddle, // reject straddlers? + straddled2); // straddled? + if(!result) continue; + + straddled |= straddled2; + + if(tidx == (local_index_t)INDEX_MAX) { + // The seed hit straddled a reference boundary so the seed + // hit isn't valid + return false; + } + LocalGFM* localGFM = (LocalGFM*)&gfm; + index_t global_tidx = localGFM->_tidx; + index_t global_toff = toff + localGFM->_localOffset; + index_t joinedOff = wr.toff + localGFM->_joinedOffset; + if(global_toff < rdoff) continue; + + // Coordinate of the seed hit w/r/t the pasted reference string + coords.expand(); + coords.back().init(global_tidx, (int64_t)global_toff, fw, joinedOff); + } + + return true; +} + + +/** + * examine alignments of left and right reads to produce concordant pair alignment + **/ +template +bool HI_Aligner::pairReads( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + WalkMetrics& wlm, + PerReadMetrics& prm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink) +{ + const ReportingParams& rp = sink.reportingParams(); + assert(_paired); + const EList *rs1 = NULL, *rs2 = NULL; + sink.getUnp1(rs1); assert(rs1 != NULL); + sink.getUnp2(rs2); assert(rs2 != NULL); + index_t start_i = _concordantIdxInspected.first, start_j = _concordantIdxInspected.second; + _concordantIdxInspected.first = rs1->size(); + _concordantIdxInspected.second = rs2->size(); + for(index_t i = 0; i < rs1->size(); i++) { + for(index_t j = (i >= start_i ? 
0 : start_j); j < rs2->size(); j++) { + if(sink.state().doneConcordant()) { + return true; + } + const AlnRes& r1 = (*rs1)[i]; + Coord left = r1.refcoord(), right = r1.refcoord_right(); + assert_eq(left.ref(), right.ref()); + const AlnRes& r2 = (*rs2)[j]; + Coord left2 = r2.refcoord(), right2 = r2.refcoord_right(); + assert_eq(left2.ref(), right2.ref()); + + if(r1.repeat() != r2.repeat()) + continue; + + bool dna_frag_pass = true; + if(r1.repeat() && r2.repeat()) { + bool found = false; + for(size_t r = 0; r < _repeatConcordant.size(); r++) { + if(_repeatConcordant[r].first == left.off() && + _repeatConcordant[r].second == left2.off()) { + found = true; + break; + } + } + if(!found) { + dna_frag_pass = false; + } + } else{ + if(left.ref() != left2.ref()) continue; + assert_eq(left.orient(), right.orient()); + assert_eq(left2.orient(), right2.orient()); + if(left.orient() == gMate1fw) { + if(left2.orient() != gMate2fw) continue; + } else { + if(left2.orient() == gMate2fw) continue; + Coord temp = left; left = left2; left2 = temp; + temp = right; right = right2; right2 = temp; + } + if(left.off() > left2.off()) continue; + if(right.off() > right2.off()) continue; + if(right.off() + (int)tpol.maxIntronLen() < left2.off()) continue; + assert_geq(r1.score().score(), _minsc[0]); + assert_geq(r2.score().score(), _minsc[1]); + if(tpol.no_spliced_alignment()){ + int pairCl = PE_ALS_DISCORD; + assert_eq(r1.refid(), r2.refid()); + index_t off1, off2, len1, len2; + bool fw1, fw2; + if(r1.refoff() < r2.refoff()) { + off1 = r1.refoff(); off2 = r2.refoff(); + len1 = r1.refExtent(); len2 = r2.refExtent(); + fw1 = r1.fw(); fw2 = r2.fw(); + } else { + off1 = r2.refoff(); off2 = r1.refoff(); + len1 = r2.refExtent(); len2 = r1.refExtent(); + fw1 = r2.fw(); fw2 = r1.fw(); + } + // Check that final mate alignments are consistent with + // paired-end fragment constraints + pairCl = pepol.peClassifyPair( + off1, + len1, + fw1, + off2, + len2, + fw2); + dna_frag_pass = (pairCl != 
PE_ALS_DISCORD); + } + } + + if(!tpol.no_spliced_alignment() || dna_frag_pass) { + TAlScore threshold = sink.bestPair(); + if(sink.bestUnp1() >= _minsc[0] && sink.bestUnp2() >= _minsc[1]) { + TAlScore tmp = sink.bestUnp1() + sink.bestUnp2() - (r1.readLength() + r2.readLength()) * 0.03 * sc.mm(255); + if(tmp > threshold) { + threshold = tmp; + } + } + if(r1.score().score() + r2.score().score() >= threshold || rp.secondary) { + sink.report(0, &r1, &r2); + } + } + } + } + return true; +} + + +/** + * report read (or pair) alignment + **/ +template +bool HI_Aligner::reportHit( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + const SpliceSiteDB& ssdb, + AlnSinkWrap& sink, + index_t rdi, + const GenomeHit& hit, + bool alignMate, + const GenomeHit* ohit) +{ + assert_lt(rdi, 2); + assert(_rds[rdi] != NULL); + const Read& rd = *_rds[rdi]; + index_t rdlen = (index_t)rd.length(); + if(hit.rdoff() - hit.trim5() > 0 || hit.len() + hit.trim5() + hit.trim3() < rdlen) return false; + if(hit.score() < _minsc[rdi]) return false; + if(!sink.reportingParams().repeat && hit.repeat()) return false; + + // Edits are represented from 5' end of read to 3' end, not an alignment of read + EList& edits = const_cast&>(hit.edits()); + if(hit.trim5() > 0) { + for(size_t i = 0; i < edits.size(); i++) { + edits[i].pos += hit.trim5(); + } + } + if(!hit.fw()) { + Edit::invertPoss(edits, rdlen, false); + } + // in case of multiple exonic alignments, choose the ones near (known) splice sites + // this helps eliminate cases of reads being mapped to pseudogenes + pair spliced = hit.spliced(); // pair + if(tpol.xs_only() && spliced.first) { + if(hit.splicing_dir() == SPL_UNKNOWN) + return false; + } + if(!tpol.no_spliced_alignment() && tpol.avoid_pseudogene()) { + if(!spliced.first) { + assert(!spliced.second); + const 
index_t max_exon_size = 10000; + index_t left = 0; + if(hit.refoff() > max_exon_size) { + left = hit.refoff() - max_exon_size; + } + index_t right = hit.refoff() + hit.len() + max_exon_size; + spliced.first = ssdb.hasSpliceSites( + hit.ref(), + left, + right, + left, + right, + true); // include novel splice sites + if(altdb.hasExons()) { + spliced.second = ssdb.insideExon(hit.ref(), hit.refoff(), hit.refoff() + hit.len() - 1); + } + } + } + if(tpol.transcriptome_mapping_only() && !spliced.second) + return false; + + AlnScore asc( + hit.score(), // numeric score + hit.ns(), // # Ns + hit.ngaps(), // # gaps + hit.repeat(), + hit.splicescore(), // splice scorehit + spliced.second, // mapped to known transcripts? + spliced.first, // spliced alignment or near splice sites (novel)? + hit.trim5(), // left trim length + hit.trim3()); // right trim length + bool softTrim = hit.trim5() > 0 || hit.trim3() > 0; + AlnRes rs; + rs.init( + rdlen, // # chars after hard trimming + rd.rdid, // read ID + asc, // alignment score + &hit.edits(), // nucleotide edits array + 0, // nucleotide edits first pos + hit.edits().size(), // nucleotide edits last pos + NULL, // ambig base array + 0, // ambig base first pos + 0, // ambig base last pos + hit.coord(), // coord of leftmost aligned char in ref + hit.repeat() ? gfm.plen()[0] : gfm.plen()[hit.ref()], // length of reference aligned to + &_rawEdits, + -1, // # seed mms allowed + -1, // seed length + -1, // seed interval + 0, // minimum score for valid alignment (daehwan) + -1, // nuc5p (for colorspace) + -1, // nuc3p (for colorspace) + false, // soft pre-trimming? + 0, // 5p pre-trimming + 0, // 3p pre-trimming + softTrim, // soft trimming? + hit.fw() ? hit.trim5() : hit.trim3(), // 5p trimming + hit.fw() ? hit.trim3() : hit.trim5(), // 3p trimming + hit.repeat()); // repeat? 
+ if(!hit.fw()) { + Edit::invertPoss(edits, rdlen, false); + } + if(hit.trim5() > 0) { + for(size_t i = 0; i < edits.size(); i++) { + edits[i].pos -= hit.trim5(); + } + } + + + //rs.setRefNs(nrefn); + /*assert(rs.matchesRef( + rd, + ref, + tmp_rf_, + tmp_rdseq_, + tmp_qseq_, + _sharedVars.raw_refbuf, + _sharedVars.destU32, + raw_matches_, + _sharedVars.raw_refbuf2, + _sharedVars.reflens, + _sharedVars.refoffs));*/ + if(ohit == NULL) { + bool done; + if(rdi == 0 && !_rightendonly) { + done = sink.report(0, &rs, NULL, alignMate); + } else { + done = sink.report(0, NULL, &rs, alignMate); + } + return done; + } + + assert(ohit != NULL); + const Read& ord = *_rds[1-rdi]; + index_t ordlen = (index_t)ord.length(); + if(ohit->rdoff() - ohit->trim5() > 0 || ohit->len() + ohit->trim5() + ohit->trim3() < ordlen) return false; + if(ohit->score() < _minsc[1-rdi]) return false; + EList& oedits = const_cast&>(ohit->edits()); + if(ohit->trim5() > 0) { + for(size_t i = 0; i < oedits.size(); i++) { + oedits[i].pos += ohit->trim5(); + } + } + if(!ohit->fw()) { + Edit::invertPoss(oedits, ordlen, false); + } + AlnScore oasc( + ohit->score(), // numeric score + ohit->ns(), // # Ns + ohit->ngaps(), // # gaps + ohit->repeat()); // repeat? + bool osoftTrim = ohit->trim5() > 0 || ohit->trim3() > 0; + AlnRes ors; + ors.init( + ordlen, // # chars after hard trimming + ord.rdid, // read ID + oasc, // alignment score + &ohit->edits(), // nucleotide edits array + 0, // nucleotide edits first pos + ohit->edits().size(), // nucleotide edits last pos + NULL, // ambig base array + 0, // ambig base first pos + 0, // ambig base last pos + ohit->coord(), // coord of leftmost aligned char in ref + gfm.plen()[ohit->ref()], // length of reference aligned to + &_rawEdits, + -1, // # seed mms allowed + -1, // seed length + -1, // seed interval + 0, // minimum score for valid alignment (daehwan) + -1, // nuc5p (for colorspace) + -1, // nuc3p (for colorspace) + false, // soft pre-trimming? 
+ 0, // 5p pre-trimming + 0, // 3p pre-trimming + osoftTrim, // soft trimming? + ohit->fw() ? ohit->trim5() : ohit->trim3(), // 5p trimming + ohit->fw() ? ohit->trim3() : ohit->trim5(), // 3p trimming + ohit->repeat()); // repeat? + if(!ohit->fw()) { + Edit::invertPoss(oedits, ordlen, false); + } + if(ohit->trim5() > 0) { + for(size_t i = 0; i < oedits.size(); i++) { + oedits[i].pos -= ohit->trim5(); + } + } + //rs.setRefNs(nrefn); + assert(ors.matchesRef( + ord, + ref, + tmp_rf_, + tmp_rdseq_, + tmp_qseq_, + _sharedVars.raw_refbuf, + _sharedVars.destU32, + raw_matches_, + _sharedVars.raw_refbuf2, + _sharedVars.reflens, + _sharedVars.refoffs)); + + bool done; + if(rdi == 0) { + done = sink.report(0, &rs, &ors); + } else { + done = sink.report(0, &ors, &rs); + } + return done; +} + +/** + * check this alignment is already examined + **/ +template +bool HI_Aligner::redundant( + AlnSinkWrap& sink, + index_t rdi, + index_t tidx, + index_t toff) +{ + assert_lt(rdi, 2); + const EList* rs = NULL; + if(rdi == 0) sink.getUnp1(rs); + else sink.getUnp2(rs); + assert(rs != NULL); + for(index_t i = 0; i < rs->size(); i++) { + Coord coord_left = (*rs)[i].refcoord(), coord_right = (*rs)[i].refcoord_right(); + assert_eq(coord_left.ref(), coord_right.ref()); + assert_lt(coord_left.off(), coord_right.off()); + assert_eq(coord_left.orient(), coord_right.orient()); + + if(tidx != coord_left.ref()) continue; + if(toff >= coord_left.off() && toff <= coord_right.off()) return true; + } + + return false; +} + + +/** + * check this alignment is already examined + **/ +template +bool HI_Aligner::redundant( + AlnSinkWrap& sink, + index_t rdi, + const GenomeHit& hit) +{ + assert_lt(rdi, 2); + assert(_rds[rdi] != NULL); + index_t rdlen = (index_t)_rds[rdi]->length(); + const EList* rs = NULL; + if(rdi == 0) sink.getUnp1(rs); + else sink.getUnp2(rs); + assert(rs != NULL); + for(index_t i = 0; i < rs->size(); i++) { + const AlnRes& rsi = (*rs)[i]; + if(rsi.refcoord() == hit.coord()) { + const 
EList& editsi = rsi.ned(); + const EList& edits = hit.edits(); + if(editsi.size() == edits.size()) { + size_t eidx = 0; + if(!hit.fw()) { + Edit::invertPoss(const_cast&>(edits), rdlen, false); + } + // daehwan: this is a temporary solution to compare edits + for(; eidx < editsi.size(); eidx++) { + if(!(editsi[eidx] == edits[eidx])) { + break; + } + } + if(!hit.fw()) { + Edit::invertPoss(const_cast&>(edits), rdlen, false); + } + if(eidx >= editsi.size()) { + assert_eq(eidx, editsi.size()); + return true; + } + } + } + } + + return false; +} + + +/** + * Sweep right-to-left and left-to-right using exact matching. Remember all + * the SA ranges encountered along the way. Report exact matches if there are + * any. Calculate a lower bound on the number of edits in an end-to-end + * alignment. + */ +template +index_t HI_Aligner::partialSearch( + const GFM& gfm, // BWT index + const Read& read, // read to align + const Scoring& sc, // scoring scheme + const ReportingParams& rp, + bool fw, + size_t mineMax, // don't care about edit bounds > this + size_t& mineFw, // minimum # edits for forward read + size_t& mineRc, // minimum # edits for revcomp read + ReadBWTHit& hit, // holds all the seed hits (and exact hit) + RandomSource& rnd, // pseudo-random source + bool& pseudogeneStop, + bool& anchorStop, + index_t maxHitLen) +{ + bool pseudogeneStop_ = pseudogeneStop, anchorStop_ = anchorStop; + pseudogeneStop = anchorStop = false; + const index_t ftabLen = gfm.gh().ftabChars(); + const bool linearFM = gfm.gh().linearFM(); + SideLocus tloc, bloc; + const index_t len = (index_t)read.length(); + const BTDnaString& seq = fw ? 
read.patFw : read.patRc; + assert(!seq.empty()); + + size_t nelt = 0; + EList >& partialHits = hit._partialHits; + index_t& cur = hit._cur; + assert_lt(cur, hit._len); + + hit._numPartialSearch++; + + index_t offset = cur; + index_t dep = offset; + pair range(0, 0); + pair rangeTemp(0, 0); + pair node_range(0, 0); + pair node_rangeTemp(0, 0); + _node_iedge_count.clear(); + _tmp_node_iedge_count.clear(); + index_t left = len - dep; + assert_gt(left, 0); + if(left < ftabLen + 1) { + cur = hit._len; + partialHits.expand(); + partialHits.back().init((index_t)INDEX_MAX, + (index_t)INDEX_MAX, + (index_t)INDEX_MAX, + (index_t)INDEX_MAX, + _node_iedge_count, + fw, + (index_t)offset, + (index_t)(cur - offset)); + hit.done(true); + return 0; + } + // Does N interfere with use of Ftab? + for(index_t i = 0; i < ftabLen; i++) { + int c = seq[len-dep-1-i]; + if(c > 3) { + cur += (i+1); + partialHits.expand(); + partialHits.back().init((index_t)INDEX_MAX, + (index_t)INDEX_MAX, + (index_t)INDEX_MAX, + (index_t)INDEX_MAX, + _node_iedge_count, + fw, + (index_t)offset, + (index_t)(cur - offset)); + if(cur >= hit._len) { + hit.done(true); + } + return 0; + } + } + + // Use ftab + gfm.ftabLoHi(seq, len - dep - ftabLen, false, range.first, range.second); + dep += ftabLen; + if(range.first >= range.second) { + cur = dep; + partialHits.expand(); + partialHits.back().init((index_t)INDEX_MAX, + (index_t)INDEX_MAX, + (index_t)INDEX_MAX, + (index_t)INDEX_MAX, + _node_iedge_count, + fw, + (index_t)offset, + (index_t)(cur - offset)); + if(cur >= hit._len) { + hit.done(true); + } + return 0; + } + index_t same_range = 0, similar_range = 0; + HIER_INIT_LOCS(range.first, range.second, tloc, bloc, gfm); + // Keep going + while(dep < len && dep - offset < maxHitLen) { + int c = seq[len-dep-1]; + if(c > 3) { + rangeTemp.first = rangeTemp.second = 0; + node_rangeTemp.first = node_rangeTemp.second = 0; + _tmp_node_iedge_count.clear(); + } else { + if(bloc.valid()) { + bwops_ += 2; + if(linearFM) { + 
rangeTemp = gfm.mapLF(tloc, bloc, c, &node_rangeTemp); + } else { + rangeTemp = gfm.mapGLF(tloc, bloc, c, &node_rangeTemp, &_tmp_node_iedge_count, (index_t)rp.kseeds); + } + } else { + bwops_++; + rangeTemp = gfm.mapGLF1(range.first, tloc, c, &node_rangeTemp); + if(rangeTemp.first + 1 < rangeTemp.second) { + assert_eq(node_rangeTemp.first + 1, node_rangeTemp.second); + _tmp_node_iedge_count.clear(); + _tmp_node_iedge_count.expand(); + _tmp_node_iedge_count.back().first = 0; + _tmp_node_iedge_count.back().second = rangeTemp.second - rangeTemp.first - 1; + } + } + } + if(rangeTemp.first >= rangeTemp.second) { + break; + } + if(pseudogeneStop_) { + if(node_rangeTemp.second - node_rangeTemp.first < node_range.second - node_range.first && node_range.second - node_range.first <= min(5, (index_t)rp.khits)) { + static const index_t minLenForPseudogene = (index_t)_minK + 6; + if(dep - offset >= minLenForPseudogene && similar_range >= 5) { + hit._numUniqueSearch++; + pseudogeneStop = true; + break; + } + } + if(node_rangeTemp.second - node_rangeTemp.first != 1) { + if(node_rangeTemp.second - node_rangeTemp.first + 2 >= node_range.second - node_range.first) similar_range++; + else if(node_rangeTemp.second - node_rangeTemp.first + 4 < node_range.second - node_range.first) similar_range = 0; + } else { + pseudogeneStop_ = false; + } + } + + if(anchorStop_) { + if(node_rangeTemp.second - node_rangeTemp.first != 1 && node_range.second - node_range.first == node_rangeTemp.second - node_rangeTemp.first) { + same_range++; + if(same_range >= 5) { + anchorStop_ = false; + } + } else { + same_range = 0; + } + + if(dep - offset >= _minK + 8 && node_rangeTemp.second - node_rangeTemp.first >= 4) { + anchorStop_ = false; + } + } + + range = rangeTemp; + node_range = node_rangeTemp; + if(_tmp_node_iedge_count.size() > 0) { + _node_iedge_count = _tmp_node_iedge_count; + _tmp_node_iedge_count.clear(); + } else { + _node_iedge_count.clear(); + } + dep++; + + if(anchorStop_) { + if(dep - offset 
>= _minK + 12 && range.second - range.first == 1) { + hit._numUniqueSearch++; + anchorStop = true; + break; + } + } + + HIER_INIT_LOCS(range.first, range.second, tloc, bloc, gfm); + } + + // Done + if(range.first < range.second) { + assert_leq(node_range.second - node_range.first, range.second - range.first); + assert_gt(dep, offset); + assert_leq(dep, len); + partialHits.expand(); + index_t hit_type = CANDIDATE_HIT; + if(anchorStop) hit_type = ANCHOR_HIT; + else if(pseudogeneStop) hit_type = PSEUDOGENE_HIT; + bool report = node_range.first < node_range.second; + if(node_range.second - node_range.first < range.second - range.first) { + if(_node_iedge_count.size() == 0) report = false; + } + if(report) { +#ifndef NDEBUG + if(node_range.second - node_range.first < range.second - range.first) { + ASSERT_ONLY(index_t add = 0); + for(index_t e = 0; e < _node_iedge_count.size(); e++) { + if(e > 0) { + assert_lt(_node_iedge_count[e-1].first, _node_iedge_count[e].first); + } + assert_gt(_node_iedge_count[e].second, 0); + add += _node_iedge_count[e].second; + } + assert_eq(node_range.second - node_range.first + add, range.second - range.first); + } else { + assert(_node_iedge_count.empty()); + } +#endif + partialHits.back().init(range.first, + range.second, + node_range.first, + node_range.second, + _node_iedge_count, + fw, + (index_t)offset, + (index_t)(dep - offset), + hit_type); + } else { + _node_iedge_count.clear(); + partialHits.back().init(INDEX_MAX, + INDEX_MAX, + INDEX_MAX, + INDEX_MAX, + _node_iedge_count, + fw, + (index_t)offset, + (index_t)(dep - offset), + hit_type); + } + + nelt += (node_range.second - node_range.first); + cur = dep; + if(cur >= hit._len) { + if(hit_type == CANDIDATE_HIT) hit._numUniqueSearch++; + hit.done(true); + } + } + return (index_t)nelt; +} + + +/** + */ +template +index_t HI_Aligner::globalGFMSearch( + const GFM& gfm, // BWT index + const Read& read, // read to align + const Scoring& sc, // scoring scheme + const ReportingParams& rp, + 
bool fw, + index_t hitoff, + index_t& hitlen, + index_t& top, + index_t& bot, + index_t& node_top, + index_t& node_bot, + EList >& node_iedge_count, + RandomSource& rnd, + bool& uniqueStop, + index_t maxHitLen) +{ + bool uniqueStop_ = uniqueStop; + uniqueStop = false; + const index_t ftabLen = gfm.gh().ftabChars(); + const bool linearFM = gfm.gh().linearFM(); + SideLocus tloc, bloc; + const index_t len = (index_t)read.length(); + + size_t nelt = 0; + const BTDnaString& seq = fw ? read.patFw : read.patRc; + assert(!seq.empty()); + + index_t offset = len - hitoff - 1; + index_t dep = offset; + pair range(0, 0); + pair rangeTemp(0, 0); + pair node_range(0, 0); + pair node_rangeTemp(0, 0); + node_iedge_count.clear(); + _tmp_node_iedge_count.clear(); + index_t left = len - dep; + assert_gt(left, 0); + if(left < ftabLen + 1) { + hitlen = left; + return 0; + } + + // Does N interfere with use of Ftab? + for(index_t i = 0; i < ftabLen; i++) { + int c = seq[len-dep-1-i]; + if(c > 3) { + hitlen = (i+1); + return 0; + } + } + + // Use ftab + gfm.ftabLoHi(seq, len - dep - ftabLen, false, range.first, range.second); + dep += ftabLen; + if(range.first >= range.second) { + hitlen = ftabLen; + return 0; + } + + HIER_INIT_LOCS(range.first, range.second, tloc, bloc, gfm); + // Keep going + while(dep < len) { + int c = seq[len-dep-1]; + if(c > 3) { + rangeTemp.first = rangeTemp.second = 0; + node_rangeTemp.first = node_rangeTemp.second = 0; + _tmp_node_iedge_count.clear(); + } else { + if(bloc.valid()) { + bwops_ += 2; + if(linearFM) { + rangeTemp = gfm.mapLF(tloc, bloc, c, &node_rangeTemp); + } else { + rangeTemp = gfm.mapGLF(tloc, bloc, c, &node_rangeTemp, &_tmp_node_iedge_count, (index_t)rp.kseeds); + } + } else { + bwops_++; + rangeTemp = gfm.mapGLF1(range.first, tloc, c, &node_rangeTemp); + if(rangeTemp.first + 1 < rangeTemp.second) { + assert_eq(node_rangeTemp.first + 1, node_rangeTemp.second); + _tmp_node_iedge_count.clear(); + _tmp_node_iedge_count.expand(); + 
_tmp_node_iedge_count.back().first = 0; + _tmp_node_iedge_count.back().second = rangeTemp.second - rangeTemp.first - 1; + } + } + } + if(rangeTemp.first >= rangeTemp.second) { + break; + } + + range = rangeTemp; + node_range = node_rangeTemp; + if(_tmp_node_iedge_count.size() > 0) { + node_iedge_count = _tmp_node_iedge_count; + _tmp_node_iedge_count.clear(); + } else { + node_iedge_count.clear(); + } + dep++; + + if(uniqueStop_) { + if(range.second - range.first == 1 && dep - offset >= _minK) { + uniqueStop = true; + break; + } + } + + HIER_INIT_LOCS(range.first, range.second, tloc, bloc, gfm); + } + + // Done + if(node_range.first < node_range.second && node_range.second - node_range.first <= rp.kseeds) { + assert_leq(node_range.second - node_range.first, range.second - range.first); +#ifndef NDEBUG + if(node_range.second - node_range.first < range.second - range.first) { + ASSERT_ONLY(index_t add = 0); + for(index_t e = 0; e < node_iedge_count.size(); e++) { + if(e > 0) { + assert_lt(node_iedge_count[e-1].first, node_iedge_count[e].first); + } + assert_gt(node_iedge_count[e].second, 0); + add += node_iedge_count[e].second; + } + assert_eq(node_range.second - node_range.first + add, range.second - range.first); + } else { + assert(node_iedge_count.empty()); + } +#endif + assert_gt(dep, offset); + assert_leq(dep, len); + top = range.first; bot = range.second; + node_top = node_range.first; node_bot = node_range.second; + nelt += (node_bot - node_top); + hitlen = dep - offset; + } + return (index_t)nelt; +} + + +/** + * + **/ +template +index_t HI_Aligner::localGFMSearch( + const LocalGFM& gfm, // GFM index + const Read& read, // read to align + const Scoring& sc, // scoring scheme + const ReportingParams& rp, + bool fw, + index_t rdoff, + index_t& hitlen, + local_index_t& top, + local_index_t& bot, + local_index_t& node_top, + local_index_t& node_bot, + EList >& local_node_iedge_count, + RandomSource& rnd, + bool& uniqueStop, + local_index_t minUniqueLen, + 
local_index_t maxHitLen, + local_index_t maxHits) +{ + maxHits = max(maxHits, rp.kseeds); + bool uniqueStop_ = uniqueStop; + uniqueStop = false; + const local_index_t ftabLen = (local_index_t)gfm.gh().ftabChars(); + const bool linearFM = gfm.gh().linearFM(); + SideLocus tloc, bloc; + const local_index_t len = (local_index_t)read.length(); + size_t nelt = 0; + + const BTDnaString& seq = fw ? read.patFw : read.patRc; + assert(!seq.empty()); + + local_index_t offset = len - rdoff - 1; + local_index_t dep = offset; + pair range(0, 0); + pair rangeTemp(0, 0); + pair node_range(0, 0); + pair node_rangeTemp(0, 0); + top = bot = node_top = node_bot = 0; + local_node_iedge_count.clear(); + _tmp_local_node_iedge_count.clear(); + local_index_t left = len - dep; + assert_gt(left, 0); + if(left < ftabLen + 1) { + hitlen = left; + return 0; + } + // Does N interfere with use of Ftab? + for(local_index_t i = 0; i < ftabLen; i++) { + int c = seq[len-dep-1-i]; + if(c > 3) { + hitlen = i + 1; + return 0; + } + } + + gfm.ftabLoHi(seq, len - dep - ftabLen, false, range.first, range.second); + dep += ftabLen; + if(range.first >= range.second) { + hitlen = ftabLen; + return 0; + } + LOCAL_INIT_LOCS(range.first, range.second, tloc, bloc, gfm); + // Keep going + while(dep < len) { + int c = seq[len-dep-1]; + if(c > 3) { + rangeTemp.first = rangeTemp.second = 0; + node_rangeTemp.first = node_rangeTemp.second = 0; + _tmp_local_node_iedge_count.clear(); + } else { + if(bloc.valid()) { + bwops_ += 2; + if(linearFM) { + rangeTemp = gfm.mapLF(tloc, bloc, c, &node_rangeTemp); + } else { + rangeTemp = gfm.mapGLF(tloc, bloc, c, &node_rangeTemp, &_tmp_local_node_iedge_count, rp.kseeds); + } + } else { + bwops_++; + rangeTemp = gfm.mapGLF1(range.first, tloc, c, &node_rangeTemp); + if(rangeTemp.first + 1 < rangeTemp.second) { + assert_eq(node_rangeTemp.first + 1, node_rangeTemp.second); + _tmp_local_node_iedge_count.clear(); + _tmp_local_node_iedge_count.expand(); + 
_tmp_local_node_iedge_count.back().first = 0; + _tmp_local_node_iedge_count.back().second = rangeTemp.second - rangeTemp.first - 1; + } + } + } + if(rangeTemp.first >= rangeTemp.second) { + break; + } + + range = rangeTemp; + node_range = node_rangeTemp; + if(_tmp_local_node_iedge_count.size() > 0) { + local_node_iedge_count = _tmp_local_node_iedge_count; + _tmp_local_node_iedge_count.clear(); + } else { + local_node_iedge_count.clear(); + } + dep++; + + if(uniqueStop_) { + if(range.second - range.first == 1 && dep - offset >= minUniqueLen) { + uniqueStop = true; + break; + } + } + + if(dep - offset >= maxHitLen) break; + LOCAL_INIT_LOCS(range.first, range.second, tloc, bloc, gfm); + } + + // Done + if(node_range.first < node_range.second && node_range.second - node_range.first <= maxHits) { + assert_leq(node_range.second - node_range.first, range.second - range.first); +#ifndef NDEBUG + if(node_range.second - node_range.first < range.second - range.first) { + ASSERT_ONLY(index_t add = 0); + for(index_t e = 0; e < local_node_iedge_count.size(); e++) { + if(e > 0) { + assert_lt(local_node_iedge_count[e-1].first, local_node_iedge_count[e].first); + } + assert_gt(local_node_iedge_count[e].second, 0); + add += local_node_iedge_count[e].second; + } + assert_eq(node_range.second - node_range.first + add, range.second - range.first); + } else { + assert(local_node_iedge_count.empty()); + } +#endif + assert_gt(dep, offset); + assert_leq(dep, len); + top = range.first; bot = range.second; + node_top = node_range.first; node_bot = node_range.second; + nelt += (node_bot - node_top); + hitlen = dep - offset; + } + + return (index_t)nelt; +} + +/** + * + **/ +template +bool HI_Aligner::isSearched( + const GenomeHit& hit, + index_t rdi) +{ + assert_lt(rdi, 2); + EList >& searchedHits = _hits_searched[rdi]; + for(index_t i = 0; i < searchedHits.size(); i++) { + if(searchedHits[i].contains(hit)) return true; + } + return false; +} + +/** + * + **/ +template +void 
HI_Aligner::addSearched( + const GenomeHit& hit, + index_t rdi) +{ + assert_lt(rdi, 2); + assert(!isSearched(hit, rdi)); + EList >& searchedHits = _hits_searched[rdi]; + searchedHits.push_back(hit); +} + +#endif /*HI_ALIGNER_H_*/ diff --git a/hier_idx_common.h b/hier_idx_common.h new file mode 100644 index 0000000..1e794e6 --- /dev/null +++ b/hier_idx_common.h @@ -0,0 +1,43 @@ +/* + * Copyright 2015, Daehwan Kim + * + * This file is part of HISAT 2. + * + * Beast is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . 
+ */ + +#ifndef HIERGBWT_COMMON_H_ +#define HIERGBWT_COMMON_H_ + +// maximum size of a sequence represented by a local index +static const uint32_t local_index_size = (1 << 16) - (1 << 13); // 1 << 5 is necessary for eftab index +static const uint32_t local_max_gbwt = (1 << 16) - (1 << 11); + +// size of the overlapped sequence between the sequences represented by two consecutive local indexes +static const uint32_t local_index_overlap = 1024; + +// interval between two consecutive local indexes +static const uint32_t local_index_interval = local_index_size - local_index_overlap; + +// line rate in local indexes +static const int32_t local_lineRate_fm = 6; +static const int32_t local_lineRate_gfm = 7; + +// how many rows are marked in a local index, every 2^th row is marked +static const int32_t local_offRate = 3; + +// the look table in a local index 4^ entries +static const int32_t local_ftabChars = 6; + +#endif /*HIERGBWT_COMMON_H_*/ diff --git a/hisat-3n b/hisat-3n new file mode 100644 index 0000000..dcfee89 --- /dev/null +++ b/hisat-3n @@ -0,0 +1,782 @@ +#!/usr/bin/env perl + +# +# Copyright 2015, Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . +# + +# hisat-3n: +# +# A wrapper script for hisat-3n. Provides various advantages over running +# hisat directly, including: +# +# 1. Handling compressed inputs +# 2. Redirecting output to various files +# 3. 
Output directly to bam (not currently supported) + +use strict; +use warnings; +use Getopt::Long qw(GetOptions); +use File::Spec; +use POSIX; + + +my ($vol,$script_path,$prog); +$prog = File::Spec->rel2abs( __FILE__ ); + +while (-f $prog && -l $prog){ + my (undef, $dir, undef) = File::Spec->splitpath($prog); + $prog = File::Spec->rel2abs(readlink($prog), $dir); +} + +($vol,$script_path,$prog) + = File::Spec->splitpath($prog); +my $os_is_nix = ($^O eq "linux") || ($^O eq "darwin"); +my $align_bin_s = $os_is_nix ? 'hisat2-align-s' : 'hisat2-align-s.exe'; +my $build_bin = $os_is_nix ? 'hisat2-build' : 'hisat2-build.exe'; +my $align_bin_l = $os_is_nix ? 'hisat2-align-l' : 'hisat2-align-l.exe'; +my $align_prog_s= File::Spec->catpath($vol,$script_path,$align_bin_s); +my $align_prog_l= File::Spec->catpath($vol,$script_path,$align_bin_l); +my $align_prog = $align_prog_s; +my $read_stat_prog = File::Spec->catpath($vol,$script_path,"hisat2_read_statistics.py"); +my $idx_ext_l = 'ht2l'; +my $idx_ext_s = 'ht2'; +my $idx_ext = $idx_ext_s; +my $seq_in_args = 0; +my $skip_read_stat = 0; +my %signo = (); +my @signame = (); +my $basechange_pair = 'CT'; + +# HISAT-3N basechange type +my %conversion_type = ( + "AT" => 0, "AC" => 0, "CG" => 0, "CT" => 0, "GA" => 0, "TG" => 0, + # need to reverse From,To bases + "CA" => 1, "GC" => 1, "TA" => 1, "TC" => 1, "AG" => 1, "GT" => 1, + ); + +my %convertion_rc = ( + "AT" => "TA", "AC" => "TG", "CG" => "GC", + "CT" => "GA", "GA" => "CT", "TG" => "AC", + ); + +sub getConversion { + my $from = uc $_[0]; + my $to = uc $_[1]; + my $conv_pair = $from.$to; + + my $val = $conversion_type{$conv_pair}; + + if (defined $val) { + if ($val) { + $conv_pair = $to.$from; + } + } else { + return undef; + } + + return $conv_pair; +} + +{ + # Get signal info + use Config; + my $i = 0; + for my $name (split(' ', $Config{sig_name})) { + $signo{$name} = $i; + $signame[$i] = $name; + $i++; + } +} + +(-x "$align_prog") || + Fail("Expected hisat2 to be in same 
directory with hisat2-align-s and hisat2-align-l:\n$script_path\n"); + +(-x "$read_stat_prog") || ($skip_read_stat = 1); + +# Get description of arguments from HISAT so that we can distinguish HISAT +# args from wrapper args +sub getHt2Desc($) { + my $d = shift; + my $cmd = "'$align_prog' --wrapper basic-0 --arg-desc"; + open(my $fh, "$cmd |") || Fail("Failed to run command '$cmd'\n"); + while(readline $fh) { + chomp; + next if /^\s*$/; + my @ts = split(/\t/); + $d->{$ts[0]} = $ts[1]; + } + close($fh); + $? == 0 || Fail("Description of arguments failed!\n"); +} + +my %desc = (); +my %wrapped = ("1" => 1, "2" => 1); +getHt2Desc(\%desc); + +# Given an option like -1, determine whether it's wrapped (i.e. should be +# handled by this script rather than being passed along to HISAT) +sub isWrapped($) { return defined($wrapped{$_[0]}); } + +my @orig_argv = @ARGV; + +my @ht2w_args = (); # options for wrapper +my @ht2_args = (); # options for HISAT +my $saw_dd = 0; +for(0..$#ARGV) { + if($ARGV[$_] eq "--") { + $saw_dd = 1; + next; + } + push @ht2w_args, $ARGV[$_] if !$saw_dd; + push @ht2_args, $ARGV[$_] if $saw_dd; +} +if(!$saw_dd) { + @ht2_args = @ht2w_args; + @ht2w_args= (); +} + +my $debug = 0; +my %read_fns = (); +my %read_compress = (); +my $cap_out = undef; # Filename for passthrough +my $no_unal = 0; +my $large_idx = 0; +# Remove whitespace +for my $i (0..$#ht2_args) { + $ht2_args[$i]=~ s/^\s+//; $ht2_args[$i] =~ s/\s+$//; +} + +# We've handled arguments that the user has explicitly directed either to the +# wrapper or to hisat, now we capture some of the hisat arguments that +# ought to be handled in the wrapper +for(my $i = 0; $i < scalar(@ht2_args); $i++) { + next unless defined($ht2_args[$i]); + my $arg = $ht2_args[$i]; + my @args = split(/=/, $arg); + if(scalar(@args) > 2) { + $args[1] = join("=", @args[1..$#args]); + } + $arg = $args[0]; + if($arg eq "-U" || $arg eq "--unpaired") { + $ht2_args[$i] = undef; + $arg =~ s/^-U//; $arg =~ s/^--unpaired//; + if($arg 
ne "") { + # Argument was part of this token + my @args = split(/,/, $arg); + for my $a (@args) { push @ht2w_args, ("-U", $a); } + } else { + # Argument is in the next token + $i < scalar(@ht2_args)-1 || Fail("Argument expected in next token!\n"); + $i++; + my @args = split(/,/, $ht2_args[$i]); + for my $a (@args) { push @ht2w_args, ("-U", $a); } + $ht2_args[$i] = undef; + } + } + if($arg =~ /^--?([12])/ && $arg !~ /^--?12/) { + my $mate = $1; + $ht2_args[$i] = undef; + $arg =~ s/^--?[12]//; + if($arg ne "") { + # Argument was part of this token + my @args = split(/,/, $arg); + for my $a (@args) { push @ht2w_args, ("-$mate", $a); } + } else { + # Argument is in the next token + $i < scalar(@ht2_args)-1 || Fail("Argument expected in next token!\n"); + $i++; + my @args = split(/,/, $ht2_args[$i]); + for my $a (@args) { push @ht2w_args, ("-$mate", $a); } + $ht2_args[$i] = undef; + } + } + if($arg eq "--debug") { + $debug = 1; + $ht2_args[$i] = undef; + } + if($arg eq "--no-unal") { + $no_unal = 1; + $ht2_args[$i] = undef; + } + if($arg eq "--large-index") { + $large_idx = 1; + $ht2_args[$i] = undef; + } + if($arg eq "--skip-read-lengths") { + $skip_read_stat = 1; + $ht2_args[$i] = undef + } + if($arg eq "-c") { + $seq_in_args = 1; + } + if($arg eq "--base-change") { + my $base_args = ""; + if (scalar @args > 1 && $args[1] ne "") { + $base_args = $args[1]; + } else { + Fail("${arg} option takes an argument.\n") if ($i >= scalar(@ht2_args)-1); + $base_args = $ht2_args[$i+1]; + $i++; + } + my @basechange_opt = split(/,/, $base_args); + + Fail("Invalid --base-change option\n") if (scalar @basechange_opt != 2); + $basechange_pair = getConversion($basechange_opt[0], $basechange_opt[1]); + + Fail("Invalid --base-change option\n") unless defined $basechange_pair; + } + + for my $rarg ("un-conc", "al-conc", "al-conc-disc", "un", "al") { + if($arg =~ /^--${rarg}$/ || $arg =~ /^--${rarg}-gz$/ || $arg =~ /^--${rarg}-bz2$/) { + $ht2_args[$i] = undef; + if(scalar(@args) > 1 && 
$args[1] ne "") { + $read_fns{$rarg} = $args[1]; + } else { + $i < scalar(@ht2_args)-1 || Fail("--${rarg}* option takes an argument.\n"); + $read_fns{$rarg} = $ht2_args[$i+1]; + $ht2_args[$i+1] = undef; + } + $read_compress{$rarg} = ""; + $read_compress{$rarg} = "gzip" if $arg eq "--${rarg}-gz"; + $read_compress{$rarg} = "bzip2" if $arg eq "--${rarg}-bz2"; + last; + } + } +} + +# If the user asked us to redirect some reads to files, or to suppress +# unaligned reads, then we need to capture the output from HISAT and pass it +# through this wrapper. +my $passthru = 0; +if(scalar(keys %read_fns) > 0 || $no_unal) { + $passthru = 1; + push @ht2_args, "--passthrough"; + $cap_out = "-"; + for(my $i = 0; $i < scalar(@ht2_args); $i++) { + next unless defined($ht2_args[$i]); + my $arg = $ht2_args[$i]; + if($arg eq "-S" || $arg eq "--output") { + $i < scalar(@ht2_args)-1 || Fail("-S/--output takes an argument.\n"); + $cap_out = $ht2_args[$i+1]; + $ht2_args[$i] = undef; + $ht2_args[$i+1] = undef; + } + } +} +my @tmp = (); +for (@ht2_args) { push(@tmp, $_) if defined($_); } +@ht2_args = @tmp; + +my @unps = (); +my @mate1s = (); +my @mate2s = (); +my @to_delete = (); +my @to_kills = (); +my $temp_dir = "/tmp"; +my $bam_out = 0; +my $ref_str = undef; +my $no_pipes = 0; +my $keep = 0; +my $verbose = 0; +my $readpipe = undef; +my $log_fName = undef; + +my @ht2w_args_cp = (@ht2w_args>0) ? 
@ht2w_args : @ht2_args; +Getopt::Long::Configure("pass_through","no_ignore_case"); + +my @old_ARGV = @ARGV; +@ARGV = @ht2w_args_cp; + +GetOptions( + "1=s" => \@mate1s, + "2=s" => \@mate2s, + "reads|U=s" => \@unps, + "temp-directory=s" => \$temp_dir, + "bam" => \$bam_out, + "no-named-pipes" => \$no_pipes, + "ref-string|reference-string=s" => \$ref_str, + "keep" => \$keep, + "verbose" => \$verbose, + "log-file=s" => \$log_fName +); + +@ARGV = @old_ARGV; + +my $old_stderr; + +if ($log_fName) { + open($old_stderr, ">&STDERR") or Fail("Cannot dup STDERR!\n"); + open(STDERR, ">", $log_fName) or Fail("Cannot redirect to log file $log_fName.\n"); +} + +Info("Before arg handling:\n"); +Info(" Wrapper args:\n[ @ht2w_args ]\n"); +Info(" Binary args:\n[ @ht2_args ]\n"); + +# check read lengths +# if read_files have more than 1 files, use first one, +my @read_files = (scalar(@unps) > 0) ? @unps : @mate1s; +if ((scalar(@read_files) > 0) + && ($seq_in_args == 0) + && ($skip_read_stat == 0)) { + Info("Check read length: $read_files[0]\n"); + my $cmd = "'$read_stat_prog' $read_files[0]"; + my $read_len_str = ""; + + open(my $fh, "$cmd |") || Fail("Failed to run command '$cmd'\n"); + while(readline $fh) { + chomp; + next if /^\s*$/; + my @ts = split(/ /); + if (scalar(@ts) > 4) { + $read_len_str = $ts[4]; + } else { + $read_len_str = ""; + } + } + close($fh); + + if (($read_len_str ne "") && ($read_len_str ne "0")) { + Info("Read Length String: $read_len_str\n"); + push @ht2_args, ("--read-lengths", $read_len_str); + } +} + +sub check_file_exist($$$) { + my ($unps, $mate1s, $mate2s) = @_; + for my $fn (@$unps, @$mate1s, @$mate2s) { + if (not -f $fn) { + Fail("Read file '%s' doesn't exist\n", $fn); + return 1; + } + } + return 0; +} + +sub cat_file($$) { + my ($ifn, $ofh) = @_; + my $ifh = undef; + if($ifn =~ /\.gz$/) { + open($ifh, "gzip -dc $ifn |") || + Fail("Could not open gzipped read file: $ifn \n"); + } elsif($ifn =~ /\.bz2/) { + open($ifh, "bzip2 -dc $ifn |") || + Fail("Could 
not open bzip2ed read file: $ifn \n"); + } else { + open($ifh, $ifn) || Fail("Could not open read file: $ifn \n"); + } + while(readline $ifh) { print {$ofh} $_; } + close($ifh); +} + +# Return non-zero if and only if the input should be wrapped (i.e. because +# it's compressed). +sub wrapInput($$$) { + my ($unps, $mate1s, $mate2s) = @_; + for my $fn (@$unps, @$mate1s, @$mate2s) { + return 1 if $fn =~ /\.gz$/ || $fn =~ /\.bz2$/; + } + return 0; +} + +sub Info { + if ($verbose) { + print STDERR "(INFO): " ,@_; + } +} + +sub Error { + my @msg = @_; + $msg[0] = "(ERR): ".$msg[0]; + printf STDERR @msg; +} + +sub Fail { + Error(@_); + die("Exiting now ...\n"); +} + +sub Extract_IndexName_From { + my $index_opt = $ref_str ? '--index' : '-x'; + for (my $i=0; $i<@_; $i++) { + if ($_[$i] eq $index_opt){ + my $idx_basename = $_[$i+1]; + my @idx_filenames = glob($idx_basename . "*.ht2{,l}"); + unless(@idx_filenames) { + if(exists $ENV{"HISAT2_INDEXES"}) { + @idx_filenames = glob("$ENV{'HISAT2_INDEXES'}/$idx_basename" . "ht2{,l}"); + } + + if(!@idx_filenames) { + Fail("\"" . $idx_basename . 
"\" does not exist\n"); + } + $idx_basename = "$ENV{'HISAT2_INDEXES'}/$idx_basename" + } + + return $idx_basename; + } + } + Info("Cannot find any index option (--reference-string, --ref-string or -x) in the given command line.\n"); +} + +sub Get_IndexFileName { + my $isLargeIndex = $_[0]; + my $IndexPrefix = $_[1]; + my $IndexFileName = "${IndexPrefix}.3n.1.1."; + + if ($isLargeIndex) { + $IndexFileName = $IndexFileName.$idx_ext_l; + } else { + $IndexFileName = $IndexFileName.$idx_ext_s; + } + + return $IndexFileName; +} + +if($seq_in_args == 0) { + check_file_exist(\@unps, \@mate1s, \@mate2s); +} + +if(wrapInput(\@unps, \@mate1s, \@mate2s)) { + if(scalar(@mate2s) > 0) { + # + # Wrap paired-end inputs + # + # Put reads into temporary files or fork off processes to feed named pipes + scalar(@mate2s) == scalar(@mate1s) || + Fail("Different number of files specified with --reads/-1 as with -2\n"); + # Make a named pipe for delivering mate #1s + my $m1fn = "$temp_dir/$$.inpipe1"; + push @to_delete, $m1fn; + push @ht2_args, "-1 $m1fn"; + # Create named pipe 1 for writing + if(!$no_pipes) { + mkfifo($m1fn, 0700) || Fail("mkfifo($m1fn) failed.\n"); + } + my $pid = 0; + $pid = fork() unless $no_pipes; + push @to_kills, $pid unless $no_pipes; + if($pid == 0) { + # Open named pipe 1 for writing + open(my $ofh, ">$m1fn") || Fail("Can't open '$m1fn' for writing\n"); + for my $ifn (@mate1s) { cat_file($ifn, $ofh); } + close($ofh); + exit 0 unless $no_pipes; + } + # Make a named pipe for delivering mate #2s + my $m2fn = "$temp_dir/$$.inpipe2"; + push @to_delete, $m2fn; + push @ht2_args, "-2 $m2fn"; + # Create named pipe 2 for writing + if(!$no_pipes) { + mkfifo($m2fn, 0700) || Fail("mkfifo($m2fn) failed.\n"); + } + $pid = 0; + $pid = fork() unless $no_pipes; + push @to_kills, $pid unless $no_pipes; + if($pid == 0) { + # Open named pipe 2 for writing + open(my $ofh, ">$m2fn") || Fail("Can't open '$m2fn' for writing.\n"); + for my $ifn (@mate2s) { cat_file($ifn, $ofh); } + 
close($ofh); + exit 0 unless $no_pipes; + } + } + if(scalar(@unps) > 0) { + # + # Wrap unpaired inputs. + # + # Make a named pipe for delivering unpaired reads + my $ufn = "$temp_dir/$$.unp"; + push @to_delete, $ufn; + push @ht2_args, "-U $ufn"; + # Create named pipe 2 for writing + if(!$no_pipes) { + mkfifo($ufn, 0700) || Fail("mkfifo($ufn) failed.\n"); + } + my $pid = 0; + $pid = fork() unless $no_pipes; + if($pid == 0) { + # Open named pipe 2 for writing + open(my $ofh, ">$ufn") || Fail("Can't open '$ufn' for writing.\n"); + for my $ifn (@unps) { cat_file($ifn, $ofh); } + close($ofh); + exit 0 unless $no_pipes; + } + } +} else { + if(scalar(@mate2s) > 0) { + # Just pass all the mate arguments along to the binary + push @ht2_args, ("-1", join(",", @mate1s)); + push @ht2_args, ("-2", join(",", @mate2s)); + } + if(scalar(@unps) > 0) { + push @ht2_args, ("-U", join(",", @unps)); + } +} + +if(defined($ref_str)) { + my $ofn = "$temp_dir/$$.ref_str.fa"; + open(my $ofh, ">$ofn") || + Fail("could not open temporary fasta file '$ofn' for writing.\n"); + print {$ofh} ">1\n$ref_str\n"; + close($ofh); + push @to_delete, $ofn; + system("$build_bin $ofn $ofn") == 0 || + Fail("hisat2-build returned non-0 exit level.\n"); + push @ht2_args, ("--index", "$ofn"); + push @to_delete, ("$ofn.1.".$idx_ext, "$ofn.2.".$idx_ext, + "$ofn.3.".$idx_ext, "$ofn.4.".$idx_ext, + "$ofn.5.".$idx_ext, "$ofn.6.".$idx_ext, + "$ofn.7.".$idx_ext, "$ofn.8.".$idx_ext); +} + +Info("After arg handling:\n"); +Info(" Binary args:\n[ @ht2_args ]\n"); + +sub find_hisat3n_index { + my $isLargeIndex = $_[0]; + my $IndexName = $_[1]; + + my $basepair = $basechange_pair; + my $rc_basepair = $convertion_rc{$basechange_pair}; + + my @IndexNames = ( + $IndexName.".3n.$basepair.1", + $IndexName.".3n.$rc_basepair.1" + ); + + my @IndexFullName = (); + if ($isLargeIndex) { + for (@IndexNames) { + push @IndexFullName, $_.".$idx_ext_l"; + } + } else { + for (@IndexNames) { + push @IndexFullName, $_.".$idx_ext_s"; + } + } + 
+ if (-f $IndexFullName[0] && -f $IndexFullName[1]) { + return @IndexFullName; + } + + # find old-name-format index file + @IndexNames = ( + $IndexName.".3n.1.1", + $IndexName.".3n.2.1" + ); + @IndexFullName = (); + if ($isLargeIndex) { + for (@IndexNames) { + push @IndexFullName, $_.".$idx_ext_l"; + } + } else { + for (@IndexNames) { + push @IndexFullName, $_.".$idx_ext_s"; + } + } + + if (-f $IndexFullName[0] && -f $IndexFullName[1]) { + return @IndexFullName; + } + + # empty array + return (); +} + +my $index_name = Extract_IndexName_From(@ht2_args); +my $def_index_filename_s = $index_name.".3n.$basechange_pair.1.$idx_ext_s"; +my $def_index_filename_l = $index_name.".3n.$basechange_pair.1.$idx_ext_l"; + +if ($large_idx) { + Info("Using a large index enforced by user.\n"); + $align_prog = $align_prog_l; + $idx_ext = $idx_ext_l; + my @IndexNames = find_hisat3n_index($large_idx, $index_name); + if (scalar @IndexNames == 0) { + Fail("Cannot find the large index $def_index_filename_l\n"); + } + Info("Using large index ($IndexNames[0]).\n"); +} +else { + my @LargeIndexNames = find_hisat3n_index(1, $index_name); + my @SmallIndexNames = find_hisat3n_index(0, $index_name); + + if ((scalar @LargeIndexNames > 0) && (scalar @SmallIndexNames == 0)) { + Info("Cannot find a small index but a large one seems to be present.\n"); + Info("Switching to using the large index ($LargeIndexNames[0]).\n"); + $align_prog = $align_prog_l; + $idx_ext = $idx_ext_l; + } + else { + Info("Using the small index (${def_index_filename_s}).\n") + } +} + +my $debug_str = ($debug ? 
"-debug" : ""); +push @ht2_args, "--3N"; +# Construct command invoking hisat2-align +my $cmd = "'$align_prog$debug_str' --wrapper basic-0 ".join(" ", @ht2_args); + +# Possibly add read input on an anonymous pipe +$cmd = "$readpipe $cmd" if defined($readpipe); + +Info("$cmd\n"); +my $ret; +if(defined($cap_out)) { + # Open HISAT2 pipe + open(HT, "$cmd |") || Fail("Could not open HISAT2 pipe: '$cmd |'\n"); + # Open output pipe + my $ofh = *STDOUT; + my @fhs_to_close = (); + if($cap_out ne "-") { + open($ofh, ">$cap_out") || + Fail("Could not open output file '$cap_out' for writing.\n"); + } + my %read_fhs = (); + for my $i ("al", "un", "al-conc", "al-conc-disc", "un-conc") { + if(defined($read_fns{$i})) { + my ($vol, $base_spec_dir, $base_fname) = File::Spec->splitpath($read_fns{$i}); + if (-d $read_fns{$i}) { + $base_spec_dir = $read_fns{$i}; + $base_fname = undef; + } + if($i =~ /-conc$/ || $i =~ /-conc-disc$/) { + # Open 2 output files, one for mate 1, one for mate 2 + my ($fn1, $fn2); + if ($base_fname) { + ($fn1, $fn2) = ($base_fname,$base_fname); + } + else { + ($fn1, $fn2) = ($i.'-mate',$i.'-mate'); + } + if($fn1 =~ /%/) { + $fn1 =~ s/%/1/g; $fn2 =~ s/%/2/g; + } elsif($fn1 =~ /\.[^.]*$/) { + $fn1 =~ s/\.([^.]*)$/.1.$1/; + $fn2 =~ s/\.([^.]*)$/.2.$1/; + } else { + $fn1 .= ".1"; + $fn2 .= ".2"; + } + $fn1 = File::Spec->catpath($vol,$base_spec_dir,$fn1); + $fn2 = File::Spec->catpath($vol,$base_spec_dir,$fn2); + $fn1 ne $fn2 || Fail("$fn1\n$fn2\n"); + my ($redir1, $redir2) = (">$fn1", ">$fn2"); + $redir1 = "| gzip -c $redir1" if $read_compress{$i} eq "gzip"; + $redir1 = "| bzip2 -c $redir1" if $read_compress{$i} eq "bzip2"; + $redir2 = "| gzip -c $redir2" if $read_compress{$i} eq "gzip"; + $redir2 = "| bzip2 -c $redir2" if $read_compress{$i} eq "bzip2"; + open($read_fhs{$i}{1}, $redir1) || Fail("Could not open --$i mate-1 output file '$fn1'\n"); + open($read_fhs{$i}{2}, $redir2) || Fail("Could not open --$i mate-2 output file '$fn2'\n"); + push @fhs_to_close, 
$read_fhs{$i}{1}; + push @fhs_to_close, $read_fhs{$i}{2}; + } else { + my $redir = ">".File::Spec->catpath($vol,$base_spec_dir,$i."-seqs"); + if ($base_fname) { + $redir = ">$read_fns{$i}"; + } + $redir = "| gzip -c $redir" if $read_compress{$i} eq "gzip"; + $redir = "| bzip2 -c $redir" if $read_compress{$i} eq "bzip2"; + open($read_fhs{$i}, $redir) || Fail("Could not open --$i output file '$read_fns{$i}'\n"); + push @fhs_to_close, $read_fhs{$i}; + } + } + } + while(<HT>) { + chomp; + my $filt = 0; + unless(substr($_, 0, 1) eq "@") { + # If we are supposed to output certain reads to files... + my $tab1_i = index($_, "\t") + 1; + my $tab2_i = index($_, "\t", $tab1_i); + my $fl = substr($_, $tab1_i, $tab2_i - $tab1_i); + my $unal = ($fl & 4) != 0; + my $secondary = ($fl & 256) != 0; + $filt = 1 if $no_unal && $unal; + if($passthru) { + if(scalar(keys %read_fhs) == 0) { + # Next line is read with some whitespace escaped + my $l = <HT>; + } else { + my $mate1 = (($fl & 64) != 0); + my $mate2 = (($fl & 128) != 0); + my $unp = !$mate1 && !$mate2; + my $pair = !$unp; + # Next line is read with some whitespace escaped + my $l = <HT>; + chomp($l); + $l =~ s/%(..)/chr(hex($1))/eg; + if((defined($read_fhs{un}) || defined($read_fhs{al})) && $unp && !$secondary) { + if($unal) { + # Failed to align + print {$read_fhs{un}} $l if defined($read_fhs{un}); + } else { + # Aligned + print {$read_fhs{al}} $l if defined($read_fhs{al}); + } + } + if((defined($read_fhs{"un-conc"}) || defined($read_fhs{"al-conc"}) || defined($read_fhs{"al-conc-disc"})) && $pair && !$secondary) { + my $conc = (($fl & 2) != 0); + my $conc_disc = ($fl & 4) == 0 || ($fl & 8) == 0; + if($conc && $mate1) { + print {$read_fhs{"al-conc"}{1}} $l if defined($read_fhs{"al-conc"}); + } elsif($conc && $mate2) { + print {$read_fhs{"al-conc"}{2}} $l if defined($read_fhs{"al-conc"}); + } elsif(!$conc && $mate1) { + print {$read_fhs{"un-conc"}{1}} $l if defined($read_fhs{"un-conc"}); + } elsif(!$conc && $mate2) { + print 
{$read_fhs{"un-conc"}{2}} $l if defined($read_fhs{"un-conc"}); + } + if($conc_disc && $mate1) { + print {$read_fhs{"al-conc-disc"}{1}} $l if defined($read_fhs{"al-conc-disc"}); + } elsif($conc_disc && $mate2) { + print {$read_fhs{"al-conc-disc"}{2}} $l if defined($read_fhs{"al-conc-disc"}); + } + } + } + } + } + print {$ofh} "$_\n" if !$filt; + } + for my $k (@fhs_to_close) { close($k); } + close($ofh); + close(HT); + $ret = $?; +} else { + $ret = system($cmd); +} +kill 'TERM', @to_kills; + +if(!$keep) { for(@to_delete) { unlink($_); } } + +if ($ret == -1) { + Error("Failed to execute hisat2-align: $!\n"); + exit 1; +} elsif ($ret & 127) { + my $signm = "(unknown)"; + $signm = $signame[$ret & 127] if defined($signame[$ret & 127]); + my $ad = ""; + $ad = "(core dumped)" if (($ret & 128) != 0); + Error("hisat2-align died with signal %d (%s) $ad\n", ($ret & 127), $signm); + exit 1; +} elsif($ret != 0) { + Error("hisat2-align exited with value %d\n", ($ret >> 8)); +} +exit ($ret >> 8); diff --git a/hisat-3n-build b/hisat-3n-build new file mode 100644 index 0000000..54f511e --- /dev/null +++ b/hisat-3n-build @@ -0,0 +1,148 @@ +#!/usr/bin/env python + +""" + Copyright 2015, Daehwan Kim + + This file is part of HISAT 2. + + HISAT 2 is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + HISAT 2 is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with HISAT 2. If not, see <http://www.gnu.org/licenses/>. +""" + + +import os +import sys +import inspect +import logging +import re + + + +def build_args(): + """ + Parse the wrapper arguments. Returns the (options, remaining arguments) tuple. 
+ """ + + parsed_args = {} + to_remove = [] + argv = sys.argv[:] + for i, arg in enumerate(argv): + if arg == '--large-index': + parsed_args[arg] = "" + to_remove.append(i) + elif arg == '--debug': + parsed_args[arg] = "" + to_remove.append(i) + elif arg == '--verbose': + parsed_args[arg] = "" + to_remove.append(i) + + for i in reversed(to_remove): + del argv[i] + + return parsed_args, argv + + +def main(): + logging.basicConfig(level=logging.ERROR, + format='%(levelname)s: %(message)s' + ) + delta = 200 + small_index_max_size= 4 * 1024**3 - delta + build_bin_name = "hisat2-build" + build_bin_s = "hisat2-build-s" + build_bin_l = "hisat2-build-l" + curr_script = os.path.realpath(inspect.getsourcefile(main)) + ex_path = os.path.dirname(curr_script) + build_bin_spec = os.path.join(ex_path,build_bin_s) + #repeat_bin_spec = os.path.join(ex_path,"hisat2-repeat") + + options, argv = build_args() + + if '--verbose' in options: + logging.getLogger().setLevel(logging.INFO) + + if '--debug' in options: + build_bin_spec += '-debug' + build_bin_l += '-debug' + + if '--large-index' in options: + build_bin_spec = os.path.join(ex_path,build_bin_l) + elif len(argv) >= 2: + ref_fnames = argv[-2] + tot_size = 0 + for fn in ref_fnames.split(','): + if os.path.exists(fn): + statinfo = os.stat(fn) + tot_size += statinfo.st_size + if tot_size > small_index_max_size: + build_bin_spec = os.path.join(ex_path, build_bin_l) + + + if "--repeat-index" in argv: + #build repeat index first + if argv[1].startswith('-'): + outputName = argv[-1] + else: + outputName = argv[2] + nThread = "1" + repeatLength = "100-300" + repeatCount = "5" + referenceName = "" + base_change = "" + removeRepeatLength = False + for i in range(len(argv)): + if argv[i] == "--base-change": + base_change = argv[i+1] + if argv[i] == "-p": + nThread = argv[i+1] + elif argv[i] == "-f": + referenceName = argv[i+1] + elif argv[i] == "--repeat-index": + if ("--repeat-index" != argv[-1]): + match = re.match(r'\d+-\d+', argv[i+1]) 
+ if match: + repeatLength = argv[i+1] + removeRepeatLength = True + if removeRepeatLength: + argv.remove(repeatLength) + if len(referenceName) == 0: + if argv[1].startswith('-'): + referenceName = argv[-2] + else: + referenceName = argv[1] + cwd = sys.path[0] + + repeatArgv = [] + repeatArgv += [cwd + "/hisat2-repeat"] + repeatArgv += ["-p", nThread] + repeatArgv += ["--repeat-length", repeatLength] + repeatArgv += ["--repeat-count", repeatCount] + repeatArgv += ["--base-change", base_change] + repeatArgv += ["--3N"] + repeatArgv += [referenceName, outputName] + try: + os.system(" ".join(repeatArgv)) + except ValueError: + print("Can not automatically generate repeat database for HISAT-3N. Please manually generate repeat database by using hisat2-repeat.") + + argv.append("--3N") + + argv[0] = build_bin_name + argv.insert(1, 'basic-0') + argv.insert(1, '--wrapper') + logging.info('Command: %s %s' % (build_bin_spec, ' '.join(argv[1:]))) + os.execv(build_bin_spec, argv) + +if __name__ == "__main__": + main() diff --git a/hisat2 b/hisat2 new file mode 100644 index 0000000..0f128b9 --- /dev/null +++ b/hisat2 @@ -0,0 +1,665 @@ +#!/usr/bin/env perl + +# +# Copyright 2015, Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see <http://www.gnu.org/licenses/>. +# + +# hisat2: +# +# A wrapper script for hisat2. Provides various advantages over running +# hisat directly, including: +# +# 1. Handling compressed inputs +# 2. 
Redirecting output to various files +# 3. Output directly to bam (not currently supported) + +use strict; +use warnings; +use Getopt::Long qw(GetOptions); +use File::Spec; +use POSIX; + + +my ($vol,$script_path,$prog); +$prog = File::Spec->rel2abs( __FILE__ ); + +while (-f $prog && -l $prog){ + my (undef, $dir, undef) = File::Spec->splitpath($prog); + $prog = File::Spec->rel2abs(readlink($prog), $dir); +} + +($vol,$script_path,$prog) + = File::Spec->splitpath($prog); +my $os_is_nix = ($^O eq "linux") || ($^O eq "darwin"); +my $align_bin_s = $os_is_nix ? 'hisat2-align-s' : 'hisat2-align-s.exe'; +my $build_bin = $os_is_nix ? 'hisat2-build' : 'hisat2-build.exe'; +my $align_bin_l = $os_is_nix ? 'hisat2-align-l' : 'hisat2-align-l.exe'; +my $align_prog_s= File::Spec->catpath($vol,$script_path,$align_bin_s); +my $align_prog_l= File::Spec->catpath($vol,$script_path,$align_bin_l); +my $align_prog = $align_prog_s; +my $read_stat_prog = File::Spec->catpath($vol,$script_path,"hisat2_read_statistics.py"); +my $idx_ext_l = 'ht2l'; +my $idx_ext_s = 'ht2'; +my $idx_ext = $idx_ext_s; +my $seq_in_args = 0; +my $skip_read_stat = 0; +my %signo = (); +my @signame = (); + +{ + # Get signal info + use Config; + my $i = 0; + for my $name (split(' ', $Config{sig_name})) { + $signo{$name} = $i; + $signame[$i] = $name; + $i++; + } +} + +(-x "$align_prog") || + Fail("Expected hisat2 to be in same directory with hisat2-align-s and hisat2-align-l:\n$script_path\n"); + +(-x "$read_stat_prog") || ($skip_read_stat = 1); + +# Get description of arguments from HISAT so that we can distinguish HISAT +# args from wrapper args +sub getHt2Desc($) { + my $d = shift; + my $cmd = "'$align_prog' --wrapper basic-0 --arg-desc"; + open(my $fh, "$cmd |") || Fail("Failed to run command '$cmd'\n"); + while(readline $fh) { + chomp; + next if /^\s*$/; + my @ts = split(/\t/); + $d->{$ts[0]} = $ts[1]; + } + close($fh); + $? 
== 0 || Fail("Description of arguments failed!\n"); +} + +my %desc = (); +my %wrapped = ("1" => 1, "2" => 1); +getHt2Desc(\%desc); + +# Given an option like -1, determine whether it's wrapped (i.e. should be +# handled by this script rather than being passed along to HISAT) +sub isWrapped($) { return defined($wrapped{$_[0]}); } + +my @orig_argv = @ARGV; + +my @ht2w_args = (); # options for wrapper +my @ht2_args = (); # options for HISAT +my $saw_dd = 0; +for(0..$#ARGV) { + if($ARGV[$_] eq "--") { + $saw_dd = 1; + next; + } + push @ht2w_args, $ARGV[$_] if !$saw_dd; + push @ht2_args, $ARGV[$_] if $saw_dd; +} +if(!$saw_dd) { + @ht2_args = @ht2w_args; + @ht2w_args= (); +} + +my $debug = 0; +my %read_fns = (); +my %read_compress = (); +my $cap_out = undef; # Filename for passthrough +my $no_unal = 0; +my $large_idx = 0; +# Remove whitespace +for my $i (0..$#ht2_args) { + $ht2_args[$i]=~ s/^\s+//; $ht2_args[$i] =~ s/\s+$//; +} + +# We've handled arguments that the user has explicitly directed either to the +# wrapper or to hisat, now we capture some of the hisat arguments that +# ought to be handled in the wrapper +for(my $i = 0; $i < scalar(@ht2_args); $i++) { + next unless defined($ht2_args[$i]); + my $arg = $ht2_args[$i]; + my @args = split(/=/, $arg); + if(scalar(@args) > 2) { + $args[1] = join("=", @args[1..$#args]); + } + $arg = $args[0]; + if($arg eq "-U" || $arg eq "--unpaired") { + $ht2_args[$i] = undef; + $arg =~ s/^-U//; $arg =~ s/^--unpaired//; + if($arg ne "") { + # Argument was part of this token + my @args = split(/,/, $arg); + for my $a (@args) { push @ht2w_args, ("-U", $a); } + } else { + # Argument is in the next token + $i < scalar(@ht2_args)-1 || Fail("Argument expected in next token!\n"); + $i++; + my @args = split(/,/, $ht2_args[$i]); + for my $a (@args) { push @ht2w_args, ("-U", $a); } + $ht2_args[$i] = undef; + } + } + if($arg =~ /^--?([12])/ && $arg !~ /^--?12/) { + my $mate = $1; + $ht2_args[$i] = undef; + $arg =~ s/^--?[12]//; + if($arg ne "") 
{ + # Argument was part of this token + my @args = split(/,/, $arg); + for my $a (@args) { push @ht2w_args, ("-$mate", $a); } + } else { + # Argument is in the next token + $i < scalar(@ht2_args)-1 || Fail("Argument expected in next token!\n"); + $i++; + my @args = split(/,/, $ht2_args[$i]); + for my $a (@args) { push @ht2w_args, ("-$mate", $a); } + $ht2_args[$i] = undef; + } + } + if($arg eq "--debug") { + $debug = 1; + $ht2_args[$i] = undef; + } + if($arg eq "--no-unal") { + $no_unal = 1; + $ht2_args[$i] = undef; + } + if($arg eq "--large-index") { + $large_idx = 1; + $ht2_args[$i] = undef; + } + if($arg eq "--skip-read-lengths") { + $skip_read_stat = 1; + $ht2_args[$i] = undef + } + if($arg eq "-c") { + $seq_in_args = 1; + } + for my $rarg ("un-conc", "al-conc", "al-conc-disc", "un", "al") { + if($arg =~ /^--${rarg}$/ || $arg =~ /^--${rarg}-gz$/ || $arg =~ /^--${rarg}-bz2$/) { + $ht2_args[$i] = undef; + if(scalar(@args) > 1 && $args[1] ne "") { + $read_fns{$rarg} = $args[1]; + } else { + $i < scalar(@ht2_args)-1 || Fail("--${rarg}* option takes an argument.\n"); + $read_fns{$rarg} = $ht2_args[$i+1]; + $ht2_args[$i+1] = undef; + } + $read_compress{$rarg} = ""; + $read_compress{$rarg} = "gzip" if $arg eq "--${rarg}-gz"; + $read_compress{$rarg} = "bzip2" if $arg eq "--${rarg}-bz2"; + last; + } + } +} +# If the user asked us to redirect some reads to files, or to suppress +# unaligned reads, then we need to capture the output from HISAT and pass it +# through this wrapper. 
+my $passthru = 0; +if(scalar(keys %read_fns) > 0 || $no_unal) { + $passthru = 1; + push @ht2_args, "--passthrough"; + $cap_out = "-"; + for(my $i = 0; $i < scalar(@ht2_args); $i++) { + next unless defined($ht2_args[$i]); + my $arg = $ht2_args[$i]; + if($arg eq "-S" || $arg eq "--output") { + $i < scalar(@ht2_args)-1 || Fail("-S/--output takes an argument.\n"); + $cap_out = $ht2_args[$i+1]; + $ht2_args[$i] = undef; + $ht2_args[$i+1] = undef; + } + } +} +my @tmp = (); +for (@ht2_args) { push(@tmp, $_) if defined($_); } +@ht2_args = @tmp; + +my @unps = (); +my @mate1s = (); +my @mate2s = (); +my @to_delete = (); +my @to_kills = (); +my $temp_dir = "/tmp"; +my $bam_out = 0; +my $ref_str = undef; +my $no_pipes = 0; +my $keep = 0; +my $verbose = 0; +my $readpipe = undef; +my $log_fName = undef; +my $help = 0; + +my @ht2w_args_cp = (@ht2w_args>0) ? @ht2w_args : @ht2_args; +Getopt::Long::Configure("pass_through","no_ignore_case"); + +my @old_ARGV = @ARGV; +@ARGV = @ht2w_args_cp; + +GetOptions( + "1=s" => \@mate1s, + "2=s" => \@mate2s, + "reads|U=s" => \@unps, + "temp-directory=s" => \$temp_dir, + "bam" => \$bam_out, + "no-named-pipes" => \$no_pipes, + "ref-string|reference-string=s" => \$ref_str, + "keep" => \$keep, + "verbose" => \$verbose, + "log-file=s" => \$log_fName, + "help|h" => \$help +); + +@ARGV = @old_ARGV; + +my $old_stderr; + +if ($log_fName) { + open($old_stderr, ">&STDERR") or Fail("Cannot dup STDERR!\n"); + open(STDERR, ">", $log_fName) or Fail("Cannot redirect to log file $log_fName.\n"); +} + +Info("Before arg handling:\n"); +Info(" Wrapper args:\n[ @ht2w_args ]\n"); +Info(" Binary args:\n[ @ht2_args ]\n"); + +# check read lengths +# if read_files have more than 1 files, use first one, +my @read_files = (scalar(@unps) > 0) ? 
@unps : @mate1s; +if ((scalar(@read_files) > 0) + && ($seq_in_args == 0) + && ($skip_read_stat == 0)) { + Info("Check read length: $read_files[0]\n"); + my $cmd = "'$read_stat_prog' $read_files[0]"; + my $read_len_str = ""; + + open(my $fh, "$cmd |") || Fail("Failed to run command '$cmd'\n"); + while(readline $fh) { + chomp; + next if /^\s*$/; + my @ts = split(/ /); + if (scalar(@ts) > 4) { + $read_len_str = $ts[4]; + } else { + $read_len_str = ""; + } + } + close($fh); + + if (($read_len_str ne "") && ($read_len_str ne "0")) { + Info("Read Length String: $read_len_str\n"); + push @ht2_args, ("--read-lengths", $read_len_str); + } +} + +sub check_file_exist($$$) { # Fail() unless every listed read file is a regular file + my ($unps, $mate1s, $mate2s) = @_; + for my $fn (@$unps, @$mate1s, @$mate2s) { + if (not -f $fn) { + Fail("Read file '%s' doesn't exist\n", $fn); + return 1; + } + } + return 0; +} + +sub cat_file($$) { # copy read file $ifn to handle $ofh, decompressing .gz/.bz2 on the fly + my ($ifn, $ofh) = @_; + my $ifh = undef; + if($ifn =~ /\.gz$/) { + open($ifh, "gzip -dc $ifn |") || + Fail("Could not open gzipped read file: $ifn \n"); + } elsif($ifn =~ /\.bz2$/) { # anchored like the .gz test and wrapInput(): only a real .bz2 suffix counts + open($ifh, "bzip2 -dc $ifn |") || + Fail("Could not open bzip2ed read file: $ifn \n"); + } else { + open($ifh, "<", $ifn) || Fail("Could not open read file: $ifn \n"); # 3-arg open: the name is never parsed as a mode or pipe + } + while(readline $ifh) { print {$ofh} $_; } + close($ifh); +} + +# Return non-zero if and only if the input should be wrapped (i.e. because +# it's compressed). +sub wrapInput($$$) { + my ($unps, $mate1s, $mate2s) = @_; + for my $fn (@$unps, @$mate1s, @$mate2s) { + return 1 if $fn =~ /\.gz$/ || $fn =~ /\.bz2$/; + } + return 0; +} + +sub Info { + if ($verbose) { + print STDERR "(INFO): " ,@_; + } +} + +sub Error { + my @msg = @_; + $msg[0] = "(ERR): ".$msg[0]; + printf STDERR @msg; +} + +sub Fail { + Error(@_); + die("Exiting now ...\n"); +} + +sub Extract_IndexName_From { # return the index basename that follows -x/--index in the given args, prefixed with $HISAT2_INDEXES when it is only found there + my $index_opt = $ref_str ? '--index' : '-x'; + for (my $i=0; $i<@_; $i++) { + if ($_[$i] eq $index_opt){ + my $idx_basename = $_[$i+1]; + my @idx_filenames = glob($idx_basename . 
"*.ht2{,l}"); + unless(@idx_filenames) { + if(exists $ENV{"HISAT2_INDEXES"}) { + @idx_filenames = glob("$ENV{'HISAT2_INDEXES'}/$idx_basename" . "*.ht2{,l}"); # same wildcard as the local lookup; a brace-only pattern would "match" even when no index file exists + } + + if(!@idx_filenames) { + Fail("\"" . $idx_basename . "\" does not exist\n"); + } + $idx_basename = "$ENV{'HISAT2_INDEXES'}/$idx_basename" + } + + return $idx_basename; + } + } + Info("Cannot find any index option (--reference-string, --ref-string or -x) in the given command line.\n"); +} + +if($seq_in_args == 0) { + check_file_exist(\@unps, \@mate1s, \@mate2s); +} + +if(wrapInput(\@unps, \@mate1s, \@mate2s)) { + if(scalar(@mate2s) > 0) { + # + # Wrap paired-end inputs + # + # Put reads into temporary files or fork off processes to feed named pipes + scalar(@mate2s) == scalar(@mate1s) || + Fail("Different number of files specified with --reads/-1 as with -2\n"); + # Make a named pipe for delivering mate #1s + my $m1fn = "$temp_dir/$$.inpipe1"; + push @to_delete, $m1fn; + push @ht2_args, "-1 $m1fn"; + # Create named pipe 1 for writing + if(!$no_pipes) { + mkfifo($m1fn, 0700) || Fail("mkfifo($m1fn) failed.\n"); + } + my $pid = 0; + $pid = fork() unless $no_pipes; + push @to_kills, $pid unless $no_pipes; + if($pid == 0) { + # Open named pipe 1 for writing + open(my $ofh, ">$m1fn") || Fail("Can't open '$m1fn' for writing\n"); + for my $ifn (@mate1s) { cat_file($ifn, $ofh); } + close($ofh); + exit 0 unless $no_pipes; + } + # Make a named pipe for delivering mate #2s + my $m2fn = "$temp_dir/$$.inpipe2"; + push @to_delete, $m2fn; + push @ht2_args, "-2 $m2fn"; + # Create named pipe 2 for writing + if(!$no_pipes) { + mkfifo($m2fn, 0700) || Fail("mkfifo($m2fn) failed.\n"); + } + $pid = 0; + $pid = fork() unless $no_pipes; + push @to_kills, $pid unless $no_pipes; + if($pid == 0) { + # Open named pipe 2 for writing + open(my $ofh, ">$m2fn") || Fail("Can't open '$m2fn' for writing.\n"); + for my $ifn (@mate2s) { cat_file($ifn, $ofh); } + close($ofh); + exit 0 unless $no_pipes; + } + } + if(scalar(@unps) > 0) { + # + #
Wrap unpaired inputs. + # + # Make a named pipe for delivering unpaired reads + my $ufn = "$temp_dir/$$.unp"; + push @to_delete, $ufn; + push @ht2_args, "-U $ufn"; + # Create named pipe 2 for writing + if(!$no_pipes) { + mkfifo($ufn, 0700) || Fail("mkfifo($ufn) failed.\n"); + } + my $pid = 0; + $pid = fork() unless $no_pipes; + if($pid == 0) { + # Open named pipe 2 for writing + open(my $ofh, ">$ufn") || Fail("Can't open '$ufn' for writing.\n"); + for my $ifn (@unps) { cat_file($ifn, $ofh); } + close($ofh); + exit 0 unless $no_pipes; + } + } +} else { + if(scalar(@mate2s) > 0) { + # Just pass all the mate arguments along to the binary + push @ht2_args, ("-1", join(",", @mate1s)); + push @ht2_args, ("-2", join(",", @mate2s)); + } + if(scalar(@unps) > 0) { + push @ht2_args, ("-U", join(",", @unps)); + } +} + +if(defined($ref_str)) { + my $ofn = "$temp_dir/$$.ref_str.fa"; + open(my $ofh, ">$ofn") || + Fail("could not open temporary fasta file '$ofn' for writing.\n"); + print {$ofh} ">1\n$ref_str\n"; + close($ofh); + push @to_delete, $ofn; + system("$build_bin $ofn $ofn") == 0 || + Fail("hisat2-build returned non-0 exit level.\n"); + push @ht2_args, ("--index", "$ofn"); + push @to_delete, ("$ofn.1.".$idx_ext, "$ofn.2.".$idx_ext, + "$ofn.3.".$idx_ext, "$ofn.4.".$idx_ext, + "$ofn.5.".$idx_ext, "$ofn.6.".$idx_ext, + "$ofn.7.".$idx_ext, "$ofn.8.".$idx_ext); +} + +Info("After arg handling:\n"); +Info(" Binary args:\n[ @ht2_args ]\n"); + +my $index_name = Extract_IndexName_From(@ht2_args); + +if ($large_idx) { + Info("Using a large index enforced by user.\n"); + $align_prog = $align_prog_l; + $idx_ext = $idx_ext_l; + if (not -f $index_name.".1.".$idx_ext_l) { + Fail("Cannot find the large index ${index_name}.1.${idx_ext_l}\n"); + } + Info("Using large index (${index_name}.1.${idx_ext_l}).\n"); +} +else { + if ((-f $index_name.".1.".$idx_ext_l) && + (not -f $index_name.".1.".$idx_ext_s)) { + Info("Cannot find a small index but a large one seems to be present.\n"); + 
Info("Switching to using the large index (${index_name}.1.${idx_ext_l}).\n"); + $align_prog = $align_prog_l; + $idx_ext = $idx_ext_l; + } + else { + Info("Using the small index (${index_name}.1.${idx_ext_s}).\n") + } +} + +my $debug_str = ($debug ? "-debug" : ""); + +# Construct command invoking hisat2-align +my $cmd = "'$align_prog$debug_str' --wrapper basic-0 ".join(" ", @ht2_args); + +# Possibly add read input on an anonymous pipe +$cmd = "$readpipe $cmd" if defined($readpipe); + +Info("$cmd\n"); +my $ret; +if(defined($cap_out)) { + # Open HISAT2 pipe + open(HT, "$cmd |") || Fail("Could not open HISAT2 pipe: '$cmd |'\n"); + # Open output pipe + my $ofh = *STDOUT; + my @fhs_to_close = (); + if($cap_out ne "-") { + open($ofh, ">$cap_out") || + Fail("Could not open output file '$cap_out' for writing.\n"); + } + my %read_fhs = (); + for my $i ("al", "un", "al-conc", "al-conc-disc", "un-conc") { + if(defined($read_fns{$i})) { + my ($vol, $base_spec_dir, $base_fname) = File::Spec->splitpath($read_fns{$i}); + if (-d $read_fns{$i}) { + $base_spec_dir = $read_fns{$i}; + $base_fname = undef; + } + if($i =~ /-conc$/ || $i =~ /-conc-disc$/) { + # Open 2 output files, one for mate 1, one for mate 2 + my ($fn1, $fn2); + if ($base_fname) { + ($fn1, $fn2) = ($base_fname,$base_fname); + } + else { + ($fn1, $fn2) = ($i.'-mate',$i.'-mate'); + } + if($fn1 =~ /%/) { + $fn1 =~ s/%/1/g; $fn2 =~ s/%/2/g; + } elsif($fn1 =~ /\.[^.]*$/) { + $fn1 =~ s/\.([^.]*)$/.1.$1/; + $fn2 =~ s/\.([^.]*)$/.2.$1/; + } else { + $fn1 .= ".1"; + $fn2 .= ".2"; + } + $fn1 = File::Spec->catpath($vol,$base_spec_dir,$fn1); + $fn2 = File::Spec->catpath($vol,$base_spec_dir,$fn2); + $fn1 ne $fn2 || Fail("$fn1\n$fn2\n"); + my ($redir1, $redir2) = (">$fn1", ">$fn2"); + $redir1 = "| gzip -c $redir1" if $read_compress{$i} eq "gzip"; + $redir1 = "| bzip2 -c $redir1" if $read_compress{$i} eq "bzip2"; + $redir2 = "| gzip -c $redir2" if $read_compress{$i} eq "gzip"; + $redir2 = "| bzip2 -c $redir2" if $read_compress{$i} 
eq "bzip2"; + open($read_fhs{$i}{1}, $redir1) || Fail("Could not open --$i mate-1 output file '$fn1'\n"); + open($read_fhs{$i}{2}, $redir2) || Fail("Could not open --$i mate-2 output file '$fn2'\n"); + push @fhs_to_close, $read_fhs{$i}{1}; + push @fhs_to_close, $read_fhs{$i}{2}; + } else { + my $redir = ">".File::Spec->catpath($vol,$base_spec_dir,$i."-seqs"); + if ($base_fname) { + $redir = ">$read_fns{$i}"; + } + $redir = "| gzip -c $redir" if $read_compress{$i} eq "gzip"; + $redir = "| bzip2 -c $redir" if $read_compress{$i} eq "bzip2"; + open($read_fhs{$i}, $redir) || Fail("Could not open --$i output file '$read_fns{$i}'\n"); + push @fhs_to_close, $read_fhs{$i}; + } + } + } + while(readline(HT)) { # one SAM line per iteration, read from the HT pipe into $_ + chomp; + my $filt = 0; + unless(substr($_, 0, 1) eq "@") { + # If we are supposed to output certain reads to files... + my $tab1_i = index($_, "\t") + 1; + my $tab2_i = index($_, "\t", $tab1_i); + my $fl = substr($_, $tab1_i, $tab2_i - $tab1_i); + my $unal = ($fl & 4) != 0; + my $secondary = ($fl & 256) != 0; + $filt = 1 if $no_unal && $unal; + if($passthru) { + if(scalar(keys %read_fhs) == 0) { + # Next line is read with some whitespace escaped + my $l = readline(HT); # consumed and discarded: no read output files were requested + } else { + my $mate1 = (($fl & 64) != 0); + my $mate2 = (($fl & 128) != 0); + my $unp = !$mate1 && !$mate2; + my $pair = !$unp; + # Next line is read with some whitespace escaped + my $l = readline(HT); + chomp($l); + $l =~ s/%(..)/chr(hex($1))/eg; + if((defined($read_fhs{un}) || defined($read_fhs{al})) && $unp && !$secondary) { + if($unal) { + # Failed to align + print {$read_fhs{un}} $l if defined($read_fhs{un}); + } else { + # Aligned + print {$read_fhs{al}} $l if defined($read_fhs{al}); + } + } + if((defined($read_fhs{"un-conc"}) || defined($read_fhs{"al-conc"}) || defined($read_fhs{"al-conc-disc"})) && $pair && !$secondary) { + my $conc = (($fl & 2) != 0); + my $conc_disc = ($fl & 4) == 0 || ($fl & 8) == 0; + if($conc && $mate1) { + print {$read_fhs{"al-conc"}{1}} $l if defined($read_fhs{"al-conc"}); + } elsif($conc && $mate2) { +
print {$read_fhs{"al-conc"}{2}} $l if defined($read_fhs{"al-conc"}); + } elsif(!$conc && $mate1) { + print {$read_fhs{"un-conc"}{1}} $l if defined($read_fhs{"un-conc"}); + } elsif(!$conc && $mate2) { + print {$read_fhs{"un-conc"}{2}} $l if defined($read_fhs{"un-conc"}); + } + if($conc_disc && $mate1) { + print {$read_fhs{"al-conc-disc"}{1}} $l if defined($read_fhs{"al-conc-disc"}); + } elsif($conc_disc && $mate2) { + print {$read_fhs{"al-conc-disc"}{2}} $l if defined($read_fhs{"al-conc-disc"}); + } + } + } + } + } + print {$ofh} "$_\n" if !$filt; + } + for my $k (@fhs_to_close) { close($k); } + close($ofh); + close(HT); + $ret = $?; +} else { + $ret = system($cmd); +} +kill 'TERM', @to_kills; + +if(!$keep) { for(@to_delete) { unlink($_); } } + +if ($ret == -1) { + Error("Failed to execute hisat2-align: $!\n"); + exit 1; +} elsif ($ret & 127) { + my $signm = "(unknown)"; + $signm = $signame[$ret & 127] if defined($signame[$ret & 127]); + my $ad = ""; + $ad = "(core dumped)" if (($ret & 128) != 0); + Error("hisat2-align died with signal %d (%s) $ad\n", ($ret & 127), $signm); + exit 1; +} elsif($ret != 0) { + Error("hisat2-align exited with value %d\n", ($ret >> 8)); +} +exit ($ret >> 8); diff --git a/hisat2-build b/hisat2-build new file mode 100644 index 0000000..35f23e6 --- /dev/null +++ b/hisat2-build @@ -0,0 +1,95 @@ +#!/usr/bin/env python + +""" + Copyright 2015, Daehwan Kim + + This file is part of HISAT 2. + + HISAT 2 is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + HISAT 2 is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with HISAT 2. If not, see . +""" + + +import os +import sys +import inspect +import logging + + +def build_args(): + """ + Parse the wrapper arguments. Returns the options, tuple. + """ + + parsed_args = {} + to_remove = [] + argv = sys.argv[:] + for i, arg in enumerate(argv): + if arg == '--large-index': + parsed_args[arg] = "" + to_remove.append(i) + elif arg == '--debug': + parsed_args[arg] = "" + to_remove.append(i) + elif arg == '--verbose': + parsed_args[arg] = "" + to_remove.append(i) + + for i in reversed(to_remove): + del argv[i] + + return parsed_args, argv + + +def main(): + logging.basicConfig(level=logging.ERROR, + format='%(levelname)s: %(message)s' + ) + delta = 200 + small_index_max_size= 4 * 1024**3 - delta + build_bin_name = "hisat2-build" + build_bin_s = "hisat2-build-s" + build_bin_l = "hisat2-build-l" + curr_script = os.path.realpath(inspect.getsourcefile(main)) + ex_path = os.path.dirname(curr_script) + build_bin_spec = os.path.join(ex_path,build_bin_s) + + options, argv = build_args() + + if '--verbose' in options: + logging.getLogger().setLevel(logging.INFO) + + if '--debug' in options: + build_bin_spec += '-debug' + build_bin_l += '-debug' + + if '--large-index' in options: + build_bin_spec = os.path.join(ex_path,build_bin_l) + elif len(argv) >= 2: + ref_fnames = argv[-2] + tot_size = 0 + for fn in ref_fnames.split(','): + if os.path.exists(fn): + statinfo = os.stat(fn) + tot_size += statinfo.st_size + if tot_size > small_index_max_size: + build_bin_spec = os.path.join(ex_path,build_bin_l) + + argv[0] = build_bin_name + argv.insert(1, 'basic-0') + argv.insert(1, '--wrapper') + logging.info('Command: %s %s' % (build_bin_spec, ' '.join(argv[1:]))) + os.execv(build_bin_spec, argv) + +if __name__ == "__main__": + main() diff --git a/hisat2-build-new b/hisat2-build-new new file mode 100644 index 0000000..9b13701 --- /dev/null +++ b/hisat2-build-new @@ -0,0 +1,100 @@ 
+#!/usr/bin/env python + +""" + Copyright 2018, Chanhee Park and Daehwan Kim + + This file is part of HISAT 2. + + HISAT 2 is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + HISAT 2 is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with HISAT 2. If not, see . +""" + + +import os +import sys +import inspect +import logging + + +def build_args(): + """ + Parse the wrapper arguments. Returns the options, tuple. + """ + + parsed_args = {} + to_remove = [] + argv = sys.argv[:] + for i, arg in enumerate(argv): + if arg == '--large-index': + parsed_args[arg] = "" + to_remove.append(i) + elif arg == '--debug': + parsed_args[arg] = "" + to_remove.append(i) + elif arg == '--verbose': + parsed_args[arg] = "" + to_remove.append(i) + + for i in reversed(to_remove): + del argv[i] + + return parsed_args, argv + + +def main(): + logging.basicConfig(level=logging.ERROR, + format='%(levelname)s: %(message)s' + ) + delta = 200 + small_index_max_size= 4 * 1024**3 - delta + constuct_bin_name = "hisat2-construct-nonrepetitive-genome" + constuct_bin_s = "hisat2-construct-nonrepetitive-genome-s" + constuct_bin_l = "hisat2-construct-nonrepetitive-genome-l" + build_bin_name = "hisat2-build" + build_bin_s = "hisat2-build-s" + build_bin_l = "hisat2-build-l" + curr_script = os.path.realpath(inspect.getsourcefile(main)) + ex_path = os.path.dirname(curr_script) + build_bin_spec = os.path.join(ex_path,build_bin_s) + + options, argv = build_args() + + if '--verbose' in options: + logging.getLogger().setLevel(logging.INFO) + + if '--debug' in options: + 
build_bin_spec += '-debug' + build_bin_l += '-debug' + + if '--large-index' in options: + build_bin_spec = os.path.join(ex_path,build_bin_l) + elif len(argv) >= 2: + ref_fnames = argv[-2] + tot_size = 0 + for fn in ref_fnames.split(','): + if os.path.exists(fn): + statinfo = os.stat(fn) + tot_size += statinfo.st_size + if tot_size > small_index_max_size: + build_bin_spec = os.path.join(ex_path,build_bin_l) + + argv[0] = build_bin_name + argv.insert(1, 'basic-0') + argv.insert(1, '--wrapper') + logging.info('Command: %s %s' % (build_bin_spec, ' '.join(argv[1:]))) + os.execv(build_bin_spec, argv) + +if __name__ == "__main__": + main() + + diff --git a/hisat2-inspect b/hisat2-inspect new file mode 100644 index 0000000..2b5dea3 --- /dev/null +++ b/hisat2-inspect @@ -0,0 +1,73 @@ +#!/usr/bin/env python + +""" + Copyright 2015, Daehwan Kim + + This file is part of HISAT 2. + + HISAT 2 is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + HISAT 2 is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with HISAT 2. If not, see . 
+""" + + +import os +import imp +import inspect +import logging + + +def main(): + logging.basicConfig(level=logging.ERROR, + format='%(levelname)s: %(message)s' + ) + inspect_bin_name = "hisat2-inspect" + inspect_bin_s = "hisat2-inspect-s" + inspect_bin_l = "hisat2-inspect-l" + idx_ext_l = '.1.ht2l'; + idx_ext_s = '.1.ht2'; + curr_script = os.path.realpath(inspect.getsourcefile(main)) + ex_path = os.path.dirname(curr_script) + inspect_bin_spec = os.path.join(ex_path,inspect_bin_s) + bld = imp.load_source('hisat2-build',os.path.join(ex_path,'hisat2-build')) + options,arguments = bld.build_args() + + if '--verbose' in options: + logging.getLogger().setLevel(logging.INFO) + + if '--debug' in options: + inspect_bin_spec += '-debug' + inspect_bin_l += '-debug' + + if '--large-index' in options: + inspect_bin_spec = os.path.join(ex_path,inspect_bin_l) + elif len(arguments) >= 1: + idx_basename = arguments[-1] + large_idx_exists = os.path.exists(idx_basename + idx_ext_l) + small_idx_exists = os.path.exists(idx_basename + idx_ext_s) + if not large_idx_exists and not small_idx_exists: + env_path = os.getenv('HISAT2_INDEXES', '') + large_idx_exists = os.path.exists(env_path + '/' + idx_basename + idx_ext_l) + small_idx_exists = os.path.exists(env_path + '/' + idx_basename + idx_ext_s) + + if large_idx_exists and not small_idx_exists: + inspect_bin_spec = os.path.join(ex_path,inspect_bin_l) + + arguments[0] = inspect_bin_name + arguments.insert(1, 'basic-0') + arguments.insert(1, '--wrapper') + logging.info('Command: %s %s' % (inspect_bin_spec,' '.join(arguments[1:]))) + os.execv(inspect_bin_spec, arguments) + + +if __name__ == "__main__": + main() diff --git a/hisat2.cpp b/hisat2.cpp new file mode 100644 index 0000000..9df7586 --- /dev/null +++ b/hisat2.cpp @@ -0,0 +1,4978 @@ +/* + * Copyright 2015, Daehwan Kim + * + * This file is part of HISAT 2. + * This file is edited by Yun (Leo) Zhang for HISAT-3N. 
+ * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "alphabet.h" +#include "assert_helpers.h" +#include "endian_swap.h" +#include "hgfm.h" +#include "rfm.h" +#include "formats.h" +#include "sequence_io.h" +#include "tokenize.h" +#include "aln_sink.h" +#include "pat.h" +#include "threading.h" +#include "ds.h" +#include "aligner_metrics.h" +#include "sam.h" +#include "aligner_seed.h" +#include "splice_site.h" +#include "spliced_aligner.h" +#include "aligner_seed_policy.h" +#include "aligner_sw.h" +#include "aligner_sw_driver.h" +#include "aligner_cache.h" +#include "util.h" +#include "pe.h" +#include "tp.h" +#include "gp.h" +#include "simple_func.h" +#include "presets.h" +#include "opts.h" +#include "outq.h" +#include "repeat_kmer.h" +#include "hisat2lib/ht2.h" +//#include "utility_3n.h" + + +using namespace std; + +MemoryTally gMemTally; + +static EList mates1; // mated reads (first mate) +static EList mates2; // mated reads (second mate) +static EList mates12; // mated reads (1st/2nd interleaved in 1 file) +static string adjIdxBase; +static string adjIdxBases_3N[2]; +bool gColor; // colorspace (not supported) +int gVerbose; // be talkative +static bool startVerbose; // be talkative at startup +int gQuiet; // print nothing but the alignments +static int sanityCheck; // enable expensive sanity checks 
+static int format; // default read format is FASTQ +static string origString; // reference text, or filename(s) +static int seed; // srandom() seed +static int timing; // whether to report basic timing data +static int metricsIval; // interval between alignment metrics messages (0 = no messages) +static string metricsFile;// output file to put alignment metrics in +static bool metricsStderr;// print metrics to stderr (in addition to the metrics file, if one is specified) +static bool metricsPerRead; // report a metrics tuple for every read +static bool allHits; // for multihits, report just one +static bool showVersion; // just print version and quit? +static int ipause; // pause before matching? +static uint32_t qUpto; // max # of queries to read +int gTrim5; // amount to trim from 5' end +int gTrim3; // amount to trim from 3' end +static int offRate; // keep default offRate +static bool solexaQuals; // quality strings are solexa quals, not phred, and subtract 64 (not 33) +static bool phred64Quals; // quality chars are phred, but must subtract 64 (not 33) +static bool integerQuals; // quality strings are space-separated strings of integers, not ASCII +static int nthreads; // number of pthreads operating concurrently +static int outType; // style of output +static bool noRefNames; // true -> print reference indexes; not names +static uint32_t khits; // number of hits per read; >1 is much slower +static uint32_t mhits; // don't report any hits if there are > mhits +static int partitionSz; // output a partitioning key in first field +static bool useSpinlock; // false -> don't use spinlocks even if they're #defined +static bool fileParallel; // separate threads read separate input files in parallel +static bool useShmem; // use shared memory to hold the index +static bool useMm; // use memory-mapped files to hold the index +static bool mmSweep; // sweep through memory-mapped files immediately after mapping +int gMinInsert; // minimum insert size +int gMaxInsert; // maximum insert size +bool gMate1fw; //
-1 mate aligns in fw orientation on fw strand +bool gMate2fw; // -2 mate aligns in rc orientation on fw strand +bool gFlippedMatesOK; // allow mates to be in wrong order +bool gDovetailMatesOK; // allow one mate to extend off the end of the other +bool gContainMatesOK; // allow one mate to contain the other in PE alignment +bool gOlapMatesOK; // allow mates to overlap in PE alignment +bool gExpandToFrag; // incr max frag length to =larger mate len if necessary +bool gReportDiscordant; // find and report discordant paired-end alignments +bool gReportMixed; // find and report unpaired alignments for paired reads +static uint32_t cacheLimit; // ranges w/ size > limit will be cached +static uint32_t cacheSize; // # words per range cache +static uint32_t skipReads; // # reads/read pairs to skip +bool gNofw; // don't align fw orientation of read +bool gNorc; // don't align rc orientation of read +static uint32_t fastaContLen; +static uint32_t fastaContFreq; +static bool hadoopOut; // print Hadoop status and summary messages +static bool fuzzy; +static bool fullRef; +static bool samTruncQname; // whether to truncate QNAME to 255 chars +static bool samOmitSecSeqQual; // omit SEQ/QUAL for 2ndary alignments? 
+static bool samNoUnal; // don't print records for unaligned reads +static bool samNoHead; // don't print any header lines in SAM output +static bool samNoSQ; // don't print @SQ header lines +static bool sam_print_as; +static bool sam_print_xs; // XS:i +static bool sam_print_xss; // Xs:i and Ys:i +static bool sam_print_yn; // YN:i and Yn:i +static bool sam_print_xn; +static bool sam_print_cs; +static bool sam_print_cq; +static bool sam_print_x0; +static bool sam_print_x1; +static bool sam_print_xm; +static bool sam_print_xo; +static bool sam_print_xg; +static bool sam_print_nm; +static bool sam_print_md; +static bool sam_print_yf; +static bool sam_print_yi; +static bool sam_print_ym; +static bool sam_print_yp; +static bool sam_print_yt; +static bool sam_print_ys; +static bool sam_print_zs; +static bool sam_print_xr; +static bool sam_print_xt; +static bool sam_print_xd; +static bool sam_print_xu; +static bool sam_print_yl; +static bool sam_print_ye; +static bool sam_print_yu; +static bool sam_print_xp; +static bool sam_print_yr; +static bool sam_print_zb; +static bool sam_print_zr; +static bool sam_print_zf; +static bool sam_print_zm; +static bool sam_print_zi; +static bool sam_print_zp; +static bool sam_print_zu; +static bool sam_print_xs_a; +static bool sam_print_nh; +static bool bwaSwLike; +static float bwaSwLikeC; +static float bwaSwLikeT; +static bool qcFilter; +static bool sortByScore; // prioritize alignments to report by score? 
+bool gReportOverhangs; // false -> filter out alignments that fall off the end of a reference sequence +static string rgid; // ID: setting for @RG header line +static string rgs; // SAM outputs for @RG header line +static string rgs_optflag; // SAM optional flag to add corresponding to @RG ID +static bool msample; // whether to report a random alignment when maxed-out via -m/-M +int gGapBarrier; // # diags on top/bot only to be entered diagonally +static EList qualities; +static EList qualities1; +static EList qualities2; +static string polstr; // temporary holder for policy string +static bool msNoCache; // true -> disable local cache +static int bonusMatchType; // how to reward matches +static int bonusMatch; // constant reward if bonusMatchType=constant +static int penMmcType; // how to penalize mismatches +int penMmcMax; // max mm penalty +static int penMmcMin; // min mm penalty +static int penScMax; // max sc penalty +static int penScMin; // min sc penalty +static int penNType; // how to penalize Ns in the read +static int penN; // constant if N penalty is a constant +static bool penNCatPair; // concatenate mates before N filtering?
+static bool localAlign; // do local alignment in DP steps +static bool noisyHpolymer; // set to true if gap penalties should be reduced to be consistent with a sequencer that under- and overcalls homopolymers +static int penRdGapConst; // constant cost of extending a gap in the read +static int penRfGapConst; // constant cost of extending a gap in the reference +static int penRdGapLinear; // coeff of linear term for cost of gap extension in read +static int penRfGapLinear; // coeff of linear term for cost of gap extension in ref +SimpleFunc scoreMin; // minimum valid score as function of read len +static SimpleFunc nCeil; // max # Ns allowed as function of read len +static SimpleFunc msIval; // interval between seeds as function of read len +static double descConsExp; // how to adjust score minimum as we descend further into index-assisted alignment +static size_t descentLanding; // don't place a search root if it's within this many positions of end +static SimpleFunc descentTotSz; // maximum space a DescentDriver can use in bytes +static SimpleFunc descentTotFmops; // maximum # FM ops a DescentDriver can perform +static int multiseedMms; // mismatches permitted in a multiseed seed +static int multiseedLen; // length of multiseed seeds +static size_t multiseedOff; // offset to begin extracting seeds +static uint32_t seedCacheLocalMB; // # MB to use for non-shared seed alignment caching +static uint32_t seedCacheCurrentMB; // # MB to use for current-read seed hit caching +static uint32_t exactCacheCurrentMB; // # MB to use for current-read seed hit caching +static size_t maxhalf; // max width on one side of DP table +static bool seedSumm; // print summary information about seed hits, not alignments +static bool doUngapped; // do ungapped alignment +static size_t maxIters; // stop after this many extend loop iterations +static size_t maxUg; // stop after this many ungap extends +static size_t maxDp; // stop after this many DPs +static size_t maxItersIncr; // amt
to add to maxIters for each -k > 1 +static size_t maxEeStreak; // stop after this many end-to-end fails in a row +static size_t maxUgStreak; // stop after this many ungap fails in a row +static size_t maxDpStreak; // stop after this many dp fails in a row +static size_t maxStreakIncr; // amt to add to streak for each -k > 1 +static size_t maxMateStreak; // stop seed range after this many mate-find fails +static bool doExtend; // extend seed hits +static bool enable8; // use 8-bit SSE where possible? +static size_t cminlen; // longer reads use checkpointing +static size_t cpow2; // checkpoint interval log2 +static bool doTri; // do triangular mini-fills? +static string defaultPreset; // default preset; applied immediately +static bool ignoreQuals; // all mms incur same penalty, regardless of qual +static string wrapper; // type of wrapper script, so we can print correct usage +static EList queries; // list of query files +static string outfile; // write SAM output to this file +static int mapqv; // MAPQ calculation version +static int tighten; // -M tighten mode (0=none, 1=best, 2=secbest+1) +static bool doExactUpFront; // do exact search up front if seeds seem good enough +static bool do1mmUpFront; // do 1mm search up front if seeds seem good enough +static size_t do1mmMinLen; // length below which we disable 1mm e2e search +static int seedBoostThresh; // if average non-zero position has more than this many elements +static size_t maxSeeds; // maximum number of seeds allowed +static size_t nSeedRounds; // # seed rounds +static bool reorder; // true -> reorder SAM recs in -p mode +static float sampleFrac; // only align random fraction of input reads +static bool arbitraryRandom; // pseudo-randoms no longer a function of read properties +static bool bowtie2p5; +static bool useTempSpliceSite; +static int penCanSplice; +static int penNoncanSplice; +static int penConflictSplice; +static SimpleFunc penCanIntronLen; +static SimpleFunc penNoncanIntronLen; +static size_t 
minIntronLen; +static size_t maxIntronLen; +static string knownSpliceSiteInfile; // +static string novelSpliceSiteInfile; // +static string novelSpliceSiteOutfile; // +static bool secondary; +static bool no_spliced_alignment; +static int rna_strandness; // +static bool splicesite_db_only; // + +static bool anchorStop; +static bool pseudogeneStop; +static bool tranMapOnly; // transcriptome mapping only +static bool tranAssm; // alignments selected for downstream transcript assembly such as StringTie and Cufflinks +static string tranAssm_program; +static bool avoid_pseudogene; + +#ifdef USE_SRA +static EList sra_accs; +#endif + +static string bt2indexs[2]; // read Bowtie 2 index from files with this prefix +static EList > extra_opts; +static size_t extra_opts_cur; + +static EList thread_rids; +static MUTEX_T thread_rids_mutex; +static uint64_t thread_rids_mindist; + +static bool rmChrName; // remove "chr" from reference names (e.g., chr18 to 18) +static bool addChrName; // add "chr" to reference names (e.g., 18 to chr18) + +static size_t max_alts_tried; +static bool use_haplotype; +static bool enable_codis; + +static bool templateLenAdjustment; +static string alignSumFile; // write alignment summary stat. to this file +static bool newAlignSummary; + +static int bowtie2_dp; // Bowtie2's dynamic programming alignment (0: no dynamic programming, 1: conditional dynamic programming, and 2: unconditional dynamic programming) +static bool fast; // --fast +static bool sensitive; // --sensitive +static bool very_sensitive; // --very-sensitive + +static bool repeat; +static bool use_repeat_index; +static EList readLens; + +// 3N variables +bool threeN = false; // indicator for 3N mode. +bool base_change_entered; // set true once user used --base-change + +char usrInput_convertedFrom; // user-supplied "converted from" base: the nucleotide that is replaced by another in the sample preparation protocol. Used for the sequence comparison step in HISAT-3N. +char usrInput_convertedTo; // user input converted To.
the nucleotide to others in sample preparation protocol. for sequence comparison step in HISAT-3N. +char usrInput_convertedFromComplement; // the complement of usrInput_convertedFrom. for sequence comparison step in HISAT-3N. +char usrInput_convertedToComplement; // the complement of usrInput_convertedTo. for sequence comparison step in HISAT-3N. + +char hs3N_convertedFrom; // the actual converted from by HISAT-3N. use in + strand. +char hs3N_convertedTo; // the actual converted to by HISAT-3N. use in + strand. +char hs3N_convertedFromComplement; // the complement of hs3N_convertedFrom. use in - strand. +char hs3N_convertedToComplement; // the complement of hs3N_convertedTo. use in - strand. + +string threeN_indexTags[2]; + +vector repeatHandles; // the 2 repeat handles helps expand the repeat alignment information. 0 for + strand. 1 for - strand. +struct ht2_index_getrefnames_result *refNameMap; // chromosome names and it's index for repeat alignment. +int repeatLimit; // expand #repeatLimit of qualified position in repeat alignment. +bool uniqueOutputOnly; // only output the unique alignment result. +int nMappingCycle; // =1 for standard HISAT2, =4 for HISAT-3N +bool mappingCycles[4]; // this array will indicate which mapping cycle will be run +int directional3NMapping; // =0 for non-directional mapping, =1 for directional mapping and read1/single-end map to fw reference, =2 for reverse directional mapping and read1/single-end map to rc reference. 

// Largest value used as the "no upper bound" argument in the SimpleFunc
// init() calls below.
// NOTE(review): the template argument of numeric_limits is not visible in
// this view (possibly lost during extraction) -- confirm against the real file.
#define DMAX std::numeric_limits::max()

/**
 * Restore every option global assigned below to its built-in default:
 * scalars get literal defaults, containers and strings are cleared, and the
 * SimpleFunc objects are re-initialised.
 */
static void resetOptions() {
    mates1.clear();
    mates2.clear();
    mates12.clear();
    adjIdxBase = "";
    adjIdxBases_3N[0] = "";
    adjIdxBases_3N[1] = "";
    gColor = false;
    gVerbose = 0;
    startVerbose = 0;
    gQuiet = false;
    sanityCheck = 0; // enable expensive sanity checks
    format = FASTQ; // default read format is FASTQ
    origString = ""; // reference text, or filename(s)
    seed = 0; // srandom() seed
    timing = 0; // whether to report basic timing data
    metricsIval = 1; // interval between alignment metrics messages (0 = no messages)
    metricsFile = ""; // output file to put alignment metrics in
    metricsStderr = false; // print metrics to stderr (in addition to --metrics-file if it's specified)
    metricsPerRead = false; // report a metrics tuple for every read?
    allHits = false; // for multihits, report just one
    showVersion = false; // just print version and quit?
    ipause = 0; // pause before matching?
    qUpto = 0xffffffff; // max # of queries to read
    gTrim5 = 0; // amount to trim from 5' end
    gTrim3 = 0; // amount to trim from 3' end
    offRate = -1; // keep default offRate
    solexaQuals = false; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
    phred64Quals = false; // quality chars are phred, but must subtract 64 (not 33)
    integerQuals = false; // quality strings are space-separated strings of integers, not ASCII
    nthreads = 1; // number of pthreads operating concurrently
    outType = OUTPUT_SAM; // style of output
    noRefNames = false; // true -> print reference indexes; not names
    khits = 10; // number of hits per read; >1 is much slower
    mhits = 0; // stop after finding this many alignments+1
    partitionSz = 0; // output a partitioning key in first field
    useSpinlock = true; // false -> don't use spinlocks even if they're #defined
    fileParallel = false; // separate threads read separate input files in parallel
    useShmem = false; // use shared memory to hold the index
    useMm = false; // use memory-mapped files to hold the index
    mmSweep = false; // sweep through memory-mapped files immediately after mapping
    gMinInsert = 0; // minimum insert size
    gMaxInsert = 1000; // maximum insert size
    gMate1fw = true; // -1 mate aligns in fw orientation on fw strand
    gMate2fw = false; // -2 mate aligns in rc orientation on fw strand
    gFlippedMatesOK = false; // allow mates to be in wrong order
    gDovetailMatesOK = false; // allow one mate to extend off the end of the other
    gContainMatesOK = true; // allow one mate to contain the other in PE alignment
    gOlapMatesOK = true; // allow mates to overlap in PE alignment
    gExpandToFrag = true; // incr max frag length to =larger mate len if necessary
    gReportDiscordant = true; // find and report discordant paired-end alignments
    gReportMixed = true; // find and report unpaired alignments for paired reads

    cacheLimit = 5; // ranges w/ size > limit will be cached
    cacheSize = 0; // # words per range cache
    skipReads = 0; // # reads/read pairs to skip
    gNofw = false; // don't align fw orientation of read
    gNorc = false; // don't align rc orientation of read
    fastaContLen = 0;
    fastaContFreq = 0;
    hadoopOut = false; // print Hadoop status and summary messages
    fuzzy = false; // reads will have alternate basecalls w/ qualities
    fullRef = false; // print entire reference name instead of just up to 1st space
    samTruncQname = true; // whether to truncate QNAME to 255 chars
    samOmitSecSeqQual = false; // omit SEQ/QUAL for 2ndary alignments?
    samNoUnal = false; // omit SAM records for unaligned reads
    samNoHead = false; // don't print any header lines in SAM output
    samNoSQ = false; // don't print @SQ header lines
    // Per-tag switches for optional SAM fields
    sam_print_as = true;
    sam_print_xs = true;
    sam_print_xss = false; // Xs:i and Ys:i
    sam_print_yn = false; // YN:i and Yn:i
    sam_print_xn = true;
    sam_print_cs = false;
    sam_print_cq = false;
    sam_print_x0 = true;
    sam_print_x1 = true;
    sam_print_xm = true;
    sam_print_xo = true;
    sam_print_xg = true;
    sam_print_nm = true;
    sam_print_md = true;
    sam_print_yf = true;
    sam_print_yi = false;
    sam_print_ym = false;
    sam_print_yp = false;
    sam_print_yt = true;
    sam_print_ys = true;
    sam_print_zs = false;
    sam_print_xr = false;
    sam_print_xt = false;
    sam_print_xd = false;
    sam_print_xu = false;
    sam_print_yl = false;
    sam_print_ye = false;
    sam_print_yu = false;
    sam_print_xp = false;
    sam_print_yr = false;
    sam_print_zb = false;
    sam_print_zr = false;
    sam_print_zf = false;
    sam_print_zm = false;
    sam_print_zi = false;
    sam_print_zp = false;
    sam_print_zu = false;
    sam_print_xs_a = true;
    sam_print_nh = true;
    bwaSwLike = false;
    bwaSwLikeC = 5.5f;
    bwaSwLikeT = 20.0f;
    qcFilter = false; // don't believe upstream qc by default
    sortByScore = true; // prioritize alignments to report by score?
    rgid = ""; // SAM outputs for @RG header line
    rgs = ""; // SAM outputs for @RG header line
    rgs_optflag = ""; // SAM optional flag to add corresponding to @RG ID
    msample = true;
    gGapBarrier = 4; // disallow gaps within this many chars of either end of alignment
    qualities.clear();
    qualities1.clear();
    qualities2.clear();
    polstr.clear();
    msNoCache = true; // true -> disable local cache
    bonusMatchType = DEFAULT_MATCH_BONUS_TYPE;
    bonusMatch = DEFAULT_MATCH_BONUS;
    penMmcType = DEFAULT_MM_PENALTY_TYPE;
    penMmcMax = DEFAULT_MM_PENALTY_MAX;
    penMmcMin = DEFAULT_MM_PENALTY_MIN;
    penScMax = DEFAULT_SC_PENALTY_MAX;
    penScMin = DEFAULT_SC_PENALTY_MIN;
    penNType = DEFAULT_N_PENALTY_TYPE;
    penN = DEFAULT_N_PENALTY;
    penNCatPair = DEFAULT_N_CAT_PAIR; // concatenate mates before N filtering?
    localAlign = false; // do local alignment in DP steps
    noisyHpolymer = false;
    penRdGapConst = DEFAULT_READ_GAP_CONST;
    penRfGapConst = DEFAULT_REF_GAP_CONST;
    penRdGapLinear = DEFAULT_READ_GAP_LINEAR;
    penRfGapLinear = DEFAULT_REF_GAP_LINEAR;
    scoreMin.init (SIMPLE_FUNC_LINEAR, 0.0f, -0.2f);
    // scoreMin.init (SIMPLE_FUNC_CONST, -18, 0);
    nCeil.init (SIMPLE_FUNC_LINEAR, 0.0f, DMAX, 2.0f, 0.1f);
    msIval.init (SIMPLE_FUNC_LINEAR, 1.0f, DMAX, DEFAULT_IVAL_B, DEFAULT_IVAL_A);
    descConsExp = 2.0;
    descentLanding = 20;
    descentTotSz.init(SIMPLE_FUNC_LINEAR, 1024.0, DMAX, 0.0, 1024.0);
    descentTotFmops.init(SIMPLE_FUNC_LINEAR, 100.0, DMAX, 0.0, 10.0);
    multiseedMms = DEFAULT_SEEDMMS;
    multiseedLen = DEFAULT_SEEDLEN;
    multiseedOff = 0;
    seedCacheLocalMB = 32; // # MB to use for non-shared seed alignment caching
    seedCacheCurrentMB = 20; // # MB to use for current-read seed hit caching
    exactCacheCurrentMB = 20; // # MB to use for current-read seed hit caching
    maxhalf = 15; // max width on one side of DP table
    seedSumm = false; // print summary information about seed hits, not alignments
    doUngapped = true; // do ungapped alignment
    maxIters = 400; // max iterations of extend loop
    maxUg = 300; // stop after this many ungap extends
    maxDp = 300; // stop after this many dp extends
    maxItersIncr = 20; // amt to add to maxIters for each -k > 1
    maxEeStreak = 15; // stop after this many end-to-end fails in a row
    maxUgStreak = 15; // stop after this many ungap fails in a row
    maxDpStreak = 15; // stop after this many dp fails in a row
    maxStreakIncr = 10; // amt to add to streak for each -k > 1
    maxMateStreak = 10; // in PE: abort seed range after N mate-find fails
    doExtend = true; // do seed extensions
    enable8 = true; // use 8-bit SSE where possible?
    cminlen = 2000; // longer reads use checkpointing
    cpow2 = 4; // checkpoint interval log2
    doTri = false; // do triangular mini-fills?
    defaultPreset = "sensitive%LOCAL%"; // default preset; applied immediately
    extra_opts.clear();
    extra_opts_cur = 0;
    bt2indexs[0].clear(); // read Bowtie 2 index from files with this prefix
    bt2indexs[1].clear();
    ignoreQuals = false; // all mms incur same penalty, regardless of qual
    wrapper.clear(); // type of wrapper script, so we can print correct usage
    queries.clear(); // list of query files
    outfile.clear(); // write SAM output to this file
    mapqv = 2; // MAPQ calculation version
    tighten = 3; // -M tightening mode
    doExactUpFront = true; // do exact search up front if seeds seem good enough
    do1mmUpFront = true; // do 1mm search up front if seeds seem good enough
    seedBoostThresh = 300; // if average non-zero position has more than this many elements
    nSeedRounds = 2; // # rounds of seed searches to do for repetitive reads
    maxSeeds = 0; // maximum number of seeds allowed
    do1mmMinLen = 60; // length below which we disable 1mm search
    reorder = false; // reorder SAM records with -p > 1
    sampleFrac = 1.1f; // align all reads
    arbitraryRandom = false; // let pseudo-random seeds be a function of read properties
    bowtie2p5 = false;
    useTempSpliceSite = true;
    penCanSplice = 0;
    penNoncanSplice = 12;
    penConflictSplice = 1000000;
    penCanIntronLen.init(SIMPLE_FUNC_LOG, -8, 1);
    penNoncanIntronLen.init(SIMPLE_FUNC_LOG, -8, 1);
    minIntronLen = 20;
    maxIntronLen = 500000;
    knownSpliceSiteInfile = "";
    novelSpliceSiteInfile = "";
    novelSpliceSiteOutfile = "";
    secondary = false; // allow secondary alignments
    no_spliced_alignment = false;
    rna_strandness = RNA_STRANDNESS_UNKNOWN;
    splicesite_db_only = false;
    anchorStop = true;
    pseudogeneStop = true;
    tranMapOnly = false;
    tranAssm = false;
    tranAssm_program = "";
    avoid_pseudogene = false;

#ifdef USE_SRA
    sra_accs.clear();
#endif

    rmChrName = false;
    addChrName = false;

    max_alts_tried = 16;
    use_haplotype = false;
    enable_codis = false;

    templateLenAdjustment = true;
    alignSumFile = "";
    newAlignSummary = false;

    bowtie2_dp = 0; // disable Bowtie2's dynamic programming alignment
    fast = false;
    sensitive = false;
    very_sensitive = false;

    repeat = false; // true iff alignments to repeat sequences are directly reported.
    use_repeat_index = true;
    readLens.clear();

    // 3N-mode state
    refNameMap = NULL;
    threeN = false;
    repeatLimit = 1000;
    uniqueOutputOnly = false;
    base_change_entered = false;
    threeN_indexTags[0] = ".3n.";
    threeN_indexTags[1] = ".3n.";
    nMappingCycle = 1;
    directional3NMapping = 0;
    // No mapping cycle is selected by default.
    for (int i = 0; i < 4; i++){
        mappingCycles[i] = false;
    }
}

// Short option string; printArgDesc() treats a character followed by ':' as
// an option that takes an argument.
static const char *short_options = "fF:qbzhcu:rv:s:aP:t3:5:w:p:k:M:1:2:I:X:CQ:N:i:L:U:x:S:g:O:D:R:";

// Long option table. Each entry is {name, has_arg, flag, val}; the table is
// closed by an all-zero terminator entry.
static struct option long_options[] = {
    {(char*)"verbose", no_argument, 0, ARG_VERBOSE},
    {(char*)"startverbose", no_argument, 0, ARG_STARTVERBOSE},
    {(char*)"quiet", no_argument, 0, ARG_QUIET},
    {(char*)"sanity", no_argument, 0, ARG_SANITY},
    {(char*)"pause", no_argument, &ipause, 1}, // only entry here with a non-null flag pointer
    {(char*)"orig", required_argument, 0, ARG_ORIG},
    {(char*)"all", no_argument, 0, 'a'},
    {(char*)"solexa-quals", no_argument, 0, ARG_SOLEXA_QUALS},
    {(char*)"integer-quals",no_argument, 0, ARG_INTEGER_QUALS},
    {(char*)"int-quals", no_argument, 0, ARG_INTEGER_QUALS},
    {(char*)"metrics", required_argument, 0, ARG_METRIC_IVAL},
    {(char*)"metrics-file", required_argument, 0, ARG_METRIC_FILE},
    {(char*)"metrics-stderr",no_argument, 0, ARG_METRIC_STDERR},
    {(char*)"metrics-per-read", no_argument, 0, ARG_METRIC_PER_READ},
    {(char*)"met-read", no_argument, 0, ARG_METRIC_PER_READ},
    {(char*)"met", required_argument, 0, ARG_METRIC_IVAL},
    {(char*)"met-file", required_argument, 0, ARG_METRIC_FILE},
    {(char*)"met-stderr", no_argument, 0, ARG_METRIC_STDERR},
    {(char*)"time", no_argument, 0, 't'},
    {(char*)"trim3", required_argument, 0, '3'},
    {(char*)"trim5", required_argument, 0, '5'},
    {(char*)"seed", required_argument, 0, ARG_SEED},
    {(char*)"qupto", required_argument, 0, 'u'},
    {(char*)"upto", required_argument, 0, 'u'},
    {(char*)"version", no_argument, 0, ARG_VERSION},
    {(char*)"filepar", no_argument, 0, ARG_FILEPAR},
    {(char*)"help", no_argument, 0, 'h'},
    {(char*)"threads", required_argument, 0, 'p'},
    // (long_options initializer, continued) Several names below are aliases
    // that map to the same option value.
    {(char*)"khits", required_argument, 0, 'k'},
    {(char*)"minins", required_argument, 0, 'I'},
    {(char*)"maxins", required_argument, 0, 'X'},
    {(char*)"quals", required_argument, 0, 'Q'},
    {(char*)"Q1", required_argument, 0, ARG_QUALS1},
    {(char*)"Q2", required_argument, 0, ARG_QUALS2},
    {(char*)"refidx", no_argument, 0, ARG_REFIDX},
    {(char*)"partition", required_argument, 0, ARG_PARTITION},
    {(char*)"ff", no_argument, 0, ARG_FF},
    {(char*)"fr", no_argument, 0, ARG_FR},
    {(char*)"rf", no_argument, 0, ARG_RF},
    {(char*)"cachelim", required_argument, 0, ARG_CACHE_LIM},
    {(char*)"cachesz", required_argument, 0, ARG_CACHE_SZ},
    {(char*)"nofw", no_argument, 0, ARG_NO_FW},
    {(char*)"norc", no_argument, 0, ARG_NO_RC},
    {(char*)"skip", required_argument, 0, 's'},
    {(char*)"12", required_argument, 0, ARG_ONETWO},
    {(char*)"tab5", required_argument, 0, ARG_TAB5},
    {(char*)"tab6", required_argument, 0, ARG_TAB6},
    {(char*)"phred33-quals", no_argument, 0, ARG_PHRED33},
    {(char*)"phred64-quals", no_argument, 0, ARG_PHRED64},
    {(char*)"phred33", no_argument, 0, ARG_PHRED33},
    {(char*)"phred64", no_argument, 0, ARG_PHRED64},
    {(char*)"solexa1.3-quals", no_argument, 0, ARG_PHRED64},
    {(char*)"mm", no_argument, 0, ARG_MM},
    {(char*)"shmem", no_argument, 0, ARG_SHMEM},
    {(char*)"mmsweep", no_argument, 0, ARG_MMSWEEP},
    {(char*)"hadoopout", no_argument, 0, ARG_HADOOPOUT},
    {(char*)"fuzzy", no_argument, 0, ARG_FUZZY},
    {(char*)"fullref", no_argument, 0, ARG_FULLREF},
    {(char*)"usage", no_argument, 0, ARG_USAGE},
    {(char*)"sam-no-qname-trunc", no_argument, 0, ARG_SAM_NO_QNAME_TRUNC},
    {(char*)"sam-omit-sec-seq", no_argument, 0, ARG_SAM_OMIT_SEC_SEQ},
    {(char*)"omit-sec-seq", no_argument, 0, ARG_SAM_OMIT_SEC_SEQ},
    {(char*)"sam-no-head", no_argument, 0, ARG_SAM_NOHEAD},
    {(char*)"sam-nohead", no_argument, 0, ARG_SAM_NOHEAD},
    {(char*)"sam-noHD", no_argument, 0, ARG_SAM_NOHEAD},
    {(char*)"sam-no-hd", no_argument, 0, ARG_SAM_NOHEAD},
    {(char*)"sam-nosq", no_argument, 0, ARG_SAM_NOSQ},
    {(char*)"sam-no-sq", no_argument, 0, ARG_SAM_NOSQ},
    {(char*)"sam-noSQ", no_argument, 0, ARG_SAM_NOSQ},
    {(char*)"no-head", no_argument, 0, ARG_SAM_NOHEAD},
    {(char*)"no-hd", no_argument, 0, ARG_SAM_NOHEAD},
    {(char*)"no-sq", no_argument, 0, ARG_SAM_NOSQ},
    {(char*)"no-HD", no_argument, 0, ARG_SAM_NOHEAD},
    {(char*)"no-SQ", no_argument, 0, ARG_SAM_NOSQ},
    {(char*)"no-unal", no_argument, 0, ARG_SAM_NO_UNAL},
    {(char*)"color", no_argument, 0, 'C'},
    {(char*)"sam-RG", required_argument, 0, ARG_SAM_RG},
    {(char*)"sam-rg", required_argument, 0, ARG_SAM_RG},
    {(char*)"sam-rg-id", required_argument, 0, ARG_SAM_RGID},
    {(char*)"RG", required_argument, 0, ARG_SAM_RG},
    {(char*)"rg", required_argument, 0, ARG_SAM_RG},
    {(char*)"rg-id", required_argument, 0, ARG_SAM_RGID},
    {(char*)"snpphred", required_argument, 0, ARG_SNPPHRED},
    {(char*)"snpfrac", required_argument, 0, ARG_SNPFRAC},
    {(char*)"gbar", required_argument, 0, ARG_GAP_BAR},
    {(char*)"qseq", no_argument, 0, ARG_QSEQ},
    {(char*)"policy", required_argument, 0, ARG_ALIGN_POLICY},
    {(char*)"preset", required_argument, 0, 'P'},
    {(char*)"seed-summ", no_argument, 0, ARG_SEED_SUMM},
    {(char*)"seed-summary", no_argument, 0, ARG_SEED_SUMM},
    {(char*)"overhang", no_argument, 0, ARG_OVERHANG},
    {(char*)"no-cache", no_argument, 0, ARG_NO_CACHE},
    {(char*)"cache", no_argument, 0, ARG_USE_CACHE},
    {(char*)"454", no_argument, 0, ARG_NOISY_HPOLY},
    {(char*)"ion-torrent", no_argument, 0, ARG_NOISY_HPOLY},
    {(char*)"no-mixed", no_argument, 0, ARG_NO_MIXED},
    {(char*)"no-discordant",no_argument, 0, ARG_NO_DISCORDANT},
    // {(char*)"local", no_argument, 0, ARG_LOCAL},
    {(char*)"end-to-end", no_argument, 0, ARG_END_TO_END},
    {(char*)"ungapped", no_argument, 0, ARG_UNGAPPED},
    {(char*)"no-ungapped", no_argument, 0, ARG_UNGAPPED_NO},
    {(char*)"sse8", no_argument, 0, ARG_SSE8},
    {(char*)"no-sse8", no_argument, 0, ARG_SSE8_NO},
    {(char*)"scan-narrowed",no_argument, 0, ARG_SCAN_NARROWED},
    {(char*)"qc-filter", no_argument, 0, ARG_QC_FILTER},
    {(char*)"bwa-sw-like", no_argument, 0, ARG_BWA_SW_LIKE},
    {(char*)"multiseed", required_argument, 0, ARG_MULTISEED_IVAL},
    {(char*)"ma", required_argument, 0, ARG_SCORE_MA},
    {(char*)"mp", required_argument, 0, ARG_SCORE_MMP},
    {(char*)"sp", required_argument, 0, ARG_SCORE_SCP},
    {(char*)"no-softclip", no_argument, 0, ARG_NO_SOFTCLIP},
    {(char*)"np", required_argument, 0, ARG_SCORE_NP},
    {(char*)"rdg", required_argument, 0, ARG_SCORE_RDG},
    {(char*)"rfg", required_argument, 0, ARG_SCORE_RFG},
    {(char*)"score-min", required_argument, 0, ARG_SCORE_MIN},
    {(char*)"min-score", required_argument, 0, ARG_SCORE_MIN},
    {(char*)"n-ceil", required_argument, 0, ARG_N_CEIL},
    {(char*)"dpad", required_argument, 0, ARG_DPAD},
    {(char*)"mapq-print-inputs",no_argument, 0, ARG_SAM_PRINT_YI},
    {(char*)"very-fast", no_argument, 0, ARG_PRESET_VERY_FAST},
    {(char*)"fast", no_argument, 0, ARG_PRESET_FAST},
    {(char*)"sensitive", no_argument, 0, ARG_PRESET_SENSITIVE},
    {(char*)"very-sensitive", no_argument, 0, ARG_PRESET_VERY_SENSITIVE},
    // {(char*)"very-fast-local", no_argument, 0, ARG_PRESET_VERY_FAST_LOCAL},
    // {(char*)"fast-local", no_argument, 0, ARG_PRESET_FAST_LOCAL},
    // {(char*)"sensitive-local", no_argument, 0, ARG_PRESET_SENSITIVE_LOCAL},
    // {(char*)"very-sensitive-local", no_argument, 0, ARG_PRESET_VERY_SENSITIVE_LOCAL},
    {(char*)"no-score-priority",no_argument, 0, ARG_NO_SCORE_PRIORITY},
    {(char*)"seedlen", required_argument, 0, 'L'},
    {(char*)"seedmms", required_argument, 0, 'N'},
    {(char*)"seedival", required_argument, 0, 'i'},
    {(char*)"ignore-quals", no_argument, 0, ARG_IGNORE_QUALS},
    {(char*)"index", required_argument, 0, 'x'},
    {(char*)"arg-desc", no_argument, 0, ARG_DESC},
    {(char*)"wrapper", required_argument, 0, ARG_WRAPPER},
    {(char*)"unpaired", required_argument, 0, 'U'},
    {(char*)"output", required_argument, 0, 'S'},
    {(char*)"mapq-v", required_argument, 0, ARG_MAPQ_V},
    {(char*)"dovetail", no_argument, 0, ARG_DOVETAIL},
    {(char*)"no-dovetail", no_argument, 0, ARG_NO_DOVETAIL},
    {(char*)"contain", no_argument, 0, ARG_CONTAIN},
    {(char*)"no-contain", no_argument, 0, ARG_NO_CONTAIN},
    {(char*)"overlap", no_argument, 0, ARG_OVERLAP},
    {(char*)"no-overlap", no_argument, 0, ARG_NO_OVERLAP},
    {(char*)"tighten", required_argument, 0, ARG_TIGHTEN},
    {(char*)"exact-upfront", no_argument, 0, ARG_EXACT_UPFRONT},
    {(char*)"1mm-upfront", no_argument, 0, ARG_1MM_UPFRON­T},
    {(char*)"no-exact-upfront", no_argument, 0, ARG_EXACT_UPFRONT_NO},
    {(char*)"no-1mm-upfront", no_argument, 0, ARG_1MM_UPFRONT_NO},
    {(char*)"1mm-minlen", required_argument, 0, ARG_1MM_MINLEN},
    {(char*)"seed-off", required_argument, 0, 'O'},
    {(char*)"seed-boost", required_argument, 0, ARG_SEED_BOOST_THRESH},
    {(char*)"max-seeds", required_argument, 0, ARG_MAX_SEEDS},
    {(char*)"read-times", no_argument, 0, ARG_READ_TIMES},
    {(char*)"show-rand-seed", no_argument, 0, ARG_SHOW_RAND_SEED},
    {(char*)"dp-fail-streak", required_argument, 0, ARG_DP_FAIL_STREAK_THRESH},
    {(char*)"ee-fail-streak", required_argument, 0, ARG_EE_FAIL_STREAK_THRESH},
    {(char*)"ug-fail-streak", required_argument, 0, ARG_UG_FAIL_STREAK_THRESH},
    {(char*)"fail-streak", required_argument, 0, 'D'},
    {(char*)"dp-fails", required_argument, 0, ARG_DP_FAIL_THRESH},
    {(char*)"ug-fails", required_argument, 0, ARG_UG_FAIL_THRESH},
    {(char*)"extends", required_argument, 0, ARG_EXTEND_ITERS},
    {(char*)"no-extend", no_argument, 0, ARG_NO_EXTEND},
    {(char*)"mapq-extra", no_argument, 0, ARG_MAPQ_EX},
    {(char*)"seed-rounds", required_argument, 0, 'R'},
    {(char*)"reorder", no_argument, 0, ARG_REORDER},
    {(char*)"passthrough", no_argument, 0, ARG_READ_PASSTHRU},
    {(char*)"sample", required_argument, 0, ARG_SAMPLE},
    {(char*)"cp-min", required_argument, 0, ARG_CP_MIN},
    {(char*)"cp-ival", required_argument, 0, ARG_CP_IVAL},
    {(char*)"tri", no_argument, 0, ARG_TRI},
    {(char*)"nondeterministic", no_argument, 0, ARG_NON_DETERMINISTIC},
    {(char*)"non-deterministic", no_argument, 0, ARG_NON_DETERMINISTIC},
    // {(char*)"local-seed-cache-sz", required_argument, 0, ARG_LOCAL_SEED_CACHE_SZ},
    {(char*)"seed-cache-sz", required_argument, 0, ARG_CURRENT_SEED_CACHE_SZ},
    // NOTE(review): "no-unal" is already listed earlier in this table with the
    // same value; this second entry looks redundant.
    {(char*)"no-unal", no_argument, 0, ARG_SAM_NO_UNAL},
    {(char*)"test-25", no_argument, 0, ARG_TEST_25},
    // TODO: following should be a function of read length?
    {(char*)"desc-kb", required_argument, 0, ARG_DESC_KB},
    {(char*)"desc-landing", required_argument, 0, ARG_DESC_LANDING},
    {(char*)"desc-exp", required_argument, 0, ARG_DESC_EXP},
    {(char*)"desc-fmops", required_argument, 0, ARG_DESC_FMOPS},
    {(char*)"no-temp-splicesite", no_argument, 0, ARG_NO_TEMPSPLICESITE},
    {(char*)"pen-cansplice", required_argument, 0, ARG_PEN_CANSPLICE},
    {(char*)"pen-noncansplice", required_argument, 0, ARG_PEN_NONCANSPLICE},
    {(char*)"pen-conflictsplice", required_argument, 0, ARG_PEN_CONFLICTSPLICE},
    {(char*)"pen-intronlen", required_argument, 0, ARG_PEN_CANINTRONLEN},
    {(char*)"pen-canintronlen", required_argument, 0, ARG_PEN_CANINTRONLEN},
    {(char*)"pen-noncanintronlen", required_argument, 0, ARG_PEN_NONCANINTRONLEN},
    {(char*)"min-intronlen", required_argument, 0, ARG_MIN_INTRONLEN},
    {(char*)"max-intronlen", required_argument, 0, ARG_MAX_INTRONLEN},
    {(char*)"known-splicesite-infile", required_argument, 0, ARG_KNOWN_SPLICESITE_INFILE},
    {(char*)"novel-splicesite-infile", required_argument, 0, ARG_NOVEL_SPLICESITE_INFILE},
    {(char*)"novel-splicesite-outfile", required_argument, 0, ARG_NOVEL_SPLICESITE_OUTFILE},
    {(char*)"secondary", no_argument, 0, ARG_SECONDARY},
    {(char*)"no-spliced-alignment", no_argument, 0, ARG_NO_SPLICED_ALIGNMENT},
    {(char*)"rna-strandness", required_argument, 0, ARG_RNA_STRANDNESS},
    {(char*)"splicesite-db-only", no_argument, 0, ARG_SPLICESITE_DB_ONLY},
    {(char*)"no-anchorstop", no_argument, 0, ARG_NO_ANCHORSTOP},
    {(char*)"transcriptome-mapping-only", no_argument, 0, ARG_TRANSCRIPTOME_MAPPING_ONLY},
    {(char*)"tmo", no_argument, 0, ARG_TRANSCRIPTOME_MAPPING_ONLY},
    {(char*)"downstream-transcriptome-assembly", no_argument, 0, ARG_TRANSCRIPTOME_ASSEMBLY},
    {(char*)"dta", no_argument, 0, ARG_TRANSCRIPTOME_ASSEMBLY},
    {(char*)"dta-cufflinks", no_argument, 0, ARG_TRANSCRIPTOME_ASSEMBLY_CUFFLINKS},
    {(char*)"avoid-pseudogene",no_argument, 0, ARG_AVOID_PSEUDOGENE},
    {(char*)"no-templatelen-adjustment", no_argument, 0, ARG_NO_TEMPLATELEN_ADJUSTMENT},
#ifdef USE_SRA
    {(char*)"sra-acc", required_argument, 0, ARG_SRA_ACC},
#endif
    {(char*)"remove-chrname", no_argument, 0, ARG_REMOVE_CHRNAME},
    {(char*)"add-chrname", no_argument, 0, ARG_ADD_CHRNAME},
    {(char*)"max-altstried", required_argument, 0, ARG_MAX_ALTSTRIED},
    {(char*)"haplotype", no_argument, 0, ARG_HAPLOTYPE},
    {(char*)"enable-codis", no_argument, 0, ARG_CODIS},
    {(char*)"summary-file", required_argument, 0, ARG_SUMMARY_FILE},
    {(char*)"new-summary", no_argument, 0, ARG_NEW_SUMMARY},
    // NOTE(review): "enable-dp" (no argument) and "bowtie2-dp" (argument
    // required) share ARG_DP -- confirm the ARG_DP handler copes with a
    // missing argument.
    {(char*)"enable-dp", no_argument, 0, ARG_DP},
    {(char*)"bowtie2-dp", required_argument, 0, ARG_DP},
    {(char*)"repeat", no_argument, 0, ARG_REPEAT},
    {(char*)"no-repeat-index", no_argument, 0, ARG_NO_REPEAT_INDEX},
    {(char*)"read-lengths", required_argument, 0, ARG_READ_LENGTHS},
    {(char*)"base-change", required_argument, 0, ARG_BASE_CHANGE},
    {(char*)"repeat-limit", required_argument, 0, ARG_REPEAT_LIMIT},
    {(char*)"unique-only", no_argument, 0, ARG_UNIQUE_ONLY},
    {(char*)"3N", no_argument, 0, ARG_3N},
    {(char*)"directional-mapping", no_argument, 0, ARG_DIRECTIONAL},
    {(char*)"directional-mapping-reverse", no_argument, 0, ARG_DIRECTIONAL_REVERSE},
    {(char*)0, 0, 0, 0} // terminator
};

/**
 * Print out a concise description of what options are taken and whether they
 * take an argument.
+ */ +static void printArgDesc(ostream& out) { + // struct option { + // const char *name; + // int has_arg; + // int *flag; + // int val; + // }; + size_t i = 0; + while(long_options[i].name != 0) { + out << long_options[i].name << "\t" + << (long_options[i].has_arg == no_argument ? 0 : 1) + << endl; + i++; + } + size_t solen = strlen(short_options); + for(i = 0; i < solen; i++) { + // Has an option? Does if next char is : + if(i == solen-1) { + assert_neq(':', short_options[i]); + cout << (char)short_options[i] << "\t" << 0 << endl; + } else { + if(short_options[i+1] == ':') { + // Option with argument + cout << (char)short_options[i] << "\t" << 1 << endl; + i++; // skip the ':' + } else { + // Option with no argument + cout << (char)short_options[i] << "\t" << 0 << endl; + } + } + } +} + +/** + * Print a summary usage message to the provided output stream. + */ +static void printUsage(ostream& out) { + out << "HISAT2 version " << string(HISAT2_VERSION).c_str() << " by Daehwan Kim (infphilo@gmail.com, www.ccb.jhu.edu/people/infphilo)" << endl; + string tool_name = "hisat2-align"; + if(wrapper == "basic-0") { + tool_name = "hisat2"; + } + out << "Usage: " << endl +#ifdef USE_SRA + << " " << tool_name.c_str() << " [options]* -x {-1 -2 | -U | --sra-acc } [-S ]" << endl +#else + << " " << tool_name.c_str() << " [options]* -x {-1 -2 | -U } [-S ]" << endl +#endif + << endl + << " Index filename prefix (minus trailing .X." << gfm_ext << ")." << endl + << " Files with #1 mates, paired with files in ." << endl; + if(wrapper == "basic-0") { + out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl; + } + out << " Files with #2 mates, paired with files in ." << endl; + if(wrapper == "basic-0") { + out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl; + } + out << " Files with unpaired reads." << endl; + if(wrapper == "basic-0") { + out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." 
<< endl; + } +#ifdef USE_SRA + out << " Comma-separated list of SRA accession numbers, e.g. --sra-acc SRR353653,SRR353654." << endl; +#endif + out << " File for SAM output (default: stdout)" << endl + << endl + << " , , can be comma-separated lists (no whitespace) and can be" << endl + << " specified many times. E.g. '-U file1.fq,file2.fq -U file3.fq'." << endl + // Wrapper script should write line next + << endl + << "Options (defaults in parentheses):" << endl + << endl + << " Input:" << endl + << " -q query input files are FASTQ .fq/.fastq (default)" << endl + << " --qseq query input files are in Illumina's qseq format" << endl + << " -f query input files are (multi-)FASTA .fa/.mfa" << endl + << " -r query input files are raw one-sequence-per-line" << endl + << " -c , , are sequences themselves, not files" << endl + << " -s/--skip skip the first reads/pairs in the input (none)" << endl + << " -u/--upto stop after first reads/pairs (no limit)" << endl + << " -5/--trim5 trim bases from 5'/left end of reads (0)" << endl + << " -3/--trim3 trim bases from 3'/right end of reads (0)" << endl + << " --phred33 qualities are Phred+33 (default)" << endl + << " --phred64 qualities are Phred+64" << endl + << " --int-quals qualities encoded as space-delimited integers" << endl +#ifdef USE_SRA + << " --sra-acc SRA accession ID" << endl +#endif + << endl + + << " Presets: Same as:" << endl + // << " For --end-to-end:" << endl + // << " --very-fast -D 5 -R 1 -N 0 -L 22 -i S,0,2.50" << endl + // << " --fast -D 10 -R 2 -N 0 -L 22 -i S,0,2.50" << endl + // << " --sensitive -D 15 -R 2 -N 0 -L 22 -i S,1,1.15 (default)" << endl + // << " --very-sensitive -D 20 -R 3 -N 0 -L 20 -i S,1,0.50" << endl + << " --fast --no-repeat-index" << endl + << " --sensitive --bowtie2-dp 1 -k 30 --score-min L,0,-0.5" << endl + << " --very-sensitive --bowtie2-dp 2 -k 50 --score-min L,0,-1" << endl + << endl + << " Alignment:" << endl + //<< " -N max # mismatches in seed alignment; can be 0 or 1 (0)" << 
endl + //<< " -L length of seed substrings; must be >3, <32 (22)" << endl + //<< " -i interval between seed substrings w/r/t read len (S,1,1.15)" << endl + << " --bowtie2-dp use Bowtie2's dynamic programming alignment algorithm (0) - 0: no dynamic programming, 1: conditional dynamic programming, and 2: unconditional dynamic programming (slowest)" << endl + << " --n-ceil func for max # non-A/C/G/Ts permitted in aln (L,0,0.15)" << endl + //<< " --dpad include extra ref chars on sides of DP table (15)" << endl + //<< " --gbar disallow gaps within nucs of read extremes (4)" << endl + << " --ignore-quals treat all quality values as 30 on Phred scale (off)" << endl + << " --nofw do not align forward (original) version of read (off)" << endl + << " --norc do not align reverse-complement version of read (off)" << endl + << " --no-repeat-index do not use repeat index" << endl + << endl + << " 3N-Alignment:" << endl + << " --base-change the converted nucleotide and converted to nucleotide (C,T)" << endl + << " --directional-mapping make directional mapping, please use this option only if your reads are prepared with a strand specific library (off)" << endl + << " --repeat-limit maximum number of repeat will be expanded for repeat alignment (1000)" << endl + << " --unique-only only output the reads have unique alignment (off)" << endl + << endl + << " Spliced Alignment:" << endl + << " --pen-cansplice penalty for a canonical splice site (0)" << endl + << " --pen-noncansplice penalty for a non-canonical splice site (12)" << endl + // << " --pen-conflictsplice penalty for conflicting splice sites (1000000)" << endl + << " --pen-canintronlen penalty for long introns (G,-8,1) with canonical splice sites" << endl + << " --pen-noncanintronlen penalty for long introns (G,-8,1) with noncanonical splice sites" << endl + << " --min-intronlen minimum intron length (20)" << endl + << " --max-intronlen maximum intron length (500000)" << endl + << " --known-splicesite-infile provide a list 
of known splice sites" << endl + << " --novel-splicesite-outfile report a list of splice sites" << endl + << " --novel-splicesite-infile provide a list of novel splice sites" << endl + << " --no-temp-splicesite disable the use of splice sites found" << endl + << " --no-spliced-alignment disable spliced alignment" << endl + << " --rna-strandness specify strand-specific information (unstranded)" << endl + << " --tmo reports only those alignments within known transcriptome" << endl + << " --dta reports alignments tailored for transcript assemblers" << endl + << " --dta-cufflinks reports alignments tailored specifically for cufflinks" << endl + << " --avoid-pseudogene tries to avoid aligning reads to pseudogenes (experimental option)" << endl + << " --no-templatelen-adjustment disables template length adjustment for RNA-seq reads" << endl + << endl + << " Scoring:" << endl + //<< " --ma match bonus (0 for --end-to-end, 2 for --local) " << endl + << " --mp , max and min penalties for mismatch; lower qual = lower penalty <6,2>" << endl + << " --sp , max and min penalties for soft-clipping; lower qual = lower penalty <2,1>" << endl + << " --no-softclip no soft-clipping" << endl + << " --np penalty for non-A/C/G/Ts in read/ref (1)" << endl + << " --rdg , read gap open, extend penalties (5,3)" << endl + << " --rfg , reference gap open, extend penalties (5,3)" << endl + << " --score-min min acceptable alignment score w/r/t read length" << endl + << " (L,0.0,-0.2)" << endl + << endl + << " Reporting:" << endl + << " -k It searches for at most distinct, primary alignments for each read. Primary alignments mean " << endl + << " alignments whose alignment score is equal to or higher than any other alignments. The search terminates " << endl + << " when it cannot find more distinct valid alignments, or when it finds , whichever happens first. 
" << endl + << " The alignment score for a paired-end alignment equals the sum of the alignment scores of " << endl + << " the individual mates. Each reported read or pair alignment beyond the first has the SAM ‘secondary’ bit " << endl + << " (which equals 256) set in its FLAGS field. For reads that have more than distinct, " << endl + << " valid alignments, hisat2 does not guarantee that the alignments reported are the best possible " << endl + << " in terms of alignment score. Default: 5 (linear index) or 10 (graph index)." << endl + << " Note: HISAT2 is not designed with large values for -k in mind, and when aligning reads to long, " << endl + << " repetitive genomes, large -k could make alignment much slower." << endl + << " --max-seeds HISAT2, like other aligners, uses seed-and-extend approaches. HISAT2 tries to extend seeds to " << endl + << " full-length alignments. In HISAT2, --max-seeds is used to control the maximum number of seeds that " << endl + << " will be extended. For DNA-read alignment (--no-spliced-alignment), HISAT2 extends up to these many seeds" << endl + << " and skips the rest of the seeds. For RNA-read alignment, HISAT2 skips extending seeds and reports " << endl + << " no alignments if the number of seeds is larger than the number specified with the option, " << endl + << " to be compatible with previous versions of HISAT2. Large values for --max-seeds may improve alignment " << endl + << " sensitivity, but HISAT2 is not designed with large values for --max-seeds in mind, and when aligning " << endl + << " reads to long, repetitive genomes, large --max-seeds could make alignment much slower. " << endl + << " The default value is the maximum of 5 and the value that comes with -k times 2." << endl + << " -a/--all HISAT2 reports all alignments it can find. Using the option is equivalent to using both --max-seeds " << endl + << " and -k with the maximum value that a 64-bit signed integer can represent (9,223,372,036,854,775,807)." 
<< endl + << " --repeat report alignments to repeat sequences directly" << endl + << endl + //<< " Effort:" << endl + //<< " -D give up extending after failed extends in a row (15)" << endl + //<< " -R for reads w/ repetitive seeds, try sets of seeds (2)" << endl + //<< endl + << " Paired-end:" << endl + << " -I/--minins minimum fragment length (0), only valid with --no-spliced-alignment" << endl + << " -X/--maxins maximum fragment length (500), only valid with --no-spliced-alignment" << endl + << " --fr/--rf/--ff -1, -2 mates align fw/rev, rev/fw, fw/fw (--fr)" << endl + << " --no-mixed suppress unpaired alignments for paired reads" << endl + << " --no-discordant suppress discordant alignments for paired reads" << endl + << endl + << " Output:" << endl; + //if(wrapper == "basic-0") { + // out << " --bam output directly to BAM (by piping through 'samtools view')" << endl; + //} + out << " -t/--time print wall-clock time taken by search phases" << endl; + if(wrapper == "basic-0") { + out << " --un write unpaired reads that didn't align to " << endl + << " --al write unpaired reads that aligned at least once to " << endl + << " --un-conc write pairs that didn't align concordantly to " << endl + << " --al-conc write pairs that aligned concordantly at least once to " << endl + << " (Note: for --un, --al, --un-conc, or --al-conc, add '-gz' to the option name, e.g." << endl + << " --un-gz , to gzip compress output, or add '-bz2' to bzip2 compress output.)" << endl; + } + out << " --summary-file print alignment summary to this file." << endl + << " --new-summary print alignment summary in a new style, which is more machine-friendly." << endl + << " --quiet print nothing to stderr except serious errors" << endl + // << " --refidx refer to ref. 
seqs by 0-based index rather than name" << endl + << " --met-file send metrics to file at (off)" << endl + << " --met-stderr send metrics to stderr (off)" << endl + << " --met report internal counters & metrics every secs (1)" << endl + // Following is supported in the wrapper instead + // << " --no-unal suppress SAM records for unaligned reads" << endl + << " --no-head suppress header lines, i.e. lines starting with @" << endl + << " --no-sq suppress @SQ header lines" << endl + << " --rg-id set read group id, reflected in @RG line and RG:Z: opt field" << endl + << " --rg add (\"lab:value\") to @RG line of SAM header." << endl + << " Note: @RG line only printed when --rg-id is set." << endl + << " --omit-sec-seq put '*' in SEQ and QUAL fields for secondary alignments." << endl + << endl + << " Performance:" << endl + << " -o/--offrate override offrate of index; must be >= index's offrate" << endl + << " -p/--threads number of alignment threads to launch (1)" << endl + << " --reorder force SAM output order to match order of input reads" << endl +#ifdef BOWTIE_MM + << " --mm use memory-mapped I/O for index; many 'hisat2's can share" << endl +#endif +#ifdef BOWTIE_SHARED_MEM + //<< " --shmem use shared mem for index; many 'hisat2's can share" << endl +#endif + << endl + << " Other:" << endl + << " --qc-filter filter out reads that are bad according to QSEQ filter" << endl + << " --seed seed for random number generator (0)" << endl + << " --non-deterministic seed rand. gen. arbitrarily instead of using read attributes" << endl + << " --remove-chrname remove 'chr' from reference names in alignment" << endl + << " --add-chrname add 'chr' to reference names in alignment " << endl + // << " --verbose verbose output for debugging" << endl + << " --version print version information and quit" << endl + << " -h/--help print this usage message" << endl + ; + if(wrapper.empty()) { + cerr << endl + << "*** Warning ***" << endl + << "'hisat2-align' was run directly. 
It is recommended that you run the wrapper script 'hisat2' instead." << endl + << endl; + } +} + +/** + * Parse an int out of optarg and enforce that it be at least 'lower'; + * if it is less than 'lower', than output the given error message and + * exit with an error and a usage message. + */ +static int parseInt(int lower, int upper, const char *errmsg, const char *arg) { + long l; + char *endPtr= NULL; + l = strtol(arg, &endPtr, 10); + if (endPtr != NULL) { + if (l < lower || l > upper) { + cerr << errmsg << endl; + printUsage(cerr); + throw 1; + } + return (int32_t)l; + } + cerr << errmsg << endl; + printUsage(cerr); + throw 1; + return -1; +} + +/** + * Upper is maximum int by default. + */ +static int parseInt(int lower, const char *errmsg, const char *arg) { + return parseInt(lower, std::numeric_limits::max(), errmsg, arg); +} + +/** + * Parse a T string 'str'. + */ +template +T parse(const char *s) { + T tmp; + stringstream ss(s); + ss >> tmp; + return tmp; +} + +/** + * Parse a pair of Ts from a string, 'str', delimited with 'delim'. + */ +template +pair parsePair(const char *str, char delim) { + string s(str); + EList ss; + tokenize(s, delim, ss); + pair ret; + ret.first = parse(ss[0].c_str()); + ret.second = parse(ss[1].c_str()); + return ret; +} + +/** + * Parse a pair of Ts from a string, 'str', delimited with 'delim'. + */ +template +void parseTuple(const char *str, char delim, EList& ret) { + string s(str); + EList ss; + tokenize(s, delim, ss); + for(size_t i = 0; i < ss.size(); i++) { + ret.push_back(parse(ss[i].c_str())); + } +} + +static string applyPreset(const string& sorig, Presets& presets) { + string s = sorig; + size_t found = s.find("%LOCAL%"); + if(found != string::npos) { + s.replace(found, strlen("%LOCAL%"), localAlign ? 
"-local" : ""); + } + if(gVerbose) { + cerr << "Applying preset: '" << s.c_str() << "' using preset menu '" + << presets.name() << "'" << endl; + } + string pol; + presets.apply(s, pol, extra_opts); + return pol; +} + +static bool saw_M; +static bool saw_a; +static bool saw_k; +static EList presetList; + +/** + * TODO: Argument parsing is very, very flawed. The biggest problem is that + * there are two separate worlds of arguments, the ones set via polstr, and + * the ones set directly in variables. This makes for nasty interactions, + * e.g., with the -M option being resolved at an awkward time relative to + * the -k and -a options. + */ +static void parseOption(int next_option, const char *arg) { + switch (next_option) { + case ARG_TEST_25: bowtie2p5 = true; break; + case ARG_DESC_KB: descentTotSz = SimpleFunc::parse(arg, 0.0, 1024.0, 1024.0, DMAX); break; + case ARG_DESC_FMOPS: descentTotFmops = SimpleFunc::parse(arg, 0.0, 10.0, 100.0, DMAX); break; + case ARG_DESC_LANDING: descentLanding = parse(arg); break; + case ARG_DESC_EXP: { + descConsExp = parse(arg); + if(descConsExp < 0.0) { + cerr << "Error: --desc-exp must be greater than or equal to 0" << endl; + throw 1; + } + break; + } + case '1': tokenize(arg, ",", mates1); break; + case '2': tokenize(arg, ",", mates2); break; + case ARG_ONETWO: tokenize(arg, ",", mates12); format = TAB_MATE5; break; + case ARG_TAB5: tokenize(arg, ",", mates12); format = TAB_MATE5; break; + case ARG_TAB6: tokenize(arg, ",", mates12); format = TAB_MATE6; break; + case 'f': format = FASTA; break; + case 'F': { + format = FASTA_CONT; + pair p = parsePair(arg, ','); + fastaContLen = p.first; + fastaContFreq = p.second; + break; + } + case ARG_BWA_SW_LIKE: { + bwaSwLikeC = 5.5f; + bwaSwLikeT = 30; + bwaSwLike = true; + localAlign = true; + // -a INT Score of a match [1] + // -b INT Mismatch penalty [3] + // -q INT Gap open penalty [5] + // -r INT Gap extension penalty. The penalty for a contiguous + // gap of size k is q+k*r. 
[2] + polstr += ";MA=1;MMP=C3;RDG=5,2;RFG=5,2"; + break; + } + case 'q': format = FASTQ; break; + case 'r': format = RAW; break; + case 'c': format = CMDLINE; break; + case ARG_QSEQ: format = QSEQ; break; + case 'C': { + cerr << "Error: -C specified but Bowtie 2 does not support colorspace input." << endl; + throw 1; + break; + } + case 'I': + gMinInsert = parseInt(0, "-I arg must be positive", arg); + break; + case 'X': + gMaxInsert = parseInt(1, "-X arg must be at least 1", arg); + break; + case ARG_NO_DISCORDANT: gReportDiscordant = false; break; + case ARG_NO_MIXED: gReportMixed = false; break; + case 's': + skipReads = (uint32_t)parseInt(0, "-s arg must be positive", arg); + break; + case ARG_FF: gMate1fw = true; gMate2fw = true; break; + case ARG_RF: gMate1fw = false; gMate2fw = true; break; + case ARG_FR: gMate1fw = true; gMate2fw = false; break; + case ARG_SHMEM: useShmem = true; break; + case ARG_SEED_SUMM: seedSumm = true; break; + case ARG_MM: { +#ifdef BOWTIE_MM + useMm = true; + break; +#else + cerr << "Memory-mapped I/O mode is disabled because bowtie was not compiled with" << endl + << "BOWTIE_MM defined. Memory-mapped I/O is not supported under Windows. If you" << endl + << "would like to use memory-mapped I/O on a platform that supports it, please" << endl + << "refrain from specifying BOWTIE_MM=0 when compiling Bowtie." 
<< endl; + throw 1; +#endif + } + case ARG_MMSWEEP: mmSweep = true; break; + case ARG_HADOOPOUT: hadoopOut = true; break; + case ARG_SOLEXA_QUALS: solexaQuals = true; break; + case ARG_INTEGER_QUALS: integerQuals = true; break; + case ARG_PHRED64: phred64Quals = true; break; + case ARG_PHRED33: solexaQuals = false; phred64Quals = false; break; + case ARG_OVERHANG: gReportOverhangs = true; break; + case ARG_NO_CACHE: msNoCache = true; break; + case ARG_USE_CACHE: msNoCache = false; break; + case ARG_LOCAL_SEED_CACHE_SZ: + seedCacheLocalMB = (uint32_t)parseInt(1, "--local-seed-cache-sz arg must be at least 1", arg); + break; + case ARG_CURRENT_SEED_CACHE_SZ: + seedCacheCurrentMB = (uint32_t)parseInt(1, "--seed-cache-sz arg must be at least 1", arg); + break; + case ARG_REFIDX: noRefNames = true; break; + case ARG_FUZZY: fuzzy = true; break; + case ARG_FULLREF: fullRef = true; break; + case ARG_GAP_BAR: + gGapBarrier = parseInt(1, "--gbar must be no less than 1", arg); + break; + case ARG_SEED: + seed = parseInt(0, "--seed arg must be at least 0", arg); + break; + case ARG_NON_DETERMINISTIC: + arbitraryRandom = true; + break; + case 'u': + qUpto = (uint32_t)parseInt(1, "-u/--qupto arg must be at least 1", arg); + break; + case 'Q': + tokenize(arg, ",", qualities); + integerQuals = true; + break; + case ARG_QUALS1: + tokenize(arg, ",", qualities1); + integerQuals = true; + break; + case ARG_QUALS2: + tokenize(arg, ",", qualities2); + integerQuals = true; + break; + case ARG_CACHE_LIM: + cacheLimit = (uint32_t)parseInt(1, "--cachelim arg must be at least 1", arg); + break; + case ARG_CACHE_SZ: + cacheSize = (uint32_t)parseInt(1, "--cachesz arg must be at least 1", arg); + cacheSize *= (1024 * 1024); // convert from MB to B + break; + case ARG_WRAPPER: wrapper = arg; break; + case 'p': + nthreads = parseInt(1, "-p/--threads arg must be at least 1", arg); + break; + case ARG_FILEPAR: + fileParallel = true; + break; + case '3': gTrim3 = parseInt(0, "-3/--trim3 arg must be 
at least 0", arg); break; + case '5': gTrim5 = parseInt(0, "-5/--trim5 arg must be at least 0", arg); break; + case 'h': printUsage(cout); throw 0; break; + case ARG_USAGE: printUsage(cout); throw 0; break; + // + // NOTE that unlike in Bowtie 1, -M, -a and -k are mutually + // exclusive here. + // + case 'M': { + msample = true; + mhits = parse(arg); + if(saw_a || saw_k) { + cerr << "Warning: -M, -k and -a are mutually exclusive. " + << "-M will override" << endl; + khits = 1; + } + assert_eq(1, khits); + saw_M = true; + cerr << "Warning: -M is deprecated. Use -D and -R to adjust " << + "effort instead." << endl; + break; + } + case ARG_EXTEND_ITERS: { + maxIters = parse(arg); + break; + } + case ARG_NO_EXTEND: { + doExtend = false; + break; + } + case 'R': { polstr += ";ROUNDS="; polstr += arg; break; } + case 'D': { polstr += ";DPS="; polstr += arg; break; } + case ARG_DP_MATE_STREAK_THRESH: { + maxMateStreak = parse(arg); + break; + } + case ARG_DP_FAIL_STREAK_THRESH: { + maxDpStreak = parse(arg); + break; + } + case ARG_EE_FAIL_STREAK_THRESH: { + maxEeStreak = parse(arg); + break; + } + case ARG_UG_FAIL_STREAK_THRESH: { + maxUgStreak = parse(arg); + break; + } + case ARG_DP_FAIL_THRESH: { + maxDp = parse(arg); + break; + } + case ARG_UG_FAIL_THRESH: { + maxUg = parse(arg); + break; + } + case ARG_MAX_SEEDS: { + maxSeeds = parse(arg); + break; + } + case ARG_SEED_BOOST_THRESH: { + seedBoostThresh = parse(arg); + break; + } + case 'a': { + msample = false; + allHits = true; + mhits = 0; // disable -M + if(saw_M || saw_k) { + cerr << "Warning: -M, -k and -a are mutually exclusive. " + << "-a will override" << endl; + } + saw_a = true; + break; + } + case 'k': { + msample = false; + khits = (uint32_t)parseInt(1, "-k arg must be at least 1", arg); + mhits = 0; // disable -M + if(saw_M || saw_a) { + cerr << "Warning: -M, -k and -a are mutually exclusive. 
" + << "-k will override" << endl; + } + saw_k = true; + break; + } + case ARG_VERBOSE: gVerbose = 1; break; + case ARG_STARTVERBOSE: startVerbose = true; break; + case ARG_QUIET: gQuiet = true; break; + case ARG_SANITY: sanityCheck = true; break; + case 't': timing = true; break; + case ARG_METRIC_IVAL: { + metricsIval = parseInt(1, "--metrics arg must be at least 1", arg); + break; + } + case ARG_METRIC_FILE: metricsFile = arg; break; + case ARG_METRIC_STDERR: metricsStderr = true; break; + case ARG_METRIC_PER_READ: metricsPerRead = true; break; + case ARG_NO_FW: gNofw = true; break; + case ARG_NO_RC: gNorc = true; break; + case ARG_SAM_NO_QNAME_TRUNC: samTruncQname = false; break; + case ARG_SAM_OMIT_SEC_SEQ: samOmitSecSeqQual = true; break; + case ARG_SAM_NO_UNAL: samNoUnal = true; break; + case ARG_SAM_NOHEAD: samNoHead = true; break; + case ARG_SAM_NOSQ: samNoSQ = true; break; + case ARG_SAM_PRINT_YI: sam_print_yi = true; break; + case ARG_REORDER: reorder = true; break; + case ARG_MAPQ_EX: { + sam_print_zp = true; + sam_print_zu = true; + sam_print_xp = true; + sam_print_xss = true; + sam_print_yn = true; + break; + } + case ARG_SHOW_RAND_SEED: { + sam_print_zs = true; + break; + } + case ARG_SAMPLE: + sampleFrac = parse(arg); + break; + case ARG_CP_MIN: + cminlen = parse(arg); + break; + case ARG_CP_IVAL: + cpow2 = parse(arg); + break; + case ARG_TRI: + doTri = true; + break; + case ARG_READ_PASSTHRU: { + sam_print_xr = true; + break; + } + case ARG_READ_TIMES: { + sam_print_xt = true; + sam_print_xd = true; + sam_print_xu = true; + sam_print_yl = true; + sam_print_ye = true; + sam_print_yu = true; + sam_print_yr = true; + sam_print_zb = true; + sam_print_zr = true; + sam_print_zf = true; + sam_print_zm = true; + sam_print_zi = true; + break; + } + case ARG_SAM_RG: { + string argstr = arg; + if(argstr.substr(0, 3) == "ID:") { + rgid = "\t"; + rgid += argstr; + rgs_optflag = "RG:Z:" + argstr.substr(3); + } else { + rgs += '\t'; + rgs += argstr; + } + break; 
+ } + case ARG_SAM_RGID: { + string argstr = arg; + rgid = "\t"; + rgid = "\tID:" + argstr; + rgs_optflag = "RG:Z:" + argstr; + break; + } + case ARG_PARTITION: partitionSz = parse(arg); break; + case ARG_DPAD: + maxhalf = parseInt(0, "--dpad must be no less than 0", arg); + break; + case ARG_ORIG: + if(arg == NULL || strlen(arg) == 0) { + cerr << "--orig arg must be followed by a string" << endl; + printUsage(cerr); + throw 1; + } + origString = arg; + break; + case ARG_LOCAL: localAlign = true; break; + case ARG_END_TO_END: localAlign = false; break; + case ARG_SSE8: enable8 = true; break; + case ARG_SSE8_NO: enable8 = false; break; + case ARG_UNGAPPED: doUngapped = true; break; + case ARG_UNGAPPED_NO: doUngapped = false; break; + // case ARG_NO_DOVETAIL: gDovetailMatesOK = false; break; + // case ARG_NO_CONTAIN: gContainMatesOK = false; break; + // case ARG_NO_OVERLAP: gOlapMatesOK = false; break; + // case ARG_DOVETAIL: gDovetailMatesOK = true; break; + // case ARG_CONTAIN: gContainMatesOK = true; break; + // case ARG_OVERLAP: gOlapMatesOK = true; break; + case ARG_QC_FILTER: qcFilter = true; break; + case ARG_NO_SCORE_PRIORITY: sortByScore = false; break; + case ARG_IGNORE_QUALS: ignoreQuals = true; break; + case ARG_MAPQ_V: mapqv = parse(arg); break; + case ARG_TIGHTEN: tighten = parse(arg); break; + case ARG_EXACT_UPFRONT: doExactUpFront = true; break; + case ARG_1MM_UPFRONT: do1mmUpFront = true; break; + case ARG_EXACT_UPFRONT_NO: doExactUpFront = false; break; + case ARG_1MM_UPFRONT_NO: do1mmUpFront = false; break; + case ARG_1MM_MINLEN: do1mmMinLen = parse(arg); break; + case ARG_NOISY_HPOLY: noisyHpolymer = true; break; + case 'x' : bt2indexs[0] = arg; break; + case ARG_PRESET_VERY_FAST_LOCAL: localAlign = true; + case ARG_PRESET_VERY_FAST: { + presetList.push_back("very-fast%LOCAL%"); break; + } + case ARG_PRESET_FAST_LOCAL: localAlign = true; + case ARG_PRESET_FAST: { + fast = true; + presetList.push_back("fast%LOCAL%"); break; + } + case 
ARG_PRESET_SENSITIVE_LOCAL: localAlign = true; + case ARG_PRESET_SENSITIVE: { + sensitive = true; + presetList.push_back("sensitive%LOCAL%"); break; + } + case ARG_PRESET_VERY_SENSITIVE_LOCAL: localAlign = true; + case ARG_PRESET_VERY_SENSITIVE: { + very_sensitive = true; + presetList.push_back("very-sensitive%LOCAL%"); break; + } + case 'P': { presetList.push_back(arg); break; } + case ARG_ALIGN_POLICY: { + if(strlen(arg) > 0) { + polstr += ";"; polstr += arg; + } + break; + } + case 'N': { polstr += ";SEED="; polstr += arg; break; } + case 'L': { + int64_t len = parse(arg); + if(len < 0) { + cerr << "Error: -L argument must be >= 0; was " << arg << endl; + throw 1; + } + if(len > 32) { + cerr << "Error: -L argument must be <= 32; was" << arg << endl; + throw 1; + } + polstr += ";SEEDLEN="; polstr += arg; break; + } + case 'O': + multiseedOff = parse(arg); + break; + case 'i': { + EList args; + tokenize(arg, ",", args); + if(args.size() > 3 || args.size() == 0) { + cerr << "Error: expected 3 or fewer comma-separated " + << "arguments to -i option, got " + << args.size() << endl; + throw 1; + } + // Interval-settings arguments + polstr += (";IVAL=" + args[0]); // Function type + if(args.size() > 1) { + polstr += ("," + args[1]); // Constant term + } + if(args.size() > 2) { + polstr += ("," + args[2]); // Coefficient + } + break; + } + case ARG_MULTISEED_IVAL: { + polstr += ";"; + // Split argument by comma + EList args; + tokenize(arg, ",", args); + if(args.size() > 5 || args.size() == 0) { + cerr << "Error: expected 5 or fewer comma-separated " + << "arguments to --multiseed option, got " + << args.size() << endl; + throw 1; + } + // Seed mm and length arguments + polstr += "SEED="; + polstr += (args[0]); // # mismatches + if(args.size() > 1) polstr += ("," + args[ 1]); // length + if(args.size() > 2) polstr += (";IVAL=" + args[2]); // Func type + if(args.size() > 3) polstr += ("," + args[ 3]); // Constant term + if(args.size() > 4) polstr += ("," + args[ 4]); // 
Coefficient + break; + } + case ARG_N_CEIL: { + // Split argument by comma + EList args; + tokenize(arg, ",", args); + if(args.size() > 3) { + cerr << "Error: expected 3 or fewer comma-separated " + << "arguments to --n-ceil option, got " + << args.size() << endl; + throw 1; + } + if(args.size() == 0) { + cerr << "Error: expected at least one argument to --n-ceil option" << endl; + throw 1; + } + polstr += ";NCEIL="; + if(args.size() == 3) { + polstr += (args[0] + "," + args[1] + "," + args[2]); + } else { + if(args.size() == 1) { + polstr += ("C," + args[0]); + } else { + polstr += (args[0] + "," + args[1]); + } + } + break; + } + case ARG_SCORE_MA: polstr += ";MA="; polstr += arg; break; + case ARG_SCORE_MMP: { + EList args; + tokenize(arg, ",", args); + if(args.size() > 2 || args.size() == 0) { + cerr << "Error: expected 1 or 2 comma-separated " + << "arguments to --mp option, got " << args.size() << endl; + throw 1; + } + if(args.size() >= 1) { + polstr += ";MMP=Q,"; + polstr += args[0]; + if(args.size() >= 2) { + polstr += ","; + polstr += args[1]; + } + } + break; + } + case ARG_SCORE_SCP: { + EList args; + tokenize(arg, ",", args); + if(args.size() > 2 || args.size() == 0) { + cerr << "Error: expected 1 or 2 comma-separated " + << "arguments to --sp option, got " << args.size() << endl; + throw 1; + } + if(args.size() >= 1) { + polstr += ";SCP=Q,"; + polstr += args[0]; + if(args.size() >= 2) { + polstr += ","; + polstr += args[1]; + } + } + break; + } + case ARG_NO_SOFTCLIP: { + ostringstream convert; + convert << std::numeric_limits::max(); + polstr += ";SCP=Q,"; + polstr += convert.str(); + polstr += ","; + polstr += convert.str(); + break; + } + case ARG_SCORE_NP: polstr += ";NP=C"; polstr += arg; break; + case ARG_SCORE_RDG: polstr += ";RDG="; polstr += arg; break; + case ARG_SCORE_RFG: polstr += ";RFG="; polstr += arg; break; + case ARG_SCORE_MIN: { + polstr += ";"; + EList args; + tokenize(arg, ",", args); + if(args.size() > 3 && args.size() == 0) { + 
cerr << "Error: expected 3 or fewer comma-separated " + << "arguments to --n-ceil option, got " + << args.size() << endl; + throw 1; + } + polstr += ("MIN=" + args[0]); + if(args.size() > 1) { + polstr += ("," + args[1]); + } + if(args.size() > 2) { + polstr += ("," + args[2]); + } + break; + } + case ARG_DESC: printArgDesc(cout); throw 0; + case 'S': outfile = arg; break; + case 'U': { + EList args; + tokenize(arg, ",", args); + for(size_t i = 0; i < args.size(); i++) { + queries.push_back(args[i]); + } + break; + } + case ARG_VERSION: showVersion = 1; break; + case ARG_NO_TEMPSPLICESITE: useTempSpliceSite = false; break; + case ARG_PEN_CANSPLICE: { + penCanSplice = parseInt(0, "--pen-cansplice arg must be at least 0", arg); + break; + } + case ARG_PEN_NONCANSPLICE: { + penNoncanSplice = parseInt(0, "--pen-noncansplice arg must be at least 0", arg); + break; + } + case ARG_PEN_CONFLICTSPLICE: { + penConflictSplice = parseInt(0, "--pen-conflictsplice arg must be at least 0", arg); + break; + } + case ARG_PEN_CANINTRONLEN: { + polstr += ";"; + EList args; + tokenize(arg, ",", args); + if(args.size() > 3 && args.size() == 0) { + cerr << "Error: expected 3 or fewer comma-separated " + << "arguments to --n-ceil option, got " + << args.size() << endl; + throw 1; + } + polstr += ("CANINTRONLEN=" + args[0]); + if(args.size() > 1) { + polstr += ("," + args[1]); + } + if(args.size() > 2) { + polstr += ("," + args[2]); + } + break; + } + case ARG_PEN_NONCANINTRONLEN: { + polstr += ";"; + EList args; + tokenize(arg, ",", args); + if(args.size() > 3 && args.size() == 0) { + cerr << "Error: expected 3 or fewer comma-separated " + << "arguments to --n-ceil option, got " + << args.size() << endl; + throw 1; + } + polstr += ("NONCANINTRONLEN=" + args[0]); + if(args.size() > 1) { + polstr += ("," + args[1]); + } + if(args.size() > 2) { + polstr += ("," + args[2]); + } + break; + } + case ARG_MIN_INTRONLEN: { + minIntronLen = parseInt(20, "--min-intronlen arg must be at least 20", 
arg); + break; + } + case ARG_MAX_INTRONLEN: { + maxIntronLen = parseInt(20, "--max-intronlen arg must be at least 20", arg); + break; + } + case ARG_KNOWN_SPLICESITE_INFILE: knownSpliceSiteInfile = arg; break; + case ARG_NOVEL_SPLICESITE_INFILE: novelSpliceSiteInfile = arg; break; + case ARG_NOVEL_SPLICESITE_OUTFILE: novelSpliceSiteOutfile = arg; break; + case ARG_SECONDARY: secondary = true; break; + case ARG_NO_SPLICED_ALIGNMENT: no_spliced_alignment = true; break; + case ARG_RNA_STRANDNESS: { + string strandness = arg; + if(strandness == "F") rna_strandness = RNA_STRANDNESS_F; + else if(strandness == "R") rna_strandness = RNA_STRANDNESS_R; + else if(strandness == "FR") rna_strandness = RNA_STRANDNESS_FR; + else if(strandness == "RF") rna_strandness = RNA_STRANDNESS_RF; + else { + cerr << "Error: should be one of F, R, FR, or RF " << endl; + throw 1; + } + break; + } + case ARG_SPLICESITE_DB_ONLY: { + splicesite_db_only = true; + break; + } + case ARG_NO_ANCHORSTOP: { + anchorStop = false; + break; + } + case ARG_TRANSCRIPTOME_MAPPING_ONLY: { + tranMapOnly = true; + break; + } + case ARG_TRANSCRIPTOME_ASSEMBLY: { + tranAssm = true; + break; + } + case ARG_TRANSCRIPTOME_ASSEMBLY_CUFFLINKS: { + tranAssm = true; + tranAssm_program = "cufflinks"; + break; + } + case ARG_AVOID_PSEUDOGENE: { + avoid_pseudogene = true; + break; + } +#ifdef USE_SRA + case ARG_SRA_ACC: { + tokenize(arg, ",", sra_accs); format = SRA_FASTA; + break; + } +#endif + case ARG_REMOVE_CHRNAME: { + rmChrName = true; + break; + } + case ARG_ADD_CHRNAME: { + addChrName = true; + break; + } + case ARG_MAX_ALTSTRIED: { + max_alts_tried = parseInt(8, "--max-altstried arg must be at least 8", arg); + break; + } + case ARG_HAPLOTYPE: { + use_haplotype = true; + break; + } + case ARG_CODIS: { + enable_codis = true; + break; + } + case ARG_NO_TEMPLATELEN_ADJUSTMENT: { + templateLenAdjustment = false; + break; + } + case ARG_SUMMARY_FILE: { + alignSumFile = arg; + break; + } + case ARG_NEW_SUMMARY: { + 
newAlignSummary = true; + break; + } + case ARG_DP: { + bowtie2_dp = parseInt(0, "--bowtie2-dp arg must be 0, 1, or 2", arg); + break; + } + case ARG_REPEAT: { + repeat = true; + break; + } + case ARG_NO_REPEAT_INDEX: { + use_repeat_index = false; + break; + } + case ARG_READ_LENGTHS: { + EList str_readLens; + tokenize(arg, ",", str_readLens); + for(size_t i = 0; i < str_readLens.size(); i++) { + int readLen = parseInt(0, "--read-lengths arg must be at least 0", str_readLens[i].c_str()); + readLens.push_back(readLen); + } + readLens.sort(); + break; + } + case ARG_BASE_CHANGE: { + // Split argument by comma + EList args; + tokenize(arg, ",", args); + if(args.size() != 2) { + cerr << "Error: expected 2 comma-separated " + << "arguments to --base-change option, got " << args.size() << endl; + throw 1; + } + base_change_entered = true; + usrInput_convertedFrom = toupper(args[0][0]); + usrInput_convertedTo = toupper(args[1][0]); + + string s = "ACGT"; + if ((s.find(usrInput_convertedFrom) == std::string::npos) || (s.find(usrInput_convertedTo) == std::string::npos)) { + cerr << "Please enter the nucleotide in 'ACGT' for --base-change option." << endl; + throw 1; + } + + if (usrInput_convertedFrom == usrInput_convertedTo) { + cerr << "Please enter two different base for --base-change option. If you wish to align normal reads without nucleotide conversion, please use hisat2." 
<< endl; + throw 1; + } + + break; + } + case ARG_3N: { + threeN = true; + break; + } + case ARG_REPEAT_LIMIT: { + repeatLimit = parseInt(1, "--repeat-limit arg must be at least 1", arg);; + break; + } + case ARG_UNIQUE_ONLY: { + uniqueOutputOnly = true; + break; + } + case ARG_DIRECTIONAL: { + directional3NMapping = 1; + break; + } + case ARG_DIRECTIONAL_REVERSE: { + directional3NMapping = 2; + break; + } + default: + printUsage(cerr); + throw 1; + } +} + +/** + * Read command-line arguments + */ +static void parseOptions(int argc, const char **argv) { + int option_index = 0; + int next_option; + saw_M = false; + saw_a = false; + saw_k = false; + presetList.clear(); + if(startVerbose) { cerr << "Parsing options: "; logTime(cerr, true); } + while(true) { + next_option = getopt_long( + argc, const_cast(argv), + short_options, long_options, &option_index); + const char * arg = optarg; + if(next_option == EOF) { + if(extra_opts_cur < extra_opts.size()) { + next_option = extra_opts[extra_opts_cur].first; + arg = extra_opts[extra_opts_cur].second.c_str(); + extra_opts_cur++; + } else { + break; + } + } + parseOption(next_option, arg); + } + // Now parse all the presets. Might want to pick which presets version to + // use according to other parameters. 
+ auto_ptr presets(new PresetsV0()); + // Apply default preset + if(!defaultPreset.empty()) { + polstr = applyPreset(defaultPreset, *presets.get()) + polstr; + } + // Apply specified presets + for(size_t i = 0; i < presetList.size(); i++) { + polstr += applyPreset(presetList[i], *presets.get()); + } + for(size_t i = 0; i < extra_opts.size(); i++) { + next_option = extra_opts[extra_opts_cur].first; + const char *arg = extra_opts[extra_opts_cur].second.c_str(); + parseOption(next_option, arg); + } + + if (showVersion) { + return; + } + + // Remove initial semicolons + while(!polstr.empty() && polstr[0] == ';') { + polstr = polstr.substr(1); + } + if(gVerbose) { + cerr << "Final policy string: '" << polstr.c_str() << "'" << endl; + } + + if (threeN && !base_change_entered) { + cerr << "--base-change must be set for HISAT-3N" << endl; + printUsage(cerr); + throw 1; + } + if (!threeN && base_change_entered) { + cerr << "Please do not use --base-change for HISAT2. To align nucleotide conversion reads, please use HISAT-3N" << endl; + printUsage(cerr); + throw 1; + } + + if (threeN) { + usrInput_convertedFromComplement = asc2dnacomp[usrInput_convertedFrom]; + usrInput_convertedToComplement = asc2dnacomp[usrInput_convertedTo]; + + getConversion(usrInput_convertedFrom, usrInput_convertedTo, hs3N_convertedFrom, hs3N_convertedTo); + hs3N_convertedFromComplement = asc2dnacomp[hs3N_convertedFrom]; + hs3N_convertedToComplement = asc2dnacomp[hs3N_convertedTo]; + + asc2dna_3N[0][hs3N_convertedFrom] = asc2dna[hs3N_convertedTo]; + asc2dna_3N[0][tolower(hs3N_convertedFrom)] = asc2dna[hs3N_convertedTo]; + asc2dna_3N[1][hs3N_convertedFromComplement] = asc2dna[hs3N_convertedToComplement]; + asc2dna_3N[1][tolower(hs3N_convertedFromComplement)] = asc2dna[hs3N_convertedToComplement]; + + threeN_indexTags[0] += hs3N_convertedFrom; + threeN_indexTags[0] += hs3N_convertedTo; + threeN_indexTags[1] += hs3N_convertedFromComplement; + threeN_indexTags[1] += hs3N_convertedToComplement; + + 
nMappingCycle = 4; + + if (hs3N_convertedFrom == hs3N_convertedToComplement || directional3NMapping == 1) { + mappingCycles[0] = true; + mappingCycles[1] = true; + } + else if (directional3NMapping == 2) { + mappingCycles[2] = true; + mappingCycles[3] = true; + } + else { + for (int i = 0; i < 4; i++){ + mappingCycles[i] = true; + } + } + } + else + { + nMappingCycle = 1; + mappingCycles[0] = true; + } + + size_t failStreakTmp = 0; + SeedAlignmentPolicy::parseString( + polstr, + localAlign, + noisyHpolymer, + ignoreQuals, + bonusMatchType, + bonusMatch, + penMmcType, + penMmcMax, + penMmcMin, + penScMax, + penScMin, + penNType, + penN, + penRdGapConst, + penRfGapConst, + penRdGapLinear, + penRfGapLinear, + scoreMin, + nCeil, + penNCatPair, + multiseedMms, + multiseedLen, + msIval, + failStreakTmp, + nSeedRounds, + &penCanIntronLen, + &penNoncanIntronLen); + if(failStreakTmp > 0) { + maxEeStreak = failStreakTmp; + maxUgStreak = failStreakTmp; + maxDpStreak = failStreakTmp; + } + if(saw_a || saw_k || true) { + msample = false; + mhits = 0; + } else { + assert_gt(mhits, 0); + msample = true; + } + + if(fast) { + use_repeat_index = false; + } else if(sensitive) { + if(bowtie2_dp == 0) { + bowtie2_dp = 1; + } + + if(khits < 10) { + khits = 10; + saw_k = true; + } + scoreMin.init(SIMPLE_FUNC_LINEAR, 0.0f, -0.5f); + } else if(very_sensitive) { + bowtie2_dp = 2; + if(khits < 30) { + khits = 30; + saw_k = true; + } + scoreMin.init(SIMPLE_FUNC_LINEAR, 0.0f, -1.0f); + } + + if(mates1.size() != mates2.size()) { + cerr << "Error: " << mates1.size() << " mate files/sequences were specified with -1, but " << mates2.size() << endl + << "mate files/sequences were specified with -2. The same number of mate files/" << endl + << "sequences must be specified with -1 and -2." << endl; + throw 1; + } + if(qualities.size() && format != FASTA) { + cerr << "Error: one or more quality files were specified with -Q but -f was not" << endl + << "enabled. 
-Q works only in combination with -f and -C." << endl; + throw 1; + } + if(qualities1.size() && format != FASTA) { + cerr << "Error: one or more quality files were specified with --Q1 but -f was not" << endl + << "enabled. --Q1 works only in combination with -f and -C." << endl; + throw 1; + } + if(qualities2.size() && format != FASTA) { + cerr << "Error: one or more quality files were specified with --Q2 but -f was not" << endl + << "enabled. --Q2 works only in combination with -f and -C." << endl; + throw 1; + } + if(qualities1.size() > 0 && mates1.size() != qualities1.size()) { + cerr << "Error: " << mates1.size() << " mate files/sequences were specified with -1, but " << qualities1.size() << endl + << "quality files were specified with --Q1. The same number of mate and quality" << endl + << "files must sequences must be specified with -1 and --Q1." << endl; + throw 1; + } + if(qualities2.size() > 0 && mates2.size() != qualities2.size()) { + cerr << "Error: " << mates2.size() << " mate files/sequences were specified with -2, but " << qualities2.size() << endl + << "quality files were specified with --Q2. The same number of mate and quality" << endl + << "files must sequences must be specified with -2 and --Q2." << endl; + throw 1; + } + if(!rgs.empty() && rgid.empty()) { + cerr << "Warning: --rg was specified without --rg-id also " + << "being specified. @RG line is not printed unless --rg-id " + << "is specified." 
<< endl; + } + // Check for duplicate mate input files + if(format != CMDLINE) { + for(size_t i = 0; i < mates1.size(); i++) { + for(size_t j = 0; j < mates2.size(); j++) { + if(mates1[i] == mates2[j] && !gQuiet) { + cerr << "Warning: Same mate file \"" << mates1[i].c_str() << "\" appears as argument to both -1 and -2" << endl; + } + } + } + } + // If both -s and -u are used, we need to adjust qUpto accordingly + // since it uses rdid to know if we've reached the -u limit (and + // rdids are all shifted up by skipReads characters) + if(qUpto + skipReads > qUpto) { + qUpto += skipReads; + } + if(useShmem && useMm && !gQuiet) { + cerr << "Warning: --shmem overrides --mm..." << endl; + useMm = false; + } + if(gGapBarrier < 1) { + cerr << "Warning: --gbar was set less than 1 (=" << gGapBarrier + << "); setting to 1 instead" << endl; + gGapBarrier = 1; + } + if(multiseedMms >= multiseedLen) { + assert_gt(multiseedLen, 0); + cerr << "Warning: seed mismatches (" << multiseedMms + << ") is less than seed length (" << multiseedLen + << "); setting mismatches to " << (multiseedMms-1) + << " instead" << endl; + multiseedMms = multiseedLen-1; + } + sam_print_zm = sam_print_zm && bowtie2p5; +#ifndef NDEBUG + if(!gQuiet) { + cerr << "Warning: Running in debug mode. Please use debug mode only " + << "for diagnosing errors, and not for typical use of HISAT2." 
+ << endl; + } +#endif +} + +static const char *argv0 = NULL; + +/// Create a PatternSourcePerThread for the current thread according +/// to the global params and return a pointer to it +static PatternSourcePerThreadFactory* +createPatsrcFactory(PairedPatternSource& _patsrc, int tid) { + PatternSourcePerThreadFactory *patsrcFact; + patsrcFact = new WrappedPatternSourcePerThreadFactory(_patsrc); + assert(patsrcFact != NULL); + return patsrcFact; +} + +#define PTHREAD_ATTRS (PTHREAD_CREATE_JOINABLE | PTHREAD_CREATE_DETACHED) + +typedef TIndexOffU index_t; +typedef uint16_t local_index_t; +static PairedPatternSource* multiseed_patsrc; +static HGFM* multiseed_gfm; +static RFM* multiseed_rgfm; +//static HGFM* multiseed_gfms[2]; +//static RFM* multiseed_rgfms[2]; +static Scoring* multiseed_sc; +static BitPairReference* multiseed_refs; +static BitPairReference* multiseed_rrefs; +//static BitPairReference* multiseed_refss[2]; +//static BitPairReference* multiseed_rrefss[2]; +static AlnSink* multiseed_msink; +static OutFileBuf* multiseed_metricsOfb; +static SpliceSiteDB* ssdb; +static ALTDB* altdb; +static RepeatDB* repeatdb; +static ALTDB* raltdb; + +static ALTDB *altdbs_3N[2]; +static RepeatDB *repeatdbs_3N[2]; +static ALTDB *raltdbs_3N[2]; +static TranscriptomePolicy* multiseed_tpol; +static GraphPolicy* gpol; + + +class reference3N { +public: + const HGFM* multiseed_gfm[2]; + const RFM* multiseed_rgfm[2]; + const BitPairReference* multiseed_rrefs[2]; + + reference3N() { + + } + + void load(EList* >& gfms_3N, + RFM* rgfms_3N[2], + BitPairReference* rrefss[2]) { + for (int i = 0; i < 2; i++) { + multiseed_gfm[i] = gfms_3N[i]; + multiseed_rgfm[i] = rgfms_3N[i]; + multiseed_rrefs[i] = rrefss[i]; + } + } +}; + +reference3N ref3N; + +/** + * Metrics for measuring the work done by the outer read alignment + * loop. + */ +struct OuterLoopMetrics { + + OuterLoopMetrics() { + reset(); + } + + /** + * Set all counters to 0. 
+ */ + void reset() { + reads = bases = srreads = srbases = + freads = fbases = ureads = ubases = 0; + } + + /** + * Sum the counters in m in with the conters in this object. This + * is the only safe way to update an OuterLoopMetrics that's shared + * by multiple threads. + */ + void merge( + const OuterLoopMetrics& m, + bool getLock = false) + { + ThreadSafe ts(&mutex_m, getLock); + reads += m.reads; + bases += m.bases; + srreads += m.srreads; + srbases += m.srbases; + freads += m.freads; + fbases += m.fbases; + ureads += m.ureads; + ubases += m.ubases; + } + + uint64_t reads; // total reads + uint64_t bases; // total bases + uint64_t srreads; // same-read reads + uint64_t srbases; // same-read bases + uint64_t freads; // filtered reads + uint64_t fbases; // filtered bases + uint64_t ureads; // unfiltered reads + uint64_t ubases; // unfiltered bases + MUTEX_T mutex_m; +}; + +/** + * Collection of all relevant performance metrics when aligning in + * multiseed mode. + */ +struct PerfMetrics { + + PerfMetrics() : first(true) { reset(); } + + /** + * Set all counters to 0. + */ + void reset() { + olm.reset(); + sdm.reset(); + wlm.reset(); + swmSeed.reset(); + swmMate.reset(); + rpm.reset(); + dpSse8Seed.reset(); // 8-bit SSE seed extensions + dpSse8Mate.reset(); // 8-bit SSE mate finds + dpSse16Seed.reset(); // 16-bit SSE seed extensions + dpSse16Mate.reset(); // 16-bit SSE mate finds + nbtfiltst = 0; + nbtfiltsc = 0; + nbtfiltdo = 0; + + olmu.reset(); + sdmu.reset(); + wlmu.reset(); + swmuSeed.reset(); + swmuMate.reset(); + rpmu.reset(); + dpSse8uSeed.reset(); // 8-bit SSE seed extensions + dpSse8uMate.reset(); // 8-bit SSE mate finds + dpSse16uSeed.reset(); // 16-bit SSE seed extensions + dpSse16uMate.reset(); // 16-bit SSE mate finds + nbtfiltst_u = 0; + nbtfiltsc_u = 0; + nbtfiltdo_u = 0; + + him.reset(); + } + + /** + * Merge a set of specific metrics into this object. 
+ */ + void merge( + const OuterLoopMetrics *ol, + const SeedSearchMetrics *sd, + const WalkMetrics *wl, + const SwMetrics *swSeed, + const SwMetrics *swMate, + const ReportingMetrics *rm, + const SSEMetrics *dpSse8Ex, + const SSEMetrics *dpSse8Ma, + const SSEMetrics *dpSse16Ex, + const SSEMetrics *dpSse16Ma, + uint64_t nbtfiltst_, + uint64_t nbtfiltsc_, + uint64_t nbtfiltdo_, + const HIMetrics *hi, + bool getLock) + { + ThreadSafe ts(&mutex_m, getLock); + if(ol != NULL) { + olmu.merge(*ol, false); + } + if(sd != NULL) { + sdmu.merge(*sd, false); + } + if(wl != NULL) { + wlmu.merge(*wl, false); + } + if(swSeed != NULL) { + swmuSeed.merge(*swSeed, false); + } + if(swMate != NULL) { + swmuMate.merge(*swMate, false); + } + if(rm != NULL) { + rpmu.merge(*rm, false); + } + if(dpSse8Ex != NULL) { + dpSse8uSeed.merge(*dpSse8Ex, false); + } + if(dpSse8Ma != NULL) { + dpSse8uMate.merge(*dpSse8Ma, false); + } + if(dpSse16Ex != NULL) { + dpSse16uSeed.merge(*dpSse16Ex, false); + } + if(dpSse16Ma != NULL) { + dpSse16uMate.merge(*dpSse16Ma, false); + } + nbtfiltst_u += nbtfiltst_; + nbtfiltsc_u += nbtfiltsc_; + nbtfiltdo_u += nbtfiltdo_; + if(hi != NULL) { + him.merge(*hi, false); + } + } + + /** + * Reports a matrix of results, incl. column labels, to an OutFileBuf. + * Optionally also sends results to stderr (unbuffered). Can optionally + * print a per-read record with the read name at the beginning. + */ + void reportInterval( + OutFileBuf* o, // file to send output to + bool metricsStderr, // additionally output to stderr? 
+ bool total, // true -> report total, otherwise incremental + bool sync, // synchronize output + const BTString *name) // non-NULL name pointer if is per-read record + { + ThreadSafe ts(&mutex_m, sync); + ostringstream stderrSs; + time_t curtime = time(0); + char buf[1024]; + if(first) { + const char *str = + /* 1 */ "Time" "\t" + /* 2 */ "Read" "\t" + /* 3 */ "Base" "\t" + /* 4 */ "SameRead" "\t" + /* 5 */ "SameReadBase" "\t" + /* 6 */ "UnfilteredRead" "\t" + /* 7 */ "UnfilteredBase" "\t" + + /* 8 */ "Paired" "\t" + /* 9 */ "Unpaired" "\t" + /* 10 */ "AlConUni" "\t" + /* 11 */ "AlConRep" "\t" + /* 12 */ "AlConFail" "\t" + /* 13 */ "AlDis" "\t" + /* 14 */ "AlConFailUni" "\t" + /* 15 */ "AlConFailRep" "\t" + /* 16 */ "AlConFailFail" "\t" + /* 17 */ "AlConRepUni" "\t" + /* 18 */ "AlConRepRep" "\t" + /* 19 */ "AlConRepFail" "\t" + /* 20 */ "AlUnpUni" "\t" + /* 21 */ "AlUnpRep" "\t" + /* 22 */ "AlUnpFail" "\t" + + /* 23 */ "SeedSearch" "\t" + /* 24 */ "IntraSCacheHit" "\t" + /* 25 */ "InterSCacheHit" "\t" + /* 26 */ "OutOfMemory" "\t" + /* 27 */ "AlBWOp" "\t" + /* 28 */ "AlBWBranch" "\t" + /* 29 */ "ResBWOp" "\t" + /* 30 */ "ResBWBranch" "\t" + /* 31 */ "ResResolve" "\t" + /* 34 */ "ResReport" "\t" + /* 35 */ "RedundantSHit" "\t" + + /* 36 */ "BestMinEdit0" "\t" + /* 37 */ "BestMinEdit1" "\t" + /* 38 */ "BestMinEdit2" "\t" + + /* 39 */ "ExactAttempts" "\t" + /* 40 */ "ExactSucc" "\t" + /* 41 */ "ExactRanges" "\t" + /* 42 */ "ExactRows" "\t" + /* 43 */ "ExactOOMs" "\t" + + /* 44 */ "1mmAttempts" "\t" + /* 45 */ "1mmSucc" "\t" + /* 46 */ "1mmRanges" "\t" + /* 47 */ "1mmRows" "\t" + /* 48 */ "1mmOOMs" "\t" + + /* 49 */ "UngappedSucc" "\t" + /* 50 */ "UngappedFail" "\t" + /* 51 */ "UngappedNoDec" "\t" + + /* 52 */ "DPExLt10Gaps" "\t" + /* 53 */ "DPExLt5Gaps" "\t" + /* 54 */ "DPExLt3Gaps" "\t" + + /* 55 */ "DPMateLt10Gaps" "\t" + /* 56 */ "DPMateLt5Gaps" "\t" + /* 57 */ "DPMateLt3Gaps" "\t" + + /* 58 */ "DP16ExDps" "\t" + /* 59 */ "DP16ExDpSat" "\t" + /* 60 */ 
"DP16ExDpFail" "\t" + /* 61 */ "DP16ExDpSucc" "\t" + /* 62 */ "DP16ExCol" "\t" + /* 63 */ "DP16ExCell" "\t" + /* 64 */ "DP16ExInner" "\t" + /* 65 */ "DP16ExFixup" "\t" + /* 66 */ "DP16ExGathSol" "\t" + /* 67 */ "DP16ExBt" "\t" + /* 68 */ "DP16ExBtFail" "\t" + /* 69 */ "DP16ExBtSucc" "\t" + /* 70 */ "DP16ExBtCell" "\t" + /* 71 */ "DP16ExCoreRej" "\t" + /* 72 */ "DP16ExNRej" "\t" + + /* 73 */ "DP8ExDps" "\t" + /* 74 */ "DP8ExDpSat" "\t" + /* 75 */ "DP8ExDpFail" "\t" + /* 76 */ "DP8ExDpSucc" "\t" + /* 77 */ "DP8ExCol" "\t" + /* 78 */ "DP8ExCell" "\t" + /* 79 */ "DP8ExInner" "\t" + /* 80 */ "DP8ExFixup" "\t" + /* 81 */ "DP8ExGathSol" "\t" + /* 82 */ "DP8ExBt" "\t" + /* 83 */ "DP8ExBtFail" "\t" + /* 84 */ "DP8ExBtSucc" "\t" + /* 85 */ "DP8ExBtCell" "\t" + /* 86 */ "DP8ExCoreRej" "\t" + /* 87 */ "DP8ExNRej" "\t" + + /* 88 */ "DP16MateDps" "\t" + /* 89 */ "DP16MateDpSat" "\t" + /* 90 */ "DP16MateDpFail" "\t" + /* 91 */ "DP16MateDpSucc" "\t" + /* 92 */ "DP16MateCol" "\t" + /* 93 */ "DP16MateCell" "\t" + /* 94 */ "DP16MateInner" "\t" + /* 95 */ "DP16MateFixup" "\t" + /* 96 */ "DP16MateGathSol" "\t" + /* 97 */ "DP16MateBt" "\t" + /* 98 */ "DP16MateBtFail" "\t" + /* 99 */ "DP16MateBtSucc" "\t" + /* 100 */ "DP16MateBtCell" "\t" + /* 101 */ "DP16MateCoreRej" "\t" + /* 102 */ "DP16MateNRej" "\t" + + /* 103 */ "DP8MateDps" "\t" + /* 104 */ "DP8MateDpSat" "\t" + /* 105 */ "DP8MateDpFail" "\t" + /* 106 */ "DP8MateDpSucc" "\t" + /* 107 */ "DP8MateCol" "\t" + /* 108 */ "DP8MateCell" "\t" + /* 109 */ "DP8MateInner" "\t" + /* 110 */ "DP8MateFixup" "\t" + /* 111 */ "DP8MateGathSol" "\t" + /* 112 */ "DP8MateBt" "\t" + /* 113 */ "DP8MateBtFail" "\t" + /* 114 */ "DP8MateBtSucc" "\t" + /* 115 */ "DP8MateBtCell" "\t" + /* 116 */ "DP8MateCoreRej" "\t" + /* 117 */ "DP8MateNRej" "\t" + + /* 118 */ "DPBtFiltStart" "\t" + /* 119 */ "DPBtFiltScore" "\t" + /* 120 */ "DpBtFiltDom" "\t" + + /* 121 */ "MemPeak" "\t" + /* 122 */ "UncatMemPeak" "\t" // 0 + /* 123 */ "EbwtMemPeak" "\t" // EBWT_CAT + /* 
124 */ "CacheMemPeak" "\t" // CA_CAT + /* 125 */ "ResolveMemPeak" "\t" // GW_CAT + /* 126 */ "AlignMemPeak" "\t" // AL_CAT + /* 127 */ "DPMemPeak" "\t" // DP_CAT + /* 128 */ "MiscMemPeak" "\t" // MISC_CAT + /* 129 */ "DebugMemPeak" "\t" // DEBUG_CAT + + /* 130 */ "LocalSearch" "\t" + /* 131 */ "AnchorSearch" "\t" + /* 132 */ "LocalIndexSearch" "\t" + /* 133 */ "LocalExtSearch" "\t" + /* 134 */ "LocalSearchRecur" "\t" + /* 135 */ "GlobalGenomeCoords" "\t" + /* 136 */ "LocalGenomeCoords" "\t" + + + "\n"; + + if(name != NULL) { + if(o != NULL) o->writeChars("Name\t"); + if(metricsStderr) stderrSs << "Name\t"; + } + + if(o != NULL) o->writeChars(str); + if(metricsStderr) stderrSs << str; + first = false; + } + + if(total) mergeIncrementals(); + + // 0. Read name, if needed + if(name != NULL) { + if(o != NULL) { + o->writeChars(name->toZBuf()); + o->write('\t'); + } + if(metricsStderr) { + stderrSs << (*name) << '\t'; + } + } + + // 1. Current time in secs + itoa10(curtime, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const OuterLoopMetrics& ol = total ? olm : olmu; + + // 2. Reads + itoa10(ol.reads, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 3. Bases + itoa10(ol.bases, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 4. Same-read reads + itoa10(ol.srreads, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 5. Same-read bases + itoa10(ol.srbases, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 6. Unfiltered reads + itoa10(ol.ureads, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 7. 
Unfiltered bases + itoa10(ol.ubases, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const ReportingMetrics& rp = total ? rpm : rpmu; + + // 8. Paired reads + itoa10(rp.npaired, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 9. Unpaired reads + itoa10(rp.nunpaired, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 10. Pairs with unique concordant alignments + itoa10(rp.nconcord_uni, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 11. Pairs with repetitive concordant alignments + itoa10(rp.nconcord_rep, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 12. Pairs with 0 concordant alignments + itoa10(rp.nconcord_0, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 13. Pairs with 1 discordant alignment + itoa10(rp.ndiscord, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 14. Mates from unaligned pairs that align uniquely + itoa10(rp.nunp_0_uni, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 15. Mates from unaligned pairs that align repetitively + itoa10(rp.nunp_0_rep, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 16. Mates from unaligned pairs that fail to align + itoa10(rp.nunp_0_0, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 17. Mates from repetitive pairs that align uniquely + itoa10(rp.nunp_rep_uni, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 18. 
Mates from repetitive pairs that align repetitively + itoa10(rp.nunp_rep_rep, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 19. Mates from repetitive pairs that fail to align + itoa10(rp.nunp_rep_0, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 20. Unpaired reads that align uniquely + itoa10(rp.nunp_uni, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 21. Unpaired reads that align repetitively + itoa10(rp.nunp_rep, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 22. Unpaired reads that fail to align + itoa10(rp.nunp_0, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const SeedSearchMetrics& sd = total ? sdm : sdmu; + + // 23. Seed searches + itoa10(sd.seedsearch, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 24. Hits in 'current' cache + itoa10(sd.intrahit, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 25. Hits in 'local' cache + itoa10(sd.interhit, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 26. Out of memory + itoa10(sd.ooms, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 27. Burrows-Wheeler ops in aligner + itoa10(sd.bwops, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 28. Burrows-Wheeler branches (edits) in aligner + itoa10(sd.bweds, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const WalkMetrics& wl = total ? wlm : wlmu; + + // 29. 
Burrows-Wheeler ops in resolver + itoa10(wl.bwops, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 30. Burrows-Wheeler branches in resolver + itoa10(wl.branches, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 31. Burrows-Wheeler offset resolutions + itoa10(wl.resolves, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 34. Offset reports + itoa10(wl.reports, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 35. Redundant seed hit + itoa10(total ? swmSeed.rshit : swmuSeed.rshit, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 36. # times the best (out of fw/rc) minimum # edits was 0 + itoa10(total ? sdm.bestmin0 : sdmu.bestmin0, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 37. # times the best (out of fw/rc) minimum # edits was 1 + itoa10(total ? sdm.bestmin1 : sdmu.bestmin1, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 38. # times the best (out of fw/rc) minimum # edits was 2 + itoa10(total ? sdm.bestmin2 : sdmu.bestmin2, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 39. Exact aligner attempts + itoa10(total ? swmSeed.exatts : swmuSeed.exatts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 40. Exact aligner successes + itoa10(total ? swmSeed.exsucc : swmuSeed.exsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 41. Exact aligner ranges + itoa10(total ? 
swmSeed.exranges : swmuSeed.exranges, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 42. Exact aligner rows + itoa10(total ? swmSeed.exrows : swmuSeed.exrows, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 43. Exact aligner OOMs + itoa10(total ? swmSeed.exooms : swmuSeed.exooms, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 44. 1mm aligner attempts + itoa10(total ? swmSeed.mm1atts : swmuSeed.mm1atts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 45. 1mm aligner successes + itoa10(total ? swmSeed.mm1succ : swmuSeed.mm1succ, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 46. 1mm aligner ranges + itoa10(total ? swmSeed.mm1ranges : swmuSeed.mm1ranges, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 47. 1mm aligner rows + itoa10(total ? swmSeed.mm1rows : swmuSeed.mm1rows, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 48. 1mm aligner OOMs + itoa10(total ? swmSeed.mm1ooms : swmuSeed.mm1ooms, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 49 Ungapped aligner success + itoa10(total ? swmSeed.ungapsucc : swmuSeed.ungapsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 50. Ungapped aligner fail + itoa10(total ? swmSeed.ungapfail : swmuSeed.ungapfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 51. Ungapped aligner no decision + itoa10(total ? 
swmSeed.ungapnodec : swmuSeed.ungapnodec, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 52. # seed-extend DPs with < 10 gaps + itoa10(total ? swmSeed.sws10 : swmuSeed.sws10, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 53. # seed-extend DPs with < 5 gaps + itoa10(total ? swmSeed.sws5 : swmuSeed.sws5, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 54. # seed-extend DPs with < 3 gaps + itoa10(total ? swmSeed.sws3 : swmuSeed.sws3, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 55. # seed-extend DPs with < 10 gaps + itoa10(total ? swmMate.sws10 : swmuMate.sws10, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 56. # seed-extend DPs with < 5 gaps + itoa10(total ? swmMate.sws5 : swmuMate.sws5, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 57. # seed-extend DPs with < 3 gaps + itoa10(total ? swmMate.sws3 : swmuMate.sws3, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const SSEMetrics& dpSse16s = total ? dpSse16Seed : dpSse16uSeed; + + // 58. 16-bit SSE seed-extend DPs tried + itoa10(dpSse16s.dp, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 59. 16-bit SSE seed-extend DPs saturated + itoa10(dpSse16s.dpsat, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 60. 16-bit SSE seed-extend DPs failed + itoa10(dpSse16s.dpfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 61. 
16-bit SSE seed-extend DPs succeeded + itoa10(dpSse16s.dpsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 62. 16-bit SSE seed-extend DP columns completed + itoa10(dpSse16s.col, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 63. 16-bit SSE seed-extend DP cells completed + itoa10(dpSse16s.cell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 64. 16-bit SSE seed-extend DP inner loop iters completed + itoa10(dpSse16s.inner, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 65. 16-bit SSE seed-extend DP fixup loop iters completed + itoa10(dpSse16s.fixup, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 66. 16-bit SSE seed-extend DP gather, cells with potential solutions + itoa10(dpSse16s.gathsol, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 67. 16-bit SSE seed-extend DP backtrace attempts + itoa10(dpSse16s.bt, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 68. 16-bit SSE seed-extend DP failed backtrace attempts + itoa10(dpSse16s.btfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 69. 16-bit SSE seed-extend DP succesful backtrace attempts + itoa10(dpSse16s.btsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 70. 16-bit SSE seed-extend DP backtrace cells + itoa10(dpSse16s.btcell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 71. 
16-bit SSE seed-extend DP core-diag rejections + itoa10(dpSse16s.corerej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 72. 16-bit SSE seed-extend DP N rejections + itoa10(dpSse16s.nrej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const SSEMetrics& dpSse8s = total ? dpSse8Seed : dpSse8uSeed; + + // 73. 8-bit SSE seed-extend DPs tried + itoa10(dpSse8s.dp, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 74. 8-bit SSE seed-extend DPs saturated + itoa10(dpSse8s.dpsat, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 75. 8-bit SSE seed-extend DPs failed + itoa10(dpSse8s.dpfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 76. 8-bit SSE seed-extend DPs succeeded + itoa10(dpSse8s.dpsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 77. 8-bit SSE seed-extend DP columns completed + itoa10(dpSse8s.col, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 78. 8-bit SSE seed-extend DP cells completed + itoa10(dpSse8s.cell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 79. 8-bit SSE seed-extend DP inner loop iters completed + itoa10(dpSse8s.inner, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 80. 8-bit SSE seed-extend DP fixup loop iters completed + itoa10(dpSse8s.fixup, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 81. 
16-bit SSE seed-extend DP gather, cells with potential solutions + itoa10(dpSse8s.gathsol, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 82. 16-bit SSE seed-extend DP backtrace attempts + itoa10(dpSse8s.bt, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 83. 16-bit SSE seed-extend DP failed backtrace attempts + itoa10(dpSse8s.btfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 84. 16-bit SSE seed-extend DP succesful backtrace attempts + itoa10(dpSse8s.btsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 85. 16-bit SSE seed-extend DP backtrace cells + itoa10(dpSse8s.btcell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 86. 16-bit SSE seed-extend DP core-diag rejections + itoa10(dpSse8s.corerej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 87. 16-bit SSE seed-extend DP N rejections + itoa10(dpSse8s.nrej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const SSEMetrics& dpSse16m = total ? dpSse16Mate : dpSse16uMate; + + // 88. 16-bit SSE mate-finding DPs tried + itoa10(dpSse16m.dp, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 89. 16-bit SSE mate-finding DPs saturated + itoa10(dpSse16m.dpsat, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 90. 16-bit SSE mate-finding DPs failed + itoa10(dpSse16m.dpfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 91. 
16-bit SSE mate-finding DPs succeeded + itoa10(dpSse16m.dpsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 92. 16-bit SSE mate-finding DP columns completed + itoa10(dpSse16m.col, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 93. 16-bit SSE mate-finding DP cells completed + itoa10(dpSse16m.cell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 94. 16-bit SSE mate-finding DP inner loop iters completed + itoa10(dpSse16m.inner, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 95. 16-bit SSE mate-finding DP fixup loop iters completed + itoa10(dpSse16m.fixup, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 96. 16-bit SSE mate-finding DP gather, cells with potential solutions + itoa10(dpSse16m.gathsol, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 97. 16-bit SSE mate-finding DP backtrace attempts + itoa10(dpSse16m.bt, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 98. 16-bit SSE mate-finding DP failed backtrace attempts + itoa10(dpSse16m.btfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 99. 16-bit SSE mate-finding DP succesful backtrace attempts + itoa10(dpSse16m.btsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 100. 16-bit SSE mate-finding DP backtrace cells + itoa10(dpSse16m.btcell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 101. 
16-bit SSE mate-finding DP core-diag rejections + itoa10(dpSse16m.corerej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 102. 16-bit SSE mate-finding DP N rejections + itoa10(dpSse16m.nrej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const SSEMetrics& dpSse8m = total ? dpSse8Mate : dpSse8uMate; + + // 103. 8-bit SSE mate-finding DPs tried + itoa10(dpSse8m.dp, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 104. 8-bit SSE mate-finding DPs saturated + itoa10(dpSse8m.dpsat, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 105. 8-bit SSE mate-finding DPs failed + itoa10(dpSse8m.dpfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 106. 8-bit SSE mate-finding DPs succeeded + itoa10(dpSse8m.dpsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 107. 8-bit SSE mate-finding DP columns completed + itoa10(dpSse8m.col, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 108. 8-bit SSE mate-finding DP cells completed + itoa10(dpSse8m.cell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 109. 8-bit SSE mate-finding DP inner loop iters completed + itoa10(dpSse8m.inner, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 110. 8-bit SSE mate-finding DP fixup loop iters completed + itoa10(dpSse8m.fixup, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 111. 
16-bit SSE mate-finding DP gather, cells with potential solutions + itoa10(dpSse8m.gathsol, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 112. 16-bit SSE mate-finding DP backtrace attempts + itoa10(dpSse8m.bt, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 113. 16-bit SSE mate-finding DP failed backtrace attempts + itoa10(dpSse8m.btfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 114. 16-bit SSE mate-finding DP succesful backtrace attempts + itoa10(dpSse8m.btsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 115. 16-bit SSE mate-finding DP backtrace cells + itoa10(dpSse8m.btcell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 116. 16-bit SSE mate-finding DP core rejections + itoa10(dpSse8m.corerej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 117. 16-bit SSE mate-finding N rejections + itoa10(dpSse8m.nrej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 118. Backtrace candidates filtered due to starting cell + itoa10(total ? nbtfiltst : nbtfiltst_u, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 119. Backtrace candidates filtered due to low score + itoa10(total ? nbtfiltsc : nbtfiltsc_u, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 120. Backtrace candidates filtered due to domination + itoa10(total ? nbtfiltdo : nbtfiltdo_u, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 121. 
Overall memory peak + itoa10(gMemTally.peak() >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 122. Uncategorized memory peak + itoa10(gMemTally.peak(0) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 123. Ebwt memory peak + itoa10(gMemTally.peak(EBWT_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 124. Cache memory peak + itoa10(gMemTally.peak(CA_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 125. Resolver memory peak + itoa10(gMemTally.peak(GW_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 126. Seed aligner memory peak + itoa10(gMemTally.peak(AL_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 127. Dynamic programming aligner memory peak + itoa10(gMemTally.peak(DP_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 128. Miscellaneous memory peak + itoa10(gMemTally.peak(MISC_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 129. 
Debug memory peak + itoa10(gMemTally.peak(DEBUG_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 130 + itoa10(him.localatts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 131 + itoa10(him.anchoratts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 132 + itoa10(him.localindexatts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 133 + itoa10(him.localextatts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 134 + itoa10(him.localsearchrecur, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 135 + itoa10(him.globalgenomecoords, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 136 + itoa10(him.localgenomecoords, buf); + if(metricsStderr) stderrSs << buf; + if(o != NULL) { o->writeChars(buf); } + + if(o != NULL) { o->write('\n'); } + if(metricsStderr) cerr << stderrSs.str().c_str() << endl; + if(!total) mergeIncrementals(); + } + + void mergeIncrementals() { + olm.merge(olmu, false); + sdm.merge(sdmu, false); + wlm.merge(wlmu, false); + swmSeed.merge(swmuSeed, false); + swmMate.merge(swmuMate, false); + dpSse8Seed.merge(dpSse8uSeed, false); + dpSse8Mate.merge(dpSse8uMate, false); + dpSse16Seed.merge(dpSse16uSeed, false); + dpSse16Mate.merge(dpSse16uMate, false); + nbtfiltst_u += nbtfiltst; + nbtfiltsc_u += nbtfiltsc; + nbtfiltdo_u += nbtfiltdo; + + olmu.reset(); + sdmu.reset(); + wlmu.reset(); + swmuSeed.reset(); + swmuMate.reset(); + rpmu.reset(); + dpSse8uSeed.reset(); + dpSse8uMate.reset(); + dpSse16uSeed.reset(); + dpSse16uMate.reset(); + nbtfiltst_u = 0; + nbtfiltsc_u = 0; + nbtfiltdo_u = 0; + } + + // Total over 
the whole job + OuterLoopMetrics olm; // overall metrics + SeedSearchMetrics sdm; // metrics related to seed alignment + WalkMetrics wlm; // metrics related to walking left (i.e. resolving reference offsets) + SwMetrics swmSeed; // metrics related to DP seed-extend alignment + SwMetrics swmMate; // metrics related to DP mate-finding alignment + ReportingMetrics rpm; // metrics related to reporting + SSEMetrics dpSse8Seed; // 8-bit SSE seed extensions + SSEMetrics dpSse8Mate; // 8-bit SSE mate finds + SSEMetrics dpSse16Seed; // 16-bit SSE seed extensions + SSEMetrics dpSse16Mate; // 16-bit SSE mate finds + uint64_t nbtfiltst; + uint64_t nbtfiltsc; + uint64_t nbtfiltdo; + + // Just since the last update + OuterLoopMetrics olmu; // overall metrics + SeedSearchMetrics sdmu; // metrics related to seed alignment + WalkMetrics wlmu; // metrics related to walking left (i.e. resolving reference offsets) + SwMetrics swmuSeed; // metrics related to DP seed-extend alignment + SwMetrics swmuMate; // metrics related to DP mate-finding alignment + ReportingMetrics rpmu; // metrics related to reporting + SSEMetrics dpSse8uSeed; // 8-bit SSE seed extensions + SSEMetrics dpSse8uMate; // 8-bit SSE mate finds + SSEMetrics dpSse16uSeed; // 16-bit SSE seed extensions + SSEMetrics dpSse16uMate; // 16-bit SSE mate finds + uint64_t nbtfiltst_u; + uint64_t nbtfiltsc_u; + uint64_t nbtfiltdo_u; + + // + HIMetrics him; + + MUTEX_T mutex_m; // lock for when one ob + bool first; // yet to print first line? + time_t lastElapsed; // used in reportInterval to measure time since last call +}; + +static PerfMetrics metrics; + +// Cyclic rotations +#define ROTL(n, x) (((x) << (n)) | ((x) >> (32-n))) +#define ROTR(n, x) (((x) >> (n)) | ((x) << (32-n))) + +static inline void printMmsSkipMsg( + const PatternSourcePerThread& ps, + bool paired, + bool mate1, + int seedmms) +{ + ostringstream os; + if(paired) { + os << "Warning: skipping mate #" << (mate1 ? '1' : '2') + << " of read '" << (mate1 ? 
ps.bufa().name : ps.bufb().name) + << "' because length (" << (mate1 ? ps.bufa().patFw.length() : ps.bufb().patFw.length()) + << ") <= # seed mismatches (" << seedmms << ")" << endl; + } else { + os << "Warning: skipping read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "' because length (" << (mate1 ? ps.bufa().patFw.length() : ps.bufb().patFw.length()) + << ") <= # seed mismatches (" << seedmms << ")" << endl; + } + cerr << os.str().c_str(); +} + +static inline void printLenSkipMsg( + const PatternSourcePerThread& ps, + bool paired, + bool mate1) +{ + ostringstream os; + if(paired) { + os << "Warning: skipping mate #" << (mate1 ? '1' : '2') + << " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "' because it was < 2 characters long" << endl; + } else { + os << "Warning: skipping read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "' because it was < 2 characters long" << endl; + } + cerr << os.str().c_str(); +} + +static inline void printLocalScoreMsg( + const PatternSourcePerThread& ps, + bool paired, + bool mate1) +{ + ostringstream os; + if(paired) { + os << "Warning: minimum score function gave negative number in " + << "--local mode for mate #" << (mate1 ? '1' : '2') + << " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "; setting to 0 instead" << endl; + } else { + os << "Warning: minimum score function gave negative number in " + << "--local mode for read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "; setting to 0 instead" << endl; + } + cerr << os.str().c_str(); +} + +static inline void printEEScoreMsg( + const PatternSourcePerThread& ps, + bool paired, + bool mate1) +{ + ostringstream os; + if(paired) { + os << "Warning: minimum score function gave positive number in " + << "--end-to-end mode for mate #" << (mate1 ? '1' : '2') + << " of read '" << (mate1 ? 
ps.bufa().name : ps.bufb().name) + << "; setting to 0 instead" << endl; + } else { + os << "Warning: minimum score function gave positive number in " + << "--end-to-end mode for read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "; setting to 0 instead" << endl; + } + cerr << os.str().c_str(); +} + + +#define MERGE_METRICS(met, sync) { \ + msink.mergeMetrics(rpm); \ + met.merge( \ + &olm, \ + &sdm, \ + &wlm, \ + &swmSeed, \ + &swmMate, \ + &rpm, \ + &sseU8ExtendMet, \ + &sseU8MateMet, \ + &sseI16ExtendMet, \ + &sseI16MateMet, \ + nbtfiltst, \ + nbtfiltsc, \ + nbtfiltdo, \ + &him, \ + sync); \ + olm.reset(); \ + sdm.reset(); \ + wlm.reset(); \ + swmSeed.reset(); \ + swmMate.reset(); \ + rpm.reset(); \ + sseU8ExtendMet.reset(); \ + sseU8MateMet.reset(); \ + sseI16ExtendMet.reset(); \ + sseI16MateMet.reset(); \ + him.reset(); \ +} + +#define MERGE_SW(x) { \ + x.merge( \ + sseU8ExtendMet, \ + sseU8MateMet, \ + sseI16ExtendMet, \ + sseI16MateMet, \ + nbtfiltst, \ + nbtfiltsc, \ + nbtfiltdo); \ + x.resetCounters(); \ +} + + + + + +/** + * Called once per thread. Sets up per-thread pointers to the shared global + * data structures, creates per-thread structures, then enters the alignment + * loop. The general flow of the alignment loop is: + * + * - If it's been a while and we're the master thread, report some alignment + * metrics + * - Get the next read/pair + * - Check if this read/pair is identical to the previous + * + If identical, check whether we can skip any or all alignment stages. 
If + * we can skip all stages, report the result immediately and move to next + * read/pair + * + If not identical, continue + * - + */ +static void multiseedSearchWorker_hisat2(void *vp) { + int tid = *((int*)vp); + + if (threeN) { + assert(ref3N.multiseed_gfm[0] != NULL); + assert(ref3N.multiseed_gfm[1] != NULL); + } else { + assert(multiseed_gfm != NULL); + } + + assert(multiseedMms == 0); + + // for regular Hisat2 + PairedPatternSource& patsrc = *multiseed_patsrc; + const HGFM& gfm = *multiseed_gfm; + const RFM* rgfm = multiseed_rgfm; + const Scoring& sc = *multiseed_sc; + const BitPairReference& ref = *multiseed_refs; + const BitPairReference* rref = multiseed_rrefs; + AlnSink& msink = *multiseed_msink; + OutFileBuf* metricsOfb = multiseed_metricsOfb; + + // for Hisat-3N + const HGFM* gfm_3N[2]; + const RFM* rgfm_3N[2]; + const BitPairReference* rref_3N[2]; + + for (int i = 0; i < 2; i++) { + gfm_3N[i] = ref3N.multiseed_gfm[i]; + rgfm_3N[i] = ref3N.multiseed_rgfm[i]; + rref_3N[i] = ref3N.multiseed_rrefs[i]; + } + + // Sinks: these are so that we can print tables encoding counts for + // events of interest on a per-read, per-seed, per-join, or per-SW + // level. These in turn can be used to diagnose performance + // problems, or generally characterize performance. + + //const BitPairReference& refs = *multiseed_refs; + auto_ptr patsrcFact(createPatsrcFactory(patsrc, tid)); + auto_ptr ps(patsrcFact->create()); + + // Instantiate an object for holding reporting-related parameters. + if(maxSeeds == 0) { + maxSeeds = max(5, khits * 2); + } + ReportingParams rp( + (allHits ? std::numeric_limits::max() : khits), // -k + (allHits ? std::numeric_limits::max() : maxSeeds), // --max-seeds + mhits, // -m/-M + 0, // penalty gap (not used now) + msample, // true -> -M was specified, otherwise assume -m + gReportDiscordant, // report discordang paired-end alignments? + gReportMixed, // report unpaired alignments for paired reads? 
+ secondary, + localAlign, + bowtie2_dp, + sensitive | very_sensitive, + repeat); + + // Instantiate a mapping quality calculator + auto_ptr bmapq(new_mapq(mapqv, scoreMin, sc)); + + + // Make a per-thread wrapper for the global MHitSink object. + + AlnSinkWrap* msinkwrap; + if (threeN) { + msinkwrap = new AlnSinkWrap3N( + msink, // global sink + rp, // reporting parameters + *bmapq.get(), // MAPQ calculator + (size_t)tid, // thread id + mappingCycles, + secondary, // secondary alignments + no_spliced_alignment ? NULL : ssdb, + thread_rids_mindist); + } else { + msinkwrap = new AlnSinkWrap( + msink, // global sink + rp, // reporting parameters + *bmapq.get(), // MAPQ calculator + (size_t)tid, // thread id + secondary, // secondary alignments + no_spliced_alignment ? NULL : ssdb, + thread_rids_mindist); + } + + SplicedAligner splicedAligner(threeN? *gfm_3N[0]: gfm, + anchorStop, + thread_rids_mindist); + SwAligner sw; + OuterLoopMetrics olm; + SeedSearchMetrics sdm; + WalkMetrics wlm; + SwMetrics swmSeed, swmMate; + ReportingMetrics rpm; + RandomSource rnd, rndArb; + SSEMetrics sseU8ExtendMet; + SSEMetrics sseU8MateMet; + SSEMetrics sseI16ExtendMet; + SSEMetrics sseI16MateMet; + DescentMetrics descm; + uint64_t nbtfiltst = 0; // TODO: find a new home for these + uint64_t nbtfiltsc = 0; // TODO: find a new home for these + uint64_t nbtfiltdo = 0; // TODO: find a new home for these + HIMetrics him; + + ASSERT_ONLY(BTDnaString tmp); + + int pepolFlag; + if(gMate1fw && gMate2fw) { + pepolFlag = PE_POLICY_FF; + } else if(gMate1fw && !gMate2fw) { + pepolFlag = PE_POLICY_FR; + } else if(!gMate1fw && gMate2fw) { + pepolFlag = PE_POLICY_RF; + } else { + pepolFlag = PE_POLICY_RR; + } + assert_geq(gMaxInsert, gMinInsert); + assert_geq(gMinInsert, 0); + PairedEndPolicy pepol( + pepolFlag, + gMaxInsert, + gMinInsert, + localAlign, + gFlippedMatesOK, + gDovetailMatesOK, + gContainMatesOK, + gOlapMatesOK, + gExpandToFrag); + + PerfMetrics metricsPt; // per-thread metrics object; 
for read-level metrics + BTString nametmp; + + PerReadMetrics prm; + + // Used by thread with threadid == 1 to measure time elapsed + time_t iTime = time(0); + + // Keep track of whether last search was exhaustive for mates 1 and 2 + bool exhaustive[2] = { false, false }; + // Keep track of whether mates 1/2 were filtered out last time through + bool filt[2] = { true, true }; + // Keep track of whether mates 1/2 were filtered out due Ns last time + bool nfilt[2] = { true, true }; + // Keep track of whether mates 1/2 were filtered out due to not having + // enough characters to rise about the score threshold. + bool scfilt[2] = { true, true }; + // Keep track of whether mates 1/2 were filtered out due to not having + // more characters than the number of mismatches permitted in a seed. + bool lenfilt[2] = { true, true }; + // Keep track of whether mates 1/2 were filtered out by upstream qc + bool qcfilt[2] = { true, true }; + + rndArb.init((uint32_t)time(0)); + int mergei = 0; + int mergeival = 16; + while(true) { + bool success = false, done = false, paired = false; + ps->nextReadPair(success, done, paired, outType != OUTPUT_SAM); + if(!success && done) { + break; + } else if(!success) { + continue; + } + TReadId rdid = ps->rdid(); + if(nthreads > 1 && useTempSpliceSite) { + assert_gt(tid, 0); + assert_leq(tid, thread_rids.size()); + assert(thread_rids[tid - 1] == 0 || rdid > thread_rids[tid - 1]); + thread_rids[tid - 1] = (rdid > 0 ? 
rdid - 1 : 0); + while(true) { + uint64_t min_rdid = thread_rids[0]; + { + for(size_t i = 1; i < thread_rids.size(); i++) { + if(thread_rids[i] < min_rdid) { + min_rdid = thread_rids[i]; + } + } + } + + if(min_rdid + thread_rids_mindist < rdid) { +#if defined(_TTHREAD_WIN32_) + Sleep(0); +#elif defined(_TTHREAD_POSIX_) + sched_yield(); +#endif + } else break; + } + } + + bool sample = true; + if(arbitraryRandom) { + ps->bufa().seed = rndArb.nextU32(); + ps->bufb().seed = rndArb.nextU32(); + } + if(sampleFrac < 1.0f) { + rnd.init(ROTL(ps->bufa().seed, 2)); + sample = rnd.nextFloat() < sampleFrac; + } + if(rdid >= skipReads && rdid < qUpto && sample) { + // Align this read/pair + bool retry = true; + // + // Check if there is metrics reporting for us to do. + // + if(metricsIval > 0 && + (metricsOfb != NULL || metricsStderr) && + !metricsPerRead && + ++mergei == mergeival) + { + // Do a periodic merge. Update global metrics, in a + // synchronized manner if needed. + MERGE_METRICS(metrics, nthreads > 1); + mergei = 0; + // Check if a progress message should be printed + if(tid == 0) { + // Only thread 1 prints progress messages + time_t curTime = time(0); + if(curTime - iTime >= metricsIval) { + metrics.reportInterval(metricsOfb, metricsStderr, false, true, NULL); + iTime = curTime; + } + } + } + prm.reset(); // per-read metrics + prm.doFmString = false; + if(sam_print_xt) { + gettimeofday(&prm.tv_beg, &prm.tz_beg); + } + // Try to align this read + int mappingCycle = 0; + bool gNofw3N = false; + bool gNorc3N = false; + // for threeN (3N) mode, we need to map the read 4 times. for regular mode, only 1 time. 
+ while(retry || mappingCycle < nMappingCycle) { + + msinkwrap->resetInit_(); + if (threeN) { + ps->changePlan3N(mappingCycle); + gNorc3N = (mappingCycle == threeN_type1conversion_FW || mappingCycle == threeN_type2conversion_FW); + gNofw3N = !gNorc3N; + } + retry = false; + assert_eq(ps->bufa().color, false); + if (!mappingCycles[mappingCycle]) + { + mappingCycle++; + continue; + } + + + olm.reads++; + bool pair = paired; + const size_t rdlen1 = ps->bufa().length(); + const size_t rdlen2 = pair ? ps->bufb().length() : 0; + olm.bases += (rdlen1 + rdlen2); + msinkwrap->nextRead( + &ps->bufa(), + pair ? &ps->bufb() : NULL, + rdid, + sc.qualitiesMatter()); + assert(msinkwrap->inited()); + size_t rdlens[2] = { rdlen1, rdlen2 }; + // Calculate the minimum valid score threshold for the read + TAlScore minsc[2], maxpen[2]; + maxpen[0] = maxpen[1] = 0; + minsc[0] = minsc[1] = std::numeric_limits::max(); + if(bwaSwLike) { + // From BWA-SW manual: "Given an l-long query, the + // threshold for a hit to be retained is + // a*max{T,c*log(l)}." We try to recreate that here. + float a = (float)sc.match(30); + float T = bwaSwLikeT, c = bwaSwLikeC; + minsc[0] = (TAlScore)max(a*T, a*c*log(rdlens[0])); + if(paired) { + minsc[1] = (TAlScore)max(a*T, a*c*log(rdlens[1])); + } + } else { + minsc[0] = scoreMin.f(rdlens[0]); + if(paired) minsc[1] = scoreMin.f(rdlens[1]); + if(localAlign) { + if(minsc[0] < 0) { + if(!gQuiet) printLocalScoreMsg(*ps, paired, true); + minsc[0] = 0; + } + if(paired && minsc[1] < 0) { + if(!gQuiet) printLocalScoreMsg(*ps, paired, false); + minsc[1] = 0; + } + } else { + if(minsc[0] > 0) { + if(!gQuiet) printEEScoreMsg(*ps, paired, true); + minsc[0] = 0; + } + if(paired && minsc[1] > 0) { + if(!gQuiet) printEEScoreMsg(*ps, paired, false); + minsc[1] = 0; + } + } + } + + // N filter; does the read have too many Ns? + size_t readns[2] = {0, 0}; + sc.nFilterPair( + &ps->bufa().patFw, + pair ? 
&ps->bufb().patFw : NULL, + readns[0], + readns[1], + nfilt[0], + nfilt[1]); + // Score filter; does the read enough character to rise above + // the score threshold? + scfilt[0] = sc.scoreFilter(minsc[0], rdlens[0]); + scfilt[1] = sc.scoreFilter(minsc[1], rdlens[1]); + lenfilt[0] = lenfilt[1] = true; + if(rdlens[0] <= (size_t)multiseedMms || rdlens[0] < 2) { + if(!gQuiet) printMmsSkipMsg(*ps, paired, true, multiseedMms); + lenfilt[0] = false; + } + if((rdlens[1] <= (size_t)multiseedMms || rdlens[1] < 2) && paired) { + if(!gQuiet) printMmsSkipMsg(*ps, paired, false, multiseedMms); + lenfilt[1] = false; + } + if(rdlens[0] < 2) { + if(!gQuiet) printLenSkipMsg(*ps, paired, true); + lenfilt[0] = false; + } + if(rdlens[1] < 2 && paired) { + if(!gQuiet) printLenSkipMsg(*ps, paired, false); + lenfilt[1] = false; + } + qcfilt[0] = qcfilt[1] = true; + if(qcFilter) { + qcfilt[0] = (ps->bufa().filter != '0'); + qcfilt[1] = (ps->bufb().filter != '0'); + } + filt[0] = (nfilt[0] && scfilt[0] && lenfilt[0] && qcfilt[0]); + filt[1] = (nfilt[1] && scfilt[1] && lenfilt[1] && qcfilt[1]); + prm.nFilt += (filt[0] ? 0 : 1) + (filt[1] ? 0 : 1); + Read* rds[2] = { &ps->bufa(), &ps->bufb() }; + // For each mate... + assert(msinkwrap->empty()); + //size_t minedfw[2] = { 0, 0 }; + //size_t minedrc[2] = { 0, 0 }; + // Calcualte nofw / no rc + bool nofw[2] = { false, false }; + bool norc[2] = { false, false }; + if (threeN) { + nofw[0] = paired ? (gMate1fw ? gNofw3N : gNorc3N) : gNofw3N; + norc[0] = paired ? (gMate1fw ? gNorc3N : gNofw3N) : gNorc3N; + nofw[1] = paired ? (gMate2fw ? gNofw3N : gNorc3N) : gNofw3N; + norc[1] = paired ? (gMate2fw ? gNorc3N : gNofw3N) : gNorc3N; + } else { + nofw[0] = paired ? (gMate1fw ? gNofw : gNorc) : gNofw; + norc[0] = paired ? (gMate1fw ? gNorc : gNofw) : gNorc; + nofw[1] = paired ? (gMate2fw ? gNofw : gNorc) : gNofw; + norc[1] = paired ? (gMate2fw ? 
gNorc : gNofw) : gNorc; + } + + // Calculate nceil + int nceil[2] = { 0, 0 }; + nceil[0] = nCeil.f((double)rdlens[0]); + nceil[0] = min(nceil[0], (int)rdlens[0]); + if(paired) { + nceil[1] = nCeil.f((double)rdlens[1]); + nceil[1] = min(nceil[1], (int)rdlens[1]); + } + exhaustive[0] = exhaustive[1] = false; + //size_t matemap[2] = { 0, 1 }; + bool pairPostFilt = filt[0] && filt[1]; + if(pairPostFilt) { + rnd.init(ps->bufa().seed ^ ps->bufb().seed); + } else { + rnd.init(ps->bufa().seed); + } + // Calculate interval length for both mates + int interval[2] = { 0, 0 }; + for(size_t mate = 0; mate < (pair ? 2:1); mate++) { + interval[mate] = msIval.f((double)rdlens[mate]); + if(filt[0] && filt[1]) { + // Boost interval length by 20% for paired-end reads + interval[mate] = (int)(interval[mate] * 1.2 + 0.5); + } + interval[mate] = max(interval[mate], 1); + } + // Calculate streak length + size_t streak[2] = { maxDpStreak, maxDpStreak }; + size_t mtStreak[2] = { maxMateStreak, maxMateStreak }; + size_t mxDp[2] = { maxDp, maxDp }; + size_t mxUg[2] = { maxUg, maxUg }; + size_t mxIter[2] = { maxIters, maxIters }; + if(allHits) { + streak[0] = streak[1] = std::numeric_limits::max(); + mtStreak[0] = mtStreak[1] = std::numeric_limits::max(); + mxDp[0] = mxDp[1] = std::numeric_limits::max(); + mxUg[0] = mxUg[1] = std::numeric_limits::max(); + mxIter[0] = mxIter[1] = std::numeric_limits::max(); + } else if(khits > 1) { + for(size_t mate = 0; mate < 2; mate++) { + streak[mate] += (khits-1) * maxStreakIncr; + mtStreak[mate] += (khits-1) * maxStreakIncr; + mxDp[mate] += (khits-1) * maxItersIncr; + mxUg[mate] += (khits-1) * maxItersIncr; + mxIter[mate] += (khits-1) * maxItersIncr; + } + } + if(filt[0] && filt[1]) { + streak[0] = (size_t)ceil((double)streak[0] / 2.0); + streak[1] = (size_t)ceil((double)streak[1] / 2.0); + assert_gt(streak[1], 0); + } + assert_gt(streak[0], 0); + // Calculate # seed rounds for each mate + size_t nrounds[2] = { nSeedRounds, nSeedRounds }; + if(filt[0] && 
filt[1]) { + nrounds[0] = (size_t)ceil((double)nrounds[0] / 2.0); + nrounds[1] = (size_t)ceil((double)nrounds[1] / 2.0); + assert_gt(nrounds[1], 0); + } + assert_gt(nrounds[0], 0); + // Increment counters according to what got filtered + for(size_t mate = 0; mate < (pair ? 2:1); mate++) { + if(!filt[mate]) { + // Mate was rejected by N filter + olm.freads++; // reads filtered out + olm.fbases += rdlens[mate]; // bases filtered out + } else { + //shs[mate].clear(); + //shs[mate].nextRead(mate == 0 ? ps->bufa() : ps->bufb()); + //assert(shs[mate].empty()); + olm.ureads++; // reads passing filter + olm.ubases += rdlens[mate]; // bases passing filter + } + } + //size_t eePeEeltLimit = std::numeric_limits::max(); + // Whether we're done with mate1 / mate2 + bool done[2] = { !filt[0], !filt[1] }; + // size_t nelt[2] = {0, 0}; + if(filt[0] && filt[1]) { + splicedAligner.initReads(rds, nofw, norc, minsc, maxpen); + } else if(filt[0]) { + splicedAligner.initRead(rds[0], nofw[0], norc[0], minsc[0], maxpen[0], false); + } else if(filt[1]) { + splicedAligner.initRead(rds[1], nofw[1], norc[1], minsc[1], maxpen[1], true); + } + if(filt[0] || filt[1]) { + int ret; + int threeN_index; + bool useRepeat; + + if (threeN) { + threeN_index = (mappingCycle == threeN_type1conversion_FW || mappingCycle == threeN_type2conversion_RC) ? 0 : 1; + useRepeat = paired ? (ps->bufa().length() >= 100) && (ps->bufb().length() >= 100) : + ps->bufa().length() >= 80; + } + + ret = splicedAligner.go( + sc, + pepol, + *multiseed_tpol, + *gpol, + threeN ? *gfm_3N[threeN_index] : gfm, + threeN ?(useRepeat ? rgfm_3N[threeN_index] : NULL) : rgfm, + threeN ? *altdbs_3N[threeN_index] : *altdb, + threeN ? *repeatdbs_3N[threeN_index] : *repeatdb, + threeN ? *raltdbs_3N[threeN_index] : *raltdb, + ref, + threeN ? 
rref_3N[threeN_index] : rref, + sw, + *ssdb, + wlm, + prm, + swmSeed, + him, + rnd, + *msinkwrap); + + MERGE_SW(sw); + // daehwan + size_t mate = 0; + + assert_gt(ret, 0); + // Clear out the exact hits so that we don't try to + // extend them again later! + if(ret == EXTEND_EXHAUSTED_CANDIDATES) { + // Not done yet + } else if(ret == EXTEND_POLICY_FULFILLED) { + // Policy is satisfied for this mate at least + if(msinkwrap->state().doneWithMate(mate == 0)) { + done[mate] = true; + } + if(msinkwrap->state().doneWithMate(mate == 1)) { + done[mate^1] = true; + } + } else if(ret == EXTEND_PERFECT_SCORE) { + // We exhausted this mode at least + done[mate] = true; + } else if(ret == EXTEND_EXCEEDED_HARD_LIMIT) { + // We exceeded a per-read limit + done[mate] = true; + } else if(ret == EXTEND_EXCEEDED_SOFT_LIMIT) { + // Not done yet + } else { + // + cerr << "Bad return value: " << ret << endl; + throw 1; + } + if(!done[mate]) { + TAlScore perfectScore = sc.perfectScore(rdlens[mate]); + if(!done[mate] && minsc[mate] == perfectScore) { + done[mate] = true; + } + } + } + + for(size_t i = 0; i < 2; i++) { + assert_leq(prm.nExIters, mxIter[i]); + assert_leq(prm.nExDps, mxDp[i]); + assert_leq(prm.nMateDps, mxDp[i]); + assert_leq(prm.nExUgs, mxUg[i]); + assert_leq(prm.nMateUgs, mxUg[i]); + assert_leq(prm.nDpFail, streak[i]); + assert_leq(prm.nUgFail, streak[i]); + assert_leq(prm.nEeFail, streak[i]); + } + + msinkwrap->finishRead( + NULL, + NULL, + exhaustive[0], // exhausted seed hits for mate 1? + exhaustive[1], // exhausted seed hits for mate 2? + nfilt[0], + nfilt[1], + scfilt[0], + scfilt[1], + lenfilt[0], + lenfilt[1], + qcfilt[0], + qcfilt[1], + sortByScore, // prioritize by alignment score + rnd, // pseudo-random generator + rpm, // reporting metrics + prm, // per-read metrics + sc, // scoring scheme + !seedSumm, // suppress seed summaries? + seedSumm, //rdid suppress alignments? 
+ templateLenAdjustment); + mappingCycle++; + } + + + } // if(rdid >= skipReads && rdid < qUpto) + else if(rdid >= qUpto) { + break; + } + if(metricsPerRead) { + MERGE_METRICS(metricsPt, nthreads > 1); + nametmp = ps->bufa().name; + metricsPt.reportInterval( + metricsOfb, metricsStderr, true, true, &nametmp); + metricsPt.reset(); + } + } // while(true) + + // One last metrics merge + MERGE_METRICS(metrics, nthreads > 1); + delete msinkwrap; + return; +} + +/** + * Called once per alignment job. Sets up global pointers to the + * shared global data structures, creates per-thread structures, then + * enters the search loop. + */ +static void multiseedSearch( + Scoring& sc, + TranscriptomePolicy& tpol, + GraphPolicy& gp, + PairedPatternSource& patsrc, // pattern source + AlnSink& msink, // hit sink + EList* > gfms_3N, // 3N index of original text + RFM* rgfms_3N[2], // 3N index of repeat sequences + BitPairReference* rrefss[2], // 3N repeat reference + HGFM* gfm, // index of original text + RFM* rgfm, // index of repeat sequences + BitPairReference* refs, // base reference + BitPairReference* rrefs, // repeat reference + OutFileBuf *metricsOfb) +{ + multiseed_patsrc = &patsrc; + multiseed_msink = &msink; + multiseed_sc = ≻ + multiseed_tpol = &tpol; + gpol = &gp; + multiseed_metricsOfb = metricsOfb; + multiseed_refs = refs; + if (threeN) { + ref3N.load(gfms_3N, rgfms_3N, rrefss); + } else { + multiseed_gfm = gfm; + multiseed_rgfm = rgfm; + multiseed_rrefs = rrefs; + } + + AutoArray threads(nthreads); + AutoArray tids(nthreads); + // Start the metrics thread + { + Timer _t(cerr, "Multiseed full-index search: ", timing); + + thread_rids.resize(nthreads); + thread_rids.fill(0); + thread_rids_mindist = (nthreads == 1 || !useTempSpliceSite ? 
0 : 1000 * nthreads); + for(int i = 0; i < nthreads; i++) { + // Thread IDs start at 1 + tids[i] = i+1; + threads[i] = new tthread::thread(multiseedSearchWorker_hisat2, (void*)&tids[i]); + } + + for (int i = 0; i < nthreads; i++) + threads[i]->join(); + + } + if(!metricsPerRead && (metricsOfb != NULL || metricsStderr)) { + metrics.reportInterval(metricsOfb, metricsStderr, true, false, NULL); + } +} + +static string argstr; + +extern void initializeCntLut(); +extern void initializeCntBit(); + +template +static void driver( + const char * type, + const string bt2indexBases[2], + const string& outfile) +{ + if(gVerbose || startVerbose) { + cerr << "Entered driver(): "; logTime(cerr, true); + } + + if (gVerbose || startVerbose) { + cerr << "Running in " << ((threeN) ? "3N" : "Regular") << " Mode" << endl; + } + + initializeCntLut(); + initializeCntBit(); + + // Vector of the reference sequences; used for sanity-checking + EList > names, os; + EList nameLens, seqLens; + // Read reference sequences from the command-line or from a FASTA file + if(!origString.empty()) { + // Read fasta file(s) + EList origFiles; + tokenize(origString, ",", origFiles); + parseFastas(origFiles, names, nameLens, os, seqLens); + } + PatternParams pp( + format, // file format + fileParallel, // true -> wrap files with separate PairedPatternSources + seed, // pseudo-random seed + useSpinlock, // use spin locks instead of pthreads + solexaQuals, // true -> qualities are on solexa64 scale + phred64Quals, // true -> qualities are on phred64 scale + integerQuals, // true -> qualities are space-separated numbers + fuzzy, // true -> try to parse fuzzy fastq + fastaContLen, // length of sampled reads for FastaContinuous... + fastaContFreq, // frequency of sampled reads for FastaContinuous... 
+ skipReads // skip the first 'skip' patterns + ); + if(gVerbose || startVerbose) { + cerr << "Creating PatternSource: "; logTime(cerr, true); + } + PairedPatternSource *patsrc = PairedPatternSource::setupPatternSources( + queries, // singles, from argv + mates1, // mate1's, from -1 arg + mates2, // mate2's, from -2 arg + mates12, // both mates on each line, from --12 arg +#ifdef USE_SRA + sra_accs, // SRA accessions +#endif + qualities, // qualities associated with singles + qualities1, // qualities associated with m1 + qualities2, // qualities associated with m2 + pp, // read read-in parameters + nthreads, + gVerbose || startVerbose); // be talkative + // Open hit output file + if(gVerbose || startVerbose) { + cerr << "Opening hit output file: "; logTime(cerr, true); + } + OutFileBuf *fout; + if(!outfile.empty()) { + fout = new OutFileBuf(outfile.c_str(), false); + } else { + fout = new OutFileBuf(); + } + + // Initialize GFM object and read in header + if(gVerbose || startVerbose) { + cerr << "About to initialize fw GFM: "; logTime(cerr, true); + } + + // for 3N + if (threeN) { + for (int i = 0; i < 2; i++) { + altdbs_3N[i] = new ALTDB(); + repeatdbs_3N[i] = new RepeatDB(); + raltdbs_3N[i] = new ALTDB(); + } + } + + EList* >gfms_3N; + RFM* rgfms_3N[2]; + for (int i = 0; i < 2; i++) { + rgfms_3N[i] = NULL; + } + bool rep_index_exists_3N[2]{false}; + bool rep_index_exists = false; + string rep_adjIdxBase_3N[2]; + string rep_adjIdxBase; + + HGFM* gfm; + RFM* rgfm = NULL; + + if (threeN) { + for (int j = 0; j < 2; j++) { + adjIdxBases_3N[j] = adjustEbwtBase(argv0, bt2indexBases[j], gVerbose); + HGFM *tmp_gfm = new HGFM( + adjIdxBases_3N[j], + altdbs_3N[j], + NULL, + NULL, + -1, // fw index + true, // index is for the forward direction + /* overriding: */ offRate, + 0, // amount to add to index offrate or <= 0 to do nothing + useMm, // whether to use memory-mapped files + useShmem, // whether to use shared memory + mmSweep, // sweep memory-mapped files + !noRefNames, 
// load names? + true, // load SA sample? + true, // load ftab? + true, // load rstarts? + !no_spliced_alignment, // load splice sites? + gVerbose, // whether to be talkative + startVerbose, // talkative during initialization + false /*passMemExc*/, + sanityCheck, + use_haplotype); //use haplotypes? + + gfms_3N.push_back(tmp_gfm); + + if(sanityCheck && !os.empty()) { + // Sanity check number of patterns and pattern lengths in GFM + // against original strings + assert_eq(os.size(), gfms_3N[j]->nPat()); + + for(size_t i = 0; i < os.size(); i++) { + assert_eq(os[i].length(), gfms_3N[j]->plen()[i]); + } + } + if(sanityCheck && !os.empty()) { + gfms_3N[j]->loadIntoMemory( + -1, // fw index + true, // load SA sample + true, // load ftab + true, // load rstarts + !noRefNames, + startVerbose); + gfms_3N[j]->checkOrigs(os, false); + gfms_3N[j]->evictFromMemory(); + } + { + // Load the other half of the index into memory + assert(!gfms_3N[j]->isInMemory()); + Timer _t(cerr, "Time loading forward index: ", timing); + gfms_3N[j]->loadIntoMemory( + -1, // not the reverse index + true, // load SA samp? (yes, need forward index's SA samp) + true, // load ftab (in forward index) + true, // load rstarts (in forward index) + !noRefNames, // load names? + startVerbose); + } + + + rep_adjIdxBase_3N[j] = adjIdxBases_3N[j] + ".rep"; + { + std::ifstream infile((rep_adjIdxBase_3N[j] + ".1." + gfm_ext.c_str()).c_str()); + rep_index_exists_3N[j] = infile.good(); + } + + if(rep_index_exists_3N[j] && use_repeat_index) { + rgfms_3N[j] = new RFM( + rep_adjIdxBase_3N[j], + raltdbs_3N[j], + repeatdbs_3N[j], + &readLens, + -1, // fw index + true, // index is for the forward direction + /* overriding: */ offRate, + 0, // amount to add to index offrate or <= 0 to do nothing + useMm, // whether to use memory-mapped files + useShmem, // whether to use shared memory + mmSweep, // sweep memory-mapped files + !noRefNames, // load names? + true, // load SA sample? + true, // load ftab? 
+ true, // load rstarts? + !no_spliced_alignment, // load splice sites? + gVerbose, // whether to be talkative + startVerbose, // talkative during initialization + false /*passMemExc*/, + sanityCheck, + false); //use haplotypes? + + // CP to do +#if 0 + if(sanityCheck && !os.empty()) { + // Sanity check number of patterns and pattern lengths in GFM + // against original strings + assert_eq(os.size(), gfm.nPat()); + for(size_t i = 0; i < os.size(); i++) { + assert_eq(os[i].length(), rgfm->plen()[i]); + } + } + // Sanity-check the restored version of the GFM + if(sanityCheck && !os.empty()) { + rgfm->loadIntoMemory( + -1, // fw index + true, // load SA sample + true, // load ftab + true, // load rstarts + !noRefNames, + startVerbose); + rgfm->checkOrigs(os, false); + rgfm->evictFromMemory(); + } +#endif + { + // Load the other half of the index into memory + assert(!rgfms_3N[j]->isInMemory()); + Timer _t(cerr, "Time loading forward index: ", timing); + rgfms_3N[j]->loadIntoMemory( + -1, // not the reverse index + true, // load SA samp? (yes, need forward index's SA samp) + true, // load ftab (in forward index) + true, // load rstarts (in forward index) + !noRefNames, // load names? 
+ startVerbose); + + repeatdbs_3N[j]->construct(gfms_3N[j]->rstarts(), gfms_3N[j]->nFrag()); + } + + if (threeN) { + ht2_option_t option; + ht2_init_options(&option); + + option.altdb = altdbs_3N[j]; + option.raltdb = raltdbs_3N[j]; + option.repeatdb = repeatdbs_3N[j]; + option.gfm = gfms_3N[j]; + option.rgfm = rgfms_3N[j]; + + ht2_handle_t handle = ht2_init(adjIdxBases_3N[j].c_str(), &option); + + repeatHandles.push_back(handle); + if (refNameMap == NULL) { + ht2_index_getrefnames(repeatHandles[0], &refNameMap); + } + } + } + + + if(!saw_k) { + if(gfms_3N[j]->gh().linearFM()) khits = 5; + else khits = 10; + } + } + } else { + altdb = new ALTDB(); + repeatdb = new RepeatDB(); + raltdb = new ALTDB(); + adjIdxBase = adjustEbwtBase(argv0, bt2indexBases[0], gVerbose); + gfm = new HGFM( + adjIdxBase, + altdb, + NULL, + NULL, + -1, // fw index + true, // index is for the forward direction + /* overriding: */ offRate, + 0, // amount to add to index offrate or <= 0 to do nothing + useMm, // whether to use memory-mapped files + useShmem, // whether to use shared memory + mmSweep, // sweep memory-mapped files + !noRefNames, // load names? + true, // load SA sample? + true, // load ftab? + true, // load rstarts? + !no_spliced_alignment, // load splice sites? + gVerbose, // whether to be talkative + startVerbose, // talkative during initialization + false /*passMemExc*/, + sanityCheck, + use_haplotype); //use haplotypes? 
+ if(sanityCheck && !os.empty()) { + // Sanity check number of patterns and pattern lengths in GFM + // against original strings + assert_eq(os.size(), gfm->nPat()); + for(size_t i = 0; i < os.size(); i++) { + assert_eq(os[i].length(), gfm->plen()[i]); + } + } + // Sanity-check the restored version of the GFM + if(sanityCheck && !os.empty()) { + gfm->loadIntoMemory( + -1, // fw index + true, // load SA sample + true, // load ftab + true, // load rstarts + !noRefNames, + startVerbose); + gfm->checkOrigs(os, false); + gfm->evictFromMemory(); + } + { + // Load the other half of the index into memory + assert(!gfm->isInMemory()); + Timer _t(cerr, "Time loading forward index: ", timing); + gfm->loadIntoMemory( + -1, // not the reverse index + true, // load SA samp? (yes, need forward index's SA samp) + true, // load ftab (in forward index) + true, // load rstarts (in forward index) + !noRefNames, // load names? + startVerbose); + } + rep_adjIdxBase = adjIdxBase + ".rep"; + + { + std::ifstream infile((rep_adjIdxBase + ".1." + gfm_ext.c_str()).c_str()); + rep_index_exists = infile.good(); + } + if(rep_index_exists && use_repeat_index) { + rgfm = new RFM( + rep_adjIdxBase, + raltdb, + repeatdb, + &readLens, + -1, // fw index + true, // index is for the forward direction + /* overriding: */ offRate, + 0, // amount to add to index offrate or <= 0 to do nothing + useMm, // whether to use memory-mapped files + useShmem, // whether to use shared memory + mmSweep, // sweep memory-mapped files + !noRefNames, // load names? + true, // load SA sample? + true, // load ftab? + true, // load rstarts? + !no_spliced_alignment, // load splice sites? + gVerbose, // whether to be talkative + startVerbose, // talkative during initialization + false /*passMemExc*/, + sanityCheck, + false); //use haplotypes? 
+ + // CP to do +#if 0 + if(sanityCheck && !os.empty()) { + // Sanity check number of patterns and pattern lengths in GFM + // against original strings + assert_eq(os.size(), gfm.nPat()); + for(size_t i = 0; i < os.size(); i++) { + assert_eq(os[i].length(), rgfm->plen()[i]); + } + } + // Sanity-check the restored version of the GFM + if(sanityCheck && !os.empty()) { + rgfm->loadIntoMemory( + -1, // fw index + true, // load SA sample + true, // load ftab + true, // load rstarts + !noRefNames, + startVerbose); + rgfm->checkOrigs(os, false); + rgfm->evictFromMemory(); + } +#endif + { + // Load the other half of the index into memory + assert(!rgfm->isInMemory()); + Timer _t(cerr, "Time loading forward index: ", timing); + rgfm->loadIntoMemory( + -1, // not the reverse index + true, // load SA samp? (yes, need forward index's SA samp) + true, // load ftab (in forward index) + true, // load rstarts (in forward index) + !noRefNames, // load names? + startVerbose); + + repeatdb->construct(gfm->rstarts(), gfm->nFrag()); + } + } + + if(!saw_k) { + if(gfm->gh().linearFM()) khits = 5; + else khits = 10; + } + } // else threeN + + OutputQueue oq( + *fout, // out file buffer + reorder && nthreads > 1, // whether to reorder when there's >1 thread + nthreads, // # threads + nthreads > 1, // whether to be thread-safe + skipReads); // first read will have this rdid + { + Timer _t(cerr, "Time searching: ", timing); + // Set up penalities + if(bonusMatch > 0 && !localAlign) { + cerr << "Warning: Match bonus always = 0 in --end-to-end mode; ignoring user setting" << endl; + bonusMatch = 0; + } + if(tranAssm) { + penNoncanIntronLen.init(SIMPLE_FUNC_LOG, -8, 2); + } + Scoring sc( + bonusMatch, // constant reward for match + penMmcType, // how to penalize mismatches + penMmcMax, // max mm penalty + penMmcMin, // min mm penalty + penScMax, // max sc penalty + penScMin, // min sc penalty + scoreMin, // min score as function of read len + nCeil, // max # Ns as function of read len + 
penNType, // how to penalize Ns in the read + penN, // constant if N pelanty is a constant + penNCatPair, // whether to concat mates before N filtering + penRdGapConst, // constant coeff for read gap cost + penRfGapConst, // constant coeff for ref gap cost + penRdGapLinear, // linear coeff for read gap cost + penRfGapLinear, // linear coeff for ref gap cost + gGapBarrier, // # rows at top/bot only entered diagonally + penCanSplice, // canonical splicing penalty + penNoncanSplice,// non-canonical splicing penalty + penConflictSplice, // conflicting splice site penalty + &penCanIntronLen, // penalty as to intron length + &penNoncanIntronLen); // penalty as to intron length + + EList reflens; + // for HISAT-3N + EList refnames_3N[2]; + EList replens_3N[2]; + EList repnames_3N[2]; + EList empty_replens_3N[2]; + EList empty_repnames_3N[2]; + + //for regular hisat2 + EList refnames; + //readEbwtRefnames(adjIdxBase, refnames); + EList replens; + EList repnames; + EList empty_replens; + EList empty_repnames; + + + if (threeN) { + for(size_t i = 0; i < gfms_3N[0]->nPat(); i++) { + reflens.push_back(gfms_3N[0]->plen()[i]); + } + for (int j = 0; j < 2; j++) { + readEbwtRefnames(adjIdxBases_3N[j], refnames_3N[j]); + if (rep_index_exists_3N[j] && use_repeat_index) { + rgfms_3N[j]->getReferenceNames(repnames_3N[j]); + rgfms_3N[j]->getReferenceLens(replens_3N[j]); + } + if(rmChrName && addChrName) { + cerr << "Error: --remove-chrname and --add-chrname cannot be used at the same time" << endl; + throw 1; + } + if(rmChrName) { + for(size_t i = 0; i < refnames_3N[j].size(); i++) { + string& refname = refnames_3N[j][i]; + if(refname.find("chr") == 0) { + refname = refname.substr(3); + } + } + } else if(addChrName) { + for(size_t i = 0; i < refnames_3N[j].size(); i++) { + string& refname = refnames_3N[j][i]; + if(refname.find("chr") != 0) { + refname = string("chr") + refname; + } + } + } + } + } else { + readEbwtRefnames(adjIdxBase, refnames); + for(size_t i = 0; i < gfm->nPat(); 
i++) { + reflens.push_back(gfm->plen()[i]); + } + if(rep_index_exists && use_repeat_index) { + rgfm->getReferenceNames(repnames); + rgfm->getReferenceLens(replens); + } + if(rmChrName && addChrName) { + cerr << "Error: --remove-chrname and --add-chrname cannot be used at the same time" << endl; + throw 1; + } + if(rmChrName) { + for(size_t i = 0; i < refnames.size(); i++) { + string& refname = refnames[i]; + if(refname.find("chr") == 0) { + refname = refname.substr(3); + } + } + } else if(addChrName) { + for(size_t i = 0; i < refnames.size(); i++) { + string& refname = refnames[i]; + if(refname.find("chr") != 0) { + refname = string("chr") + refname; + } + } + } + } + + SamConfig samc( + threeN ? refnames_3N[0]: refnames, // reference sequence names + reflens, // reference sequence lengths + threeN?(repeat ? repnames_3N[0] : empty_repnames_3N[0]): (repeat ? repnames : empty_repnames), // repeat sequence names + threeN? (repeat ? replens_3N[0] : empty_replens_3N[0]): (repeat ? replens : empty_replens), // repeat sequence lengths + samTruncQname, // whether to truncate QNAME to 255 chars + samOmitSecSeqQual, // omit SEQ/QUAL for 2ndary alignments? + samNoUnal, // omit unaligned-read records? 
+ string("hisat2"), // program id + string("hisat2"), // program name + string(HISAT2_VERSION), // program version + argstr, // command-line + rgs_optflag, // read-group string + rna_strandness, + sam_print_as, + sam_print_xs, + sam_print_xss, + sam_print_yn, + sam_print_xn, + sam_print_cs, + sam_print_cq, + sam_print_x0, + sam_print_x1, + sam_print_xm, + sam_print_xo, + sam_print_xg, + sam_print_nm, + sam_print_md, + sam_print_yf, + sam_print_yi, + sam_print_ym, + sam_print_yp, + sam_print_yt, + sam_print_ys, + sam_print_zs, + sam_print_xr, + sam_print_xt, + sam_print_xd, + sam_print_xu, + sam_print_yl, + sam_print_ye, + sam_print_yu, + sam_print_xp, + sam_print_yr, + sam_print_zb, + sam_print_zr, + sam_print_zf, + sam_print_zm, + sam_print_zi, + sam_print_zp, + sam_print_zu, + sam_print_xs_a, + sam_print_nh); + // Set up hit sink; if sanityCheck && !os.empty() is true, + // then instruct the sink to "retain" hits in a vector in + // memory so that we can easily sanity check them later on + + + AlnSink *mssink = NULL; + + //auto_ptr refss[2]; + auto_ptr refs; + + Timer *_tRef = new Timer(cerr, "Time loading reference: ", timing); + refs = auto_ptr( + new BitPairReference( + threeN ? 
adjIdxBases_3N[0] : adjIdxBase, + NULL, + false, + sanityCheck, + NULL, + NULL, + false, + useMm, + useShmem, + mmSweep, + gVerbose, + startVerbose) + ); + delete _tRef; + if(!refs->loaded()) throw 1; + + + + + BitPairReference* rrefss[2] = {NULL, }; + BitPairReference* rrefs = NULL; + + if (threeN) { + for (int j = 0; j < 2; j++) { + if (rep_index_exists_3N[j] && use_repeat_index) { + const EList &included = rgfms_3N[j]->getRepeatIncluded(); + rrefss[j] = new BitPairReference( + rep_adjIdxBase_3N[j], + &included, + false, + sanityCheck, + NULL, + NULL, + false, + useMm, + useShmem, + mmSweep, + gVerbose, + startVerbose); + if (!rrefss[j]->loaded()) throw 1; + } + } + } else { + if(rep_index_exists && use_repeat_index) { + const EList& included = rgfm->getRepeatIncluded(); + rrefs = new BitPairReference( + rep_adjIdxBase, + &included, + false, + sanityCheck, + NULL, + NULL, + false, + useMm, + useShmem, + mmSweep, + gVerbose, + startVerbose); + if(!rrefs->loaded()) throw 1; + } + } + + + bool xsOnly = (tranAssm_program == "cufflinks"); + TranscriptomePolicy tpol(minIntronLen, + maxIntronLen, + tranAssm ? 15 : 7, + tranAssm ? 20 : 14, + no_spliced_alignment, + tranMapOnly, + tranAssm, + xsOnly, + avoid_pseudogene); + + GraphPolicy gpol(max_alts_tried, + use_haplotype, + (threeN ? altdbs_3N[0]->haplotypes().size() : altdb->haplotypes().size()) > 0 && use_haplotype, + enable_codis); + + init_junction_prob(); + bool write = novelSpliceSiteOutfile != "" || useTempSpliceSite; + bool read = knownSpliceSiteInfile != "" || novelSpliceSiteInfile != "" || useTempSpliceSite || altdbs_3N[0]->hasSpliceSites(); + ssdb = new SpliceSiteDB( + *(refs.get()), + threeN ? refnames_3N[0] : refnames, + nthreads > 1, // thread-safe + write, // write? + read); // read? + ssdb->read(threeN ? *gfms_3N[0] : *gfm, threeN ? 
altdbs_3N[0]->alts() : altdb->alts()); + if(knownSpliceSiteInfile != "") { + ifstream ssdb_file(knownSpliceSiteInfile.c_str(), ios::in); + if(ssdb_file.is_open()) { + ssdb->read(ssdb_file, + true); // known splice sites + ssdb_file.close(); + } + } + if(novelSpliceSiteInfile != "") { + ifstream ssdb_file(novelSpliceSiteInfile.c_str(), ios::in); + if(ssdb_file.is_open()) { + ssdb->read(ssdb_file, + false); // novel splice sites + ssdb_file.close(); + } + } + + switch(outType) { + case OUTPUT_SAM: { + if (threeN) { + mssink = new AlnSink3NSam( + oq, // output queue + samc, // settings & routines for SAM output + refnames_3N[0], // reference names + repnames_3N[0], // repeat names + gQuiet, // don't print alignment summary at end + nthreads, + refs.get(), + no_spliced_alignment, + altdbs_3N[0], + ssdb); + } else { + mssink = new AlnSinkSam( + oq, // output queue + samc, // settings & routines for SAM output + refnames, // reference names + repnames, // repeat names + gQuiet, // don't print alignment summary at end + altdb, + ssdb); + }; + + if(!samNoHead) { + bool printHd = true, printSq = true; + BTString buf; + samc.printHeader(buf, rgid, rgs, printHd, !samNoSQ, printSq); + fout->writeString(buf); + } + break; + } + default: + cerr << "Invalid output type: " << outType << endl; + throw 1; + } + if(gVerbose || startVerbose) { + cerr << "Dispatching to search driver: "; logTime(cerr, true); + } + // Set up global constraint + OutFileBuf *metricsOfb = NULL; + if(!metricsFile.empty() && metricsIval > 0) { + metricsOfb = new OutFileBuf(metricsFile); + } + // Do the search for all input reads + assert(patsrc != NULL); + assert(mssink != NULL); + multiseedSearch( + sc, // scoring scheme + tpol, + gpol, + *patsrc, // pattern source + *mssink, // hit sink + gfms_3N, // 3N BWT + rgfms_3N, // 3N + rrefss, // 3N + gfm, // BWT + rgfm, + refs.get(), + rrefs, + metricsOfb); + // Evict any loaded indexes from memory + if (threeN) { + for (int j = 0; j < 2; j++) { + 
if(gfms_3N[j]->isInMemory()) { + gfms_3N[j]->evictFromMemory(); + } + } + } else { + if(gfm->isInMemory()) { + gfm->evictFromMemory(); + } + } + + if(!gQuiet && !seedSumm) { + size_t repThresh = mhits; + if(repThresh == 0) { + repThresh = std::numeric_limits::max(); + } + mssink->finish(cerr, + repThresh, + gReportDiscordant, + gReportMixed, + newAlignSummary, + hadoopOut); + if(alignSumFile != "") { + ofstream sumfile(alignSumFile.c_str(), ios::out); + if(sumfile.is_open()) { + mssink->finish(sumfile, + repThresh, + gReportDiscordant, + gReportMixed, + newAlignSummary, + false); // hadoopOut + sumfile.close(); + } + } + } + if(ssdb != NULL) { + if(novelSpliceSiteOutfile != "") { + ofstream ssdb_file(novelSpliceSiteOutfile.c_str(), ios::out); + if(ssdb_file.is_open()) { + ssdb->print(ssdb_file); + ssdb_file.close(); + } + } + } + oq.flush(true); + assert_eq(oq.numStarted(), oq.numFinished()); + assert_eq(oq.numStarted(), oq.numFlushed()); + delete patsrc; + delete mssink; + delete ssdb; + delete metricsOfb; + if (threeN) { + for (int i = 0; i < 2; i++) { + if(rep_index_exists_3N[i] && use_repeat_index) { + delete rgfms_3N[i]; + delete rrefss[i]; + delete repeatdbs_3N[i]; + delete raltdbs_3N[i]; + } + delete gfms_3N[i]; + delete altdbs_3N[i]; + } + if(rep_index_exists_3N[0] && use_repeat_index){ + for (int k = 0; k < 2; k++) { + ht2_close(repeatHandles[k]); + } + } + } else { + delete altdb; + delete repeatdb; + delete raltdb; + delete rgfm; + delete rrefs; + delete gfm; + } + if (refNameMap != NULL) { + free(refNameMap); + } + if(fout != NULL) { + delete fout; + } + } +} + +// C++ name mangling is disabled for the bowtie() function to make it +// easier to use Bowtie as a library. +extern "C" { + +/** + * Main bowtie entry function. Parses argc/argv style command-line + * options, sets global configuration variables, and calls the driver() + * function. 
+ */ +int hisat2(int argc, const char **argv) { + try { + // Reset all global state, including getopt state + opterr = optind = 1; + resetOptions(); + for(int i = 0; i < argc; i++) { + argstr += argv[i]; + if(i < argc-1) argstr += " "; + } + if(startVerbose) { cerr << "Entered main(): "; logTime(cerr, true); } + parseOptions(argc, argv); + argv0 = argv[0]; + if(showVersion) { + cout << argv0 << " version " << HISAT2_VERSION << endl; + if(sizeof(void*) == 4) { + cout << "32-bit" << endl; + } else if(sizeof(void*) == 8) { + cout << "64-bit" << endl; + } else { + cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + } + cout << "Built on " << BUILD_HOST << endl; + cout << BUILD_TIME << endl; + cout << "Compiler: " << COMPILER_VERSION << endl; + cout << "Options: " << COMPILER_OPTIONS << endl; + cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" + << sizeof(int) + << ", " << sizeof(long) << ", " << sizeof(long long) + << ", " << sizeof(void *) << ", " << sizeof(size_t) + << ", " << sizeof(off_t) << "}" << endl; + return 0; + } + { + Timer _t(cerr, "Overall time: ", timing); + if(startVerbose) { + cerr << "Parsing index and read arguments: "; logTime(cerr, true); + } + + // Get index basename (but only if it wasn't specified via --index) + if(bt2indexs[0].empty()) { + if(optind >= argc) { + cerr << "No index, query, or output file specified!" << endl; + printUsage(cerr); + return 1; + } + bt2indexs[0] = argv[optind++]; + } + if (threeN) { + bt2indexs[1] = bt2indexs[0]; + if (fileExist(bt2indexs[0] + threeN_indexTags[0] + ".1." + gfm_ext)) { + bt2indexs[0] += threeN_indexTags[0]; + bt2indexs[1] += threeN_indexTags[1]; + } else if (fileExist(bt2indexs[0] + ".3n.1.1." 
+ gfm_ext)) { + bt2indexs[0] += ".3n.1"; + bt2indexs[1] += ".3n.2"; + if (!((usrInput_convertedFrom == 'C' && usrInput_convertedTo == 'T') || + (usrInput_convertedFrom == 'T' && usrInput_convertedTo == 'C'))) { + cerr << "Your current hisat-3n index only support C-to-T or T-to-C base change. Please build new hisat-3n index to support " + << usrInput_convertedFrom << " to " << usrInput_convertedTo << "change." << endl; + printUsage(cerr); + return 1; + } + } else { + cerr << "Index is not exist, please use hisat-3n-build to build index first. Please use the same --base-change argument for both hisat-3n-build and hisat-3n." << endl; + printUsage(cerr); + return 1; + } + } + + // Get query filename + bool got_reads = !queries.empty() || !mates1.empty() || !mates12.empty(); +#ifdef USE_SRA + got_reads = got_reads || !sra_accs.empty(); +#endif + if(minIntronLen > maxIntronLen) { + cerr << "--min-intronlen(" << minIntronLen << ") should not be greater than --max-intronlen(" + << maxIntronLen << ")" << endl; + printUsage(cerr); + return 1; + } + if(optind >= argc) { + if(!got_reads) { + printUsage(cerr); + cerr << "***" << endl +#ifdef USE_SRA + << "Error: Must specify at least one read input with -U/-1/-2/--sra-acc" << endl; +#else + << "Error: Must specify at least one read input with -U/-1/-2" << endl; + +#endif + return 1; + } + } else if(!got_reads) { + // Tokenize the list of query files + tokenize(argv[optind++], ",", queries); + if(queries.empty()) { + cerr << "Tokenized query file list was empty!" << endl; + printUsage(cerr); + return 1; + } + } + + // Get output filename + if(optind < argc && outfile.empty()) { + outfile = argv[optind++]; + cerr << "Warning: Output file '" << outfile.c_str() + << "' was specified without -S. This will not work in " + << "future HISAT 2 versions. Please use -S instead." + << endl; + } + + // Extra parametesr? 
+ if(optind < argc) { + cerr << "Extra parameter(s) specified: "; + for(int i = optind; i < argc; i++) { + cerr << "\"" << argv[i] << "\""; + if(i < argc-1) cerr << ", "; + } + cerr << endl; + if(mates1.size() > 0) { + cerr << "Note that if files are specified using -1/-2, a file cannot" << endl + << "also be specified. Please run HISAT2 separately for mates and singles." << endl; + } + throw 1; + } + + // Optionally summarize + if(gVerbose) { + cout << "Input bt2 file: \"" << bt2indexs[0].c_str() << "\"" << endl; + cout << "Input bt2 file: \"" << bt2indexs[1].c_str() << "\"" << endl; + cout << "Query inputs (DNA, " << file_format_names[format].c_str() << "):" << endl; + for(size_t i = 0; i < queries.size(); i++) { + cout << " " << queries[i].c_str() << endl; + } + cout << "Quality inputs:" << endl; + for(size_t i = 0; i < qualities.size(); i++) { + cout << " " << qualities[i].c_str() << endl; + } + cout << "Output file: \"" << outfile.c_str() << "\"" << endl; + cout << "Local endianness: " << (currentlyBigEndian()? "big":"little") << endl; + cout << "Sanity checking: " << (sanityCheck? "enabled":"disabled") << endl; + #ifdef NDEBUG + cout << "Assertions: disabled" << endl; + #else + cout << "Assertions: enabled" << endl; + #endif + } + if(ipause) { + cout << "Press key to continue..." 
<< endl; + getchar(); + } + driver >("DNA", bt2indexs, outfile); + } + return 0; + } catch(std::exception& e) { + cerr << "Error: Encountered exception: '" << e.what() << "'" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + return 1; + } catch(int e) { + if(e != 0) { + cerr << "Error: Encountered internal HISAT2 exception (#" << e << ")" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + } + return e; + } +} // bowtie() +} // extern "C" diff --git a/hisat2.sln b/hisat2.sln new file mode 100644 index 0000000..a073d42 --- /dev/null +++ b/hisat2.sln @@ -0,0 +1,82 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.25420.1 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "codeStubs", "MSVCC\codeStubs.vcxproj", "{08C70FF5-9E0C-42DF-B7BF-C1AF1B0D2CFF}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "search", "MSVCC\search.vcxproj", "{43F946A8-9184-456B-A522-7B411C10A4EB}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "search64", "MSVCC\search64.vcxproj", "{D281E44B-92E9-4CEA-A666-B05530F65000}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "shared", "MSVCC\shared.vcxproj", "{8BEEB701-AA26-4D2B-827B-64071F88AFB3}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "shared64", "MSVCC\shared64.vcxproj", "{E1F391B2-4A3F-43CA-9192-14536ABE783E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hisat2-align-l", "MSVCC\hisat2-align-l.vcxproj", "{55F86D23-4245-4050-BD2D-CC5D4FD0C36B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hisat2-align-s", "MSVCC\hisat2-align-s.vcxproj", "{9D5066DB-ACD2-42E9-BFE6-98C8E7DEA7DA}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hisat2-build-l", "MSVCC\hisat2-build-l.vcxproj", 
"{262E4D55-07C3-4B18-B8E2-F3B6AA4C583E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hisat2-build-s", "MSVCC\hisat2-build-s.vcxproj", "{3B90EEC9-CDFF-424E-B3A3-4B7A5326A43F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hisat2-inspect-l", "MSVCC\hisat2-inspect-l.vcxproj", "{E0A17972-1AF6-429A-A902-3913656B5CFC}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hisat2-inspect-s", "MSVCC\hisat2-inspect-s.vcxproj", "{DA85C3F8-8CDD-4ED4-AF86-05B5556670F7}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {08C70FF5-9E0C-42DF-B7BF-C1AF1B0D2CFF}.Debug|x64.ActiveCfg = Debug|x64 + {08C70FF5-9E0C-42DF-B7BF-C1AF1B0D2CFF}.Debug|x64.Build.0 = Debug|x64 + {08C70FF5-9E0C-42DF-B7BF-C1AF1B0D2CFF}.Release|x64.ActiveCfg = Release|x64 + {08C70FF5-9E0C-42DF-B7BF-C1AF1B0D2CFF}.Release|x64.Build.0 = Release|x64 + {43F946A8-9184-456B-A522-7B411C10A4EB}.Debug|x64.ActiveCfg = Debug|x64 + {43F946A8-9184-456B-A522-7B411C10A4EB}.Debug|x64.Build.0 = Debug|x64 + {43F946A8-9184-456B-A522-7B411C10A4EB}.Release|x64.ActiveCfg = Release|x64 + {43F946A8-9184-456B-A522-7B411C10A4EB}.Release|x64.Build.0 = Release|x64 + {D281E44B-92E9-4CEA-A666-B05530F65000}.Debug|x64.ActiveCfg = Debug|x64 + {D281E44B-92E9-4CEA-A666-B05530F65000}.Debug|x64.Build.0 = Debug|x64 + {D281E44B-92E9-4CEA-A666-B05530F65000}.Release|x64.ActiveCfg = Release|x64 + {D281E44B-92E9-4CEA-A666-B05530F65000}.Release|x64.Build.0 = Release|x64 + {8BEEB701-AA26-4D2B-827B-64071F88AFB3}.Debug|x64.ActiveCfg = Debug|x64 + {8BEEB701-AA26-4D2B-827B-64071F88AFB3}.Debug|x64.Build.0 = Debug|x64 + {8BEEB701-AA26-4D2B-827B-64071F88AFB3}.Release|x64.ActiveCfg = Release|x64 + {8BEEB701-AA26-4D2B-827B-64071F88AFB3}.Release|x64.Build.0 = Release|x64 + {E1F391B2-4A3F-43CA-9192-14536ABE783E}.Debug|x64.ActiveCfg = Debug|x64 + 
{E1F391B2-4A3F-43CA-9192-14536ABE783E}.Debug|x64.Build.0 = Debug|x64 + {E1F391B2-4A3F-43CA-9192-14536ABE783E}.Release|x64.ActiveCfg = Release|x64 + {E1F391B2-4A3F-43CA-9192-14536ABE783E}.Release|x64.Build.0 = Release|x64 + {55F86D23-4245-4050-BD2D-CC5D4FD0C36B}.Debug|x64.ActiveCfg = Debug|x64 + {55F86D23-4245-4050-BD2D-CC5D4FD0C36B}.Debug|x64.Build.0 = Debug|x64 + {55F86D23-4245-4050-BD2D-CC5D4FD0C36B}.Release|x64.ActiveCfg = Release|x64 + {55F86D23-4245-4050-BD2D-CC5D4FD0C36B}.Release|x64.Build.0 = Release|x64 + {9D5066DB-ACD2-42E9-BFE6-98C8E7DEA7DA}.Debug|x64.ActiveCfg = Debug|x64 + {9D5066DB-ACD2-42E9-BFE6-98C8E7DEA7DA}.Debug|x64.Build.0 = Debug|x64 + {9D5066DB-ACD2-42E9-BFE6-98C8E7DEA7DA}.Release|x64.ActiveCfg = Release|x64 + {9D5066DB-ACD2-42E9-BFE6-98C8E7DEA7DA}.Release|x64.Build.0 = Release|x64 + {262E4D55-07C3-4B18-B8E2-F3B6AA4C583E}.Debug|x64.ActiveCfg = Debug|x64 + {262E4D55-07C3-4B18-B8E2-F3B6AA4C583E}.Debug|x64.Build.0 = Debug|x64 + {262E4D55-07C3-4B18-B8E2-F3B6AA4C583E}.Release|x64.ActiveCfg = Release|x64 + {262E4D55-07C3-4B18-B8E2-F3B6AA4C583E}.Release|x64.Build.0 = Release|x64 + {3B90EEC9-CDFF-424E-B3A3-4B7A5326A43F}.Debug|x64.ActiveCfg = Debug|x64 + {3B90EEC9-CDFF-424E-B3A3-4B7A5326A43F}.Debug|x64.Build.0 = Debug|x64 + {3B90EEC9-CDFF-424E-B3A3-4B7A5326A43F}.Release|x64.ActiveCfg = Release|x64 + {3B90EEC9-CDFF-424E-B3A3-4B7A5326A43F}.Release|x64.Build.0 = Release|x64 + {E0A17972-1AF6-429A-A902-3913656B5CFC}.Debug|x64.ActiveCfg = Debug|x64 + {E0A17972-1AF6-429A-A902-3913656B5CFC}.Debug|x64.Build.0 = Debug|x64 + {E0A17972-1AF6-429A-A902-3913656B5CFC}.Release|x64.ActiveCfg = Release|x64 + {E0A17972-1AF6-429A-A902-3913656B5CFC}.Release|x64.Build.0 = Release|x64 + {DA85C3F8-8CDD-4ED4-AF86-05B5556670F7}.Debug|x64.ActiveCfg = Debug|x64 + {DA85C3F8-8CDD-4ED4-AF86-05B5556670F7}.Debug|x64.Build.0 = Debug|x64 + {DA85C3F8-8CDD-4ED4-AF86-05B5556670F7}.Release|x64.ActiveCfg = Release|x64 + {DA85C3F8-8CDD-4ED4-AF86-05B5556670F7}.Release|x64.Build.0 = Release|x64 + 
EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/hisat2.xcodeproj/project.pbxproj b/hisat2.xcodeproj/project.pbxproj new file mode 100644 index 0000000..c1b8291 --- /dev/null +++ b/hisat2.xcodeproj/project.pbxproj @@ -0,0 +1,1307 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 48; + objects = { + +/* Begin PBXBuildFile section */ + 606F132420E1528D008903D6 /* hisat2_repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 606F132220E1528D008903D6 /* hisat2_repeat.cpp */; }; + 606F132520E1528D008903D6 /* hisat2_repeat_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 606F132320E1528D008903D6 /* hisat2_repeat_main.cpp */; }; + 60A82E9E21274F1B0040293D /* bit_packed_array.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 60A82E9C21274F1B0040293D /* bit_packed_array.cpp */; }; + E84A236320D2AF1600C12106 /* repeat_builder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E84A236220D2AF1500C12106 /* repeat_builder.cpp */; }; + E8D9218220C85FE900378C5B /* diff_sample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDF1C20B2B600C03464 /* diff_sample.cpp */; }; + E8D9218520C85FE900378C5B /* limit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEC1C20B2B600C03464 /* limit.cpp */; }; + E8D9218620C85FE900378C5B /* multikey_qsort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEF1C20B2B600C03464 /* multikey_qsort.cpp */; }; + E8D9218720C85FE900378C5B /* random_source.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF51C20B2B600C03464 /* random_source.cpp */; }; + E8D9218820C85FE900378C5B /* tinythread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30D021C20B2B600C03464 /* tinythread.cpp */; }; + E8D9218920C85FE900378C5B /* alphabet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDC1C20B2B600C03464 /* alphabet.cpp */; }; + E8D9218A20C85FE900378C5B /* ccnt_lut.cpp in Sources */ = {isa = PBXBuildFile; 
fileRef = E8C30CDE1C20B2B600C03464 /* ccnt_lut.cpp */; }; + E8D9218B20C85FE900378C5B /* ds.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE11C20B2B600C03464 /* ds.cpp */; }; + E8D9218C20C85FE900378C5B /* edit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE21C20B2B600C03464 /* edit.cpp */; }; + E8D9218D20C85FE900378C5B /* gfm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE31C20B2B600C03464 /* gfm.cpp */; }; + E8D9218E20C85FE900378C5B /* ref_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF91C20B2B600C03464 /* ref_read.cpp */; }; + E8D9218F20C85FE900378C5B /* reference.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFA1C20B2B600C03464 /* reference.cpp */; }; + E8D9219020C85FE900378C5B /* shmem.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFC1C20B2B600C03464 /* shmem.cpp */; }; + E8D9219920C878B700378C5B /* aligner_result.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD01C20B2B600C03464 /* aligner_result.cpp */; }; + E8D9219A20C878B700378C5B /* aligner_sw_driver.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD41C20B2B600C03464 /* aligner_sw_driver.cpp */; }; + E8D9219B20C878B700378C5B /* aligner_sw.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD51C20B2B600C03464 /* aligner_sw.cpp */; }; + E8D9219C20C878B700378C5B /* aligner_swsse_ee_i16.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD61C20B2B600C03464 /* aligner_swsse_ee_i16.cpp */; }; + E8D9219D20C878B700378C5B /* aligner_swsse_ee_u8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD71C20B2B600C03464 /* aligner_swsse_ee_u8.cpp */; }; + E8D9219E20C878B700378C5B /* aligner_swsse_loc_i16.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD81C20B2B600C03464 /* aligner_swsse_loc_i16.cpp */; }; + E8D9219F20C878B700378C5B /* aligner_swsse_loc_u8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD91C20B2B600C03464 /* aligner_swsse_loc_u8.cpp */; }; + E8D921A020C878B700378C5B /* aligner_swsse.cpp in 
Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDA1C20B2B600C03464 /* aligner_swsse.cpp */; }; + E8D921A120C878EC00378C5B /* dp_framer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE01C20B2B600C03464 /* dp_framer.cpp */; }; + E8D921A220C8791000378C5B /* scoring.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFB1C20B2B600C03464 /* scoring.cpp */; }; + E8D921A320C8791000378C5B /* simple_func.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFD1C20B2B600C03464 /* simple_func.cpp */; }; + E8D921A420C8794400378C5B /* aligner_bt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CCD1C20B2B600C03464 /* aligner_bt.cpp */; }; + E8D921A520C8798500378C5B /* qual.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF41C20B2B600C03464 /* qual.cpp */; }; + E8D921A620C879AB00378C5B /* mask.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEE1C20B2B600C03464 /* mask.cpp */; }; + E8EAEDC41C24663D00E62E69 /* alphabet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDC1C20B2B600C03464 /* alphabet.cpp */; }; + E8EAEDC51C24663D00E62E69 /* ccnt_lut.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDE1C20B2B600C03464 /* ccnt_lut.cpp */; }; + E8EAEDC61C24663D00E62E69 /* ds.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE11C20B2B600C03464 /* ds.cpp */; }; + E8EAEDC71C24663D00E62E69 /* edit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE21C20B2B600C03464 /* edit.cpp */; }; + E8EAEDC81C24663D00E62E69 /* gfm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE31C20B2B600C03464 /* gfm.cpp */; }; + E8EAEDC91C24663D00E62E69 /* ref_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF91C20B2B600C03464 /* ref_read.cpp */; }; + E8EAEDCA1C24663D00E62E69 /* reference.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFA1C20B2B600C03464 /* reference.cpp */; }; + E8EAEDCB1C24663D00E62E69 /* shmem.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFC1C20B2B600C03464 /* shmem.cpp */; }; + 
E8EAEDCC1C24666400E62E69 /* limit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEC1C20B2B600C03464 /* limit.cpp */; }; + E8EAEDCD1C24666400E62E69 /* multikey_qsort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEF1C20B2B600C03464 /* multikey_qsort.cpp */; }; + E8EAEDCE1C24666400E62E69 /* random_source.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF51C20B2B600C03464 /* random_source.cpp */; }; + E8EAEDCF1C24666400E62E69 /* tinythread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30D021C20B2B600C03464 /* tinythread.cpp */; }; + E8EAEDD01C24667700E62E69 /* hisat2_build_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE61C20B2B600C03464 /* hisat2_build_main.cpp */; }; + E8EAEDD11C24667700E62E69 /* hisat2_build.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE71C20B2B600C03464 /* hisat2_build.cpp */; }; + E8EAEDD41C24675500E62E69 /* hisat2_build_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE61C20B2B600C03464 /* hisat2_build_main.cpp */; }; + E8EAEDD51C24675500E62E69 /* hisat2_build.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE71C20B2B600C03464 /* hisat2_build.cpp */; }; + E8EAEDD61C24675500E62E69 /* limit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEC1C20B2B600C03464 /* limit.cpp */; }; + E8EAEDD71C24675500E62E69 /* multikey_qsort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEF1C20B2B600C03464 /* multikey_qsort.cpp */; }; + E8EAEDD81C24675500E62E69 /* random_source.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF51C20B2B600C03464 /* random_source.cpp */; }; + E8EAEDD91C24675500E62E69 /* tinythread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30D021C20B2B600C03464 /* tinythread.cpp */; }; + E8EAEDDA1C24675500E62E69 /* alphabet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDC1C20B2B600C03464 /* alphabet.cpp */; }; + E8EAEDDB1C24675500E62E69 /* ccnt_lut.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDE1C20B2B600C03464 
/* ccnt_lut.cpp */; }; + E8EAEDDC1C24675500E62E69 /* ds.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE11C20B2B600C03464 /* ds.cpp */; }; + E8EAEDDD1C24675500E62E69 /* edit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE21C20B2B600C03464 /* edit.cpp */; }; + E8EAEDDE1C24675500E62E69 /* gfm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE31C20B2B600C03464 /* gfm.cpp */; }; + E8EAEDDF1C24675500E62E69 /* ref_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF91C20B2B600C03464 /* ref_read.cpp */; }; + E8EAEDE01C24675500E62E69 /* reference.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFA1C20B2B600C03464 /* reference.cpp */; }; + E8EAEDE11C24675500E62E69 /* shmem.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFC1C20B2B600C03464 /* shmem.cpp */; }; + E8EAEDE81C246CDC00E62E69 /* diff_sample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDF1C20B2B600C03464 /* diff_sample.cpp */; }; + E8EAEDE91C246DA200E62E69 /* alphabet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDC1C20B2B600C03464 /* alphabet.cpp */; }; + E8EAEDEA1C246DA200E62E69 /* ccnt_lut.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDE1C20B2B600C03464 /* ccnt_lut.cpp */; }; + E8EAEDEB1C246DA200E62E69 /* ds.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE11C20B2B600C03464 /* ds.cpp */; }; + E8EAEDEC1C246DA200E62E69 /* edit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE21C20B2B600C03464 /* edit.cpp */; }; + E8EAEDED1C246DA200E62E69 /* gfm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE31C20B2B600C03464 /* gfm.cpp */; }; + E8EAEDEE1C246DA200E62E69 /* multikey_qsort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEF1C20B2B600C03464 /* multikey_qsort.cpp */; }; + E8EAEDEF1C246DA200E62E69 /* ref_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF91C20B2B600C03464 /* ref_read.cpp */; }; + E8EAEDF01C246DA200E62E69 /* reference.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
E8C30CFA1C20B2B600C03464 /* reference.cpp */; }; + E8EAEDF11C246DA200E62E69 /* shmem.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFC1C20B2B600C03464 /* shmem.cpp */; }; + E8EAEDF21C246DB200E62E69 /* hisat2_inspect.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE81C20B2B600C03464 /* hisat2_inspect.cpp */; }; + E8EAEDF31C246DB200E62E69 /* limit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEC1C20B2B600C03464 /* limit.cpp */; }; + E8EAEDF41C246DB200E62E69 /* random_source.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF51C20B2B600C03464 /* random_source.cpp */; }; + E8EAEDF51C246DB200E62E69 /* tinythread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30D021C20B2B600C03464 /* tinythread.cpp */; }; + E8EAEDF61C246E3100E62E69 /* hisat2_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE91C20B2B600C03464 /* hisat2_main.cpp */; }; + E8EAEDF71C246E3100E62E69 /* hisat2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEA1C20B2B600C03464 /* hisat2.cpp */; }; + E8EAEDF81C246E3100E62E69 /* pat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF11C20B2B600C03464 /* pat.cpp */; }; + E8EAEDF91C246E3100E62E69 /* qual.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF41C20B2B600C03464 /* qual.cpp */; }; + E8EAEDFA1C246E5100E62E69 /* aligner_cache.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CCE1C20B2B600C03464 /* aligner_cache.cpp */; }; + E8EAEDFB1C246E5100E62E69 /* aligner_seed_policy.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD11C20B2B600C03464 /* aligner_seed_policy.cpp */; }; + E8EAEDFC1C246E5100E62E69 /* aligner_seed.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD21C20B2B600C03464 /* aligner_seed.cpp */; }; + E8EAEDFD1C246E5100E62E69 /* aligner_seed2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD31C20B2B600C03464 /* aligner_seed2.cpp */; }; + E8EAEDFE1C246E5100E62E69 /* aligner_sw_driver.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
E8C30CD41C20B2B600C03464 /* aligner_sw_driver.cpp */; }; + E8EAEDFF1C246E5100E62E69 /* aligner_sw.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD51C20B2B600C03464 /* aligner_sw.cpp */; }; + E8EAEE001C246E5100E62E69 /* read_qseq.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF71C20B2B600C03464 /* read_qseq.cpp */; }; + E8EAEE011C246EB200E62E69 /* aligner_result.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD01C20B2B600C03464 /* aligner_result.cpp */; }; + E8EAEE021C246EB200E62E69 /* aln_sink.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDB1C20B2B600C03464 /* aln_sink.cpp */; }; + E8EAEE031C246EB200E62E69 /* dp_framer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE01C20B2B600C03464 /* dp_framer.cpp */; }; + E8EAEE041C246EB200E62E69 /* mask.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEE1C20B2B600C03464 /* mask.cpp */; }; + E8EAEE051C246EB200E62E69 /* pe.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF21C20B2B600C03464 /* pe.cpp */; }; + E8EAEE061C246EB200E62E69 /* presets.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF31C20B2B600C03464 /* presets.cpp */; }; + E8EAEE071C246EB200E62E69 /* ref_coord.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF81C20B2B600C03464 /* ref_coord.cpp */; }; + E8EAEE081C246EB200E62E69 /* scoring.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFB1C20B2B600C03464 /* scoring.cpp */; }; + E8EAEE091C246EB200E62E69 /* unique.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30D031C20B2B600C03464 /* unique.cpp */; }; + E8EAEE0A1C246EE900E62E69 /* aligner_bt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CCD1C20B2B600C03464 /* aligner_bt.cpp */; }; + E8EAEE0B1C246EE900E62E69 /* aligner_swsse_ee_i16.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD61C20B2B600C03464 /* aligner_swsse_ee_i16.cpp */; }; + E8EAEE0C1C246EE900E62E69 /* aligner_swsse_ee_u8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD71C20B2B600C03464 
/* aligner_swsse_ee_u8.cpp */; }; + E8EAEE0D1C246EE900E62E69 /* aligner_swsse_loc_i16.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD81C20B2B600C03464 /* aligner_swsse_loc_i16.cpp */; }; + E8EAEE0E1C246EE900E62E69 /* aligner_swsse_loc_u8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD91C20B2B600C03464 /* aligner_swsse_loc_u8.cpp */; }; + E8EAEE0F1C246EE900E62E69 /* aligner_swsse.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDA1C20B2B600C03464 /* aligner_swsse.cpp */; }; + E8EAEE101C246EE900E62E69 /* outq.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF01C20B2B600C03464 /* outq.cpp */; }; + E8EAEE111C246EE900E62E69 /* random_source.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF51C20B2B600C03464 /* random_source.cpp */; }; + E8EAEE121C246EE900E62E69 /* random_util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF61C20B2B600C03464 /* random_util.cpp */; }; + E8EAEE131C246EE900E62E69 /* simple_func.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFD1C20B2B600C03464 /* simple_func.cpp */; }; + E8EAEE141C246EE900E62E69 /* sse_util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30D001C20B2B600C03464 /* sse_util.cpp */; }; + E8EAEE151C246EFC00E62E69 /* aligner_driver.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CCF1C20B2B600C03464 /* aligner_driver.cpp */; }; + E8EAEE161C246EFC00E62E69 /* splice_site.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFF1C20B2B600C03464 /* splice_site.cpp */; }; + E8EAEE171C2487FF00E62E69 /* diff_sample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDF1C20B2B600C03464 /* diff_sample.cpp */; }; + E8EAEE181C24886400E62E69 /* ccnt_lut.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDE1C20B2B600C03464 /* ccnt_lut.cpp */; }; + E8EAEE191C2488AF00E62E69 /* ds.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE11C20B2B600C03464 /* ds.cpp */; }; + E8EAEE1A1C2488D600E62E69 /* limit.cpp in Sources */ = {isa = PBXBuildFile; fileRef 
= E8C30CEC1C20B2B600C03464 /* limit.cpp */; }; + E8EAEE1B1C2488EA00E62E69 /* edit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE21C20B2B600C03464 /* edit.cpp */; }; + E8EAEE1C1C24891800E62E69 /* alphabet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDC1C20B2B600C03464 /* alphabet.cpp */; }; + E8EAEE1D1C24892B00E62E69 /* gfm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE31C20B2B600C03464 /* gfm.cpp */; }; + E8EAEE1E1C24894300E62E69 /* tinythread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30D021C20B2B600C03464 /* tinythread.cpp */; }; + E8EAEE1F1C24896600E62E69 /* reference.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFA1C20B2B600C03464 /* reference.cpp */; }; + E8EAEE201C24899300E62E69 /* ref_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF91C20B2B600C03464 /* ref_read.cpp */; }; + E8FD97B41C81CCED00861B09 /* ref_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF91C20B2B600C03464 /* ref_read.cpp */; }; + E8FD97B51C81CCED00861B09 /* reference.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFA1C20B2B600C03464 /* reference.cpp */; }; + E8FD97B61C81CCED00861B09 /* tinythread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30D021C20B2B600C03464 /* tinythread.cpp */; }; + E8FD97B71C81CCED00861B09 /* gfm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE31C20B2B600C03464 /* gfm.cpp */; }; + E8FD97B81C81CCED00861B09 /* alphabet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDC1C20B2B600C03464 /* alphabet.cpp */; }; + E8FD97B91C81CCED00861B09 /* edit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE21C20B2B600C03464 /* edit.cpp */; }; + E8FD97BA1C81CCED00861B09 /* limit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEC1C20B2B600C03464 /* limit.cpp */; }; + E8FD97BB1C81CCED00861B09 /* ds.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE11C20B2B600C03464 /* ds.cpp */; }; + E8FD97BC1C81CCED00861B09 /* ccnt_lut.cpp in Sources */ = 
{isa = PBXBuildFile; fileRef = E8C30CDE1C20B2B600C03464 /* ccnt_lut.cpp */; }; + E8FD97BD1C81CCED00861B09 /* diff_sample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDF1C20B2B600C03464 /* diff_sample.cpp */; }; + E8FD97BE1C81CCED00861B09 /* aligner_driver.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CCF1C20B2B600C03464 /* aligner_driver.cpp */; }; + E8FD97BF1C81CCED00861B09 /* splice_site.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFF1C20B2B600C03464 /* splice_site.cpp */; }; + E8FD97C01C81CCED00861B09 /* aligner_bt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CCD1C20B2B600C03464 /* aligner_bt.cpp */; }; + E8FD97C11C81CCED00861B09 /* aligner_swsse_ee_i16.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD61C20B2B600C03464 /* aligner_swsse_ee_i16.cpp */; }; + E8FD97C21C81CCED00861B09 /* aligner_swsse_ee_u8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD71C20B2B600C03464 /* aligner_swsse_ee_u8.cpp */; }; + E8FD97C31C81CCED00861B09 /* aligner_swsse_loc_i16.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD81C20B2B600C03464 /* aligner_swsse_loc_i16.cpp */; }; + E8FD97C41C81CCED00861B09 /* aligner_swsse_loc_u8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD91C20B2B600C03464 /* aligner_swsse_loc_u8.cpp */; }; + E8FD97C51C81CCED00861B09 /* aligner_swsse.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDA1C20B2B600C03464 /* aligner_swsse.cpp */; }; + E8FD97C61C81CCED00861B09 /* outq.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF01C20B2B600C03464 /* outq.cpp */; }; + E8FD97C71C81CCED00861B09 /* random_source.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF51C20B2B600C03464 /* random_source.cpp */; }; + E8FD97C81C81CCED00861B09 /* random_util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF61C20B2B600C03464 /* random_util.cpp */; }; + E8FD97C91C81CCED00861B09 /* simple_func.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFD1C20B2B600C03464 /* 
simple_func.cpp */; }; + E8FD97CA1C81CCED00861B09 /* sse_util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30D001C20B2B600C03464 /* sse_util.cpp */; }; + E8FD97CB1C81CCED00861B09 /* aligner_result.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD01C20B2B600C03464 /* aligner_result.cpp */; }; + E8FD97CC1C81CCED00861B09 /* aln_sink.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDB1C20B2B600C03464 /* aln_sink.cpp */; }; + E8FD97CD1C81CCED00861B09 /* dp_framer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE01C20B2B600C03464 /* dp_framer.cpp */; }; + E8FD97CE1C81CCED00861B09 /* mask.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEE1C20B2B600C03464 /* mask.cpp */; }; + E8FD97CF1C81CCED00861B09 /* pe.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF21C20B2B600C03464 /* pe.cpp */; }; + E8FD97D01C81CCED00861B09 /* presets.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF31C20B2B600C03464 /* presets.cpp */; }; + E8FD97D11C81CCED00861B09 /* ref_coord.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF81C20B2B600C03464 /* ref_coord.cpp */; }; + E8FD97D21C81CCED00861B09 /* scoring.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CFB1C20B2B600C03464 /* scoring.cpp */; }; + E8FD97D31C81CCED00861B09 /* unique.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30D031C20B2B600C03464 /* unique.cpp */; }; + E8FD97D41C81CCED00861B09 /* aligner_cache.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CCE1C20B2B600C03464 /* aligner_cache.cpp */; }; + E8FD97D51C81CCED00861B09 /* aligner_seed_policy.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD11C20B2B600C03464 /* aligner_seed_policy.cpp */; }; + E8FD97D61C81CCED00861B09 /* aligner_seed.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD21C20B2B600C03464 /* aligner_seed.cpp */; }; + E8FD97D71C81CCED00861B09 /* aligner_seed2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD31C20B2B600C03464 /* aligner_seed2.cpp */; }; + 
E8FD97D81C81CCED00861B09 /* aligner_sw_driver.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD41C20B2B600C03464 /* aligner_sw_driver.cpp */; }; + E8FD97D91C81CCED00861B09 /* aligner_sw.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CD51C20B2B600C03464 /* aligner_sw.cpp */; }; + E8FD97DA1C81CCED00861B09 /* read_qseq.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF71C20B2B600C03464 /* read_qseq.cpp */; }; + E8FD97DB1C81CCED00861B09 /* hisat2_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CE91C20B2B600C03464 /* hisat2_main.cpp */; }; + E8FD97DC1C81CCED00861B09 /* hisat2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CEA1C20B2B600C03464 /* hisat2.cpp */; }; + E8FD97DD1C81CCED00861B09 /* pat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF11C20B2B600C03464 /* pat.cpp */; }; + E8FD97DE1C81CCED00861B09 /* qual.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CF41C20B2B600C03464 /* qual.cpp */; }; + E8FD97E51C8343EF00861B09 /* diff_sample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E8C30CDF1C20B2B600C03464 /* diff_sample.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + E8C30C581C20A5DD00C03464 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; + E8C30C681C20A64A00C03464 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; + E8C30C731C20A65400C03464 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; + E8D9219220C85FE900378C5B /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 
2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; + E8EAEDE31C24675500E62E69 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; + E8FD97E01C81CCED00861B09 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 606F132220E1528D008903D6 /* hisat2_repeat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hisat2_repeat.cpp; sourceTree = ""; }; + 606F132320E1528D008903D6 /* hisat2_repeat_main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hisat2_repeat_main.cpp; sourceTree = ""; }; + 60A82E9C21274F1B0040293D /* bit_packed_array.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bit_packed_array.cpp; sourceTree = ""; }; + 60A82E9D21274F1B0040293D /* bit_packed_array.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bit_packed_array.h; sourceTree = ""; }; + E80538341DA83933008894D0 /* gp.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = gp.h; sourceTree = ""; }; + E84A236120D2AF0600C12106 /* repeat_builder.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = repeat_builder.h; sourceTree = ""; }; + E84A236220D2AF1500C12106 /* repeat_builder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = repeat_builder.cpp; sourceTree = ""; }; + E8C30C5A1C20A5DD00C03464 /* hisat2x */ = {isa = PBXFileReference; explicitFileType = 
"compiled.mach-o.executable"; includeInIndex = 0; path = hisat2x; sourceTree = BUILT_PRODUCTS_DIR; }; + E8C30C6A1C20A64A00C03464 /* hisat2-buildx */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "hisat2-buildx"; sourceTree = BUILT_PRODUCTS_DIR; }; + E8C30C751C20A65400C03464 /* hisat2-inspectx */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "hisat2-inspectx"; sourceTree = BUILT_PRODUCTS_DIR; }; + E8C30C7C1C20B2B600C03464 /* aligner_bt.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_bt.h; sourceTree = ""; }; + E8C30C7D1C20B2B600C03464 /* aligner_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_cache.h; sourceTree = ""; }; + E8C30C7E1C20B2B600C03464 /* aligner_driver.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_driver.h; sourceTree = ""; }; + E8C30C7F1C20B2B600C03464 /* aligner_metrics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_metrics.h; sourceTree = ""; }; + E8C30C801C20B2B600C03464 /* aligner_report.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_report.h; sourceTree = ""; }; + E8C30C811C20B2B600C03464 /* aligner_result.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_result.h; sourceTree = ""; }; + E8C30C821C20B2B600C03464 /* aligner_seed_policy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_seed_policy.h; sourceTree = ""; }; + E8C30C831C20B2B600C03464 /* aligner_seed.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_seed.h; sourceTree = ""; }; + E8C30C841C20B2B600C03464 /* aligner_seed2.h */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_seed2.h; sourceTree = ""; }; + E8C30C851C20B2B600C03464 /* aligner_sw_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_sw_common.h; sourceTree = ""; }; + E8C30C861C20B2B600C03464 /* aligner_sw_driver.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_sw_driver.h; sourceTree = ""; }; + E8C30C871C20B2B600C03464 /* aligner_sw_nuc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_sw_nuc.h; sourceTree = ""; }; + E8C30C881C20B2B600C03464 /* aligner_sw.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_sw.h; sourceTree = ""; }; + E8C30C891C20B2B600C03464 /* aligner_swsse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aligner_swsse.h; sourceTree = ""; }; + E8C30C8A1C20B2B600C03464 /* aln_sink.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = aln_sink.h; sourceTree = ""; }; + E8C30C8B1C20B2B600C03464 /* alphabet.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alphabet.h; sourceTree = ""; }; + E8C30C8C1C20B2B600C03464 /* alt.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alt.h; sourceTree = ""; }; + E8C30C8D1C20B2B600C03464 /* assert_helpers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = assert_helpers.h; sourceTree = ""; }; + E8C30C8E1C20B2B600C03464 /* banded.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = banded.h; sourceTree = ""; }; + E8C30C8F1C20B2B600C03464 /* binary_sa_search.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = binary_sa_search.h; sourceTree = ""; }; + 
E8C30C901C20B2B600C03464 /* bitpack.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bitpack.h; sourceTree = ""; }; + E8C30C911C20B2B600C03464 /* blockwise_sa.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = blockwise_sa.h; sourceTree = ""; }; + E8C30C921C20B2B600C03464 /* bp_aligner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bp_aligner.h; sourceTree = ""; }; + E8C30C931C20B2B600C03464 /* btypes.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = btypes.h; sourceTree = ""; }; + E8C30C951C20B2B600C03464 /* diff_sample.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = diff_sample.h; sourceTree = ""; }; + E8C30C961C20B2B600C03464 /* dp_framer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dp_framer.h; sourceTree = ""; }; + E8C30C971C20B2B600C03464 /* ds.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ds.h; sourceTree = ""; }; + E8C30C981C20B2B600C03464 /* edit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = edit.h; sourceTree = ""; }; + E8C30C991C20B2B600C03464 /* endian_swap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = endian_swap.h; sourceTree = ""; }; + E8C30C9A1C20B2B600C03464 /* fast_mutex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fast_mutex.h; sourceTree = ""; }; + E8C30C9B1C20B2B600C03464 /* filebuf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filebuf.h; sourceTree = ""; }; + E8C30C9C1C20B2B600C03464 /* formats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = formats.h; sourceTree = ""; }; + E8C30C9D1C20B2B600C03464 /* 
gbwt_graph.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = gbwt_graph.h; sourceTree = ""; }; + E8C30C9E1C20B2B600C03464 /* gfm.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = gfm.h; sourceTree = ""; }; + E8C30C9F1C20B2B600C03464 /* group_walk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = group_walk.h; sourceTree = ""; }; + E8C30CA01C20B2B600C03464 /* hgfm.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hgfm.h; sourceTree = ""; }; + E8C30CA11C20B2B600C03464 /* hi_aligner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hi_aligner.h; sourceTree = ""; }; + E8C30CA21C20B2B600C03464 /* hier_idx_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hier_idx_common.h; sourceTree = ""; }; + E8C30CA31C20B2B600C03464 /* ival_list.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ival_list.h; sourceTree = ""; }; + E8C30CA41C20B2B600C03464 /* limit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = limit.h; sourceTree = ""; }; + E8C30CA51C20B2B600C03464 /* ls.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ls.h; sourceTree = ""; }; + E8C30CA61C20B2B600C03464 /* mask.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mask.h; sourceTree = ""; }; + E8C30CA71C20B2B600C03464 /* mem_ids.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mem_ids.h; sourceTree = ""; }; + E8C30CA81C20B2B600C03464 /* mm.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mm.h; sourceTree = ""; }; + E8C30CA91C20B2B600C03464 /* multikey_qsort.h */ = {isa = PBXFileReference; fileEncoding = 
4; lastKnownFileType = sourcecode.c.h; path = multikey_qsort.h; sourceTree = ""; }; + E8C30CAA1C20B2B600C03464 /* opts.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = opts.h; sourceTree = ""; }; + E8C30CAB1C20B2B600C03464 /* outq.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = outq.h; sourceTree = ""; }; + E8C30CAC1C20B2B600C03464 /* pat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pat.h; sourceTree = ""; }; + E8C30CAD1C20B2B600C03464 /* pe.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pe.h; sourceTree = ""; }; + E8C30CAE1C20B2B600C03464 /* presets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = presets.h; sourceTree = ""; }; + E8C30CAF1C20B2B600C03464 /* processor_support.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = processor_support.h; sourceTree = ""; }; + E8C30CB01C20B2B600C03464 /* qual.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qual.h; sourceTree = ""; }; + E8C30CB11C20B2B600C03464 /* radix_sort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = radix_sort.h; sourceTree = ""; }; + E8C30CB21C20B2B600C03464 /* random_source.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = random_source.h; sourceTree = ""; }; + E8C30CB31C20B2B600C03464 /* random_util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = random_util.h; sourceTree = ""; }; + E8C30CB41C20B2B600C03464 /* read.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = read.h; sourceTree = ""; }; + E8C30CB51C20B2B600C03464 /* ref_coord.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; 
path = ref_coord.h; sourceTree = ""; }; + E8C30CB61C20B2B600C03464 /* ref_read.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ref_read.h; sourceTree = ""; }; + E8C30CB71C20B2B600C03464 /* reference.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = reference.h; sourceTree = ""; }; + E8C30CB81C20B2B600C03464 /* sam.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sam.h; sourceTree = ""; }; + E8C30CB91C20B2B600C03464 /* scoring.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = scoring.h; sourceTree = ""; }; + E8C30CBA1C20B2B600C03464 /* search_globals.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = search_globals.h; sourceTree = ""; }; + E8C30CBB1C20B2B600C03464 /* sequence_io.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequence_io.h; sourceTree = ""; }; + E8C30CBC1C20B2B600C03464 /* shmem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shmem.h; sourceTree = ""; }; + E8C30CBD1C20B2B600C03464 /* simple_func.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simple_func.h; sourceTree = ""; }; + E8C30CBE1C20B2B600C03464 /* splice_site_mem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = splice_site_mem.h; sourceTree = ""; }; + E8C30CBF1C20B2B600C03464 /* splice_site.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = splice_site.h; sourceTree = ""; }; + E8C30CC01C20B2B600C03464 /* spliced_aligner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = spliced_aligner.h; sourceTree = ""; }; + E8C30CC11C20B2B600C03464 /* sse_util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.c.h; path = sse_util.h; sourceTree = ""; }; + E8C30CC21C20B2B600C03464 /* sstring.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sstring.h; sourceTree = ""; }; + E8C30CC31C20B2B600C03464 /* str_util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = str_util.h; sourceTree = ""; }; + E8C30CC41C20B2B600C03464 /* threading.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = threading.h; sourceTree = ""; }; + E8C30CC51C20B2B600C03464 /* timer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = timer.h; sourceTree = ""; }; + E8C30CC61C20B2B600C03464 /* tinythread.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tinythread.h; sourceTree = ""; }; + E8C30CC71C20B2B600C03464 /* tokenize.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tokenize.h; sourceTree = ""; }; + E8C30CC81C20B2B600C03464 /* tp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tp.h; sourceTree = ""; }; + E8C30CC91C20B2B600C03464 /* unique.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = unique.h; sourceTree = ""; }; + E8C30CCA1C20B2B600C03464 /* util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = util.h; sourceTree = ""; }; + E8C30CCB1C20B2B600C03464 /* word_io.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = word_io.h; sourceTree = ""; }; + E8C30CCC1C20B2B600C03464 /* zbox.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = zbox.h; sourceTree = ""; }; + E8C30CCD1C20B2B600C03464 /* aligner_bt.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_bt.cpp; sourceTree = ""; }; 
+ E8C30CCE1C20B2B600C03464 /* aligner_cache.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_cache.cpp; sourceTree = ""; }; + E8C30CCF1C20B2B600C03464 /* aligner_driver.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_driver.cpp; sourceTree = ""; }; + E8C30CD01C20B2B600C03464 /* aligner_result.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_result.cpp; sourceTree = ""; }; + E8C30CD11C20B2B600C03464 /* aligner_seed_policy.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_seed_policy.cpp; sourceTree = ""; }; + E8C30CD21C20B2B600C03464 /* aligner_seed.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_seed.cpp; sourceTree = ""; }; + E8C30CD31C20B2B600C03464 /* aligner_seed2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_seed2.cpp; sourceTree = ""; }; + E8C30CD41C20B2B600C03464 /* aligner_sw_driver.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_sw_driver.cpp; sourceTree = ""; }; + E8C30CD51C20B2B600C03464 /* aligner_sw.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_sw.cpp; sourceTree = ""; }; + E8C30CD61C20B2B600C03464 /* aligner_swsse_ee_i16.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_swsse_ee_i16.cpp; sourceTree = ""; }; + E8C30CD71C20B2B600C03464 /* aligner_swsse_ee_u8.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_swsse_ee_u8.cpp; sourceTree = ""; }; + E8C30CD81C20B2B600C03464 /* aligner_swsse_loc_i16.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.cpp.cpp; path = aligner_swsse_loc_i16.cpp; sourceTree = ""; }; + E8C30CD91C20B2B600C03464 /* aligner_swsse_loc_u8.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_swsse_loc_u8.cpp; sourceTree = ""; }; + E8C30CDA1C20B2B600C03464 /* aligner_swsse.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligner_swsse.cpp; sourceTree = ""; }; + E8C30CDB1C20B2B600C03464 /* aln_sink.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aln_sink.cpp; sourceTree = ""; }; + E8C30CDC1C20B2B600C03464 /* alphabet.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alphabet.cpp; sourceTree = ""; }; + E8C30CDD1C20B2B600C03464 /* banded.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = banded.cpp; sourceTree = ""; }; + E8C30CDE1C20B2B600C03464 /* ccnt_lut.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccnt_lut.cpp; sourceTree = ""; }; + E8C30CDF1C20B2B600C03464 /* diff_sample.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = diff_sample.cpp; sourceTree = ""; }; + E8C30CE01C20B2B600C03464 /* dp_framer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = dp_framer.cpp; sourceTree = ""; }; + E8C30CE11C20B2B600C03464 /* ds.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ds.cpp; sourceTree = ""; }; + E8C30CE21C20B2B600C03464 /* edit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = edit.cpp; sourceTree = ""; }; + E8C30CE31C20B2B600C03464 /* gfm.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = gfm.cpp; sourceTree = ""; }; + 
E8C30CE41C20B2B600C03464 /* group_walk.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = group_walk.cpp; sourceTree = ""; }; + E8C30CE51C20B2B600C03464 /* hisat_bp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hisat_bp.cpp; sourceTree = ""; }; + E8C30CE61C20B2B600C03464 /* hisat2_build_main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hisat2_build_main.cpp; sourceTree = ""; }; + E8C30CE71C20B2B600C03464 /* hisat2_build.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hisat2_build.cpp; sourceTree = ""; }; + E8C30CE81C20B2B600C03464 /* hisat2_inspect.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hisat2_inspect.cpp; sourceTree = ""; }; + E8C30CE91C20B2B600C03464 /* hisat2_main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hisat2_main.cpp; sourceTree = ""; }; + E8C30CEA1C20B2B600C03464 /* hisat2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hisat2.cpp; sourceTree = ""; }; + E8C30CEB1C20B2B600C03464 /* ival_list.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ival_list.cpp; sourceTree = ""; }; + E8C30CEC1C20B2B600C03464 /* limit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = limit.cpp; sourceTree = ""; }; + E8C30CED1C20B2B600C03464 /* ls.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ls.cpp; sourceTree = ""; }; + E8C30CEE1C20B2B600C03464 /* mask.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mask.cpp; sourceTree = ""; }; + E8C30CEF1C20B2B600C03464 /* multikey_qsort.cpp */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = multikey_qsort.cpp; sourceTree = ""; }; + E8C30CF01C20B2B600C03464 /* outq.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = outq.cpp; sourceTree = ""; }; + E8C30CF11C20B2B600C03464 /* pat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pat.cpp; sourceTree = ""; }; + E8C30CF21C20B2B600C03464 /* pe.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pe.cpp; sourceTree = ""; }; + E8C30CF31C20B2B600C03464 /* presets.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = presets.cpp; sourceTree = ""; }; + E8C30CF41C20B2B600C03464 /* qual.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = qual.cpp; sourceTree = ""; }; + E8C30CF51C20B2B600C03464 /* random_source.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = random_source.cpp; sourceTree = ""; }; + E8C30CF61C20B2B600C03464 /* random_util.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = random_util.cpp; sourceTree = ""; }; + E8C30CF71C20B2B600C03464 /* read_qseq.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = read_qseq.cpp; sourceTree = ""; }; + E8C30CF81C20B2B600C03464 /* ref_coord.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ref_coord.cpp; sourceTree = ""; }; + E8C30CF91C20B2B600C03464 /* ref_read.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ref_read.cpp; sourceTree = ""; }; + E8C30CFA1C20B2B600C03464 /* reference.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = reference.cpp; sourceTree = ""; }; + 
E8C30CFB1C20B2B600C03464 /* scoring.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = scoring.cpp; sourceTree = ""; }; + E8C30CFC1C20B2B600C03464 /* shmem.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shmem.cpp; sourceTree = ""; }; + E8C30CFD1C20B2B600C03464 /* simple_func.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = simple_func.cpp; sourceTree = ""; }; + E8C30CFE1C20B2B600C03464 /* splice_site_new.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = splice_site_new.cpp; sourceTree = ""; }; + E8C30CFF1C20B2B600C03464 /* splice_site.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = splice_site.cpp; sourceTree = ""; }; + E8C30D001C20B2B600C03464 /* sse_util.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sse_util.cpp; sourceTree = ""; }; + E8C30D011C20B2B600C03464 /* sstring.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sstring.cpp; sourceTree = ""; }; + E8C30D021C20B2B600C03464 /* tinythread.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tinythread.cpp; sourceTree = ""; }; + E8C30D031C20B2B600C03464 /* unique.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = unique.cpp; sourceTree = ""; }; + E8D9219620C85FE900378C5B /* hisat2-repeatx */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "hisat2-repeatx"; sourceTree = BUILT_PRODUCTS_DIR; }; + E8D921A720CC37B300378C5B /* repeat.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = repeat.h; sourceTree = ""; }; + E8E2A13C2118D082008F4EA4 /* repeat_kmer.h */ = {isa = PBXFileReference; 
lastKnownFileType = sourcecode.c.h; path = repeat_kmer.h; sourceTree = ""; }; + E8E2A13D2130695C008F4EA4 /* rfm.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = rfm.h; sourceTree = ""; }; + E8EAEDE71C24675500E62E69 /* hisat2-buildlx */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "hisat2-buildlx"; sourceTree = BUILT_PRODUCTS_DIR; }; + E8FD97E41C81CCED00861B09 /* hisat2lx */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = hisat2lx; sourceTree = BUILT_PRODUCTS_DIR; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + E8C30C571C20A5DD00C03464 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + E8C30C671C20A64A00C03464 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + E8C30C721C20A65400C03464 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + E8D9219120C85FE900378C5B /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + E8EAEDE21C24675500E62E69 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + E8FD97DF1C81CCED00861B09 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + E8C30C511C20A5DD00C03464 = { + isa = PBXGroup; + children = ( + E8C30C651C20A61400C03464 /* Document */, + E8C30C641C20A60F00C03464 /* Source */, + 
E8C30C5B1C20A5DD00C03464 /* Products */, + ); + sourceTree = ""; + }; + E8C30C5B1C20A5DD00C03464 /* Products */ = { + isa = PBXGroup; + children = ( + E8C30C5A1C20A5DD00C03464 /* hisat2x */, + E8C30C6A1C20A64A00C03464 /* hisat2-buildx */, + E8C30C751C20A65400C03464 /* hisat2-inspectx */, + E8EAEDE71C24675500E62E69 /* hisat2-buildlx */, + E8FD97E41C81CCED00861B09 /* hisat2lx */, + E8D9219620C85FE900378C5B /* hisat2-repeatx */, + ); + name = Products; + sourceTree = ""; + }; + E8C30C641C20A60F00C03464 /* Source */ = { + isa = PBXGroup; + children = ( + E8C30C7C1C20B2B600C03464 /* aligner_bt.h */, + E8C30C7D1C20B2B600C03464 /* aligner_cache.h */, + E8C30C7E1C20B2B600C03464 /* aligner_driver.h */, + E8C30C7F1C20B2B600C03464 /* aligner_metrics.h */, + E8C30C801C20B2B600C03464 /* aligner_report.h */, + E8C30C811C20B2B600C03464 /* aligner_result.h */, + E8C30C821C20B2B600C03464 /* aligner_seed_policy.h */, + E8C30C831C20B2B600C03464 /* aligner_seed.h */, + E8C30C841C20B2B600C03464 /* aligner_seed2.h */, + E8C30C851C20B2B600C03464 /* aligner_sw_common.h */, + E8C30C861C20B2B600C03464 /* aligner_sw_driver.h */, + E8C30C871C20B2B600C03464 /* aligner_sw_nuc.h */, + E8C30C881C20B2B600C03464 /* aligner_sw.h */, + E8C30C891C20B2B600C03464 /* aligner_swsse.h */, + E8C30C8A1C20B2B600C03464 /* aln_sink.h */, + E8C30C8B1C20B2B600C03464 /* alphabet.h */, + E8C30C8C1C20B2B600C03464 /* alt.h */, + E8C30C8D1C20B2B600C03464 /* assert_helpers.h */, + E8C30C8E1C20B2B600C03464 /* banded.h */, + E8C30C8F1C20B2B600C03464 /* binary_sa_search.h */, + 60A82E9D21274F1B0040293D /* bit_packed_array.h */, + E8C30C901C20B2B600C03464 /* bitpack.h */, + E8C30C911C20B2B600C03464 /* blockwise_sa.h */, + E8C30C921C20B2B600C03464 /* bp_aligner.h */, + E8C30C931C20B2B600C03464 /* btypes.h */, + E8C30C951C20B2B600C03464 /* diff_sample.h */, + E8C30C961C20B2B600C03464 /* dp_framer.h */, + E8C30C971C20B2B600C03464 /* ds.h */, + E8C30C981C20B2B600C03464 /* edit.h */, + E8C30C991C20B2B600C03464 /* endian_swap.h 
*/, + E8C30C9A1C20B2B600C03464 /* fast_mutex.h */, + E8C30C9B1C20B2B600C03464 /* filebuf.h */, + E8C30C9C1C20B2B600C03464 /* formats.h */, + E8C30C9D1C20B2B600C03464 /* gbwt_graph.h */, + E8C30C9E1C20B2B600C03464 /* gfm.h */, + E80538341DA83933008894D0 /* gp.h */, + E8C30C9F1C20B2B600C03464 /* group_walk.h */, + E8C30CA01C20B2B600C03464 /* hgfm.h */, + E8C30CA11C20B2B600C03464 /* hi_aligner.h */, + E8C30CA21C20B2B600C03464 /* hier_idx_common.h */, + E8C30CA31C20B2B600C03464 /* ival_list.h */, + E8C30CA41C20B2B600C03464 /* limit.h */, + E8C30CA51C20B2B600C03464 /* ls.h */, + E8C30CA61C20B2B600C03464 /* mask.h */, + E8C30CA71C20B2B600C03464 /* mem_ids.h */, + E8C30CA81C20B2B600C03464 /* mm.h */, + E8C30CA91C20B2B600C03464 /* multikey_qsort.h */, + E8C30CAA1C20B2B600C03464 /* opts.h */, + E8C30CAB1C20B2B600C03464 /* outq.h */, + E8C30CAC1C20B2B600C03464 /* pat.h */, + E8C30CAD1C20B2B600C03464 /* pe.h */, + E8C30CAE1C20B2B600C03464 /* presets.h */, + E8C30CAF1C20B2B600C03464 /* processor_support.h */, + E8C30CB01C20B2B600C03464 /* qual.h */, + E8C30CB11C20B2B600C03464 /* radix_sort.h */, + E8C30CB21C20B2B600C03464 /* random_source.h */, + E8C30CB31C20B2B600C03464 /* random_util.h */, + E8C30CB41C20B2B600C03464 /* read.h */, + E8C30CB51C20B2B600C03464 /* ref_coord.h */, + E8C30CB61C20B2B600C03464 /* ref_read.h */, + E8C30CB71C20B2B600C03464 /* reference.h */, + E8D921A720CC37B300378C5B /* repeat.h */, + E84A236120D2AF0600C12106 /* repeat_builder.h */, + E8E2A13C2118D082008F4EA4 /* repeat_kmer.h */, + E8E2A13D2130695C008F4EA4 /* rfm.h */, + E8C30CB81C20B2B600C03464 /* sam.h */, + E8C30CB91C20B2B600C03464 /* scoring.h */, + E8C30CBA1C20B2B600C03464 /* search_globals.h */, + E8C30CBB1C20B2B600C03464 /* sequence_io.h */, + E8C30CBC1C20B2B600C03464 /* shmem.h */, + E8C30CBD1C20B2B600C03464 /* simple_func.h */, + E8C30CBE1C20B2B600C03464 /* splice_site_mem.h */, + E8C30CBF1C20B2B600C03464 /* splice_site.h */, + E8C30CC01C20B2B600C03464 /* spliced_aligner.h */, + 
E8C30CC11C20B2B600C03464 /* sse_util.h */, + E8C30CC21C20B2B600C03464 /* sstring.h */, + E8C30CC31C20B2B600C03464 /* str_util.h */, + E8C30CC41C20B2B600C03464 /* threading.h */, + E8C30CC51C20B2B600C03464 /* timer.h */, + E8C30CC61C20B2B600C03464 /* tinythread.h */, + E8C30CC71C20B2B600C03464 /* tokenize.h */, + E8C30CC81C20B2B600C03464 /* tp.h */, + E8C30CC91C20B2B600C03464 /* unique.h */, + E8C30CCA1C20B2B600C03464 /* util.h */, + E8C30CCB1C20B2B600C03464 /* word_io.h */, + E8C30CCC1C20B2B600C03464 /* zbox.h */, + E8C30CCD1C20B2B600C03464 /* aligner_bt.cpp */, + E8C30CCE1C20B2B600C03464 /* aligner_cache.cpp */, + E8C30CCF1C20B2B600C03464 /* aligner_driver.cpp */, + E8C30CD01C20B2B600C03464 /* aligner_result.cpp */, + E8C30CD11C20B2B600C03464 /* aligner_seed_policy.cpp */, + E8C30CD21C20B2B600C03464 /* aligner_seed.cpp */, + E8C30CD31C20B2B600C03464 /* aligner_seed2.cpp */, + E8C30CD41C20B2B600C03464 /* aligner_sw_driver.cpp */, + E8C30CD51C20B2B600C03464 /* aligner_sw.cpp */, + E8C30CD61C20B2B600C03464 /* aligner_swsse_ee_i16.cpp */, + E8C30CD71C20B2B600C03464 /* aligner_swsse_ee_u8.cpp */, + E8C30CD81C20B2B600C03464 /* aligner_swsse_loc_i16.cpp */, + E8C30CD91C20B2B600C03464 /* aligner_swsse_loc_u8.cpp */, + E8C30CDA1C20B2B600C03464 /* aligner_swsse.cpp */, + E8C30CDB1C20B2B600C03464 /* aln_sink.cpp */, + E8C30CDC1C20B2B600C03464 /* alphabet.cpp */, + E8C30CDD1C20B2B600C03464 /* banded.cpp */, + 60A82E9C21274F1B0040293D /* bit_packed_array.cpp */, + E8C30CDE1C20B2B600C03464 /* ccnt_lut.cpp */, + E8C30CDF1C20B2B600C03464 /* diff_sample.cpp */, + E8C30CE01C20B2B600C03464 /* dp_framer.cpp */, + E8C30CE11C20B2B600C03464 /* ds.cpp */, + E8C30CE21C20B2B600C03464 /* edit.cpp */, + E8C30CE31C20B2B600C03464 /* gfm.cpp */, + E8C30CE41C20B2B600C03464 /* group_walk.cpp */, + E8C30CE51C20B2B600C03464 /* hisat_bp.cpp */, + E8C30CE61C20B2B600C03464 /* hisat2_build_main.cpp */, + E8C30CE71C20B2B600C03464 /* hisat2_build.cpp */, + E8C30CE81C20B2B600C03464 /* hisat2_inspect.cpp 
*/, + E8C30CE91C20B2B600C03464 /* hisat2_main.cpp */, + 606F132320E1528D008903D6 /* hisat2_repeat_main.cpp */, + 606F132220E1528D008903D6 /* hisat2_repeat.cpp */, + E8C30CEA1C20B2B600C03464 /* hisat2.cpp */, + E8C30CEB1C20B2B600C03464 /* ival_list.cpp */, + E8C30CEC1C20B2B600C03464 /* limit.cpp */, + E8C30CED1C20B2B600C03464 /* ls.cpp */, + E8C30CEE1C20B2B600C03464 /* mask.cpp */, + E8C30CEF1C20B2B600C03464 /* multikey_qsort.cpp */, + E8C30CF01C20B2B600C03464 /* outq.cpp */, + E8C30CF11C20B2B600C03464 /* pat.cpp */, + E8C30CF21C20B2B600C03464 /* pe.cpp */, + E8C30CF31C20B2B600C03464 /* presets.cpp */, + E8C30CF41C20B2B600C03464 /* qual.cpp */, + E8C30CF51C20B2B600C03464 /* random_source.cpp */, + E8C30CF61C20B2B600C03464 /* random_util.cpp */, + E8C30CF71C20B2B600C03464 /* read_qseq.cpp */, + E8C30CF81C20B2B600C03464 /* ref_coord.cpp */, + E8C30CF91C20B2B600C03464 /* ref_read.cpp */, + E8C30CFA1C20B2B600C03464 /* reference.cpp */, + E84A236220D2AF1500C12106 /* repeat_builder.cpp */, + E8C30CFB1C20B2B600C03464 /* scoring.cpp */, + E8C30CFC1C20B2B600C03464 /* shmem.cpp */, + E8C30CFD1C20B2B600C03464 /* simple_func.cpp */, + E8C30CFE1C20B2B600C03464 /* splice_site_new.cpp */, + E8C30CFF1C20B2B600C03464 /* splice_site.cpp */, + E8C30D001C20B2B600C03464 /* sse_util.cpp */, + E8C30D011C20B2B600C03464 /* sstring.cpp */, + E8C30D021C20B2B600C03464 /* tinythread.cpp */, + E8C30D031C20B2B600C03464 /* unique.cpp */, + ); + name = Source; + sourceTree = ""; + }; + E8C30C651C20A61400C03464 /* Document */ = { + isa = PBXGroup; + children = ( + ); + name = Document; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + E8C30C591C20A5DD00C03464 /* hisat2x */ = { + isa = PBXNativeTarget; + buildConfigurationList = E8C30C611C20A5DD00C03464 /* Build configuration list for PBXNativeTarget "hisat2x" */; + buildPhases = ( + E8C30C561C20A5DD00C03464 /* Sources */, + E8C30C571C20A5DD00C03464 /* Frameworks */, + E8C30C581C20A5DD00C03464 /* CopyFiles 
*/, + ); + buildRules = ( + ); + dependencies = ( + ); + name = hisat2x; + productName = hisat2; + productReference = E8C30C5A1C20A5DD00C03464 /* hisat2x */; + productType = "com.apple.product-type.tool"; + }; + E8C30C691C20A64A00C03464 /* hisat2-buildx */ = { + isa = PBXNativeTarget; + buildConfigurationList = E8C30C6E1C20A64A00C03464 /* Build configuration list for PBXNativeTarget "hisat2-buildx" */; + buildPhases = ( + E8C30C661C20A64A00C03464 /* Sources */, + E8C30C671C20A64A00C03464 /* Frameworks */, + E8C30C681C20A64A00C03464 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "hisat2-buildx"; + productName = "hisat2-buildx"; + productReference = E8C30C6A1C20A64A00C03464 /* hisat2-buildx */; + productType = "com.apple.product-type.tool"; + }; + E8C30C741C20A65400C03464 /* hisat2-inspectx */ = { + isa = PBXNativeTarget; + buildConfigurationList = E8C30C791C20A65400C03464 /* Build configuration list for PBXNativeTarget "hisat2-inspectx" */; + buildPhases = ( + E8C30C711C20A65400C03464 /* Sources */, + E8C30C721C20A65400C03464 /* Frameworks */, + E8C30C731C20A65400C03464 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "hisat2-inspectx"; + productName = "hisat2-inspectx"; + productReference = E8C30C751C20A65400C03464 /* hisat2-inspectx */; + productType = "com.apple.product-type.tool"; + }; + E8D9218020C85FE900378C5B /* hisat2-repeatx */ = { + isa = PBXNativeTarget; + buildConfigurationList = E8D9219320C85FE900378C5B /* Build configuration list for PBXNativeTarget "hisat2-repeatx" */; + buildPhases = ( + E8D9218120C85FE900378C5B /* Sources */, + E8D9219120C85FE900378C5B /* Frameworks */, + E8D9219220C85FE900378C5B /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "hisat2-repeatx"; + productName = "hisat2-buildx"; + productReference = E8D9219620C85FE900378C5B /* hisat2-repeatx */; + productType = "com.apple.product-type.tool"; + }; + E8EAEDD21C24675500E62E69 /* hisat2-buildlx */ = { + 
isa = PBXNativeTarget; + buildConfigurationList = E8EAEDE41C24675500E62E69 /* Build configuration list for PBXNativeTarget "hisat2-buildlx" */; + buildPhases = ( + E8EAEDD31C24675500E62E69 /* Sources */, + E8EAEDE21C24675500E62E69 /* Frameworks */, + E8EAEDE31C24675500E62E69 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "hisat2-buildlx"; + productName = "hisat2-buildx"; + productReference = E8EAEDE71C24675500E62E69 /* hisat2-buildlx */; + productType = "com.apple.product-type.tool"; + }; + E8FD97B21C81CCED00861B09 /* hisat2lx */ = { + isa = PBXNativeTarget; + buildConfigurationList = E8FD97E11C81CCED00861B09 /* Build configuration list for PBXNativeTarget "hisat2lx" */; + buildPhases = ( + E8FD97B31C81CCED00861B09 /* Sources */, + E8FD97DF1C81CCED00861B09 /* Frameworks */, + E8FD97E01C81CCED00861B09 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = hisat2lx; + productName = hisat2; + productReference = E8FD97E41C81CCED00861B09 /* hisat2lx */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + E8C30C521C20A5DD00C03464 /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 0930; + ORGANIZATIONNAME = "Daehwan Kim"; + TargetAttributes = { + E8C30C591C20A5DD00C03464 = { + CreatedOnToolsVersion = 7.2; + }; + E8C30C691C20A64A00C03464 = { + CreatedOnToolsVersion = 7.2; + }; + E8C30C741C20A65400C03464 = { + CreatedOnToolsVersion = 7.2; + }; + }; + }; + buildConfigurationList = E8C30C551C20A5DD00C03464 /* Build configuration list for PBXProject "hisat2" */; + compatibilityVersion = "Xcode 8.0"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = E8C30C511C20A5DD00C03464; + productRefGroup = E8C30C5B1C20A5DD00C03464 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + E8C30C591C20A5DD00C03464 /* hisat2x */, + E8C30C691C20A64A00C03464 /* 
hisat2-buildx */, + E8C30C741C20A65400C03464 /* hisat2-inspectx */, + E8EAEDD21C24675500E62E69 /* hisat2-buildlx */, + E8FD97B21C81CCED00861B09 /* hisat2lx */, + E8D9218020C85FE900378C5B /* hisat2-repeatx */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + E8C30C561C20A5DD00C03464 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + E8EAEE201C24899300E62E69 /* ref_read.cpp in Sources */, + E8EAEE1F1C24896600E62E69 /* reference.cpp in Sources */, + E8EAEE1E1C24894300E62E69 /* tinythread.cpp in Sources */, + E8EAEE1D1C24892B00E62E69 /* gfm.cpp in Sources */, + E8EAEE1C1C24891800E62E69 /* alphabet.cpp in Sources */, + E8EAEE1B1C2488EA00E62E69 /* edit.cpp in Sources */, + E8EAEE1A1C2488D600E62E69 /* limit.cpp in Sources */, + E8EAEE191C2488AF00E62E69 /* ds.cpp in Sources */, + E8EAEE181C24886400E62E69 /* ccnt_lut.cpp in Sources */, + E8EAEE171C2487FF00E62E69 /* diff_sample.cpp in Sources */, + E8EAEE151C246EFC00E62E69 /* aligner_driver.cpp in Sources */, + E8EAEE161C246EFC00E62E69 /* splice_site.cpp in Sources */, + E8EAEE0A1C246EE900E62E69 /* aligner_bt.cpp in Sources */, + E8EAEE0B1C246EE900E62E69 /* aligner_swsse_ee_i16.cpp in Sources */, + E8EAEE0C1C246EE900E62E69 /* aligner_swsse_ee_u8.cpp in Sources */, + E8EAEE0D1C246EE900E62E69 /* aligner_swsse_loc_i16.cpp in Sources */, + E8EAEE0E1C246EE900E62E69 /* aligner_swsse_loc_u8.cpp in Sources */, + E8EAEE0F1C246EE900E62E69 /* aligner_swsse.cpp in Sources */, + E8EAEE101C246EE900E62E69 /* outq.cpp in Sources */, + E8EAEE111C246EE900E62E69 /* random_source.cpp in Sources */, + E8EAEE121C246EE900E62E69 /* random_util.cpp in Sources */, + E8EAEE131C246EE900E62E69 /* simple_func.cpp in Sources */, + E8EAEE141C246EE900E62E69 /* sse_util.cpp in Sources */, + E8EAEE011C246EB200E62E69 /* aligner_result.cpp in Sources */, + E8EAEE021C246EB200E62E69 /* aln_sink.cpp in Sources */, + E8EAEE031C246EB200E62E69 /* dp_framer.cpp in Sources */, + 
E8EAEE041C246EB200E62E69 /* mask.cpp in Sources */, + E8EAEE051C246EB200E62E69 /* pe.cpp in Sources */, + E8EAEE061C246EB200E62E69 /* presets.cpp in Sources */, + E8EAEE071C246EB200E62E69 /* ref_coord.cpp in Sources */, + E8EAEE081C246EB200E62E69 /* scoring.cpp in Sources */, + E8EAEE091C246EB200E62E69 /* unique.cpp in Sources */, + E8EAEDFA1C246E5100E62E69 /* aligner_cache.cpp in Sources */, + E8EAEDFB1C246E5100E62E69 /* aligner_seed_policy.cpp in Sources */, + E8EAEDFC1C246E5100E62E69 /* aligner_seed.cpp in Sources */, + E8EAEDFD1C246E5100E62E69 /* aligner_seed2.cpp in Sources */, + E8EAEDFE1C246E5100E62E69 /* aligner_sw_driver.cpp in Sources */, + E8EAEDFF1C246E5100E62E69 /* aligner_sw.cpp in Sources */, + E8EAEE001C246E5100E62E69 /* read_qseq.cpp in Sources */, + E8EAEDF61C246E3100E62E69 /* hisat2_main.cpp in Sources */, + E8EAEDF71C246E3100E62E69 /* hisat2.cpp in Sources */, + E8EAEDF81C246E3100E62E69 /* pat.cpp in Sources */, + E8EAEDF91C246E3100E62E69 /* qual.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + E8C30C661C20A64A00C03464 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + E8EAEDE81C246CDC00E62E69 /* diff_sample.cpp in Sources */, + E8EAEDD01C24667700E62E69 /* hisat2_build_main.cpp in Sources */, + E8EAEDD11C24667700E62E69 /* hisat2_build.cpp in Sources */, + E8EAEDCC1C24666400E62E69 /* limit.cpp in Sources */, + E8EAEDCD1C24666400E62E69 /* multikey_qsort.cpp in Sources */, + E8EAEDCE1C24666400E62E69 /* random_source.cpp in Sources */, + E8EAEDCF1C24666400E62E69 /* tinythread.cpp in Sources */, + E8EAEDC41C24663D00E62E69 /* alphabet.cpp in Sources */, + E8EAEDC51C24663D00E62E69 /* ccnt_lut.cpp in Sources */, + E8EAEDC61C24663D00E62E69 /* ds.cpp in Sources */, + E8EAEDC71C24663D00E62E69 /* edit.cpp in Sources */, + E8EAEDC81C24663D00E62E69 /* gfm.cpp in Sources */, + E8EAEDC91C24663D00E62E69 /* ref_read.cpp in Sources */, + E8EAEDCA1C24663D00E62E69 /* reference.cpp in Sources */, + 
E8EAEDCB1C24663D00E62E69 /* shmem.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + E8C30C711C20A65400C03464 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + E8EAEDF21C246DB200E62E69 /* hisat2_inspect.cpp in Sources */, + E8EAEDF31C246DB200E62E69 /* limit.cpp in Sources */, + E8EAEDF41C246DB200E62E69 /* random_source.cpp in Sources */, + E8EAEDF51C246DB200E62E69 /* tinythread.cpp in Sources */, + E8EAEDE91C246DA200E62E69 /* alphabet.cpp in Sources */, + E8EAEDEA1C246DA200E62E69 /* ccnt_lut.cpp in Sources */, + E8EAEDEB1C246DA200E62E69 /* ds.cpp in Sources */, + E8EAEDEC1C246DA200E62E69 /* edit.cpp in Sources */, + E8EAEDED1C246DA200E62E69 /* gfm.cpp in Sources */, + E8EAEDEE1C246DA200E62E69 /* multikey_qsort.cpp in Sources */, + E8EAEDEF1C246DA200E62E69 /* ref_read.cpp in Sources */, + E8EAEDF01C246DA200E62E69 /* reference.cpp in Sources */, + E8EAEDF11C246DA200E62E69 /* shmem.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + E8D9218120C85FE900378C5B /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + E8D921A620C879AB00378C5B /* mask.cpp in Sources */, + E8D921A520C8798500378C5B /* qual.cpp in Sources */, + E8D921A420C8794400378C5B /* aligner_bt.cpp in Sources */, + E8D921A220C8791000378C5B /* scoring.cpp in Sources */, + E8D921A320C8791000378C5B /* simple_func.cpp in Sources */, + 60A82E9E21274F1B0040293D /* bit_packed_array.cpp in Sources */, + E8D921A120C878EC00378C5B /* dp_framer.cpp in Sources */, + E8D9219920C878B700378C5B /* aligner_result.cpp in Sources */, + E8D9219A20C878B700378C5B /* aligner_sw_driver.cpp in Sources */, + E8D9219B20C878B700378C5B /* aligner_sw.cpp in Sources */, + E8D9219C20C878B700378C5B /* aligner_swsse_ee_i16.cpp in Sources */, + E8D9219D20C878B700378C5B /* aligner_swsse_ee_u8.cpp in Sources */, + E8D9219E20C878B700378C5B /* aligner_swsse_loc_i16.cpp in Sources */, + E8D9219F20C878B700378C5B /* 
aligner_swsse_loc_u8.cpp in Sources */, + E8D921A020C878B700378C5B /* aligner_swsse.cpp in Sources */, + E8D9218220C85FE900378C5B /* diff_sample.cpp in Sources */, + E8D9218520C85FE900378C5B /* limit.cpp in Sources */, + E8D9218620C85FE900378C5B /* multikey_qsort.cpp in Sources */, + E8D9218720C85FE900378C5B /* random_source.cpp in Sources */, + E8D9218820C85FE900378C5B /* tinythread.cpp in Sources */, + 606F132420E1528D008903D6 /* hisat2_repeat.cpp in Sources */, + 606F132520E1528D008903D6 /* hisat2_repeat_main.cpp in Sources */, + E8D9218920C85FE900378C5B /* alphabet.cpp in Sources */, + E8D9218A20C85FE900378C5B /* ccnt_lut.cpp in Sources */, + E8D9218B20C85FE900378C5B /* ds.cpp in Sources */, + E84A236320D2AF1600C12106 /* repeat_builder.cpp in Sources */, + E8D9218C20C85FE900378C5B /* edit.cpp in Sources */, + E8D9218D20C85FE900378C5B /* gfm.cpp in Sources */, + E8D9218E20C85FE900378C5B /* ref_read.cpp in Sources */, + E8D9218F20C85FE900378C5B /* reference.cpp in Sources */, + E8D9219020C85FE900378C5B /* shmem.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + E8EAEDD31C24675500E62E69 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + E8FD97E51C8343EF00861B09 /* diff_sample.cpp in Sources */, + E8EAEDD41C24675500E62E69 /* hisat2_build_main.cpp in Sources */, + E8EAEDD51C24675500E62E69 /* hisat2_build.cpp in Sources */, + E8EAEDD61C24675500E62E69 /* limit.cpp in Sources */, + E8EAEDD71C24675500E62E69 /* multikey_qsort.cpp in Sources */, + E8EAEDD81C24675500E62E69 /* random_source.cpp in Sources */, + E8EAEDD91C24675500E62E69 /* tinythread.cpp in Sources */, + E8EAEDDA1C24675500E62E69 /* alphabet.cpp in Sources */, + E8EAEDDB1C24675500E62E69 /* ccnt_lut.cpp in Sources */, + E8EAEDDC1C24675500E62E69 /* ds.cpp in Sources */, + E8EAEDDD1C24675500E62E69 /* edit.cpp in Sources */, + E8EAEDDE1C24675500E62E69 /* gfm.cpp in Sources */, + E8EAEDDF1C24675500E62E69 /* ref_read.cpp in Sources */, + 
E8EAEDE01C24675500E62E69 /* reference.cpp in Sources */, + E8EAEDE11C24675500E62E69 /* shmem.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + E8FD97B31C81CCED00861B09 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + E8FD97B41C81CCED00861B09 /* ref_read.cpp in Sources */, + E8FD97B51C81CCED00861B09 /* reference.cpp in Sources */, + E8FD97B61C81CCED00861B09 /* tinythread.cpp in Sources */, + E8FD97B71C81CCED00861B09 /* gfm.cpp in Sources */, + E8FD97B81C81CCED00861B09 /* alphabet.cpp in Sources */, + E8FD97B91C81CCED00861B09 /* edit.cpp in Sources */, + E8FD97BA1C81CCED00861B09 /* limit.cpp in Sources */, + E8FD97BB1C81CCED00861B09 /* ds.cpp in Sources */, + E8FD97BC1C81CCED00861B09 /* ccnt_lut.cpp in Sources */, + E8FD97BD1C81CCED00861B09 /* diff_sample.cpp in Sources */, + E8FD97BE1C81CCED00861B09 /* aligner_driver.cpp in Sources */, + E8FD97BF1C81CCED00861B09 /* splice_site.cpp in Sources */, + E8FD97C01C81CCED00861B09 /* aligner_bt.cpp in Sources */, + E8FD97C11C81CCED00861B09 /* aligner_swsse_ee_i16.cpp in Sources */, + E8FD97C21C81CCED00861B09 /* aligner_swsse_ee_u8.cpp in Sources */, + E8FD97C31C81CCED00861B09 /* aligner_swsse_loc_i16.cpp in Sources */, + E8FD97C41C81CCED00861B09 /* aligner_swsse_loc_u8.cpp in Sources */, + E8FD97C51C81CCED00861B09 /* aligner_swsse.cpp in Sources */, + E8FD97C61C81CCED00861B09 /* outq.cpp in Sources */, + E8FD97C71C81CCED00861B09 /* random_source.cpp in Sources */, + E8FD97C81C81CCED00861B09 /* random_util.cpp in Sources */, + E8FD97C91C81CCED00861B09 /* simple_func.cpp in Sources */, + E8FD97CA1C81CCED00861B09 /* sse_util.cpp in Sources */, + E8FD97CB1C81CCED00861B09 /* aligner_result.cpp in Sources */, + E8FD97CC1C81CCED00861B09 /* aln_sink.cpp in Sources */, + E8FD97CD1C81CCED00861B09 /* dp_framer.cpp in Sources */, + E8FD97CE1C81CCED00861B09 /* mask.cpp in Sources */, + E8FD97CF1C81CCED00861B09 /* pe.cpp in Sources */, + E8FD97D01C81CCED00861B09 /* 
presets.cpp in Sources */, + E8FD97D11C81CCED00861B09 /* ref_coord.cpp in Sources */, + E8FD97D21C81CCED00861B09 /* scoring.cpp in Sources */, + E8FD97D31C81CCED00861B09 /* unique.cpp in Sources */, + E8FD97D41C81CCED00861B09 /* aligner_cache.cpp in Sources */, + E8FD97D51C81CCED00861B09 /* aligner_seed_policy.cpp in Sources */, + E8FD97D61C81CCED00861B09 /* aligner_seed.cpp in Sources */, + E8FD97D71C81CCED00861B09 /* aligner_seed2.cpp in Sources */, + E8FD97D81C81CCED00861B09 /* aligner_sw_driver.cpp in Sources */, + E8FD97D91C81CCED00861B09 /* aligner_sw.cpp in Sources */, + E8FD97DA1C81CCED00861B09 /* read_qseq.cpp in Sources */, + E8FD97DB1C81CCED00861B09 /* hisat2_main.cpp in Sources */, + E8FD97DC1C81CCED00861B09 /* hisat2.cpp in Sources */, + E8FD97DD1C81CCED00861B09 /* pat.cpp in Sources */, + E8FD97DE1C81CCED00861B09 /* qual.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + E8C30C5F1C20A5DD00C03464 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + 
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "-"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + BOWTIE_MM, + "MACOS=1", + POPCNT_CAPABILITY, + BOWTIE2, + "HISAT2_VERSION=\"\\\"2.2.0\\\"\"", + "HISAT2_VERSION2=\"\\\"`cat VERSION`\\\"\"", + "BUILD_HOST=\"\\\"`hostname`\\\"\"", + "BUILD_TIME=\"\\\"`date\\\"\"", + "COMPILER_VERSION=\"\\\"`$(CXX) -v 2>&1 | tail -1`\\\"\"", + "COMPILER_OPTIONS=\"\\\"test\\\"\"", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = NO; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 10.12; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = macosx; + }; + name = Debug; + }; + E8C30C601C20A5DD00C03464 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + 
CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "-"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_NO_COMMON_BLOCKS = YES; + GCC_PREPROCESSOR_DEFINITIONS = ( + NDEBUG, + "DEBUG=0", + BOWTIE_MM, + "MACOS=1", + POPCNT_CAPABILITY, + BOWTIE2, + "HISAT2_VERSION=\"\\\"2.2.0\\\"\"", + "BUILD_HOST=\"\\\"`hostname`\\\"\"", + "BUILD_TIME=\"\\\"`date`\\\"\"", + "COMPILER_VERSION=\"\\\"`$(CXX) -v 2>&1 | tail -1`\\\"\"", + "COMPILER_OPTIONS=\"\\\"test\\\"\"", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = NO; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 10.12; + MTL_ENABLE_DEBUG_INFO = NO; + SDKROOT = macosx; + }; + name = Release; + }; + E8C30C621C20A5DD00C03464 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + E8C30C631C20A5DD00C03464 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; + E8C30C6F1C20A64A00C03464 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + E8C30C701C20A64A00C03464 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PREPROCESSOR_DEFINITIONS = "$(inherited)"; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; + E8C30C7A1C20A65400C03464 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + E8C30C7B1C20A65400C03464 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name 
= Release; + }; + E8D9219420C85FE900378C5B /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PREPROCESSOR_DEFINITIONS = ( + "$(inherited)", + "DEBUG=1", + BOWTIE_MM, + "MACOS=1", + POPCNT_CAPABILITY, + BOWTIE2, + "HISAT2_VERSION=\"\\\"2.2.0\\\"\"", + "HISAT2_VERSION2=\"\\\"`cat VERSION`\\\"\"", + "BUILD_HOST=\"\\\"`hostname`\\\"\"", + "BUILD_TIME=\"\\\"`date\\\"\"", + "COMPILER_VERSION=\"\\\"`$(CXX) -v 2>&1 | tail -1`\\\"\"", + "COMPILER_OPTIONS=\"\\\"test\\\"\"", + DEBUGLOG, + BOWTIE_64BIT_INDEX, + ); + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + E8D9219520C85FE900378C5B /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PREPROCESSOR_DEFINITIONS = ( + "$(inherited)", + BOWTIE_64BIT_INDEX, + ); + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; + E8EAEDE51C24675500E62E69 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PREPROCESSOR_DEFINITIONS = ( + "$(inherited)", + BOWTIE_64BIT_INDEX, + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = NO; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + E8EAEDE61C24675500E62E69 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PREPROCESSOR_DEFINITIONS = ( + "$(inherited)", + "DEBUG=0", + BOWTIE_MM, + "MACOS=1", + POPCNT_CAPABILITY, + BOWTIE2, + BOWTIE_64BIT_INDEX, + "HISAT2_VERSION=\"\\\"`cat VERSION`\\\"\"", + "BUILD_HOST=\"\\\"`hostname`\\\"\"", + "BUILD_TIME=\"\\\"`date`\\\"\"", + "COMPILER_VERSION=\"\\\"`$(CXX) -v 2>&1 | tail -1`\\\"\"", + "COMPILER_OPTIONS=\"\\\"test\\\"\"", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = NO; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; + E8FD97E21C81CCED00861B09 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PREPROCESSOR_DEFINITIONS = ( + "$(inherited)", + "DEBUG=1", + BOWTIE_MM, + "MACOS=1", + POPCNT_CAPABILITY, + BOWTIE2, + "HISAT2_VERSION=\"\\\"2.0.2-beta\\\"\"", + "HISAT2_VERSION2=\"\\\"`cat VERSION`\\\"\"", + 
"BUILD_HOST=\"\\\"`hostname`\\\"\"", + "BUILD_TIME=\"\\\"`date\\\"\"", + "COMPILER_VERSION=\"\\\"`$(CXX) -v 2>&1 | tail -1`\\\"\"", + "COMPILER_OPTIONS=\"\\\"test\\\"\"", + BOWTIE_64BIT_INDEX, + ); + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + E8FD97E31C81CCED00861B09 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PREPROCESSOR_DEFINITIONS = ( + "$(inherited)", + "DEBUG=0", + BOWTIE_MM, + "MACOS=1", + POPCNT_CAPABILITY, + BOWTIE2, + "HISAT2_VERSION=\"\\\"2.2.0\\\"\"", + "BUILD_HOST=\"\\\"`hostname`\\\"\"", + "BUILD_TIME=\"\\\"`date`\\\"\"", + "COMPILER_VERSION=\"\\\"`$(CXX) -v 2>&1 | tail -1`\\\"\"", + "COMPILER_OPTIONS=\"\\\"test\\\"\"", + BOWTIE_64BIT_INDEX, + ); + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + E8C30C551C20A5DD00C03464 /* Build configuration list for PBXProject "hisat2" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + E8C30C5F1C20A5DD00C03464 /* Debug */, + E8C30C601C20A5DD00C03464 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + E8C30C611C20A5DD00C03464 /* Build configuration list for PBXNativeTarget "hisat2x" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + E8C30C621C20A5DD00C03464 /* Debug */, + E8C30C631C20A5DD00C03464 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + E8C30C6E1C20A64A00C03464 /* Build configuration list for PBXNativeTarget "hisat2-buildx" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + E8C30C6F1C20A64A00C03464 /* Debug */, + E8C30C701C20A64A00C03464 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + E8C30C791C20A65400C03464 /* Build configuration list for PBXNativeTarget "hisat2-inspectx" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + E8C30C7A1C20A65400C03464 /* Debug */, 
+ E8C30C7B1C20A65400C03464 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + E8D9219320C85FE900378C5B /* Build configuration list for PBXNativeTarget "hisat2-repeatx" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + E8D9219420C85FE900378C5B /* Debug */, + E8D9219520C85FE900378C5B /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + E8EAEDE41C24675500E62E69 /* Build configuration list for PBXNativeTarget "hisat2-buildlx" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + E8EAEDE51C24675500E62E69 /* Debug */, + E8EAEDE61C24675500E62E69 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + E8FD97E11C81CCED00861B09 /* Build configuration list for PBXNativeTarget "hisat2lx" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + E8FD97E21C81CCED00861B09 /* Debug */, + E8FD97E31C81CCED00861B09 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = E8C30C521C20A5DD00C03464 /* Project object */; +} diff --git a/hisat2_build.cpp b/hisat2_build.cpp new file mode 100644 index 0000000..4ea2976 --- /dev/null +++ b/hisat2_build.cpp @@ -0,0 +1,970 @@ +/* + * Copyright 2015, Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#include +#include +#include +#include +#include +#include "assert_helpers.h" +#include "endian_swap.h" +#include "formats.h" +#include "sequence_io.h" +#include "tokenize.h" +#include "timer.h" +#include "ref_read.h" +#include "filebuf.h" +#include "reference.h" +#include "ds.h" +#include "gfm.h" +#include "hgfm.h" +#include "rfm.h" +#include "utility_3n.h" + + +/** + * \file Driver for the bowtie-build indexing tool. + */ + +#include +#include +#include +#include +#include + +MemoryTally gMemTally; +// Build parameters +int verbose; +static int sanityCheck; +static int format; +static TIndexOffU bmax; +static TIndexOffU bmaxMultSqrt; +static uint32_t bmaxDivN; +static int dcv; +static int noDc; +static int entireSA; +static int seed; +static int showVersion; +// GFM parameters +static int32_t lineRate; +static bool lineRate_provided; +static int32_t linesPerSide; +static int32_t offRate; +static int32_t ftabChars; +static int32_t localOffRate; +static int32_t localFtabChars; +static int bigEndian; +static bool nsToAs; +static bool autoMem; +static bool packed; +static bool writeRef; +static bool justRef; +static bool reverseEach; +static int nthreads; // number of pthreads operating concurrently +static string wrapper; +static string snp_fname; +static string ht_fname; +static string ss_fname; +static string exon_fname; +static string sv_fname; +static string repeat_ref_fname; +static string repeat_info_fname; +static string repeat_snp_fname; +static string repeat_haplotype_fname; + +bool threeN = false; +bool repeatIndex = false; +bool base_change_entered; +char convertedFrom; +char convertedTo; +char convertedFromComplement; +char convertedToComplement; + +ConvertMatrix3N baseChange; + +static void resetOptions() { + verbose = true; // be talkative (default) + sanityCheck = 0; // do slow sanity checks + format = FASTA; // input sequence format 
+ bmax = OFF_MASK; // max blockwise SA bucket size + bmaxMultSqrt = OFF_MASK; // same, as multplier of sqrt(n) + bmaxDivN = 4; // same, as divisor of n + dcv = 1024; // bwise SA difference-cover sample sz + noDc = 0; // disable difference-cover sample + entireSA = 0; // 1 = disable blockwise SA + seed = 0; // srandom seed + showVersion = 0; // just print version and quit? + // GFM parameters + lineRate = GFM::default_lineRate_gfm; + lineRate_provided = false; + linesPerSide = 1; // 1 64-byte line on a side + offRate = 4; // sample 1 out of 16 SA elts + ftabChars = 10; // 10 chars in initial lookup table + localOffRate = 3; + localFtabChars = 6; + bigEndian = 0; // little endian + nsToAs = false; // convert reference Ns to As prior to indexing + autoMem = true; // automatically adjust memory usage parameters + packed = false; // + writeRef = true; // write compact reference to .3.ht2/.4.ht2 + justRef = false; // *just* write compact reference, don't index + reverseEach = false; + nthreads = 1; + wrapper.clear(); + snp_fname = ""; + ht_fname = ""; + ss_fname = ""; + exon_fname = ""; + sv_fname = ""; + repeat_ref_fname = ""; + repeat_info_fname = ""; + repeat_snp_fname = ""; + repeat_haplotype_fname = ""; + threeN = false; + repeatIndex = false; + base_change_entered = false; + convertedFrom = 'C'; + convertedTo = 'T'; + convertedFromComplement = asc2dnacomp[convertedFrom]; + convertedToComplement = asc2dnacomp[convertedTo]; +} + +// Argument constants for getopts +enum { + ARG_BMAX = 256, + ARG_BMAX_MULT, + ARG_BMAX_DIV, + ARG_DCV, + ARG_SEED, + ARG_CUTOFF, + ARG_PMAP, + ARG_NTOA, + ARG_USAGE, + ARG_REVERSE_EACH, + ARG_SA, + ARG_WRAPPER, + ARG_LOCAL_OFFRATE, + ARG_LOCAL_FTABCHARS, + ARG_SNP, + ARG_HAPLOTYPE, + ARG_SPLICESITE, + ARG_EXON, + ARG_SV, + ARG_REPEAT_REF, + ARG_REPEAT_INFO, + ARG_REPEAT_SNP, + ARG_REPEAT_HAPLOTYPE, + ARG_3N, + ARG_REPEAT_INDEX, + ARG_BASE_CHANGE +}; + +/** + * Print a detailed usage message to the provided output stream. 
+ */ +static void printUsage(ostream& out) { + out << "HISAT2 version " << string(HISAT2_VERSION).c_str() << " by Daehwan Kim (infphilo@gmail.com, http://www.ccb.jhu.edu/people/infphilo)" << endl; + +#ifdef BOWTIE_64BIT_INDEX + string tool_name = "hisat2-build-l"; +#else + string tool_name = "hisat2-build-s"; +#endif + if(wrapper == "basic-0") { + tool_name = "hisat2-build"; + } + + out << "Usage: hisat2-build [options]* " << endl + << " reference_in comma-separated list of files with ref sequences" << endl + << " hisat2_index_base write " << gfm_ext << " data to files with this dir/basename" << endl + << "Options:" << endl + << " -c reference sequences given on cmd line (as" << endl + << " )" << endl; + if(wrapper == "basic-0") { + out << " --large-index force generated index to be 'large', even if ref" << endl + << " has fewer than 4 billion nucleotides" << endl; + } + out << " -a/--noauto disable automatic -p/--bmax/--dcv memory-fitting" << endl + << " -p number of threads" << endl + << " --bmax max bucket sz for blockwise suffix-array builder" << endl + << " --bmaxdivn max bucket sz as divisor of ref len (default: 4)" << endl + << " --dcv diff-cover period for blockwise (default: 1024)" << endl + << " --nodc disable diff-cover (algorithm becomes quadratic)" << endl + << " -r/--noref don't build .3/.4.ht2 (packed reference) portion" << endl + << " -3/--justref just build .3/.4.ht2 (packed reference) portion" << endl + << " -o/--offrate SA is sampled every 2^offRate BWT chars (default: 5)" << endl + << " -t/--ftabchars # of chars consumed in initial lookup (default: 10)" << endl + << " --localoffrate SA (local) is sampled every 2^offRate BWT chars (default: 3)" << endl + << " --localftabchars # of chars consumed in initial lookup in a local index (default: 6)" << endl + << " --snp SNP file name" << endl + << " --haplotype haplotype file name" << endl + << " --ss Splice site file name" << endl + << " --exon Exon file name" << endl + << " --repeat-ref Repeat 
reference file name" << endl + << " --repeat-info Repeat information file name" << endl + << " --repeat-snp Repeat snp file name" << endl + << " --repeat-haplotype Repeat haplotype file name" << endl + << " --seed seed for random number generator" << endl + << " --base-change the converted nucleotide and converted to nucleotide (default:C,T)" << endl + << " --repeat-index-[,-] automatically build repeat database and repeat index, enter the minimum-maximum repeat length pairs (default: 100-300)" << endl + << " -q/--quiet disable verbose output (for debugging)" << endl + << " -h/--help print detailed description of tool and its options" << endl + << " --usage print this usage message" << endl + << " --version print version information and quit" << endl + ; + + if(wrapper.empty()) { + cerr << endl + << "*** Warning ***" << endl + << "'" << tool_name << "' was run directly. It is recommended " + << "that you run the wrapper script 'hisat2-build' instead." + << endl << endl; + } +} + +static const char *short_options = "qrap:h?nscfl:i:o:t:h:3C"; + +static struct option long_options[] = { + {(char*)"quiet", no_argument, 0, 'q'}, + {(char*)"sanity", no_argument, 0, 's'}, + {(char*)"threads", required_argument, 0, 'p'}, + {(char*)"little", no_argument, &bigEndian, 0}, + {(char*)"big", no_argument, &bigEndian, 1}, + {(char*)"bmax", required_argument, 0, ARG_BMAX}, + {(char*)"bmaxmultsqrt", required_argument, 0, ARG_BMAX_MULT}, + {(char*)"bmaxdivn", required_argument, 0, ARG_BMAX_DIV}, + {(char*)"dcv", required_argument, 0, ARG_DCV}, + {(char*)"nodc", no_argument, &noDc, 1}, + {(char*)"seed", required_argument, 0, ARG_SEED}, + {(char*)"entiresa", no_argument, &entireSA, 1}, + {(char*)"version", no_argument, &showVersion, 1}, + {(char*)"noauto", no_argument, 0, 'a'}, + {(char*)"noblocks", required_argument, 0, 'n'}, + {(char*)"linerate", required_argument, 0, 'l'}, + {(char*)"linesperside", required_argument, 0, 'i'}, + {(char*)"offrate", required_argument, 0, 'o'}, + 
{(char*)"ftabchars", required_argument, 0, 't'}, + {(char*)"localoffrate", required_argument, 0, ARG_LOCAL_OFFRATE}, + {(char*)"localftabchars", required_argument, 0, ARG_LOCAL_FTABCHARS}, + {(char*)"snp", required_argument, 0, ARG_SNP}, + {(char*)"haplotype", required_argument, 0, ARG_HAPLOTYPE}, + {(char*)"ss", required_argument, 0, ARG_SPLICESITE}, + {(char*)"exon", required_argument, 0, ARG_EXON}, + {(char*)"sv", required_argument, 0, ARG_SV}, + {(char*)"repeat-ref", required_argument, 0, ARG_REPEAT_REF}, + {(char*)"repeat-info", required_argument, 0, ARG_REPEAT_INFO}, + {(char*)"repeat-snp", required_argument, 0, ARG_REPEAT_SNP}, + {(char*)"repeat-haplotype", required_argument, 0, ARG_REPEAT_HAPLOTYPE}, + {(char*)"help", no_argument, 0, 'h'}, + {(char*)"ntoa", no_argument, 0, ARG_NTOA}, + {(char*)"justref", no_argument, 0, '3'}, + {(char*)"noref", no_argument, 0, 'r'}, + {(char*)"sa", no_argument, 0, ARG_SA}, + {(char*)"reverse-each", no_argument, 0, ARG_REVERSE_EACH}, + {(char*)"usage", no_argument, 0, ARG_USAGE}, + {(char*)"wrapper", required_argument, 0, ARG_WRAPPER}, + {(char*)"3N", no_argument, 0, ARG_3N}, + {(char*)"repeat-index", no_argument, 0, ARG_REPEAT_INDEX}, + {(char*)"base-change", required_argument, 0, ARG_BASE_CHANGE}, + {(char*)0, 0, 0, 0} // terminator +}; + +/** + * Parse an int out of optarg and enforce that it be at least 'lower'; + * if it is less than 'lower', then output the given error message and + * exit with an error and a usage message. 
+ */ +template +static T parseNumber(T lower, const char *errmsg) { + char *endPtr= NULL; + T t = (T)strtoll(optarg, &endPtr, 10); + if (endPtr != NULL) { + if (t < lower) { + cerr << errmsg << endl; + printUsage(cerr); + throw 1; + } + return t; + } + cerr << errmsg << endl; + printUsage(cerr); + throw 1; + return -1; +} + +/** + * Read command-line arguments + */ +static void parseOptions(int argc, const char **argv) { + int option_index = 0; + int next_option; + do { + next_option = getopt_long( + argc, const_cast(argv), + short_options, long_options, &option_index); + switch (next_option) { + case ARG_WRAPPER: + wrapper = optarg; + break; + case 'f': format = FASTA; break; + case 'c': format = CMDLINE; break; + //case 'p': packed = true; break; + case 'C': + cerr << "Error: -C specified but Bowtie 2 does not support colorspace input." << endl; + throw 1; + break; + case 'l': + lineRate = parseNumber(3, "-l/--lineRate arg must be at least 3"); + lineRate_provided = true; + break; + case 'i': + linesPerSide = parseNumber(1, "-i/--linesPerSide arg must be at least 1"); + break; + case 'o': + offRate = parseNumber(0, "-o/--offRate arg must be at least 0"); + break; + case ARG_LOCAL_OFFRATE: + localOffRate = parseNumber(0, "-o/--localoffrate arg must be at least 0"); + break; + case '3': + justRef = true; + break; + case 't': + ftabChars = parseNumber(1, "-t/--ftabChars arg must be at least 1"); + break; + case ARG_LOCAL_FTABCHARS: + localFtabChars = parseNumber(1, "-t/--localftabchars arg must be at least 1"); + break; + case 'n': + // all f-s is used to mean "not set", so put 'e' on end + bmax = 0xfffffffe; + break; + case 'h': + case ARG_USAGE: + printUsage(cout); + throw 0; + break; + case ARG_SNP: + snp_fname = optarg; + break; + case ARG_HAPLOTYPE: + ht_fname = optarg; + break; + case ARG_SPLICESITE: + ss_fname = optarg; + break; + case ARG_EXON: + exon_fname = optarg; + break; + case ARG_SV: + sv_fname = optarg; + break; + case ARG_REPEAT_REF: + 
repeat_ref_fname = optarg; + break; + case ARG_REPEAT_INFO: + repeat_info_fname = optarg; + break; + case ARG_REPEAT_SNP: + repeat_snp_fname = optarg; + break; + case ARG_REPEAT_HAPLOTYPE: + repeat_haplotype_fname = optarg; + break; + case ARG_BMAX: + bmax = parseNumber(1, "--bmax arg must be at least 1"); + bmaxMultSqrt = OFF_MASK; // don't use multSqrt + bmaxDivN = 0xffffffff; // don't use multSqrt + break; + case ARG_BMAX_MULT: + bmaxMultSqrt = parseNumber(1, "--bmaxmultsqrt arg must be at least 1"); + bmax = OFF_MASK; // don't use bmax + bmaxDivN = 0xffffffff; // don't use multSqrt + break; + case ARG_BMAX_DIV: + bmaxDivN = parseNumber(1, "--bmaxdivn arg must be at least 1"); + bmax = OFF_MASK; // don't use bmax + bmaxMultSqrt = OFF_MASK; // don't use multSqrt + break; + case ARG_DCV: + dcv = parseNumber(3, "--dcv arg must be at least 3"); + break; + case ARG_SEED: + seed = parseNumber(0, "--seed arg must be at least 0"); + break; + case ARG_REVERSE_EACH: + reverseEach = true; + break; + case ARG_NTOA: nsToAs = true; break; + case ARG_3N: threeN = true; break; + case ARG_REPEAT_INDEX: repeatIndex = true; break; + case ARG_BASE_CHANGE: { + EList args; + tokenize(optarg, ",", args); + if(args.size() != 2) { + cerr << "Error: expected 2 comma-separated " + << "arguments to --base-change option, got " << args.size() << endl; + throw 1; + } + + getConversion(args[0][0], args[1][0], convertedFrom, convertedTo); + + string s = "ACGT"; + if ((s.find(convertedFrom) == std::string::npos) || (s.find(convertedTo) == std::string::npos)) { + cerr << "Please enter the nucleotide in 'ACGT' for --base-change option." << endl; + throw 1; + } + + if (convertedFrom == convertedTo) { + cerr << "Please enter two different base for --base-change option. If you wish to build index without nucleotide conversion, please use hisat2-build." 
<< endl; + throw 1; + } + + base_change_entered = true; + } + case 'a': autoMem = false; break; + case 'q': verbose = false; break; + case 's': sanityCheck = true; break; + case 'r': writeRef = false; break; + case 'p': + nthreads = parseNumber(1, "-p arg must be at least 1"); + break; + + case -1: /* Done with options. */ + break; + case 0: + if (long_options[option_index].flag != 0) + break; + default: + printUsage(cerr); + throw 1; + } + } while(next_option != -1); + if(bmax < 40) { + cerr << "Warning: specified bmax is very small (" << bmax << "). This can lead to" << endl + << "extremely slow performance and memory exhaustion. Perhaps you meant to specify" << endl + << "a small --bmaxdivn?" << endl; + } +} + +EList filesWritten; + +/** + * Delete all the index files that we tried to create. For when we had to + * abort the index-building process due to an error. + */ +static void deleteIdxFiles( + const string& outfile, + bool doRef, + bool justRef) +{ + + for(size_t i = 0; i < filesWritten.size(); i++) { + cerr << "Deleting \"" << filesWritten[i].c_str() + << "\" file written during aborted indexing attempt." << endl; + remove(filesWritten[i].c_str()); + } +} + +extern void initializeCntLut(); +extern void initializeCntBit(); + + +/** + * Drive the index construction process and optionally sanity-check the + * result. 
+ */ +template +static void driver( + const string& infile, + EList& infiles, + const string& snpfile, + const string& htfile, + const string& ssfile, + const string& exonfile, + const string& svfile, + const string& repeatfile, + const string& outfile, + bool packed, + int reverse, + bool localindex = true, + EList* parent_szs = NULL, + EList* parent_refnames = NULL, + EList* output_szs = NULL, + EList* output_refnames = NULL) +{ + initializeCntLut(); + initializeCntBit(); + EList is(MISC_CAT); + bool bisulfite = false; + bool repeat = parent_szs != NULL; + RefReadInParams refparams(false, reverse, nsToAs, bisulfite); + assert_gt(infiles.size(), 0); + if(format == CMDLINE) { + // Adapt sequence strings to stringstreams open for input + stringstream *ss = new stringstream(); + for(size_t i = 0; i < infiles.size(); i++) { + (*ss) << ">" << i << endl << infiles[i].c_str() << endl; + } + FileBuf *fb = new FileBuf(ss); + assert(fb != NULL); + assert(!fb->eof()); + assert(fb->get() == '>'); + ASSERT_ONLY(fb->reset()); + assert(!fb->eof()); + is.push_back(fb); + } else { + // Adapt sequence files to ifstreams + for(size_t i = 0; i < infiles.size(); i++) { + FILE *f = fopen(infiles[i].c_str(), "r"); + if (f == NULL) { + cerr << "Error: could not open "<< infiles[i].c_str() << endl; + throw 1; + } + FileBuf *fb = new FileBuf(f); + assert(fb != NULL); + if(fb->peek() == -1 || fb->eof()) { + cerr << "Warning: Empty fasta file: '" << infile.c_str() << "'" << endl; + continue; + } + assert(!fb->eof()); + assert(fb->get() == '>'); + ASSERT_ONLY(fb->reset()); + assert(!fb->eof()); + is.push_back(fb); + } + } + if(is.empty()) { + cerr << "Warning: All fasta inputs were empty" << endl; + throw 1; + } + filesWritten.push_back(outfile + ".1." + gfm_ext); + filesWritten.push_back(outfile + ".2." + gfm_ext); + // Vector for the ordered list of "records" comprising the input + // sequences. A record represents a stretch of unambiguous + // characters in one of the input sequences. 
+ EList szs(MISC_CAT); + std::pair sztot; + { + if(verbose) cerr << "Reading reference sizes" << endl; + Timer _t(cerr, " Time reading reference sizes: ", verbose); + if(!reverse && (writeRef || justRef)) { + filesWritten.push_back(outfile + ".3." + gfm_ext); + filesWritten.push_back(outfile + ".4." + gfm_ext); + sztot = BitPairReference::szsFromFasta(is, outfile, bigEndian, refparams, szs, sanityCheck); + if (threeN) { + // save the unchanged reference in .3.ht2 and .4.ht2 + baseChange.restoreNormal(); + EList tmp_szs(MISC_CAT); + BitPairReference::szsFromFasta(is, outfile, bigEndian, refparams, tmp_szs, sanityCheck); + baseChange.restoreConversion(); + } + } else { + assert(false); + sztot = BitPairReference::szsFromFasta(is, string(), bigEndian, refparams, szs, sanityCheck); + } + } + if(justRef) return; + assert_gt(sztot.first, 0); + assert_gt(sztot.second, 0); + assert_gt(szs.size(), 0); + + // Construct index from input strings and parameters + filesWritten.push_back(outfile + ".5." + gfm_ext); + filesWritten.push_back(outfile + ".6." + gfm_ext); + filesWritten.push_back(outfile + ".7." + gfm_ext); + filesWritten.push_back(outfile + ".8." + gfm_ext); + TStr s; + GFM* gfm = NULL; + if(!repeat) { // base index + gfm = new HGFM( + s, + packed, + 1, // TODO: maybe not? + lineRate, + offRate, // suffix-array sampling rate + ftabChars, // number of chars in initial arrow-pair calc + localOffRate, + localFtabChars, + nthreads, + snpfile, + htfile, + ssfile, + exonfile, + svfile, + repeatfile, + outfile, // basename for .?.ht2 files + reverse == 0, // fw + !entireSA, // useBlockwise + bmax, // block size for blockwise SA builder + bmaxMultSqrt, // block size as multiplier of sqrt(len) + bmaxDivN, // block size as divisor of len + noDc? 
0 : dcv,// difference-cover period + is, // list of input streams + szs, // list of reference sizes + (TIndexOffU)sztot.first, // total size of all unambiguous ref chars + refparams, // reference read-in parameters + localindex, // create local indexes? + parent_szs, // parent szs + parent_refnames, // parent refence names + seed, // pseudo-random number generator seed + -1, // override offRate + verbose, // be talkative + autoMem, // pass exceptions up to the toplevel so that we can adjust memory settings automatically + sanityCheck); // verify results and internal consistency + } else { // repeat index + gfm = new RFM( + s, + packed, + 1, // TODO: maybe not? + lineRate, + offRate, // suffix-array sampling rate + ftabChars, // number of chars in initial arrow-pair calc + localOffRate, + localFtabChars, + nthreads, + snpfile, + htfile, + ssfile, + exonfile, + svfile, + repeatfile, + outfile, // basename for .?.ht2 files + reverse == 0, // fw + !entireSA, // useBlockwise + bmax, // block size for blockwise SA builder + bmaxMultSqrt, // block size as multiplier of sqrt(len) + bmaxDivN, // block size as divisor of len + noDc? 0 : dcv,// difference-cover period + is, // list of input streams + szs, // list of reference sizes + (TIndexOffU)sztot.first, // total size of all unambiguous ref chars + refparams, // reference read-in parameters + localindex, // create local indexes? + parent_szs, // parent szs + parent_refnames, // parent refence names + seed, // pseudo-random number generator seed + -1, // override offRate + verbose, // be talkative + autoMem, // pass exceptions up to the toplevel so that we can adjust memory settings automatically + sanityCheck); // verify results and internal consistency + } + + if(output_szs != NULL) { + *output_szs = szs; + } + if(output_refnames != NULL) { + *output_refnames = gfm->_refnames_nospace; + } + + // Note that the Ebwt is *not* resident in memory at this time. 
To + // load it into memory, call ebwt.loadIntoMemory() + if(verbose) { + // Print Ebwt's vital stats + gfm->gh().print(cerr); + } + if(sanityCheck) { + // Try restoring the original string (if there were + // multiple texts, what we'll get back is the joined, + // padded string, not a list) + gfm->loadIntoMemory( + reverse ? (refparams.reverse == REF_READ_REVERSE) : 0, + true, // load SA sample? + true, // load ftab? + true, // load rstarts? + false, + false); + SString s2; + gfm->restore(s2); + gfm->evictFromMemory(); + { + SString joinedss; + GFM<>::join >( + is, // list of input streams + szs, // list of reference sizes + (TIndexOffU)sztot.first, // total size of all unambiguous ref chars + refparams, // reference read-in parameters + seed, // pseudo-random number generator seed + joinedss); + if(refparams.reverse == REF_READ_REVERSE) { + joinedss.reverse(); + } + assert_eq(joinedss.length(), s2.length()); + assert(sstr_eq(joinedss, s2)); + } + if(verbose) { + if(s2.length() < 1000) { + cout << "Passed restore check: " << s2.toZBuf() << endl; + } else { + cout << "Passed restore check: (" << s2.length() << " chars)" << endl; + } + } + } + + delete gfm; +} + +static const char *argv0 = NULL; + +extern "C" { +/** + * main function. Parses command-line arguments. 
+ */ +int hisat2_build(int argc, const char **argv) { + string outfile; + try { + // Reset all global state, including getopt state + opterr = optind = 1; + resetOptions(); + + string infile; + EList infiles(MISC_CAT); + + parseOptions(argc, argv); + argv0 = argv[0]; + if(showVersion) { + cout << argv0 << " version " << string(HISAT2_VERSION).c_str() << endl; + if(sizeof(void*) == 4) { + cout << "32-bit" << endl; + } else if(sizeof(void*) == 8) { + cout << "64-bit" << endl; + } else { + cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + } + cout << "Built on " << BUILD_HOST << endl; + cout << BUILD_TIME << endl; + cout << "Compiler: " << COMPILER_VERSION << endl; + cout << "Options: " << COMPILER_OPTIONS << endl; + cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" + << sizeof(int) + << ", " << sizeof(long) << ", " << sizeof(long long) + << ", " << sizeof(void *) << ", " << sizeof(size_t) + << ", " << sizeof(off_t) << "}" << endl; + return 0; + } + + if (!threeN && base_change_entered) { + cerr << "Please do not use --base-change for hisat2-build. To build hisat-3n index, please use hisat-3n-build." << endl; + printUsage(cerr); + throw 1; + } + if (threeN) { + convertedFromComplement = asc2dnacomp[convertedFrom]; + convertedToComplement = asc2dnacomp[convertedTo]; + } + // Get input filename + if(optind >= argc) { + cerr << "No input sequence or sequence file specified!" << endl; + printUsage(cerr); + return 1; + } + infile = argv[optind++]; + + // Get output filename + if(optind >= argc) { + cerr << "No output file specified!" << endl; + printUsage(cerr); + return 1; + } + outfile = argv[optind++]; + + tokenize(infile, ",", infiles); + if(infiles.size() < 1) { + cerr << "Tokenized input file list was empty!" 
<< endl; + printUsage(cerr); + return 1; + } + + if(!lineRate_provided) { + if(snp_fname == "" && ss_fname == "" && exon_fname == "") { + lineRate = GFM::default_lineRate_fm; + } else { + lineRate = GFM::default_lineRate_gfm; + } + } + + // Optionally summarize + if(verbose) { + cerr << "Settings:" << endl + << " Output files: \"" << outfile.c_str() << (threeN?".3n":"") << ".*." << gfm_ext << "\"" << endl + << " Line rate: " << lineRate << " (line is " << (1< parent_szs(MISC_CAT); + EList parent_refnames; + string dummy_fname = ""; + + int nloop = threeN ? 2 : 1; // if threeN == true, nloop = 2. else one loop + for (int i = 0; i < nloop; i++) { + string tag = ""; + if (threeN) { + tag += ".3n."; + if (i == 0) { + tag += convertedFrom; + tag += convertedTo; + baseChange.convert(convertedFrom, convertedTo); + } else { + tag += convertedFromComplement; + tag += convertedToComplement; + baseChange.convert(convertedFromComplement, convertedToComplement); + } + + string indexFilename = outfile + tag + ".6.ht2"; + if (fileExist(indexFilename)) { + cerr << "*** Find index for " << outfile + tag << ",skip this index building process." << endl; + cerr << " To re-build your hisat-3n index, please delete the old index manually before running hisat-3n-build." 
<< endl; + continue; + } + } + + driver >(infile, + infiles, + snp_fname, + ht_fname, + ss_fname, + exon_fname, + sv_fname, + dummy_fname, + outfile + tag, + false, + REF_READ_FORWARD, + true, // create local indexes + NULL, // no parent szs + NULL, // no parent refnames + &parent_szs, // get parent szs + &parent_refnames); // get parent refnames + + if(repeat_ref_fname.length() > 0) { + string repeat_ref_fname_3N; + string repeat_info_fname_3N; + if (threeN) { + repeat_ref_fname_3N = repeat_ref_fname + tag + ".rep.fa"; + repeat_info_fname_3N = repeat_info_fname + tag + ".rep.info"; + } + EList repeat_infiles(MISC_CAT); + tokenize(repeat_ref_fname_3N, ",", repeat_infiles); + driver >(repeat_ref_fname_3N, + repeat_infiles, + repeat_snp_fname, + repeat_haplotype_fname, + dummy_fname, + dummy_fname, + dummy_fname, + repeat_info_fname_3N, + outfile + tag + ".rep", + false, + REF_READ_FORWARD, + true, // create local index? + &parent_szs, + &parent_refnames); + } else if (repeatIndex) { + string repeat_ref_fname_3N = outfile + tag + ".rep.fa"; + string repeat_info_fname_3N = outfile + tag + ".rep.info"; + EList repeat_infiles(MISC_CAT); + tokenize(repeat_ref_fname_3N, ",", repeat_infiles); + driver >(repeat_ref_fname_3N, + repeat_infiles, + repeat_snp_fname, + repeat_haplotype_fname, + dummy_fname, + dummy_fname, + dummy_fname, + repeat_info_fname_3N, + outfile + tag + ".rep", + false, + REF_READ_FORWARD, + true, // create local index? + &parent_szs, + &parent_refnames); + } + } + } catch(bad_alloc& e) { + if(autoMem) { + cerr << "Switching to a packed string representation." 
<< endl; + packed = true; + } else { + throw e; + } + } + } + return 0; + } catch(std::exception& e) { + cerr << "Error: Encountered exception: '" << e.what() << "'" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + deleteIdxFiles(outfile, writeRef || justRef, justRef); + return 1; + } catch(int e) { + if(e != 0) { + cerr << "Error: Encountered internal HISAT2 exception (#" << e << ")" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + } + deleteIdxFiles(outfile, writeRef || justRef, justRef); + return e; + } +} +} diff --git a/hisat2_build_main.cpp b/hisat2_build_main.cpp new file mode 100644 index 0000000..166814d --- /dev/null +++ b/hisat2_build_main.cpp @@ -0,0 +1,70 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +#include +#include +#include +#include "tokenize.h" +#include "ds.h" +#include "mem_ids.h" + +using namespace std; + +extern "C" { + int hisat2_build(int argc, const char **argv); +} + +/** + * bowtie-build main function. It is placed in a separate source file + * to make it slightly easier to compile as a library. 
+ * + * If the user specifies -A as the first two arguments, main + * will interpret that file as having one set of command-line arguments + * per line, and will dispatch each batch of arguments one at a time to + * bowtie-build. + */ +int main(int argc, const char **argv) { + if(argc > 2 && strcmp(argv[1], "-A") == 0) { + const char *file = argv[2]; + ifstream in; + in.open(file); + char buf[4096]; + int lastret = -1; + while(in.getline(buf, 4095)) { + EList args(MISC_CAT); + args.push_back(string(argv[0])); + tokenize(buf, " \t", args); + const char **myargs = (const char**)malloc(sizeof(char*)*args.size()); + for(size_t i = 0; i < args.size(); i++) { + myargs[i] = args[i].c_str(); + } + if(args.size() == 1) continue; + lastret = hisat2_build((int)args.size(), myargs); + free(myargs); + } + if(lastret == -1) { + cerr << "Warning: No arg strings parsed from " << file << endl; + return 0; + } + return lastret; + } else { + return hisat2_build(argc, argv); + } +} diff --git a/hisat2_extract_exons.py b/hisat2_extract_exons.py new file mode 100644 index 0000000..50602f2 --- /dev/null +++ b/hisat2_extract_exons.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 + +# +# Copyright 2015, Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . 
+# + +from sys import stderr, exit +from collections import defaultdict as dd, Counter +from argparse import ArgumentParser, FileType + + +def extract_exons(gtf_file, verbose = False): + genes = dd(list) + trans = {} + + # Parse valid exon lines from the GTF file into a dict by transcript_id + for line in gtf_file: + line = line.strip() + if not line or line.startswith('#'): + continue + if '#' in line: + line = line.split('#')[0].strip() + + try: + chrom, source, feature, left, right, score, \ + strand, frame, values = line.split('\t') + except ValueError: + continue + left, right = int(left), int(right) + + if feature != 'exon' or left >= right: + continue + + values_dict = {} + for attr in values.split(';')[:-1]: + attr, _, val = attr.strip().partition(' ') + values_dict[attr] = val.strip('"') + + if 'gene_id' not in values_dict or \ + 'transcript_id' not in values_dict: + continue + + transcript_id = values_dict['transcript_id'] + if transcript_id not in trans: + trans[transcript_id] = [chrom, strand, [[left, right]]] + genes[values_dict['gene_id']].append(transcript_id) + else: + trans[transcript_id][2].append([left, right]) + + # Sort exons and merge where separating introns are <=5 bps + for tran, [chrom, strand, exons] in trans.items(): + exons.sort() + tmp_exons = [exons[0]] + for i in range(1, len(exons)): + if exons[i][0] - tmp_exons[-1][1] <= 5: + tmp_exons[-1][1] = exons[i][1] + else: + tmp_exons.append(exons[i]) + trans[tran] = [chrom, strand, tmp_exons] + + # Calculate and print the unique junctions + tmp_exons = set() + for chrom, strand, texons in trans.values(): + for i in range(len(texons)): + tmp_exons.add((chrom, texons[i][0], texons[i][1], strand)) + tmp_exons = sorted(tmp_exons) + if len(tmp_exons) <= 0: + return + + exons = [tmp_exons[0]] + for exon in tmp_exons[1:]: + prev_exon = exons[-1] + if exon[0] != prev_exon[0]: + exons.append(exon) + continue + assert prev_exon[1] <= exon[1] + if prev_exon[2] < exon[1]: + exons.append(exon) + 
continue + + if prev_exon[2] < exon[2]: + strand = prev_exon[3] + if strand not in "+-": + strand = exon[3] + exons[-1] = (prev_exon[0], prev_exon[1], exon[2], strand) + + for chrom, left, right, strand in exons: + # Zero-based offset + print('{}\t{}\t{}\t{}'.format(chrom, left-1, right-1, strand)) + + # Print some stats if asked + if verbose: + None + """ + exon_lengths, intron_lengths, trans_lengths = \ + Counter(), Counter(), Counter() + for chrom, strand, exons in trans.values(): + tran_len = 0 + for i, exon in enumerate(exons): + exon_len = exon[1]-exon[0]+1 + exon_lengths[exon_len] += 1 + tran_len += exon_len + if i == 0: + continue + intron_lengths[exon[0] - exons[i-1][1]] += 1 + trans_lengths[tran_len] += 1 + + print('genes: {}, genes with multiple isoforms: {}'.format( + len(genes), sum(len(v) > 1 for v in genes.values())), + file=stderr) + print('transcripts: {}, transcript avg. length: {:d}'.format( + len(trans), sum(trans_lengths.elements())/len(trans)), + file=stderr) + print('exons: {}, exon avg. length: {:d}'.format( + sum(exon_lengths.values()), + sum(exon_lengths.elements())/sum(exon_lengths.values())), + file=stderr) + print('introns: {}, intron avg. 
length: {:d}'.format( + sum(intron_lengths.values()), + sum(intron_lengths.elements())/sum(intron_lengths.values())), + file=stderr) + print('average number of exons per transcript: {:d}'.format( + sum(exon_lengths.values())/len(trans)), + file=stderr) + """ + + +if __name__ == '__main__': + parser = ArgumentParser( + description='Extract exons from a GTF file') + parser.add_argument('gtf_file', + nargs='?', + type=FileType('r'), + help='input GTF file (use "-" for stdin)') + parser.add_argument('-v', '--verbose', + dest='verbose', + action='store_true', + help='also print some statistics to stderr') + + args = parser.parse_args() + if not args.gtf_file: + parser.print_help() + exit(1) + extract_exons(args.gtf_file, args.verbose) diff --git a/hisat2_extract_snps_haplotypes_UCSC.py b/hisat2_extract_snps_haplotypes_UCSC.py new file mode 100644 index 0000000..820d127 --- /dev/null +++ b/hisat2_extract_snps_haplotypes_UCSC.py @@ -0,0 +1,578 @@ +#!/usr/bin/env python3 + +# +# Copyright 2015, Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . 
+# + + +import sys, subprocess +import re +from argparse import ArgumentParser, FileType +from functools import cmp_to_key + + +""" +""" +def reverse_complement(seq): + result = "" + for nt in seq: + base = nt + if nt == 'A': + base = 'T' + elif nt == 'a': + base = 't' + elif nt == 'C': + base = 'G' + elif nt == 'c': + base = 'g' + elif nt == 'G': + base = 'C' + elif nt == 'g': + base = 'c' + elif nt == 'T': + base = 'A' + elif nt == 't': + base = 'a' + + result = base + result + + return result + + +""" +""" +def read_genome(genome_file): + chr_dic = {} + chr_name, sequence = "", "" + for line in genome_file: + if line.startswith(">"): + if chr_name and sequence: + chr_dic[chr_name] = sequence + chr_name = line.strip().split()[0][1:] + sequence = "" + else: + sequence += line.strip() + if chr_name and sequence: + chr_dic[chr_name] = sequence + return chr_dic + + +""" +Compare two variants [chr, pos, type, data, dic] +""" +def compare_vars(a, b): + a_chr, a_pos, a_type, a_data = a[:4] + b_chr, b_pos, b_type, b_data = b[:4] + + # daehwan - for debugging purposes + if a_chr != b_chr: + print(a) + print(b) + + assert a_chr == b_chr + if a_pos != b_pos: + return a_pos - b_pos + if a_type != b_type: + if a_type == 'I': + return -1 + elif b_type == 'I': + return 1 + if a_type == 'S': + return -1 + else: + return 1 + if a_data < b_data: + return -1 + elif a_data > b_data: + return 1 + else: + return 0 + + +""" +""" +def compatible_vars(a, b): + a_chr, a_pos, a_type, a_data = a[:4] + b_chr, b_pos, b_type, b_data = b[:4] + assert a_chr == b_chr + assert a_pos <= b_pos + if a_pos == b_pos: + return False + if a_type == 'D': + if b_pos <= a_pos + a_data: + return False + return True + + +""" +""" +def generate_haplotypes(snp_file, + haplotype_file, + vars, + inter_gap, + intra_gap, + num_haplotypes): + assert len(vars) > 0 + + # Sort variants and remove redundant variants + vars = sorted(vars, key=cmp_to_key(compare_vars)) + tmp_vars = [] + v = 0 + while v < len(vars): + var 
= vars[v] + for v2 in range(v + 1, len(vars)): + var2 = vars[v2] + if compare_vars(var, var2) == 0: + v += 1 + else: + assert compare_vars(var, var2) < 0 + break + tmp_vars.append(var) + v += 1 + vars = tmp_vars + + # Create new variant ID for variants with the same ID + # e.g. same two variant ID, rs60160543, are split into rs60160543.0 and rs60160543.1 + vars_count = {} + for var in vars: + id = var[4]["id"] + if id not in vars_count: + vars_count[id] = 0 + vars_count[id] += 1 + vars_duplicate = set() + for id, count in vars_count.items(): + if count <= 1: + continue + vars_duplicate.add(id) + vars_count = {} + for var in vars: + id = var[4]["id"] + if id not in vars_count: + vars_count[id] = 0 + else: + vars_count[id] += 1 + if id not in vars_duplicate: + var[4]["id2"] = id + else: + var[4]["id2"] = "%s.%d" % (id, vars_count[id]) + + # variant compatibility + vars_cmpt = [-1 for i in range(len(vars))] + for v in range(len(vars)): + var_chr, var_pos, var_type, var_data = vars[v][:4] + if var_type == 'D': + var_pos += (var_data - 1) + for v2 in range(v + 1, len(vars)): + if vars_cmpt[v2] >= 0: + continue + var2_chr, var2_pos = vars[v2][:2] + if var_chr != var2_chr: + break + if var_pos + inter_gap < var2_pos: + break + vars_cmpt[v2] = v + + # Assign genotypes for those missing genotypes + genotypes_list = [] + for v in range(len(vars)): + var = vars[v] + var_dic = var[4] + freq = var_dic["freq"] + used = [False for i in range(100)] + if vars_cmpt[v] >= 0: + v2 = v - 1 + while v2 >= vars_cmpt[v]: + var2 = vars[v2] + if not compatible_vars(var2, var) or \ + freq >= 0.1: + var2_dic = var2[4] + assert "genotype" in var2_dic + genotype_num = var2_dic["genotype"] + used[genotype_num] = True + v2 -= 1 + + assert False in used + for i in range(len(used)): + if not used[i]: + var_dic["genotype"] = i + break + genotypes_list.append(var_dic["genotype"]) + + # Write SNPs into a file (.snp) + for var in vars: + chr, pos, type, data, var_dic = var + varID = var_dic["id2"] + if 
type == 'S': + type = "single" + elif type == 'D': + type = "deletion" + else: + assert type == 'I' + type = "insertion" + print("%s\t%s\t%s\t%s\t%s" % (varID, type, chr, pos, data), + file=snp_file) + + # genotypes_list looks like + # Var0: 0 + # Var1: 0 + # Var2: 1 + # Var3: 2 + # Get haplotypes from genotypes_list + + max_genotype_num = max(genotypes_list) + haplotypes = ["" for i in range(max_genotype_num + 1)] + for i in range(len(genotypes_list)): + num = genotypes_list[i] + if haplotypes[num] == "": + haplotypes[num] = str(i) + else: + haplotypes[num] += ("#%d" % i) + haplotypes = set(haplotypes) + + # haplotypes look like + # '8#10#12#23', '8#12#23', '5#8#12#23#30' + + # Split some haplotypes that include large gaps inside + def split_haplotypes(haplotypes): + split_haplotypes = set() + for haplotype in haplotypes: + haplotype = haplotype.split('#') + assert len(haplotype) > 0 + if len(haplotype) == 1: + split_haplotypes.add(haplotype[0]) + continue + prev_s, s = 0, 1 + while s < len(haplotype): + _, prev_locus, prev_type, prev_data, _ = vars[int(haplotype[s-1])] + _, locus, type, data, _ = vars[int(haplotype[s])] + prev_locus, locus = int(prev_locus), int(locus) + if prev_type == 'D': + prev_locus += (int(prev_data) - 1) + if prev_locus + intra_gap < locus: + split_haplotypes.add('#'.join(haplotype[prev_s:s])) + prev_s = s + s += 1 + if s == len(haplotype): + split_haplotypes.add('#'.join(haplotype[prev_s:s])) + return split_haplotypes + + haplotypes2 = sorted(list(split_haplotypes(haplotypes))) + + def cmp_haplotype(a, b): + a = a.split('#') + _, a1_locus, _, _, _ = vars[int(a[0])] + _, a2_locus, a2_type, a2_data, _ = vars[int(a[-1])] + a_begin, a_end = int(a1_locus), int(a2_locus) + if a2_type == 'D': + a_end += (int(a2_data) - 1) + b = b.split('#') + _, b1_locus, _, _, _ = vars[int(b[0])] + _, b2_locus, b2_type, b2_data, _ = vars[int(b[-1])] + b_begin, b_end = int(b1_locus), int(b2_locus) + if b2_type == 'D': + b_end += (int(b2_data) - 1) + if a_begin 
!= b_begin: + return a_begin - b_begin + return a_end - b_end + + haplotypes = sorted(list(haplotypes2), key=cmp_to_key(cmp_haplotype)) + + # Write haplotypes + for h_i in range(len(haplotypes)): + h = haplotypes[h_i].split('#') + chr, h1_locus, _, _, _ = vars[int(h[0])] + _, h2_locus, h2_type, h2_data, _ = vars[int(h[-1])] + h_begin, h_end = int(h1_locus), int(h2_locus) + if h2_type == 'D': + h_end += (int(h2_data) - 1) + assert h_begin <= h_end + h_new_begin = h_begin + for h_j in reversed(range(0, h_i)): + hc = haplotypes[h_j].split('#') + _, hc_begin, hc_type, hc_data, _ = vars[int(hc[-1])] + hc_begin = int(hc_begin) + hc_end = hc_begin + if hc_type == 'D': + hc_end += (int(hc_data) - 1) + if hc_end + inter_gap < h_begin: + break + if h_new_begin > hc_end: + h_new_begin = hc_end + assert h_new_begin <= h_begin + h_add = [] + for id in h: + var_dic = vars[int(id)][4] + h_add.append(var_dic["id2"]) + print("ht%d\t%s\t%d\t%d\t%s" % (num_haplotypes, chr, h_new_begin, h_end, ','.join(h_add)), + file=haplotype_file) + num_haplotypes += 1 + + return num_haplotypes + + +""" +""" +def main(genome_file, + snp_fname, + base_fname, + inter_gap, + intra_gap, + verbose, + testset): + # load genomic sequences + chr_dic = read_genome(genome_file) + + if testset: + ref_testset_file = open(base_fname + ".ref.testset.fa", "w") + alt_testset_file = open(base_fname + ".alt.testset.fa", "w") + + snp_out_file = open(base_fname + ".snp", 'w') + haplotype_out_file = open(base_fname + ".haplotype", 'w') + + # load SNPs + snp_list = [] + prev_chr, curr_right = "", -1 + num_haplotypes = 0 + if snp_fname.endswith(".gz"): + snp_cmd = ["gzip", "-cd", snp_fname] + else: + snp_cmd = ["cat", snp_fname] + snp_proc = subprocess.Popen(snp_cmd, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + ids_seen = set() + for line in snp_proc.stdout: + if not line or line.startswith('#'): + continue + + line = line.strip() + try: + fields = line.split('\t') + """ + id, 
chr, start, end, rs_id, score, strand, refNCBI, refUCSC, observed, molType, classType, valid, \ + avHet, avHetSE, func, locType, weight, exceptions, submitterCount, submitters, \ + alleleFreqCount, alleles, alleleNs, alleleFreqs, bitfields = fields + """ + id, chr, start, end, rs_id, score, strand, refNCBI, refUCSC, observed, molType, classType = fields[:12] + alleleFreqs = fields[-2].split(',')[:-1] + if len(alleleFreqs) > 0: + try: + float(alleleFreqs[0]) + except ValueError: + alleleFreqs = [] + except ValueError: + continue + + start, end = int(start), int(end) + score = int(score) + + if molType != "genomic": + continue + + if classType not in ["single", "deletion", "insertion"]: + continue + + if classType == "single": + if start + 1 != end: + continue + elif classType == "deletion": + assert start < end + else: + assert classType == "insertion" + if start != end: + continue + + if chr not in chr_dic: + continue + chr_seq = chr_dic[chr] + chr_len = len(chr_seq) + + if start >= len(chr_seq): + continue + + if rs_id in ids_seen: + continue + ids_seen.add(rs_id) + + if (prev_chr != chr or curr_right + inter_gap < start) and \ + len(snp_list) > 0: + num_haplotypes = generate_haplotypes(snp_out_file, + haplotype_out_file, + snp_list, + inter_gap, + intra_gap, + num_haplotypes) + snp_list = [] + + observed = observed.upper() + allele_list = observed.split("/") + if len(alleleFreqs) == 0: + alleleFreqs = [0.0 for i in range(len(allele_list))] + + # Reverse complement alleles if strand is negative + if strand == "-": + tmp_allele_list = [] + for allele in allele_list: + tmp_allele_list.append(reverse_complement(allele)) + allele_list = tmp_allele_list + + if classType == "single": + allele_count = min(len(allele_list), len(alleleFreqs)) + ref_base = chr_seq[start].upper() + if ref_base not in allele_list: + continue + for a in range(allele_count): + allele = allele_list[a] + freq = float(alleleFreqs[a]) + if allele not in "ACGT" or len(allele) != 1: + continue + if 
allele == ref_base: + continue + snp_list.append([chr, start, 'S', allele, {"id":rs_id, "freq":freq}]) + + if testset: + ref_seq = chr_seq[start-50:start+50] + alt_seq = chr_seq[start-50:start] + allele + chr_seq[start+1:start+50] + print(">%s_single_%d" % (rs_id, start - 50), file=ref_testset_file) + print(ref_seq, file=ref_testset_file) + print(">%s_single_%d_%s" % (rs_id, start - 50, ref_seq), file=alt_testset_file) + print(alt_seq, file=alt_testset_file) + + elif classType == "deletion": + if start > 0: + prev_base = chr_seq[start-1].upper() + if prev_base not in "ACGT": + continue + + if len(allele_list) != 2 or \ + len(allele_list) != len(alleleFreqs): + continue + + freq = 0.0 + if allele_list[0] == "-": + freq = float(alleleFreqs[1]) + else: + if allele_list[1] != "-": + continue + freq = float(alleleFreqs[0]) + + delLen = end - start + snp_list.append([chr, start, 'D', delLen, {"id":rs_id, "freq":freq}]) + if testset and delLen > 0 and delLen <= 10: + ref_seq = chr_seq[start-50:start+50] + alt_seq = chr_seq[start-50:start] + chr_seq[start+delLen:start+50+delLen] + print(">%s_deletion_%d" % (rs_id, start - 50), file=ref_testset_file) + print(ref_seq, file=ref_testset_file) + print(">%s_deletion_%d_%s" % (rs_id, start - 50, ref_seq), file=alt_testset_file) + print(alt_seq, file=alt_testset_file) + else: + assert classType == "insertion" + if start > 0: + prev_base = chr_seq[start-1].upper() + if prev_base not in "ACGT": + continue + allele_count = min(len(allele_list), len(alleleFreqs)) + for a in range(allele_count): + allele = allele_list[a] + freq = float(alleleFreqs[a]) + if allele == "-" or len(allele) <= 0: + continue + if re.match('^[ACGT]+$', allele): + snp_list.append([chr, start, 'I', allele, {"id":rs_id, "freq":freq}]) + insLen = len(allele) + if testset and insLen > 0 and insLen <= 10: + ref_seq = chr_seq[start-50:start+50] + alt_seq = chr_seq[start-50:start] + allele + chr_seq[start:start+50-insLen] + print(">%s_insertion_%d" % (rs_id, start - 
50), file=ref_testset_file) + print(ref_seq, file=ref_testset_file) + print(">%s_insertion_%d_%s" % (rs_id, start - 50, ref_seq), file=alt_testset_file) + print(alt_seq, file=alt_testset_file) + + if curr_right < end: + curr_right = end + + if prev_chr != chr: + curr_right = end + prev_chr = chr + + if testset: + ref_testset_file.close() + alt_testset_file.close() + + if len(snp_list) > 0: + generate_haplotypes(snp_out_file, + haplotype_out_file, + snp_list, + inter_gap, + intra_gap, + num_haplotypes) + snp_list = [] + + snp_out_file.close() + haplotype_out_file.close() + + + +if __name__ == '__main__': + parser = ArgumentParser( + description='Extract SNPs and haplotypes from a SNP file downloaded from UCSC (e.g. http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/snp144.txt.gz)') + parser.add_argument('genome_file', + nargs='?', + type=FileType('r'), + help='input genome file (e.g. genome.fa)') + parser.add_argument('snp_fname', + nargs='?', + type=str, + help='input snp file downloaded from UCSC (plain text or gzipped file is accepted: snp144Common.txt or snp144Common.txt.gz)') + parser.add_argument("base_fname", + nargs='?', + type=str, + help="base filename for SNPs and haplotypes") + parser.add_argument("--inter-gap", + dest="inter_gap", + type=int, + default=30, + help="Maximum distance for variants to be in the same haplotype") + parser.add_argument("--intra-gap", + dest="intra_gap", + type=int, + default=50, + help="Break a haplotype into several haplotypes") + parser.add_argument('-v', '--verbose', + dest='verbose', + action='store_true', + help='also print some statistics to stderr') + parser.add_argument('--testset', + dest='testset', + action='store_true', + help='print test reads') + + args = parser.parse_args() + if not args.genome_file or \ + not args.snp_fname or \ + not args.base_fname: + parser.print_help() + exit(1) + main(args.genome_file, + args.snp_fname, + args.base_fname, + args.inter_gap, + args.intra_gap, + args.verbose, + 
args.testset) diff --git a/hisat2_extract_snps_haplotypes_VCF.py b/hisat2_extract_snps_haplotypes_VCF.py new file mode 100644 index 0000000..2eb92a4 --- /dev/null +++ b/hisat2_extract_snps_haplotypes_VCF.py @@ -0,0 +1,923 @@ +#!/usr/bin/env python3 +# +# Copyright 2016, Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . +# + + +import sys, os, subprocess +from argparse import ArgumentParser, FileType +from functools import cmp_to_key + +digit2str = [str(i) for i in range(10)] + +""" +""" +def read_genome(genome_file): + chr_dic = {} + chr_name, sequence = "", "" + for line in genome_file: + if line.startswith(">"): + if chr_name and sequence: + chr_dic[chr_name] = sequence + chr_name = line.strip().split()[0][1:] + sequence = "" + else: + sequence += line.strip() + if chr_name and sequence: + chr_dic[chr_name] = sequence + return chr_dic + + +""" +Compare two variants [chr, pos, type, data, dic] +""" +def compare_vars(a, b): + a_chr, a_pos, a_type, a_data = a[:4] + b_chr, b_pos, b_type, b_data = b[:4] + assert a_chr == b_chr + if a_pos != b_pos: + return a_pos - b_pos + if a_type != b_type: + if a_type == 'I': + return -1 + elif b_type == 'I': + return 1 + if a_type == 'S': + return -1 + else: + return 1 + if a_data < b_data: + return -1 + elif a_data > b_data: + return 1 + else: + return 0 + + +""" +""" +def compatible_vars(a, b): + a_chr, a_pos, a_type, a_data = a[:4] + 
b_chr, b_pos, b_type, b_data = b[:4] + assert a_chr == b_chr + assert a_pos <= b_pos + + if a_pos == b_pos: + return False + if a_type == 'D': + if b_pos <= a_pos + a_data: + return False + return True + + +""" +Given a VCF line, the function reports a list of variants [pos, type, data] +type: 'S' for single nucleotide polymorphism, 'D' for deletion, and 'I' for insertion +""" +def extract_vars(chr_dic, chr, pos, ref_allele, alt_alleles, varID): + chr_seq = chr_dic[chr] + vars = [] + assert ',' not in ref_allele + alt_alleles = alt_alleles.split(',') + for a in range(len(alt_alleles)): + alt_allele = alt_allele2 = alt_alleles[a] + if 'N' in alt_allele: + continue + ref_allele2, pos2 = ref_allele, pos + + if chr_seq[pos:pos+len(ref_allele)] != ref_allele: + print("Error: the reference genome you provided seems to be incompatible with the VCF file at %d of chromosome %s where %s is in the reference genome while %s is in the VCF file" % (pos, chr, chr_seq[pos:pos+len(ref_allele)], ref_allele), file=sys.stderr) + + def warning_msg(): + print("Warning) ref allele (%s) and alt allele (%s in %s) at chr%s:%d are excluded." 
% \ + (ref_allele, alt_allele, ','.join(alt_alleles), chr, pos + 1), file=sys.stderr) + + min_len = min(len(ref_allele2), len(alt_allele2)) + if min_len >= 2: + if len(ref_allele2) != len(alt_allele2): + if ref_allele2[:min_len-1] != alt_allele2[:min_len-1]: + warning_msg() + continue + ref_allele2, alt_allele2 = ref_allele2[min_len-1:], alt_allele2[min_len-1:] + pos2 += (min_len - 1) + else: + if ref_allele2[1:] != alt_allele2[1:]: + warning_msg() + continue + ref_allele2, alt_allele2 = ref_allele2[0], alt_allele2[0] + + type, data = '', '' + if len(ref_allele2) == 1 and len(alt_allele2) == 1: + if ref_allele2 == alt_allele2: + warning_msg() + continue + type = 'S' + data = alt_allele2 + elif len(ref_allele2) == 1: + assert len(alt_allele2) > 1 + if ref_allele2[0] != alt_allele2[0]: + warning_msg() + continue + alt_allele2 = alt_allele2[1:] + pos2 += 1 + type = 'I' + data = alt_allele2 + if len(data) > 32: + continue + elif len(alt_allele2) == 1: + assert len(ref_allele2) > 1 + if ref_allele2[0] != alt_allele2[0]: + warning_msg() + continue + ref_allele2 = ref_allele2[1:] + pos2 += 1 + type = 'D' + data = len(ref_allele2) + else: + warning_msg() + continue + varID2 = varID + if len(alt_alleles) > 1: + varID2 = "%s.%d" % (varID, a) + vars.append([chr, pos2, type, data, {"id":varID, "id2":varID2}]) + + return vars + + +""" +""" +def generate_haplotypes(snp_file, + haplotype_file, + vars, + inter_gap, + intra_gap, + num_genomes, + num_haplotypes): + assert len(vars) > 0 + + # Sort variants and remove redundant variants + vars = sorted(vars, key=cmp_to_key(compare_vars)) + tmp_vars = [] + v = 0 + while v < len(vars): + var = vars[v] + for v2 in range(v + 1, len(vars)): + var2 = vars[v2] + if compare_vars(var, var2) == 0: + v += 1 + if "CLNSIG" not in var[4]: + if "CLNSIG" in var2[4]: + var[4]["CLNSIG"] = var2[4]["CLNSIG"] + if "genotype" not in var[4]: + if "genotype" in var2[4]: + var[4]["genotype"] = var2[4]["genotype"] + else: + assert compare_vars(var, var2) < 0 + 
break + tmp_vars.append(var) + v += 1 + vars = tmp_vars + + # Write SNPs into a file (.snp) + for var in vars: + chr, pos, type, data, var_dic = var + varID = var_dic["id2"] + if type == 'S': + type = "single" + elif type == 'D': + type = "deletion" + else: + assert type == 'I' + type = "insertion" + print("%s\t%s\t%s\t%s\t%s" % \ + (varID, type, chr, pos, data), file=snp_file) + + # variant compatibility + vars_cmpt = [-1 for i in range(len(vars))] + for v in range(len(vars)): + var_chr, var_pos, var_type, var_data = vars[v][:4] + if var_type == 'D': + var_pos += (var_data - 1) + for v2 in range(v + 1, len(vars)): + if vars_cmpt[v2] >= 0: + continue + var2_chr, var2_pos, var2_type = vars[v2][:3] + assert var_chr == var2_chr + if var_type == 'D' and var2_type == 'D': + if var_pos + 1 < var2_pos: + break + else: + if var_pos < var2_pos: + break + vars_cmpt[v2] = v + + # Assign genotypes for those missing genotypes + genotypes_list = [] + if num_genomes > 0: + max_genotype_num = 1 + for v in range(len(vars)): + var = vars[v] + var_dic = var[4] + if "genotype" not in var_dic: + used = [True, True] + [False for i in range(8)] + if vars_cmpt[v] >= 0: + v2 = v - 1 + while v2 >= vars_cmpt[v]: + var2 = vars[v2] + if not compatible_vars(var2, var): + var2_dic = var2[4] + assert "genotype" in var2_dic + genotype_num = int(var2_dic["genotype"][0]) + used[genotype_num] = True + v2 -= 1 + + assert False in used + for i in range(len(used)): + if not used[i]: + var_dic["genotype"] = ("%d" % i) * (num_genomes * 2) + if i > max_genotype_num: + max_genotype_num = i + break + genotypes_list.append(var_dic["genotype"]) + + num_chromosomes = len(genotypes_list[0]) + # daehwan - for debugging purposes + """ + for v in range(len(vars)): + var = vars[v] + var_chr, var_pos, var_type, var_data, var_dic = var + print v, var_chr, var_pos, var_type, var_data, var_dic["id"], var_dic["id2"], + if "CLNSIG" in var_dic: + print "CLNSIG:", var_dic["CLNSIG"], + if "genotype" in var_dic: + print 
var_dic["genotype"][:50], + print + """ + + # genotypes_list looks like + # Var0: 000001000 + # Var1: 010000000 + # Var2: 001100000 + # Var3: 222222222 + # Get haplotypes from genotypes_list + haplotypes = set() + cnv_genotypes = ["" for i in range(num_chromosomes)] + for genotypes in genotypes_list: + for i in range(len(genotypes)): + genotype = genotypes[i] + cnv_genotypes[i] += genotype + + cnv_genotypes = set(cnv_genotypes) + for raw_haplotype in cnv_genotypes: + for num in range(1, max_genotype_num + 1): + num_str = str(num) + if num_str not in raw_haplotype: + continue + haplotype = "" + for i in range(len(raw_haplotype)): + if raw_haplotype[i] == num_str: + if haplotype == "": + haplotype = str(i) + else: + haplotype += ("#%d" % i) + assert haplotype != "" + haplotypes.add(haplotype) + + else: + for v in range(len(vars)): + var = vars[v] + var_dic = var[4] + used = [False for i in range(100)] + if vars_cmpt[v] >= 0: + v2 = v - 1 + while v2 >= vars_cmpt[v]: + var2 = vars[v2] + if not compatible_vars(var2, var): + var2_dic = var2[4] + assert "genotype" in var2_dic + genotype_num = var2_dic["genotype"] + used[genotype_num] = True + v2 -= 1 + + assert False in used + for i in range(len(used)): + if not used[i]: + var_dic["genotype"] = i + break + genotypes_list.append(var_dic["genotype"]) + + # genotypes_list looks like + # Var0: 0 + # Var1: 0 + # Var2: 1 + # Var3: 2 + # Get haplotypes from genotypes_list + max_genotype_num = max(genotypes_list) + haplotypes = ["" for i in range(max_genotype_num + 1)] + for i in range(len(genotypes_list)): + num = genotypes_list[i] + if haplotypes[num] == "": + haplotypes[num] = str(i) + else: + haplotypes[num] += ("#%d" % i) + haplotypes = set(haplotypes) + + # haplotypes look like + # '8#10#12#23', '8#12#23', '5#8#12#23#30' + + # Split some haplotypes that include large gaps inside + def split_haplotypes(haplotypes): + split_haplotypes = set() + for haplotype in haplotypes: + haplotype = haplotype.split('#') + assert 
len(haplotype) > 0 + if len(haplotype) == 1: + split_haplotypes.add(haplotype[0]) + continue + prev_s, s = 0, 1 + while s < len(haplotype): + _, prev_locus, prev_type, prev_data, _ = vars[int(haplotype[s-1])] + _, locus, type, data, _ = vars[int(haplotype[s])] + prev_locus, locus = int(prev_locus), int(locus) + if prev_type == 'D': + prev_locus += (int(prev_data) - 1) + if prev_locus + intra_gap < locus: + split_haplotypes.add('#'.join(haplotype[prev_s:s])) + prev_s = s + s += 1 + if s == len(haplotype): + split_haplotypes.add('#'.join(haplotype[prev_s:s])) + return split_haplotypes + + haplotypes2 = sorted(list(split_haplotypes(haplotypes))) + + def cmp_haplotype(a, b): + a = a.split('#') + _, a1_locus, _, _, _ = vars[int(a[0])] + _, a2_locus, a2_type, a2_data, _ = vars[int(a[-1])] + a_begin, a_end = int(a1_locus), int(a2_locus) + if a2_type == 'D': + a_end += (int(a2_data) - 1) + b = b.split('#') + _, b1_locus, _, _, _ = vars[int(b[0])] + _, b2_locus, b2_type, b2_data, _ = vars[int(b[-1])] + b_begin, b_end = int(b1_locus), int(b2_locus) + if b2_type == 'D': + b_end += (int(b2_data) - 1) + if a_begin != b_begin: + return a_begin - b_begin + return a_end - b_end + + haplotypes = sorted(list(haplotypes2), key=cmp_to_key(cmp_haplotype)) + + # daehwan - for debugging purposes + """ + dis = prev_locus - locus + print "\n[%d, %d]: %d haplotypes" % (i, j, len(haplotypes)), dis + if len(cur_vars) in range(0, 1000): + # print "vars:", sorted(list(cur_vars), cmp=cmp_varKey + print "num:", len(haplotypes) + for haplotype in haplotypes: + print haplotype.split('#') + print "\nnum:", len(haplotypes2) + for haplotype in haplotypes2: + print haplotype.split('#') + """ + + # Write haplotypes + for h_i in range(len(haplotypes)): + h = haplotypes[h_i].split('#') + chr, h1_locus, _, _, _ = vars[int(h[0])] + _, h2_locus, h2_type, h2_data, _ = vars[int(h[-1])] + h_begin, h_end = int(h1_locus), int(h2_locus) + if h2_type == 'D': + h_end += (int(h2_data) - 1) + assert h_begin <= h_end + 
h_new_begin = h_begin + for h_j in reversed(range(0, h_i)): + hc = haplotypes[h_j].split('#') + _, hc_begin, hc_type, hc_data, _ = vars[int(hc[-1])] + hc_begin = int(hc_begin) + hc_end = hc_begin + if hc_type == 'D': + hc_end += (int(hc_data) - 1) + if hc_end + inter_gap < h_begin: + break + if h_new_begin > hc_end: + h_new_begin = hc_end + assert h_new_begin <= h_begin + h_add = [] + for id in h: + var_dic = vars[int(id)][4] + h_add.append(var_dic["id2"]) + print("ht%d\t%s\t%d\t%d\t%s" % \ + (num_haplotypes, chr, h_new_begin, h_end, ','.join(h_add)), file=haplotype_file) + num_haplotypes += 1 + + return num_haplotypes + + +""" +""" +def main(genome_file, + VCF_fnames, + base_fname, + inter_gap, + intra_gap, + only_rs, + reference_type, + genotype_vcf, + genotype_gene_list, + extra_files, + verbose): + # Load genomic sequences + chr_dic = read_genome(genome_file) + + # GRCh38 - ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions + # ALL.chr22.phase3_shapeit2_mvncall_integrated_v3plus_nounphased.rsID.genotypes.GRCh38_dbSNP_no_SVs.vcf.gz + # GRCh37 - ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502 + # ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz + + # List of variants (e.g. 
ClinVar database) + genotype_var_list = {} + # List of genomic regions to be processed + genotype_ranges = {} + if genotype_vcf != "": + var_set = set() + assert len(genotype_gene_list) > 0 + if genotype_vcf.endswith(".gz"): + vcf_cmd = ["gzip", "-cd", genotype_vcf] + else: + vcf_cmd = ["cat", genotype_vcf] + vcf_proc = subprocess.Popen(vcf_cmd, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + for line in vcf_proc.stdout: + if line.startswith("#"): + continue + + chr, pos, varID, ref_allele, alt_alleles, qual, filter, info = line.strip().split('\t') + pos = int(pos) - 1 + if chr not in chr_dic: + continue + + gene = None + for g in genotype_gene_list: + if info.find(g) != -1: + gene = g + break + if not gene: + continue + + CLNSIG = -1 + for item in info.split(';'): + if not item.startswith("CLNSIG"): + continue + try: + key, value = item.split('=') + CLNSIG = int(value) + except ValueError: + continue + if CLNSIG not in [4, 5]: + continue + if CLNSIG == 4: + CLNSIG = "Likely pathogenic" + else: + CLNSIG = "Pathogenic" + + vars = extract_vars(chr_dic, chr, pos, ref_allele, alt_alleles, varID) + if len(vars) == 0: + continue + + if chr not in genotype_var_list: + genotype_var_list[chr] = [] + genotype_ranges[chr] = {} + if gene not in genotype_ranges[chr]: + genotype_ranges[chr][gene] = [len(chr_dic[chr]), -1] + + for var in vars: + var_chr, var_pos, var_ref_allele, var_alt_allele = var[:4] + var_str = "%s-%d-%s-%s" % (var_chr, var_pos, var_ref_allele, var_alt_allele) + if var_str in var_set: + continue + var[4]["CLNSIG"] = CLNSIG + var[4]["gene"] = gene + + genotype_var_list[chr].append(var) + if var_pos < genotype_ranges[chr][gene][0]: + genotype_ranges[chr][gene][0] = var_pos + if var_pos > genotype_ranges[chr][gene][1]: + genotype_ranges[chr][gene][1] = var_pos + + var_set.add(var_str) + + print("Number of variants in %s is:" % (genotype_vcf), file=sys.stderr) + for chr, vars in genotype_var_list.items(): + vars = 
sorted(vars, cmp=compare_vars) + print("\tChromosome %s: %d variants" % (chr, len(vars)), file=sys.stderr) + + for chr, gene_ranges in genotype_ranges.items(): + for gene, value in gene_ranges.items(): + gene_ranges[gene] = [value[0] - 100, value[1] + 100] + value = genotype_ranges[chr][gene] + if verbose: + print("%s\t%s\t%d-%d" % (chr, gene, value[0], value[1]), file=sys.stderr) + + if extra_files or True: + clnsig_file = open("%s.clnsig" % base_fname, 'w') + for chr, vars in genotype_var_list.items(): + for var in vars: + varID = var[4]["id2"] + CLNSIG = var[4]["CLNSIG"] + gene = var[4]["gene"] + print("%s\t%s\t%s" % (varID, gene, CLNSIG), file=clnsig_file) + clnsig_file.close() + + SNP_file = open("%s.snp" % base_fname, 'w') + haplotype_file = open("%s.haplotype" % base_fname, 'w') + + # Write reference information and backbone sequences into files + if extra_files: + ref_file = open("%s.ref" % base_fname, 'w') + for chr, gene_ranges in genotype_ranges.items(): + for gene, value in gene_ranges.items(): + left, right = value + if reference_type == "gene": + left, right = 0, right - left + print("%s\t%s\t%d\t%d" % (gene, chr, left, right), file=ref_file) + ref_file.close() + + if reference_type == "gene": + backbone_file = open("%s_backbone.fa" % base_fname, 'w') + for chr, gene_ranges in genotype_ranges.items(): + for gene, value in gene_ranges.items(): + left, right = value + left, right = 0, right - left + print(">%s" % (gene), file=backbone_file) + backbone_seq = chr_dic[chr][value[0]:value[1]+1] + for s in range(0, len(backbone_seq), 60): + print(backbone_seq[s:s+60], file=backbone_file) + backbone_file.close() + elif reference_type == "chromosome": + first = True + for chr in genotype_ranges.keys(): + if first: + os.system("samtools faidx genome.fa %s > %s_backbone.fa" % (chr, base_fname)) + first = False + else: + os.system("samtools faidx genome.fa %s >> %s_backbone.fa" % (chr, base_fname)) + else: + assert reference_type == "genome" + os.system("cp 
genome.fa %s_backbone.fa" % base_fname) + + num_genomes = 0 + num_haplotypes = 0 + num_unassigned = 0 + unnamed_var_count = 0 + for VCF_fname in VCF_fnames: + empty_VCF_file = False + if VCF_fname == "/dev/null" or \ + not os.path.exists(VCF_fname): + empty_VCF_file = True + + if reference_type != "genome" and \ + len(genotype_gene_list) > 0: + continue + + if not empty_VCF_file: + if VCF_fname.endswith(".gz"): + vcf_cmd = ["gzip", "-cd", VCF_fname] + else: + vcf_cmd = ["cat", VCF_fname] + vcf_proc = subprocess.Popen(vcf_cmd, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + + genomeIDs = [] + vars, genotypes_list = [], [] + prev_varID, prev_chr, prev_pos = "", "", -1 + num_lines = 0 + for line in vcf_proc.stdout: + num_lines += 1 + if line.startswith("##"): + continue + + fields = line.strip().split('\t') + + chr, pos, varID, ref_allele, alt_alleles, qual, filter, info = fields[:8] + if prev_chr != chr: + curr_right = -1 + + if len(fields) >= 9: + format = fields[8] + + genotypes = [] + if len(fields) >= 10: + genotypes = fields[9:] + + if line.startswith("#"): + genomeIDs = genotypes + num_genomes = len(genomeIDs) + continue + + assert len(genotypes) == len(genomeIDs) + + if varID == ".": + unnamed_var_count += 1 + varID = "un%d" % unnamed_var_count + + if only_rs and not varID.startswith("rs"): + continue + + if ';' in varID: + continue + + if varID == prev_varID: + continue + + if chr not in chr_dic: + continue + + chr_seq = chr_dic[chr] + chr_genotype_vars = [] + chr_genotype_ranges = {} + if len(genotype_gene_list) > 0: + assert chr in genotype_var_list + chr_genotype_vars = genotype_var_list[chr] + assert chr in genotype_ranges + chr_genotype_ranges = genotype_ranges[chr] + + pos = int(pos) - 1 + offset = 0 + gene = None + if num_lines % 10000 == 1: + print("\t%s:%d\r" % (chr, pos), file=sys.stderr) + + if chr_genotype_ranges: + skip = True + for gene_, range_ in chr_genotype_ranges.items(): + if pos > range_[0] and pos 
< range_[1]: + skip = False + break + if skip: + continue + if len(vars) == 0: + for var in chr_genotype_vars: + var_chr, var_pos, var_type, var_data, var_dic = var + if var_pos < range_[0]: + continue + if var_pos > range_[1]: + break + if reference_type == "gene": + var_pos -= range_[0] + vars.append([gene_, var_pos, var_type, var_data, var_dic]) + curr_right = range_[1] + if reference_type == "gene": + offset = range_[0] + gene = gene_ + + if pos == prev_pos: + continue + + if len(vars) > 0 and \ + (curr_right + inter_gap < pos or prev_chr != chr): + num_haplotypes = generate_haplotypes(SNP_file, + haplotype_file, + vars, + inter_gap, + intra_gap, + num_genomes, + num_haplotypes) + vars = [] + + def add_vars(pos, + offset, + gene, + varID, + ref_allele, + alt_alleles, + vars, + genotypes): + tmp_vars = extract_vars(chr_dic, chr, pos, ref_allele, alt_alleles, varID) + max_right = -1 + for v in range(len(tmp_vars)): + var = tmp_vars[v] + _, pos2, type, data = var[:4] + cnv_genotypes = [] + for genotype in genotypes: + P1, P2 = genotype[0], genotype[2] + if P1 == digit2str[v + 1]: + cnv_genotypes.append('1') + else: + cnv_genotypes.append('0') + if P2 == digit2str[v + 1]: + cnv_genotypes.append('1') + else: + cnv_genotypes.append('0') + + # Skip SNPs not present in a given population (e.g. 
2,504 genomes in 1000 Genomes Project) + if cnv_genotypes != [] and \ + '1' not in cnv_genotypes: + continue + + tmp_varID = var[4]["id2"] + var_dic = {"id":varID, "id2":tmp_varID, "genotype":''.join(cnv_genotypes)} + if reference_type == "gene": + vars.append([gene, pos2 - offset, type, data, var_dic]) + else: + vars.append([chr, pos2, type, data, var_dic]) + right = pos2 + if type == 'D': + right += (int(data) - 1) + if max_right < right: + max_right = right + return max_right + + right = add_vars(pos, + offset, + gene, + varID, + ref_allele, + alt_alleles, + vars, + genotypes) + if curr_right < right: + curr_right = right + + prev_varID = varID + prev_chr = chr + prev_pos = pos + + if len(vars) > 0: + num_haplotypes = generate_haplotypes(SNP_file, + haplotype_file, + vars, + inter_gap, + intra_gap, + num_genomes, + num_haplotypes) + vars = [] + + else: + for chr in genotype_var_list.keys(): + chr_seq = chr_dic[chr] + chr_genotype_vars = genotype_var_list[chr] + curr_right = -1 + vars = [] + for var in chr_genotype_vars: + var_chr, var_pos, var_type, var_data, var_dic = var + num_genomes = 0 + if len(vars) > 0 and curr_right + inter_gap < var_pos: + num_haplotypes = generate_haplotypes(SNP_file, + haplotype_file, + vars, + inter_gap, + intra_gap, + num_genomes, + num_haplotypes) + vars = [] + vars.append([var_chr, var_pos, var_type, var_data, var_dic]) + curr_right = var_pos + if var_type == 'D': + curr_right += (var_data - 1) + + if len(vars) > 0: + num_haplotypes = generate_haplotypes(SNP_file, + haplotype_file, + vars, + inter_gap, + intra_gap, + num_genomes, + num_haplotypes) + vars = [] + + + SNP_file.close() + haplotype_file.close() + + if genotype_vcf != "": + clnsig_file.close() + + + +if __name__ == '__main__': + parser = ArgumentParser( + description='Extract SNPs and haplotypes from VCF files') + parser.add_argument('genome_file', + nargs='?', + type=FileType('r'), + help='input genome file (e.g. 
genome.fa)') + parser.add_argument('VCF_fnames', + nargs='?', + type=str, + help='A comma-seperated VCF files (plain text or gzipped file is accepted: GRCh38_dbSNP_no_SVs.vcf or GRCh38_dbSNP_no_SVs.vcf.gz') + parser.add_argument("base_fname", + nargs='?', + type=str, + help="base filename for SNPs and haplotypes") + parser.add_argument("--reference-type", + dest="reference_type", + type=str, + default="genome", + help="Reference type: gene, chromosome, and genome (default: genome)") + parser.add_argument("--inter-gap", + dest="inter_gap", + type=int, + default=30, + help="Maximum distance for variants to be in the same haplotype (default: 30)") + parser.add_argument("--intra-gap", + dest="intra_gap", + type=int, + default=50, + help="Break a haplotype into several haplotypes (default: 50)") + parser.add_argument('--non-rs', + dest='only_rs', + action='store_false', + help='Allow SNP IDs not beginning with rs') + parser.add_argument('--genotype-vcf', + dest='genotype_vcf', + type=str, + default="", + help='VCF file name for genotyping (default: empty)') + parser.add_argument('--genotype-gene-list', + dest='genotype_gene_list', + type=str, + default="", + help='A comma-separated list of genes to be genotyped (default: empty)') + parser.add_argument('--extra-files', + dest='extra_files', + action='store_true', + help='Output extra files such as _backbone.fa and .ref') + parser.add_argument('-v', '--verbose', + dest='verbose', + action='store_true', + help='also print some statistics to stderr') + + args = parser.parse_args() + if not args.genome_file or \ + not args.VCF_fnames or \ + not args.base_fname: + parser.print_help() + exit(1) + args.VCF_fnames = args.VCF_fnames.split(',') + + if args.genotype_vcf != "": + if args.genotype_gene_list == "": + genes = set() + if args.genotype_vcf.endswith(".gz"): + vcf_cmd = ["gzip", "-cd", args.genotype_vcf] + else: + vcf_cmd = ["cat", args.genotype_vcf] + vcf_proc = subprocess.Popen(vcf_cmd, + universal_newlines=True, + 
stdout=subprocess.PIPE, + stderr=open("/dev/null", 'w')) + for line in vcf_proc.stdout: + if line.startswith("#"): + continue + + info = line.strip().split()[-1] + if info.find("GENEINFO=") == -1: + continue + gene = info.split("GENEINFO=")[1] + gene = gene.split(':')[0] + genes.add(gene) + args.genotype_gene_list = list(genes) + else: + args.genotype_gene_list = args.genotype_gene_list.split(',') + + if len(args.genotype_gene_list) == 0: + print("Error: please specify --genotype-gene-list.", file=sys.stderr) + sys.exit(1) + + else: + args.genotype_gene_list = [] + + main(args.genome_file, + args.VCF_fnames, + args.base_fname, + args.inter_gap, + args.intra_gap, + args.only_rs, + args.reference_type, + args.genotype_vcf, + args.genotype_gene_list, + args.extra_files, + args.verbose) diff --git a/hisat2_extract_splice_sites.py b/hisat2_extract_splice_sites.py new file mode 100644 index 0000000..cba9234 --- /dev/null +++ b/hisat2_extract_splice_sites.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 + +# +# Copyright 2015, Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . 
+# + +from sys import stderr, exit +from collections import defaultdict as dd, Counter +from argparse import ArgumentParser, FileType + + +def extract_splice_sites(gtf_file, verbose=False): + genes = dd(list) + trans = {} + + # Parse valid exon lines from the GTF file into a dict by transcript_id + for line in gtf_file: + line = line.strip() + if not line or line.startswith('#'): + continue + if '#' in line: + line = line.split('#')[0].strip() + + try: + chrom, source, feature, left, right, score, \ + strand, frame, values = line.split('\t') + except ValueError: + continue + left, right = int(left), int(right) + + if feature != 'exon' or left >= right: + continue + + values_dict = {} + for attr in values.split(';'): + if attr: + attr, _, val = attr.strip().partition(' ') + values_dict[attr] = val.strip('"') + + if 'gene_id' not in values_dict or \ + 'transcript_id' not in values_dict: + continue + + transcript_id = values_dict['transcript_id'] + if transcript_id not in trans: + trans[transcript_id] = [chrom, strand, [[left, right]]] + genes[values_dict['gene_id']].append(transcript_id) + else: + trans[transcript_id][2].append([left, right]) + + # Sort exons and merge where separating introns are <=5 bps + for tran, [chrom, strand, exons] in trans.items(): + exons.sort() + tmp_exons = [exons[0]] + for i in range(1, len(exons)): + if exons[i][0] - tmp_exons[-1][1] <= 5: + tmp_exons[-1][1] = exons[i][1] + else: + tmp_exons.append(exons[i]) + trans[tran] = [chrom, strand, tmp_exons] + + # Calculate and print the unique junctions + junctions = set() + for chrom, strand, exons in trans.values(): + for i in range(1, len(exons)): + junctions.add((chrom, exons[i-1][1], exons[i][0], strand)) + junctions = sorted(junctions) + for chrom, left, right, strand in junctions: + # Zero-based offset + print('{}\t{}\t{}\t{}'.format(chrom, left-1, right-1, strand)) + + # Print some stats if asked + if verbose: + exon_lengths, intron_lengths, trans_lengths = \ + Counter(), Counter(), 
Counter() + for chrom, strand, exons in trans.values(): + tran_len = 0 + for i, exon in enumerate(exons): + exon_len = exon[1]-exon[0]+1 + exon_lengths[exon_len] += 1 + tran_len += exon_len + if i == 0: + continue + intron_lengths[exon[0] - exons[i-1][1]] += 1 + trans_lengths[tran_len] += 1 + + print('genes: {}, genes with multiple isoforms: {}'.format( + len(genes), sum(len(v) > 1 for v in genes.values())), + file=stderr) + print('transcripts: {}, transcript avg. length: {:.0f}'.format( + len(trans), sum(trans_lengths.elements())//len(trans)), + file=stderr) + print('exons: {}, exon avg. length: {:.0f}'.format( + sum(exon_lengths.values()), + sum(exon_lengths.elements())//sum(exon_lengths.values())), + file=stderr) + print('introns: {}, intron avg. length: {:.0f}'.format( + sum(intron_lengths.values()), + sum(intron_lengths.elements())//sum(intron_lengths.values())), + file=stderr) + print('average number of exons per transcript: {:.0f}'.format( + sum(exon_lengths.values())//len(trans)), + file=stderr) + + +if __name__ == '__main__': + parser = ArgumentParser( + description='Extract splice junctions from a GTF file') + parser.add_argument('gtf_file', + nargs='?', + type=FileType('r'), + help='input GTF file (use "-" for stdin)') + parser.add_argument('-v', '--verbose', + dest='verbose', + action='store_true', + help='also print some statistics to stderr') + + args = parser.parse_args() + if not args.gtf_file: + parser.print_help() + exit(1) + extract_splice_sites(args.gtf_file, args.verbose) diff --git a/hisat2_inspect.cpp b/hisat2_inspect.cpp new file mode 100644 index 0000000..f6cb71e --- /dev/null +++ b/hisat2_inspect.cpp @@ -0,0 +1,791 @@ +/* + * Copyright 2015, Daehwan Kim + * + * This file is part of HISAT 2. 
+ * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#include +#include +#include +#include + +#include "assert_helpers.h" +#include "endian_swap.h" +#include "hgfm.h" +#include "reference.h" +#include "ds.h" +#include "alt.h" + +using namespace std; + +MemoryTally gMemTally; + +static bool showVersion = false; // just print version and quit? +int verbose = 0; // be talkative +static int names_only = 0; // just print the sequence names in the index +static int snp_only = 0; +static int splicesite_only = 0; +static int splicesite_all_only = 0; +static int exon_only = 0; +static int summarize_only = 0; // just print summary of index and quit +static int across = 60; // number of characters across in FASTA output +static bool refFromGFM = false; // true -> when printing reference, decode it from Gbwt instead of reading it from BitPairReference +static string wrapper; +static const char *short_options = "vhnsea:"; + +enum { + ARG_VERSION = 256, + ARG_WRAPPER, + ARG_USAGE, + ARG_SNP, + ARG_SPLICESITE, + ARG_SPLICESITE_ALL, + ARG_EXON, +}; + +static struct option long_options[] = { + {(char*)"verbose", no_argument, 0, 'v'}, + {(char*)"version", no_argument, 0, ARG_VERSION}, + {(char*)"usage", no_argument, 0, ARG_USAGE}, + {(char*)"names", no_argument, 0, 'n'}, + {(char*)"snp", no_argument, 0, ARG_SNP}, + {(char*)"ss", no_argument, 0, ARG_SPLICESITE}, + {(char*)"ss-all", no_argument, 0, ARG_SPLICESITE_ALL}, + 
{(char*)"exon", no_argument, 0, ARG_EXON}, + {(char*)"summary", no_argument, 0, 's'}, + {(char*)"help", no_argument, 0, 'h'}, + {(char*)"across", required_argument, 0, 'a'}, + {(char*)"gbwt-ref", no_argument, 0, 'g'}, + {(char*)"wrapper", required_argument, 0, ARG_WRAPPER}, + {(char*)0, 0, 0, 0} // terminator +}; + +/** + * Print a summary usage message to the provided output stream. + */ +static void printUsage(ostream& out) { + out << "HISAT2 version " << string(HISAT2_VERSION).c_str() << " by Daehwan Kim (infphilo@gmail.com, http://www.ccb.jhu.edu/people/infphilo)" << endl; + out + << "Usage: hisat2-inspect [options]* " << endl + << " ht2 filename minus trailing .1." << gfm_ext << "/.2." << gfm_ext << endl + << endl + << " By default, prints FASTA records of the indexed nucleotide sequences to" << endl + << " standard out. With -n, just prints names. With -s, just prints a summary of" << endl + << " the index parameters and sequences. With -e, preserves colors if applicable." << endl + << endl + << "Options:" << endl; + if(wrapper == "basic-0") { + out << " --large-index force inspection of the 'large' index, even if a" << endl + << " 'small' one is present." << endl; + } + out << " -a/--across Number of characters across in FASTA output (default: 60)" << endl + << " -s/--summary Print summary incl. ref names, lengths, index properties" << endl + << " -n/--names Print reference sequence names only" << endl + << " --snp Print SNPs" << endl + << " --ss Print splice sites" << endl + << " --ss-all Print splice sites including those not in the global index" << endl + << " --exon Print exons" << endl + << " -e/--ht2-ref Reconstruct reference from ." 
<< gfm_ext << " (slow, preserves colors)" << endl + << " -v/--verbose Verbose output (for debugging)" << endl + << " -h/--help print detailed description of tool and its options" << endl + << " --usage print this usage message" << endl + ; + if(wrapper.empty()) { + cerr << endl + << "*** Warning ***" << endl + << "'hisat-inspect' was run directly. It is recommended " + << "to use the wrapper script instead." + << endl << endl; + } +} + +/** + * Parse an int out of optarg and enforce that it be at least 'lower'; + * if it is less than 'lower', than output the given error message and + * exit with an error and a usage message. + */ +static int parseInt(int lower, const char *errmsg) { + long l; + char *endPtr= NULL; + l = strtol(optarg, &endPtr, 10); + if (endPtr != NULL) { + if (l < lower) { + cerr << errmsg << endl; + printUsage(cerr); + throw 1; + } + return (int32_t)l; + } + cerr << errmsg << endl; + printUsage(cerr); + throw 1; + return -1; +} + +/** + * Read command-line arguments + */ +static void parseOptions(int argc, char **argv) { + int option_index = 0; + int next_option; + do { + next_option = getopt_long(argc, argv, short_options, long_options, &option_index); + switch (next_option) { + case ARG_WRAPPER: + wrapper = optarg; + break; + case ARG_USAGE: + case 'h': + printUsage(cout); + throw 0; + break; + case 'v': verbose = true; break; + case ARG_VERSION: showVersion = true; break; + case 'g': refFromGFM = true; break; + case 'n': names_only = true; break; + case ARG_SNP: snp_only = true; break; + case ARG_SPLICESITE: splicesite_only = true; break; + case ARG_SPLICESITE_ALL: splicesite_all_only = true; break; + case ARG_EXON: exon_only = true; break; + case 's': summarize_only = true; break; + case 'a': across = parseInt(-1, "-a/--across arg must be at least 1"); break; + case -1: break; /* Done with options. 
*/ + case 0: + if (long_options[option_index].flag != 0) + break; + default: + printUsage(cerr); + throw 1; + } + } while(next_option != -1); +} + +static void print_fasta_record( + ostream& fout, + const string& defline, + const string& seq) +{ + fout << ">"; + fout << defline.c_str() << endl; + + if(across > 0) { + size_t i = 0; + while (i + across < seq.length()) + { + fout << seq.substr(i, across).c_str() << endl; + i += across; + } + if (i < seq.length()) + fout << seq.substr(i).c_str() << endl; + } else { + fout << seq.c_str() << endl; + } +} + +/** + * Given output stream, BitPairReference, reference index, name and + * length, print the whole nucleotide reference with the appropriate + * number of columns. + */ +static void print_ref_sequence( + ostream& fout, + BitPairReference& ref, + const string& name, + size_t refi, + size_t len) +{ + bool newlines = across > 0; + int myacross = across > 0 ? across : 60; + size_t incr = myacross * 1000; + uint32_t *buf = new uint32_t[(incr + 128)/4]; + fout << ">" << name.c_str() << "\n"; + ASSERT_ONLY(SStringExpandable destU32); + for(size_t i = 0; i < len; i += incr) { + size_t amt = min(incr, len-i); + assert_leq(amt, incr); + int off = ref.getStretch(buf, refi, i, amt ASSERT_ONLY(, destU32)); + uint8_t *cb = ((uint8_t*)buf) + off; + for(size_t j = 0; j < amt; j++) { + if(newlines && j > 0 && (j % myacross) == 0) fout << "\n"; + assert_range(0, 4, (int)cb[j]); + fout << "ACGTN"[(int)cb[j]]; + } + fout << "\n"; + } + delete []buf; +} + +/** + * Create a BitPairReference encapsulating the reference portion of the + * index at the given basename. Iterate through the reference + * sequences, sending each one to print_ref_sequence to print. 
+ */ +static void print_ref_sequences( + ostream& fout, + const EList& refnames, + const TIndexOffU* plen, + const string& adjustedGFMFileBase) +{ + BitPairReference ref( + adjustedGFMFileBase, // input basename + NULL, + false, // true -> expect colorspace reference + false, // sanity-check reference + NULL, // infiles + NULL, // originals + false, // infiles are sequences + false, // memory-map + false, // use shared memory + false, // sweep mm-mapped ref + verbose, // be talkative + verbose); // be talkative at startup + assert_eq(ref.numRefs(), refnames.size()); + for(size_t i = 0; i < ref.numRefs(); i++) { + print_ref_sequence( + fout, + ref, + refnames[i], + i, + plen[i]); + } +} + +/** + * Given an index, reconstruct the reference by LF mapping through the + * entire thing. + */ +template +static void print_index_sequences(ostream& fout, GFM& gfm) +{ + EList* refnames = &(gfm.refnames()); + + TStr cat_ref; + gfm.restore(cat_ref); + + TIndexOffU curr_ref = OFF_MASK; + string curr_ref_seq = ""; + TIndexOffU curr_ref_len = OFF_MASK; + TIndexOffU last_text_off = 0; + size_t orig_len = cat_ref.length(); + TIndexOffU tlen = OFF_MASK; + bool first = true; + for(size_t i = 0; i < orig_len; i++) { + TIndexOffU tidx = OFF_MASK; + TIndexOffU textoff = OFF_MASK; + tlen = OFF_MASK; + bool straddled = false; + gfm.joinedToTextOff(1 /* qlen */, (TIndexOffU)i, tidx, textoff, tlen, true, straddled); + + if (tidx != OFF_MASK && textoff < tlen) + { + if (curr_ref != tidx) + { + if (curr_ref != OFF_MASK) + { + // Add trailing gaps, if any exist + if(curr_ref_seq.length() < curr_ref_len) { + curr_ref_seq += string(curr_ref_len - curr_ref_seq.length(), 'N'); + } + print_fasta_record(fout, (*refnames)[curr_ref], curr_ref_seq); + } + curr_ref = tidx; + curr_ref_seq = ""; + curr_ref_len = tlen; + last_text_off = 0; + first = true; + } + + TIndexOffU textoff_adj = textoff; + if(first && textoff > 0) textoff_adj++; + if (textoff_adj - last_text_off > 1) + curr_ref_seq += 
string(textoff_adj - last_text_off - 1, 'N'); + + curr_ref_seq.push_back("ACGT"[int(cat_ref[i])]); + last_text_off = textoff; + first = false; + } + } + if (curr_ref < refnames->size()) + { + // Add trailing gaps, if any exist + if(curr_ref_seq.length() < curr_ref_len) { + curr_ref_seq += string(curr_ref_len - curr_ref_seq.length(), 'N'); + } + print_fasta_record(fout, (*refnames)[curr_ref], curr_ref_seq); + } + +} + +static char *argv0 = NULL; + +template +static void print_index_sequence_names(const string& fname, ostream& fout) +{ + EList p_refnames; + readEbwtRefnames(fname, p_refnames); + for(size_t i = 0; i < p_refnames.size(); i++) { + cout << p_refnames[i].c_str() << endl; + } +} + +/** + * Print a short summary of what's in the index and its flags. + */ +template +static void print_snps( + const string& fname, + ostream& fout) +{ + ALTDB altdb; + GFM gfm( + fname, + &altdb, + NULL, + NULL, + -1, // don't require entire reverse + true, // index is for the forward direction + -1, // offrate (-1 = index default) + 0, // offrate-plus (0 = index default) + false, // use memory-mapped IO + false, // use shared memory + false, // sweep memory-mapped memory + true, // load names? + false, // load SA sample? + false, // load ftab? + false, // load rstarts? + true, // load splice sites? + verbose, // be talkative? + verbose, // be talkative at startup? + false, // pass up memory exceptions? + false, // sanity check? + false); // use haplotypes? 
+ gfm.loadIntoMemory( + -1, // need entire reverse + true, // load SA sample + true, // load ftab + true, // load rstarts + true, // load names + verbose); // verbose + EList p_refnames; + readEbwtRefnames(fname, p_refnames); + const EList >& alts = altdb.alts(); + const EList& altnames = altdb.altnames(); + assert_eq(alts.size(), altnames.size()); + for(size_t i = 0; i < alts.size(); i++) { + const ALT& alt = alts[i]; + if(!alt.snp()) + continue; + if(alt.deletion() && alt.reversed) + continue; + string type = "single"; + if(alt.type == ALT_SNP_DEL) { + type = "deletion"; + } else if(alt.type == ALT_SNP_INS) { + type = "insertion"; + } + index_t tidx = 0, toff = 0, tlen = 0; + bool straddled2 = false; + gfm.joinedToTextOff( + 1, + alt.pos, + tidx, + toff, + tlen, + true, // reject straddlers? + straddled2); // straddled? + cout << altnames[i] << "\t" + << type << "\t"; + assert_lt(tidx, p_refnames.size()); + cout << p_refnames[tidx] << "\t" + << toff << "\t"; + if(alt.type == ALT_SNP_SGL) { + cout << "ACGT"[alt.seq & 0x3]; + } else if(alt.type == ALT_SNP_DEL) { + cout << alt.len; + } else if(alt.type == ALT_SNP_INS) { + for(index_t i = 0; i < alt.len; i++) { + int nt = (alt.seq >> ((alt.len - i - 1) << 1)) & 0x3; + cout << "ACGT"[nt]; + } + } + cout << endl; + } +} + +/** + * Print a short summary of what's in the index and its flags. + */ +template +static void print_splicesites( + const string& fname, + ostream& fout) +{ + ALTDB altdb; + GFM gfm( + fname, + &altdb, + NULL, + NULL, + -1, // don't require entire reverse + true, // index is for the forward direction + -1, // offrate (-1 = index default) + 0, // offrate-plus (0 = index default) + false, // use memory-mapped IO + false, // use shared memory + false, // sweep memory-mapped memory + true, // load names? + false, // load SA sample? + false, // load ftab? + false, // load rstarts? + true, // load splice sites? + verbose, // be talkative? + verbose, // be talkative at startup? 
+ false, // pass up memory exceptions? + false, // sanity check? + false); // use haplotypes? + gfm.loadIntoMemory( + -1, // need entire reverse + true, // load SA sample + true, // load ftab + true, // load rstarts + true, // load names + verbose); // verbose + EList p_refnames; + readEbwtRefnames(fname, p_refnames); + const EList >& alts = altdb.alts(); + for(size_t i = 0; i < alts.size(); i++) { + const ALT& alt = alts[i]; + if(!alt.splicesite()) continue; + if(alt.left >= alt.right) continue; + if(!splicesite_all_only && alt.excluded) continue; + index_t tidx = 0, toff = 0, tlen = 0; + bool straddled2 = false; + gfm.joinedToTextOff( + 1, + alt.left, + tidx, + toff, + tlen, + true, // reject straddlers? + straddled2); // straddled? + index_t tidx2 = 0, toff2 = 0, tlen2 = 0; + gfm.joinedToTextOff( + 1, + alt.right, + tidx2, + toff2, + tlen2, + true, // reject straddlers? + straddled2); // straddled? + assert_eq(tidx, tidx2); + assert_lt(tidx, p_refnames.size()); + cout << p_refnames[tidx] << "\t" + << toff - 1 << "\t" + << toff2 + 1 << "\t" + << (alt.fw > 0 ? "+" : "-") << endl; + } +} + +/** + * Print a short summary of what's in the index and its flags. + */ +template +static void print_exons( + const string& fname, + ostream& fout) +{ + ALTDB altdb; + GFM gfm( + fname, + &altdb, + NULL, + NULL, + -1, // don't require entire reverse + true, // index is for the forward direction + -1, // offrate (-1 = index default) + 0, // offrate-plus (0 = index default) + false, // use memory-mapped IO + false, // use shared memory + false, // sweep memory-mapped memory + true, // load names? + false, // load SA sample? + false, // load ftab? + false, // load rstarts? + true, // load splice sites? + verbose, // be talkative? + verbose, // be talkative at startup? + false, // pass up memory exceptions? + false, // sanity check? + false); // use haplotypes? 
+ gfm.loadIntoMemory( + -1, // need entire reverse + true, // load SA sample + true, // load ftab + true, // load rstarts + true, // load names + verbose); // verbose + EList p_refnames; + readEbwtRefnames(fname, p_refnames); + const EList >& alts = altdb.alts(); + for(size_t i = 0; i < alts.size(); i++) { + const ALT& alt = alts[i]; + if(!alt.exon()) continue; + index_t tidx = 0, toff = 0, tlen = 0; + bool straddled2 = false; + gfm.joinedToTextOff( + 1, + alt.left, + tidx, + toff, + tlen, + true, // reject straddlers? + straddled2); // straddled? + index_t tidx2 = 0, toff2 = 0, tlen2 = 0; + gfm.joinedToTextOff( + 1, + alt.right, + tidx2, + toff2, + tlen2, + true, // reject straddlers? + straddled2); // straddled? + assert_eq(tidx, tidx2); + assert_lt(tidx, p_refnames.size()); + cout << p_refnames[tidx] << "\t" + << toff - 1 << "\t" + << toff2 + 1 << "\t" + << (alt.fw > 0 ? "+" : "-") << endl; + } +} + +/** + * Print a short summary of what's in the index and its flags. + */ +template +static void print_index_summary( + const string& fname, + ostream& fout) +{ + int major, minor; + string extra_version; + int32_t flags = GFM::readVersionFlags(fname, major, minor, extra_version); + bool entireReverse = false; + ALTDB altdb; + GFM gfm( + fname, + &altdb, + NULL, + NULL, + -1, // don't require entire reverse + true, // index is for the forward direction + -1, // offrate (-1 = index default) + 0, // offrate-plus (0 = index default) + false, // use memory-mapped IO + false, // use shared memory + false, // sweep memory-mapped memory + true, // load names? + false, // load SA sample? + false, // load ftab? + false, // load rstarts? + true, // load splice sites? + verbose, // be talkative? + verbose, // be talkative at startup? + false, // pass up memory exceptions? + false, // sanity check? + false); // use haplotypes? + EList p_refnames; + readEbwtRefnames(fname, p_refnames); + cout << "Index version" << "\t2." << major << '.' 
<< minor; + if(extra_version != "") { + cout << "-" << extra_version; + } + cout << endl; + cout << "Flags" << '\t' << (-flags) << endl; + cout << "2.0-compatible" << '\t' << (entireReverse ? "1" : "0") << endl; + cout << "SA-Sample" << "\t1 in " << (1 << gfm.gh().offRate()) << endl; + cout << "FTab-Chars" << '\t' << gfm.gh().ftabChars() << endl; + assert_eq(gfm.nPat(), p_refnames.size()); + for(size_t i = 0; i < p_refnames.size(); i++) { + cout << "Sequence-" << (i+1) + << '\t' << p_refnames[i].c_str() + << '\t' << gfm.plen()[i] + << endl; + } + index_t numSnps = 0, numSpliceSites = 0, numExons = 0; + const EList >& alts = altdb.alts(); + for(size_t i = 0; i < alts.size(); i++) { + const ALT& alt = alts[i]; + if(alt.snp()) { + numSnps++; + } else if(alt.splicesite()) { + if(alt.left < alt.right) { + numSpliceSites++; + } + } else if(alt.exon()) { + numExons++; + } + } + cout << "Num. SNPs: " << numSnps << endl; + cout << "Num. Splice Sites: " << numSpliceSites << endl; + cout << "Num. Exons: " << numExons << endl; +} + +extern void initializeCntLut(); +extern void initializeCntBit(); + +static void driver( + const string& ebwtFileBase, + const string& query) +{ + initializeCntLut(); + initializeCntBit(); + + // Adjust + string adjustedEbwtFileBase = adjustEbwtBase(argv0, ebwtFileBase, verbose); + + if (names_only) { + print_index_sequence_names(adjustedEbwtFileBase, cout); + } else if(summarize_only) { + print_index_summary(adjustedEbwtFileBase, cout); + } else if(snp_only) { + print_snps(adjustedEbwtFileBase, cout); + } else if(splicesite_only || splicesite_all_only) { + print_splicesites(adjustedEbwtFileBase, cout); + } else if(exon_only) { + print_exons(adjustedEbwtFileBase, cout); + } else { + // Initialize Ebwt object + ALTDB altdb; + HGFM gfm( + adjustedEbwtFileBase, + &altdb, + NULL, + NULL, + -1, // don't care about entire-reverse + true, // index is for the forward direction + -1, // offrate (-1 = index default) + 0, // offrate-plus (0 = index default) + 
false, // use memory-mapped IO + false, // use shared memory + false, // sweep memory-mapped memory + true, // load names? + true, // load SA sample? + true, // load ftab? + true, // load rstarts? + true, // load splice sites? + false, // be talkative? + false, // be talkative at startup? + false, // pass up memory exceptions? + false, // sanity check? + false); // use haplotypes? + + gfm.loadIntoMemory( + -1, // need entire reverse + true, // load SA sample + true, // load ftab + true, // load rstarts + true, // load names + verbose); // verbose + + // Load whole index into memory + if(refFromGFM) { + print_index_sequences >(cout, gfm); + } else { + EList refnames; + readEbwtRefnames(adjustedEbwtFileBase, refnames); + print_ref_sequences( + cout, + refnames, + gfm.plen(), + adjustedEbwtFileBase); + } + // Evict any loaded indexes from memory + if(gfm.isInMemory()) { + gfm.evictFromMemory(); + } + } +} + +/** + * main function. Parses command-line arguments. + */ +int main(int argc, char **argv) { + try { + string ebwtFile; // read serialized Ebwt from this file + string query; // read query string(s) from this file + EList queries; + string outfile; // write query results to this file + argv0 = argv[0]; + parseOptions(argc, argv); + if(showVersion) { + cout << argv0 << " version " << HISAT2_VERSION << endl; + if(sizeof(void*) == 4) { + cout << "32-bit" << endl; + } else if(sizeof(void*) == 8) { + cout << "64-bit" << endl; + } else { + cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + } + cout << "Built on " << BUILD_HOST << endl; + cout << BUILD_TIME << endl; + cout << "Compiler: " << COMPILER_VERSION << endl; + cout << "Options: " << COMPILER_OPTIONS << endl; + cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" + << sizeof(int) + << ", " << sizeof(long) << ", " << sizeof(long long) + << ", " << sizeof(void *) << ", " << sizeof(size_t) + << ", " << sizeof(off_t) << "}" << endl; + return 0; + } + + // Get input filename + 
if(optind >= argc) { + cerr << "No index name given!" << endl; + printUsage(cerr); + return 1; + } + ebwtFile = argv[optind++]; + + // Optionally summarize + if(verbose) { + cout << "Input ht2 file: \"" << ebwtFile.c_str() << "\"" << endl; + cout << "Output file: \"" << outfile.c_str() << "\"" << endl; + cout << "Local endianness: " << (currentlyBigEndian()? "big":"little") << endl; +#ifdef NDEBUG + cout << "Assertions: disabled" << endl; +#else + cout << "Assertions: enabled" << endl; +#endif + } + driver(ebwtFile, query); + return 0; + } catch(std::exception& e) { + cerr << "Error: Encountered exception: '" << e.what() << "'" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + return 1; + } catch(int e) { + if(e != 0) { + cerr << "Error: Encountered internal HISAT2 exception (#" << e << ")" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + } + return e; + } +} + diff --git a/hisat2_main.cpp b/hisat2_main.cpp new file mode 100644 index 0000000..0d74204 --- /dev/null +++ b/hisat2_main.cpp @@ -0,0 +1,69 @@ +/* + * Copyright 2015, Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#include +#include +#include +#include +#include "tokenize.h" +#include "ds.h" + +using namespace std; + +extern "C" { + int hisat2(int argc, const char **argv); +} + +/** + * Bowtie main function. It is placed in a separate source file to + * make it slightly easier to compile Bowtie as a library. + * + * If the user specifies -A as the first two arguments, main + * will interpret that file as having one set of command-line arguments + * per line, and will dispatch each batch of arguments one at a time to + * bowtie. + */ +int main(int argc, const char **argv) { + if(argc > 2 && strcmp(argv[1], "-A") == 0) { + const char *file = argv[2]; + ifstream in; + in.open(file); + char buf[4096]; + int lastret = -1; + while(in.getline(buf, 4095)) { + EList args; + args.push_back(string(argv[0])); + tokenize(buf, " \t", args); + const char **myargs = (const char**)malloc(sizeof(char*)*args.size()); + for(size_t i = 0; i < args.size(); i++) { + myargs[i] = args[i].c_str(); + } + if(args.size() == 1) continue; + lastret = hisat2((int)args.size(), myargs); + free(myargs); + } + if(lastret == -1) { + cerr << "Warning: No arg strings parsed from " << file << endl; + return 0; + } + return lastret; + } else { + return hisat2(argc, argv); + } +} diff --git a/hisat2_read_statistics.py b/hisat2_read_statistics.py new file mode 100644 index 0000000..9d6e946 --- /dev/null +++ b/hisat2_read_statistics.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 + +# +# Copyright 2018, Chanhee Park and Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . +# + +import os, sys, math, gzip, bz2 +from argparse import ArgumentParser, FileType +""" +""" +COMPRESSION_NON = 0 +COMPRESSION_GZIP = 1 +COMPRESSION_BZIP2 = 2 + +SEQUENCE_UNKNOWN = -1 +SEQUENCE_FASTA = 0 +SEQUENCE_FASTQ = 1 + +FASTA_EXTENSIONS = ["fa", "fasta", "fna"] +FASTQ_EXTENSIONS = ["fq", "fastq"] + +MAX_SKIP_LINES = 10000 +""" +""" +def parser_FQ(fp): + # skip empty line + skip_line_count = 0 + while skip_line_count < MAX_SKIP_LINES: + line = fp.readline() + + if line == "": + # end of file + return + + if line[0] == '@': + break + + skip_line_count += 1 + + if skip_line_count == MAX_SKIP_LINES: + raise ValueError("Invalid file format") + + while True: + id = line[1:].split()[0] + seq = "" + + line = fp.readline() + if line == "": + return + + seq = line.strip() + yield id, seq + + line = fp.readline() # '+' + line = fp.readline() # quality + line = fp.readline() # next ID + if line == "": + return + +""" +""" +def parser_FA(fp): + # skip empty line + skip_line_count = 0 + while skip_line_count < MAX_SKIP_LINES: + line = fp.readline() + + if line == "": + # end of file + return + + if line[0] == '>': + break + + skip_line_count += 1 + + if skip_line_count == MAX_SKIP_LINES: + raise ValueError("Invalid file format") + + while True: + id = line[1:].split()[0] + seq = "" + + while True: + line = fp.readline() + if line == "": + break + + if line[0] == '>': + break + + seq += line.strip() + + yield id, seq + + if line == "": + return + + +""" +""" +def parse_type(fname): + compression_type = COMPRESSION_NON + sequence_type = SEQUENCE_UNKNOWN + + ff = fname.split('.') + + ext = ff[-1] + if ext.lower() == "gz": + compression_type = COMPRESSION_GZIP + ext = ff[-2] + elif ext.lower() == "bz2": + compression_type = COMPRESSION_BZIP2 + ext = ff[-2] + + if ext.lower() in FASTA_EXTENSIONS: + sequence_type 
= SEQUENCE_FASTA + elif ext.lower() in FASTQ_EXTENSIONS: + sequence_type = SEQUENCE_FASTQ + + return sequence_type, compression_type + +""" +""" +def generate_stats(length_map): + mn = 0 # minimun read length + mx = 0 # maximum read length + cnt = 0 # number of reads + avg = 0 # average read length + + sum = 0 + + if len(length_map) == 0: + return cnt, mn, mx, avg + + # sort keys + sorted_map = sorted(length_map) + + mn = sorted_map[0] + mx = sorted_map[-1] + + for k, v in length_map.items(): + sum += k * v + cnt += v + + avg = sum // cnt + + return cnt, mn, mx, avg + +""" +""" +def reads_stat(read_file, read_count): + length_map = {} + try: + sequence_type, compression_type = parse_type(read_file) + + if compression_type == COMPRESSION_GZIP: + fp = gzip.open(read_file, 'rt') + elif compression_type == COMPRESSION_BZIP2: + fp = bz2.open(read_file, 'rt') + else: + assert (compression_type == COMPRESSION_NON) + fp = open(read_file, 'r') + + if sequence_type == SEQUENCE_FASTA: + fstream = parser_FA(fp) + elif sequence_type == SEQUENCE_FASTQ: + fstream = parser_FQ(fp) + else: + raise ValueError("Unsupported file format") + + cnt = 0 + for id, seq in fstream: + l = len(seq) + if l in length_map: + length_map[l] += 1 + else: + length_map[l] = 1 + + cnt += 1 + if read_count > 0 and cnt >= read_count: + break + + fp.close() + + except BaseException as e: + print("Warning: {}".format(e), file=sys.stderr) + + cnt, mn, mx, avg = generate_stats(length_map) + # sort by (read count, read length) + length_map = sorted(length_map.items(), key=lambda t: (t[1], t[0]), reverse=True) + if len(length_map) == 0: + length_map.append((0, 0)) + print(cnt, mn, mx, avg, ",".join([str(k) for (k,v) in length_map])) + + +if __name__ == '__main__': + + parser = ArgumentParser( + description='Compute statistics of reads. 
Show number of reads and minimum, maximum, average length of reads') + + parser.add_argument('read_file', + nargs='?', + type=str, + help='reads file') + + parser.add_argument('-n', + dest='read_count', + action='store', + type=int, + default=10000, + help='reads count (default: 10000)') + + args = parser.parse_args() + + if not args.read_file: + parser.print_help() + exit(1) + + reads_stat(args.read_file, args.read_count) + diff --git a/hisat2_repeat.cpp b/hisat2_repeat.cpp new file mode 100644 index 0000000..a92d34a --- /dev/null +++ b/hisat2_repeat.cpp @@ -0,0 +1,996 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include "assert_helpers.h" +#include "endian_swap.h" +#include "formats.h" +#include "sequence_io.h" +#include "tokenize.h" +#include "timer.h" +#include "ref_read.h" +#include "filebuf.h" +#include "reference.h" +#include "ds.h" +#include "gfm.h" +#include "aligner_sw.h" +#include "aligner_result.h" +#include "search_globals.h" +#include "scoring.h" +#include "mask.h" +#include "repeat_builder.h" +#include "utility_3n.h" + +/** + * \file Driver for the bowtie-build indexing tool. 
+ */ + +#include +#include +#include +#include +#include + +MemoryTally gMemTally; +// Build parameters +int verbose; +static int sanityCheck; +static int format; +static TIndexOffU bmax; +static TIndexOffU bmaxMultSqrt; +static uint32_t bmaxDivN; +static int dcv; +static int noDc; +static int entireSA; +static int seed; +static int showVersion; +// GFM parameters +static int32_t lineRate; +static bool lineRate_provided; +static int32_t linesPerSide; +static int32_t offRate; +static int32_t ftabChars; +static int32_t localOffRate; +static int32_t localFtabChars; +static int bigEndian; +static bool autoMem; +static int nthreads; // number of pthreads operating concurrently +static string wrapper; +static TIndexOffU seed_length; +static TIndexOffU seed_count; +static TIndexOffU repeat_count; +static TIndexOffU min_repeat_length; +static TIndexOffU max_repeat_length; +static EList > repeat_length_pair; +static TIndexOffU max_repeat_edit; +static TIndexOffU max_repeat_matchlen; +static bool symmetric_extend; +static bool repeat_indel; +static bool forward_only; +static bool CGtoTG; +static string repeat_str1; +static string repeat_str2; +TIndexOffU max_seed_mm; +TIndexOffU max_seed_repeat; +TIndexOffU max_seed_extlen; +static bool save_sa; +static bool load_sa; + +bool threeN = false; +char convertedFrom; +char convertedTo; +char convertedFromComplement; +char convertedToComplement; +bool base_change_entered; + +static void resetOptions() { + verbose = true; // be talkative (default) + sanityCheck = 0; // do slow sanity checks + format = FASTA; // input sequence format + bmax = OFF_MASK; // max blockwise SA bucket size + bmaxMultSqrt = OFF_MASK; // same, as multplier of sqrt(n) + bmaxDivN = 4; // same, as divisor of n + dcv = 1024; // bwise SA difference-cover sample sz + noDc = 0; // disable difference-cover sample + entireSA = 0; // 1 = disable blockwise SA + seed = 0; // srandom seed + showVersion = 0; // just print version and quit? 
+ // GFM parameters + lineRate = GFM::default_lineRate_gfm; + lineRate_provided = false; + linesPerSide = 1; // 1 64-byte line on a side + offRate = 4; // sample 1 out of 16 SA elts + ftabChars = 10; // 10 chars in initial lookup table + localOffRate = 3; + localFtabChars = 6; + bigEndian = 0; // little endian + autoMem = true; // automatically adjust memory usage parameters + nthreads = 1; + seed_length = 50; + seed_count = 5; + min_repeat_length = 100; + max_repeat_length = numeric_limits::max(); + repeat_length_pair.clear(); + repeat_count = 5; + max_repeat_edit = 10; + max_repeat_matchlen = min_repeat_length / 2; // half of repeat_length + repeat_indel = false; + symmetric_extend = true; + forward_only = false; + CGtoTG = false; + max_seed_mm = 5; + max_seed_repeat = 5; + max_seed_extlen = 25; + save_sa = false; + load_sa = false; + wrapper.clear(); + threeN = false; + convertedFrom = 'C'; + convertedTo = 'T'; + convertedFromComplement = asc2dnacomp[convertedFrom]; + convertedToComplement = asc2dnacomp[convertedTo]; + base_change_entered = false; +} + +// Argument constants for getopts +enum { + ARG_BMAX = 256, + ARG_BMAX_MULT, + ARG_BMAX_DIV, + ARG_DCV, + ARG_SEED, + ARG_CUTOFF, + ARG_PMAP, + ARG_NTOA, + ARG_USAGE, + ARG_REVERSE_EACH, + ARG_SA, + ARG_SEED_LENGTH, + ARG_SEED_COUNT, + ARG_MIN_REPEAT_LENGTH, + ARG_MAX_REPEAT_LENGTH, + ARG_REPEAT_LENGTH, + ARG_REPEAT_COUNT, + ARG_REPEAT_EDIT, + ARG_REPEAT_MATCHLEN, + ARG_REPEAT_INDEL, + ARG_WRAPPER, + ARG_ASYMMETRIC_EXTEND, + ARG_FORWARD_ONLY, + ARG_CGTOTG, + ARG_REPEAT_STR1, + ARG_REPEAT_STR2, + ARG_MAX_SEED_MM, + ARG_MAX_SEED_REPEAT, + ARG_MAX_SEED_EXTLEN, + ARG_SAVE_SA, + ARG_LOAD_SA, + ARG_3N, + ARG_BASE_CHANGE +}; + +/** + * Print a detailed usage message to the provided output stream. 
+ */ +static void printUsage(ostream& out) { + out << "HISAT2 version " << string(HISAT2_VERSION).c_str() << " by Chanhee Park and Daehwan Kim " << endl; + + string tool_name = "hisat2-repeat"; + out << "Usage: " << tool_name << " [options]* " << endl + << " reference_in comma-separated list of files with ref sequences" << endl + << "Options:" << endl + << " -c reference sequences given on cmd line (as" << endl + << " )" << endl; + if(wrapper == "basic-0") { + out << " --large-index force generated index to be 'large', even if ref" << endl + << " has fewer than 4 billion nucleotides" << endl; + } + out << " -a/--noauto disable automatic -p/--bmax/--dcv memory-fitting" << endl + << " -p number of threads" << endl + << " --bmax max bucket sz for blockwise suffix-array builder" << endl + << " --bmaxdivn max bucket sz as divisor of ref len (default: 4)" << endl + << " --dcv diff-cover period for blockwise (default: 1024)" << endl + << " --nodc disable diff-cover (algorithm becomes quadratic)" << endl + << " --seed-length seed length (default: 50)" << endl + << " --seed-count seed count (default: 5)" << endl + << " --min-repeat-length minimum repeat length (default: 100)" << endl + << " --max-repeat-length maximum repeat length (default: 65535)" << endl + << " --repeat-length -[,-] minimum-maximum repeat length pairs" << endl + << " --repeat-count minimum repeat count (default: 5)" << endl + << " --repeat-edit maximum repeat edit distance (default: 10)" << endl + << " --repeat-matchlen " << endl + << " --repeat-indel" << endl + << " --repeat-str1" << endl + << " --repeat-str2" << endl + << " --asymmetric-extend extend seeds asymmetrically" << endl + << " --forward-only use forward strand only" << endl + << " --CGtoTG change CG to TG" << endl + << " --max-seed-mm " << endl + << " --max-seed-repeat " << endl + << " --max-seed-extlen " << endl + << " --save-sa" << endl + << " --load-sa" << endl + << " --3N make 3N repeat database" << endl + << " --base-change the converted 
nucleotide and converted to nucleotide (default:C,T)" << endl + << " -q/--quiet disable verbose output (for debugging)" << endl + << " -h/--help print detailed description of tool and its options" << endl + << " --usage print this usage message" << endl + << " --version print version information and quit" << endl + ; +} + +static const char *short_options = "qrap:h?nscfl:i:o:t:h:3C"; + +static struct option long_options[] = { + {(char*)"quiet", no_argument, 0, 'q'}, + {(char*)"sanity", no_argument, 0, 's'}, + {(char*)"threads", required_argument, 0, 'p'}, + {(char*)"little", no_argument, &bigEndian, 0}, + {(char*)"big", no_argument, &bigEndian, 1}, + {(char*)"bmax", required_argument, 0, ARG_BMAX}, + {(char*)"bmaxmultsqrt", required_argument, 0, ARG_BMAX_MULT}, + {(char*)"bmaxdivn", required_argument, 0, ARG_BMAX_DIV}, + {(char*)"dcv", required_argument, 0, ARG_DCV}, + {(char*)"nodc", no_argument, &noDc, 1}, + {(char*)"seed", required_argument, 0, ARG_SEED}, + {(char*)"entiresa", no_argument, &entireSA, 1}, + {(char*)"version", no_argument, &showVersion, 1}, + {(char*)"noauto", no_argument, 0, 'a'}, + {(char*)"noblocks", required_argument, 0, 'n'}, + {(char*)"linerate", required_argument, 0, 'l'}, + {(char*)"linesperside", required_argument, 0, 'i'}, + {(char*)"usage", no_argument, 0, ARG_USAGE}, + {(char*)"seed-length", required_argument, 0, ARG_SEED_LENGTH}, + {(char*)"seed-count", required_argument, 0, ARG_SEED_COUNT}, + {(char*)"min-repeat-length", required_argument, 0, ARG_MIN_REPEAT_LENGTH}, + {(char*)"max-repeat-length", required_argument, 0, ARG_MAX_REPEAT_LENGTH}, + {(char*)"repeat-length", required_argument, 0, ARG_REPEAT_LENGTH}, + {(char*)"repeat-count", required_argument, 0, ARG_REPEAT_COUNT}, + {(char*)"repeat-edit", required_argument, 0, ARG_REPEAT_EDIT}, + {(char*)"repeat-matchlen",required_argument, 0, ARG_REPEAT_MATCHLEN}, + {(char*)"repeat-indel", no_argument, 0, ARG_REPEAT_INDEL}, + {(char*)"wrapper", required_argument, 0, ARG_WRAPPER}, + 
{(char*)"asymmetric-extend", no_argument, 0, ARG_ASYMMETRIC_EXTEND}, + {(char*)"forward-only", no_argument, 0, ARG_FORWARD_ONLY}, + {(char*)"CGtoTG", no_argument, 0, ARG_CGTOTG}, + {(char*)"repeat-str1", required_argument, 0, ARG_REPEAT_STR1}, + {(char*)"repeat-str2", required_argument, 0, ARG_REPEAT_STR2}, + {(char*)"max-seed-mm", required_argument, 0, ARG_MAX_SEED_MM}, + {(char*)"max-seed-repeat",required_argument, 0, ARG_MAX_SEED_REPEAT}, + {(char*)"max-seed-extlen",required_argument, 0, ARG_MAX_SEED_EXTLEN}, + {(char*)"save-sa", no_argument, 0, ARG_SAVE_SA}, + {(char*)"load-sa", no_argument, 0, ARG_LOAD_SA}, + {(char*)"3N", no_argument, 0, ARG_3N}, + {(char*)"base-change", required_argument, 0, ARG_BASE_CHANGE}, + {(char*)0, 0, 0, 0} // terminator +}; + +/** + * Parse an int out of optarg and enforce that it be at least 'lower'; + * if it is less than 'lower', then output the given error message and + * exit with an error and a usage message. + */ +template +static T parseNumber(const char *str, T lower, const char *errmsg) +{ + char *endPtr= NULL; + T t = (T)strtoll(str, &endPtr, 10); + if(endPtr != NULL) { + if(t < lower) { + cerr << errmsg << endl; + printUsage(cerr); + throw 1; + } + return t; + } + cerr << errmsg << endl; + printUsage(cerr); + throw 1; + return -1; +} + +template +static T parseNumber(T lower, const char *errmsg) { + return parseNumber(optarg, lower, errmsg); +} + +static void parsePair(EList >& repeat_pair) +{ + string tok; + istringstream ss(optarg); + + while(getline(ss, tok, ',')) { + if(tok.empty()) { + continue; + } + + TIndexOffU min_len, max_len; + + size_t pos = tok.find('-'); + if(pos == string::npos) { + // min + min_len = parseNumber(tok.c_str(), 1, "min-repeat-length must be at least 1"); + max_len = numeric_limits::max(); + } else if(pos == tok.length() - 1) { + // min- + min_len = parseNumber(tok.substr(0, pos).c_str(), 1, "min-repeat-length must be at least 1"); + max_len = numeric_limits::max(); + } else if(pos == 0) { + 
// -max + // not support? + min_len = 100; + max_len = parseNumber(tok.substr(pos + 1).c_str(), 1, "max-repeat-length must be at least 1"); + } else { + min_len = parseNumber(tok.substr(0, pos).c_str(), 1, "min-repeat-length must be at least 1"); + max_len = parseNumber(tok.substr(pos + 1).c_str(), 1, "max-repeat-length must be at least 1"); + } + + if(min_len > max_len) { + printUsage(cerr); + throw 1; + } + if(max_len > numeric_limits::max()) { + printUsage(cerr); + throw 1; + } + + repeat_pair.push_back(pair(min_len, max_len)); + } +} + +/** + * Read command-line arguments + */ +static void parseOptions(int argc, const char **argv) { + int option_index = 0; + int next_option; + bool saw_max_repeat_matchlen = false; + do { + next_option = getopt_long( + argc, const_cast(argv), + short_options, long_options, &option_index); + switch (next_option) { + case ARG_WRAPPER: + wrapper = optarg; + break; + case 'f': format = FASTA; break; + case 'c': format = CMDLINE; break; + //case 'p': packed = true; break; + case 'C': + cerr << "Error: -C specified but Bowtie 2 does not support colorspace input." 
<< endl; + throw 1; + break; + case 'l': + lineRate = parseNumber(3, "-l/--lineRate arg must be at least 3"); + lineRate_provided = true; + break; + case 'i': + linesPerSide = parseNumber(1, "-i/--linesPerSide arg must be at least 1"); + break; + case 'o': + offRate = parseNumber(0, "-o/--offRate arg must be at least 0"); + break; + case 'n': + // all f-s is used to mean "not set", so put 'e' on end + bmax = 0xfffffffe; + break; + case 'h': + case ARG_USAGE: + printUsage(cout); + throw 0; + break; + case ARG_BMAX: + bmax = parseNumber(1, "--bmax arg must be at least 1"); + bmaxMultSqrt = OFF_MASK; // don't use multSqrt + bmaxDivN = 0xffffffff; // don't use multSqrt + break; + case ARG_BMAX_MULT: + bmaxMultSqrt = parseNumber(1, "--bmaxmultsqrt arg must be at least 1"); + bmax = OFF_MASK; // don't use bmax + bmaxDivN = 0xffffffff; // don't use multSqrt + break; + case ARG_BMAX_DIV: + bmaxDivN = parseNumber(1, "--bmaxdivn arg must be at least 1"); + bmax = OFF_MASK; // don't use bmax + bmaxMultSqrt = OFF_MASK; // don't use multSqrt + break; + case ARG_DCV: + dcv = parseNumber(3, "--dcv arg must be at least 3"); + break; + case ARG_SEED: + seed = parseNumber(0, "--seed arg must be at least 0"); + break; + case ARG_SEED_LENGTH: + seed_length = parseNumber(1, "--seed-length arg must be at least 1"); + break; + case ARG_SEED_COUNT: + seed_count = parseNumber(2, "--repeat-count arg must be at least 2"); + break; + case ARG_MIN_REPEAT_LENGTH: + min_repeat_length = parseNumber(1, "--min-repeat-length arg must be at least 1"); + break; + case ARG_MAX_REPEAT_LENGTH: + max_repeat_length = parseNumber(1, "--max-repeat-length arg must be at least 1"); + break; + case ARG_REPEAT_LENGTH: + parsePair(repeat_length_pair); + break; + case ARG_REPEAT_COUNT: + repeat_count = parseNumber(2, "--repeat-count arg must be at least 2"); + break; + case ARG_REPEAT_EDIT: + max_repeat_edit = parseNumber(0, "--repeat-edit arg must be at least 0"); + break; + case ARG_REPEAT_MATCHLEN: + 
max_repeat_matchlen = parseNumber(0, "--repeat-matchlen arg must be at least 0"); + saw_max_repeat_matchlen = true; + break; + case ARG_REPEAT_INDEL: + repeat_indel = true; + break; + case ARG_ASYMMETRIC_EXTEND: + symmetric_extend = false; + break; + case ARG_FORWARD_ONLY: + forward_only = true; + break; + case ARG_CGTOTG: + CGtoTG = true; + break; + case ARG_REPEAT_STR1: + repeat_str1 = optarg; + break; + case ARG_REPEAT_STR2: + repeat_str2 = optarg; + break; + case ARG_MAX_SEED_MM: + max_seed_mm = parseNumber(1, "--max_seed_mm arg must be at least 1"); + break; + case ARG_MAX_SEED_REPEAT: + max_seed_repeat = parseNumber(5, "--max_seed_repeat arg must be at least 5"); + break; + case ARG_MAX_SEED_EXTLEN: + max_seed_extlen = parseNumber(0, "--max_seed_extlen arg must be at least 0"); + break; + case ARG_SAVE_SA: + save_sa = true; + break; + case ARG_LOAD_SA: + load_sa = true; + break; + case ARG_3N: { + threeN = true; + break; + } + case ARG_BASE_CHANGE: { + EList args; + tokenize(optarg, ",", args); + if(args.size() != 2) { + cerr << "Error: expected 2 comma-separated " + << "arguments to --base-change option, got " << args.size() << endl; + throw 1; + } + getConversion(args[0][0], args[1][0], convertedFrom, convertedTo); + + string s = "ACGT"; + if ((s.find(convertedFrom) == std::string::npos) || (s.find(convertedTo) == std::string::npos)) { + cerr << "Please enter the nucleotide in 'ACGT' for --base-change option." << endl; + throw 1; + } + + if (convertedFrom == convertedTo) { + cerr << "Please enter two different base for --base-change option. If you wish to build the repeat database without nucleotide conversion, please do not use --base-change and --3N options." << endl; + throw 1; + } + + base_change_entered = true; + } + case 'a': autoMem = false; break; + case 'q': verbose = false; break; + case 's': sanityCheck = true; break; + case 'p': + nthreads = parseNumber(1, "-p arg must be at least 1"); + break; + + case -1: /* Done with options. 
*/ + break; + case 0: + if (long_options[option_index].flag != 0) + break; + default: + printUsage(cerr); + throw 1; + } + } while(next_option != -1); + + if(bmax < 40) { + cerr << "Warning: specified bmax is very small (" << bmax << "). This can lead to" << endl + << "extremely slow performance and memory exhaustion. Perhaps you meant to specify" << endl + << "a small --bmaxdivn?" << endl; + } + + if(!saw_max_repeat_matchlen) { + max_repeat_matchlen = min_repeat_length / 2; + } +} + +extern void initializeCntLut(); +extern void initializeCntBit(); + + +ConvertMatrix3N baseChange; +/** + * Drive the index construction process and optionally sanity-check the + * result. + */ +template +static void driver( + const string& infile, + EList& infiles, + const string& outfile, + bool packed, + bool forward_only, + bool CGtoTG) +{ + initializeCntLut(); + initializeCntBit(); + + EList is(MISC_CAT); + bool bisulfite = false; + bool nsToAs = false; + RefReadInParams refparams(false, false /* reverse */, nsToAs, bisulfite); + assert_gt(infiles.size(), 0); + if(format == CMDLINE) { + // Adapt sequence strings to stringstreams open for input + stringstream *ss = new stringstream(); + for(size_t i = 0; i < infiles.size(); i++) { + (*ss) << ">" << i << endl << infiles[i].c_str() << endl; + } + FileBuf *fb = new FileBuf(ss); + assert(fb != NULL); + assert(!fb->eof()); + assert(fb->get() == '>'); + ASSERT_ONLY(fb->reset()); + assert(!fb->eof()); + is.push_back(fb); + } else { + // Adapt sequence files to ifstreams + for(size_t i = 0; i < infiles.size(); i++) { + FILE *f = fopen(infiles[i].c_str(), "r"); + if (f == NULL) { + cerr << "Error: could not open "<< infiles[i].c_str() << endl; + throw 1; + } + FileBuf *fb = new FileBuf(f); + assert(fb != NULL); + if(fb->peek() == -1 || fb->eof()) { + cerr << "Warning: Empty fasta file: '" << infile.c_str() << "'" << endl; + continue; + } + assert(!fb->eof()); + assert(fb->get() == '>'); + ASSERT_ONLY(fb->reset()); + assert(!fb->eof()); + 
is.push_back(fb); + } + } + if(is.empty()) { + cerr << "Warning: All fasta inputs were empty" << endl; + throw 1; + } + + // Vector for the ordered list of "records" comprising the input + // sequences. A record represents a stretch of unambiguous + // characters in one of the input sequences. + EList szs; + EList ref_names; + std::pair sztot; + { + if(verbose) cerr << "Reading reference sizes" << endl; + Timer _t(cerr, " Time reading reference sizes: ", verbose); + sztot = BitPairReference::szsFromFasta(is, "", bigEndian, refparams, szs, sanityCheck, &ref_names); + } + + assert_gt(sztot.first, 0); + assert_gt(sztot.second, 0); + assert_gt(szs.size(), 0); + + // Compose text strings into single string + cerr << "Calculating joined length" << endl; + TIndexOffU jlen = 0; + for(unsigned int i = 0; i < szs.size(); i++) { + jlen += (TIndexOffU)szs[i].len; + } + // assert_geq(jlen, sztot); + cerr << " Joined length: " << jlen << endl; + TStr s; + { + bool both_strand = forward_only ? false : true; + cerr << "Reserving space for joined string" << endl; + cerr << "Joining reference sequences" << endl; + Timer timer(cerr, " Time to join reference sequences: ", verbose); + GFM::join( + is, + szs, + (TIndexOffU) sztot.first, + refparams, + seed, + s, + both_strand, // include reverse complemented sequence + CGtoTG); //Change CG to TG + } + + TStr sOriginal; + if (threeN) { + baseChange.restoreNormal(); + bool both_strand = forward_only ? 
false : true; + for (int i = 0; i < is.size(); i++) { + is[i]->reset(); + } + GFM::join( + is, + szs, + (TIndexOffU) sztot.first, + refparams, + seed, + sOriginal, + both_strand, // include reverse complemented sequence + CGtoTG); //Change CG to TG + baseChange.restoreConversion(); + + long long int guessLen = s.length() / 2; + for (TIndexOffU i = 0; i < guessLen; i++) { + int nt = sOriginal[guessLen - i - 1]; + assert_range(0, 3, nt); + s[guessLen + i] = dnacomp[nt]; + } + } else { + sOriginal = s; + } + + // Successfully obtained joined reference string +#ifndef NDEBUG + if(forward_only) { + assert_geq(s.length(), jlen); + } else { + assert_geq(s.length(), jlen << 1); + } +#endif + + + BitPackedArray suffix_array; + + bool sa_file_exist = false; + string sa_fname = outfile + ".rep.sa"; + if(load_sa) { + ifstream fp(sa_fname, std::ifstream::binary); + sa_file_exist = fp.is_open(); + } + + if(load_sa && sa_file_exist) { + cerr << "Load SA from " << sa_fname << endl; + suffix_array.readFile(sa_fname); + } else { + suffix_array.init(s.length() + 1); + + if(bmax != (TIndexOffU) OFF_MASK) { + // VMSG_NL("bmax according to bmax setting: " << bmax); + } else if(bmaxDivN != (uint32_t) OFF_MASK) { + bmax = max(jlen / (bmaxDivN * nthreads), 1); + // VMSG_NL("bmax according to bmaxDivN setting: " << bmax); + } else { + bmax = (uint32_t) sqrt(s.length()); + // VMSG_NL("bmax defaulted to: " << bmax); + } + int iter = 0; + bool first = true; + bool passMemExc = false, sanity = false; + // Look for bmax/dcv parameters that work. + while(true) { + if(!first && bmax < 40 && passMemExc) { + cerr << "Could not find appropriate bmax/dcv settings for building this index." << endl; + cerr << "Please try indexing this reference on a computer with more memory." << endl; + if (sizeof(void *) == 4) { + cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl + << "this executable is 32-bit." 
<< endl; + } + throw 1; + } + if(dcv > 4096) dcv = 4096; + if((iter % 6) == 5 && dcv < 4096 && dcv != 0) { + dcv <<= 1; // double difference-cover period + } else { + bmax -= (bmax >> 2); // reduce by 25% + } + iter++; + + suffix_array.reset(); + + try { + cerr << "Using parameters --bmax " << bmax << endl; + if(dcv == 0) { + cerr << " and *no difference cover*" << endl; + } else { + cerr << " --dcv " << dcv << endl; + } + { + cerr << " Doing ahead-of-time memory usage test" << endl; + // Make a quick-and-dirty attempt to force a bad_alloc iff + // we would have thrown one eventually as part of + // constructing the DifferenceCoverSample + dcv <<= 1; + TIndexOffU sz = (TIndexOffU) DifferenceCoverSample::simulateAllocs(s, dcv >> 1); + if(nthreads > 1) sz *= (nthreads + 1); + AutoArray tmp(sz, EBWT_CAT); + dcv >>= 1; + // Likewise with the KarkkainenBlockwiseSA + sz = (TIndexOffU) KarkkainenBlockwiseSA::simulateAllocs(s, bmax); + AutoArray tmp2(sz, EBWT_CAT); + // Grab another 20 MB out of caution + AutoArray extra(20 * 1024 * 1024, EBWT_CAT); + // If we made it here without throwing bad_alloc, then we + // passed the memory-usage stress test + cerr << " Passed! 
Constructing with these parameters: --bmax " << bmax << " --dcv " << dcv + << endl; + cerr << "" << endl; + } + cerr << "Constructing suffix-array element generator" << endl; + + KarkkainenBlockwiseSA bsa(s, + bmax, + nthreads, + dcv, + seed, + sanity, + passMemExc, + false /* verbose */, + outfile); + + assert(bsa.suffixItrIsReset()); + assert_eq(bsa.size(), s.length() + 1); + + TIndexOffU count = 0; + + while(count < s.length() + 1) { + TIndexOffU saElt = bsa.nextSuffix(); + count++; + + if(count && (count % 10000000 == 0)) { + cerr << "SA count " << count << endl; + } + + if(saElt == s.length()) { + assert_eq(count, s.length() + 1); + break; + } + + suffix_array.pushBack(saElt); + } + + break; + + } catch (bad_alloc &e) { + if(passMemExc) { + cerr << " Ran out of memory; automatically trying more memory-economical parameters." << endl; + } else { + cerr << "Out of memory while constructing suffix array. Please try using a smaller" << endl + << "number of blocks by specifying a smaller --bmax or a larger --bmaxdivn" << endl; + throw 1; + } + } + first = false; + } + + if(save_sa) { + suffix_array.writeFile(sa_fname); + } + } + + cerr << "suffix_array: " << endl; + suffix_array.dump(); + + + // Build Repeats + { + if(repeat_length_pair.empty()) { + repeat_length_pair.push_back(pair(min_repeat_length, max_repeat_length)); + } + + RepeatParameter rp; + rp.seed_len = seed_length; + rp.seed_count = seed_count; + rp.seed_mm = max_seed_mm; + rp.max_edit = max_repeat_edit; + rp.symmetric_extend = symmetric_extend; + rp.extend_unit_len = max_seed_extlen; + for(size_t i = 0; i < repeat_length_pair.size(); i++) { + rp.min_repeat_len = repeat_length_pair[i].first; + rp.max_repeat_len = repeat_length_pair[i].second; + rp.repeat_count = repeat_count; + rp.append_result = (i != 0); + + RepeatBuilder repeatBuilder(s, + sOriginal, + szs, + ref_names, + forward_only, + outfile); + cerr << "RepeatBuilder: " << outfile << " " << rp.min_repeat_len << "-" << rp.max_repeat_len << endl; 
+ + { + Timer _t(cerr, " Time reading suffix array: ", verbose); + repeatBuilder.readSA(rp, suffix_array); + } + + repeatBuilder.build(rp); + repeatBuilder.saveFile(rp); + } + } +} + +static const char *argv0 = NULL; + +extern "C" { +/** + * main function. Parses command-line arguments. + */ +int hisat2_repeat(int argc, const char **argv) { + string outfile; + try { + // Reset all global state, including getopt state + opterr = optind = 1; + resetOptions(); + + string infile; + EList infiles(MISC_CAT); + + parseOptions(argc, argv); + argv0 = argv[0]; + if(showVersion) { + cout << argv0 << " version " << string(HISAT2_VERSION).c_str() << endl; + if(sizeof(void*) == 4) { + cout << "32-bit" << endl; + } else if(sizeof(void*) == 8) { + cout << "64-bit" << endl; + } else { + cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + } + cout << "Built on " << BUILD_HOST << endl; + cout << BUILD_TIME << endl; + cout << "Compiler: " << COMPILER_VERSION << endl; + cout << "Options: " << COMPILER_OPTIONS << endl; + cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" + << sizeof(int) + << ", " << sizeof(long) << ", " << sizeof(long long) + << ", " << sizeof(void *) << ", " << sizeof(size_t) + << ", " << sizeof(off_t) << "}" << endl; + return 0; + } + + if (!threeN && base_change_entered) { + cerr << "To build hisat-3n repeat database, please add argument --3N. To build the hisat2 repeat database, please remove the argument --base-change." << endl; + printUsage(cerr); + throw 1; + } + if (threeN) { + convertedFromComplement = asc2dnacomp[convertedFrom]; + convertedToComplement = asc2dnacomp[convertedTo]; + } + // Get input filename + if(optind >= argc) { + cerr << "No input sequence or sequence file specified!" << endl; + printUsage(cerr); + return 1; + } + infile = argv[optind++]; + + if(optind >= argc) { + cerr << "No output file specified!" 
<< endl; + printUsage(cerr); + return 1; + } + outfile = argv[optind++]; + + tokenize(infile, ",", infiles); + if(infiles.size() < 1) { + cerr << "Tokenized input file list was empty!" << endl; + printUsage(cerr); + return 1; + } + + // Optionally summarize + if(verbose) { + cerr << "Settings:" << endl; + // << " Output files: \"" << outfile.c_str() << ".*." << gfm_ext << "\"" << endl; + cerr << " Endianness: " << (bigEndian? "big":"little") << endl + << " Actual local endianness: " << (currentlyBigEndian()? "big":"little") << endl + << " Sanity checking: " << (sanityCheck? "enabled":"disabled") << endl; + #ifdef NDEBUG + cerr << " Assertions: disabled" << endl; + #else + cerr << " Assertions: enabled" << endl; + #endif + cerr << " Random seed: " << seed << endl; + cerr << " Sizeofs: void*:" << sizeof(void*) << ", int:" << sizeof(int) << ", long:" << sizeof(long) << ", size_t:" << sizeof(size_t) << endl; + cerr << "Input files DNA, " << file_format_names[format].c_str() << ":" << endl; + for(size_t i = 0; i < infiles.size(); i++) { + cerr << " " << infiles[i].c_str() << endl; + } + } + // Seed random number generator + srand(seed); + { + Timer timer(cerr, "Total time for call to driver() for forward index: ", verbose); + try { + int nloop = threeN ? 2 : 1; // if threeN == true, nloop = 2. else one loop + for (int i = 0; i < nloop; i++) { + string tag = ""; + if (threeN) { + tag += ".3n."; + if (i == 0) { + tag += convertedFrom; + tag += convertedTo; + baseChange.convert(convertedFrom, convertedTo); + } else { + tag += convertedFromComplement; + tag += convertedToComplement; + baseChange.convert(convertedFromComplement, convertedToComplement); + } + + string indexFilename = outfile + tag + ".rep.fa"; + if (fileExist(indexFilename)) { + cerr << "*** Find repeat database for " << outfile + tag << ",skip this repeat database building process." 
<< endl; + cerr << " To re-build your hisat-3n repeat database, please delete the old index manually before running hisat2-repeat or hisat-3n-build." << endl; + continue; + } + } + driver >(infile, infiles, outfile + tag, false, forward_only, CGtoTG); + } + + } catch(bad_alloc& e) { + if(autoMem) { + cerr << "Switching to a packed string representation." << endl; + } else { + throw e; + } + } + } + return 0; + } catch(std::exception& e) { + cerr << "Error: Encountered exception: '" << e.what() << "'" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + return 1; + } catch(int e) { + if(e != 0) { + cerr << "Error: Encountered internal HISAT2 exception (#" << e << ")" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + } + return e; + } +} +} diff --git a/hisat2_repeat_main.cpp b/hisat2_repeat_main.cpp new file mode 100644 index 0000000..a5f3cc0 --- /dev/null +++ b/hisat2_repeat_main.cpp @@ -0,0 +1,70 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +#include +#include +#include +#include "tokenize.h" +#include "ds.h" +#include "mem_ids.h" + +using namespace std; + +extern "C" { + int hisat2_repeat(int argc, const char **argv); +} + +/** + * bowtie-build main function. 
It is placed in a separate source file + * to make it slightly easier to compile as a library. + * + * If the user specifies -A as the first two arguments, main + * will interpret that file as having one set of command-line arguments + * per line, and will dispatch each batch of arguments one at a time to + * bowtie-build. + */ +int main(int argc, const char **argv) { + if(argc > 2 && strcmp(argv[1], "-A") == 0) { + const char *file = argv[2]; + ifstream in; + in.open(file); + char buf[4096]; + int lastret = -1; + while(in.getline(buf, 4095)) { + EList args(MISC_CAT); + args.push_back(string(argv[0])); + tokenize(buf, " \t", args); + const char **myargs = (const char**)malloc(sizeof(char*)*args.size()); + for(size_t i = 0; i < args.size(); i++) { + myargs[i] = args[i].c_str(); + } + if(args.size() == 1) continue; + lastret = hisat2_repeat((int)args.size(), myargs); + free(myargs); + } + if(lastret == -1) { + cerr << "Warning: No arg strings parsed from " << file << endl; + return 0; + } + return lastret; + } else { + return hisat2_repeat(argc, argv); + } +} diff --git a/hisat2_simulate_reads.py b/hisat2_simulate_reads.py new file mode 100644 index 0000000..c503522 --- /dev/null +++ b/hisat2_simulate_reads.py @@ -0,0 +1,971 @@ +#!/usr/bin/env python3 +# +# Copyright 2015, Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . 
+# + +import os, sys, math, random, re +from collections import defaultdict, Counter +from argparse import ArgumentParser, FileType + + +""" +""" +def reverse_complement(seq): + result = "" + for nt in seq: + base = nt + if nt == 'A': + base = 'T' + elif nt == 'a': + base = 't' + elif nt == 'C': + base = 'G' + elif nt == 'c': + base = 'g' + elif nt == 'G': + base = 'C' + elif nt == 'g': + base = 'c' + elif nt == 'T': + base = 'A' + elif nt == 't': + base = 'a' + + result = base + result + + return result + + +""" +python2 style randint +""" +def myrandint(m, x): + s = x - m + 1 + return m + int(random.random() * s) + +""" +Random source for sequencing errors +""" +class ErrRandomSource: + def __init__(self, prob = 0.0, size = 1 << 20): + self.size = size + self.rands = [] + for i in range(self.size): + if random.random() < prob: + self.rands.append(1) + else: + self.rands.append(0) + self.cur = 0 + + def getRand(self): + assert self.cur < len(self.rands) + rand = self.rands[self.cur] + self.cur = (self.cur + 1) % len(self.rands) + return rand + + +""" +""" +def read_genome(genome_file): + chr_dic = {} + + chr_name, sequence = "", "" + for line in genome_file: + if line[0] == ">": + if chr_name and sequence: + chr_dic[chr_name] = sequence + chr_name = line.strip().split()[0][1:] + sequence = "" + else: + sequence += line[:-1] + + if chr_name and sequence: + chr_dic[chr_name] = sequence + + + chr_filter = [str(x) for x in list(range(1, 23)) + ['X', 'Y']] + #chr_filter = None + + if chr_filter: + for chr_id, chr_seq in chr_dic.items(): + if not chr_id in chr_filter: + chr_dic.pop(chr_id, None) + + return chr_dic + + +""" +""" +def read_transcript(genome_seq, gtf_file, frag_len): + genes = defaultdict(list) + transcripts = {} + + # Parse valid exon lines from the GTF file into a dict by transcript_id + for line in gtf_file: + line = line.strip() + if not line or line.startswith('#'): + continue + if '#' in line: + line = line.split('#')[0].strip() + try: + chrom, 
source, feature, left, right, score, \ + strand, frame, values = line.split('\t') + except ValueError: + continue + if chrom not in genome_seq: + continue + + # Zero-based offset + left, right = int(left) - 1, int(right) - 1 + if feature != 'exon' or left >= right: + continue + + values_dict = {} + for attr in values.split(';')[:-1]: + attr, _, val = attr.strip().partition(' ') + values_dict[attr] = val.strip('"') + + if 'gene_id' not in values_dict or \ + 'transcript_id' not in values_dict: + continue + + transcript_id = values_dict['transcript_id'] + if transcript_id not in transcripts: + transcripts[transcript_id] = [chrom, strand, [[left, right]]] + genes[values_dict['gene_id']].append(transcript_id) + else: + transcripts[transcript_id][2].append([left, right]) + + # Sort exons and merge where separating introns are <=5 bps + for tran, [chr, strand, exons] in transcripts.items(): + exons.sort() + tmp_exons = [exons[0]] + for i in range(1, len(exons)): + if exons[i][0] - tmp_exons[-1][1] <= 5: + tmp_exons[-1][1] = exons[i][1] + else: + tmp_exons.append(exons[i]) + transcripts[tran] = [chr, strand, tmp_exons] + + tmp_transcripts = {} + for tran, [chr, strand, exons] in transcripts.items(): + exon_lens = [e[1] - e[0] + 1 for e in exons] + transcript_len = sum(exon_lens) + if transcript_len >= frag_len: + tmp_transcripts[tran] = [chr, strand, transcript_len, exons] + + transcripts = tmp_transcripts + + return genes, transcripts + + +""" +""" +def read_snp(snp_file): + snps = defaultdict(list) + for line in snp_file: + line = line.strip() + if not line or line.startswith('#'): + continue + try: + snpID, type, chr, pos, data = line.split('\t') + except ValueError: + continue + + assert type in ["single", "deletion", "insertion"] + if type == "deletion": + data = int(data) + snps[chr].append([snpID, type, int(pos), data]) + + return snps + + +""" +""" +def sanity_check_input(genome_seq, genes, transcripts, snps, frag_len): + num_canon_ss, num_ss = 0, 0 + for 
transcript, [chr, strand, transcript_len, exons] in transcripts.items(): + assert transcript_len >= frag_len + if len(exons) <= 1: + continue + if chr not in genome_seq: + continue + chr_seq = genome_seq[chr] + for i in range(len(exons) - 1): + left1, right1 = exons[i] + assert left1 < right1 + left2, right2 = exons[i+1] + assert left2 < right2 + assert left1 < left2 and right1 < right2 + donor = chr_seq[right1+1:right1+3] + acceptor = chr_seq[left2-2:left2] + if strand == "-": + donor, acceptor = reverse_complement(acceptor), reverse_complement(donor) + if donor == "GT" and acceptor == "AG": + num_canon_ss += 1 + num_ss += 1 + + if num_ss > 0: + print("GT/AG splice sites: {}/{} ({:.2%})".format(num_canon_ss, num_ss, (float(num_canon_ss) / num_ss)), file=sys.stderr) + + num_alt_single, num_single = 0, 0 + for chr, chr_snps in snps.items(): + if chr not in genome_seq: + continue + chr_seq = genome_seq[chr] + prev_snp = None + for snp in chr_snps: + snpID, type, pos, data = snp + if prev_snp: + assert prev_snp[2] <= pos + prev_snp = snp + if type != "single": + continue + assert pos < len(chr_seq) + if chr_seq[pos] != data: + num_alt_single += 1 + num_single += 1 + + if num_single > 0: + print("Alternative bases: {}/{} ({:.2%})".format(num_alt_single, num_single, (float(num_alt_single) / num_single)), file=sys.stderr) + + +""" +""" +def generate_rna_expr_profile(expr_profile_type, num_transcripts = 10000): + # Modelling and simulating generic RNA-Seq experiments with the flux simulator + # http://nar.oxfordjournals.org/content/suppl/2012/06/29/gks666.DC1/nar-02667-n-2011-File002.pdf + def calc_expr(x, a): + x, a, b = float(x), 9500.0, 9500.0 + k = -0.6 + return (x**k) * math.exp(x/a * (x/b)**2) + + expr_profile = [0.0] * num_transcripts + for i in range(len(expr_profile)): + if expr_profile_type == "flux": + expr_profile[i] = calc_expr(i + 1, num_transcripts) + elif expr_profile_type == "constant": + expr_profile[i] = 1.0 + else: + assert False + + expr_sum = 
sum(expr_profile) + expr_profile = [expr_profile[i] / expr_sum for i in range(len(expr_profile))] + assert abs(sum(expr_profile) - 1.0) < 0.001 + return expr_profile + + +""" +""" +def generate_dna_expr_profile(genome_seq): + expr_profile = [] + for chr_id, chr_seq in genome_seq.items(): + expr_profile.append(len(chr_seq)) + expr_sum = float(sum(expr_profile)) + expr_profile = [expr_profile[i] / expr_sum for i in range(len(expr_profile))] + assert abs(sum(expr_profile) - 1.0) < 0.001 + return expr_profile + + +""" +""" +def getSNPs(chr_snps, left, right): + low, high = 0, len(chr_snps) + while low < high: + mid = (low + high) // 2 + snpID, type, pos, data = chr_snps[mid] + if pos < left: + low = mid + 1 + else: + high = mid - 1 + + snps = [] + for snp in chr_snps[low:]: + snpID, type, pos, data = snp + pos2 = pos + if type == "deletion": + pos2 += data + if pos2 >= right: + break + if pos >= left: + if len(snps) > 0: + _, prev_type, prev_pos, prev_data = snps[-1] + assert prev_pos <= pos + prev_pos2 = prev_pos + if prev_type == "deletion": + prev_pos2 += prev_data + if pos <= prev_pos2: + continue + snps.append(snp) + + return snps + + +""" +""" +def getSamAlignment(rna, exons, chr_seq, trans_seq, frag_pos, read_len, chr_snps, snp_prob, err_rand_src, max_mismatch): + # Find the genomic position for frag_pos and exon number + tmp_frag_pos, tmp_read_len = frag_pos, read_len + pos, cigars, cigar_descs = exons[0][0], [], [] + e_pos = 0 + prev_e = None + for e_i in range(len(exons)): + e = exons[e_i] + if prev_e: + i_len = e[0] - prev_e[1] - 1 + pos += i_len + e_len = e[1] - e[0] + 1 + if e_len <= tmp_frag_pos: + tmp_frag_pos -= e_len + pos += e_len + else: + pos += tmp_frag_pos + e_pos = tmp_frag_pos + break + prev_e = e + + # Define Cigar and its descriptions + assert e_i < len(exons) + e_len = exons[e_i][1] - exons[e_i][0] + 1 + assert e_pos < e_len + cur_pos = pos + match_len = 0 + prev_e = None + mismatch, remain_trans_len = 0, len(trans_seq) - (frag_pos + 
read_len) + assert remain_trans_len >= 0 + for e_i in range(e_i, len(exons)): + e = exons[e_i] + if prev_e: + i_len = e[0] - prev_e[1] - 1 + cur_pos += i_len + cigars.append(("{}N".format(i_len))) + cigar_descs.append([]) + tmp_e_left = e_left = e[0] + e_pos + e_pos = 0 + + # Retreive SNPs + if rna: + snps = getSNPs(chr_snps, e_left, e[1]) + else: + snps = getSNPs(chr_snps, frag_pos, frag_pos + read_len) + + if snp_prob < 1.0 and len(snps) > 0: + snps_ = [] + for snp in snps: + if random.random() <= snp_prob: + snps_.append(snp) + snps = snps_ + + # Simulate mismatches due to sequencing errors + mms = [] + for i in range(e_left, min(e[1], e_left + tmp_read_len - 1)): + if err_rand_src.getRand() == 1: + assert i < len(chr_seq) + err_base = "A" + #rand = random.randint(0, 2) + rand = myrandint(0, 2) + if chr_seq[i] == "A": + err_base = "GCT"[rand] + elif chr_seq[i] == "C": + err_base = "AGT"[rand] + elif chr_seq[i] == "G": + err_base = "ACT"[rand] + else: + err_base = "ACG"[rand] + mms.append(["", "single", i, err_base]) + + tmp_diffs = snps + mms +# def diff_sort(a , b): +# return a[2] - b[2] + + tmp_diffs = sorted(tmp_diffs, key=lambda t: t[2]) + diffs = [] + if len(tmp_diffs) > 0: + diffs = tmp_diffs[:1] + for diff in tmp_diffs[1:]: + _, tmp_type, tmp_pos, tmp_data = diff + _, prev_type, prev_pos, prev_data = diffs[-1] + if prev_type == "deletion": + prev_pos += prev_data + if tmp_pos <= prev_pos: + continue + diffs.append(diff) + + cigar_descs.append([]) + prev_diff = None + for diff in diffs: + diff_id, diff_type, diff_pos, diff_data = diff + if prev_diff: + prev_diff_id, prev_diff_type, prev_diff_pos, prev_diff_data = prev_diff + if prev_diff_type == "deletion": + prev_diff_pos += prev_diff_data + assert prev_diff_pos < diff_pos + diff_pos2 = diff_pos + if diff_type == "deletion": + diff_pos2 += diff_data + if e_left + tmp_read_len - 1 < diff_pos2 or e[1] < diff_pos2: + break + + if diff_type == "single": + if mismatch + 1 > max_mismatch: + continue + 
cigar_descs[-1].append([diff_pos - tmp_e_left, diff_data, diff_id]) + tmp_e_left = diff_pos + 1 + mismatch += 1 + elif diff_type == "deletion": + del_len = diff_data + if mismatch + del_len > max_mismatch: + continue + if len(cigars) <= 0 and diff_pos - e_left <= 0: + continue + if remain_trans_len < del_len: + continue + remain_trans_len -= del_len + if diff_pos - e_left > 0: + cigars.append("{}M".format(diff_pos - e_left)) + cigar_descs[-1].append([diff_pos - tmp_e_left, "", ""]) + cigar_descs.append([]) + cigars.append("{}D".format(del_len)) + cigar_descs[-1].append([0, del_len, diff_id]) + cigar_descs.append([]) + tmp_read_len -= (diff_pos - e_left) + e_left = tmp_e_left = diff_pos + del_len + + elif diff_type == "insertion": + ins_len = len(diff_data) + if mismatch + ins_len > max_mismatch: + continue + if len(cigars) <= 0 and diff_pos - e_left <= 0: + continue + if e_left + tmp_read_len - 1 < diff_pos + ins_len: + break + if diff_pos - e_left > 0: + cigars.append("{}M".format(diff_pos - e_left)) + cigar_descs[-1].append([diff_pos - tmp_e_left, "", ""]) + cigar_descs.append([]) + cigars.append("{}I".format(ins_len)) + cigar_descs[-1].append([0, diff_data, diff_id]) + cigar_descs.append([]) + tmp_read_len -= (diff_pos - e_left) + tmp_read_len -= ins_len + e_left = tmp_e_left = diff_pos + + else: + assert False + prev_diff = diff + + e_right = min(e[1], e_left + tmp_read_len - 1) + e_len = e_right - e_left + 1 + remain_e_len = e_right - tmp_e_left + 1 + if remain_e_len > 0: + cigar_descs[-1].append([remain_e_len, "", ""]) + if e_len < tmp_read_len: + tmp_read_len -= e_len + cigars.append(("{}M".format(e_len))) + else: + assert e_len == tmp_read_len + cigars.append(("{}M".format(tmp_read_len))) + tmp_read_len = 0 + break + prev_e = e + + # Define MD, XM, NM, Zs, read_seq + MD, XM, NM, Zs, read_seq = "", 0, 0, "", "" + assert len(cigars) == len(cigar_descs) + MD_match_len, Zs_match_len = 0, 0 + cur_trans_pos = frag_pos + for c in range(len(cigars)): + cigar = 
cigars[c] + cigar_len, cigar_op = int(cigar[:-1]), cigar[-1] + cigar_desc = cigar_descs[c] + if cigar_op == 'N': + continue + if cigar_op == 'M': + for add_match_len, alt_base, snp_id in cigar_desc: + MD_match_len += add_match_len + Zs_match_len += add_match_len + assert cur_trans_pos + add_match_len <= len(trans_seq) + read_seq += trans_seq[cur_trans_pos:cur_trans_pos+add_match_len] + cur_trans_pos += add_match_len + if alt_base != "": + if MD_match_len > 0: + MD += ("{}".format(MD_match_len)) + MD_match_len = 0 + MD += trans_seq[cur_trans_pos] + if snp_id != "": + if Zs != "": + Zs += "," + Zs += ("{}|S|{}".format(Zs_match_len, snp_id)) + Zs_match_len = 0 + else: + Zs_match_len += 1 + if snp_id == "": + XM += 1 + NM += 1 + read_seq += alt_base + cur_trans_pos += 1 + elif cigar_op == 'D': + assert len(cigar_desc) == 1 + add_match_len, del_len, snp_id = cigar_desc[0] + MD_match_len += add_match_len + Zs_match_len += add_match_len + if MD_match_len > 0: + MD += ("{}".format(MD_match_len)) + MD_match_len = 0 + MD += ("^{}".format(trans_seq[cur_trans_pos:cur_trans_pos+cigar_len])) + read_seq += trans_seq[cur_trans_pos:cur_trans_pos+add_match_len] + if Zs != "": + Zs += "," + Zs += ("{}|D|{}".format(Zs_match_len, cigar_desc[0][-1])) + Zs_match_len = 0 + cur_trans_pos += cigar_len + elif cigar_op == 'I': + assert len(cigar_desc) == 1 + add_match_len, ins_seq, snp_id = cigar_desc[0] + ins_len = len(ins_seq) + MD_match_len += add_match_len + Zs_match_len += add_match_len + read_seq += trans_seq[cur_trans_pos:cur_trans_pos+add_match_len] + read_seq += ins_seq + if Zs != "": + Zs += "," + Zs += ("{}|I|{}".format(Zs_match_len, cigar_desc[0][-1])) + Zs_match_len = 0 + else: + assert False + + if MD_match_len > 0: + MD += ("{}".format(MD_match_len)) + + if len(read_seq) != read_len: + print("read length differs:", len(read_seq), "vs.", read_len, file=sys.stderr) + print(pos, "".join(cigars), cigar_descs, MD, XM, NM, Zs, file=sys.stderr) + assert False + + return pos, cigars, 
cigar_descs, MD, XM, NM, Zs, read_seq + + +""" +""" +cigar_re = re.compile('\d+\w') +def samRepOk(genome_seq, read_seq, chr, pos, cigar, XM, NM, MD, Zs, max_mismatch): + assert chr in genome_seq + chr_seq = genome_seq[chr] + assert pos < len(chr_seq) + + # Calculate XM and NM based on Cigar and Zs + cigars = cigar_re.findall(cigar) + cigars = [[int(cigars[i][:-1]), cigars[i][-1]] for i in range(len(cigars))] + ref_pos, read_pos = pos, 0 + ann_ref_seq, ann_ref_rel, ann_read_seq, ann_read_rel = [], [], [], [] + for i in range(len(cigars)): + cigar_len, cigar_op = cigars[i] + if cigar_op == "M": + partial_ref_seq = chr_seq[ref_pos:ref_pos+cigar_len] + partial_read_seq = read_seq[read_pos:read_pos+cigar_len] + assert len(partial_ref_seq) == len(partial_read_seq) + ann_ref_seq += list(partial_ref_seq) + ann_read_seq += list(partial_read_seq) + for j in range(len(partial_ref_seq)): + if partial_ref_seq[j] == partial_read_seq[j]: + ann_ref_rel.append("=") + ann_read_rel.append("=") + else: + ann_ref_rel.append("X") + ann_read_rel.append("X") + ref_pos += cigar_len + read_pos += cigar_len + elif cigar_op == "D": + partial_ref_seq = chr_seq[ref_pos:ref_pos+cigar_len] + ann_ref_rel += list(partial_ref_seq) + ann_ref_seq += list(partial_ref_seq) + ann_read_rel += (["-"] * cigar_len) + ann_read_seq += (["-"] * cigar_len) + ref_pos += cigar_len + elif cigar_op == "I": + partial_read_seq = read_seq[read_pos:read_pos+cigar_len] + ann_ref_rel += (["-"] * cigar_len) + ann_ref_seq += (["-"] * cigar_len) + ann_read_rel += list(partial_read_seq) + ann_read_seq += list(partial_read_seq) + read_pos += cigar_len + elif cigar_op == "N": + ref_pos += cigar_len + else: + assert False + + assert len(ann_ref_seq) == len(ann_read_seq) + assert len(ann_ref_seq) == len(ann_ref_rel) + assert len(ann_ref_seq) == len(ann_read_rel) + ann_Zs_seq = ["0" for i in range(len(ann_ref_seq))] + + Zss, Zs_i, snp_pos_add = [], 0, 0 + if Zs != "": + Zss = Zs.split(',') + Zss = [zs.split('|') for zs in Zss] + + 
ann_read_pos = 0 + for zs in Zss: + zs_pos, zs_type, zs_id = zs + zs_pos = int(zs_pos) + for i in range(zs_pos): + while ann_read_rel[ann_read_pos] == '-': + ann_read_pos += 1 + ann_read_pos += 1 + if zs_type == "S": + ann_Zs_seq[ann_read_pos] = "1" + ann_read_pos += 1 + elif zs_type == "D": + while ann_read_rel[ann_read_pos] == '-': + ann_Zs_seq[ann_read_pos] = "1" + ann_read_pos += 1 + elif zs_type == "I": + while ann_ref_rel[ann_read_pos] == '-': + ann_Zs_seq[ann_read_pos] = "1" + ann_read_pos += 1 + else: + assert False + + tMD, tXM, tNM = "", 0, 0 + match_len = 0 + i = 0 + while i < len(ann_ref_seq): + if ann_ref_rel[i] == "=": + assert ann_read_rel[i] == "=" + match_len += 1 + i += 1 + continue + assert ann_read_rel[i] != "=" + if ann_ref_rel[i] == "X" and ann_read_rel[i] == "X": + if match_len > 0: + tMD += ("{}".format(match_len)) + match_len = 0 + tMD += ann_ref_seq[i] + if ann_Zs_seq[i] == "0": + tXM += 1 + tNM += 1 + i += 1 + else: + assert ann_ref_rel[i] == "-" or ann_read_rel[i] == "-" + if ann_ref_rel[i] == '-': + while ann_ref_rel[i] == '-': + if ann_Zs_seq[i] == "0": + tNM += 1 + i += 1 + else: + assert ann_read_rel[i] == '-' + del_seq = "" + while ann_read_rel[i] == '-': + del_seq += ann_ref_seq[i] + if ann_Zs_seq[i] == "0": + tNM += 1 + i += 1 + if match_len > 0: + tMD += ("{}".format(match_len)) + match_len = 0 + tMD += ("^{}".format(del_seq)) + + if match_len > 0: + tMD += ("{}".format(match_len)) + + if tMD != MD or tXM != XM or tNM != NM or XM > max_mismatch or XM != NM: + print(chr, pos, cigar, MD, XM, NM, Zs, file=sys.stderr) + print(tMD, tXM, tNM, file=sys.stderr) + assert False + + +""" +""" +def simulate_reads(genome_file, gtf_file, snp_file, base_fname, + rna, paired_end, read_len, frag_len, + num_frag, expr_profile_type, repeat_fname, + error_rate, max_mismatch, + random_seed, snp_prob, sanity_check, verbose): + random.seed(random_seed, version=1) + err_rand_src = ErrRandomSource(error_rate / 100.0) + + if read_len > frag_len: + 
frag_len = read_len + + genome_seq = read_genome(genome_file) + if rna: + genes, transcripts = read_transcript(genome_seq, gtf_file, frag_len) + else: + genes, transcripts = {}, {} + snps = read_snp(snp_file) + + if sanity_check: + sanity_check_input(genome_seq, genes, transcripts, snps, frag_len) + + if rna: + num_transcripts = min(len(transcripts), 10000) + expr_profile = generate_rna_expr_profile(expr_profile_type, num_transcripts) + else: + expr_profile = generate_dna_expr_profile(genome_seq) + + expr_profile = [int(expr_profile[i] * num_frag) for i in range(len(expr_profile))] + assert num_frag >= sum(expr_profile) + while sum(expr_profile) < num_frag: + for i in range(min(num_frag - sum(expr_profile), len(expr_profile))): + expr_profile[i] += 1 + assert num_frag == sum(expr_profile) + + repeat_loci = {} + if repeat_fname != "" and os.path.exists(repeat_fname): + for line in open(repeat_fname): + if line.startswith('>'): + continue + coords = line.strip().split() + for coord in coords: + chr, pos, strand = coord.split(':') + if chr not in repeat_loci: + repeat_loci[chr] = [] + repeat_loci[chr].append([int(pos), strand]) + + if rna: + transcript_ids = sorted(list(transcripts.keys())) + random.shuffle(transcript_ids, random=random.random) + assert len(transcript_ids) >= len(expr_profile) + else: + chr_ids = list(genome_seq.keys()) + + sam_file = open(base_fname + ".sam", "w") + + # Write SAM header + print("@HD\tVN:1.0\tSO:unsorted", file=sam_file) + for chr in genome_seq.keys(): + print("@SQ\tSN:%s\tLN:%d" % (chr, len(genome_seq[chr])), file=sam_file) + + read_file = open(base_fname + "_1.fa", "w") + if paired_end: + read2_file = open(base_fname + "_2.fa", "w") + + cur_read_id = 1 + for t in range(len(expr_profile)): + t_num_frags = expr_profile[t] + if rna: + transcript_id = transcript_ids[t] + chr, strand, transcript_len, exons = transcripts[transcript_id] + print(transcript_id, t_num_frags, file=sys.stderr) + else: + chr = chr_ids[t] + print(chr, 
t_num_frags, file=sys.stderr) + + assert chr in genome_seq + chr_seq = genome_seq[chr] + chr_len = len(chr_seq) + if chr in repeat_loci: + chr_repeat_loci = repeat_loci[chr] + else: + chr_repeat_loci = [] + + if rna: + t_seq = "" + for e in exons: + assert e[0] < e[1] + t_seq += chr_seq[e[0]:e[1]+1] + assert len(t_seq) == transcript_len + else: + t_seq = chr_seq + exons = [[0, chr_len - 1]] + + if chr in snps: + chr_snps = snps[chr] + else: + chr_snps = [] + + for f in range(t_num_frags): + if rna: + #frag_pos = random.randint(0, transcript_len - frag_len) + frag_pos = myrandint(0, transcript_len - frag_len) + else: + while True: + if len(chr_repeat_loci): + #locus_id = random.randint(0, len(chr_repeat_loci) - 1) + locus_id = myrandint(0, len(chr_repeat_loci) - 1) + frag_pos = chr_repeat_loci[locus_id][0] + else: + #frag_pos = random.randint(0, chr_len - frag_len) + frag_pos = myrandint(0, chr_len - frag_len) + if 'N' not in chr_seq[frag_pos:frag_pos + frag_len]: + break + + # SAM specification (v1.4) + # http://samtools.sourceforge.net/ + flag, flag2 = 99, 163 # 83, 147 + pos, cigars, cigar_descs, MD, XM, NM, Zs, read_seq = getSamAlignment(rna, exons, chr_seq, t_seq, frag_pos, read_len, chr_snps, snp_prob, err_rand_src, max_mismatch) + pos2, cigars2, cigar2_descs, MD2, XM2, NM2, Zs2, read2_seq = getSamAlignment(rna, exons, chr_seq, t_seq, frag_pos+frag_len-read_len, read_len, chr_snps, snp_prob, err_rand_src, max_mismatch) + swapped = False + if paired_end: + #if random.randint(0, 1) == 1: + if myrandint(0, 1) == 1: + swapped = True + if swapped: + flag, flag2 = flag - 16, flag2 - 16 + pos, pos2 = pos2, pos + cigars, cigars2 = cigars2, cigars + cigar_descs, cigar2_descs = cigar2_descs, cigar_descs + read_seq, read2_seq = read2_seq, read_seq + XM, XM2 = XM2, XM + NM, NM2 = NM2, NM + MD, MD2 = MD2, MD + Zs, Zs2 = Zs2, Zs + + cigar_str, cigar2_str = "".join(cigars), "".join(cigars2) + if sanity_check: + samRepOk(genome_seq, read_seq, chr, pos, cigar_str, XM, NM, MD, 
Zs, max_mismatch) + samRepOk(genome_seq, read2_seq, chr, pos2, cigar2_str, XM2, NM2, MD2, Zs2, max_mismatch) + + if Zs != "": + Zs = ("\tZs:Z:{}".format(Zs)) + if Zs2 != "": + Zs2 = ("\tZs:Z:{}".format(Zs2)) + + if rna: + XS = "\tXS:A:{}".format(strand) + TI = "\tTI:Z:{}".format(transcript_id) + else: + XS, TI = "", "" + + print(">{}".format(cur_read_id), file=read_file) + if swapped: + print(reverse_complement(read_seq), file=read_file) + else: + print(read_seq, file=read_file) + print("{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:{}\tNM:i:{}\tMD:Z:{}{}{}{}".format(cur_read_id, flag, chr, pos + 1, cigar_str, chr, pos2 + 1, read_seq, XM, NM, MD, Zs, XS, TI), file=sam_file) + if paired_end: + print(">{}".format(cur_read_id), file=read2_file) + if swapped: + print(read2_seq, file=read2_file) + else: + print(reverse_complement(read2_seq), file=read2_file) + print("{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:{}\tNM:i:{}\tMD:Z:{}{}{}{}".format(cur_read_id, flag2, chr, pos2 + 1, cigar2_str, chr, pos + 1, read2_seq, XM2, NM2, MD2, Zs2, XS, TI), file=sam_file) + + cur_read_id += 1 + + sam_file.close() + read_file.close() + if paired_end: + read2_file.close() + + +if __name__ == '__main__': + parser = ArgumentParser( + description='Simulate reads from GENOME (fasta) and GTF files') + parser.add_argument('genome_file', + nargs='?', + type=FileType('r'), + help='input GENOME file') + parser.add_argument('gtf_file', + nargs='?', + type=FileType('r'), + help='input GTF file') + parser.add_argument('snp_file', + nargs='?', + type=FileType('r'), + help='input SNP file') + parser.add_argument('base_fname', + nargs='?', + type=str, + help='output base filename') + parser.add_argument('-d', '--dna', + dest='rna', + action='store_false', + default=True, + help='DNA-seq reads (default: RNA-seq reads)') + parser.add_argument('--single-end', + dest='paired_end', + action='store_false', + default=True, + help='single-end reads (default: paired-end reads)') + 
parser.add_argument('-r', '--read-length', + dest='read_len', + action='store', + type=int, + default=100, + help='read length (default: 100)') + parser.add_argument('-f', '--fragment-length', + dest='frag_len', + action='store', + type=int, + default=250, + help='fragment length (default: 250)') + parser.add_argument('-n', '--num-fragment', + dest='num_frag', + action='store', + type=int, + default=1000000, + help='number of fragments (default: 1000000)') + parser.add_argument('-e', '--expr-profile', + dest='expr_profile', + action='store', + type=str, + default='flux', + help='expression profile: flux or constant (default: flux)') + parser.add_argument('--repeat-info', + dest='repeat_fname', + action='store', + type=str, + default='', + help='repeat information filename') + parser.add_argument('--error-rate', + dest='error_rate', + action='store', + type=float, + default=0.0, + help='per-base sequencing error rate (%%) (default: 0.0)') + parser.add_argument('--max-mismatch', + dest='max_mismatch', + action='store', + type=int, + default=3, + help='max mismatches due to sequencing errors (default: 3)') + parser.add_argument('--random-seed', + dest='random_seed', + action='store', + type=int, + default=0, + help='random seeding value (default: 0)') + parser.add_argument('--snp-prob', + dest='snp_prob', + action='store', + type=float, + default=1.0, + help='probability of a read including a snp when the read spans the snp ranging from 0.0 to 1.0 (default: 1.0)') + parser.add_argument('--sanity-check', + dest='sanity_check', + action='store_true', + help='sanity check') + parser.add_argument('-v', '--verbose', + dest='verbose', + action='store_true', + help='also print some statistics to stderr') + parser.add_argument('--version', + action='version', + version='%(prog)s 2.0.0-alpha') + args = parser.parse_args() + if not args.genome_file or not args.gtf_file or not args.snp_file: + parser.print_help() + exit(1) + if not args.rna: + args.expr_profile = "constant" + 
simulate_reads(args.genome_file, args.gtf_file, args.snp_file, args.base_fname, + args.rna, args.paired_end, args.read_len, args.frag_len, + args.num_frag, args.expr_profile, args.repeat_fname, + args.error_rate, args.max_mismatch, + args.random_seed, args.snp_prob, args.sanity_check, args.verbose) diff --git a/hisat2lib/ht2.h b/hisat2lib/ht2.h new file mode 100644 index 0000000..aee5f53 --- /dev/null +++ b/hisat2lib/ht2.h @@ -0,0 +1,162 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . 
+ */ + +#ifndef __HT2_H__ +#define __HT2_H__ + +#ifdef __cplusplus +extern "C" +{ +#endif +#include +#include + +typedef int ht2_error_t; + +enum { + HT2_OK = 0, + + HT2_ERR = -1, + HT2_ERR_NOT_REPEAT = -2, +}; +#define HT2_RET_OK(x) ((x) == HT2_OK) + +typedef void* ht2_handle_t; + +struct ht2_options { + int offRate; + + int useMm; + int useShmem; + int mmSweep; + int noRefNames; + int noSplicedAlignment; + int gVerbose; + int startVerbose; + int sanityCheck; + + int useHaplotype; + + void *altdb; + void *raltdb; + void *repeatdb; + + void *gfm; + void *rgfm; +}; + +typedef struct ht2_options ht2_option_t; + + + +/************************************************************************** + * + * Initialize APIs + * + **************************************************************************/ + +ht2_handle_t ht2_init(const char *name, ht2_option_t *options); +void ht2_close(ht2_handle_t); + +ht2_error_t ht2_init_options(ht2_option_t *options); + + +/************************************************************************** + * + * Index APIs + * + **************************************************************************/ + +const char* ht2_index_getrefnamebyid(ht2_handle_t handle, uint32_t chr_id); + +struct ht2_index_getrefnames_result { + int count; + char* names[0]; +}; + +/** + * @brief + * + * @param handle + * @param result_ptr pointer to result. Caller must relese memory by free(). 
+ * + * @return + */ +ht2_error_t ht2_index_getrefnames(ht2_handle_t handle, struct ht2_index_getrefnames_result **result_ptr); + + +/************************************************************************** + * + * Repeat APIs + * + **************************************************************************/ + +struct ht2_position { + uint32_t chr_id; + int direction; /* 0 - forward, 1 - reverse */ + uint64_t pos; /* 0-based */ +}; + +struct ht2_repeat_expand_result { + int count; + struct ht2_position positions[0]; +}; + + +/** + * @brief + * + * @param handle + * @param repeat_name + * @param repeat_pos repeat position on repeat sequence(0-based) + * @param repeat_len + * @param result_ptr pointer to result. caller must release memory by free(). + * ex) free(result_ptr); + * + * @return + */ +ht2_error_t ht2_repeat_expand(ht2_handle_t handle, + const char *repeat_name, + uint64_t repeat_pos, + uint64_t repeat_len, + struct ht2_repeat_expand_result **result_ptr); + +/************************************************************************** + * + * Alignment APIs + * + **************************************************************************/ +/* TODO */ + + +/************************************************************************** + * + * ETC APIs + * + **************************************************************************/ +/* TODO */ +void ht2_test_1(ht2_handle_t); +void ht2_repeat_dump_repeatmap(ht2_handle_t handle); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __HT2_H__ */ diff --git a/hisat2lib/ht2_alignment.cpp b/hisat2lib/ht2_alignment.cpp new file mode 100644 index 0000000..6cb1e2d --- /dev/null +++ b/hisat2lib/ht2_alignment.cpp @@ -0,0 +1,20 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. 
 *
 * HISAT 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HISAT 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HISAT 2. If not, see <http://www.gnu.org/licenses/>.
 */


diff --git a/hisat2lib/ht2_handle.h b/hisat2lib/ht2_handle.h
new file mode 100644
index 0000000..5ffc104
--- /dev/null
+++ b/hisat2lib/ht2_handle.h
@@ -0,0 +1,47 @@
/*
 * Copyright 2018, Chanhee Park and Daehwan Kim
 *
 * This file is part of HISAT 2.
 *
 * HISAT 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HISAT 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HISAT 2. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __HT2_HANDLE_H__
#define __HT2_HANDLE_H__

/* EXPORT marks the public entry points with default symbol visibility.
 * The "#if 1" keeps the empty fallback definition around but disabled. */
#if 1
#define EXPORT __attribute__((visibility("default")))
#else
#define EXPORT
#endif

/* This header includes nothing itself: TIndexOffU, uint16_t, string and the
 * index classes must already be declared by whoever includes it. */
typedef TIndexOffU index_t;
typedef uint16_t local_index_t;

/*
 * Concrete object behind the opaque ht2_handle_t.
 *
 * NOTE(review): ALTDB, RepeatDB, HGFM and RFM appear here without template
 * arguments. If they are class templates, the arguments (presumably built
 * from index_t / local_index_t above) were lost from this copy of the file -
 * confirm against the upstream header.
 */
struct ht2_handle {
    ALTDB* altdb;
    ALTDB* raltdb;
    RepeatDB* repeatdb;

    HGFM* gfm;      /* main index */
    RFM *rgfm;      /* repeat index, loaded from "<ht2_idx_name>.rep" */

    string tmp_str;

    string ht2_idx_name;    /* index base name passed to ht2_init() */

    struct ht2_options options;     /* copy of the options used at init */
};

#endif /* __HT2_HANDLE_H__ */
diff --git a/hisat2lib/ht2_index.cpp b/hisat2lib/ht2_index.cpp
new file mode 100644
index 0000000..35d5a8b
--- /dev/null
+++ b/hisat2lib/ht2_index.cpp
@@ -0,0 +1,80 @@
/*
 * Copyright 2018, Chanhee Park and Daehwan Kim
 *
 * This file is part of HISAT 2.
 *
 * HISAT 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HISAT 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HISAT 2. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include + +#include "ds.h" +#include "repeat.h" +#include "rfm.h" + +#include "ht2.h" +#include "ht2_handle.h" + + +EXPORT +const char* ht2_index_getrefnamebyid(ht2_handle_t handle, uint32_t chr_id) +{ + struct ht2_handle *hp = (struct ht2_handle *)handle; + + size_t refname_size = hp->gfm->_refnames.size(); + + if(chr_id < (refname_size - 1)) { + return hp->gfm->_refnames[chr_id].c_str(); + } + + return NULL; +} + + + +EXPORT +ht2_error_t ht2_index_getrefnames(ht2_handle_t handle, struct ht2_index_getrefnames_result **result_ptr) +{ + struct ht2_handle *hp = (struct ht2_handle *)handle; + + size_t refname_size = hp->gfm->_refnames.size(); + if(refname_size == 0) { + return HT2_ERR; + } + + size_t result_hdr_size = sizeof(struct ht2_index_getrefnames_result) + sizeof(char *) * refname_size; + size_t result_buf_size = 0; + + for(size_t i = 0; i < refname_size - 1; i++) { + result_buf_size += strlen(hp->gfm->_refnames[i].c_str()) + 1; + } + + void* ptr = malloc(result_hdr_size + result_buf_size); + char* buf_ptr = (char *)ptr + result_hdr_size; + + memset(ptr, 0, result_hdr_size + result_buf_size); + struct ht2_index_getrefnames_result* result = (struct ht2_index_getrefnames_result *)ptr; + + result->count = refname_size - 1; + result->names[0] = buf_ptr; + for(size_t i = 0; i < refname_size - 1; i++) { + size_t rlen = strlen(hp->gfm->_refnames[i].c_str()); + strcpy(result->names[i], hp->gfm->_refnames[i].c_str()); + result->names[i + 1] = result->names[i] + rlen + 1; + } + + (*result_ptr) = result; + + return HT2_OK; +} diff --git a/hisat2lib/ht2_init.cpp b/hisat2lib/ht2_init.cpp new file mode 100644 index 0000000..1056486 --- /dev/null +++ b/hisat2lib/ht2_init.cpp @@ -0,0 +1,248 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. 
+ * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#include + +#include "ds.h" +#include "repeat.h" +#include "rfm.h" + +#include "ht2.h" +#include "ht2_handle.h" + +using namespace std; + +#ifdef HISAT2_BUILD_LIB +MemoryTally gMemTally; +#endif + +static const struct ht2_options ht2_default_options = { + .offRate = -1, + + .useMm = false, + .useShmem = false, + .mmSweep = false, + .noRefNames = false, + .noSplicedAlignment = false, + .gVerbose = false, + .startVerbose = false, + .sanityCheck = 0, + + .useHaplotype = false, + + .altdb = NULL, + .raltdb = NULL, + .repeatdb = NULL, + .gfm = NULL, + .rgfm = NULL, +}; + +static int is_external_index(struct ht2_options *opt) +{ + if (opt->altdb != NULL || + opt->raltdb != NULL || + opt->repeatdb != NULL || + opt->gfm != NULL || + opt->rgfm != NULL) { + return 1; + } + + return 0; +} + +static void free_handle(struct ht2_handle *hp) +{ + if (!is_external_index(&hp->options)) { + if(hp->altdb) { + delete hp->altdb; + } + + if(hp->raltdb) { + delete hp->raltdb; + } + + if(hp->repeatdb) { + delete hp->repeatdb; + } + + if(hp->gfm) { + if(hp->gfm->isInMemory()) { + hp->gfm->evictFromMemory(); + } + delete hp->gfm; + } + + if(hp->rgfm) { + if(hp->rgfm->isInMemory()) { + hp->rgfm->evictFromMemory(); + } + delete hp->rgfm; + } + } + + delete hp; +} + +static void init_handle(struct ht2_handle *hp) +{ + + struct ht2_options *opt = &hp->options; + + if 
(is_external_index(opt)) { + + hp->altdb = (ALTDB *)opt->altdb; + hp->repeatdb = (RepeatDB *)opt->repeatdb; + hp->raltdb = (ALTDB *)opt->raltdb; + + hp->gfm = (HGFM* )opt->gfm; + hp->rgfm = (RFM *)opt->rgfm; + + } else { + + hp->altdb = new ALTDB(); + hp->repeatdb = new RepeatDB(); + hp->raltdb = new ALTDB(); + + hp->gfm = new HGFM( + hp->ht2_idx_name, + hp->altdb, + NULL, + NULL, + -1, + true, + opt->offRate, + 0, + opt->useMm, + opt->useShmem, + opt->mmSweep, + !opt->noRefNames, + true, + true, + true, + !opt->noSplicedAlignment, + opt->gVerbose, + opt->startVerbose, + false, + opt->sanityCheck, + opt->useHaplotype); + + + // Load the other half of the index into memory + assert(!hp->gfm->isInMemory()); + + hp->gfm->loadIntoMemory( + -1, + true, + true, + true, + !opt->noRefNames, + opt->startVerbose); + + hp->rgfm = new RFM( + hp->ht2_idx_name + ".rep", + hp->raltdb, + hp->repeatdb, + NULL, + -1, + true, + opt->offRate, + 0, + opt->useMm, + opt->useShmem, + opt->mmSweep, + !opt->noRefNames, + true, + true, + true, + !opt->noSplicedAlignment, + opt->gVerbose, + opt->startVerbose, + false, + opt->sanityCheck, + false); + + + assert(!hp->rgfm->isInMemory()); + hp->rgfm->loadIntoMemory( + -1, + true, + true, + true, + !opt->noRefNames, + opt->startVerbose); + + hp->repeatdb->construct(hp->gfm->rstarts(), hp->gfm->nFrag()); + } +} + +EXPORT +ht2_handle_t ht2_init(const char *name, ht2_option_t *options) +{ + struct ht2_handle *handle = new ht2_handle; + + handle->ht2_idx_name = name; + if(options) { + memcpy(&handle->options, options, sizeof(struct ht2_options)); + } else { + memcpy(&handle->options, &ht2_default_options, sizeof(struct ht2_options)); + } + + // Init + init_handle(handle); + + handle->tmp_str = name; + + return (ht2_handle_t)handle; +} + +EXPORT +void ht2_close(ht2_handle_t handle) +{ + struct ht2_handle *hp = (struct ht2_handle *)handle; + if(hp == NULL) { + return; + } + + free_handle(hp); +} + + +EXPORT +ht2_error_t ht2_init_options(ht2_option_t 
*options) +{ + if(options == NULL) { + return HT2_ERR; + } + + memcpy(options, &ht2_default_options, sizeof(ht2_default_options)); + + return HT2_OK; +} + +EXPORT +void ht2_test_1(ht2_handle_t handle) +{ + struct ht2_handle *hp = (struct ht2_handle *)handle; + + size_t refname_size = hp->gfm->_refnames.size(); + cerr << "ht2lib: " << "gfm refnames: " << refname_size << endl; + for(size_t i = 0; i < refname_size; i++) { + cerr << "ht2lib: " << " " << i << " -> " << hp->gfm->_refnames[i] << endl; + } +} diff --git a/hisat2lib/ht2_repeat.cpp b/hisat2lib/ht2_repeat.cpp new file mode 100644 index 0000000..7f40256 --- /dev/null +++ b/hisat2lib/ht2_repeat.cpp @@ -0,0 +1,103 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . 
+ */ + +#include + +#include "ds.h" +#include "repeat.h" +#include "rfm.h" + +#include "ht2.h" +#include "ht2_handle.h" + +EXPORT +void ht2_repeat_dump_repeatmap(ht2_handle_t handle) +{ + struct ht2_handle *hp = (struct ht2_handle *)handle; + + size_t localRFMSize = hp->rgfm->_localRFMs.size(); + + cerr << "ht2lib: " << "LocalRFM size: " << localRFMSize << endl; + cerr << "ht2lib: " << "Dump repeatMap" << endl; + + // repID -> 0 + const EList >& repeatMap = hp->repeatdb->repeatMap()[0]; + + cerr << repeatMap.size() << endl; + + for(size_t i = 0; i < repeatMap.size(); i++) { + cerr << repeatMap[i].first << ", " << repeatMap[i].second << endl; + } + +} + + +EXPORT +ht2_error_t ht2_repeat_expand(ht2_handle_t handle, + const char *repeat_name, + uint64_t repeat_pos, + uint64_t repeat_len, + struct ht2_repeat_expand_result **result_ptr) +{ + struct ht2_handle *hp = (struct ht2_handle *)handle; + + index_t rep_id = hp->rgfm->getLocalRFM_idx(repeat_name); + + TIndexOffU left = repeat_pos; + TIndexOffU right = left + repeat_len; + + bool ret = hp->repeatdb->repeatExist(rep_id, left, right); + if(!ret) { + return HT2_ERR_NOT_REPEAT; + } + + + /* get coord */ + EList, RepeatCoord > > positions; + + EList snp_id_list; + snp_id_list.clear(); + + hp->repeatdb->getCoords( + rep_id, + left, right, + snp_id_list, + *(hp->raltdb), + positions + ); + + /* build result */ + size_t result_size = sizeof(struct ht2_repeat_expand_result) + positions.size() * sizeof(struct ht2_position); + + struct ht2_repeat_expand_result *result = (struct ht2_repeat_expand_result *)malloc(result_size); + + result->count = positions.size(); + + for(size_t i = 0; i < positions.size(); i++) { + const RepeatCoord& coord = positions[i].first; + + result->positions[i].chr_id = coord.tid; + result->positions[i].pos = coord.toff; + result->positions[i].direction = coord.fw ? 
0 : 1; + } + + *(result_ptr) = result; + return HT2_OK; +} + diff --git a/hisat2lib/java_jni/HT2Module.java b/hisat2lib/java_jni/HT2Module.java new file mode 100644 index 0000000..26fbd53 --- /dev/null +++ b/hisat2lib/java_jni/HT2Module.java @@ -0,0 +1,122 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +import java.util.Map; +import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; + +public class HT2Module { + static { + System.loadLibrary("ht2jni"); + } + + public static class HT2Position { + int chr_id; + int direction; + long position; + + HT2Position(int id, int dir, int pos) { + this.chr_id = id; + this.direction = dir; + this.position = pos; + } + + @Override + public String toString() + { + return getClass().getSimpleName() + + "[chr_id=" + chr_id + + ", direction=" + direction + + ", position=" + position + "]"; + } + } + + private native long init(String indexName, Map options); + private native void close(long handle); + private native Map get_options(); + + private native String index_getrefnamebyid(long handle, int id); + private native List index_getrefnames(long handle); + private native List repeat_expand(long handle, String name, long rpos, long rlen); + + private long handle; + + HT2Module() { + handle = 0; + } + + public void initLibrary(String indexName) { + 
initLibrary(indexName, null); + } + + public void initLibrary(String indexName, Map options) + { + if(handle == 0) { + handle = init(indexName, options); + } + } + + public Map initOption() + { + return get_options(); + } + + public String getRefNameById(int chr_id) + { + if(handle == 0) { + // TODO: Exception + return ""; + } + + // TODO: Exception(OutOfIndex) + return index_getrefnamebyid(handle, chr_id); + } + + public List getRefNames() + { + if(handle == 0) { + // TODO: Exception + return new ArrayList<>(); + } + return index_getrefnames(handle); + } + + public List repeatExpand(String name, long position, long length) + { + if(handle == 0) { + // TODO: Exception + return new ArrayList<>(); + } + return repeat_expand(handle, name, position, length); + } + + public void cleanup() + { + if(handle != 0) { + close(handle); + handle = 0; + } + } + + public void finalize() { + cleanup(); + } + +} diff --git a/hisat2lib/java_jni/HT2ModuleExample.java b/hisat2lib/java_jni/HT2ModuleExample.java new file mode 100644 index 0000000..6bee9a7 --- /dev/null +++ b/hisat2lib/java_jni/HT2ModuleExample.java @@ -0,0 +1,82 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . 
+ */ + +import java.util.Map; +import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; + +public class HT2ModuleExample { + + public static void main(String[] args) { + HT2Module module = null; + Map ht2Options = null; + String indexPath = "../../evaluation/indexes/HISAT2_22/22_rep"; + //String indexPath = "../../evaluation/indexes/HISAT2/genome_rep"; + + try { + module = new HT2Module(); + + // Get Default Options + //ht2Options = module.InitOption(); + // or + ht2Options = new HashMap<>(); + + //ht2Options.put("gVerbose", 1); + //ht2Options.put("startVerbose", 1); + //ht2Options.put("sanityCheck", 1); + + System.out.println(ht2Options); + + module.initLibrary(indexPath, ht2Options); + //module.initLibrary(indexPath); + + System.out.println("CHR_ID 0: " + module.getRefNameById(0)); + + // in 22_rep, OutOfIndex + System.out.println("CHR_ID 1: " + module.getRefNameById(1)); + + List refnames = module.getRefNames(); + + System.out.println("Refnames size:" + refnames.size()); + for(String name: refnames) { + System.out.println(name); + } + + // repeat for 22_rep + List positions = module.repeatExpand("rep100-300", 8308, 100); + + // repeat for genome_rep + //List positions = module.repeatExpand("rep100-300", 2446692, 100); + + System.out.println("Repeat expand size: " + positions.size()); + for(HT2Module.HT2Position pos: positions) { + String chrName = refnames.get(pos.chr_id).split(" ")[0]; + String direction = pos.direction == 0 ? "+":"-"; + + System.out.println(chrName + ":" + pos.position + ":" + direction); + } + + } finally { + if(module != null) { + module.cleanup(); + } + } + } +} diff --git a/hisat2lib/java_jni/Makefile b/hisat2lib/java_jni/Makefile new file mode 100644 index 0000000..1153d18 --- /dev/null +++ b/hisat2lib/java_jni/Makefile @@ -0,0 +1,72 @@ +# +# Copyright 2018, Chanhee Park and Daehwan Kim +# +# This file is part of HISAT 2. 
+# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . +# + +JAVA_HOME := $(JAVA_HOME) + +HISAT2_DIR = ../.. + +CC = gcc + + +MACOS = 0 +ifneq (,$(findstring Darwin,$(shell uname))) +MACOS = 1 +endif + +INCLUDES = -I $(JAVA_HOME)/include +INCLUDES += -I $(JAVA_HOME)/include/linux + +TARGET_LIB = libht2jni.so +SHARED_FLAG = -shared + +ifeq (1,$(MACOS)) +INCLUDES += -I $(JAVA_HOME)/include/darwin +TARGET_LIB = libht2jni.jnilib +SHARED_FLAG = -dynamiclib +endif + +INCLUDES += -I $(HISAT2_DIR)/hisat2lib + +CFLAGS = -fPIC $(INCLUDES) +#CFLAGS += -DDEBUG + +SRCS = ht2module.c + +OBJS = $(SRCS:.c=.o) + +all: lib + +lib: $(TARGET_LIB) + +$(TARGET_LIB): HT2Module.h $(OBJS) + $(CC) $(SHARED_FLAG) -o $@ $(OBJS) $(HISAT2_DIR)/libhisat2lib.a -lstdc++ + +HT2Module.h HT2Module.class: HT2Module.java + javac -h . HT2Module.java + +example: HT2ModuleExample.class + +HT2ModuleExample.class: HT2ModuleExample.java + javac HT2ModuleExample.java + +clean: + rm -f *.class HT2Module.h HT2Module_HT2Position.h *.so *.o $(TARGET_LIB) + +test: all example + java -Djava.library.path=. HT2ModuleExample diff --git a/hisat2lib/java_jni/ht2module.c b/hisat2lib/java_jni/ht2module.c new file mode 100644 index 0000000..692d909 --- /dev/null +++ b/hisat2lib/java_jni/ht2module.c @@ -0,0 +1,432 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. 
+ * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#include +#include +#include "ht2.h" + +#include "HT2Module.h" + +#define CLASSPATH_INTEGER "java/lang/Integer" +#define CLASSPATH_HASHMAP "java/util/HashMap" +#define CLASSPATH_ARRAYLIST "java/util/ArrayList" +#define CLASSPATH_HT2POSITION "HT2Module$HT2Position" + +#ifdef DEBUG +#define DEBUGLOG(fmt, ...) do { fprintf(stderr, "%s:%d:%s(): " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__); } while(0) +#else +#define DEBUGLOG(fmt, ...) 
+#endif + +static jint JNI_VERSION = JNI_VERSION_1_2; + +static jclass classInteger; +static jclass classHashMap; +static jclass classArrayList; +static jclass classHT2Position; + +static jmethodID mthIntegerInit; +static jmethodID mthIntegerIntValue; + +static jmethodID mthHashMapInit; +static jmethodID mthHashMapPut; +static jmethodID mthHashMapGet; + +static jmethodID mthArrayListInit; +static jmethodID mthArrayListEnsureCapacity; +static jmethodID mthArrayListAdd; + +static jmethodID mthHT2PositionInit; + +static jobject NewHashMap(JNIEnv *env) +{ + jobject obj = (*env)->NewObject(env, classHashMap, mthHashMapInit); + return obj; +} + +static jobject NewInteger(JNIEnv *env, int value) +{ + jobject obj = (*env)->NewObject(env, classInteger, mthIntegerInit, value); + return obj; +} + +static jobject NewArrayList(JNIEnv *env) +{ + jobject obj = (*env)->NewObject(env, classArrayList, mthArrayListInit); + return obj; +} + +static jobject NewHT2Position(JNIEnv *env, struct ht2_position *htpos) +{ + jobject obj = (*env)->NewObject(env, classHT2Position, mthHT2PositionInit, + htpos->chr_id, htpos->direction, htpos->pos); + return obj; +} + +static int GetInteger(JNIEnv *env, jobject jobjInt, int *val) +{ + int value = (*env)->CallIntMethod(env, jobjInt, mthIntegerIntValue); + *val = value; + return 0; +} + +static void hashmap_put(JNIEnv *env, jobject jobjMap, + const char *key_str, const int val) +{ + jstring keyObj; + jobject valueObj; + + keyObj = (*env)->NewStringUTF(env, key_str); + valueObj = NewInteger(env, val); + + (*env)->CallObjectMethod(env, jobjMap, mthHashMapPut, keyObj, valueObj); + + (*env)->DeleteLocalRef(env, keyObj); + (*env)->DeleteLocalRef(env, valueObj); +} + +static int hashmap_get(JNIEnv *env, jobject jobjMap, + const char *key_str, int *val) +{ + jstring keyObj; + jobject valueObj; + + keyObj = (*env)->NewStringUTF(env, key_str); + valueObj = (*env)->CallObjectMethod(env, jobjMap, mthHashMapGet, keyObj); + + if(valueObj == NULL) { + 
DEBUGLOG("Can't find key: %s\n", key_str); + (*env)->DeleteLocalRef(env, keyObj); + return -1; + } + + // Integer -> int + int value = 0; + if(GetInteger(env, valueObj, &value) < 0) { + DEBUGLOG("Can't get Integer\n"); + (*env)->DeleteLocalRef(env, keyObj); + (*env)->DeleteLocalRef(env, valueObj); + return -1; + } + + (*env)->DeleteLocalRef(env, keyObj); + (*env)->DeleteLocalRef(env, valueObj); + + *val = value; + + return 0; +} + +static jobject conv_ht2option(JNIEnv *env, ht2_option_t *opts) +{ + jobject hashMap = NewHashMap(env); + +#define HT2_OPT_BUILD(_name) \ + hashmap_put(env, hashMap, #_name , opts->_name) + + HT2_OPT_BUILD(offRate); + HT2_OPT_BUILD(useMm); + HT2_OPT_BUILD(useShmem); + HT2_OPT_BUILD(mmSweep); + HT2_OPT_BUILD(noRefNames); + HT2_OPT_BUILD(noSplicedAlignment); + HT2_OPT_BUILD(gVerbose); + HT2_OPT_BUILD(startVerbose); + HT2_OPT_BUILD(sanityCheck); + HT2_OPT_BUILD(useHaplotype); + + return hashMap; +} + +static void update_ht2option(JNIEnv *env, ht2_option_t *opts, jobject jmapObject) +{ +#define HT2_OPT_UPDATE(_name) \ + do { \ + int value = 0; \ + if(hashmap_get(env, jmapObject, #_name, &value) == 0) { \ + DEBUGLOG("Using %s, %d\n", #_name, value); \ + opts->_name = value; \ + } else { \ + DEBUGLOG("Using default %s, %d\n", #_name, opts->_name); \ + }\ + } while(0) + + HT2_OPT_UPDATE(offRate); + HT2_OPT_UPDATE(useMm); + HT2_OPT_UPDATE(useShmem); + HT2_OPT_UPDATE(mmSweep); + HT2_OPT_UPDATE(noRefNames); + HT2_OPT_UPDATE(noSplicedAlignment); + HT2_OPT_UPDATE(gVerbose); + HT2_OPT_UPDATE(startVerbose); + HT2_OPT_UPDATE(sanityCheck); + HT2_OPT_UPDATE(useHaplotype); +} + + +static void conv_refnames_result(JNIEnv *env, + struct ht2_index_getrefnames_result *result, jobject jobjList) +{ + size_t i; + + // Resize + DEBUGLOG("count: %d\n", result->count); + (*env)->CallVoidMethod(env, jobjList, mthArrayListEnsureCapacity, result->count); + + for(i = 0; i < result->count; i++) { + DEBUGLOG("names[%d] %s\n", i, result->names[i]); + + jobject elem = 
(*env)->NewStringUTF(env, result->names[i]); + (*env)->CallObjectMethod(env, jobjList, mthArrayListAdd, elem); + (*env)->DeleteLocalRef(env, elem); + } +} + +static void conv_repeat_expand_result(JNIEnv *env, + struct ht2_repeat_expand_result *result, + jobject jobjList) +{ + size_t i; + + // Resize + DEBUGLOG("count: %d\n", result->count); + (*env)->CallVoidMethod(env, jobjList, mthArrayListEnsureCapacity, result->count); + + // Add Items + for(i = 0; i < result->count; i++) { + struct ht2_position *htpos = &result->positions[i]; + + DEBUGLOG("position[%d]: %u, %d, %lu\n", i, htpos->chr_id, htpos->direction, htpos->pos); + + jobject elem = NewHT2Position(env, htpos); + (*env)->CallObjectMethod(env, jobjList, mthArrayListAdd, elem); + (*env)->DeleteLocalRef(env, elem); + } +} + +JNIEXPORT jlong JNICALL Java_HT2Module_init(JNIEnv *env, + jobject thisObj, jstring indexNameObj, jobject optionMap) +{ + DEBUGLOG("Init\n"); + + const char *index_path = (*env)->GetStringUTFChars(env, indexNameObj, NULL); + if(index_path == NULL) { + DEBUGLOG("Can't get string\n"); + return 0; + } + + DEBUGLOG("Index Path: %s\n", index_path); + + ht2_option_t ht2opt; + ht2_init_options(&ht2opt); + if(optionMap != NULL) { + // update options + update_ht2option(env, &ht2opt, optionMap); + } + + ht2_handle_t handle = ht2_init(index_path, &ht2opt); + + DEBUGLOG("Initailzed %p\n", handle); + + (*env)->ReleaseStringUTFChars(env, indexNameObj, index_path); + + return (jlong)handle; +} + + +JNIEXPORT void JNICALL Java_HT2Module_close(JNIEnv *env, + jobject thisObj, jlong handlePtr) +{ + DEBUGLOG("Closing\n"); + + ht2_handle_t handle = (ht2_handle_t)handlePtr; + + DEBUGLOG("Received handle: %p\n", handle); + + ht2_close(handle); +} + +JNIEXPORT jobject JNICALL Java_HT2Module_get_1options(JNIEnv *env, + jobject thisObj) +{ + + DEBUGLOG("get_option\n"); + + // Get Default options + ht2_option_t ht2opt; + ht2_init_options(&ht2opt); + + // convert ht2opt to java hashmap + jobject hashobj = 
conv_ht2option(env, &ht2opt); + + return hashobj; +} + +JNIEXPORT jstring JNICALL Java_HT2Module_index_1getrefnamebyid(JNIEnv *env, + jobject thisObj, jlong handlePtr, jint chr_id) +{ + DEBUGLOG("index_getrefnamebyid\n"); + ht2_handle_t handle = (ht2_handle_t)handlePtr; + if(handle == NULL) { + DEBUGLOG("Invalid handle\n"); + return NULL; + } + + const char *refname = ht2_index_getrefnamebyid(handle, chr_id); + if(refname == NULL) { + DEBUGLOG("Can't get refname(%d)\n", chr_id); + return NULL; + } + + return (*env)->NewStringUTF(env, refname); +} + +JNIEXPORT jobject JNICALL Java_HT2Module_index_1getrefnames(JNIEnv *env, + jobject thisObj, jlong handlePtr) +{ + DEBUGLOG("index_getrefnames\n"); + + ht2_handle_t handle = (ht2_handle_t)handlePtr; + if(handle == NULL) { + DEBUGLOG("Invalid handle\n"); + return NULL; + } + + struct ht2_index_getrefnames_result *result = NULL; + ht2_error_t ret = ht2_index_getrefnames(handle, &result); + + jobject refnames = NewArrayList(env); + + if(ret == HT2_OK) { + DEBUGLOG("refnames size: %d\n", result->count); + conv_refnames_result(env, result, refnames); + free(result); + } else { + DEBUGLOG("Can't get refnames\n"); + } + + return refnames; +} + +JNIEXPORT jobject JNICALL Java_HT2Module_repeat_1expand(JNIEnv *env, + jobject thisObj, jlong handlePtr, jstring nameObj, jlong rpos, jlong rlen) +{ + DEBUGLOG("repeat_expand\n"); + + ht2_handle_t handle = (ht2_handle_t)handlePtr; + if(handle == NULL) { + DEBUGLOG("Invalid handle\n"); + return NULL; + } + + const char *name = (*env)->GetStringUTFChars(env, nameObj, NULL); + if(name == NULL) { + DEBUGLOG("Can't get string\n"); + return NULL; + } + + DEBUGLOG("Repeat Expand: %s, %lu, %lu\n", name, rpos, rlen); + + + struct ht2_repeat_expand_result *result = NULL; + ht2_error_t ret = ht2_repeat_expand(handle, name, rpos, rlen, &result); + + jobject positions = NewArrayList(env); + + if(ret == HT2_OK) { + DEBUGLOG("expand position size: %d\n", result->count); + conv_repeat_expand_result(env, 
result, positions); + free(result); + } else { + DEBUGLOG("Can't expand repeat\n"); + } + + (*env)->ReleaseStringUTFChars(env, nameObj, name); + + return positions; +} + +jint JNI_OnLoad(JavaVM *vm, void *reserved) +{ + DEBUGLOG("JNI Loaded\n"); + + JNIEnv *env; + + if((*vm)->GetEnv(vm, (void **)&env, JNI_VERSION) != JNI_OK) { + return JNI_ERR; + } + + // Load Class +#define LOAD_CLASS(_globalRef, _clazz) \ + do { \ + jclass tmpClassRef = (*env)->FindClass(env, _clazz); \ + if(tmpClassRef == NULL) { \ + DEBUGLOG("Can't find class %s\n", _clazz); \ + return JNI_ERR; \ + } \ + _globalRef = (jclass)(*env)->NewGlobalRef(env, tmpClassRef); \ + (*env)->DeleteLocalRef(env, tmpClassRef); \ + } while(0) + + LOAD_CLASS(classInteger, CLASSPATH_INTEGER); + LOAD_CLASS(classHashMap, CLASSPATH_HASHMAP); + LOAD_CLASS(classArrayList, CLASSPATH_ARRAYLIST); + LOAD_CLASS(classHT2Position, CLASSPATH_HT2POSITION); + + + // Load Method + + // Integer + mthIntegerInit = (*env)->GetMethodID(env, classInteger, "", "(I)V"); + mthIntegerIntValue = (*env)->GetMethodID(env, classInteger, "intValue", "()I"); + + // HashMap + mthHashMapInit = (*env)->GetMethodID(env, classHashMap, "", "()V"); + mthHashMapPut = (*env)->GetMethodID(env, classHashMap, "put", + "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"); + mthHashMapGet = (*env)->GetMethodID(env, classHashMap, "get", + "(Ljava/lang/Object;)Ljava/lang/Object;"); + + // ArrayList + mthArrayListInit = (*env)->GetMethodID(env, classArrayList, "", "()V"); + mthArrayListEnsureCapacity = (*env)->GetMethodID(env, classArrayList, "ensureCapacity", "(I)V"); + mthArrayListAdd = (*env)->GetMethodID(env, classArrayList, "add", + "(Ljava/lang/Object;)Z"); + + // HT2Position + mthHT2PositionInit = (*env)->GetMethodID(env, classHT2Position, "", "(III)V"); + + return JNI_VERSION; +} + + +void JNI_OnUnload(JavaVM *vm, void *reserved) +{ + DEBUGLOG("JNI Unload\n"); + JNIEnv *env; + + (*vm)->GetEnv(vm, (void **)&env, JNI_VERSION); + + 
(*env)->DeleteGlobalRef(env, classInteger); + (*env)->DeleteGlobalRef(env, classHashMap); + (*env)->DeleteGlobalRef(env, classArrayList); + (*env)->DeleteGlobalRef(env, classHT2Position); + +} + diff --git a/hisat2lib/pymodule/Makefile b/hisat2lib/pymodule/Makefile new file mode 100644 index 0000000..63644d4 --- /dev/null +++ b/hisat2lib/pymodule/Makefile @@ -0,0 +1,33 @@ +# +# Copyright 2018, Chanhee Park and Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . +# + +all: lib + +lib: + ARCHFLAGS="-arch x86_64" python ./setup.py build + +install: + python ./setup.py install + +clean: + python ./setup.py clean + rm -rf build test + +test: lib + python ./ht2example.py diff --git a/hisat2lib/pymodule/ht2example.py b/hisat2lib/pymodule/ht2example.py new file mode 100644 index 0000000..632ba0e --- /dev/null +++ b/hisat2lib/pymodule/ht2example.py @@ -0,0 +1,68 @@ +#!/usr/bin/python + +# +# Copyright 2018, Chanhee Park and Daehwan Kim +# +# This file is part of HISAT 2. +# +# HISAT 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# HISAT 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HISAT 2. If not, see . + +# +# ./setup.py build +# ./setup.py install +# +import ht2py + +# Path to index +ht2_index = '../../evaluation/indexes/HISAT2_22/22_rep' + +# Get default options +ht2_options = ht2py.get_options() + +print ht2_options +ht2_options['gVerbose'] = 1 +ht2_options['startVerbose'] = 1 +# or +ht2_options = {} + +handle = ht2py.init(ht2_index, ht2_options) + +print ht2py.index_getrefnamebyid(handle, 0) + +#print ht2py.index_getrefnamebyid(handle, 0, 1, 3, 5, 7, 9) +# outofindex +#print ht2py.index_getrefnamebyid(handle, 1) +#print ht2py.index_getrefnamebyid(handle, -1) + +refnames = ht2py.index_getrefnames(handle) + +#for name in refnames: +# print name + +# repeat expansion +positions = ht2py.repeat_expand(handle, 'rep100-300', 8308, 100) + +for pos in positions: + chr_id = pos[0] + direction = pos[1] + chr_pos = pos[2] + + chr_dir = '+' + if direction == 1: + chr_dir = '-' + + print refnames[chr_id].split()[0] + ":" + str(chr_pos) + ':' + chr_dir + +# close handle +ht2py.close(handle) diff --git a/hisat2lib/pymodule/ht2module.c b/hisat2lib/pymodule/ht2module.c new file mode 100644 index 0000000..2771e49 --- /dev/null +++ b/hisat2lib/pymodule/ht2module.c @@ -0,0 +1,333 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ +#include +#include + +#include "ht2.h" + +#define HT2_HANDLE_ID "handle" + +#ifdef DEBUG +#define DEBUGLOG(fmt, ...) do { fprintf(stderr, "%s:%d:%s(): " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__); } while(0) +#else +#define DEBUGLOG(fmt, ...) +#endif + +static ht2_handle_t get_handle(PyObject *cap) +{ + return PyCapsule_GetPointer(cap, HT2_HANDLE_ID); +} + +static PyObject *conv_refnames_result(struct ht2_index_getrefnames_result *result) +{ + PyObject *refnames = NULL; + size_t i = 0; + + if(result == NULL) { + return NULL; + } + + refnames = PyList_New(result->count); + for(i = 0; i < result->count; i++) { + PyObject *str = PyString_FromString(result->names[i]); + PyList_SetItem(refnames, i, str); + } + + return refnames; +} + +static PyObject *conv_repeat_expand_result(struct ht2_repeat_expand_result *result) +{ + PyObject *positions = NULL; + size_t i = 0; + + if(result == NULL) { + return NULL; + } + + positions = PyList_New(result->count); + for(i = 0; i < result->count; i++) { + struct ht2_position *htpos = &result->positions[i]; + + PyList_SetItem(positions, i, + Py_BuildValue("(III)", htpos->chr_id, htpos->direction, htpos->pos) + ); + } + + return positions; +} + + +static PyObject *conv_ht2opt(ht2_option_t *opts) +{ + PyObject *py_opt = NULL; + + py_opt = PyDict_New(); + if(py_opt == NULL) { + return NULL; + } +#define HT2_OPT_BUILD(_pobj, _popt, _name, _type) \ + do {\ + if(PyDict_SetItemString((_pobj), #_name, Py_BuildValue((_type), (_popt)->_name)) < 0) {\ + DEBUGLOG("Can't set item %s\n", #_name);\ + } \ + } while(0) + + + HT2_OPT_BUILD(py_opt, opts, offRate, "i"); + 
HT2_OPT_BUILD(py_opt, opts, useMm, "i"); + HT2_OPT_BUILD(py_opt, opts, useShmem, "i"); + HT2_OPT_BUILD(py_opt, opts, mmSweep, "i"); + HT2_OPT_BUILD(py_opt, opts, noRefNames, "i"); + HT2_OPT_BUILD(py_opt, opts, noSplicedAlignment, "i"); + HT2_OPT_BUILD(py_opt, opts, gVerbose, "i"); + HT2_OPT_BUILD(py_opt, opts, startVerbose, "i"); + HT2_OPT_BUILD(py_opt, opts, sanityCheck, "i"); + HT2_OPT_BUILD(py_opt, opts, useHaplotype, "i"); + + return py_opt; +} + +static void update_ht2_options(ht2_option_t *ht2opt, PyObject *py_opt) +{ +#define HT2_OPT_UPDATE(_pobj, _ht2opt, _name) \ + do {\ + PyObject *p;\ + if((p = PyDict_GetItemString((_pobj), #_name)) != NULL) { \ + (_ht2opt)->_name = PyInt_AsLong(p); \ + DEBUGLOG(#_name " %d\n", (ht2opt)->_name); \ + if(PyErr_Occurred() != NULL) { \ + DEBUGLOG("Error Occurred"); \ + }\ + }\ + } while (0) + + HT2_OPT_UPDATE(py_opt, ht2opt, offRate); + HT2_OPT_UPDATE(py_opt, ht2opt, useMm); + HT2_OPT_UPDATE(py_opt, ht2opt, mmSweep); + HT2_OPT_UPDATE(py_opt, ht2opt, noRefNames); + HT2_OPT_UPDATE(py_opt, ht2opt, noSplicedAlignment); + HT2_OPT_UPDATE(py_opt, ht2opt, gVerbose); + HT2_OPT_UPDATE(py_opt, ht2opt, startVerbose); + HT2_OPT_UPDATE(py_opt, ht2opt, sanityCheck); + HT2_OPT_UPDATE(py_opt, ht2opt, useHaplotype); + +} + +static PyObject *ht2py_get_options(PyObject *self, PyObject *args) +{ + ht2_option_t ht2opt; + + ht2_init_options(&ht2opt); + + + /* convert ht2_option_t to PyObject(map) */ + + PyObject *pobj = conv_ht2opt(&ht2opt); + + return pobj; +} + +static PyObject *ht2py_init(PyObject *self, PyObject *args) +{ + ht2_handle_t handle; + PyObject *popt = NULL; + char *name = NULL; + + if(!PyArg_ParseTuple(args, "sO", &name, &popt)) { + return NULL; + } + + DEBUGLOG("name %s\n", name); + DEBUGLOG("popt %p\n", popt); + + if(!PyDict_CheckExact(popt)) { + // TODO + // exception + DEBUGLOG("Invalid data type\n"); + return NULL; + } + + ht2_option_t ht2opt; + ht2_init_options(&ht2opt); + update_ht2_options(&ht2opt, popt); + + handle = 
ht2_init(name, &ht2opt); + + DEBUGLOG("handle %p\n", handle); + + PyObject *cap = PyCapsule_New(handle, HT2_HANDLE_ID, NULL); + + return cap; +} + +static PyObject *ht2py_close(PyObject *self, PyObject *args) +{ + ht2_handle_t handle; + PyObject *cap; + + // Parse Args + // ht2py.close(handle) + // + if(!PyArg_ParseTuple(args, "O", &cap)) { + DEBUGLOG("Can't parse args\n"); + return NULL; + } + + handle = get_handle(cap); + if(handle == NULL) { + DEBUGLOG("Can't get handle\n"); + return NULL; + } + + DEBUGLOG("handle %p\n", handle); + + ht2_close(handle); + + Py_RETURN_NONE; +} + + +static PyObject *ht2py_index_getrefnamebyid(PyObject *self, PyObject *args) +{ + PyObject *cap; + uint32_t chr_id; + + // ht2py.index_getrefnamebyid(handle, chr_id) + + if(!PyArg_ParseTuple(args, "Oi", &cap, &chr_id)) { + DEBUGLOG("Can't parse args\n"); + return NULL; + } + + ht2_handle_t handle = get_handle(cap); + if(handle == NULL) { + DEBUGLOG("Can't get handle\n"); + return NULL; + } + + const char *refname = ht2_index_getrefnamebyid(handle, chr_id); + + if(refname == NULL) { + DEBUGLOG("Can't get refname(%u)\n", chr_id); + return Py_BuildValue("s", ""); + } + + return Py_BuildValue("s", refname); +} + +static PyObject *ht2py_index_getrefnames(PyObject *self, PyObject *args) +{ + ht2_handle_t handle; + PyObject *cap; + + // Parse Args + // ht2py.index_getrefnames(handle) + if(!PyArg_ParseTuple(args, "O", &cap)) { + DEBUGLOG("Can't parse args\n"); + return NULL; + } + + handle = get_handle(cap); + if(handle == NULL) { + DEBUGLOG("Can't get handle\n"); + return NULL; + } + + + struct ht2_index_getrefnames_result *result = NULL; + ht2_error_t ret = ht2_index_getrefnames(handle, &result); + + PyObject *refnames = NULL; + + if(ret == HT2_OK) { + /* Build List of names */ + refnames = conv_refnames_result(result); + free(result); + } else { + refnames = PyList_New(0); + } + + return refnames; +} + +static PyObject *ht2py_repeat_expand(PyObject *self, PyObject *args) +{ + PyObject *cap; + 
char *name = NULL; + uint64_t rpos = 0; + uint64_t rlen = 0; + + // Parse Args + // ht2py.repeat_expand(handle, 'repeat_name', repeat_pos, repeat_len) + if(!PyArg_ParseTuple(args, "OsLL", &cap, &name, &rpos, &rlen)) { + DEBUGLOG("Can't parse args\n"); + return NULL; + } + + //fprintf(stderr, "%s, %lu, %lu\n", name, rpos, rlen); + + ht2_handle_t handle = get_handle(cap); + if(handle == NULL) { + DEBUGLOG("Can't get handle\n"); + return NULL; + } + + + struct ht2_repeat_expand_result *result = NULL; + ht2_error_t ret = ht2_repeat_expand(handle, name, rpos, rlen, &result); + + PyObject *positions = NULL; + if(ret == HT2_OK) { + /* Build list of position */ + positions = conv_repeat_expand_result(result); + free(result); + } else { + DEBUGLOG("error %d, %s, %lu, %lu\n", ret, + name, rpos, rlen); + positions = PyList_New(0); + } + + return positions; +} + + +static PyMethodDef myMethods[] = { + /* Initialize APIs */ + {"get_options", ht2py_get_options, METH_NOARGS, "Get default options"}, + {"init", ht2py_init, METH_VARARGS, "Initialize HT2Lib handle"}, + {"close", ht2py_close, METH_VARARGS, "Release HT2Lib handle"}, + + /* Index APIs */ + {"index_getrefnamebyid", ht2py_index_getrefnamebyid, METH_VARARGS, "Get reference name"}, + {"index_getrefnames", ht2py_index_getrefnames, METH_VARARGS, "Get all reference names"}, + + /* Repeat APIs */ + {"repeat_expand", ht2py_repeat_expand, METH_VARARGS, "Find reference positions"}, + + /* */ + {NULL, NULL, 0, NULL} +}; + + +PyMODINIT_FUNC +initht2py(void) +{ + (void)Py_InitModule("ht2py", myMethods); +} diff --git a/hisat2lib/pymodule/setup.py b/hisat2lib/pymodule/setup.py new file mode 100644 index 0000000..5ce42d8 --- /dev/null +++ b/hisat2lib/pymodule/setup.py @@ -0,0 +1,33 @@ +#!/usr/bin/python + +# +# Copyright 2018, Chanhee Park and Daehwan Kim +# +# This file is part of HISAT 2. 
#
# HISAT 2 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# HISAT 2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HISAT 2.  If not, see <http://www.gnu.org/licenses/>.


from distutils.core import setup, Extension

# Build settings for the 'ht2py' extension: compiled from ht2module.c, with
# headers looked up one directory up and the prebuilt static library
# ../../libhisat2lib.a passed to the linker as an extra object, plus stdc++.
# To enable the DEBUG macro, add:  define_macros=[('DEBUG', '1')]
ext_settings = dict(
    include_dirs=['../'],
    libraries=['stdc++'],
    extra_objects=['../../libhisat2lib.a'],
    sources=['ht2module.c'],
)

module1 = Extension('ht2py', **ext_settings)

setup(
    name='ht2py',
    version='1.0',
    ext_modules=[module1],
)
+ */ + + +#include +#include +#include "position_3n_table.h" + +using namespace std; + +string alignmentFileName; +bool standardInMode = false; +string refFileName; +string outputFileName; +bool uniqueOnly = false; +bool multipleOnly = false; +bool CG_only = false; +int nThreads = 1; +long long int loadingBlockSize = 1000000; +char convertFrom = '0'; +char convertTo = '0'; +char convertFromComplement; +char convertToComplement; +bool addedChrName = false; +bool removedChrName = false; + + +Positions* positions; + +bool fileExist (string& filename) { + ifstream file(filename); + return file.good(); +} + +enum { + ARG_ADDED_CHRNAME = 256, + ARG_REMOVED_CHRNAME +}; + +static const char *short_options = "s:r:t:b:umcp:h"; +static struct option long_options[] { + {"alignments", required_argument, 0, 'a'}, + {"ref", required_argument, 0, 'r'}, + {"output-name", required_argument, 0, 'o'}, + {"base-change", required_argument, 0, 'b'}, + {"unique-only", no_argument, 0, 'u'}, + {"multiple-only", no_argument, 0, 'm'}, + {"CG-only", no_argument, 0, 'c'}, + {"threads", required_argument, 0, 'p'}, + {"added-chrname", no_argument, 0, ARG_ADDED_CHRNAME }, + {"removed-chrname", no_argument, 0, ARG_REMOVED_CHRNAME }, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0} + }; + +static void printHelp(ostream& out) { + out << "hisat-3n-table developed by Yun (Leo) Zhang" << endl; + out << "Usage:" << endl + << "hisat-3n-table [options]* --alignments --ref --output-name --base-change " << endl + << " SORTED SAM filename. Please enter '-' for standard input." << endl + << " reference file (should be FASTA format)." << endl + << " file name to save the 3n table (tsv format). By default, alignments are written to the “standard out†or “stdout†filehandle (i.e. the console)." << endl + << " the char1 is the nucleotide converted from, the char2 is the nucleotide converted to." 
<< endl; + out << "Options (defaults in parentheses):" << endl + << " Input:" << endl + << " -u/--unique-only only count the base which is in unique mapped reads." << endl + << " -m/--multiple-only only count the base which is in multiple mapped reads." << endl + << " -c/--CG-only only count CG and ignore CH in reference." << endl + << " --added-chrname please add this option if you use --add-chrname during HISAT-3N alignment." << endl + << " --removed-chrname please add this option if you use --remove-chrname during HISAT-3N alignment." << endl + << " -p/--threads number of threads to launch (1)." << endl + << " -h/--help print this usage message." << endl; +} + +static void parseOption(int next_option, const char *optarg) { + switch (next_option) { + case 'a': { + alignmentFileName = optarg; + if (alignmentFileName == "-") { + standardInMode = true; + break; + } + if (!fileExist(alignmentFileName)) { + cerr << "The alignment file is not exist." << endl; + throw (1); + } + break; + } + case 'r': { + refFileName = optarg; + if (!fileExist(refFileName)) { + cerr << "reference (FASTA) file is not exist." << endl; + throw (1); + } + break; + } + case 'o': + outputFileName = optarg; + break; + case 'b': { + string arg = optarg; + if (arg.size() != 3 || arg[1] != ',') { + cerr << "Error: expected 2 comma-separated " + << "arguments to --base-change option (e.g. 
C,T), got " << arg << endl; + throw 1; + } + convertFrom = toupper(arg.front()); + convertTo = toupper(arg.back()); + break; + } + case 'u':{ + uniqueOnly = true; + break; + } + case 'm': { + multipleOnly = true; + break; + } + case 'c': { + CG_only = true; + break; + } + case 'h': { + printHelp(cerr); + throw 0; + } + case 'p': { + nThreads = stoi(optarg); + if (nThreads < 1) { + nThreads = 1; + } + break; + } + case ARG_ADDED_CHRNAME: { + addedChrName = true; + break; + } + case ARG_REMOVED_CHRNAME: { + removedChrName = true; + break; + } + default: + printHelp(cerr); + throw 1; + } +} + +static void parseOptions(int argc, const char **argv) { + int option_index = 0; + int next_option; + while (true) { + next_option = getopt_long(argc, const_cast(argv), short_options, + long_options, &option_index); + if (next_option == -1) + break; + parseOption(next_option, optarg); + } + + // check filenames + if (refFileName.empty() || alignmentFileName.empty()) { + cerr << "No reference or SAM file specified!" << endl; + printHelp(cerr); + throw 1; + } + + // give a warning for CG-only + if (CG_only) { + if (convertFrom != 'C' || convertTo != 'T') { + cerr << "Warning! You are using CG-only mode. The the --base-change option is set to: C,T" << endl; + convertFrom = 'C'; + convertTo = 'T'; + } + } + + // check if --base-change is empty + if (convertFrom == '0' || convertTo == '0') { + cerr << "the --base-change argument is required." << endl; + throw 1; + } + + if(removedChrName && addedChrName) { + cerr << "Error: --removed-chrname and --added-chrname cannot be used at the same time" << endl; + throw 1; + } + + // set complements + convertFromComplement = asc2dnacomp[convertFrom]; + convertToComplement = asc2dnacomp[convertTo]; +} + +/** + * give a SAM line, extract the chromosome and position information. + * return true if the SAM line is mapped. return false if SAM line is not maped. 
+ */ +bool getSAMChromosomePos(string* line, string& chr, long long int& pos) { + int startPosition = 0; + int endPosition = 0; + int count = 0; + + while ((endPosition = line->find("\t", startPosition)) != string::npos) { + if (count == 2) { + chr = line->substr(startPosition, endPosition - startPosition); + } else if (count == 3) { + pos = stoll(line->substr(startPosition, endPosition - startPosition)); + if (chr == "*") { + return false; + } else { + return true; + } + } + startPosition = endPosition + 1; + count++; + } + return false; +} + +/*void opeInFile(ifstream& f) { + if (alignmentFileName == "-") { + f = cin; + } else { + ifstream alignmentFile; + alignmentFile.open(alignmentFileName, ios_base::in); + return alignmentFile; + } +}*/ + + +int hisat_3n_table() +{ + positions = new Positions(refFileName, nThreads, addedChrName, removedChrName); + + // open #nThreads workers + vector workers; + for (int i = 0; i < nThreads; i++) { + workers.push_back(new thread(&Positions::append, positions, i)); + } + + // open a output thread + thread outputThread; + outputThread = thread(&Positions::outputFunction, positions, outputFileName); + + // main function, initially 2 load loadingBlockSize (2,000,000) bp of reference, set reloadPos to 1 loadingBlockSize, then load SAM data. + // when the samPos larger than the reloadPos load 1 loadingBlockSize bp of reference. + // when the samChromosome is different to current chromosome, finish all sam position and output all. + ifstream inputFile; + istream *alignmentFile = &cin; + if (!standardInMode) { + inputFile.open(alignmentFileName, ios_base::in); + alignmentFile = &inputFile; + } + + string* line; // temporary string to get SAM line. + string samChromosome; // the chromosome name of current SAM line. + long long int samPos; // the position of current SAM line. + long long int reloadPos; // the position in reference that we need to reload. + long long int lastPos = 0; // the position on last SAM line. 
compare lastPos with samPos to make sure the SAM is sorted. + + while (alignmentFile->good()) { + positions->getFreeStringPointer(line); + if (!getline(*alignmentFile, *line)) { + positions->returnLine(line); + break; + } + + if (line->empty() || line->front() == '@') { + positions->returnLine(line); + continue; + } + // limit the linePool size to save memory + while(positions->linePool.size() > 1000 * nThreads) { + this_thread::sleep_for (std::chrono::microseconds(1)); + } + // if the SAM line is empty or unmapped, get the next SAM line. + if (!getSAMChromosomePos(line, samChromosome, samPos)) { + positions->returnLine(line); + continue; + } + // if the samChromosome is different than current positions' chromosome, finish all SAM line. + // then load a new reference chromosome. + if (samChromosome != positions->chromosome) { + // wait all line is processed + while (!positions->linePool.empty() || positions->outputPositionPool.size() > 100000) { + this_thread::sleep_for (std::chrono::microseconds(1)); + } + positions->appendingFinished(); + positions->moveAllToOutput(); + positions->loadNewChromosome(samChromosome); + reloadPos = loadingBlockSize; + lastPos = 0; + } + // if the samPos is larger than reloadPos, load 1 loadingBlockSize bp in from reference. + while (samPos > reloadPos) { + while (!positions->linePool.empty() || positions->outputPositionPool.size() > 100000) { + this_thread::sleep_for (std::chrono::microseconds(1)); + } + positions->appendingFinished(); + positions->moveBlockToOutput(); + positions->loadMore(); + reloadPos += loadingBlockSize; + } + if (lastPos > samPos) { + cerr << "The input alignment file is not sorted. Please use sorted SAM file as alignment file." << endl; + throw 1; + } + positions->linePool.push(line); + lastPos = samPos; + } + //} + if (!standardInMode) { + inputFile.close(); + } + + + // prepare to close everything. 
+ + // make sure linePool is empty + while (!positions->linePool.empty()) { + this_thread::sleep_for (std::chrono::microseconds(100)); + } + // make sure all workers finished their appending work. + positions->appendingFinished(); + // move all position to outputPool + positions->moveAllToOutput(); + // wait until outputPool is empty + while (!positions->outputPositionPool.empty()) { + this_thread::sleep_for (std::chrono::microseconds(100)); + } + // stop all thread and clean + while(positions->freeLinePool.popFront(line)) { + delete line; + } + positions->working = false; + for (int i = 0; i < nThreads; i++){ + workers[i]->join(); + delete workers[i]; + } + outputThread.join(); + delete positions; + return 0; +} + +int main(int argc, const char** argv) +{ + int ret = 0; + + try { + parseOptions(argc, argv); + ret = hisat_3n_table(); + } catch(std::exception& e) { + cerr << "Error: Encountered exception: '" << e.what() << "'" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + return 1; + } catch(int e) { + if (e != 0) { + cerr << "Error: Encountered internal HISAT-3N exception (#" << e << ")" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + } + return e; + } + + return ret; +} diff --git a/hisat_bp.cpp b/hisat_bp.cpp new file mode 100644 index 0000000..a04116c --- /dev/null +++ b/hisat_bp.cpp @@ -0,0 +1,3885 @@ +/* + * Copyright 2014, Daehwan Kim + * + * This file is part of HISAT. + * + * HISAT is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "alphabet.h" +#include "assert_helpers.h" +#include "endian_swap.h" +#include "bt2_idx.h" +#include "bt2_io.h" +#include "bt2_util.h" +#include "hier_idx.h" +#include "formats.h" +#include "sequence_io.h" +#include "tokenize.h" +#include "aln_sink.h" +#include "pat.h" +#include "threading.h" +#include "ds.h" +#include "aligner_metrics.h" +#include "sam.h" +#include "aligner_seed.h" +#include "splice_site.h" +#include "bp_aligner.h" +#include "aligner_seed_policy.h" +#include "aligner_driver.h" +#include "aligner_sw.h" +#include "aligner_sw_driver.h" +#include "aligner_cache.h" +#include "util.h" +#include "pe.h" +#include "simple_func.h" +#include "presets.h" +#include "opts.h" +#include "outq.h" +#include "aligner_seed2.h" + +using namespace std; + +static EList mates1; // mated reads (first mate) +static EList mates2; // mated reads (second mate) +static EList mates12; // mated reads (1st/2nd interleaved in 1 file) +static string adjIdxBase; +bool gColor; // colorspace (not supported) +int gVerbose; // be talkative +static bool startVerbose; // be talkative at startup +int gQuiet; // print nothing but the alignments +static int sanityCheck; // enable expensive sanity checks +static int format; // default read format is FASTQ +static string origString; // reference text, or filename(s) +static int seed; // srandom() seed +static int timing; // whether to report basic timing data +static int metricsIval; // interval between alignment metrics messages (0 = no messages) +static string metricsFile;// output file to put alignment metrics in +static bool metricsStderr;// output file to put alignment metrics in +static bool metricsPerRead; // report a metrics tuple for every read +static bool 
allHits; // for multihits, report just one +static bool showVersion; // just print version and quit? +static int ipause; // pause before maching? +static uint32_t qUpto; // max # of queries to read +int gTrim5; // amount to trim from 5' end +int gTrim3; // amount to trim from 3' end +static int offRate; // keep default offRate +static bool solexaQuals; // quality strings are solexa quals, not phred, and subtract 64 (not 33) +static bool phred64Quals; // quality chars are phred, but must subtract 64 (not 33) +static bool integerQuals; // quality strings are space-separated strings of integers, not ASCII +static int nthreads; // number of pthreads operating concurrently +static int outType; // style of output +static bool noRefNames; // true -> print reference indexes; not names +static uint32_t khits; // number of hits per read; >1 is much slower +static uint32_t mhits; // don't report any hits if there are > mhits +static int partitionSz; // output a partitioning key in first field +static bool useSpinlock; // false -> don't use of spinlocks even if they're #defines +static bool fileParallel; // separate threads read separate input files in parallel +static bool useShmem; // use shared memory to hold the index +static bool useMm; // use memory-mapped files to hold the index +static bool mmSweep; // sweep through memory-mapped files immediately after mapping +int gMinInsert; // minimum insert size +int gMaxInsert; // maximum insert size +bool gMate1fw; // -1 mate aligns in fw orientation on fw strand +bool gMate2fw; // -2 mate aligns in rc orientation on fw strand +bool gFlippedMatesOK; // allow mates to be in wrong order +bool gDovetailMatesOK; // allow one mate to extend off the end of the other +bool gContainMatesOK; // allow one mate to contain the other in PE alignment +bool gOlapMatesOK; // allow mates to overlap in PE alignment +bool gExpandToFrag; // incr max frag length to =larger mate len if necessary +bool gReportDiscordant; // find and report discordant 
paired-end alignments +bool gReportMixed; // find and report unpaired alignments for paired reads +static uint32_t cacheLimit; // ranges w/ size > limit will be cached +static uint32_t cacheSize; // # words per range cache +static uint32_t skipReads; // # reads/read pairs to skip +bool gNofw; // don't align fw orientation of read +bool gNorc; // don't align rc orientation of read +static uint32_t fastaContLen; +static uint32_t fastaContFreq; +static bool hadoopOut; // print Hadoop status and summary messages +static bool fuzzy; +static bool fullRef; +static bool samTruncQname; // whether to truncate QNAME to 255 chars +static bool samOmitSecSeqQual; // omit SEQ/QUAL for 2ndary alignments? +static bool samNoUnal; // don't print records for unaligned reads +static bool samNoHead; // don't print any header lines in SAM output +static bool samNoSQ; // don't print @SQ header lines +static bool sam_print_as; +static bool sam_print_xs; // XS:i +static bool sam_print_xss; // Xs:i and Ys:i +static bool sam_print_yn; // YN:i and Yn:i +static bool sam_print_xn; +static bool sam_print_cs; +static bool sam_print_cq; +static bool sam_print_x0; +static bool sam_print_x1; +static bool sam_print_xm; +static bool sam_print_xo; +static bool sam_print_xg; +static bool sam_print_nm; +static bool sam_print_md; +static bool sam_print_yf; +static bool sam_print_yi; +static bool sam_print_ym; +static bool sam_print_yp; +static bool sam_print_yt; +static bool sam_print_ys; +static bool sam_print_zs; +static bool sam_print_xr; +static bool sam_print_xt; +static bool sam_print_xd; +static bool sam_print_xu; +static bool sam_print_yl; +static bool sam_print_ye; +static bool sam_print_yu; +static bool sam_print_xp; +static bool sam_print_yr; +static bool sam_print_zb; +static bool sam_print_zr; +static bool sam_print_zf; +static bool sam_print_zm; +static bool sam_print_zi; +static bool sam_print_zp; +static bool sam_print_zu; +static bool sam_print_xs_a; +static bool bwaSwLike; +static float 
bwaSwLikeC; +static float bwaSwLikeT; +static bool qcFilter; +static bool sortByScore; // prioritize alignments to report by score? +bool gReportOverhangs; // false -> filter out alignments that fall off the end of a reference sequence +static string rgid; // ID: setting for @RG header line +static string rgs; // SAM outputs for @RG header line +static string rgs_optflag; // SAM optional flag to add corresponding to @RG ID +static bool msample; // whether to report a random alignment when maxed-out via -m/-M +int gGapBarrier; // # diags on top/bot only to be entered diagonally +static EList qualities; +static EList qualities1; +static EList qualities2; +static string polstr; // temporary holder for policy string +static bool msNoCache; // true -> disable local cache +static int bonusMatchType; // how to reward matches +static int bonusMatch; // constant reward if bonusMatchType=constant +static int penMmcType; // how to penalize mismatches +static int penMmcMax; // max mm penalty +static int penMmcMin; // min mm penalty +static int penNType; // how to penalize Ns in the read +static int penN; // constant if N pelanty is a constant +static bool penNCatPair; // concatenate mates before N filtering? 
+static bool localAlign; // do local alignment in DP steps +static bool noisyHpolymer; // set to true if gap penalties should be reduced to be consistent with a sequencer that under- and overcalls homopolymers +static int penRdGapConst; // constant cost of extending a gap in the read +static int penRfGapConst; // constant cost of extending a gap in the reference +static int penRdGapLinear; // coeff of linear term for cost of gap extension in read +static int penRfGapLinear; // coeff of linear term for cost of gap extension in ref +static SimpleFunc scoreMin; // minimum valid score as function of read len +static SimpleFunc nCeil; // max # Ns allowed as function of read len +static SimpleFunc msIval; // interval between seeds as function of read len +static double descConsExp; // how to adjust score minimum as we descent further into index-assisted alignment +static size_t descentLanding; // don't place a search root if it's within this many positions of end +static SimpleFunc descentTotSz; // maximum space a DescentDriver can use in bytes +static SimpleFunc descentTotFmops; // maximum # FM ops a DescentDriver can perform +static int multiseedMms; // mismatches permitted in a multiseed seed +static int multiseedLen; // length of multiseed seeds +static size_t multiseedOff; // offset to begin extracting seeds +static uint32_t seedCacheLocalMB; // # MB to use for non-shared seed alignment cacheing +static uint32_t seedCacheCurrentMB; // # MB to use for current-read seed hit cacheing +static uint32_t exactCacheCurrentMB; // # MB to use for current-read seed hit cacheing +static size_t maxhalf; // max width on one side of DP table +static bool seedSumm; // print summary information about seed hits, not alignments +static bool doUngapped; // do ungapped alignment +static size_t maxIters; // stop after this many extend loop iterations +static size_t maxUg; // stop after this many ungap extends +static size_t maxDp; // stop after this many DPs +static size_t maxItersIncr; 
// amt to add to maxIters for each -k > 1 +static size_t maxEeStreak; // stop after this many end-to-end fails in a row +static size_t maxUgStreak; // stop after this many ungap fails in a row +static size_t maxDpStreak; // stop after this many dp fails in a row +static size_t maxStreakIncr; // amt to add to streak for each -k > 1 +static size_t maxMateStreak; // stop seed range after this many mate-find fails +static bool doExtend; // extend seed hits +static bool enable8; // use 8-bit SSE where possible? +static size_t cminlen; // longer reads use checkpointing +static size_t cpow2; // checkpoint interval log2 +static bool doTri; // do triangular mini-fills? +static string defaultPreset; // default preset; applied immediately +static bool ignoreQuals; // all mms incur same penalty, regardless of qual +static string wrapper; // type of wrapper script, so we can print correct usage +static EList queries; // list of query files +static string outfile; // write SAM output to this file +static int mapqv; // MAPQ calculation version +static int tighten; // -M tighten mode (0=none, 1=best, 2=secbest+1) +static bool doExactUpFront; // do exact search up front if seeds seem good enough +static bool do1mmUpFront; // do 1mm search up front if seeds seem good enough +static size_t do1mmMinLen; // length below which we disable 1mm e2e search +static int seedBoostThresh; // if average non-zero position has more than this many elements +static size_t nSeedRounds; // # seed rounds +static bool reorder; // true -> reorder SAM recs in -p mode +static float sampleFrac; // only align random fraction of input reads +static bool arbitraryRandom; // pseudo-randoms no longer a function of read properties +static bool bowtie2p5; +static bool useTempSpliceSite; +static int penCanSplice; +static int penNoncanSplice; +static SimpleFunc penIntronLen; +static string knownSpliceSiteInfile; // +static string novelSpliceSiteInfile; // +static string novelSpliceSiteOutfile; // +static bool 
no_spliced_alignment; +static int rna_strandness; // + +static string bt2index; // read Bowtie 2 index from files with this prefix +static EList > extra_opts; +static size_t extra_opts_cur; + +static EList thread_rids; +static MUTEX_T thread_rids_mutex; +static uint64_t thread_rids_mindist; + +#define DMAX std::numeric_limits::max() + +static void resetOptions() { + mates1.clear(); + mates2.clear(); + mates12.clear(); + adjIdxBase = ""; + gColor = false; + gVerbose = 0; + startVerbose = 0; + gQuiet = false; + sanityCheck = 0; // enable expensive sanity checks + format = FASTQ; // default read format is FASTQ + origString = ""; // reference text, or filename(s) + seed = 0; // srandom() seed + timing = 0; // whether to report basic timing data + metricsIval = 1; // interval between alignment metrics messages (0 = no messages) + metricsFile = ""; // output file to put alignment metrics in + metricsStderr = false; // print metrics to stderr (in addition to --metrics-file if it's specified + metricsPerRead = false; // report a metrics tuple for every read? + allHits = false; // for multihits, report just one + showVersion = false; // just print version and quit? + ipause = 0; // pause before maching? 
+ qUpto = 0xffffffff; // max # of queries to read + gTrim5 = 0; // amount to trim from 5' end + gTrim3 = 0; // amount to trim from 3' end + offRate = -1; // keep default offRate + solexaQuals = false; // quality strings are solexa quals, not phred, and subtract 64 (not 33) + phred64Quals = false; // quality chars are phred, but must subtract 64 (not 33) + integerQuals = false; // quality strings are space-separated strings of integers, not ASCII + nthreads = 1; // number of pthreads operating concurrently + outType = OUTPUT_SAM; // style of output + noRefNames = false; // true -> print reference indexes; not names + khits = 5; // number of hits per read; >1 is much slower + mhits = 0; // stop after finding this many alignments+1 + partitionSz = 0; // output a partitioning key in first field + useSpinlock = true; // false -> don't use of spinlocks even if they're #defines + fileParallel = false; // separate threads read separate input files in parallel + useShmem = false; // use shared memory to hold the index + useMm = false; // use memory-mapped files to hold the index + mmSweep = false; // sweep through memory-mapped files immediately after mapping + gMinInsert = 0; // minimum insert size + gMaxInsert = 500; // maximum insert size + gMate1fw = true; // -1 mate aligns in fw orientation on fw strand + gMate2fw = false; // -2 mate aligns in rc orientation on fw strand + gFlippedMatesOK = false; // allow mates to be in wrong order + gDovetailMatesOK = false; // allow one mate to extend off the end of the other + gContainMatesOK = true; // allow one mate to contain the other in PE alignment + gOlapMatesOK = true; // allow mates to overlap in PE alignment + gExpandToFrag = true; // incr max frag length to =larger mate len if necessary + gReportDiscordant = true; // find and report discordant paired-end alignments + gReportMixed = true; // find and report unpaired alignments for paired reads + + cacheLimit = 5; // ranges w/ size > limit will be cached + cacheSize = 0; 
// # words per range cache + skipReads = 0; // # reads/read pairs to skip + gNofw = false; // don't align fw orientation of read + gNorc = false; // don't align rc orientation of read + fastaContLen = 0; + fastaContFreq = 0; + hadoopOut = false; // print Hadoop status and summary messages + fuzzy = false; // reads will have alternate basecalls w/ qualities + fullRef = false; // print entire reference name instead of just up to 1st space + samTruncQname = true; // whether to truncate QNAME to 255 chars + samOmitSecSeqQual = false; // omit SEQ/QUAL for 2ndary alignments? + samNoUnal = false; // omit SAM records for unaligned reads + samNoHead = false; // don't print any header lines in SAM output + samNoSQ = false; // don't print @SQ header lines + sam_print_as = true; + sam_print_xs = true; + sam_print_xss = false; // Xs:i and Ys:i + sam_print_yn = false; // YN:i and Yn:i + sam_print_xn = true; + sam_print_cs = false; + sam_print_cq = false; + sam_print_x0 = true; + sam_print_x1 = true; + sam_print_xm = true; + sam_print_xo = true; + sam_print_xg = true; + sam_print_nm = true; + sam_print_md = true; + sam_print_yf = true; + sam_print_yi = false; + sam_print_ym = false; + sam_print_yp = false; + sam_print_yt = true; + sam_print_ys = true; + sam_print_zs = false; + sam_print_xr = false; + sam_print_xt = false; + sam_print_xd = false; + sam_print_xu = false; + sam_print_yl = false; + sam_print_ye = false; + sam_print_yu = false; + sam_print_xp = false; + sam_print_yr = false; + sam_print_zb = false; + sam_print_zr = false; + sam_print_zf = false; + sam_print_zm = false; + sam_print_zi = false; + sam_print_zp = false; + sam_print_zu = false; + sam_print_xs_a = true; + bwaSwLike = false; + bwaSwLikeC = 5.5f; + bwaSwLikeT = 20.0f; + qcFilter = false; // don't believe upstream qc by default + sortByScore = true; // prioritize alignments to report by score? 
+ rgid = ""; // SAM outputs for @RG header line + rgs = ""; // SAM outputs for @RG header line + rgs_optflag = ""; // SAM optional flag to add corresponding to @RG ID + msample = true; + gGapBarrier = 4; // disallow gaps within this many chars of either end of alignment + qualities.clear(); + qualities1.clear(); + qualities2.clear(); + polstr.clear(); + msNoCache = true; // true -> disable local cache + bonusMatchType = DEFAULT_MATCH_BONUS_TYPE; + bonusMatch = DEFAULT_MATCH_BONUS; + penMmcType = DEFAULT_MM_PENALTY_TYPE; + penMmcMax = DEFAULT_MM_PENALTY_MAX; + penMmcMin = DEFAULT_MM_PENALTY_MIN; + penNType = DEFAULT_N_PENALTY_TYPE; + penN = DEFAULT_N_PENALTY; + penNCatPair = DEFAULT_N_CAT_PAIR; // concatenate mates before N filtering? + localAlign = false; // do local alignment in DP steps + noisyHpolymer = false; + penRdGapConst = DEFAULT_READ_GAP_CONST; + penRfGapConst = DEFAULT_REF_GAP_CONST; + penRdGapLinear = DEFAULT_READ_GAP_LINEAR; + penRfGapLinear = DEFAULT_REF_GAP_LINEAR; + // scoreMin.init (SIMPLE_FUNC_LINEAR, DEFAULT_MIN_CONST, DEFAULT_MIN_LINEAR); + scoreMin.init (SIMPLE_FUNC_CONST, -18, 0); + nCeil.init (SIMPLE_FUNC_LINEAR, 0.0f, DMAX, 2.0f, 0.1f); + msIval.init (SIMPLE_FUNC_LINEAR, 1.0f, DMAX, DEFAULT_IVAL_B, DEFAULT_IVAL_A); + descConsExp = 2.0; + descentLanding = 20; + descentTotSz.init(SIMPLE_FUNC_LINEAR, 1024.0, DMAX, 0.0, 1024.0); + descentTotFmops.init(SIMPLE_FUNC_LINEAR, 100.0, DMAX, 0.0, 10.0); + multiseedMms = DEFAULT_SEEDMMS; + multiseedLen = DEFAULT_SEEDLEN; + multiseedOff = 0; + seedCacheLocalMB = 32; // # MB to use for non-shared seed alignment cacheing + seedCacheCurrentMB = 20; // # MB to use for current-read seed hit cacheing + exactCacheCurrentMB = 20; // # MB to use for current-read seed hit cacheing + maxhalf = 15; // max width on one side of DP table + seedSumm = false; // print summary information about seed hits, not alignments + doUngapped = true; // do ungapped alignment + maxIters = 400; // max iterations of extend loop + maxUg 
= 300; // stop after this many ungap extends + maxDp = 300; // stop after this many dp extends + maxItersIncr = 20; // amt to add to maxIters for each -k > 1 + maxEeStreak = 15; // stop after this many end-to-end fails in a row + maxUgStreak = 15; // stop after this many ungap fails in a row + maxDpStreak = 15; // stop after this many dp fails in a row + maxStreakIncr = 10; // amt to add to streak for each -k > 1 + maxMateStreak = 10; // in PE: abort seed range after N mate-find fails + doExtend = true; // do seed extensions + enable8 = true; // use 8-bit SSE where possible? + cminlen = 2000; // longer reads use checkpointing + cpow2 = 4; // checkpoint interval log2 + doTri = false; // do triangular mini-fills? + defaultPreset = "sensitive%LOCAL%"; // default preset; applied immediately + extra_opts.clear(); + extra_opts_cur = 0; + bt2index.clear(); // read Bowtie 2 index from files with this prefix + ignoreQuals = false; // all mms incur same penalty, regardless of qual + wrapper.clear(); // type of wrapper script, so we can print correct usage + queries.clear(); // list of query files + outfile.clear(); // write SAM output to this file + mapqv = 2; // MAPQ calculation version + tighten = 3; // -M tightening mode + doExactUpFront = true; // do exact search up front if seeds seem good enough + do1mmUpFront = true; // do 1mm search up front if seeds seem good enough + seedBoostThresh = 300; // if average non-zero position has more than this many elements + nSeedRounds = 2; // # rounds of seed searches to do for repetitive reads + do1mmMinLen = 60; // length below which we disable 1mm search + reorder = false; // reorder SAM records with -p > 1 + sampleFrac = 1.1f; // align all reads + arbitraryRandom = false; // let pseudo-random seeds be a function of read properties + bowtie2p5 = false; + useTempSpliceSite = true; + penCanSplice = 0; + penNoncanSplice = 3; + penIntronLen.init(SIMPLE_FUNC_LOG, -8, 1); + knownSpliceSiteInfile = ""; + novelSpliceSiteInfile = ""; + 
novelSpliceSiteOutfile = ""; + no_spliced_alignment = false; + rna_strandness = RNA_STRANDNESS_UNKNOWN; +} + +static const char *short_options = "fF:qbzhcu:rv:s:aP:t3:5:w:p:k:M:1:2:I:X:CQ:N:i:L:U:x:S:g:O:D:R:"; + +static struct option long_options[] = { + {(char*)"verbose", no_argument, 0, ARG_VERBOSE}, + {(char*)"startverbose", no_argument, 0, ARG_STARTVERBOSE}, + {(char*)"quiet", no_argument, 0, ARG_QUIET}, + {(char*)"sanity", no_argument, 0, ARG_SANITY}, + {(char*)"pause", no_argument, &ipause, 1}, + {(char*)"orig", required_argument, 0, ARG_ORIG}, + {(char*)"all", no_argument, 0, 'a'}, + {(char*)"solexa-quals", no_argument, 0, ARG_SOLEXA_QUALS}, + {(char*)"integer-quals",no_argument, 0, ARG_INTEGER_QUALS}, + {(char*)"int-quals", no_argument, 0, ARG_INTEGER_QUALS}, + {(char*)"metrics", required_argument, 0, ARG_METRIC_IVAL}, + {(char*)"metrics-file", required_argument, 0, ARG_METRIC_FILE}, + {(char*)"metrics-stderr",no_argument, 0, ARG_METRIC_STDERR}, + {(char*)"metrics-per-read", no_argument, 0, ARG_METRIC_PER_READ}, + {(char*)"met-read", no_argument, 0, ARG_METRIC_PER_READ}, + {(char*)"met", required_argument, 0, ARG_METRIC_IVAL}, + {(char*)"met-file", required_argument, 0, ARG_METRIC_FILE}, + {(char*)"met-stderr", no_argument, 0, ARG_METRIC_STDERR}, + {(char*)"time", no_argument, 0, 't'}, + {(char*)"trim3", required_argument, 0, '3'}, + {(char*)"trim5", required_argument, 0, '5'}, + {(char*)"seed", required_argument, 0, ARG_SEED}, + {(char*)"qupto", required_argument, 0, 'u'}, + {(char*)"upto", required_argument, 0, 'u'}, + {(char*)"version", no_argument, 0, ARG_VERSION}, + {(char*)"filepar", no_argument, 0, ARG_FILEPAR}, + {(char*)"help", no_argument, 0, 'h'}, + {(char*)"threads", required_argument, 0, 'p'}, + {(char*)"khits", required_argument, 0, 'k'}, + {(char*)"minins", required_argument, 0, 'I'}, + {(char*)"maxins", required_argument, 0, 'X'}, + {(char*)"quals", required_argument, 0, 'Q'}, + {(char*)"Q1", required_argument, 0, ARG_QUALS1}, + 
{(char*)"Q2", required_argument, 0, ARG_QUALS2}, + {(char*)"refidx", no_argument, 0, ARG_REFIDX}, + {(char*)"partition", required_argument, 0, ARG_PARTITION}, + {(char*)"ff", no_argument, 0, ARG_FF}, + {(char*)"fr", no_argument, 0, ARG_FR}, + {(char*)"rf", no_argument, 0, ARG_RF}, + {(char*)"cachelim", required_argument, 0, ARG_CACHE_LIM}, + {(char*)"cachesz", required_argument, 0, ARG_CACHE_SZ}, + {(char*)"nofw", no_argument, 0, ARG_NO_FW}, + {(char*)"norc", no_argument, 0, ARG_NO_RC}, + {(char*)"skip", required_argument, 0, 's'}, + {(char*)"12", required_argument, 0, ARG_ONETWO}, + {(char*)"tab5", required_argument, 0, ARG_TAB5}, + {(char*)"tab6", required_argument, 0, ARG_TAB6}, + {(char*)"phred33-quals", no_argument, 0, ARG_PHRED33}, + {(char*)"phred64-quals", no_argument, 0, ARG_PHRED64}, + {(char*)"phred33", no_argument, 0, ARG_PHRED33}, + {(char*)"phred64", no_argument, 0, ARG_PHRED64}, + {(char*)"solexa1.3-quals", no_argument, 0, ARG_PHRED64}, + {(char*)"mm", no_argument, 0, ARG_MM}, + {(char*)"shmem", no_argument, 0, ARG_SHMEM}, + {(char*)"mmsweep", no_argument, 0, ARG_MMSWEEP}, + {(char*)"hadoopout", no_argument, 0, ARG_HADOOPOUT}, + {(char*)"fuzzy", no_argument, 0, ARG_FUZZY}, + {(char*)"fullref", no_argument, 0, ARG_FULLREF}, + {(char*)"usage", no_argument, 0, ARG_USAGE}, + {(char*)"sam-no-qname-trunc", no_argument, 0, ARG_SAM_NO_QNAME_TRUNC}, + {(char*)"sam-omit-sec-seq", no_argument, 0, ARG_SAM_OMIT_SEC_SEQ}, + {(char*)"omit-sec-seq", no_argument, 0, ARG_SAM_OMIT_SEC_SEQ}, + {(char*)"sam-no-head", no_argument, 0, ARG_SAM_NOHEAD}, + {(char*)"sam-nohead", no_argument, 0, ARG_SAM_NOHEAD}, + {(char*)"sam-noHD", no_argument, 0, ARG_SAM_NOHEAD}, + {(char*)"sam-no-hd", no_argument, 0, ARG_SAM_NOHEAD}, + {(char*)"sam-nosq", no_argument, 0, ARG_SAM_NOSQ}, + {(char*)"sam-no-sq", no_argument, 0, ARG_SAM_NOSQ}, + {(char*)"sam-noSQ", no_argument, 0, ARG_SAM_NOSQ}, + {(char*)"no-head", no_argument, 0, ARG_SAM_NOHEAD}, + {(char*)"no-hd", no_argument, 0, 
ARG_SAM_NOHEAD}, + {(char*)"no-sq", no_argument, 0, ARG_SAM_NOSQ}, + {(char*)"no-HD", no_argument, 0, ARG_SAM_NOHEAD}, + {(char*)"no-SQ", no_argument, 0, ARG_SAM_NOSQ}, + {(char*)"no-unal", no_argument, 0, ARG_SAM_NO_UNAL}, + {(char*)"color", no_argument, 0, 'C'}, + {(char*)"sam-RG", required_argument, 0, ARG_SAM_RG}, + {(char*)"sam-rg", required_argument, 0, ARG_SAM_RG}, + {(char*)"sam-rg-id", required_argument, 0, ARG_SAM_RGID}, + {(char*)"RG", required_argument, 0, ARG_SAM_RG}, + {(char*)"rg", required_argument, 0, ARG_SAM_RG}, + {(char*)"rg-id", required_argument, 0, ARG_SAM_RGID}, + {(char*)"snpphred", required_argument, 0, ARG_SNPPHRED}, + {(char*)"snpfrac", required_argument, 0, ARG_SNPFRAC}, + {(char*)"gbar", required_argument, 0, ARG_GAP_BAR}, + {(char*)"qseq", no_argument, 0, ARG_QSEQ}, + {(char*)"policy", required_argument, 0, ARG_ALIGN_POLICY}, + {(char*)"preset", required_argument, 0, 'P'}, + {(char*)"seed-summ", no_argument, 0, ARG_SEED_SUMM}, + {(char*)"seed-summary", no_argument, 0, ARG_SEED_SUMM}, + {(char*)"overhang", no_argument, 0, ARG_OVERHANG}, + {(char*)"no-cache", no_argument, 0, ARG_NO_CACHE}, + {(char*)"cache", no_argument, 0, ARG_USE_CACHE}, + {(char*)"454", no_argument, 0, ARG_NOISY_HPOLY}, + {(char*)"ion-torrent", no_argument, 0, ARG_NOISY_HPOLY}, + {(char*)"no-mixed", no_argument, 0, ARG_NO_MIXED}, + {(char*)"no-discordant",no_argument, 0, ARG_NO_DISCORDANT}, + {(char*)"local", no_argument, 0, ARG_LOCAL}, + {(char*)"end-to-end", no_argument, 0, ARG_END_TO_END}, + {(char*)"ungapped", no_argument, 0, ARG_UNGAPPED}, + {(char*)"no-ungapped", no_argument, 0, ARG_UNGAPPED_NO}, + {(char*)"sse8", no_argument, 0, ARG_SSE8}, + {(char*)"no-sse8", no_argument, 0, ARG_SSE8_NO}, + {(char*)"scan-narrowed",no_argument, 0, ARG_SCAN_NARROWED}, + {(char*)"qc-filter", no_argument, 0, ARG_QC_FILTER}, + {(char*)"bwa-sw-like", no_argument, 0, ARG_BWA_SW_LIKE}, + {(char*)"multiseed", required_argument, 0, ARG_MULTISEED_IVAL}, + {(char*)"ma", 
required_argument, 0, ARG_SCORE_MA}, + {(char*)"mp", required_argument, 0, ARG_SCORE_MMP}, + {(char*)"np", required_argument, 0, ARG_SCORE_NP}, + {(char*)"rdg", required_argument, 0, ARG_SCORE_RDG}, + {(char*)"rfg", required_argument, 0, ARG_SCORE_RFG}, + {(char*)"score-min", required_argument, 0, ARG_SCORE_MIN}, + {(char*)"min-score", required_argument, 0, ARG_SCORE_MIN}, + {(char*)"n-ceil", required_argument, 0, ARG_N_CEIL}, + {(char*)"dpad", required_argument, 0, ARG_DPAD}, + {(char*)"mapq-print-inputs",no_argument, 0, ARG_SAM_PRINT_YI}, + {(char*)"very-fast", no_argument, 0, ARG_PRESET_VERY_FAST}, + {(char*)"fast", no_argument, 0, ARG_PRESET_FAST}, + {(char*)"sensitive", no_argument, 0, ARG_PRESET_SENSITIVE}, + {(char*)"very-sensitive", no_argument, 0, ARG_PRESET_VERY_SENSITIVE}, + {(char*)"very-fast-local", no_argument, 0, ARG_PRESET_VERY_FAST_LOCAL}, + {(char*)"fast-local", no_argument, 0, ARG_PRESET_FAST_LOCAL}, + {(char*)"sensitive-local", no_argument, 0, ARG_PRESET_SENSITIVE_LOCAL}, + {(char*)"very-sensitive-local", no_argument, 0, ARG_PRESET_VERY_SENSITIVE_LOCAL}, + {(char*)"no-score-priority",no_argument, 0, ARG_NO_SCORE_PRIORITY}, + {(char*)"seedlen", required_argument, 0, 'L'}, + {(char*)"seedmms", required_argument, 0, 'N'}, + {(char*)"seedival", required_argument, 0, 'i'}, + {(char*)"ignore-quals", no_argument, 0, ARG_IGNORE_QUALS}, + {(char*)"index", required_argument, 0, 'x'}, + {(char*)"arg-desc", no_argument, 0, ARG_DESC}, + {(char*)"wrapper", required_argument, 0, ARG_WRAPPER}, + {(char*)"unpaired", required_argument, 0, 'U'}, + {(char*)"output", required_argument, 0, 'S'}, + {(char*)"mapq-v", required_argument, 0, ARG_MAPQ_V}, + {(char*)"dovetail", no_argument, 0, ARG_DOVETAIL}, + {(char*)"no-dovetail", no_argument, 0, ARG_NO_DOVETAIL}, + {(char*)"contain", no_argument, 0, ARG_CONTAIN}, + {(char*)"no-contain", no_argument, 0, ARG_NO_CONTAIN}, + {(char*)"overlap", no_argument, 0, ARG_OVERLAP}, + {(char*)"no-overlap", no_argument, 0, 
ARG_NO_OVERLAP}, + {(char*)"tighten", required_argument, 0, ARG_TIGHTEN}, + {(char*)"exact-upfront", no_argument, 0, ARG_EXACT_UPFRONT}, + {(char*)"1mm-upfront", no_argument, 0, ARG_1MM_UPFRONT}, + {(char*)"no-exact-upfront", no_argument, 0, ARG_EXACT_UPFRONT_NO}, + {(char*)"no-1mm-upfront", no_argument, 0, ARG_1MM_UPFRONT_NO}, + {(char*)"1mm-minlen", required_argument, 0, ARG_1MM_MINLEN}, + {(char*)"seed-off", required_argument, 0, 'O'}, + {(char*)"seed-boost", required_argument, 0, ARG_SEED_BOOST_THRESH}, + {(char*)"read-times", no_argument, 0, ARG_READ_TIMES}, + {(char*)"show-rand-seed", no_argument, 0, ARG_SHOW_RAND_SEED}, + {(char*)"dp-fail-streak", required_argument, 0, ARG_DP_FAIL_STREAK_THRESH}, + {(char*)"ee-fail-streak", required_argument, 0, ARG_EE_FAIL_STREAK_THRESH}, + {(char*)"ug-fail-streak", required_argument, 0, ARG_UG_FAIL_STREAK_THRESH}, + {(char*)"fail-streak", required_argument, 0, 'D'}, + {(char*)"dp-fails", required_argument, 0, ARG_DP_FAIL_THRESH}, + {(char*)"ug-fails", required_argument, 0, ARG_UG_FAIL_THRESH}, + {(char*)"extends", required_argument, 0, ARG_EXTEND_ITERS}, + {(char*)"no-extend", no_argument, 0, ARG_NO_EXTEND}, + {(char*)"mapq-extra", no_argument, 0, ARG_MAPQ_EX}, + {(char*)"seed-rounds", required_argument, 0, 'R'}, + {(char*)"reorder", no_argument, 0, ARG_REORDER}, + {(char*)"passthrough", no_argument, 0, ARG_READ_PASSTHRU}, + {(char*)"sample", required_argument, 0, ARG_SAMPLE}, + {(char*)"cp-min", required_argument, 0, ARG_CP_MIN}, + {(char*)"cp-ival", required_argument, 0, ARG_CP_IVAL}, + {(char*)"tri", no_argument, 0, ARG_TRI}, + {(char*)"nondeterministic", no_argument, 0, ARG_NON_DETERMINISTIC}, + {(char*)"non-deterministic", no_argument, 0, ARG_NON_DETERMINISTIC}, + {(char*)"local-seed-cache-sz", required_argument, 0, ARG_LOCAL_SEED_CACHE_SZ}, + {(char*)"seed-cache-sz", required_argument, 0, ARG_CURRENT_SEED_CACHE_SZ}, + {(char*)"no-unal", no_argument, 0, ARG_SAM_NO_UNAL}, + {(char*)"test-25", no_argument, 0, 
ARG_TEST_25}, + // TODO: following should be a function of read length? + {(char*)"desc-kb", required_argument, 0, ARG_DESC_KB}, + {(char*)"desc-landing", required_argument, 0, ARG_DESC_LANDING}, + {(char*)"desc-exp", required_argument, 0, ARG_DESC_EXP}, + {(char*)"desc-fmops", required_argument, 0, ARG_DESC_FMOPS}, + {(char*)"no-temp-splicesite", no_argument, 0, ARG_NO_TEMPSPLICESITE}, + {(char*)"pen-cansplice", required_argument, 0, ARG_PEN_CANSPLICE}, + {(char*)"pen-noncansplice", required_argument, 0, ARG_PEN_NONCANSPLICE}, + {(char*)"pen-intronlen", required_argument, 0, ARG_PEN_INTRONLEN}, + {(char*)"known-splicesite-infile", required_argument, 0, ARG_KNOWN_SPLICESITE_INFILE}, + {(char*)"novel-splicesite-infile", required_argument, 0, ARG_NOVEL_SPLICESITE_INFILE}, + {(char*)"novel-splicesite-outfile", required_argument, 0, ARG_NOVEL_SPLICESITE_OUTFILE}, + {(char*)"no-spliced-alignment", no_argument, 0, ARG_NO_SPLICED_ALIGNMENT}, + {(char*)"rna-strandness", required_argument, 0, ARG_RNA_STRANDNESS}, + {(char*)0, 0, 0, 0} // terminator +}; + +/** + * Print out a concise description of what options are taken and whether they + * take an argument. + */ +static void printArgDesc(ostream& out) { + // struct option { + // const char *name; + // int has_arg; + // int *flag; + // int val; + // }; + size_t i = 0; + while(long_options[i].name != 0) { + out << long_options[i].name << "\t" + << (long_options[i].has_arg == no_argument ? 0 : 1) + << endl; + i++; + } + size_t solen = strlen(short_options); + for(i = 0; i < solen; i++) { + // Has an option? 
Does if next char is : + if(i == solen-1) { + assert_neq(':', short_options[i]); + cout << (char)short_options[i] << "\t" << 0 << endl; + } else { + if(short_options[i+1] == ':') { + // Option with argument + cout << (char)short_options[i] << "\t" << 1 << endl; + i++; // skip the ':' + } else { + // Option with no argument + cout << (char)short_options[i] << "\t" << 0 << endl; + } + } + } +} + +/** + * Print a summary usage message to the provided output stream. + */ +static void printUsage(ostream& out) { + out << "HISAT version " << string(HISAT_VERSION).c_str() << " by Daehwan Kim (infphilo@gmail.com, www.ccb.jhu.edu/people/infphilo)" << endl; + string tool_name = "hisat2-align"; + if(wrapper == "basic-0") { + tool_name = "hisat"; + } + out << "Usage: " << endl + << " " << tool_name.c_str() << " [options]* -x {-1 -2 | -U } [-S ]" << endl + << endl + << " Index filename prefix (minus trailing .X." << gfm_ext << ")." << endl + << " Files with #1 mates, paired with files in ." << endl; + if(wrapper == "basic-0") { + out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl; + } + out << " Files with #2 mates, paired with files in ." << endl; + if(wrapper == "basic-0") { + out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl; + } + out << " Files with unpaired reads." << endl; + if(wrapper == "basic-0") { + out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl; + } + out << " File for SAM output (default: stdout)" << endl + << endl + << " , , can be comma-separated lists (no whitespace) and can be" << endl + << " specified many times. E.g. '-U file1.fq,file2.fq -U file3.fq'." 
<< endl + // Wrapper script should write line next + << endl + << "Options (defaults in parentheses):" << endl + << endl + << " Input:" << endl + << " -q query input files are FASTQ .fq/.fastq (default)" << endl + << " --qseq query input files are in Illumina's qseq format" << endl + << " -f query input files are (multi-)FASTA .fa/.mfa" << endl + << " -r query input files are raw one-sequence-per-line" << endl + << " -c , , are sequences themselves, not files" << endl + << " -s/--skip skip the first reads/pairs in the input (none)" << endl + << " -u/--upto stop after first reads/pairs (no limit)" << endl + << " -5/--trim5 trim bases from 5'/left end of reads (0)" << endl + << " -3/--trim3 trim bases from 3'/right end of reads (0)" << endl + << " --phred33 qualities are Phred+33 (default)" << endl + << " --phred64 qualities are Phred+64" << endl + << " --int-quals qualities encoded as space-delimited integers" << endl + << endl + << " Presets: Same as:" << endl + << " For --end-to-end:" << endl + << " --very-fast -D 5 -R 1 -N 0 -L 22 -i S,0,2.50" << endl + << " --fast -D 10 -R 2 -N 0 -L 22 -i S,0,2.50" << endl + << " --sensitive -D 15 -R 2 -N 0 -L 22 -i S,1,1.15 (default)" << endl + << " --very-sensitive -D 20 -R 3 -N 0 -L 20 -i S,1,0.50" << endl + << endl + << " For --local:" << endl + << " --very-fast-local -D 5 -R 1 -N 0 -L 25 -i S,1,2.00" << endl + << " --fast-local -D 10 -R 2 -N 0 -L 22 -i S,1,1.75" << endl + << " --sensitive-local -D 15 -R 2 -N 0 -L 20 -i S,1,0.75 (default)" << endl + << " --very-sensitive-local -D 20 -R 3 -N 0 -L 20 -i S,1,0.50" << endl + << endl + << " Alignment:" << endl + << " -N max # mismatches in seed alignment; can be 0 or 1 (0)" << endl + << " -L length of seed substrings; must be >3, <32 (22)" << endl + << " -i interval between seed substrings w/r/t read len (S,1,1.15)" << endl + << " --n-ceil func for max # non-A/C/G/Ts permitted in aln (L,0,0.15)" << endl + << " --dpad include extra ref chars on sides of DP table (15)" << endl + << 
" --gbar disallow gaps within nucs of read extremes (4)" << endl + << " --ignore-quals treat all quality values as 30 on Phred scale (off)" << endl + << " --nofw do not align forward (original) version of read (off)" << endl + << " --norc do not align reverse-complement version of read (off)" << endl + << endl + << " Spliced Alignment:" << endl + << " --pen-cansplice penalty for a canonical splice site (0)" << endl + << " --pen-noncansplice penalty for a non-canonical splice site (3)" << endl + << " --pen-intronlen penalty for long introns (G,-8,1)" << endl + << " --known-splicesite-infile provide a list of known splice sites" << endl + << " --novel-splicesite-outfile report a list of splice sites" << endl + << " --novel-splicesite-infile provide a list of novel splice sites" << endl + << " --no-temp-splicesite disable the use of splice sites found" << endl + << " --no-spliced-alignment disable spliced alignment" << endl + << " --rna-strandness Specify strand-specific information (unstranded)" << endl + << endl + << " Scoring:" << endl + << " --ma match bonus (0 for --end-to-end, 2 for --local) " << endl + << " --mp max penalty for mismatch; lower qual = lower penalty (6)" << endl + << " --np penalty for non-A/C/G/Ts in read/ref (1)" << endl + << " --rdg , read gap open, extend penalties (5,3)" << endl + << " --rfg , reference gap open, extend penalties (5,3)" << endl + << " --score-min min acceptable alignment score w/r/t read length" << endl + << " (G,20,8 for local, L,-0.6,-0.6 for end-to-end)" << endl + << endl + << " Reporting:" << endl + << " (default) look for multiple alignments, report best, with MAPQ" << endl + << " OR" << endl + << " -k report up to alns per read; MAPQ not meaningful" << endl + << " OR" << endl + << " -a/--all report all alignments; very slow, MAPQ not meaningful" << endl + << endl + << " Effort:" << endl + << " -D give up extending after failed extends in a row (15)" << endl + << " -R for reads w/ repetitive seeds, try sets of seeds 
(2)" << endl + << endl + << " Paired-end:" << endl + << " -I/--minins minimum fragment length (0)" << endl + << " -X/--maxins maximum fragment length (500)" << endl + << " --fr/--rf/--ff -1, -2 mates align fw/rev, rev/fw, fw/fw (--fr)" << endl + << " --no-mixed suppress unpaired alignments for paired reads" << endl + << " --no-discordant suppress discordant alignments for paired reads" << endl + << " --no-dovetail not concordant when mates extend past each other" << endl + << " --no-contain not concordant when one mate alignment contains other" << endl + << " --no-overlap not concordant when mates overlap at all" << endl + << endl + << " Output:" << endl; + //if(wrapper == "basic-0") { + // out << " --bam output directly to BAM (by piping through 'samtools view')" << endl; + //} + out << " -t/--time print wall-clock time taken by search phases" << endl; + if(wrapper == "basic-0") { + out << " --un write unpaired reads that didn't align to " << endl + << " --al write unpaired reads that aligned at least once to " << endl + << " --un-conc write pairs that didn't align concordantly to " << endl + << " --al-conc write pairs that aligned concordantly at least once to " << endl + << " (Note: for --un, --al, --un-conc, or --al-conc, add '-gz' to the option name, e.g." << endl + << " --un-gz , to gzip compress output, or add '-bz2' to bzip2 compress output.)" << endl; + } + out << " --quiet print nothing to stderr except serious errors" << endl + // << " --refidx refer to ref. seqs by 0-based index rather than name" << endl + << " --met-file send metrics to file at (off)" << endl + << " --met-stderr send metrics to stderr (off)" << endl + << " --met report internal counters & metrics every secs (1)" << endl + // Following is supported in the wrapper instead + // << " --no-unal supppress SAM records for unaligned reads" << endl + << " --no-head supppress header lines, i.e. 
lines starting with @" << endl + << " --no-sq supppress @SQ header lines" << endl + << " --rg-id set read group id, reflected in @RG line and RG:Z: opt field" << endl + << " --rg add (\"lab:value\") to @RG line of SAM header." << endl + << " Note: @RG line only printed when --rg-id is set." << endl + << " --omit-sec-seq put '*' in SEQ and QUAL fields for secondary alignments." << endl + << endl + << " Performance:" << endl + << " -o/--offrate override offrate of index; must be >= index's offrate" << endl + << " -p/--threads number of alignment threads to launch (1)" << endl + << " --reorder force SAM output order to match order of input reads" << endl +#ifdef BOWTIE_MM + << " --mm use memory-mapped I/O for index; many 'hisat2's can share" << endl +#endif +#ifdef BOWTIE_SHARED_MEM + //<< " --shmem use shared mem for index; many 'hisat2's can share" << endl +#endif + << endl + << " Other:" << endl + << " --qc-filter filter out reads that are bad according to QSEQ filter" << endl + << " --seed seed for random number generator (0)" << endl + << " --non-deterministic seed rand. gen. arbitrarily instead of using read attributes" << endl + // << " --verbose verbose output for debugging" << endl + << " --version print version information and quit" << endl + << " -h/--help print this usage message" << endl + ; + if(wrapper.empty()) { + cerr << endl + << "*** Warning ***" << endl + << "'hisat2-align' was run directly. It is recommended that you run the wrapper script 'hisat2' instead." << endl + << endl; + } +} + +/** + * Parse an int out of optarg and enforce that it be at least 'lower'; + * if it is less than 'lower', than output the given error message and + * exit with an error and a usage message. 
+ */ +static int parseInt(int lower, int upper, const char *errmsg, const char *arg) { + long l; + char *endPtr= NULL; + l = strtol(arg, &endPtr, 10); + if (endPtr != NULL) { + if (l < lower || l > upper) { + cerr << errmsg << endl; + printUsage(cerr); + throw 1; + } + return (int32_t)l; + } + cerr << errmsg << endl; + printUsage(cerr); + throw 1; + return -1; +} + +/** + * Upper is maximum int by default. + */ +static int parseInt(int lower, const char *errmsg, const char *arg) { + return parseInt(lower, std::numeric_limits::max(), errmsg, arg); +} + +/** + * Parse a T string 'str'. + */ +template +T parse(const char *s) { + T tmp; + stringstream ss(s); + ss >> tmp; + return tmp; +} + +/** + * Parse a pair of Ts from a string, 'str', delimited with 'delim'. + */ +template +pair parsePair(const char *str, char delim) { + string s(str); + EList ss; + tokenize(s, delim, ss); + pair ret; + ret.first = parse(ss[0].c_str()); + ret.second = parse(ss[1].c_str()); + return ret; +} + +/** + * Parse a pair of Ts from a string, 'str', delimited with 'delim'. + */ +template +void parseTuple(const char *str, char delim, EList& ret) { + string s(str); + EList ss; + tokenize(s, delim, ss); + for(size_t i = 0; i < ss.size(); i++) { + ret.push_back(parse(ss[i].c_str())); + } +} + +static string applyPreset(const string& sorig, Presets& presets) { + string s = sorig; + size_t found = s.find("%LOCAL%"); + if(found != string::npos) { + s.replace(found, strlen("%LOCAL%"), localAlign ? "-local" : ""); + } + if(gVerbose) { + cerr << "Applying preset: '" << s.c_str() << "' using preset menu '" + << presets.name() << "'" << endl; + } + string pol; + presets.apply(s, pol, extra_opts); + return pol; +} + +static bool saw_M; +static bool saw_a; +static bool saw_k; +static EList presetList; + +/** + * TODO: Argument parsing is very, very flawed. The biggest problem is that + * there are two separate worlds of arguments, the ones set via polstr, and + * the ones set directly in variables. 
This makes for nasty interactions, + * e.g., with the -M option being resolved at an awkward time relative to + * the -k and -a options. + */ +static void parseOption(int next_option, const char *arg) { + switch (next_option) { + case ARG_TEST_25: bowtie2p5 = true; break; + case ARG_DESC_KB: descentTotSz = SimpleFunc::parse(arg, 0.0, 1024.0, 1024.0, DMAX); break; + case ARG_DESC_FMOPS: descentTotFmops = SimpleFunc::parse(arg, 0.0, 10.0, 100.0, DMAX); break; + case ARG_DESC_LANDING: descentLanding = parse(arg); break; + case ARG_DESC_EXP: { + descConsExp = parse(arg); + if(descConsExp < 0.0) { + cerr << "Error: --desc-exp must be greater than or equal to 0" << endl; + throw 1; + } + break; + } + case '1': tokenize(arg, ",", mates1); break; + case '2': tokenize(arg, ",", mates2); break; + case ARG_ONETWO: tokenize(arg, ",", mates12); format = TAB_MATE5; break; + case ARG_TAB5: tokenize(arg, ",", mates12); format = TAB_MATE5; break; + case ARG_TAB6: tokenize(arg, ",", mates12); format = TAB_MATE6; break; + case 'f': format = FASTA; break; + case 'F': { + format = FASTA_CONT; + pair p = parsePair(arg, ','); + fastaContLen = p.first; + fastaContFreq = p.second; + break; + } + case ARG_BWA_SW_LIKE: { + bwaSwLikeC = 5.5f; + bwaSwLikeT = 30; + bwaSwLike = true; + localAlign = true; + // -a INT Score of a match [1] + // -b INT Mismatch penalty [3] + // -q INT Gap open penalty [5] + // -r INT Gap extension penalty. The penalty for a contiguous + // gap of size k is q+k*r. [2] + polstr += ";MA=1;MMP=C3;RDG=5,2;RFG=5,2"; + break; + } + case 'q': format = FASTQ; break; + case 'r': format = RAW; break; + case 'c': format = CMDLINE; break; + case ARG_QSEQ: format = QSEQ; break; + case 'C': { + cerr << "Error: -C specified but Bowtie 2 does not support colorspace input." 
<< endl; + throw 1; + break; + } + case 'I': + gMinInsert = parseInt(0, "-I arg must be positive", arg); + break; + case 'X': + gMaxInsert = parseInt(1, "-X arg must be at least 1", arg); + break; + case ARG_NO_DISCORDANT: gReportDiscordant = false; break; + case ARG_NO_MIXED: gReportMixed = false; break; + case 's': + skipReads = (uint32_t)parseInt(0, "-s arg must be positive", arg); + break; + case ARG_FF: gMate1fw = true; gMate2fw = true; break; + case ARG_RF: gMate1fw = false; gMate2fw = true; break; + case ARG_FR: gMate1fw = true; gMate2fw = false; break; + case ARG_SHMEM: useShmem = true; break; + case ARG_SEED_SUMM: seedSumm = true; break; + case ARG_MM: { +#ifdef BOWTIE_MM + useMm = true; + break; +#else + cerr << "Memory-mapped I/O mode is disabled because bowtie was not compiled with" << endl + << "BOWTIE_MM defined. Memory-mapped I/O is not supported under Windows. If you" << endl + << "would like to use memory-mapped I/O on a platform that supports it, please" << endl + << "refrain from specifying BOWTIE_MM=0 when compiling Bowtie." 
<< endl; + throw 1; +#endif + } + case ARG_MMSWEEP: mmSweep = true; break; + case ARG_HADOOPOUT: hadoopOut = true; break; + case ARG_SOLEXA_QUALS: solexaQuals = true; break; + case ARG_INTEGER_QUALS: integerQuals = true; break; + case ARG_PHRED64: phred64Quals = true; break; + case ARG_PHRED33: solexaQuals = false; phred64Quals = false; break; + case ARG_OVERHANG: gReportOverhangs = true; break; + case ARG_NO_CACHE: msNoCache = true; break; + case ARG_USE_CACHE: msNoCache = false; break; + case ARG_LOCAL_SEED_CACHE_SZ: + seedCacheLocalMB = (uint32_t)parseInt(1, "--local-seed-cache-sz arg must be at least 1", arg); + break; + case ARG_CURRENT_SEED_CACHE_SZ: + seedCacheCurrentMB = (uint32_t)parseInt(1, "--seed-cache-sz arg must be at least 1", arg); + break; + case ARG_REFIDX: noRefNames = true; break; + case ARG_FUZZY: fuzzy = true; break; + case ARG_FULLREF: fullRef = true; break; + case ARG_GAP_BAR: + gGapBarrier = parseInt(1, "--gbar must be no less than 1", arg); + break; + case ARG_SEED: + seed = parseInt(0, "--seed arg must be at least 0", arg); + break; + case ARG_NON_DETERMINISTIC: + arbitraryRandom = true; + break; + case 'u': + qUpto = (uint32_t)parseInt(1, "-u/--qupto arg must be at least 1", arg); + break; + case 'Q': + tokenize(arg, ",", qualities); + integerQuals = true; + break; + case ARG_QUALS1: + tokenize(arg, ",", qualities1); + integerQuals = true; + break; + case ARG_QUALS2: + tokenize(arg, ",", qualities2); + integerQuals = true; + break; + case ARG_CACHE_LIM: + cacheLimit = (uint32_t)parseInt(1, "--cachelim arg must be at least 1", arg); + break; + case ARG_CACHE_SZ: + cacheSize = (uint32_t)parseInt(1, "--cachesz arg must be at least 1", arg); + cacheSize *= (1024 * 1024); // convert from MB to B + break; + case ARG_WRAPPER: wrapper = arg; break; + case 'p': + nthreads = parseInt(1, "-p/--threads arg must be at least 1", arg); + break; + case ARG_FILEPAR: + fileParallel = true; + break; + case '3': gTrim3 = parseInt(0, "-3/--trim3 arg must be 
at least 0", arg); break; + case '5': gTrim5 = parseInt(0, "-5/--trim5 arg must be at least 0", arg); break; + case 'h': printUsage(cout); throw 0; break; + case ARG_USAGE: printUsage(cout); throw 0; break; + // + // NOTE that unlike in Bowtie 1, -M, -a and -k are mutually + // exclusive here. + // + case 'M': { + msample = true; + mhits = parse(arg); + if(saw_a || saw_k) { + cerr << "Warning: -M, -k and -a are mutually exclusive. " + << "-M will override" << endl; + khits = 1; + } + assert_eq(1, khits); + saw_M = true; + cerr << "Warning: -M is deprecated. Use -D and -R to adjust " << + "effort instead." << endl; + break; + } + case ARG_EXTEND_ITERS: { + maxIters = parse(arg); + break; + } + case ARG_NO_EXTEND: { + doExtend = false; + break; + } + case 'R': { polstr += ";ROUNDS="; polstr += arg; break; } + case 'D': { polstr += ";DPS="; polstr += arg; break; } + case ARG_DP_MATE_STREAK_THRESH: { + maxMateStreak = parse(arg); + break; + } + case ARG_DP_FAIL_STREAK_THRESH: { + maxDpStreak = parse(arg); + break; + } + case ARG_EE_FAIL_STREAK_THRESH: { + maxEeStreak = parse(arg); + break; + } + case ARG_UG_FAIL_STREAK_THRESH: { + maxUgStreak = parse(arg); + break; + } + case ARG_DP_FAIL_THRESH: { + maxDp = parse(arg); + break; + } + case ARG_UG_FAIL_THRESH: { + maxUg = parse(arg); + break; + } + case ARG_SEED_BOOST_THRESH: { + seedBoostThresh = parse(arg); + break; + } + case 'a': { + msample = false; + allHits = true; + mhits = 0; // disable -M + if(saw_M || saw_k) { + cerr << "Warning: -M, -k and -a are mutually exclusive. " + << "-a will override" << endl; + } + saw_a = true; + break; + } + case 'k': { + msample = false; + khits = (uint32_t)parseInt(1, "-k arg must be at least 1", arg); + mhits = 0; // disable -M + if(saw_M || saw_a) { + cerr << "Warning: -M, -k and -a are mutually exclusive. 
" + << "-k will override" << endl; + } + saw_k = true; + break; + } + case ARG_VERBOSE: gVerbose = 1; break; + case ARG_STARTVERBOSE: startVerbose = true; break; + case ARG_QUIET: gQuiet = true; break; + case ARG_SANITY: sanityCheck = true; break; + case 't': timing = true; break; + case ARG_METRIC_IVAL: { + metricsIval = parseInt(1, "--metrics arg must be at least 1", arg); + break; + } + case ARG_METRIC_FILE: metricsFile = arg; break; + case ARG_METRIC_STDERR: metricsStderr = true; break; + case ARG_METRIC_PER_READ: metricsPerRead = true; break; + case ARG_NO_FW: gNofw = true; break; + case ARG_NO_RC: gNorc = true; break; + case ARG_SAM_NO_QNAME_TRUNC: samTruncQname = false; break; + case ARG_SAM_OMIT_SEC_SEQ: samOmitSecSeqQual = true; break; + case ARG_SAM_NO_UNAL: samNoUnal = true; break; + case ARG_SAM_NOHEAD: samNoHead = true; break; + case ARG_SAM_NOSQ: samNoSQ = true; break; + case ARG_SAM_PRINT_YI: sam_print_yi = true; break; + case ARG_REORDER: reorder = true; break; + case ARG_MAPQ_EX: { + sam_print_zp = true; + sam_print_zu = true; + sam_print_xp = true; + sam_print_xss = true; + sam_print_yn = true; + break; + } + case ARG_SHOW_RAND_SEED: { + sam_print_zs = true; + break; + } + case ARG_SAMPLE: + sampleFrac = parse(arg); + break; + case ARG_CP_MIN: + cminlen = parse(arg); + break; + case ARG_CP_IVAL: + cpow2 = parse(arg); + break; + case ARG_TRI: + doTri = true; + break; + case ARG_READ_PASSTHRU: { + sam_print_xr = true; + break; + } + case ARG_READ_TIMES: { + sam_print_xt = true; + sam_print_xd = true; + sam_print_xu = true; + sam_print_yl = true; + sam_print_ye = true; + sam_print_yu = true; + sam_print_yr = true; + sam_print_zb = true; + sam_print_zr = true; + sam_print_zf = true; + sam_print_zm = true; + sam_print_zi = true; + break; + } + case ARG_SAM_RG: { + string argstr = arg; + if(argstr.substr(0, 3) == "ID:") { + rgid = "\t"; + rgid += argstr; + rgs_optflag = "RG:Z:" + argstr.substr(3); + } else { + rgs += '\t'; + rgs += argstr; + } + break; 
+ } + case ARG_SAM_RGID: { + string argstr = arg; + rgid = "\t"; + rgid = "\tID:" + argstr; + rgs_optflag = "RG:Z:" + argstr; + break; + } + case ARG_PARTITION: partitionSz = parse(arg); break; + case ARG_DPAD: + maxhalf = parseInt(0, "--dpad must be no less than 0", arg); + break; + case ARG_ORIG: + if(arg == NULL || strlen(arg) == 0) { + cerr << "--orig arg must be followed by a string" << endl; + printUsage(cerr); + throw 1; + } + origString = arg; + break; + case ARG_LOCAL: localAlign = true; break; + case ARG_END_TO_END: localAlign = false; break; + case ARG_SSE8: enable8 = true; break; + case ARG_SSE8_NO: enable8 = false; break; + case ARG_UNGAPPED: doUngapped = true; break; + case ARG_UNGAPPED_NO: doUngapped = false; break; + case ARG_NO_DOVETAIL: gDovetailMatesOK = false; break; + case ARG_NO_CONTAIN: gContainMatesOK = false; break; + case ARG_NO_OVERLAP: gOlapMatesOK = false; break; + case ARG_DOVETAIL: gDovetailMatesOK = true; break; + case ARG_CONTAIN: gContainMatesOK = true; break; + case ARG_OVERLAP: gOlapMatesOK = true; break; + case ARG_QC_FILTER: qcFilter = true; break; + case ARG_NO_SCORE_PRIORITY: sortByScore = false; break; + case ARG_IGNORE_QUALS: ignoreQuals = true; break; + case ARG_MAPQ_V: mapqv = parse(arg); break; + case ARG_TIGHTEN: tighten = parse(arg); break; + case ARG_EXACT_UPFRONT: doExactUpFront = true; break; + case ARG_1MM_UPFRONT: do1mmUpFront = true; break; + case ARG_EXACT_UPFRONT_NO: doExactUpFront = false; break; + case ARG_1MM_UPFRONT_NO: do1mmUpFront = false; break; + case ARG_1MM_MINLEN: do1mmMinLen = parse(arg); break; + case ARG_NOISY_HPOLY: noisyHpolymer = true; break; + case 'x': bt2index = arg; break; + case ARG_PRESET_VERY_FAST_LOCAL: localAlign = true; + case ARG_PRESET_VERY_FAST: { + presetList.push_back("very-fast%LOCAL%"); break; + } + case ARG_PRESET_FAST_LOCAL: localAlign = true; + case ARG_PRESET_FAST: { + presetList.push_back("fast%LOCAL%"); break; + } + case ARG_PRESET_SENSITIVE_LOCAL: localAlign = true; + 
case ARG_PRESET_SENSITIVE: { + presetList.push_back("sensitive%LOCAL%"); break; + } + case ARG_PRESET_VERY_SENSITIVE_LOCAL: localAlign = true; + case ARG_PRESET_VERY_SENSITIVE: { + presetList.push_back("very-sensitive%LOCAL%"); break; + } + case 'P': { presetList.push_back(arg); break; } + case ARG_ALIGN_POLICY: { + if(strlen(arg) > 0) { + polstr += ";"; polstr += arg; + } + break; + } + case 'N': { polstr += ";SEED="; polstr += arg; break; } + case 'L': { + int64_t len = parse(arg); + if(len < 0) { + cerr << "Error: -L argument must be >= 0; was " << arg << endl; + throw 1; + } + if(len > 32) { + cerr << "Error: -L argument must be <= 32; was" << arg << endl; + throw 1; + } + polstr += ";SEEDLEN="; polstr += arg; break; + } + case 'O': + multiseedOff = parse(arg); + break; + case 'i': { + EList args; + tokenize(arg, ",", args); + if(args.size() > 3 || args.size() == 0) { + cerr << "Error: expected 3 or fewer comma-separated " + << "arguments to -i option, got " + << args.size() << endl; + throw 1; + } + // Interval-settings arguments + polstr += (";IVAL=" + args[0]); // Function type + if(args.size() > 1) { + polstr += ("," + args[1]); // Constant term + } + if(args.size() > 2) { + polstr += ("," + args[2]); // Coefficient + } + break; + } + case ARG_MULTISEED_IVAL: { + polstr += ";"; + // Split argument by comma + EList args; + tokenize(arg, ",", args); + if(args.size() > 5 || args.size() == 0) { + cerr << "Error: expected 5 or fewer comma-separated " + << "arguments to --multiseed option, got " + << args.size() << endl; + throw 1; + } + // Seed mm and length arguments + polstr += "SEED="; + polstr += (args[0]); // # mismatches + if(args.size() > 1) polstr += ("," + args[ 1]); // length + if(args.size() > 2) polstr += (";IVAL=" + args[2]); // Func type + if(args.size() > 3) polstr += ("," + args[ 3]); // Constant term + if(args.size() > 4) polstr += ("," + args[ 4]); // Coefficient + break; + } + case ARG_N_CEIL: { + // Split argument by comma + EList args; + 
tokenize(arg, ",", args); + if(args.size() > 3) { + cerr << "Error: expected 3 or fewer comma-separated " + << "arguments to --n-ceil option, got " + << args.size() << endl; + throw 1; + } + if(args.size() == 0) { + cerr << "Error: expected at least one argument to --n-ceil option" << endl; + throw 1; + } + polstr += ";NCEIL="; + if(args.size() == 3) { + polstr += (args[0] + "," + args[1] + "," + args[2]); + } else { + if(args.size() == 1) { + polstr += ("C," + args[0]); + } else { + polstr += (args[0] + "," + args[1]); + } + } + break; + } + case ARG_SCORE_MA: polstr += ";MA="; polstr += arg; break; + case ARG_SCORE_MMP: { + EList args; + tokenize(arg, ",", args); + if(args.size() > 2 || args.size() == 0) { + cerr << "Error: expected 1 or 2 comma-separated " + << "arguments to --mmp option, got " << args.size() << endl; + throw 1; + } + if(args.size() >= 1) { + polstr += ";MMP=Q,"; + polstr += args[0]; + if(args.size() >= 2) { + polstr += ","; + polstr += args[1]; + } + } + break; + } + case ARG_SCORE_NP: polstr += ";NP=C"; polstr += arg; break; + case ARG_SCORE_RDG: polstr += ";RDG="; polstr += arg; break; + case ARG_SCORE_RFG: polstr += ";RFG="; polstr += arg; break; + case ARG_SCORE_MIN: { + polstr += ";"; + EList args; + tokenize(arg, ",", args); + if(args.size() > 3 && args.size() == 0) { + cerr << "Error: expected 3 or fewer comma-separated " + << "arguments to --n-ceil option, got " + << args.size() << endl; + throw 1; + } + polstr += ("MIN=" + args[0]); + if(args.size() > 1) { + polstr += ("," + args[1]); + } + if(args.size() > 2) { + polstr += ("," + args[2]); + } + break; + } + case ARG_DESC: printArgDesc(cout); throw 0; + case 'S': outfile = arg; break; + case 'U': { + EList args; + tokenize(arg, ",", args); + for(size_t i = 0; i < args.size(); i++) { + queries.push_back(args[i]); + } + break; + } + case ARG_VERSION: showVersion = 1; break; + case ARG_NO_TEMPSPLICESITE: useTempSpliceSite = false; break; + case ARG_PEN_CANSPLICE: { + penCanSplice = 
parseInt(0, "-k arg must be at least 0", arg); + break; + } + case ARG_PEN_NONCANSPLICE: { + penNoncanSplice = parseInt(0, "-k arg must be at least 0", arg); + break; + } + case ARG_PEN_INTRONLEN: { + polstr += ";"; + EList args; + tokenize(arg, ",", args); + if(args.size() > 3 && args.size() == 0) { + cerr << "Error: expected 3 or fewer comma-separated " + << "arguments to --n-ceil option, got " + << args.size() << endl; + throw 1; + } + polstr += ("INTRONLEN=" + args[0]); + if(args.size() > 1) { + polstr += ("," + args[1]); + } + if(args.size() > 2) { + polstr += ("," + args[2]); + } + break; + } + case ARG_KNOWN_SPLICESITE_INFILE: knownSpliceSiteInfile = arg; break; + case ARG_NOVEL_SPLICESITE_INFILE: novelSpliceSiteInfile = arg; break; + case ARG_NOVEL_SPLICESITE_OUTFILE: novelSpliceSiteOutfile = arg; break; + case ARG_NO_SPLICED_ALIGNMENT: no_spliced_alignment = true; break; + case ARG_RNA_STRANDNESS: { + string strandness = arg; + if(strandness == "F") rna_strandness = RNA_STRANDNESS_F; + else if(strandness == "R") rna_strandness = RNA_STRANDNESS_R; + else if(strandness == "FR") rna_strandness = RNA_STRANDNESS_FR; + else if(strandness == "RF") rna_strandness = RNA_STRANDNESS_RF; + else { + // daehwan - throw exception with details + cerr << "Error: should be one of F, R, FR, or RF " << endl; + throw 1; + } + break; + } + default: + printUsage(cerr); + throw 1; + } +} + +/** + * Read command-line arguments + */ +static void parseOptions(int argc, const char **argv) { + int option_index = 0; + int next_option; + saw_M = false; + saw_a = false; + saw_k = true; + presetList.clear(); + if(startVerbose) { cerr << "Parsing options: "; logTime(cerr, true); } + while(true) { + next_option = getopt_long( + argc, const_cast(argv), + short_options, long_options, &option_index); + const char * arg = optarg; + if(next_option == EOF) { + if(extra_opts_cur < extra_opts.size()) { + next_option = extra_opts[extra_opts_cur].first; + arg = 
extra_opts[extra_opts_cur].second.c_str(); + extra_opts_cur++; + } else { + break; + } + } + parseOption(next_option, arg); + } + // Now parse all the presets. Might want to pick which presets version to + // use according to other parameters. + auto_ptr presets(new PresetsV0()); + // Apply default preset + if(!defaultPreset.empty()) { + polstr = applyPreset(defaultPreset, *presets.get()) + polstr; + } + // Apply specified presets + for(size_t i = 0; i < presetList.size(); i++) { + polstr += applyPreset(presetList[i], *presets.get()); + } + for(size_t i = 0; i < extra_opts.size(); i++) { + next_option = extra_opts[extra_opts_cur].first; + const char *arg = extra_opts[extra_opts_cur].second.c_str(); + parseOption(next_option, arg); + } + // Remove initial semicolons + while(!polstr.empty() && polstr[0] == ';') { + polstr = polstr.substr(1); + } + if(gVerbose) { + cerr << "Final policy string: '" << polstr.c_str() << "'" << endl; + } + size_t failStreakTmp = 0; + SeedAlignmentPolicy::parseString( + polstr, + localAlign, + noisyHpolymer, + ignoreQuals, + bonusMatchType, + bonusMatch, + penMmcType, + penMmcMax, + penMmcMin, + penNType, + penN, + penRdGapConst, + penRfGapConst, + penRdGapLinear, + penRfGapLinear, + scoreMin, + nCeil, + penNCatPair, + multiseedMms, + multiseedLen, + msIval, + failStreakTmp, + nSeedRounds, + &penIntronLen); + if(failStreakTmp > 0) { + maxEeStreak = failStreakTmp; + maxUgStreak = failStreakTmp; + maxDpStreak = failStreakTmp; + } + if(saw_a || saw_k) { + msample = false; + mhits = 0; + } else { + assert_gt(mhits, 0); + msample = true; + } + if(mates1.size() != mates2.size()) { + cerr << "Error: " << mates1.size() << " mate files/sequences were specified with -1, but " << mates2.size() << endl + << "mate files/sequences were specified with -2. The same number of mate files/" << endl + << "sequences must be specified with -1 and -2." 
<< endl; + throw 1; + } + if(qualities.size() && format != FASTA) { + cerr << "Error: one or more quality files were specified with -Q but -f was not" << endl + << "enabled. -Q works only in combination with -f and -C." << endl; + throw 1; + } + if(qualities1.size() && format != FASTA) { + cerr << "Error: one or more quality files were specified with --Q1 but -f was not" << endl + << "enabled. --Q1 works only in combination with -f and -C." << endl; + throw 1; + } + if(qualities2.size() && format != FASTA) { + cerr << "Error: one or more quality files were specified with --Q2 but -f was not" << endl + << "enabled. --Q2 works only in combination with -f and -C." << endl; + throw 1; + } + if(qualities1.size() > 0 && mates1.size() != qualities1.size()) { + cerr << "Error: " << mates1.size() << " mate files/sequences were specified with -1, but " << qualities1.size() << endl + << "quality files were specified with --Q1. The same number of mate and quality" << endl + << "files must sequences must be specified with -1 and --Q1." << endl; + throw 1; + } + if(qualities2.size() > 0 && mates2.size() != qualities2.size()) { + cerr << "Error: " << mates2.size() << " mate files/sequences were specified with -2, but " << qualities2.size() << endl + << "quality files were specified with --Q2. The same number of mate and quality" << endl + << "files must sequences must be specified with -2 and --Q2." << endl; + throw 1; + } + if(!rgs.empty() && rgid.empty()) { + cerr << "Warning: --rg was specified without --rg-id also " + << "being specified. @RG line is not printed unless --rg-id " + << "is specified." 
<< endl; + } + // Check for duplicate mate input files + if(format != CMDLINE) { + for(size_t i = 0; i < mates1.size(); i++) { + for(size_t j = 0; j < mates2.size(); j++) { + if(mates1[i] == mates2[j] && !gQuiet) { + cerr << "Warning: Same mate file \"" << mates1[i].c_str() << "\" appears as argument to both -1 and -2" << endl; + } + } + } + } + // If both -s and -u are used, we need to adjust qUpto accordingly + // since it uses rdid to know if we've reached the -u limit (and + // rdids are all shifted up by skipReads characters) + if(qUpto + skipReads > qUpto) { + qUpto += skipReads; + } + if(useShmem && useMm && !gQuiet) { + cerr << "Warning: --shmem overrides --mm..." << endl; + useMm = false; + } + if(gGapBarrier < 1) { + cerr << "Warning: --gbar was set less than 1 (=" << gGapBarrier + << "); setting to 1 instead" << endl; + gGapBarrier = 1; + } + if(multiseedMms >= multiseedLen) { + assert_gt(multiseedLen, 0); + cerr << "Warning: seed mismatches (" << multiseedMms + << ") is less than seed length (" << multiseedLen + << "); setting mismatches to " << (multiseedMms-1) + << " instead" << endl; + multiseedMms = multiseedLen-1; + } + sam_print_zm = sam_print_zm && bowtie2p5; +#ifndef NDEBUG + if(!gQuiet) { + cerr << "Warning: Running in debug mode. Please use debug mode only " + << "for diagnosing errors, and not for typical use of HISAT." 
+ << endl; + } +#endif +} + +static const char *argv0 = NULL; + +/// Create a PatternSourcePerThread for the current thread according +/// to the global params and return a pointer to it +static PatternSourcePerThreadFactory* +createPatsrcFactory(PairedPatternSource& _patsrc, int tid) { + PatternSourcePerThreadFactory *patsrcFact; + patsrcFact = new WrappedPatternSourcePerThreadFactory(_patsrc); + assert(patsrcFact != NULL); + return patsrcFact; +} + +#define PTHREAD_ATTRS (PTHREAD_CREATE_JOINABLE | PTHREAD_CREATE_DETACHED) + +typedef TIndexOffU index_t; +typedef uint16_t local_index_t; +static PairedPatternSource* multiseed_patsrc; +static HierEbwt* multiseed_ebwtFw; +static HierEbwt* multiseed_ebwtBw; +static Scoring* multiseed_sc; +static BitPairReference* multiseed_refs; +static AlignmentCache* multiseed_ca; // seed cache +static AlnSink* multiseed_msink; +static EList multiseed_refnames; +static OutFileBuf* multiseed_metricsOfb; +static SpliceSiteDB* ssdb; +static MUTEX_T multiseed_mutex; + +/** + * Metrics for measuring the work done by the outer read alignment + * loop. + */ +struct OuterLoopMetrics { + + OuterLoopMetrics() { + reset(); + } + + /** + * Set all counters to 0. + */ + void reset() { + reads = bases = srreads = srbases = + freads = fbases = ureads = ubases = 0; + } + + /** + * Sum the counters in m in with the conters in this object. This + * is the only safe way to update an OuterLoopMetrics that's shared + * by multiple threads. 
+ */ + void merge( + const OuterLoopMetrics& m, + bool getLock = false) + { + ThreadSafe ts(&mutex_m, getLock); + reads += m.reads; + bases += m.bases; + srreads += m.srreads; + srbases += m.srbases; + freads += m.freads; + fbases += m.fbases; + ureads += m.ureads; + ubases += m.ubases; + } + + uint64_t reads; // total reads + uint64_t bases; // total bases + uint64_t srreads; // same-read reads + uint64_t srbases; // same-read bases + uint64_t freads; // filtered reads + uint64_t fbases; // filtered bases + uint64_t ureads; // unfiltered reads + uint64_t ubases; // unfiltered bases + MUTEX_T mutex_m; +}; + +/** + * Collection of all relevant performance metrics when aligning in + * multiseed mode. + */ +struct PerfMetrics { + + PerfMetrics() : first(true) { reset(); } + + /** + * Set all counters to 0. + */ + void reset() { + olm.reset(); + sdm.reset(); + wlm.reset(); + swmSeed.reset(); + swmMate.reset(); + rpm.reset(); + dpSse8Seed.reset(); // 8-bit SSE seed extensions + dpSse8Mate.reset(); // 8-bit SSE mate finds + dpSse16Seed.reset(); // 16-bit SSE seed extensions + dpSse16Mate.reset(); // 16-bit SSE mate finds + nbtfiltst = 0; + nbtfiltsc = 0; + nbtfiltdo = 0; + + olmu.reset(); + sdmu.reset(); + wlmu.reset(); + swmuSeed.reset(); + swmuMate.reset(); + rpmu.reset(); + dpSse8uSeed.reset(); // 8-bit SSE seed extensions + dpSse8uMate.reset(); // 8-bit SSE mate finds + dpSse16uSeed.reset(); // 16-bit SSE seed extensions + dpSse16uMate.reset(); // 16-bit SSE mate finds + nbtfiltst_u = 0; + nbtfiltsc_u = 0; + nbtfiltdo_u = 0; + + him.reset(); + } + + /** + * Merge a set of specific metrics into this object. 
+ */
	// Each metrics pointer is optional: NULL means there is nothing of that
	// kind to fold in.  Everything except hi is accumulated into the
	// "u"-named members; hi is merged straight into him.
	void merge(
		const OuterLoopMetrics *ol,
		const SeedSearchMetrics *sd,
		const WalkMetrics *wl,
		const SwMetrics *swSeed,
		const SwMetrics *swMate,
		const ReportingMetrics *rm,
		const SSEMetrics *dpSse8Ex,
		const SSEMetrics *dpSse8Ma,
		const SSEMetrics *dpSse16Ex,
		const SSEMetrics *dpSse16Ma,
		uint64_t nbtfiltst_,
		uint64_t nbtfiltsc_,
		uint64_t nbtfiltdo_,
		const HIMetrics *hi,
		bool getLock)
	{
		// Guard object built on this object's mutex; getLock is forwarded
		// to it unchanged.
		ThreadSafe ts(&mutex_m, getLock);

		// The plain counters need no NULL check.
		nbtfiltst_u += nbtfiltst_;
		nbtfiltsc_u += nbtfiltsc_;
		nbtfiltdo_u += nbtfiltdo_;

		// Nested merges are always called with false as their second
		// argument (for OuterLoopMetrics that argument is its getLock flag).
		if(ol) olmu.merge(*ol, false);
		if(sd) sdmu.merge(*sd, false);
		if(wl) wlmu.merge(*wl, false);
		if(swSeed) swmuSeed.merge(*swSeed, false);
		if(swMate) swmuMate.merge(*swMate, false);
		if(rm) rpmu.merge(*rm, false);
		if(dpSse8Ex) dpSse8uSeed.merge(*dpSse8Ex, false);
		if(dpSse8Ma) dpSse8uMate.merge(*dpSse8Ma, false);
		if(dpSse16Ex) dpSse16uSeed.merge(*dpSse16Ex, false);
		if(dpSse16Ma) dpSse16uMate.merge(*dpSse16Ma, false);
		if(hi) him.merge(*hi, false);
	}

+ /** + * Reports a matrix of results, incl. column labels, to an OutFileBuf. + * Optionally also sends results to stderr (unbuffered). Can optionally + * print a per-read record with the read name at the beginning. + */ + void reportInterval( + OutFileBuf* o, // file to send output to + bool metricsStderr, // additionally output to stderr?
+ bool total, // true -> report total, otherwise incremental + bool sync, // synchronize output + const BTString *name) // non-NULL name pointer if is per-read record + { + ThreadSafe ts(&mutex_m, sync); + ostringstream stderrSs; + time_t curtime = time(0); + char buf[1024]; + if(first) { + const char *str = + /* 1 */ "Time" "\t" + /* 2 */ "Read" "\t" + /* 3 */ "Base" "\t" + /* 4 */ "SameRead" "\t" + /* 5 */ "SameReadBase" "\t" + /* 6 */ "UnfilteredRead" "\t" + /* 7 */ "UnfilteredBase" "\t" + + /* 8 */ "Paired" "\t" + /* 9 */ "Unpaired" "\t" + /* 10 */ "AlConUni" "\t" + /* 11 */ "AlConRep" "\t" + /* 12 */ "AlConFail" "\t" + /* 13 */ "AlDis" "\t" + /* 14 */ "AlConFailUni" "\t" + /* 15 */ "AlConFailRep" "\t" + /* 16 */ "AlConFailFail" "\t" + /* 17 */ "AlConRepUni" "\t" + /* 18 */ "AlConRepRep" "\t" + /* 19 */ "AlConRepFail" "\t" + /* 20 */ "AlUnpUni" "\t" + /* 21 */ "AlUnpRep" "\t" + /* 22 */ "AlUnpFail" "\t" + + /* 23 */ "SeedSearch" "\t" + /* 24 */ "IntraSCacheHit" "\t" + /* 25 */ "InterSCacheHit" "\t" + /* 26 */ "OutOfMemory" "\t" + /* 27 */ "AlBWOp" "\t" + /* 28 */ "AlBWBranch" "\t" + /* 29 */ "ResBWOp" "\t" + /* 30 */ "ResBWBranch" "\t" + /* 31 */ "ResResolve" "\t" + /* 34 */ "ResReport" "\t" + /* 35 */ "RedundantSHit" "\t" + + /* 36 */ "BestMinEdit0" "\t" + /* 37 */ "BestMinEdit1" "\t" + /* 38 */ "BestMinEdit2" "\t" + + /* 39 */ "ExactAttempts" "\t" + /* 40 */ "ExactSucc" "\t" + /* 41 */ "ExactRanges" "\t" + /* 42 */ "ExactRows" "\t" + /* 43 */ "ExactOOMs" "\t" + + /* 44 */ "1mmAttempts" "\t" + /* 45 */ "1mmSucc" "\t" + /* 46 */ "1mmRanges" "\t" + /* 47 */ "1mmRows" "\t" + /* 48 */ "1mmOOMs" "\t" + + /* 49 */ "UngappedSucc" "\t" + /* 50 */ "UngappedFail" "\t" + /* 51 */ "UngappedNoDec" "\t" + + /* 52 */ "DPExLt10Gaps" "\t" + /* 53 */ "DPExLt5Gaps" "\t" + /* 54 */ "DPExLt3Gaps" "\t" + + /* 55 */ "DPMateLt10Gaps" "\t" + /* 56 */ "DPMateLt5Gaps" "\t" + /* 57 */ "DPMateLt3Gaps" "\t" + + /* 58 */ "DP16ExDps" "\t" + /* 59 */ "DP16ExDpSat" "\t" + /* 60 */ 
"DP16ExDpFail" "\t" + /* 61 */ "DP16ExDpSucc" "\t" + /* 62 */ "DP16ExCol" "\t" + /* 63 */ "DP16ExCell" "\t" + /* 64 */ "DP16ExInner" "\t" + /* 65 */ "DP16ExFixup" "\t" + /* 66 */ "DP16ExGathSol" "\t" + /* 67 */ "DP16ExBt" "\t" + /* 68 */ "DP16ExBtFail" "\t" + /* 69 */ "DP16ExBtSucc" "\t" + /* 70 */ "DP16ExBtCell" "\t" + /* 71 */ "DP16ExCoreRej" "\t" + /* 72 */ "DP16ExNRej" "\t" + + /* 73 */ "DP8ExDps" "\t" + /* 74 */ "DP8ExDpSat" "\t" + /* 75 */ "DP8ExDpFail" "\t" + /* 76 */ "DP8ExDpSucc" "\t" + /* 77 */ "DP8ExCol" "\t" + /* 78 */ "DP8ExCell" "\t" + /* 79 */ "DP8ExInner" "\t" + /* 80 */ "DP8ExFixup" "\t" + /* 81 */ "DP8ExGathSol" "\t" + /* 82 */ "DP8ExBt" "\t" + /* 83 */ "DP8ExBtFail" "\t" + /* 84 */ "DP8ExBtSucc" "\t" + /* 85 */ "DP8ExBtCell" "\t" + /* 86 */ "DP8ExCoreRej" "\t" + /* 87 */ "DP8ExNRej" "\t" + + /* 88 */ "DP16MateDps" "\t" + /* 89 */ "DP16MateDpSat" "\t" + /* 90 */ "DP16MateDpFail" "\t" + /* 91 */ "DP16MateDpSucc" "\t" + /* 92 */ "DP16MateCol" "\t" + /* 93 */ "DP16MateCell" "\t" + /* 94 */ "DP16MateInner" "\t" + /* 95 */ "DP16MateFixup" "\t" + /* 96 */ "DP16MateGathSol" "\t" + /* 97 */ "DP16MateBt" "\t" + /* 98 */ "DP16MateBtFail" "\t" + /* 99 */ "DP16MateBtSucc" "\t" + /* 100 */ "DP16MateBtCell" "\t" + /* 101 */ "DP16MateCoreRej" "\t" + /* 102 */ "DP16MateNRej" "\t" + + /* 103 */ "DP8MateDps" "\t" + /* 104 */ "DP8MateDpSat" "\t" + /* 105 */ "DP8MateDpFail" "\t" + /* 106 */ "DP8MateDpSucc" "\t" + /* 107 */ "DP8MateCol" "\t" + /* 108 */ "DP8MateCell" "\t" + /* 109 */ "DP8MateInner" "\t" + /* 110 */ "DP8MateFixup" "\t" + /* 111 */ "DP8MateGathSol" "\t" + /* 112 */ "DP8MateBt" "\t" + /* 113 */ "DP8MateBtFail" "\t" + /* 114 */ "DP8MateBtSucc" "\t" + /* 115 */ "DP8MateBtCell" "\t" + /* 116 */ "DP8MateCoreRej" "\t" + /* 117 */ "DP8MateNRej" "\t" + + /* 118 */ "DPBtFiltStart" "\t" + /* 119 */ "DPBtFiltScore" "\t" + /* 120 */ "DpBtFiltDom" "\t" + + /* 121 */ "MemPeak" "\t" + /* 122 */ "UncatMemPeak" "\t" // 0 + /* 123 */ "EbwtMemPeak" "\t" // EBWT_CAT + /* 
124 */ "CacheMemPeak" "\t" // CA_CAT + /* 125 */ "ResolveMemPeak" "\t" // GW_CAT + /* 126 */ "AlignMemPeak" "\t" // AL_CAT + /* 127 */ "DPMemPeak" "\t" // DP_CAT + /* 128 */ "MiscMemPeak" "\t" // MISC_CAT + /* 129 */ "DebugMemPeak" "\t" // DEBUG_CAT + + /* 130 */ "LocalSearch" "\t" + /* 131 */ "AnchorSearch" "\t" + /* 132 */ "LocalIndexSearch" "\t" + /* 133 */ "LocalExtSearch" "\t" + /* 134 */ "LocalSearchRecur" "\t" + /* 135 */ "GlobalGenomeCoords" "\t" + /* 136 */ "LocalGenomeCoords" "\t" + + + "\n"; + + if(name != NULL) { + if(o != NULL) o->writeChars("Name\t"); + if(metricsStderr) stderrSs << "Name\t"; + } + + if(o != NULL) o->writeChars(str); + if(metricsStderr) stderrSs << str; + first = false; + } + + if(total) mergeIncrementals(); + + // 0. Read name, if needed + if(name != NULL) { + if(o != NULL) { + o->writeChars(name->toZBuf()); + o->write('\t'); + } + if(metricsStderr) { + stderrSs << (*name) << '\t'; + } + } + + // 1. Current time in secs + itoa10(curtime, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const OuterLoopMetrics& ol = total ? olm : olmu; + + // 2. Reads + itoa10(ol.reads, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 3. Bases + itoa10(ol.bases, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 4. Same-read reads + itoa10(ol.srreads, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 5. Same-read bases + itoa10(ol.srbases, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 6. Unfiltered reads + itoa10(ol.ureads, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 7. 
Unfiltered bases + itoa10(ol.ubases, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const ReportingMetrics& rp = total ? rpm : rpmu; + + // 8. Paired reads + itoa10(rp.npaired, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 9. Unpaired reads + itoa10(rp.nunpaired, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 10. Pairs with unique concordant alignments + itoa10(rp.nconcord_uni, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 11. Pairs with repetitive concordant alignments + itoa10(rp.nconcord_rep, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 12. Pairs with 0 concordant alignments + itoa10(rp.nconcord_0, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 13. Pairs with 1 discordant alignment + itoa10(rp.ndiscord, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 14. Mates from unaligned pairs that align uniquely + itoa10(rp.nunp_0_uni, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 15. Mates from unaligned pairs that align repetitively + itoa10(rp.nunp_0_rep, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 16. Mates from unaligned pairs that fail to align + itoa10(rp.nunp_0_0, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 17. Mates from repetitive pairs that align uniquely + itoa10(rp.nunp_rep_uni, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 18. 
Mates from repetitive pairs that align repetitively + itoa10(rp.nunp_rep_rep, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 19. Mates from repetitive pairs that fail to align + itoa10(rp.nunp_rep_0, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 20. Unpaired reads that align uniquely + itoa10(rp.nunp_uni, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 21. Unpaired reads that align repetitively + itoa10(rp.nunp_rep, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 22. Unpaired reads that fail to align + itoa10(rp.nunp_0, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const SeedSearchMetrics& sd = total ? sdm : sdmu; + + // 23. Seed searches + itoa10(sd.seedsearch, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 24. Hits in 'current' cache + itoa10(sd.intrahit, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 25. Hits in 'local' cache + itoa10(sd.interhit, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 26. Out of memory + itoa10(sd.ooms, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 27. Burrows-Wheeler ops in aligner + itoa10(sd.bwops, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 28. Burrows-Wheeler branches (edits) in aligner + itoa10(sd.bweds, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const WalkMetrics& wl = total ? wlm : wlmu; + + // 29. 
Burrows-Wheeler ops in resolver + itoa10(wl.bwops, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 30. Burrows-Wheeler branches in resolver + itoa10(wl.branches, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 31. Burrows-Wheeler offset resolutions + itoa10(wl.resolves, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 34. Offset reports + itoa10(wl.reports, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 35. Redundant seed hit + itoa10(total ? swmSeed.rshit : swmuSeed.rshit, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 36. # times the best (out of fw/rc) minimum # edits was 0 + itoa10(total ? sdm.bestmin0 : sdmu.bestmin0, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 37. # times the best (out of fw/rc) minimum # edits was 1 + itoa10(total ? sdm.bestmin1 : sdmu.bestmin1, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 38. # times the best (out of fw/rc) minimum # edits was 2 + itoa10(total ? sdm.bestmin2 : sdmu.bestmin2, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 39. Exact aligner attempts + itoa10(total ? swmSeed.exatts : swmuSeed.exatts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 40. Exact aligner successes + itoa10(total ? swmSeed.exsucc : swmuSeed.exsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 41. Exact aligner ranges + itoa10(total ? 
swmSeed.exranges : swmuSeed.exranges, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 42. Exact aligner rows + itoa10(total ? swmSeed.exrows : swmuSeed.exrows, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 43. Exact aligner OOMs + itoa10(total ? swmSeed.exooms : swmuSeed.exooms, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 44. 1mm aligner attempts + itoa10(total ? swmSeed.mm1atts : swmuSeed.mm1atts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 45. 1mm aligner successes + itoa10(total ? swmSeed.mm1succ : swmuSeed.mm1succ, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 46. 1mm aligner ranges + itoa10(total ? swmSeed.mm1ranges : swmuSeed.mm1ranges, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 47. 1mm aligner rows + itoa10(total ? swmSeed.mm1rows : swmuSeed.mm1rows, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 48. 1mm aligner OOMs + itoa10(total ? swmSeed.mm1ooms : swmuSeed.mm1ooms, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 49 Ungapped aligner success + itoa10(total ? swmSeed.ungapsucc : swmuSeed.ungapsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 50. Ungapped aligner fail + itoa10(total ? swmSeed.ungapfail : swmuSeed.ungapfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 51. Ungapped aligner no decision + itoa10(total ? 
swmSeed.ungapnodec : swmuSeed.ungapnodec, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 52. # seed-extend DPs with < 10 gaps + itoa10(total ? swmSeed.sws10 : swmuSeed.sws10, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 53. # seed-extend DPs with < 5 gaps + itoa10(total ? swmSeed.sws5 : swmuSeed.sws5, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 54. # seed-extend DPs with < 3 gaps + itoa10(total ? swmSeed.sws3 : swmuSeed.sws3, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 55. # seed-extend DPs with < 10 gaps + itoa10(total ? swmMate.sws10 : swmuMate.sws10, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 56. # seed-extend DPs with < 5 gaps + itoa10(total ? swmMate.sws5 : swmuMate.sws5, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 57. # seed-extend DPs with < 3 gaps + itoa10(total ? swmMate.sws3 : swmuMate.sws3, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const SSEMetrics& dpSse16s = total ? dpSse16Seed : dpSse16uSeed; + + // 58. 16-bit SSE seed-extend DPs tried + itoa10(dpSse16s.dp, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 59. 16-bit SSE seed-extend DPs saturated + itoa10(dpSse16s.dpsat, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 60. 16-bit SSE seed-extend DPs failed + itoa10(dpSse16s.dpfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 61. 
16-bit SSE seed-extend DPs succeeded + itoa10(dpSse16s.dpsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 62. 16-bit SSE seed-extend DP columns completed + itoa10(dpSse16s.col, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 63. 16-bit SSE seed-extend DP cells completed + itoa10(dpSse16s.cell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 64. 16-bit SSE seed-extend DP inner loop iters completed + itoa10(dpSse16s.inner, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 65. 16-bit SSE seed-extend DP fixup loop iters completed + itoa10(dpSse16s.fixup, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 66. 16-bit SSE seed-extend DP gather, cells with potential solutions + itoa10(dpSse16s.gathsol, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 67. 16-bit SSE seed-extend DP backtrace attempts + itoa10(dpSse16s.bt, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 68. 16-bit SSE seed-extend DP failed backtrace attempts + itoa10(dpSse16s.btfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 69. 16-bit SSE seed-extend DP succesful backtrace attempts + itoa10(dpSse16s.btsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 70. 16-bit SSE seed-extend DP backtrace cells + itoa10(dpSse16s.btcell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 71. 
16-bit SSE seed-extend DP core-diag rejections + itoa10(dpSse16s.corerej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 72. 16-bit SSE seed-extend DP N rejections + itoa10(dpSse16s.nrej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const SSEMetrics& dpSse8s = total ? dpSse8Seed : dpSse8uSeed; + + // 73. 8-bit SSE seed-extend DPs tried + itoa10(dpSse8s.dp, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 74. 8-bit SSE seed-extend DPs saturated + itoa10(dpSse8s.dpsat, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 75. 8-bit SSE seed-extend DPs failed + itoa10(dpSse8s.dpfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 76. 8-bit SSE seed-extend DPs succeeded + itoa10(dpSse8s.dpsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 77. 8-bit SSE seed-extend DP columns completed + itoa10(dpSse8s.col, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 78. 8-bit SSE seed-extend DP cells completed + itoa10(dpSse8s.cell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 79. 8-bit SSE seed-extend DP inner loop iters completed + itoa10(dpSse8s.inner, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 80. 8-bit SSE seed-extend DP fixup loop iters completed + itoa10(dpSse8s.fixup, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 81. 
16-bit SSE seed-extend DP gather, cells with potential solutions + itoa10(dpSse8s.gathsol, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 82. 16-bit SSE seed-extend DP backtrace attempts + itoa10(dpSse8s.bt, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 83. 16-bit SSE seed-extend DP failed backtrace attempts + itoa10(dpSse8s.btfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 84. 16-bit SSE seed-extend DP succesful backtrace attempts + itoa10(dpSse8s.btsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 85. 16-bit SSE seed-extend DP backtrace cells + itoa10(dpSse8s.btcell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 86. 16-bit SSE seed-extend DP core-diag rejections + itoa10(dpSse8s.corerej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 87. 16-bit SSE seed-extend DP N rejections + itoa10(dpSse8s.nrej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const SSEMetrics& dpSse16m = total ? dpSse16Mate : dpSse16uMate; + + // 88. 16-bit SSE mate-finding DPs tried + itoa10(dpSse16m.dp, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 89. 16-bit SSE mate-finding DPs saturated + itoa10(dpSse16m.dpsat, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 90. 16-bit SSE mate-finding DPs failed + itoa10(dpSse16m.dpfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 91. 
16-bit SSE mate-finding DPs succeeded + itoa10(dpSse16m.dpsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 92. 16-bit SSE mate-finding DP columns completed + itoa10(dpSse16m.col, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 93. 16-bit SSE mate-finding DP cells completed + itoa10(dpSse16m.cell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 94. 16-bit SSE mate-finding DP inner loop iters completed + itoa10(dpSse16m.inner, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 95. 16-bit SSE mate-finding DP fixup loop iters completed + itoa10(dpSse16m.fixup, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 96. 16-bit SSE mate-finding DP gather, cells with potential solutions + itoa10(dpSse16m.gathsol, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 97. 16-bit SSE mate-finding DP backtrace attempts + itoa10(dpSse16m.bt, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 98. 16-bit SSE mate-finding DP failed backtrace attempts + itoa10(dpSse16m.btfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 99. 16-bit SSE mate-finding DP succesful backtrace attempts + itoa10(dpSse16m.btsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 100. 16-bit SSE mate-finding DP backtrace cells + itoa10(dpSse16m.btcell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 101. 
16-bit SSE mate-finding DP core-diag rejections + itoa10(dpSse16m.corerej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 102. 16-bit SSE mate-finding DP N rejections + itoa10(dpSse16m.nrej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + const SSEMetrics& dpSse8m = total ? dpSse8Mate : dpSse8uMate; + + // 103. 8-bit SSE mate-finding DPs tried + itoa10(dpSse8m.dp, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 104. 8-bit SSE mate-finding DPs saturated + itoa10(dpSse8m.dpsat, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 105. 8-bit SSE mate-finding DPs failed + itoa10(dpSse8m.dpfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 106. 8-bit SSE mate-finding DPs succeeded + itoa10(dpSse8m.dpsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 107. 8-bit SSE mate-finding DP columns completed + itoa10(dpSse8m.col, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 108. 8-bit SSE mate-finding DP cells completed + itoa10(dpSse8m.cell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 109. 8-bit SSE mate-finding DP inner loop iters completed + itoa10(dpSse8m.inner, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 110. 8-bit SSE mate-finding DP fixup loop iters completed + itoa10(dpSse8m.fixup, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 111. 
16-bit SSE mate-finding DP gather, cells with potential solutions + itoa10(dpSse8m.gathsol, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 112. 16-bit SSE mate-finding DP backtrace attempts + itoa10(dpSse8m.bt, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 113. 16-bit SSE mate-finding DP failed backtrace attempts + itoa10(dpSse8m.btfail, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 114. 16-bit SSE mate-finding DP succesful backtrace attempts + itoa10(dpSse8m.btsucc, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 115. 16-bit SSE mate-finding DP backtrace cells + itoa10(dpSse8m.btcell, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 116. 16-bit SSE mate-finding DP core rejections + itoa10(dpSse8m.corerej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 117. 16-bit SSE mate-finding N rejections + itoa10(dpSse8m.nrej, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 118. Backtrace candidates filtered due to starting cell + itoa10(total ? nbtfiltst : nbtfiltst_u, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 119. Backtrace candidates filtered due to low score + itoa10(total ? nbtfiltsc : nbtfiltsc_u, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 120. Backtrace candidates filtered due to domination + itoa10(total ? nbtfiltdo : nbtfiltdo_u, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 121. 
Overall memory peak + itoa10(gMemTally.peak() >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 122. Uncategorized memory peak + itoa10(gMemTally.peak(0) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 123. Ebwt memory peak + itoa10(gMemTally.peak(EBWT_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 124. Cache memory peak + itoa10(gMemTally.peak(CA_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 125. Resolver memory peak + itoa10(gMemTally.peak(GW_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 126. Seed aligner memory peak + itoa10(gMemTally.peak(AL_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 127. Dynamic programming aligner memory peak + itoa10(gMemTally.peak(DP_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 128. Miscellaneous memory peak + itoa10(gMemTally.peak(MISC_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 129. 
Debug memory peak + itoa10(gMemTally.peak(DEBUG_CAT) >> 20, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + + // 130 + itoa10(him.localatts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 131 + itoa10(him.anchoratts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 132 + itoa10(him.localindexatts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 133 + itoa10(him.localextatts, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 134 + itoa10(him.localsearchrecur, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 135 + itoa10(him.globalgenomecoords, buf); + if(metricsStderr) stderrSs << buf << '\t'; + if(o != NULL) { o->writeChars(buf); o->write('\t'); } + // 136 + itoa10(him.localgenomecoords, buf); + if(metricsStderr) stderrSs << buf; + if(o != NULL) { o->writeChars(buf); } + + if(o != NULL) { o->write('\n'); } + if(metricsStderr) cerr << stderrSs.str().c_str() << endl; + if(!total) mergeIncrementals(); + } + + /** + * Fold every "since the last update" metrics object (the *u members) into + * its whole-job counterpart, then clear the incremental objects and + * counters so the next reporting interval starts from zero. Invoked at + * the end of the metrics-reporting method above whenever `total` is false. + * + * NOTE(review): rpmu is reset below but no rpm.merge(rpmu, ...) is visible + * in this method - confirm the reporting totals are accumulated elsewhere. + */ + void mergeIncrementals() { + olm.merge(olmu, false); + sdm.merge(sdmu, false); + wlm.merge(wlmu, false); + swmSeed.merge(swmuSeed, false); + swmMate.merge(swmuMate, false); + dpSse8Seed.merge(dpSse8uSeed, false); + dpSse8Mate.merge(dpSse8uMate, false); + dpSse16Seed.merge(dpSse16uSeed, false); + dpSse16Mate.merge(dpSse16uMate, false); + // Totals absorb the incremental counts, same direction as the merges above; + // the *_u counters are zeroed below, so adding into them would be lost. + nbtfiltst += nbtfiltst_u; + nbtfiltsc += nbtfiltsc_u; + nbtfiltdo += nbtfiltdo_u; + + olmu.reset(); + sdmu.reset(); + wlmu.reset(); + swmuSeed.reset(); + swmuMate.reset(); + rpmu.reset(); + dpSse8uSeed.reset(); + dpSse8uMate.reset(); + dpSse16uSeed.reset(); + dpSse16uMate.reset(); + nbtfiltst_u = 0; + nbtfiltsc_u = 0; + nbtfiltdo_u = 0; + } + + // Total over
the whole job + OuterLoopMetrics olm; // overall metrics + SeedSearchMetrics sdm; // metrics related to seed alignment + WalkMetrics wlm; // metrics related to walking left (i.e. resolving reference offsets) + SwMetrics swmSeed; // metrics related to DP seed-extend alignment + SwMetrics swmMate; // metrics related to DP mate-finding alignment + ReportingMetrics rpm; // metrics related to reporting + SSEMetrics dpSse8Seed; // 8-bit SSE seed extensions + SSEMetrics dpSse8Mate; // 8-bit SSE mate finds + SSEMetrics dpSse16Seed; // 16-bit SSE seed extensions + SSEMetrics dpSse16Mate; // 16-bit SSE mate finds + uint64_t nbtfiltst; + uint64_t nbtfiltsc; + uint64_t nbtfiltdo; + + // Just since the last update + OuterLoopMetrics olmu; // overall metrics + SeedSearchMetrics sdmu; // metrics related to seed alignment + WalkMetrics wlmu; // metrics related to walking left (i.e. resolving reference offsets) + SwMetrics swmuSeed; // metrics related to DP seed-extend alignment + SwMetrics swmuMate; // metrics related to DP mate-finding alignment + ReportingMetrics rpmu; // metrics related to reporting + SSEMetrics dpSse8uSeed; // 8-bit SSE seed extensions + SSEMetrics dpSse8uMate; // 8-bit SSE mate finds + SSEMetrics dpSse16uSeed; // 16-bit SSE seed extensions + SSEMetrics dpSse16uMate; // 16-bit SSE mate finds + uint64_t nbtfiltst_u; + uint64_t nbtfiltsc_u; + uint64_t nbtfiltdo_u; + + // + HIMetrics him; + + MUTEX_T mutex_m; // lock for when one ob + bool first; // yet to print first line? + time_t lastElapsed; // used in reportInterval to measure time since last call +}; + +static PerfMetrics metrics; + +// Cyclic rotations of a 32-bit value. The rotation count is the FIRST +// argument: ROTL(n, x) rotates x left by n bits, ROTR(n, x) rotates x right +// by n bits. Only meaningful for 0 < n < 32 (n == 0 would shift by the full +// width of the operand). +// NOTE(review): a call written as ROTL(value, 2) rotates the constant 2 by +// `value` bits rather than rotating `value` by 2 - check call sites. +#define ROTL(n, x) (((x) << (n)) | ((x) >> (32-(n)))) +#define ROTR(n, x) (((x) >> (n)) | ((x) << (32-(n)))) + +/** + * Warn on stderr that a read (or, when `paired` is true, mate #1/#2 of a + * read) is being skipped because its length is <= the number of seed + * mismatches. `mate1` selects which buffer is described: ps.bufa() when + * true, ps.bufb() when false. `seedmms` is the mismatch count echoed in the + * message. The text is assembled in a local stream and written to cerr in + * a single insertion. + */ +static inline void printMmsSkipMsg( + const PatternSourcePerThread& ps, + bool paired, + bool mate1, + int seedmms) +{ + ostringstream os; + if(paired) { + os << "Warning: skipping mate #" << (mate1 ? '1' : '2') + << " of read '" << (mate1 ?
ps.bufa().name : ps.bufb().name) + << "' because length (" << (mate1 ? ps.bufa().patFw.length() : ps.bufb().patFw.length()) + << ") <= # seed mismatches (" << seedmms << ")" << endl; + } else { + os << "Warning: skipping read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "' because length (" << (mate1 ? ps.bufa().patFw.length() : ps.bufb().patFw.length()) + << ") <= # seed mismatches (" << seedmms << ")" << endl; + } + cerr << os.str().c_str(); +} + +/** + * Warn on stderr that a read (or, when `paired` is true, mate #1/#2 of a + * read) is being skipped because it is shorter than 2 characters. `mate1` + * selects ps.bufa() (true) or ps.bufb() (false) as the read being named. + */ +static inline void printLenSkipMsg( + const PatternSourcePerThread& ps, + bool paired, + bool mate1) +{ + ostringstream os; + if(paired) { + os << "Warning: skipping mate #" << (mate1 ? '1' : '2') + << " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "' because it was < 2 characters long" << endl; + } else { + os << "Warning: skipping read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "' because it was < 2 characters long" << endl; + } + cerr << os.str().c_str(); +} + +/** + * Warn on stderr that the minimum-score function produced a negative value + * in --local mode for the named read (or mate #1/#2 when `paired` is true) + * and that 0 is used instead. This function only prints the message; it + * does not modify any score. `mate1` selects ps.bufa() (true) or ps.bufb() + * (false). + */ +static inline void printLocalScoreMsg( + const PatternSourcePerThread& ps, + bool paired, + bool mate1) +{ + ostringstream os; + if(paired) { + os << "Warning: minimum score function gave negative number in " + << "--local mode for mate #" << (mate1 ? '1' : '2') + << " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "'; setting to 0 instead" << endl; + } else { + os << "Warning: minimum score function gave negative number in " + << "--local mode for read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "'; setting to 0 instead" << endl; + } + cerr << os.str().c_str(); +} + +/** + * Warn on stderr that the minimum-score function produced a positive value + * in --end-to-end mode for the named read (or mate #1/#2 when `paired` is + * true) and that 0 is used instead. This function only prints the message; + * it does not modify any score. `mate1` selects ps.bufa() (true) or + * ps.bufb() (false). + */ +static inline void printEEScoreMsg( + const PatternSourcePerThread& ps, + bool paired, + bool mate1) +{ + ostringstream os; + if(paired) { + os << "Warning: minimum score function gave positive number in " + << "--end-to-end mode for mate #" << (mate1 ? '1' : '2') + << " of read '" << (mate1 ?
ps.bufa().name : ps.bufb().name) + << "'; setting to 0 instead" << endl; + } else { + os << "Warning: minimum score function gave positive number in " + << "--end-to-end mode for read '" << (mate1 ? ps.bufa().name : ps.bufb().name) + << "'; setting to 0 instead" << endl; + } + cerr << os.str().c_str(); +} + + +/** + * Hand the local reporting metrics to msink, merge the caller's local metrics + * objects into `met` (forwarding `sync` as the last argument of merge()), and + * then reset the local objects. Expands to a braced block and expects msink, + * olm, sdm, wlm, swmSeed, swmMate, rpm, sseU8ExtendMet, sseU8MateMet, + * sseI16ExtendMet, sseI16MateMet, nbtfiltst, nbtfiltsc, nbtfiltdo and him to + * be in scope at the expansion site. + * + * NOTE(review): nbtfiltst / nbtfiltsc / nbtfiltdo are passed to merge() but, + * unlike the metrics objects, are not reset afterwards - confirm merge() + * expects running totals for them. + */ +#define MERGE_METRICS(met, sync) { \ + msink.mergeMetrics(rpm); \ + met.merge( \ + &olm, \ + &sdm, \ + &wlm, \ + &swmSeed, \ + &swmMate, \ + &rpm, \ + &sseU8ExtendMet, \ + &sseU8MateMet, \ + &sseI16ExtendMet, \ + &sseI16MateMet, \ + nbtfiltst, \ + nbtfiltsc, \ + nbtfiltdo, \ + &him, \ + sync); \ + olm.reset(); \ + sdm.reset(); \ + wlm.reset(); \ + swmSeed.reset(); \ + swmMate.reset(); \ + rpm.reset(); \ + sseU8ExtendMet.reset(); \ + sseU8MateMet.reset(); \ + sseI16ExtendMet.reset(); \ + sseI16MateMet.reset(); \ + him.reset(); \ +} + +/** + * Call x.merge() with the local SSE metrics objects and backtrace-filter + * counters, then call x.resetCounters(). Presumably this folds x's own + * counters into those locals (x is reset right after) - confirm against the + * merge() of x's type. Expects the same local names as MERGE_METRICS. + */ +#define MERGE_SW(x) { \ + x.merge( \ + sseU8ExtendMet, \ + sseU8MateMet, \ + sseI16ExtendMet, \ + sseI16MateMet, \ + nbtfiltst, \ + nbtfiltsc, \ + nbtfiltdo); \ + x.resetCounters(); \ +} + +/** + * Called once per thread. Sets up per-thread pointers to the shared global + * data structures, creates per-thread structures, then enters the alignment + * loop. The general flow of the alignment loop is: + * + * - If it's been a while and we're the master thread, report some alignment + * metrics + * - Get the next read/pair + * - Check if this read/pair is identical to the previous + * + If identical, check whether we can skip any or all alignment stages.
If + * we can skip all stages, report the result immediately and move to next + * read/pair + * + If not identical, continue + * - + */ +static void multiseedSearchWorker_hisat_bp(void *vp) { + int tid = *((int*)vp); + assert(multiseed_ebwtFw != NULL); + assert(multiseedMms == 0 || multiseed_ebwtBw != NULL); + PairedPatternSource& patsrc = *multiseed_patsrc; + const HierEbwt& ebwtFw = *multiseed_ebwtFw; + const HierEbwt& ebwtBw = *multiseed_ebwtBw; + const Scoring& sc = *multiseed_sc; + const BitPairReference& ref = *multiseed_refs; + AlignmentCache& scShared = *multiseed_ca; + AlnSink& msink = *multiseed_msink; + OutFileBuf* metricsOfb = multiseed_metricsOfb; + + // Sinks: these are so that we can print tables encoding counts for + // events of interest on a per-read, per-seed, per-join, or per-SW + // level. These in turn can be used to diagnose performance + // problems, or generally characterize performance. + + //const BitPairReference& refs = *multiseed_refs; + auto_ptr patsrcFact(createPatsrcFactory(patsrc, tid)); + auto_ptr ps(patsrcFact->create()); + + // Thread-local cache for seed alignments + PtrWrap > scLocal; + if(!msNoCache) { + scLocal.init(new AlignmentCache(seedCacheLocalMB * 1024 * 1024, false)); + } + AlignmentCache scCurrent(seedCacheCurrentMB * 1024 * 1024, false); + // Thread-local cache for current seed alignments + + // Interfaces for alignment and seed caches + AlignmentCacheIface ca( + &scCurrent, + scLocal.get(), + msNoCache ? NULL : &scShared); + + // Instantiate an object for holding reporting-related parameters. + ReportingParams rp( + (allHits ? std::numeric_limits::max() : khits), // -k + mhits, // -m/-M + 0, // penalty gap (not used now) + msample, // true -> -M was specified, otherwise assume -m + gReportDiscordant, // report discordang paired-end alignments? + gReportMixed); // report unpaired alignments for paired reads? 
+ + // Instantiate a mapping quality calculator + auto_ptr bmapq(new_mapq(mapqv, scoreMin, sc)); + + // Make a per-thread wrapper for the global MHitSink object. + AlnSinkWrap msinkwrap( + msink, // global sink + rp, // reporting parameters + *bmapq.get(), // MAPQ calculator + (size_t)tid); // thread id + + BP_Aligner bp_Aligner( + ebwtFw, + multiseed_refnames, + &multiseed_mutex, + thread_rids_mindist, + no_spliced_alignment); + SwAligner sw; + OuterLoopMetrics olm; + SeedSearchMetrics sdm; + WalkMetrics wlm; + SwMetrics swmSeed, swmMate; + ReportingMetrics rpm; + RandomSource rnd, rndArb; + SSEMetrics sseU8ExtendMet; + SSEMetrics sseU8MateMet; + SSEMetrics sseI16ExtendMet; + SSEMetrics sseI16MateMet; + DescentMetrics descm; + uint64_t nbtfiltst = 0; // TODO: find a new home for these + uint64_t nbtfiltsc = 0; // TODO: find a new home for these + uint64_t nbtfiltdo = 0; // TODO: find a new home for these + HIMetrics him; + + ASSERT_ONLY(BTDnaString tmp); + + int pepolFlag; + if(gMate1fw && gMate2fw) { + pepolFlag = PE_POLICY_FF; + } else if(gMate1fw && !gMate2fw) { + pepolFlag = PE_POLICY_FR; + } else if(!gMate1fw && gMate2fw) { + pepolFlag = PE_POLICY_RF; + } else { + pepolFlag = PE_POLICY_RR; + } + assert_geq(gMaxInsert, gMinInsert); + assert_geq(gMinInsert, 0); + PairedEndPolicy pepol( + pepolFlag, + gMaxInsert, + gMinInsert, + localAlign, + gFlippedMatesOK, + gDovetailMatesOK, + gContainMatesOK, + gOlapMatesOK, + gExpandToFrag); + + PerfMetrics metricsPt; // per-thread metrics object; for read-level metrics + BTString nametmp; + + PerReadMetrics prm; + + // Used by thread with threadid == 1 to measure time elapsed + time_t iTime = time(0); + + // Keep track of whether last search was exhaustive for mates 1 and 2 + bool exhaustive[2] = { false, false }; + // Keep track of whether mates 1/2 were filtered out last time through + bool filt[2] = { true, true }; + // Keep track of whether mates 1/2 were filtered out due Ns last time + bool nfilt[2] = { true, true }; 
+ // Keep track of whether mates 1/2 were filtered out due to not having + // enough characters to rise about the score threshold. + bool scfilt[2] = { true, true }; + // Keep track of whether mates 1/2 were filtered out due to not having + // more characters than the number of mismatches permitted in a seed. + bool lenfilt[2] = { true, true }; + // Keep track of whether mates 1/2 were filtered out by upstream qc + bool qcfilt[2] = { true, true }; + + rndArb.init((uint32_t)time(0)); + int mergei = 0; + int mergeival = 16; + while(true) { + bool success = false, done = false, paired = false; + ps->nextReadPair(success, done, paired, outType != OUTPUT_SAM); + if(!success && done) { + break; + } else if(!success) { + continue; + } + TReadId rdid = ps->rdid(); + + if(nthreads > 1 && useTempSpliceSite) { + while(true) { + uint64_t min_rdid = 0; + { + // ThreadSafe t(&thread_rids_mutex, nthreads > 1); + assert_gt(thread_rids.size(), 0); + min_rdid = thread_rids[0]; + for(size_t i = 1; i < thread_rids.size(); i++) { + if(thread_rids[i] < min_rdid) { + min_rdid = thread_rids[i]; + } + } + } + + if(min_rdid + thread_rids_mindist < rdid) { +#if defined(_TTHREAD_WIN32_) + Sleep(0); +#elif defined(_TTHREAD_POSIX_) + sched_yield(); +#endif + } else break; + } + } + + bool sample = true; + if(arbitraryRandom) { + ps->bufa().seed = rndArb.nextU32(); + ps->bufb().seed = rndArb.nextU32(); + } + if(sampleFrac < 1.0f) { + rnd.init(ROTL(ps->bufa().seed, 2)); + sample = rnd.nextFloat() < sampleFrac; + } + if(rdid >= skipReads && rdid < qUpto && sample) { + // Align this read/pair + bool retry = true; + // + // Check if there is metrics reporting for us to do. + // + if(metricsIval > 0 && + (metricsOfb != NULL || metricsStderr) && + !metricsPerRead && + ++mergei == mergeival) + { + // Do a periodic merge. Update global metrics, in a + // synchronized manner if needed. 
+ MERGE_METRICS(metrics, nthreads > 1); + mergei = 0; + // Check if a progress message should be printed + if(tid == 0) { + // Only thread 1 prints progress messages + time_t curTime = time(0); + if(curTime - iTime >= metricsIval) { + metrics.reportInterval(metricsOfb, metricsStderr, false, true, NULL); + iTime = curTime; + } + } + } + prm.reset(); // per-read metrics + prm.doFmString = false; + if(sam_print_xt) { + gettimeofday(&prm.tv_beg, &prm.tz_beg); + } + // Try to align this read + while(retry) { + retry = false; + assert_eq(ps->bufa().color, false); + ca.nextRead(); // clear the cache + olm.reads++; + assert(!ca.aligning()); + bool pair = paired; + const size_t rdlen1 = ps->bufa().length(); + const size_t rdlen2 = pair ? ps->bufb().length() : 0; + olm.bases += (rdlen1 + rdlen2); +#if 0 + msinkwrap.nextRead( + &ps->bufa(), + pair ? &ps->bufb() : NULL, + rdid, + sc.qualitiesMatter()); + assert(msinkwrap.inited()); +#endif + size_t rdlens[2] = { rdlen1, rdlen2 }; + // Calculate the minimum valid score threshold for the read + TAlScore minsc[2], maxpen[2]; + maxpen[0] = maxpen[1] = 0; + minsc[0] = minsc[1] = std::numeric_limits::max(); + if(bwaSwLike) { + // From BWA-SW manual: "Given an l-long query, the + // threshold for a hit to be retained is + // a*max{T,c*log(l)}." We try to recreate that here. 
+ float a = (float)sc.match(30); + float T = bwaSwLikeT, c = bwaSwLikeC; + minsc[0] = (TAlScore)max(a*T, a*c*log(rdlens[0])); + if(paired) { + minsc[1] = (TAlScore)max(a*T, a*c*log(rdlens[1])); + } + } else { + minsc[0] = scoreMin.f(rdlens[0]); + if(paired) minsc[1] = scoreMin.f(rdlens[1]); + if(localAlign) { + if(minsc[0] < 0) { + if(!gQuiet) printLocalScoreMsg(*ps, paired, true); + minsc[0] = 0; + } + if(paired && minsc[1] < 0) { + if(!gQuiet) printLocalScoreMsg(*ps, paired, false); + minsc[1] = 0; + } + } else { + if(minsc[0] > 0) { + if(!gQuiet) printEEScoreMsg(*ps, paired, true); + minsc[0] = 0; + } + if(paired && minsc[1] > 0) { + if(!gQuiet) printEEScoreMsg(*ps, paired, false); + minsc[1] = 0; + } + } + } + + // N filter; does the read have too many Ns? + size_t readns[2] = {0, 0}; + sc.nFilterPair( + &ps->bufa().patFw, + pair ? &ps->bufb().patFw : NULL, + readns[0], + readns[1], + nfilt[0], + nfilt[1]); + // Score filter; does the read enough character to rise above + // the score threshold? + scfilt[0] = sc.scoreFilter(minsc[0], rdlens[0]); + scfilt[1] = sc.scoreFilter(minsc[1], rdlens[1]); + lenfilt[0] = lenfilt[1] = true; + if(rdlens[0] <= (size_t)multiseedMms || rdlens[0] < 2) { + if(!gQuiet) printMmsSkipMsg(*ps, paired, true, multiseedMms); + lenfilt[0] = false; + } + if((rdlens[1] <= (size_t)multiseedMms || rdlens[1] < 2) && paired) { + if(!gQuiet) printMmsSkipMsg(*ps, paired, false, multiseedMms); + lenfilt[1] = false; + } + if(rdlens[0] < 2) { + if(!gQuiet) printLenSkipMsg(*ps, paired, true); + lenfilt[0] = false; + } + if(rdlens[1] < 2 && paired) { + if(!gQuiet) printLenSkipMsg(*ps, paired, false); + lenfilt[1] = false; + } + qcfilt[0] = qcfilt[1] = true; + if(qcFilter) { + qcfilt[0] = (ps->bufa().filter != '0'); + qcfilt[1] = (ps->bufb().filter != '0'); + } + filt[0] = (nfilt[0] && scfilt[0] && lenfilt[0] && qcfilt[0]); + filt[1] = (nfilt[1] && scfilt[1] && lenfilt[1] && qcfilt[1]); + prm.nFilt += (filt[0] ? 0 : 1) + (filt[1] ? 
0 : 1); + Read* rds[2] = { &ps->bufa(), &ps->bufb() }; + // For each mate... + assert(msinkwrap.empty()); + //size_t minedfw[2] = { 0, 0 }; + //size_t minedrc[2] = { 0, 0 }; + // Calcualte nofw / no rc + bool nofw[2] = { false, false }; + bool norc[2] = { false, false }; + nofw[0] = paired ? (gMate1fw ? gNofw : gNorc) : gNofw; + norc[0] = paired ? (gMate1fw ? gNorc : gNofw) : gNorc; + nofw[1] = paired ? (gMate2fw ? gNofw : gNorc) : gNofw; + norc[1] = paired ? (gMate2fw ? gNorc : gNofw) : gNorc; + // Calculate nceil + int nceil[2] = { 0, 0 }; + nceil[0] = nCeil.f((double)rdlens[0]); + nceil[0] = min(nceil[0], (int)rdlens[0]); + if(paired) { + nceil[1] = nCeil.f((double)rdlens[1]); + nceil[1] = min(nceil[1], (int)rdlens[1]); + } + exhaustive[0] = exhaustive[1] = false; + //size_t matemap[2] = { 0, 1 }; + bool pairPostFilt = filt[0] && filt[1]; + if(pairPostFilt) { + rnd.init(ps->bufa().seed ^ ps->bufb().seed); + } else { + rnd.init(ps->bufa().seed); + } + // Calculate interval length for both mates + int interval[2] = { 0, 0 }; + for(size_t mate = 0; mate < (pair ? 
2:1); mate++) { + interval[mate] = msIval.f((double)rdlens[mate]); + if(filt[0] && filt[1]) { + // Boost interval length by 20% for paired-end reads + interval[mate] = (int)(interval[mate] * 1.2 + 0.5); + } + interval[mate] = max(interval[mate], 1); + } + // Calculate streak length + size_t streak[2] = { maxDpStreak, maxDpStreak }; + size_t mtStreak[2] = { maxMateStreak, maxMateStreak }; + size_t mxDp[2] = { maxDp, maxDp }; + size_t mxUg[2] = { maxUg, maxUg }; + size_t mxIter[2] = { maxIters, maxIters }; + if(allHits) { + streak[0] = streak[1] = std::numeric_limits::max(); + mtStreak[0] = mtStreak[1] = std::numeric_limits::max(); + mxDp[0] = mxDp[1] = std::numeric_limits::max(); + mxUg[0] = mxUg[1] = std::numeric_limits::max(); + mxIter[0] = mxIter[1] = std::numeric_limits::max(); + } else if(khits > 1) { + for(size_t mate = 0; mate < 2; mate++) { + streak[mate] += (khits-1) * maxStreakIncr; + mtStreak[mate] += (khits-1) * maxStreakIncr; + mxDp[mate] += (khits-1) * maxItersIncr; + mxUg[mate] += (khits-1) * maxItersIncr; + mxIter[mate] += (khits-1) * maxItersIncr; + } + } + if(filt[0] && filt[1]) { + streak[0] = (size_t)ceil((double)streak[0] / 2.0); + streak[1] = (size_t)ceil((double)streak[1] / 2.0); + assert_gt(streak[1], 0); + } + assert_gt(streak[0], 0); + // Calculate # seed rounds for each mate + size_t nrounds[2] = { nSeedRounds, nSeedRounds }; + if(filt[0] && filt[1]) { + nrounds[0] = (size_t)ceil((double)nrounds[0] / 2.0); + nrounds[1] = (size_t)ceil((double)nrounds[1] / 2.0); + assert_gt(nrounds[1], 0); + } + assert_gt(nrounds[0], 0); + // Increment counters according to what got filtered + for(size_t mate = 0; mate < (pair ? 2:1); mate++) { + if(!filt[mate]) { + // Mate was rejected by N filter + olm.freads++; // reads filtered out + olm.fbases += rdlens[mate]; // bases filtered out + } else { + //shs[mate].clear(); + //shs[mate].nextRead(mate == 0 ? 
ps->bufa() : ps->bufb()); + //assert(shs[mate].empty()); + olm.ureads++; // reads passing filter + olm.ubases += rdlens[mate]; // bases passing filter + } + } + //size_t eePeEeltLimit = std::numeric_limits::max(); + // Whether we're done with mate1 / mate2 + bool done[2] = { !filt[0], !filt[1] }; + // size_t nelt[2] = {0, 0}; + if(filt[0] && filt[1]) { + bp_Aligner.initReads(rds, nofw, norc, minsc, maxpen); + } else if(filt[0] || filt[1]) { + bp_Aligner.initRead(rds[0], nofw[0], norc[0], minsc[0], maxpen[0], filt[1]); + } + if(filt[0] || filt[1]) { + int ret = bp_Aligner.go(sc, ebwtFw, ebwtBw, ref, sw, *ssdb, wlm, prm, swmSeed, him, rnd, msinkwrap); + MERGE_SW(sw); + // daehwan + size_t mate = 0; + + assert_gt(ret, 0); + // Clear out the exact hits so that we don't try to + // extend them again later! + if(ret == EXTEND_EXHAUSTED_CANDIDATES) { + // Not done yet + } else if(ret == EXTEND_POLICY_FULFILLED) { + // Policy is satisfied for this mate at least + if(msinkwrap.state().doneWithMate(mate == 0)) { + done[mate] = true; + } + if(msinkwrap.state().doneWithMate(mate == 1)) { + done[mate^1] = true; + } + } else if(ret == EXTEND_PERFECT_SCORE) { + // We exhausted this mode at least + done[mate] = true; + } else if(ret == EXTEND_EXCEEDED_HARD_LIMIT) { + // We exceeded a per-read limit + done[mate] = true; + } else if(ret == EXTEND_EXCEEDED_SOFT_LIMIT) { + // Not done yet + } else { + // + cerr << "Bad return value: " << ret << endl; + throw 1; + } + if(!done[mate]) { + TAlScore perfectScore = sc.perfectScore(rdlens[mate]); + if(!done[mate] && minsc[mate] == perfectScore) { + done[mate] = true; + } + } + } + + for(size_t i = 0; i < 2; i++) { + assert_leq(prm.nExIters, mxIter[i]); + assert_leq(prm.nExDps, mxDp[i]); + assert_leq(prm.nMateDps, mxDp[i]); + assert_leq(prm.nExUgs, mxUg[i]); + assert_leq(prm.nMateUgs, mxUg[i]); + assert_leq(prm.nDpFail, streak[i]); + assert_leq(prm.nUgFail, streak[i]); + assert_leq(prm.nEeFail, streak[i]); + } + +#if 0 + // Commit and report 
paired-end/unpaired alignments + msinkwrap.finishRead( + NULL, + NULL, + exhaustive[0], // exhausted seed hits for mate 1? + exhaustive[1], // exhausted seed hits for mate 2? + nfilt[0], + nfilt[1], + scfilt[0], + scfilt[1], + lenfilt[0], + lenfilt[1], + qcfilt[0], + qcfilt[1], + sortByScore, // prioritize by alignment score + rnd, // pseudo-random generator + rpm, // reporting metrics + prm, // per-read metrics + sc, // scoring scheme + !seedSumm, // suppress seed summaries? + seedSumm); // suppress alignments? + assert(!retry || msinkwrap.empty()); +#endif + + if(nthreads > 1 && useTempSpliceSite) { + // ThreadSafe t(&thread_rids_mutex, nthreads > 1); + assert_gt(tid, 0); + assert_leq(tid, thread_rids.size()); + assert(thread_rids[tid - 1] == 0 || rdid > thread_rids[tid - 1]); + thread_rids[tid - 1] = rdid; + } + } // while(retry) + } // if(rdid >= skipReads && rdid < qUpto) + else if(rdid >= qUpto) { + break; + } + if(metricsPerRead) { + MERGE_METRICS(metricsPt, nthreads > 1); + nametmp = ps->bufa().name; + metricsPt.reportInterval( + metricsOfb, metricsStderr, true, true, &nametmp); + metricsPt.reset(); + } + } // while(true) + + // One last metrics merge + MERGE_METRICS(metrics, nthreads > 1); + + return; +} + +/** + * Called once per alignment job. Sets up global pointers to the + * shared global data structures, creates per-thread structures, then + * enters the search loop. 
+ */ +static void multiseedSearch( + Scoring& sc, + PairedPatternSource& patsrc, // pattern source + AlnSink& msink, // hit sink + HierEbwt& ebwtFw, // index of original text + HierEbwt& ebwtBw, // index of mirror text + BitPairReference* refs, + const EList& refnames, + OutFileBuf *metricsOfb) +{ + multiseed_patsrc = &patsrc; + multiseed_msink = &msink; + multiseed_ebwtFw = &ebwtFw; + multiseed_ebwtBw = &ebwtBw; + multiseed_sc = ≻ + multiseed_refnames = refnames; + multiseed_metricsOfb = metricsOfb; + multiseed_refs = refs; + AutoArray threads(nthreads); + AutoArray tids(nthreads); + { + // Load the other half of the index into memory + assert(!ebwtFw.isInMemory()); + Timer _t(cerr, "Time loading forward index: ", timing); + ebwtFw.loadIntoMemory( + 0, // colorspace? + -1, // not the reverse index + true, // load SA samp? (yes, need forward index's SA samp) + true, // load ftab (in forward index) + true, // load rstarts (in forward index) + !noRefNames, // load names? + startVerbose); + } +#if 0 + if(multiseedMms > 0 || do1mmUpFront) { + // Load the other half of the index into memory + assert(!ebwtBw.isInMemory()); + Timer _t(cerr, "Time loading mirror index: ", timing); + ebwtBw.loadIntoMemory( + 0, // colorspace? + // It's bidirectional search, so we need the reverse to be + // constructed as the reverse of the concatenated strings. + 1, + true, // load SA samp in reverse index + true, // yes, need ftab in reverse index + true, // load rstarts in reverse index + !noRefNames, // load names? + startVerbose); + } +#endif + // Start the metrics thread + { + Timer _t(cerr, "Multiseed full-index search: ", timing); + + thread_rids.resize(nthreads); + thread_rids.fill(0); + thread_rids_mindist = (nthreads == 1 || !useTempSpliceSite ? 
0 : 1000 * nthreads); + + for(int i = 0; i < nthreads; i++) { + // Thread IDs start at 1 + tids[i] = i+1; + threads[i] = new tthread::thread(multiseedSearchWorker_hisat_bp, (void*)&tids[i]); + } + + for (int i = 0; i < nthreads; i++) + threads[i]->join(); + + } + if(!metricsPerRead && (metricsOfb != NULL || metricsStderr)) { + metrics.reportInterval(metricsOfb, metricsStderr, true, false, NULL); + } +} + +static string argstr; + +extern void initializeCntLut(); +extern void initializeCntBit(); + +template +static void driver( + const char * type, + const string& bt2indexBase, + const string& outfile) +{ + if(gVerbose || startVerbose) { + cerr << "Entered driver(): "; logTime(cerr, true); + } + + initializeCntLut(); + initializeCntBit(); + + // Vector of the reference sequences; used for sanity-checking + EList > names, os; + EList nameLens, seqLens; + // Read reference sequences from the command-line or from a FASTA file + if(!origString.empty()) { + // Read fasta file(s) + EList origFiles; + tokenize(origString, ",", origFiles); + parseFastas(origFiles, names, nameLens, os, seqLens); + } + PatternParams pp( + format, // file format + fileParallel, // true -> wrap files with separate PairedPatternSources + seed, // pseudo-random seed + useSpinlock, // use spin locks instead of pthreads + solexaQuals, // true -> qualities are on solexa64 scale + phred64Quals, // true -> qualities are on phred64 scale + integerQuals, // true -> qualities are space-separated numbers + fuzzy, // true -> try to parse fuzzy fastq + fastaContLen, // length of sampled reads for FastaContinuous... + fastaContFreq, // frequency of sampled reads for FastaContinuous... 
+ skipReads // skip the first 'skip' patterns + ); + if(gVerbose || startVerbose) { + cerr << "Creating PatternSource: "; logTime(cerr, true); + } + PairedPatternSource *patsrc = PairedPatternSource::setupPatternSources( + queries, // singles, from argv + mates1, // mate1's, from -1 arg + mates2, // mate2's, from -2 arg + mates12, // both mates on each line, from --12 arg + qualities, // qualities associated with singles + qualities1, // qualities associated with m1 + qualities2, // qualities associated with m2 + pp, // read read-in parameters + gVerbose || startVerbose); // be talkative + // Open hit output file + if(gVerbose || startVerbose) { + cerr << "Opening hit output file: "; logTime(cerr, true); + } + OutFileBuf *fout; + if(!outfile.empty()) { + fout = new OutFileBuf(outfile.c_str(), false); + } else { + fout = new OutFileBuf(); + } + // Initialize Ebwt object and read in header + if(gVerbose || startVerbose) { + cerr << "About to initialize fw Ebwt: "; logTime(cerr, true); + } + adjIdxBase = adjustEbwtBase(argv0, bt2indexBase, gVerbose); + HierEbwt ebwt( + adjIdxBase, + 0, // index is colorspace + -1, // fw index + true, // index is for the forward direction + /* overriding: */ offRate, + 0, // amount to add to index offrate or <= 0 to do nothing + useMm, // whether to use memory-mapped files + useShmem, // whether to use shared memory + mmSweep, // sweep memory-mapped files + !noRefNames, // load names? + true, // load SA sample? + true, // load ftab? + true, // load rstarts? 
+ gVerbose, // whether to be talkative + startVerbose, // talkative during initialization + false /*passMemExc*/, + sanityCheck); + HierEbwt* ebwtBw = NULL; +#if 0 + // We need the mirror index if mismatches are allowed + if(multiseedMms > 0 || do1mmUpFront) { + if(gVerbose || startVerbose) { + cerr << "About to initialize rev Ebwt: "; logTime(cerr, true); + } + ebwtBw = new HierEbwt( + adjIdxBase + ".rev", + 0, // index is colorspace + 1, // TODO: maybe not + false, // index is for the reverse direction + /* overriding: */ offRate, + 0, // amount to add to index offrate or <= 0 to do nothing + useMm, // whether to use memory-mapped files + useShmem, // whether to use shared memory + mmSweep, // sweep memory-mapped files + !noRefNames, // load names? + true, // load SA sample? + true, // load ftab? + true, // load rstarts? + gVerbose, // whether to be talkative + startVerbose, // talkative during initialization + false /*passMemExc*/, + sanityCheck); + } +#endif + if(sanityCheck && !os.empty()) { + // Sanity check number of patterns and pattern lengths in Ebwt + // against original strings + assert_eq(os.size(), ebwt.nPat()); + for(size_t i = 0; i < os.size(); i++) { + assert_eq(os[i].length(), ebwt.plen()[i]); + } + } + // Sanity-check the restored version of the Ebwt + if(sanityCheck && !os.empty()) { + ebwt.loadIntoMemory( + 0, + -1, // fw index + true, // load SA sample + true, // load ftab + true, // load rstarts + !noRefNames, + startVerbose); + ebwt.checkOrigs(os, false, false); + ebwt.evictFromMemory(); + } + OutputQueue oq( + *fout, // out file buffer + reorder && nthreads > 1, // whether to reorder when there's >1 thread + nthreads, // # threads + nthreads > 1, // whether to be thread-safe + skipReads); // first read will have this rdid + { + Timer _t(cerr, "Time searching: ", timing); + // Set up penalities + if(bonusMatch > 0 && !localAlign) { + cerr << "Warning: Match bonus always = 0 in --end-to-end mode; ignoring user setting" << endl; + bonusMatch = 
0; + } + Scoring sc( + bonusMatch, // constant reward for match + penMmcType, // how to penalize mismatches + penMmcMax, // max mm pelanty + penMmcMin, // min mm pelanty + scoreMin, // min score as function of read len + nCeil, // max # Ns as function of read len + penNType, // how to penalize Ns in the read + penN, // constant if N pelanty is a constant + penNCatPair, // whether to concat mates before N filtering + penRdGapConst, // constant coeff for read gap cost + penRfGapConst, // constant coeff for ref gap cost + penRdGapLinear, // linear coeff for read gap cost + penRfGapLinear, // linear coeff for ref gap cost + gGapBarrier, // # rows at top/bot only entered diagonally + penCanSplice, // canonical splicing penalty + penNoncanSplice,// non-canonical splicing penalty + &penIntronLen); // penalty as to intron length + EList reflens; + for(size_t i = 0; i < ebwt.nPat(); i++) { + reflens.push_back(ebwt.plen()[i]); + } + EList refnames; + readEbwtRefnames(adjIdxBase, refnames); + SamConfig samc( + refnames, // reference sequence names + reflens, // reference sequence lengths + samTruncQname, // whether to truncate QNAME to 255 chars + samOmitSecSeqQual, // omit SEQ/QUAL for 2ndary alignments? + samNoUnal, // omit unaligned-read records? 
+ string("hisat"), // program id + string("hisat"), // program name + string(HISAT_VERSION), // program version + argstr, // command-line + rgs_optflag, // read-group string + rna_strandness, + sam_print_as, + sam_print_xs, + sam_print_xss, + sam_print_yn, + sam_print_xn, + sam_print_cs, + sam_print_cq, + sam_print_x0, + sam_print_x1, + sam_print_xm, + sam_print_xo, + sam_print_xg, + sam_print_nm, + sam_print_md, + sam_print_yf, + sam_print_yi, + sam_print_ym, + sam_print_yp, + sam_print_yt, + sam_print_ys, + sam_print_zs, + sam_print_xr, + sam_print_xt, + sam_print_xd, + sam_print_xu, + sam_print_yl, + sam_print_ye, + sam_print_yu, + sam_print_xp, + sam_print_yr, + sam_print_zb, + sam_print_zr, + sam_print_zf, + sam_print_zm, + sam_print_zi, + sam_print_zp, + sam_print_zu, + sam_print_xs_a); + // Set up hit sink; if sanityCheck && !os.empty() is true, + // then instruct the sink to "retain" hits in a vector in + // memory so that we can easily sanity check them later on + AlnSink *mssink = NULL; + Timer *_tRef = new Timer(cerr, "Time loading reference: ", timing); + auto_ptr refs( + new BitPairReference( + adjIdxBase, + false, + sanityCheck, + NULL, + NULL, + false, + useMm, + useShmem, + mmSweep, + gVerbose, + startVerbose) + ); + delete _tRef; + if(!refs->loaded()) throw 1; + + init_junction_prob(); + bool write = novelSpliceSiteOutfile != "" || useTempSpliceSite; + bool read = knownSpliceSiteInfile != "" || novelSpliceSiteInfile != "" || useTempSpliceSite; + ssdb = new SpliceSiteDB( + *(refs.get()), + refnames, + nthreads > 1, // thread-safe + write, // write? + read); // read? 
+ if(ssdb != NULL) { + if(knownSpliceSiteInfile != "") { + ifstream ssdb_file(knownSpliceSiteInfile.c_str(), ios::in); + if(ssdb_file.is_open()) { + ssdb->read(ssdb_file, false); // known splice sites + ssdb_file.close(); + } + } + if(novelSpliceSiteInfile != "") { + ifstream ssdb_file(novelSpliceSiteInfile.c_str(), ios::in); + if(ssdb_file.is_open()) { + ssdb->read(ssdb_file, true); // novel splice sites + ssdb_file.close(); + } + } + } + switch(outType) { + case OUTPUT_SAM: { + mssink = new AlnSinkSam( + oq, // output queue + samc, // settings & routines for SAM output + refnames, // reference names + gQuiet, // don't print alignment summary at end + ssdb); +#if 0 + if(!samNoHead) { + bool printHd = true, printSq = true; + BTString buf; + samc.printHeader(buf, rgid, rgs, printHd, !samNoSQ, printSq); + fout->writeString(buf); + } +#endif + break; + } + default: + cerr << "Invalid output type: " << outType << endl; + throw 1; + } + if(gVerbose || startVerbose) { + cerr << "Dispatching to search driver: "; logTime(cerr, true); + } + // Set up global constraint + OutFileBuf *metricsOfb = NULL; + if(!metricsFile.empty() && metricsIval > 0) { + metricsOfb = new OutFileBuf(metricsFile); + } + // Do the search for all input reads + assert(patsrc != NULL); + assert(mssink != NULL); + multiseedSearch( + sc, // scoring scheme + *patsrc, // pattern source + *mssink, // hit sink + ebwt, // BWT + *ebwtBw, // BWT' + refs.get(), + refnames, + metricsOfb); + // Evict any loaded indexes from memory + if(ebwt.isInMemory()) { + ebwt.evictFromMemory(); + } + if(ebwtBw != NULL) { + delete ebwtBw; + } + if(!gQuiet && !seedSumm) { + size_t repThresh = mhits; + if(repThresh == 0) { + repThresh = std::numeric_limits::max(); + } +#if 0 + mssink->finish( + repThresh, + gReportDiscordant, + gReportMixed, + hadoopOut); +#endif + } + if(ssdb != NULL) { + if(novelSpliceSiteOutfile != "") { + ofstream ssdb_file(novelSpliceSiteOutfile.c_str(), ios::out); + if(ssdb_file.is_open()) { + 
ssdb->print(ssdb_file); + ssdb_file.close(); + } + } + } + oq.flush(true); + assert_eq(oq.numStarted(), oq.numFinished()); + assert_eq(oq.numStarted(), oq.numFlushed()); + delete patsrc; + delete mssink; + delete ssdb; + delete metricsOfb; + if(fout != NULL) { + delete fout; + } + } +} + +// C++ name mangling is disabled for the bowtie() function to make it +// easier to use Bowtie as a library. +extern "C" { + +/** + * Main bowtie entry function. Parses argc/argv style command-line + * options, sets global configuration variables, and calls the driver() + * function. + */ +int hisat(int argc, const char **argv) { + try { + // Reset all global state, including getopt state + opterr = optind = 1; + resetOptions(); + for(int i = 0; i < argc; i++) { + argstr += argv[i]; + if(i < argc-1) argstr += " "; + } + if(startVerbose) { cerr << "Entered main(): "; logTime(cerr, true); } + parseOptions(argc, argv); + argv0 = argv[0]; + if(showVersion) { + cout << argv0 << " version " << HISAT_VERSION << endl; + if(sizeof(void*) == 4) { + cout << "32-bit" << endl; + } else if(sizeof(void*) == 8) { + cout << "64-bit" << endl; + } else { + cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + } + cout << "Built on " << BUILD_HOST << endl; + cout << BUILD_TIME << endl; + cout << "Compiler: " << COMPILER_VERSION << endl; + cout << "Options: " << COMPILER_OPTIONS << endl; + cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" + << sizeof(int) + << ", " << sizeof(long) << ", " << sizeof(long long) + << ", " << sizeof(void *) << ", " << sizeof(size_t) + << ", " << sizeof(off_t) << "}" << endl; + return 0; + } + { + Timer _t(cerr, "Overall time: ", timing); + if(startVerbose) { + cerr << "Parsing index and read arguments: "; logTime(cerr, true); + } + + // Get index basename (but only if it wasn't specified via --index) + if(bt2index.empty()) { + if(optind >= argc) { + cerr << "No index, query, or output file specified!" 
<< endl; + printUsage(cerr); + return 1; + } + bt2index = argv[optind++]; + } + + // Get query filename + bool got_reads = !queries.empty() || !mates1.empty() || !mates12.empty(); + if(optind >= argc) { + if(!got_reads) { + printUsage(cerr); + cerr << "***" << endl + << "Error: Must specify at least one read input with -U/-1/-2" << endl; + return 1; + } + } else if(!got_reads) { + // Tokenize the list of query files + tokenize(argv[optind++], ",", queries); + if(queries.empty()) { + cerr << "Tokenized query file list was empty!" << endl; + printUsage(cerr); + return 1; + } + } + + // Get output filename + if(optind < argc && outfile.empty()) { + outfile = argv[optind++]; + cerr << "Warning: Output file '" << outfile.c_str() + << "' was specified without -S. This will not work in " + << "future HISAT 2 versions. Please use -S instead." + << endl; + } + + // Extra parametesr? + if(optind < argc) { + cerr << "Extra parameter(s) specified: "; + for(int i = optind; i < argc; i++) { + cerr << "\"" << argv[i] << "\""; + if(i < argc-1) cerr << ", "; + } + cerr << endl; + if(mates1.size() > 0) { + cerr << "Note that if files are specified using -1/-2, a file cannot" << endl + << "also be specified. Please run HISAT2 separately for mates and singles." << endl; + } + throw 1; + } + + // Optionally summarize + if(gVerbose) { + cout << "Input bt2 file: \"" << bt2index.c_str() << "\"" << endl; + cout << "Query inputs (DNA, " << file_format_names[format].c_str() << "):" << endl; + for(size_t i = 0; i < queries.size(); i++) { + cout << " " << queries[i].c_str() << endl; + } + cout << "Quality inputs:" << endl; + for(size_t i = 0; i < qualities.size(); i++) { + cout << " " << qualities[i].c_str() << endl; + } + cout << "Output file: \"" << outfile.c_str() << "\"" << endl; + cout << "Local endianness: " << (currentlyBigEndian()? "big":"little") << endl; + cout << "Sanity checking: " << (sanityCheck? 
"enabled":"disabled") << endl; + #ifdef NDEBUG + cout << "Assertions: disabled" << endl; + #else + cout << "Assertions: enabled" << endl; + #endif + } + if(ipause) { + cout << "Press key to continue..." << endl; + getchar(); + } + driver >("DNA", bt2index, outfile); + } + return 0; + } catch(std::exception& e) { + cerr << "Error: Encountered exception: '" << e.what() << "'" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + return 1; + } catch(int e) { + if(e != 0) { + cerr << "Error: Encountered internal HISAT2 exception (#" << e << ")" << endl; + cerr << "Command: "; + for(int i = 0; i < argc; i++) cerr << argv[i] << " "; + cerr << endl; + } + return e; + } +} // bowtie() +} // extern "C" diff --git a/ival_list.cpp b/ival_list.cpp new file mode 100644 index 0000000..acbe460 --- /dev/null +++ b/ival_list.cpp @@ -0,0 +1,165 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "ival_list.h" + +#ifdef MAIN_IVAL_DS + +#include +#include "random_source.h" + +using namespace std; + +int main(void) { + cerr << "Case 1 ... 
"; + { + EIvalMergeList list((size_t)5); + list.add(Interval(0, 10, true, 10)); + list.add(Interval(0, 30, true, 10)); + list.add(Interval(0, 20, true, 10)); + assert(!list.locusPresent(Coord(0, 5, true))); + assert(!list.locusPresent(Coord(0, 9, true))); + assert(list.locusPresent(Coord(0, 10, true))); + assert(list.locusPresent(Coord(0, 11, true))); + assert(list.locusPresent(Coord(0, 19, true))); + assert(list.locusPresent(Coord(0, 20, true))); + assert(list.locusPresent(Coord(0, 21, true))); + assert(list.locusPresent(Coord(0, 29, true))); + assert(list.locusPresent(Coord(0, 30, true))); + assert(list.locusPresent(Coord(0, 31, true))); + assert(list.locusPresent(Coord(0, 39, true))); + assert(!list.locusPresent(Coord(0, 40, true))); + assert(!list.locusPresent(Coord(0, 41, true))); + } + cerr << " PASSED" << endl; + + cerr << "Case 2 ... "; + { + EIvalMergeList list((size_t)5); + list.add(Interval(0, 10, true, 10)); + for(size_t i = 5; i < 45; i++) { + assert(list.locusPresent(Coord(0, i, true)) == (i >= 10 && i < 20)); + } + list.clear(); + list.add(Interval(0, 15, true, 10)); + for(size_t i = 5; i < 45; i++) { + assert(list.locusPresent(Coord(0, i, true)) == (i >= 15 && i < 25)); + } + } + cerr << " PASSED" << endl; + + cerr << "Case 3 ... "; + { + EIvalMergeList list((size_t)5); + for(size_t i = 0; i < 20; i++) { + list.add(Interval(0, 10*i, true, 9)); + } + for(size_t i = 0; i < 200; i++) { + assert(list.locusPresent(Coord(0, i, true)) == ((i % 10) != 9)); + assert(!list.locusPresent(Coord(0, i, false))); + assert(!list.locusPresent(Coord(1, i, true))); + } + } + cerr << " PASSED" << endl; + + cerr << "Case 4 ... 
"; + { + EIvalMergeList list((size_t)5); + for(int i = 19; i >= 0; i--) { + list.add(Interval(0, 10*i, true, 9)); + } + for(size_t i = 0; i < 200; i++) { + assert(list.locusPresent(Coord(0, i, true)) == ((i % 10) != 9)); + assert(!list.locusPresent(Coord(0, i, false))); + assert(!list.locusPresent(Coord(1, i, true))); + } + } + cerr << " PASSED" << endl; + + cerr << "Random testing (1 ref) ... "; + { + RandomSource rnd(34523); + for(size_t c = 0; c < 10; c++) { + EIvalMergeList list1((size_t)16); + EIvalMergeList list2((size_t)2000); + size_t num_intervals = 20; + uint32_t max_width = 100; + for(size_t i = 0; i < num_intervals; i++) { + uint32_t start = rnd.nextU32() % max_width/2; + uint32_t end = (rnd.nextU32() % (max_width - start - 1) + start)+1; + assert_lt(end, max_width); + assert_gt(end, start); + list1.add(Interval(0, start, false, end-start)); + list2.add(Interval(0, start, false, end-start)); + } + assert_geq(num_intervals, list1.size()); + assert_geq(num_intervals, list2.size()); + assert(list1.repOk()); + assert(list2.repOk()); + for(uint32_t i = 0; i < max_width+1; i++) { + assert(list1.repOk()); + assert(list2.repOk()); + ASSERT_ONLY(bool l1 = list1.locusPresent(Coord(0, i, true))); + ASSERT_ONLY(bool l2 = list2.locusPresent(Coord(0, i, true))); + assert_eq(l1, l2); + } + } + } + cerr << " PASSED" << endl; + + cerr << "Random testing (few refs) ... 
"; + { + RandomSource rnd(34523); + for(size_t c = 0; c < 10; c++) { + EIvalMergeList list1((size_t)16); + EIvalMergeList list2((size_t)2000); + size_t num_intervals = 20; + uint32_t max_width = 100; + for(size_t i = 0; i < num_intervals; i++) { + uint32_t start = rnd.nextU32() % max_width/2; + uint32_t end = (rnd.nextU32() % (max_width - start - 1) + start)+1; + assert_lt(end, max_width); + assert_gt(end, start); + bool orient = (rnd.nextU2() == 0); + TRefId ref = (TRefId)(rnd.nextU32() % 5); + list1.add(Interval(ref, start, orient, end-start)); + list2.add(Interval(ref, start, orient, end-start)); + } + assert_geq(num_intervals, list1.size()); + assert_geq(num_intervals, list2.size()); + assert(list1.repOk()); + assert(list2.repOk()); + for(uint32_t i = 0; i < max_width+1; i++) { + assert(list1.repOk()); + assert(list2.repOk()); + for(int fwi = 0; fwi < 2; fwi++) { + bool fw = (fwi == 0); + for(TRefId refi = 0; refi < 5; refi++) { + ASSERT_ONLY(bool l1 = list1.locusPresent(Coord(refi, i, fw))); + ASSERT_ONLY(bool l2 = list2.locusPresent(Coord(refi, i, fw))); + assert_eq(l1, l2); + } + } + } + } + } + cerr << " PASSED" << endl; +} + +#endif /*def MAIN_IVAL_DS*/ diff --git a/ival_list.h b/ival_list.h new file mode 100644 index 0000000..0fc40a5 --- /dev/null +++ b/ival_list.h @@ -0,0 +1,299 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. 
If not, see . + */ + +#ifndef IVAL_LIST_H_ +#define IVAL_LIST_H_ + +#include "ds.h" +#include "ref_coord.h" +#include + +/** + * Encapsulates the "union" of a collection of intervals. Intervals are stored + * in a sorted list. Intervals can be added but not removed. Supports just + * one type of query for now: locusPresent(). + */ +class EIvalMergeList { +public: + + static const size_t DEFAULT_UNSORTED_SZ = 16; + + explicit EIvalMergeList(int cat = 0) : + sorted_(cat), + sortedLhs_(cat), + unsorted_(cat), + unsortedSz_(DEFAULT_UNSORTED_SZ) + { } + + explicit EIvalMergeList(size_t unsortedSz, int cat = 0) : + sorted_(cat), + sortedLhs_(cat), + unsorted_(cat), + unsortedSz_(unsortedSz) + { } + + /** + * Set the maximum size of the unsorted list. + */ + void setUnsortedSize(size_t usz) { + unsortedSz_ = usz; + } + + /** + * Add a new interval to the list. + */ + void add(const Interval& i) { + assert_leq(unsorted_.size(), unsortedSz_); + if(unsorted_.size() < unsortedSz_) { + unsorted_.push_back(i); + } + if(unsorted_.size() == unsortedSz_) { + flush(); + } + } + + /** + * Move all unsorted interval information into the sorted list and re-sort. + * Merge overlapping intervals. + */ + void flush() { + for(size_t i = 0; i < unsorted_.size(); i++) { + sorted_.push_back(unsorted_[i]); + } + sorted_.sort(); + merge(); + sortedLhs_.clear(); + for(size_t i = 0; i < sorted_.size(); i++) { + sortedLhs_.push_back(sorted_[i].upstream()); + } + assert(sortedLhs_.sorted()); + unsorted_.clear(); + } + +#ifndef NDEBUG + /** + * Check that this interval list is internally consistent. + */ + bool repOk() const { + assert_eq(sorted_.size(), sortedLhs_.size()); + return true; + } +#endif + + /** + * Remove all ranges from the list. + */ + void reset() { clear(); } + + /** + * Remove all ranges from the list. + */ + void clear() { + sorted_.clear(); + sortedLhs_.clear(); + unsorted_.clear(); + } + + /** + * Return true iff this locus is present in one of the intervals in the + * list. 
+ */ + bool locusPresent(const Coord& loc) const { + return locusPresentUnsorted(loc) || locusPresentSorted(loc); + } + + /** + * Return the number of intervals added since the last call to reset() or + * clear(). + */ + size_t size() const { + return sorted_.size() + unsorted_.size(); + } + + /** + * Return true iff list is empty. + */ + bool empty() const { + return sorted_.empty() && unsorted_.empty(); + } + +protected: + + /** + * Go through the sorted interval list and merge adjacent entries that + * overlap. + */ + void merge() { + size_t nmerged = 0; + for(size_t i = 1; i < sorted_.size(); i++) { + if(sorted_[i-1].downstream() >= sorted_[i].upstream()) { + nmerged++; + assert_leq(sorted_[i-1].upstream(), sorted_[i].upstream()); + Coord up = std::min(sorted_[i-1].upstream(), sorted_[i].upstream()); + Coord dn = std::max(sorted_[i-1].downstream(), sorted_[i].downstream()); + sorted_[i].setUpstream(up); + sorted_[i].setLength(dn.off() - up.off()); + sorted_[i-1].reset(); + } + } + sorted_.sort(); + assert_lt(nmerged, sorted_.size()); + sorted_.resize(sorted_.size()-nmerged); +#ifndef NDEBUG + for(size_t i = 0; i < sorted_.size(); i++) { + assert(sorted_[i].inited()); + } +#endif + } + + /** + * Return true iff the given locus is present in one of the intervals in + * the sorted list. + */ + bool locusPresentSorted(const Coord& loc) const { + assert(repOk()); + if(sorted_.empty()) { + return false; + } + size_t beg = sortedLhs_.bsearchLoBound(loc); + if(beg == sortedLhs_.size() || sortedLhs_[beg] > loc) { + // Check element before + if(beg == 0) { + return false; + } + return sorted_[beg-1].contains(loc); + } else { + assert_eq(loc, sortedLhs_[beg]); + return true; + } + } + + /** + * Return true iff the given locus is present in one of the intervals in + * the unsorted list. 
+ */ + bool locusPresentUnsorted(const Coord& loc) const { + for(size_t i = 0; i < unsorted_.size(); i++) { + if(unsorted_[i].contains(loc)) { + return true; + } + } + return false; + } + + EList sorted_; // LHS, RHS sorted + EList sortedLhs_; // LHS, index into sorted_, sorted + EList unsorted_; // unsorted + size_t unsortedSz_; // max allowed size of unsorted_ +}; + +/** + * Binned version of the above. We bin using the low bits of the reference + * sequence. + */ +class EIvalMergeListBinned { +public: + + static const size_t NBIN = 7; + + explicit EIvalMergeListBinned(int cat = 0) : bins_(1 << NBIN, cat) { + bins_.resize(1 << NBIN); + } + + explicit EIvalMergeListBinned( + size_t unsortedSz, + int cat = 0) : bins_(1 << NBIN, cat) + { + bins_.resize(1 << NBIN); + for(size_t i = 0; i < (1 << NBIN); i++) { + bins_[i].setUnsortedSize(unsortedSz); + } + } + + /** + * Add a new interval to the list. + */ + void add(const Interval& i) { + size_t bin = i.ref() & ~(0xffffffff << NBIN); + assert_lt(bin, bins_.size()); + bins_[bin].add(i); + } + +#ifndef NDEBUG + /** + * Check that this interval list is internally consistent. + */ + bool repOk() const { + for(size_t i = 0; i < bins_.size(); i++) { + assert(bins_[i].repOk()); + } + return true; + } +#endif + + /** + * Remove all ranges from the list. + */ + void reset() { clear(); } + + /** + * Remove all ranges from the list. + */ + void clear() { + for(size_t i = 0; i < bins_.size(); i++) { + bins_[i].clear(); + } + } + + /** + * Return true iff this locus is present in one of the intervals in the + * list. + */ + bool locusPresent(const Coord& loc) const { + size_t bin = loc.ref() & ~(0xffffffff << NBIN); + assert_lt(bin, bins_.size()); + return bins_[bin].locusPresent(loc); + } + + /** + * Return the number of intervals added since the last call to reset() or + * clear(). 
+ */ + size_t size() const { + // TODO: Keep track of size + size_t sz = 0; + for(size_t i = 0; i < bins_.size(); i++) { + sz += bins_[i].size(); + } + return sz; + } + + /** + * Return true iff list is empty. + */ + bool empty() const { + return size() == 0; + } + +protected: + + EList bins_; +}; + +#endif /*ndef IVAL_LIST_H_*/ diff --git a/li_hla/Makefile b/li_hla/Makefile new file mode 100644 index 0000000..9f77758 --- /dev/null +++ b/li_hla/Makefile @@ -0,0 +1,16 @@ +CXX = g++ +CXXFLAGS= -W +LINKPATH= -I./samtools-0.1.19 -L./samtools-0.1.19 +LINKFLAGS = -lbam -lz -lm -lpthread +DEBUG= +OBJECTS = + +all: hla + +hla: main.o + $(CXX) -o $@ $(LINKPATH) $(CXXFLAGS) $(OBJECTS) main.o $(LINKFLAGS) + +main.o: main.cpp alignments.hpp + +clean: + rm -f *.o *.gch hla diff --git a/li_hla/alignments.hpp b/li_hla/alignments.hpp new file mode 100644 index 0000000..bb5d6f4 --- /dev/null +++ b/li_hla/alignments.hpp @@ -0,0 +1,276 @@ +// The class handles reading the bam file + +#ifndef _LSONG_ALIGNMENT_HEADER +#define _LSONG_ALIGNMENT_HEADER + +#include "samtools-0.1.19/sam.h" +#include +#include +#include +#include + +#include "defs.h" + +class Alignments +{ +private: + samfile_t *fpSam ; + bam1_t *b ; + + char fileName[1024] ; + bool opened ; + std::map chrNameToId ; + int onlyChromId ; // ignore other chromosomes + + void Open() + { + fpSam = samopen( fileName, "rb", 0 ) ; + if ( !fpSam->header ) + { + fprintf( stderr, "Can not open %s.\n", fileName ) ; + exit( 1 ) ; + } + + // Collect the chromosome information + for ( int i = 0 ; i < fpSam->header->n_targets ; ++i ) + { + std::string s( fpSam->header->target_name[i] ) ; + chrNameToId[s] = i ; + } + opened = true ; + } +public: + struct _pair segments[MAX_SEG_COUNT] ; + unsigned int segCnt ; + + Alignments() { b = NULL ; opened = false ; onlyChromId = -1 ; } + ~Alignments() {} + + void Open( char *file ) + { + strcpy( fileName, file ) ; + Open() ; + } + + void Rewind() + { + Close() ; + Open() ; + } + + void Close() + { + 
samclose( fpSam ) ; + fpSam = NULL ; + } + + bool IsOpened() + { + return opened ; + } + + int Next() + { + int i ; + int start = 0, len = 0 ; + uint32_t *rawCigar ; + + while ( 1 ) + { + while ( 1 ) + { + if ( b ) + bam_destroy1( b ) ; + b = bam_init1() ; + + if ( samread( fpSam, b ) <= 0 ) + return 0 ; + if ( b->core.flag & 0xC ) + continue ; + + if ( onlyChromId != -1 && onlyChromId != GetChromId() ) + { + continue ; + } + + if ( ( b->core.flag & 0x900 ) == 0 ) + break ; + } + // Compute the exons segments from the reads + segCnt = 0 ; + start = b->core.pos ; //+ 1 ; + rawCigar = bam1_cigar( b ) ; + for ( i = 0 ; i < b->core.n_cigar ; ++i ) + { + int op = rawCigar[i] & BAM_CIGAR_MASK ; + int num = rawCigar[i] >> BAM_CIGAR_SHIFT ; + + switch ( op ) + { + case BAM_CMATCH: + case BAM_CDEL: + len += num ; break ; + case BAM_CINS: + case BAM_CSOFT_CLIP: + case BAM_CHARD_CLIP: + case BAM_CPAD: + num = 0 ; break ; + case BAM_CREF_SKIP: + { + segments[ segCnt ].a = start ; + segments[ segCnt ].b = start + len - 1 ; + ++segCnt ; + start = start + len + num ; + len = 0 ; + } break ; + default: + len += num ; break ; + } + } + segments[ segCnt ].a = start ; + segments[ segCnt ].b = start + len - 1 ; + ++segCnt ; + + /*for ( i = 0 ; i < segCnt ; ++i ) + printf( "(%d %d) ", segments[i].a, segments[i].b ) ; + printf( "\n" ) ;*/ + + // Check whether the mates are compatible + int mChrId = b->core.mtid ; + int64_t mPos = b->core.mpos ; + + if ( b->core.mtid == b->core.tid ) + { + for ( i = 0 ; i < segCnt - 1 ; ++i ) + { + if ( mPos >= segments[i].b && mPos <= segments[i + 1].a ) + break ; + } + if ( i < segCnt - 1 ) + continue ; + } + + break ; + } + + return 1 ; + } + + + int GetChromId() + { + return b->core.tid ; + } + + char* GetChromName( int tid ) + { + return fpSam->header->target_name[ tid ] ; + } + + int GetChromIdFromName( const char *s ) + { + std::string ss( s ) ; + if ( chrNameToId.find( ss ) == chrNameToId.end() ) + { + printf( "Unknown genome name: %s\n", s ) ; + 
exit( 1 ) ; + } + return chrNameToId[ss] ; + } + + int GetChromLength( int tid ) + { + return fpSam->header->target_len[ tid ] ; + } + + void GetMatePosition( int &chrId, int64_t &pos ) + { + chrId = b->core.mtid ; + pos = b->core.mpos ; //+ 1 ; + } + + int GetRepeatPosition( int &chrId, int64_t &pos ) + { + // Look at the CC field. + if ( !bam_aux_get( b, "CC" ) || !bam_aux_get( b, "CP" ) ) + { + chrId = -1 ; + pos = -1 ; + return 0 ; + } + + std::string s( bam_aux2Z( bam_aux_get(b, "CC" ) ) ) ; + chrId = chrNameToId[ s ] ; + pos = bam_aux2i( bam_aux_get( b, "CP" ) ) ;// Possible error for 64bit + return 1 ; + } + + bool IsReverse() + { + if ( b->core.flag & 0x10 ) + return true ; + return false ; + } + + bool IsMateReverse() + { + if ( b->core.flag & 0x20 ) + return true ; + return false ; + } + + char *GetReadId() + { + return bam1_qname( b ) ; + } + + bool IsUnique() + { + if ( bam_aux_get( b, "NH" ) ) + { + if ( bam_aux2i( bam_aux_get( b, "NH" ) ) > 1 ) + return false ; + } + return true ; + } + + int GetFieldI( char *f ) + { + if ( bam_aux_get( b, f ) ) + { + return bam_aux2i( bam_aux_get( b, f ) ) ; + } + return -1 ; + } + + char *GetFieldZ( char *f ) + { + if ( bam_aux_get( b, f ) ) + { + return bam_aux2Z( bam_aux_get( b, f ) ) ; + } + return NULL ; + } + + // -1:minus, 0: unknown, 1:plus + int GetStrand() + { + if ( segCnt == 1 ) + return 0 ; + if ( bam_aux_get( b, "XS" ) ) + { + if ( bam_aux2A( bam_aux_get( b, "XS" ) ) == '-' ) + return -1 ; + else + return 1 ; + } + else + return 0 ; + } + + void OnlyChrom( const char *chr ) + { + onlyChromId = GetChromIdFromName( chr ) ; + } +} ; +#endif diff --git a/li_hla/defs.h b/li_hla/defs.h new file mode 100644 index 0000000..55f0649 --- /dev/null +++ b/li_hla/defs.h @@ -0,0 +1,19 @@ +#ifndef _LSONG_RSCAF_DEFS_HEADER +#define _LSONG_RSCAF_DEFS_HEADER + +#include + +#define MAX_SEG_COUNT 127 + +struct _pair +{ + int64_t a, b ; +} ; + +char nucToNum[26] = { 0, -1, 1, -1, -1, -1, 2, + -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, -1, 3, + -1, -1, -1, -1, -1, -1 } ; + +char numToNuc[26] = {'A', 'C', 'G', 'T'} ; +#endif diff --git a/li_hla/main.cpp b/li_hla/main.cpp new file mode 100644 index 0000000..fcf7b4d --- /dev/null +++ b/li_hla/main.cpp @@ -0,0 +1,484 @@ +//usage: a.out prefix_of_allele_information alignment.bam [-b backbone_id] +#include +#include +#include +#include +#include +#include + +#include "alignments.hpp" + + +struct _compatible +{ + int weight ; + double value ; +} ; + +struct _snpInfo +{ + char type ; // d, s, i + int position ; + char nucleotide ; + int length ; +} ; + +struct _mdComponent +{ + char type ; + int length ; + int num ; +} ; + +struct _result +{ + int a, b ; + double logLikelihood ; +} ; + +std::map snpNameToId ; + +std::map alleleNameToId ; +std::vector alleleIdToName ; + + +std::vector snpInfo ; // When using this, we already convert the snp name into snp id +std::vector< std::vector > snpLink ; // What are the allele ids associate with the snp id +std::map > positionToSnp ; // map of the genomic coordinate to the snp id +std::vector< std::vector > alleleSnpList ; // the list of snp ids associate with this allele +std::vector alleleLength ; +std::vector< struct _pair > alignmentCoords ; + +bool CompResult( struct _result a, struct _result b ) +{ + return a.logLikelihood > b.logLikelihood ; +} + +void Split( const char *s, char delimit, std::vector &fields ) +{ + int i ; + fields.clear() ; + if ( s == NULL ) + return ; + + std::string f ; + for ( i = 0 ; s[i] ; ++i ) + { + if ( s[i] == delimit || s[i] == '\n' ) + { + fields.push_back( f ) ; + f.clear() ; + } + else + f.append( 1, s[i] ) ; + } + fields.push_back( f ) ; + f.clear() ; +} + +int main( int argc, char *argv[] ) +{ + int i, j, k ; + FILE *fp ; + char buffer[10100] ; + std::vector fields ; + int binSize = 50 ; + const char *backboneName = NULL ; + + // the compatilibity between the alignment and allele + // The likelihood of this read from this allele + struct _compatible 
**compatibility ; + int **snpAllele ; // whether a snp showed up in the allele. + + Alignments alignments ; + alignments.Open( argv[2] ) ; + + for ( i = 3 ; i < argc ; ++i ) + { + if ( !strcmp( argv[i], "-b" ) ) + { + backboneName = argv[i + 1] ; + alignments.OnlyChrom( backboneName ) ; + ++i ; + } + else + { + fprintf( stderr, "Unknown argument %s.\n", argv[i] ) ; + exit( 1 ) ; + } + } + + // Parse the files associate with the snps + // Firstly, read in the snp list have the information + sprintf( buffer, "%s.snp", argv[1] ) ; + fp = fopen( buffer, "r" ) ; + k = 0 ; + while ( fgets( buffer, sizeof( buffer ), fp ) ) + { + Split( buffer, '\t', fields ) ; + if ( backboneName && strcmp( fields[2].c_str(), backboneName ) ) + continue ; + snpNameToId[ fields[0] ] = k ; + struct _snpInfo info ; + info.type = fields[1][0] ; + if ( info.type == 'd' ) + { + info.position = atoi( fields[3].c_str() ) ; + info.length = atoi( fields[4].c_str() ) ; + } + else if ( info.type == 'i' ) + { + info.position = atoi( fields[3].c_str() ) ; + info.length = strlen( fields[4].c_str() ) ; + } + else + { + info.position = atoi( fields[3].c_str() ) ; // notice that the snp file is 0-based index. + info.length = 1 ; + info.nucleotide = fields[4][0] ; + } + snpInfo.push_back( info ) ; + std::vector< int > tmpList ; + snpLink.push_back( tmpList ) ; + for ( int p = 0 ; p < info.length ; ++p ) + { + if ( info.type != 'i' || p == 0 ) + { + if ( positionToSnp.find( info.position + p ) == positionToSnp.end() ) + { + positionToSnp[ info.position + p] = tmpList ; + } + positionToSnp[ info.position + p].push_back( k ) ; + } + } + ++k ; + } + fclose( fp ) ; + // Read in the link file. Determine the id of alleles and the association + // of alleles and snps. 
+ // TODO: obtain the length of each allele and take the length into account in the statistical model + // Add the id for the backbound + int backboneLength = 0 ; + sprintf( buffer, "%s_backbone.fa", argv[1] ) ; + fp = fopen( buffer, "r" ) ; + /*for ( i = 1 ; buffer[i] && buffer[i] != ' ' && buffer[i] != '\n' ; ++i ) + ; + buffer[i] = '\0' ; + std::string backboneName( buffer + 1 ) ; + alleleNameToId[ backboneName ] = 0 ; + alleleIdToName.push_back( backboneName ) ;*/ + bool start = false ; + while ( fgets( buffer, sizeof( buffer ), fp ) ) + { + if ( buffer[0] == '>' ) + { + for ( i = 1 ; buffer[i] && buffer[i] != ' ' && buffer[i] != '\n' ; ++i ) + ; + buffer[i] = '\0' ; + if ( !strcmp( backboneName, buffer + 1 ) ) + { + start = true ; + } + else if ( start ) + break ; + } + if ( start && buffer[0] != '>' ) + { + int len = strlen( buffer ) ; + if ( buffer[len - 1 ] == '\n' ) + backboneLength += len - 1 ; + else + backboneLength += len ; + } + } + fclose( fp ) ; + /*k = 0 ; + if ( k == 0 ) + { + std::vector tmpList ; + alleleSnpList.push_back( tmpList ) ; + alleleLength.push_back( backboneLength ) ; + }*/ + + + // scanning the link file + sprintf( buffer, "%s.link", argv[1] ) ; + fp = fopen( buffer, "r" ) ; + k = 0 ; + while ( fgets( buffer, sizeof( buffer ), fp ) ) + { + std::vector tmpFields ; + Split( buffer, '\t', tmpFields ) ; + // skip the snps from other backbones + if ( snpNameToId.find( tmpFields[0] ) == snpNameToId.end() ) + continue ; + + int snpId = snpNameToId[ tmpFields[0] ] ; + Split( tmpFields[1].c_str(), ' ', fields ) ; + int size = fields.size() ; + + for ( i = 0 ; i < size ; ++i ) + { + if ( alleleNameToId.find( fields[i] ) == alleleNameToId.end() ) + { + //printf( "%s %d\n", fields[i].c_str(), k ) ; + alleleNameToId[ fields[i] ] = k ; + alleleIdToName.push_back( fields[i] ) ; + std::vector tmpList ; + alleleSnpList.push_back( tmpList ) ; + alleleLength.push_back( backboneLength ) ; + ++k ; + } + + int alleleId = alleleNameToId[ fields[i] ] ; + 
//if ( snpId == 118 ) + // printf( "%s: %s %d\n", tmpFields[0].c_str(), fields[i].c_str(), alleleId ) ; + snpLink[ snpId ].push_back( alleleId ) ; + alleleSnpList[ alleleId ].push_back( snpId ) ; + if ( snpInfo[ snpId ].type == 'd' ) + { + alleleLength[ alleleId ] -= snpInfo[ snpId ].length ; + } + else if ( snpInfo[ snpId ].type == 'i' ) + { + alleleLength[ alleleId ] += snpInfo[ snpId ].length ; + } + } + } + fclose( fp ) ; + int numOfAllele = alleleIdToName.size() ; + int numOfSnps = snpLink.size() ; + snpAllele = new int* [numOfSnps] ; + for ( i = 0 ; i < numOfSnps ; ++i ) + { + snpAllele[i] = new int[numOfAllele] ; + memset( snpAllele[i], 0, sizeof( int ) * numOfAllele ) ; + } + for ( i = 0 ; i < numOfSnps ; ++i ) + { + int size = snpLink[i].size() ; + for ( j = 0 ; j < size ; ++j ) + { + snpAllele[i][ snpLink[i][j] ] = 1 ; + } + } + + // Compute the compatbility score for each alignment and the allele + // Get the number of alignment + int numOfAlignments = 0 ; + while ( alignments.Next() ) + ++numOfAlignments ; + + alignments.Rewind() ; + + compatibility = new struct _compatible*[numOfAlignments] ; + for ( i = 0 ; i < numOfAlignments ; ++i ) + { + compatibility[i] = new struct _compatible[ numOfAllele ] ; + for ( j = 0 ; j < numOfAllele ; ++j ) + { + compatibility[i][j].value = 0 ;//-log( (double)alleleLength[j] ) / log( 10.0 ); + } + } + + i = 0 ; + bool *snpHit = new bool[ numOfSnps ] ; + while ( alignments.Next() ) + { + struct _pair coord = alignments.segments[0] ; + alignmentCoords.push_back( coord ) ; + memset( snpHit, 0, sizeof( bool ) * numOfSnps ) ; + Split( alignments.GetFieldZ( "Zs" ), ',', fields ) ; + int size = fields.size() ; + for ( k = 0 ; k < size ; ++k ) + { + std::vector subfields ; + Split( fields[k].c_str(), '|', subfields ) ; + int snpId = snpNameToId[ subfields[2] ] ; + snpHit[ snpId ] = true ; + } + + for ( k = coord.a ; k <= coord.b ; ++k ) + { + int size = positionToSnp[k].size() ; + for ( int l = 0 ; l < size ; ++l ) + { + // if 
this SNP is hit. Then other allele don't have this snp + // will deduct its likelihood + //TODO: the deduction can be based on the quality score of the read + int tag = 0 ; + int snpId = positionToSnp[k][l] ; + + if ( snpHit[ snpId ] ) + { + tag = 0 ; + } + else + { + // if this SNP is not hit, then every allele containing this snp + // will deduct its likelihood + tag = 1 ; + } + for ( j = 0 ; j < numOfAllele ; ++j ) + { + if ( snpAllele[ snpId ][j] == tag ) + { + int v = -2 ; + //if ( snpInfo[ snpId ].type == 'd' || snpInfo[ snpId ].type == 'i' ) + // v = -4 * snpInfo[ snpId ].length ; + if ( snpInfo[ snpId ].type == 'd' && snpInfo[ snpId ].position < k + && k != coord.a ) + { + // The penality has already been subtracted. + v = 0 ; + } + compatibility[i][j].value += v ; + /*if ( i == 8 && j == 78 ) + { + printf( "Bad snp %d: %d %d\n", tag, k, positionToSnp[k][l] ) ; + }*/ + } + } + } + } + ++i ; + } + //printf( "%d %d\n", numOfAlignments, numOfAllele ) ; + // Now, let's consider every pair of alleles, and compute its log likelihood + double **logLikelihood ; + logLikelihood = new double *[ numOfAllele] ; + for ( j = 0 ; j < numOfAllele ; ++j ) + { + logLikelihood[j] = new double[ numOfAllele ] ; + //memset( logLikelihood[j], 0, sizeof( double ) * numOfAllele ) ; + for ( k = 0 ; k < numOfAllele ; ++k ) + logLikelihood[j][k] = 0 ; + } + + int prevBin = -1 ; + double assignJBin = 0 ; + double assignKBin = 0 ; + for ( j = 0 ; j < numOfAllele ; ++j ) + { + for ( k = j ; k < numOfAllele ; ++k ) + { + double binAdjust = 0 ; + double averageRead = ( (double)numOfAlignments ) / (double)( alleleLength[j] + alleleLength[k] ) * binSize ; + for ( i = 0 ; i < numOfAlignments ; ++i ) + { + double vj = compatibility[i][j].value ; + double vk = compatibility[i][k].value ; + double weightJ = 0, weightK = 0 ; + if ( vj == vk ) + { + weightJ = weightK = 0.5 ; + } + else if ( vj == vk + 2 ) + { + if ( vj == 0 ) + { + weightJ = 1 ; + } + else + { + weightJ = 0.99 ; + weightK = 0.01 ; 
+ } + } + else if ( vk == vj + 2 ) + { + if ( vk == 0 ) + { + weightK = 1 ; + } + else + { + weightJ = 0.01 ; + weightK = 0.99 ; + } + } + else + { + if ( vk > vj ) + weightK = 1 ; + else + weightJ = 1 ; + } + + double l = weightJ * compatibility[i][j].value + weightK * compatibility[i][k].value ; + if ( alignmentCoords[i].a / binSize != prevBin ) + { + if ( prevBin != -1 && + ( assignJBin > averageRead + 4 * sqrt( averageRead ) + || assignKBin > averageRead + 4 * sqrt( averageRead ) ) ) + { + //if ( j == 8 && k == 78 ) + // printf( "%lf: %lf %lf %d %d\n", averageRead, assignJBin, assignKBin, alleleLength[j], alleleLength[k] ) ; + binAdjust -= 4 ; + } + prevBin = alignmentCoords[i].a / binSize ; + assignJBin = 0 ; + assignKBin = 0 ; + } + assignJBin += weightJ ; + assignKBin += weightK ; + + /*if ( j == 8 && k == 78 && l < 0 ) + { + printf( "Bad alignment %d (%s %s). %lf %lf: %lf\n", i, + alleleIdToName[j].c_str(), alleleIdToName[k].c_str(), + compatibility[i][j].value, compatibility[i][k].value, l ) ; + }*/ + logLikelihood[j][k] += l ; + } + logLikelihood[j][k] += ( -log( (double)alleleLength[j] ) / log(10.0 ) - + log( (double)alleleLength[k] ) / log(10.0) ) ; + logLikelihood[j][k] += binAdjust ; + } + } + + // Find the result + double max ; + int maxj = -1 ; + int maxk = -1 ; + std::vector< struct _result > results ; + for ( j = 0 ; j < numOfAllele ; ++j ) + { + for ( k = j ; k < numOfAllele ; ++k ) + { + if ( maxj == -1 || logLikelihood[j][k] > max ) + { + maxj = j ; + maxk = k ; + max = logLikelihood[j][k] ; + } + struct _result r ; + r.a = j ; + r.b = k ; + r.logLikelihood = logLikelihood[j][k] ; + results.push_back( r ) ; + } + } + + //printf( "%s %s %lf\n", alleleIdToName[ maxj ].c_str(), alleleIdToName[ maxk ].c_str(), max) ; + //printf( "%lf\n", logLikelihood[124][128] ) ; + + if ( results.size() == 0 ) + { + printf( "-1 -1 -1\n" ) ; + exit( 1 ) ; + } + std::sort( results.begin(), results.end(), CompResult ) ; + i = 0 ; + printf( "%s %s %lf\n", 
alleleIdToName[ results[i].a ].c_str(), alleleIdToName[ results[i].b ].c_str(), + results[i].logLikelihood ) ; + k = results.size() ; + for ( i = 1 ; i < k ; ++i ) + { + if ( results[i].logLikelihood != results[0].logLikelihood ) + break ; + printf( "%s %s %lf\n", alleleIdToName[ results[i].a ].c_str(), alleleIdToName[ results[i].b ].c_str(), + results[i].logLikelihood ) ; + } + return 0 ; +} diff --git a/limit.cpp b/limit.cpp new file mode 100644 index 0000000..1146090 --- /dev/null +++ b/limit.cpp @@ -0,0 +1,43 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#include +#include "limit.h" + +uint8_t MIN_U8 = std::numeric_limits::min(); +uint8_t MAX_U8 = std::numeric_limits::max(); +uint16_t MIN_U16 = std::numeric_limits::min(); +uint16_t MAX_U16 = std::numeric_limits::max(); +uint32_t MIN_U32 = std::numeric_limits::min(); +uint32_t MAX_U32 = std::numeric_limits::max(); +uint64_t MIN_U64 = std::numeric_limits::min(); +uint64_t MAX_U64 = std::numeric_limits::max(); +size_t MIN_SIZE_T = std::numeric_limits::min(); +size_t MAX_SIZE_T = std::numeric_limits::max(); + +int MIN_I = std::numeric_limits::min(); +int MAX_I = std::numeric_limits::max(); +int8_t MIN_I8 = std::numeric_limits::min(); +int8_t MAX_I8 = std::numeric_limits::max(); +int16_t MIN_I16 = std::numeric_limits::min(); +int16_t MAX_I16 = std::numeric_limits::max(); +int32_t MIN_I32 = std::numeric_limits::min(); +int32_t MAX_I32 = std::numeric_limits::max(); +int64_t MIN_I64 = std::numeric_limits::min(); +int64_t MAX_I64 = std::numeric_limits::max(); diff --git a/limit.h b/limit.h new file mode 100644 index 0000000..06ea072 --- /dev/null +++ b/limit.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#ifndef LIMIT_H_ +#define LIMIT_H_ + +#include +#include + +extern uint8_t MIN_U8; +extern uint8_t MAX_U8; +extern uint16_t MIN_U16; +extern uint16_t MAX_U16; +extern uint32_t MIN_U32; +extern uint32_t MAX_U32; +extern uint64_t MIN_U64; +extern uint64_t MAX_U64; +extern size_t MIN_SIZE_T; +extern size_t MAX_SIZE_T; + +extern int MIN_I; +extern int MAX_I; +extern int8_t MIN_I8; +extern int8_t MAX_I8; +extern int16_t MIN_I16; +extern int16_t MAX_I16; +extern int32_t MIN_I32; +extern int32_t MAX_I32; +extern int64_t MIN_I64; +extern int64_t MAX_I64; + +#endif diff --git a/ls.cpp b/ls.cpp new file mode 100644 index 0000000..96c28c0 --- /dev/null +++ b/ls.cpp @@ -0,0 +1,142 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#ifdef MAIN_LS + +#include +#include +#include "sstring.h" +#include "ls.h" +#include "ds.h" + +using namespace std; + +int main(void) { + cerr << "Test LarssonSadakana for int..."; + { + typedef int T; + const char *t = "banana"; + EList sa; + EList isa; + for(size_t i = 0; i < strlen(t); i++) { + isa.push_back(t[i]); + } + isa.push_back(0); // disregarded + sa.resize(isa.size()); + LarssonSadakane ls; + ls.suffixsort(isa.ptr(), sa.ptr(), (T)sa.size()-1, 'z', 0); + assert_eq((T)'a', t[sa[1]]); assert_eq(5, sa[1]); + assert_eq((T)'a', t[sa[2]]); assert_eq(3, sa[2]); + assert_eq((T)'a', t[sa[3]]); assert_eq(1, sa[3]); + assert_eq((T)'b', t[sa[4]]); assert_eq(0, sa[4]); + assert_eq((T)'n', t[sa[5]]); assert_eq(4, sa[5]); + assert_eq((T)'n', t[sa[6]]); assert_eq(2, sa[6]); + } + cerr << "PASSED" << endl; + + cerr << "Test LarssonSadakana for uint32_t..."; + { + typedef uint32_t T; + const char *t = "banana"; + EList sa; + EList isa; + for(size_t i = 0; i < strlen(t); i++) { + isa.push_back(t[i]); + } + isa.push_back(0); // disregarded + sa.resize(isa.size()); + LarssonSadakane ls; + ls.suffixsort( + (int*)isa.ptr(), + (int*)sa.ptr(), + (int)sa.size()-1, + 'z', + 0); + assert_eq((T)'a', t[sa[1]]); assert_eq(5, sa[1]); + assert_eq((T)'a', t[sa[2]]); assert_eq(3, sa[2]); + assert_eq((T)'a', t[sa[3]]); assert_eq(1, sa[3]); + assert_eq((T)'b', t[sa[4]]); assert_eq(0, sa[4]); + assert_eq((T)'n', t[sa[5]]); assert_eq(4, sa[5]); + assert_eq((T)'n', t[sa[6]]); assert_eq(2, sa[6]); + } + cerr << "PASSED" << endl; + + cerr << "Last elt is < or > others ..."; + { + { + typedef int T; + const char *t = "aaa"; + EList sa; + EList isa; + for(size_t i = 0; i < strlen(t); i++) { + isa.push_back(t[i]); + } + isa.push_back(0); // disregarded + sa.resize(isa.size()); + LarssonSadakane ls; + ls.suffixsort(isa.ptr(), sa.ptr(), (T)sa.size()-1, 'z', 0); + assert_eq(3, sa[0]); + assert_eq(2, sa[1]); + assert_eq(1, sa[2]); + assert_eq(0, sa[3]); + } + + { + typedef int T; + const char 
*t = "aaa"; + EList sa; + EList isa; + for(size_t i = 0; i < strlen(t); i++) { + isa.push_back(t[i]); + } + isa.push_back('y'); // doesn't matter if this is > others + sa.resize(isa.size()); + LarssonSadakane ls; + ls.suffixsort(isa.ptr(), sa.ptr(), (T)sa.size()-1, 'z', 0); + assert_eq(3, sa[0]); + assert_eq(2, sa[1]); + assert_eq(1, sa[2]); + assert_eq(0, sa[3]); + } + + { + typedef int T; + const char *t = "aaa"; + EList sa; + EList isa; + for(size_t i = 0; i < strlen(t); i++) { + isa.push_back(t[i]); + } + isa.push_back('y'); // breaks ties + isa.push_back(0); // disregarded + sa.resize(isa.size()); + LarssonSadakane ls; + ls.suffixsort(isa.ptr(), sa.ptr(), (T)sa.size()-1, 'z', 0); + assert_eq(4, sa[0]); + assert_eq(0, sa[1]); + assert_eq(1, sa[2]); + assert_eq(2, sa[3]); + assert_eq(3, sa[4]); + } + + } + cerr << "PASSED" << endl; +} + +#endif diff --git a/ls.h b/ls.h new file mode 100644 index 0000000..e333f7c --- /dev/null +++ b/ls.h @@ -0,0 +1,333 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +/* Code in this file is ultimately based on: + + qsufsort.c + Copyright 1999, N. Jesper Larsson, all rights reserved. + + This file contains an implementation of the algorithm presented in "Faster + Suffix Sorting" by N. Jesper Larsson (jesper@cs.lth.se) and Kunihiko + Sadakane (sada@is.s.u-tokyo.ac.jp). 
+ + This software may be used freely for any purpose. However, when distributed, + the original source must be clearly stated, and, when the source code is + distributed, the copyright notice must be retained and any alterations in + the code must be clearly marked. No warranty is given regarding the quality + of this software.*/ + +#ifndef LS_H_ +#define LS_H_ + +#include +#include +#include + +template +class LarssonSadakane { + T *I, /* group array, ultimately suffix array.*/ + *V, /* inverse array, ultimately inverse of I.*/ + r, /* number of symbols aggregated by transform.*/ + h; /* length of already-sorted prefixes.*/ + + #define LS_KEY(p) (V[*(p)+(h)]) + #define LS_SWAP(p, q) (tmp=*(p), *(p)=*(q), *(q)=tmp) + #define LS_SMED3(a, b, c) (LS_KEY(a)LS_KEY(c) ? (b) : LS_KEY(a)>LS_KEY(c) ? (c) : (a))) + + /* Subroutine for select_sort_split and sort_split. Sets group numbers for a + group whose lowest position in I is pl and highest position is pm.*/ + + inline void update_group(T *pl, T *pm) { + T g; + g=(T)(pm-I); /* group number.*/ + V[*pl]=g; /* update group number of first position.*/ + if (pl==pm) + *pl=-1; /* one element, sorted group.*/ + else + do /* more than one element, unsorted group.*/ + V[*++pl]=g; /* update group numbers.*/ + while (pl>1); /* small arrays, middle element.*/ + if (n>7) { + pl=p; + pn=p+n-1; + if (n>40) { /* big arrays, pseudomedian of 9.*/ + s=n>>3; + pl=LS_SMED3(pl, pl+s, pl+s+s); + pm=LS_SMED3(pm-s, pm, pm+s); + pn=LS_SMED3(pn-s-s, pn-s, pn); + } + pm=LS_SMED3(pl, pm, pn); /* midsize arrays, median of 3.*/ + } + return LS_KEY(pm); + } + + /* Sorting routine called for each unsorted group. Sorts the array of integers + (suffix numbers) of length n starting at p. The algorithm is a ternary-split + quicksort taken from Bentley & McIlroy, "Engineering a Sort Function", + Software -- Practice and Experience 23(11), 1249-1265 (November 1993). 
This + function is based on Program 7.*/ + + inline void sort_split(T *p, T n) + { + T *pa, *pb, *pc, *pd, *pl, *pm, *pn; + T f, v, s, t, tmp; + + if (n<7) { /* multi-selection sort smallest arrays.*/ + select_sort_split(p, n); + return; + } + + v=choose_pivot(p, n); + pa=pb=p; + pc=pd=p+n-1; + while (1) { /* split-end partition.*/ + while (pb<=pc && (f=LS_KEY(pb))<=v) { + if (f==v) { + LS_SWAP(pa, pb); + ++pa; + } + ++pb; + } + while (pc>=pb && (f=LS_KEY(pc))>=v) { + if (f==v) { + LS_SWAP(pc, pd); + --pd; + } + --pc; + } + if (pb>pc) + break; + LS_SWAP(pb, pc); + ++pb; + --pc; + } + pn=p+n; + if ((s=(T)(pa-p))>(t=(T)(pb-pa))) + s=t; + for (pl=p, pm=pb-s; s; --s, ++pl, ++pm) + LS_SWAP(pl, pm); + if ((s=(T)(pd-pc))>(t=(T)(pn-pd-1))) + s=t; + for (pl=pb, pm=pn-s; s; --s, ++pl, ++pm) + LS_SWAP(pl, pm); + + s=(T)(pb-pa); + t=(T)(pd-pc); + if (s>0) + sort_split(p, s); + update_group(p+s, p+n-t-1); + if (t>0) + sort_split(p+n-t, t); + } + + /* Bucketsort for first iteration. + + Input: x[0...n-1] holds integers in the range 1...k-1, all of which appear + at least once. x[n] is 0. (This is the corresponding output of transform.) k + must be at most n+1. p is array of size n+1 whose contents are disregarded. + + Output: x is V and p is I after the initial sorting stage of the refined + suffix sorting algorithm.*/ + + inline void bucketsort(T *x, T *p, T n, T k) + { + T *pi, i, c, d, g; + + for (pi=p; pi=p; --pi) { + d=x[c=*pi]; /* c is position, d is next in list.*/ + x[c]=g=i; /* last position equals group number.*/ + if (d == 0 || d > 0) { /* if more than one element in group.*/ + p[i--]=c; /* p is permutation for the sorted x.*/ + do { + d=x[c=d]; /* next in linked list.*/ + x[c]=g; /* group number in x.*/ + p[i--]=c; /* permutation in p.*/ + } while (d == 0 || d > 0); + } else + p[i--]=-1; /* one element, sorted group.*/ + } + } + + /* Transforms the alphabet of x by attempting to aggregate several symbols into + one, while preserving the suffix order of x. 
The alphabet may also be + compacted, so that x on output comprises all integers of the new alphabet + with no skipped numbers. + + Input: x is an array of size n+1 whose first n elements are positive + integers in the range l...k-1. p is array of size n+1, used for temporary + storage. q controls aggregation and compaction by defining the maximum value + for any symbol during transformation: q must be at least k-l; if q<=n, + compaction is guaranteed; if k-l>n, compaction is never done; if q is + INT_MAX, the maximum number of symbols are aggregated into one. + + Output: Returns an integer j in the range 1...q representing the size of the + new alphabet. If j<=n+1, the alphabet is compacted. The global variable r is + set to the number of old symbols grouped into one. Only x[n] is 0.*/ + + inline T transform(T *x, T *p, T n, T k, T l, T q) + { + T b, c, d, e, i, j, m, s; + T *pi, *pj; + + for (s=0, i=k-l; i; i>>=1) + ++s; /* s is number of bits in old symbol.*/ + e=std::numeric_limits::max()>>s; /* e is for overflow checking.*/ + for (b=d=r=0; r=k-l) { /* if bucketing possible,*/ + j=transform(V, I, n, k, l, n); + bucketsort(V, I, n, j); /* bucketsort on first r positions.*/ + } else { + transform(V, I, n, k, l, std::numeric_limits::max()); + for (i=0; i<=n; ++i) + I[i]=i; /* initialize I with suffix numbers.*/ + h=0; + sort_split(I, n+1); /* quicksort on first r positions.*/ + } + h=r; /* number of symbols aggregated by transform.*/ + + while (*I>=-n) { + pi=I; /* pi is first position of group.*/ + sl=0; /* sl is negated length of sorted groups.*/ + do { + if ((s=*pi) <= 0 && (s=*pi) != 0) { + pi-=s; /* skip over sorted group.*/ + sl+=s; /* add negated length to sl.*/ + } else { + if (sl) { + *(pi+sl)=sl; /* combine sorted groups before pi.*/ + sl=0; + } + pk=I+V[s]+1; /* pk-1 is last position of unsorted group.*/ + sort_split(pi, (T)(pk-pi)); + pi=pk; /* next group.*/ + } + } while (pi<=I+n); + if (sl) /* if the array ends with a sorted group.*/ + *(pi+sl)=sl; 
/* combine sorted groups at end of I.*/ + h=2*h; /* double sorted-depth.*/ + } + + for (i=0; i<=n; ++i) /* reconstruct suffix array from inverse.*/ + I[V[i]]=i; + } +}; + +#endif /*def LS_H_*/ diff --git a/mask.cpp b/mask.cpp new file mode 100644 index 0000000..ffefdc7 --- /dev/null +++ b/mask.cpp @@ -0,0 +1,36 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "mask.h" + +// 5-bit pop count +int alts5[32] = { + 0, 1, 1, 2, 1, 2, 2, 3, + 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5 +}; + +// Index of lowest set bit +int firsts5[32] = { + -1, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0 +}; diff --git a/mask.h b/mask.h new file mode 100644 index 0000000..e00c194 --- /dev/null +++ b/mask.h @@ -0,0 +1,79 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef MASK_H_ +#define MASK_H_ + +#include +#include "random_source.h" + +// 5-bit pop count +extern int alts5[32]; + +// Index of lowest set bit +extern int firsts5[32]; + +/** + * Return 1 if a 2-bit-encoded base ('i') matches any bit in the mask ('j') and + * the mask < 16. Returns -1 if either the reference or the read character was + * ambiguous. Returns 0 if the characters unambiguously mismatch. + */ +static inline int matchesEx(int i, int j) { + if(j >= 16 || i > 3) { + // read and/or ref was ambiguous + return -1; + } + return (((1 << i) & j) != 0) ? 1 : 0; +} + +/** + * Return 1 if a 2-bit-encoded base ('i') matches any bit in the mask ('j'). + */ +static inline bool matches(int i, int j) { + return ((1 << i) & j) != 0; +} + +/** + * Given a mask with up to 5 bits, return an index corresponding to a + * set bit in the mask, randomly chosen from among all set bits. + */ +static inline int randFromMask(RandomSource& rnd, int mask) { + assert_gt(mask, 0); + if(alts5[mask] == 1) { + // only one to pick from, pick it via lookup table + return firsts5[mask]; + } + assert_gt(mask, 0); + assert_lt(mask, 32); + int r = rnd.nextU32() % alts5[mask]; + assert_geq(r, 0); + assert_lt(r, alts5[mask]); + // could do the following via lookup table too + for(int i = 0; i < 5; i++) { + if((mask & (1 << i)) != 0) { + if(r == 0) return i; + r--; + } + } + std::cerr << "Shouldn't get here" << std::endl; + throw 1; + return -1; +} + +#endif /*ndef MASK_H_*/ diff --git a/mem_ids.h b/mem_ids.h new file mode 100644 index 0000000..352817b --- /dev/null +++ b/mem_ids.h @@ -0,0 +1,35 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +// For holding index data +#define EBWT_CAT ((int) 1) +// For holding index-building data +#define EBWTB_CAT ((int) 2) +// For holding cache data +#define CA_CAT ((int) 3) +// For holding group-walk-left bookkeeping data +#define GW_CAT ((int) 4) +// For holding alignment bookkeeping data +#define AL_CAT ((int) 5) +// For holding dynamic programming bookkeeping data +#define DP_CAT ((int) 6) +// For holding alignment results and other hit objects +#define RES_CAT ((int) 7) +#define MISC_CAT ((int) 9) +#define DEBUG_CAT ((int)10) diff --git a/mm.h b/mm.h new file mode 100644 index 0000000..00a2335 --- /dev/null +++ b/mm.h @@ -0,0 +1,51 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#ifndef MM_H_ +#define MM_H_ + +/** + * mm.h: + * + * Defines that make it easier to handle files in the two different MM + * contexts: i.e. on Linux and Mac where MM is supported and POSIX I/O + * functions work as expected, and on Windows where MM is not supported + * and where there isn't POSIX I/O, + */ +#if 0 +#ifdef BOWTIE_MM +#define MM_FILE_CLOSE(x) if(x > 3) { close(x); } +#define MM_READ_RET ssize_t +// #define MM_READ read +#define MM_SEEK lseek +#define MM_FILE int +#define MM_FILE_INIT -1 +#else +#define MM_FILE_CLOSE(x) if(x != NULL) { fclose(x); } +#define MM_READ_RET size_t +#define MM_SEEK fseek +#define MM_FILE FILE* +#define MM_FILE_INIT NULL +#endif +#endif + +#define MM_READ(file, dest, sz) fread(dest, 1, sz, file) +#define MM_IS_IO_ERR(file_hd, ret, count) is_fread_err(file_hd, ret, count) + +#endif /* MM_H_ */ diff --git a/msvcc/CodeStubs.vcxproj.filters b/msvcc/CodeStubs.vcxproj.filters new file mode 100644 index 0000000..4622db2 --- /dev/null +++ b/msvcc/CodeStubs.vcxproj.filters @@ -0,0 +1,42 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/msvcc/CoreDefs.props b/msvcc/CoreDefs.props new file mode 100644 index 0000000..ed8b7d4 --- /dev/null +++ b/msvcc/CoreDefs.props @@ -0,0 +1,13 @@ + + + + + + + + typeof=decltype;POPCNT_CAPABILITY;ssize_t=SSIZE_T;NOMINMAX;_CRT_SECURE_NO_WARNINGS;_LARGEFILE_SOURCE;_FILE_OFFSET_BITS=64;_GNU_SOURCE;_MBCS;%(PreprocessorDefinitions) + codeStubs + + + + \ No newline at end of file diff --git a/msvcc/SetVersion.vbs b/msvcc/SetVersion.vbs new file mode 100644 index 
0000000..89265b1 --- /dev/null +++ b/msvcc/SetVersion.vbs @@ -0,0 +1,14 @@ + +' ask the scripting runtime environemt for access to files +set FS = CreateObject("Scripting.FileSystemObject") +set FileHandler = FS.GetFile("../VERSION") +set inputTextStream = FileHandler.OpenAsTextStream(1) +version = inputTextStream.ReadLine + +WScript.Echo "Version: " & version + +set outputTextStream = FS.CreateTextFile("../version.h",true) +outputTextStream.WriteLine "#define HISAT2_VERSION """ & version & """" +outputTextStream.close + + diff --git a/msvcc/SysDefs.props b/msvcc/SysDefs.props new file mode 100644 index 0000000..b315948 --- /dev/null +++ b/msvcc/SysDefs.props @@ -0,0 +1,17 @@ + + + + + + + + BUILD_TIME="$([System.DateTime]::Now)";BUILD_HOST="$(VisualStudioEdition) OS ver $(OSVersion)";COMPILER_VERSION="MSC $(MSBuildAssemblyVersion)";COMPILER_OPTIONS="$(NuGetRuntimeIdentifier) Runtime vers $(MSBuildRuntimeVersion)" + "version.h" + + + + + + + + \ No newline at end of file diff --git a/msvcc/codeStubs.vcxproj b/msvcc/codeStubs.vcxproj new file mode 100644 index 0000000..b3de4a6 --- /dev/null +++ b/msvcc/codeStubs.vcxproj @@ -0,0 +1,105 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + {08C70FF5-9E0C-42DF-B7BF-C1AF1B0D2CFF} + win32stubs + 8.1 + + + + StaticLibrary + true + v140 + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + Level3 + Disabled + true + _CRT_SECURE_NO_WARNINGS + + + true + + + cscript.exe SetVersion.vbs + ../version.h; + ../VERSION; + Setting version info + + + + + Level3 + MaxSpeed + true + true + true + _CRT_SECURE_NO_WARNINGS;NDEBUG + MultiThreaded + + + true + true + true + + + cscript.exe SetVersion.vbs + ../version.h; + ../VERSION; + Setting version info + + + + + + + \ No newline at end of file diff --git a/msvcc/codeStubs/getopt.c b/msvcc/codeStubs/getopt.c new file 
mode 100644 index 0000000..8a03080 --- /dev/null +++ b/msvcc/codeStubs/getopt.c @@ -0,0 +1,1058 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + + Copyright (C) 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, + 1996, 1997, 1998, 2005 Free Software Foundation, Inc. + + NOTE: This source is derived from an old version taken from the GNU C + Library (glibc). + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, + USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +#include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +#endif + +#ifdef _WIN32 +#include +#define ATTRIBUTE_UNUSED +#else +#include "ansidecl.h" +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). 
Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. + When compiling libc, the _ macro is predefined. */ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# define _(msgid) gettext (msgid) +# else +# define _(msgid) (msgid) +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. 
+ Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg = NULL; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized = 0; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. 
+ This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +# if HAVE_STRING_H +# include +# else +# if HAVE_STRINGS_H +# include +# endif +# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. 
*/ + +#if HAVE_STDLIB_H && HAVE_DECL_GETENV +# include +#elif !defined(getenv) +# ifdef __cplusplus +extern "C" { +# endif /* __cplusplus */ +extern char *getenv (const char *); +# ifdef __cplusplus +} +# endif /* __cplusplus */ +#endif + +static char * +my_index (const char *str, int chr) +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; + +static int original_argc; +static char *const *original_argv; + +/* Make sure the environment variable bash 2.0 puts in the environment + is valid for the getopt call we must make sure that the ARGV passed + to getopt is that one passed to the process. */ +static void +__attribute__ ((unused)) +store_args_and_env (int argc, char *const *argv) +{ + /* XXX This is no good solution. We should rather copy the args so + that we can compare them later. 
But we must not use malloc(3). */ + original_argc = argc; + original_argv = argv; +} +# ifdef text_set_element +text_set_element (__libc_subinit, store_args_and_env); +# endif /* text_set_element */ + +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (char **argv) +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#ifdef _LIBC + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. 
*/ + char *new_str = (char *) malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (int argc ATTRIBUTE_UNUSED, + char *const *argv ATTRIBUTE_UNUSED, + const char *optstring) +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. 
*/ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#ifdef _LIBC + if (posixly_correct == NULL + && argc == original_argc && argv == original_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. 
+ Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. 
+ + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (int argc, char *const *argv, const char *optstring, + const struct option *longopts, + int *longind, int long_only) +{ + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#ifdef _LIBC +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. 
+ Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (opterr) + { + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + _("%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + _("%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (opterr) + { + if (argv[optind][1] == '-') + /* --option */ + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); + else + /* +option or -option */ + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (opterr) + { + if (posixly_correct) + /* 1003.2 specifies the format of this message. 
*/ + fprintf (stderr, _("%s: illegal option -- %c\n"), + argv[0], c); + else + fprintf (stderr, _("%s: invalid option -- %c\n"), + argv[0], c); + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. 
*/ + ambig = 1; + } + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (opterr) + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. 
*/ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (int argc, char *const *argv, const char *optstring) +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (int argc, char **argv) +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == -1) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/msvcc/codeStubs/getopt.h b/msvcc/codeStubs/getopt.h new file mode 100644 index 0000000..0b46fdc --- /dev/null +++ b/msvcc/codeStubs/getopt.h @@ -0,0 +1,154 @@ +/* Declarations for getopt. + Copyright 1989, 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 2000, + 2002 Free Software Foundation, Inc. + + NOTE: The canonical source of this file is maintained with the GNU C Library. + Bugs can be reported to bug-glibc@gnu.org. 
+ + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, + USA. */ + +#ifndef _GETOPT_H +#define _GETOPT_H 1 + +#ifndef __STDC__ +#define __STDC__ 1 +#define __STDC__UNDEF__ +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +/* Describe the long-named options requested by the application. 
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +#if defined (__STDC__) && __STDC__ + const char *name; +#else + char *name; +#endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +#if defined (__STDC__) && __STDC__ +/* HAVE_DECL_* is a three-state macro: undefined, 0 or 1. If it is + undefined, we haven't run the autoconf check so provide the + declaration without arguments. If it is 0, we checked and failed + to find the declaration so provide a fully prototyped one. If it + is 1, we found it so don't provide any declaration at all. */ +#if !HAVE_DECL_GETOPT +#if defined (__GNU_LIBRARY__) || defined (HAVE_DECL_GETOPT) +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in unistd.h. 
To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int argc, char *const *argv, const char *shortopts); +#else +#ifndef __cplusplus +extern int getopt (); +#endif /* __cplusplus */ +#endif +#endif /* !HAVE_DECL_GETOPT */ + +extern int getopt_long (int argc, char *const *argv, const char *shortopts, + const struct option *longopts, int *longind); +extern int getopt_long_only (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind, + int long_only); +#else /* not __STDC__ */ +extern int getopt (); +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#ifdef __STDC__UNDEF__ +#undef __STDC__ +#endif + + +#endif /* getopt.h */ diff --git a/msvcc/codeStubs/getopt1.c b/msvcc/codeStubs/getopt1.c new file mode 100644 index 0000000..255b144 --- /dev/null +++ b/msvcc/codeStubs/getopt1.c @@ -0,0 +1,180 @@ +/* getopt_long and getopt_long_only entry points for GNU getopt. + Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98,2005 + Free Software Foundation, Inc. + + NOTE: This source is derived from an old version taken from the GNU C + Library (glibc). + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, + USA. */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +#ifndef const +#define const +#endif +#endif + +#include + +#include "getopt.h" + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +#include +#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +#define ELIDE_CODE +#endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +#include +#endif + +#ifndef NULL +#define NULL 0 +#endif + +int +getopt_long (int argc, char *const *argv, const char *options, + const struct option *long_options, int *opt_index) +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. 
*/ + +int +getopt_long_only (int argc, char *const *argv, const char *options, + const struct option *long_options, int *opt_index) +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 1); +} + + +#endif /* Not ELIDE_CODE. */ + +#ifdef TEST + +#include + +int +main (int argc, char **argv) +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + int option_index = 0; + static struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == -1) + break; + + switch (c) + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case 'd': + printf ("option d with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? 
getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/msvcc/codeStubs/sys/time.cpp b/msvcc/codeStubs/sys/time.cpp new file mode 100644 index 0000000..666835c --- /dev/null +++ b/msvcc/codeStubs/sys/time.cpp @@ -0,0 +1,57 @@ +#include +#include + +/* +* This file is a copy of the public domain code that can be found in +* multiple locations and whose prevenance is unclear +* +* This is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with Bowtie 2. If not, see . +*/ + +#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) + #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 +#else + #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL +#endif + +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + FILETIME ft; + unsigned __int64 tmpres = 0; + static int tzflag; + + if (NULL != tv) + { + GetSystemTimeAsFileTime(&ft); + + tmpres |= ft.dwHighDateTime; + tmpres <<= 32; + tmpres |= ft.dwLowDateTime; + + /*converting file time to unix epoch*/ + tmpres -= DELTA_EPOCH_IN_MICROSECS; + tmpres /= 10; /*convert into microseconds*/ + tv->tv_sec = (long)(tmpres / 1000000UL); + tv->tv_usec = (long)(tmpres % 1000000UL); + } + + if (NULL != tz) + { + if (!tzflag) + { + _tzset(); + tzflag++; + } + tz->tz_minuteswest = _timezone / 60; + tz->tz_dsttime = _daylight; + } + + return 0; +} + diff --git a/msvcc/codeStubs/sys/time.h b/msvcc/codeStubs/sys/time.h new file mode 100644 index 0000000..e9c7442 --- /dev/null +++ b/msvcc/codeStubs/sys/time.h @@ -0,0 +1,15 @@ + +#ifndef TIME_STUB_H +#define TIME_STUB_H + +#include //For 
timeval defn + +struct timezone { + int tz_minuteswest; + int tz_dsttime; +}; + +int gettimeofday(struct timeval *tv, struct timezone *tz); + + +#endif \ No newline at end of file diff --git a/msvcc/codeStubs/unistd.h b/msvcc/codeStubs/unistd.h new file mode 100644 index 0000000..e69de29 diff --git a/msvcc/hisat2-align-l.vcxproj b/msvcc/hisat2-align-l.vcxproj new file mode 100644 index 0000000..237a473 --- /dev/null +++ b/msvcc/hisat2-align-l.vcxproj @@ -0,0 +1,121 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + {08c70ff5-9e0c-42df-b7bf-c1af1b0d2cff} + + + {d281e44b-92e9-4cea-a666-b05530f65000} + + + {e1f391b2-4a3f-43ca-9192-14536abe783e} + + + + {55F86D23-4245-4050-BD2D-CC5D4FD0C36B} + hsat2alignl + 8.1 + hisat2-align-l + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + + + Level3 + Disabled + true + BOWTIE_64BIT_INDEX;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + "version.h" + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + BOWTIE_64BIT_INDEX;NDEBUG;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + MultiThreaded + "version.h" + + + true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git a/msvcc/hisat2-align-l.vcxproj.filters b/msvcc/hisat2-align-l.vcxproj.filters new file mode 100644 index 0000000..a8bc380 --- /dev/null +++ b/msvcc/hisat2-align-l.vcxproj.filters @@ -0,0 +1,25 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git 
a/msvcc/hisat2-align-s.vcxproj b/msvcc/hisat2-align-s.vcxproj new file mode 100644 index 0000000..501ce19 --- /dev/null +++ b/msvcc/hisat2-align-s.vcxproj @@ -0,0 +1,124 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + {08c70ff5-9e0c-42df-b7bf-c1af1b0d2cff} + + + {43f946a8-9184-456b-a522-7b411c10a4eb} + + + {8beeb701-aa26-4d2b-827b-64071f88afb3} + + + + {9D5066DB-ACD2-42E9-BFE6-98C8E7DEA7DA} + hsat2alignl + 8.1 + hisat2-align-s + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + + + Level3 + Disabled + true + %(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + "version.h" + + + + + + + + + + + + true + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + MultiThreaded + "version.h" + + + true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git a/msvcc/hisat2-align-s.vcxproj.filters b/msvcc/hisat2-align-s.vcxproj.filters new file mode 100644 index 0000000..a8bc380 --- /dev/null +++ b/msvcc/hisat2-align-s.vcxproj.filters @@ -0,0 +1,25 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvcc/hisat2-build-l.vcxproj b/msvcc/hisat2-build-l.vcxproj new file mode 100644 index 0000000..94dac6a --- /dev/null +++ b/msvcc/hisat2-build-l.vcxproj @@ -0,0 +1,119 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + {08c70ff5-9e0c-42df-b7bf-c1af1b0d2cff} + + + {e1f391b2-4a3f-43ca-9192-14536abe783e} + + + + + + + + + 
{262E4D55-07C3-4B18-B8E2-F3B6AA4C583E} + hsat2alignl + 8.1 + hisat2-build-l + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + + + Level3 + Disabled + true + BOWTIE_64BIT_INDEX;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + "version.h" + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + BOWTIE_64BIT_INDEX;NDEBUG;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + MultiThreaded + "version.h" + + + true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git a/msvcc/hisat2-build-l.vcxproj.filters b/msvcc/hisat2-build-l.vcxproj.filters new file mode 100644 index 0000000..b29b42d --- /dev/null +++ b/msvcc/hisat2-build-l.vcxproj.filters @@ -0,0 +1,28 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvcc/hisat2-build-s.vcxproj b/msvcc/hisat2-build-s.vcxproj new file mode 100644 index 0000000..7a3e5bd --- /dev/null +++ b/msvcc/hisat2-build-s.vcxproj @@ -0,0 +1,122 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + {08c70ff5-9e0c-42df-b7bf-c1af1b0d2cff} + + + {8beeb701-aa26-4d2b-827b-64071f88afb3} + + + + + + + + + {3B90EEC9-CDFF-424E-B3A3-4B7A5326A43F} + hsat2alignl + 8.1 + hisat2-build-s + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + + + Level3 + Disabled + true + 
MASSIVE_DATA_RLCSA;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + "version.h" + + + + + + + + + + + + true + + + + + Level3 + MaxSpeed + true + true + true + MASSIVE_DATA_RLCSA;NDEBUG;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + MultiThreaded + "version.h" + + + true + true + + + + + + + + + + + + + + \ No newline at end of file diff --git a/msvcc/hisat2-build-s.vcxproj.filters b/msvcc/hisat2-build-s.vcxproj.filters new file mode 100644 index 0000000..b29b42d --- /dev/null +++ b/msvcc/hisat2-build-s.vcxproj.filters @@ -0,0 +1,28 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvcc/hisat2-inspect-l.vcxproj b/msvcc/hisat2-inspect-l.vcxproj new file mode 100644 index 0000000..037afc4 --- /dev/null +++ b/msvcc/hisat2-inspect-l.vcxproj @@ -0,0 +1,95 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {E0A17972-1AF6-429A-A902-3913656B5CFC} + hisat2inspectl + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + Level3 + Disabled + true + BOWTIE_64BIT_INDEX;HISAT2_INSPECT_MAIN _MBCS;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + %(ForcedIncludeFiles) + + + + + Level3 + MaxSpeed + true + true + true + BOWTIE_64BIT_INDEX;HISAT2_INSPECT_MAIN;NDEBUG;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + %(ForcedIncludeFiles) + MultiThreaded + + + true + true + + + + + {08c70ff5-9e0c-42df-b7bf-c1af1b0d2cff} + + + {e1f391b2-4a3f-43ca-9192-14536abe783e} + + + + + + + + + \ No 
newline at end of file diff --git a/msvcc/hisat2-inspect-l.vcxproj.filters b/msvcc/hisat2-inspect-l.vcxproj.filters new file mode 100644 index 0000000..4df5e56 --- /dev/null +++ b/msvcc/hisat2-inspect-l.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/msvcc/hisat2-inspect-s.vcxproj b/msvcc/hisat2-inspect-s.vcxproj new file mode 100644 index 0000000..a026526 --- /dev/null +++ b/msvcc/hisat2-inspect-s.vcxproj @@ -0,0 +1,95 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {DA85C3F8-8CDD-4ED4-AF86-05B5556670F7} + hisat2inspectl + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + Level3 + Disabled + true + BOWTIE_64BIT_INDEX;HISAT2_INSPECT_MAIN _MBCS;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + %(ForcedIncludeFiles) + + + + + Level3 + MaxSpeed + true + true + true + HISAT2_INSPECT_MAIN;NDEBUG;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + %(ForcedIncludeFiles) + MultiThreaded + + + true + true + + + + + {08c70ff5-9e0c-42df-b7bf-c1af1b0d2cff} + + + {8beeb701-aa26-4d2b-827b-64071f88afb3} + + + + + + + + + \ No newline at end of file diff --git a/msvcc/hisat2-inspect-s.vcxproj.filters b/msvcc/hisat2-inspect-s.vcxproj.filters new file mode 100644 index 0000000..4df5e56 --- /dev/null +++ b/msvcc/hisat2-inspect-s.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + 
h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/msvcc/search.vcxproj b/msvcc/search.vcxproj new file mode 100644 index 0000000..be87c67 --- /dev/null +++ b/msvcc/search.vcxproj @@ -0,0 +1,194 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {43F946A8-9184-456B-A522-7B411C10A4EB} + search + 8.1 + search + + + + StaticLibrary + true + v140 + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + Level3 + Disabled + true + %(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + + + true + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + MultiThreaded + + + true + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/msvcc/search.vcxproj.filters b/msvcc/search.vcxproj.filters new file mode 100644 index 0000000..c22b4aa --- /dev/null +++ b/msvcc/search.vcxproj.filters @@ -0,0 +1,336 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header 
Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvcc/search64.vcxproj b/msvcc/search64.vcxproj new file mode 100644 index 0000000..2a07b48 --- /dev/null +++ b/msvcc/search64.vcxproj @@ -0,0 +1,196 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 
{D281E44B-92E9-4CEA-A666-B05530F65000} + search + 8.1 + search64 + + + + StaticLibrary + true + v140 + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + Level3 + Disabled + true + BOWTIE_64BIT_INDEX;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + + + true + + + + + Level3 + MaxSpeed + true + true + true + BOWTIE_64BIT_INDEX;NDEBUG;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + MultiThreaded + + + true + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/msvcc/search64.vcxproj.filters b/msvcc/search64.vcxproj.filters new file mode 100644 index 0000000..93260ba --- /dev/null +++ b/msvcc/search64.vcxproj.filters @@ -0,0 +1,348 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header 
Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvcc/shared.vcxproj b/msvcc/shared.vcxproj new file mode 100644 index 0000000..8f68cc6 --- /dev/null +++ b/msvcc/shared.vcxproj @@ -0,0 +1,142 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {8BEEB701-AA26-4D2B-827B-64071F88AFB3} + shared + 8.1 + + + + StaticLibrary + true + v140 + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + Level3 + Disabled + true + 
%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + + + true + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + MultiThreaded + + + true + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/msvcc/shared.vcxproj.filters b/msvcc/shared.vcxproj.filters new file mode 100644 index 0000000..0e994b1 --- /dev/null +++ b/msvcc/shared.vcxproj.filters @@ -0,0 +1,111 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/msvcc/shared64.vcxproj b/msvcc/shared64.vcxproj new file mode 100644 index 0000000..82a1a5d --- /dev/null +++ b/msvcc/shared64.vcxproj @@ -0,0 +1,137 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {E1F391B2-4A3F-43CA-9192-14536ABE783E} + shared + 8.1 + + + + StaticLibrary + true + v140 + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + $(Platform)\$(ProjectName)\$(Configuration)\ + + + + Level3 
+ Disabled + true + BOWTIE_64BIT_INDEX;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + + + true + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + BOWTIE_64BIT_INDEX;NDEBUG;%(PreprocessorDefinitions) + %(AdditionalIncludeDirectories) + MultiThreaded + + + true + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/msvcc/shared64.vcxproj.filters b/msvcc/shared64.vcxproj.filters new file mode 100644 index 0000000..63de9bf --- /dev/null +++ b/msvcc/shared64.vcxproj.filters @@ -0,0 +1,96 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/multikey_qsort.cpp b/multikey_qsort.cpp new file mode 100644 index 0000000..6afa98b --- /dev/null +++ b/multikey_qsort.cpp @@ -0,0 +1,20 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "multikey_qsort.h" diff --git a/multikey_qsort.h b/multikey_qsort.h new file mode 100644 index 0000000..623f8f2 --- /dev/null +++ b/multikey_qsort.h @@ -0,0 +1,1237 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef MULTIKEY_QSORT_H_ +#define MULTIKEY_QSORT_H_ + +#include +#include "sequence_io.h" +#include "alphabet.h" +#include "assert_helpers.h" +#include "diff_sample.h" +#include "sstring.h" +#include "btypes.h" + +using namespace std; + +/** + * Swap elements a and b in s + */ +template +static inline void swap(TStr& s, size_t slen, TPos a, TPos b) { + assert_lt(a, slen); + assert_lt(b, slen); + swap(s[a], s[b]); +} + +/** + * Swap elements a and b in array s + */ +template +static inline void swap(TVal* s, size_t slen, TPos a, TPos b) { + assert_lt(a, slen); + assert_lt(b, slen); + swap(s[a], s[b]); +} + +/** + * Helper macro for swapping elements a and b in s. 
Does some additional + * sainty checking w/r/t begin and end (which are parameters to the sorting + * routines below). + */ +#define SWAP(s, a, b) { \ + assert_geq(a, begin); \ + assert_geq(b, begin); \ + assert_lt(a, end); \ + assert_lt(b, end); \ + swap(s, slen, a, b); \ +} + +/** + * Helper macro for swapping the same pair of elements a and b in two different + * strings s and s2. This is a helpful variant if, for example, the caller + * would like to see how their input was permuted by the sort routine (in that + * case, the caller would let s2 be an array s2[] where s2 is the same length + * as s and s2[i] = i). + */ +#define SWAP2(s, s2, a, b) { \ + SWAP(s, a, b); \ + swap(s2, slen, a, b); \ +} + +#define SWAP1(s, s2, a, b) { \ + SWAP(s, a, b); \ +} + +/** + * Helper macro that swaps a range of elements [i, i+n) with another + * range [j, j+n) in s. + */ +#define VECSWAP(s, i, j, n) { \ + if(n > 0) { vecswap(s, slen, i, j, n, begin, end); } \ +} + +/** + * Helper macro that swaps a range of elements [i, i+n) with another + * range [j, j+n) both in s and s2. + */ +#define VECSWAP2(s, s2, i, j, n) { \ + if(n > 0) { vecswap2(s, slen, s2, i, j, n, begin, end); } \ +} + +/** + * Helper function that swaps a range of elements [i, i+n) with another + * range [j, j+n) in s. begin and end represent the current range under + * consideration by the caller (one of the recursive multikey_quicksort + * routines below). 
+ */ +template +static inline void vecswap(TStr& s, size_t slen, TPos i, TPos j, TPos n, TPos begin, TPos end) { + assert_geq(i, begin); + assert_geq(j, begin); + assert_lt(i, end); + assert_lt(j, end); + while(n-- > 0) { + assert_geq(n, 0); + TPos a = i+n; + TPos b = j+n; + assert_geq(a, begin); + assert_geq(b, begin); + assert_lt(a, end); + assert_lt(b, end); + swap(s, slen, a, b); + } +} + +template +static inline void vecswap(TVal *s, size_t slen, TPos i, TPos j, TPos n, TPos begin, TPos end) { + assert_geq(i, begin); + assert_geq(j, begin); + assert_lt(i, end); + assert_lt(j, end); + while(n-- > 0) { + assert_geq(n, 0); + TPos a = i+n; + TPos b = j+n; + assert_geq(a, begin); + assert_geq(b, begin); + assert_lt(a, end); + assert_lt(b, end); + swap(s, slen, a, b); + } +} + +/** + * Helper function that swaps a range of elements [i, i+n) with another range + * [j, j+n) both in s and s2. begin and end represent the current range under + * consideration by the caller (one of the recursive multikey_quicksort + * routines below). 
+ */ +template +static inline void vecswap2( + TStr& s, + size_t slen, + TStr& s2, + TPos i, + TPos j, + TPos n, + TPos begin, + TPos end) +{ + assert_geq(i, begin); + assert_geq(j, begin); + assert_lt(i, end); + assert_lt(j, end); + while(n-- > 0) { + assert_geq(n, 0); + TPos a = i+n; + TPos b = j+n; + assert_geq(a, begin); + assert_geq(b, begin); + assert_lt(a, end); + assert_lt(b, end); + swap(s, slen, a, b); + swap(s2, slen, a, b); + } +} + +template +static inline void vecswap2(TVal* s, size_t slen, TVal* s2, TPos i, TPos j, TPos n, TPos begin, TPos end) { + assert_geq(i, begin); + assert_geq(j, begin); + assert_lt(i, end); + assert_lt(j, end); + while(n-- > 0) { + assert_geq(n, 0); + TPos a = i+n; + TPos b = j+n; + assert_geq(a, begin); + assert_geq(b, begin); + assert_lt(a, end); + assert_lt(b, end); + swap(s, slen, a, b); + swap(s2, slen, a, b); + } +} + +/// Retrieve an int-ized version of the ath character of string s, or, +/// if a goes off the end of s, return a (user-specified) int greater +/// than any TAlphabet character - 'hi'. +#define CHAR_AT(ss, aa) ((length(s[ss]) > aa) ? (int)(s[ss][aa]) : hi) + +/// Retrieve an int-ized version of the ath character of string s, or, +/// if a goes off the end of s, return a (user-specified) int greater +/// than any TAlphabet character - 'hi'. +#define CHAR_AT_SUF(si, off) \ + (((off + s[si]) < hlen) ? ((int)(host[off + s[si]])) : (hi)) + +/// Retrieve an int-ized version of the ath character of string s, or, +/// if a goes off the end of s, return a (user-specified) int greater +/// than any TAlphabet character - 'hi'. 
+ +#define CHAR_AT_SUF_U8(si, off) char_at_suf_u8(host, hlen, s, si, off, hi) + +// Note that CHOOSE_AND_SWAP_RANDOM_PIVOT is unused +#define CHOOSE_AND_SWAP_RANDOM_PIVOT(sw, ch) { \ + /* Note: rand() didn't really cut it here; it seemed to run out of */ \ + /* randomness and, after a time, returned the same thing over and */ \ + /* over again */ \ + a = (rand() % n) + begin; /* choose pivot between begin and end */ \ + assert_lt(a, end); assert_geq(a, begin); \ + sw(s, s2, begin, a); /* move pivot to beginning */ \ +} + +/** + * Ad-hoc DNA-centric way of choose a pretty good pivot without using + * the pseudo-random number generator. We try to get a 1 or 2 if + * possible, since they'll split things more evenly than a 0 or 4. We + * also avoid swapping in the event that we choose the first element. + */ +#define CHOOSE_AND_SWAP_SMART_PIVOT(sw, ch) { \ + a = begin; /* choose first elt */ \ + /* now try to find a better elt */ \ + if(n >= 5) { /* n is the difference between begin and end */ \ + if (ch(begin+1, depth) == 1 || ch(begin+1, depth) == 2) a = begin+1; \ + else if(ch(begin+2, depth) == 1 || ch(begin+2, depth) == 2) a = begin+2; \ + else if(ch(begin+3, depth) == 1 || ch(begin+3, depth) == 2) a = begin+3; \ + else if(ch(begin+4, depth) == 1 || ch(begin+4, depth) == 2) a = begin+4; \ + if(a != begin) sw(s, s2, begin, a); /* move pivot to beginning */ \ + } \ + /* the element at [begin] now holds the pivot value */ \ +} + +#define CHOOSE_AND_SWAP_PIVOT CHOOSE_AND_SWAP_SMART_PIVOT + +#ifndef NDEBUG + +/** + * Assert that the range of chars at depth 'depth' in strings 'begin' + * to 'end' in string-of-suffix-offsets s is parititioned properly + * according to the ternary paritioning strategy of Bentley and McIlroy + * (*prior to* swapping the = regions to the center) + */ +template +bool assertPartitionedSuf( + const THost& host, + TIndexOffU *s, + size_t slen, + int hi, + int pivot, + size_t begin, + size_t end, + size_t depth) +{ + size_t hlen = host.length(); 
+ int state = 0; // 0 -> 1st = section, 1 -> < section, 2 -> > section, 3 -> 2nd = section + for(size_t i = begin; i < end; i++) { + switch(state) { + case 0: + if (CHAR_AT_SUF(i, depth) < pivot) { state = 1; break; } + else if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; } + assert_eq(CHAR_AT_SUF(i, depth), pivot); break; + case 1: + if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; } + else if (CHAR_AT_SUF(i, depth) == pivot) { state = 3; break; } + assert_lt(CHAR_AT_SUF(i, depth), pivot); break; + case 2: + if (CHAR_AT_SUF(i, depth) == pivot) { state = 3; break; } + assert_gt(CHAR_AT_SUF(i, depth), pivot); break; + case 3: + assert_eq(CHAR_AT_SUF(i, depth), pivot); break; + } + } + return true; +} + +/** + * Assert that the range of chars at depth 'depth' in strings 'begin' + * to 'end' in string-of-suffix-offsets s is parititioned properly + * according to the ternary paritioning strategy of Bentley and McIlroy + * (*after* swapping the = regions to the center) + */ +template +bool assertPartitionedSuf2( + const THost& host, + TIndexOffU *s, + size_t slen, + int hi, + int pivot, + size_t begin, + size_t end, + size_t depth) +{ + size_t hlen = host.length(); + int state = 0; // 0 -> < section, 1 -> = section, 2 -> > section + for(size_t i = begin; i < end; i++) { + switch(state) { + case 0: + if (CHAR_AT_SUF(i, depth) == pivot) { state = 1; break; } + else if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; } + assert_lt(CHAR_AT_SUF(i, depth), pivot); break; + case 1: + if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; } + assert_eq(CHAR_AT_SUF(i, depth), pivot); break; + case 2: + assert_gt(CHAR_AT_SUF(i, depth), pivot); break; + } + } + return true; +} +#endif + +/** + * Assert that string s of suffix offsets into string 'host' is a seemingly + * legitimate suffix-offset list (at this time, we just check that it doesn't + * list any suffix twice). 
+ */ +static inline void sanityCheckInputSufs(TIndexOffU *s, size_t slen) { + assert_gt(slen, 0); + for(size_t i = 0; i < slen; i++) { + // Actually, it's convenient to allow the caller to provide + // suffix offsets thare are off the end of the host string. + // See, e.g., build() in diff_sample.cpp. + //assert_lt(s[i], length(host)); + for(size_t j = i+1; j < slen; j++) { + assert_neq(s[i], s[j]); + } + } +} + +/** + * Assert that the string s of suffix offsets into 'host' really are in + * lexicographical order up to depth 'upto'. + */ +template +void sanityCheckOrderedSufs( + const T& host, + size_t hlen, + TIndexOffU *s, + size_t slen, + size_t upto, + size_t lower = 0, + size_t upper = OFF_MASK) +{ + assert_lt(s[0], hlen); + upper = min(upper, slen-1); + for(size_t i = lower; i < upper; i++) { + // Allow s[i+t] to point off the end of the string; this is + // convenient for some callers + if(s[i+1] >= hlen) continue; +#ifndef NDEBUG + if(upto == OFF_MASK) { + assert(sstr_suf_lt(host, s[i], hlen, host, s[i+1], hlen, false)); + } else { + if(sstr_suf_upto_lt(host, s[i], host, s[i+1], upto, false)) { + // operator > treats shorter strings as + // lexicographically smaller, but we want to opposite + //assert(isPrefix(suffix(host, s[i+1]), suffix(host, s[i]))); + } + } +#endif + } +} + +/** + * Main multikey quicksort function for suffixes. Based on Bentley & + * Sedgewick's algorithm on p.5 of their paper "Fast Algorithms for + * Sorting and Searching Strings". That algorithm has been extended in + * three ways: + * + * 1. Deal with keys of different lengths by checking bounds and + * considering off-the-end values to be 'hi' (b/c our goal is the + * BWT transform, we're biased toward considring prefixes as + * lexicographically *greater* than their extensions). + * 2. The multikey_qsort_suffixes version takes a single host string + * and a list of suffix offsets as input. 
This reduces memory + * footprint compared to an approach that treats its input + * generically as a set of strings (not necessarily suffixes), thus + * requiring that we store at least two integers worth of + * information for each string. + * 3. Sorting functions take an extra "upto" parameter that upper- + * bounds the depth to which the function sorts. + * + * TODO: Consult a tie-breaker (like a difference cover sample) if two + * keys share a long prefix. + */ +template +void mkeyQSortSuf( + const T& host, + size_t hlen, + TIndexOffU *s, + size_t slen, + int hi, + size_t begin, + size_t end, + size_t depth, + size_t upto = OFF_MASK) +{ + // Helper for making the recursive call; sanity-checks arguments to + // make sure that the problem actually got smaller. + #define MQS_RECURSE_SUF(nbegin, nend, ndepth) { \ + assert(nbegin > begin || nend < end || ndepth > depth); \ + if(ndepth < upto) { /* don't exceed depth of 'upto' */ \ + mkeyQSortSuf(host, hlen, s, slen, hi, nbegin, nend, ndepth, upto); \ + } \ + } + assert_leq(begin, slen); + assert_leq(end, slen); + size_t a, b, c, d, /*e,*/ r; + size_t n = end - begin; + if(n <= 1) return; // 1-element list already sorted + CHOOSE_AND_SWAP_PIVOT(SWAP1, CHAR_AT_SUF); // pick pivot, swap it into [begin] + int v = CHAR_AT_SUF(begin, depth); // v <- randomly-selected pivot value + #ifndef NDEBUG + { + bool stillInBounds = false; + for(size_t i = begin; i < end; i++) { + if(depth < (hlen-s[i])) { + stillInBounds = true; + break; + } else { /* already fell off this suffix */ } + } + assert(stillInBounds); // >=1 suffix must still be in bounds + } + #endif + a = b = begin; + c = d = end-1; + while(true) { + // Invariant: everything before a is = pivot, everything + // between a and b is < + int bc = 0; // shouldn't have to init but gcc on Mac complains + while(b <= c && v >= (bc = CHAR_AT_SUF(b, depth))) { + if(v == bc) { + SWAP(s, a, b); a++; + } + b++; + } + // Invariant: everything after d is = pivot, everything + // 
between c and d is > + int cc = 0; // shouldn't have to init but gcc on Mac complains + while(b <= c && v <= (cc = CHAR_AT_SUF(c, depth))) { + if(v == cc) { + SWAP(s, c, d); d--; + } + c--; + } + if(b > c) break; + SWAP(s, b, c); + b++; + c--; + } + assert(a > begin || c < end-1); // there was at least one =s + assert_lt(d-c, n); // they can't all have been > pivot + assert_lt(b-a, n); // they can't all have been < pivot + assert(assertPartitionedSuf(host, s, slen, hi, v, begin, end, depth)); // check pre-=-swap invariant + r = min(a-begin, b-a); VECSWAP(s, begin, b-r, r); // swap left = to center + r = min(d-c, end-d-1); VECSWAP(s, b, end-r, r); // swap right = to center + assert(assertPartitionedSuf2(host, s, slen, hi, v, begin, end, depth)); // check post-=-swap invariant + r = b-a; // r <- # of <'s + if(r > 0) { + MQS_RECURSE_SUF(begin, begin + r, depth); // recurse on <'s + } + // Do not recurse on ='s if the pivot was the off-the-end value; + // they're already fully sorted + if(v != hi) { + MQS_RECURSE_SUF(begin + r, begin + r + (a-begin) + (end-d-1), depth+1); // recurse on ='s + } + r = d-c; // r <- # of >'s excluding those exhausted + if(r > 0 && v < hi-1) { + MQS_RECURSE_SUF(end-r, end, depth); // recurse on >'s + } +} + +/** + * Toplevel function for multikey quicksort over suffixes. + */ +template +void mkeyQSortSuf( + const T& host, + TIndexOffU *s, + size_t slen, + int hi, + bool verbose = false, + bool sanityCheck = false, + size_t upto = OFF_MASK) +{ + size_t hlen = host.length(); + assert_gt(slen, 0); + if(sanityCheck) sanityCheckInputSufs(s, slen); + mkeyQSortSuf(host, hlen, s, slen, hi, (size_t)0, slen, (size_t)0, upto); + if(sanityCheck) sanityCheckOrderedSufs(host, hlen, s, slen, upto); +} + +/** + * Just like mkeyQSortSuf but all swaps are applied to s2 as well as s. 
+ * This is a helpful variant if, for example, the caller would like to + * see how their input was permuted by the sort routine (in that case, + * the caller would let s2 be an array s2[] where s2 is the same length + * as s and s2[i] = i). + */ +struct QSortRange { + size_t begin; + size_t end; + size_t depth; +}; +template +void mkeyQSortSuf2( + const T& host, + size_t hlen, + TIndexOffU *s, + size_t slen, + TIndexOffU *s2, + int hi, + size_t _begin, + size_t _end, + size_t _depth, + size_t upto = OFF_MASK, + EList* boundaries = NULL) +{ + ELList block_list; + while(true) { + size_t begin = 0, end = 0, depth = 0; + if(block_list.size() == 0) { + begin = _begin; + end = _end; + depth = _depth; + } else { + if(block_list.back().size() > 0) { + begin = block_list.back()[0].begin; + end = block_list.back()[0].end; + depth = block_list.back()[0].depth; + block_list.back().erase(0); + } else { + block_list.resize(block_list.size() - 1); + if(block_list.size() == 0) { + break; + } + } + } + if(depth == upto) { + if(boundaries != NULL) { + (*boundaries).push_back(end); + } + continue; + } + assert_leq(begin, slen); + assert_leq(end, slen); + size_t a, b, c, d, /*e,*/ r; + size_t n = end - begin; + if(n <= 1) { // 1-element list already sorted + if(n == 1 && boundaries != NULL) { + boundaries->push_back(end); + } + continue; + } + CHOOSE_AND_SWAP_PIVOT(SWAP2, CHAR_AT_SUF); // pick pivot, swap it into [begin] + int v = CHAR_AT_SUF(begin, depth); // v <- randomly-selected pivot value +#ifndef NDEBUG + { + bool stillInBounds = false; + for(size_t i = begin; i < end; i++) { + if(depth < (hlen-s[i])) { + stillInBounds = true; + break; + } else { /* already fell off this suffix */ } + } + assert(stillInBounds); // >=1 suffix must still be in bounds + } +#endif + a = b = begin; + c = d = /*e =*/ end-1; + while(true) { + // Invariant: everything before a is = pivot, everything + // between a and b is < + int bc = 0; // shouldn't have to init but gcc on Mac complains + while(b <= 
c && v >= (bc = CHAR_AT_SUF(b, depth))) { + if(v == bc) { + SWAP2(s, s2, a, b); a++; + } + b++; + } + // Invariant: everything after d is = pivot, everything + // between c and d is > + int cc = 0; // shouldn't have to init but gcc on Mac complains + while(b <= c && v <= (cc = CHAR_AT_SUF(c, depth))) { + if(v == cc) { + SWAP2(s, s2, c, d); d--; /*e--;*/ + } + //else if(c == e && v == hi) e--; + c--; + } + if(b > c) break; + SWAP2(s, s2, b, c); + b++; + c--; + } + assert(a > begin || c < end-1); // there was at least one =s + assert_lt(/*e*/d-c, n); // they can't all have been > pivot + assert_lt(b-a, n); // they can't all have been < pivot + assert(assertPartitionedSuf(host, s, slen, hi, v, begin, end, depth)); // check pre-=-swap invariant + r = min(a-begin, b-a); VECSWAP2(s, s2, begin, b-r, r); // swap left = to center + r = min(d-c, end-d-1); VECSWAP2(s, s2, b, end-r, r); // swap right = to center + assert(assertPartitionedSuf2(host, s, slen, hi, v, begin, end, depth)); // check post-=-swap invariant + r = b-a; // r <- # of <'s + block_list.expand(); + block_list.back().clear(); + if(r > 0) { // recurse on <'s + block_list.back().expand(); + block_list.back().back().begin = begin; + block_list.back().back().end = begin + r; + block_list.back().back().depth = depth; + } + // Do not recurse on ='s if the pivot was the off-the-end value; + // they're already fully sorted + //if(v != hi) { // recurse on ='s + block_list.back().expand(); + block_list.back().back().begin = begin + r; + block_list.back().back().end = begin + r + (a-begin) + (end-d-1); + block_list.back().back().depth = depth + 1; + //} + r = d-c; // r <- # of >'s excluding those exhausted + if(r > 0 /*&& v < hi-1*/) { // recurse on >'s + block_list.back().expand(); + block_list.back().back().begin = end - r; + block_list.back().back().end = end; + block_list.back().back().depth = depth; + } + } +} + +/** + * Toplevel function for multikey quicksort over suffixes with double + * swapping. 
+ */ +template +void mkeyQSortSuf2( + const T& host, + TIndexOffU *s, + size_t slen, + TIndexOffU *s2, + int hi, + bool verbose = false, + bool sanityCheck = false, + size_t upto = OFF_MASK, + EList* boundaries = NULL) +{ + size_t hlen = host.length(); + if(sanityCheck) sanityCheckInputSufs(s, slen); + TIndexOffU *sOrig = NULL; + if(sanityCheck) { + sOrig = new TIndexOffU[slen]; + memcpy(sOrig, s, OFF_SIZE * slen); + } + mkeyQSortSuf2(host, hlen, s, slen, s2, hi, (size_t)0, slen, (size_t)0, upto, boundaries); + if(sanityCheck) { + sanityCheckOrderedSufs(host, hlen, s, slen, upto); + for(size_t i = 0; i < slen; i++) { + assert_eq(s[i], sOrig[s2[i]]); + } + delete[] sOrig; + } +} + +// Ugly but necessary; otherwise the compiler chokes dramatically on +// the DifferenceCoverSample<> template args to the next few functions +template +class DifferenceCoverSample; + +/** + * Constant time + */ +template inline +bool sufDcLt( + const T1& host, + const T2& s1, + const T2& s2, + const DifferenceCoverSample& dc, + bool sanityCheck = false) +{ + size_t diff = dc.tieBreakOff(s1, s2); + ASSERT_ONLY(size_t hlen = host.length()); + assert_lt(diff, dc.v()); + assert_lt(diff, hlen-s1); + assert_lt(diff, hlen-s2); + if(sanityCheck) { + for(size_t i = 0; i < diff; i++) { + assert_eq(host[s1+i], host[s2+i]); + } + } + bool ret = dc.breakTie(s1+diff, s2+diff) < 0; +#ifndef NDEBUG + if(sanityCheck && ret != sstr_suf_lt(host, s1, hlen, host, s2, hlen, false)) { + assert(false); + } +#endif + return ret; +} + +/** + * k log(k) + */ +template inline +void qsortSufDc( + const T& host, + size_t hlen, + TIndexOffU* s, + size_t slen, + const DifferenceCoverSample& dc, + size_t begin, + size_t end, + bool sanityCheck = false) +{ + assert_leq(end, slen); + assert_lt(begin, slen); + assert_gt(end, begin); + size_t n = end - begin; + if(n <= 1) return; // 1-element list already sorted + // Note: rand() didn't really cut it here; it seemed to run out of + // randomness and, after a time, returned 
the same thing over and + // over again + size_t a = (rand() % n) + begin; // choose pivot between begin and end + assert_lt(a, end); + assert_geq(a, begin); + SWAP(s, end-1, a); // move pivot to end + size_t cur = 0; + for(size_t i = begin; i < end-1; i++) { + if(sufDcLt(host, s[i], s[end-1], dc, sanityCheck)) { + if(sanityCheck) + assert(dollarLt(suffix(host, s[i]), suffix(host, s[end-1]))); + assert_lt(begin + cur, end-1); + SWAP(s, i, begin + cur); + cur++; + } + } + // Put pivot into place + assert_lt(cur, end-begin); + SWAP(s, end-1, begin+cur); + if(begin+cur > begin) qsortSufDc(host, hlen, s, slen, dc, begin, begin+cur); + if(end > begin+cur+1) qsortSufDc(host, hlen, s, slen, dc, begin+cur+1, end); +} + +/** + * Toplevel function for multikey quicksort over suffixes. + */ +template +void mkeyQSortSufDcU8( + const T1& host1, + const T2& host, + size_t hlen, + TIndexOffU* s, + size_t slen, + const DifferenceCoverSample& dc, + int hi, + bool verbose = false, + bool sanityCheck = false) +{ + if(sanityCheck) sanityCheckInputSufs(s, slen); + mkeyQSortSufDcU8(host1, host, hlen, s, slen, dc, hi, 0, slen, 0, sanityCheck); + if(sanityCheck) sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK); +} + +/** + * Return a boolean indicating whether s1 < s2 using the difference + * cover to break the tie. 
+ */ +template inline +bool sufDcLtU8( + const T1& host1, + const T2& host, + size_t hlen, + size_t s1, + size_t s2, + const DifferenceCoverSample& dc, + bool sanityCheck = false) +{ + hlen += 0; + size_t diff = dc.tieBreakOff((TIndexOffU)s1, (TIndexOffU)s2); + assert_lt(diff, dc.v()); + assert_lt(diff, hlen-s1); + assert_lt(diff, hlen-s2); + if(sanityCheck) { + for(size_t i = 0; i < diff; i++) { + assert_eq(host[s1+i], host1[s2+i]); + } + } + bool ret = dc.breakTie((TIndexOffU)(s1+diff), (TIndexOffU)(s2+diff)) < 0; + // Sanity-check return value using dollarLt +#ifndef NDEBUG + bool ret2 = sstr_suf_lt(host1, s1, hlen, host, s2, hlen, false); + assert(!sanityCheck || ret == ret2); +#endif + return ret; +} + +/** + * k log(k) + */ +template inline +void qsortSufDcU8( + const T1& host1, + const T2& host, + size_t hlen, + TIndexOffU* s, + size_t slen, + const DifferenceCoverSample& dc, + size_t begin, + size_t end, + bool sanityCheck = false) +{ + assert_leq(end, slen); + assert_lt(begin, slen); + assert_gt(end, begin); + size_t n = end - begin; + if(n <= 1) return; // 1-element list already sorted + // Note: rand() didn't really cut it here; it seemed to run out of + // randomness and, after a time, returned the same thing over and + // over again + size_t a = (rand() % n) + begin; // choose pivot between begin and end + assert_lt(a, end); + assert_geq(a, begin); + SWAP(s, end-1, a); // move pivot to end + size_t cur = 0; + for(size_t i = begin; i < end-1; i++) { + if(sufDcLtU8(host1, host, hlen, s[i], s[end-1], dc, sanityCheck)) { +#ifndef NDEBUG + if(sanityCheck) { + assert(sstr_suf_lt(host1, s[i], hlen, host1, s[end-1], hlen, false)); + } + assert_lt(begin + cur, end-1); +#endif + SWAP(s, i, begin + cur); + cur++; + } + } + // Put pivot into place + assert_lt(cur, end-begin); + SWAP(s, end-1, begin+cur); + if(begin+cur > begin) qsortSufDcU8(host1, host, hlen, s, slen, dc, begin, begin+cur); + if(end > begin+cur+1) qsortSufDcU8(host1, host, hlen, s, slen, dc, 
begin+cur+1, end); +} + +#define BUCKET_SORT_CUTOFF (4 * 1024 * 1024) +#define SELECTION_SORT_CUTOFF 6 + +/** + * Straightforwardly obtain a uint8_t-ized version of t[off]. This + * works fine as long as TStr is not packed. + */ +template +inline uint8_t get_uint8(const TStr& t, size_t off) { + return t[off]; +} + +/** + * For incomprehensible generic-programming reasons, getting a uint8_t + * version of a character in a packed String<> requires casting first + * to Dna then to uint8_t. + */ +template<> +inline uint8_t get_uint8(const S2bDnaString& t, size_t off) { + return (uint8_t)t[off]; +} + +/** + * Return character at offset 'off' from the 'si'th suffix in the array + * 's' of suffixes. If the character is out-of-bounds, return hi. + */ +template +static inline int char_at_suf_u8( + const TStr& host, + size_t hlen, + TIndexOffU* s, + size_t si, + size_t off, + uint8_t hi) +{ + return ((off+s[si]) < hlen) ? get_uint8(host, off+s[si]) : (hi); +} + +template +static void selectionSortSufDcU8( + const T1& host1, + const T2& host, + size_t hlen, + TIndexOffU* s, + size_t slen, + const DifferenceCoverSample& dc, + uint8_t hi, + size_t begin, + size_t end, + size_t depth, + bool sanityCheck = false) +{ +#define ASSERT_SUF_LT(l, r) \ + if(sanityCheck && \ + !sstr_suf_lt(host1, s[l], hlen, host1, s[r], hlen, false)) { \ + assert(false); \ + } + + assert_gt(end, begin+1); + assert_leq(end-begin, SELECTION_SORT_CUTOFF); + assert_eq(hi, 4); + size_t v = dc.v(); + if(end == begin+2) { + size_t off = dc.tieBreakOff(s[begin], s[begin+1]); + if(off + s[begin] >= hlen || + off + s[begin+1] >= hlen) + { + off = OFF_MASK; + } + if(off != OFF_MASK) { + if(off < depth) { + qsortSufDcU8(host1, host, hlen, s, slen, dc, + begin, end, sanityCheck); + // It's helpful for debugging if we call this here + if(sanityCheck) { + sanityCheckOrderedSufs(host1, hlen, s, slen, + OFF_MASK, begin, end); + } + return; + } + v = off - depth + 1; + } + } + assert_leq(v, dc.v()); + size_t lim = v; + 
assert_geq(lim, 0); + for(size_t i = begin; i < end-1; i++) { + size_t targ = i; + size_t targoff = depth + s[i]; + for(size_t j = i+1; j < end; j++) { + assert_neq(j, targ); + size_t joff = depth + s[j]; + size_t k; + for(k = 0; k <= lim; k++) { + assert_neq(j, targ); + uint8_t jc = (k + joff < hlen) ? get_uint8(host, k + joff) : hi; + uint8_t tc = (k + targoff < hlen) ? get_uint8(host, k + targoff) : hi; + assert(jc != hi || tc != hi); + if(jc > tc) { + // the jth suffix is greater than the current + // smallest suffix + ASSERT_SUF_LT(targ, j); + break; + } else if(jc < tc) { + // the jth suffix is less than the current smallest + // suffix, so update smallest to be j + ASSERT_SUF_LT(j, targ); + targ = j; + targoff = joff; + break; + } else if(k == lim) { + // Check whether either string ends immediately + // after this character + assert_leq(k + joff + 1, hlen); + assert_leq(k + targoff + 1, hlen); + if(k + joff + 1 == hlen) { + // targ < j + assert_neq(k + targoff + 1, hlen); + ASSERT_SUF_LT(targ, j); + break; + } else if(k + targoff + 1 == hlen) { + // j < targ + ASSERT_SUF_LT(j, targ); + targ = j; + targoff = joff; + break; + } + } else { + // They're equal so far, keep going + } + } + // The jth suffix was equal to the current smallest suffix + // up to the difference-cover period, so disambiguate with + // difference cover + if(k == lim+1) { + assert_neq(j, targ); + if(sufDcLtU8(host1, host, hlen, s[j], s[targ], dc, sanityCheck)) { + // j < targ + assert(!sufDcLtU8(host1, host, hlen, s[targ], s[j], dc, sanityCheck)); + ASSERT_SUF_LT(j, targ); + targ = j; + targoff = joff; + } else { + assert(sufDcLtU8(host1, host, hlen, s[targ], s[j], dc, sanityCheck)); + ASSERT_SUF_LT(targ, j); // ! 
+ } + } + } + if(i != targ) { + ASSERT_SUF_LT(targ, i); + // swap i and targ + TIndexOffU tmp = s[i]; + s[i] = s[targ]; + s[targ] = tmp; + } + for(size_t j = i+1; j < end; j++) { + ASSERT_SUF_LT(i, j); + } + } + if(sanityCheck) { + sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK, begin, end); + } +} + +template +static void bucketSortSufDcU8( + const T1& host1, + const T2& host, + size_t hlen, + TIndexOffU* s, + size_t slen, + const DifferenceCoverSample& dc, + uint8_t hi, + size_t _begin, + size_t _end, + size_t _depth, + bool sanityCheck = false) +{ + // 5 64-element buckets for bucket-sorting A, C, G, T, $ + TIndexOffU* bkts[4]; + for(size_t i = 0; i < 4; i++) { + bkts[i] = new TIndexOffU[4 * 1024 * 1024]; + } + ELList block_list; + bool first = true; + while(true) { + size_t begin = 0, end = 0; + if(first) { + begin = _begin; + end = _end; + first = false; + } else { + if(block_list.size() == 0) { + break; + } + if(block_list.back().size() > 1) { + end = block_list.back().back(); block_list.back().pop_back(); + begin = block_list.back().back(); + } else { + block_list.resize(block_list.size() - 1); + if(block_list.size() == 0) { + break; + } + } + } + size_t depth = block_list.size() + _depth; + assert_leq(end-begin, BUCKET_SORT_CUTOFF); + assert_eq(hi, 4); + if(end <= begin + 1) { // 1-element list already sorted + continue; + } + if(depth > dc.v()) { + // Quicksort the remaining suffixes using difference cover + // for constant-time comparisons; this is O(k*log(k)) where + // k=(end-begin) + qsortSufDcU8(host1, host, hlen, s, slen, dc, begin, end, sanityCheck); + continue; + } + if(end-begin <= SELECTION_SORT_CUTOFF) { + // Bucket sort remaining items + selectionSortSufDcU8(host1, host, hlen, s, slen, dc, hi, + begin, end, depth, sanityCheck); + if(sanityCheck) { + sanityCheckOrderedSufs(host1, hlen, s, slen, + OFF_MASK, begin, end); + } + continue; + } + size_t cnts[] = { 0, 0, 0, 0, 0 }; + for(size_t i = begin; i < end; i++) { + size_t off = depth + 
s[i]; + uint8_t c = (off < hlen) ? get_uint8(host, off) : hi; + assert_leq(c, 4); + if(c == 0) { + s[begin + cnts[0]++] = s[i]; + } else { + bkts[c-1][cnts[c]++] = s[i]; + } + } + assert_eq(cnts[0] + cnts[1] + cnts[2] + cnts[3] + cnts[4], end - begin); + size_t cur = begin + cnts[0]; + if(cnts[1] > 0) { memcpy(&s[cur], bkts[0], cnts[1] << (OFF_SIZE/4 + 1)); cur += cnts[1]; } + if(cnts[2] > 0) { memcpy(&s[cur], bkts[1], cnts[2] << (OFF_SIZE/4 + 1)); cur += cnts[2]; } + if(cnts[3] > 0) { memcpy(&s[cur], bkts[2], cnts[3] << (OFF_SIZE/4 + 1)); cur += cnts[3]; } + if(cnts[4] > 0) { memcpy(&s[cur], bkts[3], cnts[4] << (OFF_SIZE/4 + 1)); } + // This frame is now totally finished with bkts[][], so recursive + // callees can safely clobber it; we're not done with cnts[], but + // that's local to the stack frame. + block_list.expand(); + block_list.back().clear(); + block_list.back().push_back(begin); + for(size_t i = 0; i < 4; i++) { + if(cnts[i] > 0) { + block_list.back().push_back(block_list.back().back() + cnts[i]); + } + } + } + // Done + + for(size_t i = 0; i < 4; i++) { + delete [] bkts[i]; + } +} + +/** + * Main multikey quicksort function for suffixes. Based on Bentley & + * Sedgewick's algorithm on p.5 of their paper "Fast Algorithms for + * Sorting and Searching Strings". That algorithm has been extended in + * three ways: + * + * 1. Deal with keys of different lengths by checking bounds and + * considering off-the-end values to be 'hi' (b/c our goal is the + * BWT transform, we're biased toward considring prefixes as + * lexicographically *greater* than their extensions). + * 2. The multikey_qsort_suffixes version takes a single host string + * and a list of suffix offsets as input. This reduces memory + * footprint compared to an approach that treats its input + * generically as a set of strings (not necessarily suffixes), thus + * requiring that we store at least two integers worth of + * information for each string. + * 3. 
Sorting functions take an extra "upto" parameter that upper- + * bounds the depth to which the function sorts. + */ +template +void mkeyQSortSufDcU8( + const T1& host1, + const T2& host, + size_t hlen, + TIndexOffU* s, + size_t slen, + const DifferenceCoverSample& dc, + int hi, + size_t begin, + size_t end, + size_t depth, + bool sanityCheck = false) +{ + // Helper for making the recursive call; sanity-checks arguments to + // make sure that the problem actually got smaller. + #define MQS_RECURSE_SUF_DC_U8(nbegin, nend, ndepth) { \ + assert(nbegin > begin || nend < end || ndepth > depth); \ + mkeyQSortSufDcU8(host1, host, hlen, s, slen, dc, hi, nbegin, nend, ndepth, sanityCheck); \ + } + assert_leq(begin, slen); + assert_leq(end, slen); + size_t n = end - begin; + if(n <= 1) return; // 1-element list already sorted + if(depth > dc.v()) { + // Quicksort the remaining suffixes using difference cover + // for constant-time comparisons; this is O(k*log(k)) where + // k=(end-begin) + qsortSufDcU8(host1, host, hlen, s, slen, dc, begin, end, sanityCheck); + if(sanityCheck) { + sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK, begin, end); + } + return; + } + if(n <= BUCKET_SORT_CUTOFF) { + // Bucket sort remaining items + bucketSortSufDcU8(host1, host, hlen, s, slen, dc, + (uint8_t)hi, begin, end, depth, sanityCheck); + if(sanityCheck) { + sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK, begin, end); + } + return; + } + size_t a, b, c, d, r; + CHOOSE_AND_SWAP_PIVOT(SWAP1, CHAR_AT_SUF_U8); // choose pivot, swap to begin + int v = CHAR_AT_SUF_U8(begin, depth); // v <- pivot value + #ifndef NDEBUG + { + bool stillInBounds = false; + for(size_t i = begin; i < end; i++) { + if(depth < (hlen-s[i])) { + stillInBounds = true; + break; + } else { /* already fell off this suffix */ } + } + assert(stillInBounds); // >=1 suffix must still be in bounds + } + #endif + a = b = begin; + c = d = end-1; + while(true) { + // Invariant: everything before a is = pivot, everything + 
// between a and b is < + int bc = 0; // shouldn't have to init but gcc on Mac complains + while(b <= c && v >= (bc = CHAR_AT_SUF_U8(b, depth))) { + if(v == bc) { + SWAP(s, a, b); a++; + } + b++; + } + // Invariant: everything after d is = pivot, everything + // between c and d is > + int cc = 0; // shouldn't have to init but gcc on Mac complains + //bool hiLatch = true; + while(b <= c && v <= (cc = CHAR_AT_SUF_U8(c, depth))) { + if(v == cc) { + SWAP(s, c, d); d--; + } + //else if(hiLatch && cc == hi) { } + c--; + } + if(b > c) break; + SWAP(s, b, c); + b++; + c--; + } + assert(a > begin || c < end-1); // there was at least one =s + assert_lt(d-c, n); // they can't all have been > pivot + assert_lt(b-a, n); // they can't all have been < pivot + r = min(a-begin, b-a); VECSWAP(s, begin, b-r, r); // swap left = to center + r = min(d-c, end-d-1); VECSWAP(s, b, end-r, r); // swap right = to center + r = b-a; // r <- # of <'s + if(r > 0) { + MQS_RECURSE_SUF_DC_U8(begin, begin + r, depth); // recurse on <'s + } + // Do not recurse on ='s if the pivot was the off-the-end value; + // they're already fully sorted + if(v != hi) { + MQS_RECURSE_SUF_DC_U8(begin + r, begin + r + (a-begin) + (end-d-1), depth+1); // recurse on ='s + } + r = d-c; // r <- # of >'s excluding those exhausted + if(r > 0 && v < hi-1) { + MQS_RECURSE_SUF_DC_U8(end-r, end, depth); // recurse on >'s + } +} + + +#endif /*MULTIKEY_QSORT_H_*/ diff --git a/opts.h b/opts.h new file mode 100644 index 0000000..ace3a16 --- /dev/null +++ b/opts.h @@ -0,0 +1,200 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
/**
 * Integer identifiers for command-line options.
 *
 * Numbering starts at 256 (presumably so the codes cannot collide with
 * single-character option codes -- TODO confirm against the option
 * parser).  All values after ARG_ORIG are assigned positionally, so:
 *  - inserting or reordering entries renumbers everything after them;
 *  - ARG_SRA_ACC exists only when USE_SRA is defined, which shifts the
 *    value of every later enumerator by one between the two builds.
 *
 * The trailing comment on an entry names the option string it is
 * associated with; entries without a comment do not record theirs here.
 */
enum {
	ARG_ORIG = 256,             // --orig
	ARG_SEED,                   // --seed
	ARG_SOLEXA_QUALS,           // --solexa-quals
	ARG_VERBOSE,                // --verbose
	ARG_STARTVERBOSE,           // --startverbose
	ARG_QUIET,                  // --quiet
	ARG_METRIC_IVAL,            // --met
	ARG_METRIC_FILE,            // --met-file
	ARG_METRIC_STDERR,          // --met-stderr
	ARG_METRIC_PER_READ,        // --met-per-read
	ARG_REFIDX,                 // --refidx
	ARG_SANITY,                 // --sanity
	ARG_PARTITION,              // --partition
	ARG_INTEGER_QUALS,          // --int-quals
	ARG_FILEPAR,                // --filepar
	ARG_SHMEM,                  // --shmem
	ARG_MM,                     // --mm
	ARG_MMSWEEP,                // --mmsweep
	ARG_FF,                     // --ff
	ARG_FR,                     // --fr
	ARG_RF,                     // --rf
	ARG_NO_MIXED,               // --no-mixed
	ARG_NO_DISCORDANT,          // --no-discordant
	ARG_CACHE_LIM,              // -- (option string not recorded here)
	ARG_CACHE_SZ,               // -- (option string not recorded here)
	ARG_NO_FW,                  // --nofw
	ARG_NO_RC,                  // --norc
	ARG_SKIP,                   // --skip
	ARG_ONETWO,                 // --12
	ARG_PHRED64,                // --phred64
	ARG_PHRED33,                // --phred33
	ARG_HADOOPOUT,              // --hadoopout
	ARG_FUZZY,                  // --fuzzy
	ARG_FULLREF,                // --fullref
	ARG_USAGE,                  // --usage
	ARG_SNPPHRED,               // --snpphred
	ARG_SNPFRAC,                // --snpfrac
	ARG_SAM_NO_QNAME_TRUNC,     // --sam-no-qname-trunc
	ARG_SAM_OMIT_SEC_SEQ,       // --sam-omit-sec-seq
	ARG_SAM_NOHEAD,             // --sam-noHD/--sam-nohead
	ARG_SAM_NOSQ,               // --sam-nosq/--sam-noSQ
	ARG_SAM_RG,                 // --sam-rg
	ARG_SAM_RGID,               // --sam-rg-id
	ARG_GAP_BAR,                // --gbar
	ARG_QUALS1,                 // --Q1
	ARG_QUALS2,                 // --Q2
	ARG_QSEQ,                   // --qseq
	ARG_SEED_SUMM,              // --seed-summary
	ARG_OVERHANG,               // --overhang
	ARG_NO_CACHE,               // --no-cache
	ARG_USE_CACHE,              // --cache
	ARG_NOISY_HPOLY,            // --454/--ion-torrent
	ARG_LOCAL,                  // --local
	ARG_END_TO_END,             // --end-to-end
	ARG_SCAN_NARROWED,          // --scan-narrowed
	ARG_QC_FILTER,              // --qc-filter
	ARG_BWA_SW_LIKE,            // --bwa-sw-like
	ARG_MULTISEED_IVAL,         // --multiseed
	ARG_SCORE_MIN,              // --score-min
	ARG_SCORE_MA,               // --ma
	ARG_SCORE_MMP,              // --mp
	ARG_SCORE_SCP,              // --sp
	ARG_NO_SOFTCLIP,            // --no-softclip
	ARG_SCORE_NP,               // --nm  NOTE(review): name suggests --np; verify against the option table
	ARG_SCORE_RDG,              // --rdg
	ARG_SCORE_RFG,              // --rfg
	ARG_N_CEIL,                 // --n-ceil
	ARG_DPAD,                   // --dpad
	ARG_SAM_PRINT_YI,           // --mapq-print-inputs
	ARG_ALIGN_POLICY,           // --policy
	ARG_PRESET_VERY_FAST,       // --very-fast
	ARG_PRESET_FAST,            // --fast
	ARG_PRESET_SENSITIVE,       // --sensitive
	ARG_PRESET_VERY_SENSITIVE,  // --very-sensitive
	ARG_PRESET_VERY_FAST_LOCAL,      // --very-fast-local
	ARG_PRESET_FAST_LOCAL,           // --fast-local
	ARG_PRESET_SENSITIVE_LOCAL,      // --sensitive-local
	ARG_PRESET_VERY_SENSITIVE_LOCAL, // --very-sensitive-local
	ARG_NO_SCORE_PRIORITY,      // --no-score-priority
	ARG_IGNORE_QUALS,           // --ignore-quals
	ARG_DESC,                   // --arg-desc
	ARG_TAB5,                   // --tab5
	ARG_TAB6,                   // --tab6
	ARG_WRAPPER,                // --wrapper
	ARG_DOVETAIL,               // --dovetail
	ARG_NO_DOVETAIL,            // --no-dovetail
	ARG_CONTAIN,                // --contain
	ARG_NO_CONTAIN,             // --no-contain
	ARG_OVERLAP,                // --overlap
	ARG_NO_OVERLAP,             // --no-overlap
	ARG_MAPQ_V,                 // --mapq-v
	ARG_SSE8,                   // --sse8
	ARG_SSE8_NO,                // --no-sse8
	ARG_UNGAPPED,               // --ungapped
	ARG_UNGAPPED_NO,            // --no-ungapped
	ARG_TIGHTEN,                // --tighten
	ARG_UNGAP_THRESH,           // --ungap-thresh
	ARG_EXACT_UPFRONT,          // --exact-upfront
	ARG_1MM_UPFRONT,            // --1mm-upfront
	ARG_EXACT_UPFRONT_NO,       // --no-exact-upfront
	ARG_1MM_UPFRONT_NO,         // --no-1mm-upfront
	ARG_1MM_MINLEN,             // --1mm-minlen
	ARG_VERSION,                // --version
	ARG_SEED_OFF,               // --seed-off
	ARG_SEED_BOOST_THRESH,      // --seed-boost
	ARG_MAX_SEEDS,
	ARG_READ_TIMES,             // --read-times
	ARG_EXTEND_ITERS,           // --extends
	ARG_DP_MATE_STREAK_THRESH,  // --db-mate-streak  NOTE(review): likely a typo for --dp-mate-streak; verify
	ARG_DP_FAIL_STREAK_THRESH,  // --dp-fail-streak
	ARG_UG_FAIL_STREAK_THRESH,  // --ug-fail-streak
	ARG_EE_FAIL_STREAK_THRESH,  // --ee-fail-streak
	ARG_DP_FAIL_THRESH,         // --dp-fails
	ARG_UG_FAIL_THRESH,         // --ug-fails
	ARG_MAPQ_EX,                // --mapq-extra
	ARG_NO_EXTEND,              // --no-extend
	ARG_REORDER,                // --reorder
	ARG_SHOW_RAND_SEED,         // --show-rand-seed
	ARG_READ_PASSTHRU,          // --passthrough
	ARG_SAMPLE,                 // --sample
	ARG_CP_MIN,                 // --cp-min
	ARG_CP_IVAL,                // --cp-ival
	ARG_TRI,                    // --tri
	ARG_LOCAL_SEED_CACHE_SZ,    // --local-seed-cache-sz
	ARG_CURRENT_SEED_CACHE_SZ,  // --seed-cache-sz
	ARG_SAM_NO_UNAL,            // --no-unal
	ARG_NON_DETERMINISTIC,      // --non-deterministic
	ARG_TEST_25,                // --test-25
	ARG_DESC_KB,                // --desc-kb
	ARG_DESC_LANDING,           // --desc-landing
	ARG_DESC_EXP,               // --desc-exp
	ARG_DESC_FMOPS,             // --desc-fmops
	ARG_NO_TEMPSPLICESITE,
	ARG_PEN_CANSPLICE,
	ARG_PEN_NONCANSPLICE,
	ARG_PEN_CONFLICTSPLICE,
	ARG_PEN_CANINTRONLEN,
	ARG_PEN_NONCANINTRONLEN,
	ARG_MIN_INTRONLEN,
	ARG_MAX_INTRONLEN,
	ARG_KNOWN_SPLICESITE_INFILE,
	ARG_NOVEL_SPLICESITE_INFILE,
	ARG_NOVEL_SPLICESITE_OUTFILE,
	ARG_SECONDARY,
	ARG_NO_SPLICED_ALIGNMENT,
	ARG_RNA_STRANDNESS,
	ARG_SPLICESITE_DB_ONLY,
	ARG_NO_ANCHORSTOP,
	ARG_TRANSCRIPTOME_MAPPING_ONLY,
	ARG_TRANSCRIPTOME_ASSEMBLY,
	ARG_TRANSCRIPTOME_ASSEMBLY_CUFFLINKS,
	ARG_AVOID_PSEUDOGENE,
#ifdef USE_SRA
	ARG_SRA_ACC,                // present only in USE_SRA builds; shifts all later values
#endif
	ARG_REMOVE_CHRNAME,
	ARG_ADD_CHRNAME,
	ARG_MAX_ALTSTRIED,
	ARG_HAPLOTYPE,
	ARG_CODIS,
	ARG_NO_TEMPLATELEN_ADJUSTMENT,
	ARG_SUMMARY_FILE,
	ARG_NEW_SUMMARY,
	ARG_DP,
	ARG_REPEAT,
	ARG_NO_REPEAT_INDEX,
	ARG_READ_LENGTHS,
	ARG_BASE_CHANGE,            // --base-change
	ARG_REPEAT_LIMIT,
	ARG_UNIQUE_ONLY,
	ARG_3N,
	ARG_DIRECTIONAL,
	ARG_DIRECTIONAL_REVERSE
};
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "outq.h" + +/** + * Caller is telling us that they're about to write output record(s) for + * the read with the given id. + */ +void OutputQueue::beginRead(TReadId rdid, size_t threadId) { + ThreadSafe t(&mutex_m, threadSafe_); + nstarted_++; + if(reorder_) { + assert_geq(rdid, cur_); + assert_eq(lines_.size(), finished_.size()); + assert_eq(lines_.size(), started_.size()); + if(rdid - cur_ >= lines_.size()) { + // Make sure there's enough room in lines_, started_ and finished_ + size_t oldsz = lines_.size(); + lines_.resize(rdid - cur_ + 1); + started_.resize(rdid - cur_ + 1); + finished_.resize(rdid - cur_ + 1); + for(size_t i = oldsz; i < lines_.size(); i++) { + started_[i] = finished_[i] = false; + } + } + started_[rdid - cur_] = true; + finished_[rdid - cur_] = false; + } +} + +/** + * Writer is finished writing to + */ +void OutputQueue::finishRead(const BTString& rec, TReadId rdid, size_t threadId) { + ThreadSafe t(&mutex_m, threadSafe_); + if(reorder_) { + assert_geq(rdid, cur_); + assert_eq(lines_.size(), finished_.size()); + assert_eq(lines_.size(), started_.size()); + assert_lt(rdid - cur_, lines_.size()); + assert(started_[rdid - cur_]); + assert(!finished_[rdid - cur_]); + lines_[rdid - cur_] = rec; + nfinished_++; + finished_[rdid - cur_] = true; + flush(false, false); // don't force; already have lock + } else { + // obuf_ is 
the OutFileBuf for the output file + obuf_.writeString(rec); + nfinished_++; + nflushed_++; + } +} + +/** + * Write already-finished lines starting from cur_. + */ +void OutputQueue::flush(bool force, bool getLock) { + if(!reorder_) { + return; + } + ThreadSafe t(&mutex_m, getLock && threadSafe_); + size_t nflush = 0; + while(nflush < finished_.size() && finished_[nflush]) { + assert(started_[nflush]); + nflush++; + } + // Waiting until we have several in a row to flush cuts down on copies + // (but requires more buffering) + if(force || nflush >= NFLUSH_THRESH) { + for(size_t i = 0; i < nflush; i++) { + assert(started_[i]); + assert(finished_[i]); + obuf_.writeString(lines_[i]); + } + lines_.erase(0, nflush); + started_.erase(0, nflush); + finished_.erase(0, nflush); + cur_ += nflush; + nflushed_ += nflush; + } +} + +#ifdef OUTQ_MAIN + +#include + +using namespace std; + +int main(void) { + cerr << "Case 1 (one thread) ... "; + { + OutFileBuf ofb; + OutputQueue oq(ofb, false); + assert_eq(0, oq.numFlushed()); + assert_eq(0, oq.numStarted()); + assert_eq(0, oq.numFinished()); + oq.beginRead(1); + assert_eq(0, oq.numFlushed()); + assert_eq(1, oq.numStarted()); + assert_eq(0, oq.numFinished()); + oq.beginRead(3); + assert_eq(0, oq.numFlushed()); + assert_eq(2, oq.numStarted()); + assert_eq(0, oq.numFinished()); + oq.beginRead(2); + assert_eq(0, oq.numFlushed()); + assert_eq(3, oq.numStarted()); + assert_eq(0, oq.numFinished()); + oq.flush(); + assert_eq(0, oq.numFlushed()); + assert_eq(3, oq.numStarted()); + assert_eq(0, oq.numFinished()); + oq.beginRead(0); + assert_eq(0, oq.numFlushed()); + assert_eq(4, oq.numStarted()); + assert_eq(0, oq.numFinished()); + oq.flush(); + assert_eq(0, oq.numFlushed()); + assert_eq(4, oq.numStarted()); + assert_eq(0, oq.numFinished()); + oq.finishRead(0); + assert_eq(0, oq.numFlushed()); + assert_eq(4, oq.numStarted()); + assert_eq(1, oq.numFinished()); + oq.flush(); + assert_eq(0, oq.numFlushed()); + assert_eq(4, oq.numStarted()); + 
assert_eq(1, oq.numFinished()); + oq.flush(true); + assert_eq(1, oq.numFlushed()); + assert_eq(4, oq.numStarted()); + assert_eq(1, oq.numFinished()); + oq.finishRead(2); + assert_eq(1, oq.numFlushed()); + assert_eq(4, oq.numStarted()); + assert_eq(2, oq.numFinished()); + oq.flush(true); + assert_eq(1, oq.numFlushed()); + assert_eq(4, oq.numStarted()); + assert_eq(2, oq.numFinished()); + oq.finishRead(1); + assert_eq(1, oq.numFlushed()); + assert_eq(4, oq.numStarted()); + assert_eq(3, oq.numFinished()); + oq.flush(true); + assert_eq(3, oq.numFlushed()); + assert_eq(4, oq.numStarted()); + assert_eq(3, oq.numFinished()); + } + cerr << "PASSED" << endl; + + cerr << "Case 2 (one thread) ... "; + { + OutFileBuf ofb; + OutputQueue oq(ofb, false); + BTString& buf1 = oq.beginRead(0); + BTString& buf2 = oq.beginRead(1); + BTString& buf3 = oq.beginRead(2); + BTString& buf4 = oq.beginRead(3); + BTString& buf5 = oq.beginRead(4); + assert_eq(5, oq.numStarted()); + assert_eq(0, oq.numFinished()); + buf1.install("A\n"); + buf2.install("B\n"); + buf3.install("C\n"); + buf4.install("D\n"); + buf5.install("E\n"); + oq.finishRead(4); + oq.finishRead(1); + oq.finishRead(0); + oq.finishRead(2); + oq.finishRead(3); + oq.flush(true); + assert_eq(5, oq.numFlushed()); + assert_eq(5, oq.numStarted()); + assert_eq(5, oq.numFinished()); + ofb.flush(); + } + cerr << "PASSED" << endl; + return 0; +} + +#endif /*def ALN_SINK_MAIN*/ diff --git a/outq.h b/outq.h new file mode 100644 index 0000000..00408cf --- /dev/null +++ b/outq.h @@ -0,0 +1,149 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef OUTQ_H_ +#define OUTQ_H_ + +#include "assert_helpers.h" +#include "ds.h" +#include "sstring.h" +#include "read.h" +#include "threading.h" +#include "mem_ids.h" + +/** + * Encapsulates a list of lines of output. If the earliest as-yet-unreported + * read has id N and Bowtie 2 wants to write a record for read with id N+1, we + * resize the lines_ and committed_ lists to have at least 2 elements (1 for N, + * 1 for N+1) and return the BTString * associated with the 2nd element. When + * the user calls commit() for the read with id N, + */ +class OutputQueue { + + static const size_t NFLUSH_THRESH = 8; + +public: + + OutputQueue( + OutFileBuf& obuf, + bool reorder, + size_t nthreads, + bool threadSafe, + TReadId rdid = 0) : + obuf_(obuf), + cur_(rdid), + nstarted_(0), + nfinished_(0), + nflushed_(0), + lines_(RES_CAT), + started_(RES_CAT), + finished_(RES_CAT), + reorder_(reorder), + threadSafe_(threadSafe), + mutex_m() + { + assert(nthreads <= 1 || threadSafe); + } + + /** + * Caller is telling us that they're about to write output record(s) for + * the read with the given id. + */ + void beginRead(TReadId rdid, size_t threadId); + + /** + * Writer is finished writing to + */ + void finishRead(const BTString& rec, TReadId rdid, size_t threadId); + + /** + * Return the number of records currently being buffered. + */ + size_t size() const { + return lines_.size(); + } + + /** + * Return the number of records that have been flushed so far. + */ + TReadId numFlushed() const { + return nflushed_; + } + + /** + * Return the number of records that have been started so far. 
+ */ + TReadId numStarted() const { + return nstarted_; + } + + /** + * Return the number of records that have been finished so far. + */ + TReadId numFinished() const { + return nfinished_; + } + + /** + * Write already-committed lines starting from cur_. + */ + void flush(bool force = false, bool getLock = true); + +protected: + + OutFileBuf& obuf_; + TReadId cur_; + TReadId nstarted_; + TReadId nfinished_; + TReadId nflushed_; + EList lines_; + EList started_; + EList finished_; + bool reorder_; + bool threadSafe_; + MUTEX_T mutex_m; +}; + +class OutputQueueMark { +public: + OutputQueueMark( + OutputQueue& q, + const BTString& rec, + TReadId rdid, + size_t threadId) : + q_(q), + rec_(rec), + rdid_(rdid), + threadId_(threadId) + { + q_.beginRead(rdid, threadId); + } + + ~OutputQueueMark() { + q_.finishRead(rec_, rdid_, threadId_); + } + +protected: + OutputQueue& q_; + const BTString& rec_; + TReadId rdid_; + size_t threadId_; +}; + +#endif diff --git a/pat.cpp b/pat.cpp new file mode 100644 index 0000000..16dfad2 --- /dev/null +++ b/pat.cpp @@ -0,0 +1,1824 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#include +#include +#include +#include +#include "sstring.h" + +#include "pat.h" +#include "filebuf.h" +#include "formats.h" + +#ifdef USE_SRA + +#include "tinythread.h" +#include +#include +#include +#include +#include + +#endif + +using namespace std; + +/** + * Return a new dynamically allocated PatternSource for the given + * format, using the given list of strings as the filenames to read + * from or as the sequences themselves (i.e. if -c was used). + */ +PatternSource* PatternSource::patsrcFromStrings( + const PatternParams& p, + const EList& qs, + size_t nthreads) +{ + switch(p.format) { + case FASTA: return new FastaPatternSource(qs, p); + case FASTA_CONT: return new FastaContinuousPatternSource(qs, p); + case RAW: return new RawPatternSource(qs, p); + case FASTQ: return new FastqPatternSource(qs, p); + case TAB_MATE5: return new TabbedPatternSource(qs, p, false); + case TAB_MATE6: return new TabbedPatternSource(qs, p, true); + case CMDLINE: return new VectorPatternSource(qs, p); + case QSEQ: return new QseqPatternSource(qs, p); +#ifdef USE_SRA + case SRA_FASTA: + case SRA_FASTQ: return new SRAPatternSource(qs, p, nthreads); +#endif + default: { + cerr << "Internal error; bad patsrc format: " << p.format << endl; + throw 1; + } + } +} + +/** + * The main member function for dispensing patterns. + * + * Returns true iff a pair was parsed succesfully. 
+ */ +bool PatternSource::nextReadPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired, + bool fixName) +{ + // nextPatternImpl does the reading from the ultimate source; + // it is implemented in concrete subclasses + success = done = paired = false; + nextReadPairImpl(ra, rb, rdid, endid, success, done, paired); + if(success) { + // Construct reversed versions of fw and rc seqs/quals + ra.finalize(); + if(!rb.empty()) { + rb.finalize(); + } + // Fill in the random-seed field using a combination of + // information from the user-specified seed and the read + // sequence, qualities, and name + ra.seed = genRandSeed(ra.patFw, ra.qual, ra.name, seed_); + if(!rb.empty()) { + rb.seed = genRandSeed(rb.patFw, rb.qual, rb.name, seed_); + } + } + return success; +} + +/** + * The main member function for dispensing patterns. + */ +bool PatternSource::nextRead( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) +{ + // nextPatternImpl does the reading from the ultimate source; + // it is implemented in concrete subclasses + nextReadImpl(r, rdid, endid, success, done); + if(success) { + // Construct the reversed versions of the fw and rc seqs + // and quals + r.finalize(); + // Fill in the random-seed field using a combination of + // information from the user-specified seed and the read + // sequence, qualities, and name + r.seed = genRandSeed(r.patFw, r.qual, r.name, seed_); + } + return success; +} + +/** + * Get the next paired or unpaired read from the wrapped + * PairedPatternSource. 
+ */ +bool WrappedPatternSourcePerThread::nextReadPair( + bool& success, + bool& done, + bool& paired, + bool fixName) +{ + PatternSourcePerThread::nextReadPair(success, done, paired, fixName); + ASSERT_ONLY(TReadId lastRdId = rdid_); + buf1_.reset(); + buf2_.reset(); + patsrc_.nextReadPair(buf1_, buf2_, rdid_, endid_, success, done, paired, fixName); + assert(!success || rdid_ != lastRdId); + return success; +} + +/** + * The main member function for dispensing pairs of reads or + * singleton reads. Returns true iff ra and rb contain a new + * pair; returns false if ra contains a new unpaired read. + */ +bool PairedSoloPatternSource::nextReadPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired, + bool fixName) +{ + uint32_t cur = cur_; + success = false; + while(cur < src_->size()) { + // Patterns from srca_[cur_] are unpaired + do { + (*src_)[cur]->nextReadPair( + ra, rb, rdid, endid, success, done, paired, fixName); + } while(!success && !done); + if(!success) { + assert(done); + // If patFw is empty, that's our signal that the + // input dried up + lock(); + if(cur + 1 > cur_) cur_++; + cur = cur_; + unlock(); + continue; // on to next pair of PatternSources + } + assert(success); + ra.seed = genRandSeed(ra.patFw, ra.qual, ra.name, seed_); + if(!rb.empty()) { + rb.seed = genRandSeed(rb.patFw, rb.qual, rb.name, seed_); + if(fixName) { + ra.fixMateName(1); + rb.fixMateName(2); + } + } + ra.rdid = rdid; + ra.endid = endid; + if(!rb.empty()) { + rb.rdid = rdid; + rb.endid = endid+1; + } + ra.mate = 1; + rb.mate = 2; + return true; // paired + } + assert_leq(cur, src_->size()); + done = (cur == src_->size()); + return false; +} + +/** + * The main member function for dispensing pairs of reads or + * singleton reads. Returns true iff ra and rb contain a new + * pair; returns false if ra contains a new unpaired read. 
+ */ +bool PairedDualPatternSource::nextReadPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired, + bool fixName) +{ + // 'cur' indexes the current pair of PatternSources + uint32_t cur; + { + lock(); + cur = cur_; + unlock(); + } + success = false; + done = true; + while(cur < srca_->size()) { + if((*srcb_)[cur] == NULL) { + paired = false; + // Patterns from srca_ are unpaired + do { + (*srca_)[cur]->nextRead(ra, rdid, endid, success, done); + } while(!success && !done); + if(!success) { + assert(done); + lock(); + if(cur + 1 > cur_) cur_++; + cur = cur_; // Move on to next PatternSource + unlock(); + continue; // on to next pair of PatternSources + } + ra.rdid = rdid; + ra.endid = endid; + ra.mate = 0; + return success; + } else { + paired = true; + // Patterns from srca_[cur_] and srcb_[cur_] are paired + TReadId rdid_a = 0, endid_a = 0; + TReadId rdid_b = 0, endid_b = 0; + bool success_a = false, done_a = false; + bool success_b = false, done_b = false; + // Lock to ensure that this thread gets parallel reads + // in the two mate files + lock(); + do { + (*srca_)[cur]->nextRead(ra, rdid_a, endid_a, success_a, done_a); + } while(!success_a && !done_a); + do { + (*srcb_)[cur]->nextRead(rb, rdid_b, endid_b, success_b, done_b); + } while(!success_b && !done_b); + if(!success_a && success_b) { + cerr << "Error, fewer reads in file specified with -1 than in file specified with -2" << endl; + throw 1; + } else if(!success_a) { + assert(done_a && done_b); + if(cur + 1 > cur_) cur_++; + cur = cur_; // Move on to next PatternSource + unlock(); + continue; // on to next pair of PatternSources + } else if(!success_b) { + cerr << "Error, fewer reads in file specified with -2 than in file specified with -1" << endl; + throw 1; + } + assert_eq(rdid_a, rdid_b); + //assert_eq(endid_a+1, endid_b); + assert_eq(success_a, success_b); + unlock(); + if(fixName) { + ra.fixMateName(1); + rb.fixMateName(2); + } + rdid = rdid_a; 
+ endid = endid_a; + success = success_a; + done = done_a; + ra.rdid = rdid; + ra.endid = endid; + if(!rb.empty()) { + rb.rdid = rdid; + rb.endid = endid+1; + } + ra.mate = 1; + rb.mate = 2; + return success; + } + } + return success; +} + +/** + * Return the number of reads attempted. + */ +pair PairedDualPatternSource::readCnt() const { + uint64_t rets = 0llu, retp = 0llu; + for(size_t i = 0; i < srca_->size(); i++) { + if((*srcb_)[i] == NULL) { + rets += (*srca_)[i]->readCnt(); + } else { + assert_eq((*srca_)[i]->readCnt(), (*srcb_)[i]->readCnt()); + retp += (*srca_)[i]->readCnt(); + } + } + return make_pair(rets, retp); +} + +/** + * Given the values for all of the various arguments used to specify + * the read and quality input, create a list of pattern sources to + * dispense them. + */ +PairedPatternSource* PairedPatternSource::setupPatternSources( + const EList& si, // singles, from argv + const EList& m1, // mate1's, from -1 arg + const EList& m2, // mate2's, from -2 arg + const EList& m12, // both mates on each line, from --12 arg +#ifdef USE_SRA + const EList& sra_accs, +#endif + const EList& q, // qualities associated with singles + const EList& q1, // qualities associated with m1 + const EList& q2, // qualities associated with m2 + const PatternParams& p, // read-in parameters + size_t nthreads, + bool verbose) // be talkative? 
+{ + EList* a = new EList(); + EList* b = new EList(); + EList* ab = new EList(); + // Create list of pattern sources for paired reads appearing + // interleaved in a single file + for(size_t i = 0; i < m12.size(); i++) { + const EList* qs = &m12; + EList tmp; + if(p.fileParallel) { + // Feed query files one to each PatternSource + qs = &tmp; + tmp.push_back(m12[i]); + assert_eq(1, tmp.size()); + } + ab->push_back(PatternSource::patsrcFromStrings(p, *qs, nthreads)); + if(!p.fileParallel) { + break; + } + } + +#ifdef USE_SRA + for(size_t i = 0; i < sra_accs.size(); i++) { + const EList* qs = &sra_accs; + EList tmp; + if(p.fileParallel) { + // Feed query files one to each PatternSource + qs = &tmp; + tmp.push_back(sra_accs[i]); + assert_eq(1, tmp.size()); + } + ab->push_back(PatternSource::patsrcFromStrings(p, *qs, nthreads)); + if(!p.fileParallel) { + break; + } + } +#endif + + // Create list of pattern sources for paired reads + for(size_t i = 0; i < m1.size(); i++) { + const EList* qs = &m1; + EList tmpSeq; + EList tmpQual; + if(p.fileParallel) { + // Feed query files one to each PatternSource + qs = &tmpSeq; + tmpSeq.push_back(m1[i]); + assert_eq(1, tmpSeq.size()); + } + + PatternSource *patsrc = PatternSource::patsrcFromStrings(p, *qs, nthreads); + patsrc->paired_type = 1; + a->push_back(patsrc); + + if(!p.fileParallel) { + break; + } + } + + // Create list of pattern sources for paired reads + for(size_t i = 0; i < m2.size(); i++) { + const EList* qs = &m2; + EList tmpSeq; + EList tmpQual; + if(p.fileParallel) { + // Feed query files one to each PatternSource + qs = &tmpSeq; + tmpSeq.push_back(m2[i]); + assert_eq(1, tmpSeq.size()); + } + PatternSource *patsrc = PatternSource::patsrcFromStrings(p, *qs, nthreads); + patsrc->paired_type = 2; + b->push_back(patsrc); + + + if(!p.fileParallel) { + break; + } + } + // All mates/mate files must be paired + assert_eq(a->size(), b->size()); + + // Create list of pattern sources for the unpaired reads + for(size_t i = 0; 
i < si.size(); i++) { + const EList* qs = &si; + PatternSource* patsrc = NULL; + EList tmpSeq; + EList tmpQual; + if(p.fileParallel) { + // Feed query files one to each PatternSource + qs = &tmpSeq; + tmpSeq.push_back(si[i]); + assert_eq(1, tmpSeq.size()); + } + patsrc = PatternSource::patsrcFromStrings(p, *qs, nthreads); + patsrc->paired_type = 1; + assert(patsrc != NULL); + a->push_back(patsrc); + b->push_back(NULL); + if(!p.fileParallel) { + break; + } + } + + PairedPatternSource *patsrc = NULL; +#ifdef USE_SRA + if(m12.size() > 0 || sra_accs.size() > 0) { +#else + if(m12.size() > 0) { +#endif + patsrc = new PairedSoloPatternSource(ab, p); + for(size_t i = 0; i < a->size(); i++) delete (*a)[i]; + for(size_t i = 0; i < b->size(); i++) delete (*b)[i]; + delete a; delete b; + } else { + patsrc = new PairedDualPatternSource(a, b, p); + for(size_t i = 0; i < ab->size(); i++) delete (*ab)[i]; + delete ab; + } + return patsrc; +} + +VectorPatternSource::VectorPatternSource( + const EList& v, + const PatternParams& p) : + PatternSource(p), + cur_(p.skip), + skip_(p.skip), + paired_(false), + v_(), + quals_() +{ + for(size_t i = 0; i < v.size(); i++) { + EList ss; + tokenize(v[i], ":", ss, 2); + assert_gt(ss.size(), 0); + assert_leq(ss.size(), 2); + // Initialize s + string s = ss[0]; + int mytrim5 = gTrim5; + if(gColor && s.length() > 1) { + // This may be a primer character. If so, keep it in the + // 'primer' field of the read buf and parse the rest of the + // read without it. 
+ int c = toupper(s[0]); + if(asc2dnacat[c] > 0) { + // First char is a DNA char + int c2 = toupper(s[1]); + // Second char is a color char + if(asc2colcat[c2] > 0) { + mytrim5 += 2; // trim primer and first color + } + } + } + if(gColor) { + // Convert '0'-'3' to 'A'-'T' + for(size_t i = 0; i < s.length(); i++) { + if(s[i] >= '0' && s[i] <= '4') { + s[i] = "ACGTN"[(int)s[i] - '0']; + } + if(s[i] == '.') s[i] = 'N'; + } + } + if(s.length() <= (size_t)(gTrim3 + mytrim5)) { + // Entire read is trimmed away + s.clear(); + } else { + // Trim on 5' (high-quality) end + if(mytrim5 > 0) { + s.erase(0, mytrim5); + } + // Trim on 3' (low-quality) end + if(gTrim3 > 0) { + s.erase(s.length()-gTrim3); + } + } + // Initialize vq + string vq; + if(ss.size() == 2) { + vq = ss[1]; + } + // Trim qualities + if(vq.length() > (size_t)(gTrim3 + mytrim5)) { + // Trim on 5' (high-quality) end + if(mytrim5 > 0) { + vq.erase(0, mytrim5); + } + // Trim on 3' (low-quality) end + if(gTrim3 > 0) { + vq.erase(vq.length()-gTrim3); + } + } + // Pad quals with Is if necessary; this shouldn't happen + while(vq.length() < s.length()) { + vq.push_back('I'); + } + // Truncate quals to match length of read if necessary; + // this shouldn't happen + if(vq.length() > s.length()) { + vq.erase(s.length()); + } + assert_eq(vq.length(), s.length()); + v_.expand(); + v_.back().installChars(s); + quals_.push_back(BTString(vq)); + trimmed3_.push_back(gTrim3); + trimmed5_.push_back(mytrim5); + ostringstream os; + os << (names_.size()); + names_.push_back(BTString(os.str())); + } + assert_eq(v_.size(), quals_.size()); +} + +bool VectorPatternSource::nextReadImpl( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) +{ + // Let Strings begin at the beginning of the respective bufs + r.reset(); + lock(); + if(cur_ >= v_.size()) { + unlock(); + // Clear all the Strings, as a signal to the caller that + // we're out of reads + r.reset(); + success = false; + done = true; + assert(r.empty()); 
+ return false; + } + // Copy v_*, quals_* strings into the respective Strings + r.color = gColor; + r.patFw = v_[cur_]; + r.patFw_3N = v_[cur_]; + r.qual = quals_[cur_]; + r.trimmed3 = trimmed3_[cur_]; + r.trimmed5 = trimmed5_[cur_]; + ostringstream os; + os << cur_; + r.name = os.str(); + cur_++; + done = cur_ == v_.size(); + rdid = endid = readCnt_; + readCnt_++; + unlock(); + success = true; + return true; +} + +/** + * This is unused, but implementation is given for completeness. + */ +bool VectorPatternSource::nextReadPairImpl( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) +{ + // Let Strings begin at the beginning of the respective bufs + ra.reset(); + rb.reset(); + paired = true; + if(!paired_) { + paired_ = true; + cur_ <<= 1; + } + lock(); + if(cur_ >= v_.size()-1) { + unlock(); + // Clear all the Strings, as a signal to the caller that + // we're out of reads + ra.reset(); + rb.reset(); + assert(ra.empty()); + assert(rb.empty()); + success = false; + done = true; + return false; + } + // Copy v_*, quals_* strings into the respective Strings + ra.patFw = v_[cur_]; + ra.qual = quals_[cur_]; + ra.trimmed3 = trimmed3_[cur_]; + ra.trimmed5 = trimmed5_[cur_]; + cur_++; + rb.patFw = v_[cur_]; + rb.qual = quals_[cur_]; + rb.trimmed3 = trimmed3_[cur_]; + rb.trimmed5 = trimmed5_[cur_]; + ostringstream os; + os << readCnt_; + ra.name = os.str(); + rb.name = os.str(); + ra.color = rb.color = gColor; + cur_++; + done = cur_ >= v_.size()-1; + rdid = endid = readCnt_; + readCnt_++; + unlock(); + success = true; + return true; +} + +/** + * Parse a single quality string from fb and store qualities in r. + * Assume the next character obtained via fb.get() is the first + * character of the quality string. When returning, the next + * character returned by fb.peek() or fb.get() should be the first + * character of the following line. 
+ */ +int parseQuals( + Read& r, + FileBuf& fb, + int firstc, + int readLen, + int trim3, + int trim5, + bool intQuals, + bool phred64, + bool solexa64) +{ + int c = firstc; + assert(c != '\n' && c != '\r'); + r.qual.clear(); + if (intQuals) { + while (c != '\r' && c != '\n' && c != -1) { + bool neg = false; + int num = 0; + while(!isspace(c) && !fb.eof()) { + if(c == '-') { + neg = true; + assert_eq(num, 0); + } else { + if(!isdigit(c)) { + char buf[2048]; + cerr << "Warning: could not parse quality line:" << endl; + fb.getPastNewline(); + cerr << fb.copyLastN(buf); + buf[2047] = '\0'; + cerr << buf; + throw 1; + } + assert(isdigit(c)); + num *= 10; + num += (c - '0'); + } + c = fb.get(); + } + if(neg) num = 0; + // Phred-33 ASCII encode it and add it to the back of the + // quality string + r.qual.append('!' + num); + // Skip over next stretch of whitespace + while(c != '\r' && c != '\n' && isspace(c) && !fb.eof()) { + c = fb.get(); + } + } + } else { + while (c != '\r' && c != '\n' && c != -1) { + r.qual.append(charToPhred33(c, solexa64, phred64)); + c = fb.get(); + while(c != '\r' && c != '\n' && isspace(c) && !fb.eof()) { + c = fb.get(); + } + } + } + if ((int)r.qual.length() < readLen-1 || + ((int)r.qual.length() < readLen && !r.color)) + { + tooFewQualities(r.name); + } + r.qual.trimEnd(trim3); + if(r.qual.length()-trim5 < r.patFw.length()) { + assert(gColor && r.primer != -1); + assert_gt(trim5, 0); + trim5--; + } + r.qual.trimBegin(trim5); + if(r.qual.length() <= 0) return 0; + assert_eq(r.qual.length(), r.patFw.length()); + while(fb.peek() == '\n' || fb.peek() == '\r') fb.get(); + return (int)r.qual.length(); +} + +/// Read another pattern from a FASTA input file +bool FastaPatternSource::read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) +{ + int c, qc = 0; + success = true; + done = false; + assert(fb_.isOpen()); + r.reset(); + r.color = gColor; + // Pick off the first carat + c = fb_.get(); + if(c < 0) { + bail(r); 
success = false; done = true; return success; + } + while(c == '#' || c == ';' || c == '\r' || c == '\n') { + c = fb_.peekUptoNewline(); + fb_.resetLastN(); + c = fb_.get(); + } + assert_eq(1, fb_.lastNLen()); + + // Pick off the first carat + if(first_) { + if(c != '>') { + cerr << "Error: reads file does not look like a FASTA file" << endl; + throw 1; + } + first_ = false; + } + assert_eq('>', c); + c = fb_.get(); // get next char after '>' + + // Read to the end of the id line, sticking everything after the '>' + // into *name + //bool warning = false; + while(true) { + if(c < 0 || qc < 0) { + bail(r); success = false; done = true; return success; + } + if(c == '\n' || c == '\r') { + // Break at end of line, after consuming all \r's, \n's + while(c == '\n' || c == '\r') { + if(fb_.peek() == '>') { + // Empty sequence + break; + } + c = fb_.get(); + if(c < 0 || qc < 0) { + bail(r); success = false; done = true; return success; + } + } + break; + } + r.name.append(c); + if(fb_.peek() == '>') { + // Empty sequence + break; + } + c = fb_.get(); + } + if(c == '>') { + // Empty sequences! 
+ cerr << "Warning: skipping empty FASTA read with name '" << r.name << "'" << endl; + fb_.resetLastN(); + rdid = endid = readCnt_; + readCnt_++; + success = true; done = false; return success; + } + assert_neq('>', c); + + // _in now points just past the first character of a sequence + // line, and c holds the first character + int begin = 0; + int mytrim5 = gTrim5; + if(gColor) { + // This is the primer character, keep it in the + // 'primer' field of the read buf and keep parsing + c = toupper(c); + if(asc2dnacat[c] > 0) { + // First char is a DNA char + int c2 = toupper(fb_.peek()); + if(asc2colcat[c2] > 0) { + // Second char is a color char + r.primer = c; + r.trimc = c2; + mytrim5 += 2; + } + } + if(c < 0) { + bail(r); success = false; done = true; return success; + } + } + while(c != '>' && c >= 0) { + if(gColor) { + if(c >= '0' && c <= '4') c = "ACGTN"[(int)c - '0']; + if(c == '.') c = 'N'; + } + if(asc2dnacat[c] > 0 && begin++ >= mytrim5) { + r.patFw.append(asc2dna_3N[0][c]); + if (threeN) { + r.patFw_3N.append(asc2dna_3N[1][c]); + r.originalFw.append((asc2dna[c])); + } + r.qual.append('I'); + } + if(fb_.peek() == '>') break; + c = fb_.get(); + } + + r.patFw.trimEnd(gTrim3); + if (threeN) r.patFw_3N.trimEnd(gTrim3); + r.qual.trimEnd(gTrim3); + r.trimmed3 = gTrim3; + r.trimmed5 = mytrim5; + // Set up a default name if one hasn't been set + if(r.name.empty()) { + char cbuf[20]; + itoa10(readCnt_, cbuf); + r.name.install(cbuf); + } + assert_gt(r.name.length(), 0); + r.readOrigBuf.install(fb_.lastN(), fb_.lastNLen()); + fb_.resetLastN(); + rdid = endid = readCnt_; + readCnt_++; + return success; +} + +/// Read another pattern from a FASTQ input file +bool FastqPatternSource::read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) +{ + int c; + int dstLen = 0; + success = true; + done = false; + r.reset(); + r.color = gColor; + r.fuzzy = fuzzy_; + // Pick off the first at + if(first_) { + c = fb_.get(); + if(c != '@') { + c = 
getOverNewline(fb_); + if(c < 0) { + bail(r); success = false; done = true; return success; + } + } + if(c != '@') { + cerr << "Error: reads file does not look like a FASTQ file" << endl; + throw 1; + } + assert_eq('@', c); + first_ = false; + } + + // Read to the end of the id line, sticking everything after the '@' + // into *name + while(true) { + c = fb_.get(); + if(c < 0) { + bail(r); success = false; done = true; return success; + } + if(c == '\n' || c == '\r') { + // Break at end of line, after consuming all \r's, \n's + while(c == '\n' || c == '\r') { + c = fb_.get(); + if(c < 0) { + bail(r); success = false; done = true; return success; + } + } + break; + } + r.name.append(c); + } + // fb_ now points just past the first character of a + // sequence line, and c holds the first character + int charsRead = 0; + BTDnaString *sbuf = &r.patFw; + int dstLens[] = {0, 0, 0, 0}; + int *dstLenCur = &dstLens[0]; + int mytrim5 = gTrim5; + int altBufIdx = 0; + if(gColor && c != '+') { + // This may be a primer character. If so, keep it in the + // 'primer' field of the read buf and parse the rest of the + // read without it. 
+ c = toupper(c); + if(asc2dnacat[c] > 0) { + // First char is a DNA char + int c2 = toupper(fb_.peek()); + // Second char is a color char + if(asc2colcat[c2] > 0) { + r.primer = c; + r.trimc = c2; + mytrim5 += 2; // trim primer and first color + } + } + if(c < 0) { + bail(r); success = false; done = true; return success; + } + } + int trim5 = 0; + if(c != '+') { + trim5 = mytrim5; + while(c != '+') { + // Convert color numbers to letters if necessary + if(c == '.') c = 'N'; + if(gColor) { + if(c >= '0' && c <= '4') c = "ACGTN"[(int)c - '0']; + } + if(fuzzy_ && c == '-') c = 'A'; + if(isalpha(c)) { + // If it's past the 5'-end trim point + if(charsRead >= trim5) { + r.patFw.append(asc2dna_3N[0][c]); + if (threeN) { + r.patFw_3N.append(asc2dna_3N[1][c]); + r.originalFw.append((asc2dna[c])); + } + (*dstLenCur)++; + } + charsRead++; + } else if(fuzzy_ && c == ' ') { + trim5 = 0; // disable 5' trimming for now + if(charsRead == 0) { + c = fb_.get(); + continue; + } + charsRead = 0; + if(altBufIdx >= 3) { + cerr << "At most 3 alternate sequence strings permitted; offending read: " << r.name << endl; + throw 1; + } + // Move on to the next alternate-sequence buffer + sbuf = &r.altPatFw[altBufIdx++]; + dstLenCur = &dstLens[altBufIdx]; + } + c = fb_.get(); + if(c < 0) { + bail(r); success = false; done = true; return success; + } + } + dstLen = dstLens[0]; + charsRead = dstLen + mytrim5; + } + // Trim from 3' end + if(gTrim3 > 0) { + if((int)r.patFw.length() > gTrim3) { + r.patFw.resize(r.patFw.length() - gTrim3); + dstLen -= gTrim3; + assert_eq((int)r.patFw.length(), dstLen); + } else { + // Trimmed the whole read; we won't be using this read, + // but we proceed anyway so that fb_ is advanced + // properly + r.patFw.clear(); + dstLen = 0; + } + } + assert_eq('+', c); + + // Chew up the optional name on the '+' line + ASSERT_ONLY(int pk =) peekToEndOfLine(fb_); + if(charsRead == 0) { + assert_eq('@', pk); + fb_.get(); + fb_.resetLastN(); + rdid = endid = readCnt_; + 
readCnt_++; + return success; + } + + // Now read the qualities + if (intQuals_) { + assert(!fuzzy_); + int qualsRead = 0; + char buf[4096]; + if(gColor && r.primer != -1) { + // In case the original quality string is one shorter + mytrim5--; + } + qualToks_.clear(); + tokenizeQualLine(fb_, buf, 4096, qualToks_); + for(unsigned int j = 0; j < qualToks_.size(); ++j) { + char c = intToPhred33(atoi(qualToks_[j].c_str()), solQuals_); + assert_geq(c, 33); + if (qualsRead >= mytrim5) { + r.qual.append(c); + } + ++qualsRead; + } // done reading integer quality lines + if(gColor && r.primer != -1) mytrim5++; + r.qual.trimEnd(gTrim3); + if(r.qual.length() < r.patFw.length()) { + tooFewQualities(r.name); + } else if(r.qual.length() > r.patFw.length() + 1) { + tooManyQualities(r.name); + } + if(r.qual.length() == r.patFw.length()+1 && gColor && r.primer != -1) { + r.qual.remove(0); + } + // Trim qualities on 3' end + if(r.qual.length() > r.patFw.length()) { + r.qual.resize(r.patFw.length()); + assert_eq((int)r.qual.length(), dstLen); + } + peekOverNewline(fb_); + } else { + // Non-integer qualities + altBufIdx = 0; + trim5 = mytrim5; + int qualsRead[4] = {0, 0, 0, 0}; + int *qualsReadCur = &qualsRead[0]; + BTString *qbuf = &r.qual; + if(gColor && r.primer != -1) { + // In case the original quality string is one shorter + trim5--; + } + while(true) { + c = fb_.get(); + if (!fuzzy_ && c == ' ') { + wrongQualityFormat(r.name); + } else if(c == ' ') { + trim5 = 0; // disable 5' trimming for now + if((*qualsReadCur) == 0) continue; + if(altBufIdx >= 3) { + cerr << "At most 3 alternate quality strings permitted; offending read: " << r.name << endl; + throw 1; + } + qbuf = &r.altQual[altBufIdx++]; + qualsReadCur = &qualsRead[altBufIdx]; + continue; + } + if(c < 0) { + break; // let the file end just at the end of a quality line + //bail(r); success = false; done = true; return success; + } + if (c != '\r' && c != '\n') { + if (*qualsReadCur >= trim5) { + c = charToPhred33(c, 
solQuals_, phred64Quals_); + assert_geq(c, 33); + qbuf->append(c); + } + (*qualsReadCur)++; + } else { + break; + } + } + qualsRead[0] -= gTrim3; + r.qual.trimEnd(gTrim3); + if(r.qual.length() < r.patFw.length()) { + tooFewQualities(r.name); + } else if(r.qual.length() > r.patFw.length()+1) { + tooManyQualities(r.name); + } + if(r.qual.length() == r.patFw.length()+1 && gColor && r.primer != -1) { + r.qual.remove(0); + } + + if(fuzzy_) { + // Trim from 3' end of alternate basecall and quality strings + if(gTrim3 > 0) { + for(int i = 0; i < 3; i++) { + assert_eq(r.altQual[i].length(), r.altPatFw[i].length()); + if((int)r.altQual[i].length() > gTrim3) { + r.altPatFw[i].resize(gTrim3); + r.altQual[i].resize(gTrim3); + } else { + r.altPatFw[i].clear(); + r.altQual[i].clear(); + } + qualsRead[i+1] = dstLens[i+1] = + max(0, dstLens[i+1] - gTrim3); + } + } + // Shift to RHS, and install in Strings + assert_eq(0, r.alts); + for(int i = 1; i < 4; i++) { + if(qualsRead[i] == 0) continue; + if(qualsRead[i] > dstLen) { + // Shift everybody up + int shiftAmt = qualsRead[i] - dstLen; + for(int j = 0; j < dstLen; j++) { + r.altQual[i-1].set(r.altQual[i-1][j+shiftAmt], j); + r.altPatFw[i-1].set(r.altPatFw[i-1][j+shiftAmt], j); + } + r.altQual[i-1].resize(dstLen); + r.altPatFw[i-1].resize(dstLen); + } else if (qualsRead[i] < dstLen) { + r.altQual[i-1].resize(dstLen); + r.altPatFw[i-1].resize(dstLen); + // Shift everybody down + int shiftAmt = dstLen - qualsRead[i]; + for(int j = dstLen-1; j >= shiftAmt; j--) { + r.altQual[i-1].set(r.altQual[i-1][j-shiftAmt], j); + r.altPatFw[i-1].set(r.altPatFw[i-1][j-shiftAmt], j); + } + // Fill in unset positions + for(int j = 0; j < shiftAmt; j++) { + // '!' 
- indicates no alternate basecall at + // this position + r.altQual[i-1].set(33, j); + } + } + r.alts++; + } + } + + if(c == '\r' || c == '\n') { + c = peekOverNewline(fb_); + } else { + c = peekToEndOfLine(fb_); + } + } + r.readOrigBuf.install(fb_.lastN(), fb_.lastNLen()); + fb_.resetLastN(); + + c = fb_.get(); + // Should either be at end of file or at beginning of next record + assert(c == -1 || c == '@'); + + // Set up a default name if one hasn't been set + if(r.name.empty()) { + char cbuf[20]; + itoa10(readCnt_, cbuf); + r.name.install(cbuf); + } + r.trimmed3 = gTrim3; + r.trimmed5 = mytrim5; + rdid = endid = readCnt_; + readCnt_++; + return success; +} + +/// Read another pattern from a FASTA input file +bool TabbedPatternSource::read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) +{ + r.reset(); + r.color = gColor; + success = true; + done = false; + // fb_ is about to dish out the first character of the + // name field + if(parseName(r, NULL, '\t') == -1) { + peekOverNewline(fb_); // skip rest of line + r.reset(); + success = false; + done = true; + return false; + } + assert_neq('\t', fb_.peek()); + + // fb_ is about to dish out the first character of the + // sequence field + int charsRead = 0; + int mytrim5 = gTrim5; + int dstLen = parseSeq(r, charsRead, mytrim5, '\t'); + assert_neq('\t', fb_.peek()); + if(dstLen < 0) { + peekOverNewline(fb_); // skip rest of line + r.reset(); + success = false; + done = true; + return false; + } + + // fb_ is about to dish out the first character of the + // quality-string field + char ct = 0; + if(parseQuals(r, charsRead, dstLen, mytrim5, ct, '\n') < 0) { + peekOverNewline(fb_); // skip rest of line + r.reset(); + success = false; + done = true; + return false; + } + r.trimmed3 = gTrim3; + r.trimmed5 = mytrim5; + assert_eq(ct, '\n'); + assert_neq('\n', fb_.peek()); + r.readOrigBuf.install(fb_.lastN(), fb_.lastNLen()); + fb_.resetLastN(); + rdid = endid = readCnt_; + readCnt_++; + return 
true; +} + +/// Read another pair of patterns from a FASTA input file +bool TabbedPatternSource::readPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) +{ + success = true; + done = false; + + // Skip over initial vertical whitespace + if(fb_.peek() == '\r' || fb_.peek() == '\n') { + fb_.peekUptoNewline(); + fb_.resetLastN(); + } + + // fb_ is about to dish out the first character of the + // name field + int mytrim5_1 = gTrim5; + if(parseName(ra, &rb, '\t') == -1) { + peekOverNewline(fb_); // skip rest of line + ra.reset(); + rb.reset(); + fb_.resetLastN(); + success = false; + done = true; + return false; + } + assert_neq('\t', fb_.peek()); + + // fb_ is about to dish out the first character of the + // sequence field for the first mate + int charsRead1 = 0; + int dstLen1 = parseSeq(ra, charsRead1, mytrim5_1, '\t'); + if(dstLen1 < 0) { + peekOverNewline(fb_); // skip rest of line + ra.reset(); + rb.reset(); + fb_.resetLastN(); + success = false; + done = true; + return false; + } + assert_neq('\t', fb_.peek()); + + // fb_ is about to dish out the first character of the + // quality-string field + char ct = 0; + if(parseQuals(ra, charsRead1, dstLen1, mytrim5_1, ct, '\t', '\n') < 0) { + peekOverNewline(fb_); // skip rest of line + ra.reset(); + rb.reset(); + fb_.resetLastN(); + success = false; + done = true; + return false; + } + ra.trimmed3 = gTrim3; + ra.trimmed5 = mytrim5_1; + assert(ct == '\t' || ct == '\n' || ct == '\r' || ct == -1); + if(ct == '\r' || ct == '\n' || ct == -1) { + // Only had 3 fields prior to newline, so this must be an unpaired read + rb.reset(); + ra.readOrigBuf.install(fb_.lastN(), fb_.lastNLen()); + fb_.resetLastN(); + success = true; + done = false; + paired = false; + rdid = endid = readCnt_; + readCnt_++; + return success; + } + paired = true; + assert_neq('\t', fb_.peek()); + + // Saw another tab after the third field, so this must be a pair + if(secondName_) { + // The 
second mate has its own name + if(parseName(rb, NULL, '\t') == -1) { + peekOverNewline(fb_); // skip rest of line + ra.reset(); + rb.reset(); + fb_.resetLastN(); + success = false; + done = true; + return false; + } + assert_neq('\t', fb_.peek()); + } + + // fb_ about to give the first character of the second mate's sequence + int charsRead2 = 0; + int mytrim5_2 = gTrim5; + int dstLen2 = parseSeq(rb, charsRead2, mytrim5_2, '\t'); + if(dstLen2 < 0) { + peekOverNewline(fb_); // skip rest of line + ra.reset(); + rb.reset(); + fb_.resetLastN(); + success = false; + done = true; + return false; + } + assert_neq('\t', fb_.peek()); + + // fb_ is about to dish out the first character of the + // quality-string field + if(parseQuals(rb, charsRead2, dstLen2, mytrim5_2, ct, '\n') < 0) { + peekOverNewline(fb_); // skip rest of line + ra.reset(); + rb.reset(); + fb_.resetLastN(); + success = false; + done = true; + return false; + } + ra.readOrigBuf.install(fb_.lastN(), fb_.lastNLen()); + fb_.resetLastN(); + rb.trimmed3 = gTrim3; + rb.trimmed5 = mytrim5_2; + rdid = endid = readCnt_; + readCnt_++; + return true; +} + +/** + * Parse a name from fb_ and store in r. Assume that the next + * character obtained via fb_.get() is the first character of + * the sequence and the string stops at the next char upto (could + * be tab, newline, etc.). 
+ */ +int TabbedPatternSource::parseName( + Read& r, + Read* r2, + char upto /* = '\t' */) +{ + // Read the name out of the first field + int c = 0; + if(r2 != NULL) r2->name.clear(); + r.name.clear(); + while(true) { + if((c = fb_.get()) < 0) { + return -1; + } + if(c == upto) { + // Finished with first field + break; + } + if(c == '\n' || c == '\r') { + return -1; + } + if(r2 != NULL) r2->name.append(c); + r.name.append(c); + } + // Set up a default name if one hasn't been set + if(r.name.empty()) { + char cbuf[20]; + itoa10(readCnt_, cbuf); + r.name.install(cbuf); + if(r2 != NULL) r2->name.install(cbuf); + } + return (int)r.name.length(); +} + +/** + * Parse a single sequence from fb_ and store in r. Assume + * that the next character obtained via fb_.get() is the first + * character of the sequence and the sequence stops at the next + * char upto (could be tab, newline, etc.). + */ +int TabbedPatternSource::parseSeq( + Read& r, + int& charsRead, + int& trim5, + char upto /*= '\t'*/) +{ + int begin = 0; + int c = fb_.get(); + assert(c != upto); + r.patFw.clear(); + r.color = gColor; + if(gColor) { + // This may be a primer character. If so, keep it in the + // 'primer' field of the read buf and parse the rest of the + // read without it. 
+ c = toupper(c); + if(asc2dnacat[c] > 0) { + // First char is a DNA char + int c2 = toupper(fb_.peek()); + // Second char is a color char + if(asc2colcat[c2] > 0) { + r.primer = c; + r.trimc = c2; + trim5 += 2; // trim primer and first color + } + } + if(c < 0) { return -1; } + } + while(c != upto) { + if(gColor) { + if(c >= '0' && c <= '4') c = "ACGTN"[(int)c - '0']; + if(c == '.') c = 'N'; + } + if(isalpha(c)) { + assert_in(toupper(c), "ACGTN"); + if(begin++ >= trim5) { + assert_neq(0, asc2dnacat[c]); + r.patFw.append(asc2dna_3N[0][c]); + if (threeN) { + r.patFw_3N.append(asc2dna_3N[1][c]); + r.originalFw.append((asc2dna[c])); + } + } + charsRead++; + } + if((c = fb_.get()) < 0) { + return -1; + } + } + r.patFw.trimEnd(gTrim3); + if (threeN) r.patFw_3N.trimEnd(gTrim3); + return (int)r.patFw.length(); +} + +/** + * Parse a single quality string from fb_ and store in r. + * Assume that the next character obtained via fb_.get() is + * the first character of the quality string and the string stops + * at the next char upto (could be tab, newline, etc.). 
+ */ +int TabbedPatternSource::parseQuals( + Read& r, + int charsRead, + int dstLen, + int trim5, + char& c2, + char upto /*= '\t'*/, + char upto2 /*= -1*/) +{ + int qualsRead = 0; + int c = 0; + if (intQuals_) { + char buf[4096]; + while (qualsRead < charsRead) { + qualToks_.clear(); + if(!tokenizeQualLine(fb_, buf, 4096, qualToks_)) break; + for (unsigned int j = 0; j < qualToks_.size(); ++j) { + char c = intToPhred33(atoi(qualToks_[j].c_str()), solQuals_); + assert_geq(c, 33); + if (qualsRead >= trim5) { + r.qual.append(c); + } + ++qualsRead; + } + } // done reading integer quality lines + if (charsRead > qualsRead) tooFewQualities(r.name); + } else { + // Non-integer qualities + while((qualsRead < dstLen + trim5) && c >= 0) { + c = fb_.get(); + c2 = c; + if (c == ' ') wrongQualityFormat(r.name); + if(c < 0) { + // EOF occurred in the middle of a read - abort + return -1; + } + if(!isspace(c) && c != upto && (upto2 == -1 || c != upto2)) { + if (qualsRead >= trim5) { + c = charToPhred33(c, solQuals_, phred64Quals_); + assert_geq(c, 33); + r.qual.append(c); + } + qualsRead++; + } else { + break; + } + } + if(qualsRead < dstLen + trim5) { + tooFewQualities(r.name); + } else if(qualsRead > dstLen + trim5) { + tooManyQualities(r.name); + } + } + r.qual.resize(dstLen); + while(c != upto && (upto2 == -1 || c != upto2) && c != -1) { + c = fb_.get(); + c2 = c; + } + return qualsRead; +} + +void wrongQualityFormat(const BTString& read_name) { + cerr << "Error: Encountered one or more spaces while parsing the quality " + << "string for read " << read_name << ". If this is a FASTQ file " + << "with integer (non-ASCII-encoded) qualities, try re-running with " + << "the --integer-quals option." << endl; + throw 1; +} + +void tooFewQualities(const BTString& read_name) { + cerr << "Error: Read " << read_name << " has more read characters than " + << "quality values." 
<< endl; + throw 1; +} + +void tooManyQualities(const BTString& read_name) { + cerr << "Error: Read " << read_name << " has more quality values than read " + << "characters." << endl; + throw 1; +} + +#ifdef USE_SRA + +struct SRA_Read { + SStringExpandable name; // read name + SDnaStringExpandable<128, 2> patFw; // forward-strand sequence + SStringExpandable qual; // quality values + + void reset() { + name.clear(); + patFw.clear(); + qual.clear(); + } +}; + +static const uint64_t buffer_size_per_thread = 4096; + +struct SRA_Data { + uint64_t read_pos; + uint64_t write_pos; + uint64_t buffer_size; + bool done; + EList > paired_reads; + + ngs::ReadIterator* sra_it; + + SRA_Data() { + read_pos = 0; + write_pos = 0; + buffer_size = buffer_size_per_thread; + done = false; + sra_it = NULL; + } + + bool isFull() { + assert_leq(read_pos, write_pos); + assert_geq(read_pos + buffer_size, write_pos); + return read_pos + buffer_size <= write_pos; + } + + bool isEmpty() { + assert_leq(read_pos, write_pos); + assert_geq(read_pos + buffer_size, write_pos); + return read_pos == write_pos; + } + + pair& getPairForRead() { + assert(!isEmpty()); + return paired_reads[read_pos % buffer_size]; + } + + pair& getPairForWrite() { + assert(!isFull()); + return paired_reads[write_pos % buffer_size]; + } + + void advanceReadPos() { + assert(!isEmpty()); + read_pos++; + } + + void advanceWritePos() { + assert(!isFull()); + write_pos++; + } +}; + +static void SRA_IO_Worker(void *vp) +{ + SRA_Data* sra_data = (SRA_Data*)vp; + assert(sra_data != NULL); + ngs::ReadIterator* sra_it = sra_data->sra_it; + assert(sra_it != NULL); + + while(!sra_data->done) { + while(sra_data->isFull()) { +#if defined(_TTHREAD_WIN32_) + Sleep(1); +#elif defined(_TTHREAD_POSIX_) + const static timespec ts = {0, 1000000}; // 1 millisecond + nanosleep(&ts, NULL); +#endif + } + pair& pair = sra_data->getPairForWrite(); + SRA_Read& ra = pair.first; + SRA_Read& rb = pair.second; + bool exception_thrown = false; + try { + 
if(!sra_it->nextRead() || !sra_it->nextFragment()) { + ra.reset(); + rb.reset(); + sra_data->done = true; + return; + } + + // Read the name out of the first field + ngs::StringRef rname = sra_it->getReadId(); + ra.name.install(rname.data(), rname.size()); + assert(!ra.name.empty()); + + ngs::StringRef ra_seq = sra_it->getFragmentBases(); + if(gTrim5 + gTrim3 < (int)ra_seq.size()) { + ra.patFw.installChars(ra_seq.data() + gTrim5, ra_seq.size() - gTrim5 - gTrim3); + } + ngs::StringRef ra_qual = sra_it->getFragmentQualities(); + if(ra_seq.size() == ra_qual.size() && gTrim5 + gTrim3 < (int)ra_qual.size()) { + ra.qual.install(ra_qual.data() + gTrim5, ra_qual.size() - gTrim5 - gTrim3); + } else { + ra.qual.resize(ra.patFw.length()); + ra.qual.fill('I'); + } + assert_eq(ra.patFw.length(), ra.qual.length()); + + if(!sra_it->nextFragment()) { + rb.reset(); + } else { + // rb.name = ra.name; + ngs::StringRef rb_seq = sra_it->getFragmentBases(); + if(gTrim5 + gTrim3 < (int)rb_seq.size()) { + rb.patFw.installChars(rb_seq.data() + gTrim5, rb_seq.size() - gTrim5 - gTrim3); + } + ngs::StringRef rb_qual = sra_it->getFragmentQualities(); + if(rb_seq.size() == rb_qual.size() && gTrim5 + gTrim3 < (int)rb_qual.size()) { + rb.qual.install(rb_qual.data() + gTrim5, rb_qual.size() - gTrim5 - gTrim3); + } else { + rb.qual.resize(rb.patFw.length()); + rb.qual.fill('I'); + } + assert_eq(rb.patFw.length(), rb.qual.length()); + } + sra_data->advanceWritePos(); + } catch(ngs::ErrorMsg & x) { + cerr << x.toString () << endl; + exception_thrown = true; + } catch(exception & x) { + cerr << x.what () << endl; + exception_thrown = true; + } catch(...) { + cerr << "unknown exception\n"; + exception_thrown = true; + } + + if(exception_thrown) { + ra.reset(); + rb.reset(); + sra_data->done = true; + cerr << "An error happened while fetching SRA reads. Please rerun HISAT2. 
You may want to disable the SRA cache if you didn't (see the instructions at https://github.com/ncbi/sra-tools/wiki/Toolkit-Configuration).\n"; + exit(1); + } + } +} + +SRAPatternSource::~SRAPatternSource() { + if(io_thread_) delete io_thread_; + if(sra_data_) delete sra_data_; + if(sra_it_) delete sra_it_; + if(sra_run_) delete sra_run_; +} + +/// Read another pair of patterns from a FASTA input file +bool SRAPatternSource::readPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) +{ + assert(sra_run_ != NULL && sra_it_ != NULL); + success = true; + done = false; + while(sra_data_->isEmpty()) { + if(sra_data_->done && sra_data_->isEmpty()) { + ra.reset(); + rb.reset(); + success = false; + done = true; + return false; + } + +#if defined(_TTHREAD_WIN32_) + Sleep(1); +#elif defined(_TTHREAD_POSIX_) + const static timespec ts = {0, 1000000}; // 1 millisecond + nanosleep(&ts, NULL); +#endif + } + + pair& pair = sra_data_->getPairForRead(); + ra.name.install(pair.first.name.buf(), pair.first.name.length()); + ra.patFw.install(pair.first.patFw.buf(), pair.first.patFw.length()); + ra.qual.install(pair.first.qual.buf(), pair.first.qual.length()); + ra.trimmed3 = gTrim3; + ra.trimmed5 = gTrim5; + if(pair.second.patFw.length() > 0) { + rb.name.install(pair.first.name.buf(), pair.first.name.length()); + rb.patFw.install(pair.second.patFw.buf(), pair.second.patFw.length()); + rb.qual.install(pair.second.qual.buf(), pair.second.qual.length()); + rb.trimmed3 = gTrim3; + rb.trimmed5 = gTrim5; + paired = true; + } else { + rb.reset(); + } + sra_data_->advanceReadPos(); + + rdid = endid = readCnt_; + readCnt_++; + + return true; +} + +void SRAPatternSource::open() { + string version = "hisat2-"; + version += HISAT2_VERSION; + ncbi::NGS::setAppVersionString(version.c_str()); + assert(!sra_accs_.empty()); + while(sra_acc_cur_ < sra_accs_.size()) { + // Open read + if(sra_it_) { + delete sra_it_; + sra_it_ = NULL; + } + 
if(sra_run_) { + delete sra_run_; + sra_run_ = NULL; + } + try { + // open requested accession using SRA implementation of the API + sra_run_ = new ngs::ReadCollection(ncbi::NGS::openReadCollection(sra_accs_[sra_acc_cur_])); + +#if 0 + string run_name = sra_run_->getName(); + cerr << " ReadGroups for " << run_name << endl; + + ngs::ReadGroupIterator it = sra_run_->getReadGroups(); + do { + ngs::Statistics s = it.getStatistics(); + cerr << "Statistics for group <" << it.getName() << ">" << endl; + + // for(string p = s.nextPath(""); p != ""; p = s.nextPath(p)){ + // System.out.println("\t"+p+": "+s.getAsString(p)); + } while(it.nextReadGroup()); + exit(1); +#endif + + // compute window to iterate through + size_t MAX_ROW = sra_run_->getReadCount(); + sra_it_ = new ngs::ReadIterator(sra_run_->getReadRange(1, MAX_ROW, ngs::Read::all)); + + // create a buffer for SRA data + sra_data_ = new SRA_Data; + sra_data_->sra_it = sra_it_; + sra_data_->buffer_size = nthreads_ * buffer_size_per_thread; + sra_data_->paired_reads.resize(sra_data_->buffer_size); + + // create a thread for handling SRA data access + io_thread_ = new tthread::thread(SRA_IO_Worker, (void*)sra_data_); + // io_thread_->join(); + } catch(...) { + if(!errs_[sra_acc_cur_]) { + cerr << "Warning: Could not access \"" << sra_accs_[sra_acc_cur_].c_str() << "\" for reading; skipping..." << endl; + errs_[sra_acc_cur_] = true; + } + sra_acc_cur_++; + continue; + } + return; + } + cerr << "Error: No input SRA accessions were valid" << endl; + exit(1); + return; +} + +#endif diff --git a/pat.h b/pat.h new file mode 100644 index 0000000..b257663 --- /dev/null +++ b/pat.h @@ -0,0 +1,1800 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef PAT_H_ +#define PAT_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "alphabet.h" +#include "assert_helpers.h" +#include "tokenize.h" +#include "random_source.h" +#include "threading.h" +#include "filebuf.h" +#include "qual.h" +#include "search_globals.h" +#include "sstring.h" +#include "ds.h" +#include "read.h" +#include "util.h" + +extern bool threeN; +/** + * Classes and routines for reading reads from various input sources. + */ + +using namespace std; + +/** + * Calculate a per-read random seed based on a combination of + * the read data (incl. sequence, name, quals) and the global + * seed in '_randSeed'. + */ +static inline uint32_t genRandSeed(const BTDnaString& qry, + const BTString& qual, + const BTString& name, + uint32_t seed) +{ + // Calculate a per-read random seed based on a combination of + // the read data (incl. 
sequence, name, quals) and the global + // seed + uint32_t rseed = (seed + 101) * 59 * 61 * 67 * 71 * 73 * 79 * 83; + size_t qlen = qry.length(); + // Throw all the characters of the read into the random seed + for(size_t i = 0; i < qlen; i++) { + int p = (int)qry[i]; + assert_leq(p, 4); + size_t off = ((i & 15) << 1); + rseed ^= (p << off); + } + // Throw all the quality values for the read into the random + // seed + for(size_t i = 0; i < qlen; i++) { + int p = (int)qual[i]; + assert_leq(p, 255); + size_t off = ((i & 3) << 3); + rseed ^= (p << off); + } + // Throw all the characters in the read name into the random + // seed + size_t namelen = name.length(); + for(size_t i = 0; i < namelen; i++) { + int p = (int)name[i]; + if(p == '/') break; + assert_leq(p, 255); + size_t off = ((i & 3) << 3); + rseed ^= (p << off); + } + return rseed; +} + +/** + * Parameters affecting how reads and read in. + */ +struct PatternParams { + PatternParams( + int format_, + bool fileParallel_, + uint32_t seed_, + bool useSpinlock_, + bool solexa64_, + bool phred64_, + bool intQuals_, + bool fuzzy_, + int sampleLen_, + int sampleFreq_, + uint32_t skip_) : + format(format_), + fileParallel(fileParallel_), + seed(seed_), + useSpinlock(useSpinlock_), + solexa64(solexa64_), + phred64(phred64_), + intQuals(intQuals_), + fuzzy(fuzzy_), + sampleLen(sampleLen_), + sampleFreq(sampleFreq_), + skip(skip_) { } + + int format; // file format + bool fileParallel; // true -> wrap files with separate PairedPatternSources + uint32_t seed; // pseudo-random seed + bool useSpinlock; // use spin locks instead of pthreads + bool solexa64; // true -> qualities are on solexa64 scale + bool phred64; // true -> qualities are on phred64 scale + bool intQuals; // true -> qualities are space-separated numbers + bool fuzzy; // true -> try to parse fuzzy fastq + int sampleLen; // length of sampled reads for FastaContinuous... + int sampleFreq; // frequency of sampled reads for FastaContinuous... 
+ uint32_t skip; // skip the first 'skip' patterns +}; + +/** + * Encapsulates a synchronized source of patterns; usually a file. + * Optionally reverses reads and quality strings before returning them, + * though that is usually more efficiently done by the concrete + * subclass. Concrete subclasses should delimit critical sections with + * calls to lock() and unlock(). + */ +class PatternSource { + +public: + + PatternSource(const PatternParams& p) : + seed_(p.seed), + readCnt_(0), + numWrappers_(0), + doLocking_(true), + useSpinlock_(p.useSpinlock), + mutex() + { + } + + virtual ~PatternSource() { } + + /** + * Call this whenever this PatternSource is wrapped by a new + * WrappedPatternSourcePerThread. This helps us keep track of + * whether locks will be contended. + */ + void addWrapper() { + lock(); + numWrappers_++; + unlock(); + } + + /** + * The main member function for dispensing patterns. + * + * Returns true iff a pair was parsed succesfully. + */ + virtual bool nextReadPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired, + bool fixName); + + /** + * The main member function for dispensing patterns. + */ + virtual bool nextRead( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done); + + /** + * Implementation to be provided by concrete subclasses. An + * implementation for this member is only relevant for formats that + * can read in a pair of reads in a single transaction with a + * single input source. If paired-end input is given as a pair of + * parallel files, this member should throw an error and exit. + */ + virtual bool nextReadPairImpl( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) = 0; + + /** + * Implementation to be provided by concrete subclasses. 
An + * implementation for this member is only relevant for formats + * where individual input sources look like single-end-read + * sources, e.g., formats where paired-end reads are specified in + * parallel read files. + */ + virtual bool nextReadImpl( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) = 0; + + /// Reset state to start over again with the first read + virtual void reset() { readCnt_ = 0; } + + /** + * Concrete subclasses call lock() to enter a critical region. + * What constitutes a critical region depends on the subclass. + */ + void lock() { + if(!doLocking_) return; // no contention + mutex.lock(); + } + + /** + * Concrete subclasses call unlock() to exit a critical region + * What constitutes a critical region depends on the subclass. + */ + void unlock() { + if(!doLocking_) return; // no contention + mutex.unlock(); + } + + /** + * Return a new dynamically allocated PatternSource for the given + * format, using the given list of strings as the filenames to read + * from or as the sequences themselves (i.e. if -c was used). + */ + static PatternSource* patsrcFromStrings( + const PatternParams& p, + const EList& qs, + size_t nthreads = 1); + + /** + * Return the number of reads attempted. + */ + TReadId readCnt() const { return readCnt_ - 1; } + + int paired_type; // 1 - left or unpaird, 2-right +// int align_times = 0; + +protected: + + uint32_t seed_; + + /// The number of reads read by this PatternSource + TReadId readCnt_; + + int numWrappers_; /// # threads that own a wrapper for this PatternSource + bool doLocking_; /// override whether to lock (true = don't override) + /// User can ask to use the normal pthreads-style lock even if + /// spinlocks is enabled and compiled in. This is sometimes better + /// if we expect bad I/O latency on some reads. + bool useSpinlock_; + MUTEX_T mutex; +}; + +/** + * Abstract parent class for synhconized sources of paired-end reads + * (and possibly also single-end reads). 
+ */ +class PairedPatternSource { +public: + PairedPatternSource(const PatternParams& p) : mutex_m(), seed_(p.seed) {} + virtual ~PairedPatternSource() { } + + virtual void addWrapper() = 0; + virtual void reset() = 0; + + virtual bool nextReadPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired, + bool fixName) = 0; + + virtual pair readCnt() const = 0; + + /** + * Lock this PairedPatternSource, usually because one of its shared + * fields is being updated. + */ + void lock() { + mutex_m.lock(); + } + + /** + * Unlock this PairedPatternSource. + */ + void unlock() { + mutex_m.unlock(); + } + + /** + * Given the values for all of the various arguments used to specify + * the read and quality input, create a list of pattern sources to + * dispense them. + */ + static PairedPatternSource* setupPatternSources( + const EList& si, // singles, from argv + const EList& m1, // mate1's, from -1 arg + const EList& m2, // mate2's, from -2 arg + const EList& m12, // both mates on each line, from --12 arg +#ifdef USE_SRA + const EList& sra_accs, +#endif + const EList& q, // qualities associated with singles + const EList& q1, // qualities associated with m1 + const EList& q2, // qualities associated with m2 + const PatternParams& p, // read-in params + size_t nthreads, + bool verbose); // be talkative? + +protected: + + MUTEX_T mutex_m; /// mutex for syncing over critical regions + uint32_t seed_; +}; + +/** + * Encapsulates a synchronized source of both paired-end reads and + * unpaired reads, where the paired-end must come from parallel files. 
+ */ +class PairedSoloPatternSource : public PairedPatternSource { + +public: + + PairedSoloPatternSource( + const EList* src, + const PatternParams& p) : + PairedPatternSource(p), + cur_(0), + src_(src) + { + assert(src_ != NULL); + for(size_t i = 0; i < src_->size(); i++) { + assert((*src_)[i] != NULL); + } + } + + virtual ~PairedSoloPatternSource() { delete src_; } + + /** + * Call this whenever this PairedPatternSource is wrapped by a new + * WrappedPatternSourcePerThread. This helps us keep track of + * whether locks within PatternSources will be contended. + */ + virtual void addWrapper() { + for(size_t i = 0; i < src_->size(); i++) { + (*src_)[i]->addWrapper(); + } + } + + /** + * Reset this object and all the PatternSources under it so that + * the next call to nextReadPair gets the very first read pair. + */ + virtual void reset() { + for(size_t i = 0; i < src_->size(); i++) { + (*src_)[i]->reset(); + } + cur_ = 0; + } + + /** + * The main member function for dispensing pairs of reads or + * singleton reads. Returns true iff ra and rb contain a new + * pair; returns false if ra contains a new unpaired read. + */ + virtual bool nextReadPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired, + bool fixName); + + /** + * Return the number of reads attempted. + */ + virtual pair readCnt() const { + uint64_t ret = 0llu; + for(size_t i = 0; i < src_->size(); i++) ret += (*src_)[i]->readCnt(); + return make_pair(ret, 0llu); + } + +protected: + + volatile uint32_t cur_; // current element in parallel srca_, srcb_ vectors + const EList* src_; /// PatternSources for paired-end reads +}; + +/** + * Encapsulates a synchronized source of both paired-end reads and + * unpaired reads, where the paired-end must come from parallel files. 
+ */ +class PairedDualPatternSource : public PairedPatternSource { + +public: + + PairedDualPatternSource( + const EList* srca, + const EList* srcb, + const PatternParams& p) : + PairedPatternSource(p), cur_(0), srca_(srca), srcb_(srcb) + { + assert(srca_ != NULL); + assert(srcb_ != NULL); + // srca_ and srcb_ must be parallel + assert_eq(srca_->size(), srcb_->size()); + for(size_t i = 0; i < srca_->size(); i++) { + // Can't have NULL first-mate sources. Second-mate sources + // can be NULL, in the case when the corresponding first- + // mate source is unpaired. + assert((*srca_)[i] != NULL); + for(size_t j = 0; j < srcb_->size(); j++) { + assert_neq((*srca_)[i], (*srcb_)[j]); + } + } + } + + virtual ~PairedDualPatternSource() { + delete srca_; + delete srcb_; + } + + /** + * Call this whenever this PairedPatternSource is wrapped by a new + * WrappedPatternSourcePerThread. This helps us keep track of + * whether locks within PatternSources will be contended. + */ + virtual void addWrapper() { + for(size_t i = 0; i < srca_->size(); i++) { + (*srca_)[i]->addWrapper(); + if((*srcb_)[i] != NULL) { + (*srcb_)[i]->addWrapper(); + } + } + } + + /** + * Reset this object and all the PatternSources under it so that + * the next call to nextReadPair gets the very first read pair. + */ + virtual void reset() { + for(size_t i = 0; i < srca_->size(); i++) { + (*srca_)[i]->reset(); + if((*srcb_)[i] != NULL) { + (*srcb_)[i]->reset(); + } + } + cur_ = 0; + } + + /** + * The main member function for dispensing pairs of reads or + * singleton reads. Returns true iff ra and rb contain a new + * pair; returns false if ra contains a new unpaired read. + */ + virtual bool nextReadPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired, + bool fixName); + + /** + * Return the number of reads attempted. 
+ */ + virtual pair readCnt() const; + +protected: + + volatile uint32_t cur_; // current element in parallel srca_, srcb_ vectors + const EList* srca_; /// PatternSources for 1st mates and/or unpaired reads + const EList* srcb_; /// PatternSources for 2nd mates +}; + +/** + * Encapsulates a single thread's interaction with the PatternSource. + * Most notably, this class holds the buffers into which the + * PatterSource will write sequences. This class is *not* threadsafe + * - it doesn't need to be since there's one per thread. PatternSource + * is thread-safe. + */ +class PatternSourcePerThread { + +public: + + PatternSourcePerThread() : + buf1_(), buf2_(), rdid_(0xffffffff), endid_(0xffffffff) { } + + virtual ~PatternSourcePerThread() { } + + /** + * change 3N plan for both mate1 and mate2 + */ + void changePlan3N(int mappingCycle) { + buf1_.changePlan3N(mappingCycle); + buf2_.changePlan3N(3-mappingCycle); + } + + /** + * Read the next read pair. + */ + virtual bool nextReadPair( + bool& success, + bool& done, + bool& paired, + bool fixName) + { + return success; + } + + Read& bufa() { return buf1_; } + Read& bufb() { return buf2_; } + const Read& bufa() const { return buf1_; } + const Read& bufb() const { return buf2_; } + + TReadId rdid() const { return rdid_; } + TReadId endid() const { return endid_; } + virtual void reset() { rdid_ = endid_ = 0xffffffff; } + + /** + * Return the length of mate 1 or mate 2. + */ + size_t length(int mate) const { + return (mate == 1) ? buf1_.length() : buf2_.length(); + } + +protected: + + Read buf1_; // read buffer for mate a + Read buf2_; // read buffer for mate b + TReadId rdid_; // index of read just read + TReadId endid_; // index of read just read +}; + +/** + * Abstract parent factory for PatternSourcePerThreads. 
+ */ +class PatternSourcePerThreadFactory { +public: + virtual ~PatternSourcePerThreadFactory() { } + virtual PatternSourcePerThread* create() const = 0; + virtual EList* create(uint32_t n) const = 0; + + /// Free memory associated with a pattern source + virtual void destroy(PatternSourcePerThread* patsrc) const { + assert(patsrc != NULL); + // Free the PatternSourcePerThread + delete patsrc; + } + + /// Free memory associated with a pattern source list + virtual void destroy(EList* patsrcs) const { + assert(patsrcs != NULL); + // Free all of the PatternSourcePerThreads + for(size_t i = 0; i < patsrcs->size(); i++) { + if((*patsrcs)[i] != NULL) { + delete (*patsrcs)[i]; + (*patsrcs)[i] = NULL; + } + } + // Free the vector + delete patsrcs; + } +}; + +/** + * A per-thread wrapper for a PairedPatternSource. + */ +class WrappedPatternSourcePerThread : public PatternSourcePerThread { +public: + WrappedPatternSourcePerThread(PairedPatternSource& __patsrc) : + patsrc_(__patsrc) + { + patsrc_.addWrapper(); + } + + /** + * Get the next paired or unpaired read from the wrapped + * PairedPatternSource. + */ + virtual bool nextReadPair( + bool& success, + bool& done, + bool& paired, + bool fixName); + +private: + + /// Container for obtaining paired reads from PatternSources + PairedPatternSource& patsrc_; +}; + +/** + * Abstract parent factory for PatternSourcePerThreads. + */ +class WrappedPatternSourcePerThreadFactory : public PatternSourcePerThreadFactory { +public: + WrappedPatternSourcePerThreadFactory(PairedPatternSource& patsrc) : + patsrc_(patsrc) { } + + /** + * Create a new heap-allocated WrappedPatternSourcePerThreads. + */ + virtual PatternSourcePerThread* create() const { + return new WrappedPatternSourcePerThread(patsrc_); + } + + /** + * Create a new heap-allocated vector of heap-allocated + * WrappedPatternSourcePerThreads. 
+ */ + virtual EList* create(uint32_t n) const { + EList* v = new EList; + for(size_t i = 0; i < n; i++) { + v->push_back(new WrappedPatternSourcePerThread(patsrc_)); + assert(v->back() != NULL); + } + return v; + } + +private: + /// Container for obtaining paired reads from PatternSources + PairedPatternSource& patsrc_; +}; + +/// Skip to the end of the current string of newline chars and return +/// the first character after the newline chars, or -1 for EOF +static inline int getOverNewline(FileBuf& in) { + int c; + while(isspace(c = in.get())); + return c; +} + +/// Skip to the end of the current string of newline chars such that +/// the next call to get() returns the first character after the +/// whitespace +static inline int peekOverNewline(FileBuf& in) { + while(true) { + int c = in.peek(); + if(c != '\r' && c != '\n') { + return c; + } + in.get(); + } +} + +/// Skip to the end of the current line; return the first character +/// of the next line or -1 for EOF +static inline int getToEndOfLine(FileBuf& in) { + while(true) { + int c = in.get(); if(c < 0) return -1; + if(c == '\n' || c == '\r') { + while(c == '\n' || c == '\r') { + c = in.get(); if(c < 0) return -1; + } + // c now holds first character of next line + return c; + } + } +} + +/// Skip to the end of the current line such that the next call to +/// get() returns the first character on the next line +static inline int peekToEndOfLine(FileBuf& in) { + while(true) { + int c = in.get(); if(c < 0) return c; + if(c == '\n' || c == '\r') { + c = in.peek(); + while(c == '\n' || c == '\r') { + in.get(); if(c < 0) return c; // consume \r or \n + c = in.peek(); + } + // next get() gets first character of next line + return c; + } + } +} + +extern void wrongQualityFormat(const BTString& read_name); +extern void tooFewQualities(const BTString& read_name); +extern void tooManyQualities(const BTString& read_name); + +/** + * Encapsulates a source of patterns which is an in-memory vector. 
+ */ +class VectorPatternSource : public PatternSource { + +public: + + VectorPatternSource( + const EList& v, + const PatternParams& p); + + virtual ~VectorPatternSource() { } + + virtual bool nextReadImpl( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done); + + /** + * This is unused, but implementation is given for completeness. + */ + virtual bool nextReadPairImpl( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired); + + virtual void reset() { + PatternSource::reset(); + cur_ = skip_; + paired_ = false; + } + +private: + + size_t cur_; + uint32_t skip_; + bool paired_; + EList v_; // forward sequences + EList quals_; // forward qualities + EList names_; // names + EList trimmed3_; // names + EList trimmed5_; // names +}; + +/** + * + */ +class BufferedFilePatternSource : public PatternSource { +public: + BufferedFilePatternSource( + const EList& infiles, + const PatternParams& p) : + PatternSource(p), + infiles_(infiles), + filecur_(0), + fb_(), + skip_(p.skip), + first_(true) + { + assert_gt(infiles.size(), 0); + errs_.resize(infiles_.size()); + errs_.fill(0, infiles_.size(), false); + assert(!fb_.isOpen()); + open(); // open first file in the list + filecur_++; + } + + virtual ~BufferedFilePatternSource() { + if(fb_.isOpen()) fb_.close(); + } + + /** + * Fill Read with the sequence, quality and name for the next + * read in the list of read files. This function gets called by + * all the search threads, so we must handle synchronization. 
+ */ + virtual bool nextReadImpl( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) + { + // We'll be manipulating our file handle/filecur_ state + lock(); + while(true) { + do { read(r, rdid, endid, success, done); } + while(!success && !done); + if(!success && filecur_ < infiles_.size()) { + assert(done); + open(); + resetForNextFile(); // reset state to handle a fresh file + filecur_++; + continue; + } + break; + } + assert(r.repOk()); + // Leaving critical region + unlock(); + return success; + } + + /** + * + */ + virtual bool nextReadPairImpl( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) + { + // We'll be manipulating our file handle/filecur_ state + lock(); + while(true) { + do { readPair(ra, rb, rdid, endid, success, done, paired); } + while(!success && !done); + if(!success && filecur_ < infiles_.size()) { + assert(done); + open(); + resetForNextFile(); // reset state to handle a fresh file + filecur_++; + continue; + } + break; + } + assert(ra.repOk()); + assert(rb.repOk()); + // Leaving critical region + unlock(); + return success; + } + + /** + * Reset state so that we read start reading again from the + * beginning of the first file. Should only be called by the + * master thread. 
+ */ + virtual void reset() { + PatternSource::reset(); + filecur_ = 0; + open(); + filecur_++; + } + +protected: + + /// Read another pattern from the input file; this is overridden + /// to deal with specific file formats + virtual bool read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) = 0; + + /// Read another pattern pair from the input file; this is + /// overridden to deal with specific file formats + virtual bool readPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) = 0; + + /// Reset state to handle a fresh file + virtual void resetForNextFile() { } + + void open() { + if(fb_.isOpen()) fb_.close(); + while(filecur_ < infiles_.size()) { + // Open read + FILE *in; + if(infiles_[filecur_] == "-") { + in = stdin; + } else if((in = fopen(infiles_[filecur_].c_str(), "rb")) == NULL) { + if(!errs_[filecur_]) { + cerr << "Warning: Could not open read file \"" << infiles_[filecur_].c_str() << "\" for reading; skipping..." << endl; + errs_[filecur_] = true; + } + filecur_++; + continue; + } + fb_.newFile(in); + return; + } + cerr << "Error: No input read files were valid" << endl; + exit(1); + return; + } + + EList infiles_; // filenames for read files + EList errs_; // whether we've already printed an error for each file + size_t filecur_; // index into infiles_ of next file to read + FileBuf fb_; // read file currently being read from + TReadId skip_; // number of reads to skip + bool first_; +}; + +/** + * Parse a single quality string from fb and store qualities in r. + * Assume the next character obtained via fb.get() is the first + * character of the quality string. When returning, the next + * character returned by fb.peek() or fb.get() should be the first + * character of the following line. 
+ */ +int parseQuals( + Read& r, + FileBuf& fb, + int firstc, + int readLen, + int trim3, + int trim5, + bool intQuals, + bool phred64, + bool solexa64); + +/** + * Synchronized concrete pattern source for a list of FASTA or CSFASTA + * (if color = true) files. + */ +class FastaPatternSource : public BufferedFilePatternSource { +public: + FastaPatternSource(const EList& infiles, + const PatternParams& p) : + BufferedFilePatternSource(infiles, p), + first_(true), solexa64_(p.solexa64), phred64_(p.phred64), intQuals_(p.intQuals) + { } + virtual void reset() { + first_ = true; + BufferedFilePatternSource::reset(); + } +protected: + /** + * Scan to the next FASTA record (starting with >) and return the first + * character of the record (which will always be >). + */ + static int skipToNextFastaRecord(FileBuf& in) { + int c; + while((c = in.get()) != '>') { + if(in.eof()) return -1; + } + return c; + } + + /// Called when we have to bail without having parsed a read. + void bail(Read& r) { + r.reset(); + fb_.resetLastN(); + } + + /// Read another pattern from a FASTA input file + virtual bool read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done); + + /// Read another pair of patterns from a FASTA input file + virtual bool readPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) + { + // (For now, we shouldn't ever be here) + cerr << "In FastaPatternSource.readPair()" << endl; + throw 1; + return false; + } + + virtual void resetForNextFile() { + first_ = true; + } + +private: + bool first_; + +public: + bool solexa64_; + bool phred64_; + bool intQuals_; +}; + + +/** + * Tokenize a line of space-separated integer quality values. 
+ */ +static inline bool tokenizeQualLine( + FileBuf& filebuf, + char *buf, + size_t buflen, + EList& toks) +{ + size_t rd = filebuf.gets(buf, buflen); + if(rd == 0) return false; + assert(NULL == strrchr(buf, '\n')); + tokenize(string(buf), " ", toks); + return true; +} + +/** + * Synchronized concrete pattern source for a list of files with tab- + * delimited name, seq, qual fields (or, for paired-end reads, + * basename, seq1, qual1, seq2, qual2). + */ +class TabbedPatternSource : public BufferedFilePatternSource { + +public: + + TabbedPatternSource( + const EList& infiles, + const PatternParams& p, + bool secondName) : + BufferedFilePatternSource(infiles, p), + solQuals_(p.solexa64), + phred64Quals_(p.phred64), + intQuals_(p.intQuals), + secondName_(secondName) { } + +protected: + + /// Read another pattern from a FASTA input file + virtual bool read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done); + + /// Read another pair of patterns from a FASTA input file + virtual bool readPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired); + +private: + + /** + * Parse a name from fb_ and store in r. Assume that the next + * character obtained via fb_.get() is the first character of + * the sequence and the string stops at the next char upto (could + * be tab, newline, etc.). + */ + int parseName(Read& r, Read* r2, char upto = '\t'); + + /** + * Parse a single sequence from fb_ and store in r. Assume + * that the next character obtained via fb_.get() is the first + * character of the sequence and the sequence stops at the next + * char upto (could be tab, newline, etc.). + */ + int parseSeq(Read& r, int& charsRead, int& trim5, char upto = '\t'); + + /** + * Parse a single quality string from fb_ and store in r. 
+ * Assume that the next character obtained via fb_.get() is + * the first character of the quality string and the string stops + * at the next char upto (could be tab, newline, etc.). + */ + int parseQuals(Read& r, int charsRead, int dstLen, int trim5, + char& c2, char upto = '\t', char upto2 = -1); + + bool solQuals_; + bool phred64Quals_; + bool intQuals_; + EList qualToks_; + bool secondName_; +}; + +/** + * Synchronized concrete pattern source for Illumina Qseq files. In + * Qseq files, each read appears on a separate line and the tab- + * delimited fields are: + * + * 1. Machine name + * 2. Run number + * 3. Lane number + * 4. Tile number + * 5. X coordinate of spot + * 6. Y coordinate of spot + * 7. Index: "Index sequence or 0. For no indexing, or for a file that + * has not been demultiplexed yet, this field should have a value of + * 0." + * 8. Read number: 1 for unpaired, 1 or 2 for paired + * 9. Sequence + * 10. Quality + * 11. Filter: 1 = passed, 0 = didn't + */ +class QseqPatternSource : public BufferedFilePatternSource { + +public: + + QseqPatternSource( + const EList& infiles, + const PatternParams& p) : + BufferedFilePatternSource(infiles, p), + solQuals_(p.solexa64), + phred64Quals_(p.phred64), + intQuals_(p.intQuals) { } + +protected: + +#define BAIL_UNPAIRED() { \ + peekOverNewline(fb_); \ + r.reset(); \ + success = false; \ + done = true; \ + return success; \ +} + + /** + * Parse a name from fb_ and store in r. Assume that the next + * character obtained via fb_.get() is the first character of + * the sequence and the string stops at the next char upto (could + * be tab, newline, etc.). 
+ */ + int parseName( + Read& r, // buffer for mate 1 + Read* r2, // buffer for mate 2 (NULL if mate2 is read separately) + bool append, // true -> append characters, false -> skip them + bool clearFirst, // clear the name buffer first + bool warnEmpty, // emit a warning if nothing was added to the name + bool useDefault, // if nothing is read, put readCnt_ as a default value + int upto); // stop parsing when we first reach character 'upto' + + /** + * Parse a single sequence from fb_ and store in r. Assume + * that the next character obtained via fb_.get() is the first + * character of the sequence and the sequence stops at the next + * char upto (could be tab, newline, etc.). + */ + int parseSeq( + Read& r, // buffer for read + int& charsRead, + int& trim5, + char upto); + + /** + * Parse a single quality string from fb_ and store in r. + * Assume that the next character obtained via fb_.get() is + * the first character of the quality string and the string stops + * at the next char upto (could be tab, newline, etc.). + */ + int parseQuals( + Read& r, // buffer for read + int charsRead, + int dstLen, + int trim5, + char& c2, + char upto, + char upto2); + + /** + * Read another pattern from a Qseq input file. + */ + virtual bool read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done); + + /** + * Read a pair of patterns from 1 Qseq file. Note: this is never used. + */ + virtual bool readPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) + { + // (For now, we shouldn't ever be here) + cerr << "In QseqPatternSource.readPair()" << endl; + throw 1; + return false; + } + + bool solQuals_; + bool phred64Quals_; + bool intQuals_; + EList qualToks_; +}; + +/** + * Synchronized concrete pattern source for a list of FASTA files where + * reads need to be extracted from long continuous sequences. 
+ */ +class FastaContinuousPatternSource : public BufferedFilePatternSource { +public: + FastaContinuousPatternSource(const EList& infiles, const PatternParams& p) : + BufferedFilePatternSource(infiles, p), + length_(p.sampleLen), freq_(p.sampleFreq), + eat_(length_-1), beginning_(true), + bufCur_(0), subReadCnt_(0llu) + { + resetForNextFile(); + } + + virtual void reset() { + BufferedFilePatternSource::reset(); + resetForNextFile(); + } + +protected: + + /// Read another pattern from a FASTA input file + virtual bool read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) + { + success = true; + done = false; + r.reset(); + while(true) { + r.color = gColor; + int c = fb_.get(); + if(c < 0) { success = false; done = true; return success; } + if(c == '>') { + resetForNextFile(); + c = fb_.peek(); + bool sawSpace = false; + while(c != '\n' && c != '\r') { + if(!sawSpace) { + sawSpace = isspace(c); + } + if(!sawSpace) { + nameBuf_.append(c); + } + fb_.get(); + c = fb_.peek(); + } + while(c == '\n' || c == '\r') { + fb_.get(); + c = fb_.peek(); + } + nameBuf_.append('_'); + } else { + int cat = asc2dnacat[c]; + if(cat >= 2) c = 'N'; + if(cat == 0) { + // Encountered non-DNA, non-IUPAC char; skip it + continue; + } else { + // DNA char + buf_[bufCur_++] = c; + if(bufCur_ == 1024) bufCur_ = 0; + if(eat_ > 0) { + eat_--; + // Try to keep readCnt_ aligned with the offset + // into the reference; that lets us see where + // the sampling gaps are by looking at the read + // name + if(!beginning_) readCnt_++; + continue; + } + for(size_t i = 0; i < length_; i++) { + if(length_ - i <= bufCur_) { + c = buf_[bufCur_ - (length_ - i)]; + } else { + // Rotate + c = buf_[bufCur_ - (length_ - i) + 1024]; + } + r.patFw.append(asc2dna[c]); + r.qual.append('I'); + } + // Set up a default name if one hasn't been set + r.name = nameBuf_; + char cbuf[20]; + itoa10(readCnt_ - subReadCnt_, cbuf); + r.name.append(cbuf); + eat_ = freq_-1; + readCnt_++; + beginning_ = 
false; + rdid = endid = readCnt_-1; + break; + } + } + } + return true; + } + + /// Shouldn't ever be here; it's not sensible to obtain read pairs + // from a continuous input. + virtual bool readPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) + { + cerr << "In FastaContinuousPatternSource.readPair()" << endl; + throw 1; + return false; + } + + /** + * Reset state to be read for the next file. + */ + virtual void resetForNextFile() { + eat_ = length_-1; + beginning_ = true; + bufCur_ = 0; + nameBuf_.clear(); + subReadCnt_ = readCnt_; + } + +private: + size_t length_; /// length of reads to generate + size_t freq_; /// frequency to sample reads + size_t eat_; /// number of characters we need to skip before + /// we have flushed all of the ambiguous or + /// non-existent characters out of our read + /// window + bool beginning_; /// skipping over the first read length? + char buf_[1024]; /// read buffer + BTString nameBuf_; /// read buffer for name of fasta record being + /// split into mers + size_t bufCur_; /// buffer cursor; points to where we should + /// insert the next character + uint64_t subReadCnt_;/// number to subtract from readCnt_ to get + /// the pat id to output (so it resets to 0 for + /// each new sequence) +}; + +/** + * Read a FASTQ-format file. + * See: http://maq.sourceforge.net/fastq.shtml + */ +class FastqPatternSource : public BufferedFilePatternSource { + +public: + + FastqPatternSource(const EList& infiles, const PatternParams& p) : + BufferedFilePatternSource(infiles, p), + first_(true), + solQuals_(p.solexa64), + phred64Quals_(p.phred64), + intQuals_(p.intQuals), + fuzzy_(p.fuzzy) + { } + + virtual void reset() { + first_ = true; + fb_.resetLastN(); + BufferedFilePatternSource::reset(); + } + +protected: + + /** + * Scan to the next FASTQ record (starting with @) and return the first + * character of the record (which will always be @). 
Since the quality + * line may start with @, we keep scanning until we've seen a line + * beginning with @ where the line two lines back began with +. + */ + static int skipToNextFastqRecord(FileBuf& in, bool sawPlus) { + int line = 0; + int plusLine = -1; + int c = in.get(); + int firstc = c; + while(true) { + if(line > 20) { + // If we couldn't find our desired '@' in the first 20 + // lines, it's time to give up + if(firstc == '>') { + // That firstc is '>' may be a hint that this is + // actually a FASTA file, so return it intact + return '>'; + } + // Return an error + return -1; + } + if(c == -1) return -1; + if(c == '\n') { + c = in.get(); + if(c == '@' && sawPlus && plusLine == (line-2)) { + return '@'; + } + else if(c == '+') { + // Saw a '+' at the beginning of a line; remember where + // we saw it + sawPlus = true; + plusLine = line; + } + else if(c == -1) { + return -1; + } + line++; + } + c = in.get(); + } + } + + /// Read another pattern from a FASTQ input file + virtual bool read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done); + + /// Read another read pair from a FASTQ input file + virtual bool readPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) + { + // (For now, we shouldn't ever be here) + cerr << "In FastqPatternSource.readPair()" << endl; + throw 1; + return false; + } + + virtual void resetForNextFile() { + first_ = true; + } + +private: + + /** + * Do things we need to do if we have to bail in the middle of a + * read, usually because we reached the end of the input without + * finishing. + */ + void bail(Read& r) { + r.patFw.clear(); + fb_.resetLastN(); + } + + bool first_; + bool solQuals_; + bool phred64Quals_; + bool intQuals_; + bool fuzzy_; + EList qualToks_; +}; + +/** + * Read a Raw-format file (one sequence per line). No quality strings + * allowed. All qualities are assumed to be 'I' (40 on the Phred-33 + * scale). 
+ */ +class RawPatternSource : public BufferedFilePatternSource { + +public: + + RawPatternSource(const EList& infiles, const PatternParams& p) : + BufferedFilePatternSource(infiles, p), first_(true) { } + + virtual void reset() { + first_ = true; + BufferedFilePatternSource::reset(); + } + +protected: + + /// Read another pattern from a Raw input file + virtual bool read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) + { + int c; + success = true; + done = false; + r.reset(); + c = getOverNewline(this->fb_); + if(c < 0) { + bail(r); success = false; done = true; return success; + } + assert(!isspace(c)); + r.color = gColor; + int mytrim5 = gTrim5; + if(first_) { + // Check that the first character is sane for a raw file + int cc = c; + if(gColor) { + if(cc >= '0' && cc <= '4') cc = "ACGTN"[(int)cc - '0']; + if(cc == '.') cc = 'N'; + } + if(asc2dnacat[cc] == 0) { + cerr << "Error: reads file does not look like a Raw file" << endl; + if(c == '>') { + cerr << "Reads file looks like a FASTA file; please use -f" << endl; + } + if(c == '@') { + cerr << "Reads file looks like a FASTQ file; please use -q" << endl; + } + throw 1; + } + first_ = false; + } + if(gColor) { + // This may be a primer character. If so, keep it in the + // 'primer' field of the read buf and parse the rest of the + // read without it. 
+ c = toupper(c); + if(asc2dnacat[c] > 0) { + // First char is a DNA char + int c2 = toupper(fb_.peek()); + // Second char is a color char + if(asc2colcat[c2] > 0) { + r.primer = c; + r.trimc = c2; + mytrim5 += 2; // trim primer and first color + } + } + if(c < 0) { + bail(r); success = false; done = true; return success; + } + } + // _in now points just past the first character of a sequence + // line, and c holds the first character + int chs = 0; + while(!isspace(c) && c >= 0) { + if(gColor) { + if(c >= '0' && c <= '4') c = "ACGTN"[(int)c - '0']; + if(c == '.') c = 'N'; + } + // 5' trimming + if(isalpha(c) && chs >= mytrim5) { + //size_t len = chs - mytrim5; + //if(len >= 1024) tooManyQualities(BTString("(no name)")); + r.patFw.append(asc2dna[c]); + r.qual.append('I'); + } + chs++; + if(isspace(fb_.peek())) break; + c = fb_.get(); + } + // 3' trimming + r.patFw.trimEnd(gTrim3); + r.qual.trimEnd(gTrim3); + c = peekToEndOfLine(fb_); + r.trimmed3 = gTrim3; + r.trimmed5 = mytrim5; + r.readOrigBuf.install(fb_.lastN(), fb_.lastNLen()); + fb_.resetLastN(); + + // Set up name + char cbuf[20]; + itoa10(readCnt_, cbuf); + r.name.install(cbuf); + readCnt_++; + + rdid = endid = readCnt_-1; + return success; + } + + /// Read another read pair from a FASTQ input file + virtual bool readPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) + { + // (For now, we shouldn't ever be here) + cerr << "In RawPatternSource.readPair()" << endl; + throw 1; + return false; + } + + virtual void resetForNextFile() { + first_ = true; + } + +private: + + /** + * Do things we need to do if we have to bail in the middle of a + * read, usually because we reached the end of the input without + * finishing. 
+ */ + void bail(Read& r) { + r.patFw.clear(); + fb_.resetLastN(); + } + + bool first_; +}; + +#ifdef USE_SRA + +namespace ngs { + class ReadCollection; + class ReadIterator; +} + +namespace tthread { + class thread; +}; + +struct SRA_Data; + +/** + * + */ +class SRAPatternSource : public PatternSource { +public: + SRAPatternSource( + const EList& sra_accs, + const PatternParams& p, + const size_t nthreads = 1) : + PatternSource(p), + sra_accs_(sra_accs), + sra_acc_cur_(0), + skip_(p.skip), + first_(true), + nthreads_(nthreads), + sra_run_(NULL), + sra_it_(NULL), + sra_data_(NULL), + io_thread_(NULL) + { + assert_gt(sra_accs_.size(), 0); + errs_.resize(sra_accs_.size()); + errs_.fill(0, sra_accs_.size(), false); + open(); // open first file in the list + sra_acc_cur_++; + } + + virtual ~SRAPatternSource(); + + /** + * Fill Read with the sequence, quality and name for the next + * read in the list of read files. This function gets called by + * all the search threads, so we must handle synchronization. 
+ */ + virtual bool nextReadImpl( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) + { + // We'll be manipulating our file handle/filecur_ state + lock(); + while(true) { + do { read(r, rdid, endid, success, done); } + while(!success && !done); + if(!success && sra_acc_cur_ < sra_accs_.size()) { + assert(done); + open(); + resetForNextFile(); // reset state to handle a fresh file + sra_acc_cur_++; + continue; + } + break; + } + assert(r.repOk()); + // Leaving critical region + unlock(); + return success; + } + + /** + * + */ + virtual bool nextReadPairImpl( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired) + { + // We'll be manipulating our file handle/filecur_ state + lock(); + while(true) { + do { readPair(ra, rb, rdid, endid, success, done, paired); } + while(!success && !done); + if(!success && sra_acc_cur_ < sra_accs_.size()) { + assert(done); + open(); + resetForNextFile(); // reset state to handle a fresh file + sra_acc_cur_++; + continue; + } + break; + } + assert(ra.repOk()); + assert(rb.repOk()); + // Leaving critical region + unlock(); + return success; + } + + /** + * Reset state so that we read start reading again from the + * beginning of the first file. Should only be called by the + * master thread. 
+ */ + virtual void reset() { + PatternSource::reset(); + sra_acc_cur_ = 0, + open(); + sra_acc_cur_++; + } + + /// Read another pattern from the input file; this is overridden + /// to deal with specific file formats + virtual bool read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) + { + return true; + } + + /// Read another pattern pair from the input file; this is + /// overridden to deal with specific file formats + virtual bool readPair( + Read& ra, + Read& rb, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done, + bool& paired); + +protected: + + /// Reset state to handle a fresh file + virtual void resetForNextFile() { } + + void open(); + + EList sra_accs_; // filenames for read files + EList errs_; // whether we've already printed an error for each file + size_t sra_acc_cur_; // index into infiles_ of next file to read + TReadId skip_; // number of reads to skip + bool first_; + + size_t nthreads_; + + ngs::ReadCollection* sra_run_; + ngs::ReadIterator* sra_it_; + + SRA_Data* sra_data_; + tthread::thread* io_thread_; +}; + +#endif + +#endif /*PAT_H_*/ diff --git a/pe.cpp b/pe.cpp new file mode 100644 index 0000000..8845898 --- /dev/null +++ b/pe.cpp @@ -0,0 +1,941 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#include +#include "assert_helpers.h" +#include "pe.h" + +using namespace std; + +/** + * Return a PE_TYPE flag indicating, given a PE_POLICY and coordinates + * for a paired-end alignment, what type of alignment it is, i.e., + * whether it's: + * + * 1. Straightforwardly concordant + * 2. Mates dovetail (one extends beyond the end of the other) + * 3. One mate contains the other but they don't dovetail + * 4. One mate overlaps the other but neither contains the other and + * they don't dovetail + * 5. Discordant + */ +int PairedEndPolicy::peClassifyPair( + int64_t off1, // offset of mate 1 + size_t len1, // length of mate 1 + bool fw1, // whether mate 1 aligned to Watson + int64_t off2, // offset of mate 2 + size_t len2, // length of mate 2 + bool fw2) // whether mate 2 aligned to Watson + const +{ + assert_gt(len1, 0); + assert_gt(len2, 0); + // Expand the maximum fragment length if necessary to accomodate + // the longer mate + size_t maxfrag = maxfrag_; + if(len1 > maxfrag && expandToFit_) maxfrag = len1; + if(len2 > maxfrag && expandToFit_) maxfrag = len2; + size_t minfrag = minfrag_; + if(minfrag < 1) { + minfrag = 1; + } + bool oneLeft = false; + if(pol_ == PE_POLICY_FF) { + if(fw1 != fw2) { + // Bad combination of orientations + return PE_ALS_DISCORD; + } + oneLeft = fw1; + } else if(pol_ == PE_POLICY_RR) { + if(fw1 != fw2) { + // Bad combination of orientations + return PE_ALS_DISCORD; + } + oneLeft = !fw1; + } else if(pol_ == PE_POLICY_FR) { + if(fw1 == fw2) { + // Bad combination of orientations + return PE_ALS_DISCORD; + } + oneLeft = fw1; + } else if(pol_ == PE_POLICY_RF) { + if(fw1 == fw2) { + // Bad combination of orientations + return PE_ALS_DISCORD; + } + oneLeft = !fw1; + } + // Calc implied fragment size + int64_t fraglo = min(off1, off2); + int64_t fraghi = max(off1+len1, off2+len2); + assert_gt(fraghi, fraglo); + size_t frag = (size_t)(fraghi - fraglo); + if(frag > maxfrag || frag < minfrag) { + // Pair is discordant by virtue of the 
extents + return PE_ALS_DISCORD; + } + int64_t lo1 = off1; + int64_t hi1 = off1 + len1 - 1; + int64_t lo2 = off2; + int64_t hi2 = off2 + len2 - 1; + bool containment = false; + // Check whether one mate entirely contains the other + if((lo1 >= lo2 && hi1 <= hi2) || + (lo2 >= lo1 && hi2 <= hi1)) + { + containment = true; + } + int type = PE_ALS_NORMAL; + // Check whether one mate overlaps the other + bool olap = false; + if((lo1 <= lo2 && hi1 >= lo2) || + (lo1 <= hi2 && hi1 >= hi2) || + containment) + { + // The mates overlap + olap = true; + if(!olapOk_) return PE_ALS_DISCORD; + type = PE_ALS_OVERLAP; + } + // Check if the mates are in the wrong relative orientation, + // without any overlap + if(!olap) { + if((oneLeft && lo2 < lo1) || (!oneLeft && lo1 < lo2)) { + return PE_ALS_DISCORD; + } + } + // If one mate contained the other, report that + if(containment) { + if(!containOk_) return PE_ALS_DISCORD; + type = PE_ALS_CONTAIN; + } + // Check whether there's dovetailing; i.e. does the left mate + // extend past the right end of the right mate, or vice versa + if(( oneLeft && (hi1 > hi2 || lo2 < lo1)) || + (!oneLeft && (hi2 > hi1 || lo1 < lo2))) + { + if(!dovetailOk_) return PE_ALS_DISCORD; + type = PE_ALS_DOVETAIL; + } + return type; +} + +/** + * Given details about how one mate aligns, and some details about the + * reference sequence it aligned to, calculate a window and orientation s.t. + * a paired-end alignment is concordant iff the opposite mate aligns in the + * calculated window with the calculated orientation. The "window" is really a + * cosntraint on which positions the extreme end of the opposite mate can fall. + * This is a different type of constraint from the one placed on seed-extend + * dynamic programming problems. That constraints requires that alignments at + * one point pass through one of a set of "core" columns. 
+ * + * When the opposite mate is to the left, we're constraining where its + * left-hand extreme can fall, i.e., which cells in the top row of the matrix + * it can end in. When the opposite mate is to the right, we're cosntraining + * where its right-hand extreme can fall, i.e., which cells in the bottom row + * of the matrix it can end in. However, in practice we can only constrain + * where we start the backtrace, i.e. where the RHS of the alignment falls. + * See frameFindMateRect for details. + * + * This calculaton does not consider gaps - the dynamic programming framer will + * take gaps into account. + * + * Returns false if no concordant alignments are possible, true otherwise. + */ +bool PairedEndPolicy::otherMate( + bool is1, // true -> mate 1 aligned and we're looking + // for 2, false -> vice versa + bool fw, // orientation of aligned mate + int64_t off, // offset into the reference sequence + int64_t maxalcols, // max # columns spanned by alignment + size_t reflen, // length of reference sequence aligned to + size_t len1, // length of mate 1 + size_t len2, // length of mate 2 + bool& oleft, // out: true iff opp mate must be to right of anchor + int64_t& oll, // out: leftmost Watson off for LHS of opp alignment + int64_t& olr, // out: rightmost Watson off for LHS of opp alignment + int64_t& orl, // out: leftmost Watson off for RHS of opp alignment + int64_t& orr, // out: rightmost Watson off for RHS of opp alignment + bool& ofw) // out: true iff opp mate must be on Watson strand + const +{ + assert_gt(len1, 0); + assert_gt(len2, 0); + assert_gt(maxfrag_, 0); + assert_geq(minfrag_, 0); + assert_geq(maxfrag_, minfrag_); + assert(maxalcols == -1 || maxalcols > 0); + + // Calculate whether opposite mate should align to left or to right + // of given mate, and what strand it should align to + pePolicyMateDir(pol_, is1, fw, oleft, ofw); + + size_t alen = is1 ? 
len1 : len2; // length of opposite mate + + // Expand the maximum fragment length if necessary to accomodate + // the longer mate + size_t maxfrag = maxfrag_; + size_t minfrag = minfrag_; + if(minfrag < 1) { + minfrag = 1; + } + if(len1 > maxfrag && expandToFit_) maxfrag = len1; + if(len2 > maxfrag && expandToFit_) maxfrag = len2; + if(!expandToFit_ && (len1 > maxfrag || len2 > maxfrag)) { + // Not possible to find a concordant alignment; one of the + // mates is too long + return false; + } + + // Now calculate bounds within which a dynamic programming + // algorithm should search for an alignment for the opposite mate + if(oleft) { + // -----------FRAG MAX---------------- + // -------FRAG MIN------- + // |-alen-| + // Anchor mate + // ^off + // |------| + // Not concordant: LHS not outside min + // |------| + // Concordant + // |------| + // Concordant + // |------| + // Not concordant: LHS outside max + + // -----------FRAG MAX---------------- + // -------FRAG MIN------- + // |-alen-| + // Anchor mate + // ^off + // |------------| + // LHS can't be outside this range + // -----------FRAG MAX---------------- + // |------------------------------------------------------------| + // LHS can't be outside this range, assuming no restrictions on + // flipping, dovetailing, containment, overlap, etc. + // |-------| + // maxalcols + // |-----------------------------------------| + // LHS can't be outside this range, assuming no flipping + // |---------------------------------| + // LHS can't be outside this range, assuming no dovetailing + // |-------------------------| + // LHS can't be outside this range, assuming no overlap + + oll = off + alen - maxfrag; + olr = off + alen - minfrag; + assert_geq(olr, oll); + + orl = oll; + orr = off + maxfrag - 1; + assert_geq(olr, oll); + + // What if overlapping alignments are not allowed? 
+ if(!olapOk_) { + // RHS can't be flush with or to the right of off + orr = min(orr, off-1); + if(orr < olr) olr = orr; + assert_leq(oll, olr); + assert_leq(orl, orr); + assert_geq(orr, olr); + } + // What if dovetail alignments are not allowed? + else if(!dovetailOk_) { + // RHS can't be past off+alen-1 + orr = min(orr, off + alen - 1); + assert_leq(oll, olr); + assert_leq(orl, orr); + } + // What if flipped alignments are not allowed? + else if(!flippingOk_ && maxalcols != -1) { + // RHS can't be right of ??? + orr = min(orr, off + alen - 1 + (maxalcols-1)); + assert_leq(oll, olr); + assert_leq(orl, orr); + } + assert_geq(olr, oll); + assert_geq(orr, orl); + assert_geq(orr, olr); + assert_geq(orl, oll); + } else { + // -----------FRAG MAX---------------- + // -------FRAG MIN------- + // -----------FRAG MAX---------------- + // |-alen-| + // Anchor mate + // ^off + // |------| + // Not concordant: RHS not outside min + // |------| + // Concordant + // |------| + // Concordant + // |------| + // Not concordant: RHS outside max + // + + // -----------FRAG MAX---------------- + // -------FRAG MIN------- + // -----------FRAG MAX---------------- + // |-alen-| + // Anchor mate + // ^off + // |------------| + // RHS can't be outside this range + // |------------------------------------------------------------| + // LHS can't be outside this range, assuming no restrictions on + // dovetailing, containment, overlap, etc. + // |-------| + // maxalcols + // |-----------------------------------------| + // LHS can't be outside this range, assuming no flipping + // |---------------------------------| + // LHS can't be outside this range, assuming no dovetailing + // |-------------------------| + // LHS can't be outside this range, assuming no overlap + + orr = off + (maxfrag - 1); + orl = off + (minfrag - 1); + assert_geq(orr, orl); + + oll = off + alen - maxfrag; + olr = orr; + assert_geq(olr, oll); + + // What if overlapping alignments are not allowed? 
+ if(!olapOk_) { + // LHS can't be left of off+alen + oll = max(oll, off+alen); + if(oll > orl) orl = oll; + assert_leq(oll, olr); + assert_leq(orl, orr); + assert_geq(orl, oll); + } + // What if dovetail alignments are not allowed? + else if(!dovetailOk_) { + // LHS can't be left of off + oll = max(oll, off); + assert_leq(oll, olr); + assert_leq(orl, orr); + } + // What if flipped alignments are not allowed? + else if(!flippingOk_ && maxalcols != -1) { + // LHS can't be left of off - maxalcols + 1 + oll = max(oll, off - maxalcols + 1); + assert_leq(oll, olr); + assert_leq(orl, orr); + } + assert_geq(olr, oll); + assert_geq(orr, orl); + assert_geq(orr, olr); + assert_geq(orl, oll); + } + + // Boundaries and orientation determined + return true; +} + +#ifdef MAIN_PE + +#include +#include + +void testCaseClassify( + const string& name, + int pol, + size_t maxfrag, + size_t minfrag, + bool local, + bool flip, + bool dove, + bool cont, + bool olap, + bool expand, + int64_t off1, + size_t len1, + bool fw1, + int64_t off2, + size_t len2, + bool fw2, + int expect_class) +{ + PairedEndPolicy pepol( + pol, + maxfrag, + minfrag, + local, + flip, + dove, + cont, + olap, + expand); + int ret = pepol.peClassifyPair( + off1, // offset of mate 1 + len1, // length of mate 1 + fw1, // whether mate 1 aligned to Watson + off2, // offset of mate 2 + len2, // length of mate 2 + fw2); // whether mate 2 aligned to Watson + assert_eq(expect_class, ret); + cout << "peClassifyPair: " << name << "...PASSED" << endl; +} + +void testCaseOtherMate( + const string& name, + int pol, + size_t maxfrag, + size_t minfrag, + bool local, + bool flip, + bool dove, + bool cont, + bool olap, + bool expand, + bool is1, + bool fw, + int64_t off, + int64_t maxalcols, + size_t reflen, + size_t len1, + size_t len2, + bool expect_ret, + bool expect_oleft, + int64_t expect_oll, + int64_t expect_olr, + int64_t expect_orl, + int64_t expect_orr, + bool expect_ofw) +{ + PairedEndPolicy pepol( + pol, + maxfrag, + 
minfrag, + local, + flip, + dove, + cont, + olap, + expand); + int64_t oll = 0, olr = 0; + int64_t orl = 0, orr = 0; + bool oleft = false, ofw = false; + bool ret = pepol.otherMate( + is1, + fw, + off, + maxalcols, + reflen, + len1, + len2, + oleft, + oll, + olr, + orl, + orr, + ofw); + assert(ret == expect_ret); + if(ret) { + assert_eq(expect_oleft, oleft); + assert_eq(expect_oll, oll); + assert_eq(expect_olr, olr); + assert_eq(expect_orl, orl); + assert_eq(expect_orr, orr); + assert_eq(expect_ofw, ofw); + } + cout << "otherMate: " << name << "...PASSED" << endl; +} + +int main(int argc, char **argv) { + + // Set of 8 cases where we look for the opposite mate to the right + // of the anchor mate, with various combinations of policies and + // anchor-mate orientations. + + // |--------| + // |--------| + // ^110 ^119 + // |------------------| + // min frag + // |--------| + // ^120 ^129 + // |----------------------------| + // max frag + // ^ + // 100 + + { + int policies[] = { PE_POLICY_FF, PE_POLICY_RR, PE_POLICY_FR, PE_POLICY_RF, PE_POLICY_FF, PE_POLICY_RR, PE_POLICY_FR, PE_POLICY_RF }; + bool is1[] = { true, true, true, true, false, false, false, false }; + bool fw[] = { true, false, true, false, false, true, true, false }; + bool oleft[] = { false, false, false, false, false, false, false, false }; + bool ofw[] = { true, false, false, true, false, true, false, true }; + + for(int i = 0; i < 8; i++) { + ostringstream oss; + oss << "Simple"; + oss << i; + testCaseOtherMate( + oss.str(), + policies[i], // policy + 30, // maxfrag + 20, // minfrag + false, // local + true, // flipping OK + true, // dovetail OK + true, // containment OK + true, // overlap OK + true, // expand-to-fit + is1[i], // mate 1 is anchor + fw[i], // anchor aligned to Watson + 100, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + oleft[i], // wheter to look for opposite 
to left + 80, // expected leftmost pos for opp mate LHS + 129, // expected rightmost pos for opp mate LHS + 119, // expected leftmost pos for opp mate RHS + 129, // expected rightmost pos for opp mate RHS + ofw[i]); // expected orientation in which opposite mate must align + } + } + + // Set of 8 cases where we look for the opposite mate to the left + // of the anchor mate, with various combinations of policies and + // anchor-mate orientations. + + // |--------| + // ^100 ^109 + // |--------| + // ^110 ^119 + // |------------------| + // min frag + // |-Anchor-| + // ^120 ^129 + // |----------------------------| + // max frag + // ^ + // 100 + + { + int policies[] = { PE_POLICY_FF, PE_POLICY_RR, PE_POLICY_FR, PE_POLICY_RF, PE_POLICY_FF, PE_POLICY_RR, PE_POLICY_FR, PE_POLICY_RF }; + bool is1[] = { false, false, false, false, true, true, true, true }; + bool fw[] = { true, false, false, true, false, true, false, true }; + bool oleft[] = { true, true, true, true, true, true, true, true }; + bool ofw[] = { true, false, true, false, false, true, true, false }; + + for(int i = 0; i < 8; i++) { + ostringstream oss; + oss << "Simple"; + oss << (i+8); + testCaseOtherMate( + oss.str(), + policies[i], // policy + 30, // maxfrag + 20, // minfrag + false, // local + true, // flipping OK + true, // dovetail OK + true, // containment OK + true, // overlap OK + true, // expand-to-fit + is1[i], // mate 1 is anchor + fw[i], // anchor aligned to Watson + 120, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + oleft[i], // wheter to look for opposite to left + 100, // expected leftmost pos for opp mate LHS + 110, // expected rightmost pos for opp mate LHS + 100, // expected leftmost pos for opp mate RHS + 149, // expected rightmost pos for opp mate RHS + ofw[i]); // expected orientation in which opposite mate must align + } + } + + // Case where min frag == max 
frag and opposite is to the right + + // |----------------------------| + // min frag + // |--------| + // ^120 ^129 + // |----------------------------| + // max frag + // ^ + // 100 + testCaseOtherMate( + "MinFragEqMax1", + PE_POLICY_FR, // policy + 30, // maxfrag + 30, // minfrag + false, // local + true, // flipping OK + true, // dovetail OK + true, // containment OK + true, // overlap OK + true, // expand-to-fit + false, // mate 1 is anchor + false, // anchor aligned to Watson + 120, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + true, // wheter to look for opposite to left + 100, // expected leftmost pos for opp mate LHS + 100, // expected rightmost pos for opp mate LHS + 100, // expected leftmost pos for opp mate RHS + 149, // expected rightmost pos for opp mate RHS + true); // expected orientation in which opposite mate must align + + // Case where min frag == max frag and opposite is to the right + + // |----------------------------| + // min frag ^129 + // |--------| + // ^100 ^109 + // |----------------------------| + // max frag + testCaseOtherMate( + "MinFragEqMax2", + PE_POLICY_FR, // policy + 30, // maxfrag + 30, // minfrag + false, // local + true, // flipping OK + true, // dovetail OK + true, // containment OK + true, // overlap OK + true, // expand-to-fit + true, // mate 1 is anchor + true, // anchor aligned to Watson + 100, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + false, // wheter to look for opposite to left + 80, // expected leftmost pos for opp mate LHS + 129, // expected rightmost pos for opp mate LHS + 129, // expected leftmost pos for opp mate RHS + 129, // expected rightmost pos for opp mate RHS + false); // expected orientation in which opposite mate must align + + testCaseOtherMate( + 
"MinFragEqMax4NoDove1", + PE_POLICY_FR, // policy + 30, // maxfrag + 25, // minfrag + false, // local + true, // flipping OK + false, // dovetail OK + true, // containment OK + true, // overlap OK + true, // expand-to-fit + true, // mate 1 is anchor + true, // anchor aligned to Watson + 100, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + false, // wheter to look for opposite to left + 100, // expected leftmost pos for opp mate LHS + 129, // expected rightmost pos for opp mate LHS + 124, // expected leftmost pos for opp mate RHS + 129, // expected rightmost pos for opp mate RHS + false); // expected orientation in which opposite mate must align + + testCaseOtherMate( + "MinFragEqMax4NoCont1", + PE_POLICY_FR, // policy + 30, // maxfrag + 25, // minfrag + false, // local + true, // flipping OK + false, // dovetail OK + false, // containment OK + true, // overlap OK + true, // expand-to-fit + true, // mate 1 is anchor + true, // anchor aligned to Watson + 100, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + false, // wheter to look for opposite to left + 100, // expected leftmost pos for opp mate LHS + 129, // expected rightmost pos for opp mate LHS + 124, // expected leftmost pos for opp mate RHS + 129, // expected rightmost pos for opp mate RHS + false); // expected orientation in which opposite mate must align + + testCaseOtherMate( + "MinFragEqMax4NoOlap1", + PE_POLICY_FR, // policy + 30, // maxfrag + 25, // minfrag + false, // local + true, // flipping OK + false, // dovetail OK + false, // containment OK + false, // overlap OK + true, // expand-to-fit + true, // mate 1 is anchor + true, // anchor aligned to Watson + 100, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 
length + 10, // mate 2 length + true, // expected return val from otherMate + false, // wheter to look for opposite to left + 110, // expected leftmost pos for opp mate LHS + 129, // expected rightmost pos for opp mate LHS + 124, // expected leftmost pos for opp mate RHS + 129, // expected rightmost pos for opp mate RHS + false); // expected orientation in which opposite mate must align + + testCaseOtherMate( + "MinFragEqMax4NoDove2", + PE_POLICY_FR, // policy + 30, // maxfrag + 25, // minfrag + false, // local + true, // flipping OK + false, // dovetail OK + true, // containment OK + true, // overlap OK + true, // expand-to-fit + false, // mate 1 is anchor + false, // anchor aligned to Watson + 120, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + true, // whether to look for opposite to left + 100, // expected leftmost pos for opp mate LHS + 105, // expected rightmost pos for opp mate LHS + 100, // expected leftmost pos for opp mate RHS + 129, // expected rightmost pos for opp mate RHS + true); // expected orientation in which opposite mate must align + + testCaseOtherMate( + "MinFragEqMax4NoOlap2", + PE_POLICY_FR, // policy + 30, // maxfrag + 25, // minfrag + false, // local + true, // flipping OK + false, // dovetail OK + false, // containment OK + false, // overlap OK + true, // expand-to-fit + false, // mate 1 is anchor + false, // anchor aligned to Watson + 120, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + true, // whether to look for opposite to left + 100, // expected leftmost pos for opp mate LHS + 105, // expected rightmost pos for opp mate LHS + 100, // expected leftmost pos for opp mate RHS + 119, // expected rightmost pos for opp mate RHS + true); // expected orientation in which opposite mate must align 
+ + { + int olls[] = { 110 }; + int olrs[] = { 299 }; + int orls[] = { 149 }; + int orrs[] = { 299 }; + for(int i = 0; i < 1; i++) { + ostringstream oss; + oss << "Overhang1_"; + oss << (i+1); + testCaseOtherMate( + oss.str(), + PE_POLICY_FR, // policy + 200, // maxfrag + 50, // minfrag + false, // local + true, // flipping OK + true, // dovetail OK + true, // containment OK + false, // overlap OK + true, // expand-to-fit + true, // mate 1 is anchor + true, // anchor aligned to Watson + 100, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + false, // whether to look for opposite to left + olls[i], // expected leftmost pos for opp mate LHS + olrs[i], // expected rightmost pos for opp mate LHS + orls[i], // expected leftmost pos for opp mate RHS + orrs[i], // expected rightmost pos for opp mate RHS + false); // expected orientation in which opposite mate must align + } + } + + { + int olls[] = { -100 }; + int olrs[] = { 50 }; + int orls[] = { -100 }; + int orrs[] = { 89 }; + for(int i = 0; i < 1; i++) { + ostringstream oss; + oss << "Overhang2_"; + oss << (i+1); + testCaseOtherMate( + oss.str(), + PE_POLICY_FR, // policy + 200, // maxfrag + 50, // minfrag + false, // local + true, // flipping OK + true, // dovetail OK + true, // containment OK + false, // overlap OK + true, // expand-to-fit + true, // mate 1 is anchor + false, // anchor aligned to Watson + 90, // anchor's offset into ref + -1, // max # alignment cols + 200, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + true, // whether to look for opposite to left + olls[i], // expected leftmost pos for opp mate LHS + olrs[i], // expected rightmost pos for opp mate LHS + orls[i], // expected leftmost pos for opp mate RHS + orrs[i], // expected rightmost pos for opp mate RHS + true); // expected orientation in which opposite mate must 
align + } + } + + { + int mate2offs[] = { 150, 149, 149, 100, 99, 299, 1, 250, 250 }; + int mate2lens[] = { 50, 50, 51, 100, 101, 1, 50, 50, 51 }; + int peExpects[] = { PE_ALS_NORMAL, PE_ALS_DISCORD, PE_ALS_OVERLAP, PE_ALS_CONTAIN, PE_ALS_DOVETAIL, PE_ALS_NORMAL, PE_ALS_DISCORD, PE_ALS_NORMAL, PE_ALS_DISCORD }; + + for(int i = 0; i < 9; i++) { + ostringstream oss; + oss << "Simple1_"; + oss << (i); + testCaseClassify( + oss.str(), + PE_POLICY_FR, // policy + 200, // maxfrag + 100, // minfrag + false, // local + true, // flipping OK + true, // dovetail OK + true, // containment OK + true, // overlap OK + true, // expand-to-fit + 100, // offset of mate 1 + 50, // length of mate 1 + true, // whether mate 1 aligned to Watson + mate2offs[i], // offset of mate 2 + mate2lens[i], // length of mate 2 + false, // whether mate 2 aligned to Watson + peExpects[i]);// expectation for PE_ALS flag returned + } + } + + { + int mate1offs[] = { 200, 201, 200, 200, 200, 100, 400, 100, 99 }; + int mate1lens[] = { 50, 49, 51, 100, 101, 1, 50, 50, 51 }; + int peExpects[] = { PE_ALS_NORMAL, PE_ALS_DISCORD, PE_ALS_OVERLAP, PE_ALS_CONTAIN, PE_ALS_DOVETAIL, PE_ALS_NORMAL, PE_ALS_DISCORD, PE_ALS_NORMAL, PE_ALS_DISCORD }; + + for(int i = 0; i < 9; i++) { + ostringstream oss; + oss << "Simple2_"; + oss << (i); + testCaseClassify( + oss.str(), + PE_POLICY_FR, // policy + 200, // maxfrag + 100, // minfrag + false, // local + true, // flipping OK + true, // dovetail OK + true, // containment OK + true, // overlap OK + true, // expand-to-fit + mate1offs[i], // offset of mate 1 + mate1lens[i], // length of mate 1 + true, // whether mate 1 aligned to Watson + 250, // offset of mate 2 + 50, // length of mate 2 + false, // whether mate 2 aligned to Watson + peExpects[i]);// expectation for PE_ALS flag returned + } + } + + testCaseOtherMate( + "Regression1", + PE_POLICY_FF, // policy + 50, // maxfrag + 0, // minfrag + false, // local + true, // flipping OK + true, // dovetail OK + true, // containment 
OK + true, // overlap OK + true, // expand-to-fit + true, // mate 1 is anchor + false, // anchor aligned to Watson + 3, // anchor's offset into ref + -1, // max # alignment cols + 53, // ref length + 10, // mate 1 length + 10, // mate 2 length + true, // expected return val from otherMate + true, // whether to look for opposite to left + -37, // expected leftmost pos for opp mate LHS + 13, // expected rightmost pos for opp mate LHS + -37, // expected leftmost pos for opp mate RHS + 52, // expected rightmost pos for opp mate RHS + false); // expected orientation in which opposite mate must align +} + +#endif /*def MAIN_PE*/ diff --git a/pe.h b/pe.h new file mode 100644 index 0000000..8dd46a4 --- /dev/null +++ b/pe.h @@ -0,0 +1,321 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +/* + * pe.h + * + * A class encapsulating a paired-end policy and routines for + * identifying intervals according to the policy. For instance, + * contains a routine that, given a policy and details about a match + * for one mate, returns details about where to search for the other + * mate. 
+ */ + +#ifndef PE_H_ +#define PE_H_ + +#include +#include + +// In description below "To the left" = "Upstream of w/r/t the Watson strand" + +// The 4 possible policies describing how mates 1 and 2 should be +// oriented with respect to the reference genome and each other +enum { + // (fw) Both mates from Watson with 1 to the left, or + // (rc) Both mates from Crick with 2 to the left + PE_POLICY_FF = 1, + + // (fw) Both mates from Crick with 1 to the left, or + // (rc) Both mates from Watson with 2 to the left + PE_POLICY_RR, + + // (fw) Mate 1 from Watson and mate 2 from Crick with 1 to the left, or + // (rc) Mate 2 from Watson and mate 1 from Crick with 2 to the left + PE_POLICY_FR, + + // (fw) Mate 1 from Crick and mate 2 from Watson with 1 to the left, or + // (rc) Mate 2 from Crick and mate 1 from Watson with 2 to the left + PE_POLICY_RF +}; + +// Various distinct ways that the mates might align with respect to +// each other in a concordant alignment. We distinguish between them +// because in some cases a user may want to consider some of these +// categories to be discordant, even if the alignment otherwise +// conforms to the paired-end policy. + +enum { + // Describes a paired-end alignment where the mates + // straightforwardly conform to the paired-end policy without any + // overlap between the mates + PE_ALS_NORMAL = 1, + + // Describes a paired-end alignment where the mate overlap, but + // neither contains the other and they do not dovetail, but the + // alignment conforms to the paired-end policy + PE_ALS_OVERLAP, + + // Describes a paired-end alignment where the mates conform to the + // paired-end policy, but one mate strictly contains the other but + // they don't dovetail. We distinguish this from a "normal" + // concordant alignment because some users may wish to categorize + // such an alignment as discordant. 
+ PE_ALS_CONTAIN, + + // Describes a paired-end alignment where the mates conform to the + // paired-end policy, but mates "fall off" each other. E.g. if the + // policy is FR and any of these happen: + // 1: >>>>> >>>>> + // 2: <<<<<< <<<<<< + // And the overall extent is consistent with the minimum fragment + // length, this is a dovetail alignment. We distinguish this from + // a "normal" concordant alignment because some users may wish to + // categorize such an alignment as discordant. + PE_ALS_DOVETAIL, + + // The mates are clearly discordant, owing to their orientations + // and/or implied fragment length + PE_ALS_DISCORD +}; + +/** + * Return true iff the orientations and relative positions of mates 1 + * and 2 are compatible with the given PE_POLICY. + */ +static inline bool pePolicyCompat( + int policy, // PE_POLICY + bool oneLeft, // true iff mate 1 is to the left of mate 2 + bool oneWat, // true iff mate 1 aligned to Watson strand + bool twoWat) // true iff mate 2 aligned to Watson strand +{ + switch(policy) { + case PE_POLICY_FF: + return oneWat == twoWat && oneWat == oneLeft; + case PE_POLICY_RR: + return oneWat == twoWat && oneWat != oneLeft; + case PE_POLICY_FR: + return oneWat != twoWat && oneWat == oneLeft; + case PE_POLICY_RF: + return oneWat != twoWat && oneWat != oneLeft; + default: { + std::cerr << "Bad PE_POLICY: " << policy << std::endl; + throw 1; + } + } + throw 1; +} + +/** + * Given that the given mate aligns in the given orientation, set 'left' to + * true iff the other mate must be to the left of the given mate for the pair + * to be concordant, and 'mfw' to true iff it must align to the Watson strand. 
+ */ +static inline void pePolicyMateDir( + int policy,// in: PE_POLICY + bool is1, // in: true iff mate 1 is the one that already aligned + bool fw, // in: true iff already-aligned mate aligned to Watson + bool& left, // out: set =true iff other mate must be to the left + bool& mfw) // out: set =true iff other mate must align to watson +{ + switch(policy) { + case PE_POLICY_FF: { + left = (is1 != fw); + mfw = fw; + break; + } + case PE_POLICY_RR: { + left = (is1 == fw); + mfw = fw; + break; + } + case PE_POLICY_FR: { + left = !fw; + mfw = !fw; + break; + } + case PE_POLICY_RF: { + left = fw; + mfw = !fw; + break; + } + default: { + std::cerr << "Error: No such PE_POLICY: " << policy << std::endl; + throw 1; + } + } + return; +} + +/** + * Encapsulates paired-end alignment parameters. + */ +class PairedEndPolicy { + +public: + + PairedEndPolicy() { reset(); } + + PairedEndPolicy( + int pol, + size_t maxfrag, + size_t minfrag, + bool local, + bool flippingOk, + bool dovetailOk, + bool containOk, + bool olapOk, + bool expandToFit) + { + init( + pol, + maxfrag, + minfrag, + local, + flippingOk, + dovetailOk, + containOk, + olapOk, + expandToFit); + } + + /** + * Initialize with nonsense values. + */ + void reset() { + init(-1, 0xffffffff, 0xffffffff, false, false, false, false, false, false); + } + + /** + * Initialize given policy, maximum & minimum fragment lengths. + */ + void init( + int pol, + size_t maxfrag, + size_t minfrag, + bool local, + bool flippingOk, + bool dovetailOk, + bool containOk, + bool olapOk, + bool expandToFit) + { + pol_ = pol; + maxfrag_ = maxfrag; + minfrag_ = minfrag; + local_ = local; + flippingOk_ = flippingOk; + dovetailOk_ = dovetailOk; + containOk_ = containOk; + olapOk_ = olapOk; + expandToFit_ = expandToFit; + } + +/** + * Given details about how one mate aligns, and some details about the + * reference sequence it aligned to, calculate a window and orientation s.t. 
+ * a paired-end alignment is concordant iff the opposite mate aligns in the + * calculated window with the calculated orientation. The calculation does not + * consider gaps. The dynamic programming framer will take gaps into account. + * + * Returns false if no concordant alignments are possible, true otherwise. + */ +bool otherMate( + bool is1, // true -> mate 1 aligned and we're looking + // for 2, false -> vice versa + bool fw, // orientation of aligned mate + int64_t off, // offset into the reference sequence + int64_t maxalcols, // max # columns spanned by alignment + size_t reflen, // length of reference sequence aligned to + size_t len1, // length of mate 1 + size_t len2, // length of mate 2 + bool& oleft, // out: true iff opp mate must be to left of anchor + int64_t& oll, // out: leftmost Watson off for LHS of opp alignment + int64_t& olr, // out: rightmost Watson off for LHS of opp alignment + int64_t& orl, // out: leftmost Watson off for RHS of opp alignment + int64_t& orr, // out: rightmost Watson off for RHS of opp alignment + bool& ofw) // out: true iff opp mate must be on Watson strand + const; + + /** + * Return a PE_ALS flag indicating, given a PE_POLICY and coordinates + * for a paired-end alignment, what type of alignment it is, i.e., + * whether it's: + * + * 1. Straightforwardly concordant + * 2. Mates dovetail (one extends beyond the end of the other) + * 3. One mate contains the other but they don't dovetail + * 4. One mate overlaps the other but neither contains the other and + * they don't dovetail + * 5. 
Discordant + */ + int peClassifyPair( + int64_t off1, // offset of mate 1 + size_t len1, // length of mate 1 + bool fw1, // whether mate 1 aligned to Watson + int64_t off2, // offset of mate 2 + size_t len2, // length of mate 2 + bool fw2) // whether mate 2 aligned to Watson + const; + + int policy() const { return pol_; } + size_t maxFragLen() const { return maxfrag_; } + size_t minFragLen() const { return minfrag_; } + +protected: + + // Use local alignment to search for the opposite mate, rather than + // a type of alignment that requires the read to align end-to-end + bool local_; + + // Policy governing how mates should be oriented with respect to + // each other and the reference genome + int pol_; + + // true iff settings are such that mates that violate the expected relative + // orientation but are still consistent with maximum fragment length are OK + bool flippingOk_; + + // true iff settings are such that dovetailed mates should be + // considered concordant. + bool dovetailOk_; + + // true iff paired-end alignments where one mate's alignment is + // strictly contained within the other's should be considered + // concordant + bool containOk_; + + // true iff paired-end alignments where one mate's alignment + // overlaps the other's should be considered concordant + bool olapOk_; + + // What to do when a mate length is > maxfrag_? If expandToFit_ is + // true, we temporarily increase maxfrag_ to equal the mate length. + // Otherwise we say that any paired-end alignment involving the + // long mate is discordant. + bool expandToFit_; + + // Maximum fragment size to consider + size_t maxfrag_; + + // Minimum fragment size to consider + size_t minfrag_; +}; + +#endif /*ndef PE_H_*/ diff --git a/position_3n.cpp b/position_3n.cpp new file mode 100644 index 0000000..701d8b3 --- /dev/null +++ b/position_3n.cpp @@ -0,0 +1,393 @@ +/* + * Copyright 2020, Yun (Leo) Zhang + * + * This file is part of HISAT-3N. 
+ * + * HISAT-3N is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT-3N is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT-3N. If not, see . + */ + +#include "alignment_3n.h" +#include "position_3n.h" + + +/** + * compare the MappingPosition to Alignment. + * if they have same chromosome and location information, return true. + */ +bool MappingPosition::operator==(Alignment* o) { + + BTString* testChromosome; + if (!o->repeat && o->pairToRepeat) { + testChromosome = &o->pairToChromosome; + } else { + testChromosome = &o->chromosomeName; + } + + if (locations[1] == NULL) { + return (*locations[o->pairSegment] == o->location) && + (*chromosome == *testChromosome); + } else { + return (*locations[o->pairSegment] == o->location) && + (*locations[1-(o->pairSegment)] == o->pairToLocation) && + (*chromosome == *testChromosome); + } +} + +/** + * constructor for non-repeat alignment, or original repeat alignment ( with chromosomeName = rep*). 
+ */ +MappingPosition::MappingPosition(Alignment* newAlignment) { + initialize(); + locations[newAlignment->pairSegment] = &newAlignment->location; + locations[1-newAlignment->pairSegment] = &newAlignment->pairToLocation; + segmentExist[newAlignment->pairSegment] = true; + alignments[newAlignment->pairSegment] = newAlignment; + if (!newAlignment->repeat && newAlignment->pairToRepeat) { + chromosome = &newAlignment->pairToChromosome; + } else { + chromosome = &newAlignment->chromosomeName; + } + pairScore = numeric_limits::min(); +} + +/** + * constructor for expanded repeat alignment position. + */ +MappingPosition::MappingPosition (RepeatMappingPosition* repeat0, Alignment* newAlignment0, RepeatMappingPosition* repeat1=NULL, Alignment* newAlignment1=NULL) { + initialize(); + locations[newAlignment0->pairSegment] = &repeat0->repeatLocation; + chromosome = &repeat0->repeatChromosome; + repeats[0] = repeat0; + repeats[1] = repeat1; + alignments[0] = newAlignment0; + alignments[1] = newAlignment1; + segmentExist[0] = true; + if (alignments[1] != NULL) { + locations[newAlignment1->pairSegment] = &repeat1->repeatLocation; + segmentExist[1] = true; + } + AS = repeat0->AS; + repeat = true; +} + +/** + * return true if there is a MappingPosition that has the same information as the input Alignment. + * first check the latest MappingPosition; if it does not match, search all MappingPositions. + * both the mate and opposite mate positions must match the MappingPosition to return true. 
+ */ +bool MappingPositions::positionExist (Alignment* newAlignment) { + findBadAlignment = false; + if (positions.empty()) { + index = 0; + return false; + } + + if (positions[index] == newAlignment) { + return positions[index].segmentExist[newAlignment->pairSegment]; + } + + int segment = newAlignment->pairSegment; + long long int* targetLocations[2]; + targetLocations[segment] = &newAlignment->location; + targetLocations[1-segment] = &newAlignment->pairToLocation; + + return findPosition (targetLocations, + (!newAlignment->repeat && newAlignment->pairToRepeat)?newAlignment->pairToChromosome:newAlignment->chromosomeName, + segment); +} + +/** + * append new Alignment to positions. + * return true if the new Alignment successfully append. + * return false if the new Alignment is exist or it's mate is bad aligned. + */ +bool MappingPositions::append(Alignment* newAlignment) { + if (positionExist(newAlignment)) { // check if position is exist. + return false; + } else { + int segment = newAlignment->pairSegment; + if (!positions.empty() && positions[index] == newAlignment && !findBadAlignment) { + // check if current MappingPosition is same to new Alignment. + positions[index].segmentExist[segment] = true; + if (positions[index].badAlignment) { + return false; + } + positions[index].alignments[segment] = newAlignment; + } else { + // add the new Alignment to positions. + positions.emplace_back(newAlignment); + index = positions.size()-1; + if (oppositeAlignment != NULL) { + positions[index].alignments[1-segment] = oppositeAlignment; + positions[index].segmentExist[1-segment] = true; + } + } + return true; + } +} + +/** + * output paired-end alignment results. + */ +void MappingPositions::outputPair(BTString& o) { + int outputCount = 0; + bool primary = true; // for primary alignment flag. 
+ for (int i = 0; i < positions.size(); i++) { + if (positions[i].pairScore == bestPairScore) { + outputCount++; + assert(positions[i].alignments[0] != NULL); + assert(positions[i].alignments[1] != NULL); + + // change the NH tag + positions[i].alignments[0]->updateNH(nBestPair); + positions[i].alignments[1]->updateNH(nBestPair); + + // get concordant information and change the concordant flag. + bool concordant; + if (!positions[i].alignments[0]->mapped || !positions[i].alignments[1]->mapped) { + concordant = false; + } else { + concordant = Alignment::isConcordant(*positions[i].locations[0], + positions[i].alignments[0]->forward, + positions[i].alignments[0]->readSequence.length(), + *positions[i].locations[1], + positions[i].alignments[1]->forward, + positions[i].alignments[1]->readSequence.length()); + } + + positions[i].alignments[0]->setConcordant(concordant); + positions[i].alignments[1]->setConcordant(concordant); + + positions[i].alignments[0]->setMateMappingFlag(positions[i].alignments[1]->mapped ? positions[i].locations[1] : NULL); + positions[i].alignments[1]->setMateMappingFlag(positions[i].alignments[0]->mapped ? positions[i].locations[0] : NULL); + + if (!positions[i].repeat) { + // output regular alignment result + + // if both mate is outputted before, change the mate 1 status and output mate1. + if (positions[i].alignments[0]->outputted && positions[i].alignments[1]->outputted) { + positions[i].alignments[1]->outputted = false; + } + // change YS tag. + positions[i].alignments[0]->setYS(positions[i].alignments[1]); + positions[i].alignments[1]->setYS(positions[i].alignments[0]); + // output + positions[i].alignments[0]->outputAlignment(o, NULL, positions[i].locations[1], primary); + positions[i].alignments[1]->outputAlignment(o, NULL, positions[i].locations[0], primary); + } else { + //output repeat alignment result. + + // if both mate is outputted before, change the mate 1 status and output mate1. 
+ if (positions[i].repeats[0]->outputted && positions[i].repeats[1]->outputted) { + positions[i].repeats[1]->outputted = false; + } + // change YS tag. + positions[i].alignments[0]->setYS(positions[i].repeats[1]); + positions[i].alignments[1]->setYS(positions[i].repeats[0]); + //output + positions[i].alignments[0]->outputAlignment(o, positions[i].repeats[0], positions[i].locations[1], primary); + positions[i].alignments[1]->outputAlignment(o, positions[i].repeats[1], positions[i].locations[0], primary); + } + primary = false; // after output the first pair for each read, set primary status to false; + } + } + assert(outputCount == nBestPair); +} + +/** + * output single-end alignment results. + */ +void MappingPositions::outputSingle(BTString &o) { + int outputCount = 0; + bool primary = true; // for primary alignment flag. + for (int i = 0; i < positions.size(); i++) { + if (positions[i].AS == bestAS && !positions[i].badAlignment) { + outputCount++; + assert(positions[i].alignments[0] != NULL); + // set NH tag + positions[i].alignments[0]->updateNH(nBestSingle); + + if (!positions[i].repeat) { // output regular alignment result + positions[i].alignments[0]->outputAlignment(o, NULL, NULL, primary); + } else { // output repeat alignment result + positions[i].alignments[0]->outputAlignment(o, positions[i].repeats[0], NULL, primary); + } + primary = false; // after output the first alignment for each read, set primary status to false; + } + } + assert(outputCount == nBestSingle); +} + +bool MappingPositions::updateAS_regular() { + if (isBad()) { return false; } + if (!positions[index].alignments[0]->mapped) { return true; } + int AS = positions[index].alignments[0]->AS; + if (AS > bestAS) { + bestAS = AS; + nBestSingle = 1; + } else if (AS == bestAS) { + nBestSingle++; + } else { + badAligned(); + return false; + } + positions[index].AS = AS; + return true; +} + +/** + * if AS in repeatPosition is larger than bestAS, add it to positions and update BestAS. 
+ */ +bool MappingPositions::updateAS_repeat() { + if (isBad()) { return false; } + Alignment* alignment = positions[index].alignments[0]; + RepeatMappingPosition* repeatPosition; + badAligned(); // label this as bad alignment to avoid directly output. + int AS; + for (int i = 0; i < alignment->repeatPositions.size(); i++) { + repeatPosition = &alignment->repeatPositions.positions[i]; + AS = (repeatPosition->flagInfoIndex == -1)?repeatPosition->AS : alignment->repeatPositions.positions[repeatPosition->flagInfoIndex].AS; + if (AS >= bestAS) { + positions.emplace_back(repeatPosition, alignment); + if (AS > bestAS) { + bestAS = AS; + nBestSingle = 1; + } else { + nBestSingle++; + } + } + } + return true; +} + +/** + * redirect to updateAS_regular() or updateAS_repeat(). + */ +bool MappingPositions::updateAS() { + if (positions[index].alignments[0]->repeat) { + return updateAS_repeat(); + } else { + return updateAS_regular(); + } +} + +/** + * calculate the pairing score for regular (non-repeat) alignment. + */ +bool MappingPositions::updatePairScore_regular() { + if (positions[index].alignments[0]->chromosomeName != positions[index].alignments[1]->chromosomeName) { + badAligned(); + return false; + } + int nPair; + int score; + score = positions[index].alignments[0]->calculatePairScore(positions[index].alignments[1], nPair); + if (score > bestPairScore) { + bestPairScore = score; + nBestPair = nPair; + concordantExist = positions[index].alignments[0]->concordant; + } else if (score == bestPairScore) { + nBestPair += nPair; + } else { // the newPair Score is less than bestPairScore, label it + badAligned(); + return false; + } + positions[index].pairScore = score; + return true; +} + +/** + * calculate the pairing score for repeat alignment + * append to positions if the new pair has better (or equal) pairing score. 
+ */ +bool MappingPositions::updatePairScore_repeat() { + Alignment* alignments[2]; + alignments[0] = positions[index].alignments[0]; + alignments[1] = positions[index].alignments[1]; + if ((!alignments[0]->mapped || !alignments[1]->mapped) && + (bestPairScore >= (numeric_limits::min()/2 - 1))) { + badAligned(); + return false; + } + RepeatMappingPosition *repeatPosition0; + RepeatMappingPosition *repeatPosition1; + RepeatMappingPosition *repeatFlag0; + RepeatMappingPosition *repeatFlag1; + bool forward[2]; + forward[0] = alignments[0]->forward; + forward[1] = alignments[1]->forward; + bool DNA = alignments[0]->DNA; + int score; + bool concordant; + for (int i = 0; i < alignments[0]->repeatPositions.size(); i++) { + repeatPosition0 = &alignments[0]->repeatPositions.positions[i]; + repeatFlag0 = repeatPosition0->flagInfoIndex==-1 ? repeatPosition0 : &alignments[0]->repeatPositions.positions[repeatPosition0->flagInfoIndex]; + for (int j = 0; j < alignments[1]->repeatPositions.size(); j++) { + repeatPosition1 = &alignments[1]->repeatPositions.positions[j]; + if (repeatPosition0->repeatChromosome == repeatPosition1->repeatChromosome) { + repeatFlag1 = repeatPosition1->flagInfoIndex==-1 ? 
repeatPosition1 : &alignments[1]->repeatPositions.positions[repeatPosition1->flagInfoIndex]; + if (DNA) { + score = Alignment::calculatePairScore_DNA(repeatPosition0->repeatLocation, + repeatFlag0->AS, + forward[0], + alignments[0]->readSequence.length(), + repeatPosition1->repeatLocation, + repeatFlag1->AS, + forward[1], + alignments[1]->readSequence.length(), + concordant); + } else { + score = Alignment::calculatePairScore_RNA(repeatPosition0->repeatLocation, + repeatFlag0->XM, + forward[0], + alignments[0]->readSequence.length(), + repeatPosition1->repeatLocation, + repeatFlag1->XM, + forward[1], + alignments[1]->readSequence.length(), + concordant); + } + if (score >= bestPairScore) { + positions.emplace_back(repeatPosition0, alignments[0], repeatPosition1, alignments[1]); + positions.back().pairScore = score; + if (score > bestPairScore) { + nBestPair = 1; + bestPairScore = score; + concordantExist = concordant; + } else { + nBestPair++; + } + } + } + } + } + return true; +} + +/** + * calculate the pairing score, + * if one of mate is repeat, calculate the pairing score by knn and append the pair has best pairing score to positions. + */ +bool MappingPositions::updatePairScore() { + if (!mateExist()) { return true; } + + assert(positions[index].alignments[0] != NULL); + assert(positions[index].alignments[1] != NULL); + + if (positions[index].alignments[0]->repeat || positions[index].alignments[1]->repeat) { + return updatePairScore_repeat(); + } else { + return updatePairScore_regular(); + } +} \ No newline at end of file diff --git a/position_3n.h b/position_3n.h new file mode 100644 index 0000000..d99c227 --- /dev/null +++ b/position_3n.h @@ -0,0 +1,358 @@ +/* + * Copyright 2020, Yun (Leo) Zhang + * + * This file is part of HISAT-3N. 
+ * + * HISAT-3N is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT-3N is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT-3N. If not, see . + */ + +#ifndef HISAT2_POSITION_3N_H +#define HISAT2_POSITION_3N_H + +#include "sstring.h" +#include "alignment_3n.h" + +class Alignment; +class RepeatMappingPosition; + +/** + * the data structure to store existing mapping positions. + */ +class MappingPosition { +public: + long long int* locations[2] = {NULL}; + BTString* chromosome; + int AS = numeric_limits::min(); + int pairScore; // score to decide which mapping position should be output. + bool segmentExist[2] = {false}; // indicate whether we have the segment alignment information. + bool badAlignment = false; // if the alignment result + bool repeat = false; // whether the mapping position is belong to a expanded repeat alignment + Alignment* alignments[2] = {NULL}; + RepeatMappingPosition* repeats[2] = {NULL}; + + void initialize() { + for (int i = 0; i < 2; i++) { + locations[i] = NULL; + segmentExist[i] = false; + alignments[i] = NULL; + repeats[i] = NULL; + } + chromosome = NULL; + AS = numeric_limits::min(); + pairScore = numeric_limits::min(); + badAlignment = false; + repeat = false; + } + + MappingPosition() { + + } + + /** + * constructor for non-repeat alignment, or original repeat alignment( with chromosomeName = rep*). + */ + MappingPosition (Alignment* newAlignment); + + + /** + * constructor for expanded repeat alignment position. 
+ */ + MappingPosition (RepeatMappingPosition* repeat0, Alignment* newAlignment0, RepeatMappingPosition* repeat1, Alignment* newAlignment1); + + /** + * return true if the MappingPosition has same information as input Alignment + */ + bool operator==(Alignment* o); +}; + +/** + * this is the data structure to store all MappingPosition + */ +class MappingPositions { +public: + vector positions; + int bestPairScore; // the best pairing score, for paired-end alignment output + int nBestPair; // the number of pair have bestPairScore, should equal to NH. + int bestAS; // the best AS score, for single-end alignment output. + int nBestSingle; // the number of alignment have bestAS, should equal to NH. + int index; // the index number on positions. should always point to the last or current MappingPosition. + Alignment* oppositeAlignment; // the temporary pointer point to the opposite mate's Alignment. use in append function. + bool concordantExist; // whether concordant alignment is exist. use for paired-end output statistics. + bool findBadAlignment; + + void initialize() { + positions.clear(); + bestPairScore = numeric_limits::min(); + nBestPair = 0; + bestAS = numeric_limits::min(); + nBestSingle = 0; + index = -1; + oppositeAlignment = NULL; + concordantExist = false; + findBadAlignment = false; + } + + MappingPositions() { + initialize(); + }; + + /** + * return number of MappingPosition in positions. + */ + int size() { + return positions.size(); + } + + /** + * recursively search the positions to find whether there is a Mapping position has target information. + * if the opposite mate is exist, we will save it's Alignment address to oppositeAlignment. 
+ */ + bool findPosition (long long int* inputLocations[2], BTString& chromosome, int& pairSegment) { + oppositeAlignment = NULL; + findBadAlignment = false; + for (int i = 0; i < positions.size(); i++) { + if (positions[i].locations[1-pairSegment] == NULL || + *(positions[i].locations[1-pairSegment]) == *inputLocations[1-pairSegment]) { + if (!positions[i].badAlignment) { + oppositeAlignment = positions[i].alignments[1-pairSegment]; + } + if (*positions[i].locations[pairSegment] == *inputLocations[pairSegment] && + (*positions[i].chromosome == chromosome)) { + index = i; + if (positions[i].badAlignment) + { + findBadAlignment = true; + continue; + } + findBadAlignment = false; + return positions[i].segmentExist[pairSegment]; + } + } + } + return false; + } + + /** + * set current MappingPosition to a bad alignment. + */ + void badAligned() { + positions[index].badAlignment = true; + } + + /** + * return true if current positions is a bad alignment. + */ + bool isBad() { + return positions[index].badAlignment; + } + + /** + * return if both segment is exist for current MappingPosition + */ + bool mateExist() { + return positions[index].segmentExist[0] && positions[index].segmentExist[1]; + } + + /** + * calculate the pairing score, + * if one of mate is repeat, calculate the pairing score by knn and append the pair has best pairing score to positions. + */ + bool updatePairScore(); + + /** + * calculate the pairing score for regular (non-repeat) alignment. + */ + bool updatePairScore_regular(); + + /** + * calculate the pairing score for repeat alignment + * append to positions if the new pair has better (or equal) pairing score. + */ + bool updatePairScore_repeat(); + + /** + * redirect to updateAS_regular() or updateAS_repeat(). + */ + bool updateAS(); + + /** + * if AS is larger than bestAS, update bestAS. + */ + bool updateAS_regular(); + + /** + * if AS in repeatPosition is larger than bestAS, add it to positions and update BestAS. 
+ */ + bool updateAS_repeat(); + + /** + * return true if the is a MappingPosition has same information as input Alignment. + * first check the latest MappingPosition, if not same, search all MappingPositions. + * both mate and opposite mate position should be same to MappingPosition to return true. + */ + bool positionExist(Alignment* newAlignment); + + /** + * return true if the is a MappingPosition has same information as position. + * this function is to check repeat mapping position. + * without checking it's mate, if the repeat mapping location is exist, return true. + */ + bool positionExist (BTString& chromosome, long long int& location, int& segment) { + for (int i = 0; i < positions.size(); i++) { + if ((*(positions[i].locations[segment]) == location) && + (*(positions[i].chromosome) == chromosome)) { + return true; + } + } + return false; + } + + /** + * output paired-end alignment results. + */ + void outputPair(BTString& o); + + /** + * output single-end alignment results. + */ + void outputSingle(BTString& o); + + /** + * append new Alignment to positions. + * return true if the new Alignment successfully append. + * return false if the new Alignment is exist or it's mate is bad aligned. + */ + bool append(Alignment* newAlignment); +}; + +/** + * the data structure to store repeat information. + */ +class RepeatMappingPosition: public MappingPosition { +public: + long long int repeatLocation; + BTString MD; + int XM; + int NM; + int YS; + int Yf; + int Zf; + char YZ; + BTString refSequence; + BTString repeatChromosome; + bool outputted = false; + int flagInfoIndex = -1; + + RepeatMappingPosition() {}; + + /** + * constructor for new repeat information. 
+ */ + RepeatMappingPosition (long long int& inputLocation, + BTString& inputChromosome, + BTString &inputRefSequence, + int &inputAS, + BTString &inputMD, + int &inputXM, + int &inputNM, + int &inputYf, + int &inputZf, + char &repeatYZ) { + repeatLocation = inputLocation; + repeatChromosome = inputChromosome; + refSequence = inputRefSequence; + AS = inputAS; + MD = inputMD; + XM = inputXM; + NM = inputNM; + Yf = inputYf; + Zf = inputZf; + YZ = repeatYZ; + pairScore = numeric_limits::min(); + flagInfoIndex = -1; + } + + /** + * constructor for the repeat which has same reference sequence. + * we save the index for pattern RepeatMappingPosition, because they should have same information except location and chromosome. + */ + RepeatMappingPosition(long long int &inputLocation, + BTString &inputChromosome, + int& inputAS, + int& index) { + repeatLocation = inputLocation; + repeatChromosome = inputChromosome; + AS = inputAS; + flagInfoIndex = index; + } +}; + +/** + * this is the data structure to store all repeatMappingPosition after expansion. + */ +class RepeatMappingPositions { +public: + vector positions; + + void initialize() { + positions.clear(); + } + + /** + * return number of MappingPosition in positions. + */ + int size() { + return positions.size(); + } + + /** + * return true if reference sequence is exist, else, return false. + */ + bool sequenceExist (BTString& refSequence, int &index) { + + for (int i = 0; i < positions.size(); i++) { + if ((positions[i].flagInfoIndex == -1) && (refSequence == positions[i].refSequence)) { + index = i; + return true; + } + } + return false; + } + + /** + * add repeat mapping information. + */ + void append (long long int &location, + BTString &chromosome, + BTString &refSequence, + int &AS, + BTString &MD, + int &XM, + int &NM, + int &Yf, + int &Zf, + char &repeatYZ) { + positions.emplace_back(location, chromosome, refSequence, AS, MD, XM, NM, Yf, Zf, repeatYZ); + } + + /** + * add repeat mapping information. 
+ */ + void append(BTString &chromosome, long long int &location, int &index) { + positions.emplace_back(location, chromosome, positions[index].AS, index); + } +}; + +#endif //HISAT2_POSITION_3N_H + diff --git a/position_3n_table.h b/position_3n_table.h new file mode 100644 index 0000000..2a0c99d --- /dev/null +++ b/position_3n_table.h @@ -0,0 +1,549 @@ +/* + * Copyright 2020, Yun (Leo) Zhang + * + * This file is part of HISAT-3N. + * + * HISAT-3N is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT-3N is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT-3N. If not, see . + */ + +#ifndef POSITION_3N_TABLE_H +#define POSITION_3N_TABLE_H + +#include +#include +#include +#include +#include +#include +#include "alignment_3n_table.h" + +using namespace std; + +extern bool CG_only; +extern long long int loadingBlockSize; + +/** + * store unique information for one base information with readID, and the quality. 
+ */ +class uniqueID +{ +public: + unsigned long long readNameID; + bool isConverted; + char quality; + bool removed; + + uniqueID(unsigned long long InReadNameID, + bool InIsConverted, + char& InQual){ + readNameID = InReadNameID; + isConverted = InIsConverted; + quality = InQual; + removed = false; + } +}; + +/** + * basic class to store reference position information + */ +class Position{ + mutex mutex_; +public: + string chromosome; // reference chromosome name + long long int location; // 1-based position + char strand; // +(REF) or -(REF-RC) + string convertedQualities; // each char is a mapping quality on this position for converted base. + string unconvertedQualities; // each char is a mapping quality on this position for unconverted base. + vector uniqueIDs; // each value represent a readName which contributed the base information. + // readNameIDs is to make sure no read contribute 2 times in same position. + + void initialize() { + chromosome.clear(); + location = -1; + strand = '?'; + convertedQualities.clear(); + unconvertedQualities.clear(); + vector().swap(uniqueIDs); + } + + Position(){ + initialize(); + }; + + /** + * return true if there is mapping information in this reference position. + */ + bool empty() { + return convertedQualities.empty() && unconvertedQualities.empty(); + } + + /** + * set the chromosome, location (position), and strand information. + */ + + void set (string& inputChr, long long int inputLoc) { + chromosome = inputChr; + location = inputLoc + 1; + } + + void set(char inputStrand) { + strand = inputStrand; + } + + /** + * binary search of readNameID in readNameIDs. + * always return a index. + * if cannot find, return the index which has bigger value than input readNameID. 
+ */ + int searchReadNameID (unsigned long long&readNameID, int start, int end) { + if (uniqueIDs.empty()) { + return 0; + } + if (start <= end) { + int middle = (start + end) / 2; + if (uniqueIDs[middle].readNameID == readNameID) { + return middle; + } + if (uniqueIDs[middle].readNameID > readNameID) { + return searchReadNameID(readNameID, start, middle-1); + } + return searchReadNameID(readNameID, middle+1, end); + } + return start; // return the bigger one + } + + + /** + * with a input readNameID, add it into readNameIDs (kept sorted by readNameID). + * return true if the read is new for this position; if the input readNameID already exist in readNameIDs, return false. + * if the same read shows up again with a different conversion status, the base it contributed earlier is withdrawn and the read is marked as removed. + */ + bool appendReadNameID(PosQuality& InBase, Alignment& InAlignment) { + int idCount = uniqueIDs.size(); + if (idCount == 0 || InAlignment.readNameID > uniqueIDs.back().readNameID) { + uniqueIDs.emplace_back(InAlignment.readNameID, InBase.converted, InBase.qual); + return true; + } + int index = searchReadNameID(InAlignment.readNameID, 0, idCount); + if (uniqueIDs[index].readNameID == InAlignment.readNameID) { + // if the new base is consistent with exist base's conversion status, ignore + // otherwise, delete the exist conversion status + if (uniqueIDs[index].removed) { + return false; + } + if (uniqueIDs[index].isConverted != InBase.converted) { + uniqueIDs[index].removed = true; + // withdraw the base recorded earlier: match on the quality stored with it, not on the incoming base's quality (the two may differ). + const char storedQuality = uniqueIDs[index].quality; + if (uniqueIDs[index].isConverted) { + for (int i = 0; i < convertedQualities.size(); i++) { + if (convertedQualities[i] == storedQuality) { + convertedQualities.erase(convertedQualities.begin()+i); + return false; + } + } + } else { + for (int i = 0; i < unconvertedQualities.size(); i++) { + if (unconvertedQualities[i] == storedQuality) { + unconvertedQualities.erase(unconvertedQualities.begin()+i); + return false; + } + } + } + } + return false; + } else { + uniqueIDs.emplace(uniqueIDs.begin()+index, InAlignment.readNameID, InBase.converted, InBase.qual); + return true; + } + } + + /** + * append the SAM information into this position.
+ */ + void appendBase (PosQuality& input, Alignment& a) { + mutex_.lock(); + if (appendReadNameID(input,a)) { + if (input.converted) { + convertedQualities += input.qual; + } else { + unconvertedQualities += input.qual; + } + } + mutex_.unlock(); + } +}; + +/** + * store all reference position in this class. + */ +class Positions{ +public: + vector refPositions; // the pool of all current reference position. + string chromosome; // current reference chromosome name. + long long int location; // current location (position) in reference chromosome. + char lastBase = 'X'; // the last base of reference line. this is for CG_only mode. + SafeQueue linePool; // pool to store unprocessed SAM line. + SafeQueue freeLinePool; // pool to store free string pointer for SAM line. + SafeQueue freePositionPool; // pool to store free position pointer for reference position. + SafeQueue outputPositionPool; // pool to store the reference position which is loaded and ready to output. + bool working; + mutex mutex_; + long long int refCoveredPosition; // this is the last position in reference chromosome we loaded in refPositions. + ifstream refFile; + vector workerLock; // one lock for one worker thread. + int nThreads = 1; + ChromosomeFilePositions chromosomePos; // store the chromosome name and it's streamPos. To quickly find new chromosome in file. 
+ bool addedChrName = false; + bool removedChrName = false; + + Positions(string inputRefFileName, int inputNThreads, bool inputAddedChrName, bool inputRemovedChrName) { + working = true; + nThreads = inputNThreads; + addedChrName = inputAddedChrName; + removedChrName = inputRemovedChrName; + for (int i = 0; i < nThreads; i++) { + workerLock.push_back(new mutex); + } + refFile.open(inputRefFileName, ios_base::in); + LoadChromosomeNamesPos(); + } + + ~Positions() { + for (int i = 0; i < workerLock.size(); i++) { + delete workerLock[i]; + } + Position* pos; + while(freePositionPool.popFront(pos)) { + delete pos; + } + } + + /** + * given the target Position output the corresponding position index in refPositions. + */ + int getIndex(long long int &targetPos) { + int firstPos = refPositions[0]->location; + return targetPos - firstPos; + } + + /** + * given reference line (start with '>'), extract the chromosome information. + * this is important when there is space in chromosome name. the SAM information only contain the first word. + */ + string getChrName(string& inputLine) { + string name; + for (int i = 1; i < inputLine.size(); i++) + { + char c = inputLine[i]; + if (isspace(c)){ + break; + } + name += c; + } + + if(removedChrName) { + if(name.find("chr") == 0) { + name = name.substr(3); + } + } else if(addedChrName) { + if(name.find("chr") != 0) { + name = string("chr") + name; + } + } + return name; + } + + + /** + * Scan the reference file. Record each chromosome and its position in file. + */ + void LoadChromosomeNamesPos() { + string line; + while (refFile.good()) { + getline(refFile, line); + if (line.front() == '>') { // this line is chromosome name + chromosome = getChrName(line); + streampos currentPos = refFile.tellg(); + chromosomePos.append(chromosome, currentPos); + } + } + chromosomePos.sort(); + chromosome.clear(); + } + + /** + * get a fasta line (not header), append the bases to positions. 
+ */ + void appendRefPosition(string& line) { + Position* newPos; + // check the base one by one + char* b; + for (int i = 0; i < line.size(); i++) { + getFreePosition(newPos); + newPos->set(chromosome, location+i); + b = &line[i]; + if (CG_only) { + if (lastBase == 'C' && *b == 'G') { + refPositions.back()->set('+'); + newPos->set('-'); + } + } else { + if (*b == convertFrom) { + newPos->set('+'); + } else if (*b == convertFromComplement) { + newPos->set('-'); + } + } + refPositions.push_back(newPos); + lastBase = *b; + } + location += line.size(); + } + + /** + * if we can go through all the workerLock, that means no worker is appending new position. + */ + void appendingFinished() { + for (int i = 0; i < nThreads; i++) { + workerLock[i]->lock(); + workerLock[i]->unlock(); + } + } + + /** + * the output function for output thread. + */ + void outputFunction(string outputFileName) { + ostream* out_ = &cout; + out_ = &cout; + ofstream tableFile; + if (!outputFileName.empty()) { + tableFile.open(outputFileName, ios_base::out); + out_ = &tableFile; + } + + *out_ << "ref\tpos\tstrand\tconvertedBaseQualities\tconvertedBaseCount\tunconvertedBaseQualities\tunconvertedBaseCount\n"; + Position* pos; + while (working) { + if (outputPositionPool.popFront(pos)) { + *out_ << pos->chromosome << '\t' + << to_string(pos->location) << '\t' + << pos->strand << '\t' + << pos->convertedQualities << '\t' + << to_string(pos->convertedQualities.size()) << '\t' + << pos->unconvertedQualities << '\t' + << to_string(pos->unconvertedQualities.size()) << '\n'; + returnPosition(pos); + } else { + this_thread::sleep_for (std::chrono::microseconds(1)); + } + } + tableFile.close(); + } + + /** + * move the position which position smaller than refCoveredPosition - loadingBlockSize, output it. 
+ */ + void moveBlockToOutput() { + if (refPositions.empty()) { + return; + } + int index; + for (index = 0; index < refPositions.size(); index++) { + if (refPositions[index]->location < refCoveredPosition - loadingBlockSize) { + if (refPositions[index]->empty() || refPositions[index]->strand == '?') { + returnPosition(refPositions[index]); + } else { + outputPositionPool.push(refPositions[index]); + } + } else { + break; + } + } + if (index != 0) { + refPositions.erase(refPositions.begin(), refPositions.begin()+index); + } + } + + /** + * move all the refPosition into output pool. + */ + void moveAllToOutput() { + if (refPositions.empty()) { + return; + } + for (int index = 0; index < refPositions.size(); index++) { + if (refPositions[index]->empty() || refPositions[index]->strand == '?') { + returnPosition(refPositions[index]); + } else { + vector().swap(refPositions[index]->uniqueIDs); + outputPositionPool.push(refPositions[index]); + } + } + refPositions.clear(); + } + + /** + * initially load reference sequence for 2 million bp + */ + void loadNewChromosome(string targetChromosome) { + refFile.clear(); + // find the start position in file based on chromosome name. + streampos startPos = chromosomePos.getChromosomePosInRefFile(targetChromosome); + chromosome = targetChromosome; + refFile.seekg(startPos, ios::beg); + refCoveredPosition = 2 * loadingBlockSize; + string line; + lastBase = 'X'; + location = 0; + while (refFile.good()) { + getline(refFile, line); + if (line.front() == '>') { // this line is chromosome name + return; // meet next chromosome, return it. + } else { + if (line.empty()) { continue; } + // change all base to upper case + for (int i = 0; i < line.size(); i++) { + line[i] = toupper(line[i]); + } + appendRefPosition(line); + if (location >= refCoveredPosition) { + return; + } + } + } + } + + /** + * load more Position (loadingBlockSize bp) to positions + * if we meet next chromosome, return false. Else, return ture. 
+ */ + void loadMore() { + refCoveredPosition += loadingBlockSize; + string line; + while (refFile.good()) { + getline(refFile, line); + if (line.front() == '>') { // meet next chromosome, return. + return ; + } else { + if (line.empty()) { continue; } + + // change all base to upper case + for (int i = 0; i < line.size(); i++) { + line[i] = toupper(line[i]); + } + + appendRefPosition(line); + if (location >= refCoveredPosition) { + return ; + } + } + } + } + + + /** + * add position information from Alignment into ref position. + */ + void appendPositions(Alignment& newAlignment) { + if (!newAlignment.mapped || newAlignment.bases.empty()) { + return; + } + long long int startPos = newAlignment.location; // 1-based position + // find the first reference position in pool. + int index = getIndex(newAlignment.location); + + for (int i = 0; i < newAlignment.sequence.size(); i++) { + PosQuality* b = &newAlignment.bases[i]; + if (b->remove) { + continue; + } + + Position* pos = refPositions[index+b->refPos]; + assert (pos->location == startPos + b->refPos); + + if (pos->strand == '?') { + // this is for CG-only mode. read has a 'C' or 'G' but not 'CG'. + continue; + } + pos->appendBase(newAlignment.bases[i], newAlignment); + } + } + + /** + * get a string pointer from freeLinePool, if freeLinePool is empty, make a new string pointer. + */ + void getFreeStringPointer(string*& newLine) { + if (freeLinePool.popFront(newLine)) { + return; + } else { + newLine = new string(); + } + } + + /** + * get a Position pointer from freePositionPool, if freePositionPool is empty, make a new Position pointer. 
+ */ + void getFreePosition(Position*& newPosition) { + while (outputPositionPool.size() >= 10000) { + this_thread::sleep_for (std::chrono::microseconds(1)); + } + if (freePositionPool.popFront(newPosition)) { + return; + } else { + newPosition = new Position(); + } + } + + /** + * return the line to freeLinePool + */ + void returnLine(string* line) { + line->clear(); + freeLinePool.push(line); + } + + /** + * return the position to freePositionPool. + */ + void returnPosition(Position* pos) { + pos->initialize(); + freePositionPool.push(pos); + } + + /** + * this is the working function. + * it take the SAM line from linePool, parse it. + */ + void append(int threadID) { + string* line; + Alignment newAlignment; + + while (working) { + workerLock[threadID]->lock(); + if(!linePool.popFront(line)) { + workerLock[threadID]->unlock(); + this_thread::sleep_for (std::chrono::nanoseconds(1)); + continue; + } + while (refPositions.empty()) { + this_thread::sleep_for (std::chrono::microseconds(1)); + } + newAlignment.parse(line); + returnLine(line); + appendPositions(newAlignment); + workerLock[threadID]->unlock(); + } + } +}; + +#endif //POSITION_3N_TABLE_H diff --git a/presets.cpp b/presets.cpp new file mode 100644 index 0000000..9fef89c --- /dev/null +++ b/presets.cpp @@ -0,0 +1,87 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#include +#include "presets.h" +#include "opts.h" + +using namespace std; + +/** + * Append the policy settings that correspond to the named preset to `policy`. + * An unrecognized preset name is reported on stderr and `policy` is left unchanged. + * `opts` is not read or modified by this implementation. + */ +void PresetsV0::apply( + const std::string& preset, + std::string& policy, + EList >& opts) +{ + // Presets: Policy settings appended (taken from the branches below): + // For --end-to-end: + // very-fast SEED=0,22 DPS=5 ROUNDS=1 IVAL=S,0,2.50 + // fast SEED=0,22 DPS=10 ROUNDS=2 IVAL=S,0,2.50 + // sensitive SEED=0,22 DPS=15 ROUNDS=2 IVAL=S,1,1.15 + // very-sensitive SEED=0,20 DPS=20 ROUNDS=3 IVAL=S,1,0.50 + if(preset == "very-fast") { + policy += ";SEED=0,22"; + policy += ";DPS=5"; + policy += ";ROUNDS=1"; + policy += ";IVAL=S,0,2.50"; + } else if(preset == "fast") { + policy += ";SEED=0,22"; + policy += ";DPS=10"; + policy += ";ROUNDS=2"; + policy += ";IVAL=S,0,2.50"; + } else if(preset == "sensitive") { + policy += ";SEED=0,22"; + policy += ";DPS=15"; + policy += ";ROUNDS=2"; + policy += ";IVAL=S,1,1.15"; + } else if(preset == "very-sensitive") { + policy += ";SEED=0,20"; + policy += ";DPS=20"; + policy += ";ROUNDS=3"; + policy += ";IVAL=S,1,0.50"; + } + // For --local: + // very-fast-local SEED=0,25 DPS=5 ROUNDS=1 IVAL=S,1,2.00 + // fast-local SEED=0,22 DPS=10 ROUNDS=2 IVAL=S,1,1.75 + // sensitive-local SEED=0,20 DPS=15 ROUNDS=2 IVAL=S,1,0.75 (default) + // very-sensitive-local SEED=0,20 DPS=20 ROUNDS=3 IVAL=S,1,0.50 + else if(preset == "very-fast-local") { + policy += ";SEED=0,25"; + policy += ";DPS=5"; + policy += ";ROUNDS=1"; + policy += ";IVAL=S,1,2.00"; + } else if(preset == "fast-local") { + policy += ";SEED=0,22"; + policy += ";DPS=10"; + policy += ";ROUNDS=2"; + policy += ";IVAL=S,1,1.75"; + } else if(preset == "sensitive-local") { + policy += ";SEED=0,20"; + policy += ";DPS=15"; + policy += ";ROUNDS=2"; + policy += ";IVAL=S,1,0.75"; + } else if(preset == "very-sensitive-local") { + policy += ";SEED=0,20"; + policy += ";DPS=20"; + policy += ";ROUNDS=3"; + policy += ";IVAL=S,1,0.50"; + } + else { + cerr << "Unknown preset: " << preset.c_str() << endl; + } +} diff --git a/presets.h b/presets.h new file mode 100644 index 0000000..dfcec41 --- /dev/null +++ b/presets.h @@ -0,0
+1,67 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +/** + * presets.h + * + * Maps simple command-line options to more complicated combinations of + * options for ease-of-use. + */ + +#ifndef PRESETS_H_ +#define PRESETS_H_ + +#include +#include +#include "ds.h" + +class Presets { +public: + + Presets() { } + + virtual ~Presets() { } + + virtual void apply( + const std::string& preset, + std::string& policy, + EList >& opts) = 0; + + virtual const char * name() = 0; +}; + +/** + * Initial collection of presets: 8/14/2011 prior to first Bowtie 2 release. + */ +class PresetsV0 : public Presets { +public: + + PresetsV0() : Presets() { } + + virtual ~PresetsV0() { } + + virtual void apply( + const std::string& preset, + std::string& policy, + EList >& opts); + + virtual const char * name() { return "V0"; } +}; + +#endif /*ndef PRESETS_H_*/ diff --git a/processor_support.h b/processor_support.h new file mode 100644 index 0000000..e731e00 --- /dev/null +++ b/processor_support.h @@ -0,0 +1,73 @@ +#ifndef PROCESSOR_SUPPORT_H_ +#define PROCESSOR_SUPPORT_H_ + +// Utility class ProcessorSupport provides POPCNTenabled() to determine +// processor support for POPCNT instruction. It uses CPUID to +// retrieve the processor capabilities. 
+// for Intel ICC compiler __cpuid() is an intrinsic +// for Microsoft compiler __cpuid() is provided by #include +// for GCC compiler __get_cpuid() is provided by #include + +// Intel compiler defines __GNUC__, so this is needed to disambiguate + +#if defined(__INTEL_COMPILER) +# define USING_INTEL_COMPILER +#elif defined(__GNUC__) +# define USING_GCC_COMPILER +# include +#elif defined(_MSC_VER) +// __MSC_VER defined by Microsoft compiler +#define USING_MSC_COMPILER +#endif + +struct regs_t {unsigned int EAX, EBX, ECX, EDX;}; +#define BIT(n) ((1< + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +/// An array that transforms Phred qualities into their maq-like +/// equivalents by dividing by ten and rounding to the nearest 10, +/// but saturating at 3. 
+unsigned char qualRounds[] = { + 0, 0, 0, 0, 0, // 0 - 4 + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 5 - 14 + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 15 - 24 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 25 - 34 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 35 - 44 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 45 - 54 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 55 - 64 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 65 - 74 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 75 - 84 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 85 - 94 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 95 - 104 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 105 - 114 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 115 - 124 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 125 - 134 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 135 - 144 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 145 - 154 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 155 - 164 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 165 - 174 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 175 - 184 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 185 - 194 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 195 - 204 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 205 - 214 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 215 - 224 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 225 - 234 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 235 - 244 + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, // 245 - 254 + 30 // 255 +}; + +/** + * Lookup table for converting from Solexa-scaled (log-odds) quality + * values to Phred-scaled quality values. 
+ */ +unsigned char solToPhred[] = { + /* -10 */ 0, 1, 1, 1, 1, 1, 1, 2, 2, 3, + /* 0 */ 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, + /* 10 */ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + /* 20 */ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + /* 30 */ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + /* 40 */ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + /* 50 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + /* 60 */ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + /* 70 */ 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + /* 80 */ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + /* 90 */ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + /* 100 */ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + /* 110 */ 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + /* 120 */ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, + /* 130 */ 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, + /* 140 */ 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + /* 150 */ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, + /* 160 */ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, + /* 170 */ 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + /* 180 */ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + /* 190 */ 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, + /* 200 */ 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, + /* 210 */ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, + /* 220 */ 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, + /* 230 */ 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + /* 240 */ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + /* 250 */ 250, 251, 252, 253, 254, 255 +}; diff --git a/qual.h b/qual.h new file mode 100644 index 0000000..7c542bb --- /dev/null +++ b/qual.h @@ -0,0 +1,236 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef QUAL_H_ +#define QUAL_H_ + +#include +#include +#include "search_globals.h" +#include "sstring.h" + +extern unsigned char qualRounds[]; +extern unsigned char solToPhred[]; + +/// Translate a Phred-encoded ASCII character into a Phred quality +static inline uint8_t phredcToPhredq(char c) { + return ((uint8_t)c >= 33 ? ((uint8_t)c - 33) : 0); +} + +/** + * Convert a Solexa-scaled quality value into a Phred-scale quality + * value. 
+ * + * p = probability that base is miscalled + * Qphred = -10 * log10 (p) + * Qsolexa = -10 * log10 (p / (1 - p)) + * See: http://en.wikipedia.org/wiki/FASTQ_format + * + */ +static inline uint8_t solexaToPhred(int sol) { + assert_lt(sol, 256); + if(sol < -10) return 0; + return solToPhred[sol+10]; +} + +class SimplePhredPenalty { +public: + static uint8_t mmPenalty (uint8_t qual) { + return qual; + } + static uint8_t delPenalty(uint8_t qual) { + return qual; + } + static uint8_t insPenalty(uint8_t qual_left, uint8_t qual_right) { + return std::max(qual_left, qual_right); + } +}; + +class MaqPhredPenalty { +public: + static uint8_t mmPenalty (uint8_t qual) { + return qualRounds[qual]; + } + static uint8_t delPenalty(uint8_t qual) { + return qualRounds[qual]; + } + static uint8_t insPenalty(uint8_t qual_left, uint8_t qual_right) { + return qualRounds[std::max(qual_left, qual_right)]; + } +}; + +static inline uint8_t mmPenalty(bool maq, uint8_t qual) { + if(maq) { + return MaqPhredPenalty::mmPenalty(qual); + } else { + return SimplePhredPenalty::mmPenalty(qual); + } +} + +static inline uint8_t delPenalty(bool maq, uint8_t qual) { + if(maq) { + return MaqPhredPenalty::delPenalty(qual); + } else { + return SimplePhredPenalty::delPenalty(qual); + } +} + +static inline uint8_t insPenalty(bool maq, uint8_t qual_left, uint8_t qual_right) { + if(maq) { + return MaqPhredPenalty::insPenalty(qual_left, qual_right); + } else { + return SimplePhredPenalty::insPenalty(qual_left, qual_right); + } +} + +/** + * Take an ASCII-encoded quality value and convert it to a Phred33 + * ASCII char. + */ +inline static char charToPhred33(char c, bool solQuals, bool phred64Quals) { + using namespace std; + if(c == ' ') { + std::cerr << "Saw a space but expected an ASCII-encoded quality value." << endl + << "Are quality values formatted as integers? If so, try --integer-quals." 
<< endl; + throw 1; + } + if (solQuals) { + // Convert solexa-scaled chars to phred + // http://maq.sourceforge.net/fastq.shtml + char cc = solexaToPhred((int)c - 64) + 33; + if (cc < 33) { + std::cerr << "Saw ASCII character " + << ((int)c) + << " but expected 64-based Solexa qual (converts to " << (int)cc << ")." << endl + << "Try not specifying --solexa-quals." << endl; + throw 1; + } + c = cc; + } + else if(phred64Quals) { + if (c < 64) { + cerr << "Saw ASCII character " + << ((int)c) + << " but expected 64-based Phred qual." << endl + << "Try not specifying --solexa1.3-quals/--phred64-quals." << endl; + throw 1; + } + // Convert to 33-based phred + c -= (64-33); + } + else { + // Keep the phred quality + if (c < 33) { + cerr << "Saw ASCII character " + << ((int)c) + << " but expected 33-based Phred qual." << endl; + throw 1; + } + } + return c; +} + +/** + * Take an integer quality value and convert it to a Phred33 ASCII + * char. + */ +inline static char intToPhred33(int iQ, bool solQuals) { + using namespace std; + int pQ; + if (solQuals) { + // Convert from solexa quality to phred + // quality and translate to ASCII + // http://maq.sourceforge.net/qual.shtml + pQ = solexaToPhred((int)iQ) + 33; + } else { + // Keep the phred quality and translate + // to ASCII + pQ = (iQ <= 93 ? iQ : 93) + 33; + } + if (pQ < 33) { + cerr << "Saw negative Phred quality " << ((int)pQ-33) << "." << endl; + throw 1; + } + assert_geq(pQ, 0); + return (int)pQ; +} + +inline static uint8_t roundPenalty(uint8_t p) { + if(gNoMaqRound) return p; + return qualRounds[p]; +} + +/** + * Fill the q[] array with the penalties that are determined by + * subtracting the quality values of the alternate basecalls from + * the quality of the primary basecall. 
+ */ +inline static uint8_t penaltiesAt(size_t off, uint8_t *q, + int alts, + const BTString& qual, + const BTDnaString *altQry, + const BTString *altQual) +{ + uint8_t primQ = qual[off]; // qual of primary call + uint8_t bestPenalty = roundPenalty(phredcToPhredq(primQ)); + // By default, any mismatch incurs a penalty equal to the quality + // of the called base + q[0] = q[1] = q[2] = q[3] = bestPenalty; + for(int i = 0; i < alts; i++) { + uint8_t altQ = altQual[i][off]; // qual of alt call + if(altQ == 33) break; // no alt call + assert_leq(altQ, primQ); + uint8_t pen = roundPenalty(primQ - altQ); + if(pen < bestPenalty) { + bestPenalty = pen; + } + // Get the base + int altC = (int)altQry[i][off]; + assert_lt(altC, 4); + q[altC] = pen; + } + // Return the best penalty so that the caller can evaluate whether + // any of the penalties are within-budget + return bestPenalty; +} + +/** + * Fill the q[] array with the penalties that are determined by + * subtracting the quality values of the alternate basecalls from + * the quality of the primary basecall. 
// In-place radix sort using a single thread; should not be called directly.
// Used for the leaves of both the in-place and out-of-place radix sorts.
//
// Elements of [begin, end) are distributed into BLOCKS bins keyed on
// (hash(x) >> right_shift) & BLOCKS_MASK, after which every bin holding
// more than one element is sorted: recursively by radix when it is large
// and lower bits remain, otherwise with sort() and CMP.
//
// begin, end : range to sort in place
// hash       : maps an element to its integer key
// log_size   : bit position that the caller has already partitioned on;
//              this pass keys on the bits starting SHIFT below it
//
// NOTE(review): the template parameter list is assumed to be
// <T, CMP, index_t> -- confirm against the callers.
template <typename T, typename CMP, typename index_t>
static void _radix_sort(T* begin, T* end, index_t (*hash)(T&), int log_size) {
	const int SHIFT = 8;
	const int BLOCKS = (1 << (SHIFT + 1));
	const int BLOCKS_MASK = BLOCKS - 1;

	// compute maximum of log_size - SHIFT and 0
	int right_shift = (log_size - SHIFT) * (log_size > SHIFT);
	// count number in each bin
	index_t count[BLOCKS] = {0};
	for(T* curr = begin; curr != end; curr++) {
		count[(hash(*curr) >> right_shift) & BLOCKS_MASK]++;
	}
	// sum numbers to create an index: index[b] is the first slot of bin b,
	// place[b] is the next slot of bin b that has not been filled yet
	T* index[BLOCKS + 1];
	T* place[BLOCKS];
	index[0] = place[0] = begin;
	for(int i = 1; i < BLOCKS; i++) {
		index[i] = place[i] = index[i - 1] + count[i - 1];
	}
	index[BLOCKS] = end;
	//put objects in proper place
	for(int bin = 0; bin < BLOCKS; bin++) {
		while(place[bin] != index[bin + 1]) {
			// Pick up the element in this bin's next open slot and keep
			// swapping the held element into its own bin until we are
			// holding one that belongs here
			T curr = *place[bin];
			int x = (hash(curr) >> right_shift) & BLOCKS_MASK;
			while(x != bin) {
				T temp = *place[x];
				*place[x]++ = curr;
				curr = temp;
				x = (hash(curr) >> right_shift) & BLOCKS_MASK;
			}
			*place[bin]++ = curr;
		}
	}
	//sort partitions
	for(int bin = 0; bin < BLOCKS; bin++) {
		// Recurse only while there are lower bits left (right_shift != 0)
		// and the bin holds more than 64 elements; otherwise fall back to
		// a comparison sort
		if(index[bin + 1] - index[bin] > 64 && right_shift) {
			_radix_sort<T, CMP, index_t>(index[bin], index[bin + 1], hash, right_shift);
		} else if (index[bin + 1] - index[bin] > 1) {
			sort(index[bin], index[bin + 1], CMP());
		}
	}
}
curr; + } + } + if(nthreads != 1) cerr << "PLACE IN CORRECT BIN: " << time(0) - start << endl; + start = time(0); + //sort partitions + if(nthreads == 1) { + for(int bin = 0; bin < occupied; bin++) { + if(index[bin + 1] - index[bin] > 1) _radix_sort(index[bin], index[bin + 1], hash, right_shift); + } + } else { + AutoArray threads(nthreads); + EList > params; params.resizeExact(nthreads); + int st = 0; + for(int i = 0; i < nthreads; i++) { + params[i].hash = hash; + params[i].begin = index + st; + params[i].log_size = right_shift; + params[i].num = occupied / nthreads; + threads[i] = new tthread::thread(&_radix_sort_worker, (void*)¶ms[i]); + st += params[i].num; + } + //do any remaining bins using main thread + for(int bin = st; bin < occupied; bin++) { + if(index[bin + 1] - index[bin] > 1) _radix_sort(index[bin], index[bin + 1], hash, right_shift); + } + for(int i = 0; i < nthreads; i++) { + threads[i]->join(); + } + } + if(nthreads != 1) cerr << "FINISHED RECURSIVE SORTS: " << time(0) - start << endl; +} + +template +struct CountParams { + T* begin; + T* end; + T* o; + index_t* count; + index_t (*hash)(T&); + int occupied; + int right_shift; +}; + +template +static void _count_worker(void* vp) { + CountParams* params = (CountParams*)vp; + T* begin = params->begin; + T* end = params->end; + index_t (*hash)(T&) = params->hash; + int occupied = params->occupied; + int right_shift = params->right_shift; + + params->count = new index_t[occupied + 1](); + for(T* curr = begin; curr != end; curr++) { + params->count[hash(*curr) >> right_shift]++; + } +} +template +static void _write_worker(void* vp) { + CountParams* params = (CountParams*)vp; + T* begin = params->begin; + T* end = params->end; + T* o = params->o; + index_t* count = params->count; + index_t (*hash)(T&) = params->hash; + int right_shift = params->right_shift; + + for(T* curr = begin; curr != end; curr++) { + o[count[hash(*curr) >> right_shift]++] = *curr; + } +} + +template +void radix_sort_copy(T* begin, 
T* end, T* o, index_t (*hash)(T&), index_t maxv, int nthreads = 1) { + //set parameters + const int SHIFT = 8; + const int BLOCKS = (1 << (SHIFT + 1)); + int log_size = sizeof(maxv) * 8; + while(!((1 << log_size) & maxv)) log_size--; + int right_shift = log_size - SHIFT; + int occupied = (maxv >> right_shift) + 1; + //count nodes + time_t start = time(0); + EList > cparams; cparams.resizeExact(nthreads); + AutoArray threads1(nthreads); + T* st = begin; + T* en = st + (end - begin) / nthreads; + for(int i = 0; i < nthreads; i++) { + cparams[i].begin = st; + cparams[i].end = en; + cparams[i].hash = hash; + cparams[i].o = o; + cparams[i].occupied = occupied; + cparams[i].right_shift= right_shift; + if(nthreads == 1) { + _count_worker((void*)&cparams[i]); + } else { + threads1[i] = new tthread::thread(&_count_worker, (void*)&cparams[i]); + } + st = en; + if(i + 2 == nthreads) { + en = end; + } else { + en = st + (end - begin) / nthreads; + } + } + if(nthreads > 1) { + for(int i = 0; i < nthreads; i++) { + threads1[i]->join(); + delete threads1[i]; + } + } + if(nthreads != 1) cerr << "COUNT NUMBER IN EACH BIN: " << time(0) - start << endl; + start = time(0); + //transform counts into index + index_t tot = cparams[0].count[0]; + cparams[0].count[0] = 0; + for(int i = 1; i < nthreads; i++) { + tot += cparams[i].count[0]; + cparams[i].count[0] = tot - cparams[i].count[0]; + } + for(int j = 1; j < occupied + 1; j++) { + for(int i = 0; i < nthreads; i++) { + tot += cparams[i].count[j]; + cparams[i].count[j] = tot - cparams[i].count[j]; + } + } + T* index[BLOCKS + 1]; + for(int i = 0; i < occupied + 1; i++) { + index[i] = o + cparams[0].count[i]; + } + //write T's to correct bin + if(nthreads == 1) { + _write_worker((void*)&cparams[0]); + } else { + for(int i = 0; i < nthreads; i++) + threads1[i] = new tthread::thread(&_write_worker, (void*)&cparams[i]); + for(int i = 0; i < nthreads; i++) { + threads1[i]->join(); + } + } + for(int i = 0; i < nthreads; i++) { + delete[] 
cparams[i].count; + delete threads1[i]; + } + if(nthreads != 1) cerr << "FINISHED FIRST ROUND: " << time(0) - start << endl; + start = time(0); + //sort partitions + if(nthreads == 1) { + for(int bin = 0; bin < occupied; bin++) + if(index[bin + 1] - index[bin] > 1) + _radix_sort(index[bin], index[bin + 1], hash, right_shift); + } else { + AutoArray threads(nthreads); + EList > params; params.resizeExact(nthreads); + int st = 0; + for(int i = 0; i < nthreads; i++) { + params[i].hash = hash; + params[i].begin = index + st; + params[i].log_size = right_shift; + params[i].num = 0; + index_t remaining_elements = (index_t)(index[occupied] - index[st]); + while(params[i].num + st < occupied + && (index_t)(index[params[i].num + st] - index[st]) < remaining_elements / (nthreads - i)) + params[i].num++; + cerr << params[i].num << " " << (index_t)(index[params[i].num + st] - index[st]) << endl; + threads[i] = new tthread::thread(&_radix_sort_worker, (void*)¶ms[i]); + st += params[i].num; + } + //do any remaining bins using main thread + for(int bin = st; bin < occupied; bin++) { + if(index[bin + 1] - index[bin] > 1) + _radix_sort(index[bin], index[bin + 1], hash, right_shift); + } + for(int i = 0; i < nthreads; i++) { + threads[i]->join(); + delete threads[i]; + } + } + if(nthreads != 1) cerr << "FINISHED RECURSIVE SORTS: " << time(0) - start << endl; +} + +#endif //RADIX_SORT_H_ diff --git a/random_source.cpp b/random_source.cpp new file mode 100644 index 0000000..0311f91 --- /dev/null +++ b/random_source.cpp @@ -0,0 +1,128 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "random_source.h" +#include "random_util.h" + +#ifdef MERSENNE_TWISTER + +void RandomSource::gen_state() { + for(int i = 0; i < (n - m); ++i) { + state_[i] = state_[i + m] ^ twiddle(state_[i], state_[i + 1]); + } + for(int i = n - m; i < (n - 1); ++i) { + state_[i] = state_[i + m - n] ^ twiddle(state_[i], state_[i + 1]); + } + state_[n - 1] = state_[m - 1] ^ twiddle(state_[n - 1], state_[0]); + p_ = 0; // reset position +} + +void RandomSource::init(uint32_t s) { // init by 32 bit seed + reset(); + state_[0] = s; + for(int i = 1; i < n; ++i) { + state_[i] = 1812433253UL * (state_[i - 1] ^ (state_[i - 1] >> 30)) + i; + } + p_ = n; // force gen_state() to be called for next random number + inited_ = true; +} + +void RandomSource::init(const uint32_t* array, int size) { // init by array + init(19650218UL); + int i = 1, j = 0; + for(int k = ((n > size) ? 
n : size); k; --k) { + state_[i] = (state_[i] ^ ((state_[i - 1] ^ (state_[i - 1] >> 30)) * 1664525UL)) + array[j] + j; // non linear + ++j; j %= size; + if((++i) == n) { state_[0] = state_[n - 1]; i = 1; } + } + for(int k = n - 1; k; --k) { + state_[i] = (state_[i] ^ ((state_[i - 1] ^ (state_[i - 1] >> 30)) * 1566083941UL)) - i; + if((++i) == n) { state_[0] = state_[n - 1]; i = 1; } + } + state_[0] = 0x80000000UL; // MSB is 1; assuring non-zero initial array + p_ = n; // force gen_state() to be called for next random number + inited_ = true; +} + +#endif + +#ifdef MAIN_RANDOM_SOURCE + +using namespace std; + +int main(void) { + cerr << "Test 1" << endl; + { + RandomSource rnd; + int cnts[32]; + for(size_t i = 0; i < 32; i++) { + cnts[i] = 0; + } + for(uint32_t j = 0; j < 10; j++) { + rnd.init(j); + for(size_t i = 0; i < 10000; i++) { + uint32_t rndi = rnd.nextU32(); + for(size_t i = 0; i < 32; i++) { + if((rndi & 1) != 0) { + cnts[i]++; + } + rndi >>= 1; + } + } + for(size_t i = 0; i < 32; i++) { + cerr << i << ": " << cnts[i] << endl; + } + } + } + + cerr << "Test 2" << endl; + { + int cnts[4][4]; + for(size_t i = 0; i < 4; i++) { + for(size_t j = 0; j < 4; j++) { + cnts[i][j] = 0; + } + } + RandomSource rnd; + Random1toN rn1n; + for(size_t i = 0; i < 100; i++) { + rnd.init((uint32_t)i); + rn1n.init(4, true); + uint32_t ri = rn1n.next(rnd); + cnts[ri][0]++; + ri = rn1n.next(rnd); + cnts[ri][1]++; + ri = rn1n.next(rnd); + cnts[ri][2]++; + ri = rn1n.next(rnd); + cnts[ri][3]++; + } + for(size_t i = 0; i < 4; i++) { + for(size_t j = 0; j < 4; j++) { + cerr << cnts[i][j]; + if(j < 3) { + cerr << ", "; + } + } + cerr << endl; + } + } +} + +#endif diff --git a/random_source.h b/random_source.h new file mode 100644 index 0000000..098d54f --- /dev/null +++ b/random_source.h @@ -0,0 +1,239 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
/**
 * Simple pseudo-random linear congruential generator, a la Numerical
 * Recipes.
 *
 * The state is one 32-bit word, 'last', advanced as last = a * last + c.
 * nextU2() and nextBool() hand out bits of the current state word one
 * chunk at a time; 'lastOff' is the offset of the next unused bit, and
 * the state is advanced once the word is used up.
 */
class RandomSource {
public:
	static const uint32_t DEFUALT_A = 1664525;
	static const uint32_t DEFUALT_C = 1013904223;

	// Every constructor sets last and lastOff so that no member is ever
	// read uninitialized; 30 matches what init() sets.

	/// Unseeded generator; init() must be called before drawing numbers
	RandomSource() :
		a(DEFUALT_A), c(DEFUALT_C), last(0), lastOff(30), inited_(false) { }
	/// Generator seeded with '_last'; ready to use
	RandomSource(uint32_t _last) :
		a(DEFUALT_A), c(DEFUALT_C), last(_last), lastOff(30), inited_(true) { }
	/// Unseeded generator with custom multiplier and increment
	RandomSource(uint32_t _a, uint32_t _c) :
		a(_a), c(_c), last(0), lastOff(30), inited_(false) { }

	/**
	 * (Re)seed the generator.
	 */
	void init(uint32_t seed = 0) {
		last = seed;
		inited_ = true;
		lastOff = 30;
	}

	/**
	 * Advance the state twice and return a 32-bit value combining both
	 * steps.
	 */
	uint32_t nextU32() {
		assert(inited_);
		uint32_t ret;
		last = a * last + c;
		ret = last >> 16;
		last = a * last + c;
		ret ^= last;
		lastOff = 0;
		return ret;
	}

	/**
	 * Return a 64-bit value built from two consecutive nextU32() draws;
	 * the first draw becomes the high word.
	 */
	uint64_t nextU64() {
		assert(inited_);
		uint64_t first = nextU32();
		first = first << 32;
		uint64_t second = nextU32();
		return first | second;
	}

	/**
	 * Return a pseudo-random unsigned 32-bit integer sampled from
	 * [lo, hi].  If hi <= lo, lo is returned and no number is drawn.
	 */
	uint32_t nextU32Range(uint32_t lo, uint32_t hi) {
		uint32_t ret = lo;
		if(hi > lo) {
			// hi-lo+1 wraps around to 0 when the range is the whole
			// 32-bit domain; any draw is in range then, and taking the
			// remainder would divide by zero
			const uint32_t span = hi - lo + 1;
			const uint32_t draw = nextU32();
			ret += (span == 0 ? draw : draw % span);
		}
		return ret;
	}

	/**
	 * Get next 2-bit unsigned integer.
	 */
	uint32_t nextU2() {
		assert(inited_);
		if(lastOff > 30) {
			// Fewer than 2 unused bits left in the state word
			nextU32();
		}
		uint32_t ret = (last >> lastOff) & 3;
		lastOff += 2;
		return ret;
	}

	/**
	 * Get next boolean.
	 */
	bool nextBool() {
		assert(inited_);
		if(lastOff > 31) {
			// No unused bits left in the state word
			nextU32();
		}
		uint32_t ret = (last >> lastOff) & 1;
		lastOff++;
		return ret;
	}

	/**
	 * Return an unsigned int chosen by picking randomly from among
	 * options weighted by probabilities supplied as the elements of the
	 * 'weights' array of length 'numWeights'.  The weights should add
	 * to 1.  If the draw is not below the running total of any prefix,
	 * the last index (numWeights-1) is returned.
	 */
	uint32_t nextFromProbs(
		const float* weights,
		size_t numWeights)
	{
		float f = nextFloat();
		float tot = 0.0f; // total weight seen so far
		for(uint32_t i = 0; i < numWeights; i++) {
			tot += weights[i];
			if(f < tot) return i;
		}
		return (uint32_t)(numWeights-1);
	}

	/**
	 * Return nextU32() scaled by 1/0xffffffff.
	 */
	float nextFloat() {
		assert(inited_);
		return (float)nextU32() / (float)0xffffffff;
	}

	/**
	 * Stateless single LCG step: return a * last + c.
	 */
	static uint32_t nextU32(uint32_t last,
	                        uint32_t a = DEFUALT_A,
	                        uint32_t c = DEFUALT_C)
	{
		return (a * last) + c;
	}

	uint32_t currentA()    const { return a; }
	uint32_t currentC()    const { return c; }
	uint32_t currentLast() const { return last; }

private:
	uint32_t a;       // LCG multiplier
	uint32_t c;       // LCG increment
	uint32_t last;    // current state word
	uint32_t lastOff; // offset of next unused bit of 'last' for nextU2/nextBool
	bool inited_;     // true once a seed has been supplied
};
unsigned integer. + */ + bool nextBool() { + return (nextU32() & 1) == 0; + } + + /** + * Get next unsigned 32-bit integer. + */ + inline uint32_t nextU32() { + assert(inited_); + if(p_ == n) { + gen_state(); // new state vector needed + } + // gen_state() is split off to be non-inline, because it is only called once + // in every 624 calls and otherwise irand() would become too big to get inlined + uint32_t x = state_[p_++]; + x ^= (x >> 11); + x ^= (x << 7) & 0x9D2C5680UL; + x ^= (x << 15) & 0xEFC60000UL; + x ^= (x >> 18); + return x; + } + + /** + * Return next float between 0 and 1. + */ + float nextFloat() { + assert(inited_); + return (float)nextU32() / (float)0xffffffff; + } + +protected: // used by derived classes, otherwise not accessible; use the ()-operator + + static const int n = 624, m = 397; // compile time constants + + // the variables below are static (no duplicates can exist) + uint32_t state_[n]; // state vector array + int p_; // position in state array + + bool inited_; // true if init function has been called + + // private functions used to generate the pseudo random numbers + uint32_t twiddle(uint32_t u, uint32_t v) { + return (((u & 0x80000000UL) | (v & 0x7FFFFFFFUL)) >> 1) ^ ((v & 1UL) ? 0x9908B0DFUL : 0x0UL); + } + + void gen_state(); // generate new state + +}; + +#endif + +#endif /*RANDOM_GEN_H_*/ diff --git a/random_util.cpp b/random_util.cpp new file mode 100644 index 0000000..2c5ed7d --- /dev/null +++ b/random_util.cpp @@ -0,0 +1,24 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "random_util.h" + +const size_t Random1toN::SWAPLIST_THRESH = 128; +const size_t Random1toN::CONVERSION_THRESH = 16; +const float Random1toN::CONVERSION_FRAC = 0.10f; diff --git a/random_util.h b/random_util.h new file mode 100644 index 0000000..39f8c04 --- /dev/null +++ b/random_util.h @@ -0,0 +1,221 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef RANDOM_UTIL_H_ +#define RANDOM_UTIL_H_ + +#include +#include "random_source.h" +#include "ds.h" + +/** + * Return a random integer in [1, N]. Each time it's called it samples again + * without replacement. done() indicates when all elements have been given + * out. + */ +class Random1toN { + + typedef uint32_t T; + +public: + + // A set with fewer than this many elements should kick us into swap-list + // mode immediately. Otherwise we start in seen-list mode and then + // possibly proceed to swap-list mode later. 
+ static const size_t SWAPLIST_THRESH; + + // Convert seen-list to swap-list after this many entries in the seen-list. + static const size_t CONVERSION_THRESH; + + // Convert seen-list to swap-list after this (this times n_) many entries + // in the seen-list. + static const float CONVERSION_FRAC; + + Random1toN(int cat = 0) : + sz_(0), n_(0), cur_(0), + list_(SWAPLIST_THRESH, cat), seen_(CONVERSION_THRESH, cat), + thresh_(0) {} + + Random1toN(size_t n, int cat = 0) : + sz_(0), n_(n), cur_(0), + list_(SWAPLIST_THRESH, cat), seen_(CONVERSION_THRESH, cat), + thresh_(0) {} + + /** + * Initialize the set of pseudo-randoms to be given out without replacement. + */ + void init(size_t n, bool withoutReplacement) { + sz_ = n_ = n; + converted_ = false; + swaplist_ = n < SWAPLIST_THRESH || withoutReplacement; + cur_ = 0; + list_.clear(); + seen_.clear(); + thresh_ = std::max(CONVERSION_THRESH, (size_t)(CONVERSION_FRAC * n)); + } + + /** + * Reset in preparation for giving out a fresh collection of pseudo-randoms + * without replacement. + */ + void reset() { + sz_ = n_ = cur_ = 0; swaplist_ = converted_ = false; + list_.clear(); seen_.clear(); + thresh_ = 0; + } + + /** + * Get next pseudo-random element without replacement. 
+ */ + T next(RandomSource& rnd) { + assert(!done()); + if(cur_ == 0 && !converted_) { + // This is the first call to next() + if(n_ == 1) { + // Trivial case: set of 1 + cur_ = 1; + return 0; + } + if(swaplist_) { + // The set is small, so we go immediately to the random + // swapping list + list_.resize(n_); + for(size_t i = 0; i < n_; i++) { + list_[i] = (T)i; + } + } + } + if(swaplist_) { + // Get next pseudo-random using the swap-list + size_t r = cur_ + (rnd.nextU32() % (n_ - cur_)); + if(r != cur_) { + std::swap(list_[cur_], list_[r]); + } + return list_[cur_++]; + } else { + assert(!converted_); + // Get next pseudo-random but reject it if it's in the seen-list + bool again = true; + T rn = 0; + size_t seenSz = seen_.size(); + while(again) { + rn = rnd.nextU32() % (T)n_; + again = false; + for(size_t i = 0; i < seenSz; i++) { + if(seen_[i] == rn) { + again = true; + break; + } + } + } + // Add it to the seen-list + seen_.push_back(rn); + cur_++; + assert_leq(cur_, n_); + // Move on to using the swap-list? + assert_gt(thresh_, 0); + if(seen_.size() >= thresh_ && cur_ < n_) { + // Add all elements not already in the seen list to the + // swap-list + assert(!seen_.empty()); + seen_.sort(); + list_.resize(n_ - cur_); + size_t prev = 0; + size_t cur = 0; + for(size_t i = 0; i <= seenSz; i++) { + // Add all the elements between the previous element and + // this one + for(size_t j = prev; j < seen_[i]; j++) { + list_[cur++] = (T)j; + } + prev = seen_[i]+1; + } + for(size_t j = prev; j < n_; j++) { + list_[cur++] = (T)j; + } + assert_eq(cur, n_ - cur_); + seen_.clear(); + cur_ = 0; + n_ = list_.size(); + converted_ = true; + swaplist_ = true; + } + return rn; + } + } + + /** + * Return true iff the generator was initialized. + */ + bool inited() const { return n_ > 0; } + + /** + * Set so that there are no pseudo-randoms remaining. + */ + void setDone() { assert(inited()); cur_ = n_; } + + /** + * Return true iff all pseudo-randoms have already been given out. 
+ */ + bool done() const { return inited() && cur_ >= n_; } + + /** + * Return the total number of pseudo-randoms we are initialized to give + * out, including ones already given out. + */ + size_t size() const { return n_; } + + /** + * Return the number of pseudo-randoms left to give out. + */ + size_t left() const { return n_ - cur_; } + + /** + * Return the total size occupued by the Descent driver and all its + * constituent parts. + */ + size_t totalSizeBytes() const { + return list_.totalSizeBytes() + + seen_.totalSizeBytes(); + } + + /** + * Return the total capacity of the Descent driver and all its constituent + * parts. + */ + size_t totalCapacityBytes() const { + return list_.totalCapacityBytes() + + seen_.totalCapacityBytes(); + } + +protected: + + size_t sz_; // domain to pick elts from + size_t n_; // number of elements in active list + bool swaplist_; // if small, use swapping + bool converted_; // true iff seen-list was converted to swap-list + size_t cur_; // # times next() was called + EList list_; // pseudo-random swapping list + EList seen_; // prior to swaplist_ mode, list of + // pseudo-randoms given out + size_t thresh_; // conversion threshold for this instantiation, which + // depends both on CONVERSION_THRESH and on n_ +}; + +#endif diff --git a/read.h b/read.h new file mode 100644 index 0000000..00bbd60 --- /dev/null +++ b/read.h @@ -0,0 +1,599 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * This file is edited by Yun (Leo) Zhang for HISAT-3N. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef READ_H_ +#define READ_H_ + +#include +#include +#include "ds.h" +#include "sstring.h" +#include "filebuf.h" +#include "util.h" + + +/** + * the threeN_cycle + */ +/*enum { + threeN_CT_FW = 0, + threeN_CT_RC, + threeN_GA_FW, + threeN_GA_RC +};*/ + +enum { + threeN_type1conversion_FW = 0, + threeN_type1conversion_RC, + threeN_type2conversion_FW, + threeN_type2conversion_RC +}; + +enum rna_strandness_format { + RNA_STRANDNESS_UNKNOWN = 0, + RNA_STRANDNESS_F, + RNA_STRANDNESS_R, + RNA_STRANDNESS_FR, + RNA_STRANDNESS_RF +}; + +typedef uint64_t TReadId; +typedef size_t TReadOff; +typedef int64_t TAlScore; + +extern bool threeN; + +class HitSet; + +/** + * A buffer for keeping all relevant information about a single read. + */ +struct Read { + + Read() { reset(); } + + Read(const char *nm, const char *seq, const char *ql) { init(nm, seq, ql); } + + void reset() { + rdid = 0; + endid = 0; + alts = 0; + trimmed5 = trimmed3 = 0; + readOrigBuf.clear(); + patFw.clear(); + patFw_3N.clear(); + patRc.clear(); + qual.clear(); + patFwRev.clear(); + patRcRev.clear(); + qualRev.clear(); + name.clear(); + originalFw.clear(); + originalRc.clear(); + for(int j = 0; j < 3; j++) { + altPatFw[j].clear(); + altPatFwRev[j].clear(); + altPatRc[j].clear(); + altPatRcRev[j].clear(); + altQual[j].clear(); + altQualRev[j].clear(); + } + color = fuzzy = false; + primer = '?'; + trimc = '?'; + filter = '?'; + seed = 0; + ns_ = 0; + threeN_cycle = 0; + oppositeConversion_3N = false; + } + + /** + * Finish initializing a new read. + */ + void finalize() { + for(size_t i = 0; i < patFw.length(); i++) { + if((int)patFw[i] > 3) { + ns_++; + } + } + constructRevComps(); + constructReverses(); + } + + /** + * change patFw sequence based on current threeN_cycle and newMappingCycle. 
+ * + * There are two types of changes: + * type1conversion: hs3N_convertedFrom to hs3N_convertedTo + * type2conversion: hs3N_convertedFromComplement to hs3N_convertedToComplement + * + * The initial threeN_cycle is 0. There are 4 cycle: 0, 1, 2, 3; + * + * mate 1, mate2 + * initial: threeN_type1conversion_FW(0), threeN_type1conversion_FW(0), + * --------------- type1->type2 change conversion type + * 1st cycle: threeN_type1conversion_FW(0), threeN_type2conversion_RC(3 = 3-0), + * 2nd cycle: threeN_type1conversion_RC(1), threeN_type2conversion_FW(2 = 3-1), + * type1c->type2 type2->type1 change conversion type + * 3rd cycle: threeN_type2conversion_FW(2), threeN_type1conversion_RC(1 = 3-2), + * 4rd cycle: threeN_type2conversion_RC(3), threeN_type1conversion_FW(0 = 3-3), + */ + void changePlan3N(int newMappingCycle) { + if (name.length() == 0) return; + if ((threeN_cycle == threeN_type1conversion_FW && newMappingCycle == threeN_type2conversion_RC) || + (threeN_cycle == threeN_type1conversion_RC && newMappingCycle == threeN_type2conversion_FW) || + (threeN_cycle == threeN_type2conversion_FW && newMappingCycle == threeN_type1conversion_RC)) { + ns_ = 0; + swap(patFw, patFw_3N); + finalize(); + } + threeN_cycle = newMappingCycle; + oppositeConversion_3N = false; + } + + /** + * Simple init function, used for testing. + */ + void init( + const char *nm, + const char *seq, + const char *ql) + { + reset(); + patFw.installChars(seq); + qual.install(ql); + for(size_t i = 0; i < patFw.length(); i++) { + if((int)patFw[i] > 3) { + ns_++; + } + } + constructRevComps(); + constructReverses(); + if(nm != NULL) name.install(nm); + } + + /// Return true iff the read (pair) is empty + bool empty() const { + return patFw.empty(); + } + + /// Return length of the read in the buffer + size_t length() const { + return patFw.length(); + } + + /** + * Return the number of Ns in the read. 
+ */ + size_t ns() const { + return ns_; + } + + /** + * Construct reverse complement of the pattern and the fuzzy + * alternative patters. If read is in colorspace, just reverse + * them. + */ + void constructRevComps() { + if(color) { + patRc.installReverse(patFw); + for(int j = 0; j < alts; j++) { + altPatRc[j].installReverse(altPatFw[j]); + } + if (threeN) originalRc.installReverse(originalFw); + } else { + patRc.installReverseComp(patFw); + for(int j = 0; j < alts; j++) { + altPatRc[j].installReverseComp(altPatFw[j]); + } + if (threeN) originalRc.installReverseComp(originalFw); + } + } + + /** + * Given patFw, patRc, and qual, construct the *Rev versions in + * place. Assumes constructRevComps() was called previously. + */ + void constructReverses() { + patFwRev.installReverse(patFw); + patRcRev.installReverse(patRc); + qualRev.installReverse(qual); + for(int j = 0; j < alts; j++) { + altPatFwRev[j].installReverse(altPatFw[j]); + altPatRcRev[j].installReverse(altPatRc[j]); + altQualRev[j].installReverse(altQual[j]); + } + } + + /** + * Append a "/1" or "/2" string onto the end of the name buf if + * it's not already there. + */ + void fixMateName(int i) { + assert(i == 1 || i == 2); + size_t namelen = name.length(); + bool append = false; + if(namelen < 2) { + // Name is too short to possibly have /1 or /2 on the end + append = true; + } else { + if(i == 1) { + // append = true iff mate name does not already end in /1 + append = + name[namelen-2] != '/' || + name[namelen-1] != '1'; + } else { + // append = true iff mate name does not already end in /2 + append = + name[namelen-2] != '/' || + name[namelen-1] != '2'; + } + } + if(append) { + name.append('/'); + name.append("012"[i]); + } + } + + /** + * Dump basic information about this read to the given ostream. 
+ */ + void dump(std::ostream& os) const { + using namespace std; + os << name << ' '; + if(color) { + os << patFw.toZBufXForm("0123."); + } else { + os << patFw; + } + os << ' '; + // Print out the fuzzy alternative sequences + for(int j = 0; j < 3; j++) { + bool started = false; + if(!altQual[j].empty()) { + for(size_t i = 0; i < length(); i++) { + if(altQual[j][i] != '!') { + started = true; + } + if(started) { + if(altQual[j][i] == '!') { + os << '-'; + } else { + if(color) { + os << "0123."[(int)altPatFw[j][i]]; + } else { + os << altPatFw[j][i]; + } + } + } + } + } + cout << " "; + } + os << qual.toZBuf() << " "; + // Print out the fuzzy alternative quality strings + for(int j = 0; j < 3; j++) { + bool started = false; + if(!altQual[j].empty()) { + for(size_t i = 0; i < length(); i++) { + if(altQual[j][i] != '!') { + started = true; + } + if(started) { + os << altQual[j][i]; + } + } + } + if(j == 2) { + os << endl; + } else { + os << " "; + } + } + } + + /** + * Check whether two reads are the same in the sense that they will + * lead to us finding the same set of alignments. + */ + static bool same( + const BTDnaString& seq1, + const BTString& qual1, + const BTDnaString& seq2, + const BTString& qual2, + bool qualitiesMatter) + { + if(seq1.length() != seq2.length()) { + return false; + } + for(size_t i = 0; i < seq1.length(); i++) { + if(seq1[i] != seq2[i]) return false; + } + if(qualitiesMatter) { + if(qual1.length() != qual2.length()) { + return false; + } + for(size_t i = 0; i < qual1.length(); i++) { + if(qual1[i] != qual2[i]) return false; + } + } + return true; + } + + /** + * Get the nucleotide and quality value at the given offset from 5' end. + * If 'fw' is false, get the reverse complement. + */ + std::pair get(TReadOff off5p, bool fw) const { + assert_lt(off5p, length()); + int c = (int)patFw[off5p]; + int q = qual[off5p]; + assert_geq(q, 33); + return make_pair((!fw && c < 4) ? 
(c ^ 3) : c, q - 33); + } + + /** + * Get the nucleotide at the given offset from 5' end. + * If 'fw' is false, get the reverse complement. + */ + int getc(TReadOff off5p, bool fw) const { + assert_lt(off5p, length()); + int c = (int)patFw[off5p]; + return (!fw && c < 4) ? (c ^ 3) : c; + } + + /** + * Get the quality value at the given offset from 5' end. + */ + int getq(TReadOff off5p) const { + assert_lt(off5p, length()); + int q = qual[off5p]; + assert_geq(q, 33); + return q-33; + } + +#ifndef NDEBUG + /** + * Check that read info is internally consistent. + */ + bool repOk() const { + if(patFw.empty()) return true; + assert_eq(qual.length(), patFw.length()); + return true; + } +#endif + + BTDnaString patFw; // forward-strand sequence + BTDnaString patFw_3N; + BTDnaString patRc; // reverse-complement sequence + BTDnaString patRc1; + BTString qual; // quality values + BTDnaString originalFw; // the forward-strand sequence from read (without editing) + BTDnaString originalRc; // the reverse-complement sequence from read (without editing) + + BTDnaString altPatFw[3]; + BTDnaString altPatRc[3]; + BTString altQual[3]; + + BTDnaString patFwRev; + BTDnaString patRcRev; + BTString qualRev; + + BTDnaString altPatFwRev[3]; + BTDnaString altPatRcRev[3]; + BTString altQualRev[3]; + + // For remembering the exact input text used to define a read + SStringExpandable readOrigBuf; + + BTString name; // read name + TReadId rdid; // 0-based id based on pair's offset in read file(s) + TReadId endid; // 0-based id based on pair's offset in read file(s) + // and which mate ("end") this is + int mate; // 0 = single-end, 1 = mate1, 2 = mate2 + uint32_t seed; // random seed + size_t ns_; // # Ns + int alts; // number of alternatives + bool fuzzy; // whether to employ fuzziness + bool color; // whether read is in color space + char primer; // primer base, for csfasta files + char trimc; // trimmed color, for csfasta files + char filter; // if read format permits filter char, set it here 
+ int trimmed5; // amount actually trimmed off 5' end + int trimmed3; // amount actually trimmed off 3' end + HitSet *hitset; // holds previously-found hits; for chaining + // for HISAT-3N + int threeN_cycle; + bool oppositeConversion_3N; +}; + +/** + * A string of FmStringOps represent a string of tasks performed by the + * best-first alignment search. We model the search as a series of FM ops + * interspersed with reported alignments. + */ +struct FmStringOp { + bool alignment; // true -> found an alignment + TAlScore pen; // penalty of the FM op or alignment + size_t n; // number of FM ops (only relevant for non-alignment) +}; + +/** + * A string that summarizes the progress of an FM-index-assistet best-first + * search. Useful for trying to figure out what the aligner is spending its + * time doing for a given read. + */ +struct FmString { + + /** + * Add one or more FM index ops to the op string + */ + void add(bool alignment, TAlScore pen, size_t nops) { + if(ops.empty() || ops.back().pen != pen) { + ops.expand(); + ops.back().alignment = alignment; + ops.back().pen = pen; + ops.back().n = 0; + } + ops.back().n++; + } + + /** + * Reset FmString to uninitialized state. + */ + void reset() { + pen = std::numeric_limits::max(); + ops.clear(); + } + + /** + * Print a :Z optional field where certain characters (whitespace, colon + * and percent) are escaped using % escapes. + */ + void print(BTString& o, char *buf) const { + for(size_t i = 0; i < ops.size(); i++) { + if(i > 0) { + o.append(';'); + } + if(ops[i].alignment) { + o.append("A,"); + itoa10(ops[i].pen, buf); + o.append(buf); + } else { + o.append("F,"); + itoa10(ops[i].pen, buf); o.append(buf); + o.append(','); + itoa10(ops[i].n, buf); o.append(buf); + } + } + } + + TAlScore pen; // current penalty + EList ops; // op string +}; + +/** + * Key per-read metrics. These are used for thresholds, allowing us to bail + * for unproductive reads. 
They also the basis of what's printed when the user + * specifies --read-times. + */ +struct PerReadMetrics { + + PerReadMetrics() { reset(); } + + void reset() { + nExIters = + nExDps = nExDpSuccs = nExDpFails = + nMateDps = nMateDpSuccs = nMateDpFails = + nExUgs = nExUgSuccs = nExUgFails = + nMateUgs = nMateUgSuccs = nMateUgFails = + nExEes = nExEeSuccs = nExEeFails = + nRedundants = + nEeFmops = nSdFmops = nExFmops = + nDpFail = nDpFailStreak = nDpLastSucc = + nUgFail = nUgFailStreak = nUgLastSucc = + nEeFail = nEeFailStreak = nEeLastSucc = + nFilt = 0; + nFtabs = 0; + nRedSkip = 0; + nRedFail = 0; + nRedIns = 0; + doFmString = false; + nSeedRanges = nSeedElts = 0; + nSeedRangesFw = nSeedEltsFw = 0; + nSeedRangesRc = nSeedEltsRc = 0; + seedMedian = seedMean = 0; + bestLtMinscMate1 = + bestLtMinscMate2 = std::numeric_limits::min(); + fmString.reset(); + } + + struct timeval tv_beg; // timer start to measure how long alignment takes + struct timezone tz_beg; // timer start to measure how long alignment takes + + uint64_t nExIters; // iterations of seed hit extend loop + + uint64_t nExDps; // # extend DPs run on this read + uint64_t nExDpSuccs; // # extend DPs run on this read + uint64_t nExDpFails; // # extend DPs run on this read + + uint64_t nExUgs; // # extend ungapped alignments run on this read + uint64_t nExUgSuccs; // # extend ungapped alignments run on this read + uint64_t nExUgFails; // # extend ungapped alignments run on this read + + uint64_t nExEes; // # extend ungapped alignments run on this read + uint64_t nExEeSuccs; // # extend ungapped alignments run on this read + uint64_t nExEeFails; // # extend ungapped alignments run on this read + + uint64_t nMateDps; // # mate DPs run on this read + uint64_t nMateDpSuccs; // # mate DPs run on this read + uint64_t nMateDpFails; // # mate DPs run on this read + + uint64_t nMateUgs; // # mate ungapped alignments run on this read + uint64_t nMateUgSuccs; // # mate ungapped alignments run on this read + uint64_t 
nMateUgFails; // # mate ungapped alignments run on this read + + uint64_t nRedundants; // # redundant seed hits + + uint64_t nSeedRanges; // # BW ranges found for seeds + uint64_t nSeedElts; // # BW elements found for seeds + + uint64_t nSeedRangesFw; // # BW ranges found for seeds from fw read + uint64_t nSeedEltsFw; // # BW elements found for seeds from fw read + + uint64_t nSeedRangesRc; // # BW ranges found for seeds from fw read + uint64_t nSeedEltsRc; // # BW elements found for seeds from fw read + + uint64_t seedMedian; // median seed hit count + uint64_t seedMean; // rounded mean seed hit count + + uint64_t nEeFmops; // FM Index ops for end-to-end alignment + uint64_t nSdFmops; // FM Index ops used to align seeds + uint64_t nExFmops; // FM Index ops used to resolve offsets + + uint64_t nFtabs; // # ftab lookups + uint64_t nRedSkip; // # times redundant path was detected and aborted + uint64_t nRedFail; // # times a path was deemed non-redundant + uint64_t nRedIns; // # times a path was added to redundancy list + + uint64_t nDpFail; // number of dp failures in a row up until now + uint64_t nDpFailStreak; // longest streak of dp failures + uint64_t nDpLastSucc; // index of last dp attempt that succeeded + + uint64_t nUgFail; // number of ungap failures in a row up until now + uint64_t nUgFailStreak; // longest streak of ungap failures + uint64_t nUgLastSucc; // index of last ungap attempt that succeeded + + uint64_t nEeFail; // number of ungap failures in a row up until now + uint64_t nEeFailStreak; // longest streak of ungap failures + uint64_t nEeLastSucc; // index of last ungap attempt that succeeded + + uint64_t nFilt; // # mates filtered + + TAlScore bestLtMinscMate1; // best invalid score observed for mate 1 + TAlScore bestLtMinscMate2; // best invalid score observed for mate 2 + + // For collecting information to go into an FM string + bool doFmString; + FmString fmString; +}; + +#endif /*READ_H_*/ diff --git a/read_qseq.cpp b/read_qseq.cpp new file 
mode 100644 index 0000000..ced6c68 --- /dev/null +++ b/read_qseq.cpp @@ -0,0 +1,304 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "pat.h" + +/** + * Parse a name from fb_ and store in r. Assume that the next + * character obtained via fb_.get() is the first character of + * the sequence and the string stops at the next char upto (could + * be tab, newline, etc.). 
+ */ +int QseqPatternSource::parseName( + Read& r, // buffer for mate 1 + Read* r2, // buffer for mate 2 (NULL if mate2 is read separately) + bool append, // true -> append characters, false -> skip them + bool clearFirst, // clear the name buffer first + bool warnEmpty, // emit a warning if nothing was added to the name + bool useDefault, // if nothing is read, put readCnt_ as a default value + int upto) // stop parsing when we first reach character 'upto' +{ + if(clearFirst) { + if(r2 != NULL) r2->name.clear(); + r.name.clear(); + } + while(true) { + int c; + if((c = fb_.get()) < 0) { + // EOF reached in the middle of the name + return -1; + } + if(c == '\n' || c == '\r') { + // EOL reached in the middle of the name + return -1; + } + if(c == upto) { + // Finished with field + break; + } + if(append) { + if(r2 != NULL) r2->name.append(c); + r.name.append(c); + } + } + // Set up a default name if one hasn't been set + if(r.name.empty() && useDefault && append) { + char cbuf[20]; + itoa10(readCnt_, cbuf); + r.name.append(cbuf); + if(r2 != NULL) r2->name.append(cbuf); + } + if(r.name.empty() && warnEmpty) { + cerr << "Warning: read had an empty name field" << endl; + } + return (int)r.name.length(); +} + +/** + * Parse a single sequence from fb_ and store in r. Assume + * that the next character obtained via fb_.get() is the first + * character of the sequence and the sequence stops at the next + * char upto (could be tab, newline, etc.). + */ +int QseqPatternSource::parseSeq( + Read& r, + int& charsRead, + int& trim5, + char upto) +{ + int begin = 0; + int c = fb_.get(); + assert(c != upto); + r.patFw.clear(); + r.color = gColor; + if(gColor) { + // NOTE: clearly this is not relevant for Illumina output, but + // I'm keeping it here in case there's some reason to put SOLiD + // data in this format in the future. + + // This may be a primer character. If so, keep it in the + // 'primer' field of the read buf and parse the rest of the + // read without it. 
+ c = toupper(c); + if(asc2dnacat[c] > 0) { + // First char is a DNA char + int c2 = toupper(fb_.peek()); + // Second char is a color char + if(asc2colcat[c2] > 0) { + r.primer = c; + r.trimc = c2; + trim5 += 2; // trim primer and first color + } + } + if(c < 0) { return -1; } + } + while(c != upto) { + if(c == '.') c = 'N'; + if(gColor) { + if(c >= '0' && c <= '4') c = "ACGTN"[(int)c - '0']; + } + if(isalpha(c)) { + assert_in(toupper(c), "ACGTN"); + if(begin++ >= trim5) { + assert_neq(0, asc2dnacat[c]); + r.patFw.append(asc2dna[c]); + } + charsRead++; + } + if((c = fb_.get()) < 0) { + return -1; + } + } + r.patFw.trimEnd(gTrim3); + return (int)r.patFw.length(); +} + +/** + * Parse a single quality string from fb_ and store in r. + * Assume that the next character obtained via fb_.get() is + * the first character of the quality string and the string stops + * at the next char upto (could be tab, newline, etc.). + */ +int QseqPatternSource::parseQuals( + Read& r, + int charsRead, + int dstLen, + int trim5, + char& c2, + char upto = '\t', + char upto2 = -1) +{ + int qualsRead = 0; + int c = 0; + if (intQuals_) { + // Probably not relevant + char buf[4096]; + while (qualsRead < charsRead) { + qualToks_.clear(); + if(!tokenizeQualLine(fb_, buf, 4096, qualToks_)) break; + for (unsigned int j = 0; j < qualToks_.size(); ++j) { + char c = intToPhred33(atoi(qualToks_[j].c_str()), solQuals_); + assert_geq(c, 33); + if (qualsRead >= trim5) { + r.qual.append(c); + } + ++qualsRead; + } + } // done reading integer quality lines + if (charsRead > qualsRead) tooFewQualities(r.name); + } else { + // Non-integer qualities + while((qualsRead < dstLen + trim5) && c >= 0) { + c = fb_.get(); + c2 = c; + if (c == ' ') wrongQualityFormat(r.name); + if(c < 0) { + // EOF occurred in the middle of a read - abort + return -1; + } + if(!isspace(c) && c != upto && (upto2 == -1 || c != upto2)) { + if (qualsRead >= trim5) { + c = charToPhred33(c, solQuals_, phred64Quals_); + assert_geq(c, 33); + 
r.qual.append(c); + } + qualsRead++; + } else { + break; + } + } + } + if(r.qual.length() < (size_t)dstLen) { + tooFewQualities(r.name); + } + // TODO: How to detect too many qualities?? + r.qual.resize(dstLen); + while(c != -1 && c != upto && (upto2 == -1 || c != upto2)) { + c = fb_.get(); + c2 = c; + } + return qualsRead; +} + +/** + * Read another pattern from a Qseq input file. + */ +bool QseqPatternSource::read( + Read& r, + TReadId& rdid, + TReadId& endid, + bool& success, + bool& done) +{ + r.reset(); + r.color = gColor; + success = true; + done = false; + readCnt_++; + rdid = endid = readCnt_-1; + peekOverNewline(fb_); + fb_.resetLastN(); + // 1. Machine name + if(parseName(r, NULL, true, true, true, false, '\t') == -1) BAIL_UNPAIRED(); + assert_neq('\t', fb_.peek()); + r.name.append('_'); + // 2. Run number + if(parseName(r, NULL, true, false, true, false, '\t') == -1) BAIL_UNPAIRED(); + assert_neq('\t', fb_.peek()); + r.name.append('_'); + // 3. Lane number + if(parseName(r, NULL, true, false, true, false, '\t') == -1) BAIL_UNPAIRED(); + assert_neq('\t', fb_.peek()); + r.name.append('_'); + // 4. Tile number + if(parseName(r, NULL, true, false, true, false, '\t') == -1) BAIL_UNPAIRED(); + assert_neq('\t', fb_.peek()); + r.name.append('_'); + // 5. X coordinate of spot + if(parseName(r, NULL, true, false, true, false, '\t') == -1) BAIL_UNPAIRED(); + assert_neq('\t', fb_.peek()); + r.name.append('_'); + // 6. Y coordinate of spot + if(parseName(r, NULL, true, false, true, false, '\t') == -1) BAIL_UNPAIRED(); + assert_neq('\t', fb_.peek()); + r.name.append('_'); + // 7. Index + if(parseName(r, NULL, true, false, true, false, '\t') == -1) BAIL_UNPAIRED(); + assert_neq('\t', fb_.peek()); + r.name.append('/'); + // 8. Mate number + if(parseName(r, NULL, true, false, true, false, '\t') == -1) BAIL_UNPAIRED(); + // Empty sequence?? 
+ if(fb_.peek() == '\t') { + // Get tab that separates seq from qual + ASSERT_ONLY(int c =) fb_.get(); + assert_eq('\t', c); + assert_eq('\t', fb_.peek()); + // Get tab that separates qual from filter + ASSERT_ONLY(c =) fb_.get(); + assert_eq('\t', c); + // Next char is first char of filter flag + assert_neq('\t', fb_.peek()); + fb_.resetLastN(); + cerr << "Warning: skipping empty QSEQ read with name '" << r.name << "'" << endl; + } else { + assert_neq('\t', fb_.peek()); + int charsRead = 0; + int mytrim5 = gTrim5; + // 9. Sequence + int dstLen = parseSeq(r, charsRead, mytrim5, '\t'); + assert_neq('\t', fb_.peek()); + if(dstLen < 0) BAIL_UNPAIRED(); + char ct = 0; + // 10. Qualities + if(parseQuals(r, charsRead, dstLen, mytrim5, ct, '\t', -1) < 0) BAIL_UNPAIRED(); + r.trimmed3 = gTrim3; + r.trimmed5 = mytrim5; + if(ct != '\t') { + cerr << "Error: QSEQ with name " << r.name << " did not have tab after qualities" << endl; + throw 1; + } + assert_eq(ct, '\t'); + } + // 11. Filter flag + int filt = fb_.get(); + if(filt == -1) BAIL_UNPAIRED(); + r.filter = filt; + if(filt != '0' && filt != '1') { + // Bad value for filt + } + if(fb_.peek() != -1 && fb_.peek() != '\n') { + // Bad value right after the filt field + } + fb_.get(); + r.readOrigBuf.install(fb_.lastN(), fb_.lastNLen()); + fb_.resetLastN(); + if(r.qual.length() < r.patFw.length()) { + tooFewQualities(r.name); + } else if(r.qual.length() > r.patFw.length()) { + tooManyQualities(r.name); + } +#ifndef NDEBUG + assert_eq(r.patFw.length(), r.qual.length()); + for(size_t i = 0; i < r.qual.length(); i++) { + assert_geq((int)r.qual[i], 33); + } +#endif + return true; +} diff --git a/ref_coord.cpp b/ref_coord.cpp new file mode 100644 index 0000000..738c6fa --- /dev/null +++ b/ref_coord.cpp @@ -0,0 +1,33 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "ref_coord.h" +#include + +using namespace std; + +ostream& operator<<(ostream& out, const Interval& c) { + out << c.upstream() << "+" << c.len(); + return out; +} + +ostream& operator<<(ostream& out, const Coord& c) { + out << c.ref() << ":" << c.off(); + return out; +} diff --git a/ref_coord.h b/ref_coord.h new file mode 100644 index 0000000..2967763 --- /dev/null +++ b/ref_coord.h @@ -0,0 +1,429 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef REF_COORD_H_ +#define REF_COORD_H_ + +#include +#include +#include +#include "assert_helpers.h" + +typedef int64_t TRefId; +typedef int64_t TRefOff; + +/** + * Encapsulates a reference coordinate; i.e. 
identifiers for (a) a + * reference sequence, and (b) a 0-based offset into that sequence. + */ +class Coord { + +public: + + Coord() { reset(); } + + Coord(const Coord& c) { init(c); } + + Coord(TRefId rf, TRefOff of, bool fw, TRefOff jof = 0) { init(rf, of, fw, jof); } + + /** + * Copy given fields into this Coord. + */ + void init(TRefId rf, TRefOff of, bool fw, TRefOff jof = 0) { + ref_ = rf; + off_ = of; + orient_ = (fw ? 1 : 0); + joinedOff_ = jof; + } + + /** + * Copy contents of given Coord into this one. + */ + void init(const Coord& c) { + ref_ = c.ref_; + off_ = c.off_; + orient_ = c.orient_; + joinedOff_ = c.joinedOff_; + } + + /** + * Return true iff this Coord is identical to the given Coord. + */ + bool operator==(const Coord& o) const { + assert(inited()); + assert(o.inited()); + return ref_ == o.ref_ && off_ == o.off_ && fw() == o.fw(); + } + + /** + * Return true iff this Coord is less than the given Coord. One Coord is + * less than another if (a) its reference id is less, (b) its orientation is + * less, or (c) its offset is less. + */ + bool operator<(const Coord& o) const { + if(ref_ < o.ref_) return true; + if(ref_ > o.ref_) return false; + if(orient_ < o.orient_) return true; + if(orient_ > o.orient_) return false; + if(off_ < o.off_) return true; + if(off_ > o.off_) return false; + return false; + } + + /** + * Return the opposite result from operator<. + */ + bool operator>=(const Coord& o) const { + return !((*this) < o); + } + + /** + * Return true iff this Coord is greater than the given Coord. One Coord + * is greater than another if (a) its reference id is greater, (b) its + * orientation is greater, or (c) its offset is greater. 
+ */ + bool operator>(const Coord& o) const { + if(ref_ > o.ref_) return true; + if(ref_ < o.ref_) return false; + if(orient_ > o.orient_) return true; + if(orient_ < o.orient_) return false; + if(off_ > o.off_) return true; + if(off_ < o.off_) return false; + return false; + } + + /** + * Return the opposite result from operator>. + */ + bool operator<=(const Coord& o) const { + return !((*this) > o); + } + + /** + * Reset this coord to uninitialized state. + */ + void reset() { + ref_ = std::numeric_limits::max(); + off_ = std::numeric_limits::max(); + orient_ = -1; + joinedOff_ = std::numeric_limits::max(); + } + + /** + * Return true iff this Coord is initialized (i.e. ref and off have both + * been set since the last call to reset()). + */ + bool inited() const { + if(ref_ != std::numeric_limits::max() && + off_ != std::numeric_limits::max()) + { + assert(orient_ == 0 || orient_ == 1); + return true; + } + return false; + } + + /** + * Get orientation of the Coord. + */ + bool fw() const { + assert(inited()); + assert(orient_ == 0 || orient_ == 1); + return orient_ == 1; + } + +#ifndef NDEBUG + /** + * Check that coord is internally consistent. + */ + bool repOk() const { + if(ref_ != std::numeric_limits::max() && + off_ != std::numeric_limits::max()) + { + assert(orient_ == 0 || orient_ == 1); + } + return true; + } +#endif + + /** + * Check whether an interval defined by this coord and having + * length 'len' is contained within an interval defined by + * 'inbegin' and 'inend'. 
+ */ + bool within(int64_t len, int64_t inbegin, int64_t inend) const { + return off_ >= inbegin && off_ + len <= inend; + } + + inline TRefId ref() const { return ref_; } + inline TRefOff off() const { return off_; } + inline int orient() const { return orient_; } + inline TRefOff joinedOff() const { return joinedOff_; } + + inline void setRef(TRefId id) { ref_ = id; } + inline void setOff(TRefOff off) { off_ = off; } + inline void setJoinedOff(TRefOff joinedOff) { joinedOff_ = joinedOff; } + + inline void adjustOff(TRefOff off) { off_ += off; } + +protected: + + TRefId ref_; // which reference? + TRefOff off_; // 0-based offset into reference + int orient_; // true -> Watson strand + TRefOff joinedOff_; // offset in a joined ref. sequence +}; + +std::ostream& operator<<(std::ostream& out, const Coord& c); + +/** + * Encapsulates a reference interval, which consists of a Coord and a length. + */ +class Interval { + +public: + + Interval() { reset(); } + + explicit Interval(const Coord& upstream, TRefOff len) { + init(upstream, len); + } + + explicit Interval(TRefId rf, TRefOff of, bool fw, TRefOff len) { + init(rf, of, fw, len); + } + + void init(const Coord& upstream, TRefOff len) { + upstream_ = upstream; + len_ = len; + } + + void init(TRefId rf, TRefOff of, bool fw, TRefOff len) { + upstream_.init(rf, of, fw); + len_ = len; + } + + /** + * Set offset. + */ + void setOff(TRefOff of) { + upstream_.setOff(of); + } + + /** + * Set length. + */ + void setLen(TRefOff len) { + len_ = len; + } + + /** + * Reset this interval to uninitialized state. + */ + void reset() { + upstream_.reset(); + len_ = 0; + } + + /** + * Return true iff this Interval is initialized. + */ + bool inited() const { + if(upstream_.inited()) { + assert_gt(len_, 0); + return true; + } else { + return false; + } + } + + /** + * Return true iff this Interval is equal to the given Interval, + * i.e. if they cover the same set of positions. 
+ */ + bool operator==(const Interval& o) const { + return upstream_ == o.upstream_ && + len_ == o.len_; + } + + /** + * Return true iff this Interval is less than the given Interval. + * One interval is less than another if its upstream location is + * prior to the other's or, if their upstream locations are equal, + * if its length is less than the other's. + */ + bool operator<(const Interval& o) const { + if(upstream_ < o.upstream_) return true; + if(upstream_ > o.upstream_) return false; + if(len_ < o.len_) return true; + return false; + } + + /** + * Return opposite result from operator<. + */ + bool operator>=(const Interval& o) const { + return !((*this) < o); + } + + /** + * Return true iff this Interval is greater than than the given + * Interval. One interval is greater than another if its upstream + * location is after the other's or, if their upstream locations + * are equal, if its length is greater than the other's. + */ + bool operator>(const Interval& o) const { + if(upstream_ > o.upstream_) return true; + if(upstream_ < o.upstream_) return false; + if(len_ > o.len_) return true; + return false; + } + + /** + * Return opposite result from operator>. + */ + bool operator<=(const Interval& o) const { + return !((*this) > o); + } + + /** + * Set upstream Coord. + */ + void setUpstream(const Coord& c) { + upstream_ = c; + } + + /** + * Set length. + */ + void setLength(TRefOff l) { + len_ = l; + } + + inline TRefId ref() const { return upstream_.ref(); } + inline TRefOff off() const { return upstream_.off(); } + inline TRefOff dnoff() const { return upstream_.off() + len_; } + inline int orient() const { return upstream_.orient(); } + + /** + * Return a Coord encoding the coordinate just past the downstream edge of + * the interval. + */ + inline Coord downstream() const { + return Coord( + upstream_.ref(), + upstream_.off() + len_, + upstream_.orient()); + } + + /** + * Return true iff the given Coord is inside this Interval. 
+ */ + inline bool contains(const Coord& c) const { + return + c.ref() == ref() && + c.orient() == orient() && + c.off() >= off() && + c.off() < dnoff(); + } + + /** + * Return true iff the given Coord is inside this Interval, without + * requiring orientations to match. + */ + inline bool containsIgnoreOrient(const Coord& c) const { + return + c.ref() == ref() && + c.off() >= off() && + c.off() < dnoff(); + } + + /** + * Return true iff the given Interval is inside this Interval. + */ + inline bool contains(const Interval& c) const { + return + c.ref() == ref() && + c.orient() == orient() && + c.off() >= off() && + c.dnoff() <= dnoff(); + } + + /** + * Return true iff the given Interval is inside this Interval, without + * requiring orientations to match. + */ + inline bool containsIgnoreOrient(const Interval& c) const { + return + c.ref() == ref() && + c.off() >= off() && + c.dnoff() <= dnoff(); + } + + /** + * Return true iff the given Interval overlaps this Interval. + */ + inline bool overlaps(const Interval& c) const { + return + c.ref() == upstream_.ref() && + c.orient() == upstream_.orient() && + ((off() <= c.off() && dnoff() > c.off()) || + (off() <= c.dnoff() && dnoff() > c.dnoff()) || + (c.off() <= off() && c.dnoff() > off()) || + (c.off() <= dnoff() && c.dnoff() > dnoff())); + } + + /** + * Return true iff the given Interval overlaps this Interval, without + * requiring orientations to match. + */ + inline bool overlapsIgnoreOrient(const Interval& c) const { + return + c.ref() == upstream_.ref() && + ((off() <= c.off() && dnoff() > c.off()) || + (off() <= c.dnoff() && dnoff() > c.dnoff()) || + (c.off() <= off() && c.dnoff() > off()) || + (c.off() <= dnoff() && c.dnoff() > dnoff())); + } + + inline const Coord& upstream() const { return upstream_; } + inline TRefOff len() const { return len_; } + +#ifndef NDEBUG + /** + * Check that the Interval is internally consistent. 
+ */ + bool repOk() const { + assert(upstream_.repOk()); + assert_geq(len_, 0); + return true; + } +#endif + + inline void adjustOff(TRefOff off) { upstream_.adjustOff(off); } + +protected: + + Coord upstream_; + TRefOff len_; +}; + +std::ostream& operator<<(std::ostream& out, const Interval& c); + +#endif /*ndef REF_COORD_H_*/ diff --git a/ref_read.cpp b/ref_read.cpp new file mode 100644 index 0000000..1ffd8fe --- /dev/null +++ b/ref_read.cpp @@ -0,0 +1,454 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "ref_read.h" + +/** + * Reads past the next ambiguous or unambiguous stretch of sequence + * from the given FASTA file and returns its length. Does not do + * anything with the sequence characters themselves; this is purely for + * measuring lengths. 
+ */ +RefRecord fastaRefReadSize( + FileBuf& in, + const RefReadInParams& rparms, + bool first, + BitpairOutFileBuf* bpout, + string* name // put parsed FASTA name here + ) +{ + int c; + static int lastc = '>'; // last character seen + + // RefRecord params + TIndexOffU len = 0; // 'len' counts toward total length + // 'off' counts number of ambiguous characters before first + // unambiguous character + size_t off = 0; + + // Pick off the first carat and any preceding whitespace + if(first) { + assert(!in.eof()); + lastc = '>'; + c = in.getPastWhitespace(); + if(in.eof()) { + // Got eof right away; emit warning + cerr << "Warning: Empty input file" << endl; + lastc = -1; + return RefRecord(0, 0, true); + } + assert(c == '>'); + } + + first = true; + // Skip to the end of the id line; if the next line is either + // another id line or a comment line, keep skipping + if(lastc == '>') { +#if 0 + // Skip to the end of the name line + do { + if((c = in.getPastNewline()) == -1) { + // No more input + cerr << "Warning: Encountered empty reference sequence" << endl; + lastc = -1; + return RefRecord(0, 0, true); + } + if(c == '>') { + cerr << "Warning: Encountered empty reference sequence" << endl; + } + // continue until a non-name, non-comment line + } while (c == '>'); +#else + c = lastc; + // Skip to the end of the name line + do { + // get name + + while (true) { + c = in.get(); + if (c == -1) { + // No more input + cerr << "Warning: Encountered empty reference sequence" << endl; + lastc = -1; + return RefRecord(0, 0, true); + } + + if(c == '\n' || c == '\r') { + while(c == '\r' || c == '\n') c = in.get(); + if (c == -1) { + // No more input + cerr << "Warning: Encountered empty reference sequence" << endl; + lastc = -1; + return RefRecord(0, 0, true); + } + break; + } + + if (name) name->push_back(c); + } + + if (c == '>') { + cerr << "Warning: Encountered empty reference sequence" << endl; + // If there's another name line immediately after this one, + // discard the 
previous name and start fresh with the new one + if (name) name->clear(); + } + } while (c == '>'); +#endif + } else { + first = false; // not the first in a sequence + off = 1; // The gap has already been consumed, so count it + if((c = in.get()) == -1) { + // Don't emit a warning, since this might legitimately be + // a gap on the end of the final sequence in the file + lastc = -1; + return RefRecord((TIndexOffU)off, (TIndexOffU)len, first); + } + } + + // Now skip to the first DNA character, counting gap characters + // as we go + int lc = -1; // last-DNA char variable for color conversion + while(true) { + int cat = asc2dnacat[c]; + if(rparms.nsToAs && cat >= 2) c = 'A'; + if(cat == 1) { + // This is a DNA character + if(rparms.color) { + if(lc != -1) { + // Got two consecutive unambiguous DNAs + break; // to read-in loop + } + // Keep going; we need two consecutive unambiguous DNAs + lc = asc2dna[(int)c]; + // The 'if(off > 0)' takes care of the case where + // the reference is entirely unambiguous and we don't + // want to incorrectly increment off. 
+ if(off > 0) off++; + } else { + break; // to read-in loop + } + } else if(cat >= 2) { + if(lc != -1 && off == 0) off++; + lc = -1; + off++; // skip over gap character and increment + } else if(c == '>') { + if(off > 0 && lastc == '>') { + cerr << "Warning: Encountered reference sequence with only gaps" << endl; + } else if(lastc == '>') { + cerr << "Warning: Encountered empty reference sequence" << endl; + } + lastc = '>'; + //return RefRecord(off, 0, false); + return RefRecord((TIndexOffU)off, 0, first); + } + c = in.get(); + if(c == -1) { + // End-of-file + if(off > 0 && lastc == '>') { + cerr << "Warning: Encountered reference sequence with only gaps" << endl; + } else if(lastc == '>') { + cerr << "Warning: Encountered empty reference sequence" << endl; + } + lastc = -1; + //return RefRecord(off, 0, false); + return RefRecord((TIndexOffU)off, 0, first); + } + } + assert(!rparms.color || (lc != -1)); + assert_eq(1, asc2dnacat[c]); // C must be unambiguous base + if(off > 0 && rparms.color && first) { + // Handle the case where the first record has ambiguous + // characters but we're in color space; one of those counts is + // spurious + off--; + } + + // in now points just past the first character of a sequence + // line, and c holds the first character + while(c != -1 && c != '>') { + if(rparms.nsToAs && asc2dnacat[c] >= 2) c = 'A'; + uint8_t cat = asc2dnacat[c]; + int cc = toupper(c); + if(rparms.bisulfite && cc == 'C') c = cc = 'T'; + if(cat == 1) { + // It's a DNA character + assert(cc == 'A' || cc == 'C' || cc == 'G' || cc == 'T'); + // Check for overflow + if((TIndexOffU)(len + 1) < len) { + throw RefTooLongException(); + } + // Consume it + len++; + // Output it + if(bpout != NULL) { + if(rparms.color) { + // output color + bpout->write(dinuc2color[asc2dna[(int)c]][lc]); + } else if(!rparms.color) { + // output nucleotide + bpout->write(asc2dna[c]); + } + } + lc = asc2dna[(int)c]; + } else if(cat >= 2) { + // It's an N or a gap + lastc = c; + assert(cc 
!= 'A' && cc != 'C' && cc != 'G' && cc != 'T'); + return RefRecord((TIndexOffU)off, (TIndexOffU)len, first); + } else { + // Not DNA and not a gap, ignore it +#ifndef NDEBUG + if(!isspace(c)) { + cerr << "Unexpected character in sequence: "; + if(isprint(c)) { + cerr << ((char)c) << endl; + } else { + cerr << "(" << c << ")" << endl; + } + } +#endif + } + c = in.get(); + } + lastc = c; + return RefRecord((TIndexOffU)off, (TIndexOffU)len, first); +} + +#if 0 +static void +printRecords(ostream& os, const EList& l) { + for(size_t i = 0; i < l.size(); i++) { + os << l[i].first << ", " << l[i].off << ", " << l[i].len << endl; + } +} +#endif + +/** + * Reverse the 'src' list of RefRecords into the 'dst' list. Don't + * modify 'src'. + */ +void reverseRefRecords( + const EList& src, + EList& dst, + bool recursive, + bool verbose) +{ + dst.clear(); + { + EList cur; + for(int i = (int)src.size()-1; i >= 0; i--) { + bool first = (i == (int)src.size()-1 || src[i+1].first); + // Clause after the || on next line is to deal with empty FASTA + // records at the end of the 'src' list, which would be wrongly + // omitted otherwise. 
+ if(src[i].len || (first && src[i].off == 0)) { + cur.push_back(RefRecord(0, src[i].len, first)); + first = false; + } + if(src[i].off) cur.push_back(RefRecord(src[i].off, 0, first)); + } + for(int i = 0; i < (int)cur.size(); i++) { + assert(cur[i].off == 0 || cur[i].len == 0); + if(i < (int)cur.size()-1 && cur[i].off != 0 && !cur[i+1].first) { + dst.push_back(RefRecord(cur[i].off, cur[i+1].len, cur[i].first)); + i++; + } else { + dst.push_back(cur[i]); + } + } + } + //if(verbose) { + // cout << "Source: " << endl; + // printRecords(cout, src); + // cout << "Dest: " << endl; + // printRecords(cout, dst); + //} +#ifndef NDEBUG + size_t srcnfirst = 0, dstnfirst = 0; + for(size_t i = 0; i < src.size(); i++) { + if(src[i].first) { + srcnfirst++; + } + } + for(size_t i = 0; i < dst.size(); i++) { + if(dst[i].first) { + dstnfirst++; + } + } + assert_eq(srcnfirst, dstnfirst); + if(!recursive) { + EList tmp; + reverseRefRecords(dst, tmp, true); + assert_eq(tmp.size(), src.size()); + for(size_t i = 0; i < src.size(); i++) { + assert_eq(src[i].len, tmp[i].len); + assert_eq(src[i].off, tmp[i].off); + assert_eq(src[i].first, tmp[i].first); + } + } +#endif +} + +/** + * Calculate a vector containing the sizes of all of the patterns in + * all of the given input files, in order. Returns the total size of + * all references combined. Rewinds each istream before returning. 
+ */ +std::pair +fastaRefReadSizes( + EList& in, + EList& recs, + const RefReadInParams& rparms, + BitpairOutFileBuf* bpout, + TIndexOff& numSeqs) +{ + TIndexOffU unambigTot = 0; + size_t bothTot = 0; + assert_gt(in.size(), 0); + // For each input istream + for(size_t i = 0; i < in.size(); i++) { + bool first = true; + assert(!in[i]->eof()); + // For each pattern in this istream + while(!in[i]->eof()) { + RefRecord rec; + try { + rec = fastaRefReadSize(*in[i], rparms, first, bpout); + if((unambigTot + rec.len) < unambigTot) { + throw RefTooLongException(); + } + } + catch(RefTooLongException& e) { + cerr << e.what() << endl; + throw 1; + } + // Add the length of this record. + if(rec.first) numSeqs++; + unambigTot += rec.len; + bothTot += rec.len; + bothTot += rec.off; + first = false; + if(rec.len == 0 && rec.off == 0 && !rec.first) continue; + recs.push_back(rec); + } + // Reset the input stream + in[i]->reset(); + assert(!in[i]->eof()); +#ifndef NDEBUG + // Check that it's really reset + int c = in[i]->get(); + assert_eq('>', c); + in[i]->reset(); + assert(!in[i]->eof()); +#endif + } + + // Remove empty reference sequences + for(int64_t i = 0; (size_t)i < recs.size(); i++) { + const RefRecord& rec = recs[i]; + if(rec.first && rec.len == 0) { + if(i + 1 >= recs.size() || recs[i+1].first) { + bothTot -= rec.len; + bothTot -= rec.off; + recs.erase(i); + i -= 1; + } + } + } + + assert_geq(bothTot, 0); + assert_geq(unambigTot, 0); + return make_pair( + unambigTot, // total number of unambiguous DNA characters read + bothTot); // total number of DNA characters read, incl. 
ambiguous ones +} + + +std::pair +fastaRefReadFragsNames( + EList& in, + EList& recs, + const RefReadInParams& rparms, + BitpairOutFileBuf* bpout, + TIndexOff& numSeqs, + EList& names) +{ + TIndexOffU unambigTot = 0; + size_t bothTot = 0; + assert_gt(in.size(), 0); + // For each input istream + for(size_t i = 0; i < in.size(); i++) { + bool first = true; + assert(!in[i]->eof()); + // For each pattern in this istream + while(!in[i]->eof()) { + RefRecord rec; + string name; + try { + rec = fastaRefReadSize(*in[i], rparms, first, bpout, &name); + if((unambigTot + rec.len) < unambigTot) { + throw RefTooLongException(); + } + } + catch(RefTooLongException& e) { + cerr << e.what() << endl; + throw 1; + } + // Add the length of this record. + if(rec.first) numSeqs++; + if(rec.first) names.push_back(name); + unambigTot += rec.len; + bothTot += rec.len; + bothTot += rec.off; + first = false; + if(rec.len == 0 && rec.off == 0 && !rec.first) continue; + recs.push_back(rec); + } + // Reset the input stream + in[i]->reset(); + assert(!in[i]->eof()); +#ifndef NDEBUG + // Check that it's really reset + int c = in[i]->get(); + assert_eq('>', c); + in[i]->reset(); + assert(!in[i]->eof()); +#endif + } + + // Remove empty reference sequences + for(int64_t i = 0; (size_t)i < recs.size(); i++) { + const RefRecord& rec = recs[i]; + if(rec.first && rec.len == 0) { + if(i + 1 >= recs.size() || recs[i+1].first) { + bothTot -= rec.len; + bothTot -= rec.off; + recs.erase(i); + i -= 1; + } + } + } + + assert_geq(bothTot, 0); + assert_geq(unambigTot, 0); + return make_pair( + unambigTot, // total number of unambiguous DNA characters read + bothTot); // total number of DNA characters read, incl. ambiguous ones +} diff --git a/ref_read.h b/ref_read.h new file mode 100644 index 0000000..9791e4f --- /dev/null +++ b/ref_read.h @@ -0,0 +1,325 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef REF_READ_H_ +#define REF_READ_H_ + +#include +#include +#include +#include +#include +#include +#include "alphabet.h" +#include "assert_helpers.h" +#include "filebuf.h" +#include "word_io.h" +#include "ds.h" +#include "endian_swap.h" + +using namespace std; + +class RefTooLongException : public exception { + +public: + RefTooLongException() { +#ifdef BOWTIE_64BIT_INDEX + // This should never happen! + msg = "Error: Reference sequence has more than 2^64-1 characters! " + "Please divide the reference into smaller chunks and index each " + "independently."; +#else + msg = "Error: Reference sequence has more than 2^32-1 characters! " + "Please build a large index by passing the --large-index option " + "to bowtie2-build"; +#endif + } + + ~RefTooLongException() throw() {} + + const char* what() const throw() { + return msg.c_str(); + } + +protected: + + string msg; + +}; + +/** + * Encapsulates a stretch of the reference containing only unambiguous + * characters. From an ordered list of RefRecords, one can (almost) + * deduce the "shape" of the reference sequences (almost because we + * lose information about stretches of ambiguous characters at the end + * of reference sequences). 
+ */ +struct RefRecord { + RefRecord() : off(), len(), first() { } + RefRecord(TIndexOffU _off, TIndexOffU _len, bool _first) : + off(_off), len(_len), first(_first) + { } + + RefRecord(FILE *in, bool swap) { + assert(in != NULL); + if(!fread(&off, OFF_SIZE, 1, in)) { + cerr << "Error reading RefRecord offset from FILE" << endl; + throw 1; + } + if(swap) off = endianSwapIndex(off); + if(!fread(&len, OFF_SIZE, 1, in)) { + cerr << "Error reading RefRecord offset from FILE" << endl; + throw 1; + } + if(swap) len = endianSwapIndex(len); + first = fgetc(in) ? true : false; + } + + void write(std::ostream& out, bool be) { + writeIndex(out, off, be); + writeIndex(out, len, be); + out.put(first ? 1 : 0); + } + + TIndexOffU off; /// Offset of the first character in the record + TIndexOffU len; /// Length of the record + bool first; /// Whether this record is the first for a reference sequence +}; + +enum { + REF_READ_FORWARD = 0, // don't reverse reference sequence + REF_READ_REVERSE, // reverse entire reference sequence + REF_READ_REVERSE_EACH, // reverse each unambiguous stretch of reference + REF_READ_REVERSE_REPLACEMENT, // reverse the entire reference sequence and replace specific base by others. +}; + +/** + * Parameters governing treatment of references as they're read in. 
+ */ +struct RefReadInParams { + RefReadInParams(bool col, int r, bool nsToA, bool bisulf) : + color(col), reverse(r), nsToAs(nsToA), bisulfite(bisulf) { } + // extract colors from reference + bool color; + // reverse each reference sequence before passing it along + int reverse; + // convert ambiguous characters to As + bool nsToAs; + // bisulfite-convert the reference + bool bisulfite; +}; + +extern RefRecord +fastaRefReadSize( + FileBuf& in, + const RefReadInParams& rparms, + bool first, + BitpairOutFileBuf* bpout, + string *name = NULL); + +extern std::pair +fastaRefReadSizes( + EList& in, + EList& recs, + const RefReadInParams& rparms, + BitpairOutFileBuf* bpout, + TIndexOff& numSeqs); + +extern std::pair +fastaRefReadFragsNames( + EList& in, + EList& recs, + const RefReadInParams& rparms, + BitpairOutFileBuf* bpout, + TIndexOff& numSeqs, + EList& names); + +extern void +reverseRefRecords( + const EList& src, + EList& dst, + bool recursive = false, + bool verbose = false); + +/** + * Reads the next sequence from the given FASTA file and appends it to + * the end of dst, optionally reversing it. 
+ */ +template +static RefRecord fastaRefReadAppend( + FileBuf& in, // input file + bool first, // true iff this is the first record in the file + TStr& dst, // destination buf for parsed characters + TIndexOffU& dstoff, // index of next character in dst to assign + RefReadInParams& rparms, // + string* name = NULL) // put parsed FASTA name here +{ + int c; + static int lastc = '>'; + if(first) { + c = in.getPastWhitespace(); + if(c != '>') { + cerr << "Reference file does not seem to be a FASTA file" << endl; + throw 1; + } + lastc = c; + } + assert_neq(-1, lastc); + + // RefRecord params + size_t len = 0; + size_t off = 0; + first = true; + + size_t ilen = dstoff; + + // Chew up the id line; if the next line is either + // another id line or a comment line, keep chewing + int lc = -1; // last-DNA char variable for color conversion + c = lastc; + if(c == '>' || c == '#') { + do { + while (c == '#') { + if((c = in.getPastNewline()) == -1) { + lastc = -1; + goto bail; + } + } + assert_eq('>', c); + while(true) { + c = in.get(); + if(c == -1) { + lastc = -1; + goto bail; + } + if(c == '\n' || c == '\r') { + while(c == '\r' || c == '\n') c = in.get(); + if(c == -1) { + lastc = -1; + goto bail; + } + break; + } + if (name) name->push_back(c); + } + // c holds the first character on the line after the name + // line + if(c == '>') { + // If there's another name line immediately after this one, + // discard the previous name and start fresh with the new one + if (name) name->clear(); + } + } while (c == '>' || c == '#'); + } else { + ASSERT_ONLY(int cc = toupper(c)); + assert(cc != 'A' && cc != 'C' && cc != 'G' && cc != 'T'); + first = false; + } + + // Skip over an initial stretch of gaps or ambiguous characters. + // For colorspace we skip until we see two consecutive unambiguous + // characters (i.e. the first unambiguous color). 
+ while(true) { + int cat = asc2dnacat[c]; + if(rparms.nsToAs && cat >= 2) { + c = 'A'; + } + int cc = toupper(c); + if(rparms.bisulfite && cc == 'C') c = cc = 'T'; + if(cat == 1) { + // This is a DNA character + if(rparms.color) { + if(lc != -1) { + // Got two consecutive unambiguous DNAs + break; // to read-in loop + } + // Keep going; we need two consecutive unambiguous DNAs + lc = asc2dna[(int)c]; + // The 'if(off > 0)' takes care of the case where + // the reference is entirely unambiguous and we don't + // want to incorrectly increment off. + if(off > 0) off++; + } else { + break; // to read-in loop + } + } else if(cat >= 2) { + if(lc != -1 && off == 0) { + off++; + } + lc = -1; + off++; // skip it + } else if(c == '>') { + lastc = '>'; + goto bail; + } + c = in.get(); + if(c == -1) { + lastc = -1; + goto bail; + } + } + if(first && rparms.color && off > 0) { + // Handle the case where the first record has ambiguous + // characters but we're in color space; one of those counts is + // spurious + off--; + } + assert(!rparms.color || lc != -1); + assert_eq(1, asc2dnacat[c]); + + // in now points just past the first character of a sequence + // line, and c holds the first character + while(true) { + // Note: can't have a comment in the middle of a sequence, + // though a comment can end a sequence + int cat = asc2dnacat[c]; + assert_neq(2, cat); + if(cat == 1) { + // Consume it + if(!rparms.color || lc != -1) len++; + // Add it to reference buffer + if(rparms.color) { + dst.set((char)dinuc2color[asc2dna[(int)c]][lc], dstoff++); + } else if(!rparms.color) { + dst.set(asc2dna[c], dstoff++); + } + assert_lt((int)dst[dstoff-1], 4); + lc = asc2dna[(int)c]; + } + c = in.get(); + if(rparms.nsToAs && asc2dnacat[c] >= 2) c = 'A'; + if (c == -1 || c == '>' || c == '#' || asc2dnacat[c] >= 2) { + lastc = c; + break; + } + if(rparms.bisulfite && toupper(c) == 'C') c = 'T'; + } + + bail: + // Optionally reverse the portion that we just appended. 
+ // ilen = length of buffer before this last sequence was appended. + if(rparms.reverse == REF_READ_REVERSE_EACH) { + // Find limits of the portion we just appended + size_t nlen = dstoff; + dst.reverseWindow(ilen, nlen); + } + return RefRecord((TIndexOffU)off, (TIndexOffU)len, first); +} + +#endif /*ndef REF_READ_H_*/ diff --git a/reference.cpp b/reference.cpp new file mode 100644 index 0000000..a897a4a --- /dev/null +++ b/reference.cpp @@ -0,0 +1,722 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +#include +#include "reference.h" +#include "mem_ids.h" + +using namespace std; + +/** + * Load from .3.gfm_ext/.4.gfm_ext HISAT2 index files. + */ +BitPairReference::BitPairReference( + const string& in, + const EList* included, + bool color, + bool sanity, + EList* infiles, + EList >* origs, + bool infilesSeq, + bool useMm, + bool useShmem, + bool mmSweep, + bool verbose, + bool startVerbose) : + buf_(NULL), + sanityBuf_(NULL), + loaded_(true), + sanity_(sanity), + useMm_(useMm), + useShmem_(useShmem), + verbose_(verbose) +{ + string s3 = in + ".3." + gfm_ext; + string s4 = in + ".4." + gfm_ext; + + FILE *f3, *f4; + if((f3 = fopen(s3.c_str(), "rb")) == NULL) { + cerr << "Could not open reference-string index file " << s3 << " for reading." 
<< endl; + cerr << "This is most likely because your index was built with an older version" << endl + << "(<= 0.9.8.1) of bowtie-build. Please re-run bowtie-build to generate a new" << endl + << "index (or download one from the Bowtie website) and try again." << endl; + loaded_ = false; + return; + } + if((f4 = fopen(s4.c_str(), "rb")) == NULL) { + cerr << "Could not open reference-string index file " << s4 << " for reading." << endl; + loaded_ = false; + return; + } +#ifdef BOWTIE_MM + char *mmFile = NULL; + if(useMm_) { + if(verbose_ || startVerbose) { + cerr << " Memory-mapping reference index file " << s4.c_str() << ": "; + logTime(cerr); + } + struct stat sbuf; + if (stat(s4.c_str(), &sbuf) == -1) { + perror("stat"); + cerr << "Error: Could not stat index file " << s4.c_str() << " prior to memory-mapping" << endl; + throw 1; + } + mmFile = (char*)mmap((void *)0, (size_t)sbuf.st_size, + PROT_READ, MAP_SHARED, fileno(f4), 0); + if(mmFile == (void *)(-1) || mmFile == NULL) { + perror("mmap"); + cerr << "Error: Could not memory-map the index file " << s4.c_str() << endl; + throw 1; + } + if(mmSweep) { + TIndexOff sum = 0; + for(off_t i = 0; i < sbuf.st_size; i += 1024) { + sum += (TIndexOff) mmFile[i]; + } + if(startVerbose) { + cerr << " Swept the memory-mapped ref index file; checksum: " << sum << ": "; + logTime(cerr); + } + } + } +#endif + + // Read endianness sentinel, set 'swap' + uint32_t one; + bool swap = false; + one = readIndex(f3, swap); + if(one != 1) { + if(useMm_) { + cerr << "Error: Can't use memory-mapped files when the index is the opposite endianness" << endl; + throw 1; + } + assert_eq(0x1000000, one); + swap = true; // have to endian swap U32s + } + + // Read # records + TIndexOffU sz; + sz = readIndex(f3, swap); + if(sz == 0) { + cerr << "Error: number of reference records is 0 in " << s3.c_str() << endl; + throw 1; + } + + // Read records + nrefs_ = 0; + + // Cumulative count of all unambiguous characters on a per- + // stretch 8-bit 
alignment (i.e. count of bytes we need to + // allocate in buf_) + TIndexOffU cumsz = 0; + TIndexOffU cumlen = 0; + + EList seq_poss; + TIndexOffU seq_cumpos = 0; + TIndexOffU skips = 0; + // For each unambiguous stretch... + for(TIndexOffU i = 0; i < sz; i++) { + recs_.push_back(RefRecord(f3, swap)); + if(included != NULL && !(*included)[i]) { + seq_cumpos += recs_.back().len; + recs_.pop_back(); + skips++; + continue; + } + seq_poss.push_back(seq_cumpos); + if(recs_.back().first) { + // This is the first record for this reference sequence (and the + // last record for the one before) + refRecOffs_.push_back((TIndexOffU)recs_.size()-1); + // refOffs_ links each reference sequence with the total number of + // unambiguous characters preceding it in the pasted reference + refOffs_.push_back(cumsz); + if(nrefs_ > 0) { + // refLens_ links each reference sequence with the total number + // of ambiguous and unambiguous characters in it. + refLens_.push_back(cumlen); + } + cumlen = 0; + nrefs_++; + } else if(i == 0) { + cerr << "First record in reference index file was not marked as " + << "'first'" << endl; + throw 1; + } + cumUnambig_.push_back(cumsz); + cumRefOff_.push_back(cumlen); + cumsz += recs_.back().len; + cumlen += recs_.back().off; + cumlen += recs_.back().len; + seq_cumpos += recs_.back().len; + } + if(verbose_ || startVerbose) { + cerr << "Read " << nrefs_ << " reference strings from " + << sz << " records: "; + logTime(cerr); + } + // Store a cap entry for the end of the last reference seq + refRecOffs_.push_back((TIndexOffU)recs_.size()); + refOffs_.push_back(cumsz); + refLens_.push_back(cumlen); + cumUnambig_.push_back(cumsz); + cumRefOff_.push_back(cumlen); + bufSz_ = cumsz; + assert_eq(nrefs_, refLens_.size()); + assert_eq(sz, recs_.size() + skips); + if (f3 != NULL) fclose(f3); // done with .3.gfm_ext file + + // Round cumsz up to nearest byte boundary + if((cumsz & 3) != 0) { + cumsz += (4 - (cumsz & 3)); + } + bufAllocSz_ = cumsz >> 2; + 
assert_eq(0, cumsz & 3); // should be rounded up to nearest 4 + if(useMm_) { +#ifdef BOWTIE_MM + buf_ = (uint8_t*)mmFile; + if(sanity_) { + FILE *ftmp = fopen(s4.c_str(), "rb"); + sanityBuf_ = new uint8_t[cumsz >> 2]; + size_t ret = fread(sanityBuf_, 1, cumsz >> 2, ftmp); + if(ret != (cumsz >> 2)) { + cerr << "Only read " << ret << " bytes (out of " << (cumsz >> 2) << ") from reference index file " << s4.c_str() << endl; + throw 1; + } + fclose(ftmp); + for(size_t i = 0; i < (cumsz >> 2); i++) { + assert_eq(sanityBuf_[i], buf_[i]); + } + } +#else + cerr << "Shouldn't be at " << __FILE__ << ":" << __LINE__ << " without BOWTIE_MM defined" << endl; + throw 1; +#endif + } else { + bool shmemLeader = true; + if(!useShmem_) { + // Allocate a buffer to hold the reference string + try { + buf_ = new uint8_t[cumsz >> 2]; + if(buf_ == NULL) throw std::bad_alloc(); + } catch(std::bad_alloc& e) { + cerr << "Error: Ran out of memory allocating space for the bitpacked reference. Please" << endl + << "re-run on a computer with more memory." << endl; + throw 1; + } + } else { + shmemLeader = ALLOC_SHARED_U8( + (s4 + "[ref]"), (cumsz >> 2), &buf_, + "ref", (verbose_ || startVerbose)); + } + if(shmemLeader) { + // Open the bitpair-encoded reference file + FILE *f4 = fopen(s4.c_str(), "rb"); + if(f4 == NULL) { + cerr << "Could not open reference-string index file " << s4.c_str() << " for reading." << endl; + cerr << "This is most likely because your index was built with an older version" << endl + << "(<= 0.9.8.1) of bowtie-build. Please re-run bowtie-build to generate a new" << endl + << "index (or download one from the Bowtie website) and try again." << endl; + loaded_ = false; + return; + } + if(included == NULL) { + // Read the whole thing in + size_t ret = fread(buf_, 1, cumsz >> 2, f4); + // Didn't read all of it? 
+ if(ret != (cumsz >> 2)) { + cerr << "Only read " << ret << " bytes (out of " << (cumsz >> 2) << ") from reference index file " << s4.c_str() << endl; + throw 1; + } + // Make sure there's no more + char c; + ret = fread(&c, 1, 1, f4); + assert_eq(0, ret); // should have failed + } else { + TIndexOffU buf_pos = 0; + uint8_t four_buf = 0, four_buf2 = 0; + for(size_t i = 0; i < seq_poss.size(); i++) { + TIndexOffU seq_pos = seq_poss[i]; + TIndexOffU cur_len = refLens_[i]; + TIndexOffU seq_pos2 = seq_pos + cur_len; + TIndexOffU left_pad = seq_pos & 3; + assert_eq((seq_pos - left_pad) & 3, 0); + TIndexOffU right_pad = 4 - (seq_pos2 & 3); + if(right_pad == 4) right_pad = 0; + assert_eq((seq_pos2 + right_pad) & 3, 0); + TIndexOffU cur_len2 = left_pad + cur_len + right_pad; + assert_eq(cur_len2 & 3, 0); + uint8_t *buf2_ = new uint8_t[cur_len2 >> 2]; + // Read sequences selectively + fseek(f4, (seq_pos - left_pad) >> 2, SEEK_SET); + size_t ret = fread(buf2_, 1, cur_len2 >> 2, f4); + // Didn't read all of it? 
+ if(ret != (cur_len2 >> 2)) { + cerr << "Only read " << ret << " bytes (out of " << (cur_len2 >> 2) << ") from reference index file " << s4.c_str() << endl; + throw 1; + } + four_buf2 = buf2_[0] >> (left_pad << 1); + for(TIndexOffU j = seq_pos; j < seq_pos2; j++, buf_pos++) { + if((j & 3) == 0) { + four_buf2 = buf2_[(j - (seq_pos - left_pad)) >> 2]; + } + uint8_t nt = four_buf2 & 3; + four_buf2 >>= 2; + four_buf |= (nt << ((buf_pos & 3) << 1)); + if((buf_pos & 3) == 3) { + buf_[buf_pos >> 2] = four_buf; + four_buf = 0; + } + } + delete [] buf2_; + seq_pos += cur_len; + } +#ifndef NDEBUG + TIndexOffU cumsz2 = 0; + for(size_t i = 0; i < refLens_.size(); i++) { + cumsz2 += refLens_[i]; + } + assert_eq(buf_pos, cumsz2); +#endif + if((buf_pos & 3) != 0) { + buf_[buf_pos >> 2] = four_buf; + } + assert_eq(nrefs_, refLens_.size()); + } + fclose(f4); +#ifdef BOWTIE_SHARED_MEM + if(useShmem_) NOTIFY_SHARED(buf_, (cumsz >> 2)); +#endif + } else { +#ifdef BOWTIE_SHARED_MEM + if(useShmem_) WAIT_SHARED(buf_, (cumsz >> 2)); +#endif + } + } + + // Populate byteToU32_ + bool big = currentlyBigEndian(); + for(int i = 0; i < 256; i++) { + uint32_t word = 0; + if(big) { + word |= ((i >> 0) & 3) << 24; + word |= ((i >> 2) & 3) << 16; + word |= ((i >> 4) & 3) << 8; + word |= ((i >> 6) & 3) << 0; + } else { + word |= ((i >> 0) & 3) << 0; + word |= ((i >> 2) & 3) << 8; + word |= ((i >> 4) & 3) << 16; + word |= ((i >> 6) & 3) << 24; + } + byteToU32_[i] = word; + } + +#ifndef NDEBUG + if(sanity_) { + // Compare the sequence we just read from the compact index + // file to the true reference sequence. 
+ EList > *os; // for holding references + EList > osv(DEBUG_CAT); // for holding ref seqs + EList > osn(DEBUG_CAT); // for holding ref names + EList osvLen(DEBUG_CAT); // for holding ref seq lens + EList osnLen(DEBUG_CAT); // for holding ref name lens + SStringExpandable tmp_destU32_; + if(infiles != NULL) { + if(infilesSeq) { + for(size_t i = 0; i < infiles->size(); i++) { + // Remove initial backslash; that's almost + // certainly being used to protect the first + // character of the sequence from getopts (e.g., + // when the first char is -) + if((*infiles)[i].at(0) == '\\') { + (*infiles)[i].erase(0, 1); + } + osv.push_back(SString((*infiles)[i])); + } + } else { + parseFastas(*infiles, osn, osnLen, osv, osvLen); + } + os = &osv; + } else { + assert(origs != NULL); + os = origs; + } + + // Go through the loaded reference files base-by-base and + // sanity check against what we get by calling getBase and + // getStretch + for(size_t i = 0; i < os->size(); i++) { + size_t olen = ((*os)[i]).length(); + size_t olenU32 = (olen + 12) / 4; + uint32_t *buf = new uint32_t[olenU32]; + uint8_t *bufadj = (uint8_t*)buf; + bufadj += getStretch(buf, i, 0, olen, tmp_destU32_); + for(size_t j = 0; j < olen; j++) { + assert_eq((int)(*os)[i][j], (int)bufadj[j]); + assert_eq((int)(*os)[i][j], (int)getBase(i, j)); + } + delete[] buf; + } + } +#endif + + // generate minkRepeat + long long int genomeLen = approxLen(0); + minkRepeat = 0; + while(genomeLen > 0) { + genomeLen >>= 2; + minkRepeat++; + } +} + +BitPairReference::~BitPairReference() { + if(buf_ != NULL && !useMm_ && !useShmem_) delete[] buf_; + if(sanityBuf_ != NULL) delete[] sanityBuf_; +} + +/** + * Return a single base of the reference. Calling this repeatedly + * is not an efficient way to retrieve bases from the reference; + * use loadStretch() instead. + * + * This implementation scans linearly through the records for the + * unambiguous stretches of the target reference sequence. 
When + * there are many records, binary search would be more appropriate. + */ +int BitPairReference::getBase(size_t tidx, size_t toff) const { + uint64_t reci = refRecOffs_[tidx]; // first record for target reference sequence + uint64_t recf = refRecOffs_[tidx+1]; // last record (exclusive) for target seq + assert_gt(recf, reci); + uint64_t bufOff = refOffs_[tidx]; + uint64_t off = 0; + // For all records pertaining to the target reference sequence... + for(uint64_t i = reci; i < recf; i++) { + assert_geq(toff, off); + off += recs_[i].off; + if(toff < off) { + return 4; + } + assert_geq(toff, off); + uint64_t recOff = off + recs_[i].len; + if(toff < recOff) { + toff -= off; + bufOff += (uint64_t)toff; + assert_lt(bufOff, bufSz_); + const uint64_t bufElt = (bufOff) >> 2; + const uint64_t shift = (bufOff & 3) << 1; + return ((buf_[bufElt] >> shift) & 3); + } + bufOff += recs_[i].len; + off = recOff; + assert_geq(toff, off); + } // end for loop over records + return 4; +} + +/** + * Load a stretch of the reference string into memory at 'dest'. + * + * This implementation scans linearly through the records for the + * unambiguous stretches of the target reference sequence. When + * there are many records, binary search would be more appropriate. + */ +int BitPairReference::getStretchNaive( + uint32_t *destU32, + size_t tidx, + size_t toff, + size_t count) const +{ + uint8_t *dest = (uint8_t*)destU32; + uint64_t reci = refRecOffs_[tidx]; // first record for target reference sequence + uint64_t recf = refRecOffs_[tidx+1]; // last record (exclusive) for target seq + assert_gt(recf, reci); + uint64_t cur = 0; + uint64_t bufOff = refOffs_[tidx]; + uint64_t off = 0; + // For all records pertaining to the target reference sequence... 
+ for(uint64_t i = reci; i < recf; i++) { + assert_geq(toff, off); + off += recs_[i].off; + for(; toff < off && count > 0; toff++) { + dest[cur++] = 4; + count--; + } + if(count == 0) break; + assert_geq(toff, off); + if(toff < off + recs_[i].len) { + bufOff += (TIndexOffU)(toff - off); // move bufOff pointer forward + } else { + bufOff += recs_[i].len; + } + off += recs_[i].len; + for(; toff < off && count > 0; toff++) { + assert_lt(bufOff, bufSz_); + const uint64_t bufElt = (bufOff) >> 2; + const uint64_t shift = (bufOff & 3) << 1; + dest[cur++] = (buf_[bufElt] >> shift) & 3; + bufOff++; + count--; + } + if(count == 0) break; + assert_geq(toff, off); + } // end for loop over records + // If any chars are left after scanning all the records, + // they must be ambiguous + while(count > 0) { + count--; + dest[cur++] = 4; + } + assert_eq(0, count); + return 0; +} + +/** + * Load a stretch of the reference string into memory at 'dest'. + * + * Bases are written one per byte into destU32 viewed as a byte array: + * values 0-3 for unambiguous bases and 4 for ambiguous ones. The first + * word of destU32 is filled with 4s and kept as a cushion, so the + * requested stretch does not start at byte 0. The return value is the + * byte offset into destU32 at which the requested stretch begins + * (0 is returned immediately when count is 0). + */ +int BitPairReference::getStretch( + uint32_t *destU32, + size_t tidx, + size_t toff, + size_t count + ASSERT_ONLY(, SStringExpandable& destU32_2)) const +{ + ASSERT_ONLY(size_t origCount = count); + ASSERT_ONLY(size_t origToff = toff); + if(count == 0) return 0; + uint8_t *dest = (uint8_t*)destU32; +#ifndef NDEBUG + destU32_2.clear(); + uint8_t *dest_2 = NULL; + int off2; + if((rand() % 10) == 0) { + destU32_2.resize((origCount >> 2) + 2); + off2 = getStretchNaive(destU32_2.wbuf(), tidx, origToff, origCount); + dest_2 = ((uint8_t*)destU32_2.wbuf()) + off2; + } +#endif + destU32[0] = 0x04040404; // Add Ns, which we might end up using later + uint64_t reci = refRecOffs_[tidx]; // first record for target reference sequence + uint64_t recf = refRecOffs_[tidx+1]; // last record (exclusive) for target seq + assert_gt(recf, reci); + uint64_t cur = 4; // keep a cushion of 4 bases at the beginning + uint64_t bufOff = refOffs_[tidx]; + uint64_t off = 0; + int64_t offset = 4; + bool firstStretch = true; + bool binarySearched = false; +
uint64_t left = reci; + uint64_t right = recf; + uint64_t mid = 0; + // For all records pertaining to the target reference sequence... + for(uint64_t i = reci; i < recf; i++) { + uint64_t origBufOff = bufOff; + assert_geq(toff, off); + if (firstStretch && recf > reci + 16){ + // binary search finds smallest i s.t. toff >= cumRefOff_[i] + while (left < right-1) { + mid = left + ((right - left) >> 1); + if (cumRefOff_[mid] <= toff) + left = mid; + else + right = mid; + } + off = cumRefOff_[left]; + bufOff = cumUnambig_[left]; + origBufOff = bufOff; + i = left; + assert(cumRefOff_[i+1] == 0 || cumRefOff_[i+1] > toff); + binarySearched = true; + } + off += recs_[i].off; // skip Ns at beginning of stretch + assert_gt(count, 0); + if(toff < off) { + size_t cpycnt = min((size_t)(off - toff), count); + memset(&dest[cur], 4, cpycnt); + count -= cpycnt; + toff += cpycnt; + cur += cpycnt; + if(count == 0) break; + } + assert_geq(toff, off); + if(toff < off + recs_[i].len) { + bufOff += toff - off; // move bufOff pointer forward + } else { + bufOff += recs_[i].len; + } + off += recs_[i].len; + assert(off == cumRefOff_[i+1] || cumRefOff_[i+1] == 0); + assert(!binarySearched || toff < off); + if(toff < off) { + if(firstStretch) { + if(toff + 8 < off && count > 8) { + // We already added some Ns, so we have to do + // a fixup at the beginning of the buffer so + // that we can start clobbering at cur >> 2 + if(cur & 3) { + offset -= (cur & 3); + } + uint64_t curU32 = cur >> 2; + // Do the initial few bases + if(bufOff & 3) { + const uint64_t bufElt = (bufOff) >> 2; + const int64_t low2 = bufOff & 3; + // Lots of cache misses on the following line + destU32[curU32] = byteToU32_[buf_[bufElt]]; + for(int j = 0; j < low2; j++) { + ((char *)(&destU32[curU32]))[j] = 4; + } + curU32++; + offset += low2; + const int64_t chars = 4 - low2; + count -= chars; + bufOff += chars; + toff += chars; + } + assert_eq(0, bufOff & 3); + uint64_t bufOffU32 = bufOff >> 2; + uint64_t countLim = count >> 
2; + uint64_t offLim = ((off - (toff + 4)) >> 2); + uint64_t lim = min(countLim, offLim); + // Do the fast thing for as far as possible + for(uint64_t j = 0; j < lim; j++) { + // Lots of cache misses on the following line + destU32[curU32] = byteToU32_[buf_[bufOffU32++]]; +#ifndef NDEBUG + if(dest_2 != NULL) { + assert_eq(dest[(curU32 << 2) + 0], dest_2[(curU32 << 2) - offset + 0]); + assert_eq(dest[(curU32 << 2) + 1], dest_2[(curU32 << 2) - offset + 1]); + assert_eq(dest[(curU32 << 2) + 2], dest_2[(curU32 << 2) - offset + 2]); + assert_eq(dest[(curU32 << 2) + 3], dest_2[(curU32 << 2) - offset + 3]); + } +#endif + curU32++; + } + toff += (lim << 2); + assert_leq(toff, off); + assert_leq((lim << 2), count); + count -= (lim << 2); + bufOff = bufOffU32 << 2; + cur = curU32 << 2; + } + // Do the slow thing for the rest + for(; toff < off && count > 0; toff++) { + assert_lt(bufOff, bufSz_); + const uint64_t bufElt = (bufOff) >> 2; + const uint64_t shift = (bufOff & 3) << 1; + dest[cur++] = (buf_[bufElt] >> shift) & 3; + bufOff++; + count--; + } + firstStretch = false; + } else { + // Do the slow thing + for(; toff < off && count > 0; toff++) { + assert_lt(bufOff, bufSz_); + const uint64_t bufElt = (bufOff) >> 2; + const uint64_t shift = (bufOff & 3) << 1; + dest[cur++] = (buf_[bufElt] >> shift) & 3; + bufOff++; + count--; + } + } + } + if(count == 0) break; + assert_eq(recs_[i].len, bufOff - origBufOff); + assert_geq(toff, off); + } // end for loop over records + // If any chars are left after scanning all the records, + // they must be ambiguous + while(count > 0) { + count--; + dest[cur++] = 4; + } + assert_eq(0, count); + return (int)offset; +} + + +/** + * Parse the input fasta files, populating the szs list and writing the + * .3.gfm_ext and .4.gfm_ext portions of the index as we go.
+ */ +pair +BitPairReference::szsFromFasta( + EList& is, + const string& outfile, + bool bigEndian, + const RefReadInParams& refparams, + EList& szs, + bool sanity, + EList *names) +{ + RefReadInParams parms = refparams; + std::pair sztot; + if(!outfile.empty()) { + string file3 = outfile + ".3." + gfm_ext; + string file4 = outfile + ".4." + gfm_ext; + // Open output stream for the '.3.gfm_ext' file which will + // hold the size records. + ofstream fout3(file3.c_str(), ios::binary); + if(!fout3.good()) { + cerr << "Could not open index file for writing: \"" << file3.c_str() << "\"" << endl + << "Please make sure the directory exists and that permissions allow writing by" << endl + << "HISAT2." << endl; + throw 1; + } + BitpairOutFileBuf bpout(file4.c_str()); + // Read in the sizes of all the unambiguous stretches of the genome + // into a vector of RefRecords. The input streams are reset once + // it's done. + writeIndex(fout3, 1, bigEndian); // endianness sentinel + TIndexOff numSeqs = 0; + sztot = fastaRefReadSizes(is, szs, parms, &bpout, numSeqs); + writeIndex(fout3, (TIndexOffU)szs.size(), bigEndian); // write # records + for(size_t i = 0; i < szs.size(); i++) szs[i].write(fout3, bigEndian); + if(sztot.first == 0) { + cerr << "Error: No unambiguous stretches of characters in the input. Aborting..." 
<< endl; + throw 1; + } + assert_gt(sztot.first, 0); + assert_gt(sztot.second, 0); + bpout.close(); + fout3.close(); + } else { + // Read in the sizes of all the unambiguous stretches of the + // genome into a vector of RefRecords + TIndexOff numSeqs = 0; + if(names != NULL) { + sztot = fastaRefReadFragsNames(is, szs, parms, NULL, numSeqs, *names); + } else { + // names is optional (it defaults to NULL in the declaration), + // so fall back to the size-only reader instead of + // dereferencing a null pointer + sztot = fastaRefReadSizes(is, szs, parms, NULL, numSeqs); + } +#ifndef NDEBUG + if(parms.color) { + parms.color = false; + EList szs2(EBWTB_CAT); + TIndexOff numSeqs2 = 0; + ASSERT_ONLY(std::pair sztot2 =) + fastaRefReadSizes(is, szs2, parms, NULL, numSeqs2); + assert_eq(numSeqs, numSeqs2); + // One less color than base + assert_geq(sztot2.second, sztot.second + numSeqs); + parms.color = true; + } +#endif + } + return sztot; +} diff --git a/reference.h b/reference.h new file mode 100644 index 0000000..120ca07 --- /dev/null +++ b/reference.h @@ -0,0 +1,196 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see .
+ */ + +#ifndef REFERENCE_H_ +#define REFERENCE_H_ + +#include +#include +#include +#include +#ifdef BOWTIE_MM +#include +#include +#endif +#include "endian_swap.h" +#include "ref_read.h" +#include "sequence_io.h" +#include "mm.h" +#include "shmem.h" +#include "timer.h" +#include "sstring.h" +#include "btypes.h" + + +/** + * Concrete reference representation that bulk-loads the reference from + * the bit-pair-compacted binary file and stores it in memory also in + * bit-pair-compacted format. The user may request reference + * characters either on a per-character bases or by "stretch" using + * getBase(...) and getStretch(...) respectively. + * + * Most of the complexity in this class is due to the fact that we want + * to represent references with ambiguous (non-A/C/G/T) characters but + * we don't want to use more than two bits per base. This means we + * need a way to encode the ambiguous stretches of the reference in a + * way that is external to the bitpair sequence. To accomplish this, + * we use the RefRecords vector, which is stored in the .3.ebwt index + * file. The bitpairs themselves are stored in the .4.ebwt index file. + * + * Once it has been loaded, a BitPairReference is read-only, and is + * safe for many threads to access at once. + */ +class BitPairReference { + +public: + /** + * Load from .3.ebwt/.4.ebwt Bowtie index files. + */ + BitPairReference( + const string& in, + const EList* included, + bool color, + bool sanity = false, + EList* infiles = NULL, + EList >* origs = NULL, + bool infilesSeq = false, + bool useMm = false, + bool useShmem = false, + bool mmSweep = false, + bool verbose = false, + bool startVerbose = false); + + ~BitPairReference(); + + /** + * Return a single base of the reference. Calling this repeatedly + * is not an efficient way to retrieve bases from the reference; + * use loadStretch() instead. 
+ * + * This implementation scans linearly through the records for the + * unambiguous stretches of the target reference sequence. When + * there are many records, binary search would be more appropriate. + */ + int getBase(size_t tidx, size_t toff) const; + + /** + * Load a stretch of the reference string into memory at 'dest', + * one base per byte starting at byte 0. Always returns 0. + * + * This implementation scans linearly through the records for the + * unambiguous stretches of the target reference sequence. When + * there are many records, binary search would be more appropriate. + */ + int getStretchNaive( + uint32_t *destU32, + size_t tidx, + size_t toff, + size_t count) const; + + /** + * Load a stretch of the reference string into memory at 'dest'. + * + * Bases are written one per byte, and the return value is the byte + * offset into destU32 at which the requested stretch begins. + * + * When the target reference sequence has more than 16 records, the + * starting record is located by binary search; otherwise the records + * for the unambiguous stretches are scanned linearly. + */ + int getStretch( + uint32_t *destU32, + size_t tidx, + size_t toff, + size_t count + ASSERT_ONLY(, SStringExpandable& destU32_2)) const; + + /** + * Return the number of reference sequences. + */ + TIndexOffU numRefs() const { + return nrefs_; + } + + /** + * Return the approximate length of a reference sequence (it might leave + * off some Ns on the end). + * + * TODO: Is it still true that it might leave off Ns? + */ + TIndexOffU approxLen(TIndexOffU elt) const { + assert_lt(elt, nrefs_); + return refLens_[elt]; + } + + /** + * Return true iff buf_ and all the vectors are populated. + */ + bool loaded() const { + return loaded_; + } + + /** + * Given a reference sequence id, return its offset into the pasted + * reference string; i.e., return the number of unambiguous nucleotides + * preceding it. + */ + TIndexOffU pastedOffset(TIndexOffU idx) const { + return refOffs_[idx]; + } + + /** + * Parse the input fasta files, populating the szs list and writing the + * .3.gfm_ext and .4.gfm_ext portions of the index as we go.
+ */ + static std::pair + szsFromFasta( + EList& is, + const string& outfile, + bool bigEndian, + const RefReadInParams& refparams, + EList& szs, + bool sanity, + EList *names = NULL); + + size_t getMinK() const{ + return minkRepeat; + } + +protected: + + uint32_t byteToU32_[256]; + + EList recs_; /// records describing unambiguous stretches + // following two lists are purely for the binary search in getStretch + EList cumUnambig_; // # unambig ref chars up to each record + EList cumRefOff_; // # ref chars up to each record + EList refLens_; /// approx lens of ref seqs (excludes trailing ambig chars) + EList refOffs_; /// buf_ begin offsets per ref seq + EList refRecOffs_; /// record begin/end offsets per ref seq + uint8_t *buf_; /// the whole reference as a big bitpacked byte array + uint8_t *sanityBuf_;/// for sanity-checking buf_ + TIndexOffU bufSz_; /// size of buf_ + TIndexOffU bufAllocSz_; + TIndexOffU nrefs_; /// the number of reference sequences + bool loaded_; /// whether it's loaded + bool sanity_; /// do sanity checking + bool useMm_; /// load the reference as a memory-mapped file + bool useShmem_; /// load the reference into shared memory + bool verbose_; + size_t minkRepeat; // log4 of the size of repeat genome + ASSERT_ONLY(SStringExpandable tmp_destU32_); +}; + +#endif diff --git a/repeat.h b/repeat.h new file mode 100644 index 0000000..03be227 --- /dev/null +++ b/repeat.h @@ -0,0 +1,627 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#ifndef REPEAT_H_ +#define REPEAT_H_ + +#include +#include +#include +#include +#include "assert_helpers.h" +#include "word_io.h" +#include "mem_ids.h" +#include "ref_coord.h" +#include "alt.h" + +using namespace std; + +/** + * One location of a repeat copy: reference id (tid), offset within that + * reference (toff), offset in the joined reference (joinedOff), the fw + * flag (presumably forward strand; confirm against callers) and the + * index of the allele found there (alleleID). + * + * Ordering is by joinedOff, then fw (true first), then alleleID. + */ +template +class RepeatCoord { +public: + bool operator< (const RepeatCoord& o) const { + if(joinedOff != o.joinedOff) + return joinedOff < o.joinedOff; + if(fw != o.fw) + return fw; + if(alleleID != o.alleleID) + return alleleID < o.alleleID; + return false; + } + +public: + /* NOTE(review): the default constructor leaves every member uninitialized */ + RepeatCoord() {}; + /* alleleID must be initialized here too; operator< and allele lookups read it */ + RepeatCoord(index_t l_tid, index_t l_toff, index_t l_joinedOff, bool l_fw, index_t l_alleleID) : + tid(l_tid), toff(l_toff), joinedOff(l_joinedOff), fw(l_fw), alleleID(l_alleleID) {}; + index_t tid; + index_t toff; + index_t joinedOff; + bool fw; + index_t alleleID; +}; + +template +class RepeatAllele { +public: + RepeatAllele() { + reset(); + } + + void init(index_t allelePos_, + index_t alleleLen_) { + allelePos = allelePos_; + alleleLen = alleleLen_; + } + + void reset() { + allelePos = 0; + alleleLen = 0; + } + + bool operator< (const RepeatAllele& o) const { + if(allelePos != o.allelePos) + return allelePos < o.allelePos; + return alleleLen < o.alleleLen; + } + +#ifndef NDEBUG + bool repOk() const { + return true; + } +#endif + + bool write(ofstream& f_out, bool bigEndian) const { + writeU16(f_out, allelePos, bigEndian); + writeU16(f_out, alleleLen, bigEndian); + return true; + } + + bool read(ifstream& f_in, bool bigEndian) { + allelePos = readU16(f_in, bigEndian); + alleleLen = readU16(f_in, bigEndian); + return true; + } + + bool compatible(index_t left, index_t right) const { + if(left < allelePos || right > allelePos + alleleLen) + return false; + + return true; + } + +public: + uint16_t allelePos; + uint16_t alleleLen; +}; + +// sorting functions +template +struct sort_pair_loci { + bool
operator()(const pair, index_t>& a, const pair, index_t>& b) { + return a.first.joinedOff < b.first.joinedOff; + } +}; + +template +struct sort_pair_loci_by_index { + bool operator()(const pair, index_t>& a, const pair, index_t>& b) { + return a.second < b.second; + } +}; + +template +class Repeat { +public: + void init(const string& repName_, + index_t repID_, + index_t repPos_, + index_t repLen_) { + repName = repName_; + repID = repID_; + repPos = repPos_; + repLen = repLen_; + } + + bool write(ofstream& f_out, bool bigEndian) const { + writeIndex(f_out, repID, bigEndian); + writeIndex(f_out, repPos, bigEndian); + writeIndex(f_out, repLen, bigEndian); + writeIndex(f_out, alleles.size(), bigEndian); + for(index_t i = 0; i < alleles.size(); i++) { + alleles[i].write(f_out, bigEndian); + } + writeIndex(f_out, positions.size(), bigEndian); + for(index_t i = 0; i < positions.size(); i++) { + writeIndex(f_out, positions[i].joinedOff, bigEndian); + writeU8(f_out, positions[i].fw); + writeIndex(f_out, positions[i].alleleID, bigEndian); + } + return true; + } + + bool read(ifstream& f_in, bool bigEndian) { + repID = readIndex(f_in, bigEndian); + repPos = readIndex(f_in, bigEndian); + repLen = readIndex(f_in, bigEndian); + index_t numAlleles = readIndex(f_in, bigEndian); + alleles.resizeExact(numAlleles); + for(index_t i = 0; i < numAlleles; i++) { + alleles[i].read(f_in, bigEndian); + } + index_t numPositions = readIndex(f_in, bigEndian); + positions.resizeExact(numPositions); + for(index_t i = 0; i < numPositions; i++) { + positions[i].tid = 0; + positions[i].toff = 0; + positions[i].joinedOff = readIndex(f_in, bigEndian); + positions[i].fw = readU8(f_in); + positions[i].alleleID = readIndex(f_in, bigEndian); + assert_lt(positions[i].alleleID, alleles.size()); + } + return true; + } + +public: + string repName; + index_t repID; + index_t repPos; + index_t repLen; + EList > alleles; + EList > positions; +}; + +template +class RepeatDB { +public: + RepeatDB() {} + + 
virtual ~RepeatDB() {} + + bool empty() const { return _repeats.size() == 0; } + + EList >& repeats() { return _repeats; } + const EList >& repeats() const { return _repeats; } + + const ELList >& repeatMap() const { return _repeatMap; } + + void write(ofstream& f_out, bool bigEndian) const { + if(_repeats.size() <= 0) { + writeIndex(f_out, 0, bigEndian); + return; + } + EList repeatGroup; + for(index_t i = 0; i < _repeats.size(); i++) { +#ifndef NDEBUG + if(i + 1 < _repeats.size()) { + assert_leq(_repeats[i].repID, _repeats[i+1].repID); + } +#endif + if(_repeats[i].repID > repeatGroup.size()) { + repeatGroup.push_back(i); + assert_eq(_repeats[i].repID, repeatGroup.size()); + } + } + repeatGroup.push_back(_repeats.size()); + assert_eq(_repeats.back().repID + 1, repeatGroup.size()); + writeIndex(f_out, repeatGroup.size(), bigEndian); + streampos filepos = f_out.tellp(); + EList repeatFilePos; + for(index_t i = 0; i < repeatGroup.size(); i++) { + writeIndex(f_out, 0, bigEndian); + } + + for(index_t i = 0; i < repeatGroup.size(); i++) { + index_t begin = (i == 0 ? 
0 : repeatGroup[i-1]); + index_t end = repeatGroup[i]; + writeIndex(f_out, end - begin, bigEndian); + for(index_t j = begin; j < end; j++) { + _repeats[j].write(f_out, bigEndian); + } + repeatFilePos.push_back(f_out.tellp()); + } + assert_eq(repeatFilePos.size(), repeatGroup.size()); + + streampos origpos = f_out.tellp(); + f_out.seekp(filepos); + for(index_t i = 0; i < repeatFilePos.size(); i++) { + writeIndex(f_out, repeatFilePos[i], bigEndian); + } + f_out.seekp(origpos); + } + + void read(ifstream& f_in, bool bigEndian, const EList& includeRepeat) { + index_t numRepeatGroup = readIndex(f_in, bigEndian); + EList filePos; filePos.resizeExact(numRepeatGroup); + for(index_t i = 0; i < numRepeatGroup; i++) { + filePos[i] = readIndex(f_in, bigEndian); + } + assert_eq(numRepeatGroup, includeRepeat.size()); + for(index_t i = 0, repID = 0; i < numRepeatGroup; i++) { + if(!includeRepeat[i]) + continue; + if(i > 0) { + f_in.seekg(filePos[i-1]); + } + index_t numRepeats = readIndex(f_in, bigEndian); + index_t repeat_size = _repeats.size(); + _repeats.resizeExact(repeat_size + numRepeats); + for(index_t j = 0; j < numRepeats; j++) { + _repeats[repeat_size+j].read(f_in, bigEndian); + _repeats[repeat_size+j].repID = repID; + } + repID++; + } + f_in.seekg(filePos.back()); + } + + // Build an internal table to enable rapid search of repeats + // and converts joined offsets to chromosome IDs (tid) and loci (toff) + void construct(const index_t* rstarts, index_t rlen) { + _repeatMap.clear(); + if(_repeats.empty()) + return; + for(index_t r = 0; r < _repeats.size(); r++) { + if(_repeats[r].repID >= _repeatMap.size()) { + _repeatMap.expand(); + _repeatMap.back().clear(); + } + EList >& repeatMap = _repeatMap.back(); + repeatMap.expand(); + if(repeatMap.size() == 1) { + repeatMap.back().first = _repeats[r].repLen; + } else { + repeatMap.back().first = repeatMap[repeatMap.size() - 2].first + _repeats[r].repLen; + } + repeatMap.back().second = r; + } + + EList, index_t> > 
joinedOffList; + for(index_t r = 0; r < _repeats.size(); r++) { + Repeat& repeat = _repeats[r]; + EList >& positions = repeat.positions; + for(index_t p = 0; p < positions.size(); p++) { + joinedOffList.expand(); + joinedOffList.back().first.joinedOff = positions[p].joinedOff; + joinedOffList.back().first.tid = 0; + joinedOffList.back().first.toff = 0; + joinedOffList.back().first.fw = positions[p].fw; + joinedOffList.back().first.alleleID = positions[p].alleleID; + joinedOffList.back().second = joinedOffList.size() - 1; + } + } + + sort(joinedOffList.begin(), joinedOffList.end(), sort_pair_loci()); + + index_t j = 0, r = 0; + while(j < joinedOffList.size() && r < rlen) { + index_t off = joinedOffList[j].first.joinedOff; + index_t lower = rstarts[r*3]; + index_t upper; + if(r == rlen - 1) { + upper = numeric_limits::max(); + } else { + upper = rstarts[(r+1)*3]; + } + assert_gt(upper, lower); + if(off >= upper) { + r++; + continue; + } + assert_geq(off, lower); + joinedOffList[j].first.tid = rstarts[(r*3)+1]; + joinedOffList[j].first.toff = off - lower + rstarts[(r*3)+2]; + j++; + } + + sort(joinedOffList.begin(), joinedOffList.end(), sort_pair_loci_by_index()); + + index_t count = 0; + for(index_t r = 0; r < _repeats.size(); r++) { + Repeat& repeat = _repeats[r]; + EList >& positions = _repeats[r].positions; + for(index_t p = 0; p < positions.size(); p++) { + assert_lt(count, joinedOffList.size()); + assert_eq(positions[p].joinedOff, joinedOffList[count].first.joinedOff); + positions[p] = joinedOffList[count].first; + + RepeatAllele& allele = repeat.alleles[positions[p].alleleID]; + if(positions[p].fw) { + positions[p].joinedOff -= allele.allelePos; + positions[p].toff -= allele.allelePos; + } else { + assert_leq(allele.allelePos + allele.alleleLen, repeat.repLen); + index_t subLen = repeat.repLen - allele.allelePos - allele.alleleLen; + positions[p].joinedOff -= subLen; + positions[p].toff -= subLen; + } + + count++; + } + } + } + + bool repeatExist(index_t repID, 
index_t left, index_t right) const { + if(repID >= _repeatMap.size()) + return false; + + // Find a repeat corresponding to a given location (left, right) + const EList >& repeatMap = _repeatMap[repID]; + pair repeat(left, numeric_limits::max()); + index_t repeatIdx = repeatMap.bsearchLoBound(repeat); + assert_lt(repeatIdx, repeatMap.size()); + if(right > repeatMap[repeatIdx].first) + return false; + return true; + } + + bool getCoords(index_t repID, + index_t left, // left offset in the repeat sequence + index_t right, // right offset + const EList& snpIDs, // SNP IDs + const ALTDB& altdb, + EList, RepeatCoord > >& near_positions, + index_t max_positions = numeric_limits::max()) const { + near_positions.clear(); + + if(repID >= _repeatMap.size()) + return false; + + // Find a repeat corresponding to a given location (left, right) + const EList >& repeatMap = _repeatMap[repID]; + pair repeat(left, numeric_limits::max()); + index_t repeatIdx = repeatMap.bsearchLoBound(repeat); + assert_lt(repeatIdx, repeatMap.size()); + if(right > repeatMap[repeatIdx].first) + return false; + + index_t repeatIdx_ = repeatMap[repeatIdx].second; + assert_lt(repeatIdx_, _repeats.size()); + + const EList >& alleles = _repeats[repeatIdx_].alleles; + index_t adjLeft = left, adjRight = right; + if(repeatIdx > 0) { + adjLeft -= repeatMap[repeatIdx-1].first; + adjRight -= repeatMap[repeatIdx-1].first; + } + const EList >& positions = _repeats[repeatIdx_].positions; + for(index_t p = 0; p < positions.size(); p++) { + const RepeatCoord& position = positions[p]; + assert_lt(position.alleleID, alleles.size()); + const RepeatAllele& allele = alleles[position.alleleID]; + if(!allele.compatible(adjLeft, adjRight)) + continue; + + near_positions.expand(); + near_positions.back().first = position; + if(positions[p].fw) { + near_positions.back().first.joinedOff += adjLeft; + near_positions.back().first.toff += adjLeft; + } else { + const index_t len = right - left; + assert_leq(adjLeft + len, 
_repeats[repeatIdx_].repLen); + index_t rc_adjLeft = _repeats[repeatIdx_].repLen - adjLeft - len; + near_positions.back().first.joinedOff += rc_adjLeft; + near_positions.back().first.toff += rc_adjLeft; + } + + if(near_positions.size() >= max_positions) + break; + } + + return near_positions.size() > 0; + } + + bool findCoords(index_t anchor_left, + index_t anchor_right, + index_t repID, + index_t left, // left offset in the repeat sequence + index_t right, // right offset + const EList& snpIDs, // SNP IDs + const ALTDB& altdb, + EList, RepeatCoord > >& near_positions, + index_t max_positions = numeric_limits::max(), + index_t dist = 1000) const { + near_positions.clear(); + + if(repID >= _repeatMap.size()) + return false; + + // Find a repeat corresponding to a given location (left, right) + const EList >& repeatMap = _repeatMap[repID]; + pair repeat(left, numeric_limits::max()); + index_t repeatIdx = repeatMap.bsearchLoBound(repeat); + assert_lt(repeatIdx, repeatMap.size()); + if(right > repeatMap[repeatIdx].first) + return false; + + index_t repeatIdx_ = repeatMap[repeatIdx].second; + assert_lt(repeatIdx_, _repeats.size()); + + const EList >& alleles = _repeats[repeatIdx_].alleles; + index_t adjLeft = left, adjRight = right; + if(repeatIdx > 0) { + adjLeft -= repeatMap[repeatIdx-1].first; + adjRight -= repeatMap[repeatIdx-1].first; + } + const EList >& positions = _repeats[repeatIdx_].positions; + + RepeatCoord cmp; + cmp.joinedOff = (anchor_left >= dist ? 
anchor_left - dist : 0); + index_t p = positions.bsearchLoBound(cmp); + for(; p < positions.size(); p++) { + const RepeatCoord& position = positions[p]; + index_t pos = positions[p].joinedOff + adjLeft; + if(pos + dist < anchor_left) + continue; + if(anchor_right + dist < pos) + break; + + assert_lt(position.alleleID, alleles.size()); + const RepeatAllele& allele = alleles[position.alleleID]; + if(!allele.compatible(adjLeft, adjRight)) + continue; + + near_positions.expand(); + near_positions.back().first = position; + if(positions[p].fw) { + near_positions.back().first.joinedOff += adjLeft; + near_positions.back().first.toff += adjLeft; + } else { + const index_t len = right - left; + assert_leq(adjLeft + len, _repeats[repeatIdx_].repLen); + index_t rc_adjLeft = _repeats[repeatIdx_].repLen - adjLeft - len; + near_positions.back().first.joinedOff += rc_adjLeft; + near_positions.back().first.toff += rc_adjLeft; + } + + if(near_positions.size() >= max_positions) + break; + } + + return near_positions.size() > 0; + } + + bool findCommonCoords(index_t repID, + index_t left, // left offset in the repeat sequence + index_t right, // right offset + const EList& snpIDs, // SNP IDs + index_t repID2, + index_t left2, // left offset 2 in the repeat sequence + index_t right2, // right offset 2 + const EList& snpIDs2, // SNP IDs + const ALTDB& altdb, + EList, RepeatCoord > >& common_positions, + index_t max_positions = numeric_limits::max(), + index_t dist = 1000) const { + common_positions.clear(); + + if(repID >= _repeatMap.size() || repID2 >= _repeatMap.size()) + return false; + + // Find a repeat corresponding to a given location (left, right) + const EList >& repeatMap = _repeatMap[repID]; + assert_lt(left, right); + pair repeat(left, numeric_limits::max()); + index_t repeatIdx = repeatMap.bsearchLoBound(repeat); + assert_lt(repeatIdx, repeatMap.size()); + if(right > repeatMap[repeatIdx].first) + return false; + index_t repeatIdx_ = repeatMap[repeatIdx].second; + 
assert_lt(repeatIdx_, _repeats.size()); + const EList >& alleles = _repeats[repeatIdx_].alleles; + index_t adjLeft = left, adjRight = right; + if(repeatIdx > 0) { + adjLeft -= repeatMap[repeatIdx-1].first; + adjRight -= repeatMap[repeatIdx-1].first; + } + + // Find a repeat cooresponding to a given location (left2, right2) + const EList >& repeatMap2 = _repeatMap[repID2]; + assert_lt(left2, right2); + pair repeat2(left2, numeric_limits::max()); + index_t repeatIdx2 = repeatMap2.bsearchLoBound(repeat2); + assert_lt(repeatIdx2, repeatMap2.size()); + if(right2 > repeatMap2[repeatIdx2].first) + return false; + index_t repeatIdx2_ = repeatMap2[repeatIdx2].second; + assert_lt(repeatIdx2_, _repeats.size()); + const EList >& alleles2 = _repeats[repeatIdx2_].alleles; + index_t adjLeft2 = left2, adjRight2 = right2; + if(repeatIdx2 > 0) { + adjLeft2 -= repeatMap2[repeatIdx2-1].first; + adjRight2 -= repeatMap2[repeatIdx2-1].first; + } + + const EList >& positions = _repeats[repeatIdx_].positions; + const EList >& positions2 = _repeats[repeatIdx2_].positions; + index_t jsave = 0; + for(index_t i = 0; i < positions.size(); i++) { + const RepeatAllele& allele = alleles[positions[i].alleleID]; + if(!allele.compatible(adjLeft, adjRight)) + continue; + index_t i_pos = positions[i].joinedOff + adjLeft; + for(index_t j = jsave; j < positions2.size(); j++) { + index_t j_pos = positions2[j].joinedOff + adjLeft2; + if(j_pos + dist < i_pos) { + jsave = j + 1; + continue; + } + if(i_pos + dist < j_pos) + break; + + const RepeatAllele& allele2 = alleles2[positions2[j].alleleID]; + if(!allele2.compatible(adjLeft2, adjRight2)) + continue; + + common_positions.expand(); + common_positions.back().first = positions[i]; + if(positions[i].fw) { + common_positions.back().first.joinedOff += adjLeft; + common_positions.back().first.toff += adjLeft; + } else { + const index_t len = right - left; + assert_leq(adjLeft + len, _repeats[repeatIdx_].repLen); + index_t rc_adjLeft = 
_repeats[repeatIdx_].repLen - adjLeft - len; + common_positions.back().first.joinedOff += rc_adjLeft; + common_positions.back().first.toff += rc_adjLeft; + } + common_positions.back().second = positions2[j]; + if(positions2[j].fw) { + common_positions.back().second.toff += adjLeft2; + common_positions.back().second.joinedOff += adjLeft2; + } else { + const index_t len = right2 - left2; + assert_leq(adjLeft2 + len, _repeats[repeatIdx2_].repLen); + index_t rc_adjLeft2 = _repeats[repeatIdx2_].repLen - adjLeft2 - len; + common_positions.back().second.joinedOff += rc_adjLeft2; + common_positions.back().second.toff += rc_adjLeft2; + } + + if(common_positions.size() >= max_positions) + break; + } + + if(common_positions.size() >= max_positions) + break; + } + + return common_positions.size() > 0; + } + +private: + pair get_alt_range(const ALTDB& altdb, + index_t left, + index_t right) const { + pair alt_range; + ALT cmp_alt; + cmp_alt.pos = left; + alt_range.first = alt_range.second = (index_t)altdb.alts().bsearchLoBound(cmp_alt); + for(; alt_range.second < altdb.alts().size(); alt_range.second++) { + const ALT& alt = altdb.alts()[alt_range.second]; + if(alt.left > right) break; + } + return alt_range; + } + +private: + EList > _repeats; + ELList > _repeatMap; // pos to repeat id +}; + +#endif /*ifndef REPEAT_H_*/ diff --git a/repeat_builder.cpp b/repeat_builder.cpp new file mode 100644 index 0000000..7496766 --- /dev/null +++ b/repeat_builder.cpp @@ -0,0 +1,4771 @@ + +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#include +#include +#include +#include +#include "timer.h" +#include "aligner_sw.h" +#include "aligner_result.h" +#include "scoring.h" +#include "sstring.h" + +#include "repeat_builder.h" +#include "repeat_kmer.h" +#include "bit_packed_array.h" + +unsigned int levenshtein_distance(const std::string& s1, const std::string& s2) +{ + const std::size_t len1 = s1.size(), len2 = s2.size(); + std::vector col(len2+1), prevCol(len2+1); + + for (unsigned int i = 0; i < prevCol.size(); i++) + prevCol[i] = i; + for (unsigned int i = 0; i < len1; i++) { + col[0] = i+1; + for (unsigned int j = 0; j < len2; j++) + // note that std::min({arg1, arg2, arg3}) works only in C++11, + // for C++98 use std::min(std::min(arg1, arg2), arg3) + col[j+1] = std::min( std::min(prevCol[1 + j] + 1, col[j] + 1), prevCol[j] + (s1[i]==s2[j] ? 
0 : 1) ); + col.swap(prevCol); + } + return prevCol[len2]; +} + + +unsigned int hamming_distance(const string& s1, const string& s2) +{ + assert_eq(s1.length(), s2.length()); + + uint32_t cnt = 0; + + for(size_t i = 0; i < s1.length(); i++) { + if(s1[i] != s2[i]) { + cnt++; + } + } + + return cnt; +} + +string reverse(const string& str) +{ + string rev = str; + size_t str_len = str.length(); + + for(size_t i = 0; i < str_len; i++) { + rev[i] = str[str_len - i - 1]; + } + + return rev; +} + +string reverse_complement(const string& str) +{ + string rev = str; + size_t str_len = str.length(); + + for(size_t i = 0; i < str_len; i++) { + char nt = str[str_len - i - 1]; + rev[i] = asc2dnacomp[nt]; + } + + return rev; +} + +template +TIndexOffU getLCP(const TStr& s, + CoordHelper& coordHelper, + TIndexOffU a, + TIndexOffU b, + TIndexOffU max_len = 1000) +{ + TIndexOffU a_end = coordHelper.getEnd(a); + TIndexOffU b_end = coordHelper.getEnd(b); + + assert_leq(a_end, s.length()); + assert_leq(b_end, s.length()); + + TIndexOffU k = 0; + while((a + k) < a_end && (b + k) < b_end && k < max_len) { + if(s[a + k] != s[b + k]) { + break; + } + k++; + } + + return k; +} + +template +bool isSameSequenceUpto(const TStr& s, + CoordHelper& coordHelper, + TIndexOffU a, + TIndexOffU b, + TIndexOffU upto) +{ + TIndexOffU a_end = coordHelper.getEnd(a); + TIndexOffU b_end = coordHelper.getEnd(b); + assert_leq(a_end, s.length()); + assert_leq(b_end, s.length()); + + if(a + upto > a_end || b + upto > b_end) + return false; + + for(int k = upto - 1; k >= 0; k--) { + if(s[a + k] != s[b + k]) { + return false; + } + } + + return true; +} + +bool isSenseDominant(CoordHelper& coordHelper, + const EList& positions, + size_t seed_len) +{ + // count the number of seeds on the sense strand + size_t sense_mer_count = 0; + for(size_t i = 0; i < positions.size(); i++) { + if(positions[i] < coordHelper.forward_length()) + sense_mer_count++; + } + + // there exists two sets of seeds that are essentially the 
same + // due to sense and antisense strands except palindromic sequences + // choose one and skip the other one + assert_leq(sense_mer_count, positions.size()); + size_t antisense_mer_count = positions.size() - sense_mer_count; + if(sense_mer_count < antisense_mer_count) { + return false; + } else if(sense_mer_count == antisense_mer_count) { + assert_geq(positions.back(), coordHelper.forward_length()); + TIndexOffU sense_pos = coordHelper.length() - positions.back() - seed_len; + if(positions[0] > sense_pos) return false; + } + + return true; +} + +static const Range EMPTY_RANGE = Range(1, 0); + +struct RepeatRange { + RepeatRange() { + forward = true; + }; + RepeatRange(Range r, int id) : + range(r), rg_id(id), forward(true) {}; + RepeatRange(Range r, int id, bool fw) : + range(r), rg_id(id), forward(fw) {}; + + Range range; + int rg_id; + bool forward; +}; + +Range reverseRange(const Range& r, TIndexOffU size) +{ + size_t len = r.second - r.first; + Range rc; + + rc.first = size - r.second; + rc.second = rc.first + len; + + return rc; +} + +string reverseComplement(const string& str) +{ + string rc; + for(TIndexOffU si = str.length(); si > 0; si--) { + rc.push_back(asc2dnacomp[str[si - 1]]); + } + return rc; +} + + +string toMDZ(const EList& edits, const string& read) +{ + StackedAln stk; + BTDnaString btread; + BTString buf; + + btread.install(read.c_str(), true); + + stk.init(btread, edits, 0, 0, 0, 0); + stk.buildMdz(); + stk.writeMdz(&buf, NULL); + + return string(buf.toZBuf()); +} + +string applyEdits(const string& ref, size_t read_len, const EList& edits, const Coord& coord) +{ + string read; + size_t ref_pos = coord.off(); + size_t read_pos = 0; + + for(size_t i = 0; i < edits.size(); i++) { + for(; read_pos < edits[i].pos; read_pos++, ref_pos++) { + read.push_back(ref[ref_pos]); + } + + if(edits[i].type == EDIT_TYPE_READ_GAP) { + // delete on read + ref_pos++; + } else if(edits[i].type == EDIT_TYPE_REF_GAP) { + // insert on read + 
read.push_back(edits[i].qchr); + read_pos++; + } else if(edits[i].type == EDIT_TYPE_MM) { + assert_eq(ref[ref_pos], edits[i].chr); + read.push_back(edits[i].qchr); + + read_pos++; + ref_pos++; + } else { + assert(false); + } + } + + for(; read_pos < read_len; read_pos++, ref_pos++) { + read.push_back(ref[ref_pos]); + } + + return read; +} + +template +static bool compareRepeatCoordByJoinedOff(const RepeatCoord& a, const RepeatCoord& b) +{ + return a.joinedOff < b.joinedOff; +} + + +template +string getString(const TStr& ref, TIndexOffU start, size_t len) +{ + ASSERT_ONLY(const size_t ref_len = ref.length()); + assert_leq(start + len, ref_len); + + string s; + for(size_t i = 0; i < len; i++) { + char nt = "ACGT"[ref[start + i]]; + s.push_back(nt); + } + + return s; +} + +template +void getString(const TStr& ref, TIndexOffU start, size_t len, string& s) +{ + s.clear(); + ASSERT_ONLY(const size_t ref_len = ref.length()); + assert_leq(start + len, ref_len); + for(size_t i = 0; i < len; i++) { + char nt = "ACGT"[ref[start + i]]; + s.push_back(nt); + } +} + + +template +inline uint8_t getSequenceBase(const TStr& ref, TIndexOffU pos) +{ + assert_lt(pos, ref.length()); + return (uint8_t)ref[pos]; +} + + +template +void dump_tstr(const TStr& s) +{ + static int print_width = 60; + + size_t s_len = s.length(); + + for(size_t i = 0; i < s_len; i += print_width) { + string buf; + for(size_t j = 0; (j < print_width) && (i + j < s_len); j++) { + buf.push_back("ACGTN"[s[i + j]]); + } + cerr << buf << endl; + } + cerr << endl; +} + + +size_t getMaxMatchLen(const EList& edits, const size_t read_len) +{ + size_t max_length = 0; + uint32_t last_edit_pos = 0; + uint32_t len = 0; + + if (edits.size() == 0) { + // no edits --> exact match + return read_len; + } + + for(size_t i = 0; i < edits.size(); i++) { + if (last_edit_pos > edits[i].pos) { + continue; + } + + len = edits[i].pos - last_edit_pos; + if(len > max_length) { + max_length = len; + } + + last_edit_pos = edits[i].pos + 1; + 
} + + if (last_edit_pos < read_len) { + len = read_len - last_edit_pos; + if(len > max_length) { + max_length = len; + } + } + + return max_length; +} + +int max_index(size_t array[4]) +{ + int max_idx = 0; + + for(size_t i = 1; i < 4; i++) { + if(array[max_idx] < array[i]) { + max_idx = i; + } + } + + return max_idx; +} + +CoordHelper::CoordHelper(TIndexOffU length, + TIndexOffU forward_length, + const EList& szs, + const EList& ref_names) : +length_(length), +forward_length_(forward_length), +szs_(szs), +ref_namelines_(ref_names) +{ + // build ref_names_ from ref_namelines_ + buildNames(); + buildJoinedFragment(); +} + +CoordHelper::~CoordHelper() +{ +} + +void CoordHelper::buildNames() +{ + ref_names_.resize(ref_namelines_.size()); + for(size_t i = 0; i < ref_namelines_.size(); i++) { + string& nameline = ref_namelines_[i]; + + for(size_t j = 0; j < nameline.length(); j++) { + char n = nameline[j]; + if(n == ' ') { + break; + } + ref_names_[i].push_back(n); + } + } +} + +int CoordHelper::mapJoinedOffToSeq(TIndexOffU joinedOff) +{ + /* search from cached_list */ + if(num_cached_ > 0) { + for(size_t i = 0; i < num_cached_; i++) { + Fragments *frag = &cached_[i]; + if(frag->contain(joinedOff)) { + return frag->frag_id; + } + } + /* fall through */ + } + + /* search list */ + int top = 0; + int bot = fraglist_.size() - 1; + int pos = 0; + + Fragments *frag = &fraglist_[pos]; + while((bot - top) > 1) { + pos = top + ((bot - top) >> 1); + frag = &fraglist_[pos]; + + if (joinedOff < frag->joinedOff) { + bot = pos; + } else { + top = pos; + } + } + + frag = &fraglist_[top]; + if (frag->contain(joinedOff)) { + // update cache + if (num_cached_ < CACHE_SIZE_JOINEDFRG) { + cached_[num_cached_] = *frag; + num_cached_++; + } else { + cached_[victim_] = *frag; + victim_++; // round-robin + victim_ %= CACHE_SIZE_JOINEDFRG; + } + + return top; + } + + return -1; +} + +int CoordHelper::getGenomeCoord(TIndexOffU joinedOff, + string& chr_name, + TIndexOffU& pos_in_chr) +{ + int 
seq_id = mapJoinedOffToSeq(joinedOff); + if (seq_id < 0) { + return -1; + } + + Fragments *frag = &fraglist_[seq_id]; + TIndexOffU offset = joinedOff - frag->joinedOff; + + pos_in_chr = frag->seqOff + offset; + chr_name = ref_names_[frag->seq_id]; + + return 0; +} + +void CoordHelper::buildJoinedFragment() +{ + TIndexOffU acc_joinedOff = 0; + TIndexOffU acc_seqOff = 0; + TIndexOffU seq_id = 0; + TIndexOffU frag_id = 0; + for(size_t i = 0; i < szs_.size(); i++) { + const RefRecord& rec = szs_[i]; + if(rec.len == 0) { + continue; + } + if (rec.first) { + acc_seqOff = 0; + seq_id++; + } + + fraglist_.expand(); + + fraglist_.back().joinedOff = acc_joinedOff; + fraglist_.back().length = rec.len; + + fraglist_.back().frag_id = frag_id++; + fraglist_.back().seq_id = seq_id - 1; + fraglist_.back().first = rec.first; + + acc_seqOff += rec.off; + fraglist_.back().seqOff = acc_seqOff; + + acc_joinedOff += rec.len; + acc_seqOff += rec.len; + } + + // Add Last Fragment(empty) + fraglist_.expand(); + fraglist_.back().joinedOff = acc_joinedOff; + fraglist_.back().length = 0; + fraglist_.back().seqOff = acc_seqOff; + fraglist_.back().first = false; + fraglist_.back().frag_id = frag_id; + fraglist_.back().seq_id = seq_id; +} + +TIndexOffU CoordHelper::getEnd(TIndexOffU e) { + assert_lt(e, length_) + + TIndexOffU end = 0; + if(e < forward_length_) { + int frag_id = mapJoinedOffToSeq(e); + assert_geq(frag_id, 0); + end = fraglist_[frag_id].joinedOff + fraglist_[frag_id].length; + } else { + // ReverseComplement + // a, b are positions w.r.t reverse complement string. 
+ // fragment map is based on forward string + int frag_id = mapJoinedOffToSeq(length_ - e - 1); + assert_geq(frag_id, 0); + end = length_ - fraglist_[frag_id].joinedOff; + } + + assert_leq(end, length_); + return end; +} + +TIndexOffU CoordHelper::getStart(TIndexOffU e) { + assert_lt(e, length_) + + TIndexOffU start = 0; + if(e < forward_length_) { + int frag_id = mapJoinedOffToSeq(e); + assert_geq(frag_id, 0); + start = fraglist_[frag_id].joinedOff; + } else { + // ReverseComplement + // a, b are positions w.r.t reverse complement string. + // fragment map is based on forward string + int frag_id = mapJoinedOffToSeq(length_ - e - 1); + assert_geq(frag_id, 0); + start = length_ - (fraglist_[frag_id].joinedOff + fraglist_[frag_id].length); + } + + assert_leq(start, length_); + return start; +} + +template +void SeedExt::getExtendedSeedSequence(const TStr& s, + string& seq) const +{ + seq.clear(); + TIndexOffU prev_end = orig_pos.first; + for(size_t j = 0; j < left_gaps.size(); j++) { + TIndexOffU curr_end = orig_pos.first - left_gaps[j].first; + assert_leq(curr_end, prev_end); + if(curr_end < prev_end) { + seq = getString(s, curr_end, prev_end - curr_end) + seq; + } + int gap_len = left_gaps[j].second; + assert_neq(gap_len, 0); + if(gap_len > 0) { // deletion + string gap_str(gap_len, '-'); + seq = gap_str + seq; + } else { + curr_end += gap_len; + } + prev_end = curr_end; + } + assert_leq(pos.first, prev_end); + if(pos.first < prev_end) { + seq = getString(s, pos.first, prev_end - pos.first) + seq; + } + if(orig_pos.second - orig_pos.first > 0) + seq += getString(s, orig_pos.first, orig_pos.second - orig_pos.first); + TIndexOffU prev_begin = orig_pos.second; + for(size_t j = 0; j < right_gaps.size(); j++) { + TIndexOffU curr_begin = orig_pos.second + right_gaps[j].first; + assert_leq(prev_begin, curr_begin); + if(prev_begin < curr_begin) { + seq += getString(s, prev_begin, curr_begin - prev_begin); + } + int gap_len = right_gaps[j].second; + assert_neq(gap_len, 
0); + if(gap_len > 0) { // deletion + string gap_str(gap_len, '-'); + seq += gap_str; + } else { + curr_begin -= gap_len; + } + prev_begin = curr_begin; + } + assert_leq(prev_begin, pos.second); + if(prev_begin < pos.second) { + seq += getString(s, prev_begin, pos.second - prev_begin); + } +} + +SeedSNP *lookup_add_SNP(EList& repeat_snps, SeedSNP& snp) +{ + for(size_t i = 0; i < repeat_snps.size(); i++) { + if(*repeat_snps[i] == snp) { + return repeat_snps[i]; + } + } + + repeat_snps.expand(); + repeat_snps.back() = new SeedSNP(); + *(repeat_snps.back()) = snp; + return repeat_snps.back(); +} + +template +void SeedExt::generateSNPs(const TStr &s, const string& consensus, EList& repeat_snps) +{ + EList > gaps; + + // Merge Gaps + { + TIndexOffU left_ext_len = getLeftExtLength(); + for(size_t g = 0; g < left_gaps.size(); g++) { + gaps.expand(); + gaps.back().first = left_ext_len - left_gaps[g].first + left_gaps[g].second; + gaps.back().second = left_gaps[g].second; + } + TIndexOffU right_base = orig_pos.second - pos.first; + for(size_t g = 0; g < right_gaps.size(); g++) { + gaps.expand(); + gaps.back().first = right_base + right_gaps[g].first; + gaps.back().second = right_gaps[g].second; + } + gaps.sort(); + } + + // + string con_ref; + string seq_read; + TIndexOffU prev_con_pos = consensus_pos.first; + TIndexOffU prev_seq_pos = pos.first; + + for(size_t g = 0; g < gaps.size(); g++) { + TIndexOffU curr_seq_pos = pos.first + gaps[g].first; + TIndexOffU curr_con_pos = prev_con_pos + (curr_seq_pos - prev_seq_pos); + + con_ref = consensus.substr(prev_con_pos, curr_con_pos - prev_con_pos); + seq_read = getString(s, prev_seq_pos, curr_seq_pos - prev_seq_pos); + for(size_t l = 0; l < con_ref.length(); l++) { + if(seq_read[l] != con_ref[l]) { + // Single + SeedSNP snp; + snp.init(EDIT_TYPE_MM, prev_con_pos + l, 1, seq_read[l]); + + // lookup repeat_snp + snps.expand(); + snps.back() = lookup_add_SNP(repeat_snps, snp); + } + } + + int gap_len = gaps[g].second; + 
assert_neq(gap_len, 0); + if(gap_len > 0) { + // Deletion (gap on read) + + string gap_str(gap_len, '-'); + + SeedSNP snp; + snp.init(EDIT_TYPE_READ_GAP, curr_con_pos, gap_len, consensus.substr(curr_con_pos, gap_len)); + + snps.expand(); + snps.back() = lookup_add_SNP(repeat_snps, snp); + + curr_con_pos += gap_len; + + } else { + // Insertion (gap on reference) + string gap_str(abs(gap_len), '-'); + + SeedSNP snp; + snp.init(EDIT_TYPE_REF_GAP, curr_con_pos, abs(gap_len), getString(s, curr_seq_pos, abs(gap_len))); + + snps.expand(); + snps.back() = lookup_add_SNP(repeat_snps, snp); + + curr_seq_pos += abs(gap_len); + } + + prev_con_pos = curr_con_pos; + prev_seq_pos = curr_seq_pos; + } + + assert_eq(consensus_pos.second - prev_con_pos, pos.second - prev_seq_pos); + con_ref = consensus.substr(prev_con_pos, consensus_pos.second - prev_con_pos); + seq_read = getString(s, prev_seq_pos, pos.second - prev_seq_pos); + + for(size_t l = 0; l < con_ref.length(); l++) { + if(seq_read[l] != con_ref[l]) { + // Single + + SeedSNP snp; + snp.init(EDIT_TYPE_MM, prev_con_pos + l, 1, seq_read[l]); + + snps.expand(); + snps.back() = lookup_add_SNP(repeat_snps, snp); + } + } +} + +bool seedCmp(const SeedExt& s, const SeedExt& s2) +{ + if(s.getLength() != s2.getLength()) + return s.getLength() > s2.getLength(); + if(s.pos.first != s2.pos.first) + return s.pos.first < s2.pos.first; + if(s.pos.second != s2.pos.second) + return s.pos.second < s2.pos.second; + return false; +} + +template +void RB_Repeat::init(const RepeatParameter& rp, + const TStr& s, + CoordHelper& coordHelper, + const RB_SubSA& subSA, + const RB_RepeatBase& repeatBase) +{ + consensus_ = repeatBase.seq; + assert_geq(consensus_.length(), rp.min_repeat_len); + assert_eq(consensus_.length(), rp.min_repeat_len + repeatBase.nodes.size() - 1); + + EList positions, next_positions; + + const EList& repeat_index = subSA.getRepeatIndex(); + seeds_.clear(); + for(size_t n = 0; n < repeatBase.nodes.size(); n++) { + TIndexOffU idx = 
repeatBase.nodes[n]; + TIndexOffU saBegin = repeat_index[idx]; + TIndexOffU saEnd = (idx + 1 < repeat_index.size() ? repeat_index[idx+1] : subSA.size()); + + next_positions.clear(); + for(size_t sa = saBegin; sa < saEnd; sa++) { + TIndexOffU left = subSA.get(sa); + TIndexOffU right = left + subSA.seed_len(); + next_positions.push_back(left); + + if(left > 0) { + size_t idx = positions.bsearchLoBound(left - 1); + if(idx < positions.size() && positions[idx] == left - 1) + continue; + } + + seeds_.expand(); + SeedExt& seed = seeds_.back(); + seed.reset(); + seed.orig_pos = pair(left, right); + seed.pos = seed.orig_pos; + seed.consensus_pos.first = n; + seed.consensus_pos.second = n + rp.min_repeat_len; + seed.bound = pair(coordHelper.getStart(left), coordHelper.getEnd(left)); + +#ifndef NDEBUG + string tmp_str = getString(s, seed.pos.first, seed.pos.second - seed.pos.first); + assert_eq(tmp_str, consensus_.substr(n, seed.pos.second - seed.pos.first)); +#endif + + for(size_t p = seed.consensus_pos.second; p < consensus_.length(); p++) { + size_t pos = seed.pos.second; + if(pos >= seed.bound.second) + break; + int nt = getSequenceBase(s, pos); + if("ACGT"[nt] != consensus_[p]) + break; + seed.pos.second++; + seed.consensus_pos.second++; + } + } + positions = next_positions; + } + + internal_update(); +} + +template +void RB_Repeat::extendConsensus(const RepeatParameter& rp, + const TStr& s) +{ + size_t remain = seeds_.size(); + EList left_consensuses, right_consensuses, empty_consensuses; + EList ed_seed_nums; + + const TIndexOffU default_max_ext_len = (rp.max_repeat_len - rp.seed_len) / 2; + const TIndexOffU seed_mm = 1; + string left_ext_consensus = "", right_ext_consensus = ""; + + empty_consensuses.resize(seed_mm + 1); + empty_consensuses.fill(""); + + while(remain >= rp.repeat_count) { + for(size_t i = 0; i < remain; i++) { + seeds_[i].done = false; + seeds_[i].curr_ext_len = 0; + } + + // extend seeds in left or right direction + TIndexOffU max_ext_len = 
min(default_max_ext_len, (TIndexOffU)(rp.max_repeat_len - consensus_.length())); + get_consensus_seq(s, + seeds_, + 0, // seed begin + remain, // seed end + max_ext_len, + max_ext_len, + seed_mm, + rp, + ed_seed_nums, + &left_consensuses, + &right_consensuses); + + size_t allowed_seed_mm = 0; + left_ext_consensus.clear(); + right_ext_consensus.clear(); + for(int i = 0 /* (int)seed_mm */; i >= 0; i--) { + size_t extlen = (ed_seed_nums[i] < rp.repeat_count ? 0 : (left_consensuses[i].length() + right_consensuses[i].length())); + // if(extlen <= 0 || extlen < max_ext_len * i / seed_mm) + //if(extlen < max_ext_len * 2) + // continue; + if(extlen <= 0) + continue; + + left_ext_consensus = left_consensuses[i]; + right_ext_consensus = right_consensuses[i]; + allowed_seed_mm = (size_t)i; + assert_gt(left_ext_consensus.length(), 0); + assert_gt(right_ext_consensus.length(), 0); + break; + } + size_t num_passed_seeds = 0; + if(left_ext_consensus.length() > 0 && right_ext_consensus.length()) { + consensus_ = reverse(left_ext_consensus) + consensus_; + consensus_ += right_ext_consensus; + +#if 0 + calc_edit_dist(s, + seeds_, + 0, + remain, + left ? consensuses : empty_consensuses, + left ? 
empty_consensuses : consensuses, + allowed_seed_mm); +#endif + + // update seeds + for(size_t i = 0; i < seeds_.size(); i++) { + SeedExt& seed = seeds_[i]; + + if(i < remain) { + if(seed.ed <= allowed_seed_mm) { + num_passed_seeds++; + seed.done = true; + seed.total_ed += seed.ed; + seed.pos.first -= left_ext_consensus.length(); + seed.pos.second += right_ext_consensus.length(); + seed.consensus_pos.first = 0; + seed.consensus_pos.second = consensus_.length(); + + if(seed.left_gaps.size() > 0 && + seed.left_gaps.back().first >= seed.orig_pos.first - seed.pos.first) { + int gap_len = seed.left_gaps.back().second; + seed.pos.first += gap_len; + } + if(seed.right_gaps.size() > 0 && + seed.right_gaps.back().first >= seed.pos.second - seed.orig_pos.second) { + int gap_len = seed.right_gaps.back().second; + seed.pos.second -= gap_len; + } + } + } + } + + // move up "done" seeds + size_t j = 0; + for(size_t i = 0; i < remain; i++) { + if(!seeds_[i].done) continue; + assert_geq(i, j); + if(i > j) { + SeedExt temp = seeds_[j]; + seeds_[j] = seeds_[i]; + seeds_[i] = temp; + // Find next "undone" seed + j++; + while(j < i && seeds_[j].done) { + j++; + } + assert(j < remain && !seeds_[j].done); + } else { + j = i + 1; + } + } + } + + remain = num_passed_seeds; + break; + } // while(remain >= rp.repeat_count) + +#ifndef NDEBUG + // make sure seed positions are unique + EList > seed_poses; + for(size_t i = 0; i < seeds_.size(); i++) { + seed_poses.expand(); + seed_poses.back() = seeds_[i].orig_pos; + } + seed_poses.sort(); + for(size_t i = 0; i + 1 < seed_poses.size(); i++) { + if(seed_poses[i].first == seed_poses[i+1].first) { + assert_lt(seed_poses[i].second, seed_poses[i+1].second); + } else { + assert_lt(seed_poses[i].first, seed_poses[i+1].first); + } + } + + for(size_t i = 0; i < seeds_.size(); i++) { + assert(seeds_[i].valid()); + } +#endif + + RB_Repeat::internal_update(); + + // check repeats within a repeat sequence + self_repeat_ = false; + for(size_t i = 0; i + 1 < 
seed_ranges_.size(); i++) { + const RB_AlleleCoord& range = seed_ranges_[i]; + for(size_t j = i + 1; j < seed_ranges_.size(); j++) { + RB_AlleleCoord& range2 = seed_ranges_[j]; + if(range.right <= range2.left) + break; + self_repeat_ = true; + } + } +} + +template +void RB_Repeat::getNextRepeat(const RepeatParameter& rp, + const TStr& s, + RB_Repeat& o) +{ + o.reset(); + + size_t i = 0; + for(i = 0; i < seeds_.size() && seeds_[i].done; i++); + if(i >= seeds_.size()) + return; + + for(size_t j = i; j < seeds_.size(); j++) { + assert(!seeds_[j].done); + o.seeds_.expand(); + o.seeds_.back() = seeds_[j]; + + } + seeds_.resizeExact(i); + internal_update(); + + assert_gt(o.seeds_.size(), 0); + o.consensus_ = getString(s, o.seeds_[0].orig_pos.first, o.seeds_[0].orig_pos.second - o.seeds_[0].orig_pos.first); + for(size_t j = 0; j < o.seeds_.size(); j++) { + o.seeds_[j].pos = o.seeds_[j].orig_pos; + o.seeds_[j].left_gaps.clear(); + o.seeds_[j].right_gaps.clear(); + o.seeds_[j].ed = o.seeds_[j].total_ed = 0; + } +} + +void RB_Repeat::internal_update() +{ + sort(seeds_.begin(), seeds_.end(), seedCmp); + + seed_ranges_.resizeExact(seeds_.size()); + for(size_t i = 0; i < seeds_.size(); i++) { + seed_ranges_[i].left = seeds_[i].pos.first; + seed_ranges_[i].right = seeds_[i].pos.second; + seed_ranges_[i].idx = i; + } + seed_ranges_.sort(); + + // check repeats within a repeat sequence + size_t remove_count = 0; + for(size_t i = 0; i + 1 < seed_ranges_.size(); i++) { + const RB_AlleleCoord& range = seed_ranges_[i]; + if(range.left == numeric_limits::max()) + continue; + for(size_t j = i + 1; j < seed_ranges_.size(); j++) { + RB_AlleleCoord& range2 = seed_ranges_[j]; + if(range2.left == numeric_limits::max()) + continue; + if(range.right <= range2.left) + break; + if(range.right >= range2.right) { + range2.left = numeric_limits::max(); + remove_count++; + } + } + } + + if(remove_count <= 0) + return; + + for(size_t i = 0; i < seed_ranges_.size(); i++) { + if(seed_ranges_[i].left == 
numeric_limits::max()) + seeds_[seed_ranges_[i].idx].reset(); + } + sort(seeds_.begin(), seeds_.end(), seedCmp); + +#ifndef NDEBUG + for(size_t i = 0; i < seeds_.size(); i++) { + if(i < seeds_.size() - remove_count) { + assert_lt(seeds_[i].pos.first, seeds_[i].pos.second); + } else { + assert_eq(seeds_[i].pos.first, seeds_[i].pos.second); + } + } +#endif + + seeds_.resize(seeds_.size() - remove_count); + seed_ranges_.resize(seeds_.size()); + for(size_t i = 0; i < seeds_.size(); i++) { + seed_ranges_[i].left = seeds_[i].pos.first; + seed_ranges_[i].right = seeds_[i].pos.second; + seed_ranges_[i].idx = i; + } + seed_ranges_.sort(); +} + +bool RB_Repeat::contain(TIndexOffU left, TIndexOffU right) const +{ + size_t l = 0, r = seed_ranges_.size(); + while(l < r) { + size_t m = (l + r) / 2; + const RB_AlleleCoord& coord = seed_ranges_[m]; + if(right <= coord.left) { + r = m; + } else if(left >= coord.right) { + l = m + 1; + } else { + return coord.left <= left && right <= coord.right; + } + } + + return false; +} + +template +void RB_Repeat::saveSeedExtension(const RepeatParameter& rp, + const TStr& s, + CoordHelper& coordHelper, + TIndexOffU grp_id, + ostream& fp, + size_t& total_repeat_seq_len, + size_t& total_allele_seq_len) const +{ + // apply color, which is compatible with linux commands such as cat and less -r +#if 1 + const string red = "\033[31m", reset = "\033[0m", resetline = "\033[4m", redline = "\033[4;31m"; +#else + const string red = "", reset = "", resetline = "", redline = ""; +#endif + bool show_exterior_seq = true; + const size_t max_show_seq_len = 700; + + size_t total_count = 0; + for(size_t i = 0; i < seeds_.size(); i++) { + const SeedExt& seed = seeds_[i]; + size_t ext_len = seed.getLength(); + if(ext_len < rp.min_repeat_len) continue; + total_allele_seq_len += ext_len; + total_count++; + bool sense_strand = seed.pos.first < coordHelper.forward_length(); + + fp << setw(6) << repeat_id_ << " " << setw(5) << seeds_.size(); + fp << " " << setw(4) << 
i; + fp << " " << setw(4) << ext_len; + fp << " " << (sense_strand ? '+' : '-'); + fp << " " << setw(10) << seed.pos.first << " " << setw(10) << seed.pos.second; + fp << " " << setw(10) << seed.orig_pos.first << " " << setw(10) << seed.orig_pos.second; + + string chr_name; + TIndexOffU pos_in_chr; + if(sense_strand) { + coordHelper.getGenomeCoord(seed.pos.first, chr_name, pos_in_chr); + } else { + coordHelper.getGenomeCoord(s.length() - seed.pos.first - (seed.pos.second - seed.pos.first), chr_name, pos_in_chr); + } + fp << " " << setw(5) << chr_name << ":" << setw(10) << std::left << pos_in_chr << std::right; + + if(sense_strand) { + coordHelper.getGenomeCoord(seed.pos.second, chr_name, pos_in_chr); + } else { + coordHelper.getGenomeCoord(s.length() - seed.pos.second - (seed.pos.second - seed.pos.first), chr_name, pos_in_chr); + } + fp << " " << setw(5) << chr_name << ":" << setw(10) << std::left << pos_in_chr << std::right; + + if(!seed.aligned) { + fp << endl; + continue; + } + + string deststr = ""; + seed.getExtendedSeedSequence(s, deststr); + + // add exterior sequences + if(seed.consensus_pos.first > 0) { + TIndexOffU seq_pos, seq_len; + if(seed.pos.first >= seed.consensus_pos.first) { + seq_pos = seed.pos.first - seed.consensus_pos.first; + seq_len = seed.consensus_pos.first; + } else { + seq_pos = 0; + seq_len = seed.pos.first; + } + deststr = getString(s, seq_pos, seq_len) + deststr; + if(seq_len < seed.consensus_pos.first) { + deststr = string(seed.consensus_pos.first - seq_len, 'N') + deststr; + } + } + if(seed.consensus_pos.second < consensus_.length()) { + deststr += getString(s, seed.pos.second, consensus_.length() - seed.consensus_pos.second); + } + + assert_eq(consensus_.length(), deststr.length()); + fp << " "; + + // print sequence w.r.t. 
the current group + for(size_t j = 0; j < min(consensus_.length(), max_show_seq_len); j++) { + bool outside = j < seed.consensus_pos.first || j >= seed.consensus_pos.second; + bool different = (consensus_[j] != deststr[j]); + if(outside) { + if(show_exterior_seq) { + if(different) { + fp << redline; + fp << deststr[j]; + fp << reset; + } else { + fp << resetline; + fp << deststr[j]; + fp << reset; + } + } else { + if(j < seed.consensus_pos.first) + fp << " "; + } + } else { + if(different) fp << red; + fp << deststr[j]; + if(different) fp << reset; + } + } + +#if 0 + fp << "\t"; + for(size_t ei = 0; ei < seed.edits.size(); ei++) { + const Edit& edit = seed.edits[ei]; + if(ei > 0) fp << ","; + fp << edit; + if (edit.snpID != std::numeric_limits::max()) { + fp << "@" << edit.snpID; + } + } +#endif + + fp << endl; + } + + if(total_count > 0) fp << setw(5) << total_count << endl << endl; + total_repeat_seq_len += consensus_.length(); +} + +template +size_t calc_edit_dist(const TStr& s, + const SeedExt& seed, + const SeedExt& seed2, + size_t left_ext, + size_t right_ext, + size_t max_ed) +{ + if(seed.bound.first + left_ext > seed.pos.first || + seed.pos.second + right_ext > seed.bound.second || + seed2.bound.first + left_ext > seed2.pos.first || + seed2.pos.second + right_ext > seed2.bound.second) + return max_ed + 1; + + size_t ed = 0; + for(size_t i = 0; i < left_ext; i++) { + int ch = getSequenceBase(s, seed.pos.first - i - 1); + assert_range(0, 3, ch); + int ch2 = getSequenceBase(s, seed2.pos.first - i - 1); + assert_range(0, 3, ch2); + if(ch != ch2) ed++; + if(ed > max_ed) + return ed; + + } + for(size_t i = 0; i < right_ext; i++) { + int ch = getSequenceBase(s, seed.pos.second + i); + assert_range(0, 3, ch); + int ch2 = getSequenceBase(s, seed2.pos.second + i); + assert_range(0, 3, ch2); + if(ch != ch2) ed++; + if(ed > max_ed) + return ed; + } + + return ed; +} + +size_t extract_kmer(const string& seq, size_t offset, size_t k = 5) +{ + assert_leq(offset + k, 
seq.length()); + size_t kmer = 0; + for(size_t i = 0; i < k; i++) { + kmer = (kmer << 2) | asc2dna[seq[offset + i]]; + } + return kmer; +} + +size_t next_kmer(size_t kmer, char base, size_t k = 5) +{ + kmer &= ((1 << ((k-1)*2))) - 1; + kmer = (kmer << 2) | asc2dna[base]; + return kmer; +} + +void build_kmer_table(const string& consensus, EList >& kmer_table, size_t k = 5) +{ + kmer_table.clear(); + if(consensus.length() < k) + return; + size_t kmer = 0; + for(size_t i = 0; i + k <= consensus.length(); i++) { + if(i == 0) { + kmer = extract_kmer(consensus, i, k); + } else { + kmer = next_kmer(kmer, consensus[i+k-1], k); + } + kmer_table.expand(); + kmer_table.back().first = kmer; + kmer_table.back().second = i; + } + kmer_table.sort(); +} + +void find_gap_pos(const string& s, + const string& s2, + EList& ed, + EList& ed2, + bool del, + size_t gap_len, + size_t max_mm, + size_t& gap_pos, + size_t& mm, + bool debug = false) +{ + assert_eq(s.length(), s2.length()); + size_t seq_len = s.length(); + ed.resizeExact(seq_len); ed.fill(max_mm + 1); + ed2.resizeExact(seq_len); ed2.fill(max_mm + 1); + + // from left to right + for(size_t i = 0; i < seq_len; i++) { + size_t add = (s[i] == s2[i] ? 0 : 1); + if(i == 0) ed[i] = add; + else ed[i] = ed[i-1] + add; + if(ed[i] >= max_mm + 1) break; + } + + // from right to left + size_t s_sub = del ? 0 : gap_len; + size_t s2_sub = del ? gap_len : 0; + for(int i = seq_len - 1; i >= gap_len; i--) { + size_t add = (s[i - s_sub] == s2[i - s2_sub] ? 
0 : 1); + if(i == seq_len - 1) ed2[i] = add; + else ed2[i] = ed2[i+1] + add; + if(ed2[i] > max_mm) break; + } + + if(debug) { + cout << s << endl << s2 << endl; + for(size_t i = 0; i < ed.size(); i++) { + cout << (ed[i] % 10); + } + cout << endl; + for(size_t i = 0; i < ed2.size(); i++) { + cout << (ed2[i] % 10); + } + cout << endl; + } + + size_t min_mm = ed2[gap_len]; + int min_mm_i = -1; + assert_eq(ed.size(), ed2.size()); + for(size_t i = 0; i + gap_len + 1 < ed.size(); i++) { + if(ed[i] > max_mm) break; + size_t cur_mm = ed[i] + ed2[i + gap_len + 1]; + if(cur_mm < min_mm) { + min_mm = cur_mm; + min_mm_i = i; + } + } + + mm = min_mm; + if(mm > max_mm) + return; + gap_pos = (size_t)(min_mm_i + 1); +} + +void align_with_one_gap(const string& s, + const EList >& s_kmer_table, + const string& s2, + EList& counts, + size_t max_gap, + size_t max_mm, + size_t& mm, + size_t& gap_pos, + int& gap_len, + size_t k = 5) +{ + mm = max_mm + 1; + assert_eq(s.length(), s2.length()); + counts.resizeExact(max_gap * 2 + 1); + counts.fillZero(); + size_t max_count = 0, max_count_i = 0; + for(size_t i = 0; i + k <= s2.length(); i += 1) { + pair kmer(0, 0); + kmer.first = extract_kmer(s2, i, k); + size_t lb = s_kmer_table.bsearchLoBound(kmer); + while(lb < s_kmer_table.size() && kmer.first == s_kmer_table[lb].first) { + int gap = (int)s_kmer_table[lb].second - (int)i; + if(gap != 0 && abs(gap) < max_gap) { + size_t gap_i = (size_t)(gap + max_gap); + counts[gap_i]++; + if(counts[gap_i] > max_count) { + max_count = counts[gap_i]; + max_count_i = gap_i; + } + } + lb++; + } + } + + if(max_count <= 0) + return; + + assert_lt(max_count_i, counts.size()); + int gap = (int)max_count_i - (int)max_gap; + assert_neq(gap, 0); + size_t abs_gap = abs(gap); + bool del = gap > 0; + + EList ed, ed2; + find_gap_pos(s, + s2, + ed, + ed2, + del, + abs_gap, + max_mm, + gap_pos, + mm); + if(mm > max_mm) + return; + + gap_len = del ? 
abs_gap : -abs_gap; + +#ifndef NDEBUG + assert_leq(mm, max_mm); + string ds = s, ds2 = s2; + size_t debug_mm = 0; + if(del) ds.erase(gap_pos, abs_gap); + else ds2.erase(gap_pos, abs_gap); + + for(size_t i = 0; i < min(ds.length(), ds2.length()); i++) { + if(ds[i] != ds2[i]) + debug_mm++; + } + assert_eq(debug_mm, mm); +#endif +} + +template +void calc_edit_dist(const TStr& s, + EList& seeds, + size_t sb, + size_t se, + const EList& left_consensuses, + const EList& right_consensuses, + uint32_t max_ed) +{ + string left_consensus = left_consensuses[max_ed]; + string right_consensus = right_consensuses[max_ed]; + + EList > left_kmer_table, right_kmer_table; + build_kmer_table(left_consensus, left_kmer_table); + build_kmer_table(right_consensus, right_kmer_table); + + string left_seq, right_seq; + const size_t max_gap = 10; + EList counts; + + size_t left_ext = left_consensus.length(); + size_t right_ext = right_consensus.length(); + for(size_t i = sb; i < se; i++) { + SeedExt& seed = seeds[i]; + if(seed.bound.first + left_ext > seed.pos.first || + seed.pos.second + right_ext > seed.bound.second) { + seed.ed = max_ed + 1; + continue; + } + + size_t left_ed = 0; + if(left_ext > 0) { + getString(s, seed.pos.first - left_ext, left_ext, left_seq); + reverse(left_seq.begin(), left_seq.end()); + for(size_t j = 0; j < left_ext; j++) { + if(left_seq[j] != left_consensus[j]) left_ed++; + if(left_ed <= max_ed && j < left_consensuses[left_ed].length()) { + seed.curr_ext_len = j + 1; + } + } + + if(left_ed > max_ed) { + size_t gap_pos = 0; + int gap_len = 0; + align_with_one_gap(left_consensus, + left_kmer_table, + left_seq, + counts, + min(left_ed - max_ed, max_gap), + max_ed, + left_ed, + gap_pos, + gap_len); + if(left_ed <= max_ed) { + seed.left_gaps.expand(); + seed.left_gaps.back().first = seed.getLeftExtLength() + gap_pos; + seed.left_gaps.back().second = gap_len; + } + } + } else { + left_seq.clear(); + } + + size_t right_ed = 0; + if(right_ext > 0) { + getString(s, 
seed.pos.second, right_ext, right_seq); + for(size_t j = 0; j < right_ext; j++) { + if(right_seq[j] != right_consensus[j]) right_ed++; + if(right_ed <= max_ed && j < right_consensuses[right_ed].length()) { + seed.curr_ext_len = j + 1; + } + } + if(right_ed > max_ed) { + size_t gap_pos = 0; + int gap_len = 0; + align_with_one_gap(right_consensus, + right_kmer_table, + right_seq, + counts, + min(right_ed - max_ed, max_gap), + max_ed, + right_ed, + gap_pos, + gap_len); + if(right_ed <= max_ed) { + seed.right_gaps.expand(); + seed.right_gaps.back().first = seed.getRightExtLength() + gap_pos; + seed.right_gaps.back().second = gap_len; + } + } + } else { + right_seq.clear(); + } + + seed.ed = left_ed + right_ed; + } +} + +template +void RB_Repeat::get_consensus_seq(const TStr& s, + EList& seeds, + size_t sb, + size_t se, + size_t min_left_ext, + size_t min_right_ext, + size_t max_ed, + const RepeatParameter& rp, + EList& ed_seed_nums, + EList* left_consensuses, + EList* right_consensuses) const +{ + assert_lt(sb, se); + assert_geq(se - sb, rp.seed_count); + assert_leq(se, seeds.size()); + if(left_consensuses != NULL) { + left_consensuses->clear(); left_consensuses->resize(max_ed + 1); + for(size_t i = 0; i < max_ed + 1; i++) { + (*left_consensuses)[i].clear(); + } + } + if(right_consensuses != NULL) { + right_consensuses->clear(); right_consensuses->resize(max_ed + 1); + for(size_t i = 0; i < max_ed + 1; i++) { + (*right_consensuses)[i].clear(); + } + } + + assert_eq(min_left_ext, min_right_ext); + + EList seqs; + seqs.reserveExact(seeds.size()); + size_t max_i = 0, max_count = 0; + while(min_left_ext > 0) { + seqs.clear(); + max_i = 0; + max_count = 0; + for(size_t i = sb; i < se; i++) { + const SeedExt& seed = seeds[i]; + if(seeds[i].bound.first + min_left_ext > seed.pos.first || + seed.pos.second + min_right_ext > seed.bound.second) + continue; + + seqs.expand(); + if(min_left_ext > 0) { + getString(s, seed.pos.first - min_left_ext, min_left_ext, seqs.back()); + } + 
if(min_right_ext > 0) { + seqs.back() += getString(s, seed.pos.second, min_right_ext); + } + } + seqs.sort(); + for(size_t i = 0; i + max_count < seqs.size();) { + size_t count = 1; + for(size_t j = i + 1; j < seqs.size(); j++) { + if(seqs[i] == seqs[j]) count++; + else break; + } + if(count >= max_count) { + max_count = count; + max_i = i; + } + i = i + count; + } + if(max_count >= rp.seed_count) + break; + + min_left_ext--; + min_right_ext--; + } + + // update ed + // extend consensus string + ed_seed_nums.resize(max_ed + 1); ed_seed_nums.fillZero(); + EList next_ed_seed_nums; next_ed_seed_nums.resize(max_ed + 1); + + if(max_count < rp.seed_count) + return; + + for(size_t i = sb; i < se; i++) seeds[i].ed = 0; + size_t seed_ext_len = 0; + while(seed_ext_len < max(min_left_ext, min_right_ext)) { + // select base to be used for extension + uint8_t left_ext_base = 0; + if(seed_ext_len < min_left_ext) left_ext_base = seqs[max_i][min_left_ext - seed_ext_len - 1]; + uint8_t right_ext_base = 0; + if(seed_ext_len < min_right_ext) right_ext_base = seqs[max_i][min_left_ext + seed_ext_len]; + + // estimate extended ed + next_ed_seed_nums.fillZero(); + for(size_t i = sb; i < se; i++) { + uint32_t est_ed = seeds[i].ed; + if(seed_ext_len < min_left_ext) { + if(seeds[i].bound.first + seed_ext_len + 1 <= seeds[i].pos.first) { + TIndexOffU left_pos = seeds[i].pos.first - seed_ext_len - 1; + int ch = getSequenceBase(s, left_pos); + assert_range(0, 3, ch); + if ("ACGT"[ch] != left_ext_base) { + est_ed++; + } + } else { + est_ed = max_ed + 1; + } + } + + if(seed_ext_len < min_right_ext) { + TIndexOffU right_pos = seeds[i].pos.second + seed_ext_len; + if(right_pos >= seeds[i].bound.second) { + est_ed = max_ed + 1; + } else { + int ch = getSequenceBase(s, right_pos); + assert_range(0, 3, ch); + if ("ACGT"[ch] != right_ext_base) { + est_ed++; + } + } + } + + if(est_ed <= max_ed) { + next_ed_seed_nums[est_ed]++; + } + } + + for(size_t i = 1; i < next_ed_seed_nums.size(); i++) 
next_ed_seed_nums[i] += next_ed_seed_nums[i-1]; + if(next_ed_seed_nums[max_ed] < rp.repeat_count) { + break; + } + + for(size_t i = sb; i < se; i++) { + if(seed_ext_len < min_left_ext) { + if(seeds[i].bound.first + seed_ext_len + 1 <= seeds[i].pos.first) { + TIndexOffU left_pos = seeds[i].pos.first - seed_ext_len - 1; + int ch = getSequenceBase(s, left_pos); + assert_range(0, 3, ch); + if ("ACGT"[ch] != left_ext_base) { + seeds[i].ed++; + } + } else { + seeds[i].ed = max_ed + 1; + } + } + + if(seed_ext_len < min_right_ext) { + TIndexOffU right_pos = seeds[i].pos.second + seed_ext_len; + if(right_pos >= seeds[i].bound.second) { + seeds[i].ed = max_ed + 1; + } else { + int ch = getSequenceBase(s, right_pos); + assert_range(0, 3, ch); + if ("ACGT"[ch] != right_ext_base) { + seeds[i].ed++; + } + } + } + } + + for(size_t i = 0; i < next_ed_seed_nums.size(); i++) { + if(next_ed_seed_nums[i] < rp.repeat_count) + continue; + ed_seed_nums[i] = next_ed_seed_nums[i]; + if(seed_ext_len < min_left_ext) { + assert(left_consensuses != NULL); + (*left_consensuses)[i] += left_ext_base; + } + if(seed_ext_len < min_right_ext) { + assert(right_consensuses != NULL); + (*right_consensuses)[i] += right_ext_base; + } + } + + seed_ext_len++; + } +} + +template +void RB_RepeatExt::get_consensus_seq(const TStr& s, + EList& seeds, + size_t sb, + size_t se, + size_t min_left_ext, + size_t min_right_ext, + size_t max_ed, + const RepeatParameter& rp, + EList& ed_seed_nums, + EList* left_consensuses, + EList* right_consensuses) const +{ + assert_lt(sb, se); + assert_leq(se, seeds.size()); + if(left_consensuses != NULL) { + left_consensuses->clear(); left_consensuses->resize(max_ed + 1); + for(size_t i = 0; i < max_ed + 1; i++) { + (*left_consensuses)[i].clear(); + } + } + if(right_consensuses != NULL) { + right_consensuses->clear(); right_consensuses->resize(max_ed + 1); + for(size_t i = 0; i < max_ed + 1; i++) { + (*right_consensuses)[i].clear(); + } + } + + // cluster sequences + EList 
belongto; + belongto.resizeExact(se - sb); + for(size_t i = 0; i < belongto.size(); i++) belongto[i] = i; + + for(size_t i = 0; i + 1 < belongto.size(); i++) { + for(size_t j = i + 1; j < belongto.size(); j++) { + if(belongto[j] != j) continue; + size_t ed = calc_edit_dist(s, + seeds[sb + i], + seeds[sb + j], + min_left_ext, + min_right_ext, + max_ed + 1); + if(ed <= max_ed + 1) { + belongto[j] = belongto[i]; + } + + } + } + + // find the maximum group that has the most sequences + EList vote; + vote.resizeExact(seeds.size()); + vote.fillZero(); + size_t max_group = 0; + for(size_t i = 0; i < belongto.size(); i++) { + size_t cur_group = belongto[i]; + assert_lt(cur_group, vote.size()); + vote[cur_group]++; + if(cur_group != max_group && vote[cur_group] > vote[max_group]) { + max_group = cur_group; + } + } + + // reuse vote to store seeds + EList& consensus_group = vote; + consensus_group.clear(); + for(size_t i = 0; i < belongto.size(); i++) { + if(belongto[i] == max_group) + consensus_group.push_back(i); + } + + // update ed + // extend consensus string + ed_seed_nums.resize(max_ed + 1); ed_seed_nums.fillZero(); + EList next_ed_seed_nums; next_ed_seed_nums.resize(max_ed + 1); + for(size_t i = sb; i < se; i++) seeds[i].ed = 0; + size_t seed_ext_len = 0; + while(seed_ext_len < max(min_left_ext, min_right_ext)) { + // count base + size_t l_count[4] = {0, }; + size_t r_count[4] = {0, }; + + for(size_t i = 0; i < consensus_group.size(); i++) { + size_t si = sb + consensus_group[i]; + int ch; + if(seed_ext_len < min_left_ext) { + if(seeds[i].bound.first + seed_ext_len + 1 <= seeds[i].pos.first) { + ch = getSequenceBase(s, seeds[si].pos.first - seed_ext_len - 1); + assert_range(0, 3, ch); + l_count[ch]++; + } + } + if(seed_ext_len < min_right_ext) { + if(seeds[i].bound.second + seed_ext_len) { + ch = getSequenceBase(s, seeds[si].pos.second + seed_ext_len); + assert_range(0, 3, ch); + r_count[ch]++; + } + } + } + + // select base to be used for extension + uint8_t 
left_ext_base = 0; + if(seed_ext_len < min_left_ext) left_ext_base = max_index(l_count); + uint8_t right_ext_base = 0; + if(seed_ext_len < min_right_ext) right_ext_base = max_index(r_count); + + // estimate extended ed + next_ed_seed_nums.fillZero(); + for(size_t i = sb; i < se; i++) { + uint32_t est_ed = seeds[i].ed; + if(seed_ext_len < min_left_ext) { + if(seeds[i].bound.first + seed_ext_len + 1 <= seeds[i].pos.first) { + TIndexOffU left_pos = seeds[i].pos.first - seed_ext_len - 1; + int ch = getSequenceBase(s, left_pos); + assert_range(0, 3, ch); + if (ch != left_ext_base) { + est_ed++; + } + } else { + est_ed = max_ed + 1; + } + } + + if(seed_ext_len < min_right_ext) { + TIndexOffU right_pos = seeds[i].pos.second + seed_ext_len; + if(right_pos >= seeds[i].bound.second) { + est_ed = max_ed + 1; + } else { + int ch = getSequenceBase(s, right_pos); + assert_range(0, 3, ch); + if (ch != right_ext_base) { + est_ed++; + } + } + } + + if(est_ed <= max_ed) { + next_ed_seed_nums[est_ed]++; + } + } + + for(size_t i = 1; i < next_ed_seed_nums.size(); i++) next_ed_seed_nums[i] += next_ed_seed_nums[i-1]; + if(next_ed_seed_nums[max_ed] < rp.repeat_count) { + break; + } + + for(size_t i = sb; i < se; i++) { + if(seed_ext_len < min_left_ext) { + if(seeds[i].bound.first + seed_ext_len + 1 <= seeds[i].pos.first) { + TIndexOffU left_pos = seeds[i].pos.first - seed_ext_len - 1; + int ch = getSequenceBase(s, left_pos); + assert_range(0, 3, ch); + if (ch != left_ext_base) { + seeds[i].ed++; + } + } else { + seeds[i].ed = max_ed + 1; + } + } + + if(seed_ext_len < min_right_ext) { + TIndexOffU right_pos = seeds[i].pos.second + seed_ext_len; + if(right_pos >= seeds[i].bound.second) { + seeds[i].ed = max_ed + 1; + } else { + int ch = getSequenceBase(s, right_pos); + assert_range(0, 3, ch); + if (ch != right_ext_base) { + seeds[i].ed++; + } + } + } + } + + for(size_t i = 0; i < next_ed_seed_nums.size(); i++) { + if(next_ed_seed_nums[i] < rp.repeat_count) + continue; + ed_seed_nums[i] = 
next_ed_seed_nums[i]; + if(seed_ext_len < min_left_ext) { + assert(left_consensuses != NULL); + (*left_consensuses)[i] += (char)("ACGT"[left_ext_base]); + } + if(seed_ext_len < min_right_ext) { + assert(right_consensuses != NULL); + (*right_consensuses)[i] += (char)("ACGT"[right_ext_base]); + } + } + + seed_ext_len++; + } +} + +EList RB_Repeat::ca_ed_; +EList RB_Repeat::ca_ed2_; +string RB_Repeat::ca_s_; +string RB_Repeat::ca_s2_; + +size_t RB_Repeat::seed_merge_tried = 0; +size_t RB_Repeat::seed_merged = 0; + +template +void RB_RepeatExt::extendConsensus(const RepeatParameter& rp, + const TStr& s) +{ + size_t remain = seeds_.size(); + EList consensuses, empty_consensuses; + EList ed_seed_nums; + bool left = true; + + const TIndexOffU default_max_ext_len = 100; + const TIndexOffU seed_mm = 4; + string ext_consensus = ""; + + empty_consensuses.resize(seed_mm + 1); + empty_consensuses.fill(""); + + while(remain >= rp.repeat_count) { + for(size_t i = 0; i < remain; i++) { + seeds_[i].done = false; + seeds_[i].curr_ext_len = 0; + } + + // extend seeds in left or right direction + TIndexOffU max_ext_len = min(default_max_ext_len, (TIndexOffU)(rp.max_repeat_len - consensus_.length())); + get_consensus_seq(s, + seeds_, + 0, // seed begin + remain, // seed end + left ? max_ext_len : 0, + left ? 0 : max_ext_len, + seed_mm, + rp, + ed_seed_nums, + left ? &consensuses : NULL, + left ? NULL : &consensuses); + + size_t allowed_seed_mm = 0; + ext_consensus.clear(); + for(int i = (int)seed_mm; i >= 0; i--) { + size_t extlen = (ed_seed_nums[i] < rp.repeat_count ? 
0 : consensuses[i].length()); + if(extlen <= 0 || extlen < max_ext_len * i / seed_mm) + continue; + + if(i > 0 && consensuses[i].length() <= consensuses[i-1].length() + 5) + continue; + + ext_consensus = consensuses[i]; + allowed_seed_mm = (size_t)i; + assert(ext_consensus.length() > 0); + break; + } + size_t num_passed_seeds = 0; + if(ext_consensus.length() > 0) { + if(left) consensus_ = reverse(ext_consensus) + consensus_; + else consensus_ += ext_consensus; + + calc_edit_dist(s, + seeds_, + 0, + remain, + left ? consensuses : empty_consensuses, + left ? empty_consensuses : consensuses, + allowed_seed_mm); + + // update seeds + for(size_t i = 0; i < seeds_.size(); i++) { + SeedExt& seed = seeds_[i]; + + if(i < remain) { + if(seed.ed <= allowed_seed_mm) { + num_passed_seeds++; + seed.done = true; + seed.total_ed += seed.ed; + if(left) { + if(seed.left_gaps.size() > 0 && + seed.left_gaps.back().first >= seed.orig_pos.first - seed.pos.first) { + int gap_len = seed.left_gaps.back().second; + seed.pos.first += gap_len; + } + seed.pos.first -= ext_consensus.length(); + seed.consensus_pos.first = 0; + seed.consensus_pos.second = consensus_.length(); + } else { + if(seed.right_gaps.size() > 0 && + seed.right_gaps.back().first >= seed.pos.second - seed.orig_pos.second) { + int gap_len = seed.right_gaps.back().second; + seed.pos.second -= gap_len; + } + seed.pos.second += ext_consensus.length(); + seed.consensus_pos.second = consensus_.length(); + } + } else { + if(left) { + assert_leq(seed.curr_ext_len, ext_consensus.length()); + TIndexOffU adjust = ext_consensus.length() - seed.curr_ext_len; + seed.consensus_pos.first += adjust; + seed.consensus_pos.second += ext_consensus.length(); + assert_leq(seed.curr_ext_len, seed.pos.first); + seed.pos.first -= seed.curr_ext_len; + } else { + assert_leq(seed.curr_ext_len, ext_consensus.length()); + seed.consensus_pos.second += seed.curr_ext_len; + seed.pos.second += seed.curr_ext_len; + } + } + } else { + if(left) { + 
seed.consensus_pos.first += ext_consensus.length(); + seed.consensus_pos.second += ext_consensus.length(); + } + } + } + + // move up "done" seeds + size_t j = 0; + for(size_t i = 0; i < remain; i++) { + if(!seeds_[i].done) continue; + assert_geq(i, j); + if(i > j) { + SeedExt temp = seeds_[j]; + seeds_[j] = seeds_[i]; + seeds_[i] = temp; + // Find next "undone" seed + j++; + while(j < i && seeds_[j].done) { + j++; + } + assert(j < remain && !seeds_[j].done); + } else { + j = i + 1; + } + } + } + + remain = num_passed_seeds; + if(remain < rp.repeat_count) { + if(left) { + left = false; + remain = seeds_.size(); + } + } + } // while(remain >= rp.repeat_count) + +#ifndef NDEBUG + // make sure seed positions are unique + EList > seed_poses; + for(size_t i = 0; i < seeds_.size(); i++) { + seed_poses.expand(); + seed_poses.back() = seeds_[i].orig_pos; + } + seed_poses.sort(); + for(size_t i = 0; i + 1 < seed_poses.size(); i++) { + if(seed_poses[i].first == seed_poses[i+1].first) { + assert_lt(seed_poses[i].second, seed_poses[i+1].second); + } else { + assert_lt(seed_poses[i].first, seed_poses[i+1].first); + } + } + + for(size_t i = 0; i < seeds_.size(); i++) { + assert(seeds_[i].valid()); + } +#endif + + internal_update(); + + // check repeats within a repeat sequence + self_repeat_ = false; + for(size_t i = 0; i + 1 < seed_ranges_.size(); i++) { + const RB_AlleleCoord& range = seed_ranges_[i]; + for(size_t j = i + 1; j < seed_ranges_.size(); j++) { + RB_AlleleCoord& range2 = seed_ranges_[j]; + if(range.right <= range2.left) + break; + self_repeat_ = true; + } + } +} + +bool RB_Repeat::overlap(const RB_Repeat& o, + bool& contain, + bool& left, + size_t& seed_i, + size_t& seed_j, + bool debug) const +{ + contain = left = false; + seed_i = seed_j = 0; + size_t p = 0, p2 = 0; + while(p < seed_ranges_.size() && p2 < o.seed_ranges_.size()) { + const RB_AlleleCoord& range = seed_ranges_[p]; + const SeedExt& seed = seeds_[range.idx]; + Range range_ = 
seed.getExtendedRange(consensus_.length()); + RB_AlleleCoord ex_range(range_.first, range_.second, p); + bool representative = (float)range.len() >= consensus_.length() * 0.95f; + + const RB_AlleleCoord& range2 = o.seed_ranges_[p2]; + const SeedExt& seed2 = o.seeds_[range2.idx]; + Range range2_ = seed2.getExtendedRange(o.consensus().length()); + RB_AlleleCoord ex_range2(range2_.first, range2_.second, p2); + bool representative2 = (float)range2.len() >= o.consensus_.length() * 0.95f; + + seed_i = range.idx; + seed_j = range2.idx; + + const size_t relax = 5; + if(representative && representative2) { + if(ex_range.overlap_len(ex_range2) > 0) { + if(ex_range.contain(ex_range2, relax)) { + contain = true; + left = true; + } else if(ex_range2.contain(ex_range, relax)) { + contain = true; + left = false; + } else { + left = ex_range.left <= ex_range2.left; + } + return true; + } + } else if(representative) { + if(range2.contain(ex_range, relax)) { + contain = true; + left = false; + return true; + } + } else if(representative2) { + if(range.contain(ex_range2, relax)) { + contain = true; + left = true; + return true; + } + } + + if(range.right <= range2.right) p++; + if(range2.right <= range.right) p2++; + } + return false; +} + +void RB_Repeat::showInfo(const RepeatParameter& rp, + CoordHelper& coordHelper) const +{ + cerr << "\trepeat id: " << repeat_id_ << endl; + cerr << "\tnumber of alleles: " << seeds_.size() << endl; + cerr << "\tconsensus length: " << consensus_.length() << endl; + cerr << consensus_ << endl; + for(size_t i = 0; i < seeds_.size(); i++) { + const SeedExt& seed = seeds_[i]; + size_t ext_len = seed.getLength(); + if(ext_len < rp.min_repeat_len) continue; + bool sense_strand = seed.pos.first < coordHelper.forward_length(); + + cerr << "\t\t" << setw(4) << i << " " << setw(4) << ext_len; + cerr << " " << (sense_strand ? 
'+' : '-'); + cerr << " " << setw(10) << seed.pos.first << " " << setw(10) << seed.pos.second; + cerr << " " << setw(4) << seed.consensus_pos.first << " " << setw(4) << seed.consensus_pos.second; + cerr << " " << (seed.aligned ? "aligned" : "unaligned"); + cerr << endl; + } +} + +template +void RB_Repeat::generateSNPs(const RepeatParameter& rp, const TStr& s, TIndexOffU grp_id) { + EList& seeds = this->seeds(); + + for(size_t i = 0; i < seeds.size(); i++) { + SeedExt& seed = seeds[i]; + + if(!seed.aligned) { + continue; + } + + if(seed.getLength() < rp.min_repeat_len) { + continue; + } + assert_eq(seed.getLength(), seed.pos.second - seed.pos.first); + seed.generateSNPs(s, consensus(), snps()); + } + if(snps().size() > 0) { + sort(snps_.begin(), snps().end(), SeedSNP::cmpSeedSNPByPos); + } +} + +void RB_Repeat::saveSNPs(ofstream &fp, TIndexOffU grp_id, TIndexOffU& snp_id_base) +{ + string chr_name = "rep" + to_string(repeat_id_); + + for(size_t j = 0; j < snps_.size(); j++) { + SeedSNP& snp = *snps_[j]; + // assign SNPid + snp.id = (snp_id_base++); + + fp << "rps" << snp.id; + fp << "\t"; + + if(snp.type == EDIT_TYPE_MM) { + fp << "single"; + } else if(snp.type == EDIT_TYPE_READ_GAP) { + fp << "deletion"; + } else if(snp.type == EDIT_TYPE_REF_GAP) { + fp << "insertion"; + } else { + assert(false); + } + fp << "\t"; + + fp << chr_name; + fp << "\t"; + + fp << snp.pos; + fp << "\t"; + if(snp.type == EDIT_TYPE_MM || snp.type == EDIT_TYPE_REF_GAP) { + fp << snp.base; + } else if(snp.type == EDIT_TYPE_READ_GAP) { + fp << snp.len; + } else { + assert(false); + } + fp << endl; + } +} + +float RB_RepeatExt::mergeable(const RB_Repeat& o) const +{ + const EList& ranges = seed_ranges(); + const EList& ranges2 = o.seed_ranges(); + size_t num_overlap_bp = 0; + size_t p = 0, p2 = 0; + while(p < ranges.size() && p2 < ranges2.size()) { + const RB_AlleleCoord& range = ranges[p]; + const RB_AlleleCoord& range2 = ranges2[p2]; + TIndexOffU overlap = range.overlap_len(range2); + 
num_overlap_bp += overlap; + if(range.right <= range2.right) p++; + else p2++; + } + size_t num_total_bp = 0, num_total_bp2 = 0; + for(size_t r = 0; r < ranges.size(); r++) num_total_bp += (ranges[r].right - ranges[r].left); + for(size_t r = 0; r < ranges2.size(); r++) num_total_bp2 += (ranges2[r].right - ranges2[r].left); + float portion = float(num_overlap_bp) / float(min(num_total_bp, num_total_bp2)); + return portion; +} + +inline void get_next_range(const EList& offsets, size_t i, Range& r, float& avg) +{ + r.first = i; + for(; r.first < offsets.size() && offsets[r.first] < 0; r.first++); + r.second = r.first + 1; + avg = 0.0f; + if(r.first < offsets.size()) { + float diff = (float)offsets[r.first] - (float)r.first; + avg += diff; + } + for(; r.second < offsets.size() && + offsets[r.second] >= 0 && + (r.second == 0 || offsets[r.second] >= offsets[r.second - 1]); + r.second++) { + float diff = (float)offsets[r.second] - (float)r.second; + avg += diff; + } + avg /= (float)(r.second - r.first); + return; +} + +template +bool RB_RepeatExt::align(const RepeatParameter& rp, + const TStr& ref, + const string& s, + const EList >& s_kmer_table, + const string& s2, + EList& offsets, + size_t k, + SeedExt& seed, + int consensus_approx_left, + int consensus_approx_right, + size_t left, + size_t right, + bool debug) +{ + RB_Repeat::seed_merge_tried++; + + seed.reset(); + seed.pos.first = left; + seed.pos.second = right; + seed.orig_pos.first = seed.pos.first; + seed.orig_pos.second = seed.orig_pos.first; + seed.consensus_pos.first = 0; + seed.consensus_pos.second = consensus_.length(); + seed.aligned = false; + + { + const int query_len = right - left; + const int consensus_len = consensus_approx_right - consensus_approx_left; + const int abs_gap_len = max(abs(consensus_len - query_len), 5); + + offsets.resize(s2.length()); + offsets.fill(-1); + pair kmer(0, 0); + for(size_t i = 0; i + k <= s2.length(); i++) { + if(i == 0) { + kmer.first = extract_kmer(s2, i, k); + } else 
{ + kmer.first = next_kmer(kmer.first, s2[i+k-1], k); + } + size_t lb = s_kmer_table.bsearchLoBound(kmer); + while(lb < s_kmer_table.size() && kmer.first == s_kmer_table[lb].first) { + int expected = (int)i + consensus_approx_left; + int real = (int)s_kmer_table[lb].second; + int abs_diff = abs(expected - real); + if(abs_diff <= abs_gap_len * 2 || debug) { + if(offsets[i] == -1) { + offsets[i] = (int)s_kmer_table[lb].second; + } else if(offsets[i] >= 0) { + offsets[i] = -2; + } else { + assert_lt(offsets[i], -1); + offsets[i] -= 1; + } + } + lb++; + } + if(offsets[i] > 0 && i + k == s2.length()) { + for(size_t j = i + 1; j < s2.length(); j++) { + offsets[j] = offsets[j-1] + 1; + } + } + } + } + + if(debug) { + cerr << "initial offsets" << endl; + for(size_t j = 0; j < offsets.size(); j++) { + cout << j << ": " << offsets[j] << " " << s2[j] << ": " << (offsets[j] < 0 ? ' ' : s[offsets[j]]) << endl; + } + } + + // remove inconsistent positions + Range range; float range_avg; + get_next_range(offsets, 0, range, range_avg); + while(range.second < offsets.size()) { + Range range2; float range_avg2; + get_next_range(offsets, range.second, range2, range_avg2); + if(range2.first >= offsets.size()) + break; + + assert_leq(range.second, range2.first); + + float abs_diff = abs(range_avg - range_avg2); + if(offsets[range.second - 1] > offsets[range2.first] || + (abs_diff > 10.0f && (range.second - range.first < 5 || range2.second - range2.first < 5)) || + abs_diff > 20.0f) { + if(range.second - range.first < range2.second - range2.first) { + for(size_t i = range.first; i < range.second; i++) { + offsets[i] = -1; + } + range = range2; + range_avg = range_avg2; + } else { + for(size_t i = range2.first; i < range2.second; i++) { + offsets[i] = -1; + } + } + } else { + range = range2; + range_avg = range_avg2; + } + } + + bool weighted_avg_inited = false; + float weighted_avg = -1.0f; + for(size_t i = 0; i < offsets.size(); i++) { + if(offsets[i] < 0) + continue; + + float diff = 
(float)offsets[i] - (float)i; + if(weighted_avg_inited) { + if(abs(diff - weighted_avg) > 20.0f) { + offsets[i] = -1; + continue; + } + } + + if(weighted_avg < 0.0f) { + weighted_avg = diff; + weighted_avg_inited = true; + } else { + weighted_avg = 0.8f * weighted_avg + 0.2f * diff; + } + } + + if(debug) { + cerr << "after filtering" << endl; + for(size_t j = 0; j < offsets.size(); j++) { + cout << j << ": " << offsets[j] << " " << s2[j] << ": " << (offsets[j] < 0 ? ' ' : s[offsets[j]]) << endl; + } + } + + int i = 0; + while(i < offsets.size()) { + // skip non-negative offsets + for(; i < offsets.size() && offsets[i] >= 0; i++); + if(i >= offsets.size()) break; + int j = i; + for(; j < offsets.size(); j++) { + if(offsets[j] >= 0) + break; + } + assert(i >= offsets.size() || offsets[i] < 0); + assert(j >= offsets.size() || offsets[j] >= 0); + if(i > 0 && j < offsets.size()) { + i -= 1; + int left = offsets[i], right = offsets[j]; + assert_geq(left, 0); + if(left > right) return false; + assert_leq(left, right); + int ref_len = right - left + 1; + int query_len = j - i + 1; + if(query_len == ref_len) { // match + for(size_t i2 = i + 1; i2 < j; i2++) + offsets[i2] = offsets[i2-1] + 1; + } else { // deletion or insertion + bool del = query_len < ref_len; + size_t gap_len = del ? 
ref_len - query_len : query_len - ref_len; + size_t max_len = max(ref_len, query_len); + const size_t max_mm = max_len / 25 + 1; + const size_t very_max_mm = max(max_len / 2, max_mm); + ca_s_ = s.substr(left, max_len); + ca_s2_ = s2.substr(i, max_len); + + size_t gap_pos = 0, mm = very_max_mm + 1; + find_gap_pos(ca_s_, + ca_s2_, + ca_ed_, + ca_ed2_, + del, + gap_len, + very_max_mm, + gap_pos, + mm, + debug); + + if(mm > very_max_mm) { + return false; + } + + assert_lt(gap_pos, query_len); + if(del) { + for(size_t i2 = i + 1; i2 < j; i2++) { + if(i2 - i == gap_pos) { + offsets[i2] = offsets[i2-1] + gap_len; + } else { + offsets[i2] = offsets[i2-1] + 1; + } + } + } else { + for(size_t i2 = i + 1; i2 < j; i2++) { + if(i2 - i >= gap_pos && i2 - i < gap_pos + gap_len) { + offsets[i2] = offsets[i2-1]; + } else { + offsets[i2] = offsets[i2-1] + 1; + } + } + } + } + } + + i = j; + } + + if(debug) { + cerr << "final offsets" << endl; + for(size_t j = 0; j < offsets.size(); j++) { + cout << j << ": " << offsets[j] << " " << s2[j] << ": " << (offsets[j] < 0 ? 
' ' : s[offsets[j]]) << endl; + } + } + +#ifndef NDEBUG + for(size_t i = 1; i < offsets.size(); i++) { + if(offsets[i-1] < 0 || offsets[i] < 0) continue; + assert_leq(offsets[i-1], offsets[i]); + } +#endif + + size_t b, e; + for(b = 0; b < offsets.size() && offsets[b] < 0; b++); + if(b >= offsets.size()) return false; + assert_lt((size_t)b, offsets.size()); + for(e = offsets.size() - 1; e > b && offsets[e] < 0; e--); + if(b == e) return false; + for(size_t p = b; p <= e; p++) { + if(offsets[p] < 0) return false; + } + + // try to fill the ends + if(b > 0) { + int mm = 0, pb = (int)b; + for(int i = pb - 1; i >= 0; i--) { + assert_geq(offsets[i+1], 0); + if(offsets[i+1] == 0) break; + if(s2[i] != s[offsets[i+1] - 1]) + mm++; + if(pb - i < 25 * (mm - 1)) + break; + offsets[i] = offsets[i+1] - 1; + b = (size_t)i; + } + } + if(e + 1 < offsets.size()) { + int mm = 0, prev_end = (int)e; + for(int i = prev_end + 1; i < offsets.size(); i++) { + assert_geq(offsets[i-1], 0); + if(offsets[i-1] + 1 >= s.length()) break; + if(s2[i] != s[offsets[i-1] + 1]) + mm++; + if(i - prev_end < 25 * (mm - 1)) + break; + offsets[i] = offsets[i-1] + 1; + e = (size_t)i; + } + } + + assert_geq(seed.pos.first, (TIndexOffU)b); + seed.pos.first += (TIndexOffU)b; + seed.pos.second = seed.pos.first + e - b + 1; + seed.orig_pos.first = seed.pos.first; + seed.orig_pos.second = seed.pos.first; + seed.consensus_pos.first = offsets[b]; + seed.consensus_pos.second = offsets[e] + 1; + seed.aligned = true; + + for(int p = b; p < e; p++) { + assert_geq(offsets[p], 0); + assert_leq(offsets[p], offsets[p+1]); + if(offsets[p] + 1 == offsets[p+1]) { // match + continue; + } else if(offsets[p] + 1 < offsets[p+1]) { // deletion + seed.right_gaps.expand(); + seed.right_gaps.back().first = p + 1 - b; + seed.right_gaps.back().second = offsets[p+1] - offsets[p] - 1; + } else { + assert_eq(offsets[p], offsets[p+1]); + int p2 = p + 1; + for(; offsets[p2] == offsets[p2+1]; p2++); + seed.right_gaps.expand(); + 
seed.right_gaps.back().first = p + 1 - b; + seed.right_gaps.back().second = (int)p - (int)p2; + p = p2; + } + } + + if(debug) { + string consensus_str = consensus_.substr(seed.consensus_pos.first, seed.consensus_pos.second - seed.consensus_pos.first); + string seed_str; + seed.getExtendedSeedSequence(ref, seed_str); + assert_eq(consensus_str.length(), seed_str.length()); + cerr << "consensus: " << consensus_str << endl; + cerr << "seed : " << seed_str << endl; + } + + RB_Repeat::seed_merged++; + + return true; +} + +bool RB_RepeatExt::isSelfRepeat(const RepeatParameter& rp, + const string& s, + const EList >& s_kmer_table, + EList& offsets, + size_t k, + bool debug) +{ + offsets.resize(s.length()); + offsets.fill(-1); + pair kmer(0, 0); + for(size_t i = 0; i + k <= s.length(); i++) { + if(i == 0) { + kmer.first = extract_kmer(s, i, k); + } else { + kmer.first = next_kmer(kmer.first, s[i+k-1], k); + } + size_t lb = s_kmer_table.bsearchLoBound(kmer); + while(lb < s_kmer_table.size() && kmer.first == s_kmer_table[lb].first) { + if(offsets[i] == -1) { + offsets[i] = (int)s_kmer_table[lb].second; + } else if(offsets[i] >= 0) { + offsets[i] = -2; + } else { + assert_lt(offsets[i], -1); + offsets[i] -= 1; + } + lb++; + } + if(offsets[i] > 0 && i + k == s.length()) { + for(size_t j = i + 1; j < s.length(); j++) { + offsets[j] = offsets[j-1] + 1; + } + } + } + + if(debug) { + cerr << "offsets" << endl; + for(size_t j = 0; j < offsets.size(); j++) { + cout << j << ": " << offsets[j] << " " << s[j] << ": " << (offsets[j] < 0 ? 
' ' : s[offsets[j]]) << endl; + } + } + + const size_t min_repeat_size = 100; + size_t repeat_count = 0; + for(size_t i = 0; i + min_repeat_size < offsets.size(); i++) { + if(offsets[i] >= -1) + continue; + size_t j = i + 1; + for(; j < offsets.size() && offsets[j] <= -2; j++); + if(j - i >= min_repeat_size) + repeat_count++; + i = j; + } + + return repeat_count >= 2; +} + +template +bool RB_RepeatExt::merge(const RepeatParameter& rp, + const TStr& s, + RB_SWAligner& swalginer, + const RB_RepeatExt& o, + bool contain, + size_t seed_i, + size_t seed_j, + bool debug) +{ + // construct a new consensus sequence + string prev_consensus = consensus_; + size_t consensus_add_len = 0; + { + assert_lt(seed_i, seeds_.size()); + assert_lt(seed_j, o.seeds_.size()); + const SeedExt& seed = seeds_[seed_i]; + const SeedExt& oseed = o.seeds_[seed_j]; + + Range range = seed.getExtendedRange(consensus_.length()); + Range orange = oseed.getExtendedRange(o.consensus_.length()); + assert_leq(range.first, orange.first + 10); + + consensus_add_len = (int)orange.first - (int)range.first; + if(!contain) { + if(range.second <= orange.first) { + cerr << "something wrong: " << range.first << "-" << range.second << " "; + cerr << orange.first << "-" << orange.second << " " << o.consensus_.length() << endl; + assert(false); + } + if(range.second > orange.first && + range.second - orange.first < o.consensus_.length()) { + consensus_ += o.consensus_.substr(range.second - orange.first); + } + } + } + + EList > merge_list; + merge_list.reserveExact(o.seed_ranges_.size()); + size_t p = 0, p2 = 0; + const size_t relax = 5; + while(p < seed_ranges_.size() && p2 < o.seed_ranges_.size()) { + const RB_AlleleCoord& range = seed_ranges_[p]; + assert_lt(range.idx, seeds_.size()); + const RB_AlleleCoord& range2 = o.seed_ranges_[p2]; + assert_lt(range2.idx, o.seeds_.size()); + if(range.contain(range2, relax)) { + merge_list.expand(); + merge_list.back().first = p; + merge_list.back().second = p2; + if(debug) { 
+ cerr << p << ":" << range.left << "-" << range.right << " > "; + cerr << p2 << ":" << range2.left << "-" << range2.right << endl; + } + } else if(range2.contain(range, relax)) { + merge_list.expand(); + merge_list.back().first = p; + merge_list.back().second = p2; + if(debug) { + cerr << p << ":" << range.left << "-" << range.right << " < "; + cerr << p2 << ":" << range2.left << "-" << range2.right << endl; + } + } else { + TIndexOffU overlap_len = range.overlap_len(range2); + bool stored = !merge_list.empty() && merge_list.back().first == p; + bool stored2 = !merge_list.empty() && merge_list.back().second == p2; + if(overlap_len > 0) { + if(!stored && !stored2) { + merge_list.expand(); + merge_list.back().first = p; + merge_list.back().second = p2; + + if(debug) { + cerr << p << ":" << range.left << "-" << range.right << " ol "; + cerr << p2 << ":" << range2.left << "-" << range2.right << endl; + } + } + } else { + if(range2.right <= range.left) { + if(!stored2) { + merge_list.expand(); + merge_list.back().first = numeric_limits::max(); + merge_list.back().second = p2; + + if(debug) { + cerr << p << ":" << range.left << "-" << range.right << " <> "; + cerr << p2 << ":" << range2.left << "-" << range2.right << endl; + } + } + } else { + assert_leq(range.right, range2.left); + if(!stored) { + merge_list.expand(); + merge_list.back().first = p; + merge_list.back().second = numeric_limits::max(); + + if(debug) { + cerr << p << ":" << range.left << "-" << range.right << " <> "; + cerr << p2 << ":" << range2.left << "-" << range2.right << endl; + } + } + } + } + } + if(range.right <= range2.right) p++; + if(range2.right <= range.right) p2++; + } + assert(p == seeds_.size() || p2 == o.seeds_.size()); + + while(p < seed_ranges_.size()) { + bool stored = !merge_list.empty() && merge_list.back().first == p; + if(!stored) { + const RB_AlleleCoord& range = seed_ranges_[p]; + merge_list.expand(); + merge_list.back().first = p; + merge_list.back().second = 
numeric_limits::max(); + + if(debug) { + cerr << p << ":" << range.left << "-" << range.right << " : <> " << endl; + } + } + + p++; + } + + while(p2 < o.seed_ranges_.size()) { + bool stored2 = !merge_list.empty() && merge_list.back().second == p2; + if(!stored2) { + const RB_AlleleCoord& range2 = o.seed_ranges_[p2]; + merge_list.expand(); + merge_list.back().first = numeric_limits::max(); + merge_list.back().second = p2; + + if(debug) { + cerr << ": <> " << p2 << ":" << range2.left << "-" << range2.right << endl; + } + } + + p2++; + } + + assert(!merge_list.empty()); + + if(debug) { + for(size_t i = 0; i < merge_list.size(); i++) { + cerr << "merge list:" << endl; + cerr << "\t" << (merge_list[i].first < seed_ranges_.size() ? (int)merge_list[i].first : -1); + cerr << " " << (merge_list[i].second < o.seed_ranges_.size() ? (int)merge_list[i].second : -1) << endl; + } + } + + const size_t kmer_len = 12; + EList > kmer_table; + build_kmer_table(consensus_, kmer_table, kmer_len); + EList offsets; + + bool self_repeat = isSelfRepeat(rp, + consensus_, + kmer_table, + offsets, + kmer_len, + debug); + if(self_repeat) { + consensus_ = prev_consensus; + return false; + } + + string seq; + for(size_t i = 0; i < merge_list.size(); i++) { + size_t seed_id = (merge_list[i].first < seed_ranges_.size() ? seed_ranges_[merge_list[i].first].idx : merge_list[i].first); + size_t oseed_id = (merge_list[i].second < o.seed_ranges_.size() ? o.seed_ranges_[merge_list[i].second].idx : merge_list[i].second); + + if(seed_id < seeds_.size()) { + if(oseed_id >= o.seeds_.size()) + continue; + else if(seed_ranges_[merge_list[i].first].contain(o.seed_ranges_[merge_list[i].second])) + continue; + } + + const SeedExt* seed = (seed_id < seeds_.size() ? &seeds_[seed_id] : NULL); + const SeedExt* oseed = (oseed_id < o.seeds_.size() ? &o.seeds_[oseed_id] : NULL); + assert(seed != NULL || oseed != NULL); + + size_t left = (seed != NULL ? 
seed->pos.first : numeric_limits::max()); + size_t right = (seed != NULL ? seed->pos.second : 0); + int consensus_approx_left = (seed != NULL ? seed->consensus_pos.first : numeric_limits::max()); + int consensus_approx_right = (seed != NULL ? seed->consensus_pos.second : 0); + if(oseed != NULL) { + if(oseed->pos.first < left) { + left = oseed->pos.first; + } + if(oseed->pos.second > right) { + right = oseed->pos.second; + } + + int oconsensus_approx_left = oseed->consensus_pos.first + consensus_add_len; + if(oconsensus_approx_left < consensus_approx_left) { + consensus_approx_left = oconsensus_approx_left; + } + int oconsensus_approx_right = oseed->consensus_pos.second + consensus_add_len; + if(oconsensus_approx_right > consensus_approx_right) { + consensus_approx_right = oconsensus_approx_right; + } + } + + getString(s, left, right - left, seq); + if(seed_id >= seeds_.size()) { + seed_id = seeds_.size(); + seeds_.expand(); + } + + /* bool succ = */ align(rp, + s, + consensus_, + kmer_table, + seq, + offsets, + kmer_len, + seeds_[seed_id], + consensus_approx_left, + consensus_approx_right, + left, + right, + debug && right - left == 1080); + } + + while(true) { + internal_update(); + + size_t remove_count = 0; + for(size_t i = 0; i + 1 < seed_ranges_.size(); i++) { + RB_AlleleCoord& range = seed_ranges_[i]; + if(range.left == numeric_limits::max()) + break; + for(size_t j = i + 1; j < seed_ranges_.size(); j++) { + RB_AlleleCoord& range2 = seed_ranges_[j]; + if(range2.left == numeric_limits::max()) + break; + if(range.right <= range2.left) + break; + + getString(s, range.left, range2.right - range.left, seq); + + /* bool succ = */ align(rp, + s, + consensus_, + kmer_table, + seq, + offsets, + kmer_len, + seeds_[range.idx], + seeds_[range.idx].consensus_pos.first, + seeds_[range2.idx].consensus_pos.second, + range.left, + range2.right, + debug && range.left == 692422); + + range.left = seeds_[range.idx].pos.first; + range.right = seeds_[range.idx].pos.second; + 
seeds_[range2.idx].reset(); + range2.left = numeric_limits::max(); + remove_count++; + } + } + + if(remove_count <= 0) break; + + sort(seeds_.begin(), seeds_.end(), seedCmp); + seeds_.resize(seeds_.size() - remove_count); + } + + return true; +} + +#define DMAX std::numeric_limits::max() + +RB_SWAligner::RB_SWAligner() +{ + rnd_.init(0); +} + +RB_SWAligner::~RB_SWAligner() +{ + if(sc_) { + delete sc_; + } +} + +void RB_SWAligner::init_dyn(const RepeatParameter& rp) +{ + const int MM_PEN = 3; + // const int MM_PEN = 6; + const int GAP_PEN_LIN = 2; + // const int GAP_PEN_LIN = (((MM_PEN) * rpt_edit_ + 1) * 1.0); + const int GAP_PEN_CON = 4; + // const int GAP_PEN_CON = (((MM_PEN) * rpt_edit_ + 1) * 1.0); + const int MAX_PEN = MAX_I16; + + scoreMin_.init(SIMPLE_FUNC_LINEAR, rp.max_edit * MM_PEN * -1.0, 0.0); + nCeil_.init(SIMPLE_FUNC_LINEAR, 0.0, 0.0); + + penCanIntronLen_.init(SIMPLE_FUNC_LOG, -8, 1); + penNoncanIntronLen_.init(SIMPLE_FUNC_LOG, -8, 1); + + sc_ = new Scoring( + DEFAULT_MATCH_BONUS, // constant reward for match + DEFAULT_MM_PENALTY_TYPE, // how to penalize mismatches + MM_PEN, // max mm penalty + MM_PEN, // min mm penalty + MAX_PEN, // max sc penalty + MAX_PEN, // min sc penalty + scoreMin_, // min score as function of read len + nCeil_, // max # Ns as function of read len + DEFAULT_N_PENALTY_TYPE, // how to penalize Ns in the read + DEFAULT_N_PENALTY, // constant if N pelanty is a constant + DEFAULT_N_CAT_PAIR, // whether to concat mates before N filtering + + + GAP_PEN_CON, // constant coeff for read gap cost + GAP_PEN_CON, // constant coeff for ref gap cost + GAP_PEN_LIN, // linear coeff for read gap cost + GAP_PEN_LIN, // linear coeff for ref gap cost + 1 /* gGapBarrier */ // # rows at top/bot only entered diagonally + ); +} + +void RB_SWAligner::makePadString(const string& ref, + const string& read, + string& pad, + size_t len) +{ + pad.resize(len); + + for(size_t i = 0; i < len; i++) { + // shift A->C, C->G, G->T, T->A + pad[i] = 
"CGTA"[asc2dna[ref[i]]]; + + if(read[i] == pad[i]) { + // shift + pad[i] = "CGTA"[asc2dna[pad[i]]]; + } + } + + int head_len = len / 2; + size_t pad_start = len - head_len; + + for(size_t i = 0; i < head_len; i++) { + if(read[i] == pad[pad_start + i]) { + // shift + pad[pad_start + i] = "CGTA"[asc2dna[pad[pad_start + i]]]; + } + } +} + +int RB_SWAligner::alignStrings(const string &ref, + const string &read, + EList& edits, + Coord& coord) +{ + // Prepare Strings + + // Read -> BTDnaString + // Ref -> bit-encoded string + + //SwAligner swa; + + BTDnaString btread; + BTString btqual; + BTString btref; + BTString btref2; + + BTDnaString btreadrc; + BTString btqualrc; + + + string qual = ""; + for(size_t i = 0; i < read.length(); i++) { + qual.push_back('I'); + } + +#if 0 + cerr << "REF : " << ref << endl; + cerr << "READ: " << read << endl; + cerr << "QUAL: " << qual << endl; +#endif + + btread.install(read.c_str(), true); + btreadrc = btread; + btreadrc.reverseComp(); + + btqual.install(qual.c_str()); + btqualrc = btqual; + + btref.install(ref.c_str()); + + TAlScore min_score = sc_->scoreMin.f((double)btread.length()); + + btref2 = btref; + + size_t nceil = 0; + // size_t nrow = btread.length(); + + // Convert reference string to mask + for(size_t i = 0; i < btref2.length(); i++) { + if(toupper(btref2[i]) == 'N') { + btref2.set(16, i); + } else { + int num = 0; + int alts[] = {4, 4, 4, 4}; + decodeNuc(toupper(btref2[i]), num, alts); + assert_leq(num, 4); + assert_gt(num, 0); + btref2.set(0, i); + for(int j = 0; j < num; j++) { + btref2.set(btref2[i] | (1 << alts[j]), i); + } + } + } + + + bool fw = true; + uint32_t refidx = 0; + + swa.initRead( + btread, // read sequence + btreadrc, + btqual, // read qualities + btqualrc, + 0, // offset of first character within 'read' to consider + btread.length(), // offset of last char (exclusive) in 'read' to consider + *sc_); // local-alignment score floor + + DynProgFramer dpframe(false); + size_t readgaps = 0; + size_t refgaps 
= 0; + size_t maxhalf = 0; + + DPRect rect; + dpframe.frameSeedExtensionRect( + 0, // ref offset implied by seed hit assuming no gaps + btread.length(), // length of read sequence used in DP table + btref2.length(), // length of reference + readgaps, // max # of read gaps permitted in opp mate alignment + refgaps, // max # of ref gaps permitted in opp mate alignment + (size_t)nceil, // # Ns permitted + maxhalf, // max width in either direction + rect); // DP Rectangle + + assert(rect.repOk()); + + size_t cminlen = 2000, cpow2 = 4; + + swa.initRef( + fw, // whether to align forward or revcomp read + refidx, // reference ID + rect, // DP rectangle + btref2.wbuf(), // reference strings + 0, // offset of first reference char to align to + btref2.length(), // offset of last reference char to align to + btref2.length(), // length of reference sequence + *sc_, // scoring scheme + min_score, // minimum score + true, // use 8-bit SSE if positions + cminlen, // minimum length for using checkpointing scheme + cpow2, // interval b/t checkpointed diags; 1 << this + false, // triangular mini-fills? + false // is this a seed extension? 
+ ); + + + TAlScore best = std::numeric_limits::min(); + bool found = swa.align(rnd_, best); + +#ifdef DEBUGLOG + cerr << "found: " << found << "\t" << best << "\t" << "minsc: " << min_score << endl; +#endif + + if (found) { +#ifdef DEBUGLOG + //cerr << "CP " << "found: " << found << "\t" << best << "\t" << "minsc: " << min_score << endl; + cerr << "REF : " << ref << endl; + cerr << "READ: " << read << endl; +#endif + + SwResult res; + int max_match_len = 0; + res.reset(); + res.alres.init_raw_edits(&rawEdits_); + + found = swa.nextAlignment(res, best, rnd_); + if (found) { + edits = res.alres.ned(); + //const TRefOff ref_off = res.alres.refoff(); + //const Coord& coord = res.alres.refcoord(); + coord = res.alres.refcoord(); + //assert_geq(genomeHit._joinedOff + coord.off(), genomeHit.refoff()); + +#ifdef DEBUGLOG + cerr << "num edits: " << edits.size() << endl; + cerr << "coord: " << coord.off(); + cerr << ", " << coord.ref(); + cerr << ", " << coord.orient(); + cerr << ", " << coord.joinedOff(); + cerr << endl; + Edit::print(cerr, edits); cerr << endl; + Edit::printQAlign(cerr, btread, edits); +#endif + + max_match_len = getMaxMatchLen(edits, btread.length()); +#ifdef DEBUGLOG + cerr << "max match length: " << max_match_len << endl; +#endif + } +#ifdef DEBUGLOG + cerr << "nextAlignment: " << found << endl; + cerr << "-------------------------" << endl; +#endif + } + + return 0; +} + +void RB_SWAligner::doTest(const RepeatParameter& rp, + const string& refstr, + const string& readstr) +{ + init_dyn(rp); + + doTestCase1(refstr, + readstr, + rp.max_edit); +} + +void RB_SWAligner::doTestCase1(const string& refstr, + const string& readstr, + TIndexOffU rpt_edit) +{ + cerr << "doTestCase1----------------" << endl; + EList edits; + Coord coord; + + if (refstr.length() == 0 || + readstr.length() == 0) { + return; + } + + EList ed; + + string pad; + makePadString(refstr, readstr, pad, 5); + + string ref2 = pad + refstr + pad; + string read2 = pad + readstr + pad; + 
alignStrings(refstr, readstr, edits, coord); + + size_t left = pad.length(); + size_t right = left + readstr.length(); + + edits.reserveExact(ed.size()); + for(size_t i = 0; i < ed.size(); i++) { + if(ed[i].pos >= left && ed[i].pos <= right) { + edits.push_back(ed[i]); + edits.back().pos -= left; + } + } + + +#if 0 + RepeatGroup rg; + + rg.edits = edits; + rg.coord = coord; + rg.seq = readstr; + rg.base_offset = 0; + + string chr_name = "rep"; + + cerr << "REF : " << refstr << endl; + cerr << "READ: " << readstr << endl; + size_t snpids = 0; + rg.buildSNPs(snpids); + rg.writeSNPs(cerr, chr_name); cerr << endl; +#endif +} + + +template +RepeatBuilder::RepeatBuilder(TStr& s, + TStr& sOriginal, + const EList& szs, + const EList& ref_names, + bool forward_only, + const string& filename) : +s_(s), +sOriginal_(sOriginal), +coordHelper_(s.length(), forward_only ? s.length() : s.length() / 2, szs, ref_names), +forward_only_(forward_only), +filename_(filename), +forward_length_(forward_only ? s.length() : s.length() / 2) +{ + cerr << "RepeatBuilder: " << filename_ << endl; +} + +template +RepeatBuilder::~RepeatBuilder() +{ + for(map::iterator it = repeat_map_.begin(); it != repeat_map_.end(); it++) { + delete it->second; + } + repeat_map_.clear(); +} + +template +void RepeatBuilder::readSA(const RepeatParameter& rp, + BlockwiseSA& sa) +{ + TIndexOffU count = 0; + subSA_.init(s_.length() + 1, rp.min_repeat_len, rp.repeat_count); + + while(count < s_.length() + 1) { + TIndexOffU saElt = sa.nextSuffix(); + count++; + + if(count && (count % 10000000 == 0)) { + cerr << "SA count " << count << endl; + } + + if(saElt == s_.length()) { + assert_eq(count, s_.length() + 1); + break; + } + + subSA_.push_back(s_, coordHelper_, saElt, count == s_.length()); + } + + cerr << "subSA size is " << subSA_.size() << endl; + subSA_.dump(); +#if 0 + for(size_t i = 0; i < subSA_.size(); i++) { + TIndexOffU joinedOff = subSA_[i]; + fp << setw(10) << joinedOff << " " << getString(s_, joinedOff, 
rp.seed_len) << endl; + } +#endif + cerr << "subSA mem Usage: " << subSA_.getMemUsage() << endl << endl; +} + +template +void RepeatBuilder::readSA(const RepeatParameter &rp, + const BitPackedArray &sa) +{ + TIndexOffU count = 0; + + subSA_.init(s_.length() + 1, rp.min_repeat_len, rp.repeat_count); + + for(size_t i = 0; i < sa.size(); i++) { + TIndexOffU saElt = sa[i]; + count++; + + if(count && (count % 10000000 == 0)) { + cerr << "RB count " << count << endl; + } + + if(saElt == s_.length()) { + assert_eq(count, s_.length() + 1); + break; + } + + subSA_.push_back(s_, coordHelper_, saElt, count == s_.length()); + } + + cerr << "subSA size: " << endl; + subSA_.dump(); +#if 0 + for(size_t i = 0; i < subSA_.size(); i++) { + TIndexOffU joinedOff = subSA_[i]; + fp << setw(10) << joinedOff << " " << getString(s_, joinedOff, rp.seed_len) << endl; + } +#endif + cerr << "subSA mem Usage: " << subSA_.getMemUsage() << endl << endl; +} + +template +void RepeatBuilder::build(const RepeatParameter& rp) +{ + string rpt_len_str; + rpt_len_str = to_string(rp.min_repeat_len) + "-" + to_string(rp.max_repeat_len); + + string seed_filename = filename_ + ".rep." 
+ rpt_len_str + ".seed"; + ofstream fp(seed_filename.c_str()); + + swaligner_.init_dyn(rp); + + RB_RepeatManager* repeat_manager = new RB_RepeatManager; + + EList repeatBases; + subSA_.buildRepeatBase(s_, + coordHelper_, + rp.max_repeat_len, + repeatBases); + + size_t repeat_id = 0; + for(size_t i = 0; i < repeatBases.size(); i++) { + addRepeatGroup(rp, + repeat_id, + *repeat_manager, + repeatBases[i], + fp); + } + + { + // Build and test minimizer-based k-mer table + const size_t window = RB_Minimizer::default_w; + const size_t k = RB_Minimizer::default_k; + RB_KmerTable kmer_table; + EList seqs; + seqs.reserveExact(repeat_map_.size()); + for(map::const_iterator it = repeat_map_.begin(); it != repeat_map_.end(); it++) { + const RB_Repeat& repeat = *(it->second); + assert(repeat.satisfy(rp)); + seqs.expand(); + seqs.back() = repeat.consensus(); + } + kmer_table.build(seqs, + window, + k); + kmer_table.dump(cerr); + cerr << endl; + + string query, rc_query; + string queryOriginal; + EList > minimizers; + size_t total = 0, num_repeat = 0, correct = 0, false_positive = 0, false_negative = 0; + for(size_t i = 0; i + rp.min_repeat_len <= forward_length_; i += 1000) { + if(coordHelper_.getEnd(i) != coordHelper_.getEnd(i + rp.min_repeat_len)) + continue; + query = getString(s_, i, rp.min_repeat_len); + queryOriginal = getString(sOriginal_, i, rp.min_repeat_len); + rc_query = reverseComplement(queryOriginal); + + TIndexOffU idx = subSA_.find_repeat_idx(s_, query); + const EList& test_repeat_index = subSA_.getRepeatIndex(); + bool repeat = (idx < test_repeat_index.size()); + bool est_repeat = kmer_table.isRepeat(query, + rc_query, + minimizers); + + total++; + if(repeat) num_repeat++; + if(repeat == est_repeat) { + correct++; + } else { + if(est_repeat) { + false_positive++; + } else { + false_negative++; + //assert(false); + } + } + } + + cerr << "total: " << total << endl; + cerr << "repeat: " << num_repeat << endl; + cerr << "correct: " << correct << endl; + cerr << 
"false positive: " << false_positive << endl; + cerr << "false negative: " << false_negative << endl; + cerr << endl; + + ELList position2D; EList alignments; + size_t repeat_total = 0, repeat_aligned = 0; + const EList& test_repeat_index = subSA_.getRepeatIndex(); + size_t interval = 1; + if(test_repeat_index.size() >= 1000) { + interval = test_repeat_index.size() / 1000; + } + size_t total_alignments = 0, max_alignments = 0; + string query2, rc_query2; + string queryOriginal2, rc_queryOriginal2; + for(size_t i = 0; i < test_repeat_index.size(); i += interval) { + TIndexOffU saElt_idx = test_repeat_index[i]; + TIndexOffU saElt_idx_end = (i + 1 < test_repeat_index.size() ? test_repeat_index[i+1] : subSA_.size()); + TIndexOffU saElt_size = saElt_idx_end - saElt_idx; + TIndexOffU saElt = subSA_[saElt_idx]; + query = getString(s_, saElt, rp.min_repeat_len); + query2 = query; + + queryOriginal = getString(sOriginal_, saElt, rp.min_repeat_len); + queryOriginal2 = queryOriginal; + + // introduce three mismatches into a query when the query is at lest 100-bp + if(rp.min_repeat_len >= 100) { + const size_t mid_pos1 = (size_t)(rp.min_repeat_len * 0.1); + if(query2[mid_pos1] == 'A') { + query2[mid_pos1] = 'C'; + queryOriginal2[mid_pos1] ='C'; + } else { + query2[mid_pos1] = 'A'; + queryOriginal2[mid_pos1] = 'A'; + } + const size_t mid_pos2 = (size_t)(rp.min_repeat_len * 0.5); + if(query2[mid_pos2] == 'C') { + query2[mid_pos2] = 'G'; + queryOriginal2[mid_pos2] = 'G'; + } else { + query2[mid_pos2] = 'C'; + queryOriginal2[mid_pos1] = 'G'; + } + const size_t mid_pos3 = (size_t)(rp.min_repeat_len * 0.9); + if(query2[mid_pos3] == 'G') { + query2[mid_pos3] = 'T'; + queryOriginal2[mid_pos3] = 'T'; + } else { + query2[mid_pos3] = 'G'; + queryOriginal2[mid_pos3] = 'G'; + } + } + + repeat_total += saElt_size; + + size_t found = 0; + size_t cur_alignments = 0; + if(kmer_table.isRepeat(query2, minimizers)) { + kmer_table.findAlignments(query2, + minimizers, + position2D, + alignments); + 
total_alignments += (alignments.size() * saElt_size); + cur_alignments = alignments.size(); + TIndexOffU baseoff = 0; + for(size_t s = 0; s < seqs.size(); s++) { + int spos = seqs[s].find(query); + if(spos != string::npos) { + for(size_t a = 0; a < alignments.size(); a++) { + if(alignments[a].pos == baseoff + spos) { + found++; + } + } + } + baseoff += seqs[s].length(); + } + + assert_leq(found, 1); + } + + rc_query = reverseComplement(queryOriginal); + rc_query2 = reverseComplement(queryOriginal2); + size_t rc_found = 0; + if(kmer_table.isRepeat(rc_query2, minimizers)) { + kmer_table.findAlignments(rc_query2, + minimizers, + position2D, + alignments); + total_alignments += (alignments.size() * saElt_size); + cur_alignments += alignments.size(); + if(cur_alignments > max_alignments) { + max_alignments = cur_alignments; + } + + TIndexOffU baseoff = 0; + for(size_t s = 0; s < seqs.size(); s++) { + int spos = seqs[s].find(rc_query); + if(spos != string::npos) { + for(size_t a = 0; a < alignments.size(); a++) { + if(alignments[a].pos == baseoff + spos) { + rc_found++; + } + } + } + baseoff += seqs[s].length(); + } + assert_leq(rc_found, 1); + } + + if(found + rc_found == 1) { + repeat_aligned += saElt_size; + } + } + + cerr << "num repeats: " << repeat_total << endl; + cerr << "repeat aligned using minimizers: " << repeat_aligned << endl; + cerr << "average number of alignments: " << (float)total_alignments / repeat_total << endl; + cerr << "max alignment: " << max_alignments << endl; + cerr << endl; + } + + cerr << "number of repeats is " << repeat_map_.size() << endl; + + size_t total_rep_seq_len = 0, total_allele_seq_len = 0; + size_t i = 0; + for(map::iterator it = repeat_map_.begin(); it != repeat_map_.end(); it++, i++) { + RB_Repeat& repeat = *(it->second); + if(!repeat.satisfy(rp)) + continue; + + repeat.saveSeedExtension(rp, + s_, + coordHelper_, + i, + fp, + total_rep_seq_len, + total_allele_seq_len); + } + + size_t total_qual_seeds = 0; + for(map::iterator it 
= repeat_map_.begin(); it != repeat_map_.end(); it++, i++) { + RB_Repeat& repeat = *(it->second); + EList& seeds = repeat.seeds(); + for(size_t i = 0; i < seeds.size(); i++) { + if(seeds[i].getLength() < rp.min_repeat_len) + continue; + total_qual_seeds++; + } + } + + cerr << "total repeat sequence length: " << total_rep_seq_len << endl; + cerr << "total allele sequence length: " << total_allele_seq_len << endl; + cerr << "total number of seeds including those that are position-wise different, but sequence-wise identical: " << total_qual_seeds << endl; + cerr << endl; + fp << "total repeat sequence length: " << total_rep_seq_len << endl; + fp << "total allele sequence length: " << total_allele_seq_len << endl; + fp << "total number of seeds including those that are position-wise different, but sequence-wise identical: " << total_qual_seeds << endl; + fp.close(); + + delete repeat_manager; + repeat_manager = NULL; + + // remove non-qualifying repeats + // and update repeat IDs for those remaining + { + map temp_repeat_map; + size_t i = 0; + for(map::iterator it = repeat_map_.begin(); it != repeat_map_.end(); it++) { + RB_Repeat* repeat = it->second; + if(!repeat->satisfy(rp)) { + delete repeat; + continue; + } + repeat->repeat_id(i); + temp_repeat_map[repeat->repeat_id()] = repeat; + i++; + } + repeat_map_ = temp_repeat_map; + } + + const bool sanity_check = true; + if(sanity_check) { + EList > kmer_table; + string query; + string queryOriginal; + size_t total = 0, match = 0; + EList positions; + const EList& test_repeat_index = subSA_.getRepeatIndex(); + size_t interval = 1; + if(test_repeat_index.size() >= 10000) { + interval = test_repeat_index.size() / 10000; + } + for(size_t i = 0; i < test_repeat_index.size(); i += interval) { + TIndexOffU saElt_idx = test_repeat_index[i]; + TIndexOffU saElt_idx_end = (i + 1 < test_repeat_index.size() ? 
test_repeat_index[i+1] : subSA_.size()); + positions.clear(); + for(size_t j = saElt_idx; j < saElt_idx_end; j++) { + positions.push_back(subSA_[j]); +#ifndef NDEBUG + if(j > saElt_idx) { + TIndexOffU lcp_len = getLCP(s_, + coordHelper_, + positions[0], + positions.back(), + rp.min_repeat_len); + assert_eq(lcp_len, rp.min_repeat_len); + } + + TIndexOffU saElt = subSA_[j]; + TIndexOffU start = coordHelper_.getStart(saElt); + TIndexOffU start2 = coordHelper_.getStart(saElt + rp.min_repeat_len - 1); + assert_eq(start, start2); +#endif + } + + TIndexOffU saElt = subSA_[saElt_idx]; + size_t true_count = saElt_idx_end - saElt_idx; + getString(s_, saElt, rp.min_repeat_len, query); + getString(sOriginal_, saElt, rp.min_repeat_len, queryOriginal); + total++; + + size_t count = 0, rc_count = 0; + string rc_query = reverse_complement(queryOriginal); + for(map::iterator it = repeat_map_.begin(); it != repeat_map_.end(); it++) { + RB_Repeat& repeat = *(it->second); + int pos = repeat.consensus().find(query); + if(pos != string::npos) { + for(size_t s = 0; s < repeat.seeds().size(); s++) { + SeedExt& seed = repeat.seeds()[s]; + string seq; + seed.getExtendedSeedSequence(s_, seq); + if(seq.find(query) != string::npos) + count++; + } + } + pos = repeat.consensus().find(rc_query); + if(pos != string::npos) { + for(size_t s = 0; s < repeat.seeds().size(); s++) { + SeedExt& seed = repeat.seeds()[s]; + string seq; + seed.getExtendedSeedSequence(s_, seq); + if(seq.find(rc_query) != string::npos) + rc_count++; + } + } + } + + if(count == true_count || rc_count == true_count) { + match++; + } else if(total - match <= 10) { + cerr << " query: " << query << endl; + cerr << "rc_query: " << rc_query << endl; + cerr << "true count: " << true_count << endl; + cerr << "found count: " << count << endl; + cerr << "rc found count: " << rc_count << endl; + cerr << endl; + } + } + + cerr << "RepeatBuilder: sanity check: " << match << " passed (out of " << total << ")" << endl << endl; + } +} + 
+template +void RepeatBuilder::writeHaploType(const EList& haplo_lists, + const EList& seeds, + TIndexOffU& hapl_id_base, + const string& seq_name, + ostream &fp) +{ + if(haplo_lists.size() == 0) { + return; + } + + for(size_t i = 0; i < haplo_lists.size(); i++) { + const SeedHP &haploType = haplo_lists[i]; + + fp << "rpht" << hapl_id_base++; + fp << "\t" << seq_name; + fp << "\t" << haploType.range.first; + fp << "\t" << haploType.range.second; + fp << "\t"; + + assert_gt(haploType.snpIDs.size(), 0); + + for (size_t j = 0; j < haploType.snpIDs.size(); j++) { + if(j) { + fp << ","; + } + fp << haploType.snpIDs[j]; + } + fp << endl; + } +} + +template +void RepeatBuilder::writeAllele(TIndexOffU grp_id, + TIndexOffU allele_id, + Range range, + const string& seq_name, + TIndexOffU baseoff, + const EList& seeds, + ostream &fp) +{ + // >rpt_name*0\trep\trep_pos\trep_len\tpos_count\t0 + // chr_name:pos:direction chr_name:pos:direction + // + // >rep1*0 rep 0 100 470 0 + // 22:112123123:+ 22:1232131113:+ + // + size_t snp_size = seeds[range.first].snps.size(); + size_t pos_size = range.second - range.first; + fp << ">"; + fp << "rpt_" << grp_id << "*" << allele_id; + fp << "\t" << seq_name; + fp << "\t" << baseoff + seeds[range.first].consensus_pos.first; + fp << "\t" << seeds[range.first].consensus_pos.second - seeds[range.first].consensus_pos.first; + fp << "\t" << pos_size; + fp << "\t" << snp_size; + + fp << "\t"; + for(size_t i = 0; i < snp_size; i++) { + if(i > 0) {fp << ",";} + fp << "rps" << seeds[range.first].snps[i]->id; + } + fp << endl; + + // print positions + for(size_t i = 0; i < pos_size; i++) { + if(i > 0 && (i % 10 == 0)) { + fp << endl; + } + + if(i % 10 != 0) { + fp << " "; + } + string chr_name; + TIndexOffU pos_in_chr; + TIndexOffU joinedOff = seeds[range.first + i].pos.first; + + bool fw = true; + if(joinedOff < forward_length_) { + fw = true; + } else { + fw = false; + joinedOff = s_.length() - joinedOff - seeds[range.first].getLength(); + } + + 
coordHelper_.getGenomeCoord(joinedOff, chr_name, pos_in_chr); + + char direction = fw ? '+' : '-'; + fp << chr_name << ":" << pos_in_chr << ":" << direction; + } + + fp << endl; +} + +template +void RepeatBuilder::writeSNPs(ostream& fp, + const string& rep_chr_name, + const ESet& editset) +{ + for(size_t i = 0; i < editset.size(); i++) { + const Edit& ed = editset[i]; + + if(ed.isMismatch()) { + fp << "rps" << ed.snpID; + fp << "\t" << "single"; + fp << "\t" << rep_chr_name; + fp << "\t" << ed.pos; + fp << "\t" << ed.qchr; + fp << endl; + } else { + assert(false); + } + } +} + + +template +void RepeatBuilder::saveFile(const RepeatParameter& rp) +{ + saveRepeats(rp); +} + +bool RB_RepeatManager::checkRedundant(const RepeatParameter& rp, + const map& repeat_map, + const EList& positions, + EList& to_remove) const +{ + to_remove.clear(); + bool replace = false; + for(size_t i = 0; i < positions.size(); i++) { + TIndexOffU seed_pos = positions[i]; + Range seed_range(seed_pos + rp.seed_len, 0); + map >::const_iterator it = range_to_repeats_.upper_bound(seed_range); + if(it == range_to_repeats_.end()) + continue; + + for(map >::const_reverse_iterator rit(it); rit != range_to_repeats_.rend(); rit++) { + Range repeat_range = rit->first; + if(repeat_range.first + rp.max_repeat_len <= seed_pos) + break; + + const EList& repeat_ids = rit->second; + assert_gt(repeat_ids.size(), 0); + for(size_t r = 0; r < repeat_ids.size(); r++) { + size_t repeat_id = repeat_ids[r]; + size_t idx = to_remove.bsearchLoBound(repeat_id); + if(idx < to_remove.size() && to_remove[idx] == repeat_id) + continue; + + bool overlap = seed_pos < repeat_range.second && seed_pos + rp.seed_len > repeat_range.first; + if(!overlap) + continue; + + map::const_iterator it2 = repeat_map.find(repeat_id); + assert(it2 != repeat_map.end()); + const RB_Repeat& repeat = *(it2->second); + const EList& allele_ranges = repeat.seed_ranges(); + size_t num_contain = 0, num_overlap = 0, num_close = 0, num_overlap_bp = 0; + 
size_t p = 0, p2 = 0; + while(p < positions.size() && p2 < allele_ranges.size()) { + RB_AlleleCoord range; + range.left = positions[p]; + range.right = positions[p] + rp.seed_len; + RB_AlleleCoord range2 = allele_ranges[p2]; + if(range2.contain(range)) { + num_contain++; + num_overlap_bp += rp.seed_len; + } else { + TIndexOffU overlap = range2.overlap_len(range); + if(overlap > 0) { + num_overlap++; + num_overlap_bp += (range2.right - range.left); + } else if(range.right + 10 > range2.left && range2.right + 10 > range.left) { + num_close++; + } + } + if(range.right <= range2.right) p++; + else p2++; + } + + // if the number of matches is >= 90% of positions in the smaller group + if((num_contain + num_overlap) * 10 + num_close * 8 >= min(positions.size(), allele_ranges.size()) * 9) { + if(positions.size() <= allele_ranges.size()) { + return true; + } else { + replace = true; + to_remove.push_back(repeat_id); + to_remove.sort(); + } + } + } + } + + // DK - check this out + if(replace) + break; + } + return false; +} + +void RB_RepeatManager::addRepeat(const RB_Repeat* repeat) +{ + const EList& allele_ranges = repeat->seed_ranges(); + for(size_t i = 0; i < allele_ranges.size(); i++) { + Range allele_range(allele_ranges[i].left, allele_ranges[i].right); + addRepeat(allele_range, repeat->repeat_id()); + } +} + +void RB_RepeatManager::addRepeat(Range range, size_t repeat_id) +{ + if(range_to_repeats_.find(range) == range_to_repeats_.end()) { + range_to_repeats_[range] = EList(); + } + EList& repeat_ids = range_to_repeats_[range]; + size_t idx = repeat_ids.bsearchLoBound(repeat_id); + if(idx < repeat_ids.size() && repeat_ids[idx] == repeat_id) + return; + repeat_ids.push_back(repeat_id); + repeat_ids.sort(); +} + +void RB_RepeatManager::removeRepeat(const RB_Repeat* repeat) +{ + const EList& allele_ranges = repeat->seed_ranges(); + for(size_t p = 0; p < allele_ranges.size(); p++) { + Range range(allele_ranges[p].left, allele_ranges[p].right); + removeRepeat(range, 
repeat->repeat_id()); + } +} + +void RB_RepeatManager::removeRepeat(Range range, size_t repeat_id) +{ + EList& repeat_ids = range_to_repeats_[range]; + TIndexOffU idx = repeat_ids.bsearchLoBound(repeat_id); + if(idx < repeat_ids.size() && repeat_id == repeat_ids[idx]) { + repeat_ids.erase(idx); + if(repeat_ids.empty()) + range_to_repeats_.erase(range); + } +} + +void RB_RepeatManager::showInfo(const RepeatParameter& rp, + CoordHelper& coordHelper, + const map& repeat_map, + size_t num_case) const +{ + size_t count = 0; + for(map >::const_iterator it = range_to_repeats_.begin(); + it != range_to_repeats_.end(); it++) { + const Range& range = it->first; + if(range.second - range.first < rp.min_repeat_len) continue; + map >::const_iterator jt = it; jt++; + for(; jt != range_to_repeats_.end(); jt++) { + const Range& range2 = jt->first; + if(range2.second - range2.first < rp.min_repeat_len) continue; + if(range.second <= range2.first) + break; + if(count < num_case) { + cerr << "range (" << range.first << ", " << range.second << ") vs. 
range2 ("; + cerr << range2.first << ", " << range2.second << ")" << endl; + for(size_t i = 0; i < it->second.size(); i++) { + cerr << "\t1 " << it->second[i] << endl; + const RB_Repeat* repeat = repeat_map.find(it->second[i])->second; + repeat->showInfo(rp, coordHelper); + } + for(size_t i = 0; i < jt->second.size(); i++) { + cerr << "\t2 " << jt->second[i] << endl; + const RB_Repeat* repeat = repeat_map.find(jt->second[i])->second; + repeat->showInfo(rp, coordHelper); + } + cerr << endl << endl; + } + count++; + } + } + cerr << "ShowInfo - count: " << count << endl; +} + +template +void RepeatBuilder::reassignSeeds(const RepeatParameter& rp, + size_t repeat_bid, // repeat begin id + size_t repeat_eid, // repeat end id + EList& seeds) +{ + assert_lt(repeat_bid, repeat_eid); + EList updated; + updated.resizeExact(repeat_eid - repeat_bid); + updated.fillZero(); + + string seq; + for(size_t s = 0; s < seeds.size(); s++) { + SeedExt& seed = seeds[s]; + size_t max_repeat_id = repeat_bid; + size_t max_ext_len = 0; + for(size_t i = repeat_bid; i < repeat_eid; i++) { + map::iterator it = repeat_map_.find(i); + assert(it != repeat_map_.end()); + const RB_Repeat& repeat = *(it->second); + const string& consensus = repeat.consensus(); + const size_t ext_len = (consensus.length() - rp.seed_len) / 2; + + if(seed.bound.first + ext_len > seed.pos.first) + continue; + if(seed.pos.second + ext_len > seed.bound.second) + continue; + + getString(s_, seed.pos.first - ext_len, rp.seed_len + ext_len * 2, seq); + assert_eq(consensus.length(), seq.length()); + size_t tmp_ext_len = 0; + for(size_t j = 0; j < ext_len; j++, tmp_ext_len++) { + if(seq[ext_len - j - 1] != consensus[ext_len - j - 1] || + seq[ext_len + rp.seed_len + j] != consensus[ext_len + rp.seed_len + j]) { + break; + } + } + + if(tmp_ext_len > max_ext_len) { + max_repeat_id = i; + max_ext_len = tmp_ext_len; + } + } + if(rp.seed_len + max_ext_len * 2 >= rp.min_repeat_len) { + seed.pos.first -= max_ext_len; + seed.pos.second 
+= max_ext_len; + map::iterator it = repeat_map_.find(max_repeat_id); + assert(it != repeat_map_.end()); + RB_Repeat& repeat = *(it->second); + const string& consensus = repeat.consensus(); + const size_t ext_len = (consensus.length() - rp.seed_len) / 2; + assert_leq(max_ext_len, ext_len); + seed.consensus_pos.first = ext_len - max_ext_len; + seed.consensus_pos.second = consensus.length() - (ext_len - max_ext_len); + assert_leq(seed.consensus_pos.second - seed.consensus_pos.first, consensus.length()); + repeat.addSeed(seed); + updated[max_repeat_id - repeat_bid] = true; + } + } + + for(size_t i = repeat_bid; i < repeat_eid; i++) { + if(!updated[i - repeat_bid]) + continue; + map::iterator it = repeat_map_.find(i); + assert(it != repeat_map_.end()); + RB_Repeat& repeat = *(it->second); + repeat.update(); + } +} + +/** + * TODO + * @brief + * + * @param rpt_seq + * @param rpt_range + */ +template +void RepeatBuilder::addRepeatGroup(const RepeatParameter& rp, + size_t& repeat_id, + RB_RepeatManager& repeat_manager, + const RB_RepeatBase& repeatBase, + ostream& fp) +{ + RB_Repeat* repeat = new RB_Repeat; + repeat->repeat_id(repeat_id); + repeat->init(rp, + s_, + coordHelper_, + subSA_, + repeatBase); + + assert(repeat_map_.find(repeat->repeat_id()) == repeat_map_.end()); + repeat_map_[repeat->repeat_id()] = repeat; + repeat_id++; +} + +template +bool RepeatBuilder::checkSequenceMergeable(const string& ref, + const string& read, + EList& edits, + Coord& coord, + TIndexOffU rpt_len, + TIndexOffU max_edit) +{ + size_t max_matchlen = 0; + EList ed; + + string pad; + swaligner_.makePadString(ref, read, pad, 5); + + string ref2 = pad + ref; + string read2 = pad + read; + + swaligner_.alignStrings(ref2, read2, ed, coord); + + // match should start from pad string + if(coord.off() != 0) { + return false; + } + + // no edits on pad string + if(ed.size() > 0 && ed[0].pos < pad.length()) { + return false; + } + + size_t left = pad.length(); + size_t right = left + read.length(); 
+ + edits.clear(); + edits.reserveExact(ed.size()); + for(size_t i = 0; i < ed.size(); i++) { + if(ed[i].pos >= left && ed[i].pos <= right) { + edits.push_back(ed[i]); + edits.back().pos -= left; + } + } + + max_matchlen = getMaxMatchLen(edits, read.length()); + +#ifdef DEBUGLOG + { + cerr << "After pad removed" << endl; + BTDnaString btread; + btread.install(read.c_str(), true); + Edit::print(cerr, edits); cerr << endl; + Edit::printQAlign(cerr, btread, edits); + } +#endif + + return (max_matchlen >= rpt_len); +} + + +template +void RepeatBuilder::saveRepeats(const RepeatParameter &rp) +{ + ios_base::openmode mode = ios_base::out; + if(rp.append_result) { + mode |= ios_base::app; + } else { + mode |= ios_base::trunc; + } + // Generate SNPs + size_t i = 0; + for(map::iterator it = repeat_map_.begin(); it != repeat_map_.end(); it++, i++) { + RB_Repeat& repeat = *(it->second); + if(!repeat.satisfy(rp)) + continue; + + // for each repeats + repeat.generateSNPs(rp, s_, i); + } + + // save snp, consensus sequenuce, info + string snp_fname = filename_ + ".rep.snp"; + string info_fname = filename_ + ".rep.info"; + string hapl_fname = filename_ + ".rep.haplotype"; + + ofstream snp_fp(snp_fname.c_str(), mode); + ofstream info_fp(info_fname.c_str(), mode); + ofstream hapl_fp(hapl_fname.c_str(), mode); + + const string repName = "rep" + to_string(rp.min_repeat_len) + "-" + to_string(rp.max_repeat_len); + + i = 0; + TIndexOffU consensus_baseoff = 0; + TIndexOffU snp_id_base = 0; + TIndexOffU hapl_id_base = 0; + for(map::iterator it = repeat_map_.begin(); it != repeat_map_.end(); it++, i++) { + RB_Repeat& repeat = *(it->second); + if(!repeat.satisfy(rp)) + continue; + + // for each repeats + repeat.saveSNPs(snp_fp, i, snp_id_base); + saveAlleles(rp, + repName, + repeat, + info_fp, + hapl_fp, + i, + hapl_id_base, + consensus_baseoff); + + consensus_baseoff += repeat.consensus().length(); + } + + snp_fp.close(); + info_fp.close(); + hapl_fp.close(); + + // save all consensus 
sequence + saveConsensus(rp, repName); +} + +template +void RepeatBuilder::saveConsensus(const RepeatParameter &rp, + const string& repName) { + ios_base::openmode mode = ios_base::out; + if(rp.append_result) { + mode |= ios_base::app; + } else { + mode |= ios_base::trunc; + } + + string fa_fname = filename_ + ".rep.fa"; + ofstream fa_fp(fa_fname.c_str(), mode); + + fa_fp << ">" << repName << endl; + + size_t oskip = 0; + for(map::iterator it = repeat_map_.begin(); it != repeat_map_.end(); it++) { + RB_Repeat& repeat = *(it->second); + if(!repeat.satisfy(rp)) + continue; + + // for each repeats + const string& constr = repeat.consensus(); + size_t si = 0; + size_t constr_len = constr.length(); + while(si < constr_len) { + size_t out_len = std::min((size_t)(output_width - oskip), (size_t)(constr_len - si)); + fa_fp << constr.substr(si, out_len); + + if((oskip + out_len) == output_width) { + fa_fp << endl; + oskip = 0; + } else { + // last line + oskip = oskip + out_len; + } + + si += out_len; + } + } + if(oskip) { + fa_fp << endl; + } + + fa_fp.close(); +} + +template +void RepeatBuilder::saveAlleles( + const RepeatParameter& rp, + const string& repName, + RB_Repeat& repeat, + ofstream& fp, + ofstream& hapl_fp, + TIndexOffU grp_id, + TIndexOffU& hapl_id_base, + TIndexOffU baseoff) +{ + const EList& seeds = repeat.seeds(); + EList haplo_lists; + Range range(0, seeds.size()); + + int allele_id = 0; + + for(size_t sb = range.first; sb < range.second;) { + size_t se = sb + 1; + for(; se < range.second; se++) { + if(!(SeedExt::isSameConsensus(seeds[sb], seeds[se]) + && SeedExt::isSameSNPs(seeds[sb], seeds[se]) + && seeds[sb].aligned == seeds[se].aligned)) { + break; + } + } + + if(!seeds[sb].aligned) { + sb = se; + continue; + } + + if(seeds[sb].getLength() < rp.min_repeat_len) { + sb = se; + continue; + } + + // [sb, se) are same alleles + writeAllele(grp_id, allele_id, Range(sb, se), + repName, baseoff, seeds, fp); + generateHaploType(Range(sb, se), seeds, 
haplo_lists); + + allele_id++; + sb = se; + } + + // sort HaploType List by pos + haplo_lists.sort(); + writeHaploType(haplo_lists, seeds, hapl_id_base, repName, hapl_fp); +} + +template +void RepeatBuilder::generateHaploType(Range range, const EList &seeds, EList &haplo_list) +{ + const EList& snps = seeds[range.first].snps; + if(snps.size() == 0) + return; + + // right-most position + TIndexOffU max_right_pos = seeds[range.first].consensus_pos.second - 1; + +#ifndef NDEBUG + for(size_t i = 0; i < snps.size(); i++) { + assert_leq(snps[i]->pos, max_right_pos); + } +#endif + + // create haplotypes of at least 16 bp long to prevent combinations of SNPs + // break a list of SNPs into several haplotypes if a SNP is far from the next SNP in the list + const TIndexOffU min_ht_len = 16; + size_t eb = 0, ee = 1; + while(ee < snps.size() + 1) { + if(ee == snps.size() || + snps[eb]->pos + (min_ht_len << 1) < snps[ee]->pos) { + TIndexOffU left_pos = snps[eb]->pos; + TIndexOffU right_pos = snps[ee-1]->pos; + + if(snps[ee-1]->type == EDIT_TYPE_READ_GAP) { + right_pos += snps[ee-1]->len; + } + if(left_pos + min_ht_len - 1 > right_pos) { + right_pos = left_pos + min_ht_len - 1; + } + right_pos = min(max_right_pos, right_pos); + assert_leq(left_pos, right_pos); + + SeedHP seedHP; + + seedHP.range.first = left_pos; + seedHP.range.second = right_pos; + for(size_t i = eb; i < ee; i++) { + string snp_ids = "rps" + to_string(snps[i]->id); + seedHP.snpIDs.push_back(snp_ids); + } + + // Add to haplo_list + bool found = false; + for(size_t i = 0; i < haplo_list.size(); i++) { + if(haplo_list[i] == seedHP) { + found = true; + break; + } + } + if(!found) { + // Add + haplo_list.push_back(seedHP); + } + + eb = ee; + } + ee++; + } +} + + +void RB_SubSA::init(TIndexOffU sa_size, + TIndexOffU seed_len, + TIndexOffU seed_count) +{ + assert_gt(sa_size, 1); + sa_size_ = sa_size; + assert_gt(seed_len, 0); + seed_len_ = seed_len; + assert_gt(seed_count, 1); + seed_count_ = seed_count; + 
temp_suffixes_.clear(); + + repeat_list_.clear(); + repeat_index_.clear(); +} + +template +void RB_SubSA::push_back(const TStr& s, + CoordHelper& coordHelper, + TIndexOffU saElt, + bool lastInput) +{ + if(saElt + seed_len() <= coordHelper.getEnd(saElt)) { + if(seed_count_ == 1) { + repeat_list_.push_back(saElt); + } else { + assert_gt(seed_count_, 1); + + if(temp_suffixes_.empty()) { + temp_suffixes_.push_back(saElt); + return; + } + + TIndexOffU prev_saElt = temp_suffixes_.back(); + + // calculate common prefix length between two text. + // text1 is started from prev_saElt and text2 is started from saElt + bool same = isSameSequenceUpto(s, + coordHelper, + prev_saElt, + saElt, + seed_len_); + + if(same) { + temp_suffixes_.push_back(saElt); + } + + if(!same || lastInput) { + if(temp_suffixes_.size() >= seed_count_) { + repeat_index_.push_back(repeat_list_.size()); + temp_suffixes_.sort(); + for(size_t pi = 0; pi < temp_suffixes_.size(); pi++) { + repeat_list_.push_back(temp_suffixes_[pi]); + } + } + temp_suffixes_.clear(); + if(!lastInput) { + temp_suffixes_.push_back(saElt); + } else { + temp_suffixes_.nullify(); + } + } + } + } + + if(lastInput) { + size_t bit = sizeof(uint32_t) * 8; + size_t num = (repeat_list_.size() + bit - 1) / bit; + done_.resizeExact(num); + done_.fillZero(); + +#ifndef NDEBUG + string prev_seq = ""; + for(size_t i = 0; i < repeat_index_.size(); i++) { + TIndexOffU saBegin = repeat_index_[i]; + TIndexOffU saEnd = (i + 1 < repeat_index_.size() ? 
repeat_index_[i+1] : repeat_list_.size()); + string seq = getString(s, repeat_list_[saBegin], seed_len()); + for(size_t j = saBegin + 1; j < saEnd; j++) { + string tmp_seq = getString(s, repeat_list_[j], seed_len()); + assert_eq(seq, tmp_seq); + } + if(prev_seq != "" ) { + assert_lt(prev_seq, seq); + } + prev_seq = seq; + } +#endif + } +} + +template +Range RB_SubSA::find(const TStr& s, + const string& seq) const +{ + TIndexOffU i = find_repeat_idx(s, seq); + if(i >= repeat_index_.size()) + return Range(0, 0); + + Range range(repeat_index_[i], + i + 1 < repeat_index_.size() ? repeat_index_[i+1] : repeat_list_.size()); + return range; +} + +template +TIndexOffU RB_SubSA::find_repeat_idx(const TStr& s, + const string& seq) const +{ + assert_eq(seq.length(), seed_len_); + + string temp; + size_t l = 0, r = repeat_index_.size(); + while(l < r) { + size_t m = (r + l) >> 1; + TIndexOffU saElt = repeat_list_[repeat_index_[m]]; + getString(s, saElt, seed_len_, temp); + if(seq == temp) { + return m; + } else if(seq < temp) { + r = m; + } else { + assert(seq > temp); + l = m + 1; + } + } + return repeat_index_.size(); +} + +void RB_SubSA::setDone(TIndexOffU off, TIndexOffU len) +{ + assert_leq(off + len, repeat_list_.size()); + const TIndexOffU bit = sizeof(uint32_t) * 8; + for(TIndexOffU i = off; i < off + len; i++) { + TIndexOffU quotient = i / bit; + TIndexOffU remainder = i % bit; + assert_lt(quotient, done_.size()); + uint32_t num = done_[quotient]; + num = num | (1 << remainder); + done_[quotient] = num; + } +} + +bool RB_SubSA::isDone(TIndexOffU off, TIndexOffU len) const +{ + assert_leq(off + len, repeat_list_.size()); + const TIndexOffU bit = sizeof(uint32_t) * 8; + for(TIndexOffU i = off; i < off + len; i++) { + TIndexOffU quotient = i / bit; + TIndexOffU remainder = i % bit; + assert_lt(quotient, done_.size()); + uint32_t num = done_[quotient]; + num = num & (1 << remainder); + if(num == 0) + return false; + } + + return true; +} + +template +void 
RB_SubSA::buildRepeatBase(const TStr& s, + CoordHelper& coordHelper, + const size_t max_len, + EList& repeatBases) +{ + if(repeat_index_.empty()) + return; + + done_.fillZero(); + + EList senseDominant; + senseDominant.resizeExact(repeat_index_.size()); + senseDominant.fillZero(); + + EList repeatStack; + EList > size_table; + size_table.reserveExact(repeat_index_.size() / 2 + 1); + for(size_t i = 0; i < repeat_index_.size(); i++) { + TIndexOffU begin = repeat_index_[i]; + TIndexOffU end = (i + 1 < repeat_index_.size() ? repeat_index_[i+1] : repeat_list_.size()); + assert_lt(begin, end); + EList positions; positions.reserveExact(end - begin); + for(size_t j = begin; j < end; j++) positions.push_back(repeat_list_[j]); + + if(!isSenseDominant(coordHelper, positions, seed_len_) && !threeN) + continue; + + senseDominant[i] = 1; + size_table.expand(); + size_table.back().first = end - begin; + size_table.back().second = i; + } + size_table.sort(); + + size_t bundle_count = 0; + string tmp_str; + EList tmp_ranges; tmp_ranges.resizeExact(4); + EList > tmp_sort_ranges; tmp_sort_ranges.resizeExact(4); + for(int64_t i = (int64_t)size_table.size() - 1; i >= 0; i--) { + TIndexOffU idx = size_table[i].second; + ASSERT_ONLY(TIndexOffU num = size_table[i].first); + assert_lt(idx, repeat_index_.size()); +#ifndef NDEBUG + if(idx + 1 < repeat_index_.size()) { + assert_eq(repeat_index_[idx] + num, repeat_index_[idx+1]); + } else { + assert_eq(repeat_index_[idx] + num, repeat_list_.size()); + } +#endif + TIndexOffU saBegin = repeat_index_[idx]; + if(isDone(saBegin)) { + assert(isDone(saBegin, num)); + continue; + } + + ASSERT_ONLY(size_t rb_done = 0); + assert_lt(saBegin, repeat_list_.size()); + repeatStack.push_back(idx); + while(!repeatStack.empty()) { + TIndexOffU idx = repeatStack.back(); + assert(senseDominant[idx]); + repeatStack.pop_back(); + assert_lt(idx, repeat_index_.size()); + TIndexOffU saBegin = repeat_index_[idx]; + TIndexOffU saEnd = (idx + 1 < repeat_index_.size() ? 
repeat_index_[idx + 1] : repeat_list_.size()); + if(isDone(saBegin)) { + assert(isDone(saBegin, saEnd - saBegin)); + continue; + } + + TIndexOffU saElt = repeat_list_[saBegin]; + size_t ri = repeatBases.size(); + repeatBases.expand(); + repeatBases.back().seq = getString(s, saElt, seed_len_); + repeatBases.back().nodes.clear(); + repeatBases.back().nodes.push_back(idx); + ASSERT_ONLY(rb_done++); + setDone(saBegin, saEnd - saBegin); + bool left = true; + while(repeatBases[ri].seq.length() <= max_len) { + if(left) { + tmp_str = "N"; + tmp_str += repeatBases[ri].seq.substr(0, seed_len_ - 1); + } else { + tmp_str = repeatBases[ri].seq.substr(repeatBases[ri].seq.length() - seed_len_ + 1, seed_len_ - 1); + tmp_str.push_back('N'); + } + assert_eq(tmp_str.length(), seed_len_); + + for(size_t c = 0; c < 4; c++) { + if(left) tmp_str[0] = "ACGT"[c]; + else tmp_str.back() = "ACGT"[c]; + assert_eq(tmp_str.length(), seed_len_); + TIndexOffU idx = find_repeat_idx(s, tmp_str); + size_t num = 0; + if(idx < repeat_index_.size()) { + if(idx + 1 < repeat_index_.size()) { + num = repeat_index_[idx+1] - repeat_index_[idx]; + } else { + num = repeat_list_.size() - repeat_index_[idx]; + } + } + tmp_ranges[c].first = idx; + tmp_ranges[c].second = num; + assert(num == 0 || num >= seed_count_); + tmp_sort_ranges[c].first = num; + tmp_sort_ranges[c].second = c; + if(idx == repeat_index_.size() || + isDone(repeat_index_[idx]) || + !senseDominant[idx]) { +#ifndef NDEBUG + if(idx < repeat_index_.size()) { + assert(isDone(repeat_index_[idx], num) || + !senseDominant[idx]); + } +#endif + tmp_sort_ranges[c].first = 0; + tmp_ranges[c].second = 0; + } + } + tmp_sort_ranges.sort(); + if(tmp_sort_ranges[3].first < seed_count_) { + if(left) { + left = false; + continue; + } else { + break; + } + } + + for(size_t cc = 0; cc < 3; cc++) { + assert_leq(tmp_sort_ranges[cc].first, tmp_sort_ranges[cc+1].first); + if(tmp_sort_ranges[cc].first < seed_count_) + continue; + + size_t c = tmp_sort_ranges[cc].second; 
+ repeatStack.push_back(tmp_ranges[c].first); + } + + size_t c = tmp_sort_ranges[3].second; + if(repeatBases[ri].seq.length() >= max_len) { + assert_eq(repeatBases[ri].seq.length(), max_len); + TIndexOffU idx = tmp_ranges[c].first; + assert(!isDone(repeat_index_[idx])); + repeatStack.push_back(idx); + if(left) { + left = false; + continue; + } else { + break; + } + } else { + TIndexOffU idx = tmp_ranges[c].first; + TIndexOffU num = tmp_ranges[c].second; + setDone(repeat_index_[idx], num); + if(left) { + repeatBases[ri].seq.insert(0, 1, "ACGT"[c]); + repeatBases[ri].nodes.insert(idx, 0); + } else { + repeatBases[ri].seq.push_back("ACGT"[c]); + repeatBases[ri].nodes.push_back(idx); + } + } + } + } + + assert_gt(rb_done, 0); + bundle_count++; + } + + cerr << "Bundle count: " << bundle_count << endl; + +#ifndef NDEBUG + { + set idx_set; + for(size_t i = 0; i < repeatBases.size(); i++) { + const RB_RepeatBase& repeatBase = repeatBases[i]; + for(size_t j = 0; j < repeatBase.nodes.size(); j++) { + assert(idx_set.find(repeatBase.nodes[j]) == idx_set.end()); + idx_set.insert(repeatBase.nodes[j]); + } + } + } +#endif + +#if 0 + { + EList > kmer_table; + string query; + size_t total = 0, match = 0; + size_t interval = 1; + if(repeat_index_.size() >= 10000) { + interval = repeat_index_.size() / 10000; + } + EList positions; + for(size_t i = 0; i < repeat_index_.size(); i += interval) { + TIndexOffU saElt_idx = repeat_index_[i]; + TIndexOffU saElt_idx_end = (i + 1 < repeat_index_.size() ? 
repeat_index_[i+1] : repeat_list_.size()); + positions.clear(); + for(size_t j = saElt_idx; j < saElt_idx_end; j++) { + positions.push_back(repeat_list_[j]); +#ifndef NDEBUG + if(j > saElt_idx) { + TIndexOffU lcp_len = getLCP(s, + coordHelper, + positions[0], + positions.back(), + seed_len_); + assert_geq(lcp_len, seed_len_); + } + + TIndexOffU saElt = repeat_list_[j]; + TIndexOffU start = coordHelper.getStart(saElt); + TIndexOffU start2 = coordHelper.getStart(saElt + seed_len_ - 1); + assert_eq(start, start2); +#endif + } + + TIndexOffU saElt = repeat_list_[saElt_idx]; + size_t true_count = saElt_idx_end - saElt_idx; + getString(s, saElt, seed_len_, query); + + total++; + + size_t count = 0; + for(size_t r = 0; r < repeatBases.size(); r++) { + const RB_RepeatBase& repeat = repeatBases[r]; + int pos = repeat.seq.find(query); + if(pos != string::npos) { + for(size_t j = 0; j < repeat.nodes.size(); j++) { + TIndexOffU _node = repeat.nodes[j]; + TIndexOffU _saElt_idx = repeat_index_[_node]; + TIndexOffU _saElt_idx_end = (_node + 1 < repeat_index_.size() ? repeat_index_[_node+1] : repeat_list_.size()); + TIndexOffU _saElt = repeat_list_[_saElt_idx]; + string seq = getString(s, _saElt, seed_len()); + if(query == seq) { + count += (_saElt_idx_end - _saElt_idx); + } + } + } + } + + size_t rc_count = 0; + string rc_query = reverse_complement(query); + for(size_t r = 0; r < repeatBases.size(); r++) { + const RB_RepeatBase& repeat = repeatBases[r]; + int pos = repeat.seq.find(rc_query); + if(pos != string::npos) { + for(size_t j = 0; j < repeat.nodes.size(); j++) { + TIndexOffU _node = repeat.nodes[j]; + TIndexOffU _saElt_idx = repeat_index_[_node]; + TIndexOffU _saElt_idx_end = (_node + 1 < repeat_index_.size() ? 
repeat_index_[_node+1] : repeat_list_.size()); + TIndexOffU _saElt = repeat_list_[_saElt_idx]; + string seq = getString(s, _saElt, seed_len()); + if(rc_query == seq) { + rc_count += (_saElt_idx_end - _saElt_idx); + } + } + } + } + + if(count == true_count || rc_count == true_count) { + match++; + } else if(total - match <= 10) { + cerr << "query: " << query << endl; + cerr << "true count: " << true_count << endl; + cerr << "found count: " << count << endl; + cerr << "rc found count: " << rc_count << endl; + cerr << endl; + } + } + + cerr << "RB_SubSA: sanity check: " << match << " passed (out of " << total << ")" << endl << endl; + } +#endif +} + + +#define write_fp(x) fp.write((const char *)&(x), sizeof((x))) + +void RB_SubSA::writeFile(ofstream& fp) +{ + write_fp(sa_size_); + write_fp(seed_len_); + write_fp(seed_count_); + + size_t sz = temp_suffixes_.size(); + write_fp(sz); + for(size_t i = 0; i < sz; i++) { + write_fp(temp_suffixes_[i]); + } + + sz = repeat_index_.size(); + write_fp(sz); + for(size_t i = 0; i < sz; i++) { + write_fp(repeat_index_[i]); + } + + sz = done_.size(); + write_fp(sz); + for(size_t i = 0; i < sz; i++) { + write_fp(done_[i]); + } +} + +#define read_fp(x) fp.read((char *)&(x), sizeof((x))) + +void RB_SubSA::readFile(ifstream& fp) +{ + TIndexOffU val; + + read_fp(val); + rt_assert_eq(val, sa_size_); + + read_fp(val); + rt_assert_eq(val, seed_len_); + + read_fp(val); + rt_assert_eq(val, seed_count_); + + size_t sz; + read_fp(sz); + temp_suffixes_.resizeExact(sz); + for(size_t i = 0; i < sz; i++) { + read_fp(temp_suffixes_[i]); + } + + size_t val_sz; + + // repeat_index_ + read_fp(val_sz); + repeat_index_.resizeExact(val_sz); + for(size_t i = 0; i < val_sz; i++) { + read_fp(repeat_index_[i]); + } + + // done + read_fp(val_sz); + done_.resizeExact(val_sz); + for(size_t i = 0; i < val_sz; i++) { + read_fp(done_[i]); + } + +} + + +/****************************/ +template class RepeatBuilder >; +template void dump_tstr(const SString& ); 
+template bool compareRepeatCoordByJoinedOff(const RepeatCoord& , const RepeatCoord&); diff --git a/repeat_builder.h b/repeat_builder.h new file mode 100644 index 0000000..092f33d --- /dev/null +++ b/repeat_builder.h @@ -0,0 +1,964 @@ + /* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#ifndef __REPEAT_BUILDER_H__ +#define __REPEAT_BUILDER_H__ + +#include +#include +#include +#include +#include "assert_helpers.h" +#include "word_io.h" +#include "mem_ids.h" +#include "ref_coord.h" +#include "ref_read.h" +#include "edit.h" +#include "ds.h" +#include "repeat.h" +#include "blockwise_sa.h" +#include "simple_func.h" +#include "scoring.h" +#include "aligner_sw.h" +#include "bit_packed_array.h" + +//#define DEBUGLOG + +using namespace std; + +/** + * Encapsulates repeat parameters. 
+ */ +class RepeatParameter { +public: + TIndexOffU seed_len; // seed length + TIndexOffU seed_count; // seed count + TIndexOffU seed_mm; // maximum edit distance allowed during initial seed extension + TIndexOffU repeat_count; // repeat count + TIndexOffU min_repeat_len; // minimum repeat length + TIndexOffU max_repeat_len; // maximum repeat length + TIndexOffU max_edit; // maximum edit distance allowed + bool symmetric_extend; // extend symmetrically + TIndexOffU extend_unit_len; // extend seeds no longer than this length at a time + bool append_result; +}; + +typedef pair Range; + +struct Fragments { + bool contain(TIndexOffU pos) { + if (pos >= joinedOff && pos < (joinedOff + length)) { + return true; + } + return false; + } + + TIndexOffU joinedOff; // index within joined text + TIndexOffU length; + + int frag_id; + int seq_id; + TIndexOffU seqOff; // index within sequence + bool first; +}; + +class CoordHelper { +public: + CoordHelper(TIndexOffU length, + TIndexOffU forward_length, + const EList& szs, + const EList& ref_names); + ~CoordHelper(); + int mapJoinedOffToSeq(TIndexOffU joined_pos); + int getGenomeCoord(TIndexOffU joined_pos, + string& chr_name, + TIndexOffU& pos_in_chr); + + TIndexOffU getEnd(TIndexOffU e); + TIndexOffU getStart(TIndexOffU e); + TIndexOffU forward_length() const { return forward_length_; } + TIndexOffU length() const { return length_; } + +private: + void buildNames(); + void buildJoinedFragment(); + +private: + TIndexOffU length_; + TIndexOffU forward_length_; + EList szs_; + EList ref_names_; + EList ref_namelines_; + + // mapping info from joined string to genome + EList fraglist_; + + // Fragments Cache +#define CACHE_SIZE_JOINEDFRG 10 + Fragments cached_[CACHE_SIZE_JOINEDFRG]; + int num_cached_ = 0; + int victim_ = 0; /* round-robin */ +}; + +struct SeedHP { + bool operator==(const SeedHP &rhs) const { + return range == rhs.range && + snpIDs == rhs.snpIDs; + } + + bool operator!=(const SeedHP &rhs) const { + return !(rhs == 
*this); + } + + bool operator<(const SeedHP &rhs) const { + return range < rhs.range; + } + + bool operator>(const SeedHP &rhs) const { + return rhs < *this; + } + + bool operator<=(const SeedHP &rhs) const { + return !(rhs < *this); + } + + bool operator>=(const SeedHP &rhs) const { + return !(*this < rhs); + } + + Range range; + EList snpIDs; +}; + +struct SeedSNP { + int type; // EDIT_TYPE_MM, EDIT_TYPE_READ_GAP, EDIT_TYPE_REF_GAP + TIndexOffU pos; + size_t len; + string base; // valid when ty = MM, REF_GAP + TIndexOffU id; + + SeedSNP() {} + + void init(int ty, TIndexOffU po, size_t ln, string bs) { + type = ty; + pos = po; + len = ln; + base = bs; + } + void init(int ty, TIndexOffU po, size_t ln, char bs) { + type = ty; + pos = po; + len = ln; + base.assign(1, bs); + } + + friend ostream &operator<<(ostream &os, const SeedSNP &snp) { + + if(snp.type == EDIT_TYPE_MM) { + os << "single"; + } else if(snp.type == EDIT_TYPE_REF_GAP) { + os << "insertion"; + } else if(snp.type == EDIT_TYPE_READ_GAP) { + os << "deletion"; + } + os << " "; + os << snp.pos << " "; + + if(snp.type == EDIT_TYPE_MM || snp.type == EDIT_TYPE_REF_GAP) { + os << snp.base; + } else if(snp.type == EDIT_TYPE_READ_GAP) { + os << snp.len; + } + return os; + } + + bool operator==(const SeedSNP &rhs) const { + return type == rhs.type && + pos == rhs.pos && + len == rhs.len && + base == rhs.base; + } + + bool operator!=(const SeedSNP &rhs) const { + return !(rhs == *this); + } + + static bool cmpSeedSNPByPos( SeedSNP * const& a, SeedSNP * const& b) { + return a->pos < b->pos; + } + +}; + +class SeedExt { +public: + SeedExt() { + reset(); + }; + + void reset() { + done = false; + curr_ext_len = 0; + ed = total_ed = 0; + orig_pos.first = 0; + orig_pos.second = 0; + pos.first = 0; + pos.second = 0; + bound.first = 0; + bound.second = 0; + consensus_pos.first = 0; + consensus_pos.second = 0; + left_gaps.clear(); + right_gaps.clear(); + aligned = true; + }; + + TIndexOffU getLeftExtLength() const { + 
assert_leq(pos.first, orig_pos.first); + TIndexOffU len = orig_pos.first - pos.first; + return len; + } + + TIndexOffU getRightExtLength() const { + assert_geq(pos.second, orig_pos.second); + return pos.second - orig_pos.second; + } + + TIndexOffU getLength() const { + TIndexOffU len = orig_pos.second - orig_pos.first; + len += getLeftExtLength(); + len += getRightExtLength(); + return len; + } + + template + void getLeftExtString(const TStr& s, string& seq) + { + seq.clear(); + seq = getString(s, pos.first, orig_pos.first - pos.first); + } + + template + void getRightExtString(const TStr& s, string& seq) + { + seq.clear(); + seq = getString(s, orig_pos.second, pos.second - orig_pos.second); + } + + template + void getExtendedSeedSequence(const TStr& s, + string& seq) const; + + template + void generateSNPs(const TStr& s, const string& consensus, EList& repeat_snps); + + static bool isSameConsensus(const SeedExt& a, const SeedExt& b) { + return (a.consensus_pos == b.consensus_pos); + //&& (a.getLength() == b.getLength()); + } + + static bool isSameSNPs(const SeedExt& a, const SeedExt& b) { + const EList& a_edit = a.snps; + const EList& b_edit = b.snps; + + if(a_edit.size() != b_edit.size()) { + return false; + } + + for(size_t i = 0; i < a_edit.size(); i++) { + if(!(*a_edit[i] == *b_edit[i])) { + return false; + } + } + return true; + } + + static bool isSameAllele(const SeedExt& a, const SeedExt& b) { + const EList& a_edit = a.edits; + const EList& b_edit = b.edits; + + if(a_edit.size() != b_edit.size()) { + return false; + } + + for(size_t i = 0; i < a_edit.size(); i++) { + if(!(a_edit[i] == b_edit[i])) { + return false; + } + } + return true; + } + + static bool sort_by_edits(const SeedExt& a, const SeedExt& b) { + const EList& a_edit = a.edits; + const EList& b_edit = b.edits; + + size_t i = 0; + + while((i < a_edit.size()) + && (i < b_edit.size())) { + + if(!(a_edit[i] == b_edit[i])) { + if(a_edit[i] < b_edit[i]) { + return true; + } else { + return false; + } 
+ } + + i++; + } + + if(a_edit.size() < b_edit.size()) { + return true; + } + + if((a_edit.size() == b_edit.size()) + && (a.pos.first < b.pos.first)) { + return true; + } + return false; + } + + void generateEdits(const string& consensus_merged, const string& seed_ext) + { + size_t ed_done = 0; + size_t ext_len = getLength(); + assert_eq(ext_len, seed_ext.length()); + + edits.clear(); + + for(size_t i = 0; i < ext_len && ed_done < total_ed; i++) { + char con_base = consensus_merged[consensus_pos.first + i]; + char seed_base = seed_ext[i]; + + if (con_base != seed_base) { + edits.expand(); + edits.back().init(consensus_pos.first + i, + con_base, seed_base, EDIT_TYPE_MM); + ed_done++; + } + } + } + + Range getExtendedRange(size_t consensus_len) const + { + assert_leq(consensus_pos.second, consensus_len); + Range range; + range.first = (pos.first < consensus_pos.first ? 0 : pos.first - consensus_pos.first); + range.second = pos.second + (consensus_len - consensus_pos.second); + return range; + } + +#ifndef NDEBUG + bool valid() const { + assert_leq(consensus_pos.first, consensus_pos.second); + TIndexOffU constr_len = consensus_pos.second - consensus_pos.first; + TIndexOffU allele_len = getLength(); + + TIndexOffU cur_off = 0; + for(size_t i = 0; i < left_gaps.size(); i++) { + assert_geq(left_gaps[i].first, cur_off); + cur_off = left_gaps[i].first; + int gap_len = left_gaps[i].second; + assert_neq(gap_len, 0); + if(gap_len > 0) { // deletion + allele_len += gap_len; + } else { + allele_len += gap_len; + cur_off += (-gap_len); + } + } + cur_off = 0; + for(size_t i = 0; i < right_gaps.size(); i++) { + assert_geq(right_gaps[i].first, cur_off); + cur_off = right_gaps[i].first; + int gap_len = right_gaps[i].second; + assert_neq(gap_len, 0); + if(gap_len > 0) { // deletion + allele_len += gap_len; + } else { + allele_len += gap_len; + cur_off += (-gap_len); + } + } + assert_eq(constr_len, allele_len); + return true; + } +#endif + +public: + // seed extended position [first, 
second) + pair orig_pos; + pair pos; + + // extension bound. the seed must be placed on same fragment + // [first, second) + pair bound; + + // positions relative to consensus sequence + pair consensus_pos; + + // a list of gaps (deletions and insertions) in both directions + // offsets from seed's left and right ("pos" above) + // positive and negative values indicate deletions and insertions, resp. + EList > left_gaps; + EList > right_gaps; + + uint32_t ed; // edit distance + uint32_t total_ed; // total edit distance + bool done; // done flag + uint32_t curr_ext_len; // + + bool aligned; + + EList edits; // edits w.r.t. consensus_merged + + EList snps; +}; + +class RB_AlleleCoord { +public: + RB_AlleleCoord() : + left(0), + right(0), + idx(0) + {} + + RB_AlleleCoord(TIndexOffU l, TIndexOffU r, size_t i) : + left(l), + right(r), + idx(i) + {} + + TIndexOffU len() const { return right - left; } + + bool operator<(const RB_AlleleCoord& o) const + { + if(left != o.left) + return left < o.left; + if(right != o.right) + return right > o.right; + return false; + } + + bool contain(const RB_AlleleCoord& o, size_t relax = 0) const + { + if(o.left + relax >= left && o.right <= right + relax) + return true; + else + return false; + } + + bool contained(const RB_AlleleCoord& o) const + { + return o.contain(*this); + } + + TIndexOffU overlap_len(const RB_AlleleCoord& o) const + { + if(contain(o)) return o.len(); + else if(o.contain(*this)) return len(); + else if(left < o.right && right > o.left) { + if(left <= o.left) { + return right - o.left; + } else { + return o.right - left; + } + } + return 0; + } + +public: + TIndexOffU left; + TIndexOffU right; + size_t idx; +}; + +class RB_RepeatManager; +class RB_SWAligner; +class RB_SubSA; + +class RB_RepeatBase { +public: + string seq; + EList nodes; +}; + +class RB_Repeat { +public: + RB_Repeat() {} + ~RB_Repeat() { + if(snps_.size() > 0) { + for(size_t i = 0; i < snps_.size(); i++) { + delete snps_[i]; + } + } + } + + void 
repeat_id(size_t repeat_id) { repeat_id_ = repeat_id; } + size_t repeat_id() const { return repeat_id_; } + + void parent_id(size_t parent_id) { parent_id_ = parent_id; } + size_t parent_id() const { return parent_id_; } + + string& consensus() { return consensus_; } + const string& consensus() const { return consensus_; } + + EList& seeds() { return seeds_; } + const EList& seeds() const { return seeds_; } + + EList& seed_ranges() { return seed_ranges_; } + const EList& seed_ranges() const { return seed_ranges_; } + + EList& snps() { return snps_; } + const EList& snps() const { return snps_; } + + template + void init(const RepeatParameter& rp, + const TStr& s, + CoordHelper& coordHelper, + const RB_SubSA& subSA, + const RB_RepeatBase& repeatBase); + + template + void extendConsensus(const RepeatParameter& rp, + const TStr& s); + + template + void getNextRepeat(const RepeatParameter& rp, + const TStr& s, + RB_Repeat& o); + + + template + void saveSeedExtension(const RepeatParameter& rp, + const TStr& s, + CoordHelper& coordHelper, + TIndexOffU seed_grp_id, + ostream& fp, + size_t& total_repeat_seq_len, + size_t& total_allele_seq_len) const; + + void saveSNPs(ofstream& fp, + TIndexOffU grp_id, + TIndexOffU& snp_id_base); + void saveConsensus(ofstream& fp, + TIndexOffU grp_id); + + bool overlap(const RB_Repeat& o, + bool& contain, + bool& left, + size_t& seed_i, + size_t& seed_j, + bool debug = false) const; + + bool satisfy(const RepeatParameter& rp) const + { + if(consensus_.length() < rp.min_repeat_len) + return false; + if(seeds_.size() < rp.repeat_count) + return false; + if(seeds_[rp.repeat_count - 1].getLength() < rp.min_repeat_len) + return false; + return true; + } + + void reset() { + consensus_.clear(); + for(size_t i = 0; i < seeds_.size(); i++) + seeds_[i].reset(); + seeds_.clear(); + seed_ranges_.clear(); + } + + void showInfo(const RepeatParameter& rp, + CoordHelper& coordHelper) const; + + template + void generateSNPs(const RepeatParameter&, const 
TStr& s, TIndexOffU grp_id); + + bool self_repeat() const { return self_repeat_; } + + void update() { internal_update(); } + void addSeed(const SeedExt& seed) + { + seeds_.expand(); + seeds_.back() = seed; + } + + bool contain(TIndexOffU left, TIndexOffU right) const; + +protected: + template + void get_consensus_seq(const TStr& s, + EList& seeds, + size_t sb, + size_t se, + size_t min_left_ext, + size_t min_right_ext, + size_t max_ed, + const RepeatParameter& rp, + EList& ed_seed_nums, + EList* left_consensuses, + EList* right_consensuses) const; + + void internal_update(); + + +protected: + size_t repeat_id_; + size_t parent_id_; + string consensus_; + EList seeds_; + EList seed_ranges_; + EList snps_; + bool self_repeat_; + + static EList ca_ed_; + static EList ca_ed2_; + static string ca_s_; + static string ca_s2_; + +public: + static size_t seed_merge_tried; + static size_t seed_merged; +}; + +class RB_RepeatExt : public RB_Repeat { +public: + RB_RepeatExt() {} + ~RB_RepeatExt() {} + + template + void extendConsensus(const RepeatParameter& rp, + const TStr& s); + + float mergeable(const RB_Repeat& o) const; + + template + bool merge(const RepeatParameter& rp, + const TStr& s, + RB_SWAligner& swalginer, + const RB_RepeatExt& o, + bool contain, + size_t seed_i, + size_t seed_j, + bool debug = false); + +protected: + template + void get_consensus_seq(const TStr& s, + EList& seeds, + size_t sb, + size_t se, + size_t min_left_ext, + size_t min_right_ext, + size_t max_ed, + const RepeatParameter& rp, + EList& ed_seed_nums, + EList* left_consensuses, + EList* right_consensuses) const; + + template + bool align(const RepeatParameter& rp, + const TStr& ref, + const string& s, + const EList >& s_kmer_table, + const string& s2, + EList& offsets, + size_t k, + SeedExt& seed, + int consensus_approx_left, + int consensus_approx_right, + size_t left, + size_t right, + bool debug); + + bool isSelfRepeat(const RepeatParameter& rp, + const string& s, + const EList >& 
s_kmer_table, + EList& offsets, + size_t k, + bool debug); + +}; + +// check if a set of seeds are already processed +class RB_RepeatManager { +public: + size_t numCoords() const { return range_to_repeats_.size(); } + + bool checkRedundant(const RepeatParameter& rp, + const map& repeat_map, + const EList& positions, + EList& to_remove) const; + + void addRepeat(const RB_Repeat* repeat); + void addRepeat(Range range, size_t repeat_id); + void removeRepeat(const RB_Repeat* repeat); + void removeRepeat(Range range, size_t repeat_id); + +public: + void showInfo(const RepeatParameter& rp, + CoordHelper& coordHelper, + const map& repeat_map, + size_t num_case = 5) const; + +private: + map > range_to_repeats_; +}; + +class RB_SWAligner { +public: + RB_SWAligner(); + ~RB_SWAligner(); + + void init_dyn(const RepeatParameter& rp); + + int alignStrings(const string &ref, + const string &read, + EList& edits, + Coord& coord); + + void makePadString(const string& ref, + const string& read, + string& pad, + size_t len); + + void doTest(const RepeatParameter& rp, + const string& refstr, + const string& readstr); + + void doTestCase1(const string& refstr, + const string& readstr, + TIndexOffU rpt_edit); + +private: + // + SimpleFunc scoreMin_; + SimpleFunc nCeil_; + SimpleFunc penCanIntronLen_; + SimpleFunc penNoncanIntronLen_; + + Scoring *sc_; + SwAligner swa; + LinkedEList > rawEdits_; + RandomSource rnd_; +}; + +// SA Subset +class RB_SubSA { +public: + RB_SubSA() {} + ~RB_SubSA() {} + + void writeFile(ofstream& fp); + void readFile(ifstream &fp); + + void init(TIndexOffU sa_size, + TIndexOffU seed_len, + TIndexOffU seed_count); + + template + void push_back(const TStr& s, + CoordHelper& coordHelper, + TIndexOffU saElt, + bool lastInput = false); + + inline TIndexOffU seed_len() const { return seed_len_; } + inline TIndexOffU seed_count() const { return seed_count_; } + + inline size_t size() const { return repeat_list_.size(); } + TIndexOffU get(size_t idx) const { return 
repeat_list_[idx]; } + inline TIndexOffU operator[](size_t i) const { return get(i); } + + const EList& getRepeatIndex() const { return repeat_index_; } + + template + Range find(const TStr& s, + const string& seq) const; + + template + TIndexOffU find_repeat_idx(const TStr& s, + const string& seq) const; + + void setDone(TIndexOffU off, TIndexOffU len = 1); + bool isDone(TIndexOffU off, TIndexOffU len = 1) const; + + void dump() const + { + cerr << "seed length: " << seed_len_ << endl; + cerr << "minimum seed count: " << seed_count_ << endl; + cerr << "number of seed groups: " << repeat_index_.size() << endl; + cerr << "number of seeds: " << repeat_list_.size() << endl; + } + + size_t getMemUsage() const + { + size_t tot = 0; + + tot += repeat_list_.totalCapacityBytes(); + tot += repeat_index_.totalCapacityBytes(); + tot += done_.totalCapacityBytes(); + tot += temp_suffixes_.totalCapacityBytes(); + + return tot; + } + + template + void buildRepeatBase(const TStr& s, + CoordHelper& coordHelper, + const size_t max_len, + EList& repeatBases); + +private: + TIndexOffU sa_size_; + TIndexOffU seed_len_; + TIndexOffU seed_count_; + EList temp_suffixes_; + + // repeat index + EList repeat_list_; + EList repeat_index_; + EList done_; +}; + + +// find and write repeats +template +class RepeatBuilder { + +public: + RepeatBuilder(TStr& s, + TStr& sOriginal, + const EList& szs, + const EList& ref_names, + bool forward_only, + const string& filename); + ~RepeatBuilder(); + + +public: + void readSA(const RepeatParameter& rp, + BlockwiseSA& sa); + + void readSA(const RepeatParameter& rp, + const string& filename); + + void readSA(const RepeatParameter& rp, + const BitPackedArray& sa); + + void writeSA(const RepeatParameter& rp, + const string& filename); + + void build(const RepeatParameter& rp); + + void sortRepeatGroup(); + + void saveRepeats(const RepeatParameter& rp); + void saveConsensus(const RepeatParameter& rp, + const string& repName); + void saveFile(const 
RepeatParameter& rp); + + void addRepeatGroup(const RepeatParameter& rp, + size_t& repeat_id, + RB_RepeatManager& repeat_manger, + const RB_RepeatBase& repeatBase, + ostream& fp); + + void reassignSeeds(const RepeatParameter& rp, + size_t repeat_bid, + size_t repeat_eid, + EList& seeds); + + bool checkSequenceMergeable(const string& ref, + const string& read, + EList& edits, + Coord& coord, + TIndexOffU rpt_len, + TIndexOffU max_edit = 10); + + void doTest(const RepeatParameter& rp, + const string& refstr, + const string& readstr) + { + swaligner_.doTest(rp, refstr, readstr); + } + +private: + void saveAlleles(const RepeatParameter& rp, + const string& repName, + RB_Repeat& repeat, + ofstream&, + ofstream&, + TIndexOffU grp_id, + TIndexOffU&, + TIndexOffU baseoff); + + void writeAllele(TIndexOffU grp_id, + TIndexOffU allele_id, + Range range, + const string& seq_name, + TIndexOffU baseoff, + const EList& seeds, + ostream &fp); + + + void writeSNPs(ostream& fp, + const string& rep_chr_name, + const ESet& editset); + + void writeHaploType(const EList& haplo_list, + const EList& seeds, + TIndexOffU& hapl_id_base, + const string& seq_name, + ostream& fp); + + void generateHaploType(Range range, + const EList& seeds, + EList& haplo_list); + +private: + const int output_width = 60; + + TStr& s_; + TStr& sOriginal_; + bool forward_only_; + string filename_; + + RB_SubSA subSA_; + + TIndexOffU forward_length_; + + CoordHelper coordHelper_; + + // + RB_SWAligner swaligner_; + + // Seeds + EList consensus_all_; + ELList seeds_; + map repeat_map_; +}; + + +int strcmpPos(const string&, const string&, TIndexOffU&); +template void dump_tstr(TStr& s); + +#endif /* __REPEAT_BUILDER_H__ */ diff --git a/repeat_kmer.h b/repeat_kmer.h new file mode 100644 index 0000000..7969494 --- /dev/null +++ b/repeat_kmer.h @@ -0,0 +1,606 @@ + /* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. 
+ * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#ifndef __REPEAT_KMER_H__ +#define __REPEAT_KMER_H__ + +#include +#include +#include +#include +#include +#include "assert_helpers.h" +#include "word_io.h" +#include "mem_ids.h" +#include "ds.h" + +template +class RB_Minimizer { +public: + static const size_t default_w = 5; + static const size_t default_k = 31; +public: + static pair + get_minimizer(const TStr& seq, + size_t off, + size_t window, + size_t k) + { + assert_leq(k, 32); + assert_leq(off + window + k - 1, seq.length()); + pair minimizer(get_kmer(seq, off, k), off); + uint64_t kmer = minimizer.first; + for(size_t i = off + 1; i < off + window; i++) { + uint64_t next_kmer = get_next_kmer(kmer, seq[i+k-1], k); + if(minimizer_leq(next_kmer, minimizer.first)) { + minimizer.first = next_kmer; + minimizer.second = i; + } + kmer = next_kmer; + } + return minimizer; + } + + static void + get_minimizer(const TStr& seq, + size_t window, + size_t k, + EList >& minimizers) + { + assert_leq(k, 32); + assert_leq(window + k - 1, seq.length()); + + minimizers.clear(); + pair minimizer = get_minimizer(seq, 0, window, k); + minimizers.push_back(minimizer); + uint64_t kmer = get_kmer(seq, window - 1, k); + for(size_t i = 1; i + window + k - 1 <= seq.length(); i++) { + uint64_t next_kmer = get_next_kmer(kmer, seq[i+window+k-2], k); + if(minimizer.second < i) { + minimizer = get_minimizer(seq, i, window, k); + } 
else if(minimizer_leq(next_kmer, minimizer.first)) { + minimizer.first = next_kmer; + minimizer.second = i + window - 1; + } + minimizers.push_back(minimizer); + kmer = next_kmer; + } + +#ifndef NDEBUG + assert_eq(minimizers.size() + window + k - 2, seq.length()); + for(size_t i = 0; i + window + k - 1 <= seq.length(); i++) { + pair minimizer = get_minimizer(seq, i, window, k); + assert(minimizer == minimizers[i]); + } +#endif + } + +protected: + static bool + minimizer_leq(uint64_t kmer, uint64_t kmer2) + { +#if 1 + kmer = convert_minimizer(kmer); + kmer2 = convert_minimizer(kmer2); +#endif + return kmer <= kmer2; + } + + // Heng Li's minimap and minaisam paper, 2016 + static uint64_t convert_minimizer(uint64_t x) { + x = (~x) + (x << 21); + x = x ^ (x >> 24); + x = x + (x << 3) + (x << 8); + x = x ^ (x >> 14); + x = x + (x << 2) + (x << 4); + x = x ^ (x >> 28); + x = x + (x << 31); + return x; + } + + static uint64_t + get_kmer(const TStr& seq, + size_t offset, + size_t k) + { + assert_leq(offset + k, seq.length()); + uint64_t kmer = 0; + for(size_t i = 0; i < k; i++) { + size_t c = seq[offset + i]; + if(c > 3) c = asc2dna[c]; + kmer = (kmer << 2 | c); + } + return kmer; + } + + static uint64_t + get_next_kmer(uint64_t kmer, + size_t base, + size_t k) + { + kmer &= (((uint64_t)1 << ((k-1)*2))) - 1; + if(base > 3) base = asc2dna[base]; + kmer = (kmer << 2) | base; + return kmer; + } + + static TStr get_string(uint64_t kmer, size_t k) + { + TStr seq = ""; + for(size_t i = 0; i < k; i++) { + size_t nt = kmer & 0x3; + seq.push_back("ACGT"[nt]); + kmer >>= 2; + } + reverse(seq.begin(), seq.end()); + return seq; + } +}; + +struct RB_Alignment { + TIndexOffU pos; + TIndexOffU off; + TIndexOffU len; + + bool operator<(const RB_Alignment& o) const + { + if(pos != o.pos) + return pos < o.pos; + return len > o.len; + } +}; + +struct RB_Alignment_CMPbyLen { + bool operator()(const RB_Alignment& a, const RB_Alignment& b) + { + if(a.len != b.len) + return a.len > b.len; + 
return a.pos < b.pos; + } +}; + +class RB_KmerTable { +public: + RB_KmerTable() { w_ = k_ = 0; } + ~RB_KmerTable() {} + +public: + bool isIn(uint64_t kmer) const + { +#if 1 + pair key(kmer, 0); + size_t idx = kmer_table_.bsearchLoBound(key); + return idx < kmer_table_.size() && kmer_table_[idx].first == kmer; +#else + return kmers_.find(kmer) != kmers_.end(); +#endif + } + + template + bool isRepeat(const TStr& query, + const TStr& rc_query, + EList >& minimizers) const + { + return isRepeat(query, minimizers) || isRepeat(rc_query, minimizers); + } + + template + bool isRepeat(const TStr& query, + EList >& minimizers) const + { + RB_Minimizer::get_minimizer(query, w_, k_, minimizers); + + size_t est_count = 0; + uint64_t prev_minimizer = 0; + bool prev_in = false; + for(size_t j = 0; j < minimizers.size(); j++) { + bool curr_in = false; + if(minimizers[j].first == prev_minimizer) { + if(prev_in) est_count++; + curr_in = prev_in; + } else if(isIn(minimizers[j].first)) { + curr_in = true; + est_count++; + +#if 1 + return true; +#endif + } + prev_minimizer = minimizers[j].first; + prev_in = curr_in; + } + +#if 1 + return false; +#else + bool est_repeat = est_count * 10 >= minimizers.size(); + return est_repeat; +#endif + } + + template + void findRepeats(const TStr& query, + EList >& minimizers, + EList& repeats) const + { + repeats.clear(); + RB_Minimizer::get_minimizer(query, w_, k_, minimizers); + for(size_t i = 0; i < minimizers.size(); i++) { + if(i > 0 && minimizers[i].first == minimizers[i-1].first) + continue; + pair minimizer(minimizers[i].first, 0); + size_t j = kmer_table_.bsearchLoBound(minimizer); + for(; j < kmer_table_.size() && minimizer.first == kmer_table_[j].first; j++) { + repeats.push_back(kmer_table_[j].second); + } + } + if(repeats.empty()) + return; + + size_t remove_count = 0; + repeats.sort(); + for(size_t i = 0; i + 1 < repeats.size();) { + size_t j = i + 1; + for(; j < repeats.size(); j++) { + if(repeats[i] == repeats[j]) { + repeats[j] = 
std::numeric_limits::max(); + remove_count++; + } else break; + } + i = j; + } + repeats.sort(); + assert_lt(remove_count, repeats.size()); + repeats.resize(repeats.size() - remove_count); + } + + template + void findAlignments(const TStr& query, + EList >& minimizers, + ELList& position2D, + EList& alignments, + TIndexOffU max_num_alignment = 1000) const + { + minimizers.clear(); + RB_Minimizer::get_minimizer(query, w_, k_, minimizers); + + position2D.clear(); + for(size_t i = 0; i < minimizers.size(); i++) { + if(i > 0 && minimizers[i].first == minimizers[i-1].first) + continue; + pair minimizer(minimizers[i].first, 0); + size_t idx = kmer_table_.bsearchLoBound(minimizer); + if(idx < kmer_table_.size() && kmer_table_[idx].first == minimizer.first) { + TIndexOffU begin = kmer_table_[idx].second; + TIndexOffU end = (idx + 1 < kmer_table_.size() ? kmer_table_[idx+1].second : pos_list_.size()); + position2D.expand(); + EList& positions = position2D.back(); + positions.clear(); + positions.expand(); + positions.back().pos = begin; // suffix begin + positions.back().off = i; // minimizer index + positions.expand(); + positions.back().pos = end; // suffix end + positions.back().off = i; // minimizer index + } + } + + alignments.clear(); + if(position2D.empty()) + return; + + for(size_t i = 0; i < position2D.size(); i++) { + size_t num_i = 0, num_pos = numeric_limits::max(); + for(size_t j = 0; j < position2D.size(); j++) { + EList& positions = position2D[j]; + if(positions.empty()) + continue; + assert_eq(positions.size(), 2); + TIndexOffU cur_num_pos = positions[1].pos - positions[0].pos; + if(cur_num_pos == 0) + continue; + if(cur_num_pos < num_pos) { + num_i = j; + num_pos = cur_num_pos; + } + } + + if(num_pos > max_num_alignment && alignments.size() > 0) + break; + + if(num_pos > max_num_alignment) + break; + + EList& positions = position2D[num_i]; + TIndexOffU begin = positions[0].pos; + TIndexOffU end = positions[1].pos; + TIndexOffU min_i = positions[0].off; + 
assert_eq(num_pos, end - begin); + positions.clear(); + for(TIndexOffU j = begin; j < end; j++) { + if(pos_list_[j] < minimizers[min_i].second) + continue; + positions.expand(); + positions.back().pos = pos_list_[j]; + positions.back().off = minimizers[min_i].second; + positions.back().len = k_; + } + + if(i == 0) { + for(size_t j = 0; j < positions.size(); j++) { + alignments.expand(); + alignments.back() = positions[j]; + } + } else { + size_t a = 0, p = 0; + size_t num_alignment = alignments.size(); + while(a < num_alignment && p < positions.size()) { + RB_Alignment& alignment = alignments[a]; + RB_Alignment& position = positions[p]; + if(alignment.pos < position.pos) { + TIndexOffU offDiff = position.off - alignment.off; + if(position.pos - alignment.pos == offDiff) { + alignment.len = min(offDiff, alignment.len) + position.len; + a++; p++; + } else { + a++; + } + } else if(alignment.pos > position.pos) { + TIndexOffU offDiff = alignment.off - position.off; + if(alignment.pos - position.pos == offDiff) { + alignment.pos = position.pos; + alignment.off = position.off; + alignment.len = min(offDiff, position.len) + alignment.len; + assert_geq(alignment.pos, alignment.off); + a++; p++; + } else { + alignments.expand(); + alignments.back() = position; + p++; + } + } else { + assert_eq(alignment.pos, position.pos); + a++; p++; + } + } + + while(p < positions.size()) { + RB_Alignment& position = positions[p]; + alignments.expand(); + alignments.back() = position; + p++; + } + + if(i + 1 < position2D.size()) { + alignments.sort(); + } + } + + positions.clear(); + + if(alignments.size() >= max_num_alignment) { + break; + } + } + + if(alignments.empty()) + return; + + for(size_t i = 0; i < alignments.size(); i++) { + alignments[i].pos -= alignments[i].off; + } + alignments.sort(); + + // remove duplicates + size_t remove_count = 0; + for(size_t i = 0; i + 1 + remove_count < alignments.size(); i++) { + size_t j = i + 1 + remove_count; + for(; j < alignments.size(); j++) 
{ + if(alignments[i].pos != alignments[j].pos) { + assert_geq(j, i + 1); + if(j > i + 1) { + alignments[i+1] = alignments[j]; + } + break; + } else { + remove_count++; + } + } + } + + assert_lt(remove_count, alignments.size()); + if(remove_count > 0) { + alignments.resize(alignments.size() - remove_count); + } + if(alignments.size() > 1) { + sort(alignments.begin(), alignments.end(), RB_Alignment_CMPbyLen()); + } + } + + bool write(ofstream& f_out, bool bigEndian) const { + writeIndex(f_out, w_, bigEndian); + writeIndex(f_out, k_, bigEndian); + writeIndex(f_out, kmer_table_.size(), bigEndian); + for(size_t i = 0; i < kmer_table_.size(); i++) { + writeIndex(f_out, kmer_table_[i].first, bigEndian); + if(sizeof(TIndexOffU) == 4) { + writeU32(f_out, kmer_table_[i].second, bigEndian); + } else { + assert_eq(sizeof(TIndexOffU), 8); + writeIndex(f_out, kmer_table_[i].second, bigEndian); + } + } + writeIndex(f_out, pos_list_.size(), bigEndian); + for(size_t i = 0; i < pos_list_.size(); i++) { + if(sizeof(TIndexOffU) == 4) { + writeU32(f_out, pos_list_[i], bigEndian); + } else { + assert_eq(sizeof(TIndexOffU), 8); + writeIndex(f_out, pos_list_[i], bigEndian); + } + } + return true; + } + + bool read(ifstream& f_in, bool bigEndian) { + w_ = readIndex(f_in, bigEndian); + k_ = readIndex(f_in, bigEndian); + size_t kmer_size = readIndex(f_in, bigEndian); + kmer_table_.reserveExact(kmer_size); + while(kmer_table_.size() < kmer_size) { + kmer_table_.expand(); + kmer_table_.back().first = readIndex(f_in, bigEndian); + if(sizeof(TIndexOffU) == 4) { + kmer_table_.back().second = readU32(f_in, bigEndian); + } else { + assert_eq(sizeof(TIndexOffU), 8); + kmer_table_.back().second = readIndex(f_in, bigEndian); + } + +#if 0 + kmers_.insert(kmer_table_.back().first); +#endif + } + size_t pos_size = readIndex(f_in, bigEndian); + pos_list_.reserveExact(pos_size); + while(pos_list_.size() < pos_size) { + pos_list_.expand(); + if(sizeof(TIndexOffU) == 4) { + pos_list_.back() = readU32(f_in, 
bigEndian); + } else { + assert_eq(sizeof(TIndexOffU), 8); + pos_list_.back() = readIndex(f_in, bigEndian); + } + } + return true; + } + +public: + template + void build(const EList& seqs, + size_t w, + size_t k) + { + w_ = w; + k_ = k; + kmer_table_.clear(); + pos_list_.clear(); + + EList > tmp_table; + set tmp_kmers; + + TIndexOffU baseoff = 0; + EList > minimizers; + for(size_t s = 0; s < seqs.size(); s++) { + const TStr& seq = seqs[s]; + RB_Minimizer::get_minimizer(seq, + w_, + k_, + minimizers); + for(size_t i = 0; i < minimizers.size(); i++) { + if(!tmp_table.empty() && + tmp_table.back().first == minimizers[i].first && + tmp_table.back().second == baseoff + minimizers[i].second) + continue; + + tmp_table.expand(); + tmp_table.back().first = minimizers[i].first; + tmp_table.back().second = baseoff + minimizers[i].second; + tmp_kmers.insert(minimizers[i].first); +#if 0 + kmers_.insert(minimizers[i].first); +#endif + } + baseoff += seq.length(); + } + tmp_table.sort(); + + kmer_table_.reserveExact(tmp_kmers.size()); + pos_list_.reserveExact(tmp_table.size()); + for(size_t i = 0; i < tmp_table.size(); i++) { +#ifndef NDEBUG + if(!pos_list_.empty()) { + assert_neq(pos_list_.back(), tmp_table[i].second); + } +#endif + if(kmer_table_.empty() || kmer_table_.back().first != tmp_table[i].first) { + kmer_table_.expand(); + kmer_table_.back().first = tmp_table[i].first; + kmer_table_.back().second = pos_list_.size(); + } + pos_list_.push_back(tmp_table[i].second); + } + assert_eq(kmer_table_.size(), tmp_kmers.size()); + assert_eq(pos_list_.size(), tmp_table.size()); + } + + void dump(ostream& o) const + { + o << "window : " << w_ << endl; + o << "k length : " << k_ << endl; + o << "number of kmer : " << kmer_table_.size() << endl; + o << "number of pos : " << pos_list_.size() << endl; + + EList counts, counts_10; + counts.resizeExact(10); counts_10.resizeExact(10); + counts.fillZero(); counts_10.fillZero(); + for(size_t i = 0; i < kmer_table_.size(); i++) { + size_t 
count = 0; + if(i + 1 < kmer_table_.size()) { + count = kmer_table_[i+1].second - kmer_table_[i].second; + } else { + count = pos_list_.size() - kmer_table_[i].second; + } + assert_gt(count, 0); + count -= 1; + if(count < counts.size()) { + counts[count]++; + } + size_t count_10 = count / 10; + if(count_10 < counts_10.size()) { + counts_10[count_10]++; + } else { + counts_10.back()++; + } + } + for(size_t i = 0; i < counts.size(); i++) { + o << "\t" << i + 1 << ": " << counts[i] << endl; + } + for(size_t i = 1; i < counts_10.size(); i++) { + o << "\t" << i * 10 + 1; + if(i + 1 < counts_10.size()) { + o << " to " << (i+1) * 10 << ": "; + } else { + o << " or more: "; + } + o << counts_10[i] << endl; + } + } + +private: + size_t w_; + size_t k_; + EList > kmer_table_; + EList pos_list_; + + unordered_set kmers_; +}; + + +#endif /* __REPEAT_KMER_H__ */ diff --git a/rfm.h b/rfm.h new file mode 100644 index 0000000..5c5dbfb --- /dev/null +++ b/rfm.h @@ -0,0 +1,1136 @@ +/* + * Copyright 2018, Chanhee Park and Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . + */ + +#ifndef RFM_H_ +#define RFM_H_ + +#include "hgfm.h" + +/** + * Extended Burrows-Wheeler transform data. + * LocalEbwt is a specialized Ebwt index that represents ~64K bps + * and therefore uses two bytes as offsets within 64K bps. 
+ * This class has only two additional member variables to denote the genomic sequence it represents: + * (1) the contig index and (2) the offset within the contig. + * + */ +template +class LocalRFM : public GFM { + typedef GFM PARENT_CLASS; +public: + /// Construct an Ebwt from the given input file. + /// The caller-supplied streams in1/in2 are stored as _in1/_in2 and the + /// index is loaded immediately through PARENT_CLASS::readIntoMemory() + /// with subIndex set to true. + /// NOTE(review): mmFile1, mmFile2, switchEndian, bytesRead and bytesRead2 + /// are accepted but never referenced in this constructor. + LocalRFM(const string& in, + ALTDB* altdb, + FILE *in1, + FILE *in2, + char *mmFile1, + char *mmFile2, + bool switchEndian, + size_t& bytesRead, + size_t& bytesRead2, + int needEntireReverse, + bool fw, + int32_t overrideOffRate, // = -1, + int32_t offRatePlus, // = -1, + bool useMm, // = false, + bool useShmem, // = false, + bool mmSweep, // = false, + bool loadNames, // = false, + bool loadSASamp, // = true, + bool loadFtab, // = true, + bool loadRstarts, // = true, + bool verbose, // = false, + bool startVerbose, // = false, + bool passMemExc, // = false, + bool sanityCheck, // = false) + bool useHaplotype) : // = false + GFM(in, + altdb, + NULL, + NULL, + needEntireReverse, + fw, + overrideOffRate, + offRatePlus, + useMm, + useShmem, + mmSweep, + loadNames, + loadSASamp, + loadFtab, + loadRstarts, + true, // load Splice Sites + verbose, + startVerbose, + passMemExc, + sanityCheck, + useHaplotype, + true) + { + this->_in1 = in1; + this->_in2 = in2; + + this->_repeat = true; + bool subIndex = true; + PARENT_CLASS::readIntoMemory( + needEntireReverse, + loadSASamp, + loadFtab, + loadRstarts, + false, //justHeader + &(this->_gh), + mmSweep, + loadNames, + startVerbose, + subIndex); + + // If the offRate has been overridden, reflect that in the + // _gh._offRate field + if(offRatePlus > 0 && this->_overrideOffRate == -1) { + this->_overrideOffRate = this->_gh._offRate + offRatePlus; + } + if(this->_overrideOffRate > this->_gh._offRate) { + this->_gh.setOffRate(this->_overrideOffRate); + assert_eq(this->_overrideOffRate, this->_gh._offRate); + } + assert(this->repOk()); + } + + + /// Construct an Ebwt from the given header parameters and string + /// vector, 
optionally using a blockwise suffix sorter with the + /// given 'bmax' and 'dcv' parameters. The string vector is + /// ultimately joined and the joined string is passed to buildToDisk(). + /// The index is written to out1/out2 during construction: 'sa' is used + /// when 'alts' is empty, otherwise 'pg'. + /// NOTE(review): 'local_size' is not referenced in this constructor. + template + LocalRFM( + TStr& s, + InorderBlockwiseSA* sa, + PathGraph* pg, + EList >& alts, + index_t local_size, + const EList& refnames, + bool packed, + int needEntireReverse, + int32_t lineRate, + int32_t offRate, + int32_t ftabChars, + const string& file, // base filename for EBWT files + bool fw, + int dcv, + EList& szs, + index_t sztot, + const RefReadInParams& refparams, + uint32_t seed, + ostream& out1, + ostream& out2, + int32_t overrideOffRate = -1, + bool verbose = false, + bool passMemExc = false, + bool sanityCheck = false) : + GFM(packed, + needEntireReverse, + lineRate, + offRate, + ftabChars, + file, + fw, + dcv, + szs, + sztot, + refparams, + seed, + overrideOffRate, + verbose, + passMemExc, + sanityCheck) + { + const GFMParams& gh = this->_gh; + assert(gh.repOk()); + uint32_t be = this->toBe(); + assert(out1.good()); + assert(out2.good()); + assert_eq(refnames.size(), 1); + this->_refnames.push_back(refnames[0]); + + writeIndex(out1, gh._len, be); // length of string (and bwt and suffix array) + streampos headerPos = out1.tellp(); // start of the placeholder fields below; handed to buildToDisk() + writeIndex(out1, 0, be); // gbwtLen + writeIndex(out1, 0, be); // num of nodes + writeI32(out1, lineRate, be); + writeI32(out1, 0, be); + writeI32(out1, offRate, be); + writeI32(out1, ftabChars, be); + writeIndex(out1, 0, be); // eftabLen + writeI32(out1, 0, be); // flag + if(gh._len > 0) { + assert_gt(szs.size(), 0); + assert_gt(sztot, 0); + // Not every fragment represents a distinct sequence - many + // fragments may correspond to a single sequence. Count the + // number of sequences here by counting the number of "first" + // fragments. 
+ this->_nPat = 0; + this->_nFrag = 0; + for(size_t i = 0; i < szs.size(); i++) { + if(szs[i].len > 0) this->_nFrag++; + if(szs[i].first && szs[i].len > 0) this->_nPat++; + } + assert_eq(this->_nPat, 1); + assert_geq(this->_nFrag, this->_nPat); + this->_rstarts.reset(); + writeIndex(out1, this->_nPat, be); + assert_eq(this->_nPat, 1); + this->_plen.init(new index_t[this->_nPat], this->_nPat); + // For each pattern, set plen + int npat = -1; + for(size_t i = 0; i < szs.size(); i++) { + if(szs[i].first && szs[i].len > 0) { + if(npat >= 0) { + writeIndex(out1, this->plen()[npat], be); + } + npat++; + this->plen()[npat] = (szs[i].len + szs[i].off); + } else { + this->plen()[npat] += (szs[i].len + szs[i].off); + } + } + assert_eq((index_t)npat, this->_nPat-1); + writeIndex(out1, this->plen()[npat], be); + // Write the number of fragments + writeIndex(out1, this->_nFrag, be); + + if(refparams.reverse == REF_READ_REVERSE) { + EList tmp(EBWT_CAT); + reverseRefRecords(szs, tmp, false, verbose); + this->szsToDisk(tmp, out1, refparams.reverse); + } else { + this->szsToDisk(szs, out1, refparams.reverse); + } + + if(alts.empty()) { + assert(pg == NULL); + PARENT_CLASS::buildToDisk(*sa, s, out1, out2, headerPos); + } else { + assert(pg != NULL); + // Re-initialize GFM parameters to reflect real number of edges (gbwt string) + this->_gh.init( + this->_gh.len(), + pg->getNumEdges(), + pg->getNumNodes(), + this->_gh.lineRate(), + this->_gh.offRate(), + this->_gh.ftabChars(), + 0, + this->_gh.entireReverse()); + PARENT_CLASS::buildToDisk(*pg, s, out1, out2, headerPos); + } + } + + // Now write reference sequence names on the end: newline-separated and + // terminated by a NUL byte. The separator belongs in the index stream + // (out1), not on stdout. + assert_eq(this->_refnames.size(), this->_nPat); + for(index_t i = 0; i < this->_refnames.size(); i++) { + out1 << this->_refnames[i].c_str(); + if(i + 1 < this->_refnames.size()) out1 << endl; + else out1 << '\0'; + } + out1.flush(); out2.flush(); + if(out1.fail() || out2.fail()) { + cerr << "An error occurred writing the index to disk. 
Please check if the disk is full." << endl; + throw 1; + } + } + + /** + * Sanity-check various pieces of the Ebwt + */ + void sanityCheckAll(int reverse) const { + if(this->_gh._len > 0) { + PARENT_CLASS::sanityCheckAll(reverse); + } + } + + bool empty() const { return this->_gh._len == 0; } +}; + +/** + * Extended Burrows-Wheeler transform data. + * RFM is a specialized Ebwt index that represents one global index and a large set of local indexes. + * + */ +template +class RFM : public GFM { + typedef GFM PARENT_CLASS; +public: + /// Construct a GFM from the given input file. + /// This constructor body only derives the two index file names + /// (_in1Str/_in2Str); the local indexes are loaded by readIntoMemory(). + RFM(const string& in, + ALTDB* altdb, + RepeatDB* repeatdb, + EList* readLens, + int needEntireReverse, + bool fw, + int32_t overrideOffRate, // = -1, + int32_t offRatePlus, // = -1, + bool useMm, // = false, + bool useShmem, // = false, + bool mmSweep, // = false, + bool loadNames, // = false, + bool loadSASamp, // = true, + bool loadFtab, // = true, + bool loadRstarts, // = true, + bool loadSpliceSites, // = true, + bool verbose, // = false, + bool startVerbose, // = false, + bool passMemExc, // = false, + bool sanityCheck, // = false + bool useHaplotype, // = false + bool skipLoading = false) : + GFM(in, + altdb, + repeatdb, + readLens, + needEntireReverse, + fw, + overrideOffRate, + offRatePlus, + useMm, + useShmem, + mmSweep, + loadNames, + loadSASamp, + loadFtab, + loadRstarts, + loadSpliceSites, + verbose, + startVerbose, + passMemExc, + sanityCheck, + useHaplotype, + skipLoading), + _in1(NULL), + _in2(NULL), + mmFile1_(NULL), // readIntoMemory() reads both pointers, so they must start out defined + mmFile2_(NULL) + { + _in1Str = in + ".1." + gfm_ext; + _in2Str = in + ".2." + gfm_ext; + } + + /// Construct a HGFM from the given header parameters and string + /// vector, optionally using a blockwise suffix sorter with the + /// given 'bmax' and 'dcv' parameters. The string vector is + /// ultimately joined and the joined string is passed to buildToDisk(). 
+ template + RFM( + TStr& s, + bool packed, + int needEntireReverse, + int32_t lineRate, + int32_t offRate, + int32_t ftabChars, + int32_t localOffRate, + int32_t localFtabChars, + int nthreads, + const string& snpfile, + const string& htfile, + const string& ssfile, + const string& exonfile, + const string& svfile, + const string& repeatfile, + const string& outfile, // base filename for GFM files + bool fw, + bool useBlockwise, + TIndexOffU bmax, + TIndexOffU bmaxSqrtMult, + TIndexOffU bmaxDivN, + int dcv, + EList& is, + EList& szs, + index_t sztot, + const RefReadInParams& refparams, + bool localIndex, // create local indexes? + EList* parent_szs, + EList* parent_refnames, + uint32_t seed, + int32_t overrideOffRate = -1, + bool verbose = false, + bool passMemExc = false, + bool sanityCheck = false); + + /// Destructor: releases every LocalRFM owned through _localRFMs. + ~RFM() { + clearLocalRFMs(); + } + + /** + * Load this Ebwt into memory by reading it in from the _in1 and + * _in2 streams. + */ + void loadIntoMemory( + int needEntireReverse, + bool loadSASamp, + bool loadFtab, + bool loadRstarts, + bool loadNames, + bool verbose) + { + readIntoMemory( + needEntireReverse, // require reverse index to be concatenated reference reversed + loadSASamp, // load the SA sample portion? + loadFtab, // load the ftab (_ftab[] and _eftab[])? + loadRstarts, // load the r-starts (_rstarts[])? + false, // stop after loading the header portion? + NULL, // params + false, // mmSweep + loadNames, // loadNames + verbose); // startVerbose + } + + // I/O + void readIntoMemory( + int needEntireRev, + bool loadSASamp, + bool loadFtab, + bool loadRstarts, + bool justHeader, + GFMParams *params, + bool mmSweep, + bool loadNames, + bool startVerbose); + + /** + * Frees memory associated with the Ebwt. 
+ */ + void evictFromMemory() { + assert(PARENT_CLASS::isInMemory()); + clearLocalRFMs(); + PARENT_CLASS::evictFromMemory(); + } + + /** + * Sanity-check various pieces of the Ebwt + */ + void sanityCheckAll(int reverse) const { + PARENT_CLASS::sanityCheckAll(reverse); + for(size_t i = 0; i < _localRFMs.size(); i++) { + assert(_localRFMs[i] != NULL); + _localRFMs[i]->sanityCheckAll(reverse); + } + } + + /// Append the (single) reference name of every loaded local index to 'refnames'. + void getReferenceNames(EList& refnames) { + for(size_t i = 0; i < _localRFMs.size(); i++) { + assert_eq(_localRFMs[i]->refnames().size(), 1); + refnames.push_back(_localRFMs[i]->refnames()[0]); + } + } + + /// Append plen()[0] of every loaded local index to 'reflens'. + void getReferenceLens(EList& reflens) { + for(size_t i = 0; i < _localRFMs.size(); i++) { + reflens.push_back(_localRFMs[i]->plen()[0]); + } + } + + /// Return the index of the first _repeatLens entry whose 'first' field is + /// >= readLen, or the last index when no entry qualifies. + /// NOTE(review): the fallback is _repeatLens.size() - 1; confirm callers + /// never reach it with an empty _repeatLens. + index_t getLocalRFM_idx(index_t readLen) { + for(size_t i = 0; i < this->_repeatLens.size(); i++) { + if(this->_repeatLens[i].first >= readLen) { + return i; + } + } + return this->_repeatLens.size() - 1; + } + + /// Return the index of the local index whose reference name equals + /// 'refname', or the last index when there is no match. + /// NOTE(review): iterates up to _repeatLens.size() but indexes _localRFMs; + /// readIntoMemory() skips entries whose _repeatIncluded flag is false, so + /// confirm both lists always have the same length. + index_t getLocalRFM_idx(const char *refname) { + for(size_t i = 0; i < this->_repeatLens.size(); i++) { + assert_eq(_localRFMs[i]->refnames().size(), 1); + + string& ref = _localRFMs[i]->refnames()[0]; + if(ref.compare(refname) == 0) { + return i; + } + } + return this->_repeatLens.size() - 1; + } + + bool empty() const { return _localRFMs.empty(); } + + LocalRFM& getLocalRFM(index_t idx) { + assert_lt(idx, this->_repeatLens.size()); + return *_localRFMs[idx]; + } + + RB_KmerTable& getKmertable(index_t idx) { + assert_lt(idx, this->_repeat_kmertables.size()); + return this->_repeat_kmertables[idx]; + } + + /// Delete every LocalRFM held in _localRFMs and empty the list. + void clearLocalRFMs() { + for(size_t i = 0; i < _localRFMs.size(); i++) { + assert(_localRFMs[i] != NULL); + delete _localRFMs[i]; + } + _localRFMs.clear(); + } + + +public: + EList _refLens; /// approx lens of ref seqs (excludes trailing ambig chars) + EList*> _localRFMs; + + EList > _localRFMFilePos; + + FILE *_in1; // input fd for primary index file + FILE *_in2; // input fd for secondary index file + 
string _in1Str; + string _in2Str; + + char *mmFile1_; + char *mmFile2_; + +private: + struct WorkerParam { + // input + SString s; + EList > alts; + EList > haplotypes; + bool bigEndian; + index_t local_offset; + index_t curr_sztot; + EList conv_local_szs; + index_t local_sztot; + index_t index_size; + string file; + InorderBlockwiseSA >* sa; + index_t dcv; + index_t seed; + EList refnames; + + // output + RefGraph* rg; + PathGraph* pg; + + int threads; + }; + static void build_worker(void* vp); +}; + + +/** + * Prepare the suffix-sorting input for one local index. 'vp' must point to a + * WorkerParam. With no ALTs a KarkkainenBlockwiseSA is allocated into + * tParam.sa; otherwise a RefGraph/PathGraph pair is built into tParam.rg / + * tParam.pg, halving the ALT list and retrying whenever the graph explodes. + * Whatever is left in sa/rg/pg on return must be deleted by the caller. + */ +template +void RFM::build_worker(void* vp) +{ + WorkerParam& tParam = *(WorkerParam*)vp; + while(true) { + tParam.rg = NULL, tParam.pg = NULL; + if(tParam.alts.empty()) { + // No (remaining) ALTs: build a plain suffix array. This is also + // reached after a previous pass through the loop cleared the + // ALTs, so the caller never sees empty ALTs together with a + // NULL suffix array. + tParam.sa = NULL; + tParam.sa = new KarkkainenBlockwiseSA >( + tParam.s, + (index_t)(tParam.s.length()+1), + tParam.threads, + tParam.dcv, + tParam.seed, + false, /* this->_sanity */ + false, /* this->_passMemExc */ + false); /* this->_verbose */ + assert(tParam.sa->suffixItrIsReset()); + assert_eq(tParam.sa->size(), tParam.s.length()+1); + break; + } + bool exploded = false; + try { + tParam.rg = new RefGraph( + tParam.s, + tParam.conv_local_szs, + tParam.alts, + tParam.haplotypes, + tParam.file, + tParam.threads, /* num threads */ + false); /* verbose? */ + tParam.pg = new PathGraph( + *tParam.rg, + tParam.file, + local_max_gbwt, + 1, /* num threads */ + false); /* verbose? 
*/ + } catch (const NongraphException& err) { + cerr << "Warning: no variants or splice sites in this graph (" << tParam.curr_sztot << ")" << endl; + delete tParam.rg; + delete tParam.pg; + tParam.alts.clear(); + continue; + } catch (const ExplosionException& err) { + exploded = true; + } + + if(!exploded) { + if(!tParam.pg->generateEdges(*tParam.rg)) { + cerr << "An error occurred - generateEdges" << endl; + throw 1; + } + exploded = tParam.pg->getNumEdges() > local_max_gbwt; + } + if(exploded) { + cerr << "Warning: a local graph exploded (offset: " << tParam.curr_sztot << ", length: " << tParam.local_sztot << ")" << endl; + + delete tParam.pg; tParam.pg = NULL; + delete tParam.rg; tParam.rg = NULL; + if(tParam.alts.size() <= 1) { + tParam.alts.clear(); + } else { + // Keep every other ALT, then rebuild one haplotype per remaining SNP + for(index_t s = 2; s < tParam.alts.size(); s += 2) { + tParam.alts[s >> 1] = tParam.alts[s]; + } + tParam.alts.resize(tParam.alts.size() >> 1); + tParam.haplotypes.clear(); + for(index_t a = 0; a < tParam.alts.size(); a++) { + const ALT& alt = tParam.alts[a]; + if(!alt.snp()) continue; + tParam.haplotypes.expand(); + tParam.haplotypes.back().left = alt.pos; + if(alt.deletion()) { + tParam.haplotypes.back().right = alt.pos + alt.len - 1; + } else { + tParam.haplotypes.back().right = alt.pos; + } + tParam.haplotypes.back().alts.clear(); + tParam.haplotypes.back().alts.push_back(a); + } + } + continue; + } + + break; + } +} + +/// Construct a GFM from the given header parameters and string +/// vector, optionally using a blockwise suffix sorter with the +/// given 'bmax' and 'dcv' parameters. The string vector is +/// ultimately joined and the joined string is passed to buildToDisk(). 
+/// Besides delegating to the GFM constructor, this writes the ".1."/".2." +/// index files: a header, one LocalRFM per entry of szs, and finally the +/// table of per-index end offsets. +/// NOTE(review): localOffRate, localFtabChars and localIndex are not +/// referenced in this body; the local indexes are built with lineRate, +/// offRate and ftabChars. +template +template +RFM::RFM( + TStr& s, + bool packed, + int needEntireReverse, + int32_t lineRate, + int32_t offRate, + int32_t ftabChars, + int32_t localOffRate, + int32_t localFtabChars, + int nthreads, + const string& snpfile, + const string& htfile, + const string& ssfile, + const string& exonfile, + const string& svfile, + const string& repeatfile, + const string& outfile, // base filename for EBWT files + bool fw, + bool useBlockwise, + TIndexOffU bmax, + TIndexOffU bmaxSqrtMult, + TIndexOffU bmaxDivN, + int dcv, + EList& is, + EList& szs, + index_t sztot, + const RefReadInParams& refparams, + bool localIndex, + EList* parent_szs, + EList* parent_refnames, + uint32_t seed, + int32_t overrideOffRate, + bool verbose, + bool passMemExc, + bool sanityCheck) : + GFM(s, + packed, + needEntireReverse, + lineRate, + offRate, + ftabChars, + nthreads, + snpfile, + htfile, + ssfile, + exonfile, + svfile, + repeatfile, + outfile, + fw, + useBlockwise, + bmax, + bmaxSqrtMult, + bmaxDivN, + dcv, + is, + szs, + sztot, + refparams, + parent_szs, + parent_refnames, + seed, + overrideOffRate, + verbose, + passMemExc, + sanityCheck), + _in1(NULL), + _in2(NULL), + mmFile1_(NULL), // readIntoMemory() reads both pointers, so they must start out defined + mmFile2_(NULL) +{ + _in1Str = outfile + ".1." + gfm_ext; + _in2Str = outfile + ".2." + gfm_ext; + + // Open output files + ofstream fout1(_in1Str.c_str(), ios::binary); + if(!fout1.good()) { + cerr << "Could not open index file for writing: \"" << _in1Str.c_str() << "\"" << endl + << "Please make sure the directory exists and that permissions allow writing by" << endl + << "HISAT2." 
<< endl; + throw 1; + } + ofstream fout2(_in2Str.c_str(), ios::binary); + if(!fout2.good()) { + cerr << "Could not open index file for writing: \"" << _in2Str.c_str() << "\"" << endl + << "Please make sure the directory exists and that permissions allow writing by" << endl + << "HISAT2." << endl; + throw 1; + } + + // Split the whole genome into a set of local indexes + uint32_t be = this->toBe(); + assert(fout1.good()); + assert(fout2.good()); + + // When building an Ebwt, these header parameters are known + // "up-front", i.e., they can be written to disk immediately, + // before we join() or buildToDisk() + writeI32(fout1, 1, be); // endian hint for primary stream + writeI32(fout2, 1, be); // endian hint for secondary stream + int version = GFM::getIndexVersion(); + writeI32(fout1, version, be); // version + index_t nLocalRFMs = this->_repeatLens.size(); + writeIndex(fout1, nLocalRFMs, be); // number of local Ebwts + for(size_t i = 0; i < this->_repeatLens.size(); i++) { + writeIndex(fout1, this->_repeatLens[i].first, be); + writeIndex(fout1, this->_repeatLens[i].second, be); + } + // Reserve the file-position table with zero placeholders; it is patched + // once every local index has been written. + // NOTE(review): the table has szs.size() entries while the count written + // above is _repeatLens.size(); readIntoMemory() uses that count for both + // tables - confirm the two sizes always agree. + streampos filepos = fout1.tellp(); + _localRFMFilePos.resizeExact(szs.size()); + for(size_t i = 0; i < _localRFMFilePos.size(); i++) { + writeIndex(fout1, 0, be); + writeIndex(fout1, 0, be); + } + + assert_gt(this->_nthreads, 0); + WorkerParam tParam; + tParam.rg = NULL; + tParam.pg = NULL; + tParam.sa = NULL; + tParam.file = outfile; + tParam.dcv = 1024; // NOTE(review): fixed value; the 'dcv' argument is only forwarded to GFM and LocalRFM + tParam.seed = seed; + tParam.threads = this->_nthreads; + + // build local FM indexes + assert_eq(szs.size(), this->_refnames.size()); + index_t curr_sztot = 0; + EList > alts; + for(size_t tidx = 0; tidx < szs.size(); tidx++) { + assert(szs[tidx].first); + index_t refLen = szs[tidx].len; + _localRFMs.expand(); + // The new slot is never filled during the build; reset it so that the + // pointer later deleted by clearLocalRFMs() is well-defined. + _localRFMs.back() = NULL; + assert_lt(tidx, _localRFMs.size()); + + tParam.index_size = refLen; + tParam.conv_local_szs.clear(); + tParam.conv_local_szs.push_back(szs[tidx]); + tParam.refnames.clear(); + tParam.refnames.push_back(this->_refnames[tidx]); + + // Extract sequence corresponding to this local index + tParam.s.resize(refLen); + if(refparams.reverse == REF_READ_REVERSE) { + tParam.s.install(s.buf() + s.length() - curr_sztot - refLen, refLen); + } else { + tParam.s.install(s.buf() + curr_sztot, refLen); + } + + // The ALT/haplotype extraction below is compiled out, so nothing in + // this constructor ever adds to tParam.alts. +#if 0 + // 
Extract ALTs corresponding to this local index + map alt_map; + tParam.alts.clear(); + ALT alt; + alt.pos = curr_sztot; + index_t alt_i = (index_t)this->_alts.bsearchLoBound(alt); + for(; alt_i < this->_alts.size(); alt_i++) { + const ALT& alt = this->_alts[alt_i]; + if(alt.snp()) { + if(alt.mismatch()) { + if(curr_sztot + local_sztot <= alt.pos) break; + } else if(alt.insertion()) { + if(curr_sztot + local_sztot < alt.pos) break; + } else { + assert(alt.deletion()); + if(curr_sztot + local_sztot < alt.pos + alt.len) break; + } + if(curr_sztot <= alt.pos) { + alt_map[alt_i] = (index_t)tParam.alts.size(); + tParam.alts.push_back(alt); + tParam.alts.back().pos -= curr_sztot; + } + } else if(alt.splicesite()) { + if(alt.excluded) continue; + if(curr_sztot + local_sztot <= alt.right + 1) continue; + if(curr_sztot <= alt.left) { + tParam.alts.push_back(alt); + tParam.alts.back().left -= curr_sztot; + tParam.alts.back().right -= curr_sztot; + } + } else { + assert(alt.exon()); + } + } + + // Extract haplotypes + tParam.haplotypes.clear(); + Haplotype haplotype; + haplotype.left = curr_sztot; + index_t haplotpye_i = (index_t)this->_haplotypes.bsearchLoBound(haplotype); + for(; haplotpye_i < this->_haplotypes.size(); haplotpye_i++) { + const Haplotype& haplotype = this->_haplotypes[haplotpye_i]; + if(curr_sztot + local_sztot <= haplotype.right) continue; + if(curr_sztot <= haplotype.left) { + tParam.haplotypes.push_back(haplotype); + tParam.haplotypes.back().left -= curr_sztot; + tParam.haplotypes.back().right -= curr_sztot; + for(index_t a = 0; a < tParam.haplotypes.back().alts.size(); a++) { + index_t alt_i = tParam.haplotypes.back().alts[a]; + if(alt_map.find(alt_i) == alt_map.end()) { + assert(false); + tParam.haplotypes.pop_back(); + break; + } + tParam.haplotypes.back().alts[a] = alt_map[alt_i]; + } + } + } +#endif + + tParam.local_offset = 0; + tParam.curr_sztot = curr_sztot; + tParam.local_sztot = refLen; + + assert(tParam.rg == NULL); + assert(tParam.pg == NULL); 
+ assert(tParam.sa == NULL); + curr_sztot += refLen; + + build_worker(&tParam); + + // The LocalRFM below is a temporary: its constructor writes the local + // index to fout1/fout2 and the object is discarded right away. + LocalRFM( + tParam.s, + tParam.sa, + tParam.pg, + tParam.alts, + tParam.index_size, + tParam.refnames, + packed, + needEntireReverse, + lineRate, + offRate, // suffix-array sampling rate + ftabChars, // number of chars in initial arrow-pair calc + outfile, // basename for .?.ebwt files + fw, // fw + dcv, // difference-cover period + tParam.conv_local_szs, // list of reference sizes + tParam.local_sztot, // total size of all unambiguous ref chars + refparams, // reference read-in parameters + seed, // pseudo-random number generator seed + fout1, + fout2, + -1, // override offRate + false, // be silent + passMemExc, // pass exceptions up to the toplevel so that we can adjust memory settings automatically + sanityCheck); // verify results and internal consistency + if(tParam.rg != NULL) { + assert(tParam.pg != NULL); + delete tParam.rg; tParam.rg = NULL; + delete tParam.pg; tParam.pg = NULL; + } + if(tParam.sa != NULL) { + delete tParam.sa; tParam.sa = NULL; + } + + // Record where this local index ends in each file + _localRFMFilePos[tidx].first = fout1.tellp(); + _localRFMFilePos[tidx].second = fout2.tellp(); + } + assert_eq(curr_sztot, sztot); + + // Go back and overwrite the placeholder table with the real offsets + streampos origpos = fout1.tellp(); + fout1.seekp(filepos); + for(size_t i = 0; i < _localRFMFilePos.size(); i++) { + writeIndex(fout1, _localRFMFilePos[i].first, be); + writeIndex(fout1, _localRFMFilePos[i].second, be); + } + fout1.seekp(origpos); + + fout1 << '\0'; + fout1.flush(); fout2.flush(); + if(fout1.fail() || fout2.fail()) { + cerr << "An error occurred writing the index to disk. Please check if the disk is full." 
<< endl; + throw 1; + } + VMSG_NL("Returning from initFromVector"); + + // Close output files + fout1.flush(); + int64_t tellpSz1 = (int64_t)fout1.tellp(); + VMSG_NL("Wrote " << fout1.tellp() << " bytes to primary GFM file: " << _in1Str.c_str()); + fout1.close(); + bool err = false; + if(tellpSz1 > fileSize(_in1Str.c_str())) { + err = true; + cerr << "Index is corrupt: File size for " << _in1Str.c_str() << " should have been " << tellpSz1 + << " but is actually " << fileSize(_in1Str.c_str()) << "." << endl; + } + fout2.flush(); + int64_t tellpSz2 = (int64_t)fout2.tellp(); + VMSG_NL("Wrote " << fout2.tellp() << " bytes to secondary GFM file: " << _in2Str.c_str()); + fout2.close(); + if(tellpSz2 > fileSize(_in2Str.c_str())) { + err = true; + cerr << "Index is corrupt: File size for " << _in2Str.c_str() << " should have been " << tellpSz2 + << " but is actually " << fileSize(_in2Str.c_str()) << "." << endl; + } + if(err) { + cerr << "Please check if there is a problem with the disk or if disk is full." << endl; + throw 1; + } + // Reopen as input streams + VMSG_NL("Re-opening _in1 and _in2 as input streams"); + if(this->_sanity) { + VMSG_NL("Sanity-checking ht2"); + assert(!this->isInMemory()); + readIntoMemory( + fw ? -1 : needEntireReverse, // 1 -> need the reverse to be reverse-of-concat + true, // load SA sample (_offs[])? + true, // load ftab (_ftab[] & _eftab[])? + true, // load r-starts (_rstarts[])? + false, // just load header? + NULL, // Params object to fill + false, // mm sweep? + true, // load names? + false); // verbose startup? + sanityCheckAll(refparams.reverse); + evictFromMemory(); + assert(!this->isInMemory()); + } + VMSG_NL("Returning from HGFM constructor"); +} + + +/** + * Read an Ebwt from file with given filename. 
+ */ +template +void RFM::readIntoMemory( + int needEntireRev, + bool loadSASamp, + bool loadFtab, + bool loadRstarts, + bool justHeader, + GFMParams *params, + bool mmSweep, + bool loadNames, + bool startVerbose) +{ + bool switchEndian; // dummy; caller doesn't care +#ifdef BOWTIE_MM + char *mmFile[] = { NULL, NULL }; +#endif + if(_in1Str.length() > 0) { + if(this->_verbose || startVerbose) { + cerr << " About to open input files: "; + logTime(cerr); + } + // Initialize our primary and secondary input-stream fields + if(_in1 != NULL) fclose(_in1); + if(this->_verbose || startVerbose) cerr << "Opening \"" << _in1Str.c_str() << "\"" << endl; + if((_in1 = fopen(_in1Str.c_str(), "rb")) == NULL) { + cerr << "Could not open index file " << _in1Str.c_str() << endl; + } + if(loadSASamp) { + if(_in2 != NULL) fclose(_in2); + if(this->_verbose || startVerbose) cerr << "Opening \"" << _in2Str.c_str() << "\"" << endl; + if((_in2 = fopen(_in2Str.c_str(), "rb")) == NULL) { + cerr << "Could not open index file " << _in2Str.c_str() << endl; + } + } + if(this->_verbose || startVerbose) { + cerr << " Finished opening input files: "; + logTime(cerr); + } + +#ifdef BOWTIE_MM + if(this->_useMm /*&& !justHeader*/) { + const char *names[] = {_in1Str.c_str(), _in2Str.c_str()}; + int fds[] = { fileno(_in1), fileno(_in2) }; + for(int i = 0; i < (loadSASamp ? 
2 : 1); i++) { + if(this->_verbose || startVerbose) { + cerr << " ¯ " << (i+1) << ": "; + logTime(cerr); + } + struct stat sbuf; + if (stat(names[i], &sbuf) == -1) { + perror("stat"); + cerr << "Error: Could not stat index file " << names[i] << " prior to memory-mapping" << endl; + throw 1; + } + mmFile[i] = (char*)mmap((void *)0, (size_t)sbuf.st_size, + PROT_READ, MAP_SHARED, fds[(size_t)i], 0); + if(mmFile[i] == (void *)(-1)) { + perror("mmap"); + cerr << "Error: Could not memory-map the index file " << names[i] << endl; + throw 1; + } + if(mmSweep) { + int sum = 0; + for(off_t j = 0; j < sbuf.st_size; j += 1024) { + sum += (int) mmFile[i][j]; + } + if(startVerbose) { + cerr << " Swept the memory-mapped ebwt index file 1; checksum: " << sum << ": "; + logTime(cerr); + } + } + } + mmFile1_ = mmFile[0]; + mmFile2_ = loadSASamp ? mmFile[1] : NULL; + } +#endif + } +#ifdef BOWTIE_MM + else if(this->_useMm && !justHeader) { + mmFile[0] = mmFile1_; + mmFile[1] = mmFile2_; + } + if(this->_useMm && !justHeader) { + assert(mmFile[0] == mmFile1_); + assert(mmFile[1] == mmFile2_); + } +#endif + + if(this->_verbose || startVerbose) { + cerr << " Reading header: "; + logTime(cerr); + } + + // Read endianness hints from both streams + size_t bytesRead = 0, bytesRead2 = 4; + switchEndian = false; + uint32_t one = readU32(_in1, switchEndian); // 1st word of primary stream + bytesRead += 4; + if(loadSASamp) { +#ifndef NDEBUG + assert_eq(one, readU32(_in2, switchEndian)); // should match! +#else + readU32(_in2, switchEndian); +#endif + } + if(one != 1) { + assert_eq((1u<<24), one); + assert_eq(1, endianSwapU32(one)); + switchEndian = true; + } + + // Can't switch endianness and use memory-mapped files; in order to + // support this, someone has to modify the file to switch + // endiannesses appropriately, and we can't do this inside Bowtie + // or we might be setting up a race condition with other processes. 
+ if(switchEndian && this->_useMm) { + cerr << "Error: Can't use memory-mapped files when the index is the opposite endianness" << endl; + throw 1; + } + + readI32(_in1, switchEndian); bytesRead += 4; + index_t nlocalRFMs = readIndex(_in1, switchEndian); bytesRead += sizeof(index_t); + + EList > localRFMLens; + localRFMLens.resizeExact(nlocalRFMs); + for(size_t i = 0; i < localRFMLens.size(); i++) { + localRFMLens[i].first = readIndex(_in1, switchEndian); + localRFMLens[i].second = readIndex(_in1, switchEndian); + } + + _localRFMFilePos.resizeExact(nlocalRFMs); + for(size_t i = 0; i < _localRFMFilePos.size(); i++) { + _localRFMFilePos[i].first = readIndex(_in1, switchEndian); + _localRFMFilePos[i].second = readIndex(_in1, switchEndian); + } + + clearLocalRFMs(); + + assert_eq(this->_repeatIncluded.size(), nlocalRFMs); + string base = ""; + for(size_t i = 0; i < nlocalRFMs; i++) { + if(!this->_repeatIncluded[i]) + continue; + if(i > 0) { + fseek(_in1, _localRFMFilePos[i-1].first, SEEK_SET); + fseek(_in2, _localRFMFilePos[i-1].second, SEEK_SET); + } + LocalRFM *localRFM = new LocalRFM(base, + NULL, + _in1, + _in2, + mmFile1_, + mmFile2_, + switchEndian, + bytesRead, + bytesRead2, + needEntireRev, + this->fw_, + -1, // overrideOffRate + -1, // offRatePlus + this->_useMm, + this->useShmem_, + mmSweep, + loadNames, + loadSASamp, + loadFtab, + loadRstarts, + false, // _verbose + false, + this->_passMemExc, + this->_sanity, + false); // use haplotypes? + + _localRFMs.push_back(localRFM); + } + + fseek(_in1, _localRFMFilePos.back().first, SEEK_SET); + fseek(_in2, _localRFMFilePos.back().second, SEEK_SET); + +#ifdef BOWTIE_MM + fseek(_in1, 0, SEEK_SET); + fseek(_in2, 0, SEEK_SET); +#else + rewind(_in1); + rewind(_in2); +#endif +} + +#endif /*RFM_H_*/ diff --git a/sam.h b/sam.h new file mode 100644 index 0000000..ad9fb2c --- /dev/null +++ b/sam.h @@ -0,0 +1,2013 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. 
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef SAM_H_ +#define SAM_H_ + +#include +#include +#include "ds.h" +#include "read.h" +#include "util.h" +#include "aligner_result.h" +#include "scoring.h" +#include "alt.h" +#include "filebuf.h" +#include "alignment_3n.h" + +enum { + // Comments use language from v1.4-r962 spec + SAM_FLAG_PAIRED = 1, // templ. having mult. frag.s in sequencing + SAM_FLAG_MAPPED_PAIRED = 2, // each frag properly aligned + SAM_FLAG_UNMAPPED = 4, // fragment unmapped + SAM_FLAG_MATE_UNMAPPED = 8, // next fragment in template unmapped + SAM_FLAG_QUERY_STRAND = 16, // SEQ is reverse comp'ed from original + SAM_FLAG_MATE_STRAND = 32, // next fragment SEQ reverse comp'ed + SAM_FLAG_FIRST_IN_PAIR = 64, // first fragment in template + SAM_FLAG_SECOND_IN_PAIR = 128, // last fragment in template + SAM_FLAG_NOT_PRIMARY = 256, // secondary alignment + SAM_FLAG_FAILS_CHECKS = 512, // not passing quality controls + SAM_FLAG_DUPLICATE = 1024 // PCR or optical duplicate +}; + +class AlnRes; +class AlnFlags; +class AlnSetSumm; + +/** + * Encapsulates all the various ways that a user may wish to customize SAM + * output. 
+ */ +template +class SamConfig { + + typedef EList StrList; + typedef EList LenList; + +public: + + SamConfig( + const StrList& refnames, // reference sequence names + const LenList& reflens, // reference sequence lengths + const StrList& repnames, // repeat sequence names + const LenList& replens, // repeat sequence lengths + bool truncQname, // truncate read name to 255? + bool omitsec, // omit secondary SEQ/QUAL + bool noUnal, // omit unaligned reads + const std::string& pg_id, // id + const std::string& pg_pn, // name + const std::string& pg_vn, // version + const std::string& pg_cl, // command-line + const std::string& rgs, // read groups string + int rna_strandness, + bool print_as, + bool print_xs, + bool print_xss, + bool print_yn, + bool print_xn, + bool print_cs, + bool print_cq, + bool print_x0, + bool print_x1, + bool print_xm, + bool print_xo, + bool print_xg, + bool print_nm, + bool print_md, + bool print_yf, + bool print_yi, + bool print_ym, + bool print_yp, + bool print_yt, + bool print_ys, + bool print_zs, + bool print_xr, + bool print_xt, + bool print_xd, + bool print_xu, + bool print_ye, // streak of failed DPs at end + bool print_yl, // longest streak of failed DPs + bool print_yu, // index of last succeeded DP + bool print_xp, // print seed hit information + bool print_yr, // # redundant seed hits + bool print_zb, // # Ftab lookups + bool print_zr, // # redundant path checks + bool print_zf, // # FM Index ops + bool print_zm, // FM Index op string for best-first search + bool print_zi, // # seed extend loop iters + bool print_zp, + bool print_zu, + bool print_xs_a, + bool print_nh) : + truncQname_(truncQname), + omitsec_(omitsec), + noUnal_(noUnal), + pg_id_(pg_id), + pg_pn_(pg_pn), + pg_vn_(pg_vn), + pg_cl_(pg_cl), + rgs_(rgs), + refnames_(refnames), + reflens_(reflens), + repnames_(repnames), + replens_(replens), + rna_strandness_(rna_strandness), + print_as_(print_as), // alignment score of best alignment + print_xs_(print_xs), // alignment 
score of second-best alignment + print_xss_(print_xss), + print_yn_(print_yn), // minimum valid score and perfect score + print_xn_(print_xn), + print_cs_(print_cs), + print_cq_(print_cq), + print_x0_(print_x0), + print_x1_(print_x1), + print_xm_(print_xm), + print_xo_(print_xo), + print_xg_(print_xg), + print_nm_(print_nm), + print_md_(print_md), + print_yf_(print_yf), + print_yi_(print_yi), + print_ym_(print_ym), + print_yp_(print_yp), + print_yt_(print_yt), + print_ys_(print_ys), + print_zs_(print_zs), + print_xr_(print_xr), + print_xt_(print_xt), // time elapsed in microseconds + print_xd_(print_xd), // DP extend attempts + print_xu_(print_xu), // ungapped extend attempts + print_ye_(print_ye), // streak of failed DPs at end + print_yl_(print_yl), // longest streak of failed DPs + print_yu_(print_yu), // index of last succeeded DP + print_xp_(print_xp), // print seed hit information + print_yr_(print_yr), // index of last succeeded DP + print_zb_(print_zb), // # Ftab lookups + print_zr_(print_zr), // # redundant path checks + print_zf_(print_zf), // # FM Index ops + print_zm_(print_zm), // FM Index op string for best-first search + print_zi_(print_zi), // # seed extend loop iters + print_zp_(print_zp), // # seed extend loop iters + print_zu_(print_zu), // # seed extend loop iters + print_xs_a_(print_xs_a), + print_nh_(print_nh) + { + assert_eq(refnames_.size(), reflens_.size()); + } + + /** + * Print a reference name in a way that doesn't violate SAM's character + * constraints. \*|[!-()+-<>-~][!-~]* + */ + void printRefName( + BTString& o, + const std::string& name) + const; + + /** + * Print a :Z optional field where certain characters (whitespace, colon + * and percent) are escaped using % escapes. 
+ */ + template + void printOptFieldEscapedZ(BTString& o, const T& s) const { + size_t len = s.length(); + for(size_t i = 0; i < len; i++) { + if(s[i] < 33 || s[i] > 126 || s[i] == ':' || s[i] == '%') { + // percent-encode it + o.append('%'); + int ms = s[i] >> 4; + int ls = s[i] & 15; + assert_range(0, 15, ms); + assert_range(0, 15, ls); + o.append("0123456789ABCDEF"[ms]); + o.append("0123456789ABCDEF"[ls]); + } else { + o.append(s[i]); + } + } + } + + /** + * Print a :Z optional field where newline characters are escaped using % + * escapes. + */ + template + void printOptFieldNewlineEscapedZ(BTString& o, const T& s) const { + size_t len = s.length(); + for(size_t i = 0; i < len; i++) { + if(s[i] == 10 || s[i] == 13 || s[i] == '%') { + // percent-encode it + o.append('%'); + int ms = s[i] >> 4; + int ls = s[i] & 15; + assert_range(0, 15, ms); + assert_range(0, 15, ls); + o.append("0123456789ABCDEF"[ms]); + o.append("0123456789ABCDEF"[ls]); + } else { + o.append(s[i]); + } + } + } + + /** + * Print a read name in a way that doesn't violate SAM's character + * constraints. [!-?A-~]{1,255} (i.e. [33, 63], [65, 126]) + */ + template + void printReadName( + BTString& o, + const TStr& name, + bool omitSlashMate) + const + { + size_t namelen = name.length(); + if(omitSlashMate && + namelen >= 2 && + name[namelen-2] == '/' && + (name[namelen-1] == '1' || name[namelen-1] == '2' || name[namelen-1] == '3')) + { + namelen -= 2; + } + if(truncQname_ && namelen > 255) { + namelen = 255; + } + for(size_t i = 0; i < namelen; i++) { + if(truncQname_ && isspace(name[i])) { + return; + } + o.append(name[i]); + } + } + + /** + * Print a reference name given a reference index. + */ + void printRefNameFromIndex( + BTString& o, + size_t i, + bool repeat = false) + const; + + /** + * Print SAM header to given output buffer. 
+ */ + void printHeader( + BTString& o, + const std::string& rgid, + const std::string& rgs, + bool printHd, + bool printSq, + bool printPg) + const; + + /** + * Print the @HD header line to the given string. + */ + void printHdLine(BTString& o, const char *samver) const; + + /** + * Print the @SQ header lines to the given string. + */ + void printSqLines(BTString& o) const; + + /** + * Print the @PG header line to the given string. + */ + void printPgLine(BTString& o) const; + + /** + * Print the optional flags to the given string. + */ + void printAlignedOptFlags( + BTString& o, // output buffer + bool first, // first opt flag printed is first overall? + const Read& rd, // the read + AlnRes& res, // individual alignment result + StackedAln& staln, // stacked alignment + const AlnFlags& flags, // alignment flags + const AlnSetSumm& summ, // summary of alignments for this read + const SeedAlSumm& ssm, // seed alignment summary + const PerReadMetrics& prm, // per-read metrics + const Scoring& sc, // scoring scheme + const char *mapqInp, // inputs to MAPQ calculation + const ALTDB* altdb) + const; + + /** + * Store the optional flags in the given Alignment: some values are written + * to its dedicated fields (e.g. AS, XM, NM, MD, YS, NH) and the remaining + * tags are appended to its unChangedTags string. + */ + void printAlignedOptFlags( + Alignment* newAlignment, // output buffer + bool first, // first opt flag printed is first overall? + const Read& rd, // the read + AlnRes& res, // individual alignment result + StackedAln& staln, // stacked alignment + const AlnFlags& flags, // alignment flags + const AlnSetSumm& summ, // summary of alignments for this read + const SeedAlSumm& ssm, // seed alignment summary + const PerReadMetrics& prm, // per-read metrics + const Scoring& sc, // scoring scheme + const char *mapqInp, // inputs to MAPQ calculation + const ALTDB* altdb) + const; + /** + * Print the optional flags to the given string. + */ + void printEmptyOptFlags( + BTString& o, // output buffer + bool first, // first opt flag printed is first overall?
+ const Read& rd, // the read + const AlnFlags& flags, // alignment flags + const AlnSetSumm& summ, // summary of alignments for this read + const SeedAlSumm& ssm, // seed alignment summary + const PerReadMetrics& prm, // per-read metrics + const Scoring& sc) // scoring scheme + const; + + void printEmptyOptFlags( + Alignment* newAlignment, // output buffer + bool first, // first opt flag printed is first overall? + const Read& rd, // the read + const AlnFlags& flags, // alignment flags + const AlnSetSumm& summ, // summary of alignments for this read + const SeedAlSumm& ssm, // seed alignment summary + const PerReadMetrics& prm, // per-read metrics + const Scoring& sc) // scoring scheme + const; + + /** + * Return true iff we should try to obey the SAM spec's recommendations + * that: + * + * SEQ and QUAL of secondary alignments should be set to ‘*’ to reduce the + * file size. + */ + bool omitSecondarySeqQual() const { + return omitsec_; + } + + /** + * Return true iff unaligned reads are to be omitted (the noUnal_ + * setting). + */ + bool omitUnalignedReads() const { + return noUnal_; + } + +protected: + + bool truncQname_; // truncate QNAME to 255 chars and at the first whitespace? + bool omitsec_; // omit SEQ and QUAL of secondary alignments? + bool noUnal_; // omit unaligned reads + + std::string pg_id_; // @PG ID: Program record identifier + std::string pg_pn_; // @PG PN: Program name + std::string pg_vn_; // @PG VN: Program version + std::string pg_cl_; // @PG CL: Program command-line + std::string rgs_; // Read-group string to add to all records + const StrList& refnames_; // reference sequence names + const LenList& reflens_; // reference sequence lengths + + const StrList& repnames_; // repeat sequence names + const LenList& replens_; // repeat sequence lengths + + int rna_strandness_; // one of the RNA_STRANDNESS_* values; decides how XS:A: is derived + + // Which alignment flags to print?
+ + // Following are printed by BWA-SW + bool print_as_; // AS:i: Alignment score generated by aligner + bool print_xs_; // XS:i: Suboptimal alignment score (written under the tag ZS:i:) + bool print_xss_;// Xs:i: Best invalid alignment score found + bool print_yn_; // YN:i:, Yn:i: minimum valid score and perfect score + bool print_xn_; // XN:i: Number of ambiguous bases in the reference + + // Other optional flags + bool print_cs_; // CS:Z: Color read sequence on the original strand + bool print_cq_; // CQ:Z: Color read quality on the original strand + + // Following are printed by BWA + bool print_x0_; // X0:i: Number of best hits + bool print_x1_; // X1:i: Number of sub-optimal best hits + bool print_xm_; // XM:i: Number of mismatches in the alignment + bool print_xo_; // XO:i: Number of gap opens + bool print_xg_; // XG:i: Number of gap extensions (incl. opens) + bool print_nm_; // NM:i: Edit dist. to the ref, Ns count, clipping doesn't + bool print_md_; // MD:Z: String for mms. [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)*2 + + // Following are Bowtie2-specific + bool print_yf_; // YF:i: Read was filtered out? + bool print_yi_; // YI:Z: Summary of inputs to MAPQ calculation + bool print_ym_; // YM:i: Read was repetitive when aligned unpaired? + bool print_yp_; // YP:i: Read was repetitive when aligned paired?
+ bool print_yt_; // YT:Z: String representing alignment type + bool print_ys_; // YS:i: Score of other mate + bool print_zs_; // ZS:i: Pseudo-random seed + + bool print_xr_; // XR:Z: Original read string + bool print_xt_; // XT:i: Time taken to align + bool print_xd_; // XD:i: DP problems + bool print_xu_; // XU:i: ungapped alignment + bool print_ye_; // YE:i: streak of failed DPs at end + bool print_yl_; // YL:i: longest streak of failed DPs + bool print_yu_; // YU:i: index of last succeeded DP + bool print_xp_; // XP:BI: seed hit information + bool print_yr_; // YR:i: # redundant seed hits + bool print_zb_; // ZB:i: # Ftab lookups + bool print_zr_; // ZR:i: # redundant path checks + bool print_zf_; // ZF:i: # FM Index ops + bool print_zm_; // ZM:i: FM ops string for best-first search + bool print_zi_; // ZI:i: # extend loop iters + bool print_zp_; // ZP:i: Score of best/second-best paired-end alignment + bool print_zu_; // ZU:i: Score of best/second-best unpaired alignment + + bool print_xs_a_; // XS:A:[+=] Sense/anti-sense strand splice sites correspond to + bool print_nh_; // NH:i: # alignments +}; + +/** + * Print a reference name in a way that doesn't violate SAM's character + * constraints. \*|[!-()+-<>-~][!-~]* (i.e. [33, 63], [65, 126]) + */ +template +void SamConfig::printRefName( + BTString& o, + const std::string& name) const +{ + size_t namelen = name.length(); + for(size_t i = 0; i < namelen; i++) { + if(isspace(name[i])) { + return; + } + o.append(name[i]); + } +} + +/** + * Print a reference name given a reference index. + */ +template +void SamConfig::printRefNameFromIndex(BTString& o, size_t i, bool repeat) const { + if(repeat) { + printRefName(o, repnames_[i]); + } else { + printRefName(o, refnames_[i]); + } +} + +/** + * Print SAM header to given output buffer. 
+ */ +template +void SamConfig::printHeader( + BTString& o, + const string& rgid, + const string& rgs, + bool printHd, + bool printSq, + bool printPg) const +{ + if(printHd) printHdLine(o, "1.0"); + if(printSq) printSqLines(o); + if(!rgid.empty()) { + o.append("@RG"); + o.append(rgid.c_str()); + o.append(rgs.c_str()); + o.append('\n'); + } + if(printPg) printPgLine(o); +} + +/** + * Print the @HD header line to the given string. + */ +template +void SamConfig::printHdLine(BTString& o, const char *samver) const { + o.append("@HD\tVN:"); + o.append(samver); + o.append("\tSO:unsorted\n"); +} + +/** + * Print the @SQ header lines to the given string. + */ +template +void SamConfig::printSqLines(BTString& o) const { + char buf[1024]; + for(size_t i = 0; i < refnames_.size(); i++) { + o.append("@SQ\tSN:"); + printRefName(o, refnames_[i]); + o.append("\tLN:"); + itoa10(reflens_[i], buf); + o.append(buf); + o.append('\n'); + } + if (!threeN) { + for(size_t i = 0; i < repnames_.size(); i++) { + o.append("@SQ\tSN:"); + printRefName(o, repnames_[i]); + o.append("\tLN:"); + itoa10(replens_[i], buf); + o.append(buf); + o.append('\n'); + } + } +} + +/** + * Print the @PG header line to the given string. + */ +template +void SamConfig::printPgLine(BTString& o) const { + o.append("@PG\tID:"); + o.append(pg_id_.c_str()); + o.append("\tPN:"); + o.append(pg_pn_.c_str()); + o.append("\tVN:"); + o.append(pg_vn_.c_str()); + o.append("\tCL:\""); + o.append(pg_cl_.c_str()); + o.append('"'); + o.append('\n'); +} + +#define WRITE_SEP() { \ +if(!first) o.append('\t'); \ +first = false; \ +} + +/** + * Print the optional flags to the given string. + */ +template +void SamConfig::printAlignedOptFlags( + BTString& o, // output buffer + bool first, // first opt flag printed is first overall? 
+ const Read& rd, // the read + AlnRes& res, // individual alignment result + StackedAln& staln, // stacked alignment buffer + const AlnFlags& flags, // alignment flags + const AlnSetSumm& summ, // summary of alignments for this read + const SeedAlSumm& ssm, // seed alignment summary + const PerReadMetrics& prm, // per-read metrics + const Scoring& sc, // scoring scheme + const char *mapqInp, // inputs to MAPQ calculation + const ALTDB* altdb) +const +{ + char buf[1024]; + if(print_as_) { + // AS:i: Alignment score generated by aligner + itoa10(res.score().score(), buf); + WRITE_SEP(); + o.append("AS:i:"); + o.append(buf); + } + + // Do not output suboptimal alignment score, which conflicts with Cufflinks and StringTie + if(print_xs_) { + // XS:i: Suboptimal alignment score + // Use ZS:i: to avoid conflict with XS:A: + AlnScore sco = summ.secbestMate(rd.mate < 2); + if(sco.valid()) { + itoa10(sco.score(), buf); + WRITE_SEP(); + o.append("ZS:i:"); + o.append(buf); + } + } + if(print_xn_) { + // XN:i: Number of ambiguous bases in the reference + itoa10(res.refNs(), buf); + WRITE_SEP(); + o.append("XN:i:"); + o.append(buf); + } + if(print_x0_) { + // X0:i: Number of best hits (nothing is printed) + } + if(print_x1_) { + // X1:i: Number of sub-optimal best hits (nothing is printed) + } + // Tally mismatches, gap opens and gap extensions over the edits. Edits whose snpID is below altdb->alts().size() are not counted. + size_t num_mm = 0; + size_t num_go = 0; + size_t num_gx = 0; + for(size_t i = 0; i < res.ned().size(); i++) { + if(res.ned()[i].isMismatch()) { + if(res.ned()[i].snpID >= altdb->alts().size()) { + num_mm++; + } + } else if(res.ned()[i].isReadGap()) { + if(res.ned()[i].snpID >= altdb->alts().size()) { + num_go++; + num_gx++; + } + while(i < res.ned().size()-1 && + res.ned()[i+1].pos == res.ned()[i].pos && + res.ned()[i+1].isReadGap()) + { + i++; + if(res.ned()[i].snpID >= altdb->alts().size()) { + num_gx++; + } + } + } else if(res.ned()[i].isRefGap()) { + if(res.ned()[i].snpID >= altdb->alts().size()) { + num_go++; + num_gx++; + } + while(i < res.ned().size()-1 && + res.ned()[i+1].pos == res.ned()[i].pos+1 && +
res.ned()[i+1].isRefGap()) + { + i++; + if(res.ned()[i].snpID >= altdb->alts().size()) { + num_gx++; + } + } + } + } + if(print_xm_) { + // XM:i: Number of mismatches in the alignment + itoa10(num_mm, buf); + WRITE_SEP(); + o.append("XM:i:"); + o.append(buf); + } + if(print_xo_) { + // XO:i: Number of gap opens + itoa10(num_go, buf); + WRITE_SEP(); + o.append("XO:i:"); + o.append(buf); + } + if(print_xg_) { + // XG:i: Number of gap extensions (incl. opens) + itoa10(num_gx, buf); + WRITE_SEP(); + o.append("XG:i:"); + o.append(buf); + } + if(print_nm_) { + // NM:i: Edit dist. to the ref, Ns count, clipping doesn't; splice edits and edits with snpID below altdb->alts().size() are skipped + size_t NM = 0; + for(size_t i = 0; i < res.ned().size(); i++) { + if(res.ned()[i].type != EDIT_TYPE_SPL) { + if(res.ned()[i].snpID >= altdb->alts().size()) { + NM++; + } + } + } + itoa10(NM, buf); + WRITE_SEP(); + o.append("NM:i:"); + o.append(buf); + } + if(print_md_) { + // MD:Z: String for mms. [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)*2 + WRITE_SEP(); + o.append("MD:Z:"); + staln.buildMdz(); + staln.writeMdz( + &o, // output buffer + NULL); // no char buffer + } + if(print_ys_ && summ.paired()) { + // YS:i: Alignment score of opposite mate + assert(res.oscore().valid()); + itoa10(res.oscore().score(), buf); + WRITE_SEP(); + o.append("YS:i:"); + o.append(buf); + } + if(print_yn_) { + // YN:i: Minimum valid score for this mate + TAlScore mn = sc.scoreMin.f(rd.length()); + itoa10(mn, buf); + WRITE_SEP(); + o.append("YN:i:"); + o.append(buf); + // Yn:i: Perfect score for this mate + TAlScore pe = sc.perfectScore(rd.length()); + itoa10(pe, buf); + WRITE_SEP(); + o.append("Yn:i:"); + o.append(buf); + } + if(print_xss_) { + // Xs:i: Best invalid alignment score of this mate + bool one = true; + if(flags.partOfPair() && !flags.readMate1()) { + one = false; + } + TAlScore bst = one ?
prm.bestLtMinscMate1 : prm.bestLtMinscMate2; + if(bst > std::numeric_limits::min()) { + itoa10(bst, buf); + WRITE_SEP(); + o.append("Xs:i:"); + o.append(buf); + } + if(flags.partOfPair()) { + // Ys:i: Best invalid alignment score of opposite mate + bst = one ? prm.bestLtMinscMate2 : prm.bestLtMinscMate1; + if(bst > std::numeric_limits::min()) { + itoa10(bst, buf); + WRITE_SEP(); + o.append("Ys:i:"); + o.append(buf); + } + } + } + if(print_zs_) { + // ZS:i: Pseudo-random seed for read + // NOTE(review): the print_xs_ block above writes the same ZS:i: tag name, so one record can carry it twice - confirm both are never enabled together + itoa10(rd.seed, buf); + WRITE_SEP(); + o.append("ZS:i:"); + o.append(buf); + } + if(print_yt_) { + // YT:Z: String representing alignment type + WRITE_SEP(); + flags.printYT(o); + } + if(print_yp_ && flags.partOfPair() && flags.canMax()) { + // YP:i: Read was repetitive when aligned paired? + WRITE_SEP(); + flags.printYP(o); + } + if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) { + // YM:i: Read was repetitive when aligned unpaired? + WRITE_SEP(); + flags.printYM(o); + } + if(print_yf_ && flags.filtered()) { + // YF:i: Read was filtered? + first = flags.printYF(o, first) && first; + } + if(print_yi_) { + // Print MAPQ calibration info + if(mapqInp[0] != '\0') { + // YI:Z: Inputs to MAPQ calculation + WRITE_SEP(); + o.append("YI:Z:"); + o.append(mapqInp); + } + } + if(flags.partOfPair() && print_zp_) { + // ZP:Z: Score of best concordant paired-end alignment, or NA + WRITE_SEP(); + o.append("ZP:Z:"); + if(summ.bestPaired().valid()) { + itoa10(summ.bestPaired().score(), buf); + o.append(buf); + } else { + o.append("NA"); + } + // Zp:Z: Second-best concordant paired-end alignment score, or NA + WRITE_SEP(); + o.append("Zp:Z:"); + if(summ.secbestPaired().valid()) { + itoa10(summ.secbestPaired().score(), buf); + o.append(buf); + } else { + o.append("NA"); + } + } + if(print_zu_) { + // ZU:i: Score of best unpaired alignment + // NOTE(review): "NA" is written under an :i: type when the score is not valid, while ZP/Zp use :Z: for the same case - confirm intended + AlnScore best = (rd.mate <= 1 ? summ.best1() : summ.best2()); + AlnScore secbest = (rd.mate <= 1 ?
summ.secbest1() : summ.secbest2()); + WRITE_SEP(); + o.append("ZU:i:"); + if(best.valid()) { + itoa10(best.score(), buf); + o.append(buf); + } else { + o.append("NA"); + } + // Zu:i: Score of second-best unpaired alignment + WRITE_SEP(); + o.append("Zu:i:"); + if(secbest.valid()) { + itoa10(secbest.score(), buf); + o.append(buf); + } else { + o.append("NA"); + } + } + if(!rgs_.empty()) { + WRITE_SEP(); + o.append(rgs_.c_str()); + } + if(print_xt_) { + // XT:i: Microseconds elapsed since prm.tv_beg + WRITE_SEP(); + struct timeval tv_end; + struct timezone tz_end; + gettimeofday(&tv_end, &tz_end); + size_t total_usecs = + (tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 + + (tv_end.tv_usec - prm.tv_beg.tv_usec); + itoa10(total_usecs, buf); + o.append("XT:i:"); + o.append(buf); + } + if(print_xd_) { + // XD:i: Extend DPs + WRITE_SEP(); + itoa10(prm.nExDps, buf); + o.append("XD:i:"); + o.append(buf); + // Xd:i: Mate DPs + WRITE_SEP(); + itoa10(prm.nMateDps, buf); + o.append("Xd:i:"); + o.append(buf); + } + if(print_xu_) { + // XU:i: Extend ungapped tries + WRITE_SEP(); + itoa10(prm.nExUgs, buf); + o.append("XU:i:"); + o.append(buf); + // Xu:i: Mate ungapped tries + WRITE_SEP(); + itoa10(prm.nMateUgs, buf); + o.append("Xu:i:"); + o.append(buf); + } + if(print_ye_) { + // YE:i: Streak of failed DPs at end + WRITE_SEP(); + itoa10(prm.nDpFail, buf); + o.append("YE:i:"); + o.append(buf); + // Ye:i: Streak of failed ungaps at end + WRITE_SEP(); + itoa10(prm.nUgFail, buf); + o.append("Ye:i:"); + o.append(buf); + } + if(print_yl_) { + // YL:i: Longest streak of failed DPs + WRITE_SEP(); + itoa10(prm.nDpFailStreak, buf); + o.append("YL:i:"); + o.append(buf); + // Yl:i: Longest streak of failed ungaps + WRITE_SEP(); + itoa10(prm.nUgFailStreak, buf); + o.append("Yl:i:"); + o.append(buf); + } + if(print_yu_) { + // YU:i: Index of last successful DP + WRITE_SEP(); + itoa10(prm.nDpLastSucc, buf); + o.append("YU:i:"); + o.append(buf); + // Yu:i: Index of last successful ungap (prm.nUgLastSucc) + WRITE_SEP(); + itoa10(prm.nUgLastSucc, buf);
+ o.append("Yu:i:"); + o.append(buf); + } + if(print_xp_) { + // XP:B:I: Seed hit summary: nSeedElts, nSeedEltsFw, nSeedEltsRc, seedMean, seedMedian + WRITE_SEP(); + o.append("XP:B:I,"); + itoa10(prm.nSeedElts, buf); + o.append(buf); + o.append(','); + itoa10(prm.nSeedEltsFw, buf); + o.append(buf); + o.append(','); + itoa10(prm.nSeedEltsRc, buf); + o.append(buf); + o.append(','); + itoa10(prm.seedMean, buf); + o.append(buf); + o.append(','); + itoa10(prm.seedMedian, buf); + o.append(buf); + } + if(print_yr_) { + // YR:i: Redundant seed hits + WRITE_SEP(); + itoa10(prm.nRedundants, buf); + o.append("YR:i:"); + o.append(buf); + } + if(print_zb_) { + // ZB:i: Ftab ops for seed alignment + WRITE_SEP(); + itoa10(prm.nFtabs, buf); + o.append("ZB:i:"); + o.append(buf); + } + if(print_zr_) { + // ZR:Z: Redundant path skips in seed alignment: nRedSkip,nRedFail,nRedIns + WRITE_SEP(); + o.append("ZR:Z:"); + itoa10(prm.nRedSkip, buf); o.append(buf); + o.append(','); + itoa10(prm.nRedFail, buf); o.append(buf); + o.append(','); + itoa10(prm.nRedIns, buf); o.append(buf); + } + if(print_zf_) { + // ZF:i: FM Index ops for seed alignment + WRITE_SEP(); + itoa10(prm.nSdFmops, buf); + o.append("ZF:i:"); + o.append(buf); + // Zf:i: FM Index ops for offset resolution + WRITE_SEP(); + itoa10(prm.nExFmops, buf); + o.append("Zf:i:"); + o.append(buf); + } + if(print_zm_) { + // ZM:Z: Print FM index op string for best-first search + WRITE_SEP(); + o.append("ZM:Z:"); + prm.fmString.print(o, buf); + } + if(print_zi_) { + // ZI:i: Seed extend loop iterations + WRITE_SEP(); + itoa10(prm.nExIters, buf); + o.append("ZI:i:"); + o.append(buf); + } + // XS:A: '+' or '-'. With unknown strandness it comes from res.spliced_whichsense_transcript() and is left out for SPL_UNKNOWN; otherwise it is derived from rna_strandness_, the mate and res.orient(). + if(print_xs_a_) { + if(rna_strandness_ == RNA_STRANDNESS_UNKNOWN) { + uint8_t whichsense = res.spliced_whichsense_transcript(); + if(whichsense != SPL_UNKNOWN) { + WRITE_SEP(); + o.append("XS:A:"); + if(whichsense == SPL_FW || whichsense == SPL_SEMI_FW) { + o.append('+'); + } else { + assert(whichsense == SPL_RC || whichsense == SPL_SEMI_RC); + o.append('-'); + } + } + } else { + WRITE_SEP(); + o.append("XS:A:"); + char
strandness = '+'; + if(res.readMate1()) { + if(res.orient()) { + if(rna_strandness_ == RNA_STRANDNESS_R || rna_strandness_ == RNA_STRANDNESS_RF) { + strandness = '-'; + } + } else { + if(rna_strandness_ == RNA_STRANDNESS_F || rna_strandness_ == RNA_STRANDNESS_FR) { + strandness = '-'; + } + } + } else { + assert(res.readMate2()); + assert(rna_strandness_ == RNA_STRANDNESS_FR || rna_strandness_ == RNA_STRANDNESS_RF); + if(res.orient()) { + if(rna_strandness_ == RNA_STRANDNESS_FR) { + strandness = '-'; + } + } else { + if(rna_strandness_ == RNA_STRANDNESS_RF) { + strandness = '-'; + } + } + } + o.append(strandness); + } + } + if(print_nh_) { + if(flags.alignedPaired()) { + WRITE_SEP(); + itoa10(summ.numAlnsPaired(), buf); + o.append("NH:i:"); + o.append(buf); + } else if(flags.alignedUnpaired() || flags.alignedUnpairedMate()) { + WRITE_SEP(); + itoa10((flags.alignedUnpaired() || flags.readMate1()) ? + summ.numAlns1() : summ.numAlns2(), buf); + o.append("NH:i:"); + o.append(buf); + } + } + + // Zs:Z: comma-separated list of the known ALTs (snpID below altdb->alts().size()) used by this alignment, each written as pos|S, D or I|ALT name. pos is made relative to the preceding known-ALT edit, if there is one. For reverse-strand alignments the edit positions are inverted first and inverted back after the loop. + bool snp_first = true; + index_t prev_snp_idx = INDEX_MAX; + size_t len_trimmed = rd.length() - res.trimmed5p(true) - res.trimmed3p(true); + if(!res.fw()) { + Edit::invertPoss(const_cast&>(res.ned()), len_trimmed, false); + } + for(size_t i = 0; i < res.ned().size(); i++) { + if(res.ned()[i].snpID >= altdb->alts().size()) + continue; + index_t snp_idx = res.ned()[i].snpID; + assert_lt(snp_idx, altdb->alts().size()); + const ALT& snp = altdb->alts()[snp_idx]; + const string& snpID = altdb->altnames()[snp_idx]; + if(snp_idx == prev_snp_idx) continue; + if(snp_first) { + WRITE_SEP(); + o.append("Zs:Z:"); + } + if(!snp_first) o.append(","); + uint64_t pos = res.ned()[i].pos; + size_t j = i; + while(j > 0) { + if(res.ned()[j-1].snpID < altdb->alts().size()) { + const ALT& snp2 = altdb->alts()[res.ned()[j-1].snpID]; + if(snp2.type == ALT_SNP_SGL) { + pos -= (res.ned()[j-1].pos + 1); + } else if(snp2.type == ALT_SNP_DEL) { + pos -= res.ned()[j-1].pos; + } else if(snp2.type ==
ALT_SNP_INS) { + pos -= (res.ned()[j-1].pos + snp.len); + // NOTE(review): subtracts snp.len (the current ALT) although this branch tests snp2 (the preceding ALT) - confirm which length is intended + } + break; + } + j--; + } + itoa10(pos, buf); + o.append(buf); + o.append("|"); + if(snp.type == ALT_SNP_SGL) { + o.append("S"); + } else if(snp.type == ALT_SNP_DEL) { + o.append("D"); + } else { + assert_eq(snp.type, ALT_SNP_INS); + o.append("I"); + } + o.append("|"); + o.append(snpID.c_str()); + + if(snp_first) snp_first = false; + prev_snp_idx = snp_idx; + } + if(!res.fw()) { + Edit::invertPoss(const_cast&>(res.ned()), len_trimmed, false); + } + + if(print_xr_) { + // Original read string, with newlines and '%' percent-encoded + // NOTE(review): a "\n" is appended here instead of WRITE_SEP() and a tag prefix - confirm intended + o.append("\n"); + printOptFieldNewlineEscapedZ(o, rd.readOrigBuf); + } +} + +template +void SamConfig::printAlignedOptFlags( + Alignment* newAlignment, // output buffer + bool first, // first opt flag printed is first overall? + const Read& rd, // the read + AlnRes& res, // individual alignment result + StackedAln& staln, // stacked alignment buffer + const AlnFlags& flags, // alignment flags + const AlnSetSumm& summ, // summary of alignments for this read + const SeedAlSumm& ssm, // seed alignment summary + const PerReadMetrics& prm, // per-read metrics + const Scoring& sc, // scoring scheme + const char *mapqInp, // inputs to MAPQ calculation + const ALTDB* altdb) +const +{ + BTString &o = newAlignment->unChangedTags; + char buf[1024]; + if(print_as_) { + // AS:i: Alignment score generated by aligner + //itoa10(res.score().score(), buf); + newAlignment->AS = res.score().score(); + } + + // Do not output suboptimal alignment score, which conflicts with Cufflinks and StringTie + if(print_xs_) { + // XS:i: Suboptimal alignment score + // Use ZS:i: to avoid conflict with XS:A: + AlnScore sco = summ.secbestMate(rd.mate < 2); + if(sco.valid()) { + itoa10(sco.score(), buf); + WRITE_SEP(); + o.append("ZS:i:"); + o.append(buf); + } + } + if(print_xn_) { + // XN:i: Number of ambiguous bases in the referenece + itoa10(res.refNs(), buf); + WRITE_SEP(); + o.append("XN:i:"); + o.append(buf); + } + if(print_x0_) { + // X0:i: Number
of best hits + } + if(print_x1_) { + // X1:i: Number of sub-optimal best hits + } + size_t num_mm = 0; + size_t num_go = 0; + size_t num_gx = 0; + for(size_t i = 0; i < res.ned().size(); i++) { + if(res.ned()[i].isMismatch()) { + if(res.ned()[i].snpID >= altdb->alts().size()) { + num_mm++; + } + } else if(res.ned()[i].isReadGap()) { + if(res.ned()[i].snpID >= altdb->alts().size()) { + num_go++; + num_gx++; + } + while(i < res.ned().size()-1 && + res.ned()[i+1].pos == res.ned()[i].pos && + res.ned()[i+1].isReadGap()) + { + i++; + if(res.ned()[i].snpID >= altdb->alts().size()) { + num_gx++; + } + } + } else if(res.ned()[i].isRefGap()) { + if(res.ned()[i].snpID >= altdb->alts().size()) { + num_go++; + num_gx++; + } + while(i < res.ned().size()-1 && + res.ned()[i+1].pos == res.ned()[i].pos+1 && + res.ned()[i+1].isRefGap()) + { + i++; + if(res.ned()[i].snpID >= altdb->alts().size()) { + num_gx++; + } + } + } + } + if(print_xm_) { + // XM:i: Number of mismatches in the alignment + //itoa10(num_mm, buf); + /*WRITE_SEP(); + o.append("XM:i:"); + o.append(buf);*/ + newAlignment->XM = num_mm; + } + if(print_xo_) { + // XO:i: Number of gap opens + itoa10(num_go, buf); + WRITE_SEP(); + o.append("XO:i:"); + o.append(buf); + } + if(print_xg_) { + // XG:i: Number of gap extensions (incl. opens) + itoa10(num_gx, buf); + WRITE_SEP(); + o.append("XG:i:"); + o.append(buf); + } + if(print_nm_) { + // NM:i: Edit dist. to the ref, Ns count, clipping doesn't + size_t NM = 0; + for(size_t i = 0; i < res.ned().size(); i++) { + if(res.ned()[i].type != EDIT_TYPE_SPL) { + if(res.ned()[i].snpID >= altdb->alts().size()) { + NM++; + } + } + } + newAlignment->NM = NM; + } + if(print_md_) { + // MD:Z: String for mms. 
[0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)*2 + /*WRITE_SEP(); + o.append("MD:Z:");*/ + staln.buildMdz(); + staln.writeMdz( + &newAlignment->MD, // output buffer + NULL); // no char buffer + } + if(print_ys_ && summ.paired()) { + // YS:i: Alignment score of opposite mate + assert(res.oscore().valid()); + newAlignment->YS = res.oscore().score(); + } + if(print_yn_) { + // YN:i: Minimum valid score for this mate + TAlScore mn = sc.scoreMin.f(rd.length()); + itoa10(mn, buf); + WRITE_SEP(); + o.append("YN:i:"); + o.append(buf); + // Yn:i: Perfect score for this mate + TAlScore pe = sc.perfectScore(rd.length()); + itoa10(pe, buf); + WRITE_SEP(); + o.append("Yn:i:"); + o.append(buf); + } + if(print_xss_) { + // Xs:i: Best invalid alignment score of this mate + bool one = true; + if(flags.partOfPair() && !flags.readMate1()) { + one = false; + } + TAlScore bst = one ? prm.bestLtMinscMate1 : prm.bestLtMinscMate2; + if(bst > std::numeric_limits::min()) { + itoa10(bst, buf); + WRITE_SEP(); + o.append("Xs:i:"); + o.append(buf); + } + if(flags.partOfPair()) { + // Ys:i: Best invalid alignment score of opposite mate + bst = one ? prm.bestLtMinscMate2 : prm.bestLtMinscMate1; + if(bst > std::numeric_limits::min()) { + itoa10(bst, buf); + WRITE_SEP(); + o.append("Ys:i:"); + o.append(buf); + } + } + } + if(print_zs_) { + // ZS:i: Pseudo-random seed for read + itoa10(rd.seed, buf); + WRITE_SEP(); + o.append("ZS:i:"); + o.append(buf); + } + if(print_yt_ && !threeN) { + // YT:Z: String representing alignment type + WRITE_SEP(); + flags.printYT(o); + } + if(print_yp_ && flags.partOfPair() && flags.canMax()) { + // YP:i: Read was repetitive when aligned paired? + WRITE_SEP(); + flags.printYP(o); + } + if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) { + // YM:i: Read was repetitive when aligned unpaired? + WRITE_SEP(); + flags.printYM(o); + } + if(print_yf_ && flags.filtered()) { + // YF:i: Read was filtered? 
+ first = flags.printYF(o, first) && first; + } + if(print_yi_) { + // Print MAPQ calibration info + if(mapqInp[0] != '\0') { + // YI:i: Suboptimal alignment score + WRITE_SEP(); + o.append("YI:Z:"); + o.append(mapqInp); + } + } + if(flags.partOfPair() && print_zp_) { + // ZP:i: Score of best concordant paired-end alignment + WRITE_SEP(); + o.append("ZP:Z:"); + if(summ.bestPaired().valid()) { + itoa10(summ.bestPaired().score(), buf); + o.append(buf); + } else { + o.append("NA"); + } + // Zp:i: Second-best concordant paired-end alignment score + WRITE_SEP(); + o.append("Zp:Z:"); + if(summ.secbestPaired().valid()) { + itoa10(summ.secbestPaired().score(), buf); + o.append(buf); + } else { + o.append("NA"); + } + } + if(print_zu_) { + // ZU:i: Score of best unpaired alignment + AlnScore best = (rd.mate <= 1 ? summ.best1() : summ.best2()); + AlnScore secbest = (rd.mate <= 1 ? summ.secbest1() : summ.secbest2()); + WRITE_SEP(); + o.append("ZU:i:"); + if(best.valid()) { + itoa10(best.score(), buf); + o.append(buf); + } else { + o.append("NA"); + } + // Zu:i: Score of second-best unpaired alignment + WRITE_SEP(); + o.append("Zu:i:"); + if(secbest.valid()) { + itoa10(secbest.score(), buf); + o.append(buf); + } else { + o.append("NA"); + } + } + if(!rgs_.empty()) { + WRITE_SEP(); + o.append(rgs_.c_str()); + } + if(print_xt_) { + // XT:i: Timing + WRITE_SEP(); + struct timeval tv_end; + struct timezone tz_end; + gettimeofday(&tv_end, &tz_end); + size_t total_usecs = + (tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 + + (tv_end.tv_usec - prm.tv_beg.tv_usec); + itoa10(total_usecs, buf); + o.append("XT:i:"); + o.append(buf); + } + if(print_xd_) { + // XD:i: Extend DPs + WRITE_SEP(); + itoa10(prm.nExDps, buf); + o.append("XD:i:"); + o.append(buf); + // Xd:i: Mate DPs + WRITE_SEP(); + itoa10(prm.nMateDps, buf); + o.append("Xd:i:"); + o.append(buf); + } + if(print_xu_) { + // XU:i: Extend ungapped tries + WRITE_SEP(); + itoa10(prm.nExUgs, buf); + o.append("XU:i:"); + o.append(buf); + 
// Xu:i: Mate ungapped tries + WRITE_SEP(); + itoa10(prm.nMateUgs, buf); + o.append("Xu:i:"); + o.append(buf); + } + if(print_ye_) { + // YE:i: Streak of failed DPs at end + WRITE_SEP(); + itoa10(prm.nDpFail, buf); + o.append("YE:i:"); + o.append(buf); + // Ye:i: Streak of failed ungaps at end + WRITE_SEP(); + itoa10(prm.nUgFail, buf); + o.append("Ye:i:"); + o.append(buf); + } + if(print_yl_) { + // YL:i: Longest streak of failed DPs + WRITE_SEP(); + itoa10(prm.nDpFailStreak, buf); + o.append("YL:i:"); + o.append(buf); + // Yl:i: Longest streak of failed ungaps + WRITE_SEP(); + itoa10(prm.nUgFailStreak, buf); + o.append("Yl:i:"); + o.append(buf); + } + if(print_yu_) { + // YU:i: Index of last succesful DP + WRITE_SEP(); + itoa10(prm.nDpLastSucc, buf); + o.append("YU:i:"); + o.append(buf); + // Yu:i: Index of last succesful DP + WRITE_SEP(); + itoa10(prm.nUgLastSucc, buf); + o.append("Yu:i:"); + o.append(buf); + } + if(print_xp_) { + // XP:Z: String describing seed hits + WRITE_SEP(); + o.append("XP:B:I,"); + itoa10(prm.nSeedElts, buf); + o.append(buf); + o.append(','); + itoa10(prm.nSeedEltsFw, buf); + o.append(buf); + o.append(','); + itoa10(prm.nSeedEltsRc, buf); + o.append(buf); + o.append(','); + itoa10(prm.seedMean, buf); + o.append(buf); + o.append(','); + itoa10(prm.seedMedian, buf); + o.append(buf); + } + if(print_yr_) { + // YR:i: Redundant seed hits + WRITE_SEP(); + itoa10(prm.nRedundants, buf); + o.append("YR:i:"); + o.append(buf); + } + if(print_zb_) { + // ZB:i: Ftab ops for seed alignment + WRITE_SEP(); + itoa10(prm.nFtabs, buf); + o.append("ZB:i:"); + o.append(buf); + } + if(print_zr_) { + // ZR:Z: Redundant path skips in seed alignment + WRITE_SEP(); + o.append("ZR:Z:"); + itoa10(prm.nRedSkip, buf); o.append(buf); + o.append(','); + itoa10(prm.nRedFail, buf); o.append(buf); + o.append(','); + itoa10(prm.nRedIns, buf); o.append(buf); + } + if(print_zf_) { + // ZF:i: FM Index ops for seed alignment + WRITE_SEP(); + itoa10(prm.nSdFmops, buf); + 
o.append("ZF:i:"); + o.append(buf); + // Zf:i: FM Index ops for offset resolution + WRITE_SEP(); + itoa10(prm.nExFmops, buf); + o.append("Zf:i:"); + o.append(buf); + } + if(print_zm_) { + // ZM:Z: Print FM index op string for best-first search + WRITE_SEP(); + o.append("ZM:Z:"); + prm.fmString.print(o, buf); + } + if(print_zi_) { + // ZI:i: Seed extend loop iterations + WRITE_SEP(); + itoa10(prm.nExIters, buf); + o.append("ZI:i:"); + o.append(buf); + } + if(print_xs_a_) { + if(rna_strandness_ == RNA_STRANDNESS_UNKNOWN) { + uint8_t whichsense = res.spliced_whichsense_transcript(); + if(whichsense != SPL_UNKNOWN) { + WRITE_SEP(); + o.append("XS:A:"); + if(whichsense == SPL_FW || whichsense == SPL_SEMI_FW) { + o.append('+'); + } else { + assert(whichsense == SPL_RC || whichsense == SPL_SEMI_RC); + o.append('-'); + } + } + } else { + WRITE_SEP(); + o.append("XS:A:"); + char strandness = '+'; + if(res.readMate1()) { + if(res.orient()) { + if(rna_strandness_ == RNA_STRANDNESS_R || rna_strandness_ == RNA_STRANDNESS_RF) { + strandness = '-'; + } + } else { + if(rna_strandness_ == RNA_STRANDNESS_F || rna_strandness_ == RNA_STRANDNESS_FR) { + strandness = '-'; + } + } + } else { + assert(res.readMate2()); + assert(rna_strandness_ == RNA_STRANDNESS_FR || rna_strandness_ == RNA_STRANDNESS_RF); + if(res.orient()) { + if(rna_strandness_ == RNA_STRANDNESS_FR) { + strandness = '-'; + } + } else { + if(rna_strandness_ == RNA_STRANDNESS_RF) { + strandness = '-'; + } + } + } + o.append(strandness); + } + } + if(print_nh_) { + if(flags.alignedPaired()) { + /*WRITE_SEP(); + itoa10(summ.numAlnsPaired(), buf); + o.append("NH:i:"); + o.append(buf);*/ + newAlignment->NH = summ.numAlnsPaired(); + } else if(flags.alignedUnpaired() || flags.alignedUnpairedMate()) { + /*WRITE_SEP(); + itoa10((flags.alignedUnpaired() || flags.readMate1()) ? + summ.numAlns1() : summ.numAlns2(), buf); + o.append("NH:i:"); + o.append(buf);*/ + newAlignment->NH = (flags.alignedUnpaired() || flags.readMate1()) ? 
summ.numAlns1() : summ.numAlns2(); + } + } + + bool snp_first = true; + index_t prev_snp_idx = INDEX_MAX; + size_t len_trimmed = rd.length() - res.trimmed5p(true) - res.trimmed3p(true); + if(!res.fw()) { + Edit::invertPoss(const_cast&>(res.ned()), len_trimmed, false); + } + for(size_t i = 0; i < res.ned().size(); i++) { + if(res.ned()[i].snpID >= altdb->alts().size()) + continue; + index_t snp_idx = res.ned()[i].snpID; + assert_lt(snp_idx, altdb->alts().size()); + const ALT& snp = altdb->alts()[snp_idx]; + const string& snpID = altdb->altnames()[snp_idx]; + if(snp_idx == prev_snp_idx) continue; + if(snp_first) { + WRITE_SEP(); + o.append("Zs:Z:"); + } + if(!snp_first) o.append(","); + uint64_t pos = res.ned()[i].pos; + size_t j = i; + while(j > 0) { + if(res.ned()[j-1].snpID < altdb->alts().size()) { + const ALT& snp2 = altdb->alts()[res.ned()[j-1].snpID]; + if(snp2.type == ALT_SNP_SGL) { + pos -= (res.ned()[j-1].pos + 1); + } else if(snp2.type == ALT_SNP_DEL) { + pos -= res.ned()[j-1].pos; + } else if(snp2.type == ALT_SNP_INS) { + pos -= (res.ned()[j-1].pos + snp.len); + } + break; + } + j--; + } + itoa10(pos, buf); + o.append(buf); + o.append("|"); + if(snp.type == ALT_SNP_SGL) { + o.append("S"); + } else if(snp.type == ALT_SNP_DEL) { + o.append("D"); + } else { + assert_eq(snp.type, ALT_SNP_INS); + o.append("I"); + } + o.append("|"); + o.append(snpID.c_str()); + + if(snp_first) snp_first = false; + prev_snp_idx = snp_idx; + } + if(!res.fw()) { + Edit::invertPoss(const_cast&>(res.ned()), len_trimmed, false); + } + + if(print_xr_) { + // Original read string + newAlignment->passThroughLine.append("\n"); + printOptFieldNewlineEscapedZ(newAlignment->passThroughLine, rd.readOrigBuf); + } +} + +/** + * Print the optional flags to the given string. + */ +template +void SamConfig::printEmptyOptFlags( + BTString& o, // output buffer + bool first, // first opt flag printed is first overall? 
+ const Read& rd, // read + const AlnFlags& flags, // alignment flags + const AlnSetSumm& summ, // summary of alignments for this read + const SeedAlSumm& ssm, // seed alignment summary + const PerReadMetrics& prm, // per-read metrics + const Scoring& sc) // scoring scheme +const +{ + char buf[1024]; + if(print_yn_) { + // YN:i: Minimum valid score for this mate + TAlScore mn = sc.scoreMin.f(rd.length()); + itoa10(mn, buf); + WRITE_SEP(); + o.append("YN:i:"); + o.append(buf); + // Yn:i: Perfect score for this mate + TAlScore pe = sc.perfectScore(rd.length()); + itoa10(pe, buf); + WRITE_SEP(); + o.append("Yn:i:"); + o.append(buf); + } + if(print_zs_) { + // ZS:i: Pseudo-random seed for read + itoa10(rd.seed, buf); + WRITE_SEP(); + o.append("ZS:i:"); + o.append(buf); + } + if(print_yt_&& !threeN) { + // YT:Z: String representing alignment type + WRITE_SEP(); + flags.printYT(o); + } + if(print_yp_ && flags.partOfPair() && flags.canMax()) { + // YP:i: Read was repetitive when aligned paired? + WRITE_SEP(); + flags.printYP(o); + } + if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) { + // YM:i: Read was repetitive when aligned unpaired? + WRITE_SEP(); + flags.printYM(o); + } + if(print_yf_ && flags.filtered()) { + // YM:i: Read was repetitive when aligned unpaired? 
+ first = flags.printYF(o, first) && first; + } + if(!rgs_.empty()) { + WRITE_SEP(); + o.append(rgs_.c_str()); + } + if(print_xt_) { + // XT:i: Timing + WRITE_SEP(); + struct timeval tv_end; + struct timezone tz_end; + gettimeofday(&tv_end, &tz_end); + size_t total_usecs = + (tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 + + (tv_end.tv_usec - prm.tv_beg.tv_usec); + itoa10(total_usecs, buf); + o.append("XT:i:"); + o.append(buf); + } + if(print_xd_) { + // XD:i: Extend DPs + WRITE_SEP(); + itoa10(prm.nExDps, buf); + o.append("XD:i:"); + o.append(buf); + // Xd:i: Mate DPs + WRITE_SEP(); + itoa10(prm.nMateDps, buf); + o.append("Xd:i:"); + o.append(buf); + } + if(print_xu_) { + // XU:i: Extend ungapped tries + WRITE_SEP(); + itoa10(prm.nExUgs, buf); + o.append("XU:i:"); + o.append(buf); + // Xu:i: Mate ungapped tries + WRITE_SEP(); + itoa10(prm.nMateUgs, buf); + o.append("Xu:i:"); + o.append(buf); + } + if(print_ye_) { + // YE:i: Streak of failed DPs at end + WRITE_SEP(); + itoa10(prm.nDpFail, buf); + o.append("YE:i:"); + o.append(buf); + // Ye:i: Streak of failed ungaps at end + WRITE_SEP(); + itoa10(prm.nUgFail, buf); + o.append("Ye:i:"); + o.append(buf); + } + if(print_yl_) { + // YL:i: Longest streak of failed DPs + WRITE_SEP(); + itoa10(prm.nDpFailStreak, buf); + o.append("YL:i:"); + o.append(buf); + // Yl:i: Longest streak of failed ungaps + WRITE_SEP(); + itoa10(prm.nUgFailStreak, buf); + o.append("Yl:i:"); + o.append(buf); + } + if(print_yu_) { + // YU:i: Index of last succesful DP + WRITE_SEP(); + itoa10(prm.nDpLastSucc, buf); + o.append("YU:i:"); + o.append(buf); + // Yu:i: Index of last succesful DP + WRITE_SEP(); + itoa10(prm.nUgLastSucc, buf); + o.append("Yu:i:"); + o.append(buf); + } + if(print_xp_) { + // XP:Z: String describing seed hits + WRITE_SEP(); + o.append("XP:B:I,"); + itoa10(prm.nSeedElts, buf); + o.append(buf); + o.append(','); + itoa10(prm.nSeedEltsFw, buf); + o.append(buf); + o.append(','); + itoa10(prm.nSeedEltsRc, buf); + o.append(buf); + 
o.append(','); + itoa10(prm.seedMean, buf); + o.append(buf); + o.append(','); + itoa10(prm.seedMedian, buf); + o.append(buf); + } + if(print_yr_) { + // YR:i: Redundant seed hits + WRITE_SEP(); + itoa10(prm.nRedundants, buf); + o.append("YR:i:"); + o.append(buf); + } + if(print_zb_) { + // ZB:i: Ftab ops for seed alignment + WRITE_SEP(); + itoa10(prm.nFtabs, buf); + o.append("ZB:i:"); + o.append(buf); + } + if(print_zr_) { + // ZR:Z: Redundant path skips in seed alignment + WRITE_SEP(); + o.append("ZR:Z:"); + itoa10(prm.nRedSkip, buf); o.append(buf); + o.append(','); + itoa10(prm.nRedFail, buf); o.append(buf); + o.append(','); + itoa10(prm.nRedIns, buf); o.append(buf); + } + if(print_zf_) { + // ZF:i: FM Index ops for seed alignment + WRITE_SEP(); + itoa10(prm.nSdFmops, buf); + o.append("ZF:i:"); + o.append(buf); + // Zf:i: FM Index ops for offset resolution + WRITE_SEP(); + itoa10(prm.nExFmops, buf); + o.append("Zf:i:"); + o.append(buf); + } + if(print_zm_) { + // ZM:Z: Print FM index op string for best-first search + WRITE_SEP(); + o.append("ZM:Z:"); + prm.fmString.print(o, buf); + } + if(print_zi_) { + // ZI:i: Seed extend loop iterations + WRITE_SEP(); + itoa10(prm.nExIters, buf); + o.append("ZI:i:"); + o.append(buf); + } + if(print_xr_) { + // Original read string + o.append("\n"); + printOptFieldNewlineEscapedZ(o, rd.readOrigBuf); + } +} + +/** + * Print the optional flags to the given string. This function is for HISAT-3N. + */ + +template +void SamConfig::printEmptyOptFlags( + Alignment* newAlignment, // output buffer + bool first, // first opt flag printed is first overall? 
+ const Read& rd, // read + const AlnFlags& flags, // alignment flags + const AlnSetSumm& summ, // summary of alignments for this read + const SeedAlSumm& ssm, // seed alignment summary + const PerReadMetrics& prm, // per-read metrics + const Scoring& sc) // scoring scheme +const +{ + char buf[1024]; + BTString &o = newAlignment->unChangedTags; + if(print_yn_) { + // YN:i: Minimum valid score for this mate + TAlScore mn = sc.scoreMin.f(rd.length()); + itoa10(mn, buf); + WRITE_SEP(); + o.append("YN:i:"); + o.append(buf); + // Yn:i: Perfect score for this mate + TAlScore pe = sc.perfectScore(rd.length()); + itoa10(pe, buf); + WRITE_SEP(); + o.append("Yn:i:"); + o.append(buf); + } + if(print_zs_) { + // ZS:i: Pseudo-random seed for read + itoa10(rd.seed, buf); + WRITE_SEP(); + o.append("ZS:i:"); + o.append(buf); + } + if(print_yt_&& !threeN) { + // YT:Z: String representing alignment type + WRITE_SEP(); + flags.printYT(o); + } + if(print_yp_ && flags.partOfPair() && flags.canMax()) { + // YP:i: Read was repetitive when aligned paired? + WRITE_SEP(); + flags.printYP(o); + } + if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) { + // YM:i: Read was repetitive when aligned unpaired? + WRITE_SEP(); + flags.printYM(o); + } + if(print_yf_ && flags.filtered()) { + // YM:i: Read was repetitive when aligned unpaired? 
+ first = flags.printYF(o, first) && first; + } + if(!rgs_.empty()) { + WRITE_SEP(); + o.append(rgs_.c_str()); + } + if(print_xt_) { + // XT:i: Timing + WRITE_SEP(); + struct timeval tv_end; + struct timezone tz_end; + gettimeofday(&tv_end, &tz_end); + size_t total_usecs = + (tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 + + (tv_end.tv_usec - prm.tv_beg.tv_usec); + itoa10(total_usecs, buf); + o.append("XT:i:"); + o.append(buf); + } + if(print_xd_) { + // XD:i: Extend DPs + WRITE_SEP(); + itoa10(prm.nExDps, buf); + o.append("XD:i:"); + o.append(buf); + // Xd:i: Mate DPs + WRITE_SEP(); + itoa10(prm.nMateDps, buf); + o.append("Xd:i:"); + o.append(buf); + } + if(print_xu_) { + // XU:i: Extend ungapped tries + WRITE_SEP(); + itoa10(prm.nExUgs, buf); + o.append("XU:i:"); + o.append(buf); + // Xu:i: Mate ungapped tries + WRITE_SEP(); + itoa10(prm.nMateUgs, buf); + o.append("Xu:i:"); + o.append(buf); + } + if(print_ye_) { + // YE:i: Streak of failed DPs at end + WRITE_SEP(); + itoa10(prm.nDpFail, buf); + o.append("YE:i:"); + o.append(buf); + // Ye:i: Streak of failed ungaps at end + WRITE_SEP(); + itoa10(prm.nUgFail, buf); + o.append("Ye:i:"); + o.append(buf); + } + if(print_yl_) { + // YL:i: Longest streak of failed DPs + WRITE_SEP(); + itoa10(prm.nDpFailStreak, buf); + o.append("YL:i:"); + o.append(buf); + // Yl:i: Longest streak of failed ungaps + WRITE_SEP(); + itoa10(prm.nUgFailStreak, buf); + o.append("Yl:i:"); + o.append(buf); + } + if(print_yu_) { + // YU:i: Index of last succesful DP + WRITE_SEP(); + itoa10(prm.nDpLastSucc, buf); + o.append("YU:i:"); + o.append(buf); + // Yu:i: Index of last succesful DP + WRITE_SEP(); + itoa10(prm.nUgLastSucc, buf); + o.append("Yu:i:"); + o.append(buf); + } + if(print_xp_) { + // XP:Z: String describing seed hits + WRITE_SEP(); + o.append("XP:B:I,"); + itoa10(prm.nSeedElts, buf); + o.append(buf); + o.append(','); + itoa10(prm.nSeedEltsFw, buf); + o.append(buf); + o.append(','); + itoa10(prm.nSeedEltsRc, buf); + o.append(buf); + 
o.append(','); + itoa10(prm.seedMean, buf); + o.append(buf); + o.append(','); + itoa10(prm.seedMedian, buf); + o.append(buf); + } + if(print_yr_) { + // YR:i: Redundant seed hits + WRITE_SEP(); + itoa10(prm.nRedundants, buf); + o.append("YR:i:"); + o.append(buf); + } + if(print_zb_) { + // ZB:i: Ftab ops for seed alignment + WRITE_SEP(); + itoa10(prm.nFtabs, buf); + o.append("ZB:i:"); + o.append(buf); + } + if(print_zr_) { + // ZR:Z: Redundant path skips in seed alignment + WRITE_SEP(); + o.append("ZR:Z:"); + itoa10(prm.nRedSkip, buf); o.append(buf); + o.append(','); + itoa10(prm.nRedFail, buf); o.append(buf); + o.append(','); + itoa10(prm.nRedIns, buf); o.append(buf); + } + if(print_zf_) { + // ZF:i: FM Index ops for seed alignment + WRITE_SEP(); + itoa10(prm.nSdFmops, buf); + o.append("ZF:i:"); + o.append(buf); + // Zf:i: FM Index ops for offset resolution + WRITE_SEP(); + itoa10(prm.nExFmops, buf); + o.append("Zf:i:"); + o.append(buf); + } + if(print_zm_) { + // ZM:Z: Print FM index op string for best-first search + WRITE_SEP(); + o.append("ZM:Z:"); + prm.fmString.print(o, buf); + } + if(print_zi_) { + // ZI:i: Seed extend loop iterations + WRITE_SEP(); + itoa10(prm.nExIters, buf); + o.append("ZI:i:"); + o.append(buf); + } + if(print_xr_) { + // Original read string + newAlignment->passThroughLine.append("\n"); + printOptFieldNewlineEscapedZ(newAlignment->passThroughLine, rd.readOrigBuf); + } +} + +#endif /* SAM_H_ */ diff --git a/scoring.cpp b/scoring.cpp new file mode 100644 index 0000000..1348821 --- /dev/null +++ b/scoring.cpp @@ -0,0 +1,286 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +#include "scoring.h" + +using namespace std; + +/** + * Return true iff a read of length 'rdlen' passes the score filter, i.e., + * has enough characters to rise above the minimum score threshold. + * + * The best attainable score is taken to be rdlen * match(30), i.e. every + * position matching with the bonus for quality 30; the read passes when + * that score is at least 'minsc'. + */ +bool Scoring::scoreFilter( + int64_t minsc, + size_t rdlen) const +{ + int64_t sc = (int64_t)(rdlen * match(30)); + return sc >= minsc; +} + +/** + * Given the score floor for valid alignments and the length of the read, + * calculate the maximum possible number of read gaps that could occur in a + * valid alignment. + * + * Starting from the all-match score rdlen * match(30), the first gap costs + * readGapOpen() and each further gap costs readGapExtend(); the result is + * the largest number of gaps that keeps the score at or above 'minsc'. + */ +int Scoring::maxReadGaps( + int64_t minsc, + size_t rdlen) const +{ + // Score if all characters match. TODO: remove assumption that match bonus + // is independent of quality value. + int64_t sc = (int64_t)(rdlen * match(30)); + assert_geq(sc, minsc); + // Now convert matches to read gaps until sc falls below minsc + bool first = true; + int num = 0; + while(sc >= minsc) { + if(first) { + first = false; + // Subtract both penalties + sc -= readGapOpen(); + } else { + // Subtract just the extension penalty + sc -= readGapExtend(); + } + num++; + } + assert_gt(num, 0); + return num-1; +} + +/** + * Given the score floor for valid alignments and the length of the read, + * calculate the maximum possible number of reference gaps that could occur + * in a valid alignment. + * + * Each reference gap forfeits one match bonus (match(30)) in addition to + * refGapOpen() for the first gap or refGapExtend() for later ones; the + * result is the largest number of gaps that keeps the score at or above + * 'minsc'. + */ +int Scoring::maxRefGaps( + int64_t minsc, + size_t rdlen) const +{ + // Score if all characters match. TODO: remove assumption that match bonus + // is independent of quality value. 
+ int64_t sc = (int64_t)(rdlen * match(30)); + assert_geq(sc, minsc); + // Now convert matches to reference gaps until sc falls below minsc + bool first = true; + int num = 0; + while(sc >= minsc) { + // Each reference gap gives up one match bonus + sc -= match(30); + if(first) { + first = false; + // Subtract both penalties + sc -= refGapOpen(); + } else { + // Subtract just the extension penalty + sc -= refGapExtend(); + } + num++; + } + assert_gt(num, 0); + return num-1; +} + +/** + * Given a read sequence, return true iff the read passes the N filter. + * The N filter rejects reads containing more Ns than the ceiling that nCeil + * gives for the read's length. + * + * 'ns' is incremented once per N seen and is not reset here, so the caller + * supplies its starting value; scanning stops as soon as the ceiling is + * exceeded. + */ +bool Scoring::nFilter(const BTDnaString& rd, size_t& ns) const { + size_t rdlen = rd.length(); + size_t maxns = nCeil.f((double)rdlen); + assert_geq(rd.length(), 0); + for(size_t i = 0; i < rdlen; i++) { + // A base code of 4 is what this filter counts as an N + if(rd[i] == 4) { + ns++; + if(ns > maxns) { + return false; // doesn't pass + } + } + } + return true; // passes +} + +/** + * Apply the N filter to a pair of mates. Nothing is returned; the outcome is + * reported through 'filt1' and 'filt2', which the body sets to true for a + * mate that passes and false otherwise. + * + * For paired-end reads, there is a question of how to apply the filter. + * The filter could be applied to both mates separately, which might then + * prevent paired-end alignment. Or the filter could be applied to the + * reads as though they're concatenated together. The latter approach has + * pros and cons. The pro is that we can use paired-end information to + * recover alignments for mates that would not have passed the N filter on + * their own. The con is that we might not want to do that, since the + * non-N portion of the bad mate might contain particularly unreliable + * information. + * + * The concatenated treatment is used only when both mates are present and + * 'ncatpair' is set; otherwise each available mate is filtered on its own + * via nFilter(). 
+ */ +void Scoring::nFilterPair( + const BTDnaString* rd1, // mate 1 (may be NULL) + const BTDnaString* rd2, // mate 2 (may be NULL) + size_t& ns1, // # Ns in mate 1 + size_t& ns2, // # Ns in mate 2 + bool& filt1, // true -> mate 1 passes the filter + bool& filt2) // true -> mate 2 passes the filter + const +{ + // Both fail to pass by default + filt1 = filt2 = false; + if(rd1 != NULL && rd2 != NULL && ncatpair) { + // Concatenated mode: one ceiling computed from the combined length + size_t rdlen1 = rd1->length(); + size_t rdlen2 = rd2->length(); + size_t maxns = nCeil.f((double)(rdlen1 + rdlen2)); + // NOTE(review): ns1 and ns2 are each compared against the combined + // ceiling on their own; they are never summed -- confirm this is intended + for(size_t i = 0; i < rdlen1; i++) { + if((*rd1)[i] == 4) ns1++; + if(ns1 > maxns) { + // doesn't pass + return; + } + } + for(size_t i = 0; i < rdlen2; i++) { + if((*rd2)[i] == 4) ns2++; + if(ns2 > maxns) { + // doesn't pass + return; + } + } + // Both pass + filt1 = filt2 = true; + } else { + // Filter each available mate on its own + if(rd1 != NULL) filt1 = nFilter(*rd1, ns1); + if(rd2 != NULL) filt2 = nFilter(*rd2, ns2); + } +} + +#ifdef SCORING_MAIN + +// NOTE(review): the argument comments in Case 2 below do not line up with the +// parameter list of the Scoring constructor visible in scoring.h (which takes +// softclip penalties and SimpleFunc objects) -- this self-test may be stale; +// confirm before building with SCORING_MAIN. +int main() { + { + cout << "Case 1: Simple 1 ... "; + Scoring sc = Scoring::base1(); + assert_eq(COST_MODEL_CONSTANT, sc.matchType); + + assert_eq(0, sc.maxRefGaps(0, 10)); // 10 - 1 - 15 = -6 + assert_eq(0, sc.maxRefGaps(0, 11)); // 11 - 1 - 15 = -5 + assert_eq(0, sc.maxRefGaps(0, 12)); // 12 - 1 - 15 = -4 + assert_eq(0, sc.maxRefGaps(0, 13)); // 13 - 1 - 15 = -3 + assert_eq(0, sc.maxRefGaps(0, 14)); // 14 - 1 - 15 = -2 + assert_eq(0, sc.maxRefGaps(0, 15)); // 15 - 1 - 15 = -1 + assert_eq(1, sc.maxRefGaps(0, 16)); // 16 - 1 - 15 = 0 + assert_eq(1, sc.maxRefGaps(0, 17)); // 17 - 2 - 19 = -4 + assert_eq(1, sc.maxRefGaps(0, 18)); // 18 - 2 - 19 = -3 + assert_eq(1, sc.maxRefGaps(0, 19)); // 19 - 2 - 19 = -2 + assert_eq(1, sc.maxRefGaps(0, 20)); // 20 - 2 - 19 = -1 + assert_eq(2, sc.maxRefGaps(0, 21)); // 21 - 2 - 19 = 0 + + assert_eq(0, sc.maxReadGaps(0, 10)); // 10 - 0 - 15 = -5 + assert_eq(0, sc.maxReadGaps(0, 11)); // 11 - 0 - 15 = -4 + assert_eq(0, sc.maxReadGaps(0, 12)); // 12 - 0 - 15 = -3 + assert_eq(0, sc.maxReadGaps(0, 13)); // 13 - 0 - 15 = -2 
+ assert_eq(0, sc.maxReadGaps(0, 14)); // 14 - 0 - 15 = -1 + assert_eq(1, sc.maxReadGaps(0, 15)); // 15 - 0 - 15 = 0 + assert_eq(1, sc.maxReadGaps(0, 16)); // 16 - 0 - 19 = -3 + assert_eq(1, sc.maxReadGaps(0, 17)); // 17 - 0 - 19 = -2 + assert_eq(1, sc.maxReadGaps(0, 18)); // 18 - 0 - 19 = -1 + assert_eq(2, sc.maxReadGaps(0, 19)); // 19 - 0 - 19 = 0 + assert_eq(2, sc.maxReadGaps(0, 20)); // 20 - 0 - 23 = -3 + assert_eq(2, sc.maxReadGaps(0, 21)); // 21 - 0 - 23 = -2 + + // N ceiling: const=2, linear=0.1 + assert_eq(1, sc.nCeil(1)); + assert_eq(2, sc.nCeil(3)); + assert_eq(2, sc.nCeil(5)); + assert_eq(2, sc.nCeil(7)); + assert_eq(2, sc.nCeil(9)); + assert_eq(3, sc.nCeil(10)); + for(int i = 0; i < 30; i++) { + assert_eq(3, sc.n(i)); + assert_eq(3, sc.mm(i)); + } + assert_eq(5, sc.gapbar); + cout << "PASSED" << endl; + } + { + cout << "Case 2: Simple 2 ... "; + Scoring sc( + 4, // reward for a match + COST_MODEL_QUAL, // how to penalize mismatches + 0, // constant if mm penalty is a constant + 30, // penalty for nuc mm in decoded colorspace als + -3.0f, // constant coeff for minimum score + -3.0f, // linear coeff for minimum score + DEFAULT_FLOOR_CONST, // constant coeff for score floor + DEFAULT_FLOOR_LINEAR, // linear coeff for score floor + 3.0f, // max # ref Ns allowed in alignment; const coeff + 0.4f, // max # ref Ns allowed in alignment; linear coeff + COST_MODEL_QUAL, // how to penalize Ns in the read + 0, // constant if N penalty is a constant + true, // whether to concatenate mates before N filtering + 25, // constant coeff for cost of gap in the read + 25, // constant coeff for cost of gap in the ref + 10, // coeff of linear term for cost of gap in read + 10, // coeff of linear term for cost of gap in ref + 5, // 5 rows @ top/bot diagonal-entrance-only + -1, // no restriction on row + false // score prioritized over row + ); + + assert_eq(COST_MODEL_CONSTANT, sc.matchType); + assert_eq(4, sc.matchConst); + assert_eq(COST_MODEL_QUAL, sc.mmcostType); + 
assert_eq(COST_MODEL_QUAL, sc.npenType); + + assert_eq(0, sc.maxRefGaps(0, 8)); // 32 - 4 - 35 = -7 + assert_eq(0, sc.maxRefGaps(0, 9)); // 36 - 4 - 35 = -3 + assert_eq(1, sc.maxRefGaps(0, 10)); // 40 - 4 - 35 = 1 + assert_eq(1, sc.maxRefGaps(0, 11)); // 44 - 8 - 45 = -9 + assert_eq(1, sc.maxRefGaps(0, 12)); // 48 - 8 - 45 = -5 + assert_eq(1, sc.maxRefGaps(0, 13)); // 52 - 8 - 45 = -1 + assert_eq(2, sc.maxRefGaps(0, 14)); // 56 - 8 - 45 = 3 + + assert_eq(0, sc.maxReadGaps(0, 8)); // 32 - 0 - 35 = -3 + assert_eq(1, sc.maxReadGaps(0, 9)); // 36 - 0 - 35 = 1 + assert_eq(1, sc.maxReadGaps(0, 10)); // 40 - 0 - 45 = -5 + assert_eq(1, sc.maxReadGaps(0, 11)); // 44 - 0 - 45 = -1 + assert_eq(2, sc.maxReadGaps(0, 12)); // 48 - 0 - 45 = 3 + assert_eq(2, sc.maxReadGaps(0, 13)); // 52 - 0 - 55 = -3 + assert_eq(3, sc.maxReadGaps(0, 14)); // 56 - 0 - 55 = 1 + + // N ceiling: const=3, linear=0.4 + assert_eq(1, sc.nCeil(1)); + assert_eq(2, sc.nCeil(2)); + assert_eq(3, sc.nCeil(3)); + assert_eq(4, sc.nCeil(4)); + assert_eq(5, sc.nCeil(5)); + assert_eq(5, sc.nCeil(6)); + assert_eq(5, sc.nCeil(7)); + assert_eq(6, sc.nCeil(8)); + assert_eq(6, sc.nCeil(9)); + + for(int i = 0; i < 256; i++) { + assert_eq(i, sc.n(i)); + assert_eq(i, sc.mm(i)); + } + + assert_eq(5, sc.gapbar); + + cout << "PASSED" << endl; + } +} + +#endif /*def SCORING_MAIN*/ diff --git a/scoring.h b/scoring.h new file mode 100644 index 0000000..67aafaa --- /dev/null +++ b/scoring.h @@ -0,0 +1,546 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef SCORING_H_ +#define SCORING_H_ + +#include +#include "qual.h" +#include "simple_func.h" +#include "limit.h" + +// Default type of bonus to be added for matches +#define DEFAULT_MATCH_BONUS_TYPE COST_MODEL_CONSTANT +// When match bonus type is constant, use this constant +#define DEFAULT_MATCH_BONUS 0 +// Same settings but different defaults for --local mode +#define DEFAULT_MATCH_BONUS_TYPE_LOCAL COST_MODEL_CONSTANT +#define DEFAULT_MATCH_BONUS_LOCAL 2 + +// Default type of penalty to assess against mismatches +#define DEFAULT_MM_PENALTY_TYPE COST_MODEL_QUAL +// Default type of mismatch penalty for the ignore-quals setting +#define DEFAULT_MM_PENALTY_TYPE_IGNORE_QUALS COST_MODEL_CONSTANT +// Default maximum and minimum mismatch penalties +#define DEFAULT_MM_PENALTY_MAX 6 +#define DEFAULT_MM_PENALTY_MIN 2 +// Default maximum and minimum softclip penalties +#define DEFAULT_SC_PENALTY_MAX 2 +#define DEFAULT_SC_PENALTY_MIN 1 + +// Default type of penalty to assess against Ns +#define DEFAULT_N_PENALTY_TYPE COST_MODEL_CONSTANT +// When N penalty type is constant, use this constant +#define DEFAULT_N_PENALTY 1 + +// Constant coefficient b in linear function f(x) = ax + b determining +// minimum valid score f when read length is x +#define DEFAULT_MIN_CONST (-0.6f) +// Linear coefficient a +#define DEFAULT_MIN_LINEAR (-0.6f) +// Different defaults for --local mode +#define DEFAULT_MIN_CONST_LOCAL (0.0f) +#define DEFAULT_MIN_LINEAR_LOCAL (10.0f) + +// Constant coefficient b in linear function f(x) = ax + b determining +// maximum permitted number of Ns f in a read before it is filtered & +// the maximum number of Ns in an alignment before it is considered +// invalid. 
+#define DEFAULT_N_CEIL_CONST 0.0f +// Linear coefficient a +#define DEFAULT_N_CEIL_LINEAR 0.15f + +// Default for whether to concatenate mates before the N filter (as opposed to +// filting each mate separately) +#define DEFAULT_N_CAT_PAIR false + +// Default read gap penalties for when homopolymer calling is reliable +#define DEFAULT_READ_GAP_CONST 5 +#define DEFAULT_READ_GAP_LINEAR 3 + +// Default read gap penalties for when homopolymer calling is not reliable +#define DEFAULT_READ_GAP_CONST_BADHPOLY 3 +#define DEFAULT_READ_GAP_LINEAR_BADHPOLY 1 + +// Default reference gap penalties for when homopolymer calling is reliable +#define DEFAULT_REF_GAP_CONST 5 +#define DEFAULT_REF_GAP_LINEAR 3 + +// Default reference gap penalties for when homopolymer calling is not reliable +#define DEFAULT_REF_GAP_CONST_BADHPOLY 3 +#define DEFAULT_REF_GAP_LINEAR_BADHPOLY 1 + +enum { + COST_MODEL_ROUNDED_QUAL = 1, + COST_MODEL_QUAL, + COST_MODEL_CONSTANT +}; + +/** + * How to penalize various types of sequence dissimilarity, and other settings + * that govern how dynamic programming tables should be filled in and how to + * backtrace to find solutions. + */ +class Scoring { + + /** + * Init an array that maps quality to penalty or bonus according to 'type' + * and 'cons' + */ + template + void initPens( + T *pens, // array to fill + int type, // penalty type; qual | rounded qual | constant + int consMin, // constant for when penalty type is constant + int consMax) // constant for when penalty type is constant + { + if(type == COST_MODEL_ROUNDED_QUAL) { + for(int i = 0; i < 256; i++) { + pens[i] = (T)qualRounds[i]; + } + } else if(type == COST_MODEL_QUAL) { + assert_neq(consMin, 0); + assert_neq(consMax, 0); + for(int i = 0; i < 256; i++) { + int ii = min(i, 40); // TODO: Bit hacky, this + float frac = (float)ii / 40.0f; + pens[i] = consMin + (T)(frac * (consMax-consMin)); + assert_gt(pens[i], 0); + //if(pens[i] == 0) { + // pens[i] = ((consMax > 0) ? 
(T)1 : (T)-1); + //} + } + } else if(type == COST_MODEL_CONSTANT) { + for(int i = 0; i < 256; i++) { + pens[i] = (T)consMax; + } + } else { + throw 1; + } + } + +public: + + Scoring( + int mat, // reward for a match + int mmcType, // how to penalize mismatches + int mmpMax_, // maximum mismatch penalty + int mmpMin_, // minimum mismatch penalty + int scpMax_, // maximum softclip penalty + int scpMin_, // minimum softclip penalty + const SimpleFunc& scoreMin_, // minimum score for valid alignment; const coeff + const SimpleFunc& nCeil_, // max # ref Ns allowed in alignment; const coeff + int nType, // how to penalize Ns in the read + int n, // constant if N pelanty is a constant + bool ncat, // whether to concatenate mates before N filtering + int rdGpConst, // constant coeff for cost of gap in the read + int rfGpConst, // constant coeff for cost of gap in the ref + int rdGpLinear, // coeff of linear term for cost of gap in read + int rfGpLinear, // coeff of linear term for cost of gap in ref + int gapbar_, // # rows at top/bot can only be entered diagonally + int cp_ = 0, // canonical splicing penalty + int ncp_ = 12, // non-canonical splicing penalty + int csp_ = 24, // conflicting splice site penalty + const SimpleFunc* icp_ = NULL, // penalty as to intron length + const SimpleFunc* incp_ = NULL) // penalty as to intron length + { + matchType = COST_MODEL_CONSTANT; + matchConst = mat; + mmcostType = mmcType; + mmpMax = mmpMax_; + mmpMin = mmpMin_; + scpMax = scpMax_; + scpMin = scpMin_; + scoreMin = scoreMin_; + nCeil = nCeil_; + npenType = nType; + npen = n; + ncatpair = ncat; + rdGapConst = rdGpConst; + rfGapConst = rfGpConst; + rdGapLinear = rdGpLinear; + rfGapLinear = rfGpLinear; + qualsMatter_ = mmcostType != COST_MODEL_CONSTANT; + gapbar = gapbar_; + monotone = matchType == COST_MODEL_CONSTANT && matchConst == 0; + initPens(mmpens, mmcostType, mmpMin_, mmpMax_); + initPens(npens, npenType, npen, npen); + initPens(matchBonuses, matchType, matchConst, 
matchConst); + cp = cp_; + ncp = ncp_; + csp = csp_; + if(icp_ != NULL) icp = *icp_; + if(incp_ != NULL) incp = *incp_; + assert(repOk()); + } + + /** + * Set a constant match bonus. + */ + void setMatchBonus(int bonus) { + matchType = COST_MODEL_CONSTANT; + matchConst = bonus; + initPens(matchBonuses, matchType, matchConst, matchConst); + assert(repOk()); + } + + /** + * Set the mismatch penalty. + */ + void setMmPen(int mmType_, int mmpMax_, int mmpMin_) { + mmcostType = mmType_; + mmpMax = mmpMax_; + mmpMin = mmpMin_; + initPens(mmpens, mmcostType, mmpMin, mmpMax); + } + + /** + * Set the N penalty. + */ + void setNPen(int nType, int n) { + npenType = nType; + npen = n; + initPens(npens, npenType, npen, npen); + } + +#ifndef NDEBUG + /** + * Check that scoring scheme is internally consistent. + */ + bool repOk() const { + assert_geq(matchConst, 0); + assert_gt(rdGapConst, 0); + assert_gt(rdGapLinear, 0); + assert_gt(rfGapConst, 0); + assert_gt(rfGapLinear, 0); + return true; + } +#endif + + /** + * Return a linear function of x where 'cnst' is the constant coefficiant + * and 'lin' is the linear coefficient. + */ + static float linearFunc(int64_t x, float cnst, float lin) { + return (float)((double)cnst + ((double)lin * x)); + } + + /** + * Return the penalty incurred by a mismatch at an alignment column + * with read character 'rdc' reference mask 'refm' and quality 'q'. + * + * qs should be clamped to 63 on the high end before this query. + */ + inline int mm(int rdc, int refm, int q) const { + assert_range(0, 255, q); + return (rdc > 3 || refm > 15) ? npens[q] : mmpens[q]; + } + + /** + * Return the score of the given read character with the given quality + * aligning to the given reference mask. Take Ns into account. 
+ */ + inline int score(int rdc, int refm, int q) const { + assert_range(0, 255, q); + if(rdc > 3 || refm > 15) { + return -npens[q]; + } + if((refm & (1 << rdc)) != 0) { + return (int)matchBonuses[q]; + } else { + return -mmpens[q]; + } + } + + /** + * Return the score of the given read character with the given quality + * aligning to the given reference mask. Take Ns into account. Increment + * a counter if it's an N. + */ + inline int score(int rdc, int refm, int q, int& ns) const { + assert_range(0, 255, q); + if(rdc > 3 || refm > 15) { + ns++; + return -npens[q]; + } + if((refm & (1 << rdc)) != 0) { + return (int)matchBonuses[q]; + } else { + return -mmpens[q]; + } + } + + /** + * Return the penalty incurred by a mismatch at an alignment column + * with read character 'rdc' and quality 'q'. We assume the + * reference character is non-N. + */ + inline int mm(int rdc, int q) const { + assert_range(0, 255, q); + return (rdc > 3) ? npens[q] : mmpens[q]; + } + + /** + * Return the marginal penalty incurred by a mismatch at a read + * position with quality 'q'. + */ + inline int mm(int q) const { + assert_geq(q, 0); + return q < 255 ? mmpens[q] : mmpens[255]; + } + + /** + * Return the soft-clipping penalty for a read position with quality 'q', + * scaled linearly from scpMin (q <= 33) up to scpMax (q >= 73). + */ + inline int sc(int q) const { + assert_geq(q, 0); + if(q <= 33) return scpMin; + q -= 33; + if(q > 40) q = 40; + return (int)((q / 40.0f) * (scpMax - scpMin) + scpMin); + } + + /** + * Return the match bonus awarded at a read position with quality 30 + * (delegates to match(int)). + */ + inline int64_t match() const { + return match(30); + } + + /** + * Return the match bonus awarded at a read position with quality 'q', + * converted to an integer by adding 0.5 and truncating. + */ + inline int64_t match(int q) const { + assert_geq(q, 0); + return (int64_t)((q < 255 ? matchBonuses[q] : matchBonuses[255]) + 0.5f); + } + + /** + * Return the best score achievable by a read of length 'rdlen'. 
+ */ + inline int64_t perfectScore(size_t rdlen) const { + if(monotone) { + return 0; + } else { + return rdlen * match(30); + } + } + + /** + * Return true iff the penalities are such that two reads with the + * same sequence but different qualities might yield different + * alignments. + */ + inline bool qualitiesMatter() const { return qualsMatter_; } + + /** + * Return the marginal penalty incurred by an N mismatch at a read + * position with quality 'q'. + */ + inline int n(int q) const { + assert_geq(q, 0); + return q < 255 ? npens[q] : npens[255]; + } + + + /** + * Return the marginal penalty incurred by a gap in the read, + * given that this is the 'ext'th extension of the gap (0 = open, + * 1 = first, etc). + */ + inline int ins(int ext) const { + assert_geq(ext, 0); + if(ext == 0) return readGapOpen(); + return readGapExtend(); + } + + /** + * Return the marginal penalty incurred by a gap in the reference, + * given that this is the 'ext'th extension of the gap (0 = open, + * 1 = first, etc). + */ + inline int del(int ext) const { + assert_geq(ext, 0); + if(ext == 0) return refGapOpen(); + return refGapExtend(); + } + + /** + * Return true iff a read of length 'rdlen' passes the score filter, i.e., + * has enough characters to rise above the minimum score threshold. + */ + bool scoreFilter( + int64_t minsc, + size_t rdlen) const; + + /** + * Given the score floor for valid alignments and the length of the read, + * calculate the maximum possible number of read gaps that could occur in a + * valid alignment. + */ + int maxReadGaps( + int64_t minsc, + size_t rdlen) const; + + /** + * Given the score floor for valid alignments and the length of the read, + * calculate the maximum possible number of reference gaps that could occur + * in a valid alignment. + */ + int maxRefGaps( + int64_t minsc, + size_t rdlen) const; + + /** + * Given a read sequence, return true iff the read passes the N filter. 
+ * The N filter rejects reads with more than the number of Ns calculated by + * taking nCeilConst + nCeilLinear * read length. + */ + bool nFilter(const BTDnaString& rd, size_t& ns) const; + + /** + * Given a read sequence, return true iff the read passes the N filter. + * The N filter rejects reads with more than the number of Ns calculated by + * taking nCeilConst + nCeilLinear * read length. + * + * For paired-end reads, there is a question of how to apply the filter. + * The filter could be applied to both mates separately, which might then + * prevent paired-end alignment. Or the filter could be applied to the + * reads as though they're concatenated together. The latter approach has + * pros and cons. The pro is that we can use paired-end information to + * recover alignments for mates that would not have passed the N filter on + * their own. The con is that we might not want to do that, since the + * non-N portion of the bad mate might contain particularly unreliable + * information. + */ + void nFilterPair( + const BTDnaString* rd1, // mate 1 + const BTDnaString* rd2, // mate 2 + size_t& ns1, // # Ns in mate 1 + size_t& ns2, // # Ns in mate 2 + bool& filt1, // true -> mate 1 rejected by filter + bool& filt2) // true -> mate 2 rejected by filter + const; + + /** + * The penalty associated with opening a new read gap. + */ + inline int readGapOpen() const { + return rdGapConst + rdGapLinear; + } + + /** + * The penalty associated with opening a new ref gap. + */ + inline int refGapOpen() const { + return rfGapConst + rfGapLinear; + } + + /** + * The penalty associated with extending a read gap by one character. + */ + inline int readGapExtend() const { + return rdGapLinear; + } + + /** + * The penalty associated with extending a ref gap by one character. + */ + inline int refGapExtend() const { + return rfGapLinear; + } + + // avg. known score: -22.96, avg. 
random score: -33.70 + inline int64_t canSpl(int intronlen = 0, int minanchor = 100, float probscore = 0.0f) const { + int penintron = (intronlen > 0 ? icp.f((double)intronlen) : 0); + if(penintron < 0) penintron = 0; + if(minanchor < 10 && probscore < -24.0f + (10 - minanchor)) { + return MAX_I32; + } + return penintron + cp; + } + + inline int64_t noncanSpl(int intronlen = 0, int minanchor = 100, float probscore = 0.0f) const { + if(minanchor < 14) return MAX_I32; + int penintron = (intronlen > 0 ? incp.f((double)intronlen) : 0); + if(penintron < 0) penintron = 0; + return penintron + ncp; + } + + inline int conflictSpl() const { return (int)csp; } + + int matchType; // how to reward matches + int matchConst; // reward for a match + int mmcostType; // based on qual? rounded? just a constant? + int mmpMax; // maximum mismatch penalty + int mmpMin; // minimum mismatch penalty + int scpMax; // maximum softclip penalty + int scpMin; // minimum softclip penalty + SimpleFunc scoreMin; // minimum score for valid alignment, constant coeff + SimpleFunc nCeil; // max # Ns involved in alignment, constant coeff + int npenType; // N: based on qual? rounded? just a constant? + int npen; // N: if mmcosttype=constant, this is the const + bool ncatpair; // true -> do N filtering on concated pair + int rdGapConst; // constant term coeffecient in extend cost + int rfGapConst; // constant term coeffecient in extend cost + int rdGapLinear; // linear term coeffecient in extend cost + int rfGapLinear; // linear term coeffecient in extend cost + int gapbar; // # rows at top/bot can only be entered diagonally + bool monotone; // scores can only go down? 
+ float matchBonuses[256]; // map from qualities to match bonus + int mmpens[256]; // map from qualities to mm penalty + int npens[256]; // map from N qualities to penalty + int64_t cp; // canonical splicing penalty + int64_t ncp; // non-canonical splicing penalty + int64_t csp; // conflicting splice site penalty + SimpleFunc icp; // intron length penalty (canonical splice sites) + SimpleFunc incp; // intron length penalty (non-canonical splice sites) + + static Scoring base1() { // preset scheme built from the constants listed below + const double DMAX = std::numeric_limits<double>::max(); + SimpleFunc scoreMin(SIMPLE_FUNC_LINEAR, 0.0f, DMAX, 37.0f, 0.3f); + SimpleFunc nCeil(SIMPLE_FUNC_LINEAR, 0.0f, DMAX, 2.0f, 0.1f); + return Scoring( + 1, // reward for a match + COST_MODEL_CONSTANT, // how to penalize mismatches + 3, // max mismatch penalty + 3, // min mismatch penalty + 2, // max softclip penalty + 2, // min softclip penalty + scoreMin, // score min: 37 + 0.3x + nCeil, // n ceiling: 2 + 0.1x + COST_MODEL_CONSTANT, // how to penalize Ns in the read + 3, // constant if N penalty is a constant + false, // concatenate mates before N filtering? + 11, // constant coeff for gap in read + 11, // constant coeff for gap in ref + 4, // linear coeff for gap in read + 4, // linear coeff for gap in ref + 5); // 5 rows @ top/bot diagonal-entrance-only + } + +protected: + + bool qualsMatter_; +}; + +#endif /*SCORING_H_*/ diff --git a/scripts/convert_quals.pl b/scripts/convert_quals.pl new file mode 100644 index 0000000..abf344b --- /dev/null +++ b/scripts/convert_quals.pl @@ -0,0 +1,133 @@ +#!/usr/bin/perl -w + +# +# Copyright 2011, Ben Langmead +# +# This file is part of Bowtie 2. +# +# Bowtie 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# Bowtie 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. +# + +# +# convert_quals.pl +# +# Modify scale/encoding of quality values in a FASTQ file. +# +# Author: Ben Langmead +# Date: 5/5/2009 +# +# p = probability that base is miscalled +# Qphred = -10 * log10 (p) +# Qsolexa = -10 * log10 (p / (1 - p)) +# See: http://en.wikipedia.org/wiki/FASTQ_format +# + +use strict; +use warnings; +use Getopt::Long; + +my $inphred = 33; +my $insolexa = 0; +my $outphred = 0; +my $outsolexa = 64; + +# Default: convert 33-based Phred quals into 64-based Solexa quals + +my $result = + GetOptions ("inphred=i" => \$inphred, + "insolexa=i" => \$insolexa, + "outphred=i" => \$outphred, + "outsolexa=i" => \$outsolexa); +$result == 1 || die "One or more errors parsing script arguments"; + +if($inphred > 0) { + $inphred >= 33 || die "Input base must be >= 33, was $inphred"; +} else { + $insolexa >= 33 || die "Input base must be >= 33, was $insolexa"; +} + +sub log10($) { + return log(shift) / log(10.0); +} + +sub round { + my($number) = shift; + return int($number + .5 * ($number <=> 0)); +} + +# Convert from phred qual to probability of miscall +sub phredToP($) { + my $phred = shift; + my $p = (10.0 ** (($phred) / -10.0)); + ($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p, from phred $phred"; + return $p; +} + +# Convert from solexa qual to probability of miscall +sub solToP($) { + my $sol = shift; + my $x = (10.0 ** (($sol) / -10.0)); + my $p = $x / (1.0 + $x); + ($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p, from x $x, sol $sol"; + return $p; +} + +# Convert from probability of miscall to phred qual +sub pToPhred($) { + my $p = shift; + ($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p"; + return 
round(-10.0 * log10($p)); +} + +# Convert from probability of miscall to solexa qual +sub pToSol($) { + my $p = shift; + ($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p"; + return 0.0 if $p == 1.0; + return round(-10.0 * log10($p / (1.0 - $p))); +} + +while(<>) { + my $name = $_; print $name; + my $seq = <>; print $seq; + my $name2 = <>; print $name2; + my $quals = <>; + chomp($quals); + my @qual = split(//, $quals); + for(my $i = 0; $i <= $#qual; $i++) { + my $co = ord($qual[$i]); + my $p; + # Convert input qual to p + if($inphred > 0) { + $co -= $inphred; + $co >= 0 || die "Bad Phred input quality: $co"; + $p = phredToP($co); + } else { + $co -= $insolexa; + $p = solToP($co); + } + # Convert p to output qual + if($outphred > 0) { + $co = pToPhred($p); + $co >= 0 || die "Bad Phred output quality: $co"; + $co += $outphred; + } else { + $co = pToSol($p); + $co += $outsolexa; + } + $co >= 33 || die "Error: Output qual " . $co . " char is less than 33. Try a larger output base."; + print chr($co); + } + print "\n"; +} diff --git a/scripts/gen_2b_occ_lookup.pl b/scripts/gen_2b_occ_lookup.pl new file mode 100644 index 0000000..a6e8ff5 --- /dev/null +++ b/scripts/gen_2b_occ_lookup.pl @@ -0,0 +1,106 @@ +#!/usr/bin/perl -w + +# +# Copyright 2011, Ben Langmead +# +# This file is part of Bowtie 2. +# +# Bowtie 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Bowtie 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Bowtie 2. If not, see . 
+# + +# +# Generate lookup table that, given two packed DNA bytes (eight bases) +# and a character (A, C, G or T), returns how many times that character +# occurs in that packed pair of bytes. Useful for quickly counting +# character occurrences in long strings. The LUT is indexed first by +# character (0 - 3) then by byte (0 - 2^16-1). +# +# See ebwt.h. +# + +my @as = (); +my @cs = (); +my @gs = (); +my @ts = (); + +# Compile character arrays +my $i; +for($i = 0; $i < (256*256); $i++) { + + my $b01 = ($i >> 0) & 3; + my $b23 = ($i >> 2) & 3; + my $b45 = ($i >> 4) & 3; + my $b67 = ($i >> 6) & 3; + my $b89 = ($i >> 8) & 3; + my $b1011 = ($i >> 10) & 3; + my $b1213 = ($i >> 12) & 3; + my $b1415 = ($i >> 14) & 3; + + my $a = ($b01 == 0) + ($b23 == 0) + ($b45 == 0) + ($b67 == 0) + + ($b89 == 0) + ($b1011 == 0) + ($b1213 == 0) + ($b1415 == 0); + my $c = ($b01 == 1) + ($b23 == 1) + ($b45 == 1) + ($b67 == 1) + + ($b89 == 1) + ($b1011 == 1) + ($b1213 == 1) + ($b1415 == 1); + my $g = ($b01 == 2) + ($b23 == 2) + ($b45 == 2) + ($b67 == 2) + + ($b89 == 2) + ($b1011 == 2) + ($b1213 == 2) + ($b1415 == 2); + my $t = ($b01 == 3) + ($b23 == 3) + ($b45 == 3) + ($b67 == 3) + + ($b89 == 3) + ($b1011 == 3) + ($b1213 == 3) + ($b1415 == 3); + + push @as, $a; + push @cs, $c; + push @gs, $g; + push @ts, $t; +} + +my $entsPerLine = 16; + +# Count occurrences in all 4 bit pairs + +print "uint8_t cCntLUT_16b_4[4][256*256] = {\n"; + +# Print As array +print "\t/* As */ {\n"; +for($i = 0; $i < (256*256); $i++) { + print "\t\t" if(($i % $entsPerLine) == 0); + print "$as[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t},\n"; + +# Print Cs array +print "\t/* Cs */ {\n"; +for($i = 0; $i < (256*256); $i++) { + print "\t\t" if(($i % $entsPerLine) == 0); + print "$cs[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t},\n"; + +# Print Gs array +print "\t/* Gs */ {\n"; +for($i = 0; $i < (256*256); $i++) { + print "\t\t" if(($i % $entsPerLine) 
== 0); + print "$gs[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t},\n"; + +# Print Ts array +print "\t/* Ts */ {\n"; +for($i = 0; $i < (256*256); $i++) { + print "\t\t" if(($i % $entsPerLine) == 0); + print "$ts[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t}\n"; +print "};\n"; diff --git a/scripts/gen_occ_lookup.pl b/scripts/gen_occ_lookup.pl new file mode 100644 index 0000000..daba33f --- /dev/null +++ b/scripts/gen_occ_lookup.pl @@ -0,0 +1,257 @@ +#!/usr/bin/perl -w + +# +# Copyright 2011, Ben Langmead +# +# This file is part of Bowtie 2. +# +# Bowtie 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Bowtie 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Bowtie 2. If not, see . +# + +# +# Generate lookup table that, given a packed DNA byte (four bases) and +# a character (A, C, G or T), returns how many times that character +# occurs in that packed byte. Useful for quickly counting character +# occurrences in long strings. The LUT is indexed first by character +# (0-3) then by byte (0-255). +# +# Larger lookup tables are also possible, though they seem +# counterproductive. E.g., looking up eight bases at a time yields a +# 256K LUT, which doesn't fit in L1. A four-base LUT is 1KB, easily +# fitting in L1. +# +# See ebwt.h. 
+# + +my (@as4, @as3, @as2, @as1); +my (@cs4, @cs3, @cs2, @cs1); +my (@gs4, @gs3, @gs2, @gs1); +my (@ts4, @ts3, @ts2, @ts1); + +# Compile character arrays +my $i; +for($i = 0; $i < 256; $i++) { + my $b01 = ($i >> 0) & 3; + my $b23 = ($i >> 2) & 3; + my $b45 = ($i >> 4) & 3; + my $b67 = ($i >> 6) & 3; + + my $a4 = ($b01 == 0) + ($b23 == 0) + ($b45 == 0) + ($b67 == 0); + my $c4 = ($b01 == 1) + ($b23 == 1) + ($b45 == 1) + ($b67 == 1); + my $g4 = ($b01 == 2) + ($b23 == 2) + ($b45 == 2) + ($b67 == 2); + my $t4 = ($b01 == 3) + ($b23 == 3) + ($b45 == 3) + ($b67 == 3); + + push @as4, $a4; + push @cs4, $c4; + push @gs4, $g4; + push @ts4, $t4; + + my $a3 = ($b01 == 0) + ($b23 == 0) + ($b45 == 0); + my $c3 = ($b01 == 1) + ($b23 == 1) + ($b45 == 1); + my $g3 = ($b01 == 2) + ($b23 == 2) + ($b45 == 2); + my $t3 = ($b01 == 3) + ($b23 == 3) + ($b45 == 3); + + push @as3, $a3; + push @cs3, $c3; + push @gs3, $g3; + push @ts3, $t3; + + my $a2 = ($b01 == 0) + ($b23 == 0); + my $c2 = ($b01 == 1) + ($b23 == 1); + my $g2 = ($b01 == 2) + ($b23 == 2); + my $t2 = ($b01 == 3) + ($b23 == 3); + + push @as2, $a2; + push @cs2, $c2; + push @gs2, $g2; + push @ts2, $t2; + + my $a1 = ($b01 == 0) + 0; + my $c1 = ($b01 == 1) + 0; + my $g1 = ($b01 == 2) + 0; + my $t1 = ($b01 == 3) + 0; + + push @as1, $a1; + push @cs1, $c1; + push @gs1, $g1; + push @ts1, $t1; +} + +my $entsPerLine = 16; + +print "#include <stdint.h>\n\n"; +print "/* Generated by gen_lookup_tables.pl */\n\n"; + +# Count occurrences in all 4 bit pairs + +print "uint8_t cCntLUT_4[4][4][256] = {\n"; +print "\t/* All 4 bit pairs */ {\n"; + +# Print As array +print "\t\t/* As */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$as4[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Cs array +print "\t\t/* Cs */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 
0); + print "$cs4[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Gs array +print "\t\t/* Gs */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$gs4[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Ts array +print "\t\t/* Ts */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$ts4[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t}\n\t},\n"; + +# Count occurrences in low 1 bit pair + +print "\t/* Least significant 1 bit pair */ {\n"; + +# Print As array +print "\t\t/* As */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$as1[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Cs array +print "\t\t/* Cs */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$cs1[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Gs array +print "\t\t/* Gs */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$gs1[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Ts array +print "\t\t/* Ts */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$ts1[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t}\n\t},\n"; + +# Count occurrences in low 2 bit pairs + +print "\t/* Least significant 2 bit pairs */ {\n"; + +# Print As array +print "\t\t/* As */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$as2[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Cs array +print "\t\t/* Cs */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % 
$entsPerLine) == 0); + print "$cs2[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Gs array +print "\t\t/* Gs */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$gs2[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Ts array +print "\t\t/* Ts */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$ts2[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t}\n\t},\n"; + +# Count occurrences in low 3 bit pairs + +print "\t/* Least significant 3 bit pairs */ {\n"; + +# Print As array +print "\t\t/* As */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$as3[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Cs array +print "\t\t/* Cs */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$cs3[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Gs array +print "\t\t/* Gs */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$gs3[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t},\n"; + +# Print Ts array +print "\t\t/* Ts */ {\n"; +for($i = 0; $i < 256; $i++) { + print "\t\t\t" if(($i % $entsPerLine) == 0); + print "$ts3[$i], "; + print "\n" if(($i % $entsPerLine) == ($entsPerLine-1)); +} +print "\t\t}\n\t}\n"; + +print "};\n"; diff --git a/scripts/gen_solqual_lookup.pl b/scripts/gen_solqual_lookup.pl new file mode 100644 index 0000000..a1d5cd1 --- /dev/null +++ b/scripts/gen_solqual_lookup.pl @@ -0,0 +1,80 @@ +#!/usr/bin/perl -w + +# +# Copyright 2011, Ben Langmead +# +# This file is part of Bowtie 2. 
+# +# Bowtie 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Bowtie 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. +# + +use warnings; +use strict; + +sub log10($) { + return log(shift) / log(10.0); +} + +sub round { + my($number) = shift; + return int($number + .5 * ($number <=> 0)); +} + +# Convert from phred qual to probability of miscall +sub phredToP($) { + my $phred = shift; + my $p = (10.0 ** (($phred) / -10.0)); + ($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p, from phred $phred"; + return $p; +} + +# Convert from solexa qual to probability of miscall +sub solToP($) { + my $sol = shift; + my $x = (10.0 ** (($sol) / -10.0)); + my $p = $x / (1.0 + $x); + ($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p, from x $x, sol $sol"; + return $p; +} + +# Convert from probability of miscall to phred qual +sub pToPhred($) { + my $p = shift; + ($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p"; + return round(-10.0 * log10($p)); +} + +# Convert from probability of miscall to solexa qual +sub pToSol($) { + my $p = shift; + ($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p"; + return 0 if($p == 1.0); + return round(-10.0 * log10($p / (1.0 - $p))); +} + +# Print conversion table from Solexa to Phred, $cols entries per row +print "uint8_t solToPhred[] = {"; +my $cols = 10; +my $cnt = 0; +for(my $i = -10; $i < 256; $i++) { + # Solexa qual = $i + my $p = solToP($i); + my $ph = pToPhred($p); + print "\n\t/* $i */ " if($cnt == 0); + $cnt++; + $cnt = 0 if($cnt == $cols); + print "$ph"; + print ", " if($i < 255); +} +print "\n};\n"; diff 
--git a/scripts/infer_fraglen.pl b/scripts/infer_fraglen.pl new file mode 100644 index 0000000..caf2f77 --- /dev/null +++ b/scripts/infer_fraglen.pl @@ -0,0 +1,132 @@ +#!/usr/bin/perl -w + +# +# Copyright 2011, Ben Langmead +# +# This file is part of Bowtie 2. +# +# Bowtie 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Bowtie 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Bowtie 2. If not, see . +# + +## +# infer_fraglen.pl +# +# Infer fragment length by looking for unique alignments for mates +# (separately), then piecing those together and building a distribution. +# + +use strict; +use warnings; +use Getopt::Long; +use FindBin qw($Bin); + +my $m1 = ""; +my $m2 = ""; +my $index = ""; +my $bowtie_args = ""; +my $bowtie2 = "$Bin/../bowtie2"; +my $debug = 0; +my $binsz = 10; +my $mapq_cutoff = 30; +my $upto = undef; + +sub dieusage { + my $msg = shift; + my $exitlevel = shift; + $exitlevel = $exitlevel || 1; + print STDERR "$msg\n"; + exit $exitlevel; +} + +## +# Given a basename, return true iff all index files exist. 
+# +sub checkIndex($) { + my $idx = shift; + my $ext = "bt2"; + return -f "$idx.1.$ext" && + -f "$idx.2.$ext" && + -f "$idx.3.$ext" && + -f "$idx.4.$ext" && + -f "$idx.rev.1.$ext" && + -f "$idx.rev.2.$ext"; +} + +GetOptions ( + "bowtie2=s" => \$bowtie2, + "index=s" => \$index, + "m1=s" => \$m1, + "m2=s" => \$m2, + "upto=i" => \$upto, + "mapq_cutoff=i" => \$mapq_cutoff, + "debug" => \$debug, + "bowtie-args=s" => \$bowtie_args) || dieusage("Bad option", 1); + +die "Must specify --m1" if $m1 eq ""; +die "Must specify --m2" if $m2 eq ""; +die "Must specify --index" if $index eq ""; +$m1 =~ s/^~/$ENV{HOME}/; +$m2 =~ s/^~/$ENV{HOME}/; +$index =~ s/^~/$ENV{HOME}/; +die "Bad bowtie path: $bowtie2" if system("$bowtie2 --version >/dev/null 2>/dev/null") != 0; +die "Bad index: $index" if !checkIndex($index); + +# Hash holding all the observed fragment orientations and lengths +my %fragments = (); +my $m1cmd = ($m1 =~ /\.gz$/) ? "gzip -dc $m1" : "cat $m1"; +my $m2cmd = ($m2 =~ /\.gz$/) ? "gzip -dc $m2" : "cat $m2"; +my $cmd1 = "$m1cmd | $bowtie2 $bowtie_args --sam-nohead -x $index - > .infer_fraglen.tmp"; +my $cmd2 = "$m2cmd | $bowtie2 $bowtie_args --sam-nohead -x $index - |"; +my $tot = 0; +system($cmd1) == 0 || die "Error running '$cmd1'"; +open (M1, ".infer_fraglen.tmp") || die "Could not open '.infer_fraglen.tmp'"; +open (M2, $cmd2) || die "Could not open '$cmd2'"; +while(<M1>) { # mate-1 records come from the temp file, mate-2 records from the pipe + my $lm1 = $_; + my $lm2 = <M2>; + chomp($lm1); chomp($lm2); + my @lms1 = split(/\t/, $lm1); + my @lms2 = split(/\t/, $lm2); + my ($name1, $flags1, $chr1, $off1, $mapq1, $slen1) = ($lms1[0], $lms1[1], $lms1[2], $lms1[3], $lms1[4], length($lms1[9])); + my ($name2, $flags2, $chr2, $off2, $mapq2, $slen2) = ($lms2[0], $lms2[1], $lms2[2], $lms2[3], $lms2[4], length($lms2[9])); + # One or both mates didn't align uniquely? + next if $chr1 eq "*" || $chr2 eq "*"; + # Mates aligned to different chromosomes? + next if $chr1 ne $chr2; + # MAPQs too low? 
+ next if $mapq1 < $mapq_cutoff || $mapq2 < $mapq_cutoff; + # This pairing can be used as an observation of fragment orientation and length + my $fw1 = (($flags1 & 16) == 0) ? "F" : "R"; + my $fw2 = (($flags2 & 16) == 0) ? "F" : "R"; + my $frag = $off2 - $off1; + # This can overestimate if one mate is entirely subsumed in the other + if($frag > 0) { $frag += $slen2; } + else { $frag -= $slen1; } + # Install into bin + $frag = int(($frag + ($binsz/2))/$binsz); # Round to nearest bin + $fragments{"$fw1$fw2"}{$frag}++; + $tot++; +} +close(M1); +close(M2); +unlink(".infer_fraglen.tmp"); # ditch temporary file + +# Print out the bins +for my $k (keys %fragments) { + for my $k2 (sort {$a <=> $b} keys %{$fragments{$k}}) { + print "$k, ".($k2*$binsz).", ".$fragments{$k}{$k2}."\n"; + } +} + +print STDERR "DONE\n"; diff --git a/scripts/make_a_thaliana_tair.sh b/scripts/make_a_thaliana_tair.sh new file mode 100644 index 0000000..2a74bdf --- /dev/null +++ b/scripts/make_a_thaliana_tair.sh @@ -0,0 +1,56 @@ +#!/bin/sh + +# +# Downloads sequence for A. thaliana from TAIR v10 and build Bowtie 2 index. +# + +GENOMES_MIRROR=ftp://ftp.arabidopsis.org/home/tair + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +BOWTIE_BUILD_EXE=./bowtie2-build +if [ ! -x "$BOWTIE_BUILD_EXE" ] ; then + if ! which bowtie2-build ; then + echo "Could not find bowtie2-build in current directory or in PATH" + exit 1 + else + BOWTIE_BUILD_EXE=`which bowtie2-build` + fi +fi + +FC= +for c in 1 2 3 4 5 C M ; do + FN=TAIR10_chr$c.fas # always listed for the build, even if already downloaded + [ -n "$FC" ] && FC="$FC,$FN" + [ -z "$FC" ] && FC=$FN + if [ ! -f $FN ] ; then + F=${GENOMES_MIRROR}/Sequences/whole_chromosomes/${FN} + get $F || { echo "Error getting $F" ; exit 1 ; } + fi + + if [ ! 
-f TAIR10_chr$c.fas ] ; then + echo "Could not find chr$c.fas file!" + exit 2 + fi +done + +CMD="${BOWTIE_BUILD_EXE} $* $FC a_thaliana" +echo $CMD +if $CMD ; then + echo "a_thaliana index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_b_taurus_UMD3.sh b/scripts/make_b_taurus_UMD3.sh new file mode 100644 index 0000000..94b3a57 --- /dev/null +++ b/scripts/make_b_taurus_UMD3.sh @@ -0,0 +1,89 @@ +#!/bin/sh + +# +# Builds an index from UMD Freeze 3.0 of the Bos Taurus (cow) genome. +# + +BASE_CHRS="\ +Chr1 \ +Chr2 \ +Chr3 \ +Chr4 \ +Chr5 \ +Chr6 \ +Chr7 \ +Chr8 \ +Chr9 \ +Chr10 \ +Chr11 \ +Chr12 \ +Chr13 \ +Chr14 \ +Chr15 \ +Chr16 \ +Chr17 \ +Chr18 \ +Chr19 \ +Chr20 \ +Chr21 \ +Chr22 \ +Chr23 \ +Chr24 \ +Chr25 \ +Chr26 \ +Chr27 \ +Chr28 \ +Chr29 \ +ChrX \ +ChrU \ +ChrY-contigs \ +ChrY-contigs.SHOTGUN_ONLY" + +CHRS_TO_INDEX=$BASE_CHRS + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +BOWTIE_BUILD_EXE=./bowtie2-build +if [ ! -x "$BOWTIE_BUILD_EXE" ] ; then + if ! which bowtie2-build ; then + echo "Could not find bowtie2-build in current directory or in PATH" + exit 1 + else + BOWTIE_BUILD_EXE=`which bowtie2-build` + fi +fi + +FTP_BASE=ftp://ftp.cbcb.umd.edu/pub/data/Bos_taurus/Bos_taurus_UMD_3.0 +OUTPUT=b_taurus + +INPUTS= +for c in $CHRS_TO_INDEX ; do + if [ ! 
-f ${c}.fa ] ; then + F=${c}.fa.gz + get ${FTP_BASE}/$F || (echo "Error getting $F" && exit 1) + gunzip $F || (echo "Error unzipping $F" && exit 1) + fi + [ -n "$INPUTS" ] && INPUTS=$INPUTS,${c}.fa + [ -z "$INPUTS" ] && INPUTS=${c}.fa +done + +CMD="$BOWTIE_BUILD_EXE $* $INPUTS $OUTPUT" +echo $CMD +if $CMD ; then + echo "$OUTPUT index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_bdgp6.sh b/scripts/make_bdgp6.sh new file mode 100644 index 0000000..52c5ee5 --- /dev/null +++ b/scripts/make_bdgp6.sh @@ -0,0 +1,60 @@ +#!/bin/sh + +# +# Downloads sequence for the BDGP6 release 84 version of drosophila melanogaster (fly) from +# Ensembl. +# +# Note that Ensembl's build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/drosophila_melanogaster/dna + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +F=Drosophila_melanogaster.BDGP6.dna.toplevel.fa +if [ ! 
-f $F ] ; then + get ${ENSEMBL_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_bdgp6_tran.sh b/scripts/make_bdgp6_tran.sh new file mode 100644 index 0000000..6a2d7d2 --- /dev/null +++ b/scripts/make_bdgp6_tran.sh @@ -0,0 +1,89 @@ +#!/bin/sh + +# +# Downloads sequence for the BDGP6 release 84 version of drosophila melanogaster (fly) from +# Ensembl. +# +# Note that Ensembl's build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/drosophila_melanogaster/dna +ENSEMBL_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/drosophila_melanogaster +GTF_FILE=Drosophila_melanogaster.BDGP6.${ENSEMBL_RELEASE}.gtf + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! 
which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Drosophila_melanogaster.BDGP6.dna.toplevel.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --ss genome.ss --exon genome.exon genome_tran" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_canFam2.sh b/scripts/make_canFam2.sh new file mode 100644 index 0000000..272c563 --- /dev/null +++ b/scripts/make_canFam2.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +# +# Downloads sequence for the canFam2 version of C. familiaris (dog) +# from UCSC. +# + +i=2 +BASE_CHRS=chr1 +while [ $i -lt 39 ] ; do + BASE_CHRS="$BASE_CHRS chr$i" + i=`expr $i + 1` +done +BASE_CHRS="$BASE_CHRS chrX chrM chrUn" +CHRS_TO_INDEX=$BASE_CHRS + +CANFAM2_BASE=ftp://hgdownload.cse.ucsc.edu/goldenPath/canFam2/chromosomes + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! 
curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +BOWTIE_BUILD_EXE=./bowtie2-build +if [ ! -x "$BOWTIE_BUILD_EXE" ] ; then + if ! which bowtie2-build ; then + echo "Could not find bowtie2-build in current directory or in PATH" + exit 1 + else + BOWTIE_BUILD_EXE=`which bowtie2-build` + fi +fi + +INPUTS= +for c in $CHRS_TO_INDEX ; do + if [ ! -f ${c}.fa ] ; then + F=${c}.fa.gz + get ${CANFAM2_BASE}/$F || (echo "Error getting $F" && exit 1) + gunzip $F || (echo "Error unzipping $F" && exit 1) + fi + [ -n "$INPUTS" ] && INPUTS=$INPUTS,${c}.fa + [ -z "$INPUTS" ] && INPUTS=${c}.fa +done + +CMD="${BOWTIE_BUILD_EXE} $* ${INPUTS} canFam2" +echo Running $CMD +if $CMD ; then + echo "canFam2 index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_ce10.sh b/scripts/make_ce10.sh new file mode 100644 index 0000000..f3e9a86 --- /dev/null +++ b/scripts/make_ce10.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +CE10_BASE=ftp://hgdownload.cse.ucsc.edu/goldenPath/ce10/bigZips +F=chromFa.tar.gz + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget -O `basename $1` $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! 
which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +get ${CE10_BASE}/$F || (echo "Error getting $F" && exit 1) +tar xvzfO $F > genome.fa || (echo "Error unzipping $F" && exit 1) +rm $F + +CMD="${HISAT2_BUILD_EXE} genome.fa genome" +echo "Running $CMD" +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi + diff --git a/scripts/make_dm6.sh b/scripts/make_dm6.sh new file mode 100644 index 0000000..dcb0cac --- /dev/null +++ b/scripts/make_dm6.sh @@ -0,0 +1,50 @@ +#!/bin/sh + +# +# Downloads sequence for a D. melanogaster from flybase. Currently set +# to download 5.22, but F, REL, and IDX_NAME can be edited to reflect a +# different version number. (But note that you will usually also have +# to change the date in REL.) +# + +DM6_BASE=ftp://hgdownload.cse.ucsc.edu/goldenPath/dm6/bigZips +F=dm6.fa.gz + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget -O `basename $1` $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! 
which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +get ${DM6_BASE}/$F || (echo "Error getting $F" && exit 1) +gzip -cd $F > genome.fa || (echo "Error unzipping $F" && exit 1) +rm $F + +CMD="${HISAT2_BUILD_EXE} genome.fa genome" +echo "Running $CMD" +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi + diff --git a/scripts/make_e_coli.sh b/scripts/make_e_coli.sh new file mode 100644 index 0000000..4ca9046 --- /dev/null +++ b/scripts/make_e_coli.sh @@ -0,0 +1,46 @@ +#!/bin/sh + +# +# Downloads the sequence for a strain of e. coli from NCBI and builds a +# Bowtie index for it +# + +GENOMES_MIRROR=ftp://ftp.ncbi.nlm.nih.gov/genomes + +BOWTIE_BUILD_EXE=./bowtie2-build +if [ ! -x "$BOWTIE_BUILD_EXE" ] ; then + if ! which bowtie2-build ; then + echo "Could not find bowtie2-build in current directory or in PATH" + exit 1 + else + BOWTIE_BUILD_EXE=`which bowtie2-build` + fi +fi + +if [ ! -f NC_008253.fna ] ; then + if ! which wget > /dev/null ; then + echo wget not found, looking for curl... + if ! which curl > /dev/null ; then + echo curl not found either, aborting... + else + # Use curl + curl ${GENOMES_MIRROR}/Bacteria/Escherichia_coli_536_uid58531/NC_008253.fna -o NC_008253.fna + fi + else + # Use wget + wget ${GENOMES_MIRROR}/Bacteria/Escherichia_coli_536_uid58531/NC_008253.fna + fi +fi + +if [ ! -f NC_008253.fna ] ; then + echo "Could not find NC_008253.fna file!" 
+ exit 2 +fi + +CMD="${BOWTIE_BUILD_EXE} $* -t 8 NC_008253.fna e_coli" +echo $CMD +if $CMD ; then + echo "e_coli index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch37.sh b/scripts/make_grch37.sh new file mode 100644 index 0000000..ac93294 --- /dev/null +++ b/scripts/make_grch37.sh @@ -0,0 +1,60 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh37 release 75 version of H. sapiens (human) from +# Ensembl. +# +# Note that Ensembl's GRCh37 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=75 +ENSEMBL_GRCh37_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh37.${ENSEMBL_RELEASE}.dna.primary_assembly.fa +if [ ! 
-f $F ] ; then + get ${ENSEMBL_GRCh37_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +CMD="${HISAT2_BUILD_EXE} genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch37_snp.sh b/scripts/make_grch37_snp.sh new file mode 100644 index 0000000..26ac48d --- /dev/null +++ b/scripts/make_grch37_snp.sh @@ -0,0 +1,83 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh37 release 75 version of H. sapiens (human) from +# Ensembl. +# +# Note that Ensembl's GRCh37 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=75 +ENSEMBL_GRCh37_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna + +DBSNP_RELEASE=144 +SNP_FILE=snp${DBSNP_RELEASE}Common.txt +UCSC_COMMON_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/${SNP_FILE} + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! 
which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh37.${ENSEMBL_RELEASE}.dna.primary_assembly.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_GRCh37_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + + +if [ ! -f $SNP_FILE ] ; then + get ${UCSC_COMMON_SNP}.gz || (echo "Error getting ${UCSC_COMMON_SNP}" && exit 1) + gunzip ${SNP_FILE}.gz || (echo "Error unzipping ${SNP_FILE}" && exit 1) + awk 'BEGIN{OFS="\t"} {if($2 ~ /^chr/) {$2 = substr($2, 4)}; if($2 == "M") {$2 = "MT"} print}' ${SNP_FILE} > ${SNP_FILE}.tmp + mv ${SNP_FILE}.tmp ${SNP_FILE} + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE} genome +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype genome_snp" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch37_snp_tran.sh b/scripts/make_grch37_snp_tran.sh new file mode 100644 index 0000000..679b7c9 --- /dev/null +++ b/scripts/make_grch37_snp_tran.sh @@ -0,0 +1,112 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh37 release 75 version of H. sapiens (human) from +# Ensembl. +# +# Note that Ensembl's GRCh37 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. 
+# + +ENSEMBL_RELEASE=75 +ENSEMBL_GRCh37_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna +ENSEMBL_GRCh37_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/homo_sapiens +GTF_FILE=Homo_sapiens.GRCh37.${ENSEMBL_RELEASE}.gtf + +DBSNP_RELEASE=144 +SNP_FILE=snp${DBSNP_RELEASE}Common.txt +UCSC_COMMON_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/${SNP_FILE} + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh37.${ENSEMBL_RELEASE}.dna.primary_assembly.fa +if [ ! 
-f $F ] ; then + get ${ENSEMBL_GRCh37_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + + +if [ ! -f $SNP_FILE ] ; then + get ${UCSC_COMMON_SNP}.gz || (echo "Error getting ${UCSC_COMMON_SNP}" && exit 1) + gunzip ${SNP_FILE}.gz || (echo "Error unzipping ${SNP_FILE}" && exit 1) + awk 'BEGIN{OFS="\t"} {if($2 ~ /^chr/) {$2 = substr($2, 4)}; if($2 == "M") {$2 = "MT"} print}' ${SNP_FILE} > ${SNP_FILE}.tmp + mv ${SNP_FILE}.tmp ${SNP_FILE} + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE} genome +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GRCh37_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype --ss genome.ss --exon genome.exon genome_snp_tran" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch37_snp_tran_ercc.sh b/scripts/make_grch37_snp_tran_ercc.sh new file mode 100644 index 0000000..08d87f2 --- /dev/null +++ b/scripts/make_grch37_snp_tran_ercc.sh @@ -0,0 +1,117 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh37 release 75 version of H. sapiens (human) from +# Ensembl. +# +# Note that Ensembl's GRCh37 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. 
+# + +ENSEMBL_RELEASE=75 +ENSEMBL_GRCh37_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna +ENSEMBL_GRCh37_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/homo_sapiens +GTF_FILE=Homo_sapiens.GRCh37.${ENSEMBL_RELEASE}.gtf + +DBSNP_RELEASE=144 +SNP_FILE=snp${DBSNP_RELEASE}Common.txt +UCSC_COMMON_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/${SNP_FILE} +ERCC_FILE=ERCC92 +ERCC_FTP=https://tools.thermofisher.com/content/sfs/manuals/${ERCC_FILE}.zip + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! 
which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh37.${ENSEMBL_RELEASE}.dna.primary_assembly.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_GRCh37_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa + get ${ERCC_FTP} || (echo "Error getting ${ERCC_FILE}.zip" && exit 1) + unzip ${ERCC_FILE}.zip + cat ${ERCC_FILE}.fa >> genome.fa +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GRCh37_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + cat ${ERCC_FILE}.gtf >> ${GTF_FILE} + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +if [ ! -f $SNP_FILE ] ; then + get ${UCSC_COMMON_SNP}.gz || (echo "Error getting ${UCSC_COMMON_SNP}" && exit 1) + gunzip ${SNP_FILE}.gz || (echo "Error unzipping ${SNP_FILE}" && exit 1) + awk 'BEGIN{OFS="\t"} {if($2 ~ /^chr/) {$2 = substr($2, 4)}; if($2 == "M") {$2 = "MT"} print}' ${SNP_FILE} > ${SNP_FILE}.tmp + mv ${SNP_FILE}.tmp ${SNP_FILE} + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE} genome +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype --ss genome.ss --exon genome.exon genome_snp_tran_ercc" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch37_tran.sh b/scripts/make_grch37_tran.sh new file mode 100644 index 0000000..3ccf023 --- /dev/null +++ b/scripts/make_grch37_tran.sh @@ -0,0 +1,89 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh37 release 75 version of H. sapiens (human) from +# Ensembl. 
+# +# Note that Ensembl's GRCh37 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=75 +ENSEMBL_GRCh37_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna +ENSEMBL_GRCh37_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/homo_sapiens +GTF_FILE=Homo_sapiens.GRCh37.${ENSEMBL_RELEASE}.gtf + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh37.${ENSEMBL_RELEASE}.dna.primary_assembly.fa +if [ ! 
-f $F ] ; then + get ${ENSEMBL_GRCh37_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GRCh37_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --ss genome.ss --exon genome.exon genome_tran" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch38.sh b/scripts/make_grch38.sh new file mode 100644 index 0000000..5e7b6a1 --- /dev/null +++ b/scripts/make_grch38.sh @@ -0,0 +1,60 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh38 release 84 version of H. sapiens (human) from +# Ensembl. +# +# Note that Ensembl's GRCh38 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCh38_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! 
which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh38.dna.primary_assembly.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_GRCh38_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch38_rep.sh b/scripts/make_grch38_rep.sh new file mode 100644 index 0000000..83a3baa --- /dev/null +++ b/scripts/make_grch38_rep.sh @@ -0,0 +1,76 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh38 release 84 version of H. sapiens (human) from +# Ensembl. +# +# Note that Ensembl's GRCh38 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCh38_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_REPEAT_EXE=./hisat2-repeat +if [ ! -x "$HISAT2_REPEAT_EXE" ]; then + if ! 
which hisat2-repeat ; then + echo "Could not find hisat2-repeat in current directory or in PATH" + exit 1 + else + HISAT2_REPEAT_EXE=`which hisat2-repeat` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh38.dna.primary_assembly.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_GRCh38_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +# Build repeat +CMD="${HISAT2_REPEAT_EXE} -p 4 --repeat-count 5 --repeat-length 51-300,76-300,100-300,101-300,151-300 genome.fa genome_rep" +echo Running $CMD +$CMD +[[ "$?" -eq 0 ]] || { echo "Index building failed; see error message"; exit 1; }; + +CMD="${HISAT2_BUILD_EXE} -p 4 --repeat-ref genome_rep.rep.fa --repeat-info genome_rep.rep.info genome.fa genome_rep" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch38_snp.sh b/scripts/make_grch38_snp.sh new file mode 100644 index 0000000..be72089 --- /dev/null +++ b/scripts/make_grch38_snp.sh @@ -0,0 +1,83 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh38 release 84 version of H. sapiens (human) from +# Ensembl. +# +# Note that Ensembl's GRCh38 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCh38_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna + +DBSNP_RELEASE=144 +SNP_FILE=snp${DBSNP_RELEASE}Common.txt +UCSC_COMMON_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/${SNP_FILE} + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! 
curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh38.dna.primary_assembly.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_GRCh38_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + + +if [ ! -f $SNP_FILE ] ; then + get ${UCSC_COMMON_SNP}.gz || (echo "Error getting ${UCSC_COMMON_SNP}" && exit 1) + gunzip ${SNP_FILE}.gz || (echo "Error unzipping ${SNP_FILE}" && exit 1) + awk 'BEGIN{OFS="\t"} {if($2 ~ /^chr/) {$2 = substr($2, 4)}; if($2 == "M") {$2 = "MT"} print}' ${SNP_FILE} > ${SNP_FILE}.tmp + mv ${SNP_FILE}.tmp ${SNP_FILE} + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE} genome +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype genome_snp" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch38_snp_rep.sh b/scripts/make_grch38_snp_rep.sh new file mode 100644 index 0000000..bb24b91 --- /dev/null +++ b/scripts/make_grch38_snp_rep.sh @@ -0,0 +1,99 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh38 release 84 version of H. sapiens (human) from +# Ensembl. 
+# +# Note that Ensembl's GRCh38 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCh38_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna + +DBSNP_RELEASE=144 +SNP_FILE=snp${DBSNP_RELEASE}Common.txt +UCSC_COMMON_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/${SNP_FILE} + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +HISAT2_REPEAT_EXE=./hisat2-repeat +if [ ! -x "$HISAT2_REPEAT_EXE" ]; then + if ! which hisat2-repeat ; then + echo "Could not find hisat2-repeat in current directory or in PATH" + exit 1 + else + HISAT2_REPEAT_EXE=`which hisat2-repeat` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh38.dna.primary_assembly.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_GRCh38_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + + +if [ ! 
-f $SNP_FILE ] ; then + get ${UCSC_COMMON_SNP}.gz || (echo "Error getting ${UCSC_COMMON_SNP}" && exit 1) + gunzip ${SNP_FILE}.gz || (echo "Error unzipping ${SNP_FILE}" && exit 1) + awk 'BEGIN{OFS="\t"} {if($2 ~ /^chr/) {$2 = substr($2, 4)}; if($2 == "M") {$2 = "MT"} print}' ${SNP_FILE} > ${SNP_FILE}.tmp + mv ${SNP_FILE}.tmp ${SNP_FILE} + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE} genome +fi + +# Build repeat +CMD="${HISAT2_REPEAT_EXE} -p 4 --repeat-count 5 --repeat-length 51-300,76-300,100-300,101-300,151-300 genome.fa genome_rep" +echo Running $CMD +$CMD +[[ "$?" -eq 0 ]] || { echo "Index building failed; see error message"; exit 1; }; + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype --repeat-ref genome_rep.rep.fa --repeat-info genome_rep.rep.info genome_snp_rep" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch38_snp_tran.sh b/scripts/make_grch38_snp_tran.sh new file mode 100644 index 0000000..06ec20a --- /dev/null +++ b/scripts/make_grch38_snp_tran.sh @@ -0,0 +1,112 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh38 release 84 version of H. sapiens (human) from +# Ensembl. +# +# Note that Ensembl's GRCh38 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. 
+# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCh38_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna +ENSEMBL_GRCh38_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/homo_sapiens +GTF_FILE=Homo_sapiens.GRCh38.${ENSEMBL_RELEASE}.gtf + +DBSNP_RELEASE=144 +SNP_FILE=snp${DBSNP_RELEASE}Common.txt +UCSC_COMMON_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/${SNP_FILE} + +get() { # download URL $1 with wget, or with curl when wget is missing + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh38.dna.primary_assembly.fa +if [ !
-f $F ] ; then + get ${ENSEMBL_GRCh38_BASE}/$F.gz || { echo "Error getting $F"; exit 1; } # braces, not a subshell, so exit ends the script + gunzip $F.gz || { echo "Error unzipping $F"; exit 1; } + mv $F genome.fa +fi + + +if [ ! -f $SNP_FILE ] ; then + get ${UCSC_COMMON_SNP}.gz || { echo "Error getting ${UCSC_COMMON_SNP}"; exit 1; } + gunzip ${SNP_FILE}.gz || { echo "Error unzipping ${SNP_FILE}"; exit 1; } + awk 'BEGIN{OFS="\t"} {if($2 ~ /^chr/) {$2 = substr($2, 4)}; if($2 == "M") {$2 = "MT"} print}' ${SNP_FILE} > ${SNP_FILE}.tmp + mv ${SNP_FILE}.tmp ${SNP_FILE} + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE} genome +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GRCh38_GTF_BASE}/${GTF_FILE}.gz || { echo "Error getting ${GTF_FILE}"; exit 1; } + gunzip ${GTF_FILE}.gz || { echo "Error unzipping ${GTF_FILE}"; exit 1; } + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype --ss genome.ss --exon genome.exon genome_snp_tran" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch38_snp_tran_ercc.sh b/scripts/make_grch38_snp_tran_ercc.sh new file mode 100644 index 0000000..90b3d51 --- /dev/null +++ b/scripts/make_grch38_snp_tran_ercc.sh @@ -0,0 +1,117 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh38 release 84 version of H. sapiens (human) from +# Ensembl. +# +# Note that Ensembl's GRCh38 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds an index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below.
+# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCh37_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna +ENSEMBL_GRCh37_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/homo_sapiens +GTF_FILE=Homo_sapiens.GRCh38.${ENSEMBL_RELEASE}.gtf + +DBSNP_RELEASE=144 +SNP_FILE=snp${DBSNP_RELEASE}Common.txt +UCSC_COMMON_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/${SNP_FILE} +ERCC_FILE=ERCC92 +ERCC_FTP=https://tools.thermofisher.com/content/sfs/manuals/${ERCC_FILE}.zip + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! 
which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh38.dna.primary_assembly.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_GRCh37_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa + get ${ERCC_FTP} || (echo "Error getting ${ERCC_FILE}.zip" && exit 1) + unzip ${ERCC_FILE}.zip + cat ${ERCC_FILE}.fa >> genome.fa +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GRCh37_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + cat ${ERCC_FILE}.gtf >> ${GTF_FILE} + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +if [ ! -f $SNP_FILE ] ; then + get ${UCSC_COMMON_SNP}.gz || (echo "Error getting ${UCSC_COMMON_SNP}" && exit 1) + gunzip ${SNP_FILE}.gz || (echo "Error unzipping ${SNP_FILE}" && exit 1) + awk 'BEGIN{OFS="\t"} {if($2 ~ /^chr/) {$2 = substr($2, 4)}; if($2 == "M") {$2 = "MT"} print}' ${SNP_FILE} > ${SNP_FILE}.tmp + mv ${SNP_FILE}.tmp ${SNP_FILE} + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE} genome +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype --ss genome.ss --exon genome.exon genome_snp_tran_ercc" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grch38_tran.sh b/scripts/make_grch38_tran.sh new file mode 100644 index 0000000..783aba4 --- /dev/null +++ b/scripts/make_grch38_tran.sh @@ -0,0 +1,89 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCh38 release 84 version of H. sapiens (human) from +# Ensembl. 
+# +# Note that Ensembl's GRCh38 build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCh38_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/homo_sapiens/dna +ENSEMBL_GRCh38_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/homo_sapiens +GTF_FILE=Homo_sapiens.GRCh38.${ENSEMBL_RELEASE}.gtf + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Homo_sapiens.GRCh38.dna.primary_assembly.fa +if [ ! 
-f $F ] ; then + get ${ENSEMBL_GRCh38_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GRCh38_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --ss genome.ss --exon genome.exon genome_tran" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grcm38.sh b/scripts/make_grcm38.sh new file mode 100644 index 0000000..abc0322 --- /dev/null +++ b/scripts/make_grcm38.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCm38 release 81 version of M. Musculus (mouse) from +# Ensembl. +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCm38_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/mus_musculus/dna + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +F=Mus_musculus.GRCm38.dna.primary_assembly.fa +if [ ! 
-f $F ] ; then + get ${ENSEMBL_GRCm38_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +CMD="${HISAT2_BUILD_EXE} genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grcm38_snp.sh b/scripts/make_grcm38_snp.sh new file mode 100644 index 0000000..fe52421 --- /dev/null +++ b/scripts/make_grcm38_snp.sh @@ -0,0 +1,78 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCm37 release 81 version of M. Musculus (mouse) from +# Ensembl. +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCm38_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/mus_musculus/dna + +DBSNP_RELEASE=142 +SNP_FILE=snp${DBSNP_RELEASE}Common.txt +UCSC_COMMON_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/mm10/database/${SNP_FILE} + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! 
which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +rm -f genome.fa +F=Mus_musculus.GRCm38.dna.primary_assembly.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_GRCm38_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + + +if [ ! -f $SNP_FILE ] ; then + get ${UCSC_COMMON_SNP}.gz || (echo "Error getting ${UCSC_COMMON_SNP}" && exit 1) + gunzip ${SNP_FILE}.gz || (echo "Error unzipping ${SNP_FILE}" && exit 1) + awk 'BEGIN{OFS="\t"} {if($2 ~ /^chr/) {$2 = substr($2, 4)}; if($2 == "M") {$2 = "MT"} print}' ${SNP_FILE} > ${SNP_FILE}.tmp + mv ${SNP_FILE}.tmp ${SNP_FILE} + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE} genome +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype genome_snp" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grcm38_snp_tran.sh b/scripts/make_grcm38_snp_tran.sh new file mode 100644 index 0000000..a2b97c3 --- /dev/null +++ b/scripts/make_grcm38_snp_tran.sh @@ -0,0 +1,107 @@ +#!/bin/sh + +# +# Downloads sequence for the GRCm38 release 81 version of M. Musculus (mouse) from +# Ensembl. +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. 
+# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCm38_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/mus_musculus/dna +ENSEMBL_GRCm38_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/mus_musculus +GTF_FILE=Mus_musculus.GRCm38.${ENSEMBL_RELEASE}.gtf + +DBSNP_RELEASE=142 +SNP_FILE=snp${DBSNP_RELEASE}Common.txt +UCSC_COMMON_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/mm10/database/${SNP_FILE} + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Mus_musculus.GRCm38.dna.primary_assembly.fa +if [ ! 
-f $F ] ; then + get ${ENSEMBL_GRCm38_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + + +if [ ! -f $SNP_FILE ] ; then + get ${UCSC_COMMON_SNP}.gz || (echo "Error getting ${UCSC_COMMON_SNP}" && exit 1) + gunzip ${SNP_FILE}.gz || (echo "Error unzipping ${SNP_FILE}" && exit 1) + awk 'BEGIN{OFS="\t"} {if($2 ~ /^chr/) {$2 = substr($2, 4)}; if($2 == "M") {$2 = "MT"} print}' ${SNP_FILE} > ${SNP_FILE}.tmp + mv ${SNP_FILE}.tmp ${SNP_FILE} + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE} genome +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GRCm38_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype --ss genome.ss --exon genome.exon genome_snp_tran" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_grcm38_tran.sh b/scripts/make_grcm38_tran.sh new file mode 100644 index 0000000..4bfd7be --- /dev/null +++ b/scripts/make_grcm38_tran.sh @@ -0,0 +1,85 @@ +#!/bin/sh + +# + +# Downloads sequence for the GRCm38 release 81 version of M. musculus (mouse) from +# Ensembl. +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_GRCm38_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/mus_musculus/dna +ENSEMBL_GRCm38_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/mus_musculus +GTF_FILE=Mus_musculus.GRCm38.${ENSEMBL_RELEASE}.gtf + +get() { + file=$1 + if ! 
wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Mus_musculus.GRCm38.dna.primary_assembly.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_GRCm38_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +if [ ! 
-f $GTF_FILE ] ; then + get ${ENSEMBL_GRCm38_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --ss genome.ss --exon genome.exon genome_tran" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_hg19.sh b/scripts/make_hg19.sh new file mode 100644 index 0000000..1816e38 --- /dev/null +++ b/scripts/make_hg19.sh @@ -0,0 +1,54 @@ +#!/bin/sh + +# +# Downloads sequence for the HG19 version of H. sapiens (human) from +# UCSC. +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +UCSC_HG19_BASE=http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips +F=chromFa.tar.gz + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! 
which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +get ${UCSC_HG19_BASE}/$F || (echo "Error getting $F" && exit 1) +tar xvzfO $F > genome.fa || (echo "Error unzipping $F" && exit 1) +rm $F + +CMD="${HISAT2_BUILD_EXE} genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_hg38.sh b/scripts/make_hg38.sh new file mode 100644 index 0000000..bc35479 --- /dev/null +++ b/scripts/make_hg38.sh @@ -0,0 +1,54 @@ +#!/bin/sh + +# +# Downloads sequence for the HG38 version of H. sapiens (human) from +# UCSC. +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +UCSC_HG38_BASE=http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips +F=hg38.chromFa.tar.gz + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! 
which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +get ${UCSC_HG38_BASE}/$F || (echo "Error getting $F" && exit 1) +tar xvzfO $F > genome.fa || (echo "Error unzipping $F" && exit 1) +rm $F + +CMD="${HISAT2_BUILD_EXE} genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_hg38_allsnp.sh b/scripts/make_hg38_allsnp.sh new file mode 100644 index 0000000..66d64d9 --- /dev/null +++ b/scripts/make_hg38_allsnp.sh @@ -0,0 +1,75 @@ +#!/bin/sh + +# +# Downloads sequence for the HG38 version of H. spiens (human) from +# UCSC. +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +UCSC_HG38_BASE=http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips +F=hg38.chromFa.tar.gz + +DBSNP_RELEASE=144 +SNP_FILE=snp${DBSNP_RELEASE}.txt +UCSC_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/${SNP_FILE} + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! 
which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +rm -f genome.fa +get ${UCSC_HG38_BASE}/$F || (echo "Error getting $F" && exit 1) +tar xvzf $F || (echo "Error unzipping $F" && exit 1) +for i in {1..22}; do cat chroms/chr$i.fa >> genome.fa; done +cat chroms/chr[XYM].fa >> genome.fa +rm $F + +if [ ! -f $SNP_FILE ] ; then + get ${UCSC_SNP}.gz || (echo "Error getting ${UCSC_COMMON_SNP}" && exit 1) + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE}.gz genome +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 --large-index genome.fa --snp genome.snp --haplotype genome.haplotype genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_hg38_snp.sh b/scripts/make_hg38_snp.sh new file mode 100644 index 0000000..1dad607 --- /dev/null +++ b/scripts/make_hg38_snp.sh @@ -0,0 +1,75 @@ +#!/bin/sh + +# +# Downloads sequence for the HG38 version of H. spiens (human) from +# UCSC. +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +UCSC_HG38_BASE=http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips +F=hg38.chromFa.tar.gz + +DBSNP_RELEASE=144 +SNP_FILE=snp${DBSNP_RELEASE}Common.txt +UCSC_COMMON_SNP=http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/${SNP_FILE} + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? 
+ fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SNP_SCRIPT=./hisat2_extract_snps_haplotypes_UCSC.py +if [ ! -x "$HISAT2_SNP_SCRIPT" ] ; then + if ! which hisat2_extract_snps_haplotypes_UCSC.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_UCSC.py in current directory or in PATH" + exit 1 + else + HISAT2_SNP_SCRIPT=`which hisat2_extract_snps_haplotypes_UCSC.py` + fi +fi + +rm -f genome.fa +get ${UCSC_HG38_BASE}/$F || (echo "Error getting $F" && exit 1) +tar xvzf $F || (echo "Error unzipping $F" && exit 1) +for i in {1..22}; do cat chroms/chr$i.fa >> genome.fa; done +cat chroms/chr[XYM].fa >> genome.fa +rm $F + +if [ ! -f $SNP_FILE ] ; then + get ${UCSC_COMMON_SNP}.gz || (echo "Error getting ${UCSC_COMMON_SNP}" && exit 1) + ${HISAT2_SNP_SCRIPT} genome.fa ${SNP_FILE}.gz genome +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_hg38_snp_tran.sh b/scripts/make_hg38_snp_tran.sh new file mode 100644 index 0000000..f1b4162 --- /dev/null +++ b/scripts/make_hg38_snp_tran.sh @@ -0,0 +1,54 @@ +#!/bin/sh + +# +# Downloads sequence for the HG38 version of H. spiens (human) from +# UCSC. +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +UCSC_HG38_BASE=http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips +F=hg38.chromFa.tar.gz + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! 
curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +get ${UCSC_HG38_BASE}/$F || (echo "Error getting $F" && exit 1) +tar xvzfO $F > genome.fa || (echo "Error unzipping $F" && exit 1) +rm $F + +CMD="${HISAT2_BUILD_EXE} genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_mm10.sh b/scripts/make_mm10.sh new file mode 100644 index 0000000..d05af5b --- /dev/null +++ b/scripts/make_mm10.sh @@ -0,0 +1,54 @@ +#!/bin/sh + +# +# Downloads sequence for the MM10 version of M. musculus (mouse) from +# UCSC. +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +UCSC_MM10_BASE=http://hgdownload.cse.ucsc.edu/goldenPath/mm10/bigZips +F=chromFa.tar.gz + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! 
which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +get ${UCSC_MM10_BASE}/$F || (echo "Error getting $F" && exit 1) +tar xvzfO $F > genome.fa || (echo "Error unzipping $F" && exit 1) +rm $F + +CMD="${HISAT2_BUILD_EXE} genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_mm9.sh b/scripts/make_mm9.sh new file mode 100644 index 0000000..e39c92c --- /dev/null +++ b/scripts/make_mm9.sh @@ -0,0 +1,103 @@ +#!/bin/sh + +# +# Downloads sequence for the mm9 version of M. musculus (mouse) from +# UCSC. +# +# Note that UCSC's mm9 build has two categories of compressed fasta +# files: +# +# 1. The base files, named chr??.fa.gz +# 2. The unplaced-sequence files, named chr??_random.fa.gz +# +# By default, this script indexes all these files. To change which +# categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +BASE_CHRS="\ +chr1 \ +chr2 \ +chr3 \ +chr4 \ +chr5 \ +chr6 \ +chr7 \ +chr8 \ +chr9 \ +chr10 \ +chr11 \ +chr12 \ +chr13 \ +chr14 \ +chr15 \ +chr16 \ +chr17 \ +chr18 \ +chr19 \ +chrX \ +chrY \ +chrM" + +RANDOM_CHRS="\ +chr1_random \ +chr3_random \ +chr4_random \ +chr5_random \ +chr7_random \ +chr8_random \ +chr9_random \ +chr13_random \ +chr16_random \ +chr17_random \ +chrX_random \ +chrY_random \ +chrUn_random" + +CHRS_TO_INDEX="$BASE_CHRS $RANDOM_CHRS" + +UCSC_MM9_BASE=ftp://hgdownload.cse.ucsc.edu/goldenPath/mm9/chromosomes + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +BOWTIE_BUILD_EXE=./bowtie2-build +if [ ! -x "$BOWTIE_BUILD_EXE" ] ; then + if ! 
which bowtie2-build ; then + echo "Could not find bowtie2-build in current directory or in PATH" + exit 1 + else + BOWTIE_BUILD_EXE=`which bowtie2-build` + fi +fi + +INPUTS= +for c in $CHRS_TO_INDEX ; do + if [ ! -f ${c}.fa ] ; then + F=${c}.fa.gz + get ${UCSC_MM9_BASE}/$F || (echo "Error getting $F" && exit 1) + gunzip $F || (echo "Error unzipping $F" && exit 1) + fi + [ -n "$INPUTS" ] && INPUTS=$INPUTS,${c}.fa + [ -z "$INPUTS" ] && INPUTS=${c}.fa +done + +CMD="${BOWTIE_BUILD_EXE} $* ${INPUTS} mm9" +echo Running $CMD +if $CMD ; then + echo "mm9 index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_r64.sh b/scripts/make_r64.sh new file mode 100644 index 0000000..b0089af --- /dev/null +++ b/scripts/make_r64.sh @@ -0,0 +1,60 @@ +#!/bin/sh + +# +# Downloads sequence for the R64-1-1 release 84 version of saccharomyces cerevisiae (yeast) from +# Ensembl. +# +# Note that Ensembl's build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/saccharomyces_cerevisiae/dna + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! 
which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +F=Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_r64_tran.sh b/scripts/make_r64_tran.sh new file mode 100644 index 0000000..7310a7d --- /dev/null +++ b/scripts/make_r64_tran.sh @@ -0,0 +1,89 @@ +#!/bin/sh + +# +# Downloads sequence for the R64-1-1 release 84 version of saccharomyces cerevisiae (yeast) from +# Ensembl. +# +# Note that Ensembl's build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/saccharomyces_cerevisiae/dna +ENSEMBL_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/saccharomyces_cerevisiae +GTF_FILE=Saccharomyces_cerevisiae.R64-1-1.${ENSEMBL_RELEASE}.gtf + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! 
which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --ss genome.ss --exon genome.exon genome_tran" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_rn4.sh b/scripts/make_rn4.sh new file mode 100644 index 0000000..7ac9209 --- /dev/null +++ b/scripts/make_rn4.sh @@ -0,0 +1,96 @@ +#!/bin/sh + +# +# Downloads sequence for the rn4 version of R. norvegicus (rat) from +# UCSC. +# +# Note that UCSC's rn4 build has two categories of compressed fasta +# files: +# +# 1. The base files, named chr??.fa.gz +# 2. 
The unplaced-sequence files, named chr??_random.fa.gz +# +# By default, this script indexes all these files. To change which +# categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +i=2 +BASE_CHRS=chr1 +while [ $i -lt 21 ] ; do + BASE_CHRS="$BASE_CHRS chr$i" + i=`expr $i + 1` +done +BASE_CHRS="$BASE_CHRS chrX chrM chrUn" + +RANDOM_CHRS="\ +chr1_random \ +chr2_random \ +chr3_random \ +chr4_random \ +chr5_random \ +chr6_random \ +chr7_random \ +chr8_random \ +chr9_random \ +chr10_random \ +chr11_random \ +chr12_random \ +chr13_random \ +chr14_random \ +chr15_random \ +chr16_random \ +chr17_random \ +chr18_random \ +chr19_random \ +chr20_random \ +chrX_random \ +chrUn_random" + +CHRS_TO_INDEX="$BASE_CHRS $RANDOM_CHRS" + +RN4_BASE=ftp://hgdownload.cse.ucsc.edu/goldenPath/rn4/chromosomes + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +BOWTIE_BUILD_EXE=./bowtie2-build +if [ ! -x "$BOWTIE_BUILD_EXE" ] ; then + if ! which bowtie2-build ; then + echo "Could not find bowtie2-build in current directory or in PATH" + exit 1 + else + BOWTIE_BUILD_EXE=`which bowtie2-build` + fi +fi + +INPUTS= +for c in $CHRS_TO_INDEX ; do + if [ ! 
-f ${c}.fa ] ; then + F=${c}.fa.gz + get ${RN4_BASE}/$F || (echo "Error getting $F" && exit 1) + gunzip $F || (echo "Error unzipping $F" && exit 1) + fi + [ -n "$INPUTS" ] && INPUTS=$INPUTS,${c}.fa + [ -z "$INPUTS" ] && INPUTS=${c}.fa +done + +CMD="${BOWTIE_BUILD_EXE} $* ${INPUTS} rn4" +echo Running $CMD +if $CMD ; then + echo "rn4 index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_rn6.sh b/scripts/make_rn6.sh new file mode 100644 index 0000000..7baa90e --- /dev/null +++ b/scripts/make_rn6.sh @@ -0,0 +1,57 @@ +#!/bin/sh + +# +# Downloads sequence for the rn6 version of R. norvegicus (rat) from +# UCSC. +# +# Note that UCSC's rn6 build has two categories of compressed fasta +# files: +# +# 1. The base files, named chr??.fa.gz +# 2. The unplaced-sequence files, named chr??_random.fa.gz +# +# By default, this script indexes all these files. To change which +# categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +RN6_BASE=ftp://hgdownload.cse.ucsc.edu/goldenPath/rn6/bigZips +F=rn6.fa.gz + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! 
which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +get ${RN6_BASE}/$F || (echo "Error getting $F" && exit 1) +gzip -cd $F > genome.fa || (echo "Error unzipping $F" && exit 1) +rm $F + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_rnor6.sh b/scripts/make_rnor6.sh new file mode 100644 index 0000000..294ea12 --- /dev/null +++ b/scripts/make_rnor6.sh @@ -0,0 +1,60 @@ +#!/bin/sh + +# +# Downloads sequence for the RNor_6.0 release 84 version of rattus_norvegicus (rat) from +# Ensembl. +# +# Note that Ensembl's build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_RNOR6_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/rattus_norvegicus/dna + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +F=Rattus_norvegicus.Rnor_6.0.dna.toplevel.fa +if [ ! 
-f $F ] ; then + get ${ENSEMBL_RNOR6_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_rnor6_tran.sh b/scripts/make_rnor6_tran.sh new file mode 100644 index 0000000..8392a0b --- /dev/null +++ b/scripts/make_rnor6_tran.sh @@ -0,0 +1,89 @@ +#!/bin/sh + +# +# Downloads sequence for the RNor_6.0 release 84 version of rattus_norvegicus (rat) from +# Ensembl. +# +# Note that Ensembl's build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_RNOR6_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/rattus_norvegicus/dna +ENSEMBL_RNOR6_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/rattus_norvegicus +GTF_FILE=Rattus_norvegicus.Rnor_6.0.${ENSEMBL_RELEASE}.gtf + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! 
which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Rattus_norvegicus.Rnor_6.0.dna.toplevel.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_RNOR6_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_RNOR6_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --ss genome.ss --exon genome.exon genome_tran" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_sc3.sh b/scripts/make_sc3.sh new file mode 100644 index 0000000..bb5c393 --- /dev/null +++ b/scripts/make_sc3.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +SC3_BASE=ftp://hgdownload.cse.ucsc.edu/goldenPath/sacCer3/bigZips +F=chromFa.tar.gz + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! 
which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +get ${SC3_BASE}/$F || (echo "Error getting $F" && exit 1) +tar xvzfO $F > genome.fa || (echo "Error unzipping $F" && exit 1) +rm $F + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_wbcel235.sh b/scripts/make_wbcel235.sh new file mode 100644 index 0000000..2c23a0d --- /dev/null +++ b/scripts/make_wbcel235.sh @@ -0,0 +1,60 @@ +#!/bin/sh + +# +# Downloads sequence for the WBcel235 release 84 version of caenorhabditis elegans (worm) from +# Ensembl. +# +# Note that Ensembl's build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/caenorhabditis_elegans/dna + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +rm -f genome.fa +F=Caenorhabditis_elegans.WBcel235.dna.toplevel.fa +if [ ! 
-f $F ] ; then + get ${ENSEMBL_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa genome" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_wbcel235_tran.sh b/scripts/make_wbcel235_tran.sh new file mode 100644 index 0000000..eb7e208 --- /dev/null +++ b/scripts/make_wbcel235_tran.sh @@ -0,0 +1,89 @@ +#!/bin/sh + +# +# Downloads sequence for the WBcel235 release 84 version of caenorhabditis elegans (worm) from +# Ensembl. +# +# Note that Ensembl's build has three categories of compressed fasta +# files: +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. +# + +ENSEMBL_RELEASE=84 +ENSEMBL_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/fasta/caenorhabditis_elegans/dna +ENSEMBL_GTF_BASE=ftp://ftp.ensembl.org/pub/release-${ENSEMBL_RELEASE}/gtf/caenorhabditis_elegans +GTF_FILE=Caenorhabditis_elegans.WBcel235.${ENSEMBL_RELEASE}.gtf + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! 
which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if ! which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Caenorhabditis_elegans.WBcel235.dna.toplevel.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --ss genome.ss --exon genome.exon genome_tran" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/make_zm3_snp_tran_ercc.sh b/scripts/make_zm3_snp_tran_ercc.sh new file mode 100644 index 0000000..adefca5 --- /dev/null +++ b/scripts/make_zm3_snp_tran_ercc.sh @@ -0,0 +1,110 @@ +#!/bin/sh + +# +# Downloads sequence for Zea mays AGPv3.31 from +# Ensembl. +# +# The base files, named ??.fa.gz +# +# By default, this script builds and index for just the base files, +# since alignments to those sequences are the most useful. To change +# which categories are built by this script, edit the CHRS_TO_INDEX +# variable below. 
+# + +ENSEMBL_RELEASE=31 +ENSEMBL_DNA_BASE=ftp://ftp.ensemblgenomes.org/pub/plants/release-${ENSEMBL_RELEASE}/fasta/zea_mays/dna/ +ENSEMBL_GTF_BASE=ftp://ftp.ensemblgenomes.org/pub/plants/release-${ENSEMBL_RELEASE}/gtf/zea_mays/ +GTF_FILE=Zea_mays.AGPv3.31.gtf + +ENSEMBL_VCF_BASE=ftp://ftp.ensemblgenomes.org/pub/plants/release-${ENSEMBL_RELEASE}/vcf/zea_mays/ +VCF_FILE=zea_mays.vcf.gz +ERCC_FILE=ERCC92 +ERCC_FTP=https://tools.thermofisher.com/content/sfs/manuals/${ERCC_FILE}.zip + +get() { + file=$1 + if ! wget --version >/dev/null 2>/dev/null ; then + if ! curl --version >/dev/null 2>/dev/null ; then + echo "Please install wget or curl somewhere in your PATH" + exit 1 + fi + curl -o `basename $1` $1 + return $? + else + wget $1 + return $? + fi +} + +HISAT2_BUILD_EXE=./hisat2-build +if [ ! -x "$HISAT2_BUILD_EXE" ] ; then + if ! which hisat2-build ; then + echo "Could not find hisat2-build in current directory or in PATH" + exit 1 + else + HISAT2_BUILD_EXE=`which hisat2-build` + fi +fi + +HISAT2_VCF_SCRIPT=./hisat2_extract_snps_haplotypes_VCF.py +if [ ! -x "$HISAT2_VCF_SCRIPT" ] ; then + if ! which hisat2_extract_snps_haplotypes_VCF.py ; then + echo "Could not find hisat2_extract_snps_haplotypes_VCF.py in current directory or in PATH" + exit 1 + else + HISAT2_VCF_SCRIPT=`which hisat2_extract_snps_haplotypes_VCF.py` # must set the variable the VCF extraction step runs + fi +fi + +HISAT2_SS_SCRIPT=./hisat2_extract_splice_sites.py +if [ ! -x "$HISAT2_SS_SCRIPT" ] ; then + if ! which hisat2_extract_splice_sites.py ; then + echo "Could not find hisat2_extract_splice_sites.py in current directory or in PATH" + exit 1 + else + HISAT2_SS_SCRIPT=`which hisat2_extract_splice_sites.py` + fi +fi + +HISAT2_EXON_SCRIPT=./hisat2_extract_exons.py +if [ ! -x "$HISAT2_EXON_SCRIPT" ] ; then + if !
which hisat2_extract_exons.py ; then + echo "Could not find hisat2_extract_exons.py in current directory or in PATH" + exit 1 + else + HISAT2_EXON_SCRIPT=`which hisat2_extract_exons.py` + fi +fi + +rm -f genome.fa +F=Zea_mays.AGPv3.31.dna.genome.fa +if [ ! -f $F ] ; then + get ${ENSEMBL_DNA_BASE}/$F.gz || (echo "Error getting $F" && exit 1) + gunzip $F.gz || (echo "Error unzipping $F" && exit 1) + mv $F genome.fa + get ${ERCC_FTP} || (echo "Error getting ${ERCC_FILE}.zip" && exit 1) + unzip ${ERCC_FILE}.zip + cat ${ERCC_FILE}.fa >> genome.fa +fi + +if [ ! -f $GTF_FILE ] ; then + get ${ENSEMBL_GTF_BASE}/${GTF_FILE}.gz || (echo "Error getting ${GTF_FILE}" && exit 1) + gunzip ${GTF_FILE}.gz || (echo "Error unzipping ${GTF_FILE}" && exit 1) + cat ${ERCC_FILE}.gtf >> ${GTF_FILE} + ${HISAT2_SS_SCRIPT} ${GTF_FILE} > genome.ss + ${HISAT2_EXON_SCRIPT} ${GTF_FILE} > genome.exon +fi + +if [ ! -f $VCF_FILE ] ; then + get ${ENSEMBL_VCF_BASE}/${VCF_FILE} || (echo "Error getting ${ENSEMBL_VCF_BASE}/${VCF_FILE}" && exit 1) + ${HISAT2_VCF_SCRIPT} --non-rs genome.fa ${VCF_FILE} genome +fi + +CMD="${HISAT2_BUILD_EXE} -p 4 genome.fa --snp genome.snp --haplotype genome.haplotype --ss genome.ss --exon genome.exon genome_snp_tran_ercc" +echo Running $CMD +if $CMD ; then + echo "genome index built; you may remove fasta files" +else + echo "Index building failed; see error message" +fi diff --git a/scripts/sa.py b/scripts/sa.py new file mode 100644 index 0000000..d27857d --- /dev/null +++ b/scripts/sa.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +""" +sa.py + +Parse and possibly sanity-check a .sa file output by bowtie2-build in --sa +mode. These files have a very simple format: first is a uint32_t containing +the length of the suffix array, the rest is an array of that many uint32_ts +containing the suffix array. 
+""" + +import sys +import struct + +def loadBowtieSa(fh): + """ Load a .sa file from handle into an array of ints """ + nsa = struct.unpack('I', fh.read(4))[0] + return [ struct.unpack('I', fh.read(4))[0] for i in xrange(0, nsa) ] + +def loadBowtieSaFilename(fn): + """ Load a .sa file from filename into an array of ints """ + with open(fn, 'rb') as fh: + return loadBowtieSa(fh) + +def loadFasta(fns): + """ Load the concatenation of all the A/C/G/T characters """ + falist = [] + dna = set(['A', 'C', 'G', 'T', 'a', 'c', 'g', 't']) + for fn in fns: + with open(fn, 'r') as fh: + for line in fh: + if line[0] == '>': + continue + for c in line: + if c in dna: + falist.append(c) + return ''.join(falist) + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(\ + description='Parse suffix array built from bowtie2-build') + + parser.add_argument(\ + '--sa', metavar='string', required=True, type=str, + help='Suffix array file') + parser.add_argument(\ + '--fa', metavar='string', type=str, nargs='+', help='FASTA file') + + args = parser.parse_args() + + def go(): + ref = None + if args.fa is not None: + ref = loadFasta(args.fa) + sas = loadBowtieSaFilename(args.sa) + # Suffix array is in sas; note that $ is considered greater than all + # other characters + if ref is not None: + for i in xrange(1, len(sas)): + sa1, sa2 = sas[i-1], sas[i] + assert sa1 != sa2 + # Sanity check that suffixes are really in order + while sa1 < len(ref) and sa2 < len(ref): + if ref[sa1] < ref[sa2]: + break + assert ref[sa1] == ref[sa2] + sa1 += 1 + sa2 += 1 + else: + # Note: Bowtie treats $ as greater than all other + # characters; so if these strings are tied up to the end of + # one or the other, the longer string is prior + assert sa1 < sa2, "%d, %d" % (sas[i-1], sas[i]) + assert sas[-1] == len(ref) + + go() + \ No newline at end of file diff --git a/scripts/sim/AlignmentCheck.pm b/scripts/sim/AlignmentCheck.pm new file mode 100644 index 0000000..fb259c4 --- 
/dev/null +++ b/scripts/sim/AlignmentCheck.pm @@ -0,0 +1,859 @@ +#!/usr/bin/perl -w + +# +# Copyright 2011, Ben Langmead +# +# This file is part of Bowtie 2. +# +# Bowtie 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Bowtie 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Bowtie 2. If not, see . +# + +## +# AlignmentCheck.pm +# +# Read in fasta files containing reference sequences that might be +# aligned to, then read in alignment files, checking each alignment to +# be sure it's sane and consistent with the reference sequence it +# aligns to. 
+# + +package AlignmentCheck; +use strict; +use warnings; +use FindBin qw($Bin); +use lib $Bin; +use DNA; +use Data::Dumper; + +## +# Parse a fasta file into the %ref hash: each ">" header starts a new +# entry in $ref->{std}, keyed by the header text up to the first whitespace +# +sub parseFasta($$) { + my ($fa, $ref) = @_; + print STDERR "Parsing FASTA file $fa...\n"; + my $fapipe = "$fa"; + $fapipe = "gzip -dc $fa |" if $fa =~ /\.gz$/; + open(FA, $fapipe) || die "Could not open '$fapipe' for reading"; + my $name = ""; + my $bases = 0; + while(<FA>) { # read the opened FA handle one line at a time into $_ + chomp; + if(/^>/) { + $name = substr($_, 1); + $name =~ s/\s.*//; + print STDERR " parsing sequence $name...\n"; + $ref->{std}{$name} = ""; + } else { + $name ne "" || die "sequence before name"; + $bases += length($_); + $ref->{std}{$name} .= $_; + } + } + print STDERR "Parsed $bases reference bases\n"; + close(FA); +} + +## +# Create a new alignment checker +# +sub new { + my ( + $class, + $name, # name of checker + $fas, # list of fasta files containing reference sequences + # or hash of the references themselves (former if it's + # an array ref latter if it's a hash ref) + $format, # alignment format + $bisC, # whether alignment was w/r/t ref w/ all Cs converted to Ys + $bisCpG, # whether alignment was w/r/t ref w/ all CpGs converted to YpGs + ) = @_; + (defined($fas)) || die "Must specify non-empty list of fasta files"; + # Set defaults for format, bisC, bisCpG, name + $format = "bowtie" unless defined($format); + $bisC = 0 unless defined($bisC); + $bisCpG = 0 unless defined($bisCpG); + $name = "noname" unless defined($name); + # Parse all the fasta files into the ref hash + my %ref = (); + if(ref($fas) eq "HASH") { + for (keys %$fas) { + $ref{std}{$_} = $fas->{$_} + } + } else { + ref($fas) eq "ARRAY" || die; + foreach (@$fas) { parseFasta($_, \%ref); } + } + return bless { + _name => $name, + _fas => $fas, + _format => $format, + _bisC => $bisC, + _bisCpG => $bisCpG, + _refs => \%ref, + _nals => 0, + _nedits => 0 + }, $class; +} +sub name { return $_[0]->{_name} } +sub fas { return $_[0]->{_fas} } +sub format {
return $_[0]->{_format} } +sub bisC { return $_[0]->{_bisC} } +sub bisCpG { return $_[0]->{_bisCpG} } +sub refs { return $_[0]->{_refs} } +sub nals { return $_[0]->{_nals} } +sub nedits { return $_[0]->{_nedits} } +sub nrefs { return scalar(keys %{$_[0]->{_refs}}); } + +## +# Given a sequence that represents the read oriented s.t. the Watson- +# upstream end is on the left, and given a set of edits to apply to the +# read, also oriented assuming that Watson-upstream is on the left, +# return the string corresponding to the read mutated to match the +# reference. +# +my $nedits = 0; +sub applyEdits($$$) { + my ($seq, $edits, $line) = @_; + my $rfseq = $seq; + my $lpos = length($seq)+1; + $nedits += scalar(@$edits); + foreach (reverse @$edits) { + next unless defined($_); + #print STDERR "Applying edit at $_->{pos}\n"; + # Apply the edit + $_->{pos} <= $lpos || die "Edit position $_->{pos} was greater than previous $lpos"; + if($_->{qchr} eq "-") { + # Insert + $_->{pos} <= length($rfseq) || die "Read gap pos $_->{pos} was not <= string len ".length($rfseq)."\n$line"; + substr($rfseq, $_->{pos}, 0) = $_->{chr}; + } elsif($_->{chr} eq "-") { + # Deletion + $_->{pos} < length($rfseq) || die "Ref gap pos $_->{pos} was not < string len ".length($rfseq)."\n$line"; + my $dc = substr($rfseq, $_->{pos}, 1); + $dc eq $_->{qchr} || + die "Edit: $_->{pos}:$_->{chr}>$_->{qchr} but ref char was $dc". + "\n$rfseq\n$line"; + substr($rfseq, $_->{pos}, 1) = ""; + } else { + # Mismatch + $_->{pos} < length($rfseq) || die "Mismatch pos $_->{pos} was not < string len ".length($rfseq)."\n$line"; + substr($rfseq, $_->{pos}, 1) eq $_->{qchr} || + die "Edit: $_->{pos}:$_->{chr}>$_->{qchr}\n$rfseq\n$line"; + substr($rfseq, $_->{pos}, 1) = $_->{chr}; + } + } + return $rfseq; +} + +## +# Given a list of Bowtie edits, invert them by reversing the list and +# changing the poss to be with respect to the other end of the read. 
+#
+sub invertEdits($$) {
+    my ($edits, $len) = @_;
+    @$edits = reverse @$edits;
+    foreach my $ed (@$edits) {
+        next if !defined($ed);
+        die unless defined($ed->{qchr});
+        if($ed->{qchr} ne "-") {
+            # Mismatch or deletion: occupies one read position
+            $ed->{pos} = $len - $ed->{pos} - 1;
+            die unless length($ed->{chr}) == 1;
+            next;
+        }
+        # Insertion: offset falls between positions; inserted string flips
+        $ed->{pos} = $len - $ed->{pos};
+        die unless length($ed->{chr}) >= 1;
+        $ed->{chr} = reverse $ed->{chr};
+    }
+}
+
+##
+# Turn a comma-separated edit string ("pos:chr>qchr,...") into an array
+# ref of { pos, chr, qchr } hashes.  Returns undef for an undefined,
+# empty or "-" string.
+#
+sub parseEdits($) {
+    my ($editstr) = @_;
+    return undef unless defined($editstr);
+    return undef if $editstr eq "-" || $editstr eq "";
+    my @parsed;
+    foreach my $tok (split(/,/, $editstr)) {
+        # "pos:chr>qchr"
+        my ($pos, $ed) = split(/:/, $tok);
+        die unless defined($ed);
+        my ($chr, $qchr) = split(/>/, $ed);
+        push @parsed, { pos => $pos, chr => $chr, qchr => $qchr };
+    }
+    return \@parsed;
+}
+
+##
+# Split an edit string on ';' and parse every field with parseEdits.
+# Always returns a 4-element array ref: either all four fields were
+# present, or only the first was and the rest are undef.  Any other
+# field count is fatal.
+#
+sub parseAllEdits($) {
+    my ($editstr) = @_;
+    if($editstr eq "-" || $editstr eq "") {
+        return [ undef, undef, undef, undef ];
+    }
+    my @fields = split(/;/, $editstr, -1);
+    my $nfields = scalar(@fields);
+    if($nfields > 1) {
+        die unless $nfields == 4;
+        return [ map { parseEdits($_) } @fields ];
+    }
+    die unless $nfields == 1;
+    return [ parseEdits($fields[0]), undef, undef, undef ];
+}
+
+##
+# Given array refs for two lists of edits, one corresponding to the
+# nucleotide edit list and the other corresponding to the resolved
+# ambiguous base list, eliminate any edits that appear in both lists.
+# Really this shouldn't happen, but I observe that merman does report
+# an edit in both categories if the reference base is being resolved to
+# an incompatible nucleotide, e.g. R>C.
+#
+sub removeDups($$) {
+    my ($ned, $aed) = @_;
+    return unless (defined($ned) && defined($aed));
+    for my $i (0..scalar(@$ned)-1) {
+        next unless defined($ned->[$i]);
+        for my $j (0..scalar(@$aed)-1) {
+            next unless defined($aed->[$j]);
+            if($ned->[$i]->{qchr} eq $aed->[$j]->{qchr} &&
+               $ned->[$i]->{chr}  eq $aed->[$j]->{chr} &&
+               $ned->[$i]->{pos}  eq $aed->[$j]->{pos})
+            {
+                #print STDERR "  Eliminated a duplicate edit\n";
+                # Blank the duplicate instead of splicing so that the
+                # indexes of the remaining entries do not shift.
+                $aed->[$j] = undef;
+            }
+        }
+    }
+}
+
+##
+# Take all the references in %ref and make both Watson and Crick
+# versions where the sequence is in-silico bisulfite treated: every C
+# becomes Y in {bisc_fw} and every G becomes R in {bisc_rc}.
+#
+sub bisulfiteC($) {
+    my $ref = shift;
+    for my $name (keys %{$ref->{std}}) {
+        # Bind with =~ so the substitution edits the copied sequence;
+        # a plain "=" would store the substitution count instead.
+        $ref->{bisc_fw}{$name} = $ref->{std}{$name};
+        $ref->{bisc_fw}{$name} =~ s/C/Y/g;
+        $ref->{bisc_rc}{$name} = $ref->{std}{$name};
+        $ref->{bisc_rc}{$name} =~ s/G/R/g;
+    }
+}
+
+##
+# Take all the references in %ref and make both Watson and Crick
+# versions for the CpG-only case: in {biscpg_fw} a C followed by G
+# becomes Y and every other C becomes T; in {biscpg_rc} a G preceded by
+# C becomes R and every other G becomes A.
+#
+sub bisulfiteCpG($) {
+    my $ref = shift;
+    for my $name (keys %{$ref->{std}}) {
+        $ref->{biscpg_fw}{$name} = $ref->{std}{$name};
+        $ref->{biscpg_fw}{$name} =~ s/CG/YG/g;
+        $ref->{biscpg_fw}{$name} =~ s/C/T/g;
+        $ref->{biscpg_rc}{$name} = $ref->{std}{$name};
+        $ref->{biscpg_rc}{$name} =~ s/CG/CR/g;
+        $ref->{biscpg_rc}{$name} =~ s/G/A/g;
+    }
+}
+
+##
+# Given a Bowtie orientation string, return true iff the 5' end of the
+# read is at the left of the printed sequence.
+#
+sub fivePrimeLeft($) {
+    return ($_[0] eq "+" || $_[0] eq "W" || $_[0] eq "CR");
+}
+
+##
+# Given the orientation of the read and the state of the global
+# bisulfite variables, determine which version of the reference to
+# compare against.  Returns one of the keys of $self->refs; dies on an
+# unrecognized orientation when a bisulfite mode is active.
+#
+sub calcRefType {
+    my ($self, $orient) = @_;
+    if($self->bisC || $self->bisCpG) {
+        if($orient eq "W" || $orient eq "WR" || $orient eq "+") {
+            return $self->bisC ? "bisc_fw" : "biscpg_fw";
+        } else {
+            $orient eq "C" || $orient eq "CR" || $orient eq "-" || die;
+            return $self->bisC ? "bisc_rc" : "biscpg_rc";
+        }
+    } else {
+        return "std";
+    }
+}
+
+##
+# Parse a CIGAR string into parallel arrays of CIGAR operations (M, I, D)
+# and run lengths.  Results are appended to @$ops and @$runs.
+#
+sub cigarParse($$$) {
+    my ($cigar, $ops, $runs) = @_;
+    my $i = 0;
+    while($i < length($cigar)) {
+        # Test the match itself: $1 is not reset by a failed match and
+        # could still hold an earlier capture.
+        substr($cigar, $i) =~ /^([0-9]+)/ ||
+            die "Could not parse number at pos $i: '$cigar'";
+        my $run = $1;
+        $i += length($run);
+        $i < length($cigar) || die;
+        push @$runs, $run;
+        my $op = substr($cigar, $i, 1);
+        defined($op) || die "Could not parse operation at pos $i: '$cigar'";
+        push @$ops, $op;
+        $i++;
+    }
+}
+
+##
+# Trim a read sequence according to the soft clipping in the CIGAR string.
+# Returns (trimmed sequence, # bases trimmed on left, # trimmed on right).
+#
+sub cigarTrim($$) {
+    my ($seq, $cigar) = @_;
+    my @ops = ();
+    my @runs = ();
+    cigarParse($cigar, \@ops, \@runs);
+    my ($trimup, $trimdn) = (0, 0);
+    if($ops[0] eq 'S') {
+        $runs[0] < length($seq) || die "Soft clipped entire alignment!";
+        $seq = substr($seq, $runs[0]);
+        $trimup = $runs[0];
+    }
+    if(scalar(@ops) > 1 && $ops[-1] eq 'S') {
+        $runs[-1] < length($seq) || die "Soft clipped entire alignment!";
+        $seq = substr($seq, 0, -$runs[-1]);
+        $trimdn = $runs[-1];
+    }
+    return ($seq, $trimup, $trimdn);
+}
+
+##
+# Parse a CIGAR string into a string of operators.  Operators are expanded into
+# runs where appropriate.  = and X are collapsed into M.  Returns
+# (expanded op string, read length, reference length).
+#
+# NOTE(review): every op other than D counts toward the read length and
+# every op other than I toward the reference length, so H/P/N (and S for
+# the reference) are counted too - confirm against the SAM definition
+# of which ops consume query/reference.
+#
+sub parse_cigar($) {
+    my ($cigar) = @_;
+    my $ret = "";
+    my $i = 0;
+    my ($rdlen, $rflen) = (0, 0);
+    while($i < length($cigar)) {
+        # See cigarParse: do not rely on $1 after a failed match.
+        substr($cigar, $i) =~ /^([0-9]+)/ ||
+            die "Could not parse number at pos $i: '$cigar'";
+        my $runlen = $1;
+        $i += length($runlen);
+        $i < length($cigar) || die;
+        my $op = substr($cigar, $i, 1);
+        defined($op) || die "Could not parse operation at pos $i: '$cigar'";
+        if($op eq "X" || $op eq "=") {
+            $op = "M";
+        }
+        $rdlen += $runlen if $op ne "D";
+        $rflen += $runlen if $op ne "I";
+        $ret .= ($op x $runlen);
+        $i++;
+    }
+    return ($ret, $rdlen, $rflen);
+}
+
+##
+# Parse an MD:Z string into a string with length equal to query length.
+# Each position contains either a space, if the read matches the
+# reference at that position, or a character, if the reference contains
+# a character that doesn't match its opposite in the alignment.  In the
+# latter case, the character in the string is the reference character.
+# Reference characters deleted from the read (the letters following a
+# '^') are emitted as well, so the result is longer than the query when
+# the MD:Z string contains deletions.
+#
+sub parse_md($) {
+    my ($md) = @_;
+    my $i = 0;
+    my $ret = "";
+    while($i < length($md)) {
+        # Starts with a number?
+        my $ch = substr($md, $i, 1);
+        if($ch =~ /[0-9]/) {
+            # Parse the number off the beginning
+            substr($md, $i) =~ /^([0-9]+)/ ||
+                die "Could not parse number at pos $i: '$md'";
+            my $runlen = $1;
+            $ret .= (" " x $runlen) if $runlen > 0;
+            # $runlen is still the matched text, so this skips exactly
+            # the digits consumed (leading zeros included).
+            $i += length($runlen);
+        } elsif($ch eq "^") {
+            # Read gap
+            $i++;
+            # Test the match itself: $1 is not reset by a failed match,
+            # so checking defined($1) could accept a stale capture.
+            substr($md, $i) =~ /^([A-Za-z]+)/ ||
+                die "Could not parse read gap at pos $i: '$md'";
+            my $chrs = $1;
+            $i += length($chrs);
+            $ret .= $chrs;
+        } else {
+            # DNA character
+            $ch =~ /[ACGTN.]/i || die "Bad char '$ch' at pos $i: '$md'";
+            $ret .= $ch;
+            $i++;
+        }
+    }
+    return $ret;
+}
+
+##
+# Given a read sequence (with characters in upstream-to-downstream order with
+# respect to the reference - NOT necessarily 5'-to-3') and a CIGAR string and
+# an MD:Z string, build the alignment strings.  The alignment strings will only
+# contain the portion of the read that aligned.  Any portions that were either
+# hard-trimmed or soft-trimmed are trimmed from this function's result.
+#
+# For now, I'm assuming that the MD:Z string only describes aligned characters,
+# i.e. *after* trimming.
+#
+# Returns ($alread, $alref, $htriml, $striml, $htrimr, $strimr): the
+# gapped read and reference strings ('-' marks a gap) followed by the
+# number of hard-/soft-trimmed positions on the left and right.
+#
+sub _read_md_cigar_to_al($$$) {
+    my ($seq, $md, $cigar) = @_;
+    my $alread = "";
+    my $alref = "";
+    $cigar ne "*" || die "CIGAR string was star!";
+    $seq ne "" || die "Empty sequence given to _read_md_cigar_to_al";
+    my $parsed_md = parse_md($md);
+    my ($parsed_cig, $cigar_rdlen, $cigar_rflen) = parse_cigar($cigar);
+    my ($rdoff, $mdoff) = (0, 0);
+    my ($htriml, $striml, $htrimr, $strimr) = (0, 0, 0, 0);
+    my $nonsh = 0; # have I seen a non-S, non-H CIGAR op?
+    my $nonh = 0;  # have I seen a non-H CIGAR op?
+    for(my $i = 0; $i < length($parsed_cig); $i++) {
+        my $cigop = substr($parsed_cig, $i, 1);
+        $nonh++ if $cigop ne "H";
+        $nonsh++ if ($cigop ne "H" && $cigop ne "S");
+        if($cigop eq "S") {
+            # Soft clip: consumes a read character but is not aligned
+            if($nonsh) {
+                $strimr++;
+            } else {
+                $striml++;
+            }
+            $rdoff++;
+            next;
+        }
+        if($cigop eq "H") {
+            # Hard clip: not present in the read sequence at all
+            if($nonh) {
+                $htrimr++;
+            } else {
+                $htriml++;
+            }
+            next;
+        }
+        $cigop = "M" if $cigop eq "=" || $cigop eq "X";
+        if($cigop eq "P") {
+            # Padding
+            $alread .= "-";
+            $alref .= "-";
+        } elsif($cigop eq "M") {
+            my $rdc = substr($seq, $rdoff, 1);
+            $mdoff < length($parsed_md) ||
+                die "Bad mdoff ($mdoff)\nlength(parsed_md)=".length($parsed_md)."\nseq:\n$seq\ncigar:\n$cigar\nmd:\n$md\nparsed md:\n$parsed_md";
+            my $rfc = substr($parsed_md, $mdoff, 1);
+            # A space in the parsed MD means "same as the read"
+            $rfc = $rdc if $rfc eq " ";
+            $alread .= $rdc;
+            $alref .= $rfc;
+            $rdoff++;
+            $mdoff++;
+        } elsif($cigop eq "D") {
+            # Read gap
+            # Read: AAA-AAA
+            #  Ref: AAAAAAA
+            # Same bounds check as the M case; without it a short MD
+            # string yields an undefined reference character.
+            $mdoff < length($parsed_md) ||
+                die "Bad mdoff ($mdoff)\nlength(parsed_md)=".length($parsed_md)."\nseq:\n$seq\ncigar:\n$cigar\nmd:\n$md\nparsed md:\n$parsed_md";
+            my $rfc = substr($parsed_md, $mdoff, 1);
+            $rfc ne " " ||
+                die "Must have a ref character opposite a gap in the read:\n".
+                    "cig: $parsed_cig ($i)\nmd: $parsed_md ($mdoff)\n";
+            $alread .= "-";
+            $alref .= $rfc;
+            $mdoff++;
+        } else {
+            # Reference gap
+            # Read: AAAAAAA
+            #  Ref: AAA-AAA
+            $cigop eq "I" || die "Unsupported cigop: $cigop in cigar $cigar";
+            my $rdc = substr($seq, $rdoff, 1);
+            $alread .= $rdc;
+            $alref .= "-";
+            $rdoff++;
+            # $mdoff SHOULD NOT be incremented here
+        }
+        $rdoff <= length($seq) ||
+            die "Bad rdoff:$rdoff for seq '$seq' cigop=$cigop\nseq: $seq\ncigar=$cigar\nmd=$md";
+    }
+    return ($alread, $alref, $htriml, $striml, $htrimr, $strimr);
+}
+
+##
+# Parse lines from a Bowtie alignment file and check that each
+# alignment is sane and consistent with the reference.  Each line is
+# tab-separated: read name, orientation, reference name, offset,
+# sequence, qualities, OMS, edit string, flags (the flags must carry an
+# XC:<cigar> entry).  Dies on the first inconsistency; bumps _nals and
+# _nedits for every alignment that passes.
+#
+sub parseBowtieLines {
+    my ($self, $lines) = @_;
+    for my $line (@$lines) {
+        chomp($line);
+        my ($rdname, $orient, $refname, $off, $seq, $qual, $oms, $editstr,
+            $flags) = split(/\t/, $line, -1);
+        next if $refname eq "*";
+        defined($editstr) || die "Failed to get 8 tokens from line:\n$line";
+        # Capture under the match so a failed match (or a missing flags
+        # column) cannot leave us with a stale $1.
+        my $cigar;
+        $cigar = $1 if defined($flags) && $flags =~ /XC:([^,\s]+)/;
+        defined($cigar) ||
+            die "Could not parse CIGAR string from flags: '".
+                (defined($flags) ? $flags : "")."'";
+        $off == int($off) || die "Offset field (col 4) must be an integer:\n$line";
+        $oms == int($oms) || die "OMS field (col 7) must be an integer:\n$line";
+        my $reftype = $self->calcRefType($orient);
+        defined($self->refs->{$reftype}{$refname}) ||
+            die "No such refname as $refname for reftype $reftype:\n".
+                Dumper($self->refs->{$reftype});
+        my $edits4 = parseAllEdits($editstr);
+        my ($ned, $aed) = ($edits4->[0], $edits4->[1]);
+        removeDups($ned, $aed);
+        my $fpl = fivePrimeLeft($orient);
+        # Trim seq according to CIGAR string
+        my $rfseq = $seq;
+        my ($trimup, $trimdn);
+        ($rfseq, $trimup, $trimdn) = cigarTrim($rfseq, $cigar);
+        invertEdits($ned, length($rfseq)) unless ($fpl || !defined($ned));
+        invertEdits($aed, length($rfseq)) unless ($fpl || !defined($aed));
+        # Count the edits actually applied so printSummary can report them
+        for my $el ($ned, $aed) {
+            next unless defined($el);
+            $self->{_nedits} += scalar(grep { defined($_) } @$el);
+        }
+        $rfseq = applyEdits($rfseq, $ned, $line) if defined($ned);
+        $rfseq = applyEdits($rfseq, $aed, $line) if defined($aed);
+        # Check if our alignment falls off the end of the reference, in
+        # which case we need to pad the reference string with Ns
+        my $exoff = $off;
+        my $padleft = "";
+        my $exlen = length($rfseq);
+        my $tlen = length($self->refs->{$reftype}{$refname});
+        if($exoff < 0) {
+            # Alignment hangs off LHS; pad it
+            my $npad = -$exoff;
+            $padleft = "N" x $npad;
+            $exlen += $exoff;
+            $exlen >= 0 ||
+                die "Read was entirely off the LHS of the reference\n".
+                    "Reference len=$tlen\n".
+                    "Alignment reference len=$tlen\n".
+                    "$line\n";
+            $exoff = 0;
+        }
+        my $padright = "";
+        my $roverhang = $off + length($rfseq) - $tlen;
+        if($roverhang > 0) {
+            $padright = "N" x $roverhang;
+            $exlen -= $roverhang;
+            $exlen >= 0 ||
+                die "Read was entirely off the RHS of the reference\n".
+                    "Reference len=$tlen\n".
+                    "Alignment reference len=$tlen\n".
+                    "$line\n";
+        }
+        my $refsub = substr($self->refs->{$reftype}{$refname}, $exoff, $exlen);
+        length($refsub) == $exlen ||
+            die "Tried to extract ref substring of length $exlen, got ".
+                "\"$refsub\" from \"".$self->refs->{$reftype}{$refname}."\"".
+                "\n$line\n".
+                "\noff=$off, rfseq=$rfseq\n";
+        $refsub = DNA::iupacSubN($refsub);
+        my $trueRfseq = $padleft . $refsub . $padright;
+        length($trueRfseq) == length($rfseq) ||
+            die "Different lengths for edited read and ref:\n".
+                " Read: $seq\n".
+                "Edited read: $rfseq\n".
+                " Ref: $trueRfseq\n";
+        $rfseq eq $trueRfseq ||
+            die "Did not match:\n".
+                " Read: $seq\n".
+                "Edited read: $rfseq\n".
+                " Ref: $trueRfseq\n";
+        $self->{_nals}++;
+    }
+}
+
+##
+# Parse lines from a SAM alignment file and check that each alignment
+# is sane and consistent with the reference.  Header lines (starting
+# with '@') and unaligned records (CIGAR "*") are skipped.  A record
+# whose SEQ is "*" reuses the sequence and qualities of the previous
+# record.  The reference side of the alignment is rebuilt from SEQ,
+# CIGAR and MD:Z and compared with the reference at POS.
+#
+sub parseSamLines {
+    my ($self, $lines) = @_;
+    my ($lastseq, $lastqual) = ("", "");
+    my $idx = 0;
+    for my $line (@$lines) {
+        $idx++;
+        print STDERR "Processing line...\n";
+        chomp($line);
+        next if $line =~ /^\@/;
+        my @toks = split(/\t/, $line, -1);
+        my (
+            $qname, #1
+            $flag,  #2
+            $rname, #3
+            $pos,   #4
+            $mapq,  #5
+            $cigar, #6
+            $rnext, #7
+            $pnext, #8
+            $tlen,  #9
+            $seq,   #10
+            $qual) = @toks;
+        defined($qual) || die "Not enough SAM tokens:\n$line\n";
+        my @opt_flags_list = @toks[11..$#toks];
+        my %opt_flags = ();
+        next if $cigar eq "*"; # Failed to align
+        # Get the read sequence & qualities from previous record if necessary
+        if($seq eq "*") {
+            $lastseq ne "" || die "Line #$idx:\n$line";
+            $seq = $lastseq;
+            $qual = $lastqual;
+        } else {
+            $lastseq = $seq;
+            $lastqual = $qual;
+        }
+        $seq ne "*" || die;
+        my ($parsed_cigar, $rdlen_cigar, $rflen_cigar) = parse_cigar($cigar);
+        length($seq) == $rdlen_cigar ||
+            die "Sequence length and parsed cigar string length ($rdlen_cigar) mismatch:\n".
+                "$seq\n$parsed_cigar\nLine:\n$line";
+        # Stick optional flags into a hash
+        for my $fl (@opt_flags_list) {
+            my @fs = split(/:/, $fl, -1);
+            scalar(@fs) > 2 || die "Bad optional flag: $fl\n$line\n";
+            $opt_flags{$fs[0]}{type} = $fs[1];
+            # The value itself may contain ':'; glue it back together
+            $opt_flags{$fs[0]}{value} = join(":", @fs[2..$#fs]);
+        }
+        defined($opt_flags{"MD"}) || die "No MD:Z flag:\n$line\n";
+        $opt_flags{"MD"}{type} eq "Z" || die "Bad type for MD:Z flag\n$line\n";
+        my $md = $opt_flags{"MD"}{value};
+        $pos == int($pos) || die "POS field (col 4) must be an int:\n$line\n";
+        $pnext == int($pnext) || die "PNEXT field (col 8) must be an int:\n$line\n";
+        $tlen == int($tlen) || die "TLEN field (col 9) must be an int:\n$line\n";
+        $mapq == int($mapq) || die "MAPQ field (col 5) must be an int:\n$line\n";
+        # TODO: deal with bisulfite strands??
+        my $fw = (($flag & 0x10) == 0);
+        my $orient = $fw ? "+" : "-";
+        my $reftype = $self->calcRefType($orient);
+        defined($self->refs->{$reftype}{$rname}) ||
+            die "No such refname as $rname for reftype $reftype:\n$line\n".
+                Dumper($self->refs->{$reftype});
+        my $exoff = $pos-1; # expected 0-based reference offset
+        my ($alread, $alref, $htriml, $striml, $htrimr, $strimr) =
+            _read_md_cigar_to_al($seq, $md, $cigar);
+        print STDERR "$alread\n$alref\n";
+        my $rfseq = $alref;
+        $rfseq =~ s/[ -]//g; # remove spaces & gaps
+        my $exlen = length($rfseq);
+        my $refsub = substr($self->refs->{$reftype}{$rname}, $exoff, $exlen);
+        length($refsub) == $exlen ||
+            die "Tried to extract ref substring of length $exlen from:\n".
+                $self->refs->{$reftype}{$rname}.
+                "\ngot string of length ".length($refsub).":\n".
+                $refsub.
+                "\nfrom:\n".
+                $line.
+                "\nexlen is the length of:\n$rfseq\npos=$pos, rfseq=$rfseq\n";
+        $refsub = DNA::iupacSubN($refsub);
+        my $trueRfseq = $refsub;
+        length($trueRfseq) == length($rfseq) ||
+            die "Different lengths for edited read and ref:\n".
+                " Read: $seq\n".
+                "Edited read: $rfseq\n".
+                " Ref: $trueRfseq\n";
+        $rfseq eq $trueRfseq ||
+            die "Did not match:\n".
+                " Read: $seq\n".
+                "Edited read: $rfseq\n".
+                " Ref: $trueRfseq\n".
+                "$line";
+        $self->{_nals}++;
+    }
+}
+
+##
+# Parse lines from a Bowtie alignment file and check that the
+# alignments are sane and consistent with the reference.  Lines are
+# checked one at a time as they are read from $fh.
+#
+sub parseBowtie {
+    my ($self, $fh) = @_;
+    # Lexical loop variable: do not overwrite the caller's $_
+    while(my $ln = <$fh>) {
+        $self->parseBowtieLines([$ln]);
+    }
+}
+
+##
+# Parse lines from a SAM alignment file and check that alignments are
+# sane and consistent with the reference.  The whole file is read first
+# because a record may reuse the previous record's sequence.
+#
+sub parseSam {
+    my ($self, $fh) = @_;
+    my @lines = ();
+    # Lexical loop variable: do not overwrite the caller's $_
+    while(my $ln = <$fh>) { push @lines, $ln; }
+    $self->parseSamLines(\@lines);
+}
+
+##
+# Check an array ref of alignment lines of the type given by
+# self->format ("bowtie" or "sam"; anything else is fatal).
+#
+sub parseLines {
+    my ($self, $lines) = @_;
+    if($self->format eq "bowtie") {
+        $self->parseBowtieLines($lines);
+    } else {
+        $self->format eq "sam" || die;
+        $self->parseSamLines($lines);
+    }
+}
+
+##
+# Check alignments read from filehandle $fh, of the type given by
+# self->format ("bowtie" or "sam"; anything else is fatal).
+#
+sub parse {
+    my ($self, $fh) = @_;
+    if($self->format eq "bowtie") {
+        $self->parseBowtie($fh);
+    } else {
+        $self->format eq "sam" || die;
+        $self->parseSam($fh);
+    }
+}
+
+##
+# Print summary of how many alignments and edits were checked.
+#
+sub printSummary {
+    my $self = shift;
+    # References live one level down, under the "std" reference type;
+    # the top-level keys of refs are reference types.
+    my $std = $self->refs->{std};
+    my $nref = defined($std) ? scalar(keys %$std) : 0;
+    print STDERR "--- Summary ---\n";
+    print STDERR "Read ".$nref." reference strings\n";
+    print STDERR "Checked $self->{_nals} alignments, $self->{_nedits} edits\n";
+    print STDERR "---------------\n";
+    print STDERR "PASSED\n";
+}
+
+##
+# Check the given batch of alignments.  We check that they're
+# internally consistent in some basic ways, and we check that the
+# sequence and edits are consistent with the reference.
+#
+# The $als argument is either a list of (possibly compressed) filenames
+# of files containing alignments, or a list of alignment strings.  If
+# the former, $filenames is non-zero.
+#
+sub checkAlignments {
+    my ($self, $als, $filenames) = @_;
+    if($self->bisC) {
+        print STDERR "Generating all-C bisulfite-treated references\n";
+        bisulfiteC($self->refs);
+    }
+    if($self->bisCpG) {
+        print STDERR "Generating all-CpG bisulfite-treated references\n";
+        bisulfiteCpG($self->refs);
+    }
+    if($filenames) {
+        # Named loop variable: the parsers read the file through $_, and
+        # an implicit foreach $_ would alias (and so overwrite) the
+        # caller's file names.
+        foreach my $alnfile (@$als) {
+            my $alnpipe = $alnfile;
+            print STDERR "Processing alignment file '$alnfile'\n";
+            $alnpipe = "gzip -dc $alnfile |" if ($alnfile =~ /\.gz$/);
+            my $alnfh = undef;
+            open($alnfh, $alnpipe) || die "Could not open '$alnpipe' for reading";
+            $self->parse($alnfh);
+            close($alnfh);
+        }
+    } else {
+        $self->parseLines($als);
+    }
+}
+
+##
+# Check simple alignments given as in-memory reference and lines
+#
+sub test1 {
+    my $ac = AlignmentCheck->new(
+        "AlignmentCheck.pm test1 checker",
+        { "r1" => "TTGTTCGT" },
+        "bowtie",
+        0,
+        0
+    );
+    $ac->checkAlignments([
+        "0\t+\tr1\t1\tTGTTCGT\tIIIIIII\t40\t-",
+        "1\t+\tr1\t0\tTTGTTCG\tIIIIIII\t40\t-",
+        "2\t+\tr1\t2\tGTTCGTA\tIIIIIII\t40\t6:N>A",
+        "3\t+\tr1\t-1\tATTGTTC\tIIIIIII\t40\t0:N>A"], 0);
+    return 1;
+}
+
+##
+# Check simple alignments against a reference read from a FASTA file
+#
+sub test2 {
+    open(TMP, ">/tmp/.AlignmentCheck.pm.fa") || die;
+    print TMP ">r1\n";
+    print TMP "TTGTTCGT\n";
+    close(TMP);
+    my $ac = AlignmentCheck->new(
+        "AlignmentCheck.pm test2 checker",
+        [ "/tmp/.AlignmentCheck.pm.fa" ],
+        "bowtie",
+        0,
+        0
+    );
+    $ac->checkAlignments([
+        "0\t+\tr1\t1\tTGTTCGT\tIIIIIII\t40\t-",
+        "1\t+\tr1\t0\tTTGTTCG\tIIIIIII\t40\t-",
+        "2\t+\tr1\t2\tGTTCGTA\tIIIIIII\t40\t6:N>A",
+        "3\t+\tr1\t-1\tATTGTTC\tIIIIIII\t40\t0:N>A"], 0);
+    return 1;
+}
+
+# When run as a script: either run the unit tests (--test) or check the
+# alignment files named with --als against the references in --fasta.
+if($0 =~ /[^0-9a-zA-Z_]?AlignmentCheck\.pm$/) {
+    my @fas = ();
+    my @als = ();
+    my $format = "sam";
+    my $bisC = 0;
+    my $bisCpG = 0;
+    my $test = 0;
+
+    use Getopt::Long;
+    GetOptions (
+        "test"                  => \$test,
+        "fasta|ref=s"           => \@fas,
+        "als|aln=s"             => \@als,
+        "format=s"              => \$format,
+        "bis-C|bisulfite-C"     => \$bisC,
+        "bis-CpG|bisulfite-CpG" => \$bisCpG,
+        "bowtie"                => sub {$format = "bowtie"},
+        "sam"                   => sub {$format = "sam"}) || die;
+
+    if($test) {
+        use Test;
+        print "Running unit tests\n";
+        # Run unit tests
+        Test::shouldSucceed("test1", \&test1);
+        Test::shouldSucceed("test2", \&test2);
+        exit 0;
+    }
+
+    my $ac = AlignmentCheck->new(
+        "AlignmentCheck.pm checker",
+        \@fas,
+        $format,
+        $bisC,
+        $bisCpG);
+    $ac->checkAlignments(\@als, 1);
+}
+
+1;
diff --git a/scripts/sim/DNA.pm b/scripts/sim/DNA.pm
new file mode 100644
index 0000000..bf6b4e2
--- /dev/null
+++ b/scripts/sim/DNA.pm
@@ -0,0 +1,287 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright 2011, Ben Langmead <blangmea@jhsph.edu>
+#
+# This file is part of Bowtie 2.
+#
+# Bowtie 2 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bowtie 2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+package DNA;
+use strict;
+use warnings;
+use Carp;
+use FindBin qw($Bin);
+use lib $Bin;
+use Test;
+
+##
+# Set up uppercase IUPAC characters minus N
+#
+my %iupac_u_nn = (
+    "R" => 1,
+    "Y" => 1,
+    "M" => 1,
+    "K" => 1,
+    "S" => 1,
+    "W" => 1,
+    "B" => 1,
+    "V" => 1,
+    "H" => 1,
+    "D" => 1
+);
+
+##
+# Return true iff arg is an IUPAC code and not ACGT or N.  Only
+# uppercase codes are recognized.
+#
+sub isIUPAC($) {
+    return defined($iupac_u_nn{$_[0]});
+}
+
+##
+# Replace IUPAC characters with Ns.
+#
+sub iupacSubN($) {
+    # Works on $_[0] itself, so the caller's variable is rewritten in
+    # place as well as being returned.  tr/// pads a short replacement
+    # list with its final character, which covers the lowercase codes.
+    $_[0] =~ tr/RYMKSWBVHDrymkswbvhd/NNNNNNNNNNn/;
+    return $_[0];
+}
+
+# For each uppercase nucleotide code, the unambiguous bases it can stand for
+my %compat = (
+    "A" => "A",   "T" => "T",   "C" => "C",   "G" => "G",
+    "R" => "AG",  "Y" => "CT",  "M" => "AC",  "K" => "GT",
+    "S" => "CG",  "W" => "AT",
+    "B" => "CGT", "V" => "ACG", "H" => "ACT", "D" => "AGT",
+    "N" => "ACGT"
+);
+
+##
+# Pick a random character that's compatible with input character.
+# Input is case-insensitive; the result is uppercase.  No random number
+# is drawn when there is only one candidate.
+#
+sub pickCompat($) {
+    my $code = uc $_[0];
+    my $choices = $compat{$code};
+    defined($choices) || die "Bad input $code";
+    return $choices if length($choices) == 1;
+    length($choices) > 1 || die;
+    return substr($choices, int(rand(length($choices))), 1);
+}
+
+# For each uppercase nucleotide code, the bases used as "incompatible"
+# choices (note that N maps to all four bases here too)
+my %incompat = (
+    "A" => "CGT", "T" => "ACG", "C" => "AGT", "G" => "ACT",
+    "R" => "CT",  "Y" => "AG",  "M" => "GT",  "K" => "AC",
+    "S" => "AT",  "W" => "CG",
+    "B" => "A",   "V" => "T",   "H" => "G",   "D" => "C",
+    "N" => "ACGT"
+);
+
+##
+# Pick a random character that's incompatible with input character.
+# Input is case-insensitive; the result is uppercase.  No random number
+# is drawn when there is only one candidate.
+#
+sub pickIncompat($) {
+    my $code = uc $_[0];
+    my $choices = $incompat{$code};
+    defined($choices) || die "Bad input $code";
+    return $choices if length($choices) == 1;
+    length($choices) > 1 || die;
+    return substr($choices, int(rand(length($choices))), 1);
+}
+
+##
+# Set up lowercase IUPAC characters minus N
+#
+my %iupac_l_nn = map { $_ => 1 } qw(r y m k s w b v h d);
+
+# Complement of every nucleotide code, in both cases
+my %revcompMap = do {
+    my %upper = (
+        "A" => "T", "T" => "A", "C" => "G", "G" => "C",
+        "R" => "Y", "Y" => "R", "M" => "K", "K" => "M",
+        "S" => "S", "W" => "W",
+        "B" => "V", "V" => "B", "H" => "D", "D" => "H",
+        "N" => "N"
+    );
+    (%upper, map { lc($_) => lc($upper{$_}) } keys %upper);
+};
+
+# The four unambiguous bases, in both cases
+my %unambigSet = map { $_ => 1 } qw(A a C c G g T t);
+
+##
+# Return the complement, incl. if it's IUPAC.  The result is always
+# uppercase.
+#
+sub comp($) {
+    my $s = uc shift;
+    # tr/// evaluates to the number of characters translated, so the
+    # translated string has to be returned separately.
+    $s =~ tr/ACGTRYMKSWBVHDN/TGCAYRKMSWVBDHN/;
+    return $s;
+}
+
+##
+# Return the reverse complement of a string.  The result is always
+# uppercase.
+#
+sub revcomp($) {
+    my $s = uc shift;
+    $s = reverse $s;
+    $s =~ tr/ACGTRYMKSWBVHDN/TGCAYRKMSWVBDHN/;
+    return $s;
+}
+
+##
+# Return true iff it's unambiguous.
+#
+sub unambig($) {
+    return $unambigSet{$_[0]};
+}
+
+##
+# Manipulate DNA in an integer-indexed fashion: map A/C/G/T to 0..3, add
+# $amt modulo 4 and map back.  Input is case-insensitive, the result is
+# uppercase; anything other than A/C/G/T is fatal.
+#
+sub plus($$) {
+    my ($c, $amt) = @_;
+    my %ctoi = ("A" => 0, "C" => 1, "G" => 2, "T" => 3);
+    my %itoc = (0 => "A", 1 => "C", 2 => "G", 3 => "T");
+    $c = uc $c;
+    defined($ctoi{$c}) || die "Not an unambiguous nucleotide: $c";
+    return $itoc{($ctoi{$c}+$amt) % 4};
+}
+
+# Color for each dinucleotide, keyed with the two characters in
+# ascending character-code order (see dinucToColor)
+my %dinucToColorMap = (
+    "AA" => "0",
+    "AC" => "1",
+    "AG" => "2",
+    "AT" => "3",
+    "CC" => "0",
+    "CG" => "3",
+    "CT" => "2",
+    "GG" => "0",
+    "GT" => "1",
+    "TT" => "0",
+
+    "AN" => ".",
+    "CN" => ".",
+    "GN" => ".",
+    "NT" => ".",
+    "NN" => ".",
+);
+
+##
+# Return the color for a pair of adjacent nucleotides.  The pair is
+# order-independent; a pair missing from the table is fatal.
+#
+sub dinucToColor($$) {
+    my ($n1, $n2) = @_;
+    # Normalize so the lookup key is in ascending character-code order
+    if(ord($n2) < ord($n1)) {
+        my $tmp = $n1;
+        $n1 = $n2;
+        $n2 = $tmp;
+    }
+    ord($n1) <= ord($n2) || die;
+    defined($dinucToColorMap{"$n1$n2"}) ||
+        die "Bad nucleotide dinuc: '$n1$n2'";
+    return $dinucToColorMap{"$n1$n2"};
+}
+
+sub test1 {
+    plus("A", 1) eq "C" || die;
+    plus("C", 1) eq "G" || die;
+    plus("G", 1) eq "T" || die;
+    plus("T", 1) eq "A" || die;
+    return 1;
+}
+
+sub test2 {
+    plus("A", 2) eq "G" || die;
+    plus("C", 2) eq "T" || die;
+    plus("G", 2) eq "A" || die;
+    plus("T", 2) eq "C" || die;
+    return 1;
+}
+
+sub test3 {
+    revcomp("ACGT") eq "ACGT" || die;
+    return 1;
+}
+
+sub test4 {
+    revcomp("ACGTYR") eq "YRACGT" || die;
+    return 1;
+}
+
+sub test5 {
+    comp("ACGT") eq "TGCA" || die;
+    comp("ryn") eq "YRN" || die;
+    return 1;
+}
+
+if($0 =~ /[^0-9a-zA-Z_]?DNA\.pm$/) {
+    print "Running unit tests\n";
+    # Run unit tests
+    Test::shouldSucceed("test1", \&test1);
+    Test::shouldSucceed("test2", \&test2);
+    Test::shouldSucceed("test3", \&test3);
+    Test::shouldSucceed("test4", \&test4);
+    Test::shouldSucceed("test5", \&test5);
+}
+
+1;
diff --git
a/scripts/sim/Mutate.pm b/scripts/sim/Mutate.pm
new file mode 100644
index 0000000..2157f73
--- /dev/null
+++ b/scripts/sim/Mutate.pm
@@ -0,0 +1,301 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright 2011, Ben Langmead <blangmea@jhsph.edu>
+#
+# This file is part of Bowtie 2.
+#
+# Bowtie 2 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bowtie 2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+package Mutate;
+use strict;
+use Carp;
+use FindBin qw($Bin);
+use lib $Bin;
+use DNA;
+use Test;
+use List::Util qw(max min);
+use Math::Random;
+
+##
+# Default SNP rate generator: draws one rate with random_uniform(1, 0,
+# 0.05).  This is the rate only, not the SNPs themselves.
+#
+sub defaultSNPGen() {
+    my ($lo, $hi) = (0, 0.05);
+    return Math::Random::random_uniform(1, $lo, $hi);
+}
+
+##
+# Default read gap rate generator: draws one rate with random_uniform(1,
+# 0, 0.005).  This is the rate only, not the gaps or their lengths.
+#
+sub defaultRdGapGen() {
+    my ($lo, $hi) = (0, 0.005);
+    return Math::Random::random_uniform(1, $lo, $hi);
+}
+
+##
+# Default reference gap rate generator: draws one rate with
+# random_uniform(1, 0, 0.005).  This is the rate only, not the gaps or
+# their lengths.
+#
+sub defaultRfGapGen() {
+    my ($lo, $hi) = (0, 0.005);
+    return Math::Random::random_uniform(1, $lo, $hi);
+}
+
+##
+# Default rearrangement rate generator: draws one rate with
+# random_uniform(1, 0, 0.005).
+#
+sub defaultRearrGen() {
+    my ($lo, $hi) = (0, 0.005);
+    return Math::Random::random_uniform(1, $lo, $hi);
+}
+
+##
+# Default gap length generator: the integer part of one
+# random_exponential(1, 3) draw, plus one, so never less than 1.
+#
+sub defaultGapLenGen() {
+    my $draw = Math::Random::random_exponential(1, 3);
+    return int($draw)+1;
+}
+
+##
+# Default function for generating random sequence to insert into a gap.
+#
+sub defaultSeqGen($) {
+    my ($len) = @_;
+    die "Bad length for sequence generator: $len"
+        unless ($len == int($len) && $len > 0);
+    # One rand() draw per base, left to right
+    return join("", map { substr("ACGT", int(rand(4)), 1) } 1..$len);
+}
+
+##
+# Create a new DNA mutator.  Every argument after the name is a code
+# ref; any that is left undefined falls back to the matching default*
+# generator in this file.
+#
+sub new {
+    my (
+        $class,
+        $name,   # name
+        $snp,    # SNP rate
+        $rdgap,  # read gap rate
+        $rfgap,  # ref gap rate
+        $rearr,  # rearrangement rate
+        $gaplen, # gap length
+        $seqgen, # DNA generator
+    ) = @_;
+    $name   = "noname"           if !defined($name);
+    $snp    = \&defaultSNPGen    if !defined($snp);
+    $rdgap  = \&defaultRdGapGen  if !defined($rdgap);
+    $rfgap  = \&defaultRfGapGen  if !defined($rfgap);
+    $rearr  = \&defaultRearrGen  if !defined($rearr);
+    $gaplen = \&defaultGapLenGen if !defined($gaplen);
+    $seqgen = \&defaultSeqGen    if !defined($seqgen);
+    my %self = (
+        _name   => $name,
+        _snp    => $snp,
+        _rdgap  => $rdgap,
+        _rfgap  => $rfgap,
+        _rearr  => $rearr,
+        _gaplen => $gaplen,
+        _seqgen => $seqgen,
+    );
+    return bless \%self, $class;
+}
+# Accessors returning the generator code refs stored by new()
+sub snp    { my ($self) = @_; return $self->{_snp}    }
+sub rdgap  { my ($self) = @_; return $self->{_rdgap}  }
+sub rfgap  { my ($self) = @_; return $self->{_rfgap}  }
+sub rearr  { my ($self) = @_; return $self->{_rearr}  }
+sub gaplen { my ($self) = @_; return $self->{_gaplen} }
+sub seqgen { my ($self) = @_; return $self->{_seqgen} }
+
+##
+# Given a sequence (i.e. a key $srcchr into the reference hash),
+# mutate that string.  Note that rearrangement mutations can affect
+# more than one sequence at a time.
+#
+# Returns a list containing counts for:
+#
+# 1: number of SNPs added
+# 2: number of ref gaps added
+# 3: number of read gaps added
+# 4: number of rearrangements added
+#
+# The counts are the numbers drawn, before any position is skipped.
+# $ref->{$srcchr} is replaced with the mutated sequence.
+#
+sub mutateSeq {
+    my ($self, $srcchr, $ref) = @_;
+    my ($nsnp, $nrfgap, $nrdgap, $nrearr) = (0, 0, 0, 0);
+    my $mutseq = $ref->{$srcchr};
+    # Calculate # SNPs to add
+    my $len = length($mutseq);
+    my $snpRate   = $self->snp->();
+    my $rfgapRate = $self->rfgap->();
+    my $rdgapRate = $self->rdgap->();
+    my $rearrRate = $self->rearr->();
+    $nsnp   = Math::Random::random_binomial(1, $len, $snpRate);
+    $nrfgap = Math::Random::random_binomial(1, $len, $rfgapRate);
+    $nrdgap = Math::Random::random_binomial(1, $len, $rdgapRate);
+    $nrearr = Math::Random::random_binomial(1, $len, $rearrRate);
+    print STDERR " Introducing $nsnp SNPs, $nrfgap/$nrdgap ref/read gaps, and $nrearr rearrangements\n";
+    $nsnp = min($nsnp, $len);
+    # Add the SNPs
+    for (1..$nsnp) {
+        my $off = int(rand($len)); # where to mutate
+        my $add = int(rand(3))+1;  # how to mutate
+        my $c = substr($mutseq, $off, 1);
+        $c eq "A" || $c eq "C" || $c eq "G" || $c eq "T" || $c eq "N" || die "Bad char '$c' in:\n$ref->{$srcchr}";
+        # N is tolerated in the input but DNA::plus only accepts
+        # A/C/G/T, so leave an N untouched rather than dying on it.
+        next if $c eq "N";
+        substr($mutseq, $off, 1) = DNA::plus($c, $add);
+    }
+    print STDERR " Finished SNPs\n";
+    # Calculate # ref gaps to add
+    for (1..$nrfgap) {
+        my $off = int(rand($len));      # where to mutate
+        my $gaplen = $self->gaplen->(); # how many gap positions in ref
+        # Insert characters into the subject genome
+        my $insseq = $self->seqgen->($gaplen);
+        substr($mutseq, $off, 0) = $insseq;
+        $len = length($mutseq);
+    }
+    print STDERR " Finished ref gaps\n";
+    # Calculate # read gaps to add
+    for (1..$nrdgap) {
+        my $off = int(rand($len));      # where to mutate
+        my $gaplen = $self->gaplen->(); # how many gap positions in ref
+        # Delete characters from subject genome
+        substr($mutseq, $off, $gaplen) = "";
+        $len = length($mutseq);
+    }
+    print STDERR " Finished read gaps\n";
+    $ref->{$srcchr} = $mutseq;
+    # Rearrangements are switched off by this early return; everything
+    # below is unreachable and is kept only so it can be re-enabled.
+    return ($nsnp, $nrfgap, $nrdgap, $nrearr);
+
+    my $totlen = 0;
+    for (keys %$ref) { $totlen += length($ref->{$_}); }
+    # Calculate # rearrangements to add
+    for (1..$nrearr) {
+        # Pick two loci, at least one on this reference sequence and
+        # then cross them over somehow
+        my $off = int(rand($len));
+        my @refkeys = keys %$ref;
+        my $ochr = $refkeys[int(rand(scalar(@refkeys)))];
+        my $oseq = $ref->{$ochr};
+        my $ooff = int(rand(length($oseq)));
+        my $srcleft = int(rand(2));
+        my $dstleft = int(rand(2));
+        my $srcrc = int(rand(2));
+        my $dstrc = int(rand(2));
+        # Check that the source and dest don't overlap
+        next if $srcchr eq $ochr;
+        # Get the sequence to move
+        my $presrclen = length($mutseq);
+        my $predstlen = length($oseq);
+        my $srcseq;
+        if($srcleft) {
+            $srcseq = substr($mutseq, 0, $off);
+        } else {
+            $srcseq = substr($mutseq, $off);
+        }
+        my $dstseq;
+        if($dstleft) {
+            $dstseq = substr($oseq, 0, $ooff);
+        } else {
+            $dstseq = substr($oseq, $ooff);
+        }
+        # Delete the sequence from the source
+        length($srcseq) <= length($mutseq) || die;
+        length($dstseq) <= length($oseq) || die;
+        if($srcleft) {
+            substr($mutseq, 0, length($srcseq)) = "";
+        } else {
+            substr($mutseq, -length($srcseq)) = "";
+        }
+        if($dstleft) {
+            substr($oseq, 0, length($dstseq)) = "";
+        } else {
+            substr($oseq, -length($dstseq)) = "";
+        }
+        # Possibly reverse the pieces we broke off
+        my $len1 = length($srcseq);
+        my $len2 = length($dstseq);
+        $srcseq = DNA::revcomp($srcseq) if $srcrc;
+        $dstseq = DNA::revcomp($dstseq) if $dstrc;
+        length($srcseq) == $len1 || die "$srcseq";
+        length($dstseq) == $len2 || die;
+        # Mutate the current chromosome
+        if($srcleft) {
+            $mutseq = $dstseq . $mutseq;
+        } else {
+            $mutseq = $mutseq . $dstseq;
+        }
+        # Mutate the other chromosome
+        if($dstleft) {
+            $oseq = $srcseq . $oseq;
+        } else {
+            $oseq = $oseq . $srcseq;
+        }
+        my $postsrclen = length($mutseq);
+        my $postdstlen = length($oseq);
+        # The swap must conserve the combined length of the two
+        # sequences (source + destination, before vs. after).
+        ($presrclen + $predstlen) == ($postsrclen + $postdstlen) ||
+            die "from $srcchr to $ochr: $presrclen + $predstlen != $postsrclen + $postdstlen";
+        $ref->{$srcchr} = $mutseq;
+        $ref->{$ochr} = $oseq;
+        my $ntotlen = 0;
+        for (keys %$ref) { $ntotlen += length($ref->{$_}); }
+        $totlen == $ntotlen || die "Total length changed after rearrangements from $srcchr to $ochr ($totlen -> $ntotlen)";
+    }
+    print STDERR " Finished rearrangements\n";
+    $ref->{$srcchr} = $mutseq;
+    return ($nsnp, $nrfgap, $nrdgap, $nrearr);
+}
+
+# Smoke test: mutate two short references with the default generators
+sub test1 {
+    my $mut = Mutate->new("UnitTest mutator");
+    my %refs = (
+        "r1" => "TATGACGGTCGAAACCAGGCGA",
+        "r2" => "TATATTTAGTCTCGTCTGGCTGTCTCGGCTGCGCGCGAGTAAAGACCGGCCTGATC");
+    $mut->mutateSeq("r1", \%refs);
+    $mut->mutateSeq("r2", \%refs);
+    return 1;
+}
+
+# Smoke test: as test1, with four references and a fixed rearrangement
+# rate of 0.1
+sub test2 {
+    my $mut = Mutate->new(
+        "UnitTest mutator",
+        \&defaultSNPGen,
+        \&defaultRdGapGen,
+        \&defaultRfGapGen,
+        sub { return 0.1 },
+        \&defaultGapLenGen,
+        \&defaultSeqGen);
+    my %refs = (
+        "r1" => "TATGACGGTCGAAACCAGGCGA",
+        "r2" => "TATATTTAGTCTCGTCTGGCTGTCTCGGCTGCGCGCGAGTAAAGACCGGCCTGATC",
+        "r3" => "TATATTTAGTCTCGTCTGGCTGTCTCGGCTGCGCGCGAGTAAAGACCGGCCTGATC".
+                "ATTGGTGTCGCGGCGCGCGTATATATATATATATATAGCCTGCTACGTCAGCTAGC",
+        "r4" => "TATATTTAGTCTCGTCTGGCTGTCTCGGCTGCGCGCGAGTAAAGACCGGCCTGATC".
+                "ATTGGTGTCGCGGCGCGCGTATATATATATATATATAGCCTGCTACGTCAGCTAGC".
+                "ATATAACAAAAAAACCCCACACGACGCGGACTCTAGCACTATCGGACTATCATCGG");
+    $mut->mutateSeq("r1", \%refs);
+    $mut->mutateSeq("r2", \%refs);
+    $mut->mutateSeq("r3", \%refs);
+    $mut->mutateSeq("r4", \%refs);
+    return 1;
+}
+
+if($0 =~ /[^0-9a-zA-Z_]?Mutate\.pm$/) {
+    print "Running unit tests\n";
+    # Run unit tests
+    Test::shouldSucceed("test1", \&test1);
+    Test::shouldSucceed("test2", \&test2);
+}
+
+1;
diff --git a/scripts/sim/RandDNA.pm b/scripts/sim/RandDNA.pm
new file mode 100644
index 0000000..a924992
--- /dev/null
+++ b/scripts/sim/RandDNA.pm
@@ -0,0 +1,191 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright 2011, Ben Langmead <blangmea@jhsph.edu>
+#
+# This file is part of Bowtie 2.
+#
+# Bowtie 2 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bowtie 2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+package RandDNA;
+use strict;
+use Carp;
+use FindBin qw($Bin);
+use lib $Bin;
+use DNA;
+use Test;
+use Math::Random;
+
+##
+# Create a new random DNA generator.
+#
+# The N and IUPAC fractions are required and the constructor croaks when
+# either is undefined.  The AT, A and C fractions are optional; an undefined
+# one is replaced by a rand() draw.  A fraction of 0 is a legitimate value
+# and is kept as given.
+#
+sub new {
+    my (
+        $class,
+        $name,  # name of generator
+        $n,     # N fraction
+        $iupac, # Non-A/C/G/T/N IUPAC fraction (after N fraction removed)
+        $at,    # AT fraction (after N/IUPAC fractions removed)
+        $a,     # A fraction (of AT)
+        $c,     # C fraction (of CG)
+    ) = @_;
+    $name = "noname" unless defined($name);
+    # Test with defined() rather than truth so that 0 is not mistaken for a
+    # missing argument
+    return bless {
+        _name => $name,
+        _n => defined($n) ? $n : croak("No N frac"),
+        _iupac => defined($iupac) ? $iupac : croak("No IUPAC frac"),
+        _at => defined($at) ? $at : rand(),
+        _a => defined($a) ? $a : rand(),
+        _c => defined($c) ? $c : rand()
+    }, $class;
+}
+# Read-only accessors for the fields set by new()
+sub name { return $_[0]->{_name} }
+sub nFrac { return $_[0]->{_n} }
+sub iupacFrac { return $_[0]->{_iupac} }
+sub atFrac { return $_[0]->{_at} }
+sub aFrac { return $_[0]->{_a} }
+sub cFrac { return $_[0]->{_c} }
+
+##
+# Return a random IUPAC character, drawn uniformly from the ten codes
+# listed below.
+#
+sub randIUPAC() {
+    my @iu = (
+        "R",
+        "Y",
+        "M",
+        "K",
+        "S",
+        "W",
+        "B",
+        "V",
+        "H",
+        "D"
+    );
+    my $iui = int(rand(scalar(@iu)));
+    defined($iu[$iui]) || die "Bad index $iui";
+    return $iu[$iui];
+}
+
+##
+# Given parameters controlling character frequencies, build a palette
+# of short sequences that can be composed to make longer sequences.
+#
+# $arr is an array ref that receives the new sequences, $num is how many to
+# make (30 when undefined) and $bbrnd is a code ref returning the length of
+# each one (1..100 uniformly when undefined).
+#
+sub genBuildingBlocks {
+    my ($self, $arr, $num, $bbrnd) = @_;
+    defined($arr) || die;
+    $num = 30 unless defined($num);
+    # Random generator for length
+    $bbrnd = sub { return int(rand(100))+1 } unless defined($bbrnd);
+    for my $i (1..$num) {
+        my $seq = "";
+        # Generate length
+        my $len = $bbrnd->();
+        $len > 0 || die "Bad length: $len";
+        # Generate characters
+        for my $j (1..$len) {
+            my $c = "";
+            # Each test below uses its own rand() draw, so the IUPAC
+            # fraction applies to what is left after Ns are taken out
+            if(rand() < $self->nFrac()) {
+                $c = "N";
+            } elsif(rand() < $self->iupacFrac()) {
+                $c = randIUPAC();
+                defined($c) || die;
+            } else {
+                $c = (rand() < $self->atFrac()) ? "AT" : "CG";
+                if($c eq "AT") {
+                    $c = (rand() < $self->aFrac()) ? "A" : "T";
+                } else {
+                    $c = (rand() < $self->cFrac()) ? "C" : "G";
+                }
+            }
+            $seq .= $c;
+        }
+        # Add to return list
+        push @$arr, $seq;
+    }
+}
+
+##
+# Use this generator to generate another random sequence.
+#
+# $len is the length wanted, $bbsr an optional array ref of building blocks
+# (a fresh palette is generated when undefined) and $runrnd an optional code
+# ref giving how many copies of a block to insert at a time.
+#
+sub nextSeq {
+    my ($self, $len, $bbsr, $runrnd) = @_;
+    my $seq = "";
+    # Generate building blocks
+    my @bbs = ();
+    if(defined($bbsr)) {
+        @bbs = @$bbsr;
+    } else {
+        $self->genBuildingBlocks(\@bbs);
+    }
+    scalar(@bbs) > 0 || die;
+    # Random generator for run length
+    my $defaultRnd = sub {
+        # Mean of exp is 1/lambda
+        return int(Math::Random::random_exponential(1, 2))+1;
+    };
+    $runrnd = $defaultRnd unless defined($runrnd);
+    # Build the sequence by repeatedly inserting runs of building blocks,
+    # until it is at least five times the requested length
+    while(length($seq) < $len * 5) {
+        # Choose building block
+        my $bbi = int(rand(scalar(@bbs)));
+        # Choose how many times to add it
+        my $runlen = $runrnd->();
+        # Choose insert point
+        my $insat = int(rand(length($seq)));
+        # Repeatedly insert building block
+        for my $i (1..$runlen) {
+            substr($seq, $insat, 0) = $bbs[$bbi];
+        }
+    }
+    # Return chopped out piece: $len characters starting at offset 2*$len
+    return substr($seq, $len * 2, $len);
+}
+
+# Generated palette: a 200-character request must yield 200 characters
+sub test1 {
+    my $rd = RandDNA->new(
+        "randtest1", # name
+        0.02,        # n frac
+        0.01,        # IUPAC frac
+        0.4,         # AT frac
+        0.45,        # A/AT frac
+        0.35);       # C/CG frac
+    my $seq = $rd->nextSeq(200, undef);
+    length($seq) == 200 || die;
+    return 1;
+}
+
+# Caller-supplied palette: a 300-character request must yield 300 characters
+sub test2 {
+    my @bb = ("AAA", "CCCC");
+    my $rd = RandDNA->new(
+        "randtest2", # name
+        0.02,        # n frac
+        0.01,        # IUPAC frac
+        0.4,         # AT frac
+        0.45,        # A/AT frac
+        0.35);       # C/CG frac
+    my $seq = $rd->nextSeq(300, \@bb);
+    length($seq) == 300 || die;
+    return 1;
+}
+
+# Run the unit tests only when this file is executed directly
+if($0 =~ /[^0-9a-zA-Z_]?RandDNA\.pm$/) {
+    print "Running unit tests\n";
+    # Run unit tests
+    Test::shouldSucceed("test1", \&test1);
+    Test::shouldSucceed("test2", \&test2);
+}
+
+1;
diff --git a/scripts/sim/SampleRead.pm b/scripts/sim/SampleRead.pm
new file mode 100644
index 0000000..1defeed
--- /dev/null
+++ b/scripts/sim/SampleRead.pm
@@ -0,0 +1,244 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright 2011, Ben Langmead
+#
+# 
This file is part of Bowtie 2.
+#
+# Bowtie 2 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bowtie 2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
+#
+
+package SampleRead;
+use strict;
+use Carp;
+use FindBin qw($Bin);
+use lib $Bin;
+use DNA;
+use Test;
+use List::Util qw(max min);
+use Math::Random;
+
+##
+# Default sequencing miscall rate generator: one uniform draw with
+# bounds 0 and 0.1.
+#
+sub defaultSeqMmGen() {
+    my $rate = Math::Random::random_uniform(1, 0, 0.1);
+    return $rate;
+}
+
+##
+# Default random generator for fragment length: one normal draw with
+# mean 200 and standard deviation 40, truncated to an integer, plus one.
+#
+sub defaultFragLenGen() {
+    my $draw = Math::Random::random_normal(1, 200, 40);
+    return int($draw) + 1;
+}
+
+##
+# Default random generator for read length.
+#
+sub defaultReadLenGen() {
+    # Pick one of three exponential distributions (means 60, 20 and 150)
+    # with equal probability; the result is always at least 1
+    my $r = int(rand(3));
+    if($r == 0) {
+        return int(Math::Random::random_exponential(1, 60))+1;
+    } elsif($r == 1) {
+        return int(Math::Random::random_exponential(1, 20))+1;
+    } else {
+        return int(Math::Random::random_exponential(1, 150))+1;
+    }
+}
+
+##
+# Create a new read sampler.  Any generator left undefined falls back to
+# the matching default in this package.
+#
+sub new {
+    my (
+        $class,
+        $name,       # name of simulator
+        $fraglengen, # paired-end fragment length generator
+        $m1lengen,   # random mate1 length generator
+        $m2lengen,   # random mate2 length generator
+    ) = @_;
+    $fraglengen = \&defaultFragLenGen unless defined($fraglengen);
+    $m1lengen = \&defaultReadLenGen unless defined($m1lengen);
+    $m2lengen = \&defaultReadLenGen unless defined($m2lengen);
+    $name = "noname" unless defined($name);
+    return bless {
+        _name => $name,
+        _fraglengen => $fraglengen,
+        _m1lengen => $m1lengen,
+        _m2lengen => $m2lengen,
+    }, $class;
+}
+# Read-only accessors for the fields set by new()
+sub name { return $_[0]->{_name} }
+sub fraglengen { return $_[0]->{_fraglengen} }
+sub m1lengen { return $_[0]->{_m1lengen} }
+sub m2lengen { return $_[0]->{_m2lengen} }
+
+##
+# Generate a set of reads from a subject genome encoded in a hash ref.
+#
+# One sequence and one quality string are appended to @$seqs and @$quals
+# per requested read.  Dies when an argument has the wrong reference type.
+#
+sub genReads {
+    my (
+        $self,
+        $num,    # number of reads/fragments to generate
+        $color,  # colorize?
+        $refs,   # hash ref holding reference sequences
+        $seqs,   # put generated read sequences here
+        $quals,  # put generated quality sequences here
+        $lengen) = @_; # length generator
+
+    ref($refs) eq "HASH" || die "Reference input must be hash ref, is ".ref($refs);
+    ref($seqs) eq "ARRAY" || die "seqs input must be array ref, is ".ref($seqs);
+    ref($quals) eq "ARRAY" || die "quals input must be array ref, is ".ref($quals);
+    # Without an explicit length generator, use the mate1 generator
+    $lengen = $self->m1lengen() unless defined($lengen);
+    my $totreflen = 0;
+    my @keys = keys %$refs;
+    for (@keys) { $totreflen += length($refs->{$_}); }
+    for(1..$num) {
+        if(rand() < 0.05 && scalar(@$seqs) > 0) {
+            # Clone a previous read
+            my $ci = int(rand(scalar(@$seqs)));
+            push @$seqs, $seqs->[$ci];
+            push @$quals, $quals->[$ci];
+        } else {
+            # Keep drawing until a read fits inside one reference
+            while(1) {
+                # Offset into the concatenation of all references
+                my $ro = int(rand($totreflen));
+                my $len = $lengen->();
+                $len = 1 if $len < 1;
+                my $key = undef;
+                my $rflen = 0;
+                # Find the reference containing $ro, turning $ro into an
+                # offset within that reference along the way
+                for (@keys) {
+                    $rflen = length($refs->{$_});
+                    if($ro < $rflen) {
+                        $key = $_;
+                        last;
+                    }
+                    $ro -= $rflen;
+                }
+                defined($key) || die;
+                $rflen > 0 || die;
+                # If we are overhanging the end, discard and try again
+                next if $ro + $len > $rflen;
+                my $rfseq = substr($refs->{$key}, $ro, $len);
+                length($rfseq) == $len || die;
+                my $rc = int(rand(2));
+                # Possibly reverse-complement it
+                $rfseq = DNA::revcomp($rfseq) if $rc == 1;
+                # Possibly colorize
+                if($color) {
+                    # One color per adjacent pair of bases, so the color
+                    # read is one character shorter than the base read
+                    my $cseq = "";
+                    for(0..$len-2) {
+                        my ($c1, $c2) = (substr($rfseq, $_, 1), substr($rfseq, $_+1, 1));
+                        my $col = DNA::dinucToColor($c1, $c2);
+                        $cseq .= $col;
+                    }
+                    $rfseq = $cseq;
+                    $len = length($rfseq);
+                }
+                push @$seqs, $rfseq;
+                # TODO: generate interesting qualities
+                push @$quals, "I" x $len;
+                last;
+            }
+        }
+        # Simulate next read
+    }
+}
+
+##
+# Generate a set of read pairs from a subject genome encoded in a hash
+# ref. First we extract unpaired fragments, then take sequences from
+# either end to make the mates.
+#
+# One entry is appended to each of the four output arrays per fragment.
+#
+sub genReadPairs {
+    my (
+        $self,
+        $num,    # number of reads/fragments to generate
+        $color,  # colorize?
+        $refs,   # hash ref holding reference sequences
+        $m1fw,   # orientation of mate 1 when fragment comes from Watson strand
+        $m2fw,   # orientation of mate 2 when fragment comes from Watson strand
+        $seq1s,  # put generated mate1 sequences here
+        $seq2s,  # put generated mate2 sequences here
+        $qual1s, # put generated mate1 quality sequences here
+        $qual2s) = @_; # put generated mate2 quality sequences here
+
+    # First simulate fragments
+    ref($refs) eq "HASH" || die "Reference input must be hash ref";
+    ref($seq1s) eq "ARRAY" || die "seq1s input must be array ref";
+    ref($seq2s) eq "ARRAY" || die "seq2s input must be array ref";
+    ref($qual1s) eq "ARRAY" || die "qual1s input must be array ref";
+    ref($qual2s) eq "ARRAY" || die "qual2s input must be array ref";
+    my @fragseqs = ();
+    my @fragquals = ();
+    # Fragments are sampled like unpaired reads, with the fragment length
+    # generator in place of a read length generator
+    $self->genReads(
+        $num,
+        $color,
+        $refs,
+        \@fragseqs,
+        \@fragquals,
+        $self->fraglengen);
+    scalar(@fragseqs) == scalar(@fragquals) || die;
+    # For each fragment
+    for (1..scalar(@fragseqs)) {
+        # Take mates from either end
+        my $m1len = $self->m1lengen->();
+        my $m2len = $self->m2lengen->();
+        # A mate cannot be longer than the fragment it is cut from
+        $m1len = min($m1len, length($fragseqs[$_-1]));
+        $m2len = min($m2len, length($fragseqs[$_-1]));
+        # Mate 1 is the fragment's prefix; mate 2 is its suffix (negative
+        # substr offset counts from the end)
+        my $m1seq = substr($fragseqs [$_-1], 0, $m1len);
+        my $m2seq = substr($fragseqs [$_-1], -$m2len);
+        my $m1qual = substr($fragquals[$_-1], 0, $m1len);
+        my $m2qual = substr($fragquals[$_-1], -$m2len);
+        # A mate flagged as not forward is reverse-complemented, and its
+        # qualities reversed to match
+        if(!$m1fw) {
+            $m1seq = DNA::revcomp($m1seq);
+            $m1qual = reverse $m1qual;
+        }
+        if(!$m2fw) {
+            $m2seq = DNA::revcomp($m2seq);
+            $m2qual = reverse $m2qual;
+        }
+        # Commit new pair to the list
+        push @$seq1s, $m1seq;
+        push @$seq2s, $m2seq;
+        push @$qual1s, $m1qual;
+        push @$qual2s, $m2qual;
+        # Simulate next pair
+    }
+}
+
+# Smoke test: ask for 10 unpaired reads and check 10 sequences and 10
+# quality strings come back
+sub test1 {
+    my $samp = SampleRead->new("UnitTest read sampler");
+    my %refs = (
+        "r1" => "TATGACGGTCGAAACCAGGCGA",
+        "r2" => "TATATTTAGTCTCGTCTGGCTGTCTCGGCTGCGCGCGAGTAAAGACCGGCCTGATC");
+    my @seqs = ();
+    my @quals = ();
+    $samp->genReads(10, 0, \%refs, \@seqs, \@quals, \&defaultReadLenGen);
+    scalar(@seqs) == 10 || die;
+    scalar(@quals) == 10 || die;
+    return 1;
+}
+
+# Placeholder; exercises nothing
+sub test2 {
+    return 1;
+}
+
+# Run the unit tests only when this file is executed directly
+if($0 =~ /[^0-9a-zA-Z_]?SampleRead\.pm$/) {
+    print "Running unit tests\n";
+    # Run unit tests
+    Test::shouldSucceed("test1", \&test1);
+    Test::shouldSucceed("test2", \&test2);
+}
+
+1;
diff --git a/scripts/sim/Sim.pm b/scripts/sim/Sim.pm
new file mode 100644
index 0000000..458e867
--- /dev/null
+++ b/scripts/sim/Sim.pm
@@ -0,0 +1,1052 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright 2011, Ben Langmead
+#
+# This file is part of Bowtie 2.
+#
+# Bowtie 2 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bowtie 2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
+#
+
+package Sim;
+use strict;
+use Carp;
+use FindBin qw($Bin);
+use lib $Bin;
+use DNA;
+use Test;
+use RandDNA;
+use SampleRead;
+use Mutate;
+use AlignmentCheck;
+use Math::Random;
+use List::Util qw(max min);
+use POSIX;
+
+##
+# Replacement for "die" that additionally writes error message to file so that
+# run.pl can read it later.
+#
+sub mydie($) {
+    my ($msg) = @_;
+    # The file name carries this process's id
+    my $fn = ".run.pl.child.$$";
+    open(EO, ">$fn") || die "Could not open $fn for writing";
+    print EO "$msg\n";
+    close(EO);
+    confess $msg;
+}
+
+# Generates random printable strings of a given length
+sub randStr($) {
+    my ($len) = @_;
+    # Letters, digits and underscore
+    my @alphabet = ('a'..'z', 'A'..'Z', '0'..'9', '_');
+    my $nalpha = scalar(@alphabet);
+    # One rand() draw per output character
+    return join("", map { $alphabet[int(rand($nalpha))] } 1..$len);
+}
+
+##
+# Default random generator for number of references per test case.
+#
+sub defaultRefNumGen() {
+    my $draw = Math::Random::random_exponential(1, 8);
+    return int($draw) + 1;
+}
+
+##
+# Default random generator for reference length.
+#
+sub defaultRefLenGen() {
+    my $draw = Math::Random::random_exponential(1, 50000);
+    return int($draw) + 1;
+}
+
+##
+# Default random generator for number of reads per test case.
+#
+sub defaultReadNumGen() {
+    my $draw = Math::Random::random_exponential(1, 10000);
+    return int($draw) + 1;
+}
+
+##
+# Default random generator for fragment length.
+#
+sub defaultFragLenGen() {
+    my $draw = Math::Random::random_normal(1, 200, 40);
+    return int($draw) + 1;
+}
+
+##
+# Default random generator for read length.
+#
+sub defaultReadLenGen() {
+    # Three exponential distributions, chosen with equal probability
+    my @means = (60, 20, 150);
+    my $mean = $means[int(rand(3))];
+    my $draw = Math::Random::random_exponential(1, $mean);
+    return int($draw) + 1;
+}
+
+##
+# Default random generator for fraction of reference characters = N.
+#
+sub defaultNGen() {
+    my $frac = Math::Random::random_uniform(1, 0, 0.05);
+    return $frac;
+}
+
+##
+# Default random generator for fraction of reference characters = an
+# ambiguous IUPAC code.
+#
+sub defaultIupacGen() {
+    my $frac = Math::Random::random_uniform(1, 0, 0.01);
+    return $frac;
+}
+
+##
+# Default random generator for AT/ACGT fraction.
+#
+sub defaultAtGen() {
+    my $frac = Math::Random::random_normal(1, 0.5, 0.18);
+    # Clamp the draw into [0, 1]
+    $frac = 0 if $frac < 0;
+    $frac = 1 if $frac > 1;
+    return $frac;
+}
+
+##
+# Default random generator for A/AT fraction.
+#
+sub defaultAGen() {
+    my $frac = Math::Random::random_normal(1, 0.5, 0.18);
+    # Clamp the draw into [0, 1]
+    $frac = 0 if $frac < 0;
+    $frac = 1 if $frac > 1;
+    return $frac;
+}
+
+##
+# Default random generator for C/CG fraction.
+#
+sub defaultCGen() {
+    my $frac = Math::Random::random_normal(1, 0.5, 0.18);
+    # Clamp the draw into [0, 1]
+    $frac = 0 if $frac < 0;
+    $frac = 1 if $frac > 1;
+    return $frac;
+}
+
+##
+# Default SNP rate generator. Doesn't generate the SNP per se, just
+# the rate.
+#
+sub defaultSNPGen() {
+    my $rate = Math::Random::random_uniform(1, 0, 0.05);
+    return $rate;
+}
+
+##
+# Default read gap rate generator. Doesn't generate the gaps or
+# lengths, just the rate.
+#
+sub defaultRdGapGen() {
+    my $rate = Math::Random::random_uniform(1, 0, 0.005);
+    return $rate;
+}
+
+##
+# Default reference gap rate generator. Doesn't generate the gaps or
+# lengths, just the rate.
+#
+sub defaultRfGapGen() {
+    my $rate = Math::Random::random_uniform(1, 0, 0.005);
+    return $rate;
+}
+
+##
+# Default rearrangement rate generator.
+#
+sub defaultRearrGen() {
+    my $rate = Math::Random::random_uniform(1, 0, 0.005);
+    return $rate;
+}
+
+##
+# Default function for generating gap lengths when introducing a gap.
+# The result is always at least 1; the argument is not used.
+#
+sub defaultGapLenGen($) {
+    my $draw = Math::Random::random_exponential(1, 3);
+    return int($draw) + 1;
+}
+
+##
+# Default function for generating random sequence to insert into a gap.
+# The length must be a positive integer; anything else goes to mydie.
+#
+sub defaultSeqGen($) {
+    my ($len) = @_;
+    unless($len == int($len) && $len > 0) {
+        mydie("Bad length for sequence generator: $len");
+    }
+    my @bases = ("A", "C", "G", "T");
+    # One rand() draw per base
+    return join("", map { $bases[int(rand(4))] } 1..$len);
+}
+
+##
+# Default sequencing miscall rate generator.
+#
+sub defaultSeqMmGen() {
+    my $rate = Math::Random::random_uniform(1, 0, 0.1);
+    return $rate;
+}
+
+##
+# Create a new test case simulator.  Arguments after the class are the
+# simulator name followed by seventeen generator code refs, in the order
+# tabulated below; any left undefined gets the default shown.
+#
+sub new {
+    my ($class, $name, @gens) = @_;
+    # Object slot and default for each positional generator argument
+    my @slots = (
+        [ "_rfnumgen",   \&defaultRefNumGen  ], # number of reference sequences
+        [ "_rflengen",   \&defaultRefLenGen  ], # reference length
+        [ "_rdnumgen",   \&defaultReadNumGen ], # number of read sequences per run
+        [ "_rdlengen",   \&defaultReadLenGen ], # read length generator
+        [ "_fraglengen", \&defaultFragLenGen ], # fragment length generator
+        [ "_ngen",       \&defaultNGen       ], # N fraction
+        [ "_iupacgen",   \&defaultIupacGen   ], # Non-A/C/G/T/N IUPAC fraction (after N fraction removed)
+        [ "_atgen",      \&defaultAtGen      ], # AT fraction (after N/IUPAC fractions removed)
+        [ "_agen",       \&defaultAGen       ], # A fraction (of AT)
+        [ "_cgen",       \&defaultCGen       ], # C fraction (of CG)
+        [ "_snpgen",     \&defaultSNPGen     ], # SNP rate gen
+        [ "_rdgapgen",   \&defaultRdGapGen   ], # read gap generator
+        [ "_rfgapgen",   \&defaultRfGapGen   ], # ref gap generator
+        [ "_rearrgen",   \&defaultRearrGen   ], # rearrangement generator
+        [ "_gaplengen",  \&defaultGapLenGen  ], # gap length generator
+        [ "_seqgen",     \&defaultSeqGen     ], # gap filler sequence generator
+        [ "_seqmm",      \&defaultSeqMmGen   ], # sequencing error generator
+    );
+    my %obj = ( "_name" => (defined($name) ? $name : "noname") );
+    for my $i (0..$#slots) {
+        my ($slot, $fallback) = @{$slots[$i]};
+        $obj{$slot} = defined($gens[$i]) ? $gens[$i] : $fallback;
+    }
+    return bless \%obj, $class;
+}
+# Read-only accessors; each returns the stored generator code ref
+sub rfnumgen { my ($self) = @_; return $self->{_rfnumgen}; }
+sub rflengen { my ($self) = @_; return $self->{_rflengen}; }
+sub rdnumgen { my ($self) = @_; return $self->{_rdnumgen}; }
+sub rdlengen { my ($self) = @_; return $self->{_rdlengen}; }
+sub fraglengen { my ($self) = @_; return $self->{_fraglengen}; }
+sub ngen { my ($self) = @_; return $self->{_ngen}; }
+sub iupacgen { my ($self) = @_; return $self->{_iupacgen}; }
+sub atgen { my ($self) = @_; return $self->{_atgen}; }
+sub agen { my ($self) = @_; return $self->{_agen}; }
+sub cgen { my ($self) = @_; return $self->{_cgen}; }
+sub snpgen { my ($self) = @_; return $self->{_snpgen}; }
+sub rdgapgen { my ($self) = @_; return $self->{_rdgapgen}; }
+sub rfgapgen { my ($self) = @_; return $self->{_rfgapgen}; }
+sub rearrgen { my ($self) = @_; return $self->{_rearrgen}; }
+sub gaplengen { my ($self) = @_; return $self->{_gaplengen}; }
+sub seqgen { my ($self) = @_; return $self->{_seqgen}; }
+sub seqmm { my ($self) = @_; return $self->{_seqmm}; }
+
+##
+# Generate DNA generator: draw the five composition fractions, build a
+# RandDNA object from them, report the fractions on STDERR and return the
+# object.
+#
+sub genDNAgen {
+    my ($self) = @_;
+    # Fractions are drawn in this order: N, IUPAC, AT, A, C
+    my $nfrac = $self->ngen->();
+    my $iupacfrac = $self->iupacgen->();
+    my $atfrac = $self->atgen->();
+    my $afrac = $self->agen->();
+    my $cfrac = $self->cgen->();
+    my $refdnagen = RandDNA->new("Sim.pm gen", $nfrac, $iupacfrac, $atfrac, $afrac, $cfrac);
+    printf STDERR "Created DNA generator\n";
+    my @report = (
+        [ " N frac: %0.3f\n",       $nfrac     ],
+        [ " IUPAC frac: %0.3f\n",   $iupacfrac ],
+        [ " AT/ACGT frac: %0.3f\n", $atfrac    ],
+        [ " A/AT frac: %0.3f\n",    $afrac     ],
+        [ " C/CG frac: %0.3f\n",    $cfrac     ],
+    );
+    for my $row (@report) {
+        printf STDERR $row->[0], $row->[1];
+    }
+    return $refdnagen;
+}
+
+##
+# Generate and print reference sequences to file of given name. Also,
+# install reference sequences into hash ref $ref. To allow for
+# "overhang" (alignment that hang off the end of the reference), we
+# actually write out a little bit less than the full reference sequence
+# for each sequence.
+#
+# $ref is the hash ref to fill, $refdnagen the sequence generator (its
+# nextSeq method is called), $conf the configuration hash ref (keys "small"
+# and "numrefs" are read here) and $tmpfn the FASTA file to write.
+#
+sub genRef {
+    my ($self, $ref, $refdnagen, $conf, $tmpfn) = @_;
+    # Get a generator for reference length
+    my $reflen = $self->rflengen;
+    # Generate the number of references
+    my $refnum = $self->rfnumgen->();
+    # NOTE(review): the "small" shrink is applied twice below, i.e. a fourth
+    # root overall - confirm that the repetition is intended
+    $refnum = sqrt($refnum) if $conf->{small};
+    $refnum = 1 if $refnum <= 0;
+    $refnum = sqrt($refnum) if $conf->{small};
+    $refnum = 1 if $refnum <= 0;
+    $refnum = ceil($refnum);
+    # An explicit count in the configuration overrides the drawn one
+    $refnum = $conf->{numrefs} if defined($conf->{numrefs});
+    # Open output file
+    open (FA, ">$tmpfn") ||
+        mydie("Could not open temporary fasta file '$tmpfn' for writing");
+    my %ccnt = ();
+    print STDERR "Generating $refnum references\n";
+    for (1..$refnum) {
+        # Randomly generate length
+        my $len = $reflen->();
+        $len = sqrt($len) if $conf->{small};
+        $len = 1 if $len <= 0;
+        $len = ceil($len);
+        my $seq = $refdnagen->nextSeq($len);
+        length($seq) >= $len || die;
+        my $name = "Sim.pm.$_";
+        $ref->{$name} = $seq;
+        # Select amount to trim from LHS
+        my $trimleft = int(Math::Random::random_exponential(1, 200));
+        # Select amount to trim from RHS
+        my $trimright = int(Math::Random::random_exponential(1, 200));
+        # Halve one of the two trims, chosen at random, until together they
+        # no longer exceed the sequence length
+        while($trimleft + $trimright > $len) {
+            if(int(rand(2))) {
+                $trimleft = int($trimleft*0.5);
+            } else {
+                $trimright = int($trimright*0.5);
+            }
+        }
+        # Trim the sequence
+        substr($seq, 0, $trimleft) = "";
+        $seq = substr($seq, 0, length($seq)-$trimright);
+        my $trimlen = length($seq);
+        $trimlen == $len - $trimleft - $trimright ||
+            mydie("Unexpected trim combo: $len, $trimleft, $trimright, $trimlen");
+        print STDERR " Generated reference '$name' of untrimmed length $len, trimmed length $trimlen\n";
+        print FA ">$name\n";
+        my $buf = "";
+        length($seq) >= $trimlen || die;
+        for my $i (1..$trimlen) {
+            my $c = substr($seq, $i-1, 1);
+            defined($c) || die;
+            $ccnt{$c}++;
+            $buf .= $c;
+            # NOTE(review): $ref->{$name} already holds the untrimmed
+            # sequence, so this appends the trimmed copy after it - confirm
+            # that is intended
+            $ref->{$name} .= $c;
+            # Emit the FASTA body 60 characters per line
+            if($i % 60 == 0) {
+                print FA "$buf\n";
+                $buf = "";
+            }
+        }
+        print FA "$buf\n" if $buf ne "";
+    }
+    close(FA);
+    print STDERR "Wrote references to $tmpfn\n";
+    # Report how often each character was written
+    for my $k (sort keys %ccnt) {
+        print STDERR " $k: $ccnt{$k}\n";
+    }
+}
+
+##
+# Generate a hash of key/value arguments for the index build command.  An
+# option mapped to "" is a flag with no value.
+#
+sub genBuildArgs {
+    my ($self) = @_;
+    my %args = ();
+    # One time in three --bmaxdivn, one in three --bmax, otherwise neither
+    my $r1 = int(rand(3));
+    if($r1 == 0) {
+        $args{"--bmaxdivn"} = int(Math::Random::random_exponential(1, 4))+1;
+    } elsif($r1 == 1) {
+        $args{"--bmax"} = int(Math::Random::random_exponential(1, 10000))+100;
+    }
+    # Half the time, set --dcv to a power of two between 2**4 and 2**13
+    my $r2 = int(rand(2));
+    if($r2 == 0) {
+        $args{"--dcv"} = 2 ** (int(rand(10))+4);
+    }
+    # One time in five, add --packed
+    my $r3 = int(rand(5));
+    if($r3 == 0) {
+        $args{"--packed"} = "";
+    }
+    # One time in three, set --offrate to 1..8
+    my $r4 = int(rand(3));
+    if($r4 == 0) {
+        $args{"--offrate"} = int(rand(8))+1;
+    }
+    return \%args;
+}
+
+##
+# Given a fasta filename, an index basename, and a path to the
+# bowtie2-build executable, build nucleotide-space and colorspace
+# indexes for the sequences in the fasta file.
+#
+# The executable comes from $conf->{bowtie2_build_debug}; the colorspace
+# index is named "$idx.c" and is skipped when $conf->{no_color} is set.
+# Calls mydie when either command exits non-zero.
+#
+sub build {
+    my ($self, $fa, $idx, $conf, $args) = @_;
+    # Flatten the option hash into a command-line fragment
+    my $argstr = "";
+    for (keys %$args) {
+        $argstr .= " $_";
+        if($args->{$_} ne "") {
+            $argstr .= " ".$args->{$_};
+        }
+    }
+    $argstr .= " --sanity";
+    # Build nucleotide index
+    my $cmd = "$conf->{bowtie2_build_debug} $argstr $fa $idx";
+    print STDERR "$cmd\n";
+    system($cmd);
+    $? == 0 || mydie("Error running '$cmd'; exitlevel=$?");
+    print STDERR "Built nucleotide index '$idx'\n";
+    # Build colorspace index
+    unless($conf->{no_color}) {
+        $cmd = "$conf->{bowtie2_build_debug} $argstr -C $fa ${idx}.c";
+        print STDERR "$cmd\n";
+        system($cmd);
+        $? == 0 || mydie("Error running '$cmd'; exitlevel=$?");
+        # Report the basename that was actually built
+        print STDERR "Built colorspace index '${idx}.c'\n";
+    }
+}
+
+##
+# Given a hash of sequences, flatten all IUPAC codes into unambiguous
+# nucleotides.
+#
+# The sequences in %$h are modified in place.
+#
+sub flattenIUPAC() {
+    my ($self, $h) = @_;
+    for my $c (keys %$h) {
+        my $len = length($h->{$c});
+        for my $i (0..$len-1) {
+            my $ch = uc substr($h->{$c}, $i, 1);
+            my $nc = $ch;
+            if(DNA::isIUPAC($ch) || $ch eq "N") {
+                # A fresh rate is drawn from the SNP generator for every
+                # ambiguous character
+                if(rand() < $self->snpgen->()) {
+                    $nc = DNA::pickIncompat($ch);
+                    defined($nc) || mydie("Couldn't find incompatible base for $ch");
+                } else {
+                    $nc = DNA::pickCompat($ch);
+                    defined($nc) || mydie("Couldn't find compatible base for $ch");
+                }
+            }
+            # Only write back when the (upper-cased) character changed
+            if($ch ne $nc) {
+                substr($h->{$c}, $i, 1) = $nc;
+            }
+        }
+    }
+}
+
+##
+# Mutate reference genome into a subject genome.
+#
+# Works on a copy of %$refs and returns a ref to that copy.  Totals of the
+# counts reported by the mutator are printed to STDERR.
+#
+sub mutate() {
+    my ($self, $refs) = @_;
+    my %subj = %$refs;
+    $self->flattenIUPAC(\%subj);
+    print STDERR "Flattened IUPAC characters\n";
+    my $mutator = Mutate->new(
+        "Sim.pm mutator",
+        $self->snpgen,
+        $self->rdgapgen,
+        $self->rfgapgen,
+        $self->rearrgen,
+        $self->gaplengen,
+        $self->seqgen);
+    my ($nsnp, $nrfgap, $nrdgap, $nrearr) = (0, 0, 0, 0);
+    for(keys %subj) {
+        print STDERR " Mutating sequence $_\n";
+        my ($nsnp_, $nrfgap_, $nrdgap_, $nrearr_) = $mutator->mutateSeq($_, \%subj);
+        $nsnp += $nsnp_;
+        $nrfgap += $nrfgap_;
+        $nrdgap += $nrdgap_;
+        $nrearr += $nrearr_;
+    }
+    print STDERR "Mutated reference genome to subject genome\n";
+    print STDERR " SNPs introduced: $nsnp\n";
+    print STDERR " Reference gaps introduced: $nrfgap\n";
+    print STDERR " Read gaps introduced: $nrdgap\n";
+    print STDERR " Rearrangements introduced: $nrearr\n";
+    return \%subj;
+}
+
+##
+# Write the reads in %$input as FASTQ: mate 1 to $fh1 and, when
+# $input->{paired} is set, mate 2 to $fh2.  Reads are named by their
+# 1-based position.
+#
+sub dumpFastq {
+    my ($self, $input, $fh1, $fh2) = @_;
+    for (1..scalar(@{$input->{seq1s}})) {
+        my $seq1 = $input->{seq1s}->[$_-1];
+        my $qual1 = $input->{qual1s}->[$_-1];
+        print {$fh1} "\@$_\n";
+        print {$fh1} "$seq1\n";
+        print {$fh1} "+$_\n";
+        print {$fh1} "$qual1\n";
+        if($input->{paired}) {
+            my $seq2 = $input->{seq2s}->[$_-1];
+            my $qual2 = $input->{qual2s}->[$_-1];
+            print {$fh2} "\@$_\n";
+            print {$fh2} "$seq2\n";
+            print {$fh2} "+$_\n";
+            print {$fh2} "$qual2\n";
+        }
+    }
+}
+
+##
+# Write the reads in %$input as tab-separated qseq-style records, one line
+# per read; the read's 1-based position fills the fifth and sixth fields.
+#
+sub dumpQseq {
+    my ($self, $input, $fh1, $fh2) = @_;
+    for (1..scalar(@{$input->{seq1s}})) {
+        my $seq1 = $input->{seq1s}->[$_-1];
+        my $qual1 = $input->{qual1s}->[$_-1];
+        print {$fh1} "R\t1\t1\t1\t$_\t$_\t1\t1\t$seq1\t$qual1\t1\n";
+        if($input->{paired}) {
+            my $seq2 = $input->{seq2s}->[$_-1];
+            my $qual2 = $input->{qual2s}->[$_-1];
+            print {$fh2} "R\t1\t1\t1\t$_\t$_\t1\t1\t$seq2\t$qual2\t1\n";
+        }
+    }
+}
+
+##
+# Write the reads in %$input as FASTA; qualities are not written.
+#
+sub dumpFasta {
+    my ($self, $input, $fh1, $fh2) = @_;
+    for (1..scalar(@{$input->{seq1s}})) {
+        my $seq1 = $input->{seq1s}->[$_-1];
+        print {$fh1} ">$_\n";
+        print {$fh1} "$seq1\n";
+        if($input->{paired}) {
+            my $seq2 = $input->{seq2s}->[$_-1];
+            print {$fh2} ">$_\n";
+            print {$fh2} "$seq2\n";
+        }
+    }
+}
+
+##
+# Write the reads in %$input one sequence per line, with no names or
+# qualities.
+#
+sub dumpRaw {
+    my ($self, $input, $fh1, $fh2) = @_;
+    for (1..scalar(@{$input->{seq1s}})) {
+        my $seq1 = $input->{seq1s}->[$_-1];
+        print {$fh1} "$seq1\n";
+        if($input->{paired}) {
+            my $seq2 = $input->{seq2s}->[$_-1];
+            print {$fh2} "$seq2\n";
+        }
+    }
+}
+
+##
+# Generate the input (reads plus paired/fragment information)
+#
+# Samples reads from %$refs, writes them to two temporary files under
+# $conf->{tempdir} in a randomly chosen format, and returns a hash ref
+# describing the reads, the format and the files.  $conf keys read here:
+# no_color, no_paired, tempdir, randstr, small.
+#
+sub genInput {
+    my ($self, $refs, $conf) = @_;
+    # Select whether we're doing colorspace
+    my $color = int(rand(2));
+    $color = 0 if $conf->{no_color};
+    # Select whether we're doing unpaired or paired-end.
+    my $paired = int(rand(2));
+    $paired = 0 if $conf->{no_paired};
+    # Select format for read file
+    my @formats = ("fastq", "qseq", "fasta", "raw");
+    # Option for each format, in the same order as @formats
+    my @format_arg = ( "-q", "--qseq", "-f", "-r");
+    my $formati = int(rand(scalar(@formats)));
+    my $format = $formats[$formati];
+    my $format_arg = $format_arg[$formati];
+    my $tmprdfn1 = "$conf->{tempdir}/Sim.pm.$conf->{randstr}_1.$format";
+    my $tmprdfn2 = "$conf->{tempdir}/Sim.pm.$conf->{randstr}_2.$format";
+    # Generate reads from the subject genome; no sequencing error yet
+    my %input = (
+        seq1s => [],
+        seq2s => [],
+        qual1s => [],
+        qual2s => [],
+        mate1fw => 1,
+        mate2fw => 0,
+        paired => $paired,
+        color => $color,
+        format => $format,
+        format_arg => $format_arg,
+        file1 => $tmprdfn1,
+        file2 => $tmprdfn2 );
+    my $read_sampler = SampleRead->new(
+        "Sim.pm read sampler",
+        $self->fraglengen,
+        $self->rdlengen,
+        $self->rdlengen);
+    print STDERR "Created read sampler\n";
+    my $numreads = $self->rdnumgen->();
+    $numreads = ceil(sqrt($numreads)) if $conf->{small};
+    $numreads == int($numreads) || mydie("numreads $numreads not a number");
+    # Pick the mate orientations: one time in three both forward, one in
+    # three mate 1 reverse / mate 2 forward, otherwise the initial
+    # mate 1 forward / mate 2 reverse
+    my $tmp = int(rand(3));
+    if($tmp == 0) {
+        $input{mate2fw} = 1;
+    } elsif($tmp == 1) {
+        $input{mate1fw} = 0;
+        $input{mate2fw} = 1;
+    }
+    print STDERR "Sampling $numreads reads\n";
+    ref($refs) eq "HASH" || mydie("Reference input must be hash ref");
+    if($paired) {
+        $read_sampler->genReadPairs(
+            $numreads,       # number of reads/fragments to generate
+            $input{color},   # colorize?
+            $refs,           # hash ref holding reference sequences
+            $input{mate1fw}, # orientation of mate 1 when fragment comes from Watson strand
+            $input{mate2fw}, # orientation of mate 2 when fragment comes from Watson strand
+            $input{seq1s},   # put generated mate1 sequences here
+            $input{seq2s},   # put generated mate2 sequences here
+            $input{qual1s},  # put generated mate1 quality sequences here
+            $input{qual2s}); # put generated mate2 quality sequences here
+    } else {
+        $read_sampler->genReads(
+            $numreads,       # number of reads/fragments to generate
+            $input{color},   # colorize?
+            $refs,           # hash ref holding reference sequences
+            $input{seq1s},   # put generated sequences here
+            $input{qual1s}); # put generated quality sequences here
+    }
+    # TODO: with some probability, sort the reads
+    print STDERR "Dumping reads to temporary files $tmprdfn1 & $tmprdfn2\n";
+    # Dump reads to output file; both files are opened even for unpaired
+    # input, in which case nothing is written to the second one
+    my ($fh1, $fh2);
+    open($fh1, ">$tmprdfn1") || mydie("Could not open '$tmprdfn1' for writing");
+    open($fh2, ">$tmprdfn2") || mydie("Could not open '$tmprdfn2' for writing");
+    if($format eq "fastq") {
+        $self->dumpFastq(\%input, $fh1, $fh2);
+    } elsif($format eq "qseq") {
+        $self->dumpQseq(\%input, $fh1, $fh2);
+    } elsif($format eq "fasta") {
+        $self->dumpFasta(\%input, $fh1, $fh2);
+    } elsif($format eq "raw") {
+        $self->dumpRaw(\%input, $fh1, $fh2);
+    }
+    close($fh1);
+    close($fh2);
+    return \%input;
+}
+
+##
+# Mutate reads according to sequencing error model.
+#
+# Currently a stub: the input is returned unchanged.
+#
+sub mutateSeq {
+    my ($self, $input) = @_;
+    return $input;
+}
+
+##
+# Generate a setting for MA (match bonus).
+#
+# Returns "" when $local is true, and otherwise half the time.
+#
+sub genPolicyMA($) {
+    my $local = shift;
+    return "" if ($local || int(rand(2)) == 0);
+    return "MA=".Math::Random::random_uniform(1, 1, 40).";";
+}
+
+##
+# Generate a setting for MMP (mismatch penalty).
+#
+# Returns "" on a coin flip.  Otherwise returns "MMP=<type>;" where <type> is
+# one character of "CQR"; only "C" is followed by a number drawn with
+# Math::Random::random_uniform(1, 1, 40).
+#
+sub genPolicyMMP() {
+    if(int(rand(2)) == 0) {
+        return "";
+    }
+    my $type = substr("CQR", int(rand(3)), 1);
+    $type .= Math::Random::random_uniform(1, 1, 40) if $type eq "C";
+    return "MMP=$type;";
+}
+
+##
+# Generate a setting for NP (penalty for a mismatch involving an N).
+#
+# Returns "" on a coin flip.  Otherwise returns "NP=<type>;" where <type> is
+# one character of "CQR"; only "C" is followed by a number, which is 1 plus
+# the integer part of Math::Random::random_exponential(1, 3).
+#
+sub genPolicyNP() {
+    if(int(rand(2)) == 0) {
+        return "";
+    }
+    my $type = substr("CQR", int(rand(3)), 1);
+    if($type eq "C") {
+        my $penalty = int(Math::Random::random_exponential(1, 3)) + 1;
+        $type .= $penalty;
+    }
+    return "NP=$type;";
+}
+
+##
+# Generate a setting for RDG (read gap open and extend penalties).
+#
+# Returns undef on a coin flip.  Otherwise returns "<a>" or "<a>,<b>", where
+# <a> is drawn from random_uniform(1, 1, 50) and the optional <b> from
+# random_uniform(1, 1, 20).
+#
+sub genPolicyRDG() {
+    return undef unless int(rand(2));
+    my $setting = Math::Random::random_uniform(1, 1, 50);
+    # The second value is appended only half of the time
+    $setting .= "," . Math::Random::random_uniform(1, 1, 20) if int(rand(2)) == 0;
+    return "$setting";
+}
+
+##
+# Generate a setting for RFG (ref gap open and extend penalties).
+#
+# Same output format and value ranges as genPolicyRDG.
+#
+sub genPolicyRFG() {
+    return undef unless int(rand(2));
+    my $setting = Math::Random::random_uniform(1, 1, 50);
+    # The second value is appended only half of the time
+    $setting .= "," . Math::Random::random_uniform(1, 1, 20) if int(rand(2)) == 0;
+    return "$setting";
+}
+
+##
+# Generate a setting for MIN (function determining minimum acceptable score).
+#
+# Takes the local-mode flag.  Returns undef when the flag is true (without
+# consuming any random numbers) or on a coin flip.  Otherwise returns
+# "L,<x>,<y>", where both magnitudes come from random_uniform(1, 1, 10), <x>
+# is negated half of the time and <y> is always negated.
+#
+sub genPolicyMIN($) {
+    my ($local) = @_;
+    return undef if $local;
+    return undef if int(rand(2)) == 0;
+    my $first = Math::Random::random_uniform(1, 1, 10);
+    my $second = Math::Random::random_uniform(1, 1, 10);
+    # $local is known to be false here, so the sign adjustments always apply
+    $first = -$first if int(rand(2)) == 0;
+    $second = -$second;
+    return "L,$first,$second";
+}
+
+##
+# Generate a setting for NCEIL (function determining maximum number of Ns
+# allowed).
+#
+# Returns undef on a coin flip.  Otherwise returns "<x>,<y>" with both values
+# drawn from random_uniform(1, 0, 1.5).
+#
+sub genPolicyNCEIL() {
+    return undef unless int(rand(2));
+    my $first = Math::Random::random_uniform(1, 0, 1.5);
+    my $second = Math::Random::random_uniform(1, 0, 1.5);
+    return join(",", $first, $second);
+}
+
+##
+# Generate a setting for SEED (# mismatches, length, interval).
+#
+# Returns undef on a coin flip.  Otherwise returns "<mms>" or "<mms>,<len>".
+# Because the index is int(rand(2)), <mms> is always "0" or "1" even though
+# the candidate string is "012".  <len> is present 90% of the time and is the
+# integer part of random_uniform(1, 12, 32).
+#
+sub genPolicySEED() {
+    return undef unless int(rand(2));
+    # Number of mismatches
+    my @parts = (substr("012", int(rand(2)), 1));
+    # Length
+    push @parts, int(Math::Random::random_uniform(1, 12, 32)) if rand() < 0.9;
+    return join(",", @parts);
+}
+
+##
+# Generate a setting for -D (# DP fails in a row).
+#
+# Returns undef on a coin flip, otherwise the integer part of
+# random_uniform(1, 2, 50).
+#
+sub genPolicyFailStreak() {
+    return undef unless int(rand(2));
+    my $streak = int(Math::Random::random_uniform(1, 2, 50));
+    return $streak;
+}
+
+##
+# Generate a setting for -R (# seeding rounds).
+#
+# Returns undef on a coin flip, otherwise the integer part of
+# random_uniform(1, 1, 5).
+#
+sub genPolicySeedRounds() {
+    return undef unless int(rand(2));
+    my $rounds = int(Math::Random::random_uniform(1, 1, 5));
+    return $rounds;
+}
+
+##
+# Generate a setting for IVAL.  Interval between seeds is a function of the
+# read length OR square root of read length OR cube root of read length.
+#
+# Returns "" on a coin flip.  Otherwise returns "IVAL=<f>[,<mult>][,<off>];"
+# where <f> is one character of "LSC", <mult> is present 90% of the time and
+# <off> 30% of the time (independently).  Both are drawn uniformly from 0 up
+# to a maximum that depends on <f>.
+#
+sub genPolicyIVAL() {
+    return "" unless int(rand(2));
+    # Upper bounds for [multiplier, offset], keyed by function type
+    my %maxima = (
+        "L" => [0.5, 4.0],
+        "S" => [3.0, 7.0],
+        "C" => [5.0, 14.0],
+    );
+    # Pick the function type
+    my $setting = substr("LSC", int(rand(3)), 1);
+    my ($maxMult, $maxOff) = @{$maxima{$setting}};
+    # Multiplier
+    $setting .= "," . Math::Random::random_uniform(1, 0.0, $maxMult) if rand() < 0.9;
+    # Offset
+    $setting .= "," . Math::Random::random_uniform(1, 0.0, $maxOff) if rand() < 0.3;
+    return "IVAL=$setting;";
+}
+
+##
+# Generate a random but meaningful string of policy arguments to specify using
+# the -P option.
+#
+# Concatenates the MA, MMP, NP and IVAL settings (in that order) and drops
+# the final ";".  Returns "" when every generator returned "".
+#
+sub genPolicyArg($) {
+    my ($local) = @_;
+    my $policy = join("",
+        genPolicyMA($local),
+        genPolicyMMP(),
+        genPolicyNP(),
+        genPolicyIVAL());
+    return "" if $policy eq "";
+    # Every non-empty setting ends in ";" - strip the last one
+    return substr($policy, 0, -1);
+}
+
+##
+# Generate a hash of key/value arguments to pass to bowtie2.
+#
+# Parameters:
+#   $self  - Sim object (not used in the body)
+#   $input - input description hash ref (not used in the body)
+#   $color - true if the reads are colorspace; enables --col-keepends
+#   $conf  - configuration hash ref; only {maxreads} is read here
+#
+# Returns a hash ref mapping option name to value.  A value of "" marks an
+# option that takes no argument, and a value of undef (as returned by several
+# of the genPolicy* helpers) marks an option that align() leaves out.
+#
+sub genAlignArgs {
+    my ($self, $input, $color, $conf) = @_;
+    my %args = ();
+    # Local mode is chosen with probability 1/2; it is also passed on to the
+    # policy generators below
+    my $local = int(rand(2)) == 0;
+    $args{"-u"} = $conf->{maxreads} if defined($conf->{maxreads});
+    $args{"--mm"} = "" if int(rand(2)) == 0;
+    #$args{"--overhang"} = "" if int(rand(2)) == 0;
+    $args{"--trim3"} = int(rand(10)) if int(rand(2)) == 0;
+    $args{"--trim5"} = int(rand(10)) if int(rand(2)) == 0;
+    $args{"--nofw"} = "" if int(rand(4)) == 0;
+    $args{"--norc"} = "" if int(rand(4)) == 0;
+    $args{"--col-keepends"} = "" if ($color && int(rand(3)) == 0);
+    $args{"--gbar"} = int(Math::Random::random_exponential(1, 3))+1 if int(rand(4)) == 0;
+    $args{"--local"} = "" if $local;
+    # Reporting mode: -a, -k or -M with probability 1/5 each; otherwise none
+    # of the three is set
+    my $rep = int(rand(5));
+    if($rep == 0) {
+        $args{"-a"} = "";
+    } elsif($rep == 1) {
+        $args{"-k"} = int(Math::Random::random_exponential(1, 3))+2;
+    } elsif($rep == 2) {
+        $args{"-M"} = int(Math::Random::random_exponential(1, 3))+2;
+    }
+    # Each of these helpers may return undef, in which case the key exists
+    # but align() does not emit the option
+    $args{"--rdg"} = genPolicyRDG();
+    $args{"--rfg"} = genPolicyRFG();
+    $args{"--score-min"} = genPolicyMIN($local);
+    $args{"--n-ceil"} = genPolicyNCEIL();
+    $args{"-N"} = genPolicySEED();
+    $args{"-D"} = genPolicyFailStreak();
+    $args{"-R"} = genPolicySeedRounds();
+    # The policy string is wrapped in double quotes; it may be the empty
+    # string, giving --policy ""
+    $args{"--policy"} = ("\"".genPolicyArg($local)."\"") if rand() < 0.9;
+    $args{"--cp-min"} = int(Math::Random::random_exponential(1, 3)) + 2;
+    $args{"--cp-ival"} = int(Math::Random::random_exponential(1, 1)) + 1;
+    return \%args;
+}
+
+##
+# Align the given input set against the given index using the given
+# bowtie2 binary and arguments.  Sanity-check the SAM output.
+#
+# Parameters:
+#   $fa    - path to the reference FASTA, handed to AlignmentCheck
+#   $idx   - index basename; ".c" is appended for colorspace input
+#   $input - input description hash ref (color, format_arg, paired, file1,
+#            file2 are read)
+#   $conf  - configuration hash ref (bowtie2, bowtie2_debug, tempdir, randstr
+#            and single_thread are read)
+#   $args  - hash ref of options as produced by genAlignArgs; undef values are
+#            skipped and "" values are emitted as bare flags
+#
+# Always runs the debug binary and checks its output with AlignmentCheck.
+# Then, each with probability 1/3, re-runs the release binary (a) as is,
+# (b) with -p X and (c) with -p X --reorder, and diffs the result against the
+# debug output.  (b) and (c) are skipped when $conf->{single_thread} is set.
+# Calls mydie() on any failure.
+#
+sub align {
+    my ($self, $fa, $idx, $input, $conf, $args) = @_;
+    my $argstr = "";
+    for (keys %$args) {
+        if(defined($args->{$_})) {
+            $argstr .= " $_";
+            if($args->{$_} ne "") {
+                $argstr .= " ".$args->{$_};
+            }
+        }
+    }
+    $argstr .= " -C" if $input->{color};
+    $argstr .= " ".$input->{format_arg};
+    $idx .= ".c" if $input->{color};
+    my $inputfn;
+    if($input->{paired}) {
+        $inputfn = "-1 $input->{file1} -2 $input->{file2}";
+    } else {
+        $inputfn = $input->{file1};
+    }
+    # Create object that will help us sanity-check alignments
+    my $ac = AlignmentCheck->new(
+        "Sim.pm alignment checker", # name
+        [ $fa ],                    # fasta
+        "sam",                      # SAM-formatted alignments
+        0,                          # no bis-C
+        0                           # no bis-CpG
+    );
+    $ac->nrefs() > 0 || mydie("No references");
+    # Run the debug build of Bowtie and collect its output
+    defined($conf->{tempdir}) || mydie("No tmp dir");
+    my $als = "$conf->{tempdir}/Sim.pm.$conf->{randstr}.als";
+    my $als_debug = "$conf->{tempdir}/Sim.pm.$conf->{randstr}.debug.als";
+    my $als_px = "$conf->{tempdir}/Sim.pm.$conf->{randstr}.px.als";
+    my $als_px_reord = "$conf->{tempdir}/Sim.pm.$conf->{randstr}.px.reord.als";
+    my $cmd = "$conf->{bowtie2_debug} $argstr $idx $inputfn";
+    print "$cmd\n";
+    open(ALSDEB, ">$als_debug") || mydie("Could not open '$als_debug' for writing");
+    open(ALSDEBCMD, "$cmd |") || mydie("Could not open pipe '$cmd |'");
+    my $ival = 50;
+    my $nals = 0;
+    my @lines = ();
+    # Read the aligner's output line by line.  The loop must read from the
+    # pipe: an empty while() condition never terminates and never sets $_.
+    while(<ALSDEBCMD>) {
+        # Remove @PG line because CL: tag can legitimately differ
+        print ALSDEB $_ unless /^\@PG/;
+        push @lines, $_;
+        $nals++;
+        print STDERR " Read $nals alignments...\n" if ($nals % $ival) == 0;
+    }
+    close(ALSDEBCMD);
+    # close() on a pipe sets $?; save it right away so that nothing done by
+    # checkAlignments can overwrite it before it is tested
+    my $exitlevel = $?;
+    $ac->checkAlignments(\@lines, 0);
+    $exitlevel == 0 || mydie("bowtie2-align-debug exited with exitlevel $exitlevel:\n$cmd\n");
+    close(ALSDEB);
+    $ac->printSummary();
+    # The -p comparisons are only allowed when single_thread is not requested
+    my $multi = !$conf->{single_thread};
+    # With some probability, also run release Bowtie and check that
+    # results are identical to the debug run
+    if(int(rand(3)) == 0) {
+        print STDERR "ALSO checking that bowtie2 and bowtie2-align-debug match up\n";
+        # Remove @PG line because CL: tag can legitimately differ
+        $cmd = "$conf->{bowtie2} $argstr $idx $inputfn | grep -v '^\@PG' > $als";
+        print "$cmd\n";
+        system($cmd);
+        $? == 0 ||
+            mydie("Command '$cmd' failed with exitlevel $?");
+        $cmd = "diff -uw $als $als_debug";
+        print "$cmd\n";
+        system($cmd);
+        $? == 0 ||
+            mydie("diff found a difference between bowtie2 and bowtie2-align-debug ".
+                  "output for same input (above)\n");
+    }
+    # With some probability, also run release Bowtie in -p X mode with X > 1
+    # and without the --reorder argument and check that results are identical
+    if($multi && int(rand(3)) == 0) {
+        print STDERR "ALSO checking that bowtie2 and bowtie2 -p X w/ X > 1 match up\n";
+        my $p = int(rand(3))+2;
+        $cmd = "$conf->{bowtie2} $argstr -p $p $idx $inputfn | grep -v '^\@PG' > $als_px";
+        print "$cmd\n";
+        system($cmd);
+        $? == 0 ||
+            mydie("Command '$cmd' failed with exitlevel $?");
+        # Sort the $als_px file so that reads and alignments for a given
+        # read appear in the same order as in the sorted $als_debug file
+        $cmd = "sort -k 1,1 -n -k 2,2 -k 3,3 -k 4,4 < $als_px | grep -v '^\@PG' > $als_px.sorted";
+        print "$cmd\n";
+        system($cmd);
+        $? == 0 ||
+            mydie("Failed to sort alignment file $als_px\n");
+        # Sort the $als_debug file the same way
+        $cmd = "sort -k 1,1 -n -k 2,2 -k 3,3 -k 4,4 < $als_debug | grep -v '^\@PG' > $als_debug.sorted";
+        print "$cmd\n";
+        system($cmd);
+        $? == 0 ||
+            mydie("Failed to sort alignment file $als_debug\n");
+        $cmd = "diff -uw $als_debug.sorted $als_px.sorted";
+        print "$cmd\n";
+        system($cmd);
+        $? == 0 ||
+            mydie("diff found a difference between bowtie2-align-debug and bowtie2 ".
+                  "-p output for same input (above)\n");
+    }
+
+    # With some probability, also run release Bowtie in -p X mode with X > 1
+    # and with the --reorder argument and check that results are identical
+    if($multi && int(rand(3)) == 0) {
+        print STDERR "ALSO checking that bowtie2 and bowtie2 -p X --reorder w/ X > 1 match up\n";
+        my $p = int(rand(3))+2;
+        $cmd = "$conf->{bowtie2} $argstr -p $p $idx --reorder $inputfn | grep -v '^\@PG' > $als_px_reord";
+        print "$cmd\n";
+        system($cmd);
+        $? == 0 || mydie("Command '$cmd' failed with exitlevel $?");
+        $cmd = "diff -uw $als_debug $als_px_reord";
+        print "$cmd\n";
+        system($cmd);
+        $? == 0 ||
+            mydie("diff found a difference between bowtie2-align-debug and bowtie2 ".
+                  "-p --reorder output for same input (above)\n");
+    }
+}
+
+##
+# Generate a new test case
+#
+# Possible key/value pairs in $conf hash:
+#
+# 1. bowtie2_build: path to bowtie2-build binary
+# 2. bowtie2: path to bowtie2 binary
+# 3. bowtie2_build_debug: path to bowtie2-build-debug binary
+# 4. bowtie2_debug: path to bowtie2-debug binary
+# 5. tempdir: temporary directory for reference/reads/index
+# 6. no_paired: defined & non-0 -> don't generate paired-end datasets
+# 7. no_color: defined & non-0 -> don't generate colorspace datasets
+# 8.
single_thread: defined & non-0 -> don't use -p X where X > 1
+#
+# Fills in defaults for any missing binary paths and for tempdir, reseeds
+# rand(), stores a fresh random string in $conf->{randstr} (used in every
+# temporary file name), builds one reference and index, and then runs
+# several read/argument batches against that index.
+#
+sub nextCase {
+    my ($self, $conf) = @_;
+
+    $conf->{bowtie2_build} = "bowtie2-build" unless defined($conf->{bowtie2_build});
+    $conf->{bowtie2} = "bowtie2-align" unless defined($conf->{bowtie2});
+    $conf->{bowtie2_build_debug} = "bowtie2-build-debug" unless defined($conf->{bowtie2_build_debug});
+    $conf->{bowtie2_debug} = "bowtie2-align-debug" unless defined($conf->{bowtie2_debug});
+    $conf->{tempdir} = "/tmp" unless defined($conf->{tempdir});
+    # Mix in the process id so that processes started in the same second get
+    # different seeds
+    srand(time ^ $$);
+    $conf->{randstr} = randStr(8);
+
+    print "*** TEST CASE ***\n";
+
+    # Generate the references
+    my $refdnagen = $self->genDNAgen();
+    # Generate references and write them to a temporary fasta file
+    my $tmpfn = "$conf->{tempdir}/Sim.pm.$conf->{randstr}.fa";
+    my %refs = ();
+    $self->genRef(\%refs, $refdnagen, $conf, $tmpfn);
+    # Run bowtie2-build
+    my $tmpidxfn = "$conf->{tempdir}/Sim.pm.$conf->{randstr}";
+    my $buildArgs = $self->genBuildArgs();
+    $self->build($tmpfn, $tmpidxfn, $conf, $buildArgs);
+    # 10 batches per case, or 100 when $conf->{small} is set
+    my $numruns = 10;
+    $numruns *= 10 if $conf->{small}; # Lots of short runs
+    # For each batch of reads / bowtie options
+    for(1..$numruns) {
+        print "*** Run $_ of $numruns\n";
+        # Generate mutated version of the reference as our subject genome
+        my $subj = $self->mutate(\%refs);
+        # Generate all the input, including reads, pairedness,
+        # fragment information, whether it's colorspace, etc
+        my $input = $self->genInput($subj, $conf);
+        # Mutate the input
+        my $mutinput = $self->mutateSeq($input);
+        # Select Bowtie arguments
+        my $args = $self->genAlignArgs($mutinput, $input->{color}, $conf);
+        $self->align($tmpfn, $tmpidxfn, $mutinput, $conf, $args);
+        # Sanity check output.  Possible sanity checks are:
+        # 1. Check alignments & edits against reference
+        # 2. Compare bowtie2 and bowtie2-debug
+        # 3. Compare -p X>1 and -p 1
+    }
+}
+
+# Placeholder for unit tests when this file is run directly; currently it
+# only prints a message
+if($0 =~ /Sim\.pm$/) {
+    print "Running unit tests\n";
+    # Run unit tests
+}
+
+1;
diff --git a/scripts/sim/Test.pm b/scripts/sim/Test.pm
new file mode 100644
index 0000000..82b3793
--- /dev/null
+++ b/scripts/sim/Test.pm
@@ -0,0 +1,47 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright 2011, Ben Langmead
+#
+# This file is part of Bowtie 2.
+#
+# Bowtie 2 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bowtie 2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+package Test;
+use strict;
+use Carp;
+
+# Run code ref $f under the label $name; the test passes unless $f dies.
+sub test($$) {
+    my ($name, $f) = @_;
+    print "Test \"$name\"...";
+    $f->();
+    print "PASSED\n";
+}
+
+# Run code ref $f under the label $name; dies (with an empty message) unless
+# $f returns a true value.
+sub shouldSucceed($$) {
+    my ($name, $f) = @_;
+    print "Test \"$name\"...";
+    $f->() || die "";
+    print "PASSED\n";
+}
+
+# Run code ref $f under the label $name; dies (with an empty message) if $f
+# returns a true value.
+sub shouldFail($$) {
+    my ($name, $f) = @_;
+    print "Test \"$name\"...";
+    $f->() && die "";
+    print "PASSED\n";
+}
+
+1;
diff --git a/scripts/sim/contrib/ForkManager.pm b/scripts/sim/contrib/ForkManager.pm
new file mode 100644
index 0000000..331c59c
--- /dev/null
+++ b/scripts/sim/contrib/ForkManager.pm
@@ -0,0 +1,412 @@
+=head1 NAME
+
+Parallel::ForkManager - A simple parallel processing fork manager
+
+=head1 SYNOPSIS
+
+  use Parallel::ForkManager;
+
+  $pm = new Parallel::ForkManager($MAX_PROCESSES);
+
+  foreach $data (@all_data) {
+    # Forks and returns the pid for the child:
+    my $pid = $pm->start and next;
+
+    ... do some work with $data in the child process ...
+ + $pm->finish; # Terminates the child process + } + +=head1 DESCRIPTION + +This module is intended for use in operations that can be done in parallel +where the number of processes to be forked off should be limited. Typical +use is a downloader which will be retrieving hundreds/thousands of files. + +The code for a downloader would look something like this: + + use LWP::Simple; + use Parallel::ForkManager; + + ... + + @links=( + ["http://www.foo.bar/rulez.data","rulez_data.txt"], + ["http://new.host/more_data.doc","more_data.doc"], + ... + ); + + ... + + # Max 30 processes for parallel download + my $pm = new Parallel::ForkManager(30); + + foreach my $linkarray (@links) { + $pm->start and next; # do the fork + + my ($link,$fn) = @$linkarray; + warn "Cannot get $fn from $link" + if getstore($link,$fn) != RC_OK; + + $pm->finish; # do the exit in the child process + } + $pm->wait_all_children; + +First you need to instantiate the ForkManager with the "new" constructor. +You must specify the maximum number of processes to be created. If you +specify 0, then NO fork will be done; this is good for debugging purposes. + +Next, use $pm->start to do the fork. $pm returns 0 for the child process, +and child pid for the parent process (see also L). +The "and next" skips the internal loop in the parent process. NOTE: +$pm->start dies if the fork fails. + +$pm->finish terminates the child process (assuming a fork was done in the +"start"). + +NOTE: You cannot use $pm->start if you are already in the child process. +If you want to manage another set of subprocesses in the child process, +you must instantiate another Parallel::ForkManager object! + +=head1 METHODS + +=over 5 + +=item new $processes + +Instantiate a new Parallel::ForkManager object. You must specify the maximum +number of children to fork off. If you specify 0 (zero), then no children +will be forked. This is intended for debugging purposes. + +=item start [ $process_identifier ] + +This method does the fork. 
It returns the pid of the child process for +the parent, and 0 for the child process. If the $processes parameter +for the constructor is 0 then, assuming you're in the child process, +$pm->start simply returns 0. + +An optional $process_identifier can be provided to this method... It is used by +the "run_on_finish" callback (see CALLBACKS) for identifying the finished +process. + +=item finish [ $exit_code ] + +Closes the child process by exiting and accepts an optional exit code +(default exit code is 0) which can be retrieved in the parent via callback. +If you use the program in debug mode ($processes == 0), this method doesn't +do anything. + +=item set_max_procs $processes + +Allows you to set a new maximum number of children to maintain. Returns +the previous setting. + +=item wait_all_children + +You can call this method to wait for all the processes which have been +forked. This is a blocking wait. + +=back + +=head1 CALLBACKS + +You can define callbacks in the code, which are called on events like starting +a process or upon finish. + +The callbacks can be defined with the following methods: + +=over 4 + +=item run_on_finish $code [, $pid ] + +You can define a subroutine which is called when a child is terminated. It is +called in the parent process. + +The parameters of the $code are the following: + + - pid of the process, which is terminated + - exit code of the program + - identification of the process (if provided in the "start" method) + - exit signal (0-127: signal name) + - core dump (1 if there was core dump at exit) + +=item run_on_start $code + +You can define a subroutine which is called when a child is started. It is called +after the successful startup of a child in the parent process. 
+ +The parameters of the $code are the following: + + - pid of the process which has been started + - identification of the process (if provided in the "start" method) + +=item run_on_wait $code, [$period] + +You can define a subroutine which is called when the child process needs to wait +for the startup. If $period is not defined, then one call is done per +child. If $period is defined, then $code is called periodically and the +module waits for $period seconds between the two calls. Note, $period can also be +a fractional number. The exact "$period seconds" is not guaranteed; +signals can shorten and the process scheduler can make it longer (on busy +systems). + +The $code is also called in the "start" and the "wait_all_children" methods. + +No parameters are passed to the $code on the call. + +=back + +=head1 EXAMPLE + +=head2 Parallel get + +This small example can be used to get URLs in parallel. + + use Parallel::ForkManager; + use LWP::Simple; + my $pm=new Parallel::ForkManager(10); + for my $link (@ARGV) { + $pm->start and next; + my ($fn)= $link =~ /^.*\/(.*?)$/; + if (!$fn) { + warn "Cannot determine filename from $fn\n"; + } else { + $0.=" ".$fn; + print "Getting $fn from $link\n"; + my $rc=getstore($link,$fn); + print "$link downloaded. response code: $rc\n"; + }; + $pm->finish; + }; + +=head2 Callbacks + +Example of a program using callbacks to get child exit codes: + + use strict; + use Parallel::ForkManager; + + my $max_procs = 5; + my @names = qw( Fred Jim Lily Steve Jessica Bob Dave Christine Rico Sara ); + # hash to resolve PID's back to child specific information + + my $pm = new Parallel::ForkManager($max_procs); + + # Setup a callback for when a child finishes up so we can + # get its exit code + $pm->run_on_finish( + sub { my ($pid, $exit_code, $ident) = @_; + print "** $ident just got out of the pool ". 
+ "with PID $pid and exit code: $exit_code\n"; + } + ); + + $pm->run_on_start( + sub { my ($pid,$ident)=@_; + print "** $ident started, pid: $pid\n"; + } + ); + + $pm->run_on_wait( + sub { + print "** Have to wait for one children ...\n" + }, + 0.5 + ); + + foreach my $child ( 0 .. $#names ) { + my $pid = $pm->start($names[$child]) and next; + + # This code is the child process + print "This is $names[$child], Child number $child\n"; + sleep ( 2 * $child ); + print "$names[$child], Child $child is about to get out...\n"; + sleep 1; + $pm->finish($child); # pass an exit code to finish + } + + print "Waiting for Children...\n"; + $pm->wait_all_children; + print "Everybody is out of the pool!\n"; + +=head1 BUGS AND LIMITATIONS + +Do not use Parallel::ForkManager in an environment, where other child +processes can affect the run of the main program, so using this module +is not recommended in an environment where fork() / wait() is already used. + +If you want to use more than one copies of the Parallel::ForkManager, then +you have to make sure that all children processes are terminated, before you +use the second object in the main program. + +You are free to use a new copy of Parallel::ForkManager in the child +processes, although I don't think it makes sense. + +=head1 COPYRIGHT + +Copyright (c) 2000 Szabó, Balázs (dLux) + +All right reserved. This program is free software; you can redistribute it +and/or modify it under the same terms as Perl itself. 
+
+=head1 AUTHOR
+
+  dLux (Szabó, Balázs)
+
+=head1 CREDITS
+
+  Noah Robin (documentation tweaks)
+  Chuck Hirstius (callback exit status, example)
+  Grant Hopwood (win32 port)
+  Mark Southern (bugfix)
+
+=cut
+
+package Parallel::ForkManager;
+use POSIX ":sys_wait_h";
+use strict;
+use vars qw($VERSION);
+$VERSION='0.7.5';
+
+# Constructor.  $processes is the maximum number of children; 0 means no
+# forking at all.
+sub new { my ($c,$processes)=@_;
+  my $h={
+    max_proc   => $processes,
+    processes  => {},
+    in_child   => 0,
+  };
+  return bless($h,ref($c)||$c);
+};
+
+# Fork a child, waiting first while the pool is full.  Returns the child pid
+# in the parent and 0 in the child; with max_proc == 0 no fork happens and 0
+# is returned.
+sub start { my ($s,$identification)=@_;
+  die "Cannot start another process while you are in the child process"
+    if $s->{in_child};
+  while ($s->{max_proc} && ( keys %{ $s->{processes} } ) >= $s->{max_proc}) {
+    $s->on_wait;
+    $s->wait_one_child(defined $s->{on_wait_period} ? &WNOHANG : undef);
+  };
+  $s->wait_children;
+  if ($s->{max_proc}) {
+    my $pid=fork();
+    die "Cannot fork: $!" if !defined $pid;
+    if ($pid) {
+      $s->{processes}->{$pid}=$identification;
+      $s->on_start($pid,$identification);
+    } else {
+      $s->{in_child}=1 if !$pid;
+    }
+    return $pid;
+  } else {
+    $s->{processes}->{$$}=$identification;
+    $s->on_start($$,$identification);
+    return 0; # Simulating the child which returns 0
+  }
+}
+
+# In a child: exit with $x (default 0).  In no-fork mode: fire the finish
+# callback for the current process instead.
+sub finish { my ($s, $x)=@_;
+  if ( $s->{in_child} ) {
+    exit ($x || 0);
+  }
+  if ($s->{max_proc} == 0) { # max_proc == 0
+    $s->on_finish($$, $x ,$s->{processes}->{$$}, 0, 0);
+    delete $s->{processes}->{$$};
+  }
+  return 0;
+}
+
+# Reap, without blocking, every child that has already exited.
+sub wait_children { my ($s)=@_;
+  return if !keys %{$s->{processes}};
+  my $kid;
+  do {
+    $kid = $s->wait_one_child(&WNOHANG);
+  } while $kid > 0 || $kid < -1; # AS 5.6/Win32 returns negative PIDs
+};
+
+# Wait for one tracked child and fire its finish callback.  $par holds the
+# waitpid flags (0 = blocking).  Pids that are not in the pool are ignored.
+sub wait_one_child { my ($s,$par)=@_;
+  my $kid;
+  while (1) {
+    $kid = $s->_waitpid(-1,$par||=0);
+    last if $kid == 0 || $kid == -1; # AS 5.6/Win32 returns negative PIDs
+    redo if !exists $s->{processes}->{$kid};
+    my $id = delete $s->{processes}->{$kid};
+    $s->on_finish( $kid, $? >> 8 , $id, $? & 0x7f, $? & 0x80 ? 1 : 0);
+    last;
+  }
+  $kid;
+};
+
+# Loop until every tracked child has been reaped.
+sub wait_all_children { my ($s)=@_;
+  while (keys %{ $s->{processes} }) {
+    $s->on_wait;
+    $s->wait_one_child(defined $s->{on_wait_period} ? &WNOHANG : undef);
+  };
+}
+
+# Register a finish callback, either for one pid or (key 0) for all children.
+sub run_on_finish { my ($s,$code,$pid)=@_;
+  $s->{on_finish}->{$pid || 0}=$code;
+}
+
+# Invoke the pid-specific finish callback, falling back to the generic one.
+sub on_finish { my ($s,$pid,@par)=@_;
+  my $code=$s->{on_finish}->{$pid} || $s->{on_finish}->{0} or return 0;
+  $code->($pid,@par);
+};
+
+# Register the wait callback and its optional polling period.
+sub run_on_wait { my ($s,$code, $period)=@_;
+  $s->{on_wait}=$code;
+  $s->{on_wait_period} = $period;
+}
+
+# Invoke the wait callback, then sleep for the polling period if one is set.
+sub on_wait { my ($s)=@_;
+  if(ref($s->{on_wait}) eq 'CODE') {
+    $s->{on_wait}->();
+    if (defined $s->{on_wait_period}) {
+        local $SIG{CHLD} = sub { } if ! defined $SIG{CHLD};
+        select undef, undef, undef, $s->{on_wait_period}
+    };
+  };
+};
+
+# Register the start callback.
+sub run_on_start { my ($s,$code)=@_;
+  $s->{on_start}=$code;
+}
+
+# Invoke the start callback if one is registered.
+sub on_start { my ($s,@par)=@_;
+  $s->{on_start}->(@par) if ref($s->{on_start}) eq 'CODE';
+};
+
+# Set the maximum number of children; returns the value just assigned.
+sub set_max_procs { my ($s, $mp)=@_;
+  $s->{max_proc} = $mp;
+}
+
+# OS dependant code follows...
+
+sub _waitpid { # Call waitpid() in the standard Unix fashion.
+  return waitpid($_[1],$_[2]);
+}
+
+# On ActiveState Perl 5.6/Win32 build 625, waitpid(-1, &WNOHANG) always
+# blocks unless an actual PID other than -1 is given.
+sub _NT_waitpid { my ($s, $pid, $par) = @_;
+  if ($par == &WNOHANG) { # Need to nonblock on each of our PIDs in the pool.
+    my @pids = keys %{ $s->{processes} };
+    # Simulate -1 (no processes awaiting cleanup.)
+    return -1 unless scalar(@pids);
+    # Check each PID in the pool.
+    my $kid;
+    foreach $pid (@pids) {
+      $kid = waitpid($pid, $par);
+      return $kid if $kid != 0; # AS 5.6/Win32 returns negative PIDs.
+    }
+    return $kid;
+  } else { # Normal waitpid() call.
+    return waitpid($pid, $par);
+  }
+}
+
+# Swap in the Windows implementation of _waitpid where needed
+{
+  local $^W = 0;
+  if ($^O eq 'NT' or $^O eq 'MSWin32') {
+    *_waitpid = \&_NT_waitpid;
+  }
+}
+
+1;
diff --git a/scripts/sim/run.pl b/scripts/sim/run.pl
new file mode 100644
index 0000000..02bf282
--- /dev/null
+++ b/scripts/sim/run.pl
@@ -0,0 +1,135 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright 2011, Ben Langmead
+#
+# This file is part of Bowtie 2.
+#
+# Bowtie 2 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bowtie 2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+use FindBin qw($Bin);
+use lib "$Bin";
+use lib "$Bin/contrib";
+use Sim;
+use ForkManager;
+
+# Simulator configuration
+my %conf = (
+    bowtie2_build       => "bowtie2-build",
+    bowtie2             => "bowtie2",
+    bowtie2_build_debug => "bowtie2-build-debug",
+    bowtie2_debug       => "bowtie2-debug",
+    tempdir             => "/tmp",
+    no_color            => 1,
+    small               => 1
+);
+
+# Number of parallel processes to use
+my $cpus = 1;
+
+my $usage = qq!
+run.pl [options*]
+
+Options:
+
+  --bowtie2 <path>             Path to bowtie2 release binary
+  --bowtie2-debug <path>       Path to bowtie2 debug binary
+  --bowtie2-build <path>       Path to bowtie2-build release binary
+  --bowtie2-build-debug <path> Path to bowtie2-build debug binary
+  --tempdir <path>             Put temporary files here
+  --cases <int>                Each thread runs around <int> cases (def: 5)
+  --cpus <int> / -p <int>      Run test cases in <int> threads at once
+  --maxreads <int>             Handle at most <int> reads per case
+  --numrefs <int>              Generate <int> refs per case
+  --die-with-child             Kill parent as soon as 1 child dies
+  --no-die-with-child          Don\'t kill parent as soon as 1 child dies
+  --small                      Make small test cases
+  --help                       Print this usage message
+
+!;
+
+my $help = 0;
+my $ncases = 5;
+my $dieWithChild = 1;
+
+GetOptions(
+    "bowtie2=s"             => \$conf{bowtie2},
+    "bowtie2-debug=s"       => \$conf{bowtie2_debug},
+    "bowtie2-build=s"       => \$conf{bowtie2_build},
+    "bowtie2-build-debug=s" => \$conf{bowtie2_build_debug},
+    "tempdir|tmpdir=s"      => \$conf{tempdir},
+    "cases-per-thread=i"    => \$ncases,
+    "small"                 => \$conf{small},
+    "large"                 => sub { $conf{small} = 0 },
+    "no-paired"             => \$conf{no_paired},
+    "color"                 => sub { $conf{no_color} = 0 },
+    "no-color"              => \$conf{no_color},
+    "help"                  => \$help,
+    "die-with-child"        => \$dieWithChild,
+    "no-die-with-child"     => sub { $dieWithChild = 0 },
+    "p|cpus=i"              => \$cpus,
+    "u|qupto|maxreads=i"    => \$conf{maxreads},
+    "numrefs|num-refs=i"    => \$conf{numrefs},
+) || die "Bad options;";
+
+if($help) {
+    print $usage;
+    exit 0;
+}
+
+my $sim = Sim->new();
+my $pm = new Parallel::ForkManager($cpus);
+
+# Callback for when a child finishes so we can get its exit code
+my @childFailed = ();
+my @childFailedPid = ();
+
+$pm->run_on_finish(sub {
+    my ($pid, $exit_code, $ident) = @_;
+    if($exit_code != 0) {
+        push @childFailed, $exit_code;
+        push @childFailedPid, $pid;
+        !$dieWithChild || die "Dying with child with PID $pid";
+    }
+});
+
+# One forked child per test case; the fork manager keeps at most $cpus
+# children alive at a time
+my $totcases = $ncases * $cpus;
+for(1..$totcases) {
+    my $childPid = $pm->start;
+    if($childPid != 0) {
+        next; # spawn the next child
+    }
+    $sim->nextCase(\%conf);
+    $pm->finish;
+}
+$pm->wait_all_children;
+# Only reachable with failures when --no-die-with-child was given
+for(@childFailedPid) {
+    print STDERR "Error message from child with pid $_:\n";
+    my $fn = ".run.pl.child.$_";
+    if(open(ER, $fn)) {
+        print STDERR "---------\n";
+        # Copy the child's error file to STDERR line by line
+        while(<ER>) {
+            print STDERR $_;
+        }
+        print STDERR "---------\n";
+        close(ER);
+    } else {
+        print STDERR "(could not open $fn)\n";
+    }
+}
+# Do not claim success when at least one child exited with a non-zero status
+if(scalar(@childFailedPid) > 0) {
+    print STDERR "FAILED\n";
+    exit 1;
+}
+print STDERR "PASSED\n";
diff --git a/scripts/sim/run.sh b/scripts/sim/run.sh
new file mode 100644
index 0000000..6e6c2ae
--- /dev/null
+++ b/scripts/sim/run.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+#
+# Copyright 2011, Ben Langmead
+#
+# This file is part of Bowtie 2.
+#
+# Bowtie 2 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bowtie 2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# Usage: run.sh <cpus> [run.pl options...]
+# Builds the four binaries, then runs the simulator with them.
+CPUS=$1
+shift
+make -j $CPUS bowtie2-align bowtie2-align-debug bowtie2-build bowtie2-build-debug && \
+perl scripts/sim/run.pl \
+    --bowtie2=./bowtie2-align \
+    --bowtie2-debug=./bowtie2-align-debug \
+    --bowtie2-build=./bowtie2-build \
+    --bowtie2-build-debug=./bowtie2-build-debug \
+    --cpus=$CPUS \
+    "$@"
diff --git a/scripts/sim/unit.sh b/scripts/sim/unit.sh
new file mode 100644
index 0000000..f4fdcf8
--- /dev/null
+++ b/scripts/sim/unit.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+#
+# Copyright 2011, Ben Langmead
+#
+# This file is part of Bowtie 2.
+#
+# Bowtie 2 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bowtie 2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+d=`dirname $0`
+for i in `ls $d/*.pm` ; do echo $i ; perl $i --test ; done
diff --git a/scripts/test/DNA.pm b/scripts/test/DNA.pm
new file mode 100644
index 0000000..06a6ec7
--- /dev/null
+++ b/scripts/test/DNA.pm
@@ -0,0 +1,129 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright 2011, Ben Langmead
+#
+# This file is part of Bowtie 2.
+#
+# Bowtie 2 is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Bowtie 2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+package DNA;
+use strict;
+use warnings;
+use Carp;
+
+# Complement of every IUPAC nucleotide code, in upper and lower case
+my %revcompMap = (
+    "A" => "T", "a" => "t",
+    "T" => "A", "t" => "a",
+    "C" => "G", "c" => "g",
+    "G" => "C", "g" => "c",
+    "R" => "Y", "r" => "y",
+    "Y" => "R", "y" => "r",
+    "M" => "K", "m" => "k",
+    "K" => "M", "k" => "m",
+    "S" => "S", "s" => "s",
+    "W" => "W", "w" => "w",
+    "B" => "V", "b" => "v",
+    "V" => "B", "v" => "b",
+    "H" => "D", "h" => "d",
+    "D" => "H", "d" => "h",
+    "N" => "N", "n" => "n"
+);
+
+# Unambiguous bases covered by each IUPAC code
+my %compat = (
+    "A" => "A",
+    "T" => "T",
+    "C" => "C",
+    "G" => "G",
+    "R" => "AG",
+    "Y" => "CT",
+    "M" => "AC",
+    "K" => "GT",
+    "S" => "CG",
+    "W" => "AT",
+    "B" => "CGT",
+    "V" => "ACG",
+    "H" => "ACT",
+    "D" => "AGT",
+    "N" => "N"
+);
+
+# Unambiguous bases excluded by each IUPAC code
+my %incompat = (
+    "A" => "CGT",
+    "T" => "ACG",
+    "C" => "AGT",
+    "G" => "ACT",
+    "R" => "CT",
+    "Y" => "AG",
+    "M" => "GT",
+    "K" => "AC",
+    "S" => "AT",
+    "W" => "CG",
+    "B" => "A",
+    "V" => "T",
+    "H" => "G",
+    "D" => "C",
+    "N" => "N"
+);
+
+# The four unambiguous bases, in upper and lower case
+my %unambigSet = (
+    "A" => 1, "a" => 1,
+    "C" => 1, "c" => 1,
+    "G" => 1, "g" => 1,
+    "T" => 1, "t" => 1
+);
+
+##
+# Return the complement of a single character, incl. if it's IUPAC.  Dies if
+# the character is not a key of %revcompMap.
+#
+sub comp($) {
+    return $revcompMap{$_[0]} || die "Can't reverse-complement '$_[0]'";
+}
+
+##
+# Return the reverse complement, incl. if it's IUPAC.  When $color is true
+# the string is only reversed, not complemented.
+#
+sub revcomp {
+    my ($seq, $color) = @_;
+    my $reversed = reverse $seq;
+    return $reversed if $color;
+    # Complement character by character, left to right
+    return join("", map { comp($_) } split(//, $reversed));
+}
+
+##
+# Return true iff it's unambiguous.
+#
+sub unambig($) {
+    my ($c) = @_;
+    return $unambigSet{$c};
+}
+
+##
+# Manipulate DNA in an integer-indexed fashion.
#
# plus($c, $amt): number the bases A, C, G, T as 0..3, add $amt modulo 4 and
# return the resulting base in upper case.  $c is matched case-insensitively;
# dies if it is not one of those four letters.
#
sub plus($$) {
    my ($base, $shift) = @_;
    my @alphabet = qw(A C G T);
    my %rank;
    @rank{@alphabet} = (0 .. $#alphabet);
    $base = uc $base;
    die unless defined $rank{$base};
    return $alphabet[($rank{$base} + $shift) % 4];
}

1;
diff --git a/scripts/test/Read.pm b/scripts/test/Read.pm
new file mode 100644
index 0000000..819a077
--- /dev/null
+++ b/scripts/test/Read.pm
@@ -0,0 +1,178 @@
#!/usr/bin/perl -w

#
# Copyright 2011, Ben Langmead
#
# This file is part of Bowtie 2.
#
# Bowtie 2 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Bowtie 2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
#

package Read;
use strict;
use Carp;
use FindBin qw($Bin);
use lib $Bin;
use DNA;
use Test;

##
# Construct a read.  Arguments after the class: name, sequence, qualities,
# color flag, orientation, original read text.  An undefined name becomes
# "noname" and a false color flag becomes 0; a false sequence, qualities,
# orientation or original string makes the constructor croak.
#
sub new {
    my ($class, $name, $seq, $qual, $color, $fw, $orig) = @_;
    $name = "noname" if !defined($name);
    croak("No sequence")             unless $seq;
    croak("No qualities")            unless $qual;
    croak("No orientation")          unless $fw;
    croak("No original read string") unless $orig;
    my $self = {
        _name  => $name,
        _seq   => $seq,
        _qual  => $qual,
        _color => $color || 0,
        _fw    => $fw,
        _orig  => $orig,
    };
    return bless $self, $class;
}

# Read-only accessors for the fields stored by new().
sub name  { my ($self) = @_; return $self->{_name};  }
sub seq   { my ($self) = @_; return $self->{_seq};   }
sub qual  { my ($self) = @_; return $self->{_qual};  }
sub color { my ($self) = @_; return $self->{_color}; }
sub fw    { my ($self) = @_; return $self->{_fw};    }
sub orig  { my ($self) = @_; return $self->{_orig};  }

# Number of characters in the sequence string.
sub len   { my ($self) = @_; return length($self->seq()); }

##
# Obtain a character from the read.
#
# at($off, $ori) returns the list (base, quality) for one position.  $off is
# a 0-based offset, counted from the last character when $ori is "RtL" and
# from the first character otherwise.  The base is upper-cased.  Dies if the
# offset does not select exactly one sequence character.
#
sub at {
    my ($self, $off, $ori) = @_;
    # A negative substr offset counts from the end: -1 is the last character.
    my $pos = ($ori eq "RtL") ? -$off-1 : $off;
    my $c = uc substr($self->seq(), $pos, 1);
    my $q = substr($self->qual(), $pos, 1);
    length($c) == 1 || die;
    return ($c, $q);
}

##
# Load FASTQ records from an open filehandle and append one Read per
# record to the given array ref (a new array is created when none is
# passed).  Every record is taken to be exactly four lines; the first
# character of the name line is dropped.  Croaks if the input ends in the
# middle of a record.  Returns the array ref.
#
sub fromFastq {
    my ($fh, $color, $reads) = @_;
    $reads = [] unless defined($reads);
    # Read into a lexical so the caller's global $_ is left untouched.
    while(defined(my $l1 = <$fh>)) {
        my $l2 = <$fh>; defined($l2) || croak("Name line followed by EOF");
        my $l3 = <$fh>; defined($l3) || croak("Sequence line followed by EOF");
        my $l4 = <$fh>; defined($l4) || croak("Name2 line followed by EOF");
        # Keep the record verbatim (newlines included) before chomping.
        my $orig = "$l1$l2$l3$l4";
        chomp($l1); chomp($l2); chomp($l3); chomp($l4);
        push @{$reads}, Read->new(substr($l1, 1), $l2, $l4, $color, "FW", $orig);
    }
    return $reads;
}

##
# Open a reads file for reading and return the filehandle.  Names ending in
# ".gz" are read through "gzip -dc".  The name is handed to open() and to
# gzip as a separate argument, never through a shell, so spaces and shell
# metacharacters in it are taken literally.  Croaks if open() fails.
#
sub _openForReading {
    my ($f) = @_;
    my $fh;
    my $ok = ($f =~ /\.gz$/)
        ? open($fh, "-|", "gzip", "-dc", "--", $f)
        : open($fh, "<", $f);
    $ok || croak("Could not open $f for reading");
    return $fh;
}

##
# Load FASTQ reads from each file named in the array ref $fqs (see
# fromFastq) into the given reads array.  Returns the array ref.
#
sub fromFastqs {
    my ($fqs, $color, $reads) = @_;
    $reads = [] unless defined($reads);
    for my $f (@$fqs) {
        my $fqfh = _openForReading($f);
        fromFastq($fqfh, $color, $reads);
        close($fqfh);
    }
    return $reads;
}

##
# Load FASTA records from an open filehandle into the given reads array.
# Every record is taken to be exactly two lines (name line, sequence line);
# each read gets a quality string of "I" characters as long as its
# sequence.  Croaks if a name line is the last line of input.  Returns the
# array ref.
#
sub fromFasta {
    my ($fh, $color, $reads) = @_;
    $reads = [] unless defined($reads);
    # Read into a lexical so the caller's global $_ is left untouched.
    while(defined(my $l1 = <$fh>)) {
        my $l2 = <$fh>; defined($l2) || croak("Name line followed by EOF");
        my $orig = "$l1$l2";
        chomp($l1); chomp($l2);
        my $qual = "I" x length($l2);
        push @{$reads}, Read->new(substr($l1, 1), $l2, $qual, $color, "FW", $orig);
    }
    return $reads;
}

##
# Load FASTA reads from each file named in the array ref $fas (see
# fromFasta) into the given reads array.  Returns the array ref.
#
sub fromFastas {
    my ($fas, $color, $reads) = @_;
    $reads = [] unless defined($reads);
    for my $f (@$fas) {
        my $fafh = _openForReading($f);
        fromFasta($fafh, $color, $reads);
        close($fafh);
    }
    return $reads;
}

##
# Turn each sequence string in the array ref $strs into a Read and append
# it to the given reads array.  Reads are named by their 0-based position
# in $strs, get all-"I" qualities, and keep the string itself as their
# original text.  Returns the array ref.
#
sub fromStrings {
    my ($strs, $color, $reads) = @_;
    $reads = [] unless defined($reads);
    my $idx = 0;
    for my $str (@$strs) {
        my $qual = "I" x length($str);
        push @{$reads}, Read->new($idx, $str, $qual, $color, "FW", $str);
        $idx++;
    }
    return $reads;
}

# Unit test: at() from both ends of a read.
sub test1 {
    my $r = Read->new("blah", "TTACGAACCACAACGTATCG", "I"x20, 0, "FW", "?");
    my ($c, $q) = $r->at(0, "LtR");
    ($c eq "T" && $q eq "I") || croak("Expected (T, I), got ($c, $q)\n");
    ($c, $q) = $r->at(0, "RtL");
    ($c eq "G" && $q eq "I") || croak("Expected (G, I), got ($c, $q)\n");
    ($c, $q) = $r->at(1, "LtR");
    ($c eq "T" && $q eq "I") || croak("Expected (T, I), got ($c, $q)\n");
    ($c, $q) = $r->at(1, "RtL");
    ($c eq "C" && $q eq "I") || croak("Expected (C, I), got ($c, $q)\n");
    return 1;
}

# Unit test: fromStrings() fills in sequence, qualities and name.
sub test2 {
    my $rs = fromStrings(["ACGATGCTACG", "TGACGATGCTAG"], 0);
    $rs->[0]->seq()  eq "ACGATGCTACG"  || croak($rs->[0]->seq());
    $rs->[0]->qual() eq "IIIIIIIIIII"  || croak($rs->[0]->qual());
    $rs->[0]->name() eq "0"            || croak($rs->[0]->name());
    $rs->[1]->seq()  eq "TGACGATGCTAG" || croak($rs->[1]->seq());
    $rs->[1]->qual() eq "IIIIIIIIIIII" || croak($rs->[1]->qual());
    $rs->[1]->name() eq "1"            || croak($rs->[1]->name());
    return 1;
}

# Run the unit tests when this file is executed directly as a script.
if($0 =~ /Read\.pm$/) {
    print "Running unit tests\n";
    # Run unit tests
    print "Test \"test1\"..."; test1(); print "PASSED\n";
    print "Test \"test2\"..."; test2(); print "PASSED\n";
}

1;
diff --git a/scripts/test/args.pl b/scripts/test/args.pl
new file mode 100644
index 0000000..15740f9
--- /dev/null
+++ b/scripts/test/args.pl
@@ -0,0 +1,134 @@
#!/usr/bin/perl -w

#
# Copyright 2011, Ben Langmead
#
# This file is part of Bowtie 2.
#
# Bowtie 2 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Bowtie 2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
#

##
# args.pl
#
# Basic tests to ensure that bad combinations of arguments are rejected
# and good ones are accepted.
#

use strict;
use warnings;

##
# Return a runnable command for the tool $name: "./$name" if running it
# with --version exits 0, otherwise whatever `which $name` finds in PATH.
# Dies if neither can be executed.
#
sub find_tool {
    my ($name) = @_;
    my $local = "./$name";
    return $local if system("$local --version") == 0;
    print STDERR "Could not execute $local; looking in PATH...\n";
    # Ask which for the bare tool name; a "./"-prefixed path would only be
    # re-checked as a path and PATH would never be searched.
    my $found = `which $name`;
    chomp($found);
    ($found ne "" && system("$found --version") == 0) ||
        die "Could not find $name in current directory or in PATH\n";
    return $found;
}

##
# Create (or overwrite) the file $path with $content; dies on I/O failure.
#
sub write_file {
    my ($path, $content) = @_;
    # "or", not "||": "||" would bind to the file name string and the die
    # could never run.
    open(my $fh, ">", $path) or die "Could not open $path for writing: $!\n";
    print $fh $content;
    close($fh) or die "Could not close $path: $!\n";
}

my $bowtie2   = find_tool("bowtie2");
my $bowtie2_d = find_tool("bowtie2-debug");

if(! -f "e_coli_c.1.ebwt") {
    print STDERR "Making colorspace e_coli index\n";
    my $bowtie2_build = find_tool("bowtie2-build");
    system("$bowtie2_build -C genomes/NC_008253.fna e_coli_c") && die;
} else {
    print STDERR "Colorspace e_coli index already present...\n";
}

# Input files referenced by the @badEx / @goodEx argument sets below.
write_file(".args.pl.1.fa", ">\nT0120012002012030303023\n");
write_file(".args.pl.1.qv", ">\n10 11 12 10 10 11 12 10 10 12 10 22 33 23 13 10 12 23 24 25 26 27\n");
write_file(".args.pl.2.qv", ">\n9 10 11 12 10 10 11 12 10 10 12 10 22 33 23 13 10 12 23 24 25 26 27\n");

my @bad = (
    "-N 6",
    "-N 5",
    "-N 4",
    "-N 3"
);

my @badEx = (
    "e_coli -f .args.pl.1.fa -Q .args.pl.1.qv",
    "e_coli_c -f .args.pl.1.fa -Q .args.pl.1.qv"
);

my @good = (
    "-N 0",
    "-N 1",
    "-N 2"
);

my @goodEx = (
    "-C e_coli_c -f .args.pl.1.fa -Q .args.pl.1.qv",
    "-C e_coli_c -f .args.pl.1.fa -Q .args.pl.2.qv"
);

# Echo a command line, run it, and return system()'s status.
sub run($) {
    my $cmd = shift;
    print "$cmd\n";
    return system($cmd);
}

##
# Run both binaries with each argument string in @$argsets followed by
# $tail.  Dies unless every run exits 0 when $should_accept is true, or
# exits non-zero when it is false.
#
sub check_args {
    my ($argsets, $tail, $should_accept) = @_;
    my ($verb, $kind) = $should_accept ? ("accepted", "good") : ("rejected", "bad");
    for my $args (@$argsets) {
        for my $tool ([$bowtie2, "bowtie2"], [$bowtie2_d, "bowtie2-debug"]) {
            my ($exe, $label) = @$tool;
            my $accepted = (run("$exe $args $tail") == 0);
            die "$label should have $verb: \"$args\"\n"
                if ($accepted xor $should_accept);
        }
        print "PASSED: $kind args \"$args\"\n";
    }
}

print "Bad:\n";
check_args(\@bad, "e_coli reads/e_coli_1000.fq /dev/null", 0);
print "\nBadEx:\n";
check_args(\@badEx, "/dev/null", 0);
print "\nGood:\n";
check_args(\@good, "e_coli reads/e_coli_1000.fq /dev/null", 1);
print "\nGoodEx:\n";
check_args(\@goodEx, "/dev/null", 1);
diff --git a/scripts/test/simple_tests.pl b/scripts/test/simple_tests.pl
new file mode 100644
index 0000000..2bae3de
--- /dev/null
+++ b/scripts/test/simple_tests.pl
@@ -0,0 +1,4805 @@
#!/usr/bin/perl -w

#
# Copyright 2011, Ben Langmead
#
# This file is part of Bowtie 2.
#
# Bowtie 2 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Bowtie 2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
#

##
# Give simple tests with known results to bowtie2.
#

use strict;
use warnings;
use Getopt::Long;
use FindBin qw($Bin);
use lib $Bin;
use List::Util qw(max min);
use Data::Dumper;
use DNA;
use Clone qw(clone);
use Test::Deep;

my $bowtie2 = "";
my $bowtie2_build = "";
my $skipColor = 1;

GetOptions(
    "bowtie2=s"       => \$bowtie2,
    "bowtie2-build=s" => \$bowtie2_build,
    "skip-color"      => \$skipColor) || die "Bad options";

if(! -x $bowtie2 || !
-x $bowtie2_build) { + my $bowtie2_dir = `dirname $bowtie2`; + my $bowtie2_exe = `basename $bowtie2`; + my $bowtie2_build_exe = `basename $bowtie2_build`; + chomp($bowtie2_dir); + chomp($bowtie2_exe); + chomp($bowtie2_build_exe); + system("make -C $bowtie2_dir $bowtie2_exe $bowtie2_build_exe") && die; +} + +(-x $bowtie2) || die "Cannot run '$bowtie2'"; +(-x $bowtie2_build) || die "Cannot run '$bowtie2_build'"; + +my @cases = ( + + { name => "Left-align insertion", + ref => [ "GCGATATCTACGACTGCTACGTACAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ], + norc => 1, + reads => [ "ACAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA" ], + # ref: AC-AAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA + # read: ACAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA + # 0123456789012345678901234567890123456789 + cigar => [ "2M1I40M" ], + samoptflags => [ { + "MD:Z:42" => 1, + "YT:Z:UU" => 1, + "NM:i:1" => 1, + "XG:i:1" => 1, + "XO:i:1" => 1, + "AS:i:-8" => 1 } ], + report => "", + args => "" + }, + + { name => "Left-align deletion", + ref => [ "GCGATATCTACGACTGCTACGTACAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ], + norc => 1, + reads => [ "ACGTACAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA" ], + # ref: ACGTACAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA + # read: ACGTAC-AAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA + # 012345678901234567890123456789012345678 + cigar => [ "6M1D39M" ], + samoptflags => [ { + "MD:Z:6^A39" => 1, + "YT:Z:UU" => 1, + "NM:i:1" => 1, + "XG:i:1" => 1, + "XO:i:1" => 1, + "AS:i:-8" => 1 } ], + report => "", + args => "" + }, + + { name => "Left-align insertion with mismatch at LHS", + ref => [ "GCGATATCTACGACTGCTACGCCCAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ], + norc => 1, + reads => [ "TATCTACGACTGCTACGCCCTAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC" ], + # ref: GCGATATCTACGACTGCTACGCCCAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC + # read: TATCTACGACTGCTACGCCC-TAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC + # 
01234567890123456789-012345678901234567890123456789012345678901234 + # 0 1 2 3 4 5 6 + cigar => [ "20M1D45M" ], + samoptflags => [ { + "MD:Z:20^A0A44" => 1, + "YT:Z:UU" => 1, + "NM:i:2" => 1, + "XG:i:1" => 1, + "XO:i:1" => 1, + "XM:i:1" => 1, + "AS:i:-14" => 1 } ], + report => "", + args => "" + }, + + # This won't necessarily pass because the original location of the deletion + # might + #{ name => "Left-align deletion with mismatch at LHS", + # ref => [ "GCGATATCTACGACTGCTACGCCCAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ], + # norc => 1, + # reads => [ "TATCTACGACTGCTACGCCAAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC" ], + # # ref: GCGATATCTACGACTGCTACGCCC-AAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC + # # read: TATCTACGACTGCTACGCCAAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC + # # 01234567890123456789-012345678901234567890123456789012345678901234 + # # 0 1 0 1 2 3 4 + # cigar => [ "20M1I45M" ], + # samoptflags => [ { + # "MD:Z:19C45" => 1, + # "YT:Z:UU" => 1, + # "NM:i:2" => 1, + # "XG:i:1" => 1, + # "XO:i:1" => 1, + # "XM:i:1" => 1, + # "AS:i:-14" => 1 } ], + # report => "", + # args => "" + #}, + + { name => "Flags for when mates align non-concordantly, with many alignments for one", + # 012345678 + ref => [ 
"CAGCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCG" ], + # 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 + # 0 1 2 3 4 5 6 7 8 9 0 1 + # 0 1 + norc => 1, + mate1s => [ "GCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGA" ], + mate2s => [ "ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA" ], + # 981 1064 1147 + # ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA + samflags_map => [{ 981 => (1 | 128), 1064 => (1 | 128), 1147 => (1 | 128), 2 => (1 | 64) }], + report => "", + args => "" + }, + + { name => "Flags for when mates align non-concordantly, with many alignments for one", + # 012345678 + ref => 
[ "CAGCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAG" ], + # 
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 + # 0 1 2 3 4 5 6 7 8 9 0 1 + # 0 1 + norc => 1, + mate1s => [ "GCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGA" ], + mate2s => [ "ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA" ], + tlen_map => [{ 2 => 1021, 981 => -1021 }], + samflags_map => [{ 981 => (1 | 128), 2 => (1 | 64) }], + report => "", + args => "" + }, + + { name => "Flags for when mates align non-concordantly, with many alignments for one", + # 012345678 + ref => [ "CAGCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGATAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCG" ], + # 
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 + # 0 * 1 * 2 * + # 0 1 + norc => 1, + mate1s => [ "GCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGA" ], + mate2s => [ "TCGTCGTGATGCGTCAGCTCGGATAGCCAGTACGTAGCTCGT" ], + # 981 1064 1147 + # ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA + samflags_map => [{ 79 => (1 | 2 | 16 | 128), 162 => (1 | 2 | 16 | 128), 245 => (1 | 2 | 16 | 128), 2 => (1 | 2 | 32 | 64) }], + report => "", + args => "" + }, + + # Checking MD:Z strings for alignment + { name => "MD:Z 1", + ref => [ "CACGATCGACTTGA"."C"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], + reads => [ "CACGATCGACTTGG". "TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], + hits => [ { 0 => 1 } ], + samoptflags => [ { + "AS:i:-14" => 1, # alignment score + "XM:i:1" => 1, # num mismatches + "XO:i:1" => 1, # num gap opens + "XG:i:1" => 1, # num gap extensions + "NM:i:2" => 1, # num edits + "MD:Z:13^A0C39" => 1, # mismatching positions/bases + "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) + } ] }, + { name => "MD:Z 2", + ref => [ "CACGATCGACTTGA"."A"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], + reads => [ "CACGATCGACTTGG". 
"TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], + # 0123456789012 012345678901234567890123456789012345678 + hits => [ { 0 => 1 } ], + samoptflags => [ { + "AS:i:-14" => 1, # alignment score + "XM:i:1" => 1, # num mismatches + "XO:i:1" => 1, # num gap opens + "XG:i:1" => 1, # num gap extensions + "NM:i:2" => 1, # num edits + "MD:Z:13^A0A39" => 1, # mismatching positions/bases + "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) + } ] }, + { name => "MD:Z 3", + ref => [ "CACGATCGACTTGT"."AA"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], + reads => [ "CACGATCGACTTGC". "TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], + # 0123456789012 012345678901234567890123456789012345678 + hits => [ { 0 => 1 } ], + samoptflags => [ { + "AS:i:-17" => 1, # alignment score + "XM:i:1" => 1, # num mismatches + "XO:i:1" => 1, # num gap opens + "XG:i:2" => 1, # num gap extensions + "NM:i:3" => 1, # num edits + "MD:Z:13^TA0A39" => 1, # mismatching positions/bases + "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) + } ] }, + { name => "MD:Z 4", + ref => [ "CACGATCGACTTGN"."NN"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], + reads => [ "CACGATCGACTTGC". 
"TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], + # 0123456789012 012345678901234567890123456789012345678 + hits => [ { 0 => 1 } ], + samoptflags => [ { + "AS:i:-12" => 1, # alignment score + "XN:i:3" => 1, # num ambiguous ref bases + "XM:i:1" => 1, # num mismatches + "XO:i:1" => 1, # num gap opens + "XG:i:2" => 1, # num gap extensions + "NM:i:3" => 1, # num edits + "MD:Z:13^NN0N39" => 1, # mismatching positions/bases + "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) + } ] }, + + # + # Local alignment + # + + # Local alignment for a short perfect hit where hit spans the read + { name => "Local alignment 1", + ref => [ "TTGT" ], + reads => [ "TTGT" ], + args => "--local --policy \"MIN=L,0.0,0.75\"", + hits => [ { 0 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:4=" ], + cigar => [ "4M" ], + samoptflags => [ { + "AS:i:8" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:4" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YP:i:0" => 1, # read aligned repetitively in paired fashion + "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) + } ] }, + + # T T G A T T G A + # T x T x + # T x T + # G x G + # T T + + # Local alignment for a short hit where hit is trimmed at one end + { name => "Local alignment 2", + ref => [ "TTGA" ], + reads => [ "TTGT" ], + args => "--local --policy \"MIN=L,0.0,0.75\\;SEED=0,3\\;IVAL=C,1,0\"", + report => "-a", + hits => [ { 0 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:3=1S" ], + cigar => [ "3M1S" ], + samoptflags => [ { + "AS:i:6" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # 
num edits + "MD:Z:3" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YP:i:0" => 1, # read aligned repetitively in paired fashion + "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) + } ] }, + + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + # T T G T T C G T T T G T T C G T + # 0 T x + # 1 T x + # 2 G x + # 3 T x + # 4 T x + # 5 C x + # 6 G x + # 7 T x + # 8 T x + # 9 T x + # 0 G x + # 1 T x + # 2 T x + # + # Score=130 + + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + # T T G T T C G T T T G T T C G T + # 0 T x + # 1 T x + # 2 G x + # 3 T x + # 4 T x + # 5 C x + # 6 G x + # 7 T x + # 8 T + # 9 T + # 0 G + # 1 T + # 2 T + # + # Score=80 + + # Local alignment for a perfect hit + { name => "Local alignment 3", + # TTGTTCGT + # TTGTTCGT + ref => [ "TTGTTCGTTTGTTCGT" ], + # 0123456789012345 + # TTGTTCGTTTGTT + # TTGTTCGT----- + reads => [ "TTGTTCGTTTGTT" ], + args => "--local -L 8 -i C,1,0 --score-min=C,12", + report => "-a", + hits => [ { 0 => 1, 8 => 1 } ], + flags_map => [{ + 0 => "XM:0,XP:0,XT:UU,XC:13=", + 8 => "XM:0,XP:0,XT:UU,XC:8=" + }], + cigar_map => [{ + 0 => "13M", + 8 => "8M5S" + }], + samoptflags_map => [{ + 0 => { "AS:i:26" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:13" => 1 }, + 8 => { "AS:i:16" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1 } + }] + }, + + # 1 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + # T T G T T C G T T T G T T C G T + # 0 T x + # 1 T x + # 2 G x + # 3 T x + # 4 T x + # 5 C x + # 6 G x + # 7 T x + # 8 T + # 9 T + # 10 G + # 1 T + + # Local alignment for a hit that should be trimmed from the right end + { name => "Local alignment 4", + ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCGTTTGT" ], + args => "--local --policy \"SEED=0,3\\;IVAL=C,1,0\" --score-min=C,12", + report => "-a", + hits => [ { 0 => 1, 8 => 1 } ], + flags_map => [{ + 0 => "XM:0,XP:0,XT:UU,XC:12=", + 8 => "XM:0,XP:0,XT:UU,XC:8=4S" + }], + cigar_map => [{ + 0 => "12M", + 8 => "8M4S" + }], + samoptflags_map => [{ + 0 
=> { "AS:i:24" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:12" => 1 }, + 8 => { "AS:i:16" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1 } + }] + }, + + # + # Test some common featuers for the manual. E.g. when more than one + # alignment is reported in -k mode, what order are they reported in? They + # should be in order by alignment score. + # + + { name => "Alignment order -k", + # 012345678 + ref => [ "GCGCATGCACATATCANNNNNGCGCATGCACATATCTNNNNNNNNGCGCATGCACATATTTNNNNNNNNNGCGCATGGTGTTATCA" ], + reads => [ "GCGCATGCACATATCA" ], + quals => [ "GOAIYEFGFIWDSFIU" ], + args => "--min-score C,-24,0 -L 4", + report => "-k 4" + }, + + { name => "Alignment order -a", + # 012345678 + ref => [ "GCGCATGCACATATCANNNNNGCGCATGCACATATCTNNNNNNNNGCGCATGCACATATTTNNNNNNNNNGCGCATGGTGTTATCA" ], + reads => [ "GCGCATGCACATATCA" ], + quals => [ "GOAIYEFGFIWDSFIU" ], + args => "--min-score C,-24,0 -L 4", + report => "-a" + }, + + # + # What order are mates reported in? Should be reporting in mate1/mate2 + # order. 
+ # + + { name => "Mate reporting order, -a", + # 012345678 + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNCGGTAATACGGCCATCGCGGCGGCATTACTCGGCGACTGCACGAGCAGATATTGGGGGTCTAATATAACGTCTCATTAAAACGCTCTAGTCAGCTCATTGGCTCTA" ], + mate1s => [ "CTATCATCACGCGGATATT", "GGGGGGGGTCTACCCCTAA", "ATACGGCCATCGCGGCGGCATTACTCGGCG" ], + mate2s => [ "GGGGGGGGTCTACCCCTAA", "CTATCATCACGCGGATATT", "AGCCAATGAGCTGACTAGAGCGTTTT" ], + quals => [ "GOAIYEFGFIWDSFIU" ], + args => "", + report => "-a" + }, + + { name => "Mate reporting order, -M 1", + # 012345678 + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNCGGTAATACGGCCATCGCGGCGGCATTACTCGGCGACTGCACGAGCAGATATTGGGGGTCTAATATAACGTCTCATTAAAACGCTCTAGTCAGCTCATTGGCTCTA" ], + mate1s => [ "CTATCATCACGCGGATATT", "GGGGGGGGTCTACCCCTAA", "ATACGGCCATCGCGGCGGCATTACTCGGCG" ], + mate2s => [ "GGGGGGGGTCTACCCCTAA", "CTATCATCACGCGGATATT", "AGCCAATGAGCTGACTAGAGCGTTTT" ], + quals => [ "GOAIYEFGFIWDSFIU" ], + args => "", + report => "-M 1" + }, + + # + # Test dovetailing, containment, and overlapping + # + { name => "Non-overlapping; no args", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATA" ], + mate2s => [ "CGCATCGACATTAATATCC" ], + pairhits => [{ "1,23" => 1 }], + mate1fw => 1, mate2fw => 1, + report => "-M 1" + }, + { name => "Non-overlapping; --no-discordant", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 
01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATA" ], + mate2s => [ "CGCATCGACATTAATATCC" ], + pairhits => [{ "1,23" => 1 }], + mate1fw => 1, mate2fw => 1, + report => "-M 1 --no-discordant" + }, + { name => "Non-overlapping; --no-discordant --no-mixed", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATA" ], + mate2s => [ "CGCATCGACATTAATATCC" ], + pairhits => [{ "1,23" => 1 }], + mate1fw => 1, mate2fw => 1, + report => "-M 1 --no-discordant --no-mixed" + }, + { name => "Non-overlapping; --no-discordant --no-mixed", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATA" ], + mate2s => [ "CGCATCGACATTAATATCC" ], + pairhits => [{ "1,23" => 1 }], + mate1fw => 1, mate2fw => 1, + report => "-M 1 --no-discordant --no-mixed" + }, + { name => "Non-overlapping; --no-dovetail", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATA" ], + mate2s => [ "CGCATCGACATTAATATCC" ], + pairhits => [{ "1,23" => 1 }], + mate1fw => 1, mate2fw => 1, + args => "--no-dovetail", + report => "-M 1" + }, + { name => "Non-overlapping; --un-conc=.tmp.simple_tests.pl", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATA" ], + mate2s => [ "CGCATCGACATTAATATCC" ], + pairhits => [{ "1,23" => 1 }], + mate1fw => 1, mate2fw => 1, + args => "--un-conc=.tmp.simple_tests.pl", + report => "-M 1" + }, + { name => "Non-overlapping; --no-overlap", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATA" ], + mate2s => [ "CGCATCGACATTAATATCC" ], + pairhits => [{ "1,23" => 1 }], + mate1fw => 1, mate2fw => 
1, + args => "--no-overlap", + report => "-M 1" + }, + + { name => "Overlapping; no args", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATATTA" ], + mate2s => [ "TTAGCGCATCGACATTAATATCC" ], + pairhits => [{ "1,19" => 1 }], + mate1fw => 1, mate2fw => 1, + args => "", + report => "-M 1" + }, + { name => "Overlapping; --no-dovetail", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATATTA" ], + mate2s => [ "TTAGCGCATCGACATTAATATCC" ], + pairhits => [{ "1,19" => 1 }], + mate1fw => 1, mate2fw => 1, + args => "--no-dovetail", + report => "-M 1" + }, + { name => "Overlapping; --no-contain", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATATTA" ], + mate2s => [ "TTAGCGCATCGACATTAATATCC" ], + pairhits => [{ "1,19" => 1 }], + mate1fw => 1, mate2fw => 1, + args => "--no-contain", + report => "-M 1" + }, + { name => "Overlapping; --no-overlap", + ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], + # 01234567890123456789012345678901234567890123456 + mate1s => [ "GCTATCATCACGCGGATATTA" ], + mate2s => [ "TTAGCGCATCGACATTAATATCC" ], + pairhits => [], + mate1fw => 1, mate2fw => 1, + args => "--no-overlap", + report => "-M 1" + }, + + # + # Test alignment scoring parameters (--rfg, --rdg, --mp, --np) + # + + { name => "Scoring params 1", + # 012345678 + ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTTAATTTTATAAACACCTCGCTGCGGGG" ], + reads => [ "GCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTTAATTTTATAAACACCTC" ], + quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfglduhiuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 + # 0 1 2 3 4 5 6 7 8 + args => "",
+ report => "-M 1", + hits => [ { 8 => 1 } ], + cigar => [ "88M" ], + samoptflags => [ { + "AS:i:0" => 1, + "YT:Z:UU" => 1, + "MD:Z:88" => 1 } ], + }, + + { name => "Scoring params 2", + # 012345678 + ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ], + reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ], + # * -1 * -6 **** -5 -3 -3 -3 -3 *** -5 -3 -3 -3 + quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 + # 0 1 2 3 4 5 6 7 8 + args => "--ignore-quals --score-min C,-40,0 -N 1 -L 20", + report => "-M 1", + hits => [ { 8 => 1 } ], + cigar => [ "37M4D30M3I14M" ], + # 37M4D30M13I4M + samoptflags => [ { + "AS:i:-38" => 1, + "YT:Z:UU" => 1, + "MD:Z:0G11A24^GACC44" => 1, + "NM:i:9" => 1, + "XM:i:2" => 1, + "XG:i:7" => 1, + "XO:i:2" => 1 } ], + }, + + { name => "Scoring params 3", + # 012345678 + ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ], + reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ], + # * -1 * -6 **** -5 -3 -3 -3 -3 *** -1 -2 -2 -2 + quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". 
"iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 + # 0 1 2 3 4 5 6 7 8 + args => "--ignore-quals --rfg 1,2 --score-min C,-40,0 -N 1 -L 20", + report => "-M 1", + hits => [ { 8 => 1 } ], + cigar => [ "37M4D30M3I14M" ], + samoptflags => [ { + "AS:i:-31" => 1, + "YT:Z:UU" => 1, + "MD:Z:0G11A24^GACC44" => 1, + "NM:i:9" => 1, + "XM:i:2" => 1, + "XG:i:7" => 1, + "XO:i:2" => 1 } ], + }, + + { name => "Scoring params 4", + # 012345678 + ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ], + reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ], + # * -1 * -6 **** -1 -2 -2 -2 -2 *** -5 -3 -3 -3 + quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 + # 0 1 2 3 4 5 6 7 8 + args => "--ignore-quals --rdg 1,2 --score-min C,-40,0 -N 1 -L 20", + report => "-M 1", + hits => [ { 8 => 1 } ], + cigar => [ "37M4D30M3I14M" ], + samoptflags => [ { + "AS:i:-30" => 1, + "YT:Z:UU" => 1, + "MD:Z:0G11A24^GACC44" => 1, + "NM:i:9" => 1, + "XM:i:2" => 1, + "XG:i:7" => 1, + "XO:i:2" => 1 } ], + }, + + { name => "Scoring params 5", + # 012345678 + ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ], + reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ], + # * -1 * -8 **** -5 -3 -3 -3 -3 *** -5 -3 -3 -3 + quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". 
"iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 + # 0 1 2 3 4 5 6 7 8 + args => "--ignore-quals --mp 8 --score-min C,-40,0 -N 1 -L 20", + report => "-M 1", + hits => [ { 8 => 1 } ], + cigar => [ "37M4D30M3I14M" ], + samoptflags => [ { + "AS:i:-40" => 1, + "YT:Z:UU" => 1, + "MD:Z:0G11A24^GACC44" => 1, + "NM:i:9" => 1, + "XM:i:2" => 1, + "XG:i:7" => 1, + "XO:i:2" => 1 } ], + }, + + { name => "Scoring params 6", + # 012345678 + ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ], + reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ], + # * -4 * -6 **** -5 -3 -3 -3 -3 *** -5 -3 -3 -3 + quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 + # 0 1 2 3 4 5 6 7 8 + args => "--ignore-quals --np 4 --score-min C,-41,0 -N 1 -L 20", + report => "-M 1", + hits => [ { 8 => 1 } ], + cigar => [ "37M4D30M3I14M" ], + samoptflags => [ { + "AS:i:-41" => 1, + "YT:Z:UU" => 1, + "MD:Z:0G11A24^GACC44" => 1, + "NM:i:9" => 1, + "XM:i:2" => 1, + "XG:i:7" => 1, + "XO:i:2" => 1 } ], + }, + + # + # Test XS:i with quality scaling + # + + { name => "Q XS:i 1a", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + quals => [ "IIIIIIIA" ], + args => "--multiseed=0,7,C,1 --score-min=C,-6", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-5" => 1, "XS:i:-5" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" => 1 } ], + }, + + { name => "Q XS:i 1a ! --mp 3,3", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + quals => [ "IIIIIII!" 
], + args => "-L 6 --mp 3,3 --score-min=C,-6", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-3" => 1, "XS:i:-3" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" => 1 } ], + }, + + { name => "Q XS:i 1a ! --mp 3,6", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + quals => [ "IIIIIII!" ], + args => "-L 6 --mp 6,3 --score-min=C,-6", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-3" => 1, "XS:i:-3" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" => 1 } ], + }, + + { name => "Q XS:i 1a I --mp 3,3", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + quals => [ "IIIIIIII" ], + args => "-L 6 --mp 3,3 --score-min=C,-6", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-3" => 1, "XS:i:-3" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" => 1 } ], + }, + + { name => "Q XS:i 1a I --mp 3,6", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + quals => [ "IIIIIIII" ], + args => "-L 6 --mp 6,3 --score-min=C,-6", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-6" => 1, "XS:i:-6" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" => 1 } ], + }, + + { name => "Q XS:i 1a --ignore-quals", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + quals => [ "IIIIIIIA" ], + args => "--multiseed=0,7,C,1 --score-min=C,-6 --ignore-quals", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-6" => 1, "XS:i:-6" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" 
=> 1 } ], + }, + + { name => "Q XS:i 1b", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + quals => [ "IIIIIII5" ], + args => "--multiseed=0,7,C,1 --score-min=C,-6", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-4" => 1, "XS:i:-4" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" => 1 } ], + }, + + { name => "Q XS:i 1b --ignore-quals", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + quals => [ "IIIIIII5" ], + args => "--multiseed=0,7,C,1 --score-min=C,-6 --ignore-quals", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-6" => 1, "XS:i:-6" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" => 1 } ], + }, + + { name => "Q XS:i 1c", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + quals => [ "IIIIIII4" ], + args => "--multiseed=0,7,C,1 --score-min=C,-6", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-3" => 1, "XS:i:-3" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" => 1 } ], + }, + + { name => "Q XS:i 1c --ignore-quals", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + quals => [ "IIIIIII4" ], + args => "--multiseed=0,7,C,1 --score-min=C,-6 --ignore-quals", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-6" => 1, "XS:i:-6" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" => 1 } ], + }, + + # One mate aligns. Ensuring that the unmapped mate gets reference + # information filled in from the other mate. 
+ { ref => [ "CATCGACTGAGACTCGTACGACAATTACGCGCATTATTCGCATCACCAGCGCGGCGCGCGCCCCCTAT" ], + # 01234567890123456789012345678901234567890123456789012345678901234567 + # 0 1 2 3 4 5 6 + # ATCACCAGCGTTTCGCGCGAAACCTA + mate1s => [ "ATCGACTGAGACTCGTACGACAATTAC" ], + mate2s => [ "TAGGTTTCGCGCGAAACGCTGGTGAT" ], + pairhits_orig => [{ "1,1" => 1}] + }, + + { ref => [ "TTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCGT" ], + args => "--multiseed=0,4,C,1,0", + report => "-M 1" + }, + + # Testing that DEFAULT is -M 1 + { ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCGT" ], + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ + { "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "YM:i:1" => 1 } + ], + }, + { ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCGT" ], + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ + { "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "YM:i:1" => 1 } + ], + }, + + # + # Test XS:i + # + + { name => "XS:i 1", + ref => [ "TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + args => "--multiseed=0,7,C,1 --score-min=C,-6", + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:-6" => 1, "XS:i:-6" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:7A0" => 1, + "NM:i:1" => 1, "XM:i:1" => 1 } ], + }, + + { name => "XS:i 2", + ref => [ 
"TTGTTCGATTGTTCGA" ], + reads => [ "TTGTTCGT" ], + args => "--multiseed=0,7,C,1 --score-min=C,-5", + report => "", + cigar => [ "*" ], + samoptflags => [{ "YT:Z:UU" => 1, "YM:i:0" => 1 }], + }, + + { name => "XS:i 3a", + ref => [ "TTGTTCGATTGTTCGT" ], + # TTGTTCGT + reads => [ "TTGTTCGT" ], + args => "--multiseed=0,7,C,1 --score-min=C,-6", + report => "-M 1", + hits => [ { 8 => 1 } ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:0" => 1, "XS:i:-6" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:8" => 1, + "NM:i:0" => 1, "XM:i:0" => 1 } ], + }, + + { name => "XS:i 3b", + ref => [ "TTGTTCGATTGTTCGT" ], + # TTGTTCGT + reads => [ "TTGTTCGT" ], + args => "--multiseed=0,7,C,1 --score-min=C,-6 --seed=52", + report => "-M 1", + hits => [ { 8 => 1 } ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:0" => 1, "XS:i:-6" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:8" => 1, + "NM:i:0" => 1, "XM:i:0" => 1 } ], + }, + + { name => "XS:i 3c", + ref => [ "TTGTTCGATTGTTCGT" ], + # TTGTTCGT + reads => [ "TTGTTCGT" ], + args => "--multiseed=0,7,C,1 --score-min=C,-6 --seed=53", + report => "-M 2", + hits => [ { 8 => 1 } ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:0" => 1, "XS:i:-6" => 1, + "YM:i:0" => 1, "YT:Z:UU" => 1, + "MD:Z:8" => 1, + "NM:i:0" => 1, "XM:i:0" => 1 } ], + }, + + { name => "XS:i 4a", + ref => [ "TTGTTCAATTGTTCGATTGTTCGT" ], + # |||||| ||||||| |||||||| + # TTGTTCGT||||||| |||||||| + # TTGTTCGT|||||||| + # TTGTTCGT + reads => [ "TTGTTCGT" ], + args => "--multiseed=0,6,C,1 --score-min=C,-12 --seed=53", + report => "-M 2", + hits => [ { 16 => 1 } ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:0" => 1, "XS:i:-6" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:8" => 1, + "NM:i:0" => 1, "XM:i:0" => 1 } ], + }, + + { name => "XS:i 4b", + ref => [ "TTGTTCAATTGTTCGATTGTTCGT" ], + # |||||| ||||||| |||||||| + # TTGTTCGT||||||| |||||||| + # TTGTTCGT|||||||| + # TTGTTCGT + reads => [ "TTGTTCGT" ], + args => "--multiseed=0,6,C,1 --score-min=C,-12 --seed=54", + 
report => "-M 3", + hits => [ { 16 => 1 } ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:0" => 1, "XS:i:-6" => 1, + "YM:i:0" => 1, "YT:Z:UU" => 1, + "MD:Z:8" => 1, + "NM:i:0" => 1, "XM:i:0" => 1 } ], + }, + + { name => "XS:i 5a", + ref => [ "TTGTTCAATTGTTCGATTGTTCGTTTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAA" ], + # |||||| ||||||| |||||||||||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| + # TTGTTCGT||||||| ||||||||TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| + # TTGTTCGT|||||||| TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT + # TTGTTCGT + reads => [ "TTGTTCGT" ], + args => "--multiseed=0,6,C,1,1 --score-min=C,-12 --seed=54", + report => "-M 1", + hits => [ { 16 => 1 } ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:0" => 1, "XS:i:-6" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:8" => 1, + "NM:i:0" => 1, "XM:i:0" => 1 } ], + }, + + { name => "XS:i 5b", + ref => [ "TTGTTCAATTGTTCGATTGTTCGTTTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAA" ], + # |||||| ||||||| |||||||||||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| + # TTGTTCGT||||||| ||||||||TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| + # TTGTTCGT|||||||| TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT + # TTGTTCGT + reads => [ "TTGTTCGT" ], + args => "--multiseed=0,5,C,1,1 --score-min=C,-12 --seed=55", + report => "-M 1", + hits => [ { 16 => 1 } ], + cigar => [ "8M" ], + samoptflags => [ { + "AS:i:0" => 1, "XS:i:-6" => 1, + "YM:i:1" => 1, "YT:Z:UU" => 1, + "MD:Z:8" => 1, + "NM:i:0" => 1, "XM:i:0" => 1 } ], + }, + + # Testing BWA-SW-like scoring + # + # a*max{T,c*log(l)} = 1 * max(30, 5.5 * log(56)) = 1 * max(30, 22.139) = 30 + # + { name => "BWA-SW-like 1", + ref => [ 
"GTTTAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATCTGCGA" ], + # |||||||||||||||||||||||||||||||||||||||||||||||||||||||| + reads => [ "TAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATC" ], + # 01234567890123456789012345678901234567890123456789012345 + # 1 2 3 4 5 + args => "--bwa-sw-like", + hits => [{ 3 => 1 }], + samoptflags => [{ "AS:i:56" => 1, "NM:i:0" => 1, + "MD:Z:56" => 1, "YT:Z:UU" => 1 }] + }, + { name => "BWA-SW-like 2", + # 0123 + ref => [ "GTTTAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATCTGCGA" ], + # |||||||||||||||||||||||||| |||||||||||||||||||||||||||| + reads => [ "TAGATTCCACTACGCTAACCATCGAGTTCTCGTCTCAGAGTTTCGATAGGAAAATC" ], + # 01234567890123456789012345678901234567890123456789012345 + # 1 2 3 4 5 + args => "--bwa-sw-like -L 18", + hits => [{ 3 => 1 }], + # Tot matches = 54 + # Tot penalties = 6 + samoptflags => [{ "AS:i:48" => 1, "NM:i:2" => 1, "XM:i:2" => 1, + "MD:Z:26A0A28" => 1, "YT:Z:UU" => 1 }] + }, + { name => "BWA-SW-like 3", + # 0123 + ref => [ "GTTTAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATCTGCGA" ], + # |||||||||||||||||||||||||| ||||||||||||||||||||||||||| + reads => [ "TAGATTCCACTACGCTAACCATCGAG"."TCGTCTCAGAGTTTCGATAGGAAAATC" ], + # 01234567890123456789012345678901234567890123456789012345 + # 1 2 3 4 5 + args => "--bwa-sw-like -i C,1,0", + hits => [{ 3 => 1 }], + # Tot matches = 53 + # Tot penalties = 11 + samoptflags => [{ "AS:i:42" => 1, "NM:i:3" => 1, "XM:i:0" => 1, + "XO:i:1" => 1, "XG:i:3" => 1, + "MD:Z:26^AAC27" => 1, "YT:Z:UU" => 1 }] + }, + + # Some tricky SAM FLAGS field tests + + { name => "SAM paired-end where both mates align 1", + ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], + # 012345678901234567890123456789012345678901234567890123456789012345678 + # 0 1 2 3 4 5 6 + mate1s => [ "CTATCTACGCTTCGGCGTCGGTGA" ], + mate2s => [ "GATTGTCACACACCCGGTCGT" ], + # ----------------------------------------------------- + # 
01234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 + # 0x1 template having multiple fragments in sequencing + # 0x2 each fragment properly aligned according to the aligner + # 0x4 fragment unmapped + # 0x8 next fragment in the template unmapped + # 0x10 SEQ being reverse complemented + # 0x20 SEQ of the next fragment in the template being reversed + # 0x40 the first fragment in the template + # 0x80 the last fragment in the template + pairhits => [{ "3,35" => 1 }], + norc => 1, + samflags_map => [{ 3 => (1 | 2 | 32 | 64), 35 => (1 | 2 | 16 | 128) }], + tlen_map => [{ 3 => 53, 35 => -53 }] }, + + { name => "SAM paired-end where both mates align 2", + ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], + # 012345678901234567890123456789012345678901234567890123456789012345678 + # 0 1 2 3 4 5 6 + mate1s => [ "TCACCGACGCCGAAGCGTAGATAG" ], + mate2s => [ "ACGACCGGGTGTGTGACAATC" ], + # ----------------------------------------------------- + # 01234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 + # 0x1 template having multiple fragments in sequencing + # 0x2 each fragment properly aligned according to the aligner + # 0x4 fragment unmapped + # 0x8 next fragment in the template unmapped + # 0x10 SEQ being reverse complemented + # 0x20 SEQ of the next fragment in the template being reversed + # 0x40 the first fragment in the template + # 0x80 the last fragment in the template + mate1fw => 0, + mate2fw => 1, + pairhits => [{ "3,35" => 1 }], + norc => 1, + samflags_map => [{ 3 => (1 | 2 | 16 | 64), 35 => (1 | 2 | 32 | 128) }], + tlen_map => [{ 3 => 53, 35 => -53 }] }, + + { name => "SAM paired-end where both mates align 3", + ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], + # 012345678901234567890123456789012345678901234567890123456789012345678 + # 0 1 2 3 4 5 6 + mate1s => [ "CTATCTACGCTTCGGCGTCGGTGA" ], + mate2s => [ "ACGACCGGGTGTGTGACAATC" ], + # 
----------------------------------------------------- + # 01234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 + # 0x1 template having multiple fragments in sequencing + # 0x2 each fragment properly aligned according to the aligner + # 0x4 fragment unmapped + # 0x8 next fragment in the template unmapped + # 0x10 SEQ being reverse complemented + # 0x20 SEQ of the next fragment in the template being reversed + # 0x40 the first fragment in the template + # 0x80 the last fragment in the template + mate1fw => 1, + mate2fw => 1, + pairhits => [{ "3,35" => 1 }], + norc => 1, + samflags_map => [{ 3 => (1 | 2 | 64), 35 => (1 | 2 | 128) }], + tlen_map => [{ 3 => 53, 35 => -53 }] }, + + { name => "SAM paired-end where mate #1 aligns but mate #2 doesn't", + ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], + # 012345678901234567890123456789012345678901234567890123456789012345678 + # 0 1 2 3 4 5 6 + mate1s => [ "CTATCTACGCTTCGGCGTCGGCGA" ], + mate2s => [ "GATTGTCTTTTCCCGGAAAAATCGT" ], + # 0x1 template having multiple fragments in sequencing + # 0x2 each fragment properly aligned according to the aligner + # 0x4 fragment unmapped + # 0x8 next fragment in the template unmapped + # 0x10 SEQ being reverse complemented + # 0x20 SEQ of the next fragment in the template being reversed + # 0x40 the first fragment in the template + # 0x80 the last fragment in the template + pairhits => [{ "*,3" => 1 }], + norc => 1, + samflags_map => [{ 3 => (1 | 8 | 64), "*" => (1 | 4 | 128) }] }, + + { name => "SAM paired-end where neither mate aligns", + ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], + # 012345678901234567890123456789012345678901234567890123456789012345678 + # 0 1 2 3 4 5 6 + mate1s => [ "CTATATACGAAAAAGCGTCGGCGA" ], + mate2s => [ "GATTGTCTTTTCCCGGAAAAATCGT" ], + # 0x1 template having multiple fragments in sequencing + # 0x2 each fragment properly aligned according to the aligner + # 0x4 fragment 
unmapped + # 0x8 next fragment in the template unmapped + # 0x10 SEQ being reverse complemented + # 0x20 SEQ of the next fragment in the template being reversed + # 0x40 the first fragment in the template + # 0x80 the last fragment in the template + pairhits => [{ "*,*" => 1 }], + norc => 1, + samoptflags_flagmap => [{ + (1 | 4 | 8 | 64) => { "YT:Z:UP" => 1 }, + (1 | 4 | 8 | 128) => { "YT:Z:UP" => 1 } + }] }, + + { name => "SAM paired-end where both mates align, but discordantly", + ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], + # 012345678901234567890123456789012345678901234567890123456789012345678 + # 0 1 2 3 4 5 6 + mate1s => [ "CTATCTACGCTTCGGCGTCGGCGA" ], + mate2s => [ "ACGACCGGGTGTGTGACAATC" ], + # ----------------------------------------------------- + # 01234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 + # 0x1 template having multiple fragments in sequencing + # 0x2 each fragment properly aligned according to the aligner + # 0x4 fragment unmapped + # 0x8 next fragment in the template unmapped + # 0x10 SEQ being reverse complemented + # 0x20 SEQ of the next fragment in the template being reversed + # 0x40 the first fragment in the template + # 0x80 the last fragment in the template + pairhits => [{ "3,35" => 1 }], + norc => 1, + samflags_map => [{ 3 => (1 | 64), 35 => (1 | 128) }], + # Which TLEN is right? Depends on criteria for when to infer TLEN. If + # criterion is mates are concordant, then it should be 0 here. 
If the + # criterion is that both mates align to the same chromosome, should be + # +-53 + #tlen_map => [{ 3 => 0, 35 => 0 }] }, + tlen_map => [{ 3 => 53, 35 => -53 }] }, + + { name => "matchesRef regression 4", + ref => [ "CCGGGTCGTCACGCCCCGCTTGCGTCANGCCCCTCACCCTCCCTTTGTCGGCTCCCACCCCTCCCCATCCGTTGTCCCCGCCCCCGCCCGCCGGGTCGTCACGCCCCGCTTGCGTCANGC", + "GCTCGGAATTCGTGCTCCGNCCCGTACGGTT" ], + # + # NNNNNGA------A-------------------G-NTTT + # |||||||||||||||||||||||||||||||||| + # CCAAT-ATTTTTAATTTCCTCTATTTTTCTCTCGTCTTG + args => "--policy \"NP=Q\\;RDG=46.3220993654702\\;RFG=41.3796024365659\\;MIN=L,5.57015383125426,-3.28597145122829\\;NCEIL=L,0.263054599454459,0.130843661549367\\;SEED=1,29\\;IVAL=L,0.0169183264663712,3.75762168662522\" --overhang --trim5 6", + reads => [ "CTTTGCACCCCTCCCTTGTCGGCTCCCACCCATCCCCATCCGTTGTCCCCGCCCCCGCCCGCCGGTCGTCACTCCCCGTTTGCGTCATGCCCCTCACCCTCCCTTTGTCGGCTCGCACCCCTCCCCATCCGTTGTCCCCGCCCCCGCTCTCGGGGTCTTCACGCCCCGCTTGCTTCATGCCCCTCACTCGCACCCCG" ], + }, + + { name => "matchesRef regression 3", + ref => [ "GAAGNTTTTCCAATATTTTTAATTTCCTCTATTTTTCTCTCGTCTTGNTCTAC" ], + # + # NNNNNGA------A-------------------G-NTTT + # |||||||||||||||||||||||||||||||||| + # CCAAT-ATTTTTAATTTCCTCTATTTTTCTCTCGTCTTG + args => "--policy \"MMP=R\\;MIN=L,8.8,-8.1\" --overhang", + reads => [ "CAAGACGAGAGAAAAATAGAGGAAATTAAAAATATTGG" ], + }, + + { name => "matchesRef regression 2", + ref => ["GTTGTCGGCAGCTCTGGATATGTGNTCTCGGGTTTATNTCGTTGTCG", + "CCTTGTTNTTAATGCTGCCTGGTTTNG"], + args => "--policy \"RDG=2.02030755427021,2.81949533273331\\;MIN=L,-6.52134769703939,-3.39889659588514\\;IVAL=L,0.127835912101927\" --overhang --trim5 5", + mate1s => ["TCTGGCGGTTGCGAAGGCCCCTGGCGGTTGCTATGTCCTCTGGCGGTTGCGTTGTCGGCAGCTCG"], + mate2s => ["AGAACACATATCCAGAGCTGCCGACAACGAAATGAACCCGAGAGCACAAATCCAGAG"] }, + + # Regression test for an issue observed once + { name => "matchesRef regression 1", + # 0 1 2 3 4 5 6 7 + # 01234567890123456789012345678901234567890123456789012345678901234567890 + ref => [ 
"AGGTCGACCGAAAGGCCTAGAGGTCGACCGACAATCTGACCATGGGGCGAGGAGCGAGTAC" ], + # |||||||||||||||||||||||||||||||||||||||||||||||||| + reads => [ "AAGGCCTAGAGGTCGACCGACAATCTGACCATGGGGCGAGGAGCGAGTACTGGTCTGGGG" ], + # 012345678901234567890123456789012345678901234567890123456789 + # 0 1 2 3 4 5 + args => "--overhang" }, + + # 1 discordant alignment and one concordant alignment. Discordant because + # the fragment is too long. + + { name => "Discordant with different chromosomes", + ref => [ "TTTATAAAAATATTTCCCCCCCC", + "CCCCCCTGTCGCTACCGCCCCCCCCCCC" ], + # ATAAAAATAT GTCGCTACCG + # ATAAAAATAT TGTCGCTACC + # 01234567890123456789012 + # 0 1 2 + # 0123456789012345678901234567 + # 0 1 2 + mate1s => [ "ATAAAAATAT", "ATAAAAATAT" ], + mate2s => [ "GTCGCTACCG", "TGTCGCTACC" ], + mate1fw => 1, + mate2fw => 1, + args => "-I 0 -X 35", + # Not really any way to flag an alignment as discordant + pairhits => [ { "3,7" => 1 }, { "3,6" => 1 } ], + rnext_map => [ { 3 => 1, 7 => 0 }, { 3 => 1, 6 => 0 } ], + pnext_map => [ { 3 => 7, 7 => 3 }, { 3 => 6, 6 => 3 } ] }, + + { name => "Fastq 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + fastq => "\@r0\nCATCGATCAGTATCTG\n+\nIIIIIIIIIIIIIIII", + hits => [{ 2 => 1 }] }, + + { name => "Tabbed 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + tabbed => "r0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII", + hits => [{ 2 => 1 }] }, + + { name => "Fasta 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + fasta => ">r0\nCATCGATCAGTATCTG", + hits => [{ 2 => 1 }] }, + + { name => "Qseq 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + qseq => join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "X", "Y", + "Index", + "0", # Mate + "CATCGATCAGTATCTG", + "IIIIIIIIIIIIIIII", + "1"), + hits => [{ 2 => 1 }] }, + + { name => "Raw 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + raw => "CATCGATCAGTATCTG", + hits => [{ 2 => 1 }] }, + + # Like Fastq 1 but with extra newline + { name => "Fastq 2", + ref => [ "AGCATCGATCAGTATCTGA" ], + fastq => "\@r0\nCATCGATCAGTATCTG\n+\nIIIIIIIIIIIIIIII\n", + hits => [{ 2 => 1 }] 
}, + + { name => "Tabbed 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + tabbed => "r0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n", + hits => [{ 2 => 1 }] }, + + { name => "Fasta 2", + ref => [ "AGCATCGATCAGTATCTGA" ], + fasta => ">r0\nCATCGATCAGTATCTG\n", + hits => [{ 2 => 1 }] }, + + { name => "Qseq 2", + ref => [ "AGCATCGATCAGTATCTGA" ], + qseq => join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "X", "Y", + "Index", + "0", # Mate + "CATCGATCAGTATCTG", + "IIIIIIIIIIIIIIII", + "1")."\n", + hits => [{ 2 => 1 }] }, + + { name => "Raw 2", + ref => [ "AGCATCGATCAGTATCTGA" ], + raw => "CATCGATCAGTATCTG\n", + hits => [{ 2 => 1 }] }, + + # Like Fastq 1 but with many extra newlines + { name => "Fastq 3", + ref => [ "AGCATCGATCAGTATCTGA" ], + fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n", + hits => [{ 2 => 1 }] }, + + { name => "Tabbed 3", + ref => [ "AGCATCGATCAGTATCTGA" ], + tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n", + hits => [{ 2 => 1 }] }, + + { name => "Fasta 3", + ref => [ "AGCATCGATCAGTATCTGA" ], + fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n\n", + hits => [{ 2 => 1 }] }, + + { name => "Qseq 3", + ref => [ "AGCATCGATCAGTATCTGA" ], + qseq => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "X", "Y", + "Index", + "0", # Mate + "CATCGATCAGTATCTG", + "IIIIIIIIIIIIIIII", + "1")."\n\n", + hits => [{ 2 => 1 }] }, + + { name => "Raw 3", + ref => [ "AGCATCGATCAGTATCTGA" ], + raw => "\n\n\nCATCGATCAGTATCTG\n\n", + hits => [{ 2 => 1 }] }, + + # Quality string length doesn't match (too short by 1) + { name => "Fastq 4", + ref => [ "AGCATCGATCAGTATCTGA" ], + fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", + should_abort => 1}, + + { name => "Tabbed 4", + ref => [ "AGCATCGATCAGTATCTGA" ], + tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n", + should_abort => 1}, + + { name => "Qseq 4", + ref => [ "AGCATCGATCAGTATCTGA" ], + qseq => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + 
"Tile", + "X", "Y", + "Index", + "0", # Mate + "CATCGATCAGTATCTG", + "IIIIIIIIIIIIIII", + "1")."\n\n", + should_abort => 1}, + + # Name line doesn't start with @ + { name => "Fastq 5", + ref => [ "AGCATCGATCAGTATCTGA" ], + fastq => "\n\n\r\nr0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", + should_abort => 1, + hits => [{ }] }, + + # Name line doesn't start with > + { name => "Fasta 5", + ref => [ "AGCATCGATCAGTATCTGA" ], + fasta => "\n\n\r\nr0\nCATCGATCAGTATCTG\r", + should_abort => 1, + hits => [{ }] }, + + # Name line doesn't start with @ (2) + { name => "Fastq 6", + ref => [ "AGCATCGATCAGTATCTGA" ], + fastq => "r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", + should_abort => 1, + hits => [{ }] }, + + # Name line doesn't start with > (2) + { name => "Fasta 6", + ref => [ "AGCATCGATCAGTATCTGA" ], + fasta => "r0\nCATCGATCAGTATCTG\r", + should_abort => 1, + hits => [{ }] }, + + # Part of sequence is trimmed + { name => "Fastq 7", + ref => [ "AGCATCGATCAGTATCTGA" ], + fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n", + args => "--trim3 4", + norc => 1, + hits => [{ 2 => 1 }] }, + + { name => "Tabbed 7", + ref => [ "AGCATCGATCAGTATCTGA" ], + tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n", + args => "--trim3 4", + norc => 1, + hits => [{ 2 => 1 }] }, + + { name => "Fasta 7", + ref => [ "AGCATCGATCAGTATCTGA" ], + fasta => "\n\n\r\n\>r0\nCATCGATCAGTATCTG\r\n", + args => "--trim3 4", + norc => 1, + hits => [{ 2 => 1 }] }, + + { name => "Qseq 7", + ref => [ "AGCATCGATCAGTATCTGA" ], + qseq => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "X", "Y", + "Index", + "0", # Mate + "CATCGATCAGTATCTG", + "IIIIIIIIIIIIIIII", + "1")."\n\n", + args => "--trim3 4", + norc => 1, + hits => [{ 2 => 1 }] }, + + { name => "Raw 7", + ref => [ "AGCATCGATCAGTATCTGA" ], + raw => "\n\n\r\nCATCGATCAGTATCTG\r\n", + args => "--trim3 4", + norc => 1, + hits => [{ 2 => 1 }] }, + + # Whole sequence is trimmed + { name => "Fastq 8", + ref 
=> [ "AGCATCGATCAGTATCTGA" ], + fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n", + args => "--trim5 16", + hits => [{ "*" => 1 }] }, + + { name => "Tabbed 8", + ref => [ "AGCATCGATCAGTATCTGA" ], + tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n", + args => "--trim5 16", + hits => [{ "*" => 1 }] }, + + { name => "Fasta 8", + ref => [ "AGCATCGATCAGTATCTGA" ], + fasta => "\n\n\r\n\>r0\nCATCGATCAGTATCTG\r\n", + args => "--trim3 16", + hits => [{ "*" => 1 }] }, + + { name => "Qseq 8", + ref => [ "AGCATCGATCAGTATCTGA" ], + qseq => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "X", "Y", + "Index", + "0", # Mate + "CATCGATCAGTATCTG", + "IIIIIIIIIIIIIIII", + "1")."\n\n", + args => "--trim3 16", + hits => [{ "*" => 1 }] }, + + { name => "Raw 8", + ref => [ "AGCATCGATCAGTATCTGA" ], + raw => "\n\n\r\nCATCGATCAGTATCTG\r\n", + args => "--trim3 16", + hits => [{ "*" => 1 }] }, + + # Sequence is skipped + { name => "Fastq 9", + ref => [ "AGCATCGATCAGTATCTGA" ], + fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n", + args => "-s 1", + hits => [{ }] }, + + { name => "Tabbed 9", + ref => [ "AGCATCGATCAGTATCTGA" ], + tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n", + args => "-s 1", + hits => [{ }] }, + + { name => "Fasta 9", + ref => [ "AGCATCGATCAGTATCTGA" ], + fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n", + args => "-s 1", + hits => [{ }] }, + + { name => "Qseq 9", + ref => [ "AGCATCGATCAGTATCTGA" ], + qseq => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "X", "Y", + "Index", + "0", # Mate + "CATCGATCAGTATCTG", + "IIIIIIIIIIIIIIII", + "1")."\n\n", + args => "-s 1", + hits => [{ }] }, + + { name => "Raw 9", + ref => [ "AGCATCGATCAGTATCTGA" ], + raw => "CATCGATCAGTATCTG\n", + args => "-s 1", + hits => [{ }] }, + + # Like Fastq 1 but with many extra newlines + { name => "Fastq multiread 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + fastq => 
"\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n". + "\n\n\r\n\@r1\nATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", + hits => [{ 2 => 1 }, { 3 => 1 }] }, + + { name => "Tabbed multiread 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n". + "\n\n\r\nr1\tATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n", + hits => [{ 2 => 1 }, { 3 => 1 }] }, + + { name => "Fasta multiread 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\n\n". + "\n\n\r\n>r1\nATCGATCAGTATCTG\n\n", + hits => [{ 2 => 1 }, { 3 => 1 }] }, + + { name => "Qseq multiread 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + qseq => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "10", "10", + "Index", + "0", # Mate + "CATCGATCAGTATCTG", + "IIIIIIIIIIIIIIII", + "1")."\n\n". + join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "12", "15", + "Index", + "0", # Mate + "ATCGATCAGTATCTG", + "IIIIIIIIIIIIIII", + "1")."\n\n", + idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0, + "MachName_RunNum_Lane_Tile_12_15_Index" => 1 }, + hits => [{ 2 => 1 }, { 3 => 1 }] }, + + { name => "Raw multiread 1", + ref => [ "AGCATCGATCAGTATCTGA" ], + raw => "\n\n\r\nCATCGATCAGTATCTG\n\n". + "\n\n\r\nATCGATCAGTATCTG\n\n", + hits => [{ 2 => 1 }, { 3 => 1 }] }, + + # Like Fastq multiread 1 but with -u 1 + { name => "Fastq multiread 2", + ref => [ "AGCATCGATCAGTATCTGA" ], + args => "-u 1", + fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n". + "\n\n\r\n\@r1\nATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", + hits => [{ 2 => 1 }] }, + + { name => "Tabbed multiread 2", + ref => [ "AGCATCGATCAGTATCTGA" ], + args => "-u 1", + tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n". + "\n\n\r\nr1\tATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n", + hits => [{ 2 => 1 }] }, + + { name => "Fasta multiread 2", + ref => [ "AGCATCGATCAGTATCTGA" ], + args => "-u 1", + fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n". 
+ "\n\n\r\n>r1\nATCGATCAGTATCTG\r\n", + hits => [{ 2 => 1 }] }, + + { name => "Qseq multiread 2", + ref => [ "AGCATCGATCAGTATCTGA" ], + args => "-u 1", + qseq => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "10", "10", + "Index", + "0", # Mate + "CATCGATCAGTATCTG", + "IIIIIIIIIIIIIIII", + "1")."\n\n". + join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "12", "15", + "Index", + "0", # Mate + "ATCGATCAGTATCTG", + "IIIIIIIIIIIIIII", + "1")."\n\n", + idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0, + "MachName_RunNum_Lane_Tile_12_15_Index" => 1 }, + hits => [{ 2 => 1 }] }, + + { name => "Raw multiread 2", + ref => [ "AGCATCGATCAGTATCTGA" ], + args => "-u 1", + raw => "\n\n\r\nCATCGATCAGTATCTG\r\n". + "\n\n\r\nATCGATCAGTATCTG\r\n", + hits => [{ 2 => 1 }] }, + + # Like Fastq multiread 1 but with -u 2 + { name => "Fastq multiread 3", + ref => [ "AGCATCGATCAGTATCTGA" ], + args => "-u 2", + fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n". + "\n\n\r\n\@r1\nATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", + hits => [{ 2 => 1 }, { 3 => 1 }] }, + + { name => "Tabbed multiread 3", + ref => [ "AGCATCGATCAGTATCTGA" ], + args => "-u 2", + tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n". + "\n\n\r\nr1\tATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n", + hits => [{ 2 => 1 }, { 3 => 1 }] }, + + { name => "Fasta multiread 3", + ref => [ "AGCATCGATCAGTATCTGA" ], + args => "-u 2", + fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n". + "\n\n\r\n>r1\nATCGATCAGTATCTG\r\n", + hits => [{ 2 => 1 }, { 3 => 1 }] }, + + { name => "Qseq multiread 3", + ref => [ "AGCATCGATCAGTATCTGA" ], + args => "-u 2", + qseq => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "10", "10", + "Index", + "0", # Mate + "CATCGATCAGTATCTG", + "IIIIIIIIIIIIIIII", + "1")."\n\n". 
+ join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "12", "15", + "Index", + "0", # Mate + "ATCGATCAGTATCTG", + "IIIIIIIIIIIIIII", + "1")."\n\n", + idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0, + "MachName_RunNum_Lane_Tile_12_15_Index" => 1 }, + hits => [{ 2 => 1 }, { 3 => 1 }] }, + + { name => "Raw multiread 3", + ref => [ "AGCATCGATCAGTATCTGA" ], + args => "-u 2", + raw => "\n\n\r\nCATCGATCAGTATCTG\r\n". + "\n\n\r\nATCGATCAGTATCTG\r\n", + hits => [{ 2 => 1 }, { 3 => 1 }] }, + + # Paired-end reads that should align + { name => "Fastq paired 1", + ref => [ "AGCATCGATCAAAAACTGA" ], + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + fastq1 => "\n\n\r\n\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n". + "\n\n\@r1\nTCAGTTTTTGA\r\n+\n\nIIIIIIIIIII\n\n", + fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n". + "\n\n\r\n\@r1\nAGCATCGATC\r\n+\n\nIIIIIIIIII", + pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, + + { name => "Tabbed paired 1", + ref => [ "AGCATCGATCAAAAACTGA" ], + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + tabbed => "\n\n\r\nr0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n". + "\n\nr1\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII\n\n", + paired => 1, + pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, + + { name => "Fasta paired 1", + ref => [ "AGCATCGATCAAAAACTGA" ], + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + fasta1 => "\n\n\r\n>r0\nAGCATCGATC\r\n". + "\n\n>r1\nTCAGTTTTTGA\r\n", + fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n". + "\n\n\r\n>r1\nAGCATCGATC", + pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, + + { name => "Qseq paired 1", + ref => [ "AGCATCGATCAAAAACTGA" ], + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + qseq1 => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "10", "10", + "Index", + "1", # Mate + "AGCATCGATC", + "ABCBGACBCB", + "1")."\n\n". 
+ join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "12", "15", + "Index", + "1", # Mate + "TCAGTTTTTGA", + "95849456875", + "1")."\n\n", + qseq2 => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "10", "10", + "Index", + "2", # Mate + "TCAGTTTTTGA", + "ABCBGACBCBA", + "1")."\n\n". + join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "12", "15", + "Index", + "2", # Mate + "AGCATCGATC", + "AGGCBBGCBG", + "1")."\n\n", + idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0, + "MachName_RunNum_Lane_Tile_12_15_Index" => 1 }, + pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, + + { name => "Raw paired 1", + ref => [ "AGCATCGATCAAAAACTGA" ], + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + raw1 => "\n\n\r\nAGCATCGATC\r\n". + "\n\nTCAGTTTTTGA\r\n", + raw2 => "\n\n\r\nTCAGTTTTTGA\n". + "\n\n\r\nAGCATCGATC", + pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, + + # Paired-end reads that should align + { name => "Fastq paired 2", + ref => [ "AGCATCGATCAAAAACTGA" ], + args => "-s 1", + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + fastq1 => "\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n". + "\n\n\@r1\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n", + fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n". + "\n\n\r\n\@r1\nAGCATCGATC\r\n+\n\nIIIIIIIIII", + pairhits => [ { }, { "0,8" => 1 } ] }, + + { name => "Tabbed paired 2", + ref => [ "AGCATCGATCAAAAACTGA" ], + args => "-s 1", + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + tabbed => "r0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n". + "\nr1\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII", + paired => 1, + pairhits => [ { }, { "0,8" => 1 } ] }, + + { name => "Fasta paired 2", + ref => [ "AGCATCGATCAAAAACTGA" ], + args => "-s 1", + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + fasta1 => ">r0\nAGCATCGATC\r\n". + "\n\n>r1\nTCAGTTTTTGA\n", + fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n". 
+ "\n\n\r\n>r1\nAGCATCGATC", + pairhits => [ { }, { "0,8" => 1 } ] }, + + { name => "Qseq paired 1", + ref => [ "AGCATCGATCAAAAACTGA" ], + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + args => "-s 1", + qseq1 => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "10", "10", + "Index", + "1", # Mate + "AGCATCGATC", + "ABCBGACBCB", + "1")."\n\n". + join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "12", "15", + "Index", + "1", # Mate + "TCAGTTTTTGA", + "95849456875", + "1")."\n\n", + qseq2 => "\n\n\n".join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "10", "10", + "Index", + "2", # Mate + "TCAGTTTTTGA", + "ABCBGACBCBA", + "1")."\n\n". + join("\t", "MachName", + "RunNum", + "Lane", + "Tile", + "12", "15", + "Index", + "2", # Mate + "AGCATCGATC", + "AGGCBBGCBG", + "1")."\n\n", + idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0, + "MachName_RunNum_Lane_Tile_12_15_Index" => 1 }, + pairhits => [ { }, { "0,8" => 1 } ] }, + + { name => "Raw paired 2", + ref => [ "AGCATCGATCAAAAACTGA" ], + args => "-s 1", + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + raw1 => "AGCATCGATC\r\n". + "\n\nTCAGTTTTTGA\n", + raw2 => "\n\n\r\nTCAGTTTTTGA\n". + "\n\n\r\nAGCATCGATC", + pairhits => [ { }, { "0,8" => 1 } ] }, + + # Paired-end reads that should align + { name => "Fastq paired 3", + ref => [ "AGCATCGATCAAAAACTGA" ], + args => "-u 1", + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + fastq1 => "\n\n\r\n\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n". + "\n\n\@r1\nTCAGTTTTTGA\r\n+\n\nIIIIIIIIIII\n\n", + fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n". + "\n\n\r\n\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII", + pairhits => [ { "0,8" => 1 }, { } ] }, + + { name => "Tabbed paired 3", + ref => [ "AGCATCGATCAAAAACTGA" ], + args => "-u 1", + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + tabbed => "\n\n\r\nr0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n". 
+ "\n\nr1\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII", + paired => 1, + pairhits => [ { "0,8" => 1 }, { } ] }, + + { name => "Fasta paired 3", + ref => [ "AGCATCGATCAAAAACTGA" ], + args => "-u 1", + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + fasta1 => "\n\n\r\n>r0\nAGCATCGATC\r\n". + "\n\n>r1\nTCAGTTTTTGA\r\n", + fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n". + "\n\n\r\n>r1\nAGCATCGATC", + pairhits => [ { "0,8" => 1 }, { } ] }, + + { name => "Raw paired 3", + ref => [ "AGCATCGATCAAAAACTGA" ], + args => "-u 1", + # AGCATCGATC + # TCAAAAACTGA + # 0123456789012345678 + raw1 => "\n\n\r\nAGCATCGATC\r\n". + "\n\nTCAGTTTTTGA\r\n", + raw2 => "\n\n\r\nTCAGTTTTTGA\n". + "\n\n\r\nAGCATCGATC", + pairhits => [ { "0,8" => 1 }, { } ] }, + + # Paired-end reads that should align + #{ name => "Fastq paired 4", + # ref => [ "AGCATCGATCAAAAACTGA" ], + # args => "-s 1 -L 4 -i C,1,0", + # # AGCATCGATC + # # TCAAAAACTGA + # # 0123456789012345678 + # fastq1 => "\n\n\r\n\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n". + # #"\n\n\@r1\nTC\r\n+\n\nII\n\n". + # "\n\n\@r2\nTCAGTTTTTGA\r\n+\n\nIIIIIIIIIII\n\n", + # fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n". + # #"\n\n\r\n\@r1\nAG\r\n+\nII". + # "\n\@r2\nAGCATCGATC\r\n+\nIIIIIIIIII", + # paired => 1, + # pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ], + # pairhits => [ { "0,8" => 1 } ], + # samoptflags_map => [ + # { }, + # { "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } }, + ## { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 }, + # 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }] + #}, + + #{ name => "Tabbed paired 4", + # ref => [ "AGCATCGATCAAAAACTGA" ], + # args => "-s 1 -L 4 -i C,1,0", + # # AGCATCGATC + # # TCAAAAACTGA + # # 0123456789012345678 + # tabbed => "\n\n\r\nr0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n". + # "\n\nr1\tTC\tII\tAG\tII". 
+ # "\n\nr2\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII\n\n", + # paired => 1, + # #pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ], + # pairhits => [ { }, { "0,8" => 1 } ], + # samoptflags_map => [ + # { }, + # #{ "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } }, + # { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 }, +# 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }] + #}, + + #{ name => "Fasta paired 4", + # ref => [ "AGCATCGATCAAAAACTGA" ], + # args => "-s 1 -L 4 -i C,1,0", + # # AGCATCGATC + # # TCAAAAACTGA + # # 0123456789012345678 + # fasta1 => "\n\n\r\n>r0\nAGCATCGATC\r\n". + # # "\n\n>r1\nTC\r\n". + # "\n\n>r2\nTCAGTTTTTGA\r\n", + # fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n\n". + # # "\n\n\r\n>r1\nAG". + # "\n>r2\nAGCATCGATC", + # # pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ], + # pairhits => [ { }, { "0,8" => 1 } ], + # samoptflags_map => [ + # { }, + # #{ "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } }, + # { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 }, + # 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }] + #}, + + #{ name => "Raw paired 4", + # ref => [ "AGCATCGATCAAAAACTGA" ], + # args => "-s 1 -L 4 -i C,1,0", + # # AGCATCGATC + # # TCAAAAACTGA + # # 0123456789012345678 + # raw1 => "\n\n\r\nAGCATCGATC\r\n". + ## "\n\nTC\r\n". + # "\n\nTCAGTTTTTGA\r\n", + # raw2 => "\n\n\r\nTCAGTTTTTGA\n\n". + # "\n\n\r\nAG". + # "\nAGCATCGATC", + # pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ], + # pairhits => [ { }, { "0,8" => 1 } ], + # samoptflags_map => [ + # { }, + # { "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } }, + # { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 }, +# 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }] + #}, + + # + # Check that skipping of empty reads is handled correctly. A read that is + # empty or becomes empty after --trim3/--trim5 are applied should still + # count as a first-class read that gets propagated up into the alignment + # loop. And it should be counted in the -s/-u totals. 
+ # + + { ref => [ "AGCATCGATCAGTATCTGA" ], + reads => [ "", "ATCGATCAGTA" ], + args => "-s 1", + hits => [ {}, { 3 => 1 }] }, + + { ref => [ "AGCATCGATCAGTATCTGA" ], + mate1s => [ "", "AGCATCGATC" ], + mate2s => [ "", "TCAGATACTG" ], + args => "-s 1", + pairhits => [ {}, { "0,9" => 1 }] }, + + { ref => [ "AGCATCGATCAGTATCTGA" ], + reads => [ "", "ATCGATCAGTA" ], + args => "-s 2", + hits => [ {}, {} ] }, + + { ref => [ "AGCATCGATCAGTATCTGA" ], + mate1s => [ "", "AGCATCGATC" ], + mate2s => [ "", "TCAGATACTG" ], + args => "-s 2", + pairhits => [ {}, {} ] }, + + { ref => [ "AGCATCGATCAGTATCTGA" ], + reads => [ "", "ATCGATCAGTA", "AGTATCTGA" ], + args => "-s 1 -u 1", + hits => [ {}, { 3 => 1 }] }, + + { ref => [ "AGCATCGATCAGTATCTGA" ], + reads => [ "AC", "ATCGATCAGTA" ], + args => "-s 1 --trim3 2", + norc => 1, + hits => [ {}, { 3 => 1 }] }, + + { ref => [ "AGCATCGATCAGTATCTGA" ], + reads => [ "AC", "ATCGATCAGTA" ], + args => "-s 1 --trim3 2", + nofw => 1, + hits => [ {}, { 5 => 1 }] }, + + { ref => [ "AGCATCGATCAGTATCTGA" ], + reads => [ "AC", "ATCGATCAGTA" ], + args => "-s 1 --trim5 2", + nofw => 1, + hits => [ {}, { 3 => 1 }] }, + + { ref => [ "AGCATCGATCAGTATCTGA" ], + reads => [ "AC", "ATCGATCAGTA" ], + args => "-s 1 --trim5 2", + norc => 1, + hits => [ {}, { 5 => 1 }] }, + + # + # Alignment with overhang + # + + { ref => [ "TGC" ], + reads => [ "ATGC" ], + args => "--overhang --policy \"SEED=0,3\\;IVAL=C,1,0\\;NCEIL=L,1,0\"", + hits => [ { 0 => 1 } ], + cigar => [ "1S3M" ], + samoptflags => [ + { "AS:i:-1" => 1, "YT:Z:UU" => 1, "MD:Z:3" => 1, "XN:i:1" => 1 } ] + }, + + { ref => [ "TTGTTCGT" ], + reads => [ "TTGTTCG" ], + args => "--policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", + hits => [ { 0 => 1 } ], + cigar => [ "7M" ], + samoptflags => [ { "AS:i:0" => 1, "YT:Z:UU" => 1, "MD:Z:7" => 1 } ] + }, + + { ref => [ "TTGTTCGT" ], + reads => [ "TTGTTCG" ], + args => "", + hits => [ { 0 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:7=" ], + cigar => [ "7M" ], + 
samoptflags => [ { "AS:i:0" => 1, "YT:Z:UU" => 1, "MD:Z:7" => 1 } ] + }, + + { ref => [ "TTGTTCGT" ], + reads => [ "TGTTCGT", "TTGTTCG" ], + args => "--overhang", + hits => [ { 1 => 1 }, { 0 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:7=", "XM:0,XP:0,XT:UU,XC:7=" ], + cigar => [ "7M", "7M" ], + samoptflags => [ + { "YT:Z:UU" => 1, "MD:Z:7" => 1 }, + { "YT:Z:UU" => 1, "MD:Z:7" => 1 } + ] + }, + + { ref => [ "TTGTTCGT" ], + reads => [ "TGTTCGT", "TTGTTCG" ], + args => "", + hits => [ { 1 => 1 }, { 0 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:7=", "XM:0,XP:0,XT:UU,XC:7=" ], + cigar => [ "7M", "7M" ], + samoptflags => [ + { "YT:Z:UU" => 1, "MD:Z:7" => 1 }, + { "YT:Z:UU" => 1, "MD:Z:7" => 1 } + ] + }, + + # Reads 1 and 2 don't have overhang, reads 3 and 4 overhang opposite ends + { ref => [ "TTGTTCGT" ], + # TGTTCGT + # GTTCGTA + # ATTGTTC + reads => [ "TGTTCGT", "GTTCGTA", "ATTGTTC" ], + args => "--overhang --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", + hits => [ { 1 => 1 }, { 2 => 1 }, { 0 => 1 } ], + cigar => [ "7M", "6M1S", "1S6M" ], + samoptflags => [ + { "YT:Z:UU" => 1, "MD:Z:7" => 1 }, + { "AS:i:-1" => 1, "XN:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:6" => 1 }, + { "AS:i:-1" => 1, "XN:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:6" => 1 } + ]}, + + # Same as previous case but --overhang not specified + { ref => [ "TTGTTCGT" ], + reads => [ "TGTTCGT", "TTGTTCG", "GTTCGTA", "ATTGTTC" ], + args => "--policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", + hits => [ { 1 => 1 }, { 0 => 1 } ], # only the internal hits + cigar => [ "7M", "7M", "*", "*" ], + samoptflags => [ + { "YT:Z:UU" => 1, "MD:Z:7" => 1 }, + { "YT:Z:UU" => 1, "MD:Z:7" => 1 }, + { "YT:Z:UU" => 1 }, + { "YT:Z:UU" => 1 } + ] + }, + + # A simple case that should align with or without overhang, with or without + # a special NCEIL setting. 
+ { ref => [ "TTGTTCGT" ], + reads => [ "TTGTTCG" ], + args => "--overhang --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", + hits => [ { 0 => 1 } ]}, + + { ref => [ "TTGTTCGT" ], + reads => [ "TTGTTCG" ], + args => "--overhang", + hits => [ { 0 => 1 } ]}, + + # + # Testing the various -M/-m/-k/-a alignment modes in both unpaired and + # paired-end modes. Ensuring that SAM optional flags such as YM:i, YP:i + # are set properly in all cases. + # + + # + # Paired-end + # + + { name => "P.M.58.G.b Unpaired -M 5 w/ 8 hits global, but mate #1 has just 1", + # 0 1 2 3 0 1 2 0 1 2 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ 
"AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ], + # 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 + # 0 1 2 3 4 5 6 7 8 9 + # 0 0 0 0 0 0 0 0 0 0 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + args => "-X 1000", + report => "-M 5", + pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1, + "12,486" => 1, "12,552" => 1, "12,723" => 1, + "12,789" => 1, "12,960" => 1 }], + hits_are_superset => [ 1 ], + cigar_map => [{ + 12 => "33M", 78 => "28M", + 249 => "28M", 315 => "28M", + 486 => "28M", 552 => "28M", + 723 => "28M", 789 => "28M", + 960 => "28M" + }], + samoptflags_map => [ { + 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 78 => { "AS:i:0" => 1, "XS:i:0" => 
1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 486 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 552 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 723 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 789 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 960 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + } ] }, + + { name => "P.M.58.L.b Unpaired -M 5 w/ 8 hits local, but mate #1 has just 1", + # 0 1 2 3 0 1 2 0 1 2 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG 
ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ], + # 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 + # 0 1 2 3 4 5 6 7 8 9 + # 0 0 0 0 0 0 0 0 0 0 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + args => "-X 1000 --local", + report => "-M 5", + pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1, + "12,486" => 1, "12,552" => 1, "12,723" => 1, + "12,789" => 1, "12,960" => 1 }], + hits_are_superset => [ 1 ], + cigar_map => [{ + 12 => "33M", 78 => "28M", + 249 => "28M", 315 => "28M", + 486 => "28M", 552 => "28M", + 723 => "28M", 789 => "28M", + 960 => "28M" + }], + samoptflags_map => [ { + 12 => { "AS:i:66" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 78 => { "AS:i:56" => 1, 
"XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 249 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 315 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 486 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 552 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 723 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 789 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 960 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + } ] }, + + { name => "P.k.58.G.b Unpaired -k 5 w/ 8 hits global, but mate #1 has just 1", + # 0 1 2 3 0 1 2 0 1 2 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG 
ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ], + # 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 + # 0 1 2 3 4 5 6 7 8 9 + # 0 0 0 0 0 0 0 0 0 0 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + args => "-X 1000", + report => "-k 5", + pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1, + "12,486" => 1, "12,552" => 1, "12,723" => 1, + "12,789" => 1, "12,960" => 1 }], + hits_are_superset => [ 1 ], + cigar_map => [{ + 12 => "33M", 78 => "28M", + 249 => "28M", 315 => "28M", + 486 => "28M", 552 => "28M", + 723 => "28M", 789 => "28M", + 960 => "28M" + }], + samoptflags_map => [ { + 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 78 => { "AS:i:0" => 1, "XS:i:0" => 
1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 486 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 552 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 723 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 789 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 960 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + } ] }, + + { name => "P.k.58.L.b Unpaired -k 5 w/ 8 hits local, but mate #1 has just 1", + # 0 1 2 3 0 1 2 0 1 2 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG 
ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ], + # 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 + # 0 1 2 3 4 5 6 7 8 9 + # 0 0 0 0 0 0 0 0 0 0 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + args => "-X 1000 --local", + report => "-k 5", + pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1, + "12,486" => 1, "12,552" => 1, "12,723" => 1, + "12,789" => 1, "12,960" => 1 }], + hits_are_superset => [ 1 ], + cigar_map => [{ + 12 => "33M", 78 => "28M", + 249 => "28M", 315 => "28M", + 486 => "28M", 552 => "28M", + 723 => "28M", 789 => "28M", + 960 => "28M" + }], + samoptflags_map => [ { + 12 => { "AS:i:66" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 78 => { "AS:i:56" => 1, 
"XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 249 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 315 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 486 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 552 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 723 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 789 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 960 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + } ] }, + + { name => "P.M.22.G. 
Paired -M 2 w/ 2 paired hit, 2 unpaired hits each, global", + # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 + # 0 1 2 3 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + mate1fw => 1, mate2fw => 0, + args => "-X 150", + report => "-M 2", + pairhits => [ { "12,78" => 1, "249,315" => 1 } ], + cigar_map => [{ + 12 => "33M", 249 => "33M", + 78 => "28M", 315 => "28M" + }], + hits_are_superset => [ 1 ], + samoptflags_map => [{ + 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YM:i:0" => 1, 
"YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + }] + }, + + { name => "P.M.22.L. Paired -M 2 w/ 2 paired hit, 2 unpaired hits each, local", + # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 + # 0 1 2 3 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + mate1fw => 1, mate2fw => 0, + args => "--local -X 150", + report => "-M 2", + pairhits => [ { "12,78" => 1, "249,315" => 1 } ], + cigar_map => [{ + 12 => "33M", 249 => "33M", + 78 => "28M", 315 => "28M" + }], + hits_are_superset => [ 1 ], + samoptflags_map => [{ + 12 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 78 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 249 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" 
=> 1, "YS:i:56" => 1 }, + 315 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + }] + }, + + { name => "P.k.2.G. Paired -k 1 w/ 2 paired hit, 2 unpaired hits each, global", + # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 + # 0 1 2 3 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + mate1fw => 1, mate2fw => 0, + args => "-X 150", + report => "-k 1", + pairhits => [ { "12,78" => 1, "249,315" => 1 } ], + hits_are_superset => [ 1 ], + cigar_map => [{ + 12 => "33M", 249 => "33M", + 78 => "28M", 315 => "28M" + }], + samoptflags_map => [{ + 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YT:Z:CP" => 
1, "YS:i:0" => 1 }, + 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YT:Z:CP" => 1, "YS:i:0" => 1 }, + }] + }, + + { name => "P.k.2.L. Paired -k 1 w/ 2 paired hit, 2 unpaired hits each, local", + # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 + # 0 1 2 3 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + mate1fw => 1, mate2fw => 0, + args => "--local -X 150", + report => "-k 1", + pairhits => [ { "12,78" => 1, "249,315" => 1 } ], + hits_are_superset => [ 1 ], + cigar_map => [{ + 12 => "33M", 249 => "33M", + 78 => "28M", 315 => "28M" + }], + samoptflags_map => [{ + 12 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 78 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 249 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 315 
=> { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YT:Z:CP" => 1, "YS:i:66" => 1 }, + }] + }, + + { name => "P.M.2.G. Paired -M 1 w/ 2 paired hit, 2 unpaired hits each, global", + # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 + # 0 1 2 3 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + mate1fw => 1, mate2fw => 0, + report => "-M 1 -X 150", + pairhits => [ { "12,78" => 1, "249,315" => 1 } ], + hits_are_superset => [ 1 ], + cigar_map => [{ + 12 => "33M", 249 => "33M", + 78 => "28M", 315 => "28M" + }], + samoptflags_map => [{ + 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 
1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + }] + }, + + { name => "P.M.2.L. Paired -M 1 w/ 2 paired hit, 2 unpaired hits each, local", + # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 + # 0 1 2 3 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + mate1fw => 1, mate2fw => 0, + report => "-M 1 --local -X 150", + pairhits => [ { "12,78" => 1, "249,315" => 1 } ], + hits_are_superset => [ 1 ], + cigar_map => [{ + 12 => "33M", 249 => "33M", + 78 => "28M", 315 => "28M" + }], + samoptflags_map => [{ + 12 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 78 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 
249 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 315 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + }] + }, + + { name => "P.k.1.G. Paired -k w/ 1 paired hit, 1 unpaired hit each, global", + # 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + mate1fw => 1, mate2fw => 0, + report => "-k 1 -X 150", + pairhits => [ { "12,78" => 1 } ], + cigar_map => [{ + 12 => "33M", + 78 => "28M" + }], + samoptflags_map => [{ + 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + }] + }, + + { name => "P.k.1.L. 
Paired -k 1 w/ 1 paired hit, 1 unpaired hit each, local", + # 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + mate1fw => 1, mate2fw => 0, + args => "--local -X 150", + report => "-k 1", + pairhits => [ { "12,78" => 1 } ], + cigar_map => [{ + 12 => "33M", + 78 => "28M" + }], + samoptflags_map => [{ + 12 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 78 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + }] + }, + + { name => "P.M.1.G. 
Paired -M w/ 1 paired hit, 1 unpaired hit each, global", + # 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + mate1fw => 1, mate2fw => 0, + args => "-X 150", + report => "-M 1", + pairhits => [ { "12,78" => 1 } ], + cigar_map => [{ + 12 => "33M", + 78 => "28M" + }], + samoptflags_map => [{ + 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + }] + }, + + { name => "P.M.1.L. 
Paired -M w/ 1 paired hit, 1 unpaired hit each, local", + # 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + mate1fw => 1, mate2fw => 0, + args => "-X 150 --local", + report => "-M 1", + pairhits => [ { "12,78" => 1 } ], + cigar_map => [{ + 12 => "33M", + 78 => "28M" + }], + samoptflags_map => [{ + 12 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 78 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, + "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + }] + }, + + { name => "P.M.58.G. 
Unpaired -M 5 w/ 8 hits global", + # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACG
CGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGA" ], + # 
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 + # 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + args => "-X 150", + report => "-M 5", + pairhits => [ { "12,78" => 1, "249,315" => 1, "486,552" => 1, + "723,789" => 1, "960,1026" => 1, "1197,1263" => 1, + "1434,1500" => 1, "1671,1737" => 1, "1908,1974" => 1, + "2145,2211" => 1, "2382,2448" => 1 } ], + hits_are_superset => [ 1 ], + cigar_map => [{ + 12 => "33M", 78 => "28M", + 249 => "33M", 315 => "28M", + 486 => "33M", 552 => "28M", + 723 => "33M", 789 => "28M", + 960 => "33M", 1026 => "28M", + 1197 => "33M", 1263 => "28M", + 1434 => "33M", 1500 => "28M", + 1671 => "33M", 1737 => "28M", + 1908 => "33M", 1974 => "28M", + 2145 => "33M", 2211 => "28M", + 2382 => "33M", 2448 
=> "28M", + }], + samoptflags_map => [ { + 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 486 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 552 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 723 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 789 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 960 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 1026 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, 
"YT:Z:CP" => 1, "YS:i:0" => 1 }, + 1197 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 1263 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 1434 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 1500 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 1671 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 1737 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 1908 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 1974 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 2145 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 2211 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, 
"YT:Z:CP" => 1, "YS:i:0" => 1 }, + 2382 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + 2448 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, + } ] }, + + { name => "P.M.58.L. Unpaired -M 5 w/ 8 hits local", + # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 + # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG + ref => [ 
"AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTC" ], + # 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 
4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 + # 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 + mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped + args => "--local -X 150", + report => "-M 5", + pairhits => [ { "12,78" => 1, "249,315" => 1, "486,552" => 1, + "723,789" => 1, "960,1026" => 1, "1197,1263" => 1, + "1434,1500" => 1, "1671,1737" => 1 } ], + hits_are_superset => [ 1 ], + cigar_map => [{ + 12 => "33M", 78 => "28M", + 249 => "33M", 315 => "28M", + 486 => "33M", 552 => "28M", + 723 => "33M", 789 => "28M", + 960 => "33M", 1026 => "28M", + 1197 => "33M", 1263 => "28M", + 1434 => "33M", 1500 => "28M", + 1671 => "33M", 1737 => "28M" + }], + samoptflags_map => [ { + 12 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 78 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 249 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 315 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 486 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, 
"YT:Z:CP" => 1, "YS:i:56" => 1 }, + 552 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 723 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 789 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 960 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 1026 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 1197 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 1263 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 1434 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 1500 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + 1671 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, + 
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, + 1737 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, + "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, + "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, + } ] }, + + # + # Unpaired + # + + { name => "U.M.1.G. Unpaired -M w/ 1 hit global", + # 0 1 2 3 + # 012345678901234567890123456789012 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG + # CAGCGTACGGTATCTAGCTATG + # GGTATCTAGCTATGGGCATCGA + # AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ], + # 01234567890123456789012345678901234567890123456789012345 + # 0 1 2 3 4 5 + reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + report => "-M 1", + hits => [ { 12 => 1 } ], + cigar => [ "33M" ], + samoptflags => [{ + "AS:i:0" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:33" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + }] + }, + + { name => "U.M.1.L. 
Unpaired -M w/ 1 hit local", + # 0 1 2 3 + # 012345678901234567890123456789012 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ], + # 01234567890123456789012345678901234567890123456789012345 + # 0 1 2 3 4 5 + reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + args => "--local", + report => "-M 1", + hits => [ { 12 => 1 } ], + cigar => [ "33M" ], + samoptflags => [{ + "AS:i:66" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:33" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + }] + }, + + { name => "U.k.1.G. Unpaired -k 1 w/ 1 hit global", + # 0 1 2 3 + # 012345678901234567890123456789012 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ], + # 01234567890123456789012345678901234567890123456789012345 + # 0 1 2 3 4 5 + reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + report => "-k 1", + hits => [ { 12 => 1 } ], + cigar => [ "33M" ], + samoptflags => [ { + "AS:i:0" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:33" => 1, # mismatching positions/bases + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.M.1.L. 
Unpaired -m w/ 1 hit local", + # 0 1 2 3 + # 012345678901234567890123456789012 + # CAGCGTACGGTATCTAGCTATGGGCATCGATCG + ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ], + # 01234567890123456789012345678901234567890123456789012345 + # 0 1 2 3 4 5 + reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], + args => "--local", + report => "-k 1", + hits => [ { 12 => 1 } ], + cigar => [ "33M" ], + samoptflags => [ { + "AS:i:66" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:33" => 1, # mismatching positions/bases + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.M.2.G. Unpaired -M 1 w/ 2 hit global", + # 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 + # 0 1 2 3 4 5 6 7 8 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + args => "", + report => "-M 1", + hits => [ { 6 => 1, 48 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "30M" ], + samoptflags => [ { + "AS:i:0" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:1" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.M.2.L. 
Unpaired -M 1 w/ 2 hit local", + # 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 + # 0 1 2 3 4 5 6 7 8 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + args => "--local", + report => "-M 1", + hits => [ { 6 => 1, 48 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "30M" ], + samoptflags => [ { + "AS:i:60" => 1, # alignment score + "XS:i:60" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:1" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.k.2.G. 
Unpaired -k 1 w/ 2 hit global", + # 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 + # 0 1 2 3 4 5 6 7 8 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + report => "-k 1", + hits => [ { 6 => 1, 48 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "30M" ], + samoptflags => [ { + "AS:i:0" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.k.2.L. 
Unpaired -k 1 w/ 2 hit local", + # 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 + # 0 1 2 3 4 5 6 7 8 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + report => "-k 1", + hits => [ { 6 => 1, 48 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "30M" ], + samoptflags => [ { + "AS:i:0" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.M.22.G. 
Unpaired -M 2 w/ 2 hit global", + # 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 + # 0 1 2 3 4 5 6 7 8 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + report => "-M 2", + hits => [ { 6 => 1, 48 => 1 } ], + cigar => [ "30M" ], + hits_are_superset => [ 1 ], + samoptflags => [ { + "AS:i:0" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.M.22.L. 
Unpaired -M 2 w/ 2 hit local", + # 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 + # 0 1 2 3 4 5 6 7 8 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + report => "-M 2 --local", + hits => [ { 6 => 1, 48 => 1 } ], + cigar => [ "30M" ], + hits_are_superset => [ 1 ], + samoptflags => [ { + "AS:i:60" => 1, # alignment score + "XS:i:60" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.k.22.G. 
Unpaired -k 2 w/ 2 hit global", + # 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 + # 0 1 2 3 4 5 6 7 8 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + report => "-k 2", + hits => [ { 6 => 1, 48 => 1 } ], + cigar => [ "30M" ], + samoptflags => [ { + "AS:i:0" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.k.22.L. 
Unpaired -k 2 w/ 2 hit local", + # 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 + # 0 1 2 3 4 5 6 7 8 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + args => "--local", + report => "-k 2", + hits => [ { 6 => 1, 48 => 1 } ], + cigar => [ "30M" ], + samoptflags => [ { + "AS:i:60" => 1, # alignment score + "XS:i:60" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.M.58.G. 
Unpaired -M 5 w/ 8 hits global", + # 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 + # 0 1 2 3 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + args => "-X 150", + report => "-M 5", + hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "30M" ], + samoptflags => [ { + "AS:i:0" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:1" => 1, # read aligned repetitively in unpaired 
fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.M.58.L. Unpaired -M 5 w/ 8 hits global", + # 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 + # 0 1 2 3 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + args => "--local", + report => "-M 5", + hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "30M" ], + samoptflags => [ { + "AS:i:60" => 1, # alignment score + "XS:i:60" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + 
"MD:Z:30" => 1, # mismatching positions/bases + "YM:i:1" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.k.58.G. Unpaired -k 5 w/ 8 hits global", + # 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 + # 0 1 2 3 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + report => "-k 5", + hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "30M" ], + samoptflags => [ { + "AS:i:0" => 1, # alignment score + "XS:i:0" => 1, # suboptimal alignment score + "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num 
gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + { name => "U.k.58.L. Unpaired -k 5 w/ 8 hits local", + # 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 + # 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 + # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA + ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], + # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 + # 0 1 2 3 + reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], + args => "--local", + report => "-k 5", + hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ], + hits_are_superset => [ 1 ], + cigar => [ "30M" ], + samoptflags => [ { + "AS:i:60" => 1, # alignment score + "XS:i:60" => 1, # suboptimal alignment score 
+ "XN:i:0" => 1, # num ambiguous ref bases + "XM:i:0" => 1, # num mismatches + "XO:i:0" => 1, # num gap opens + "XG:i:0" => 1, # num gap extensions + "NM:i:0" => 1, # num edits + "MD:Z:30" => 1, # mismatching positions/bases + "YM:i:0" => 1, # read aligned repetitively in unpaired fashion + "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion + } ] }, + + # Following cases depend on this being the case: + # + # static const float DEFAULT_CEIL_CONST = 3.0f; + # static const float DEFAULT_CEIL_LINEAR = 3.0f; + + # Just enough budget for hits, so it should align + { ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCAT" ], # budget = 3 + 8 * 3 = 27 + args => "-L 6 -i C,1,0 --policy \"MMP=C27\\;MIN=L,-3,-3\\;RDG=25,15\\;RFG=25,15\"", # penalty = 27 + report => "-a", + hits => [ { 0 => 1, 8 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:6=1X1=" ], + cigar => [ "8M" ], + samoptflags => [ { "AS:i:-27" => 1, "XS:i:-27" => 1, "NM:i:1" => 1, + "XM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:6G1" => 1 } ] }, + + # Not quite enough budget for hits, so it should NOT align + { ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCAT" ], # budget = 3 + 8 * 3 = 27 + args => "-L 6 -i C,1,0 --policy \"MMP=C28\\;MIN=L,-3,-3\\;RDG=25,15\\;RFG=25,15\"", # penalty = 28 + report => "-a", + hits => [ { "*" => 1 } ], + flags => [ "XM:0,XP:0,XT:UU" ], + cigar => [ "*" ], + samoptflags => [ { "YT:Z:UU" => 1 } ] }, + + # Check that using a seed of length 1 with 1-mismatch doesn't crash. + # Perhaps we should disallow it though? 
+ + { ref => [ "AAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCC" ], + reads => [ "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC", "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC" ], + names => [ "r1", "r1", "r1", "r1", "r2", "r2", "r2", "r2", "r3", "r3", "r3", "r3", "r4", "r4", "r4", "r4" ], + args => "--policy \"SEED=1,1\"", + check_random => 1, + report => "-k 1" }, + + # + # Gap penalties + # + + # Alignment with 1 read gap + { name => "Gap penalties 1", + ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCTTTGTT" ], # budget = 3 + 12 * 3 = 39 + args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=29,10\\;RFG=25,15\\;MIN=L,-3,-3\"", + report => "-a", + hits => [ { 0 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:6=1D6=" ], + cigar => [ "6M1D6M" ], + samoptflags => [{ + "AS:i:-39" => 1, "NM:i:1" => 1, "XO:i:1" => 1, "XG:i:1" => 1, + "YT:Z:UU" => 1, "MD:Z:6^G6" => 1 }] + }, + + # Alignment with 1 read gap, but not enough budget + { name => "Gap penalties 2", + ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCTTTGTT" ], # budget = 3 + 12 * 3 = 39 + args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=30,10\\;RFG=25,15\\;MIN=L,-3,-3\"", + report => "-a", + hits => [ { "*" => 1 } ], + flags => [ "XM:0,XP:0,XT:UU" ], + cigar => [ "*" ], + samoptflags => [{ "YT:Z:UU" => 1 }] + }, + + # Alignment with 1 reference gap + { name => "Gap penalties 3", + ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45 + args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=25,15\\;RFG=30,15\\;MIN=L,-3,-3\"", + report => "-a", + hits => [ { 0 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:7=1I6=" ], + cigar => [ "7M1I6M" ], + samoptflags => [{ "AS:i:-45" => 1, "NM:i:1" => 1, "XO:i:1" => 1, + "XG:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:13" => 1 }] + }, + + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + # T T G T T C G T T T G T T C G T + # 0 1 1 0 2 3 1 0 0 1 1 0 2 3 1 + # 0 T x + # 0 0 x + # 1 T x + # 1 1 x + # 2 G x + # 2 1 x + # 3 T x + # 3 0 x + # 4 T x + # 
4 2 x + # 5 C x + # 5 3 x + # 6 G + # 6 2 x + # 7 A + # 7 3 x + # 8 T x + # 8 0 x + # 9 T x + # 9 0 x + # 0 T x + # 0 1 x + # 1 G x + # 1 1 x + # 2 T x + # 2 0 x + # 3 T + # + + # Alignment with 1 reference gap, but not enough budget + { name => "Gap penalties 4", + ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45 + args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=25,15\\;RFG=30,16\\;MIN=L,-3,-3\"", + report => "-a", + hits => [ { "*" => 1 } ], + flags => [ "XM:0,XP:0,XT:UU" ], + cigar => [ "*" ], + samoptflags => [ { "YT:Z:UU" => 1 } ] }, + + # Alignment with 1 reference gap, but not enough budget + { name => "Gap penalties 5", + ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45 + args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=25,15\\;RFG=31,15\\;MIN=L,-3,-3\"", + report => "-a", + hits => [ { "*" => 1 } ], + flags => [ "XM:0,XP:0,XT:UU" ], + cigar => [ "*" ], + samoptflags => [ { "YT:Z:UU" => 1 } ] }, + + # Alignment with 1 reference gap and 1 read gap + { name => "Gap penalties 6", + ref => [ "ATTGTTCGTTTGTTCGTA" ], + reads => [ "ATTGTTGTTTGATTCGTA" ], # budget = 3 + 18 * 3 = 57 + args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=19,10\\;RFG=18,10\\;MIN=L,-3,-3\"", + report => "-a", + hits => [ { 0 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:6=1D5=1I6=" ], + cigar => [ "6M1D5M1I6M" ] }, + + # Alignment with 1 reference gap and 1 read gap, but not enough budget + { name => "Gap penalties 7", + ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTGTTTGATTCGT" ], # budget = 3 + 16 * 3 = 51 + args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=16,10\\;RFG=16,10\\;MIN=L,-3,-3\"", + report => "-a", + hits => [ { "*" => 1 } ], + flags => [ "XM:0,XP:0,XT:UU" ], + cigar => [ "*" ] }, + + # Experiment with N filtering + + { name => "N filtering 1", + ref => [ "GAGACTTTATACGCATCGAACTATCGCTCTA" ], + reads => [ "ATACGCATCGAAC" ], + # 
0123456789012345678901234567890 + # 1 2 3 + args => "--policy \"NCEIL=L,0,0\"", + report => "-a", + hits => [ { 8 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:13=" ] }, + + { name => "N filtering 2", + ref => [ "GAGACTTTATNCGCATCGAACTATCGCTCTA" ], + reads => [ "ATACGCATCGAAC" ], + # 0123456789012345678901234567890 + # 1 2 3 + args => "--policy \"NCEIL=L,0,0\"", + report => "-a", + hits => [ { "*" => 1 } ] }, + + { name => "N filtering 3", + ref => [ "GAGACTTTATACGCATCGAANTATCGCTCTA" ], + reads => [ "ATACGCATCGAAC" ], + # 0123456789012345678901234567890 + # 1 2 3 + args => "--policy \"NCEIL=L,0,0\"", + report => "-a", + hits => [ { "*" => 1 } ] }, + + { name => "N filtering 4", + ref => [ "GAGACTTTNTACGCATCGAACTATCGCTCTA" ], + reads => [ "ATACGCATCGAAC" ], + # 0123456789012345678901234567890 + # 1 2 3 + args => "--policy \"NCEIL=L,0,0\"", + report => "-a", + hits => [ { "*" => 1 } ] }, + + { name => "N filtering 5", + ref => [ "GAGACTTTATNCGCATCGAACTATCGCTCTA" ], + reads => [ "ATACGCATCGAAC" ], + # 0123456789012345678901234567890 + # 1 2 3 + args => "--policy \"NCEIL=L,0,0.1\\;SEED=0,10\\;IVAL=C,1,0\"", + report => "-a", + hits => [ { 8 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:2=1X10=" ] }, + + { name => "N filtering 6", + ref => [ "GAGACTTTNTACGCATCGAANTATCGCTCTA" ], + reads => [ "ATACGCATCGAAC" ], + # 0123456789012345678901234567890 + # 1 2 3 + args => "--policy \"NCEIL=L,0,0.1\\;SEED=0,10\\;IVAL=C,1,0\"", + report => "-a", + hits => [ { "*" => 1 } ] }, + + # No discordant alignment because one mate is repetitive. 
+ + # Alignment with 1 reference gap + { ref => [ "TTTTGTTCGTTTG" ], + reads => [ "TTTTGTTCGATTTG" ], # budget = 3 + 14 * 3 = 45 + args => "--policy \"SEED=0,8\\;IVAL=C,1,0\\;MMP=C30\\;RDG=25,15\\;RFG=25,20\\;MIN=L,-3,-3\"", + report => "-a", + hits => [ { 0 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:9=1I4=" ], + cigar => [ "9M1I4M" ], + samoptflags => [ + { "AS:i:-45" => 1, "NM:i:1" => 1, "XO:i:1" => 1, "XG:i:1" => 1, + "YT:Z:UU" => 1, "MD:Z:13" => 1 }, + ] + }, + + # TTGTTCGTTTGTT + # Tx + # T x + # G x + # T x + # T x + # C x + # G x + # A x + # T x + # T x + # T x + # G x + # T x + # T x + + # Alignment with 1 reference gap + { ref => [ "TTGTTCGTTTGTT" ], + reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45 + args => "--policy \"SEED=0,3\\;IVAL=C,1,0\\;MMP=C30\\;RDG=25,15\\;RFG=25,20\\;MIN=L,-3,-3\"", + report => "-a", + hits => [ { 0 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:7=1I6=" ], + cigar => [ "7M1I6M" ], + samoptflags => [ + { "AS:i:-45" => 1, "NM:i:1" => 1, "XO:i:1" => 1, "XG:i:1" => 1, + "YT:Z:UU" => 1, "MD:Z:13" => 1 }, + ] + }, + + { ref => [ "ACNCA" ], + reads => [ "CA" ], + args => "", + report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"", + hits => [ { 3 => 1 } ], + edits => [ ], + flags => [ "XM:0,XP:0,XT:UU,XC:2=" ], + cigar => [ "2M" ], + samoptflags => [ + { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, + ] + }, + + { name => "N ceil = 0, 2 legit hits (1)", + ref => [ "ACNCA" ], + reads => [ "AC" ], + args => "", + report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"", + hits => [ { 0 => 1 } ], + edits => [ ], + flags => [ ] }, + + { name => "N ceil = 0, 2 legit hits (2)", + ref => [ "ACNCANNNNNNNNCGNNNNNNNNCG" ], + # 0123456789012345678901234 + # 0 1 2 + reads => [ "CG" ], + args => "", + report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"", + hits => [ { 13 => 2, 23 => 2 } ], + edits => [ ], + cigar => [ "2M", "2M" ], + samoptflags => [ + { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, + { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, + ] 
+ }, + + { ref => [ "ACNCANNNNNNAACGNNNNNNNACGAANNNNCGAAAN" ], + # 0123456789012345678901234567890123456 + # 0 1 2 3 + reads => [ "CG" ], + args => "", + report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"", + hits => [ { 13 => 2, 23 => 2, 31 => 2 } ], + edits => [ ], + flags => [ "XM:0,XP:0,XT:UU,XC:2=", + "XM:0,XP:0,XT:UU,XC:2=", + "XM:0,XP:0,XT:UU,XC:2=" ], + cigar => [ "2M", "2M", "2M" ], + samoptflags => [ + { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, + { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, + { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, + ] + }, + + { ref => [ "ACNCANNNNNNAACGNNNNNNNACGAANNNNCGAAAN" ], + # 0123456789012345678901234567890123456 + # 0 1 2 3 + reads => [ "CG" ], + args => "", + report => "-a --policy \"SEED=0,1\\;IVAL=C,1,0\\;NCEIL=L,0,0\"", + hits => [ { 13 => 2, 23 => 2, 31 => 2 } ], + edits => [ ], + flags => [ "XM:0,XP:0,XT:UU,XC:2=", + "XM:0,XP:0,XT:UU,XC:2=", + "XM:0,XP:0,XT:UU,XC:2=" ], + cigar => [ "2M", "2M", "2M" ], + samoptflags => [ + { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, + { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, + { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, + ] + }, + + # + # Alignment involving ambiguous reference character + # + + # First read has non-compatible unambiguous charcacter (G for Y), + # second read has compatible one + { ref => [ "TTGTTYGT" ], + reads => [ "TTGTTGGT", "TTGTTCGT" ], + args => "", + report => "-a --policy \"SEED=0,5\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", + hits => [ { 0 => 1 }, { 0 => 1 } ], + norc => 1, + edits => [ "5:N>G", "5:N>C" ], + flags => [ "XM:0,XP:0,XT:UU,XC:5=1X2=", "XM:0,XP:0,XT:UU,XC:5=1X2=" ], + cigar => [ "8M", "8M" ], + samoptflags => [ + { "AS:i:-1" => 1, "NM:i:1" => 1, "XM:i:1" => 1, "XN:i:1" => 1, + "YT:Z:UU" => 1, "MD:Z:5N2" => 1 }, + { "AS:i:-1" => 1, "NM:i:1" => 1, "XM:i:1" => 1, "XN:i:1" => 1, + "YT:Z:UU" => 1, "MD:Z:5N2" => 1 }, + ] + }, + + # + # Alignment with multi-character read gap + # + + # Relatively small example with a read gap extend + { ref => [ "ATAACCTTCG" ], + reads => [ "ATAATTCG" ], # 3 * 19 + 3 = 
60 + # ^ + # 4:CC>- + args => "", + report => "-a --overhang --gbar 3 --policy \"MMP=C30\\;RDG=5,5\\;SEED=0,4\\;IVAL=C,1,0\\;RFG=25,20\\;MIN=L,-3,-3\"", + hits => [ { 0 => 1 } ], + edits => [ "4:CC>-" ], + flags => [ "XM:0,XP:0,XT:UU,XC:4=2D4=" ], + cigar => [ "4M2D4M" ], + samoptflags => [ + { "AS:i:-15" => 1, "NM:i:2" => 1, + "XO:i:1" => 1, "XG:i:2" => 3, "YT:Z:UU" => 1, "MD:Z:4^CC4" => 1 } + ] + }, + + # Reads 1 and 2 don't have overhang, reads 3 and 4 overhang opposite ends + { ref => [ "ATATGCCCCATGCCCCCCTCCG" ], + reads => [ "ATATGCCCCCCCCCCTCCG" ], # 3 * 19 + 3 = 60 + # ^ + # 9:ATG>- + args => "--policy \"SEED=0,8\\;IVAL=C,1,0\\;MMP=C30\\;RDG=5,5\\;RFG=25,15\\;MIN=L,-3,-3\"", + hits => [ { 0 => 1 } ], + edits => [ "9:ATG>-" ], + norc => 1, + flags => [ "XM:0,XP:0,XT:UU,XC:9=3D10=" ], + cigar => [ "9M3D10M" ], + samoptflags => [ + { "AS:i:-20" => 1, "NM:i:3" => 1, + "XO:i:1" => 1, "XG:i:3" => 3, "YT:Z:UU" => 1, "MD:Z:9^ATG10" => 1 } + ] + }, + + # Reads 1 and 2 don't have overhang, reads 3 and 4 overhang opposite ends + { ref => [ "ATATGCCCCATGCCCCCCTCCG" ], + reads => [ "CGGAGGGGGGGGGGCATAT" ], + # ATATGCCCCCCCCCCTCCG + # ^ + # 10:GTA>- + args => "", + report => "-a --overhang --policy \"SEED=0,8\\;IVAL=C,1,0\\;MMP=C30\\;RDG=5,5\\;RFG=25,20\\;MIN=L,-3,-3\"", + hits => [ { 0 => 1 } ], + edits => [ "10:GTA>-" ], + norc => 1, + flags => [ "XM:0,XP:0,XT:UU,XC:9=3D10=" ], + cigar => [ "9M3D10M" ], + samoptflags => [ + { "AS:i:-20" => 1, "NM:i:3" => 1, + "XO:i:1" => 1, "XG:i:3" => 3, "YT:Z:UU" => 1, "MD:Z:9^ATG10" => 1 } + ] + }, + + # 1 discordant alignment and one concordant alignment. Discordant because + # the fragment is too long. 
+ + { name => "Simple paired-end 13", + ref => [ "TTTATAAAAATATTTCCCCCCCCCCCCCCTGTCGCTACCGCCCCCCCCCCC" ], + # 012345678901234567890123456789012345678901234567890 + # 0 1 2 3 4 5 + # ATAAAAATAT GTCGCTACCG + # ATAAAAATAT TGTCGCTACC + # ATAAAAATAT CTGTCGCTAC + # ATAAAAATAT CCTGTCGCTA + # TAAAAATATT GTCGCTACCG + # TAAAAATATT TGTCGCTACC + # TAAAAATATT CTGTCGCTAC + # TAAAAATATT CCTGTCGCTA + # 012345678901234567890123456789012345678901234567890 + # 0 1 2 3 4 5 + # ----------------------------------- + # 012345678901234567890123456789012345678901234567 + # 0 1 2 3 4 + mate1s => [ "ATAAAAATAT", "ATAAAAATAT", "ATAAAAATAT", "ATAAAAATAT", + "TAAAAATATT", "TAAAAATATT", "TAAAAATATT", "TAAAAATATT", ], + mate2s => [ "GTCGCTACCG", "TGTCGCTACC", "CTGTCGCTAC", "CCTGTCGCTA", + "GTCGCTACCG", "TGTCGCTACC", "CTGTCGCTAC", "CCTGTCGCTA" ], + mate1fw => 1, mate2fw => 1, + args => "-I 0 -X 35", + # Not really any way to flag an alignment as discordant + pairhits => [ { "3,30" => 1 }, { "3,29" => 1 }, { "3,28" => 1 }, { "3,27" => 1 }, + { "4,30" => 1 }, { "4,29" => 1 }, { "4,28" => 1 }, { "4,27" => 1 } ], + flags => [ "XM:0,XP:0,XT:DP,XC:10=", "XM:0,XP:0,XT:DP,XC:10=", + "XM:0,XP:0,XT:CP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=", + "XM:0,XP:0,XT:DP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=", + "XM:0,XP:0,XT:CP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=" ] }, + + # 1 discordant alignment and one concordant alignment. Discordant because + # the fragment is too long. 
+ + { name => "Simple paired-end 12", + ref => [ "TTTATAAAAATATTTCCCCCCCCCCCCCCGGGCCCGCCCGCCCCCCCCCCC" ], + # ATAAAAATAT GGCCCGCCCG + # ATAAAAATAT CCGGGCCCGC + # 012345678901234567890123456789012345678901234567890 + # 0 1 2 3 4 5 + # ------------------------------------- + # 012345678901234567890123456789012345678901234567 + mate1s => [ "ATAAAAATAT", "ATAAAAATAT" ], + mate2s => [ "GGCCCGCCCG", "CCGGGCCCGC" ], + mate1fw => 1, mate2fw => 1, + args => "-I 0 -X 36", + # Not really any way to flag an alignment as discordant + pairhits => [ { "3,30" => 1 }, { "3,27" => 1 } ], + flags => [ "XM:0,XP:0,XT:DP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=" ] }, + + # 1 discordant alignment. Discordant because the fragment is too long. + + { name => "Simple paired-end 11", + ref => [ "TTTATAAAAATATTTCCCCCCCCCCCCCCCCGATCGCCCGCCCCCCCCCCC" ], + # ATAAAAATAT CGATCGCCCG + # 012345678901234567890123456789012345678901234567890 + # 0 1 2 3 4 5 + # ------------------------------------- + # 012345678901234567890123456789012345678901234567 + mate1s => [ "ATAAAAATAT" ], + mate2s => [ "CGATCGCCCG" ], + mate1fw => 1, mate2fw => 1, + args => "-I 0 -X 36", + # Not really any way to flag an alignment as discordant + pairhits => [ { "3,30" => 1 } ], + flags => [ "XM:0,XP:0,XT:DP,XC:10=" ] }, + + # 1 discordant alignment. Discordant because the fragment is too short. 
+ + { name => "Simple paired-end 10", + ref => [ "TTTATAAAAATATTTCCCCCCGATCGCCCGCCCCCCCCCCC" ], + # ATAAAAATAT CGATCGCCCG + # 01234567890123456789012345678901234567890 + # 0 1 2 3 4 + # --------------------------- + # 012345678901234567890123456 + mate1s => [ "ATAAAAATAT" ], + mate2s => [ "CGATCGCCCG" ], + mate1fw => 1, mate2fw => 1, + args => "-I 28 -X 80", + # Not really any way to flag an alignment as discordant + pairhits => [ { "3,20" => 1 } ], + flags => [ "XM:0,XP:0,XT:DP,XC:10=" ] }, + + # Like 6, but with -M limit + + { name => "Simple paired-end 9", + ref => [ "CCCATATATATATCCTCCCATATATATATCCCTCCCCATATATATATCCCTTTTCCTTTCGCGCGCGCGTTTCCCCCCCCC" ], + # ATATATATAT ATATATATAT ATATATATAT CGCGCGCGCG + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890 + # 0 1 2 3 4 5 6 7 8 + mate1s => [ "ATATATATAT" ], + mate2s => [ "CGCGCGCGCG" ], + mate1fw => 1, mate2fw => 0, + args => "-I 0 -X 80", + report => "-M 2", + lines => 2, + pairhits => [ { "3,59" => 1, "19,59" => 1, "37,59" => 1 } ], + hits_are_superset => [ 1 ], + flags => [ "XM:1,XP:1,XT:CP,XC:10=", "XM:1,XP:1,XT:CP,XC:10=" ] }, + + # Like 6, but without -m limit + + { name => "Simple paired-end 8", + ref => [ "CCCATATATATATCCTCCCATATATATATCCCTTCCCATATATATATCCCTTTTTTTTTCGCGCGCGCGTTTCCCCCCCCC" ], + # ATATATATAT ATATATATAT ATATATATAT CGCGCGCGCG + # 012345678901234567890123456789012345678901234567890123456789012345678901234567890 + # 0 1 2 3 4 5 6 7 8 + mate1s => [ "ATATATATAT" ], + mate2s => [ "CGCGCGCGCG" ], + mate1fw => 1, mate2fw => 0, + args => "-I 0 -X 80", + pairhits => [ { "3,59" => 1, "19,59" => 1, "37,59" => 1 } ], + flags => [ "XM:0,XP:0,XT:CP,XC:10=" ] }, + + # Paired-end read, but only one mate aligns + + { name => "Simple paired-end 2; no --no-mixed", + ref => [ "CCCATATATATATCCCTTTTTTTCCCCCCCCCCTTCGCGCGCGCGTTTCCCCC" ], + # ATATATATAT CGCGCGCGCG + # 01234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 + mate1s => [ "ATATATATAT" ], + mate2s => [ 
"CCCCCGGGGG" ], + mate1fw => 1, mate2fw => 1, + args => "-I 0 -X 50 --nofw", + nofw => 1, + pairhits => [ { "*,3" => 1 } ], + flags_map => [ { 3 => "XM:0,XP:0,XT:UP,XC:10=", + "*" => "XM:0,XP:0,XT:UP" } ], + cigar_map => [{ + 3 => "10M", + "*" => "*" + }], + samoptflags_map => [{ + 3 => { + "MD:Z:10" => 1, # mismatching positions/bases + "YT:Z:UP" => 1, # type of alignment (concordant/discordant/etc) + }, + "*" => { + "YT:Z:UP" => 1, # type of alignment (concordant/discordant/etc) + } + }] + }, + + { name => "Simple paired-end 2; --no-mixed", + ref => [ "CCCATATATATATCCCTTTTTTTCCCCCCCCTTTTCGCGCGCGCGTTTCCCCC" ], + # ATATATATAT CGCGCGCGCG + # 01234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 + mate1s => [ "ATATATATAT" ], + mate2s => [ "CCCCCGGGGG" ], + mate1fw => 1, mate2fw => 1, + args => "-I 0 -X 50 --no-mixed", + pairhits => [ { "*,*" => 1 } ] }, + + # Simple paired-end alignment + + { name => "Simple paired-end 1", + ref => [ "CCCATATATATATCCCTTTTTTTCCCCCCCCTTTTCGCGCGCGCGTTTTCCCC" ], + # ATATATATAT CGCGCGCGCG + # 01234567890123456789012345678901234567890123456789012 + # 0 1 2 3 4 5 + mate1s => [ "ATATATATAT" ], + mate2s => [ "CGCGCGCGCG" ], + mate1fw => 1, mate2fw => 1, + args => "-I 0 -X 50", + pairhits => [ { "3,35" => 1 } ], + flags => [ "XM:0,XP:0,XT:CP,XC:10=" ], + cigar_map => [{ + 3 => "10M", + 35 => "10M" + }], + samoptflags_map => [{ + 3 => { + "MD:Z:10" => 1, # mismatching positions/bases + "YT:Z:CP" => 1, # type of alignment (concordant/discordant/etc) + }, + 35 => { + "MD:Z:10" => 1, # mismatching positions/bases + "YT:Z:CP" => 1, # type of alignment (concordant/discordant/etc) + } + }] + }, + + # Check that pseudo-random generation is always the same for + # same-sequence, same-name reads + + { ref => [ "AAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCC" ], + reads => [ "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC", "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC" ], + names => [ "r1", "r1", "r1", "r1", "r2", "r2", "r2", "r2", 
"r3", "r3", "r3", "r3", "r4", "r4", "r4", "r4" ], + args => "", + check_random => 1, + report => "-k 1" }, + + { ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTGTTCGT" ], + report => "-M 1", + hits => [ { 0 => 1, 8 => 1 } ], + flags => [ "XM:1,XP:0,XT:UU,XC:8=" ], + hits_are_superset => [ 1 ], + cigar => [ "8M" ], + samoptflags => [ + { "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "YM:i:1" => 1 } + ], + }, + + # Read 3 overhangs right end + { ref => [ "TTGTTCGT" ], + reads => [ "GTTCGTA" ], + args => "--overhang --policy \"SEED=0,3\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", + hits => [ { 2 => 1 } ], + flags => [ "XM:0,XP:0,XT:UU,XC:6=1X" ] }, + + # Mess with arguments + + # Default should be 1-mismatch, so this shouldn't align + { ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTATTAGT" ], + args => "", + hits => [ { "*" => 1 } ], + flags => [ "XM:0,XP:0,XT:UU" ], + cigar => [ "*" ], + samoptflags => [{ "YT:Z:UU" => 1 }], + }, + + # Shouldn't align with 0 mismatches either + { ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTATTAGT" ], + args => "--policy SEED=0", + hits => [ { "*" => 1 } ], + flags => [ "XM:0,XP:0,XT:UU" ], + cigar => [ "*" ], + samoptflags => [{ "YT:Z:UU" => 1 }], + }, + + # Should align with 0 mismatches if we can wedge a seed into the 2 + # matching characters between the two mismatches. Here we fail to + # wedge a length-3 seed in (there's no room) + { ref => [ "TTGTTCGTTTGTTCGT" ], + reads => [ "TTATTAGT" ], + args => "--policy \"SEED=0,3\\;IVAL=C,1,0\\;MMP=C1\"", + hits => [ { "*" => 1 } ], + flags => [ "XM:0,XP:0,XT:UU" ], + cigar => [ "*" ], + samoptflags => [{ "YT:Z:UU" => 1 }], + }, + + # Should align with 0 mismatches if we can wedge a seed into the 2 + # matching characters between the two mismatches. 
# Here we wedge a length-2 seed in (continuation of the comment above).
    { ref    => [ "TTGTTCGTTTGTTCGT" ],
      reads  => [ "TTATTAGT" ],
      args   =>   "--policy \"SEED=0,2\\;IVAL=C,1,0\\;MMP=C1\"",
      #
      # Diagrams rebuilt from the expected offsets (0, 3, 4 / 5, 7, 8) and
      # the XC strings below; '|' marks a matching position.
      #
      # TTGTTCGTTTGTTCGT   TTGTTCGTTTGTTCGT   TTGTTCGTTTGTTCGT
      # || || ||              ||  |               |  || ||
      # TTATTAGT              TTATTAGT            TTATTAGT
      #
      # TTGTTCGTTTGTTCGT   TTGTTCGTTTGTTCGT   TTGTTCGTTTGTTCGT
      #         ||  |             ||  |               || || ||
      #      TTATTAGT             TTATTAGT            TTATTAGT
      #
      hits   => [ { 0 => 1, 3 => 1, 4 => 1,
                    5 => 1, 7 => 1, 8 => 1} ],
      # NOTE(review): the paired-end cases in this file spell this key
      # 'flags_map'; confirm which spelling the checking loop reads.
      flag_map => [ { 0 => "XM:0,XP:0,XT:UU,XC:2=1X2=1X2=",
                      3 => "XM:0,XP:0,XT:UU,XC:2=2X1=3X",
                      4 => "XM:0,XP:0,XT:UU,XC:1=2X2=1X2=",
                      5 => "XM:0,XP:0,XT:UU,XC:3X2=2X1=",
                      7 => "XM:0,XP:0,XT:UU,XC:2=2X1=3X",
                      8 => "XM:0,XP:0,XT:UU,XC:2=1X2=1X2="} ],
      cigar_map => [ { 0 => "8M", 3 => "8M", 4 => "8M",
                       5 => "8M", 7 => "8M", 8 => "8M" } ],
      samoptflags_map => [{
        0 => { "AS:i:-2" => 1, "XS:i:-2" => 1, "NM:i:2" => 1, "XM:i:2" => 1,
               "YT:Z:UU" => 1, "MD:Z:2G2C2" => 1 },
        3 => { "AS:i:-5" => 1, "XS:i:-2" => 1, "NM:i:5" => 1, "XM:i:5" => 1,
               "YT:Z:UU" => 1, "MD:Z:2C0G1T0T0G0" => 1 },
        4 => { "AS:i:-3" => 1, "XS:i:-2" => 1, "NM:i:3" => 1, "XM:i:3" => 1,
               "YT:Z:UU" => 1, "MD:Z:1C0G2T2" => 1 },
        5 => { "AS:i:-5" => 1, "XS:i:-2" => 1, "NM:i:5" => 1, "XM:i:5" => 1,
               "YT:Z:UU" => 1, "MD:Z:0C0G0T2G0T1" => 1 },
        7 => { "AS:i:-5" => 1, "XS:i:-2" => 1, "NM:i:5" => 1, "XM:i:5" => 1,
               "YT:Z:UU" => 1, "MD:Z:2T0G1T0C0G0" => 1 },
        8 => { "AS:i:-2" => 1, "XS:i:-2" => 1, "NM:i:2" => 1, "XM:i:2" => 1,
               "YT:Z:UU" => 1, "MD:Z:2G2C2" => 1 },
      }],
    },
);

##
# Take a list of reference sequences and write them to a temporary
# FASTA file of the given name.  Each record is named after the
# sequence's 0-based position in the list (">0", ">1", ...).
#
#   $l  - array ref of reference sequence strings
#   $fa - path of the FASTA file to create / overwrite
#
# Dies if the file cannot be opened, or if closing it reports a write
# error.
#
sub writeFasta($$) {
    my ($l, $fa) = @_;
    # Three-argument open with a lexical handle: the path is never
    # interpreted as a mode/pipe spec, and the handle cannot collide with
    # another global FA handle.
    open(my $fh, '>', $fa) || die "Could not open $fa for writing: $!";
    for my $idx (0 .. $#$l) {
        print {$fh} ">$idx\n" . $l->[$idx] . "\n";
    }
    # Buffered write errors only surface at close time.
    close($fh) || die "Could not close $fa after writing: $!";
}

##
# Take lists of named reads/mates and write them to appropriate
# files.
#
#   $reads,  $quals   - array refs: unpaired read sequences / qualities
#   $mate1s, $qual1s  - array refs: mate #1 sequences / qualities
#   $mate2s, $qual2s  - array refs: mate #2 sequences / qualities
#   $names            - array ref of read names (optional per entry)
#   $fq1, $fq2        - output FASTQ paths
#
# If $mate1s is defined and non-empty the input is treated as paired:
# mate 1 records go to $fq1 (name suffixed "/1") and mate 2 records to
# $fq2 (suffixed "/2").  Otherwise the unpaired reads go to $fq1.  Both
# files are always created/truncated.  A missing quality string defaults
# to a run of "I" as long as the read; a missing name defaults to
# "r<index>".
#
sub writeReads($$$$$$$$$) {
    my (
        $reads,
        $quals,
        $mate1s,
        $qual1s,
        $mate2s,
        $qual2s,
        $names,
        $fq1,
        $fq2) = @_;

    open(my $fq1_fh, '>', $fq1) || die "Could not open '$fq1' for writing: $!";
    open(my $fq2_fh, '>', $fq2) || die "Could not open '$fq2' for writing: $!";
    my $pe = (defined($mate1s) && $mate1s ne "");
    if($pe) {
        for my $i (0 .. $#$mate1s) {
            my $m1 = $mate1s->[$i];
            my $m2 = $mate2s->[$i];
            defined($m1) || die "Mate 1 sequence $i is undefined";
            defined($m2) || die "Mate 2 sequence $i is undefined";
            my $q1 = $qual1s->[$i];
            my $q2 = $qual2s->[$i];
            my $nm = $names->[$i];
            # Test for defined/non-empty rather than truthiness so that a
            # quality string or name of "0" is kept.
            $q1 = ("I" x length($m1)) unless defined($q1) && $q1 ne "";
            $q2 = ("I" x length($m2)) unless defined($q2) && $q2 ne "";
            $nm = "r$i"               unless defined($nm) && $nm ne "";
            print {$fq1_fh} "\@$nm/1\n$m1\n+\n$q1\n";
            print {$fq2_fh} "\@$nm/2\n$m2\n+\n$q2\n";
        }
    } else {
        for my $i (0 .. $#$reads) {
            my $read = $reads->[$i];
            defined($read) || die "Read sequence $i is undefined";
            my $qual = $quals->[$i];
            my $nm   = $names->[$i];
            $qual = ("I" x length($read)) unless defined($qual) && $qual ne "";
            $nm   = "r$i"                 unless defined($nm)   && $nm   ne "";
            print {$fq1_fh} "\@$nm\n$read\n+\n$qual\n";
        }
    }
    close($fq1_fh) || die "Could not close '$fq1' after writing: $!";
    close($fq2_fh) || die "Could not close '$fq2' after writing: $!";
}

##
# Run bowtie2 with given arguments
#
#   $do_build          - if true, (re)build the index from $fa first
#   $args              - extra aligner arguments for this case
#   $color             - if true, pass -C to the builder and the aligner
#   $fa                - path to the reference FASTA file
#   $reportargs        - reporting arguments; defaults to "-a"
#   $read_file_format  - "fastq", "tabbed", "fasta", "qseq" or "raw";
#                        required when a read/mate file body is supplied
#   $read_file         - literal contents of an unpaired read file
#   $mate1_file,
#   $mate2_file        - literal contents of the mate #1 / #2 files
#   $reads .. $names   - read/mate/quality/name lists, handed to writeReads
#                        when no file body is supplied
#   $ls, $rawls        - array refs; receive each alignment line split on
#                        tabs, and the raw line, respectively
#   $header_ls,
#   $raw_header_ls     - same, for lines beginning with "@"
#   $should_abort      - if true, the aligner is expected to exit non-zero
#
# Uses the file-level $bowtie2 and $bowtie2_build variables, which are
# defined outside this part of the file.  Dies if the build fails, if the
# aligner exits non-zero unexpectedly, or if it exits zero when
# $should_abort is set.
#
sub runbowtie2($$$$$$$$$$$$$$$$$$$$$) {

    my (
        $do_build,
        $args,
        $color,
        $fa,
        $reportargs, #5
        $read_file_format,
        $read_file,
        $mate1_file,
        $mate2_file,
        $reads,
        $quals,
        $mate1s,
        $qual1s,
        $mate2s,
        $qual2s,
        $names,
        $ls,
        $rawls,
        $header_ls,
        $raw_header_ls,
        $should_abort) = @_;

    $args .= " --quiet";
    $reportargs = "-a" unless defined($reportargs);
    $args .= " -C" if $color;
    $args .= " $reportargs";

    # Echo the reference FASTA so it appears in the test log
    print "References:\n";
    open(my $fa_fh, '<', $fa) || die "Could not open $fa for reading: $!";
    while(my $line = <$fa_fh>) { print $line; }
    close($fa_fh);

    if($do_build) {
        my $build_args = ($color ? "-C" : "");
        my $cmd = "$bowtie2_build --quiet --sanity $build_args $fa .simple_tests.tmp";
        print "$cmd\n";
        system($cmd);
        ($? == 0) || die "Bad exitlevel from bowtie2-build: $?";
    }

    my $pe = (defined($mate1s) && $mate1s ne "");
    $pe = $pe || (defined($mate1_file));
    my $mate1arg;
    my $mate2arg;
    my $readarg;
    my $formatarg;
    if(defined($read_file) || defined($mate1_file)) {
        # The case supplied literal file contents; write them out under a
        # name whose extension matches the declared format.
        defined($read_file_format) || die "Read file supplied without a read file format";
        # format name => [ aligner format flag, temporary file extension ]
        my %formats = (
            fastq  => [ "-q",     ".fq"       ],
            tabbed => [ "--12",   ".tab"      ],
            fasta  => [ "-f",     ".fa"       ],
            qseq   => [ "--qseq", "_qseq.txt" ],
            raw    => [ "-r",     ".raw"      ],
        );
        my $fmt = $formats{$read_file_format};
        defined($fmt) || die "Bad format: $read_file_format";
        my $ext;
        ($formatarg, $ext) = @$fmt;
        # Write a string to a file, dying with the OS error on failure
        my $spew = sub {
            my ($path, $content) = @_;
            open(my $fh, '>', $path) || die "Could not open '$path' for writing: $!";
            print {$fh} $content;
            close($fh) || die "Could not close '$path' after writing: $!";
        };
        if(defined($read_file)) {
            # Unpaired
            $readarg = ".simple_tests$ext";
            $spew->($readarg, $read_file);
        } else {
            # Paired
            defined($mate1_file) || die "Mate 1 file contents are undefined";
            defined($mate2_file) || die "Mate 2 file contents are undefined";
            $mate1arg = ".simple_tests.1$ext";
            $mate2arg = ".simple_tests.2$ext";
            $spew->($mate1arg, $mate1_file);
            $spew->($mate2arg, $mate2_file);
        }
    } else {
        # The case supplied sequence lists; serialize them as FASTQ
        writeReads(
            $reads,
            $quals,
            $mate1s,
            $qual1s,
            $mate2s,
            $qual2s,
            $names,
            ".simple_tests.1.fq",
            ".simple_tests.2.fq");
        $mate1arg = ".simple_tests.1.fq";
        $mate2arg = ".simple_tests.2.fq";
        $formatarg = "-q";
        $readarg = $mate1arg;
    }

    my $cmd;
    if($pe) {
        # Paired-end case
        $cmd = "$bowtie2 --debug $args .simple_tests.tmp $formatarg -1 $mate1arg -2 $mate2arg";
    } else {
        # Unpaired case
        $cmd = "$bowtie2 --debug $args .simple_tests.tmp $formatarg $readarg";
    }
    print "$cmd\n";
    # Read the aligner's stdout through a pipe, sorting "@" header lines
    # from alignment lines.
    open(my $bt_fh, '-|', $cmd) || die "Could not open pipe '$cmd |'";
    while(my $line = <$bt_fh>) {
        print $line;
        chomp($line);
        # Limit of -1 keeps trailing empty fields
        my @fields = split(/\t/, $line, -1);
        if(substr($line, 0, 1) eq "@") {
            push @$header_ls, \@fields;
            push @$raw_header_ls, $line;
        } else {
            push @$ls, \@fields;
            push @$rawls, $line;
        }
    }
    # Closing the pipe waits for the child and sets $? to its status
    close($bt_fh);
    ($? == 0 || $should_abort) || die "bowtie2 aborted with exitlevel $?\n";
    ($? != 0 || !$should_abort) || die "bowtie2 failed to abort!\n";
}

##
# Compare a hash ref of expected SAM flags with a hash ref of observed SAM
# flags.
#
#   $flags    - observed flags: name => { ty => type, vl => value }
#   $ex_flags - expected flags: keys of the form "NAME:TYPE:VALUE"
#
# Every observed flag must match its expected type and value; an observed
# flag with no expectation is expected to be type "i" with value "0".
# Only observed flags are iterated, so an expected flag that is absent
# from the observed set is not reported.  Returns 1 on success and dies
# on the first mismatch.
#
sub matchSamOptionalFlags($$) {
    my ($flags, $ex_flags) = @_;
    my %ex = ();
    for(keys %$ex_flags) {
        # No split limit: this mirrors how the observed fields are split
        # in the checking loop later in the file.
        my ($nm, $ty, $vl) = split(/:/, $_);
        defined($vl) || die "Could not parse optional flag field \"$_\"";
        ($ex{$nm}{ty}, $ex{$nm}{vl}) = ($ty, $vl);
    }
    for(keys %$flags) {
        my ($ex_ty, $ex_vl);
        if(defined($ex{$_})) {
            ($ex_ty, $ex_vl) = ($ex{$_}{ty}, $ex{$_}{vl});
        } else {
            # Flags without an explicit expectation default to integer 0
            ($ex_ty, $ex_vl) = ("i", "0");
        }
        defined($ex_ty) || die;
        defined($ex_vl) || die;
        my ($ty, $vl) = ($flags->{$_}{ty}, $flags->{$_}{vl});
        defined($ty) || die;
        defined($vl) || die;
        $ex_ty eq $ty ||
            die "Expected SAM optional flag $_ to have type $ex_ty, had $ty";
        $ex_vl eq $vl ||
            die "Expected SAM optional flag $_ to have value $ex_vl, had $vl";
    }
    return 1;
}

my $tmpfafn = ".simple_tests.pl.fa";
my $last_ref = undef;
for (my $ci = 0; $ci < scalar(@cases); $ci++) {
    my $c = $cases[$ci];
    last unless defined($c);
    # If there's any skipping of cases to be done, do it here prior to the
    # eq_deeply check
    my $color = 0;
    $color = $c->{color} if defined($c->{color});
    next if ($color && $skipColor);
    my $do_build = 0;
    unless(defined($last_ref) && eq_deeply($c->{ref}, $last_ref)) {
        writeFasta($c->{ref}, $tmpfafn);
        $do_build = 1;
    }
    $last_ref = $c->{ref};
    # For each set of arguments...
    my $case_args = $c->{args};
    $case_args = "" unless defined($case_args);
    my $first = 1; # did we build the index yet?
+ # Forward, then reverse-complemented + my $fwlo = ($c->{nofw} ? 1 : 0); + my $fwhi = ($c->{norc} ? 0 : 1); + for(my $fwi = $fwlo; $fwi <= $fwhi; $fwi++) { + my $fw = ($fwi == 0); + my $sam = 1; + + my $reads = $c->{reads}; + my $quals = $c->{quals}; + my $m1s = $c->{mate1s}; + my $q1s = $c->{qual1s}; + my $m2s = $c->{mate2s}; + my $q2s = $c->{qual2s}; + + my $read_file = undef; + my $mate1_file = undef; + my $mate2_file = undef; + + $read_file = $c->{fastq} if defined($c->{fastq}); + $read_file = $c->{tabbed} if defined($c->{tabbed}); + $read_file = $c->{fasta} if defined($c->{fasta}); + $read_file = $c->{qseq} if defined($c->{qseq}); + $read_file = $c->{raw} if defined($c->{raw}); + + $mate1_file = $c->{fastq1} if defined($c->{fastq1}); + $mate1_file = $c->{tabbed1} if defined($c->{tabbed1}); + $mate1_file = $c->{fasta1} if defined($c->{fasta1}); + $mate1_file = $c->{qseq1} if defined($c->{qseq1}); + $mate1_file = $c->{raw1} if defined($c->{raw1}); + + $mate2_file = $c->{fastq2} if defined($c->{fastq2}); + $mate2_file = $c->{tabbed2} if defined($c->{tabbed2}); + $mate2_file = $c->{fasta2} if defined($c->{fasta2}); + $mate2_file = $c->{qseq2} if defined($c->{qseq2}); + $mate2_file = $c->{raw2} if defined($c->{raw2}); + + my $read_file_format = undef; + if(!defined($reads) && !defined($m1s) && !defined($m2s)) { + defined($read_file) || defined($mate1_file) || die; + $read_file_format = "fastq" if defined($c->{fastq}) || defined($c->{fastq1}); + $read_file_format = "tabbed" if defined($c->{tabbed}) || defined($c->{tabbed}); + $read_file_format = "fasta" if defined($c->{fasta}) || defined($c->{fasta1}); + $read_file_format = "qseq" if defined($c->{qseq}) || defined($c->{qseq1}); + $read_file_format = "raw" if defined($c->{raw}) || defined($c->{raw1}); + next unless $fw; + } + # Run bowtie2 + my @lines = (); + my @rawlines = (); + my @header_lines = (); + my @header_rawlines = (); + print $c->{name}." " if defined($c->{name}); + print "(fw:".($fw ? 
1 : 0).", sam:$sam)\n"; + my $mate1fw = 1; + my $mate2fw = 0; + $mate1fw = $c->{mate1fw} if defined($c->{mate1fw}); + $mate2fw = $c->{mate2fw} if defined($c->{mate2fw}); + if(!$fw) { + # Reverse-complement the reads + my @s = (); @s = @$reads if defined($reads); + my @q = (); @q = @$quals if defined($quals); + # Reverse-complement mates and switch mate1 with mate2 + my @m1 = (); @m1 = @$m1s if defined($m1s); + my @m2 = (); @m2 = @$m2s if defined($m2s); + my @q1 = (); @q1 = @$q1s if defined($q1s); + my @q2 = (); @q2 = @$q2s if defined($q2s); + for(0..scalar(@s)-1) { + $s[$_] = DNA::revcomp($s[$_], $color); + $q[$_] = reverse $q[$_] if $_ < scalar(@q); + } + if($mate1fw == $mate2fw) { + for(0..$#m1) { $m1[$_] = DNA::revcomp($m1[$_], $color); } + for(0..$#m2) { $m2[$_] = DNA::revcomp($m2[$_], $color); } + for(0..$#q1) { $q1[$_] = reverse $q1[$_]; } + for(0..$#q2) { $q2[$_] = reverse $q2[$_]; } + } + $reads = \@s if defined($reads); + $quals = \@q if defined($quals); + $m1s = \@m2 if defined($m1s); + $q1s = \@q2 if defined($q1s); + $m2s = \@m1 if defined($m2s); + $q2s = \@q1 if defined($q2s); + } + my $a = $case_args; + if(defined($m2s)) { + $a .= " --"; + $a .= ($mate1fw ? "f" : "r"); + $a .= ($mate2fw ? 
"f" : "r"); + } + runbowtie2( + $do_build && $first, + "$a", + $color, + $tmpfafn, + $c->{report}, + $read_file_format, # formate of read/mate files + $read_file, # read file + $mate1_file, # mate #1 file + $mate2_file, # mate #2 file + $reads, # read list + $quals, # quality list + $m1s, # mate #1 sequence list + $q1s, # mate #1 quality list + $m2s, # mate #2 sequence list + $q2s, # mate #2 quality list + $c->{names}, + \@lines, + \@rawlines, + \@header_lines, + \@header_rawlines, + $c->{should_abort}); + $first = 0; + my $pe = defined($c->{mate1s}) && $c->{mate1s} ne ""; + $pe = $pe || defined($mate1_file); + $pe = $pe || $c->{paired}; + my ($lastchr, $lastoff, $lastoff_orig) = ("", -1, -1); + # Keep temporary copies of hits and pairhits so that we can + # restore for the next orientation + my $hitstmp = []; + $hitstmp = clone($c->{hits}) if defined($c->{hits}); + my $pairhitstmp = []; + $pairhitstmp = clone($c->{pairhits}) if defined($c->{pairhits}); + my $pairhits_orig_tmp = []; + $pairhits_orig_tmp = clone($c->{pairhits_orig}) if defined($c->{pairhits_orig}); + # Record map from already-seen read name, read sequence and + # quality to the place on the reference where it's reported. + # This allows us to check that the pseudo-random generator + # isn't mistakenly yielding different alignments for identical + # reads. + my %seenNameSeqQual = (); + if(defined($c->{lines})) { + my $l = scalar(@lines); + $l == $c->{lines} || die "Expected $c->{lines} lines, got $l"; + } + for my $li (0 .. scalar(@lines)-1) { + my $l = $lines[$li]; + my ($readname, $orient, $chr, $off_orig, $off, $seq, $qual, $mapq, + $oms, $editstr, $flagstr, $samflags, $cigar, $rnext, $pnext, + $tlen); + my %samoptflags = (); + if($sam) { + scalar(@$l) >= 11 || + die "Bad number of fields; expected at least 11 got ". + scalar(@$l).":\n$rawlines[$li]\n"; + ($readname, $samflags, $chr, $off) = @$l[0..3]; + ($seq, $qual) = @$l[9..10]; + $orient = ((($samflags >> 4) & 1) == 0) ? 
"+" : "-"; + $mapq = $l->[4]; # mapping quality + $cigar = $l->[5]; # CIGAR string + $rnext = $l->[6]; # ref seq of next frag in template + $pnext = $l->[7]; # position of next frag in template + $tlen = $l->[8]; # template length + if($pnext == 0) { $pnext = "*"; } else { $pnext--; } + for(my $m = 11; $m < scalar(@$l); $m++) { + next if $l->[$m] eq ""; + my ($nm, $ty, $vl) = split(/:/, $l->[$m]); + defined($vl) || + die "Could not parse optional flag field $m: ". + "\"$l->[$m]\""; + $samoptflags{$nm}{ty} = $ty; + $samoptflags{$nm}{vl} = $vl; + } + if($off > 0) { $off--; } + else { $off = "*"; } + $off_orig = $off; + $off = "*" if $cigar eq "*"; + } else { + scalar(@$l) == 9 || + die "Bad number of fields; expected 9 got ". + scalar(@$l).":\n$rawlines[$li]\n"; + ($readname, $orient, $chr, $off, $seq, + $qual, $oms, $editstr, $flagstr) = @$l; + $off_orig = $off; + } + if($c->{check_random}) { + my $rsqKey = "$readname\t$orient\t$seq\t$qual"; + my $rsqVal = "$chr\t$off"; + if(defined($seenNameSeqQual{$rsqKey})) { + $seenNameSeqQual{$rsqKey} eq $rsqVal || + die "Two hits for read/seq/qual:\n$rsqKey\n". + "had different alignments:\n". + "$seenNameSeqQual{$rsqKey}\n$rsqVal\n"; + } + $seenNameSeqQual{$rsqKey} = $rsqVal; + } + $readname ne "" || die "readname was blank:\n".Dumper($c); + my $rdi = $readname; + $rdi = substr($rdi, 1) if substr($rdi, 0, 1) eq "r"; + my $mate = 0; + if($readname =~ /\//) { + ($rdi, $mate) = split(/\//, $readname); + defined($rdi) || die; + } + $rdi = $c->{idx_map}{$rdi} if defined($c->{idx_map}{$rdi}); + $rdi ne "" || die "rdi was blank:\nreadname=$readname\n".Dumper($c); + if($rdi != int($rdi)) { + # Read name has non-numeric characters. Figure out + # what number it is by scanning the names list. 
+ my $found = 0; + for(my $i = 0; $i < scalar(@{$c->{names}}); $i++) { + if($c->{names}->[$i] eq $readname) { + $rdi = $i; + $found = 1; + last; + } + } + $found || die "No specified name matched reported name $readname"; + } + # Check that the sequence printed in the alignment is sane + if($color) { + # It's a decoded nucleotide sequence + my $dseq = $c->{dec_seq}->[$rdi]; + if(defined($dseq)) { + $seq eq $dseq || die "Expected decoded sequence '$seq' from alignment to match '$dseq'"; + } + my $dqual = $c->{dec_qual}->[$rdi]; + if(defined($dqual)) { + $qual eq $dqual || die "Expected decoded qualities '$qual' from alignment to match '$dqual'"; + } + } else { + + } + # Make simply-named copies of some portions of the test case + # 'hits' + my %hits = (); + %hits = %{$c->{hits}->[$rdi]} if + defined($c->{hits}->[$rdi]); + # 'flags' + my $flags = undef; + $flags = $c->{flags}->[$rdi] if + defined($c->{flags}->[$rdi]); + # 'samflags' + my $ex_samflags = undef; + $ex_samflags = $c->{ex_samflags}->[$rdi] if + defined($c->{ex_samflags}->[$rdi]); + # 'samflags_map' + my $ex_samflags_map = undef; + $ex_samflags_map = $c->{samflags_map}->[$rdi] if + defined($c->{samflags_map}->[$rdi]); + # 'samoptflags' + my $ex_samoptflags = undef; + $ex_samoptflags = $c->{samoptflags}->[$rdi] if + defined($c->{samoptflags}->[$rdi]); + # 'cigar' + my $ex_cigar = undef; + $ex_cigar = $c->{cigar}->[$rdi] if + defined($c->{cigar}->[$rdi]); + # 'cigar_map' + my $ex_cigar_map = undef; + $ex_cigar_map = $c->{cigar_map}->[$rdi] if + defined($c->{cigar_map}->[$rdi]); + # 'mapq_hi' - boolean indicating whether mapq is hi/lo + my $ex_mapq_hi = undef; + $ex_mapq_hi = $c->{mapq_hi}->[$rdi] if + defined($c->{mapq_hi}->[$rdi]); + # 'mapq' + my $ex_mapq = undef; + $ex_mapq = $c->{mapq}->[$rdi] if + defined($c->{mapq}->[$rdi]); + # 'mapq_map' + my $ex_mapq_map = undef; + $ex_mapq_map = $c->{mapq_map}->[$rdi] if + defined($c->{mapq_map}->[$rdi]); + # 'rnext_map' + my $ex_rnext_map = undef; + $ex_rnext_map 
= $c->{rnext_map}->[$rdi] if + defined($c->{rnext_map}) && defined($c->{rnext_map}->[$rdi]); + # 'pnext_map' + my $ex_pnext_map = undef; + $ex_pnext_map = $c->{pnext_map}->[$rdi] if + defined($c->{pnext_map}) && defined($c->{pnext_map}->[$rdi]); + # 'tlen_map' + my $ex_tlen_map = undef; + $ex_tlen_map = $c->{tlen_map}->[$rdi] if + defined($c->{tlen_map}) && defined($c->{tlen_map}->[$rdi]); + # 'flags_fw' + my $flags_fw = undef; + $flags_fw = $c->{flags_fw}->[$rdi] if + defined($c->{flags_fw}->[$rdi]); + # 'flags_rc' + my $flags_rc = undef; + $flags_rc = $c->{flags_rc}->[$rdi] if + defined($c->{flags_rc}->[$rdi]); + # 'pairhits' + my %pairhits = (); + %pairhits = %{$c->{pairhits}->[$rdi]} if + defined($c->{pairhits}->[$rdi]); + # 'pairhits_orig' + my %pairhits_orig = (); + %pairhits_orig = %{$c->{pairhits_orig}->[$rdi]} if + defined($c->{pairhits_orig}->[$rdi]); + # 'pairflags' + my %pairflags = (); + %pairflags = %{$c->{pairflags}->[$rdi]} if + defined($c->{pairflags}->[$rdi]); + # 'hits_are_superset' + my $hits_are_superset = 0; + $hits_are_superset = $c->{hits_are_superset}->[$rdi] if + defined($ci); + # edits + my $ex_edits = undef; + $ex_edits = $c->{edits}->[$rdi] if + defined($c->{edits}->[$rdi]); + if(!$sam) { + # Bowtie flags + if(defined($flags)) { + $flagstr eq $flags || + die "Expected flags=\"$flags\", got \"$flagstr\""; + } + if(defined($flags_fw) && $fw) { + $flagstr eq $flags_fw || + die "Expected flags=\"$flags_fw\", got \"$flagstr\""; + } + if(defined($flags_rc) && !$fw) { + $flagstr eq $flags_rc || + die "Expected flags=\"$flags_rc\", got \"$flagstr\""; + } + if(defined($c->{flag_map})) { + if(defined($c->{flag_map}->[$rdi]->{$off})) { + $flagstr eq $c->{flag_map}->[$rdi]->{$off} || + die "Expected flags=\"$c->{flag_map}->[$rdi]->{$off}\"". 
+ " at offset $off, got \"$flagstr\""; + } + } + } + if($sam) { + # SAM flags + if(defined($ex_samflags)) { + $samflags eq $ex_samflags || + die "Expected flags $ex_samflags, got $samflags"; + } + if(defined($ex_samflags_map)) { + if(defined($c->{samflags_map}->[$rdi]->{$off})) { + my $ex = $c->{samflags_map}->[$rdi]->{$off}; + $samflags eq $ex || die + "Expected FLAGS value $ex at offset $off, got $samflags" + } else { + die "Expected to see alignment with offset $off parsing samflags_map"; + } + } + # CIGAR string + if(defined($ex_cigar)) { + $cigar eq $ex_cigar || + die "Expected CIGAR string $ex_cigar, got $cigar"; + } + if(defined($ex_cigar_map)) { + if(defined($c->{cigar_map}->[$rdi]->{$off})) { + my $ex = $c->{cigar_map}->[$rdi]->{$off}; + $cigar eq $ex || die + "Expected CIGAR string $ex at offset $off, got $cigar" + } else { + die "Expected to see alignment with offset $off parsing cigar_map"; + } + } + # MAPQ + if(defined($ex_mapq)) { + $mapq eq $ex_mapq || + die "Expected MAPQ $ex_mapq, got $mapq"; + } + if(defined($ex_mapq_map)) { + if(defined($c->{mapq_map}->[$rdi]->{$off})) { + my $ex = $c->{mapq_map}->[$rdi]->{$off}; + $mapq eq $ex || die + "Expected MAPQ string $ex at offset $off, got $mapq" + } else { + die "Expected to see alignment with offset $off parsing mapq_map"; + } + } + # MAPQ + if(defined($ex_mapq_hi)) { + if($ex_mapq_hi == 0) { + $mapq < 20 || die "Expected MAPQ < 20, got $mapq"; + } else { + $mapq >= 20 || die "Expected MAPQ >= 20, got $mapq"; + } + } + if(defined($ex_mapq_map)) { + if(defined($c->{mapq_map}->[$rdi]->{$off})) { + my $ex = $c->{mapq_map}->[$rdi]->{$off}; + $mapq eq $ex || die + "Expected MAPQ string $ex at offset $off, got $mapq" + } else { + die "Expected to see alignment with offset $off parsing mapq_map"; + } + } + # SAM optional flags + if(defined($ex_samoptflags)) { + matchSamOptionalFlags(\%samoptflags, $ex_samoptflags); + } + if(defined($c->{samoptflags_map})) { + if(defined($c->{samoptflags_map}->[$rdi]->{$off})) 
{ + matchSamOptionalFlags( + \%samoptflags, + $c->{samoptflags_map}->[$rdi]->{$off}); + } else { + die "Expected to see alignment with offset $off parsing samoptflags_map"; + } + } + if(defined($c->{samoptflags_flagmap})) { + if(defined($c->{samoptflags_flagmap}->[$rdi]->{$samflags})) { + matchSamOptionalFlags( + \%samoptflags, + $c->{samoptflags_flagmap}->[$rdi]->{$samflags}); + } else { + die "Expected to see alignment with flag $samflags parsing samoptflags_flagmap"; + } + } + # RNEXT map + if(defined($c->{rnext_map})) { + if(defined($c->{rnext_map}->[$rdi]->{$off})) { + my $ex = $c->{rnext_map}->[$rdi]->{$off}; + $rnext eq $ex || die + "Expected RNEXT '$ex' at offset $off, got '$rnext'" + } else { + die "Expected to see alignment with offset $off parsing rnext_map".Dumper($c); + } + } + # PNEXT map + if(defined($c->{pnext_map})) { + if(defined($c->{pnext_map}->[$rdi]->{$off})) { + my $ex = $c->{pnext_map}->[$rdi]->{$off}; + $pnext eq $ex || die + "Expected PNEXT '$ex' at offset $off, got '$pnext'" + } else { + die "Expected to see alignment with offset $off parsing pnext_map"; + } + } + # TLEN map + if(defined($c->{tlen_map})) { + if(defined($c->{tlen_map}->[$rdi]->{$off})) { + my $ex = $c->{tlen_map}->[$rdi]->{$off}; + $tlen eq $ex || die + "Expected TLEN '$ex' at offset $off, got '$tlen'" + } else { + die "Expected to see alignment with offset $off parsing tlen_map"; + } + } + } + if($pe && $lastchr ne "") { + my $offkey_orig = $lastoff.",".$off_orig; + $offkey_orig = $off_orig.",".$lastoff_orig if $off_orig eq "*"; + + my $offkey = $lastoff.",".$off; + $offkey = $off.",".$lastoff if $off eq "*"; + + if($lastoff ne "*" && $off ne "*") { + $offkey = min($lastoff, $off).",".max($lastoff, $off); + } + if(defined($c->{pairhits}->[$rdi])) { + defined($pairhits{$offkey}) || + die "No such paired off as $offkey in pairhits list: ".Dumper(\%pairhits)."\n"; + $c->{pairhits}->[$rdi]->{$offkey}--; + delete $c->{pairhits}->[$rdi]->{$offkey} if 
$c->{pairhits}->[$rdi]->{$offkey} == 0; + %pairhits = %{$c->{pairhits}->[$rdi]}; + } + if(defined($c->{pairhits_orig}->[$rdi])) { + defined($pairhits_orig{$offkey_orig}) || + die "No such paired off as $offkey in pairhits_orig list: ".Dumper(\%pairhits_orig)."\n"; + $c->{pairhits_orig}->[$rdi]->{$offkey_orig}--; + delete $c->{pairhits_orig}->[$rdi]->{$offkey_orig} if $c->{pairhits_orig}->[$rdi]->{$offkey_orig} == 0; + %pairhits_orig = %{$c->{pairhits_orig}->[$rdi]}; + } + ($lastchr, $lastoff, $lastoff_orig) = ("", -1, -1); + } elsif($pe) { + # Found an unpaired alignment from aligning a pair + my $foundSe = + defined($c->{pairhits}->[$rdi]) && + $c->{pairhits}->[$rdi]->{$off}; + if($foundSe) { + $c->{pairhits}->[$rdi]->{$off}--; + delete $c->{pairhits}->[$rdi]->{$off} + if $c->{pairhits}->[$rdi]->{$off} == 0; + %pairhits = %{$c->{pairhits}->[$rdi]}; + } else { + ($lastchr, $lastoff) = ($chr, $off); + } + # Found an unpaired alignment from aligning a pair + $foundSe = + defined($c->{pairhits_orig}->[$rdi]) && + $c->{pairhits_orig}->[$rdi]->{$off_orig}; + if($foundSe) { + $c->{pairhits_orig}->[$rdi]->{$off_orig}--; + delete $c->{pairhits_orig}->[$rdi]->{$off_orig} + if $c->{pairhits_orig}->[$rdi]->{$off_orig} == 0; + %pairhits_orig = %{$c->{pairhits_orig}->[$rdi]}; + } else { + ($lastchr, $lastoff, $lastoff_orig) = ($chr, $off, $off_orig); + } + } else { + if(defined($c->{hits}->[$rdi])) { + defined($hits{$off}) || + die "No such off as $off in hits list: ".Dumper(\%hits)."\n"; + $c->{hits}->[$rdi]->{$off}--; + delete $c->{hits}->[$rdi]->{$off} if $c->{hits}->[$rdi]->{$off} == 0; + %hits = %{$c->{hits}->[$rdi]}; + } + } + if(!$sam && defined($ex_edits)) { + my $eds = $l->[7]; + $eds eq $ex_edits || + die "For edit string, expected \"$ex_edits\" got \"$eds\"\n"; + } + } + # Go through all the per-read + my $klim = 0; + $klim = scalar(@{$c->{hits}}) if defined($c->{hits}); + $klim = max($klim, scalar(@{$c->{pairhits}})) if defined($c->{pairhits}); + for (my $k = 0; $k 
< $klim; $k++) { + # For each read + my %hits = %{$c->{hits}->[$k]} if defined($c->{hits}->[$k]); + my %pairhits = %{$c->{pairhits}->[$k]} if defined($c->{pairhits}->[$k]); + my %pairhits_orig = %{$c->{pairhits_orig}->[$k]} if defined($c->{pairhits_orig}->[$k]); + my $hits_are_superset = $c->{hits_are_superset}->[$k]; + # Check if there are any hits left over + my $hitsLeft = scalar(keys %hits); + if($hitsLeft != 0 && !$hits_are_superset) { + print Dumper(\%hits); + die "Had $hitsLeft hit(s) left over at position $k"; + } + my $pairhitsLeft = scalar(keys %pairhits); + if($pairhitsLeft != 0 && !$hits_are_superset) { + print Dumper(\%pairhits); + die "Had $pairhitsLeft hit(s) left over at position $k"; + } + my $pairhits_orig_Left = scalar(keys %pairhits_orig); + if($pairhits_orig_Left != 0 && !$hits_are_superset) { + print Dumper(\%pairhits_orig); + die "Had $pairhits_orig_Left hit(s) left over at position $k"; + } + } + + $c->{hits} = $hitstmp; + $c->{pairhits} = $pairhitstmp; + $c->{pairhits_orig} = $pairhits_orig_tmp; + } + $last_ref = undef if $first; +} +print "PASSED\n"; diff --git a/scripts/test/simple_tests.sh b/scripts/test/simple_tests.sh new file mode 100644 index 0000000..0ba1cb0 --- /dev/null +++ b/scripts/test/simple_tests.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +# +# Copyright 2011, Ben Langmead +# +# This file is part of Bowtie 2. +# +# Bowtie 2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Bowtie 2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Bowtie 2. If not, see . 
+# + +# simple_tests.sh + +make $* bowtie2-align bowtie2-align-debug bowtie2-build bowtie2-build-debug && \ +perl scripts/test/simple_tests.pl \ + --bowtie2=./bowtie2 \ + --bowtie2-build=./bowtie2-build-debug diff --git a/scripts/validate_repeat.py b/scripts/validate_repeat.py new file mode 100644 index 0000000..70c469c --- /dev/null +++ b/scripts/validate_repeat.py @@ -0,0 +1,230 @@ +#!/usr/bin/python +import sys, subprocess +import re +from argparse import ArgumentParser, FileType +from collections import defaultdict, Counter + +flag_include_N = True + +""" +""" +def read_genome(genome_file): + chr_dic = {} + chr_name, sequence = "", "" + for line in genome_file: + if line.startswith(">"): + if chr_name and sequence: + chr_dic[chr_name] = sequence + chr_name = line.strip().split()[0][1:] + sequence = "" + else: + line = line.strip() + if not flag_include_N: + # remove N-bases + line = line.replace('N', '') + + sequence += line; + + if chr_name and sequence: + chr_dic[chr_name] = sequence + return chr_dic + +""" +""" +def reverse_complement(seq): + result = "" + for nt in seq: + base = nt + if nt == 'A': + base = 'T' + elif nt == 'a': + base = 't' + elif nt == 'C': + base = 'G' + elif nt == 'c': + base = 'g' + elif nt == 'G': + base = 'C' + elif nt == 'g': + base = 'c' + elif nt == 'T': + base = 'A' + elif nt == 't': + base = 'a' + + result = base + result + + return result + + +""" +""" +def read_snp(snp_file): + snps = defaultdict(dict) + for line in snp_file: + line = line.strip() + if not line or line.startswith('#'): + continue + try: + snpID, type, chr, pos, data = line.split('\t') + except ValueError: + continue + + assert type in ["single", "deletion", "insertion"] + if type == "deletion": + data = int(data) + + snps[chr][snpID] = [snpID, type, int(pos), data]; + + return snps + + +def indelCount(snp_list, snp_id_list): + indel = 0 + + for snp_id in snp_id_list: + snp = snp_list[snp_id] + + if snp[1] == 'deletion': + indel -= int(snp[3]) + elif snp[1] == 
'insertion': + indel += len(snp[3]) + + return indel + +def applySNPs(snp_list, ref_sqn, snp_id_list, base_pos): + + ref_pos = 0 + read_pos = 0 + read = "" + + for snp_id in snp_id_list: + snp = snp_list[snp_id] + + pos = snp[2] - base_pos; + + while ref_pos < pos: + read += ref_sqn[ref_pos] + ref_pos += 1 + + + if snp[1] == 'single': + read += snp[3] + ref_pos += 1 + elif snp[1] == 'deletion': + ref_pos += int(snp[3]) + elif snp[1] == 'insertion': + read += snp[3] + + #print snp_id, snp_list[snp_id] + + while ref_pos < len(ref_sqn): + read += ref_sqn[ref_pos] + ref_pos += 1 + + return read + + +def main(genome_file, rpt_name): + # load genome sequeuce + chr_dic = read_genome(genome_file) + + rpt_fa_name = rpt_name + ".rep.fa" + rpt_info_name = rpt_name + ".rep.info" + rpt_snp_name = rpt_name + ".rep.snp" + + # load repeat sequence + fp = open(rpt_fa_name, 'r') + rpt_dic = read_genome(fp) + fp.close() + + # load repeat snp + fp = open(rpt_snp_name, 'r') + rpt_snps = read_snp(fp) + fp.close() + + # Validates + # load repeat info + fp = open(rpt_info_name, 'r') + repeat_sequence = "" + repeat_length = 0 + snp_cnt = 0 + indel = 0 + snp_id_list = [] + + for line in fp: + line = line.strip() + + if line.startswith('>'): + line = line[1:] + fields = line.split() + + #print fields + + name, rpt_seq_name, rpt_pos, rpt_len, pos_cnt, snp_cnt = fields[0:6] + + snp_cnt = int(snp_cnt) + rpt_pos = int(rpt_pos) + rpt_len = int(rpt_len) + + if snp_cnt > 0: + snp_id_list = fields[6].split(',') + else: + snp_id_list = [] + + #print name, snp_cnt, snp_list + + # make repeat_sequence (with snp) + + + repeat_sequence = rpt_dic[rpt_seq_name][rpt_pos:rpt_pos + rpt_len] + indel = 0 + + if snp_cnt > 0: + # apply snps + repeat_sequence = applySNPs(rpt_snps[rpt_seq_name], repeat_sequence, snp_id_list, rpt_pos) + # in/del count + indel = indelCount(rpt_snps[rpt_seq_name], snp_id_list) + + #repeat_length = rpt_len + indel + repeat_length = len(repeat_sequence) + + #print repeat_sequence + + 
else: + coords = line.split() + for coord in coords: + chr, pos, strand = coord.split(':') + pos = int(pos) + + # get string + seq = chr_dic[chr][pos:pos + repeat_length] + if strand == '-': + seq = reverse_complement(seq) + + if seq != repeat_sequence: + print 'Mismatch', seq, repeat_sequence, snp_cnt, coord, snp_id_list, repeat_length + + fp.close() + + +if __name__ == '__main__': + parser = ArgumentParser( + description='Validate repeat files') + + parser.add_argument('genome_file', + nargs='?', + type=FileType('r'), + help='input genome file (e.g. genome.fa)') + + parser.add_argument('-r', + dest='rpt_name', + type=str, + help='Repeat Name') + + args = parser.parse_args() + if not args.genome_file or not args.rpt_name: + parser.print_help() + exit(1) + + main(args.genome_file, args.rpt_name) diff --git a/search_globals.h b/search_globals.h new file mode 100644 index 0000000..bd2704f --- /dev/null +++ b/search_globals.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +#ifndef SEARCH_GLOBALS_H_ +#define SEARCH_GLOBALS_H_ + +#include + +// declared in ebwt_search.cpp +extern bool gColor; +extern bool gColorExEnds; +extern bool gReportOverhangs; +extern bool gColorSeq; +extern bool gColorEdit; +extern bool gColorQual; +extern bool gNoMaqRound; +extern bool gStrandFix; +extern bool gRangeMode; +extern int gVerbose; +extern int gQuiet; +extern bool gNofw; +extern bool gNorc; +extern bool gMate1fw; +extern bool gMate2fw; +extern int gMinInsert; +extern int gMaxInsert; +extern int gTrim5; +extern int gTrim3; +extern int gGapBarrier; +extern int gAllowRedundant; + +#endif /* SEARCH_GLOBALS_H_ */ diff --git a/sequence_io.h b/sequence_io.h new file mode 100644 index 0000000..5a2cd6f --- /dev/null +++ b/sequence_io.h @@ -0,0 +1,125 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef SEQUENCE_IO_H_ +#define SEQUENCE_IO_H_ + +#include +#include +#include +#include +#include "assert_helpers.h" +#include "ds.h" +#include "filebuf.h" +#include "sstring.h" + +using namespace std; + +/** + * Parse the fasta file 'infile'. 
Store + */ +template +static void parseFastaLens( + const TFnStr& infile, // filename + EList& namelens, // destination for fasta name lengths + EList& seqlens) // destination for fasta sequence lengths +{ + FILE *in = fopen(sstr_to_cstr(infile), "r"); + if(in == NULL) { + cerr << "Could not open sequence file" << endl; + throw 1; + } + FileBuf fb(in); + while(!fb.eof()) { + namelens.expand(); namelens.back() = 0; + seqlens.expand(); seqlens.back() = 0; + fb.parseFastaRecordLength(namelens.back(), seqlens.back()); + if(seqlens.back() == 0) { + // Couldn't read a record. We're probably done with this file. + namelens.pop_back(); + seqlens.pop_back(); + continue; + } + } + fb.close(); +} + +/** + * Parse the fasta file 'infile'. Store each name record in 'names', each + * sequence record in 'seqs', and the lengths of each + */ +template +static void parseFasta( + const TFnStr& infile, // filename + EList& names, // destination for fasta names + EList& namelens, // destination for fasta name lengths + EList& seqs, // destination for fasta sequences + EList& seqlens) // destination for fasta sequence lengths +{ + assert_eq(namelens.size(), seqlens.size()); + assert_eq(names.size(), namelens.size()); + assert_eq(seqs.size(), seqlens.size()); + size_t cur = namelens.size(); + parseFastaLens(infile, namelens, seqlens); + FILE *in = fopen(sstr_to_cstr(infile), "r"); + if(in == NULL) { + cerr << "Could not open sequence file" << endl; + throw 1; + } + FileBuf fb(in); + while(!fb.eof()) { + // Add a new empty record to the end + names.expand(); + seqs.expand(); + names.back() = new char[namelens[cur]+1]; + seqs.back() = new char[seqlens[cur]+1]; + fb.parseFastaRecord(names.back(), seqs.back()); + if(seqs.back().empty()) { + // Couldn't read a record. We're probably done with this file. + names.pop_back(); + seqs.pop_back(); + continue; + } + } + fb.close(); +} + +/** + * Read a set of FASTA sequence files of the given format and alphabet type. 
+ * Store all of the extracted sequences in vector ss. + */ +template +static void parseFastas( + const EList& infiles, // filenames + EList& names, // destination for fasta names + EList& namelens, // destination for fasta name lengths + EList& seqs, // destination for fasta sequences + EList& seqlens) // destination for fasta sequence lengths +{ + for(size_t i = 0; i < infiles.size(); i++) { + parseFasta( + infiles[i], + names, + namelens, + seqs, + seqlens); + } +} + +#endif /*SEQUENCE_IO_H_*/ diff --git a/shmem.cpp b/shmem.cpp new file mode 100644 index 0000000..a4853e7 --- /dev/null +++ b/shmem.cpp @@ -0,0 +1,49 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifdef BOWTIE_SHARED_MEM + +#include +#include +#include +#include +#include +#include "shmem.h" + +using namespace std; + +/** + * Notify other users of a shared-memory chunk that the leader has + * finished initializing it. + */ +void notifySharedMem(void *mem, size_t len) { + ((volatile uint32_t*)((char*)mem + len))[0] = SHMEM_INIT; +} + +/** + * Wait until the leader of a shared-memory chunk has finished + * initializing it. 
+ */ +void waitSharedMem(void *mem, size_t len) { + while(((volatile uint32_t*)((char*)mem + len))[0] != SHMEM_INIT) { + sleep(1); + } +} + +#endif diff --git a/shmem.h b/shmem.h new file mode 100644 index 0000000..b36f3ad --- /dev/null +++ b/shmem.h @@ -0,0 +1,161 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef SHMEM_H_ +#define SHMEM_H_ + +#ifdef BOWTIE_SHARED_MEM + +#include +#include +#include +#include +#include +#include +#include +#include "str_util.h" +#include "btypes.h" + +extern void notifySharedMem(void *mem, size_t len); + +extern void waitSharedMem(void *mem, size_t len); + +#define ALLOC_SHARED_U allocSharedMem +#define ALLOC_SHARED_U8 allocSharedMem +#define ALLOC_SHARED_U32 allocSharedMem +#define FREE_SHARED shmdt +#define NOTIFY_SHARED notifySharedMem +#define WAIT_SHARED waitSharedMem + +#define SHMEM_UNINIT 0xafba4242 +#define SHMEM_INIT 0xffaa6161 + +/** + * Tries to allocate a shared-memory chunk for a given file of a given size. 
+ */ +template +bool allocSharedMem(std::string fname, + size_t len, + T ** dst, + const char *memName, + bool verbose) +{ + using namespace std; + int shmid = -1; + // Calculate key given string + key_t key = (key_t)hash_string(fname); + shmid_ds ds; + int ret; + // Reserve 4 bytes at the end for silly synchronization + size_t shmemLen = len + 4; + if(verbose) { + cerr << "Reading " << len << "+4 bytes into shared memory for " << memName << endl; + } + T *ptr = NULL; + while(true) { + // Create the shrared-memory block + if((shmid = shmget(key, shmemLen, IPC_CREAT | 0666)) < 0) { + if(errno == ENOMEM) { + cerr << "Out of memory allocating shared area " << memName << endl; + } else if(errno == EACCES) { + cerr << "EACCES" << endl; + } else if(errno == EEXIST) { + cerr << "EEXIST" << endl; + } else if(errno == EINVAL) { + cerr << "Warning: shared-memory chunk's segment size doesn't match expected size (" << (shmemLen) << ")" << endl + << "Deleteing old shared memory block and trying again." << endl; + shmid = shmget(key, 0, 0); + if((ret = shmctl(shmid, IPC_RMID, &ds)) < 0) { + cerr << "shmctl returned " << ret + << " for IPC_RMID, errno is " << errno + << ", shmid is " << shmid << endl; + throw 1; + } else { + cerr << "Deleted shared mem chunk with shmid " << shmid << endl; + } + continue; + } else if(errno == ENOENT) { + cerr << "ENOENT" << endl; + } else if(errno == ENOSPC) { + cerr << "ENOSPC" << endl; + } else { + cerr << "shmget returned " << shmid << " for and errno is " << errno << endl; + } + throw 1; + } + ptr = (T*)shmat(shmid, 0, 0); + if(ptr == (void*)-1) { + cerr << "Failed to attach " << memName << " to shared memory with shmat()." << endl; + throw 1; + } + if(ptr == NULL) { + cerr << memName << " pointer returned by shmat() was NULL." << endl; + throw 1; + } + // Did I create it, or did I just attach to one created by + // another process? 
+ if((ret = shmctl(shmid, IPC_STAT, &ds)) < 0) { + cerr << "shmctl returned " << ret << " for IPC_STAT and errno is " << errno << endl; + throw 1; + } + if(ds.shm_segsz != shmemLen) { + cerr << "Warning: shared-memory chunk's segment size (" << ds.shm_segsz + << ") doesn't match expected size (" << shmemLen << ")" << endl + << "Deleteing old shared memory block and trying again." << endl; + if((ret = shmctl(shmid, IPC_RMID, &ds)) < 0) { + cerr << "shmctl returned " << ret << " for IPC_RMID and errno is " << errno << endl; + throw 1; + } + } else { + break; + } + } // while(true) + *dst = ptr; + bool initid = (((volatile uint32_t*)((char*)ptr + len))[0] == SHMEM_INIT); + if(ds.shm_cpid == getpid() && !initid) { + if(verbose) { + cerr << " I (pid = " << getpid() << ") created the " + << "shared memory for " << memName << endl; + } + // Set this value just off the end of the chunk to + // indicate that the data hasn't been read yet. + ((volatile uint32_t*)((char*)ptr + len))[0] = SHMEM_UNINIT; + return true; + } else { + if(verbose) { + cerr << " I (pid = " << getpid() + << ") did not create the shared memory for " + << memName << ". Pid " << ds.shm_cpid << " did." << endl; + } + return false; + } +} + +#else + +#define ALLOC_SHARED_U(...) 0 +#define ALLOC_SHARED_U8(...) 0 +#define ALLOC_SHARED_U32(...) 0 +#define FREE_SHARED(...) +#define NOTIFY_SHARED(...) +#define WAIT_SHARED(...) + +#endif /*BOWTIE_SHARED_MEM*/ + +#endif /* SHMEM_H_ */ diff --git a/simple_func.cpp b/simple_func.cpp new file mode 100644 index 0000000..a5b0859 --- /dev/null +++ b/simple_func.cpp @@ -0,0 +1,93 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include +#include "simple_func.h" +#include "ds.h" +#include "mem_ids.h" + +int SimpleFunc::parseType(const std::string& otype) { + string type = otype; + if(type == "C" || type == "Constant") { + return SIMPLE_FUNC_CONST; + } else if(type == "L" || type == "Linear") { + return SIMPLE_FUNC_LINEAR; + } else if(type == "S" || type == "Sqrt") { + return SIMPLE_FUNC_SQRT; + } else if(type == "G" || type == "Log") { + return SIMPLE_FUNC_LOG; + } + std::cerr << "Error: Bad function type '" << otype.c_str() + << "'. Should be C (constant), L (linear), " + << "S (square root) or G (natural log)." << std::endl; + throw 1; +} + +SimpleFunc SimpleFunc::parse( + const std::string& s, + double defaultConst, + double defaultLinear, + double defaultMin, + double defaultMax) +{ + // Separate value into comma-separated tokens + EList ctoks(MISC_CAT); + string ctok; + istringstream css(s); + SimpleFunc fv; + while(getline(css, ctok, ',')) { + ctoks.push_back(ctok); + } + if(ctoks.size() >= 1) { + fv.setType(parseType(ctoks[0])); + } + if(ctoks.size() >= 2) { + double co; + istringstream tmpss(ctoks[1]); + tmpss >> co; + fv.setConst(co); + } else { + fv.setConst(defaultConst); + } + if(ctoks.size() >= 3) { + double ce; + istringstream tmpss(ctoks[2]); + tmpss >> ce; + fv.setCoeff(ce); + } else { + fv.setCoeff(defaultLinear); + } + if(ctoks.size() >= 4) { + double mn; + istringstream tmpss(ctoks[3]); + tmpss >> mn; + fv.setMin(mn); + } else { + fv.setMin(defaultMin); + } + if(ctoks.size() >= 5) { + double mx; + istringstream tmpss(ctoks[4]); + tmpss >> mx; + fv.setMax(mx); + } else { + fv.setMax(defaultMax); + } + return 
fv; +} diff --git a/simple_func.h b/simple_func.h new file mode 100644 index 0000000..ca76869 --- /dev/null +++ b/simple_func.h @@ -0,0 +1,125 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef SIMPLE_FUNC_H_ +#define SIMPLE_FUNC_H_ + +#include +#include +#include +#include "tokenize.h" + +#define SIMPLE_FUNC_CONST 1 +#define SIMPLE_FUNC_LINEAR 2 +#define SIMPLE_FUNC_SQRT 3 +#define SIMPLE_FUNC_LOG 4 + +/** + * A simple function of one argument, parmeterized by I, X, C and L: min + * value, max value, constant term, and coefficient respectively: + * + * 1. Constant: f(x) = max(I, min(X, C + L * 0)) + * 2. Linear: f(x) = max(I, min(X, C + L * x)) + * 3. Square root: f(x) = max(I, min(X, C + L * sqrt(x))) + * 4. Log: f(x) = max(I, min(X, C + L * ln(x))) + * + * Clearly, the return value of the Constant function doesn't depend on x. 
+ */ +class SimpleFunc { + +public: + + SimpleFunc() : type_(0), I_(0.0), X_(0.0), C_(0.0), L_(0.0) { } + + SimpleFunc(int type, double I, double X, double C, double L) { + init(type, I, X, C, L); + } + + void init(int type, double I, double X, double C, double L) { + type_ = type; I_ = I; X_ = X; C_ = C; L_ = L; + } + + void init(int type, double C, double L) { + type_ = type; C_ = C; L_ = L; + I_ = -std::numeric_limits::max(); + X_ = std::numeric_limits::max(); + } + + void setType (int type ) { type_ = type; } + void setMin (double mn) { I_ = mn; } + void setMax (double mx) { X_ = mx; } + void setConst(double co) { C_ = co; } + void setCoeff(double ce) { L_ = ce; } + + int getType () const { return type_; } + double getMin () const { return I_; } + double getMax () const { return X_; } + double getConst() const { return C_; } + double getCoeff() const { return L_; } + + void mult(double x) { + if(I_ < std::numeric_limits::max()) { + I_ *= x; X_ *= x; C_ *= x; L_ *= x; + } + } + + bool initialized() const { return type_ != 0; } + void reset() { type_ = 0; } + + template + T f(double x) const { + assert(type_ >= SIMPLE_FUNC_CONST && type_ <= SIMPLE_FUNC_LOG); + double X; + if(type_ == SIMPLE_FUNC_CONST) { + X = 0.0; + } else if(type_ == SIMPLE_FUNC_LINEAR) { + X = x; + } else if(type_ == SIMPLE_FUNC_SQRT) { + X = sqrt(x); + } else if(type_ == SIMPLE_FUNC_LOG) { + X = log(x); + } else { + throw 1; + } + double ret = std::max(I_, std::min(X_, C_ + L_ * X)); + if(ret == std::numeric_limits::max()) { + return std::numeric_limits::max(); + } else if(ret == std::numeric_limits::min()) { + return std::numeric_limits::min(); + } else { + return (T)ret; + } + } + + static int parseType(const std::string& otype); + + static SimpleFunc parse( + const std::string& s, + double defaultConst = 0.0, + double defaultLinear = 0.0, + double defaultMin = 0.0, + double defaultMax = std::numeric_limits::max()); + +protected: + + int type_; + double I_, X_, C_, L_; +}; + +#endif /*ndef 
SIMPLE_FUNC_H_*/ diff --git a/splice_site.cpp b/splice_site.cpp new file mode 100644 index 0000000..069b2df --- /dev/null +++ b/splice_site.cpp @@ -0,0 +1,850 @@ +/* + * Copyright 2013, Daehwan Kim + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "edit.h" +#include "splice_site.h" +#include "aligner_report.h" +#include "aligner_result.h" + +#if defined(NEW_PROB_MODEL) + +#include "splice_site_mem.h" + +#else + +float donor_prob[4][donor_len] = { + {0.340f, 0.604f, 0.092f, 0.001f, 0.001f, 0.526f, 0.713f, 0.071f, 0.160f}, + {0.363f, 0.129f, 0.033f, 0.001f, 0.001f, 0.028f, 0.076f, 0.055f, 0.165f}, + {0.183f, 0.125f, 0.803f, 1.000f, 0.001f, 0.419f, 0.118f, 0.814f, 0.209f}, + {0.114f, 0.142f, 0.073f, 0.001f, 1.000f, 0.025f, 0.093f, 0.059f, 0.462f} +}; + +float acceptor_prob[4][acceptor_len] = { + {0.090f, 0.084f, 0.075f, 0.068f, 0.076f, 0.080f, 0.097f, 0.092f, 0.076f, 0.078f, 0.237f, 0.042f, 1.000f, 0.001f, 0.239f}, + {0.310f, 0.310f, 0.307f, 0.293f, 0.326f, 0.330f, 0.373f, 0.385f, 0.410f, 0.352f, 0.309f, 0.708f, 0.001f, 0.001f, 0.138f}, + {0.125f, 0.115f, 0.106f, 0.104f, 0.110f, 0.113f, 0.113f, 0.085f, 0.066f, 0.064f, 0.212f, 0.003f, 0.001f, 1.000f, 0.520f}, + {0.463f, 0.440f, 0.470f, 0.494f, 0.471f, 0.463f, 0.408f, 0.429f, 0.445f, 0.504f, 0.240f, 0.246f, 0.001f, 0.001f, 0.104f} +}; + +float donor_prob_sum[1 << (donor_len << 1)]; +float 
acceptor_prob_sum1[1 << (acceptor_len1 << 1)]; +float acceptor_prob_sum2[1 << (acceptor_len2 << 1)]; + +#endif + +void init_junction_prob() +{ +#if !defined(NEW_PROB_MODEL) + for(size_t i = 0; i < donor_len; i++) { + ASSERT_ONLY(float sum = 0.0f); + for(size_t j = 0; j < 4; j++) { + float prob = donor_prob[j][i]; + assert_gt(prob, 0.0f); + ASSERT_ONLY(sum += prob); + donor_prob[j][i] = log(prob / background_prob[j]); + } + assert_range(0.9f, 1.05f, sum); + } + for(size_t i = 0; i < acceptor_len; i++) { + ASSERT_ONLY(float sum = 0.0f); + for(size_t j = 0; j < 4; j++) { + float prob = acceptor_prob[j][i]; + assert_gt(prob, 0.0f); + ASSERT_ONLY(sum += prob); + acceptor_prob[j][i] = log(prob / background_prob[j]); + } + assert_range(0.9f, 1.05f, sum); + } + + const size_t donor_elms = 1 << (donor_len << 1); + for(size_t i = 0; i < donor_elms; i++) { + float sum = 0.0f; + for(size_t j = 0; j < donor_len; j++) { + int base = (i >> (j << 1)) & 0x3; + sum += donor_prob[base][donor_len - j - 1]; + } + donor_prob_sum[i] = exp(-sum); + } + + const size_t acceptor_elms1 = 1 << (acceptor_len1 << 1); + for(size_t i = 0; i < acceptor_elms1; i++) { + float sum = 0.0f; + for(size_t j = 0; j < acceptor_len1; j++) { + int base = (i >> (j << 1)) & 0x3; + sum += acceptor_prob[base][acceptor_len1 - j - 1]; + } + acceptor_prob_sum1[i] = exp(-sum); + } + + const size_t acceptor_elms2 = 1 << (acceptor_len2 << 1); + for(size_t i = 0; i < acceptor_elms2; i++) { + float sum = 0.0f; + for(size_t j = 0; j < acceptor_len2; j++) { + int base = (i >> (j << 1)) & 0x3; + sum += acceptor_prob[base][acceptor_len - j - 1]; + } + acceptor_prob_sum2[i] = exp(-sum); + } +#endif +} + +ostream& operator<<(ostream& out, const SpliceSite& s) +{ + out << s.ref() << "\t" + << s.left() << "\t" + << s.right() << "\t"; + if(s.splDir() == SPL_FW || s.splDir() == SPL_SEMI_FW) { + out << "+"; + } else if(s.splDir() == SPL_RC || s.splDir() == SPL_SEMI_RC) { + out << "-"; + } else { + out << "."; + } + out << endl; + 
return out; +} + +SpliceSiteDB::SpliceSiteDB( + const BitPairReference& refs, + const EList& refnames, + bool threadSafe, + bool write, + bool read) : +_numRefs(refs.numRefs()), +_write(write), +_read(read), +_threadSafe(threadSafe), +_empty(true) +{ + for(size_t r = 0; r < refnames.size(); r++) { + const string& refname = refnames[r]; + _refnames.expand(); + size_t i = 0; + for(; i < refname.size(); i++) { + if(isspace(refname[i])) { + break; + } + } + _refnames.back() = refname.substr(0, i); + } + + assert_gt(_numRefs, 0); + assert_eq(_numRefs, _refnames.size()); + for(uint64_t i = 0; i < _numRefs; i++) { + _fwIndex.push_back(new RedBlack(16 << 10, CA_CAT)); + _bwIndex.push_back(new RedBlack(16 << 10, CA_CAT)); + _pool.expand(); + _spliceSites.expand(); + _mutex.push_back(MUTEX_T()); + } + + donorstr.resize(donor_exonic_len + donor_intronic_len); + acceptorstr.resize(acceptor_intronic_len + acceptor_exonic_len); +} + +SpliceSiteDB::~SpliceSiteDB() { + assert_eq(_fwIndex.size(), _bwIndex.size()); + assert_eq(_fwIndex.size(), _pool.size()); + for(uint64_t i = 0; i < _numRefs; i++) { + delete _fwIndex[i]; + delete _bwIndex[i]; + + EList& pool = _pool[i]; + for(size_t j = 0; j < pool.size(); j++) { + delete pool[j]; + } + } +} + +size_t SpliceSiteDB::size(uint64_t ref) const { + if(!_read) return 0; + + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + assert_lt(ref, _fwIndex.size()); + assert_eq(_fwIndex.size(), _bwIndex.size()); + ThreadSafe t(const_cast(&_mutex[ref]), _threadSafe && _write); + return _fwIndex.size(); +} + +bool SpliceSiteDB::empty(uint64_t ref) const { + return size(ref) == 0; +} + +bool SpliceSiteDB::addSpliceSite( + const Read& rd, + const AlnRes& rs, + uint32_t minAnchorLen) +{ + if(!_write) return false; + if(rs.trimmed5p(true) + rs.trimmed3p(true) > 0) return false; + + _empty = false; + + Coord coord = rs.refcoord(); + uint64_t ref = coord.ref(); + assert_lt(ref, _numRefs); + const EList& edits = rs.ned(); + if(!coord.orient()) { 
+ Edit::invertPoss(const_cast&>(edits), rd.length(), false); + } + // daehwan - for debugging purposes + uint32_t editdist = 0; + for(size_t i = 0; i < edits.size(); i++) { + const Edit& edit = edits[i]; + if(edit.isGap() || edit.isMismatch()) editdist++; + } + + SpliceSitePos ssp; + uint32_t refoff = (uint32_t)coord.off(); + uint32_t leftAnchorLen = 0, rightAnchorLen = 0; + size_t eidx = 0; + size_t last_eidx = 0; + uint32_t mm = 0; + for(size_t i = 0; i < rd.length(); i++, refoff++) { + while(eidx < edits.size() && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + refoff++; + } else if(edits[eidx].isRefGap()) { + assert_gt(refoff, 0); + refoff--; + } + if(edits[eidx].isGap() || edits[eidx].isMismatch()) mm++; + if(edits[eidx].isSpliced()) { + assert_gt(refoff, 0); + if(ssp.inited()) { + assert(edits[last_eidx].isSpliced()); + assert_lt(edits[last_eidx].pos, edits[eidx].pos); + rightAnchorLen = edits[eidx].pos - edits[last_eidx].pos; + uint32_t minLeftAnchorLen = minAnchorLen + mm * 2 + (edits[eidx].splDir == SPL_UNKNOWN ? 6 : 0); + uint32_t mm2 = 0; + for(size_t j = eidx + 1; j < edits.size(); j++) { + if(edits[j].isGap() || edits[j].isMismatch()) mm2++; + } + uint32_t minRightAnchorLen = minAnchorLen + mm2 * 2 + (edits[eidx].splDir == SPL_UNKNOWN ? 
6 : 0); + if(leftAnchorLen >= minLeftAnchorLen && rightAnchorLen >= minRightAnchorLen) { + bool added = false; + assert_lt(ref, _mutex.size()); + ThreadSafe t(&_mutex[ref], _threadSafe && _write); + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + Node *cur = _fwIndex[ref]->add(pool(ref), ssp, &added); + if(added) { + assert_lt(ref, _spliceSites.size()); + _spliceSites[ref].expand(); + _spliceSites[ref].back().init(ssp.ref(), ssp.left(), ssp.right(), ssp.splDir()); + _spliceSites[ref].back()._readid = rd.rdid; + _spliceSites[ref].back()._leftext = leftAnchorLen; + _spliceSites[ref].back()._rightext = rightAnchorLen; + _spliceSites[ref].back()._editdist = editdist; + _spliceSites[ref].back()._numreads = 1; + assert(cur != NULL); + cur->payload = (uint32_t)_spliceSites[ref].size() - 1; + + SpliceSitePos rssp(ssp.ref(), ssp.right(), ssp.left(), ssp.splDir()); + assert_lt(ref, _bwIndex.size()); + assert(_bwIndex[ref] != NULL); + cur = _bwIndex[ref]->add(pool(ref), rssp, &added); + assert(added); + assert(cur != NULL); + cur->payload = (uint32_t)_spliceSites[ref].size() - 1; + assert_eq(_fwIndex[ref]->size(), _bwIndex[ref]->size()); + } else { + assert(cur != NULL); + assert_lt(ref, _spliceSites.size()); + assert_lt(cur->payload, _spliceSites[ref].size()); + if(leftAnchorLen > _spliceSites[ref][cur->payload]._leftext) _spliceSites[ref][cur->payload]._leftext = leftAnchorLen; + if(rightAnchorLen > _spliceSites[ref][cur->payload]._rightext) _spliceSites[ref][cur->payload]._rightext = rightAnchorLen; + if(editdist < _spliceSites[ref][cur->payload]._editdist) _spliceSites[ref][cur->payload]._editdist = editdist; + _spliceSites[ref][cur->payload]._numreads += 1; + if(rd.rdid < _spliceSites[ref][cur->payload]._readid) { + _spliceSites[ref][cur->payload]._readid = rd.rdid; + } + } + } + leftAnchorLen = rightAnchorLen; + rightAnchorLen = 0; + } else { + leftAnchorLen = edits[eidx].pos; + } + ssp.init((uint32_t)coord.ref(), refoff - 1, refoff + 
edits[eidx].splLen, edits[eidx].splDir); + refoff += edits[eidx].splLen; + last_eidx = eidx; + } + eidx++; + } + } + if(ssp.inited()) { + assert(edits[last_eidx].isSpliced()); + assert_lt(edits[last_eidx].pos, rd.length()); + rightAnchorLen = (uint32_t)(rd.length() - edits[last_eidx].pos); + uint32_t minLeftAnchorLen = minAnchorLen + mm * 2 + (edits[last_eidx].splDir == SPL_UNKNOWN ? 6 : 0); + uint32_t mm2 = 0; + for(size_t j = last_eidx + 1; j < edits.size(); j++) { + if(edits[j].isGap() || edits[j].isMismatch()) mm2++; + } + uint32_t minRightAnchorLen = minAnchorLen + mm2 * 2 + (edits[last_eidx].splDir == SPL_UNKNOWN ? 6 : 0); + if(leftAnchorLen >= minLeftAnchorLen && rightAnchorLen >= minRightAnchorLen) { + bool added = false; + assert_lt(ref, _mutex.size()); + ThreadSafe t(&_mutex[ref], _threadSafe && _write); + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + Node *cur = _fwIndex[ref]->add(pool(ref), ssp, &added); + if(added) { + assert_lt(ref, _spliceSites.size()); + _spliceSites[ref].expand(); + _spliceSites[ref].back().init(ssp.ref(), ssp.left(), ssp.right(), ssp.splDir()); + _spliceSites[ref].back()._readid = rd.rdid; + _spliceSites[ref].back()._leftext = leftAnchorLen; + _spliceSites[ref].back()._rightext = rightAnchorLen; + _spliceSites[ref].back()._editdist = editdist; + _spliceSites[ref].back()._numreads = 1; + assert(cur != NULL); + cur->payload = (uint32_t)_spliceSites[ref].size() - 1; + + SpliceSitePos rssp(ssp.ref(), ssp.right(), ssp.left(), ssp.splDir()); + assert_lt(ref, _bwIndex.size()); + assert(_bwIndex[ref] != NULL); + cur = _bwIndex[ref]->add(pool(ref), rssp, &added); + assert(added); + assert(cur != NULL); + cur->payload = (uint32_t)_spliceSites[ref].size() - 1; + assert_eq(_fwIndex[ref]->size(), _bwIndex[ref]->size()); + } else { + assert(cur != NULL); + assert_lt(ref, _spliceSites.size()); + assert_lt(cur->payload, _spliceSites[ref].size()); + if(leftAnchorLen > _spliceSites[ref][cur->payload]._leftext) 
_spliceSites[ref][cur->payload]._leftext = leftAnchorLen; + if(rightAnchorLen > _spliceSites[ref][cur->payload]._rightext) _spliceSites[ref][cur->payload]._rightext = rightAnchorLen; + if(editdist < _spliceSites[ref][cur->payload]._editdist) _spliceSites[ref][cur->payload]._editdist = editdist; + _spliceSites[ref][cur->payload]._numreads += 1; + if(rd.rdid < _spliceSites[ref][cur->payload]._readid) { + _spliceSites[ref][cur->payload]._readid = rd.rdid; + } + } + } + } + if(!coord.orient()) { + Edit::invertPoss(const_cast&>(edits), rd.length(), false); + } + + return true; +} + +bool SpliceSiteDB::getSpliceSite(SpliceSite& ss) const +{ + if(!_read) return false; + + uint64_t ref = ss.ref(); + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + ThreadSafe t(const_cast(&_mutex[ref]), _threadSafe && _write); + + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + const Node *cur = _fwIndex[ref]->lookup(ss); + if(cur == NULL) return false; + assert(cur != NULL); + assert_lt(ref, _spliceSites.size()); + ss = _spliceSites[ref][cur->payload]; + return true; +} + +void SpliceSiteDB::getLeftSpliceSites(uint32_t ref, uint32_t left, uint32_t range, EList& spliceSites) const +{ + if(!_read) return; + + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + ThreadSafe t(const_cast(&_mutex[ref]), _threadSafe && _write); + assert_gt(range, 0); + assert_geq(left + 1, range); + assert_lt(ref, _bwIndex.size()); + assert(_bwIndex[ref] != NULL); + const Node *cur = _bwIndex[ref]->root(); + if(cur != NULL) getSpliceSites_recur(cur, left + 1 - range, left, spliceSites); +} + +void SpliceSiteDB::getRightSpliceSites(uint32_t ref, uint32_t right, uint32_t range, EList& spliceSites) const +{ + if(!_read) return; + + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + ThreadSafe t(const_cast(&_mutex[ref]), _threadSafe && _write); + assert_gt(range, 0); + assert_gt(right + range, range); + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); 
+ const Node *cur = _fwIndex[ref]->root(); + if(cur != NULL) getSpliceSites_recur(cur, right, right + range - 1, spliceSites); + +} + +void SpliceSiteDB::getSpliceSites_recur( + const RedBlackNode *node, + uint32_t left, + uint32_t right, + EList& spliceSites) const +{ + assert(node != NULL); + if(node->key.left() >= left && node->left != NULL) { + getSpliceSites_recur( + node->left, + left, + right, + spliceSites); + } + + if(node->key.left() >= left && node->key.left() <= right) { + uint32_t ref = node->key.ref(); + assert_lt(ref, _spliceSites.size()); + assert_lt(node->payload, _spliceSites[ref].size()); + ASSERT_ONLY(const SpliceSite& ss = _spliceSites[ref][node->payload]); + assert_eq(ss.ref(), node->key.ref()); + assert(ss.left() == node->key.left() || + ss.right() == node->key.left()); + spliceSites.push_back(_spliceSites[ref][node->payload]); + } + + if(node->key.left() <= right && node->right != NULL) { + getSpliceSites_recur( + node->right, + left, + right, + spliceSites); + } +} + +bool SpliceSiteDB::hasSpliceSites( + uint32_t ref, + uint32_t left1, + uint32_t right1, + uint32_t left2, + uint32_t right2, + bool includeNovel) const +{ + if(!_read) return false; + + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + ThreadSafe t(const_cast(&_mutex[ref]), _threadSafe && _write); + + if(left1 < right1) { + assert_lt(ref, _bwIndex.size()); + assert(_bwIndex[ref] != NULL); + const Node *cur = _bwIndex[ref]->root(); + if(cur != NULL) { + if(hasSpliceSites_recur(cur, left1, right1, includeNovel)) + return true; + } + } + + if(left2 < right2) { + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + const Node *cur = _fwIndex[ref]->root(); + if(cur != NULL) { + return hasSpliceSites_recur(cur, left2, right2, includeNovel); + } + } + return false; +} + +bool SpliceSiteDB::hasSpliceSites_recur( + const RedBlackNode *node, + uint32_t left, + uint32_t right, + bool includeNovel) const +{ + assert(node != NULL); + if(node->key.left() >= left && 
node->key.left() <= right) { + uint32_t ref = node->key.ref(); + assert_lt(ref, _spliceSites.size()); + assert_lt(node->payload, _spliceSites[ref].size()); + const SpliceSite& ss = _spliceSites[ref][node->payload]; + if(includeNovel || ss._known) + return true; + } + + if(node->key.left() >= left && node->left != NULL) { + if(hasSpliceSites_recur( + node->left, + left, + right, + includeNovel)) + return true; + } + + if(node->key.left() <= right && node->right != NULL) { + if(hasSpliceSites_recur( + node->right, + left, + right, + includeNovel)) + return true; + } + + return false; +} + +bool SpliceSiteDB::insideExon( + uint32_t ref, + uint32_t left, + uint32_t right) const +{ + if(_exons.empty()) return false; + assert_lt(ref, _numRefs); + assert_lt(left, right); + + Exon e(ref, left + 1, 0, true); + size_t i = _exons.bsearchLoBound(e); + for(; i > 0; i--) { + const Exon& e = _exons[i-1]; + if(e.right() < left) break; + if(e.left() <= left && right <= e.right()) + return true; + } + return false; +} + +void calculate_splicesite_read_dist_impl(const RedBlackNode *node, + const EList &spliceSites, + EList& splicesite_read_dist) { + if(node == NULL) return; + calculate_splicesite_read_dist_impl(node->left, spliceSites, splicesite_read_dist); + assert_lt(node->payload, spliceSites.size()); + const SpliceSite& ss = spliceSites[node->payload]; + if(ss.numreads() < splicesite_read_dist.size()) + splicesite_read_dist[ss.numreads()] += 1; + else + splicesite_read_dist.back() += 1; + calculate_splicesite_read_dist_impl(node->right, spliceSites, splicesite_read_dist); +} + +uint32_t calculate_splicesite_read_dist(const EList* >& fwIndex, + const ELList &spliceSites, + EList& splicesite_read_dist) { + for(size_t i = 0; i < fwIndex.size(); i++) { + assert(fwIndex[i] != NULL); + const RedBlackNode *root = fwIndex[i]->root(); + assert_lt(i, spliceSites.size()); + if(root != NULL) calculate_splicesite_read_dist_impl(root, spliceSites[i], splicesite_read_dist); + } + + for(size_t 
i = 1; i < splicesite_read_dist.size(); i++) { + splicesite_read_dist[i] += splicesite_read_dist[i-1]; + } + + for(size_t i = 0; i < splicesite_read_dist.size(); i++) { + float cmf_i = float(splicesite_read_dist[i]) / splicesite_read_dist.back(); + if(cmf_i > 0.7) + return (uint32_t)i; + } + + return 0; +} + +void SpliceSiteDB::print(ofstream& out) +{ + EList splicesite_read_dist; + for(size_t i = 0; i < 100; i++) { + splicesite_read_dist.push_back(0); + } + uint32_t numreads_cutoff = calculate_splicesite_read_dist(_fwIndex, _spliceSites, splicesite_read_dist); + size_t numsplicesites = 0; + for(size_t i = 0; i < _spliceSites.size(); i++) { + numsplicesites += _spliceSites[i].size(); + } + uint32_t numreads_cutoff2 = (uint32_t)(numsplicesites / 100000); + + EList ss_list; + for(size_t i = 0; i < _fwIndex.size(); i++) { + assert(_fwIndex[i] != NULL); + const Node *root = _fwIndex[i]->root(); + if(root != NULL) print_recur(root, out, numreads_cutoff, numreads_cutoff2, ss_list); + } + print_impl(out, ss_list); +} + +void SpliceSiteDB::print_recur( + const RedBlackNode *node, + ofstream& out, + const uint32_t numreads_cutoff, + const uint32_t numreads_cutoff2, + EList& ss_list) +{ + if(node == NULL) return; + print_recur(node->left, out, numreads_cutoff, numreads_cutoff2, ss_list); + const SpliceSitePos& ssp = node->key; + assert_lt(ssp.ref(), _spliceSites.size()); + assert_lt(node->payload, _spliceSites[ssp.ref()].size()); + const SpliceSite& ss = _spliceSites[ssp.ref()][node->payload]; + if(ss.numreads() >= numreads_cutoff || + (ss.editdist() == 0 && ss.numreads() >= numreads_cutoff2)) print_impl(out, ss_list, &ss); + print_recur(node->right, out, numreads_cutoff, numreads_cutoff2, ss_list); +} + +void SpliceSiteDB::print_impl( + ofstream& out, + EList& ss_list, + const SpliceSite* ss) +{ + size_t i = 0; + while(i < ss_list.size()) { + const SpliceSite& tmp_ss = ss_list[i]; + bool do_print = true; + if(ss != NULL) { + if(tmp_ss.ref() == ss->ref()) { + 
assert_leq(tmp_ss.left(), ss->left()); + if(ss->left() < tmp_ss.left() + 10) { + do_print = false; + if(abs(((int)ss->left() - (int)tmp_ss.left()) - ((int)ss->right() - (int)tmp_ss.right())) <= 10) { + if(tmp_ss.numreads() < ss->numreads()) { + ss_list.erase(i); + ss_list.push_back(*ss); + } + return; + } + } + } + } + + if(!do_print) { + i++; + continue; + } + + assert_lt(tmp_ss.ref(), _refnames.size()); + out << _refnames[tmp_ss.ref()] << "\t" + << tmp_ss.left() << "\t" + << tmp_ss.right() << "\t"; + if(tmp_ss.splDir() == SPL_FW || tmp_ss.splDir() == SPL_SEMI_FW) { + out << "+"; + } else if(tmp_ss.splDir() == SPL_RC || tmp_ss.splDir() == SPL_SEMI_RC) { + out << "-"; + } else { + out << "."; + } + out << endl; + ss_list.erase(i); + } + + if(ss != NULL) ss_list.push_back(*ss); +} + +void SpliceSiteDB::read(const GFM& gfm, const EList >& alts) +{ + EList exons; + _empty = false; + assert_eq(_numRefs, _refnames.size()); + for(size_t i = 0; i < alts.size(); i++) { + const ALT& alt = alts[i]; + if(!alt.splicesite() && !alt.exon()) continue; + if(alt.left > alt.right) continue; + TIndexOffU ref = 0, left = 0, tlen = 0; + char fw = alt.fw; + bool straddled2 = false; + gfm.joinedToTextOff( + 1, + alt.left, + ref, + left, + tlen, + true, // reject straddlers? + straddled2); // straddled? + assert_lt(ref, _spliceSites.size()); + TIndexOffU right = left + (alt.right - alt.left); + if(alt.splicesite()) { + left -= 1; right += 1; + _spliceSites[ref].expand(); + _spliceSites[ref].back().init(ref, + left, + right, + fw ? SPL_FW : SPL_RC, + alt.exon(), + true, // from file? + true); // known splice site? 
+ assert_gt(_spliceSites[ref].size(), 0); + bool added = false; + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + Node *cur = _fwIndex[ref]->add(pool(ref), _spliceSites[ref].back(), &added); + if(!added) { + _spliceSites[ref].pop_back(); + continue; + } + assert(added); + assert(cur != NULL); + cur->payload = (uint32_t)_spliceSites[ref].size() - 1; + + added = false; + SpliceSitePos rssp(ref, + right, + left, + fw ? SPL_FW : SPL_RC); + assert_lt(ref, _bwIndex.size()); + assert(_bwIndex[ref] != NULL); + cur = _bwIndex[ref]->add(pool(ref), rssp, &added); + assert(added); + assert(cur != NULL); + cur->payload = (uint32_t)_spliceSites[ref].size() - 1; + } else { + assert(alt.exon()); + // Given some relaxation + if(left >= 10) left -= 10; + else left = 0; + if(right + 10 < tlen) right += 10; + else right = tlen - 1; + exons.expand(); + exons.back().init(ref, left, right, fw == '+' ? SPL_FW : SPL_RC); + } + } + if(exons.size() > 0) { + _exons.resizeExact(exons.size()); _exons.clear(); + _exons.push_back_array(exons.begin(), exons.size()); + _exons.sort(); + } +} + +void SpliceSiteDB::read(ifstream& in, bool known) +{ + _empty = false; + assert_eq(_numRefs, _refnames.size()); + while(!in.eof()) { + string refname; + uint32_t left = 0, right = 0; + char fw = 0; + in >> refname >> left >> right >> fw; + uint32_t ref = 0; + for(; ref < _refnames.size(); ref++) { + if(_refnames[ref] == refname) break; + } + if(ref >= _numRefs) continue; + assert_lt(ref, _spliceSites.size()); + _spliceSites[ref].expand(); + _spliceSites[ref].back().init(ref, + left, + right, + fw == '+' ? SPL_FW : SPL_RC, + false, // exon? + true, // from file? + known); // known splice site? 
+ assert_gt(_spliceSites[ref].size(), 0); + + bool added = false; + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + Node *cur = _fwIndex[ref]->add(pool(ref), _spliceSites[ref].back(), &added); + if(!added) { + _spliceSites[ref].pop_back(); + continue; + } + + assert(cur != NULL); + cur->payload = (uint32_t)_spliceSites[ref].size() - 1; + added = false; + SpliceSitePos rssp(ref, + right, + left, + fw == '+' ? SPL_FW : SPL_RC); + assert_lt(ref, _bwIndex.size()); + assert(_bwIndex[ref] != NULL); + cur = _bwIndex[ref]->add(pool(ref), rssp, &added); + assert(added); + assert(cur != NULL); + cur->payload = (uint32_t)_spliceSites[ref].size() - 1; + } +} + +Pool& SpliceSiteDB::pool(uint64_t ref) { + assert_lt(ref, _numRefs); + assert_lt(ref, _pool.size()); + EList& pool = _pool[ref]; + if(pool.size() <= 0 || pool.back()->full()) { + pool.push_back(new Pool(1 << 20 /* 1MB */, 16 << 10 /* 16KB */, CA_CAT)); + } + assert(pool.back() != NULL); + return *pool.back(); +} + +float SpliceSiteDB::probscore( + int64_t donor_seq, + int64_t acceptor_seq) +{ + float probscore = 0.0f; +#if defined(NEW_PROB_MODEL) + float donor_probscore = 0.0f; + assert_leq(donor_seq, 0x3ffff); + int64_t donor_exonic_seq = (donor_seq >> 4) & (~0xff); + int64_t donor_intronic_seq = donor_seq & 0xff; + int64_t donor_rest_seq = donor_exonic_seq | donor_intronic_seq; + int donor_seq3 = (donor_seq >> 10) & 0x3; + int donor_seq4 = (donor_seq >> 8) & 0x3; + donor_probscore = donor_cons1[donor_seq3] * donor_cons2[donor_seq4] / (background_bp_prob[donor_seq3] * background_bp_prob[donor_seq4]) * donor_me2x5[donor_rest_seq]; + + float acceptor_probscore = 0.0f; + assert_leq(acceptor_seq, 0x3fffffffffff); + int64_t acceptor_intronic_seq = (acceptor_seq >> 4) & (~0x3f); + int64_t acceptor_exonic_seq = acceptor_seq & 0x3f; + int64_t acceptor_rest_seq = acceptor_intronic_seq | acceptor_exonic_seq; + int acceptor_seq18 = (acceptor_seq >> 8) & 0x3; + int acceptor_seq19 = (acceptor_seq >> 6) & 0x3; + 
acceptor_probscore = acceptor_cons1[acceptor_seq18] * acceptor_cons2[acceptor_seq19] / (background_bp_prob[acceptor_seq18] * background_bp_prob[acceptor_seq19]); + + int64_t acceptor_seq1 = acceptor_rest_seq >> 28 & 0x3fff; // [0, 7] + acceptor_probscore *= acceptor_me2x3acc1[acceptor_seq1]; + int64_t acceptor_seq2 = (acceptor_rest_seq >> 14) & 0x3fff; // [7, 7] + acceptor_probscore *= acceptor_me2x3acc2[acceptor_seq2]; + int64_t acceptor_seq3 = acceptor_rest_seq & 0x3fff; // [14, 7] + acceptor_probscore *= acceptor_me2x3acc3[acceptor_seq3]; + int64_t acceptor_seq4 = (acceptor_rest_seq >> 20) & 0x3fff; // [4, 7] + acceptor_probscore *= acceptor_me2x3acc4[acceptor_seq4]; + int64_t acceptor_seq5 = (acceptor_rest_seq >> 6) & 0x3fff; // [11, 7] + acceptor_probscore *= acceptor_me2x3acc5[acceptor_seq5]; + int64_t acceptor_seq6 = acceptor_seq1 & 0x3f; // [4, 3] + acceptor_probscore /= acceptor_me2x3acc6[acceptor_seq6]; + int64_t acceptor_seq7 = acceptor_seq4 & 0xff; // [7, 4] + acceptor_probscore /= acceptor_me2x3acc7[acceptor_seq7]; + int64_t acceptor_seq8 = acceptor_seq2 & 0x3f; // [11, 3] + acceptor_probscore /= acceptor_me2x3acc8[acceptor_seq8]; + int64_t acceptor_seq9 = acceptor_seq5 & 0xff; // [14, 4] + acceptor_probscore /= acceptor_me2x3acc9[acceptor_seq9]; + + donor_probscore /= (1.0f + donor_probscore); + acceptor_probscore /= (1.0f + acceptor_probscore); + probscore = (donor_probscore + acceptor_probscore) / 2.0; + +#else + assert_lt(donor_seq, (int)(1 << (donor_len << 1))); + probscore = donor_prob_sum[donor_seq]; + + int acceptor_seq1 = (int)(acceptor_seq >> (acceptor_len2 << 1)); + assert_lt(acceptor_seq1, (int)(1 << (acceptor_len1 << 1))); + probscore *= acceptor_prob_sum1[acceptor_seq1]; + + int acceptor_seq2 = acceptor_seq % (1 << (acceptor_len2 << 1)); + probscore *= acceptor_prob_sum2[acceptor_seq2]; + + probscore = 1.0 / (1.0 + probscore); +#endif + return probscore; +} + diff --git a/splice_site.h b/splice_site.h new file mode 100644 index 
0000000..b0a5f43 --- /dev/null +++ b/splice_site.h @@ -0,0 +1,615 @@ +/* + * Copyright 2013, Daehwan Kim + * + * This file is part of HISAT. + * + * HISAT is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT. If not, see . + */ + +#ifndef SPLICE_SITE_H_ +#define SPLICE_SITE_H_ + +#include +#include +#include +#include +#include "assert_helpers.h" +#include "mem_ids.h" +#include "ref_coord.h" +#include "ds.h" +#include "read.h" +#include "reference.h" +#include "hier_idx_common.h" +#include "gfm.h" +#include "alt.h" + +enum { + SPL_UNKNOWN = 1, + SPL_FW, + SPL_RC, + SPL_SEMI_FW, + SPL_SEMI_RC, +}; + +using namespace std; + +// #define NEW_PROB_MODEL +// the following parameters are borrowed from Yeo and Burge 2004 in Journal of Computational Biology +const size_t donor_exonic_len = 3; +const size_t donor_intronic_len = 6; +const size_t donor_len = donor_exonic_len + donor_intronic_len; + +#if defined(NEW_PROB_MODEL) +const size_t acceptor_intronic_len = 20; +const size_t acceptor_exonic_len = 3; +const size_t acceptor_len = acceptor_intronic_len + acceptor_exonic_len; +#else +// the following parameters are borrowed from Ch 3. in Bioinformatics - From Genomes to Drugs. 
Volume I: Basic Technologies (Victor Solovyev) +const size_t acceptor_intronic_len = 14; +const size_t acceptor_exonic_len = 1; +const size_t acceptor_len = acceptor_intronic_len + acceptor_exonic_len; +const size_t acceptor_len1 = acceptor_len / 2; +const size_t acceptor_len2 = acceptor_len - acceptor_len1; + +// the following parameters are borrowed from Yeo and Burge 2004 in Journal of Computational Biology +const float background_prob[4] = {0.27f, 0.23f, 0.23f, 0.27f}; + +extern float donor_prob[4][donor_len]; +extern float acceptor_prob[4][acceptor_len]; + +extern float donor_prob_sum[1 << (donor_len << 1)]; +extern float acceptor_prob_sum1[1 << (acceptor_len1 << 1)]; +extern float acceptor_prob_sum2[1 << (acceptor_len2 << 1)]; + +#endif +const size_t intronic_len = max(donor_intronic_len, acceptor_intronic_len); + +extern void init_junction_prob(); + +/** + * + */ +class SpliceSitePos { + +public: + + SpliceSitePos() { reset(); } + + SpliceSitePos(const SpliceSitePos& c) { init(c); } + + SpliceSitePos(uint32_t ref, uint32_t left, uint32_t right, uint8_t splDir, bool exon = false) + { + init(ref, left, right, splDir, exon); + } + + /** + * Copy given fields into this Coord. + */ + void init(uint32_t ref, uint32_t left, uint32_t right, uint8_t splDir, bool exon = false) { + _ref = ref; + _left = left; + _right = right; + _splDir = splDir; + _exon = exon; + } + + /** + * Copy contents of given Coord into this one. + */ + void init(const SpliceSitePos& c) { + _ref = c._ref; + _left = c._left; + _right = c._right; + _splDir = c._splDir; + _exon = c._exon; + } + + /** + * Return true iff this Coord is identical to the given Coord. + */ + bool operator==(const SpliceSitePos& o) const { + assert(inited()); + assert(o.inited()); + return _ref == o._ref && + _left == o._left && + _right == o._right && + _splDir == o._splDir && + _exon == o._exon; + + } + + /** + * Return true iff this Coord is less than the given Coord. 
One Coord is + * less than another if (a) its reference id is less, (b) its orientation is + * less, or (c) its offset is less. + */ + bool operator<(const SpliceSitePos& o) const { + if(_ref < o._ref) return true; + if(_ref > o._ref) return false; + if(_left < o._left) return true; + if(_left > o._left) return false; + if(_right < o._right) return true; + if(_right > o._right) return false; + if(_splDir < o._splDir) return true; + if(_splDir > o._splDir) return false; + if(_exon != o._exon) return _exon; + return false; + } + + /** + * Return the opposite result from operator<. + */ + bool operator>=(const SpliceSitePos& o) const { + return !((*this) < o); + } + + /** + * Return true iff this Coord is greater than the given Coord. One Coord + * is greater than another if (a) its reference id is greater, (b) its + * orientation is greater, or (c) its offset is greater. + */ + bool operator>(const SpliceSitePos& o) const { + if(_ref > o._ref) return true; + if(_ref < o._ref) return false; + if(_left > o._left) return true; + if(_left < o._left) return false; + if(_right > o._right) return true; + if(_right < o._right) return false; + if(_splDir > o._splDir) return true; + if(_splDir < o._splDir) return false; + if(_exon != o._exon) return !_exon; + return false; + } + + /** + * Return the opposite result from operator>. + */ + bool operator<=(const SpliceSitePos& o) const { + return !((*this) > o); + } + + /** + * Reset this coord to uninitialized state. + */ + virtual void reset() { + _ref = std::numeric_limits::max(); + _left = std::numeric_limits::max(); + _right = std::numeric_limits::max(); + _splDir = SPL_UNKNOWN; + _exon = false; + } + + /** + * Return true iff this Coord is initialized (i.e. ref and off have both + * been set since the last call to reset()). 
+ */ + bool inited() const { + if(_ref != std::numeric_limits::max() && + _left != std::numeric_limits::max() && + _right != std::numeric_limits::max()) + { + return true; + } + return false; + } + +#ifndef NDEBUG + /** + * Check that coord is internally consistent. + */ + bool repOk() const { + if(_ref == std::numeric_limits::max() || + _left == std::numeric_limits::max() || + _right == std::numeric_limits::max()) { + return false; + } + return true; + } +#endif + + /** + * Check whether an interval defined by this coord and having + * length 'len' is contained within an interval defined by + * 'inbegin' and 'inend'. + */ +#if 0 + bool within(int64_t len, int64_t inbegin, int64_t inend) const { + return off_ >= inbegin && off_ + len <= inend; + } +#endif + + uint32_t ref() const { return _ref; } + uint32_t left() const { return _left; } + uint32_t right() const { return _right; } + uint8_t splDir() const { return _splDir; } + bool canonical() const { return _splDir == SPL_FW || _splDir == SPL_RC; } + uint32_t intron_len() const { return _right - _left - 1; } + bool exon() const { return _exon; } + +protected: + + uint32_t _ref; // which reference? + uint32_t _left; // 0-based offset of the right most base of the left flanking exon + uint32_t _right; // 0-based offset of the left most base of the right flanking exon + uint8_t _splDir; + bool _exon; +}; + +/** + * + */ +class SpliceSite : public SpliceSitePos { + +public: + + SpliceSite() { reset(); } + + SpliceSite(const SpliceSite& c) { init(c); } + + SpliceSite(uint32_t ref, + uint32_t left, + uint32_t right, + uint8_t splDir, + bool exon = false, + bool fromFile = false, + bool known = false) + { + init(ref, left, right, splDir, exon, fromFile, known); + } + + /** + * Copy given fields into this Coord. 
+ */ + void init(uint32_t ref, + uint32_t left, + uint32_t right, + uint8_t splDir, + bool exon = false, + bool fromFile = false, + bool known = false) + { + SpliceSitePos::init(ref, left, right, splDir, exon); + + // _donordint = 0; + // _acceptordint = 0; + // _leftseq = 0; + // _rightseq = 0; + _leftext = 0; + _rightext = 0; + _numreads = 0; + _editdist = 0; + // _probscore = 0.0f; + _readid = 0; + _exon = exon; + _fromfile = fromFile; + _known = known; + } + + /** + * Copy contents of given Coord into this one. + */ + void init(const SpliceSite& c) { + SpliceSitePos::init(c); + + // _donordint = c._donordint; + // _acceptordint = c._acceptordint; + // _leftseq = c._leftseq; + // _rightseq = c._rightseq; + _leftext = c._leftext; + _rightext = c._rightext; + _numreads = c._numreads; + _editdist = c._editdist; + // _probscore = c._probscore; + _readid = c._readid; + _fromfile = c._fromfile; + _known = c._known; + } + + /** + * Reset this coord to uninitialized state. + */ + virtual void reset() { + SpliceSitePos::reset(); + + // _donordint = 0; + // _acceptordint = 0; + // _leftseq = 0; + // _rightseq = 0; + _leftext = 0; + _rightext = 0; + _numreads = 0; + _editdist = 0; + // _probscore = 0.0; + _readid = 0; + _fromfile = false; + _known = false; + } + + // uint8_t donordint() const { return _donordint; } + // uint8_t acceptordint() const { return _acceptordint; } + // uint64_t leftseq() const { return _leftseq; } + // uint64_t rightseq() const { return _rightseq; } + + uint32_t leftext() const { return _leftext; } + uint32_t rightext() const { return _rightext; } + + uint64_t numreads() const { return _numreads; } + uint32_t editdist() const { return _editdist; } + // float probscore() const { return _probscore; } + +public: + + // uint8_t _donordint; // 3' dinucleotide on Watson strand + // uint8_t _acceptordint; // 5' dinucleotide on Watson strand + // uint64_t _leftseq; // left 32bp flanking seq on Watson strand + // uint64_t _rightseq; // right 32bp flanking 
seq on Watson strand + + uint32_t _leftext; + uint32_t _rightext; + + uint64_t _numreads; // number of reads spanning this splice site + + uint32_t _editdist; + + // float _probscore; + + uint64_t _readid; + bool _fromfile; + bool _known; +}; + +std::ostream& operator<<(std::ostream& out, const SpliceSite& c); + +/** + * + */ +class Exon { +public: + + Exon() { reset(); } + + Exon(const Exon& c) { init(c); } + + Exon(uint32_t ref, uint32_t left, uint32_t right, bool fw) + { + init(ref, left, right, fw); + } + + /** + * Copy given fields into this Coord. + */ + void init(uint32_t ref, uint32_t left, uint32_t right, bool fw) { + _ref = ref; + _left = left; + _right = right; + _fw = fw; + } + + /** + * Copy contents of given Coord into this one. + */ + void init(const Exon& c) { + _ref = c._ref; + _left = c._left; + _right = c._right; + _fw = c._fw; + } + + /** + * Return true iff this Coord is identical to the given Coord. + */ + bool operator==(const Exon& o) const { + assert(inited()); + assert(o.inited()); + return _ref == o._ref && + _left == o._left && + _right == o._right && + _fw == o._fw; + } + + /** + * Return true iff this Coord is less than the given Coord. One Coord is + * less than another if (a) its reference id is less, (b) its orientation is + * less, or (c) its offset is less. + */ + bool operator<(const Exon& o) const { + if(_ref < o._ref) return true; + if(_ref > o._ref) return false; + if(_left < o._left) return true; + if(_left > o._left) return false; + if(_right < o._right) return true; + if(_right > o._right) return false; + if(_fw != o._fw) return _fw; + return false; + } + + /** + * Return the opposite result from operator<. + */ + bool operator>=(const Exon& o) const { + return !((*this) < o); + } + + /** + * Return true iff this Coord is greater than the given Coord. One Coord + * is greater than another if (a) its reference id is greater, (b) its + * orientation is greater, or (c) its offset is greater. 
+ */ + bool operator>(const Exon& o) const { + if(_ref > o._ref) return true; + if(_ref < o._ref) return false; + if(_left > o._left) return true; + if(_left < o._left) return false; + if(_right > o._right) return true; + if(_right < o._right) return false; + if(_fw != o._fw) return !_fw; + return false; + } + + /** + * Return the opposite result from operator>. + */ + bool operator<=(const Exon& o) const { + return !((*this) > o); + } + + /** + * Reset this coord to uninitialized state. + */ + void reset() { + _ref = std::numeric_limits::max(); + _left = std::numeric_limits::max(); + _right = std::numeric_limits::max(); + _fw = true; + } + + /** + * Return true iff this Coord is initialized (i.e. ref and off have both + * been set since the last call to reset()). + */ + bool inited() const { + if(_ref != std::numeric_limits::max() && + _left != std::numeric_limits::max() && + _right != std::numeric_limits::max()) + { + return true; + } + return false; + } + +#ifndef NDEBUG + /** + * Check that coord is internally consistent. + */ + bool repOk() const { + if(_ref == std::numeric_limits::max() || + _left == std::numeric_limits::max() || + _right == std::numeric_limits::max()) { + return false; + } + return true; + } +#endif + + uint32_t ref() const { return _ref; } + uint32_t left() const { return _left; } + uint32_t right() const { return _right; } + bool fw() const { return _fw; } + +protected: + uint32_t _ref; // which reference? 
+ uint32_t _left; // 0-based offset of the right most base of the left flanking exon + uint32_t _right; // 0-based offset of the left most base of the right flanking exon + bool _fw; // true -> Watson strand +}; + +class AlnRes; + +class SpliceSiteDB { +public: + typedef RedBlackNode Node; + +public: + SpliceSiteDB( + const BitPairReference& refs, + const EList& refnames, + bool threadSafe = true, + bool write = false, + bool read = false); + ~SpliceSiteDB(); + + bool addSpliceSite( + const Read& rd, + const AlnRes& rs, + uint32_t minAnchorLen = 15); + + static float probscore( + int64_t donor_seq, + int64_t acceptor_seq); + + size_t size(uint64_t ref) const; + bool empty(uint64_t ref) const; + + bool empty() { return _empty; } + + bool write() const { return _write; } + bool read() const { return _read; } + + bool getSpliceSite(SpliceSite& ss) const; + void getLeftSpliceSites(uint32_t ref, uint32_t left, uint32_t range, EList& spliceSites) const; + void getRightSpliceSites(uint32_t ref, uint32_t right, uint32_t range, EList& spliceSites) const; + bool hasSpliceSites(uint32_t ref, uint32_t left1, uint32_t right1, uint32_t left2, uint32_t right2, bool includeNovel = false) const; + bool insideExon(uint32_t ref, uint32_t left, uint32_t right) const; + + void print(ofstream& out); + void read(const GFM& gfm, const EList >& alts); + void read(ifstream& in, bool known = false); + +private: + void getSpliceSites_recur( + const RedBlackNode *node, + uint32_t left, + uint32_t right, + EList& spliceSites) const; + + bool hasSpliceSites_recur( + const RedBlackNode *node, + uint32_t left, + uint32_t right, + bool includeNovel) const; + + const RedBlackNode* getSpliceSite_temp(const SpliceSitePos& ssp) const; + + void print_recur( + const RedBlackNode *node, + ofstream& out, + const uint32_t numreads_cutoff, + const uint32_t numreads_cutoff2, + EList& ss_list); + + Pool& pool(uint64_t ref); + + void print_impl( + ofstream& out, + EList& ss_list, + const SpliceSite* ss = NULL); 
+ +private: + uint64_t _numRefs; + EList _refnames; + + EList* > _fwIndex; + EList* > _bwIndex; + + ELList _spliceSites; + + ELList _pool; // dispenses memory pages + + bool _write; + bool _read; + + EList _mutex; + bool _threadSafe; + + SStringExpandable raw_refbuf; + ASSERT_ONLY(SStringExpandable destU32); + BTDnaString donorstr; + BTDnaString acceptorstr; + + bool _empty; + + EList _exons; +}; + +#endif /*ifndef SPLICE_SITE_H_*/ diff --git a/splice_site_mem.h b/splice_site_mem.h new file mode 100644 index 0000000..2d1d1e7 --- /dev/null +++ b/splice_site_mem.h @@ -0,0 +1,6224 @@ +#ifndef SPLICE_SITE_MEM_H_ +#define SPLICE_SITE_MEM_H_ + +static const float background_bp_prob[4] = {0.27f, 0.23f, 0.23f, 0.27f}; +static const float donor_cons1[4] = {0.004f, 0.0032f, 0.9896f, 0.0032f}; +static const float donor_cons2[4] = {0.0034f, 0.0039f, 0.0042f, 0.9884f}; +static const float acceptor_cons1[4] = {0.9903f, 0.0032f, 0.0034f, 0.0030f}; +static const float acceptor_cons2[4] = {0.0027f, 0.0037f, 0.9905f, 0.0030f}; + +static const float donor_me2x5[16384] = { +0.143596f, 0.204157f, 0.192979f, 1.061763f, 0.231687f, 0.483900f, 0.466345f, 1.279266f, 7.648993f, 10.067296f, 6.140895f, 53.603145f, 0.119893f, 0.098323f, 0.141841f, 0.789030f, +0.017593f, 0.048968f, 0.034133f, 0.188161f, 0.040230f, 0.164498f, 0.116904f, 0.321303f, 1.594035f, 4.107312f, 1.847533f, 16.157861f, 0.022057f, 0.035412f, 0.037672f, 0.209963f, +0.007744f, 0.021649f, 0.006671f, 0.075685f, 0.008285f, 0.034023f, 0.010689f, 0.060461f, 0.786409f, 2.035123f, 0.404707f, 7.283977f, 0.003849f, 0.006207f, 0.002919f, 0.033480f, +0.019657f, 0.048412f, 0.030548f, 0.172960f, 0.023873f, 0.086370f, 0.055565f, 0.156854f, 0.930388f, 2.121214f, 0.863749f, 7.758677f, 0.014753f, 0.020958f, 0.020183f, 0.115538f, +0.000036f, 0.000079f, 0.000033f, 0.000718f, 0.000041f, 0.000132f, 0.000057f, 0.000609f, 0.013171f, 0.027007f, 0.007361f, 0.251065f, 0.000045f, 0.000058f, 0.000037f, 0.000809f, +0.000001f, 0.000004f, 0.000001f, 
0.000026f, 0.000001f, 0.000009f, 0.000003f, 0.000032f, 0.000570f, 0.002290f, 0.000460f, 0.015726f, 0.000002f, 0.000004f, 0.000002f, 0.000045f, +0.000001f, 0.000005f, 0.000001f, 0.000029f, 0.000001f, 0.000005f, 0.000001f, 0.000016f, 0.000759f, 0.003059f, 0.000272f, 0.019118f, 0.000001f, 0.000002f, 0.000000f, 0.000019f, +0.000002f, 0.000009f, 0.000003f, 0.000057f, 0.000002f, 0.000011f, 0.000003f, 0.000036f, 0.000778f, 0.002763f, 0.000503f, 0.017645f, 0.000003f, 0.000006f, 0.000003f, 0.000058f, +0.005656f, 0.019340f, 0.010995f, 0.107534f, 0.010056f, 0.050509f, 0.029277f, 0.142761f, 1.189892f, 3.766273f, 1.381765f, 21.439941f, 0.003482f, 0.006868f, 0.005959f, 0.058922f, +0.000416f, 0.002786f, 0.001168f, 0.011444f, 0.001049f, 0.010311f, 0.004407f, 0.021532f, 0.148908f, 0.922725f, 0.249638f, 3.880905f, 0.000385f, 0.001485f, 0.000950f, 0.009416f, +0.000106f, 0.000710f, 0.000132f, 0.002652f, 0.000124f, 0.001229f, 0.000232f, 0.002335f, 0.042330f, 0.263445f, 0.031510f, 1.008098f, 0.000039f, 0.000150f, 0.000042f, 0.000865f, +0.000125f, 0.000743f, 0.000282f, 0.002838f, 0.000168f, 0.001461f, 0.000565f, 0.002836f, 0.023448f, 0.128565f, 0.031487f, 0.502760f, 0.000069f, 0.000237f, 0.000137f, 0.001398f, +0.000345f, 0.000440f, 0.000464f, 0.010163f, 0.000033f, 0.000062f, 0.000067f, 0.000728f, 0.043093f, 0.050775f, 0.034547f, 1.201494f, 0.000144f, 0.000106f, 0.000171f, 0.003780f, +0.000047f, 0.000117f, 0.000091f, 0.002005f, 0.000006f, 0.000023f, 0.000019f, 0.000204f, 0.009996f, 0.023059f, 0.011570f, 0.403146f, 0.000030f, 0.000042f, 0.000050f, 0.001120f, +0.000029f, 0.000072f, 0.000025f, 0.001123f, 0.000002f, 0.000007f, 0.000002f, 0.000053f, 0.006867f, 0.015910f, 0.003529f, 0.253063f, 0.000007f, 0.000010f, 0.000005f, 0.000249f, +0.000066f, 0.000145f, 0.000102f, 0.002301f, 0.000005f, 0.000015f, 0.000011f, 0.000124f, 0.007286f, 0.014872f, 0.006755f, 0.241743f, 0.000025f, 0.000031f, 0.000034f, 0.000769f, +0.009874f, 0.008446f, 0.012943f, 0.086116f, 1.028860f, 1.292943f, 2.020043f, 
6.700973f, 13.092332f, 10.367984f, 10.252782f, 108.224270f, 0.271467f, 0.133951f, 0.313275f, 2.107366f, +0.001717f, 0.002876f, 0.003250f, 0.021663f, 0.253596f, 0.623901f, 0.718808f, 2.389038f, 3.872948f, 6.004407f, 4.378581f, 46.307291f, 0.070892f, 0.068483f, 0.118107f, 0.796016f, +0.000362f, 0.000609f, 0.000304f, 0.004171f, 0.024999f, 0.061770f, 0.031462f, 0.215197f, 0.914634f, 1.424157f, 0.459131f, 9.992836f, 0.005922f, 0.005746f, 0.004381f, 0.060761f, +0.001131f, 0.001676f, 0.001715f, 0.011741f, 0.088727f, 0.193148f, 0.201444f, 0.687660f, 1.332838f, 1.828380f, 1.206975f, 13.110603f, 0.027958f, 0.023897f, 0.037309f, 0.258268f, +0.000014f, 0.000019f, 0.000013f, 0.000342f, 0.001058f, 0.002070f, 0.001445f, 0.018734f, 0.132319f, 0.163239f, 0.072129f, 2.975054f, 0.000601f, 0.000462f, 0.000482f, 0.012681f, +0.000001f, 0.000001f, 0.000001f, 0.000018f, 0.000054f, 0.000208f, 0.000107f, 0.001388f, 0.008134f, 0.019645f, 0.006401f, 0.264521f, 0.000033f, 0.000049f, 0.000038f, 0.000995f, +0.000000f, 0.000001f, 0.000000f, 0.000009f, 0.000014f, 0.000055f, 0.000013f, 0.000337f, 0.005180f, 0.012565f, 0.001810f, 0.153938f, 0.000007f, 0.000011f, 0.000004f, 0.000205f, +0.000001f, 0.000002f, 0.000001f, 0.000023f, 0.000044f, 0.000150f, 0.000070f, 0.000933f, 0.006540f, 0.013977f, 0.004123f, 0.174993f, 0.000030f, 0.000040f, 0.000028f, 0.000755f, +0.000424f, 0.000872f, 0.000804f, 0.009508f, 0.048683f, 0.147128f, 0.138255f, 0.815245f, 2.220356f, 4.228586f, 2.515052f, 47.191117f, 0.008596f, 0.010200f, 0.014348f, 0.171564f, +0.000044f, 0.000178f, 0.000121f, 0.001436f, 0.007206f, 0.042633f, 0.029543f, 0.174538f, 0.394424f, 1.470576f, 0.644992f, 12.125531f, 0.001348f, 0.003131f, 0.003248f, 0.038916f, +0.000005f, 0.000022f, 0.000007f, 0.000159f, 0.000409f, 0.002432f, 0.000745f, 0.009059f, 0.053673f, 0.200983f, 0.038971f, 1.507736f, 0.000065f, 0.000151f, 0.000069f, 0.001712f, +0.000008f, 0.000028f, 0.000017f, 0.000210f, 0.000680f, 0.003561f, 0.002234f, 0.013554f, 0.036620f, 0.120811f, 0.047967f, 
0.926186f, 0.000143f, 0.000295f, 0.000277f, 0.003406f, +0.000002f, 0.000001f, 0.000002f, 0.000057f, 0.000010f, 0.000011f, 0.000020f, 0.000264f, 0.005100f, 0.003615f, 0.003988f, 0.167719f, 0.000023f, 0.000010f, 0.000026f, 0.000698f, +0.000000f, 0.000000f, 0.000001f, 0.000016f, 0.000003f, 0.000006f, 0.000008f, 0.000105f, 0.001679f, 0.002331f, 0.001896f, 0.079883f, 0.000007f, 0.000006f, 0.000011f, 0.000293f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000000f, 0.000001f, 0.000000f, 0.000013f, 0.000552f, 0.000770f, 0.000277f, 0.024003f, 0.000001f, 0.000001f, 0.000001f, 0.000031f, +0.000000f, 0.000000f, 0.000000f, 0.000011f, 0.000001f, 0.000002f, 0.000003f, 0.000038f, 0.000722f, 0.000886f, 0.000653f, 0.028243f, 0.000003f, 0.000002f, 0.000004f, 0.000119f, +21.087740f, 17.884734f, 33.983089f, 30.001875f, 30.794886f, 38.367558f, 74.327616f, 32.716865f, 96.319545f, 75.623049f, 92.727039f, 129.877440f, 22.803677f, 11.155706f, 32.350456f, 28.876139f, +7.196053f, 11.948074f, 16.741476f, 14.808539f, 14.893460f, 36.327235f, 51.895990f, 22.886975f, 55.907610f, 85.933372f, 77.701565f, 109.041030f, 11.684721f, 11.190815f, 23.930992f, 21.401897f, +5.105855f, 8.514407f, 5.274309f, 9.601089f, 4.943664f, 12.110663f, 7.648645f, 6.941859f, 44.457961f, 68.631388f, 27.435065f, 79.232364f, 3.286688f, 3.161432f, 2.988811f, 5.500807f, +8.617404f, 12.660210f, 16.058510f, 14.589219f, 9.471986f, 20.442712f, 26.436794f, 11.974895f, 34.973513f, 47.565386f, 38.933860f, 56.117203f, 8.376484f, 7.098487f, 13.741478f, 12.622157f, +0.224340f, 0.296405f, 0.251652f, 0.868142f, 0.230635f, 0.447648f, 0.387487f, 0.666474f, 7.092942f, 8.675464f, 4.753135f, 26.014280f, 0.367579f, 0.280137f, 0.362985f, 1.266052f, +0.015908f, 0.041147f, 0.025762f, 0.089042f, 0.023178f, 0.088074f, 0.056219f, 0.096882f, 0.855508f, 2.048525f, 0.827646f, 4.538465f, 0.039139f, 0.058395f, 0.055797f, 0.194987f, +0.030439f, 0.079076f, 0.021887f, 0.155686f, 0.020748f, 0.079182f, 0.022345f, 0.079246f, 1.834633f, 4.412133f, 0.788074f, 
8.893401f, 0.029689f, 0.044488f, 0.018793f, 0.135153f, +0.044512f, 0.101876f, 0.057739f, 0.204976f, 0.034444f, 0.115808f, 0.066918f, 0.118444f, 1.250488f, 2.649462f, 0.969013f, 5.457610f, 0.065560f, 0.086550f, 0.074863f, 0.268704f, +1.443648f, 2.944485f, 3.365064f, 5.280921f, 2.322943f, 6.960168f, 8.109783f, 6.345439f, 26.041123f, 49.169426f, 36.261952f, 90.283603f, 1.151068f, 1.354219f, 2.361979f, 3.747706f, +0.295829f, 1.181246f, 0.995497f, 1.565269f, 0.674639f, 3.957341f, 3.400234f, 2.665594f, 9.076784f, 33.552007f, 18.246950f, 45.517779f, 0.354185f, 0.815773f, 1.049233f, 1.667994f, +0.120948f, 0.485045f, 0.180716f, 0.584766f, 0.129036f, 0.760194f, 0.288765f, 0.465872f, 4.159065f, 15.440613f, 3.712374f, 19.058065f, 0.057406f, 0.132793f, 0.075508f, 0.247032f, +0.095576f, 0.337681f, 0.257618f, 0.416038f, 0.115755f, 0.600805f, 0.467313f, 0.376271f, 1.531878f, 5.010390f, 2.466674f, 6.319914f, 0.068501f, 0.139604f, 0.162543f, 0.265399f, +0.412315f, 0.313054f, 0.663504f, 2.333879f, 0.035808f, 0.039940f, 0.086304f, 0.151357f, 4.410267f, 3.099863f, 4.239736f, 23.660012f, 0.223138f, 0.097724f, 0.316104f, 1.124185f, +0.156618f, 0.232800f, 0.363850f, 1.282300f, 0.019277f, 0.042094f, 0.067075f, 0.117860f, 2.849501f, 3.921005f, 3.954662f, 22.111508f, 0.127272f, 0.109123f, 0.260290f, 0.927466f, +0.154738f, 0.231004f, 0.159616f, 1.157655f, 0.008910f, 0.019540f, 0.013766f, 0.049778f, 3.155214f, 4.360536f, 1.944316f, 22.372388f, 0.049849f, 0.042926f, 0.045266f, 0.331935f, +0.234212f, 0.308044f, 0.435833f, 1.577597f, 0.015310f, 0.029581f, 0.042670f, 0.077008f, 2.225993f, 2.710274f, 2.474535f, 14.210571f, 0.113937f, 0.086438f, 0.186645f, 0.683072f, +0.036979f, 0.059113f, 0.094671f, 0.327429f, 0.578785f, 1.359172f, 2.219282f, 3.826934f, 2.983161f, 4.414563f, 4.562388f, 25.034352f, 0.022719f, 0.020948f, 0.051202f, 0.179045f, +0.003656f, 0.011441f, 0.013511f, 0.046821f, 0.081095f, 0.372820f, 0.448903f, 0.775576f, 0.501638f, 1.453291f, 1.107573f, 6.089051f, 0.003373f, 0.006088f, 
0.010973f, 0.038444f, +0.001295f, 0.004071f, 0.002126f, 0.015159f, 0.013442f, 0.062067f, 0.033039f, 0.117474f, 0.199204f, 0.579618f, 0.195289f, 2.209485f, 0.000474f, 0.000859f, 0.000684f, 0.004934f, +0.005193f, 0.014380f, 0.015374f, 0.054717f, 0.061178f, 0.248867f, 0.271262f, 0.481359f, 0.372237f, 0.954207f, 0.658311f, 3.717211f, 0.002868f, 0.004581f, 0.007474f, 0.026895f, +0.000019f, 0.000047f, 0.000034f, 0.000456f, 0.000209f, 0.000764f, 0.000557f, 0.003754f, 0.010577f, 0.024384f, 0.011260f, 0.241434f, 0.000018f, 0.000025f, 0.000028f, 0.000378f, +0.000000f, 0.000002f, 0.000001f, 0.000014f, 0.000006f, 0.000044f, 0.000023f, 0.000158f, 0.000370f, 0.001668f, 0.000568f, 0.012203f, 0.000001f, 0.000002f, 0.000001f, 0.000017f, +0.000000f, 0.000002f, 0.000000f, 0.000012f, 0.000003f, 0.000020f, 0.000005f, 0.000065f, 0.000396f, 0.001794f, 0.000270f, 0.011941f, 0.000000f, 0.000001f, 0.000000f, 0.000006f, +0.000001f, 0.000006f, 0.000003f, 0.000037f, 0.000011f, 0.000068f, 0.000033f, 0.000229f, 0.000641f, 0.002559f, 0.000789f, 0.017406f, 0.000001f, 0.000003f, 0.000002f, 0.000028f, +0.002055f, 0.007900f, 0.007609f, 0.046783f, 0.035439f, 0.200142f, 0.196553f, 0.602490f, 0.654683f, 2.329900f, 1.448256f, 14.126038f, 0.000931f, 0.002064f, 0.003035f, 0.018862f, +0.000122f, 0.000918f, 0.000652f, 0.004017f, 0.002982f, 0.032967f, 0.023875f, 0.073323f, 0.066109f, 0.460594f, 0.211126f, 2.063238f, 0.000083f, 0.000360f, 0.000391f, 0.002432f, +0.000025f, 0.000188f, 0.000059f, 0.000749f, 0.000285f, 0.003162f, 0.001013f, 0.006399f, 0.015127f, 0.105850f, 0.021450f, 0.431396f, 0.000007f, 0.000029f, 0.000014f, 0.000180f, +0.000047f, 0.000311f, 0.000200f, 0.001267f, 0.000607f, 0.005937f, 0.003892f, 0.012277f, 0.013235f, 0.081589f, 0.033855f, 0.339814f, 0.000019f, 0.000073f, 0.000072f, 0.000459f, +0.000003f, 0.000004f, 0.000007f, 0.000100f, 0.000003f, 0.000006f, 0.000010f, 0.000069f, 0.000536f, 0.000710f, 0.000818f, 0.017891f, 0.000001f, 0.000001f, 0.000002f, 0.000027f, +0.000000f, 0.000001f, 
0.000001f, 0.000016f, 0.000000f, 0.000002f, 0.000002f, 0.000016f, 0.000100f, 0.000260f, 0.000221f, 0.004844f, 0.000000f, 0.000000f, 0.000000f, 0.000007f, +0.000000f, 0.000000f, 0.000000f, 0.000007f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000055f, 0.000144f, 0.000054f, 0.002447f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000001f, 0.000001f, 0.000002f, 0.000023f, 0.000000f, 0.000001f, 0.000002f, 0.000012f, 0.000093f, 0.000213f, 0.000164f, 0.003693f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, +0.019499f, 0.020735f, 0.022501f, 0.237798f, 0.024483f, 0.038246f, 0.042315f, 0.222967f, 3.952008f, 3.890416f, 2.724379f, 45.679610f, 0.012357f, 0.007580f, 0.012553f, 0.134133f, +0.002053f, 0.004275f, 0.003421f, 0.036220f, 0.003654f, 0.011175f, 0.009117f, 0.048132f, 0.707871f, 1.364217f, 0.704483f, 11.834727f, 0.001954f, 0.002346f, 0.002866f, 0.030678f, +0.000548f, 0.001146f, 0.000406f, 0.008838f, 0.000456f, 0.001402f, 0.000506f, 0.005494f, 0.211845f, 0.410043f, 0.093612f, 3.236357f, 0.000207f, 0.000249f, 0.000135f, 0.002967f, +0.001696f, 0.003124f, 0.002263f, 0.024613f, 0.001603f, 0.004337f, 0.003203f, 0.017370f, 0.305428f, 0.520834f, 0.243475f, 4.200982f, 0.000966f, 0.001027f, 0.001135f, 0.012480f, +0.000002f, 0.000003f, 0.000001f, 0.000058f, 0.000002f, 0.000004f, 0.000002f, 0.000038f, 0.002463f, 0.003777f, 0.001182f, 0.077429f, 0.000002f, 0.000002f, 0.000001f, 0.000050f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000092f, 0.000275f, 0.000064f, 0.004168f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000074f, 0.000223f, 0.000023f, 0.003074f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000092f, 0.000246f, 0.000051f, 0.003457f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000865f, 0.002212f, 0.001444f, 0.027121f, 0.001197f, 0.004496f, 
0.002992f, 0.028021f, 0.692321f, 1.639010f, 0.690330f, 20.575120f, 0.000404f, 0.000596f, 0.000594f, 0.011280f, +0.000055f, 0.000274f, 0.000132f, 0.002481f, 0.000107f, 0.000789f, 0.000387f, 0.003632f, 0.074466f, 0.345132f, 0.107195f, 3.201062f, 0.000038f, 0.000111f, 0.000081f, 0.001549f, +0.000008f, 0.000042f, 0.000009f, 0.000349f, 0.000008f, 0.000057f, 0.000012f, 0.000239f, 0.012841f, 0.059775f, 0.008208f, 0.504403f, 0.000002f, 0.000007f, 0.000002f, 0.000086f, +0.000012f, 0.000054f, 0.000024f, 0.000455f, 0.000013f, 0.000083f, 0.000037f, 0.000354f, 0.008668f, 0.035549f, 0.009995f, 0.306557f, 0.000005f, 0.000013f, 0.000009f, 0.000170f, +0.000012f, 0.000011f, 0.000014f, 0.000577f, 0.000001f, 0.000001f, 0.000002f, 0.000032f, 0.005644f, 0.004974f, 0.003885f, 0.259533f, 0.000004f, 0.000002f, 0.000004f, 0.000163f, +0.000001f, 0.000003f, 0.000002f, 0.000098f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, 0.001125f, 0.001941f, 0.001118f, 0.074847f, 0.000001f, 0.000001f, 0.000001f, 0.000041f, +0.000001f, 0.000001f, 0.000000f, 0.000033f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000469f, 0.000813f, 0.000207f, 0.028501f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, +0.000001f, 0.000002f, 0.000002f, 0.000083f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000606f, 0.000926f, 0.000483f, 0.033178f, 0.000000f, 0.000000f, 0.000000f, 0.000021f, +0.001269f, 0.000812f, 0.001429f, 0.018259f, 0.102928f, 0.096744f, 0.173524f, 1.105688f, 6.403900f, 3.793073f, 4.306173f, 87.311341f, 0.026488f, 0.009776f, 0.026247f, 0.339154f, +0.000190f, 0.000238f, 0.000308f, 0.003948f, 0.021805f, 0.040124f, 0.053071f, 0.338813f, 1.628216f, 1.888035f, 1.580613f, 32.109815f, 0.005945f, 0.004296f, 0.008505f, 0.110109f, +0.000024f, 0.000031f, 0.000017f, 0.000461f, 0.001304f, 0.002410f, 0.001409f, 0.018513f, 0.233255f, 0.271651f, 0.100541f, 4.203303f, 0.000301f, 0.000219f, 0.000191f, 0.005098f, +0.000092f, 0.000102f, 0.000120f, 0.001582f, 0.005640f, 0.009183f, 0.010995f, 0.072094f, 0.414225f, 0.425006f, 
0.322092f, 6.720473f, 0.001733f, 0.001108f, 0.001986f, 0.026409f, +0.000001f, 0.000001f, 0.000001f, 0.000026f, 0.000038f, 0.000056f, 0.000045f, 0.001119f, 0.023423f, 0.021613f, 0.010963f, 0.868612f, 0.000021f, 0.000012f, 0.000015f, 0.000739f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000005f, 0.000003f, 0.000071f, 0.001237f, 0.002235f, 0.000836f, 0.066380f, 0.000001f, 0.000001f, 0.000001f, 0.000050f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000010f, 0.000478f, 0.000867f, 0.000143f, 0.023433f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000003f, 0.000001f, 0.000035f, 0.000736f, 0.001176f, 0.000398f, 0.032463f, 0.000001f, 0.000001f, 0.000001f, 0.000028f, +0.000061f, 0.000094f, 0.000100f, 0.002270f, 0.005485f, 0.012397f, 0.013374f, 0.151485f, 1.223030f, 1.742125f, 1.189552f, 42.873926f, 0.000944f, 0.000838f, 0.001354f, 0.031094f, +0.000006f, 0.000017f, 0.000013f, 0.000295f, 0.000698f, 0.003088f, 0.002456f, 0.027875f, 0.186733f, 0.520731f, 0.262200f, 9.468390f, 0.000127f, 0.000221f, 0.000263f, 0.006062f, +0.000000f, 0.000001f, 0.000000f, 0.000020f, 0.000024f, 0.000107f, 0.000038f, 0.000878f, 0.015414f, 0.043172f, 0.009610f, 0.714191f, 0.000004f, 0.000006f, 0.000003f, 0.000162f, +0.000001f, 0.000002f, 0.000001f, 0.000032f, 0.000049f, 0.000191f, 0.000137f, 0.001600f, 0.012816f, 0.031624f, 0.014415f, 0.534641f, 0.000010f, 0.000015f, 0.000017f, 0.000392f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000000f, 0.000000f, 0.000011f, 0.000632f, 0.000335f, 0.000425f, 0.034298f, 0.000001f, 0.000000f, 0.000001f, 0.000028f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000179f, 0.000186f, 0.000173f, 0.014040f, 0.000000f, 0.000000f, 0.000000f, 0.000010f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000036f, 0.000037f, 0.000015f, 0.002559f, 0.000000f, 0.000000f, 
0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000057f, 0.000052f, 0.000044f, 0.003670f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +5.883660f, 3.732242f, 8.141462f, 13.806528f, 6.686468f, 6.230917f, 13.857669f, 11.716778f, 102.254810f, 60.047269f, 84.527496f, 227.416240f, 4.829313f, 1.767043f, 5.882782f, 10.086437f, +1.725656f, 2.143027f, 3.447274f, 5.857208f, 2.779433f, 5.070635f, 8.316031f, 7.044777f, 51.013235f, 58.646675f, 60.878462f, 164.104410f, 2.126872f, 1.523541f, 3.740293f, 6.425304f, +0.742749f, 0.926398f, 0.658811f, 2.303627f, 0.559658f, 1.025442f, 0.743499f, 1.296189f, 24.607926f, 28.413038f, 13.039277f, 72.334638f, 0.362907f, 0.261090f, 0.283372f, 1.001800f, +1.527654f, 1.678646f, 2.444420f, 4.265789f, 1.306744f, 2.109392f, 3.131698f, 2.724829f, 23.590641f, 23.997232f, 22.550174f, 62.433122f, 1.127128f, 0.714409f, 1.587694f, 2.801326f, +0.022652f, 0.022385f, 0.021819f, 0.144581f, 0.018123f, 0.026309f, 0.026145f, 0.086378f, 2.725090f, 2.492968f, 1.568037f, 16.484824f, 0.028172f, 0.016058f, 0.023888f, 0.160042f, +0.001381f, 0.002671f, 0.001920f, 0.012746f, 0.001565f, 0.004449f, 0.003260f, 0.010792f, 0.282501f, 0.505950f, 0.234673f, 2.471862f, 0.002578f, 0.002877f, 0.003156f, 0.021185f, +0.001602f, 0.003114f, 0.000989f, 0.013518f, 0.000850f, 0.002426f, 0.000786f, 0.005355f, 0.367501f, 0.661040f, 0.135550f, 2.938301f, 0.001186f, 0.001330f, 0.000645f, 0.008908f, +0.002856f, 0.004888f, 0.003181f, 0.021690f, 0.001720f, 0.004325f, 0.002869f, 0.009754f, 0.305256f, 0.483740f, 0.203113f, 2.197385f, 0.003193f, 0.003152f, 0.003130f, 0.021582f, +0.453592f, 0.691964f, 0.907862f, 2.736732f, 0.567994f, 1.272900f, 1.702692f, 2.559086f, 31.132643f, 43.966405f, 37.224543f, 178.026050f, 0.274516f, 0.241560f, 0.483688f, 1.474182f, +0.079889f, 0.238592f, 0.230839f, 0.697195f, 0.141781f, 0.622043f, 0.613590f, 0.923973f, 9.326760f, 25.786173f, 16.099448f, 77.143294f, 0.072601f, 0.125069f, 0.184673f, 
0.563926f, +0.019813f, 0.059431f, 0.025420f, 0.158001f, 0.016450f, 0.072486f, 0.031610f, 0.097959f, 2.592435f, 7.198571f, 1.986945f, 19.593366f, 0.007138f, 0.012350f, 0.008062f, 0.050663f, +0.019080f, 0.050421f, 0.044160f, 0.136989f, 0.017984f, 0.069813f, 0.062340f, 0.096417f, 1.163622f, 2.846612f, 1.608872f, 7.918023f, 0.010380f, 0.015822f, 0.021149f, 0.066331f, +0.029160f, 0.016559f, 0.040292f, 0.272241f, 0.001971f, 0.001644f, 0.004079f, 0.013740f, 1.186788f, 0.623908f, 0.979646f, 10.501258f, 0.011978f, 0.003924f, 0.014570f, 0.099535f, +0.009520f, 0.010584f, 0.018991f, 0.128560f, 0.000912f, 0.001489f, 0.002724f, 0.009196f, 0.659052f, 0.678293f, 0.785383f, 8.435041f, 0.005872f, 0.003766f, 0.010312f, 0.070579f, +0.005706f, 0.006371f, 0.005054f, 0.070406f, 0.000256f, 0.000419f, 0.000339f, 0.002356f, 0.442683f, 0.457587f, 0.234236f, 5.177198f, 0.001395f, 0.000899f, 0.001088f, 0.015323f, +0.010524f, 0.010353f, 0.016816f, 0.116924f, 0.000535f, 0.000774f, 0.001281f, 0.004442f, 0.380595f, 0.346595f, 0.363291f, 4.007457f, 0.003886f, 0.002205f, 0.005466f, 0.038427f, +0.005764f, 0.006891f, 0.012670f, 0.084173f, 0.070203f, 0.123305f, 0.231138f, 0.765608f, 1.769152f, 1.958149f, 2.323285f, 24.487426f, 0.002688f, 0.001854f, 0.005201f, 0.034937f, +0.000490f, 0.001146f, 0.001554f, 0.010345f, 0.008454f, 0.029070f, 0.040184f, 0.133359f, 0.255694f, 0.554055f, 0.484758f, 5.119162f, 0.000343f, 0.000463f, 0.000958f, 0.006448f, +0.000105f, 0.000247f, 0.000148f, 0.002032f, 0.000850f, 0.002936f, 0.001794f, 0.012253f, 0.061595f, 0.134047f, 0.051849f, 1.126818f, 0.000029f, 0.000040f, 0.000036f, 0.000502f, +0.000514f, 0.001065f, 0.001307f, 0.008937f, 0.004715f, 0.014345f, 0.017951f, 0.061187f, 0.140262f, 0.268925f, 0.212997f, 2.310229f, 0.000216f, 0.000258f, 0.000482f, 0.003334f, +0.000001f, 0.000002f, 0.000002f, 0.000042f, 0.000009f, 0.000025f, 0.000021f, 0.000272f, 0.002270f, 0.003914f, 0.002075f, 0.085465f, 0.000001f, 0.000001f, 0.000001f, 0.000027f, +0.000000f, 0.000000f, 0.000000f, 
0.000001f, 0.000000f, 0.000001f, 0.000001f, 0.000010f, 0.000068f, 0.000230f, 0.000090f, 0.003713f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000044f, 0.000150f, 0.000026f, 0.002204f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000001f, 0.000001f, 0.000011f, 0.000087f, 0.000261f, 0.000092f, 0.003915f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000361f, 0.001037f, 0.001147f, 0.013543f, 0.004841f, 0.020447f, 0.023053f, 0.135735f, 0.437226f, 1.163810f, 0.830506f, 15.560163f, 0.000124f, 0.000206f, 0.000347f, 0.004145f, +0.000018f, 0.000104f, 0.000084f, 0.001000f, 0.000350f, 0.002895f, 0.002407f, 0.014198f, 0.037947f, 0.197745f, 0.104059f, 1.953374f, 0.000010f, 0.000031f, 0.000038f, 0.000459f, +0.000002f, 0.000013f, 0.000005f, 0.000113f, 0.000020f, 0.000168f, 0.000062f, 0.000752f, 0.005267f, 0.027567f, 0.006413f, 0.247757f, 0.000000f, 0.000002f, 0.000001f, 0.000021f, +0.000005f, 0.000026f, 0.000019f, 0.000233f, 0.000053f, 0.000385f, 0.000290f, 0.001757f, 0.005616f, 0.025895f, 0.012335f, 0.237829f, 0.000002f, 0.000005f, 0.000005f, 0.000064f, +0.000000f, 0.000000f, 0.000000f, 0.000007f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000081f, 0.000080f, 0.000106f, 0.004436f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000013f, 0.000025f, 0.000025f, 0.001032f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000008f, 0.000004f, 0.000316f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000009f, 0.000015f, 0.000013f, 0.000582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.011136f, 0.017969f, 0.015051f, 0.144626f, 0.016183f, 0.038363f, 0.032760f, 
0.156952f, 3.374181f, 5.040423f, 2.724399f, 41.533249f, 0.016767f, 0.015607f, 0.019950f, 0.193821f, +0.000662f, 0.002090f, 0.001291f, 0.012429f, 0.001363f, 0.006324f, 0.003982f, 0.019116f, 0.340995f, 0.997237f, 0.397482f, 6.071219f, 0.001496f, 0.002726f, 0.002569f, 0.025011f, +0.000193f, 0.000612f, 0.000167f, 0.003311f, 0.000186f, 0.000866f, 0.000241f, 0.002382f, 0.111409f, 0.327229f, 0.057662f, 1.812512f, 0.000173f, 0.000316f, 0.000132f, 0.002641f, +0.000815f, 0.002278f, 0.001274f, 0.012596f, 0.000892f, 0.003661f, 0.002087f, 0.010289f, 0.219431f, 0.567818f, 0.204879f, 3.214132f, 0.001103f, 0.001779f, 0.001518f, 0.015174f, +0.000001f, 0.000001f, 0.000001f, 0.000020f, 0.000001f, 0.000002f, 0.000001f, 0.000015f, 0.001197f, 0.002786f, 0.000673f, 0.040083f, 0.000001f, 0.000002f, 0.000001f, 0.000041f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000025f, 0.000115f, 0.000020f, 0.001218f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000022f, 0.000101f, 0.000008f, 0.000980f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000038f, 0.000152f, 0.000025f, 0.001506f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000275f, 0.001067f, 0.000538f, 0.009185f, 0.000440f, 0.002511f, 0.001290f, 0.010984f, 0.329159f, 1.182497f, 0.384421f, 10.417490f, 0.000305f, 0.000684f, 0.000526f, 0.009077f, +0.000010f, 0.000075f, 0.000028f, 0.000474f, 0.000022f, 0.000249f, 0.000094f, 0.000803f, 0.019976f, 0.140491f, 0.033680f, 0.914448f, 0.000016f, 0.000072f, 0.000041f, 0.000703f, +0.000002f, 0.000013f, 0.000002f, 0.000073f, 0.000002f, 0.000020f, 0.000003f, 0.000058f, 0.003761f, 0.026564f, 0.002815f, 0.157307f, 0.000001f, 0.000005f, 0.000001f, 0.000043f, +0.000003f, 0.000022f, 0.000007f, 0.000130f, 0.000004f, 0.000039f, 0.000013f, 0.000117f, 0.003468f, 0.021581f, 0.004684f, 
0.130608f, 0.000003f, 0.000013f, 0.000006f, 0.000115f, +0.000003f, 0.000005f, 0.000005f, 0.000176f, 0.000000f, 0.000001f, 0.000001f, 0.000011f, 0.002415f, 0.003230f, 0.001947f, 0.118269f, 0.000003f, 0.000002f, 0.000003f, 0.000118f, +0.000000f, 0.000001f, 0.000000f, 0.000017f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000272f, 0.000711f, 0.000316f, 0.019244f, 0.000000f, 0.000000f, 0.000000f, 0.000017f, +0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000124f, 0.000325f, 0.000064f, 0.008000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000001f, 0.000001f, 0.000021f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000218f, 0.000506f, 0.000204f, 0.012723f, 0.000000f, 0.000000f, 0.000000f, 0.000013f, +0.000624f, 0.000606f, 0.000823f, 0.009558f, 0.058561f, 0.083525f, 0.115634f, 0.669929f, 4.706149f, 4.229924f, 3.706510f, 68.330523f, 0.030936f, 0.017326f, 0.035905f, 0.421824f, +0.000053f, 0.000100f, 0.000100f, 0.001166f, 0.007000f, 0.019545f, 0.019954f, 0.115824f, 0.675114f, 1.187942f, 0.767615f, 14.178356f, 0.003918f, 0.004295f, 0.006564f, 0.077268f, +0.000007f, 0.000014f, 0.000006f, 0.000149f, 0.000457f, 0.001282f, 0.000578f, 0.006909f, 0.105585f, 0.186596f, 0.053305f, 2.026216f, 0.000217f, 0.000239f, 0.000161f, 0.003906f, +0.000038f, 0.000064f, 0.000058f, 0.000697f, 0.002700f, 0.006671f, 0.006165f, 0.036757f, 0.256151f, 0.398819f, 0.233288f, 4.425712f, 0.001703f, 0.001653f, 0.002286f, 0.027640f, +0.000000f, 0.000000f, 0.000000f, 0.000008f, 0.000012f, 0.000028f, 0.000017f, 0.000386f, 0.009800f, 0.013723f, 0.005373f, 0.387043f, 0.000014f, 0.000012f, 0.000011f, 0.000523f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000014f, 0.000292f, 0.000801f, 0.000231f, 0.016688f, 0.000000f, 0.000001f, 0.000000f, 0.000020f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000123f, 0.000339f, 0.000043f, 0.006432f, 0.000000f, 0.000000f, 0.000000f, 
0.000003f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000010f, 0.000259f, 0.000628f, 0.000164f, 0.012172f, 0.000000f, 0.000001f, 0.000000f, 0.000017f, +0.000017f, 0.000039f, 0.000032f, 0.000662f, 0.001738f, 0.005960f, 0.004963f, 0.051111f, 0.500501f, 1.081852f, 0.570170f, 18.684623f, 0.000614f, 0.000827f, 0.001031f, 0.021535f, +0.000001f, 0.000004f, 0.000002f, 0.000048f, 0.000125f, 0.000838f, 0.000514f, 0.005306f, 0.043115f, 0.182451f, 0.070909f, 2.328152f, 0.000047f, 0.000123f, 0.000113f, 0.002369f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000005f, 0.000032f, 0.000009f, 0.000182f, 0.003885f, 0.016514f, 0.002837f, 0.191715f, 0.000001f, 0.000004f, 0.000002f, 0.000069f, +0.000000f, 0.000001f, 0.000000f, 0.000008f, 0.000013f, 0.000077f, 0.000043f, 0.000454f, 0.004413f, 0.016525f, 0.005814f, 0.196062f, 0.000005f, 0.000013f, 0.000011f, 0.000229f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000233f, 0.000187f, 0.000183f, 0.013453f, 0.000000f, 0.000000f, 0.000000f, 0.000018f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000037f, 0.000059f, 0.000042f, 0.003107f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, 0.000013f, 0.000004f, 0.000618f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000018f, 0.000025f, 0.000016f, 0.001211f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +1.443791f, 1.389783f, 2.339979f, 3.607981f, 1.899073f, 2.685451f, 4.609860f, 3.543856f, 37.512581f, 33.427714f, 36.319829f, 88.845948f, 2.815587f, 1.563331f, 4.017161f, 6.262456f, +0.238922f, 0.450245f, 0.559023f, 0.863604f, 0.445395f, 1.233025f, 1.560839f, 1.202207f, 10.558935f, 18.420481f, 14.758886f, 36.172664f, 0.699631f, 0.760506f, 1.441074f, 2.250840f, +0.112266f, 0.212484f, 
0.116633f, 0.370803f, 0.097908f, 0.272225f, 0.152345f, 0.241483f, 5.560564f, 9.742758f, 3.451040f, 17.406571f, 0.130325f, 0.142280f, 0.119191f, 0.383123f, +0.315443f, 0.525988f, 0.591187f, 0.938034f, 0.312302f, 0.765001f, 0.876631f, 0.693500f, 7.282361f, 11.241228f, 8.153332f, 20.524410f, 0.552963f, 0.531851f, 0.912311f, 1.463557f, +0.003165f, 0.004746f, 0.003570f, 0.021512f, 0.002931f, 0.006456f, 0.004952f, 0.014875f, 0.569197f, 0.790167f, 0.383611f, 3.666820f, 0.009352f, 0.008089f, 0.009288f, 0.056576f, +0.000109f, 0.000319f, 0.000177f, 0.001070f, 0.000143f, 0.000616f, 0.000348f, 0.001049f, 0.033293f, 0.090480f, 0.032392f, 0.310223f, 0.000483f, 0.000818f, 0.000692f, 0.004225f, +0.000138f, 0.000407f, 0.000100f, 0.001239f, 0.000085f, 0.000367f, 0.000092f, 0.000568f, 0.047282f, 0.129057f, 0.020426f, 0.402580f, 0.000243f, 0.000413f, 0.000154f, 0.001940f, +0.000336f, 0.000872f, 0.000438f, 0.002716f, 0.000234f, 0.000893f, 0.000457f, 0.001413f, 0.053652f, 0.129019f, 0.041813f, 0.411292f, 0.000892f, 0.001336f, 0.001024f, 0.006420f, +0.061983f, 0.143485f, 0.145304f, 0.398253f, 0.089833f, 0.305497f, 0.315414f, 0.431023f, 6.359984f, 13.629546f, 8.906814f, 38.729945f, 0.089125f, 0.119008f, 0.183928f, 0.509688f, +0.006159f, 0.027914f, 0.020845f, 0.057243f, 0.012652f, 0.084232f, 0.064131f, 0.087805f, 1.075016f, 4.510157f, 2.173442f, 9.469028f, 0.013299f, 0.034765f, 0.039622f, 0.110007f, +0.001668f, 0.007591f, 0.002506f, 0.014162f, 0.001603f, 0.010716f, 0.003607f, 0.010163f, 0.326211f, 1.374540f, 0.292839f, 2.625569f, 0.001427f, 0.003748f, 0.001888f, 0.010789f, +0.002194f, 0.008798f, 0.005947f, 0.016775f, 0.002393f, 0.014099f, 0.009717f, 0.013665f, 0.200028f, 0.742554f, 0.323932f, 1.449504f, 0.002836f, 0.006559f, 0.006767f, 0.019298f, +0.003586f, 0.003090f, 0.005804f, 0.035656f, 0.000281f, 0.000355f, 0.000680f, 0.002083f, 0.218209f, 0.174076f, 0.210970f, 2.056189f, 0.003500f, 0.001740f, 0.004987f, 0.030973f, +0.000661f, 0.001114f, 0.001543f, 0.009500f, 0.000073f, 0.000182f, 
0.000256f, 0.000787f, 0.068369f, 0.106778f, 0.095428f, 0.931865f, 0.000968f, 0.000942f, 0.001991f, 0.012392f, +0.000432f, 0.000732f, 0.000448f, 0.005680f, 0.000022f, 0.000056f, 0.000035f, 0.000220f, 0.050135f, 0.078640f, 0.031071f, 0.624406f, 0.000251f, 0.000245f, 0.000229f, 0.002937f, +0.001089f, 0.001626f, 0.002038f, 0.012886f, 0.000064f, 0.000141f, 0.000180f, 0.000567f, 0.058885f, 0.081373f, 0.065833f, 0.660283f, 0.000956f, 0.000823f, 0.001574f, 0.010062f, +0.003296f, 0.005979f, 0.008485f, 0.051254f, 0.046460f, 0.123830f, 0.179163f, 0.539578f, 1.512300f, 2.540030f, 2.326098f, 22.291478f, 0.003651f, 0.003821f, 0.008276f, 0.050544f, +0.000158f, 0.000561f, 0.000587f, 0.003554f, 0.003157f, 0.016472f, 0.017574f, 0.053029f, 0.123321f, 0.405499f, 0.273839f, 2.629291f, 0.000263f, 0.000539f, 0.000860f, 0.005263f, +0.000037f, 0.000132f, 0.000061f, 0.000762f, 0.000347f, 0.001816f, 0.000857f, 0.005319f, 0.032431f, 0.107103f, 0.031976f, 0.631830f, 0.000024f, 0.000050f, 0.000036f, 0.000447f, +0.000247f, 0.000778f, 0.000737f, 0.004579f, 0.002626f, 0.012122f, 0.011708f, 0.036286f, 0.100891f, 0.293538f, 0.179448f, 1.769661f, 0.000246f, 0.000447f, 0.000646f, 0.004059f, +0.000000f, 0.000001f, 0.000001f, 0.000015f, 0.000003f, 0.000014f, 0.000009f, 0.000109f, 0.001105f, 0.002891f, 0.001183f, 0.044297f, 0.000001f, 0.000001f, 0.000001f, 0.000022f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000019f, 0.000096f, 0.000029f, 0.001086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000013f, 0.000068f, 0.000009f, 0.000704f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000001f, 0.000000f, 0.000004f, 0.000036f, 0.000162f, 0.000044f, 0.001707f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000115f, 0.000501f, 0.000428f, 0.004592f, 0.001784f, 0.011435f, 0.009951f, 0.053270f, 0.208126f, 0.840664f, 
0.463037f, 7.887816f, 0.000094f, 0.000236f, 0.000308f, 0.003339f, +0.000003f, 0.000028f, 0.000018f, 0.000191f, 0.000073f, 0.000913f, 0.000586f, 0.003144f, 0.010192f, 0.080592f, 0.032734f, 0.558692f, 0.000004f, 0.000020f, 0.000019f, 0.000209f, +0.000000f, 0.000004f, 0.000001f, 0.000024f, 0.000005f, 0.000058f, 0.000016f, 0.000182f, 0.001544f, 0.012265f, 0.002202f, 0.077361f, 0.000000f, 0.000001f, 0.000000f, 0.000010f, +0.000001f, 0.000011f, 0.000006f, 0.000066f, 0.000016f, 0.000181f, 0.000105f, 0.000580f, 0.002249f, 0.015739f, 0.005787f, 0.101449f, 0.000001f, 0.000004f, 0.000004f, 0.000043f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000035f, 0.000052f, 0.000053f, 0.002024f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000009f, 0.000007f, 0.000266f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000003f, 0.000001f, 0.000089f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000008f, 0.000006f, 0.000223f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.005649f, 0.008779f, 0.007132f, 0.089865f, 0.006571f, 0.015002f, 0.012425f, 0.078059f, 1.693550f, 2.436380f, 1.277219f, 25.532425f, 0.005202f, 0.004663f, 0.005782f, 0.073655f, +0.000379f, 0.001152f, 0.000690f, 0.008712f, 0.000624f, 0.002790f, 0.001704f, 0.010725f, 0.193067f, 0.543759f, 0.210205f, 4.210195f, 0.000524f, 0.000919f, 0.000840f, 0.010722f, +0.000094f, 0.000287f, 0.000076f, 0.001973f, 0.000072f, 0.000325f, 0.000088f, 0.001136f, 0.053620f, 0.151674f, 0.025922f, 1.068457f, 0.000051f, 0.000091f, 0.000037f, 0.000962f, +0.000283f, 0.000761f, 0.000413f, 0.005351f, 0.000247f, 0.000979f, 0.000541f, 0.003498f, 0.075294f, 0.187636f, 0.065663f, 1.350797f, 0.000234f, 0.000363f, 
0.000301f, 0.003942f, +0.000000f, 0.000001f, 0.000000f, 0.000014f, 0.000000f, 0.000001f, 0.000000f, 0.000009f, 0.000668f, 0.001497f, 0.000351f, 0.027394f, 0.000000f, 0.000001f, 0.000000f, 0.000017f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000016f, 0.000069f, 0.000012f, 0.000939f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000052f, 0.000004f, 0.000642f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000014f, 0.000056f, 0.000009f, 0.000704f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000173f, 0.000647f, 0.000316f, 0.007076f, 0.000222f, 0.001217f, 0.000606f, 0.006772f, 0.204815f, 0.708603f, 0.223423f, 7.939340f, 0.000117f, 0.000253f, 0.000189f, 0.004276f, +0.000007f, 0.000051f, 0.000018f, 0.000412f, 0.000013f, 0.000136f, 0.000050f, 0.000559f, 0.014021f, 0.094969f, 0.022081f, 0.786159f, 0.000007f, 0.000030f, 0.000016f, 0.000374f, +0.000001f, 0.000007f, 0.000001f, 0.000054f, 0.000001f, 0.000009f, 0.000001f, 0.000034f, 0.002244f, 0.015264f, 0.001569f, 0.114961f, 0.000000f, 0.000002f, 0.000000f, 0.000019f, +0.000001f, 0.000009f, 0.000003f, 0.000068f, 0.000001f, 0.000013f, 0.000004f, 0.000049f, 0.001475f, 0.008841f, 0.001861f, 0.068049f, 0.000001f, 0.000003f, 0.000002f, 0.000037f, +0.000005f, 0.000007f, 0.000006f, 0.000306f, 0.000000f, 0.000001f, 0.000001f, 0.000016f, 0.003395f, 0.004372f, 0.002556f, 0.203620f, 0.000002f, 0.000002f, 0.000002f, 0.000126f, +0.000000f, 0.000001f, 0.000001f, 0.000033f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000431f, 0.001086f, 0.000468f, 0.037375f, 0.000000f, 0.000000f, 0.000000f, 0.000020f, +0.000000f, 0.000000f, 0.000000f, 0.000010f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000167f, 0.000422f, 0.000080f, 0.013207f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000000f, 0.000001f, 
0.000000f, 0.000025f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000210f, 0.000468f, 0.000183f, 0.014974f, 0.000000f, 0.000000f, 0.000000f, 0.000009f, +0.000665f, 0.000621f, 0.000818f, 0.012470f, 0.049925f, 0.068577f, 0.092080f, 0.699534f, 4.959286f, 4.292740f, 3.648243f, 88.193099f, 0.020153f, 0.010869f, 0.021846f, 0.336558f, +0.000063f, 0.000116f, 0.000112f, 0.001716f, 0.006732f, 0.018102f, 0.017924f, 0.136430f, 0.802529f, 1.359965f, 0.852300f, 20.643154f, 0.002879f, 0.003040f, 0.004506f, 0.069544f, +0.000007f, 0.000014f, 0.000006f, 0.000186f, 0.000374f, 0.001009f, 0.000442f, 0.006918f, 0.106693f, 0.181587f, 0.050311f, 2.507760f, 0.000135f, 0.000144f, 0.000094f, 0.002988f, +0.000028f, 0.000045f, 0.000040f, 0.000621f, 0.001574f, 0.003744f, 0.003356f, 0.026239f, 0.184536f, 0.276699f, 0.156979f, 3.905110f, 0.000759f, 0.000709f, 0.000951f, 0.015076f, +0.000000f, 0.000000f, 0.000000f, 0.000011f, 0.000012f, 0.000025f, 0.000015f, 0.000448f, 0.011481f, 0.015482f, 0.005879f, 0.555362f, 0.000010f, 0.000009f, 0.000008f, 0.000464f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000018f, 0.000386f, 0.001019f, 0.000285f, 0.027012f, 0.000000f, 0.000000f, 0.000000f, 0.000020f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000138f, 0.000367f, 0.000045f, 0.008849f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000008f, 0.000207f, 0.000485f, 0.000123f, 0.011940f, 0.000000f, 0.000000f, 0.000000f, 0.000010f, +0.000022f, 0.000050f, 0.000040f, 0.001070f, 0.001837f, 0.006067f, 0.004899f, 0.066164f, 0.653859f, 1.361116f, 0.695741f, 29.897121f, 0.000496f, 0.000643f, 0.000778f, 0.021301f, +0.000001f, 0.000006f, 0.000003f, 0.000088f, 0.000149f, 0.000962f, 0.000573f, 0.007749f, 0.063539f, 0.258943f, 0.097605f, 4.202296f, 0.000043f, 0.000108f, 0.000096f, 0.002643f, +0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000005f, 0.000031f, 
0.000008f, 0.000226f, 0.004867f, 0.019923f, 0.003320f, 0.294158f, 0.000001f, 0.000003f, 0.000001f, 0.000065f, +0.000000f, 0.000001f, 0.000000f, 0.000009f, 0.000009f, 0.000054f, 0.000029f, 0.000402f, 0.003942f, 0.014214f, 0.004850f, 0.214471f, 0.000003f, 0.000007f, 0.000005f, 0.000155f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000000f, 0.000000f, 0.000010f, 0.000687f, 0.000533f, 0.000505f, 0.048628f, 0.000001f, 0.000000f, 0.000001f, 0.000040f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000124f, 0.000188f, 0.000131f, 0.012670f, 0.000000f, 0.000000f, 0.000000f, 0.000009f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000023f, 0.000035f, 0.000011f, 0.002143f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000036f, 0.000048f, 0.000030f, 0.002993f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +1.760054f, 1.631611f, 2.664394f, 5.387062f, 1.853002f, 2.523479f, 4.201332f, 4.235227f, 45.242902f, 38.826538f, 40.914995f, 131.243450f, 2.099186f, 1.122488f, 2.797476f, 5.718645f, +0.328554f, 0.596278f, 0.718036f, 1.454563f, 0.490241f, 1.307027f, 1.604676f, 1.620728f, 14.365604f, 24.135327f, 18.755239f, 60.276885f, 0.588412f, 0.615975f, 1.132044f, 2.318586f, +0.131236f, 0.239208f, 0.127347f, 0.530898f, 0.091608f, 0.245296f, 0.133140f, 0.276737f, 6.430918f, 10.851366f, 3.727941f, 24.656634f, 0.093173f, 0.097962f, 0.079592f, 0.335481f, +0.262889f, 0.422159f, 0.460195f, 0.957495f, 0.208324f, 0.491445f, 0.546193f, 0.566601f, 6.004489f, 8.926187f, 6.279194f, 20.727201f, 0.281844f, 0.261066f, 0.434330f, 0.913667f, +0.004289f, 0.006194f, 0.004520f, 0.035708f, 0.003179f, 0.006744f, 0.005017f, 0.019763f, 0.763191f, 1.020324f, 0.480426f, 6.021803f, 0.007751f, 0.006457f, 0.007190f, 0.057435f, +0.000166f, 0.000470f, 0.000253f, 0.002003f, 0.000175f, 0.000726f, 0.000398f, 0.001572f, 0.050356f, 
0.131796f, 0.045762f, 0.574699f, 0.000451f, 0.000736f, 0.000605f, 0.004839f, +0.000179f, 0.000509f, 0.000121f, 0.001972f, 0.000088f, 0.000367f, 0.000089f, 0.000724f, 0.060792f, 0.159801f, 0.024530f, 0.633972f, 0.000193f, 0.000316f, 0.000115f, 0.001888f, +0.000311f, 0.000778f, 0.000379f, 0.003082f, 0.000174f, 0.000638f, 0.000317f, 0.001284f, 0.049180f, 0.113895f, 0.035799f, 0.461761f, 0.000505f, 0.000729f, 0.000542f, 0.004456f, +0.093673f, 0.208835f, 0.205111f, 0.737178f, 0.108666f, 0.355889f, 0.356373f, 0.638595f, 9.509443f, 19.625861f, 12.439028f, 70.927112f, 0.082377f, 0.105933f, 0.158789f, 0.577004f, +0.010501f, 0.045830f, 0.033193f, 0.119528f, 0.017264f, 0.110692f, 0.081738f, 0.146749f, 1.813193f, 7.326033f, 3.424067f, 19.561450f, 0.013866f, 0.034908f, 0.038586f, 0.140483f, +0.002417f, 0.010594f, 0.003392f, 0.025138f, 0.001859f, 0.011970f, 0.003908f, 0.014438f, 0.467711f, 1.897952f, 0.392169f, 4.610722f, 0.001265f, 0.003199f, 0.001563f, 0.011713f, +0.002267f, 0.008754f, 0.005739f, 0.021227f, 0.001979f, 0.011229f, 0.007506f, 0.013841f, 0.204466f, 0.730980f, 0.309277f, 1.814742f, 0.001792f, 0.003992f, 0.003994f, 0.014935f, +0.012244f, 0.010161f, 0.018509f, 0.149100f, 0.000767f, 0.000935f, 0.001736f, 0.006971f, 0.737052f, 0.566258f, 0.665597f, 8.506593f, 0.007308f, 0.003499f, 0.009726f, 0.079212f, +0.002544f, 0.004134f, 0.005552f, 0.044813f, 0.000226f, 0.000539f, 0.000738f, 0.002970f, 0.260507f, 0.391819f, 0.339625f, 4.348865f, 0.002280f, 0.002137f, 0.004381f, 0.035749f, +0.001415f, 0.002309f, 0.001371f, 0.022775f, 0.000059f, 0.000141f, 0.000085f, 0.000706f, 0.162386f, 0.245300f, 0.094000f, 2.477081f, 0.000503f, 0.000473f, 0.000429f, 0.007203f, +0.002542f, 0.003655f, 0.004444f, 0.036838f, 0.000120f, 0.000253f, 0.000314f, 0.001296f, 0.135975f, 0.180961f, 0.141993f, 1.867466f, 0.001364f, 0.001131f, 0.002099f, 0.017592f, +0.002887f, 0.005045f, 0.006944f, 0.055002f, 0.032581f, 0.083630f, 0.117355f, 0.463457f, 1.310890f, 2.120391f, 1.883310f, 23.666499f, 0.001957f, 
0.001972f, 0.004142f, 0.033172f, +0.000156f, 0.000534f, 0.000542f, 0.004302f, 0.002497f, 0.012549f, 0.012986f, 0.051381f, 0.120586f, 0.381854f, 0.250103f, 3.148938f, 0.000159f, 0.000313f, 0.000486f, 0.003896f, +0.000031f, 0.000107f, 0.000048f, 0.000784f, 0.000233f, 0.001176f, 0.000538f, 0.004381f, 0.026957f, 0.085735f, 0.024825f, 0.643244f, 0.000013f, 0.000025f, 0.000017f, 0.000282f, +0.000148f, 0.000449f, 0.000412f, 0.003360f, 0.001259f, 0.005597f, 0.005243f, 0.021307f, 0.059787f, 0.167522f, 0.099326f, 1.284444f, 0.000090f, 0.000158f, 0.000221f, 0.001821f, +0.000000f, 0.000001f, 0.000001f, 0.000018f, 0.000003f, 0.000011f, 0.000007f, 0.000104f, 0.001065f, 0.002683f, 0.001065f, 0.052284f, 0.000000f, 0.000001f, 0.000001f, 0.000016f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000020f, 0.000100f, 0.000029f, 0.001446f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000012f, 0.000061f, 0.000008f, 0.000796f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000024f, 0.000103f, 0.000027f, 0.001378f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000125f, 0.000524f, 0.000434f, 0.006109f, 0.001551f, 0.009574f, 0.008080f, 0.056724f, 0.223655f, 0.870011f, 0.464766f, 10.381913f, 0.000062f, 0.000151f, 0.000191f, 0.002717f, +0.000004f, 0.000033f, 0.000020f, 0.000287f, 0.000071f, 0.000863f, 0.000537f, 0.003776f, 0.012355f, 0.094085f, 0.037064f, 0.829513f, 0.000003f, 0.000014f, 0.000013f, 0.000192f, +0.000000f, 0.000004f, 0.000001f, 0.000030f, 0.000004f, 0.000047f, 0.000013f, 0.000186f, 0.001591f, 0.012172f, 0.002120f, 0.097638f, 0.000000f, 0.000001f, 0.000000f, 0.000008f, +0.000001f, 0.000008f, 0.000004f, 0.000060f, 0.000010f, 0.000104f, 0.000058f, 0.000422f, 0.001653f, 0.011136f, 0.003971f, 0.091285f, 0.000000f, 0.000002f, 0.000002f, 0.000024f, +0.000000f, 
0.000000f, 0.000000f, 0.000006f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000084f, 0.000121f, 0.000120f, 0.006018f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000009f, 0.000024f, 0.000018f, 0.000891f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000008f, 0.000002f, 0.000254f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000005f, 0.000013f, 0.000009f, 0.000454f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.235649f, 0.231419f, 0.409699f, 1.377262f, 0.316991f, 0.457313f, 0.825440f, 1.383481f, 12.383959f, 11.258506f, 12.862312f, 68.598140f, 0.176751f, 0.100123f, 0.270523f, 0.919451f, +0.029861f, 0.057410f, 0.074950f, 0.252439f, 0.056930f, 0.160790f, 0.214015f, 0.359390f, 2.669267f, 4.750776f, 4.002381f, 21.386739f, 0.033632f, 0.037297f, 0.074312f, 0.253057f, +0.013955f, 0.026945f, 0.015552f, 0.107796f, 0.012446f, 0.035305f, 0.020775f, 0.071795f, 1.398008f, 2.498989f, 0.930751f, 10.235197f, 0.006231f, 0.006940f, 0.006113f, 0.042838f, +0.035896f, 0.061065f, 0.072167f, 0.249651f, 0.036345f, 0.090828f, 0.109440f, 0.188758f, 1.676164f, 2.639670f, 2.013132f, 11.048593f, 0.024202f, 0.023748f, 0.042834f, 0.149815f, +0.000077f, 0.000119f, 0.000094f, 0.001232f, 0.000073f, 0.000165f, 0.000133f, 0.000871f, 0.028191f, 0.039926f, 0.020381f, 0.424740f, 0.000088f, 0.000078f, 0.000094f, 0.001246f, +0.000002f, 0.000006f, 0.000004f, 0.000047f, 0.000003f, 0.000012f, 0.000007f, 0.000047f, 0.001263f, 0.003501f, 0.001318f, 0.027517f, 0.000003f, 0.000006f, 0.000005f, 0.000071f, +0.000003f, 0.000008f, 0.000002f, 0.000054f, 0.000002f, 0.000007f, 0.000002f, 0.000025f, 0.001783f, 0.004966f, 0.000826f, 0.035514f, 0.000002f, 0.000003f, 0.000001f, 0.000033f, +0.000006f, 0.000015f, 0.000008f, 0.000108f, 0.000004f, 
0.000016f, 0.000009f, 0.000058f, 0.001853f, 0.004545f, 0.001549f, 0.033216f, 0.000006f, 0.000009f, 0.000007f, 0.000099f, +0.011748f, 0.027746f, 0.029544f, 0.176544f, 0.017413f, 0.060415f, 0.065587f, 0.195406f, 2.438254f, 5.330844f, 3.663010f, 34.726567f, 0.006497f, 0.008851f, 0.014384f, 0.086902f, +0.000894f, 0.004133f, 0.003246f, 0.019432f, 0.001878f, 0.012756f, 0.010212f, 0.030482f, 0.315593f, 1.350813f, 0.684468f, 6.501448f, 0.000742f, 0.001980f, 0.002373f, 0.014363f, +0.000241f, 0.001118f, 0.000388f, 0.004781f, 0.000237f, 0.001614f, 0.000571f, 0.003509f, 0.095242f, 0.409431f, 0.091718f, 1.792861f, 0.000079f, 0.000212f, 0.000112f, 0.001401f, +0.000290f, 0.001186f, 0.000843f, 0.005185f, 0.000323f, 0.001944f, 0.001409f, 0.004319f, 0.053466f, 0.202490f, 0.092882f, 0.906142f, 0.000144f, 0.000340f, 0.000369f, 0.002294f, +0.000436f, 0.000383f, 0.000757f, 0.010138f, 0.000035f, 0.000045f, 0.000091f, 0.000606f, 0.053657f, 0.043670f, 0.055651f, 1.182530f, 0.000164f, 0.000083f, 0.000250f, 0.003387f, +0.000061f, 0.000106f, 0.000154f, 0.002068f, 0.000007f, 0.000018f, 0.000026f, 0.000175f, 0.012874f, 0.020513f, 0.019276f, 0.410385f, 0.000035f, 0.000034f, 0.000076f, 0.001038f, +0.000040f, 0.000069f, 0.000045f, 0.001230f, 0.000002f, 0.000005f, 0.000004f, 0.000049f, 0.009389f, 0.015025f, 0.006242f, 0.273480f, 0.000009f, 0.000009f, 0.000009f, 0.000245f, +0.000092f, 0.000141f, 0.000185f, 0.002555f, 0.000006f, 0.000012f, 0.000017f, 0.000115f, 0.010095f, 0.014233f, 0.012108f, 0.264753f, 0.000031f, 0.000027f, 0.000055f, 0.000767f, +0.008469f, 0.005004f, 0.014363f, 0.058388f, 0.735788f, 0.638688f, 1.868913f, 3.787922f, 11.079567f, 6.060557f, 11.224824f, 72.393137f, 0.209188f, 0.071298f, 0.312303f, 1.283588f, +0.001523f, 0.001762f, 0.003730f, 0.015191f, 0.187576f, 0.318760f, 0.687828f, 1.396770f, 3.389899f, 3.630177f, 4.958045f, 32.037658f, 0.056501f, 0.037701f, 0.121777f, 0.501472f, +0.000341f, 0.000396f, 0.000370f, 0.003105f, 0.019630f, 0.033504f, 0.031961f, 0.133569f, 0.849883f, 
0.914076f, 0.551925f, 7.339511f, 0.005011f, 0.003358f, 0.004795f, 0.040636f, +0.001080f, 0.001105f, 0.002117f, 0.008858f, 0.070607f, 0.106168f, 0.207387f, 0.432548f, 1.255107f, 1.189275f, 1.470393f, 9.758710f, 0.023973f, 0.014154f, 0.041387f, 0.175046f, +0.000016f, 0.000015f, 0.000019f, 0.000307f, 0.001000f, 0.001352f, 0.001768f, 0.014000f, 0.148028f, 0.126142f, 0.104391f, 2.630773f, 0.000612f, 0.000325f, 0.000636f, 0.010210f, +0.000001f, 0.000001f, 0.000001f, 0.000017f, 0.000053f, 0.000140f, 0.000135f, 0.001073f, 0.009411f, 0.015701f, 0.009581f, 0.241929f, 0.000034f, 0.000036f, 0.000052f, 0.000829f, +0.000000f, 0.000001f, 0.000000f, 0.000009f, 0.000015f, 0.000040f, 0.000017f, 0.000277f, 0.006363f, 0.010661f, 0.002876f, 0.149465f, 0.000008f, 0.000009f, 0.000005f, 0.000181f, +0.000001f, 0.000002f, 0.000001f, 0.000023f, 0.000047f, 0.000109f, 0.000095f, 0.000776f, 0.008142f, 0.012019f, 0.006640f, 0.172190f, 0.000034f, 0.000031f, 0.000041f, 0.000676f, +0.000460f, 0.000654f, 0.001129f, 0.008159f, 0.044065f, 0.091986f, 0.161892f, 0.583267f, 2.378180f, 3.128458f, 3.484988f, 39.952996f, 0.008383f, 0.006871f, 0.018103f, 0.132260f, +0.000050f, 0.000138f, 0.000176f, 0.001275f, 0.006746f, 0.027568f, 0.035779f, 0.129154f, 0.436943f, 1.125281f, 0.924374f, 10.617655f, 0.001360f, 0.002182f, 0.004239f, 0.031029f, +0.000006f, 0.000018f, 0.000010f, 0.000150f, 0.000407f, 0.001670f, 0.000958f, 0.007117f, 0.063122f, 0.163268f, 0.059293f, 1.401586f, 0.000069f, 0.000112f, 0.000096f, 0.001449f, +0.000010f, 0.000023f, 0.000027f, 0.000201f, 0.000685f, 0.002477f, 0.002910f, 0.010790f, 0.043646f, 0.099458f, 0.073960f, 0.872538f, 0.000156f, 0.000221f, 0.000389f, 0.002922f, +0.000001f, 0.000001f, 0.000002f, 0.000030f, 0.000006f, 0.000004f, 0.000014f, 0.000115f, 0.003319f, 0.001625f, 0.003358f, 0.086283f, 0.000013f, 0.000004f, 0.000020f, 0.000327f, +0.000000f, 0.000000f, 0.000001f, 0.000009f, 0.000002f, 0.000002f, 0.000006f, 0.000047f, 0.001130f, 0.001084f, 0.001651f, 0.042504f, 0.000004f, 
0.000002f, 0.000009f, 0.000142f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000395f, 0.000380f, 0.000256f, 0.013559f, 0.000000f, 0.000000f, 0.000000f, 0.000016f, +0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000001f, 0.000001f, 0.000002f, 0.000018f, 0.000523f, 0.000443f, 0.000611f, 0.016168f, 0.000002f, 0.000001f, 0.000004f, 0.000062f, +24.539969f, 14.376040f, 51.160990f, 27.596817f, 29.877612f, 25.712497f, 93.293089f, 25.090290f, 110.583720f, 59.971320f, 137.725780f, 117.862900f, 23.839366f, 8.055631f, 43.752438f, 23.861386f, +8.661184f, 9.933300f, 26.068064f, 14.088412f, 14.945191f, 25.179736f, 67.370839f, 18.153531f, 66.387528f, 70.483941f, 119.365130f, 102.346320f, 12.634184f, 8.358017f, 33.475072f, 18.291440f, +6.524064f, 7.514790f, 8.718618f, 9.696986f, 5.266496f, 8.911556f, 10.541190f, 5.845417f, 56.044366f, 59.761002f, 44.742512f, 78.949931f, 3.772716f, 2.506642f, 4.438396f, 4.991015f, +11.158807f, 11.323868f, 26.901622f, 14.932763f, 10.225994f, 15.244584f, 36.923689f, 10.218864f, 44.680001f, 41.973722f, 64.347764f, 56.667848f, 9.744261f, 5.703817f, 20.680100f, 11.606135f, +0.345117f, 0.314962f, 0.500833f, 1.055643f, 0.295806f, 0.396581f, 0.642942f, 0.675668f, 10.765114f, 9.094895f, 9.332643f, 31.208355f, 0.507992f, 0.267416f, 0.648972f, 1.383005f, +0.025311f, 0.045222f, 0.053028f, 0.111985f, 0.030747f, 0.080701f, 0.096480f, 0.101585f, 1.342936f, 2.221188f, 1.680768f, 5.631277f, 0.055944f, 0.057654f, 0.103178f, 0.220301f, +0.051416f, 0.092262f, 0.047829f, 0.207865f, 0.029219f, 0.077024f, 0.040710f, 0.088212f, 3.057363f, 5.078776f, 1.699014f, 11.714746f, 0.045051f, 0.046630f, 0.036892f, 0.162108f, +0.076197f, 0.120460f, 0.127868f, 0.277349f, 0.049158f, 0.114165f, 0.123554f, 0.133616f, 2.111879f, 3.090722f, 2.117147f, 7.285488f, 0.100818f, 0.091935f, 0.148938f, 0.326621f, +2.126286f, 2.995591f, 6.411885f, 6.148037f, 2.852476f, 5.903588f, 12.883233f, 6.159023f, 37.840132f, 49.351565f, 68.167332f, 103.697620f, 
1.523025f, 1.237677f, 4.043099f, 3.919567f, +0.450651f, 1.242947f, 1.961875f, 1.884755f, 0.856828f, 3.471671f, 5.586800f, 2.675982f, 13.641554f, 34.830774f, 35.477601f, 54.072886f, 0.484703f, 0.771129f, 1.857587f, 1.804289f, +0.195599f, 0.541827f, 0.378090f, 0.747507f, 0.173980f, 0.707988f, 0.503693f, 0.496503f, 6.635819f, 17.016723f, 7.662708f, 24.034994f, 0.083400f, 0.133260f, 0.141918f, 0.283682f, +0.156641f, 0.382276f, 0.546217f, 0.538961f, 0.158169f, 0.567058f, 0.826077f, 0.406395f, 2.476934f, 5.595957f, 5.159816f, 8.077332f, 0.100856f, 0.141975f, 0.309602f, 0.308866f, +0.369014f, 0.193529f, 0.768226f, 1.651042f, 0.026719f, 0.020585f, 0.083311f, 0.089270f, 3.894129f, 1.890605f, 4.843021f, 16.513048f, 0.179404f, 0.054272f, 0.328792f, 0.714435f, +0.144975f, 0.148849f, 0.435718f, 0.938227f, 0.014877f, 0.022439f, 0.066968f, 0.071897f, 2.602274f, 2.473401f, 4.672245f, 15.961343f, 0.105835f, 0.062679f, 0.280019f, 0.609625f, +0.152060f, 0.156802f, 0.202921f, 0.899217f, 0.007300f, 0.011058f, 0.014590f, 0.032236f, 3.059006f, 2.920141f, 2.438655f, 17.144722f, 0.044007f, 0.026176f, 0.051698f, 0.231625f, +0.233249f, 0.211902f, 0.561516f, 1.241861f, 0.012712f, 0.016965f, 0.045834f, 0.050540f, 2.187089f, 1.839368f, 3.145347f, 11.036224f, 0.101934f, 0.053416f, 0.216025f, 0.483046f, +0.038207f, 0.042188f, 0.126542f, 0.267407f, 0.498575f, 0.808723f, 2.473187f, 2.605736f, 3.040875f, 3.108296f, 6.016532f, 20.170898f, 0.021087f, 0.013431f, 0.061483f, 0.131360f, +0.003907f, 0.008445f, 0.018679f, 0.039549f, 0.072251f, 0.229437f, 0.517412f, 0.546189f, 0.528873f, 1.058342f, 1.510655f, 5.074314f, 0.003238f, 0.004037f, 0.013628f, 0.029172f, +0.001470f, 0.003190f, 0.003120f, 0.013594f, 0.012714f, 0.040550f, 0.040428f, 0.087827f, 0.222959f, 0.448108f, 0.282773f, 1.954725f, 0.000483f, 0.000605f, 0.000902f, 0.003975f, +0.005970f, 0.011420f, 0.022866f, 0.049725f, 0.058642f, 0.164774f, 0.336381f, 0.364708f, 0.422220f, 0.747609f, 0.966013f, 3.332752f, 0.002962f, 0.003268f, 0.009987f, 
0.021957f, +0.000026f, 0.000045f, 0.000060f, 0.000493f, 0.000238f, 0.000601f, 0.000821f, 0.003379f, 0.014253f, 0.022697f, 0.019630f, 0.257160f, 0.000022f, 0.000021f, 0.000044f, 0.000367f, +0.000001f, 0.000002f, 0.000002f, 0.000015f, 0.000007f, 0.000035f, 0.000036f, 0.000147f, 0.000515f, 0.001606f, 0.001024f, 0.013443f, 0.000001f, 0.000001f, 0.000002f, 0.000017f, +0.000001f, 0.000002f, 0.000001f, 0.000014f, 0.000003f, 0.000017f, 0.000008f, 0.000064f, 0.000586f, 0.001834f, 0.000517f, 0.013965f, 0.000000f, 0.000001f, 0.000000f, 0.000006f, +0.000002f, 0.000006f, 0.000005f, 0.000044f, 0.000014f, 0.000059f, 0.000054f, 0.000230f, 0.000961f, 0.002651f, 0.001530f, 0.020630f, 0.000001f, 0.000003f, 0.000003f, 0.000030f, +0.002687f, 0.007136f, 0.012873f, 0.048357f, 0.038638f, 0.150723f, 0.277231f, 0.519213f, 0.844635f, 2.076292f, 2.417219f, 14.405401f, 0.001094f, 0.001675f, 0.004612f, 0.017515f, +0.000165f, 0.000858f, 0.001141f, 0.004295f, 0.003362f, 0.025678f, 0.034829f, 0.065354f, 0.088214f, 0.424529f, 0.364461f, 2.176173f, 0.000101f, 0.000302f, 0.000614f, 0.002336f, +0.000036f, 0.000187f, 0.000110f, 0.000851f, 0.000341f, 0.002615f, 0.001568f, 0.006055f, 0.021429f, 0.103574f, 0.039310f, 0.483044f, 0.000009f, 0.000026f, 0.000023f, 0.000183f, +0.000068f, 0.000313f, 0.000377f, 0.001457f, 0.000736f, 0.004975f, 0.006109f, 0.011773f, 0.019000f, 0.080906f, 0.062877f, 0.385605f, 0.000025f, 0.000066f, 0.000121f, 0.000474f, +0.000002f, 0.000002f, 0.000007f, 0.000063f, 0.000002f, 0.000003f, 0.000009f, 0.000036f, 0.000420f, 0.000384f, 0.000830f, 0.011086f, 0.000001f, 0.000000f, 0.000002f, 0.000015f, +0.000000f, 0.000000f, 0.000001f, 0.000010f, 0.000000f, 0.000001f, 0.000002f, 0.000008f, 0.000081f, 0.000146f, 0.000232f, 0.003104f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000000f, 0.000000f, 0.000000f, 0.000005f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000048f, 0.000086f, 0.000060f, 0.001665f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000001f, 0.000002f, 
0.000016f, 0.000000f, 0.000001f, 0.000002f, 0.000007f, 0.000081f, 0.000129f, 0.000185f, 0.002546f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.018963f, 0.013928f, 0.028309f, 0.182797f, 0.019851f, 0.021420f, 0.044385f, 0.142898f, 3.791790f, 2.578310f, 3.381629f, 34.643010f, 0.010796f, 0.004574f, 0.014188f, 0.092628f, +0.002065f, 0.002970f, 0.004451f, 0.028797f, 0.003064f, 0.006473f, 0.009891f, 0.031905f, 0.702456f, 0.935107f, 0.904415f, 9.283040f, 0.001766f, 0.001464f, 0.003350f, 0.021912f, +0.000585f, 0.000846f, 0.000560f, 0.007460f, 0.000406f, 0.000862f, 0.000582f, 0.003866f, 0.223177f, 0.298383f, 0.127584f, 2.694977f, 0.000198f, 0.000165f, 0.000167f, 0.002250f, +0.001835f, 0.002335f, 0.003168f, 0.021053f, 0.001446f, 0.002703f, 0.003739f, 0.012388f, 0.326086f, 0.384092f, 0.336287f, 3.545205f, 0.000939f, 0.000689f, 0.001427f, 0.009590f, +0.000002f, 0.000003f, 0.000002f, 0.000059f, 0.000002f, 0.000003f, 0.000003f, 0.000033f, 0.003124f, 0.003309f, 0.001939f, 0.077627f, 0.000002f, 0.000001f, 0.000002f, 0.000045f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000120f, 0.000249f, 0.000108f, 0.004322f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000103f, 0.000215f, 0.000041f, 0.003384f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000130f, 0.000239f, 0.000094f, 0.003857f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.001065f, 0.001881f, 0.002299f, 0.026387f, 0.001228f, 0.003187f, 0.003972f, 0.022729f, 0.840718f, 1.374793f, 1.084505f, 19.749299f, 0.000447f, 0.000455f, 0.000850f, 0.009859f, +0.000070f, 0.000241f, 0.000217f, 0.002496f, 0.000114f, 0.000578f, 0.000531f, 0.003047f, 0.093528f, 0.299419f, 0.174176f, 3.177915f, 0.000044f, 0.000088f, 0.000120f, 0.001400f, +0.000011f, 0.000040f, 0.000016f, 0.000373f, 0.000009f, 0.000044f, 0.000018f, 
0.000213f, 0.017122f, 0.055053f, 0.014158f, 0.531609f, 0.000003f, 0.000006f, 0.000003f, 0.000083f, +0.000017f, 0.000051f, 0.000042f, 0.000492f, 0.000014f, 0.000065f, 0.000054f, 0.000319f, 0.011713f, 0.033180f, 0.017473f, 0.327429f, 0.000006f, 0.000011f, 0.000014f, 0.000165f, +0.000009f, 0.000006f, 0.000013f, 0.000341f, 0.000001f, 0.000001f, 0.000001f, 0.000016f, 0.004164f, 0.002535f, 0.003709f, 0.151375f, 0.000003f, 0.000001f, 0.000003f, 0.000086f, +0.000001f, 0.000001f, 0.000002f, 0.000060f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000859f, 0.001023f, 0.001104f, 0.045152f, 0.000000f, 0.000000f, 0.000001f, 0.000023f, +0.000000f, 0.000001f, 0.000000f, 0.000022f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000380f, 0.000455f, 0.000217f, 0.018253f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000001f, 0.000001f, 0.000002f, 0.000055f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000498f, 0.000525f, 0.000513f, 0.021533f, 0.000000f, 0.000000f, 0.000000f, 0.000012f, +0.000645f, 0.000285f, 0.000940f, 0.007336f, 0.043621f, 0.028321f, 0.095139f, 0.370396f, 3.211601f, 1.313957f, 2.793835f, 34.611020f, 0.012096f, 0.003084f, 0.015506f, 0.122420f, +0.000100f, 0.000086f, 0.000210f, 0.001641f, 0.009558f, 0.012149f, 0.030095f, 0.117391f, 0.844555f, 0.676455f, 1.060654f, 13.164993f, 0.002808f, 0.001401f, 0.005197f, 0.041107f, +0.000014f, 0.000012f, 0.000013f, 0.000203f, 0.000607f, 0.000775f, 0.000848f, 0.006810f, 0.128444f, 0.103325f, 0.071624f, 1.829533f, 0.000151f, 0.000076f, 0.000124f, 0.002021f, +0.000052f, 0.000040f, 0.000088f, 0.000707f, 0.002660f, 0.002991f, 0.006708f, 0.026874f, 0.231159f, 0.163826f, 0.232534f, 2.964428f, 0.000881f, 0.000389f, 0.001306f, 0.010607f, +0.000000f, 0.000000f, 0.000000f, 0.000014f, 0.000021f, 0.000022f, 0.000033f, 0.000495f, 0.015528f, 0.009897f, 0.009403f, 0.455182f, 0.000013f, 0.000005f, 0.000011f, 0.000352f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000002f, 0.000002f, 0.000033f, 0.000849f, 0.001059f, 0.000742f, 
0.035978f, 0.000001f, 0.000000f, 0.000001f, 0.000025f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000005f, 0.000348f, 0.000436f, 0.000135f, 0.013483f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000001f, 0.000001f, 0.000017f, 0.000543f, 0.000599f, 0.000380f, 0.018930f, 0.000000f, 0.000000f, 0.000000f, 0.000015f, +0.000039f, 0.000042f, 0.000083f, 0.001155f, 0.002942f, 0.004593f, 0.009281f, 0.064227f, 0.776302f, 0.763810f, 0.976807f, 21.510634f, 0.000546f, 0.000335f, 0.001012f, 0.014205f, +0.000004f, 0.000008f, 0.000011f, 0.000155f, 0.000387f, 0.001183f, 0.001763f, 0.012224f, 0.122589f, 0.236134f, 0.222688f, 4.913320f, 0.000076f, 0.000091f, 0.000204f, 0.002864f, +0.000000f, 0.000001f, 0.000000f, 0.000011f, 0.000014f, 0.000043f, 0.000029f, 0.000409f, 0.010743f, 0.020783f, 0.008665f, 0.393441f, 0.000002f, 0.000003f, 0.000003f, 0.000081f, +0.000001f, 0.000001f, 0.000001f, 0.000018f, 0.000029f, 0.000079f, 0.000106f, 0.000755f, 0.009052f, 0.015429f, 0.013171f, 0.298483f, 0.000006f, 0.000007f, 0.000014f, 0.000199f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000244f, 0.000089f, 0.000212f, 0.010456f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000071f, 0.000051f, 0.000090f, 0.004427f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000015f, 0.000011f, 0.000008f, 0.000857f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000024f, 0.000015f, 0.000025f, 0.001245f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +4.057542f, 1.777860f, 7.263562f, 7.526035f, 3.844458f, 2.474589f, 10.307672f, 5.324924f, 69.571535f, 28.219790f, 74.400866f, 122.302770f, 2.991895f, 0.756172f, 4.714939f, 
4.939303f, +1.230860f, 1.055831f, 3.180987f, 3.302262f, 1.652850f, 2.082823f, 6.397727f, 3.311396f, 35.897955f, 28.506439f, 55.422038f, 91.279604f, 1.362829f, 0.674321f, 3.100543f, 3.254319f, +0.562423f, 0.484542f, 0.645378f, 1.378795f, 0.353319f, 0.447165f, 0.607235f, 0.646813f, 18.383520f, 14.661686f, 12.601986f, 42.713651f, 0.246866f, 0.122679f, 0.249377f, 0.538660f, +1.172297f, 0.889783f, 2.426726f, 2.587489f, 0.836038f, 0.932193f, 2.592074f, 1.377976f, 17.860140f, 12.549284f, 22.086516f, 37.361711f, 0.777019f, 0.340187f, 1.415981f, 1.526471f, +0.020651f, 0.014096f, 0.025733f, 0.104186f, 0.013775f, 0.013813f, 0.025708f, 0.051895f, 2.451005f, 1.548791f, 1.824534f, 11.719646f, 0.023072f, 0.009084f, 0.025310f, 0.103604f, +0.001302f, 0.001740f, 0.002342f, 0.009499f, 0.001231f, 0.002416f, 0.003316f, 0.006706f, 0.262799f, 0.325104f, 0.282421f, 1.817579f, 0.002184f, 0.001683f, 0.003458f, 0.014184f, +0.001604f, 0.002153f, 0.001281f, 0.010696f, 0.000709f, 0.001399f, 0.000849f, 0.003532f, 0.362934f, 0.450931f, 0.173181f, 2.293677f, 0.001067f, 0.000826f, 0.000750f, 0.006332f, +0.002897f, 0.003425f, 0.004174f, 0.017392f, 0.001454f, 0.002526f, 0.003139f, 0.006521f, 0.305510f, 0.334415f, 0.262984f, 1.738335f, 0.002909f, 0.001984f, 0.003691f, 0.015546f, +0.395911f, 0.417184f, 1.025141f, 1.888125f, 0.413331f, 0.639825f, 1.602961f, 1.471995f, 26.808972f, 26.151573f, 41.469212f, 121.175430f, 0.215251f, 0.130833f, 0.490653f, 0.913682f, +0.072120f, 0.148778f, 0.269595f, 0.497498f, 0.106712f, 0.323390f, 0.597453f, 0.549693f, 8.306802f, 15.863632f, 18.550098f, 54.308500f, 0.058878f, 0.070061f, 0.193755f, 0.361498f, +0.018989f, 0.039342f, 0.031517f, 0.119692f, 0.013144f, 0.040006f, 0.032675f, 0.061869f, 2.451194f, 4.701416f, 2.430454f, 14.643528f, 0.006146f, 0.007345f, 0.008980f, 0.034478f, +0.018532f, 0.033826f, 0.055487f, 0.105168f, 0.014562f, 0.039048f, 0.065305f, 0.061713f, 1.114995f, 1.884094f, 1.994412f, 5.997151f, 0.009057f, 0.009536f, 0.023872f, 0.045746f, +0.015466f, 
0.006067f, 0.027646f, 0.114131f, 0.000871f, 0.000502f, 0.002333f, 0.004802f, 0.620997f, 0.225502f, 0.663159f, 4.343349f, 0.005707f, 0.001291f, 0.008981f, 0.037486f, +0.005222f, 0.004010f, 0.013477f, 0.055744f, 0.000417f, 0.000470f, 0.001612f, 0.003324f, 0.356677f, 0.253563f, 0.549882f, 3.608357f, 0.002894f, 0.001282f, 0.006574f, 0.027492f, +0.003323f, 0.002563f, 0.003807f, 0.032409f, 0.000124f, 0.000141f, 0.000213f, 0.000904f, 0.254340f, 0.181597f, 0.174103f, 2.351168f, 0.000730f, 0.000325f, 0.000736f, 0.006336f, +0.006211f, 0.004220f, 0.012839f, 0.054544f, 0.000263f, 0.000263f, 0.000816f, 0.001727f, 0.221604f, 0.139395f, 0.273653f, 1.844375f, 0.002060f, 0.000808f, 0.003749f, 0.016104f, +0.003529f, 0.002914f, 0.010036f, 0.040738f, 0.035838f, 0.043479f, 0.152647f, 0.308928f, 1.068706f, 0.817056f, 1.815633f, 11.692378f, 0.001478f, 0.000704f, 0.003701f, 0.015190f, +0.000310f, 0.000501f, 0.001273f, 0.005178f, 0.004464f, 0.010602f, 0.027448f, 0.055656f, 0.159755f, 0.239110f, 0.391823f, 2.528120f, 0.000195f, 0.000182f, 0.000705f, 0.002899f, +0.000071f, 0.000115f, 0.000129f, 0.001080f, 0.000476f, 0.001137f, 0.001301f, 0.005429f, 0.040855f, 0.061414f, 0.044491f, 0.590771f, 0.000018f, 0.000017f, 0.000028f, 0.000240f, +0.000350f, 0.000501f, 0.001152f, 0.004813f, 0.002678f, 0.005629f, 0.013191f, 0.027473f, 0.094282f, 0.124863f, 0.185223f, 1.227473f, 0.000132f, 0.000109f, 0.000382f, 0.001613f, +0.000001f, 0.000001f, 0.000002f, 0.000027f, 0.000006f, 0.000012f, 0.000018f, 0.000145f, 0.001813f, 0.002159f, 0.002144f, 0.053947f, 0.000001f, 0.000000f, 0.000001f, 0.000015f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000001f, 0.000001f, 0.000005f, 0.000056f, 0.000131f, 0.000096f, 0.002424f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000039f, 0.000091f, 0.000029f, 0.001527f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 
0.000001f, 0.000001f, 0.000006f, 0.000078f, 0.000160f, 0.000106f, 0.002750f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000280f, 0.000555f, 0.001150f, 0.008296f, 0.003128f, 0.009125f, 0.019269f, 0.069320f, 0.334284f, 0.614617f, 0.821456f, 9.403515f, 0.000086f, 0.000099f, 0.000313f, 0.002281f, +0.000015f, 0.000057f, 0.000088f, 0.000633f, 0.000234f, 0.001336f, 0.002081f, 0.007499f, 0.030007f, 0.108011f, 0.106454f, 1.220958f, 0.000007f, 0.000015f, 0.000036f, 0.000261f, +0.000002f, 0.000008f, 0.000005f, 0.000076f, 0.000014f, 0.000083f, 0.000057f, 0.000422f, 0.004422f, 0.015985f, 0.006965f, 0.164402f, 0.000000f, 0.000001f, 0.000001f, 0.000012f, +0.000004f, 0.000015f, 0.000021f, 0.000159f, 0.000038f, 0.000191f, 0.000270f, 0.000999f, 0.004778f, 0.015217f, 0.013577f, 0.159933f, 0.000001f, 0.000002f, 0.000005f, 0.000039f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000037f, 0.000026f, 0.000063f, 0.001629f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000008f, 0.000015f, 0.000392f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000003f, 0.000002f, 0.000128f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000005f, 0.000005f, 0.000009f, 0.000238f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.024761f, 0.027599f, 0.043295f, 0.254189f, 0.030001f, 0.049124f, 0.078568f, 0.229986f, 7.401957f, 7.637613f, 7.731806f, 72.017974f, 0.033493f, 0.021533f, 0.051555f, 0.306026f, +0.001522f, 0.003320f, 0.003841f, 0.022594f, 0.002613f, 0.008376f, 0.009879f, 0.028972f, 0.773688f, 1.562889f, 1.166721f, 10.888291f, 0.003090f, 0.003890f, 0.006868f, 0.040845f, +0.000471f, 0.001032f, 0.000528f, 0.006389f, 0.000378f, 0.001218f, 0.000635f, 0.003833f, 0.268351f, 
0.544438f, 0.179681f, 3.450892f, 0.000379f, 0.000479f, 0.000374f, 0.004579f, +0.002017f, 0.003894f, 0.004077f, 0.024635f, 0.001839f, 0.005216f, 0.005569f, 0.016777f, 0.535641f, 0.957408f, 0.647000f, 6.201630f, 0.002452f, 0.002731f, 0.004364f, 0.026660f, +0.000002f, 0.000003f, 0.000002f, 0.000047f, 0.000001f, 0.000004f, 0.000003f, 0.000030f, 0.003472f, 0.005581f, 0.002524f, 0.091881f, 0.000003f, 0.000003f, 0.000004f, 0.000085f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000075f, 0.000237f, 0.000079f, 0.002887f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000071f, 0.000223f, 0.000033f, 0.002467f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000122f, 0.000340f, 0.000103f, 0.003842f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000774f, 0.002075f, 0.001958f, 0.020433f, 0.001033f, 0.004070f, 0.003915f, 0.020370f, 0.913903f, 2.267811f, 1.380810f, 22.862533f, 0.000772f, 0.001194f, 0.001719f, 0.018138f, +0.000029f, 0.000150f, 0.000104f, 0.001091f, 0.000054f, 0.000417f, 0.000296f, 0.001541f, 0.057363f, 0.278672f, 0.125122f, 2.075673f, 0.000043f, 0.000129f, 0.000138f, 0.001454f, +0.000005f, 0.000027f, 0.000008f, 0.000178f, 0.000005f, 0.000035f, 0.000011f, 0.000117f, 0.011465f, 0.055937f, 0.011103f, 0.379067f, 0.000003f, 0.000009f, 0.000004f, 0.000094f, +0.000010f, 0.000047f, 0.000030f, 0.000321f, 0.000010f, 0.000070f, 0.000045f, 0.000241f, 0.010714f, 0.046056f, 0.018720f, 0.318955f, 0.000009f, 0.000025f, 0.000024f, 0.000256f, +0.000006f, 0.000006f, 0.000010f, 0.000238f, 0.000000f, 0.000001f, 0.000001f, 0.000013f, 0.004074f, 0.003764f, 0.004250f, 0.157719f, 0.000004f, 0.000002f, 0.000006f, 0.000143f, +0.000000f, 0.000001f, 0.000001f, 0.000024f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000474f, 0.000857f, 0.000714f, 0.026543f, 0.000000f, 
0.000000f, 0.000001f, 0.000021f, +0.000000f, 0.000000f, 0.000000f, 0.000009f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000229f, 0.000416f, 0.000153f, 0.011714f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000001f, 0.000001f, 0.000001f, 0.000032f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000410f, 0.000656f, 0.000494f, 0.018879f, 0.000000f, 0.000000f, 0.000001f, 0.000017f, +0.000725f, 0.000486f, 0.001237f, 0.008781f, 0.056745f, 0.055905f, 0.144956f, 0.513115f, 5.396280f, 3.350226f, 5.498271f, 61.931317f, 0.032301f, 0.012495f, 0.048498f, 0.348129f, +0.000063f, 0.000083f, 0.000156f, 0.001108f, 0.007015f, 0.013530f, 0.025871f, 0.091754f, 0.800654f, 0.973140f, 1.177724f, 13.291087f, 0.004231f, 0.003204f, 0.009171f, 0.065955f, +0.000009f, 0.000012f, 0.000010f, 0.000150f, 0.000486f, 0.000942f, 0.000796f, 0.005811f, 0.132934f, 0.162275f, 0.086823f, 2.016447f, 0.000248f, 0.000189f, 0.000239f, 0.003539f, +0.000049f, 0.000057f, 0.000097f, 0.000712f, 0.002911f, 0.004969f, 0.008600f, 0.031327f, 0.326831f, 0.351491f, 0.385080f, 4.463507f, 0.001979f, 0.001326f, 0.003436f, 0.025383f, +0.000000f, 0.000000f, 0.000000f, 0.000009f, 0.000016f, 0.000024f, 0.000028f, 0.000391f, 0.014856f, 0.014368f, 0.010536f, 0.463735f, 0.000019f, 0.000012f, 0.000020f, 0.000571f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000015f, 0.000458f, 0.000867f, 0.000469f, 0.020681f, 0.000001f, 0.000001f, 0.000001f, 0.000022f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000205f, 0.000390f, 0.000093f, 0.008461f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000012f, 0.000437f, 0.000732f, 0.000358f, 0.016228f, 0.000001f, 0.000001f, 0.000001f, 0.000020f, +0.000025f, 0.000040f, 0.000061f, 0.000770f, 0.002131f, 0.005049f, 0.007874f, 0.049547f, 0.726358f, 1.084490f, 1.070487f, 21.433650f, 0.000812f, 0.000755f, 0.001763f, 0.022495f, +0.000001f, 
0.000004f, 0.000005f, 0.000058f, 0.000158f, 0.000734f, 0.000844f, 0.005320f, 0.064717f, 0.189166f, 0.137694f, 2.762245f, 0.000064f, 0.000116f, 0.000200f, 0.002559f, +0.000000f, 0.000000f, 0.000000f, 0.000005f, 0.000006f, 0.000029f, 0.000015f, 0.000194f, 0.006191f, 0.018176f, 0.005849f, 0.241476f, 0.000002f, 0.000004f, 0.000003f, 0.000079f, +0.000000f, 0.000001f, 0.000001f, 0.000010f, 0.000018f, 0.000073f, 0.000076f, 0.000490f, 0.007127f, 0.018433f, 0.012146f, 0.250266f, 0.000008f, 0.000013f, 0.000020f, 0.000266f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000205f, 0.000114f, 0.000209f, 0.009377f, 0.000000f, 0.000000f, 0.000000f, 0.000011f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000034f, 0.000037f, 0.000050f, 0.002240f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, 0.000009f, 0.000005f, 0.000473f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000017f, 0.000017f, 0.000020f, 0.000939f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +2.276520f, 1.513654f, 4.773215f, 4.496740f, 2.496502f, 2.438484f, 7.839895f, 3.682418f, 58.354794f, 35.918570f, 73.092905f, 109.245580f, 3.988245f, 1.529595f, 7.361466f, 7.011718f, +0.389638f, 0.507186f, 1.179417f, 1.113235f, 0.605584f, 1.158013f, 2.745485f, 1.292038f, 16.988638f, 20.471621f, 30.720207f, 46.002957f, 1.024993f, 0.769604f, 2.731303f, 2.606531f, +0.194367f, 0.254104f, 0.261232f, 0.507437f, 0.141324f, 0.271416f, 0.284483f, 0.275518f, 9.497822f, 11.494766f, 7.625837f, 23.500938f, 0.202697f, 0.152854f, 0.239825f, 0.471003f, +0.553459f, 0.637459f, 1.341904f, 1.300917f, 0.456838f, 0.772969f, 1.658961f, 0.801864f, 12.605762f, 13.440742f, 18.258464f, 28.082391f, 0.871578f, 0.579046f, 1.860308f, 1.823418f, +0.006597f, 0.006833f, 0.009628f, 0.035443f, 
0.005093f, 0.007750f, 0.011133f, 0.020433f, 1.170518f, 1.122398f, 1.020560f, 5.960347f, 0.017511f, 0.010463f, 0.022499f, 0.083739f, +0.000235f, 0.000476f, 0.000494f, 0.001823f, 0.000257f, 0.000765f, 0.000810f, 0.001490f, 0.070811f, 0.132929f, 0.089131f, 0.521548f, 0.000935f, 0.001094f, 0.001735f, 0.006469f, +0.000316f, 0.000643f, 0.000295f, 0.002241f, 0.000162f, 0.000483f, 0.000226f, 0.000857f, 0.106761f, 0.201287f, 0.059667f, 0.718522f, 0.000499f, 0.000586f, 0.000411f, 0.003152f, +0.000779f, 0.001397f, 0.001314f, 0.004979f, 0.000453f, 0.001193f, 0.001144f, 0.002160f, 0.122772f, 0.203929f, 0.123781f, 0.743926f, 0.001858f, 0.001923f, 0.002761f, 0.010573f, +0.123695f, 0.197790f, 0.375138f, 0.628216f, 0.149466f, 0.351096f, 0.678921f, 0.566856f, 12.521954f, 18.535747f, 22.686649f, 60.273942f, 0.159782f, 0.147373f, 0.426589f, 0.722272f, +0.012713f, 0.039798f, 0.055663f, 0.093393f, 0.021772f, 0.100123f, 0.142772f, 0.119435f, 2.189123f, 6.343940f, 5.725785f, 15.241474f, 0.024659f, 0.044527f, 0.095045f, 0.161233f, +0.003654f, 0.011489f, 0.007104f, 0.024530f, 0.002928f, 0.013522f, 0.008524f, 0.014675f, 0.705213f, 2.052541f, 0.818999f, 4.486540f, 0.002810f, 0.005096f, 0.004809f, 0.016788f, +0.004872f, 0.013495f, 0.017086f, 0.029444f, 0.004431f, 0.018030f, 0.023275f, 0.019998f, 0.438232f, 1.123710f, 0.918119f, 2.510148f, 0.005657f, 0.009038f, 0.017465f, 0.030430f, +0.004349f, 0.002589f, 0.009106f, 0.034178f, 0.000284f, 0.000248f, 0.000889f, 0.001664f, 0.261060f, 0.143854f, 0.326528f, 1.944455f, 0.003813f, 0.001309f, 0.007028f, 0.026671f, +0.000829f, 0.000966f, 0.002504f, 0.009418f, 0.000077f, 0.000131f, 0.000347f, 0.000650f, 0.084600f, 0.091264f, 0.152763f, 0.911438f, 0.001091f, 0.000733f, 0.002903f, 0.011036f, +0.000576f, 0.000674f, 0.000772f, 0.005978f, 0.000025f, 0.000043f, 0.000050f, 0.000193f, 0.065859f, 0.071356f, 0.052803f, 0.648348f, 0.000300f, 0.000203f, 0.000355f, 0.002777f, +0.001470f, 0.001515f, 0.003558f, 0.013744f, 0.000072f, 0.000109f, 0.000262f, 0.000504f, 
0.078391f, 0.074827f, 0.113382f, 0.694804f, 0.001158f, 0.000689f, 0.002469f, 0.009641f, +0.004614f, 0.005782f, 0.015368f, 0.056717f, 0.054227f, 0.099833f, 0.270530f, 0.497802f, 2.088733f, 2.423240f, 4.156285f, 24.336084f, 0.004592f, 0.003320f, 0.013465f, 0.050245f, +0.000229f, 0.000561f, 0.001100f, 0.004068f, 0.003811f, 0.013735f, 0.027446f, 0.050601f, 0.176166f, 0.400117f, 0.506070f, 2.968857f, 0.000342f, 0.000484f, 0.001447f, 0.005411f, +0.000057f, 0.000140f, 0.000122f, 0.000926f, 0.000444f, 0.001608f, 0.001420f, 0.005388f, 0.049183f, 0.112192f, 0.062734f, 0.757387f, 0.000034f, 0.000048f, 0.000063f, 0.000488f, +0.000385f, 0.000837f, 0.001485f, 0.005639f, 0.003410f, 0.010875f, 0.019673f, 0.037251f, 0.155058f, 0.311616f, 0.356790f, 2.149805f, 0.000345f, 0.000432f, 0.001169f, 0.004490f, +0.000001f, 0.000001f, 0.000001f, 0.000022f, 0.000005f, 0.000015f, 0.000018f, 0.000133f, 0.002017f, 0.003646f, 0.002794f, 0.063930f, 0.000001f, 0.000001f, 0.000002f, 0.000029f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000035f, 0.000125f, 0.000071f, 0.001621f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000027f, 0.000095f, 0.000024f, 0.001115f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000001f, 0.000001f, 0.000005f, 0.000073f, 0.000228f, 0.000116f, 0.002742f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000203f, 0.000613f, 0.000980f, 0.006432f, 0.002635f, 0.011668f, 0.019017f, 0.062202f, 0.363820f, 1.015071f, 1.047150f, 10.898956f, 0.000149f, 0.000260f, 0.000633f, 0.004201f, +0.000006f, 0.000036f, 0.000042f, 0.000277f, 0.000111f, 0.000964f, 0.001159f, 0.003797f, 0.018426f, 0.100647f, 0.076565f, 0.798435f, 0.000007f, 0.000023f, 0.000041f, 0.000272f, +0.000001f, 0.000005f, 0.000003f, 0.000036f, 0.000007f, 0.000065f, 0.000035f, 0.000233f, 0.002964f, 0.016262f, 0.005469f, 0.117369f, 
0.000000f, 0.000001f, 0.000001f, 0.000014f, +0.000003f, 0.000014f, 0.000015f, 0.000104f, 0.000027f, 0.000206f, 0.000224f, 0.000754f, 0.004376f, 0.021147f, 0.014563f, 0.155981f, 0.000002f, 0.000005f, 0.000009f, 0.000061f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000037f, 0.000038f, 0.000073f, 0.001699f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000007f, 0.000010f, 0.000231f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000003f, 0.000002f, 0.000082f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000007f, 0.000009f, 0.000209f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.007918f, 0.008499f, 0.012931f, 0.099554f, 0.007679f, 0.012108f, 0.018783f, 0.072097f, 2.341719f, 2.326990f, 2.284726f, 27.905854f, 0.006550f, 0.004056f, 0.009417f, 0.073303f, +0.000549f, 0.001153f, 0.001294f, 0.009982f, 0.000754f, 0.002329f, 0.002664f, 0.010245f, 0.276111f, 0.537149f, 0.388911f, 4.759316f, 0.000682f, 0.000826f, 0.001415f, 0.011036f, +0.000144f, 0.000305f, 0.000151f, 0.002400f, 0.000093f, 0.000288f, 0.000146f, 0.001152f, 0.081409f, 0.159062f, 0.050914f, 1.282231f, 0.000071f, 0.000087f, 0.000066f, 0.001052f, +0.000441f, 0.000820f, 0.000833f, 0.006596f, 0.000322f, 0.000879f, 0.000910f, 0.003595f, 0.115849f, 0.199418f, 0.130704f, 1.642823f, 0.000328f, 0.000352f, 0.000545f, 0.004366f, +0.000001f, 0.000001f, 0.000001f, 0.000020f, 0.000000f, 0.000001f, 0.000001f, 0.000010f, 0.001221f, 0.001890f, 0.000829f, 0.039580f, 0.000001f, 0.000001f, 0.000001f, 0.000023f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000030f, 0.000091f, 0.000029f, 0.001403f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 
+0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000024f, 0.000072f, 0.000010f, 0.001019f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000029f, 0.000079f, 0.000023f, 0.001131f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000307f, 0.000792f, 0.000725f, 0.009921f, 0.000328f, 0.001244f, 0.001160f, 0.007917f, 0.358437f, 0.856582f, 0.505839f, 10.982575f, 0.000187f, 0.000279f, 0.000389f, 0.005386f, +0.000013f, 0.000065f, 0.000044f, 0.000597f, 0.000019f, 0.000144f, 0.000099f, 0.000676f, 0.025379f, 0.118737f, 0.051706f, 1.124783f, 0.000012f, 0.000034f, 0.000035f, 0.000487f, +0.000002f, 0.000010f, 0.000003f, 0.000083f, 0.000001f, 0.000010f, 0.000003f, 0.000044f, 0.004312f, 0.020260f, 0.003900f, 0.174613f, 0.000001f, 0.000002f, 0.000001f, 0.000027f, +0.000003f, 0.000012f, 0.000008f, 0.000106f, 0.000002f, 0.000015f, 0.000009f, 0.000064f, 0.002873f, 0.011893f, 0.004688f, 0.104746f, 0.000002f, 0.000004f, 0.000004f, 0.000052f, +0.000005f, 0.000005f, 0.000009f, 0.000261f, 0.000000f, 0.000000f, 0.000001f, 0.000011f, 0.003610f, 0.003211f, 0.003517f, 0.171156f, 0.000002f, 0.000001f, 0.000003f, 0.000096f, +0.000000f, 0.000001f, 0.000001f, 0.000029f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000474f, 0.000825f, 0.000666f, 0.032493f, 0.000000f, 0.000000f, 0.000001f, 0.000016f, +0.000000f, 0.000000f, 0.000000f, 0.000010f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000195f, 0.000340f, 0.000121f, 0.012190f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000001f, 0.000001f, 0.000024f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000248f, 0.000383f, 0.000280f, 0.014006f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, +0.000487f, 0.000314f, 0.000776f, 0.007221f, 0.030493f, 0.028932f, 0.072757f, 0.337718f, 3.584317f, 2.143061f, 3.411170f, 50.383549f, 0.013263f, 0.004941f, 0.018600f, 0.175076f, +0.000048f, 0.000061f, 0.000110f, 0.001028f, 
0.004252f, 0.007899f, 0.014648f, 0.068123f, 0.599912f, 0.702209f, 0.824235f, 12.197456f, 0.001960f, 0.001429f, 0.003967f, 0.037417f, +0.000006f, 0.000008f, 0.000006f, 0.000118f, 0.000251f, 0.000467f, 0.000383f, 0.003667f, 0.084670f, 0.099539f, 0.051652f, 1.573061f, 0.000098f, 0.000072f, 0.000088f, 0.001707f, +0.000023f, 0.000025f, 0.000042f, 0.000400f, 0.001070f, 0.001758f, 0.002951f, 0.014096f, 0.148411f, 0.153711f, 0.163327f, 2.482474f, 0.000556f, 0.000359f, 0.000901f, 0.008727f, +0.000000f, 0.000000f, 0.000000f, 0.000009f, 0.000009f, 0.000014f, 0.000016f, 0.000286f, 0.010970f, 0.010218f, 0.007267f, 0.419417f, 0.000009f, 0.000005f, 0.000009f, 0.000319f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000012f, 0.000382f, 0.000696f, 0.000365f, 0.021099f, 0.000000f, 0.000000f, 0.000000f, 0.000014f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000145f, 0.000266f, 0.000062f, 0.007338f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000221f, 0.000356f, 0.000169f, 0.010034f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, +0.000021f, 0.000032f, 0.000047f, 0.000784f, 0.001420f, 0.003239f, 0.004900f, 0.040428f, 0.598119f, 0.860026f, 0.823349f, 21.617223f, 0.000413f, 0.000370f, 0.000838f, 0.014025f, +0.000001f, 0.000004f, 0.000004f, 0.000067f, 0.000119f, 0.000531f, 0.000592f, 0.004897f, 0.060115f, 0.169223f, 0.119467f, 3.142650f, 0.000037f, 0.000064f, 0.000107f, 0.001800f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000004f, 0.000018f, 0.000009f, 0.000152f, 0.004889f, 0.013822f, 0.004314f, 0.233538f, 0.000001f, 0.000002f, 0.000001f, 0.000047f, +0.000000f, 0.000000f, 0.000000f, 0.000007f, 0.000008f, 0.000032f, 0.000032f, 0.000273f, 0.004012f, 0.009994f, 0.006387f, 0.172558f, 0.000003f, 0.000004f, 0.000007f, 0.000113f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 
0.000382f, 0.000204f, 0.000363f, 0.021365f, 0.000000f, 0.000000f, 0.000000f, 0.000016f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000071f, 0.000075f, 0.000098f, 0.005758f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000014f, 0.000015f, 0.000009f, 0.001034f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000022f, 0.000020f, 0.000024f, 0.001463f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +1.749249f, 1.120095f, 3.425753f, 4.231986f, 1.535411f, 1.444313f, 4.503681f, 2.773909f, 44.361767f, 26.296606f, 51.900643f, 101.719130f, 1.874229f, 0.692254f, 3.231244f, 4.035813f, +0.337731f, 0.423376f, 0.954867f, 1.181854f, 0.420143f, 0.773722f, 1.779128f, 1.097904f, 14.568709f, 16.906857f, 24.606607f, 48.318620f, 0.543365f, 0.392904f, 1.352402f, 1.692389f, +0.143213f, 0.180310f, 0.179784f, 0.457941f, 0.083347f, 0.154155f, 0.156709f, 0.199016f, 6.923678f, 8.069766f, 5.192363f, 20.982843f, 0.091342f, 0.066335f, 0.100944f, 0.259963f, +0.290734f, 0.322486f, 0.658410f, 0.837001f, 0.192081f, 0.312992f, 0.651515f, 0.412943f, 6.551360f, 6.727198f, 8.863221f, 17.875694f, 0.280012f, 0.179156f, 0.558240f, 0.717503f, +0.005635f, 0.005621f, 0.007682f, 0.037083f, 0.003482f, 0.005103f, 0.007110f, 0.017112f, 0.989253f, 0.913534f, 0.805625f, 6.169743f, 0.009149f, 0.005264f, 0.010979f, 0.053583f, +0.000226f, 0.000442f, 0.000445f, 0.002152f, 0.000198f, 0.000568f, 0.000584f, 0.001407f, 0.067509f, 0.122047f, 0.079369f, 0.609004f, 0.000551f, 0.000621f, 0.000955f, 0.004669f, +0.000259f, 0.000507f, 0.000226f, 0.002249f, 0.000106f, 0.000305f, 0.000139f, 0.000688f, 0.086521f, 0.157099f, 0.045166f, 0.713208f, 0.000250f, 0.000283f, 0.000192f, 0.001934f, +0.000455f, 0.000786f, 0.000717f, 0.003561f, 0.000212f, 0.000537f, 0.000499f, 0.001237f, 0.070935f, 0.113472f, 0.066801f, 
0.526448f, 0.000664f, 0.000661f, 0.000921f, 0.004625f, +0.117831f, 0.181450f, 0.333781f, 0.732961f, 0.113962f, 0.257805f, 0.483507f, 0.529368f, 11.801297f, 16.823481f, 19.970677f, 69.574990f, 0.093088f, 0.082686f, 0.232135f, 0.515386f, +0.013661f, 0.041185f, 0.055868f, 0.122918f, 0.018726f, 0.082934f, 0.114698f, 0.125819f, 2.327327f, 6.495237f, 5.685749f, 19.846364f, 0.016206f, 0.028182f, 0.058343f, 0.129783f, +0.003338f, 0.010107f, 0.006061f, 0.027444f, 0.002141f, 0.009521f, 0.005821f, 0.013142f, 0.637321f, 1.786398f, 0.691331f, 4.966104f, 0.001570f, 0.002742f, 0.002509f, 0.011487f, +0.003173f, 0.008464f, 0.010393f, 0.023486f, 0.002310f, 0.009051f, 0.011332f, 0.012767f, 0.282353f, 0.697253f, 0.552524f, 1.980857f, 0.002253f, 0.003467f, 0.006497f, 0.014844f, +0.009359f, 0.005365f, 0.018302f, 0.090082f, 0.000489f, 0.000411f, 0.001431f, 0.003511f, 0.555808f, 0.294954f, 0.649338f, 5.070484f, 0.005018f, 0.001659f, 0.008639f, 0.042993f, +0.002011f, 0.002257f, 0.005679f, 0.028003f, 0.000149f, 0.000245f, 0.000629f, 0.001547f, 0.203182f, 0.211088f, 0.342686f, 2.681072f, 0.001619f, 0.001048f, 0.004025f, 0.020068f, +0.001188f, 0.001339f, 0.001489f, 0.015109f, 0.000041f, 0.000068f, 0.000077f, 0.000390f, 0.134456f, 0.140295f, 0.100691f, 1.621211f, 0.000379f, 0.000246f, 0.000418f, 0.004292f, +0.002162f, 0.002147f, 0.004890f, 0.024766f, 0.000085f, 0.000124f, 0.000288f, 0.000727f, 0.114099f, 0.104887f, 0.154143f, 1.238636f, 0.001042f, 0.000597f, 0.002075f, 0.010625f, +0.002548f, 0.003075f, 0.007927f, 0.038363f, 0.023970f, 0.042498f, 0.111694f, 0.269507f, 1.141222f, 1.275065f, 2.121082f, 16.285615f, 0.001551f, 0.001080f, 0.004248f, 0.020785f, +0.000143f, 0.000337f, 0.000640f, 0.003104f, 0.001900f, 0.006596f, 0.012783f, 0.030903f, 0.108577f, 0.237494f, 0.291336f, 2.241162f, 0.000130f, 0.000178f, 0.000515f, 0.002525f, +0.000030f, 0.000072f, 0.000060f, 0.000601f, 0.000188f, 0.000656f, 0.000562f, 0.002797f, 0.025768f, 0.056608f, 0.030700f, 0.486017f, 0.000011f, 0.000015f, 0.000019f, 
0.000194f, +0.000146f, 0.000304f, 0.000524f, 0.002607f, 0.001030f, 0.003165f, 0.005553f, 0.013788f, 0.057918f, 0.112095f, 0.124479f, 0.983519f, 0.000080f, 0.000096f, 0.000252f, 0.001270f, +0.000000f, 0.000001f, 0.000001f, 0.000016f, 0.000003f, 0.000007f, 0.000008f, 0.000080f, 0.001225f, 0.002133f, 0.001585f, 0.047561f, 0.000000f, 0.000000f, 0.000001f, 0.000013f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000024f, 0.000083f, 0.000045f, 0.001360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000016f, 0.000053f, 0.000013f, 0.000795f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000030f, 0.000091f, 0.000045f, 0.001395f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000139f, 0.000404f, 0.000627f, 0.005393f, 0.001444f, 0.006158f, 0.009734f, 0.041749f, 0.246433f, 0.662151f, 0.662501f, 9.041985f, 0.000063f, 0.000105f, 0.000248f, 0.002155f, +0.000005f, 0.000027f, 0.000030f, 0.000262f, 0.000069f, 0.000574f, 0.000669f, 0.002875f, 0.014079f, 0.074062f, 0.054643f, 0.747220f, 0.000003f, 0.000010f, 0.000018f, 0.000157f, +0.000001f, 0.000003f, 0.000002f, 0.000029f, 0.000004f, 0.000033f, 0.000017f, 0.000150f, 0.001925f, 0.010172f, 0.003318f, 0.093371f, 0.000000f, 0.000001f, 0.000000f, 0.000007f, +0.000001f, 0.000006f, 0.000007f, 0.000059f, 0.000010f, 0.000074f, 0.000078f, 0.000346f, 0.002026f, 0.009431f, 0.006299f, 0.088467f, 0.000001f, 0.000002f, 0.000002f, 0.000021f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000056f, 0.000056f, 0.000104f, 0.003185f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000012f, 0.000016f, 0.000488f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000004f, 0.000002f, 0.000147f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000007f, 0.000008f, 0.000267f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.068940f, 0.107256f, 0.112069f, 0.763365f, 0.132477f, 0.302777f, 0.322545f, 1.095401f, 5.449895f, 7.849200f, 5.292483f, 57.193660f, 0.083617f, 0.075039f, 0.119660f, 0.824080f, +0.005469f, 0.016658f, 0.012835f, 0.087597f, 0.014895f, 0.066648f, 0.052356f, 0.178150f, 0.735429f, 2.073619f, 1.031048f, 11.163492f, 0.009961f, 0.017500f, 0.020579f, 0.141997f, +0.002267f, 0.006934f, 0.002362f, 0.033172f, 0.002888f, 0.012978f, 0.004507f, 0.031561f, 0.341582f, 0.967309f, 0.212633f, 4.737923f, 0.001637f, 0.002888f, 0.001501f, 0.021317f, +0.009088f, 0.024492f, 0.017084f, 0.119747f, 0.013145f, 0.052041f, 0.037008f, 0.129337f, 0.638355f, 1.592618f, 0.716853f, 7.971863f, 0.009908f, 0.015403f, 0.016397f, 0.116202f, +0.000008f, 0.000020f, 0.000009f, 0.000248f, 0.000011f, 0.000040f, 0.000019f, 0.000250f, 0.004505f, 0.010107f, 0.003045f, 0.128580f, 0.000015f, 0.000021f, 0.000015f, 0.000406f, +0.000000f, 0.000001f, 0.000000f, 0.000006f, 0.000000f, 0.000002f, 0.000001f, 0.000008f, 0.000126f, 0.000555f, 0.000123f, 0.005215f, 0.000000f, 0.000001f, 0.000001f, 0.000015f, +0.000000f, 0.000001f, 0.000000f, 0.000006f, 0.000000f, 0.000001f, 0.000000f, 0.000004f, 0.000158f, 0.000698f, 0.000069f, 0.005969f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, +0.000001f, 0.000002f, 0.000001f, 0.000019f, 0.000001f, 0.000003f, 0.000001f, 0.000014f, 0.000256f, 0.000996f, 0.000200f, 0.008702f, 0.000001f, 0.000002f, 0.000001f, 0.000028f, +0.001892f, 0.007079f, 0.004449f, 0.053868f, 0.004006f, 0.022020f, 0.014109f, 0.085173f, 0.590708f, 2.046002f, 0.829744f, 15.939069f, 0.001692f, 0.003652f, 0.003503f, 0.042878f, +0.000090f, 0.000660f, 0.000306f, 0.003712f, 0.000271f, 0.002911f, 0.001375f, 
0.008318f, 0.047868f, 0.324583f, 0.097069f, 1.868233f, 0.000121f, 0.000511f, 0.000362f, 0.004437f, +0.000022f, 0.000158f, 0.000032f, 0.000810f, 0.000030f, 0.000327f, 0.000068f, 0.000849f, 0.012811f, 0.087246f, 0.011535f, 0.456882f, 0.000011f, 0.000049f, 0.000015f, 0.000384f, +0.000040f, 0.000262f, 0.000110f, 0.001369f, 0.000064f, 0.000613f, 0.000262f, 0.001629f, 0.011210f, 0.067256f, 0.018208f, 0.359927f, 0.000032f, 0.000121f, 0.000078f, 0.000980f, +0.000060f, 0.000083f, 0.000097f, 0.002639f, 0.000007f, 0.000014f, 0.000017f, 0.000225f, 0.011090f, 0.014300f, 0.010755f, 0.463059f, 0.000036f, 0.000029f, 0.000052f, 0.001426f, +0.000005f, 0.000014f, 0.000012f, 0.000337f, 0.000001f, 0.000003f, 0.000003f, 0.000041f, 0.001666f, 0.004205f, 0.002332f, 0.100609f, 0.000005f, 0.000008f, 0.000010f, 0.000273f, +0.000003f, 0.000008f, 0.000003f, 0.000178f, 0.000000f, 0.000001f, 0.000000f, 0.000010f, 0.001077f, 0.002731f, 0.000670f, 0.059457f, 0.000001f, 0.000002f, 0.000001f, 0.000057f, +0.000011f, 0.000026f, 0.000021f, 0.000575f, 0.000001f, 0.000003f, 0.000003f, 0.000037f, 0.001806f, 0.004033f, 0.002025f, 0.089719f, 0.000006f, 0.000008f, 0.000010f, 0.000279f, +0.005100f, 0.004774f, 0.008087f, 0.066611f, 0.632926f, 0.870370f, 1.503144f, 6.173163f, 10.035947f, 8.696888f, 9.506641f, 124.233770f, 0.203693f, 0.109985f, 0.284334f, 2.367955f, +0.000574f, 0.001053f, 0.001315f, 0.010850f, 0.101018f, 0.271956f, 0.346347f, 1.425120f, 1.922393f, 3.261362f, 2.628921f, 34.420970f, 0.034444f, 0.036411f, 0.069412f, 0.579180f, +0.000114f, 0.000210f, 0.000116f, 0.001967f, 0.009375f, 0.025349f, 0.014272f, 0.120856f, 0.427416f, 0.728266f, 0.259528f, 6.993037f, 0.002709f, 0.002876f, 0.002424f, 0.041622f, +0.000563f, 0.000912f, 0.001032f, 0.008745f, 0.052561f, 0.125207f, 0.144348f, 0.610039f, 0.983861f, 1.476900f, 1.077701f, 14.492809f, 0.020202f, 0.018895f, 0.032609f, 0.279459f, +0.000004f, 0.000005f, 0.000004f, 0.000127f, 0.000312f, 0.000669f, 0.000516f, 0.008284f, 0.048685f, 0.065724f, 0.032101f, 
1.639230f, 0.000216f, 0.000182f, 0.000210f, 0.006839f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000010f, 0.000043f, 0.000025f, 0.000397f, 0.001938f, 0.005122f, 0.001845f, 0.094377f, 0.000008f, 0.000013f, 0.000011f, 0.000348f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000003f, 0.000011f, 0.000003f, 0.000091f, 0.001162f, 0.003084f, 0.000491f, 0.051707f, 0.000002f, 0.000003f, 0.000001f, 0.000067f, +0.000000f, 0.000000f, 0.000000f, 0.000008f, 0.000013f, 0.000047f, 0.000024f, 0.000397f, 0.002317f, 0.005419f, 0.001767f, 0.092850f, 0.000010f, 0.000015f, 0.000012f, 0.000392f, +0.000153f, 0.000344f, 0.000350f, 0.005124f, 0.020867f, 0.069008f, 0.071681f, 0.523287f, 1.185894f, 2.471424f, 1.624853f, 37.744795f, 0.004494f, 0.005835f, 0.009073f, 0.134320f, +0.000010f, 0.000045f, 0.000034f, 0.000501f, 0.002000f, 0.012948f, 0.009918f, 0.072544f, 0.136410f, 0.556542f, 0.269824f, 6.279953f, 0.000456f, 0.001160f, 0.001330f, 0.019729f, +0.000001f, 0.000005f, 0.000002f, 0.000052f, 0.000107f, 0.000695f, 0.000236f, 0.003545f, 0.017476f, 0.071610f, 0.015349f, 0.735164f, 0.000021f, 0.000053f, 0.000027f, 0.000817f, +0.000003f, 0.000011f, 0.000007f, 0.000109f, 0.000281f, 0.001608f, 0.001115f, 0.008378f, 0.018835f, 0.067994f, 0.029842f, 0.713361f, 0.000072f, 0.000162f, 0.000169f, 0.002568f, +0.000000f, 0.000000f, 0.000000f, 0.000016f, 0.000002f, 0.000003f, 0.000005f, 0.000088f, 0.001412f, 0.001095f, 0.001336f, 0.069543f, 0.000006f, 0.000003f, 0.000009f, 0.000283f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000001f, 0.000001f, 0.000023f, 0.000301f, 0.000457f, 0.000411f, 0.021448f, 0.000001f, 0.000001f, 0.000002f, 0.000077f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000093f, 0.000142f, 0.000057f, 0.006067f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000001f, 0.000001f, 0.000012f, 0.000192f, 0.000259f, 0.000210f, 0.011277f, 0.000001f, 0.000001f, 0.000001f, 
0.000046f, +10.030623f, 9.309129f, 19.552601f, 21.370745f, 17.445536f, 23.784745f, 50.933032f, 27.755622f, 67.993187f, 58.416260f, 79.177418f, 137.296140f, 15.757024f, 8.435193f, 27.039182f, 29.880098f, +2.216414f, 4.027012f, 6.237263f, 6.830342f, 5.463364f, 14.582276f, 23.027246f, 12.572636f, 25.555280f, 42.983350f, 42.961887f, 74.640287f, 5.228131f, 5.479217f, 12.951873f, 14.340162f, +1.480568f, 2.701735f, 1.849989f, 4.169210f, 1.707327f, 4.576822f, 3.195185f, 3.590186f, 19.132096f, 32.319465f, 14.281130f, 51.060985f, 1.384490f, 1.457283f, 1.522907f, 3.470015f, +3.947199f, 6.345737f, 8.897366f, 10.007329f, 5.167272f, 12.203577f, 17.445060f, 9.782852f, 23.774144f, 35.382219f, 32.013772f, 57.126176f, 5.573728f, 5.168662f, 11.060153f, 12.577405f, +0.051219f, 0.074053f, 0.069498f, 0.296819f, 0.062713f, 0.133199f, 0.127449f, 0.271389f, 2.403297f, 3.216639f, 1.948072f, 13.199778f, 0.121913f, 0.101671f, 0.145624f, 0.628817f, +0.002352f, 0.006657f, 0.004607f, 0.019713f, 0.004081f, 0.016969f, 0.011973f, 0.025545f, 0.187700f, 0.491824f, 0.219648f, 1.491154f, 0.008406f, 0.013723f, 0.014495f, 0.062710f, +0.004237f, 0.012044f, 0.003685f, 0.032450f, 0.003439f, 0.014363f, 0.004480f, 0.019672f, 0.378959f, 0.997287f, 0.196904f, 2.750962f, 0.006003f, 0.009843f, 0.004596f, 0.040922f, +0.009786f, 0.024510f, 0.015355f, 0.067487f, 0.009019f, 0.033183f, 0.021195f, 0.046445f, 0.408014f, 0.945980f, 0.382445f, 2.666685f, 0.020939f, 0.030249f, 0.028922f, 0.128517f, +0.478455f, 1.067868f, 1.349015f, 2.620974f, 0.916909f, 3.006324f, 3.872046f, 3.750789f, 12.808327f, 26.464059f, 21.573860f, 66.499051f, 0.554182f, 0.713458f, 1.375533f, 2.702029f, +0.063486f, 0.277401f, 0.258418f, 0.503038f, 0.172432f, 1.106823f, 1.051233f, 1.020267f, 2.890836f, 11.693339f, 7.029520f, 21.709316f, 0.110418f, 0.278297f, 0.395663f, 0.778714f, +0.024437f, 0.107239f, 0.044165f, 0.176928f, 0.031050f, 0.200171f, 0.084050f, 0.167876f, 1.247069f, 5.066263f, 1.346450f, 8.557502f, 0.016849f, 0.042650f, 0.026807f, 
0.108578f, +0.030503f, 0.117932f, 0.099452f, 0.198839f, 0.043999f, 0.249899f, 0.214859f, 0.214179f, 0.725557f, 2.596854f, 1.413198f, 4.482619f, 0.031759f, 0.070826f, 0.091155f, 0.184263f, +0.070841f, 0.058858f, 0.137893f, 0.600492f, 0.007327f, 0.008943f, 0.021362f, 0.046381f, 1.124536f, 0.864927f, 1.307649f, 9.034351f, 0.055693f, 0.026691f, 0.095433f, 0.420183f, +0.017424f, 0.028342f, 0.048964f, 0.213637f, 0.002554f, 0.006103f, 0.010751f, 0.023386f, 0.470475f, 0.708424f, 0.789806f, 5.467127f, 0.020569f, 0.019299f, 0.050885f, 0.224470f, +0.016207f, 0.026477f, 0.020223f, 0.181581f, 0.001111f, 0.002667f, 0.002077f, 0.009299f, 0.490455f, 0.741718f, 0.365579f, 5.207827f, 0.007585f, 0.007147f, 0.008331f, 0.075634f, +0.038751f, 0.055771f, 0.087224f, 0.390877f, 0.003017f, 0.006378f, 0.010171f, 0.022724f, 0.546571f, 0.728224f, 0.734955f, 5.225260f, 0.027385f, 0.022734f, 0.054263f, 0.245856f, +0.023394f, 0.040922f, 0.072444f, 0.310194f, 0.436082f, 1.120607f, 2.022584f, 4.317926f, 2.800738f, 4.535365f, 5.181218f, 35.197017f, 0.020879f, 0.021067f, 0.056917f, 0.246405f, +0.001498f, 0.005128f, 0.006695f, 0.028722f, 0.039564f, 0.199039f, 0.264915f, 0.566640f, 0.304961f, 0.966798f, 0.814462f, 5.543422f, 0.002007f, 0.003964f, 0.007898f, 0.034259f, +0.000500f, 0.001718f, 0.000992f, 0.008755f, 0.006174f, 0.031196f, 0.018356f, 0.080803f, 0.114013f, 0.363018f, 0.135201f, 1.893749f, 0.000265f, 0.000527f, 0.000464f, 0.004140f, +0.003164f, 0.009586f, 0.011329f, 0.049917f, 0.044388f, 0.197588f, 0.238066f, 0.523008f, 0.336535f, 0.944021f, 0.719922f, 5.032701f, 0.002538f, 0.004436f, 0.008001f, 0.035643f, +0.000006f, 0.000016f, 0.000012f, 0.000207f, 0.000075f, 0.000302f, 0.000244f, 0.002033f, 0.004766f, 0.012024f, 0.006138f, 0.162929f, 0.000008f, 0.000012f, 0.000015f, 0.000250f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000001f, 0.000011f, 0.000007f, 0.000055f, 0.000108f, 0.000533f, 0.000200f, 0.005332f, 0.000000f, 0.000000f, 0.000000f, 0.000007f, +0.000000f, 0.000000f, 0.000000f, 
0.000003f, 0.000001f, 0.000005f, 0.000001f, 0.000021f, 0.000109f, 0.000539f, 0.000090f, 0.004912f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000002f, 0.000001f, 0.000016f, 0.000004f, 0.000026f, 0.000014f, 0.000120f, 0.000278f, 0.001215f, 0.000414f, 0.011312f, 0.000000f, 0.000001f, 0.000001f, 0.000018f, +0.000906f, 0.003810f, 0.004057f, 0.030881f, 0.018605f, 0.114974f, 0.124812f, 0.473648f, 0.428261f, 1.667798f, 1.145954f, 13.837949f, 0.000596f, 0.001446f, 0.002350f, 0.018087f, +0.000035f, 0.000287f, 0.000225f, 0.001717f, 0.001014f, 0.012263f, 0.009817f, 0.037325f, 0.028002f, 0.213493f, 0.108174f, 1.308758f, 0.000034f, 0.000163f, 0.000196f, 0.001510f, +0.000007f, 0.000055f, 0.000019f, 0.000302f, 0.000091f, 0.001108f, 0.000392f, 0.003067f, 0.006032f, 0.046191f, 0.010347f, 0.257626f, 0.000003f, 0.000013f, 0.000007f, 0.000105f, +0.000020f, 0.000145f, 0.000103f, 0.000805f, 0.000307f, 0.003284f, 0.002380f, 0.009295f, 0.008337f, 0.056241f, 0.025796f, 0.320558f, 0.000012f, 0.000049f, 0.000054f, 0.000424f, +0.000001f, 0.000001f, 0.000002f, 0.000034f, 0.000001f, 0.000002f, 0.000003f, 0.000028f, 0.000182f, 0.000263f, 0.000336f, 0.009086f, 0.000000f, 0.000000f, 0.000001f, 0.000014f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000022f, 0.000063f, 0.000059f, 0.001593f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000011f, 0.000033f, 0.000014f, 0.000758f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000008f, 0.000000f, 0.000000f, 0.000001f, 0.000005f, 0.000030f, 0.000076f, 0.000065f, 0.001806f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.006535f, 0.007605f, 0.009122f, 0.119356f, 0.009773f, 0.016707f, 0.020432f, 0.133286f, 1.965775f, 2.117584f, 1.639182f, 34.026089f, 0.006017f, 0.004038f, 0.007393f, 0.097801f, +0.000446f, 0.001015f, 0.000898f, 0.011772f, 0.000944f, 0.003161f, 0.002851f, 
0.018631f, 0.227997f, 0.480825f, 0.274466f, 5.708296f, 0.000616f, 0.000809f, 0.001093f, 0.014484f, +0.000112f, 0.000256f, 0.000100f, 0.002704f, 0.000111f, 0.000373f, 0.000149f, 0.002002f, 0.064239f, 0.136062f, 0.034336f, 1.469629f, 0.000061f, 0.000081f, 0.000048f, 0.001319f, +0.000547f, 0.001103f, 0.000884f, 0.011896f, 0.000616f, 0.001824f, 0.001490f, 0.009999f, 0.146298f, 0.272997f, 0.141068f, 3.013388f, 0.000453f, 0.000527f, 0.000644f, 0.008762f, +0.000000f, 0.000001f, 0.000000f, 0.000014f, 0.000000f, 0.000001f, 0.000000f, 0.000011f, 0.000588f, 0.000987f, 0.000341f, 0.027684f, 0.000000f, 0.000000f, 0.000000f, 0.000017f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000014f, 0.000047f, 0.000012f, 0.000965f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000011f, 0.000036f, 0.000004f, 0.000670f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000021f, 0.000062f, 0.000014f, 0.001190f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000202f, 0.000565f, 0.000408f, 0.009485f, 0.000333f, 0.001368f, 0.001006f, 0.011671f, 0.239942f, 0.621596f, 0.289400f, 10.678587f, 0.000137f, 0.000221f, 0.000244f, 0.005731f, +0.000008f, 0.000045f, 0.000024f, 0.000562f, 0.000019f, 0.000155f, 0.000084f, 0.000980f, 0.016712f, 0.084756f, 0.029099f, 1.075781f, 0.000008f, 0.000027f, 0.000022f, 0.000510f, +0.000001f, 0.000007f, 0.000002f, 0.000074f, 0.000001f, 0.000011f, 0.000003f, 0.000061f, 0.002713f, 0.013820f, 0.002098f, 0.159592f, 0.000000f, 0.000002f, 0.000001f, 0.000027f, +0.000003f, 0.000013f, 0.000006f, 0.000153f, 0.000003f, 0.000024f, 0.000012f, 0.000142f, 0.002893f, 0.012983f, 0.004035f, 0.153213f, 0.000002f, 0.000005f, 0.000003f, 0.000083f, +0.000001f, 0.000001f, 0.000002f, 0.000105f, 0.000000f, 0.000000f, 0.000000f, 0.000007f, 0.001014f, 0.000978f, 0.000844f, 
0.069830f, 0.000001f, 0.000000f, 0.000001f, 0.000043f, +0.000000f, 0.000000f, 0.000000f, 0.000011f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000131f, 0.000247f, 0.000157f, 0.013040f, 0.000000f, 0.000000f, 0.000000f, 0.000007f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000051f, 0.000097f, 0.000027f, 0.004675f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000014f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000105f, 0.000175f, 0.000101f, 0.008596f, 0.000000f, 0.000000f, 0.000000f, 0.000005f, +0.000458f, 0.000320f, 0.000623f, 0.009860f, 0.044204f, 0.045465f, 0.090143f, 0.711106f, 3.427032f, 2.221229f, 2.787462f, 69.970954f, 0.013875f, 0.005604f, 0.016631f, 0.266049f, +0.000044f, 0.000061f, 0.000087f, 0.001380f, 0.006064f, 0.012210f, 0.017852f, 0.141098f, 0.564215f, 0.715930f, 0.662525f, 16.662633f, 0.002017f, 0.001594f, 0.003490f, 0.055930f, +0.000005f, 0.000007f, 0.000005f, 0.000152f, 0.000341f, 0.000690f, 0.000446f, 0.007259f, 0.076097f, 0.096978f, 0.039675f, 2.053523f, 0.000096f, 0.000076f, 0.000074f, 0.002438f, +0.000032f, 0.000039f, 0.000051f, 0.000822f, 0.002332f, 0.004156f, 0.005500f, 0.044649f, 0.213464f, 0.239669f, 0.200776f, 5.186346f, 0.000874f, 0.000612f, 0.001212f, 0.019950f, +0.000000f, 0.000000f, 0.000000f, 0.000007f, 0.000008f, 0.000013f, 0.000011f, 0.000345f, 0.006016f, 0.006075f, 0.003406f, 0.334120f, 0.000005f, 0.000003f, 0.000004f, 0.000278f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000014f, 0.000206f, 0.000407f, 0.000168f, 0.016534f, 0.000000f, 0.000000f, 0.000000f, 0.000012f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000075f, 0.000149f, 0.000027f, 0.005495f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000011f, 0.000182f, 0.000318f, 0.000119f, 0.012025f, 0.000000f, 0.000000f, 0.000000f, 
0.000010f, +0.000015f, 0.000026f, 0.000030f, 0.000854f, 0.001641f, 0.004059f, 0.004841f, 0.067882f, 0.456029f, 0.710826f, 0.536516f, 23.939884f, 0.000345f, 0.000335f, 0.000598f, 0.016995f, +0.000001f, 0.000003f, 0.000003f, 0.000072f, 0.000135f, 0.000655f, 0.000576f, 0.008088f, 0.045085f, 0.137580f, 0.076576f, 3.423448f, 0.000030f, 0.000057f, 0.000075f, 0.002145f, +0.000000f, 0.000000f, 0.000000f, 0.000005f, 0.000004f, 0.000021f, 0.000008f, 0.000240f, 0.003504f, 0.010739f, 0.002642f, 0.243111f, 0.000001f, 0.000002f, 0.000001f, 0.000054f, +0.000000f, 0.000001f, 0.000000f, 0.000012f, 0.000014f, 0.000060f, 0.000048f, 0.000691f, 0.004602f, 0.012426f, 0.006261f, 0.287479f, 0.000004f, 0.000006f, 0.000007f, 0.000206f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000122f, 0.000071f, 0.000099f, 0.009928f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000022f, 0.000025f, 0.000026f, 0.002632f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000005f, 0.000002f, 0.000452f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000011f, 0.000011f, 0.000010f, 0.001023f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +1.953789f, 1.356214f, 3.270213f, 6.865748f, 2.644444f, 2.696611f, 6.629360f, 6.939361f, 50.392641f, 32.382087f, 50.387742f, 167.833310f, 2.329629f, 0.932776f, 3.432639f, 7.286399f, +0.371059f, 0.504249f, 0.896620f, 1.886049f, 0.711791f, 1.420978f, 2.576063f, 2.701701f, 16.278884f, 20.479235f, 23.498994f, 78.421591f, 0.664357f, 0.520766f, 1.413219f, 3.005574f, +0.150360f, 0.205219f, 0.161323f, 0.698357f, 0.134934f, 0.270545f, 0.216832f, 0.467995f, 7.392990f, 9.340955f, 4.738514f, 32.543545f, 0.106723f, 0.084020f, 0.100801f, 0.441182f, +0.488506f, 0.587398f, 
0.945506f, 2.042760f, 0.497671f, 0.879100f, 1.442700f, 1.554050f, 11.195345f, 12.461984f, 12.944685f, 44.369664f, 0.523588f, 0.363155f, 0.892127f, 1.948736f, +0.003611f, 0.003904f, 0.004207f, 0.034510f, 0.003440f, 0.005465f, 0.006003f, 0.024555f, 0.644605f, 0.645295f, 0.448656f, 5.839439f, 0.006523f, 0.004069f, 0.006690f, 0.055493f, +0.000142f, 0.000302f, 0.000240f, 0.001970f, 0.000192f, 0.000598f, 0.000485f, 0.001987f, 0.043271f, 0.084802f, 0.043479f, 0.566982f, 0.000387f, 0.000472f, 0.000572f, 0.004757f, +0.000156f, 0.000331f, 0.000116f, 0.001967f, 0.000098f, 0.000307f, 0.000110f, 0.000928f, 0.052995f, 0.104311f, 0.023644f, 0.634519f, 0.000167f, 0.000205f, 0.000110f, 0.001883f, +0.000438f, 0.000821f, 0.000591f, 0.004985f, 0.000314f, 0.000865f, 0.000634f, 0.002670f, 0.069533f, 0.120578f, 0.055964f, 0.749562f, 0.000712f, 0.000769f, 0.000844f, 0.007206f, +0.104949f, 0.175196f, 0.254083f, 0.948239f, 0.156518f, 0.383833f, 0.567544f, 1.056034f, 10.690077f, 16.520164f, 15.461020f, 91.542361f, 0.092268f, 0.088846f, 0.196649f, 0.742006f, +0.011969f, 0.039116f, 0.041833f, 0.156422f, 0.025299f, 0.121458f, 0.132434f, 0.246895f, 2.073741f, 6.273920f, 4.329906f, 25.685934f, 0.015801f, 0.029786f, 0.048617f, 0.183797f, +0.002795f, 0.009173f, 0.004337f, 0.033374f, 0.002763f, 0.013325f, 0.006423f, 0.024643f, 0.542669f, 1.648927f, 0.503102f, 6.141993f, 0.001463f, 0.002769f, 0.001998f, 0.015546f, +0.004251f, 0.012293f, 0.011902f, 0.045707f, 0.004772f, 0.020272f, 0.020010f, 0.038315f, 0.384761f, 1.029998f, 0.643494f, 3.920756f, 0.003360f, 0.005604f, 0.008280f, 0.032150f, +0.003498f, 0.002174f, 0.005846f, 0.048901f, 0.000282f, 0.000257f, 0.000705f, 0.002939f, 0.211258f, 0.121532f, 0.210937f, 2.799339f, 0.002087f, 0.000748f, 0.003071f, 0.025972f, +0.000739f, 0.000900f, 0.001784f, 0.014953f, 0.000084f, 0.000151f, 0.000305f, 0.001274f, 0.075966f, 0.085555f, 0.109503f, 1.455996f, 0.000663f, 0.000465f, 0.001407f, 0.011925f, +0.000417f, 0.000510f, 0.000447f, 0.007710f, 0.000022f, 0.000040f, 
0.000036f, 0.000307f, 0.048039f, 0.054338f, 0.030747f, 0.841338f, 0.000148f, 0.000104f, 0.000140f, 0.002437f, +0.001216f, 0.001309f, 0.002350f, 0.020224f, 0.000074f, 0.000116f, 0.000213f, 0.000915f, 0.065241f, 0.065014f, 0.075328f, 1.028720f, 0.000652f, 0.000405f, 0.001109f, 0.009656f, +0.002545f, 0.003330f, 0.006768f, 0.055670f, 0.036926f, 0.070973f, 0.147061f, 0.603063f, 1.159560f, 1.404437f, 1.841935f, 24.035034f, 0.001724f, 0.001301f, 0.004036f, 0.033566f, +0.000140f, 0.000359f, 0.000538f, 0.004430f, 0.002879f, 0.010835f, 0.016555f, 0.068020f, 0.108519f, 0.257317f, 0.248860f, 3.253562f, 0.000142f, 0.000210f, 0.000481f, 0.004011f, +0.000028f, 0.000073f, 0.000048f, 0.000819f, 0.000273f, 0.001030f, 0.000696f, 0.005884f, 0.024611f, 0.058610f, 0.025060f, 0.674244f, 0.000011f, 0.000017f, 0.000017f, 0.000294f, +0.000219f, 0.000496f, 0.000673f, 0.005692f, 0.002388f, 0.007951f, 0.010998f, 0.046412f, 0.088528f, 0.185739f, 0.162614f, 2.183589f, 0.000133f, 0.000174f, 0.000361f, 0.003085f, +0.000000f, 0.000000f, 0.000000f, 0.000013f, 0.000002f, 0.000007f, 0.000006f, 0.000103f, 0.000714f, 0.001348f, 0.000790f, 0.040264f, 0.000000f, 0.000000f, 0.000000f, 0.000012f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000014f, 0.000051f, 0.000022f, 0.001133f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000008f, 0.000032f, 0.000006f, 0.000633f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000026f, 0.000087f, 0.000034f, 0.001776f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000111f, 0.000349f, 0.000427f, 0.006241f, 0.001774f, 0.008200f, 0.010220f, 0.074495f, 0.199671f, 0.581595f, 0.458771f, 10.641363f, 0.000055f, 0.000101f, 0.000188f, 0.002775f, +0.000004f, 0.000023f, 0.000020f, 0.000298f, 0.000083f, 0.000752f, 0.000691f, 0.005046f, 0.011221f, 0.063989f, 
0.037221f, 0.865022f, 0.000003f, 0.000010f, 0.000013f, 0.000199f, +0.000000f, 0.000003f, 0.000001f, 0.000032f, 0.000005f, 0.000041f, 0.000017f, 0.000252f, 0.001466f, 0.008398f, 0.002160f, 0.103293f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, +0.000002f, 0.000008f, 0.000007f, 0.000103f, 0.000019f, 0.000149f, 0.000124f, 0.000929f, 0.002470f, 0.012461f, 0.006562f, 0.156626f, 0.000001f, 0.000002f, 0.000003f, 0.000041f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000019f, 0.000021f, 0.000030f, 0.001573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000004f, 0.000005f, 0.000237f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000001f, 0.000068f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000004f, 0.000004f, 0.000199f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.002908f, 0.005136f, 0.004755f, 0.056567f, 0.005034f, 0.013058f, 0.012327f, 0.073113f, 1.307874f, 2.137928f, 1.277357f, 24.108343f, 0.006362f, 0.006480f, 0.009156f, 0.110126f, +0.000112f, 0.000387f, 0.000264f, 0.003148f, 0.000274f, 0.001394f, 0.000970f, 0.005766f, 0.085586f, 0.273895f, 0.120675f, 2.281949f, 0.000368f, 0.000733f, 0.000764f, 0.009202f, +0.000031f, 0.000107f, 0.000032f, 0.000789f, 0.000035f, 0.000180f, 0.000055f, 0.000677f, 0.026326f, 0.084613f, 0.016481f, 0.641378f, 0.000040f, 0.000080f, 0.000037f, 0.000915f, +0.000205f, 0.000627f, 0.000388f, 0.004744f, 0.000267f, 0.001200f, 0.000756f, 0.004615f, 0.081905f, 0.231926f, 0.092502f, 1.796593f, 0.000403f, 0.000711f, 0.000671f, 0.008302f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000223f, 0.000567f, 0.000151f, 0.011168f, 0.000000f, 0.000000f, 
0.000000f, 0.000011f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000015f, 0.000003f, 0.000220f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000013f, 0.000001f, 0.000166f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000007f, 0.000030f, 0.000005f, 0.000404f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000050f, 0.000213f, 0.000118f, 0.002503f, 0.000095f, 0.000596f, 0.000338f, 0.003565f, 0.088897f, 0.349468f, 0.125583f, 4.213239f, 0.000081f, 0.000198f, 0.000168f, 0.003593f, +0.000001f, 0.000010f, 0.000004f, 0.000084f, 0.000003f, 0.000038f, 0.000016f, 0.000169f, 0.003493f, 0.026885f, 0.007124f, 0.239481f, 0.000003f, 0.000013f, 0.000008f, 0.000180f, +0.000000f, 0.000002f, 0.000000f, 0.000012f, 0.000000f, 0.000003f, 0.000001f, 0.000011f, 0.000619f, 0.004786f, 0.000561f, 0.038785f, 0.000000f, 0.000001f, 0.000000f, 0.000010f, +0.000001f, 0.000004f, 0.000002f, 0.000034f, 0.000001f, 0.000009f, 0.000003f, 0.000036f, 0.000902f, 0.006142f, 0.001473f, 0.050867f, 0.000001f, 0.000004f, 0.000002f, 0.000044f, +0.000000f, 0.000001f, 0.000001f, 0.000025f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000338f, 0.000495f, 0.000330f, 0.024797f, 0.000000f, 0.000000f, 0.000001f, 0.000024f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000025f, 0.000071f, 0.000035f, 0.002613f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000011f, 0.000030f, 0.000007f, 0.001023f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000029f, 0.000075f, 0.000033f, 0.002569f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000175f, 0.000186f, 
0.000280f, 0.004022f, 0.019598f, 0.030588f, 0.046810f, 0.335747f, 1.962550f, 1.930263f, 1.869667f, 42.672059f, 0.012628f, 0.007739f, 0.017728f, 0.257857f, +0.000010f, 0.000020f, 0.000022f, 0.000318f, 0.001517f, 0.004635f, 0.005230f, 0.037587f, 0.182302f, 0.351025f, 0.250727f, 5.733420f, 0.001036f, 0.001242f, 0.002099f, 0.030585f, +0.000001f, 0.000003f, 0.000001f, 0.000038f, 0.000093f, 0.000286f, 0.000143f, 0.002111f, 0.026842f, 0.051910f, 0.016392f, 0.771394f, 0.000054f, 0.000065f, 0.000049f, 0.001456f, +0.000010f, 0.000019f, 0.000019f, 0.000282f, 0.000870f, 0.002353f, 0.002403f, 0.017739f, 0.102865f, 0.175256f, 0.113320f, 2.661500f, 0.000670f, 0.000711f, 0.001087f, 0.016270f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000002f, 0.000005f, 0.000003f, 0.000093f, 0.001962f, 0.003006f, 0.001301f, 0.116016f, 0.000003f, 0.000003f, 0.000003f, 0.000153f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000038f, 0.000114f, 0.000036f, 0.003239f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000015f, 0.000045f, 0.000006f, 0.001175f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000050f, 0.000133f, 0.000038f, 0.003513f, 0.000000f, 0.000000f, 0.000000f, 0.000005f, +0.000003f, 0.000008f, 0.000008f, 0.000194f, 0.000405f, 0.001521f, 0.001400f, 0.017848f, 0.145426f, 0.343980f, 0.200394f, 8.130085f, 0.000175f, 0.000257f, 0.000355f, 0.009172f, +0.000000f, 0.000001f, 0.000000f, 0.000009f, 0.000019f, 0.000138f, 0.000094f, 0.001200f, 0.008112f, 0.037564f, 0.016138f, 0.655965f, 0.000009f, 0.000025f, 0.000025f, 0.000653f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000005f, 0.000001f, 0.000039f, 0.000688f, 0.003201f, 0.000608f, 0.050854f, 0.000000f, 0.000001f, 0.000000f, 0.000018f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000003f, 0.000019f, 
0.000012f, 0.000153f, 0.001235f, 0.005060f, 0.001968f, 0.082152f, 0.000002f, 0.000004f, 0.000004f, 0.000094f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000035f, 0.000031f, 0.000033f, 0.003035f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000006f, 0.000005f, 0.000454f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000000f, 0.000085f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000004f, 0.000003f, 0.000263f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.373608f, 0.393538f, 0.732432f, 1.398136f, 0.585277f, 0.905660f, 1.718506f, 1.635570f, 14.405942f, 14.047531f, 16.871454f, 51.094719f, 1.058406f, 0.643077f, 1.826612f, 3.525344f, +0.040034f, 0.082556f, 0.113304f, 0.216700f, 0.088884f, 0.269264f, 0.376773f, 0.359278f, 2.625691f, 5.012485f, 4.439367f, 13.470311f, 0.170299f, 0.202569f, 0.424299f, 0.820465f, +0.017710f, 0.036680f, 0.022256f, 0.087597f, 0.018395f, 0.055968f, 0.034622f, 0.067943f, 1.301804f, 2.495957f, 0.977283f, 6.102581f, 0.029866f, 0.035679f, 0.033039f, 0.131479f, +0.078605f, 0.143427f, 0.178195f, 0.350041f, 0.092685f, 0.248442f, 0.314698f, 0.308215f, 2.693095f, 4.549059f, 3.647190f, 11.366410f, 0.200168f, 0.210677f, 0.399470f, 0.793379f, +0.000393f, 0.000645f, 0.000536f, 0.004001f, 0.000434f, 0.001045f, 0.000886f, 0.003295f, 0.104920f, 0.159383f, 0.085532f, 1.012181f, 0.001687f, 0.001597f, 0.002027f, 0.015287f, +0.000009f, 0.000028f, 0.000017f, 0.000129f, 0.000014f, 0.000065f, 0.000040f, 0.000150f, 0.003974f, 0.011818f, 0.004677f, 0.055450f, 0.000056f, 0.000105f, 0.000098f, 0.000739f, +0.000010f, 0.000034f, 0.000009f, 0.000140f, 0.000008f, 0.000036f, 0.000010f, 0.000077f, 0.005313f, 0.015870f, 
0.002776f, 0.067746f, 0.000027f, 0.000050f, 0.000021f, 0.000319f, +0.000040f, 0.000114f, 0.000063f, 0.000486f, 0.000033f, 0.000139f, 0.000079f, 0.000302f, 0.009523f, 0.025061f, 0.008978f, 0.109329f, 0.000155f, 0.000254f, 0.000215f, 0.001670f, +0.011175f, 0.028309f, 0.031689f, 0.107529f, 0.019290f, 0.071785f, 0.081927f, 0.138604f, 1.701777f, 3.990770f, 2.882788f, 15.519116f, 0.023343f, 0.034109f, 0.058272f, 0.199914f, +0.000719f, 0.003566f, 0.002944f, 0.010008f, 0.001759f, 0.012816f, 0.010786f, 0.018283f, 0.186260f, 0.855117f, 0.455509f, 2.456882f, 0.002255f, 0.006452f, 0.008128f, 0.027939f, +0.000183f, 0.000913f, 0.000333f, 0.002331f, 0.000210f, 0.001535f, 0.000571f, 0.001992f, 0.053212f, 0.245355f, 0.057781f, 0.641365f, 0.000228f, 0.000655f, 0.000365f, 0.002580f, +0.000381f, 0.001672f, 0.001249f, 0.004361f, 0.000495f, 0.003190f, 0.002431f, 0.004232f, 0.051541f, 0.209372f, 0.100962f, 0.559312f, 0.000715f, 0.001810f, 0.002065f, 0.007289f, +0.000335f, 0.000316f, 0.000656f, 0.004991f, 0.000031f, 0.000043f, 0.000092f, 0.000347f, 0.030269f, 0.026423f, 0.035399f, 0.427129f, 0.000475f, 0.000259f, 0.000819f, 0.006298f, +0.000040f, 0.000074f, 0.000113f, 0.000861f, 0.000005f, 0.000014f, 0.000022f, 0.000085f, 0.006141f, 0.010495f, 0.010368f, 0.125345f, 0.000085f, 0.000091f, 0.000212f, 0.001632f, +0.000025f, 0.000046f, 0.000031f, 0.000485f, 0.000002f, 0.000004f, 0.000003f, 0.000022f, 0.004240f, 0.007277f, 0.003178f, 0.079072f, 0.000021f, 0.000022f, 0.000023f, 0.000364f, +0.000098f, 0.000160f, 0.000222f, 0.001737f, 0.000007f, 0.000016f, 0.000023f, 0.000091f, 0.007866f, 0.011894f, 0.010637f, 0.132081f, 0.000125f, 0.000118f, 0.000249f, 0.001970f, +0.001134f, 0.002252f, 0.003532f, 0.026416f, 0.019043f, 0.055542f, 0.088829f, 0.331201f, 0.772409f, 1.419636f, 1.437082f, 17.049910f, 0.001825f, 0.002091f, 0.005005f, 0.037841f, +0.000035f, 0.000137f, 0.000158f, 0.001186f, 0.000838f, 0.004784f, 0.005642f, 0.021077f, 0.040786f, 0.146753f, 0.109549f, 1.302209f, 0.000085f, 0.000191f, 
0.000337f, 0.002551f, +0.000008f, 0.000030f, 0.000016f, 0.000239f, 0.000087f, 0.000497f, 0.000259f, 0.001990f, 0.010098f, 0.036492f, 0.012043f, 0.294609f, 0.000007f, 0.000017f, 0.000013f, 0.000204f, +0.000082f, 0.000282f, 0.000295f, 0.002273f, 0.001036f, 0.005236f, 0.005590f, 0.021448f, 0.049622f, 0.157985f, 0.106759f, 1.303430f, 0.000119f, 0.000235f, 0.000376f, 0.002927f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000001f, 0.000003f, 0.000002f, 0.000032f, 0.000271f, 0.000776f, 0.000351f, 0.016263f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000017f, 0.000006f, 0.000258f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000011f, 0.000002f, 0.000157f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000008f, 0.000042f, 0.000013f, 0.000604f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000028f, 0.000131f, 0.000124f, 0.001649f, 0.000509f, 0.003574f, 0.003437f, 0.022783f, 0.074066f, 0.327372f, 0.199320f, 4.203605f, 0.000033f, 0.000090f, 0.000130f, 0.001742f, +0.000001f, 0.000005f, 0.000003f, 0.000044f, 0.000013f, 0.000185f, 0.000131f, 0.000871f, 0.002349f, 0.020322f, 0.009124f, 0.192795f, 0.000001f, 0.000005f, 0.000005f, 0.000071f, +0.000000f, 0.000001f, 0.000000f, 0.000005f, 0.000001f, 0.000011f, 0.000003f, 0.000047f, 0.000335f, 0.002912f, 0.000578f, 0.025133f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000000f, 0.000003f, 0.000002f, 0.000023f, 0.000004f, 0.000055f, 0.000035f, 0.000239f, 0.000771f, 0.005902f, 0.002399f, 0.052063f, 0.000000f, 0.000002f, 0.000002f, 0.000022f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000010f, 0.000012f, 0.000559f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000048f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000015f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000001f, 0.000059f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.001626f, 0.002765f, 0.002483f, 0.038729f, 0.002252f, 0.005627f, 0.005151f, 0.040066f, 0.723305f, 1.138668f, 0.659831f, 16.330100f, 0.002175f, 0.002133f, 0.002924f, 0.046113f, +0.000071f, 0.000235f, 0.000156f, 0.002431f, 0.000139f, 0.000678f, 0.000457f, 0.003565f, 0.053394f, 0.164557f, 0.070318f, 1.743644f, 0.000142f, 0.000272f, 0.000275f, 0.004347f, +0.000016f, 0.000055f, 0.000016f, 0.000518f, 0.000015f, 0.000074f, 0.000022f, 0.000355f, 0.013961f, 0.043214f, 0.008164f, 0.416597f, 0.000013f, 0.000025f, 0.000011f, 0.000367f, +0.000078f, 0.000231f, 0.000138f, 0.002221f, 0.000082f, 0.000353f, 0.000216f, 0.001729f, 0.030967f, 0.084447f, 0.032667f, 0.831958f, 0.000094f, 0.000160f, 0.000146f, 0.002377f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000137f, 0.000336f, 0.000087f, 0.008410f, 0.000000f, 0.000000f, 0.000000f, 0.000005f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000010f, 0.000002f, 0.000187f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000007f, 0.000001f, 0.000120f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000012f, 0.000002f, 0.000208f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000035f, 0.000142f, 0.000077f, 0.002125f, 0.000053f, 0.000318f, 
0.000175f, 0.002422f, 0.060949f, 0.230747f, 0.080422f, 3.538043f, 0.000034f, 0.000081f, 0.000067f, 0.001865f, +0.000001f, 0.000007f, 0.000003f, 0.000080f, 0.000002f, 0.000023f, 0.000009f, 0.000129f, 0.002702f, 0.020025f, 0.005147f, 0.226855f, 0.000001f, 0.000006f, 0.000004f, 0.000106f, +0.000000f, 0.000001f, 0.000000f, 0.000010f, 0.000000f, 0.000001f, 0.000000f, 0.000007f, 0.000407f, 0.003030f, 0.000344f, 0.031231f, 0.000000f, 0.000000f, 0.000000f, 0.000005f, +0.000000f, 0.000002f, 0.000001f, 0.000020f, 0.000000f, 0.000003f, 0.000001f, 0.000017f, 0.000423f, 0.002772f, 0.000645f, 0.029202f, 0.000000f, 0.000001f, 0.000001f, 0.000016f, +0.000001f, 0.000001f, 0.000001f, 0.000048f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000524f, 0.000738f, 0.000477f, 0.047041f, 0.000000f, 0.000000f, 0.000000f, 0.000028f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000043f, 0.000119f, 0.000057f, 0.005591f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000016f, 0.000043f, 0.000009f, 0.001860f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000031f, 0.000076f, 0.000033f, 0.003331f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000206f, 0.000211f, 0.000306f, 0.005782f, 0.018410f, 0.027672f, 0.041072f, 0.386294f, 2.278767f, 2.158460f, 2.027722f, 60.686092f, 0.009064f, 0.005350f, 0.011886f, 0.226690f, +0.000013f, 0.000025f, 0.000027f, 0.000515f, 0.001607f, 0.004730f, 0.005177f, 0.048784f, 0.238782f, 0.442788f, 0.306744f, 9.197915f, 0.000838f, 0.000969f, 0.001587f, 0.030331f, +0.000001f, 0.000003f, 0.000001f, 0.000053f, 0.000084f, 0.000248f, 0.000120f, 0.002329f, 0.029887f, 0.055662f, 0.017047f, 1.051967f, 0.000037f, 0.000043f, 0.000031f, 0.001227f, +0.000008f, 0.000015f, 0.000014f, 0.000277f, 0.000559f, 0.001455f, 0.001442f, 0.013953f, 0.081654f, 0.133977f, 
0.084020f, 2.587630f, 0.000329f, 0.000336f, 0.000498f, 0.009779f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000002f, 0.000005f, 0.000003f, 0.000119f, 0.002532f, 0.003737f, 0.001568f, 0.183426f, 0.000002f, 0.000002f, 0.000002f, 0.000150f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000055f, 0.000159f, 0.000049f, 0.005777f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000019f, 0.000054f, 0.000007f, 0.001782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000044f, 0.000113f, 0.000032f, 0.003798f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000005f, 0.000012f, 0.000010f, 0.000346f, 0.000472f, 0.001706f, 0.001523f, 0.025457f, 0.209337f, 0.476855f, 0.269435f, 14.333949f, 0.000155f, 0.000221f, 0.000295f, 0.009997f, +0.000000f, 0.000001f, 0.000001f, 0.000019f, 0.000025f, 0.000175f, 0.000115f, 0.001931f, 0.013172f, 0.058743f, 0.024476f, 1.304613f, 0.000009f, 0.000024f, 0.000024f, 0.000803f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000005f, 0.000002f, 0.000053f, 0.000950f, 0.004255f, 0.000784f, 0.085976f, 0.000000f, 0.000001f, 0.000000f, 0.000019f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000002f, 0.000015f, 0.000009f, 0.000149f, 0.001215f, 0.004795f, 0.001809f, 0.099019f, 0.000001f, 0.000002f, 0.000002f, 0.000070f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000114f, 0.000097f, 0.000101f, 0.012086f, 0.000000f, 0.000000f, 0.000000f, 0.000010f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000013f, 0.000022f, 0.000017f, 0.002039f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000004f, 0.000001f, 0.000325f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000008f, 0.000006f, 0.000716f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.501838f, 0.509076f, 0.918924f, 2.300186f, 0.629247f, 0.937721f, 1.725742f, 2.153751f, 19.144367f, 17.978261f, 20.941934f, 83.165222f, 0.869481f, 0.508767f, 1.401584f, 3.547118f, +0.060660f, 0.120469f, 0.160356f, 0.402163f, 0.107799f, 0.314497f, 0.426810f, 0.533688f, 3.936163f, 7.236538f, 6.216064f, 24.732814f, 0.157815f, 0.180784f, 0.367261f, 0.931246f, +0.022811f, 0.045499f, 0.026775f, 0.138192f, 0.018964f, 0.055568f, 0.033340f, 0.085792f, 1.658919f, 3.063128f, 1.163229f, 9.524881f, 0.023527f, 0.027068f, 0.024310f, 0.126856f, +0.072181f, 0.126840f, 0.152840f, 0.393697f, 0.068124f, 0.175858f, 0.216048f, 0.277466f, 2.446701f, 3.980150f, 3.094943f, 12.647923f, 0.112417f, 0.113947f, 0.209550f, 0.545739f, +0.000587f, 0.000928f, 0.000748f, 0.007318f, 0.000518f, 0.001203f, 0.000989f, 0.004824f, 0.155008f, 0.226771f, 0.118030f, 1.831558f, 0.001541f, 0.001405f, 0.001729f, 0.017100f, +0.000015f, 0.000046f, 0.000027f, 0.000266f, 0.000018f, 0.000084f, 0.000051f, 0.000248f, 0.006623f, 0.018968f, 0.007280f, 0.113186f, 0.000058f, 0.000104f, 0.000094f, 0.000933f, +0.000015f, 0.000046f, 0.000012f, 0.000246f, 0.000009f, 0.000040f, 0.000011f, 0.000108f, 0.007527f, 0.021652f, 0.003674f, 0.117551f, 0.000023f, 0.000042f, 0.000017f, 0.000343f, +0.000041f, 0.000112f, 0.000060f, 0.000608f, 0.000027f, 0.000110f, 0.000060f, 0.000302f, 0.009619f, 0.024376f, 0.008469f, 0.135247f, 0.000097f, 0.000153f, 0.000126f, 0.001277f, +0.018610f, 0.045399f, 0.049289f, 0.219313f, 0.025711f, 0.092145f, 0.101994f, 0.226270f, 2.803673f, 6.331834f, 4.436110f, 31.315380f, 0.023774f, 0.033454f, 0.055431f, 0.249369f, +0.001351f, 0.006451f, 0.005165f, 0.023026f, 0.002645f, 0.018558f, 0.015148f, 0.033669f, 0.346159f, 1.530482f, 0.790709f, 5.592492f, 0.002591f, 0.007139f, 0.008722f, 0.039314f, +0.000293f, 0.001404f, 
0.000497f, 0.004559f, 0.000268f, 0.001889f, 0.000682f, 0.003119f, 0.084064f, 0.373291f, 0.085261f, 1.241013f, 0.000223f, 0.000616f, 0.000333f, 0.003086f, +0.000434f, 0.001833f, 0.001328f, 0.006081f, 0.000451f, 0.002800f, 0.002069f, 0.004723f, 0.058051f, 0.227102f, 0.106213f, 0.771569f, 0.000498f, 0.001214f, 0.001343f, 0.006216f, +0.001261f, 0.001145f, 0.002306f, 0.022996f, 0.000094f, 0.000125f, 0.000258f, 0.001281f, 0.112654f, 0.094709f, 0.123056f, 1.947050f, 0.001093f, 0.000573f, 0.001760f, 0.017747f, +0.000170f, 0.000302f, 0.000448f, 0.004475f, 0.000018f, 0.000047f, 0.000071f, 0.000353f, 0.025783f, 0.042435f, 0.040658f, 0.644550f, 0.000221f, 0.000227f, 0.000513f, 0.005186f, +0.000089f, 0.000159f, 0.000104f, 0.002141f, 0.000004f, 0.000012f, 0.000008f, 0.000079f, 0.015131f, 0.025011f, 0.010594f, 0.345640f, 0.000046f, 0.000047f, 0.000047f, 0.000984f, +0.000252f, 0.000397f, 0.000533f, 0.005471f, 0.000014f, 0.000033f, 0.000045f, 0.000229f, 0.020013f, 0.029146f, 0.025280f, 0.411613f, 0.000197f, 0.000178f, 0.000366f, 0.003796f, +0.001095f, 0.002094f, 0.003185f, 0.031234f, 0.014715f, 0.041332f, 0.064112f, 0.313453f, 0.737738f, 1.305810f, 1.282038f, 19.945409f, 0.001078f, 0.001189f, 0.002760f, 0.027365f, +0.000038f, 0.000144f, 0.000161f, 0.001582f, 0.000730f, 0.004016f, 0.004594f, 0.022502f, 0.043943f, 0.152272f, 0.110244f, 1.718429f, 0.000057f, 0.000122f, 0.000210f, 0.002081f, +0.000007f, 0.000027f, 0.000013f, 0.000271f, 0.000064f, 0.000354f, 0.000179f, 0.001806f, 0.009249f, 0.032187f, 0.010302f, 0.330481f, 0.000004f, 0.000009f, 0.000007f, 0.000142f, +0.000054f, 0.000179f, 0.000182f, 0.001837f, 0.000547f, 0.002664f, 0.002758f, 0.013877f, 0.032401f, 0.099346f, 0.065111f, 1.042410f, 0.000048f, 0.000091f, 0.000142f, 0.001447f, +0.000000f, 0.000000f, 0.000000f, 0.000005f, 0.000001f, 0.000003f, 0.000002f, 0.000034f, 0.000288f, 0.000793f, 0.000348f, 0.021150f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000001f, 0.000004f, 0.000019f, 0.000006f, 0.000379f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000011f, 0.000002f, 0.000196f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000006f, 0.000029f, 0.000009f, 0.000537f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000033f, 0.000152f, 0.000139f, 0.002417f, 0.000488f, 0.003297f, 0.003076f, 0.026731f, 0.087699f, 0.373311f, 0.220443f, 6.096321f, 0.000024f, 0.000063f, 0.000089f, 0.001562f, +0.000001f, 0.000006f, 0.000004f, 0.000074f, 0.000015f, 0.000192f, 0.000132f, 0.001152f, 0.003137f, 0.026141f, 0.011383f, 0.315408f, 0.000001f, 0.000004f, 0.000004f, 0.000071f, +0.000000f, 0.000001f, 0.000000f, 0.000007f, 0.000001f, 0.000010f, 0.000003f, 0.000053f, 0.000380f, 0.003184f, 0.000613f, 0.034952f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000000f, 0.000002f, 0.000001f, 0.000023f, 0.000003f, 0.000034f, 0.000021f, 0.000192f, 0.000624f, 0.004601f, 0.001814f, 0.051618f, 0.000000f, 0.000001f, 0.000001f, 0.000013f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000017f, 0.000027f, 0.000030f, 0.001832f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000004f, 0.000003f, 0.000176f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000047f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000003f, 0.000002f, 0.000133f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.022480f, 0.034207f, 0.035268f, 0.176851f, 0.029065f, 0.064972f, 0.068297f, 0.170749f, 2.280677f, 3.212719f, 
2.137542f, 17.005023f, 0.030674f, 0.026923f, 0.042364f, 0.214781f, +0.001166f, 0.003473f, 0.002641f, 0.013268f, 0.002137f, 0.009350f, 0.007248f, 0.018155f, 0.201205f, 0.554880f, 0.272243f, 2.169964f, 0.002389f, 0.004105f, 0.004763f, 0.024195f, +0.000594f, 0.001777f, 0.000597f, 0.006176f, 0.000509f, 0.002238f, 0.000767f, 0.003954f, 0.114881f, 0.318194f, 0.069018f, 1.132130f, 0.000482f, 0.000833f, 0.000427f, 0.004465f, +0.001903f, 0.005017f, 0.003453f, 0.017818f, 0.001852f, 0.007172f, 0.005033f, 0.012949f, 0.171575f, 0.418673f, 0.185952f, 1.522318f, 0.002334f, 0.003549f, 0.003728f, 0.019452f, +0.000003f, 0.000007f, 0.000003f, 0.000062f, 0.000003f, 0.000009f, 0.000004f, 0.000042f, 0.002024f, 0.004442f, 0.001320f, 0.041048f, 0.000006f, 0.000008f, 0.000006f, 0.000113f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000037f, 0.000159f, 0.000035f, 0.001088f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000057f, 0.000247f, 0.000024f, 0.001531f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000074f, 0.000281f, 0.000056f, 0.001784f, 0.000000f, 0.000001f, 0.000000f, 0.000005f, +0.000715f, 0.002615f, 0.001621f, 0.014453f, 0.001018f, 0.005472f, 0.003460f, 0.015376f, 0.286290f, 0.969863f, 0.388111f, 5.488444f, 0.000719f, 0.001517f, 0.001436f, 0.012942f, +0.000022f, 0.000159f, 0.000073f, 0.000651f, 0.000045f, 0.000473f, 0.000220f, 0.000982f, 0.015167f, 0.100589f, 0.029683f, 0.420572f, 0.000034f, 0.000139f, 0.000097f, 0.000876f, +0.000007f, 0.000047f, 0.000010f, 0.000175f, 0.000006f, 0.000065f, 0.000013f, 0.000123f, 0.004990f, 0.033238f, 0.004336f, 0.126436f, 0.000004f, 0.000016f, 0.000005f, 0.000093f, +0.000010f, 0.000062f, 0.000026f, 0.000236f, 0.000011f, 0.000098f, 0.000041f, 0.000189f, 0.003489f, 0.020476f, 0.005470f, 0.079601f, 0.000009f, 0.000032f, 
0.000020f, 0.000190f, +0.000017f, 0.000023f, 0.000026f, 0.000518f, 0.000001f, 0.000003f, 0.000003f, 0.000030f, 0.003934f, 0.004962f, 0.003682f, 0.116715f, 0.000011f, 0.000009f, 0.000016f, 0.000315f, +0.000001f, 0.000003f, 0.000002f, 0.000043f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000386f, 0.000954f, 0.000522f, 0.016579f, 0.000001f, 0.000002f, 0.000002f, 0.000040f, +0.000001f, 0.000002f, 0.000001f, 0.000028f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000307f, 0.000762f, 0.000184f, 0.012044f, 0.000000f, 0.000000f, 0.000000f, 0.000010f, +0.000002f, 0.000005f, 0.000004f, 0.000073f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000411f, 0.000899f, 0.000445f, 0.014524f, 0.000001f, 0.000002f, 0.000002f, 0.000040f, +0.001771f, 0.001621f, 0.002710f, 0.016430f, 0.147843f, 0.198849f, 0.338866f, 1.024494f, 4.471477f, 3.789906f, 4.087888f, 39.326566f, 0.079555f, 0.042014f, 0.107176f, 0.657077f, +0.000130f, 0.000234f, 0.000288f, 0.001750f, 0.015427f, 0.040620f, 0.051046f, 0.154624f, 0.559961f, 0.929151f, 0.739048f, 7.123484f, 0.008795f, 0.009093f, 0.017105f, 0.105070f, +0.000032f, 0.000057f, 0.000031f, 0.000390f, 0.001760f, 0.004654f, 0.002586f, 0.016119f, 0.153046f, 0.255055f, 0.089688f, 1.779062f, 0.000850f, 0.000883f, 0.000734f, 0.009282f, +0.000125f, 0.000199f, 0.000222f, 0.001385f, 0.007886f, 0.018372f, 0.020900f, 0.065024f, 0.281541f, 0.413363f, 0.297637f, 2.946556f, 0.005067f, 0.004636f, 0.007894f, 0.049805f, +0.000001f, 0.000002f, 0.000001f, 0.000034f, 0.000078f, 0.000164f, 0.000125f, 0.001476f, 0.023290f, 0.030752f, 0.014821f, 0.557150f, 0.000091f, 0.000075f, 0.000085f, 0.002038f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000007f, 0.000004f, 0.000046f, 0.000606f, 0.001567f, 0.000557f, 0.020971f, 0.000002f, 0.000003f, 0.000003f, 0.000068f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000001f, 0.000013f, 0.000447f, 0.001160f, 0.000182f, 0.014124f, 0.000001f, 0.000001f, 0.000000f, 0.000016f, +0.000000f, 0.000000f, 
0.000000f, 0.000001f, 0.000002f, 0.000007f, 0.000004f, 0.000045f, 0.000712f, 0.001629f, 0.000524f, 0.020269f, 0.000003f, 0.000004f, 0.000003f, 0.000075f, +0.000061f, 0.000135f, 0.000136f, 0.001464f, 0.005645f, 0.018259f, 0.018715f, 0.100577f, 0.611921f, 1.247292f, 0.809176f, 13.837591f, 0.002033f, 0.002582f, 0.003961f, 0.043166f, +0.000003f, 0.000012f, 0.000009f, 0.000094f, 0.000354f, 0.002240f, 0.001693f, 0.009116f, 0.046017f, 0.183629f, 0.087848f, 1.505159f, 0.000135f, 0.000336f, 0.000380f, 0.004145f, +0.000000f, 0.000002f, 0.000001f, 0.000012f, 0.000023f, 0.000148f, 0.000049f, 0.000548f, 0.007247f, 0.029045f, 0.006143f, 0.216604f, 0.000008f, 0.000019f, 0.000009f, 0.000211f, +0.000001f, 0.000003f, 0.000002f, 0.000020f, 0.000049f, 0.000273f, 0.000187f, 0.001034f, 0.006242f, 0.022040f, 0.009545f, 0.167969f, 0.000021f, 0.000046f, 0.000047f, 0.000530f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000001f, 0.000001f, 0.000012f, 0.000533f, 0.000405f, 0.000487f, 0.018662f, 0.000002f, 0.000001f, 0.000003f, 0.000067f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000074f, 0.000110f, 0.000098f, 0.003763f, 0.000000f, 0.000000f, 0.000000f, 0.000012f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000028f, 0.000042f, 0.000017f, 0.001309f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000047f, 0.000061f, 0.000049f, 0.001944f, 0.000000f, 0.000000f, 0.000000f, 0.000007f, +4.128192f, 3.747249f, 7.766324f, 6.248925f, 4.830865f, 6.441848f, 13.611917f, 5.460661f, 35.912903f, 30.177995f, 40.361351f, 51.522498f, 7.295512f, 3.819866f, 12.082431f, 9.829178f, +0.596356f, 1.059765f, 1.619676f, 1.305722f, 0.989062f, 2.582025f, 4.023321f, 1.617124f, 8.824472f, 14.517097f, 14.317613f, 18.311965f, 1.582527f, 1.622163f, 3.783695f, 3.083985f, +0.489710f, 0.874028f, 0.590553f, 0.979757f, 0.379958f, 
0.996220f, 0.686270f, 0.567662f, 8.121320f, 13.418374f, 5.850676f, 15.399504f, 0.515170f, 0.530366f, 0.546907f, 0.917372f, +1.043367f, 1.640594f, 2.269806f, 1.879403f, 0.919005f, 2.122831f, 2.994391f, 1.236163f, 8.065029f, 11.739719f, 10.481350f, 13.768606f, 1.657461f, 1.503306f, 3.174226f, 2.657310f, +0.022633f, 0.032006f, 0.029639f, 0.093189f, 0.018646f, 0.038735f, 0.036571f, 0.057329f, 1.362944f, 1.784203f, 1.066240f, 5.318517f, 0.060606f, 0.049435f, 0.069868f, 0.222098f, +0.000679f, 0.001881f, 0.001284f, 0.004046f, 0.000793f, 0.003226f, 0.002246f, 0.003528f, 0.069592f, 0.178351f, 0.078596f, 0.392798f, 0.002732f, 0.004362f, 0.004547f, 0.014480f, +0.001505f, 0.004183f, 0.001263f, 0.008188f, 0.000822f, 0.003357f, 0.001033f, 0.003340f, 0.172719f, 0.444571f, 0.086613f, 0.890815f, 0.002398f, 0.003846f, 0.001772f, 0.011616f, +0.002778f, 0.006804f, 0.004206f, 0.013608f, 0.001722f, 0.006198f, 0.003906f, 0.006301f, 0.148615f, 0.337008f, 0.134442f, 0.690100f, 0.006685f, 0.009446f, 0.008912f, 0.029154f, +0.228050f, 0.497826f, 0.620561f, 0.887575f, 0.294051f, 0.942985f, 1.198443f, 0.854621f, 7.834913f, 15.833245f, 12.736472f, 28.900865f, 0.297160f, 0.374178f, 0.711850f, 1.029395f, +0.019783f, 0.084546f, 0.077716f, 0.111370f, 0.036152f, 0.226971f, 0.212715f, 0.151980f, 1.156081f, 4.573778f, 2.713125f, 6.168290f, 0.038708f, 0.095420f, 0.133865f, 0.193951f, +0.009361f, 0.040178f, 0.016328f, 0.048153f, 0.008003f, 0.050460f, 0.020907f, 0.030741f, 0.613072f, 2.436016f, 0.638837f, 2.988970f, 0.007261f, 0.017977f, 0.011149f, 0.033244f, +0.009338f, 0.035311f, 0.029383f, 0.043247f, 0.009063f, 0.050344f, 0.042712f, 0.031343f, 0.285056f, 0.997877f, 0.535846f, 1.251249f, 0.010937f, 0.023857f, 0.030298f, 0.045087f, +0.024716f, 0.020085f, 0.046432f, 0.148851f, 0.001720f, 0.002053f, 0.004840f, 0.007736f, 0.503521f, 0.378787f, 0.565086f, 2.874054f, 0.021860f, 0.010246f, 0.036151f, 0.117174f, +0.003974f, 0.006323f, 0.010779f, 0.034621f, 0.000392f, 0.000916f, 0.001592f, 0.002550f, 0.137722f, 
0.202830f, 0.223135f, 1.137051f, 0.005278f, 0.004844f, 0.012602f, 0.040924f, +0.004544f, 0.007261f, 0.005472f, 0.036174f, 0.000210f, 0.000492f, 0.000378f, 0.001246f, 0.176491f, 0.261056f, 0.126965f, 1.331476f, 0.002393f, 0.002205f, 0.002536f, 0.016951f, +0.008683f, 0.012223f, 0.018863f, 0.062230f, 0.000455f, 0.000941f, 0.001480f, 0.002434f, 0.157184f, 0.204832f, 0.203986f, 1.067633f, 0.006903f, 0.005605f, 0.013202f, 0.044034f, +0.004790f, 0.008195f, 0.014315f, 0.045123f, 0.060075f, 0.150990f, 0.268912f, 0.422623f, 0.735938f, 1.165608f, 1.313953f, 6.570953f, 0.004809f, 0.004746f, 0.012653f, 0.040324f, +0.000200f, 0.000671f, 0.000865f, 0.002732f, 0.003563f, 0.017533f, 0.023027f, 0.036258f, 0.052389f, 0.162442f, 0.135034f, 0.676586f, 0.000302f, 0.000584f, 0.001148f, 0.003665f, +0.000082f, 0.000277f, 0.000157f, 0.001024f, 0.000684f, 0.003378f, 0.001961f, 0.006356f, 0.024077f, 0.074980f, 0.027555f, 0.284134f, 0.000049f, 0.000095f, 0.000083f, 0.000544f, +0.000416f, 0.001233f, 0.001438f, 0.004664f, 0.003927f, 0.017099f, 0.020329f, 0.032878f, 0.056796f, 0.155825f, 0.117260f, 0.603448f, 0.000375f, 0.000642f, 0.001142f, 0.003746f, +0.000001f, 0.000003f, 0.000003f, 0.000032f, 0.000011f, 0.000044f, 0.000035f, 0.000214f, 0.001345f, 0.003318f, 0.001671f, 0.032659f, 0.000002f, 0.000003f, 0.000004f, 0.000044f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000004f, 0.000020f, 0.000096f, 0.000036f, 0.000699f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000002f, 0.000025f, 0.000120f, 0.000020f, 0.000791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000002f, 0.000001f, 0.000008f, 0.000050f, 0.000215f, 0.000072f, 0.001456f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000215f, 0.000884f, 0.000928f, 0.005202f, 0.002968f, 0.017941f, 0.019218f, 0.053690f, 0.130327f, 0.496410f, 0.336567f, 2.991927f, 0.000159f, 
0.000377f, 0.000605f, 0.003428f, +0.000005f, 0.000043f, 0.000034f, 0.000189f, 0.000106f, 0.001251f, 0.000988f, 0.002766f, 0.005571f, 0.041543f, 0.020771f, 0.184996f, 0.000006f, 0.000028f, 0.000033f, 0.000187f, +0.000001f, 0.000010f, 0.000004f, 0.000041f, 0.000012f, 0.000139f, 0.000049f, 0.000279f, 0.001475f, 0.011049f, 0.002442f, 0.044766f, 0.000001f, 0.000003f, 0.000001f, 0.000016f, +0.000003f, 0.000022f, 0.000015f, 0.000087f, 0.000031f, 0.000329f, 0.000235f, 0.000677f, 0.001629f, 0.010751f, 0.004866f, 0.044515f, 0.000002f, 0.000008f, 0.000009f, 0.000052f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000040f, 0.000057f, 0.000072f, 0.001438f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000009f, 0.000008f, 0.000165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000006f, 0.000002f, 0.000096f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000011f, 0.000009f, 0.000184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.007529f, 0.008569f, 0.010143f, 0.097693f, 0.007575f, 0.012666f, 0.015285f, 0.073403f, 2.906381f, 3.062181f, 2.338973f, 35.742443f, 0.007798f, 0.005119f, 0.009247f, 0.090056f, +0.000336f, 0.000748f, 0.000653f, 0.006299f, 0.000479f, 0.001567f, 0.001394f, 0.006708f, 0.220379f, 0.454569f, 0.256041f, 3.920142f, 0.000522f, 0.000671f, 0.000894f, 0.008719f, +0.000104f, 0.000232f, 0.000090f, 0.001779f, 0.000069f, 0.000227f, 0.000089f, 0.000886f, 0.076330f, 0.158127f, 0.039376f, 1.240678f, 0.000064f, 0.000083f, 0.000049f, 0.000976f, +0.000405f, 0.000799f, 0.000631f, 0.006254f, 0.000307f, 0.000888f, 0.000716f, 0.003537f, 0.138923f, 0.253550f, 0.129283f, 2.033028f, 0.000377f, 0.000429f, 0.000517f, 0.005182f, +0.000000f, 
0.000001f, 0.000000f, 0.000012f, 0.000000f, 0.000001f, 0.000000f, 0.000007f, 0.000933f, 0.001532f, 0.000523f, 0.031223f, 0.000001f, 0.000001f, 0.000000f, 0.000017f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000015f, 0.000047f, 0.000012f, 0.000712f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000014f, 0.000044f, 0.000005f, 0.000607f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000022f, 0.000062f, 0.000014f, 0.000862f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000269f, 0.000738f, 0.000525f, 0.008991f, 0.000299f, 0.001201f, 0.000872f, 0.007444f, 0.410848f, 1.041011f, 0.478248f, 12.991014f, 0.000206f, 0.000325f, 0.000353f, 0.006111f, +0.000007f, 0.000039f, 0.000020f, 0.000348f, 0.000011f, 0.000089f, 0.000048f, 0.000408f, 0.018707f, 0.092798f, 0.031438f, 0.855611f, 0.000008f, 0.000026f, 0.000020f, 0.000355f, +0.000001f, 0.000007f, 0.000002f, 0.000057f, 0.000001f, 0.000007f, 0.000002f, 0.000031f, 0.003734f, 0.018601f, 0.002786f, 0.156034f, 0.000001f, 0.000002f, 0.000001f, 0.000023f, +0.000002f, 0.000011f, 0.000005f, 0.000093f, 0.000002f, 0.000014f, 0.000007f, 0.000058f, 0.003182f, 0.013965f, 0.004283f, 0.119713f, 0.000002f, 0.000004f, 0.000003f, 0.000057f, +0.000001f, 0.000001f, 0.000002f, 0.000073f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.001271f, 0.001199f, 0.001021f, 0.062183f, 0.000001f, 0.000000f, 0.000001f, 0.000033f, +0.000000f, 0.000000f, 0.000000f, 0.000005f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000107f, 0.000198f, 0.000124f, 0.007592f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000052f, 0.000096f, 0.000027f, 0.003346f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000084f, 0.000138f, 0.000078f, 0.004917f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000561f, 0.000384f, 0.000738f, 0.008592f, 0.036480f, 0.036698f, 0.071796f, 0.416945f, 5.394532f, 3.419800f, 4.234710f, 78.254084f, 0.019146f, 0.007563f, 0.022148f, 0.260824f, +0.000036f, 0.000048f, 0.000067f, 0.000786f, 0.003272f, 0.006443f, 0.009296f, 0.054086f, 0.580635f, 0.720611f, 0.658021f, 12.183050f, 0.001819f, 0.001407f, 0.003038f, 0.035847f, +0.000005f, 0.000007f, 0.000004f, 0.000106f, 0.000226f, 0.000448f, 0.000286f, 0.003420f, 0.096268f, 0.119994f, 0.048441f, 1.845729f, 0.000107f, 0.000083f, 0.000079f, 0.001921f, +0.000025f, 0.000030f, 0.000038f, 0.000460f, 0.001236f, 0.002154f, 0.002814f, 0.016814f, 0.215812f, 0.236993f, 0.195903f, 3.725349f, 0.000775f, 0.000530f, 0.001037f, 0.012561f, +0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000007f, 0.000011f, 0.000010f, 0.000217f, 0.010169f, 0.010042f, 0.005556f, 0.401216f, 0.000008f, 0.000005f, 0.000006f, 0.000293f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000227f, 0.000440f, 0.000179f, 0.012980f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000102f, 0.000197f, 0.000036f, 0.005303f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000198f, 0.000338f, 0.000125f, 0.009274f, 0.000000f, 0.000000f, 0.000000f, 0.000007f, +0.000022f, 0.000036f, 0.000042f, 0.000862f, 0.001569f, 0.003795f, 0.004465f, 0.046095f, 0.831352f, 1.267440f, 0.943962f, 31.007593f, 0.000551f, 0.000523f, 0.000922f, 0.019296f, +0.000001f, 0.000003f, 0.000002f, 0.000047f, 0.000084f, 0.000400f, 0.000347f, 0.003591f, 0.053734f, 0.160377f, 0.088082f, 2.898897f, 0.000031f, 0.000058f, 0.000076f, 0.001593f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000003f, 0.000016f, 0.000006f, 0.000131f, 0.005133f, 
0.015388f, 0.003736f, 0.253064f, 0.000001f, 0.000002f, 0.000001f, 0.000049f, +0.000000f, 0.000000f, 0.000000f, 0.000007f, 0.000009f, 0.000036f, 0.000028f, 0.000301f, 0.005388f, 0.014230f, 0.007075f, 0.239149f, 0.000004f, 0.000006f, 0.000007f, 0.000151f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000163f, 0.000093f, 0.000128f, 0.009413f, 0.000000f, 0.000000f, 0.000000f, 0.000007f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000020f, 0.000022f, 0.000022f, 0.001631f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000005f, 0.000005f, 0.000002f, 0.000344f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000009f, 0.000009f, 0.000008f, 0.000623f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +2.840877f, 1.928744f, 4.589127f, 7.092776f, 2.587129f, 2.580316f, 6.259421f, 4.823436f, 94.036158f, 59.102306f, 90.746952f, 222.515210f, 3.810756f, 1.492358f, 5.419157f, 8.468197f, +0.352729f, 0.468829f, 0.822593f, 1.273809f, 0.455259f, 0.888926f, 1.590165f, 1.227715f, 19.859839f, 24.436346f, 27.668112f, 67.973572f, 0.710475f, 0.544706f, 1.458599f, 2.283643f, +0.175706f, 0.234554f, 0.181941f, 0.579809f, 0.106093f, 0.208053f, 0.164538f, 0.261431f, 11.087327f, 13.701550f, 6.858488f, 34.675709f, 0.140301f, 0.108033f, 0.127893f, 0.412074f, +0.456206f, 0.536531f, 0.852186f, 1.355382f, 0.312710f, 0.540268f, 0.874892f, 0.693774f, 13.417803f, 14.608405f, 14.973204f, 37.781915f, 0.550085f, 0.373167f, 0.904579f, 1.454612f, +0.005637f, 0.005962f, 0.006338f, 0.038279f, 0.003614f, 0.005615f, 0.006086f, 0.018326f, 1.291538f, 1.264570f, 0.867573f, 8.312621f, 0.011457f, 0.006989f, 0.011341f, 0.069247f, +0.000145f, 0.000301f, 0.000236f, 0.001429f, 0.000132f, 0.000402f, 0.000321f, 0.000969f, 0.056680f, 0.108647f, 0.054966f, 0.527666f, 
0.000444f, 0.000530f, 0.000634f, 0.003880f, +0.000195f, 0.000406f, 0.000141f, 0.001754f, 0.000083f, 0.000254f, 0.000090f, 0.000557f, 0.085335f, 0.164284f, 0.036744f, 0.725923f, 0.000236f, 0.000284f, 0.000150f, 0.001888f, +0.000440f, 0.000805f, 0.000571f, 0.003552f, 0.000212f, 0.000571f, 0.000413f, 0.001280f, 0.089479f, 0.151764f, 0.069505f, 0.685315f, 0.000803f, 0.000849f, 0.000919f, 0.005775f, +0.176730f, 0.288554f, 0.412939f, 1.134496f, 0.177339f, 0.425357f, 0.620610f, 0.850103f, 23.102855f, 34.919711f, 32.247964f, 140.559580f, 0.174796f, 0.164623f, 0.359545f, 0.998717f, +0.013177f, 0.042119f, 0.044448f, 0.122351f, 0.018740f, 0.087996f, 0.094677f, 0.129936f, 2.929962f, 8.669982f, 5.904259f, 25.784375f, 0.019570f, 0.036082f, 0.058113f, 0.161732f, +0.003782f, 0.012142f, 0.005665f, 0.032090f, 0.002516f, 0.011867f, 0.005645f, 0.015943f, 0.942537f, 2.801152f, 0.843333f, 7.579262f, 0.002227f, 0.004124f, 0.002936f, 0.016816f, +0.004598f, 0.013004f, 0.012423f, 0.035123f, 0.003473f, 0.014429f, 0.014053f, 0.019809f, 0.534062f, 1.398327f, 0.862034f, 3.866555f, 0.004088f, 0.006669f, 0.009723f, 0.027793f, +0.004311f, 0.002620f, 0.006955f, 0.042825f, 0.000233f, 0.000208f, 0.000564f, 0.001732f, 0.334196f, 0.188039f, 0.322047f, 3.146271f, 0.002894f, 0.001015f, 0.004110f, 0.025588f, +0.000596f, 0.000709f, 0.001388f, 0.008561f, 0.000046f, 0.000080f, 0.000160f, 0.000491f, 0.078565f, 0.086542f, 0.109299f, 1.069852f, 0.000601f, 0.000412f, 0.001231f, 0.007681f, +0.000413f, 0.000494f, 0.000427f, 0.005426f, 0.000015f, 0.000026f, 0.000023f, 0.000146f, 0.061075f, 0.067568f, 0.037726f, 0.759959f, 0.000165f, 0.000114f, 0.000150f, 0.001930f, +0.000962f, 0.001013f, 0.001795f, 0.011376f, 0.000039f, 0.000061f, 0.000110f, 0.000346f, 0.066286f, 0.064607f, 0.073865f, 0.742599f, 0.000581f, 0.000353f, 0.000954f, 0.006110f, +0.001841f, 0.002356f, 0.004725f, 0.028611f, 0.017972f, 0.033786f, 0.069079f, 0.208537f, 1.076477f, 1.275221f, 1.650310f, 15.852941f, 0.001403f, 0.001036f, 0.003170f, 0.019407f, 
+0.000066f, 0.000166f, 0.000245f, 0.001489f, 0.000916f, 0.003372f, 0.005084f, 0.015377f, 0.065863f, 0.152748f, 0.145770f, 1.402967f, 0.000076f, 0.000110f, 0.000247f, 0.001516f, +0.000016f, 0.000041f, 0.000027f, 0.000338f, 0.000107f, 0.000394f, 0.000263f, 0.001635f, 0.018362f, 0.042770f, 0.018045f, 0.357406f, 0.000007f, 0.000011f, 0.000011f, 0.000137f, +0.000102f, 0.000225f, 0.000302f, 0.001879f, 0.000747f, 0.002431f, 0.003318f, 0.010308f, 0.052785f, 0.108318f, 0.093576f, 0.925022f, 0.000070f, 0.000089f, 0.000182f, 0.001146f, +0.000000f, 0.000000f, 0.000000f, 0.000007f, 0.000001f, 0.000004f, 0.000003f, 0.000038f, 0.000712f, 0.001314f, 0.000760f, 0.028515f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000009f, 0.000033f, 0.000014f, 0.000524f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000007f, 0.000025f, 0.000005f, 0.000360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000017f, 0.000054f, 0.000021f, 0.000808f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000093f, 0.000286f, 0.000345f, 0.003715f, 0.001000f, 0.004521f, 0.005560f, 0.029834f, 0.214676f, 0.611590f, 0.476041f, 8.128668f, 0.000052f, 0.000093f, 0.000171f, 0.001858f, +0.000002f, 0.000012f, 0.000011f, 0.000116f, 0.000031f, 0.000271f, 0.000246f, 0.001321f, 0.007887f, 0.043991f, 0.025250f, 0.431989f, 0.000002f, 0.000006f, 0.000008f, 0.000087f, +0.000000f, 0.000002f, 0.000001f, 0.000015f, 0.000002f, 0.000018f, 0.000007f, 0.000081f, 0.001267f, 0.007098f, 0.001801f, 0.063412f, 0.000000f, 0.000000f, 0.000000f, 0.000005f, +0.000001f, 0.000004f, 0.000004f, 0.000040f, 0.000007f, 0.000053f, 0.000043f, 0.000239f, 0.001705f, 0.008416f, 0.004373f, 0.076843f, 0.000000f, 0.000001f, 0.000002f, 0.000018f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000015f, 0.000016f, 0.000023f, 0.000879f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000002f, 0.000087f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000031f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000002f, 0.000071f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.003794f, 0.006552f, 0.005986f, 0.052423f, 0.004418f, 0.011209f, 0.010441f, 0.045589f, 2.189400f, 3.500450f, 2.063719f, 28.673458f, 0.009335f, 0.009300f, 0.012967f, 0.114815f, +0.000095f, 0.000323f, 0.000217f, 0.001907f, 0.000157f, 0.000782f, 0.000537f, 0.002351f, 0.093667f, 0.293182f, 0.127462f, 1.774360f, 0.000353f, 0.000688f, 0.000707f, 0.006272f, +0.000032f, 0.000109f, 0.000033f, 0.000588f, 0.000025f, 0.000124f, 0.000038f, 0.000339f, 0.035417f, 0.111340f, 0.021400f, 0.613064f, 0.000047f, 0.000092f, 0.000042f, 0.000767f, +0.000172f, 0.000514f, 0.000313f, 0.002824f, 0.000151f, 0.000662f, 0.000411f, 0.001848f, 0.088062f, 0.243892f, 0.095986f, 1.372393f, 0.000380f, 0.000656f, 0.000610f, 0.005559f, +0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000400f, 0.000997f, 0.000263f, 0.014261f, 0.000000f, 0.000001f, 0.000000f, 0.000013f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000017f, 0.000003f, 0.000183f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000018f, 0.000002f, 0.000171f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000008f, 0.000034f, 0.000006f, 0.000331f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000076f, 0.000314f, 0.000173f, 0.002687f, 0.000097f, 0.000592f, 0.000332f, 0.002574f, 0.172346f, 0.662667f, 0.234977f, 5.803444f, 0.000137f, 0.000329f, 0.000276f, 0.004339f, +0.000001f, 0.000009f, 0.000004f, 0.000059f, 0.000002f, 0.000025f, 0.000010f, 0.000080f, 0.004428f, 0.033329f, 0.008715f, 0.215657f, 0.000003f, 0.000015f, 0.000009f, 0.000142f, +0.000000f, 0.000002f, 0.000000f, 0.000010f, 0.000000f, 0.000002f, 0.000000f, 0.000007f, 0.000965f, 0.007293f, 0.000843f, 0.042935f, 0.000000f, 0.000001f, 0.000000f, 0.000010f, +0.000001f, 0.000004f, 0.000001f, 0.000023f, 0.000001f, 0.000006f, 0.000002f, 0.000017f, 0.001123f, 0.007480f, 0.001771f, 0.045001f, 0.000001f, 0.000004f, 0.000002f, 0.000034f, +0.000000f, 0.000001f, 0.000001f, 0.000020f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000480f, 0.000687f, 0.000452f, 0.025002f, 0.000000f, 0.000000f, 0.000001f, 0.000021f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000023f, 0.000064f, 0.000031f, 0.001722f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000034f, 0.000007f, 0.000829f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000027f, 0.000067f, 0.000029f, 0.001663f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000243f, 0.000253f, 0.000375f, 0.003969f, 0.018312f, 0.027955f, 0.042213f, 0.222893f, 3.497816f, 3.364839f, 3.216027f, 54.034762f, 0.019729f, 0.011826f, 0.026731f, 0.286223f, +0.000009f, 0.000018f, 0.000019f, 0.000205f, 0.000927f, 0.002769f, 0.003084f, 0.016314f, 0.212417f, 0.400045f, 0.281955f, 4.746422f, 0.001058f, 0.001241f, 0.002069f, 0.022195f, +0.000001f, 0.000003f, 0.000001f, 0.000030f, 0.000070f, 0.000210f, 0.000103f, 0.001126f, 0.038448f, 0.072724f, 0.022660f, 0.785028f, 
0.000068f, 0.000080f, 0.000059f, 0.001298f, +0.000009f, 0.000017f, 0.000016f, 0.000179f, 0.000522f, 0.001381f, 0.001392f, 0.007564f, 0.117749f, 0.196217f, 0.125192f, 2.164574f, 0.000672f, 0.000698f, 0.001053f, 0.011599f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000002f, 0.000005f, 0.000003f, 0.000066f, 0.003754f, 0.005626f, 0.002403f, 0.157737f, 0.000005f, 0.000004f, 0.000004f, 0.000183f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000047f, 0.000139f, 0.000044f, 0.002879f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000023f, 0.000068f, 0.000009f, 0.001284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000061f, 0.000159f, 0.000045f, 0.003068f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000005f, 0.000013f, 0.000012f, 0.000222f, 0.000438f, 0.001610f, 0.001462f, 0.013722f, 0.300176f, 0.694445f, 0.399206f, 11.922890f, 0.000316f, 0.000456f, 0.000620f, 0.011791f, +0.000000f, 0.000001f, 0.000000f, 0.000007f, 0.000013f, 0.000096f, 0.000064f, 0.000603f, 0.010947f, 0.049579f, 0.021017f, 0.628913f, 0.000010f, 0.000029f, 0.000029f, 0.000549f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000004f, 0.000001f, 0.000024f, 0.001142f, 0.005193f, 0.000973f, 0.059937f, 0.000000f, 0.000001f, 0.000000f, 0.000019f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000002f, 0.000013f, 0.000008f, 0.000075f, 0.001637f, 0.006561f, 0.002518f, 0.077379f, 0.000002f, 0.000004f, 0.000004f, 0.000077f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000053f, 0.000046f, 0.000049f, 0.003258f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000006f, 0.000005f, 0.000319f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
+0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000001f, 0.000073f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000004f, 0.000003f, 0.000181f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.615080f, 0.633686f, 1.163756f, 1.635378f, 0.648314f, 0.981207f, 1.837190f, 1.287202f, 30.437582f, 29.029524f, 34.403300f, 76.700467f, 1.960275f, 1.164929f, 3.265054f, 4.638952f, +0.043089f, 0.086908f, 0.117696f, 0.165711f, 0.064368f, 0.190720f, 0.263334f, 0.184855f, 3.626895f, 6.771986f, 5.918226f, 13.219732f, 0.206205f, 0.239901f, 0.495837f, 0.705832f, +0.023432f, 0.047467f, 0.028419f, 0.082345f, 0.016376f, 0.048732f, 0.029747f, 0.042973f, 2.210513f, 4.145302f, 1.601575f, 7.362316f, 0.044455f, 0.051944f, 0.047463f, 0.139045f, +0.083115f, 0.148331f, 0.181847f, 0.262968f, 0.065940f, 0.172877f, 0.216080f, 0.155793f, 3.654567f, 6.037786f, 4.776636f, 10.958761f, 0.238109f, 0.245115f, 0.458611f, 0.670525f, +0.000695f, 0.001115f, 0.000915f, 0.005025f, 0.000516f, 0.001216f, 0.001017f, 0.002785f, 0.238019f, 0.353645f, 0.187268f, 1.631419f, 0.003355f, 0.003106f, 0.003890f, 0.021598f, +0.000010f, 0.000032f, 0.000019f, 0.000106f, 0.000011f, 0.000049f, 0.000030f, 0.000083f, 0.005894f, 0.017143f, 0.006694f, 0.058429f, 0.000073f, 0.000133f, 0.000123f, 0.000683f, +0.000015f, 0.000047f, 0.000013f, 0.000142f, 0.000007f, 0.000034f, 0.000009f, 0.000052f, 0.009687f, 0.028299f, 0.004885f, 0.087754f, 0.000043f, 0.000078f, 0.000032f, 0.000363f, +0.000046f, 0.000127f, 0.000069f, 0.000392f, 0.000025f, 0.000104f, 0.000058f, 0.000164f, 0.013876f, 0.035714f, 0.012624f, 0.113177f, 0.000198f, 0.000317f, 0.000265f, 0.001516f, +0.021308f, 0.052793f, 0.058313f, 0.145664f, 0.024747f, 0.090072f, 0.101435f, 0.126331f, 4.164166f, 9.551099f, 6.807964f, 26.980241f, 0.050071f, 0.071559f, 0.120631f, 0.304662f, +0.000896f, 0.004348f, 0.003541f, 0.008863f, 
0.001475f, 0.010513f, 0.008731f, 0.010895f, 0.297967f, 1.337966f, 0.703274f, 2.792455f, 0.003163f, 0.008849f, 0.011001f, 0.027837f, +0.000281f, 0.001368f, 0.000493f, 0.002538f, 0.000216f, 0.001548f, 0.000568f, 0.001459f, 0.104643f, 0.471922f, 0.109664f, 0.896114f, 0.000393f, 0.001104f, 0.000607f, 0.003160f, +0.000466f, 0.002002f, 0.001476f, 0.003795f, 0.000408f, 0.002571f, 0.001933f, 0.002477f, 0.081002f, 0.321833f, 0.153137f, 0.624524f, 0.000985f, 0.002439f, 0.002745f, 0.007134f, +0.000468f, 0.000431f, 0.000884f, 0.004949f, 0.000029f, 0.000040f, 0.000083f, 0.000232f, 0.054215f, 0.046290f, 0.061192f, 0.543551f, 0.000746f, 0.000397f, 0.001241f, 0.007026f, +0.000036f, 0.000066f, 0.000100f, 0.000558f, 0.000003f, 0.000009f, 0.000013f, 0.000037f, 0.007191f, 0.012020f, 0.011717f, 0.104283f, 0.000087f, 0.000091f, 0.000210f, 0.001190f, +0.000028f, 0.000050f, 0.000033f, 0.000386f, 0.000001f, 0.000003f, 0.000002f, 0.000012f, 0.006103f, 0.010246f, 0.004415f, 0.080870f, 0.000026f, 0.000027f, 0.000028f, 0.000326f, +0.000088f, 0.000140f, 0.000192f, 0.001106f, 0.000004f, 0.000010f, 0.000014f, 0.000039f, 0.009049f, 0.013383f, 0.011810f, 0.107954f, 0.000126f, 0.000116f, 0.000242f, 0.001412f, +0.000929f, 0.001804f, 0.002792f, 0.015371f, 0.010494f, 0.029936f, 0.047244f, 0.129674f, 0.811895f, 1.459488f, 1.457850f, 12.732915f, 0.001682f, 0.001884f, 0.004451f, 0.024773f, +0.000019f, 0.000072f, 0.000082f, 0.000451f, 0.000302f, 0.001686f, 0.001962f, 0.005395f, 0.028027f, 0.098636f, 0.072654f, 0.635784f, 0.000051f, 0.000112f, 0.000196f, 0.001092f, +0.000005f, 0.000020f, 0.000010f, 0.000112f, 0.000038f, 0.000215f, 0.000111f, 0.000626f, 0.008530f, 0.030151f, 0.009819f, 0.176820f, 0.000006f, 0.000012f, 0.000009f, 0.000107f, +0.000043f, 0.000145f, 0.000150f, 0.000849f, 0.000367f, 0.001813f, 0.001910f, 0.005394f, 0.033500f, 0.104317f, 0.069559f, 0.625187f, 0.000070f, 0.000136f, 0.000215f, 0.001231f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000002f, 0.000001f, 0.000014f, 
0.000306f, 0.000856f, 0.000382f, 0.013040f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000012f, 0.000004f, 0.000135f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000010f, 0.000001f, 0.000101f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000030f, 0.000009f, 0.000311f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000026f, 0.000122f, 0.000114f, 0.001111f, 0.000325f, 0.002231f, 0.002117f, 0.010331f, 0.090163f, 0.389783f, 0.234174f, 3.635670f, 0.000035f, 0.000094f, 0.000133f, 0.001321f, +0.000000f, 0.000003f, 0.000002f, 0.000020f, 0.000006f, 0.000075f, 0.000053f, 0.000258f, 0.001869f, 0.015819f, 0.007008f, 0.109014f, 0.000001f, 0.000003f, 0.000004f, 0.000035f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000006f, 0.000002f, 0.000017f, 0.000328f, 0.002786f, 0.000546f, 0.017470f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000002f, 0.000001f, 0.000010f, 0.000002f, 0.000022f, 0.000014f, 0.000070f, 0.000603f, 0.004514f, 0.001810f, 0.028921f, 0.000000f, 0.000001f, 0.000001f, 0.000011f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000006f, 0.000009f, 0.000010f, 0.000354f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000020f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000008f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000024f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.001531f, 0.002547f, 0.002257f, 0.025915f, 0.001427f, 0.003487f, 0.003150f, 0.018039f, 0.874254f, 1.346123f, 0.769712f, 14.023580f, 0.002304f, 0.002211f, 0.002990f, 0.034713f, +0.000043f, 0.000141f, 0.000092f, 0.001064f, 0.000057f, 0.000275f, 0.000183f, 0.001049f, 0.042192f, 0.127183f, 0.053627f, 0.978928f, 0.000098f, 0.000184f, 0.000184f, 0.002139f, +0.000012f, 0.000041f, 0.000012f, 0.000279f, 0.000008f, 0.000037f, 0.000011f, 0.000129f, 0.013562f, 0.041057f, 0.007654f, 0.287518f, 0.000011f, 0.000021f, 0.000009f, 0.000222f, +0.000047f, 0.000137f, 0.000081f, 0.000954f, 0.000033f, 0.000141f, 0.000085f, 0.000500f, 0.024040f, 0.064119f, 0.024475f, 0.458868f, 0.000064f, 0.000107f, 0.000096f, 0.001149f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000178f, 0.000426f, 0.000109f, 0.007754f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000008f, 0.000002f, 0.000112f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000007f, 0.000001f, 0.000089f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000010f, 0.000002f, 0.000123f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000038f, 0.000151f, 0.000081f, 0.001647f, 0.000039f, 0.000228f, 0.000124f, 0.001263f, 0.085318f, 0.315923f, 0.108650f, 3.518762f, 0.000042f, 0.000097f, 0.000079f, 0.001626f, +0.000001f, 0.000005f, 0.000002f, 0.000041f, 0.000001f, 0.000011f, 0.000004f, 0.000044f, 0.002473f, 0.017924f, 0.004546f, 0.147502f, 0.000001f, 0.000005f, 0.000003f, 0.000060f, +0.000000f, 0.000001f, 0.000000f, 0.000006f, 0.000000f, 0.000001f, 0.000000f, 0.000003f, 0.000458f, 0.003334f, 0.000374f, 0.024963f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 
+0.000000f, 0.000001f, 0.000000f, 0.000010f, 0.000000f, 0.000001f, 0.000001f, 0.000006f, 0.000380f, 0.002438f, 0.000560f, 0.018653f, 0.000000f, 0.000001f, 0.000000f, 0.000009f, +0.000000f, 0.000001f, 0.000001f, 0.000027f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000537f, 0.000740f, 0.000472f, 0.034245f, 0.000000f, 0.000000f, 0.000000f, 0.000018f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000029f, 0.000078f, 0.000037f, 0.002661f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000013f, 0.000035f, 0.000007f, 0.001088f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000021f, 0.000049f, 0.000021f, 0.001558f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000206f, 0.000206f, 0.000297f, 0.004119f, 0.012421f, 0.018260f, 0.026743f, 0.185166f, 2.932469f, 2.716746f, 2.518379f, 55.485052f, 0.010225f, 0.005902f, 0.012940f, 0.181684f, +0.000008f, 0.000016f, 0.000017f, 0.000240f, 0.000709f, 0.002041f, 0.002204f, 0.015288f, 0.200889f, 0.364354f, 0.249064f, 5.497933f, 0.000618f, 0.000699f, 0.001130f, 0.015893f, +0.000001f, 0.000002f, 0.000001f, 0.000030f, 0.000046f, 0.000132f, 0.000063f, 0.000897f, 0.030909f, 0.056304f, 0.017015f, 0.772980f, 0.000034f, 0.000038f, 0.000027f, 0.000790f, +0.000005f, 0.000009f, 0.000009f, 0.000127f, 0.000242f, 0.000617f, 0.000603f, 0.004296f, 0.067488f, 0.108306f, 0.067021f, 1.519516f, 0.000238f, 0.000238f, 0.000348f, 0.005034f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000002f, 0.000003f, 0.000002f, 0.000061f, 0.003499f, 0.005050f, 0.002092f, 0.180066f, 0.000003f, 0.000002f, 0.000002f, 0.000129f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000050f, 0.000141f, 0.000043f, 0.003708f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000021f, 0.000059f, 0.000008f, 0.001406f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000039f, 0.000098f, 0.000027f, 0.002394f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, +0.000006f, 0.000013f, 0.000012f, 0.000285f, 0.000369f, 0.001304f, 0.001148f, 0.014132f, 0.311987f, 0.695101f, 0.387547f, 15.177824f, 0.000203f, 0.000282f, 0.000372f, 0.009279f, +0.000000f, 0.000001f, 0.000000f, 0.000010f, 0.000013f, 0.000087f, 0.000057f, 0.000701f, 0.012834f, 0.055981f, 0.023016f, 0.903126f, 0.000007f, 0.000020f, 0.000019f, 0.000487f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000003f, 0.000001f, 0.000024f, 0.001138f, 0.004985f, 0.000906f, 0.073165f, 0.000000f, 0.000001f, 0.000000f, 0.000014f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000007f, 0.000004f, 0.000053f, 0.001163f, 0.004489f, 0.001671f, 0.067341f, 0.000001f, 0.000002f, 0.000002f, 0.000042f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000124f, 0.000103f, 0.000107f, 0.009368f, 0.000000f, 0.000000f, 0.000000f, 0.000007f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000009f, 0.000015f, 0.000012f, 0.001033f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000003f, 0.000001f, 0.000202f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000004f, 0.000006f, 0.000004f, 0.000357f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.596535f, 0.591870f, 1.054219f, 1.942625f, 0.503271f, 0.733544f, 1.332097f, 1.223855f, 29.205627f, 26.825310f, 30.833405f, 90.140684f, 1.162738f, 0.665446f, 1.808924f, 3.370164f, +0.047141f, 0.091567f, 0.120271f, 0.222050f, 0.056366f, 0.160839f, 0.215386f, 0.198265f, 
3.925741f, 7.059129f, 5.983335f, 17.525716f, 0.137973f, 0.154588f, 0.309884f, 0.578446f, +0.021792f, 0.042513f, 0.024687f, 0.093797f, 0.012190f, 0.034935f, 0.020682f, 0.039180f, 2.033900f, 3.673173f, 1.376414f, 8.296937f, 0.025285f, 0.028453f, 0.025215f, 0.096865f, +0.055108f, 0.094714f, 0.112617f, 0.213552f, 0.034994f, 0.088355f, 0.107109f, 0.101265f, 2.397302f, 3.814282f, 2.926669f, 8.804693f, 0.096554f, 0.095722f, 0.173702f, 0.333025f, +0.000749f, 0.001158f, 0.000922f, 0.006636f, 0.000445f, 0.001010f, 0.000820f, 0.002943f, 0.253901f, 0.363304f, 0.186587f, 2.131500f, 0.002213f, 0.001973f, 0.002396f, 0.017444f, +0.000012f, 0.000037f, 0.000022f, 0.000158f, 0.000010f, 0.000046f, 0.000028f, 0.000099f, 0.007092f, 0.019866f, 0.007524f, 0.086115f, 0.000055f, 0.000095f, 0.000085f, 0.000622f, +0.000015f, 0.000047f, 0.000012f, 0.000180f, 0.000006f, 0.000027f, 0.000007f, 0.000053f, 0.009909f, 0.027877f, 0.004668f, 0.109943f, 0.000027f, 0.000047f, 0.000019f, 0.000281f, +0.000034f, 0.000090f, 0.000048f, 0.000354f, 0.000015f, 0.000059f, 0.000032f, 0.000118f, 0.010119f, 0.025082f, 0.008599f, 0.101090f, 0.000089f, 0.000138f, 0.000112f, 0.000837f, +0.025619f, 0.061130f, 0.065488f, 0.214510f, 0.023816f, 0.083479f, 0.091179f, 0.148908f, 4.953470f, 10.941667f, 7.564215f, 39.309171f, 0.036819f, 0.050676f, 0.082854f, 0.274394f, +0.001216f, 0.005679f, 0.004486f, 0.014724f, 0.001602f, 0.010992f, 0.008853f, 0.014486f, 0.399835f, 1.729042f, 0.881458f, 4.589490f, 0.002624f, 0.007069f, 0.008523f, 0.028282f, +0.000324f, 0.001519f, 0.000531f, 0.003584f, 0.000200f, 0.001376f, 0.000490f, 0.001649f, 0.119364f, 0.518419f, 0.116840f, 1.251963f, 0.000277f, 0.000750f, 0.000400f, 0.002729f, +0.000383f, 0.001585f, 0.001133f, 0.003820f, 0.000268f, 0.001629f, 0.001188f, 0.001996f, 0.065873f, 0.252053f, 0.116320f, 0.622053f, 0.000495f, 0.001181f, 0.001289f, 0.004393f, +0.001271f, 0.001129f, 0.002242f, 0.016464f, 0.000064f, 0.000083f, 0.000169f, 0.000617f, 0.145690f, 0.119797f, 0.153592f, 1.789021f, 
0.001240f, 0.000635f, 0.001926f, 0.014294f, +0.000112f, 0.000194f, 0.000285f, 0.002095f, 0.000008f, 0.000020f, 0.000030f, 0.000111f, 0.021799f, 0.035091f, 0.033177f, 0.387184f, 0.000164f, 0.000164f, 0.000367f, 0.002731f, +0.000072f, 0.000126f, 0.000081f, 0.001232f, 0.000002f, 0.000006f, 0.000004f, 0.000031f, 0.015726f, 0.025426f, 0.010627f, 0.255235f, 0.000042f, 0.000042f, 0.000042f, 0.000637f, +0.000163f, 0.000251f, 0.000333f, 0.002516f, 0.000006f, 0.000014f, 0.000019f, 0.000071f, 0.016623f, 0.023678f, 0.020265f, 0.242909f, 0.000143f, 0.000127f, 0.000257f, 0.001963f, +0.000648f, 0.001211f, 0.001818f, 0.013123f, 0.005855f, 0.016085f, 0.024620f, 0.088612f, 0.559901f, 0.969306f, 0.939052f, 10.754886f, 0.000717f, 0.000773f, 0.001772f, 0.012935f, +0.000015f, 0.000054f, 0.000060f, 0.000435f, 0.000190f, 0.001022f, 0.001153f, 0.004159f, 0.021803f, 0.073897f, 0.052792f, 0.605784f, 0.000025f, 0.000052f, 0.000088f, 0.000643f, +0.000003f, 0.000013f, 0.000006f, 0.000092f, 0.000021f, 0.000111f, 0.000055f, 0.000410f, 0.005641f, 0.019202f, 0.006065f, 0.143215f, 0.000002f, 0.000005f, 0.000004f, 0.000054f, +0.000021f, 0.000067f, 0.000067f, 0.000496f, 0.000140f, 0.000666f, 0.000680f, 0.002520f, 0.015794f, 0.047364f, 0.030631f, 0.361009f, 0.000020f, 0.000038f, 0.000058f, 0.000439f, +0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000000f, 0.000001f, 0.000001f, 0.000010f, 0.000234f, 0.000632f, 0.000274f, 0.012245f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000010f, 0.000003f, 0.000143f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000007f, 0.000001f, 0.000091f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000015f, 0.000004f, 0.000200f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
+0.000023f, 0.000102f, 0.000092f, 0.001176f, 0.000225f, 0.001486f, 0.001368f, 0.008752f, 0.077084f, 0.320928f, 0.187000f, 3.807040f, 0.000018f, 0.000048f, 0.000066f, 0.000855f, +0.000000f, 0.000003f, 0.000002f, 0.000023f, 0.000004f, 0.000057f, 0.000038f, 0.000247f, 0.001803f, 0.014692f, 0.006313f, 0.128770f, 0.000000f, 0.000002f, 0.000002f, 0.000026f, +0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000000f, 0.000004f, 0.000001f, 0.000014f, 0.000269f, 0.002200f, 0.000418f, 0.017542f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.000000f, 0.000001f, 0.000001f, 0.000007f, 0.000001f, 0.000010f, 0.000006f, 0.000040f, 0.000352f, 0.002541f, 0.000988f, 0.020703f, 0.000000f, 0.000000f, 0.000000f, 0.000005f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000011f, 0.000017f, 0.000018f, 0.000837f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000053f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000017f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000039f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +}; + +static const float acceptor_me2x3acc1[16384] = { +0.194114f, 0.254369f, 0.059224f, 0.479611f, 0.184195f, 0.431590f, 0.364757f, 0.677230f, 0.039156f, 0.074210f, 0.046061f, 0.142616f, 0.263375f, 0.540027f, 0.492918f, 0.741151f, +0.196628f, 0.370974f, 0.085338f, 0.772998f, 0.163797f, 0.552574f, 0.461409f, 0.958220f, 0.116240f, 0.317182f, 0.194511f, 0.673638f, 0.250918f, 0.740738f, 0.668017f, 1.123481f, +0.052415f, 0.117194f, 0.024461f, 0.180371f, 0.066584f, 0.266197f, 0.201679f, 0.340960f, 0.037786f, 0.122190f, 0.067988f, 0.191682f, 0.081306f, 0.284450f, 0.232751f, 
0.318665f, +0.304223f, 0.440426f, 0.084923f, 0.930491f, 0.255470f, 0.661310f, 0.462862f, 1.162744f, 0.211373f, 0.442572f, 0.227494f, 0.953032f, 0.335322f, 0.759586f, 0.574182f, 1.168105f, +0.145656f, 0.232551f, 0.041391f, 0.372302f, 0.134074f, 0.382754f, 0.247288f, 0.509960f, 0.039260f, 0.090656f, 0.043015f, 0.147930f, 0.246315f, 0.615336f, 0.429361f, 0.717061f, +0.207366f, 0.476669f, 0.083824f, 0.843342f, 0.167569f, 0.688744f, 0.439648f, 1.014110f, 0.163806f, 0.544582f, 0.255300f, 0.982053f, 0.329812f, 1.186261f, 0.817813f, 1.527686f, +0.121829f, 0.331880f, 0.052954f, 0.433706f, 0.150127f, 0.731262f, 0.423528f, 0.795293f, 0.117358f, 0.462374f, 0.196672f, 0.615874f, 0.235539f, 1.003979f, 0.628002f, 0.955004f, +0.292668f, 0.516223f, 0.076092f, 0.926038f, 0.238406f, 0.751906f, 0.402310f, 1.122523f, 0.271717f, 0.693156f, 0.272375f, 1.267379f, 0.402058f, 1.109644f, 0.641221f, 1.448911f, +0.050828f, 0.081012f, 0.015369f, 0.126731f, 0.072420f, 0.206391f, 0.142132f, 0.268697f, 0.018783f, 0.043298f, 0.021898f, 0.069037f, 0.110917f, 0.276615f, 0.205733f, 0.314974f, +0.067112f, 0.154005f, 0.028867f, 0.266243f, 0.083945f, 0.344441f, 0.234358f, 0.495561f, 0.072683f, 0.241225f, 0.120539f, 0.425059f, 0.137740f, 0.494574f, 0.363432f, 0.622358f, +0.036190f, 0.098419f, 0.016738f, 0.125675f, 0.069030f, 0.335666f, 0.207222f, 0.356712f, 0.047796f, 0.187988f, 0.085231f, 0.244672f, 0.090289f, 0.384196f, 0.256158f, 0.357099f, +0.128070f, 0.225511f, 0.035431f, 0.395288f, 0.161483f, 0.508429f, 0.289965f, 0.741681f, 0.163015f, 0.415145f, 0.173882f, 0.741704f, 0.227034f, 0.625525f, 0.385289f, 0.798100f, +0.181327f, 0.197654f, 0.051343f, 0.382130f, 0.162262f, 0.316260f, 0.298206f, 0.508848f, 0.033636f, 0.053028f, 0.036721f, 0.104494f, 0.317780f, 0.542005f, 0.551954f, 0.762736f, +0.185977f, 0.291874f, 0.074909f, 0.623605f, 0.146101f, 0.409990f, 0.381952f, 0.728999f, 0.101106f, 0.229489f, 0.157014f, 0.499759f, 0.306545f, 0.752769f, 0.757400f, 1.170692f, +0.137800f, 0.256290f, 0.059681f, 
0.404458f, 0.165079f, 0.548985f, 0.464044f, 0.721011f, 0.091354f, 0.245734f, 0.152547f, 0.395267f, 0.276097f, 0.803487f, 0.733508f, 0.922968f, +0.325774f, 0.392314f, 0.084396f, 0.849869f, 0.257985f, 0.555515f, 0.433793f, 1.001508f, 0.208150f, 0.362533f, 0.207909f, 0.800479f, 0.463802f, 0.873942f, 0.737048f, 1.378058f, +0.238641f, 0.435520f, 0.076985f, 0.625056f, 0.257179f, 0.839233f, 0.538490f, 1.002384f, 0.052200f, 0.137781f, 0.064927f, 0.201551f, 0.405784f, 1.158756f, 0.802996f, 1.210514f, +0.262777f, 0.690465f, 0.120588f, 1.095123f, 0.248610f, 1.168038f, 0.740484f, 1.541765f, 0.168456f, 0.640166f, 0.298051f, 1.034899f, 0.420250f, 1.727808f, 1.182990f, 1.994727f, +0.070265f, 0.218797f, 0.034671f, 0.256324f, 0.101372f, 0.564427f, 0.324660f, 0.550295f, 0.054929f, 0.247376f, 0.104501f, 0.295387f, 0.136596f, 0.665542f, 0.413450f, 0.567532f, +0.392706f, 0.791780f, 0.115909f, 1.273298f, 0.374527f, 1.350221f, 0.717486f, 1.807051f, 0.295878f, 0.862784f, 0.336706f, 1.414204f, 0.542464f, 1.711359f, 0.982147f, 2.003241f, +0.171132f, 0.380517f, 0.051419f, 0.463701f, 0.178902f, 0.711285f, 0.348891f, 0.721353f, 0.050020f, 0.160857f, 0.057946f, 0.199796f, 0.362680f, 1.261832f, 0.668458f, 1.119263f, +0.264845f, 0.847867f, 0.113199f, 1.141831f, 0.243062f, 1.391353f, 0.674291f, 1.559378f, 0.226868f, 1.050416f, 0.373861f, 1.441848f, 0.527905f, 2.644385f, 1.384080f, 2.592180f, +0.156080f, 0.592148f, 0.071731f, 0.589022f, 0.218435f, 1.481805f, 0.651574f, 1.226682f, 0.163040f, 0.894602f, 0.288896f, 0.907016f, 0.378173f, 2.244954f, 1.066121f, 1.625455f, +0.361048f, 0.886915f, 0.099254f, 1.211045f, 0.334021f, 1.467157f, 0.595986f, 1.667228f, 0.363489f, 1.291404f, 0.385267f, 1.797316f, 0.621599f, 2.389248f, 1.048209f, 2.374685f, +0.124872f, 0.277183f, 0.039924f, 0.330054f, 0.202063f, 0.801998f, 0.419312f, 0.794753f, 0.050040f, 0.160646f, 0.061684f, 0.194972f, 0.341499f, 1.186109f, 0.669754f, 1.028041f, +0.179231f, 0.572805f, 0.081515f, 0.753764f, 0.254610f, 1.454968f, 0.751591f, 
1.593392f, 0.210491f, 0.972925f, 0.369102f, 1.304946f, 0.461008f, 2.305337f, 1.286143f, 2.208158f, +0.096950f, 0.367187f, 0.047411f, 0.356897f, 0.210019f, 1.422280f, 0.666616f, 1.150486f, 0.138845f, 0.760547f, 0.261792f, 0.753470f, 0.303124f, 1.796368f, 0.909311f, 1.270919f, +0.330366f, 0.810160f, 0.096639f, 1.080946f, 0.473089f, 2.074448f, 0.898216f, 2.303435f, 0.455997f, 1.617298f, 0.514289f, 2.199416f, 0.733960f, 2.816313f, 1.317000f, 2.735148f, +0.230008f, 0.349173f, 0.068862f, 0.513845f, 0.233756f, 0.634522f, 0.454236f, 0.777101f, 0.046267f, 0.101584f, 0.053407f, 0.152370f, 0.505171f, 1.199972f, 0.927753f, 1.285370f, +0.256445f, 0.560512f, 0.109216f, 0.911560f, 0.228800f, 0.894192f, 0.632455f, 1.210240f, 0.151180f, 0.477902f, 0.248243f, 0.792180f, 0.529737f, 1.811692f, 1.383917f, 2.144627f, +0.190599f, 0.493697f, 0.087282f, 0.593045f, 0.259318f, 1.201039f, 0.770759f, 1.200673f, 0.137021f, 0.513311f, 0.241925f, 0.628481f, 0.478594f, 1.939724f, 1.344399f, 1.696033f, +0.433894f, 0.727706f, 0.118853f, 1.199945f, 0.390238f, 1.170273f, 0.693802f, 1.605950f, 0.300629f, 0.729217f, 0.317501f, 1.225592f, 0.774163f, 2.031601f, 1.300811f, 2.438428f, +0.071432f, 0.135133f, 0.021986f, 0.200242f, 0.083200f, 0.281432f, 0.166209f, 0.347063f, 0.018481f, 0.050564f, 0.021931f, 0.076369f, 0.130629f, 0.386669f, 0.246630f, 0.417061f, +0.066959f, 0.182377f, 0.029317f, 0.298658f, 0.068467f, 0.333445f, 0.194567f, 0.454431f, 0.050770f, 0.199995f, 0.085705f, 0.333817f, 0.115167f, 0.490816f, 0.309307f, 0.585046f, +0.020121f, 0.064948f, 0.009473f, 0.078559f, 0.031374f, 0.181079f, 0.095868f, 0.182280f, 0.018605f, 0.086852f, 0.033770f, 0.107077f, 0.042068f, 0.212467f, 0.121486f, 0.187063f, +0.107373f, 0.224407f, 0.030237f, 0.372602f, 0.110675f, 0.413595f, 0.202288f, 0.571510f, 0.095684f, 0.289224f, 0.103888f, 0.489470f, 0.159512f, 0.521636f, 0.275543f, 0.630439f, +0.045260f, 0.104318f, 0.012975f, 0.131252f, 0.051137f, 0.210749f, 0.095147f, 0.220675f, 0.015647f, 0.052158f, 0.017294f, 
0.066889f, 0.103157f, 0.372032f, 0.181400f, 0.340717f, +0.059628f, 0.197874f, 0.024316f, 0.275134f, 0.059144f, 0.350942f, 0.156542f, 0.406100f, 0.060413f, 0.289948f, 0.094985f, 0.410924f, 0.127822f, 0.663711f, 0.319743f, 0.671742f, +0.039491f, 0.155304f, 0.017316f, 0.159502f, 0.059732f, 0.420032f, 0.169996f, 0.359009f, 0.048791f, 0.277512f, 0.082486f, 0.290502f, 0.102904f, 0.633220f, 0.276783f, 0.473375f, +0.087222f, 0.222098f, 0.022877f, 0.313117f, 0.087211f, 0.397081f, 0.148465f, 0.465886f, 0.103860f, 0.382494f, 0.105029f, 0.549629f, 0.161497f, 0.643457f, 0.259831f, 0.660309f, +0.032530f, 0.074849f, 0.009923f, 0.092021f, 0.056891f, 0.234062f, 0.112637f, 0.239482f, 0.015418f, 0.051308f, 0.018133f, 0.064294f, 0.095675f, 0.344459f, 0.179025f, 0.308252f, +0.039747f, 0.131674f, 0.017247f, 0.178901f, 0.061025f, 0.361482f, 0.171870f, 0.408732f, 0.055211f, 0.264529f, 0.092369f, 0.366327f, 0.109950f, 0.569933f, 0.292660f, 0.563640f, +0.024162f, 0.094858f, 0.011273f, 0.095195f, 0.056569f, 0.397110f, 0.171312f, 0.331657f, 0.040927f, 0.232387f, 0.073625f, 0.237703f, 0.081245f, 0.499088f, 0.232531f, 0.364571f, +0.078612f, 0.199834f, 0.021940f, 0.275286f, 0.121668f, 0.553018f, 0.220395f, 0.634009f, 0.128338f, 0.471831f, 0.138099f, 0.662502f, 0.187828f, 0.747091f, 0.321562f, 0.749129f, +0.076356f, 0.120156f, 0.021811f, 0.182566f, 0.083869f, 0.235988f, 0.155493f, 0.298403f, 0.018167f, 0.041346f, 0.020007f, 0.064031f, 0.180357f, 0.444089f, 0.316022f, 0.491146f, +0.072472f, 0.164197f, 0.029448f, 0.275708f, 0.069883f, 0.283106f, 0.184304f, 0.395616f, 0.050533f, 0.165584f, 0.079167f, 0.283391f, 0.161002f, 0.570768f, 0.401301f, 0.697606f, +0.060533f, 0.162530f, 0.026448f, 0.201578f, 0.089010f, 0.427335f, 0.252415f, 0.441082f, 0.051470f, 0.199872f, 0.086704f, 0.252666f, 0.163467f, 0.686764f, 0.438108f, 0.619990f, +0.131572f, 0.228739f, 0.034386f, 0.389429f, 0.127893f, 0.397566f, 0.216942f, 0.563297f, 0.107823f, 0.271106f, 0.108646f, 0.470448f, 0.252468f, 0.686779f, 0.404742f, 
0.851082f, +0.254579f, 0.420878f, 0.067651f, 0.623936f, 0.315714f, 0.933283f, 0.544539f, 1.151427f, 0.057210f, 0.136790f, 0.058615f, 0.206692f, 0.412654f, 1.067467f, 0.672662f, 1.151872f, +0.257672f, 0.613327f, 0.097404f, 1.004814f, 0.280530f, 1.193958f, 0.688285f, 1.627880f, 0.169700f, 0.584199f, 0.247332f, 0.975524f, 0.392826f, 1.463052f, 0.910890f, 1.744695f, +0.064821f, 0.182847f, 0.026347f, 0.221263f, 0.107616f, 0.542796f, 0.283909f, 0.546635f, 0.052059f, 0.212385f, 0.081584f, 0.261956f, 0.120124f, 0.530196f, 0.299506f, 0.467007f, +0.374581f, 0.684154f, 0.091073f, 1.136453f, 0.411096f, 1.342566f, 0.648732f, 1.855980f, 0.289940f, 0.765894f, 0.271793f, 1.296732f, 0.493245f, 1.409626f, 0.735631f, 1.704385f, +0.206179f, 0.415299f, 0.051031f, 0.522753f, 0.248034f, 0.893329f, 0.398454f, 0.935810f, 0.061912f, 0.180361f, 0.059081f, 0.231399f, 0.416536f, 1.312808f, 0.632404f, 1.202828f, +0.293298f, 0.850581f, 0.103264f, 1.183210f, 0.309753f, 1.606227f, 0.707844f, 1.859487f, 0.258111f, 1.082596f, 0.350378f, 1.534958f, 0.557296f, 2.528867f, 1.203603f, 2.560581f, +0.162615f, 0.558876f, 0.061562f, 0.574233f, 0.261889f, 1.609376f, 0.643503f, 1.376165f, 0.174511f, 0.867425f, 0.254721f, 0.908425f, 0.375593f, 2.019788f, 0.872219f, 1.510585f, +0.388937f, 0.865503f, 0.088075f, 1.220728f, 0.414067f, 1.647573f, 0.608591f, 1.933904f, 0.402276f, 1.294690f, 0.351225f, 1.861230f, 0.638321f, 2.222600f, 0.886684f, 2.281802f, +0.179228f, 0.360395f, 0.047203f, 0.443272f, 0.333742f, 1.199963f, 0.570496f, 1.228284f, 0.073786f, 0.214585f, 0.074925f, 0.269014f, 0.467245f, 1.470114f, 0.754853f, 1.316160f, +0.236460f, 0.684575f, 0.088588f, 0.930513f, 0.386546f, 2.001013f, 0.939937f, 2.263555f, 0.285295f, 1.194569f, 0.412097f, 1.654994f, 0.579782f, 2.626411f, 1.332411f, 2.598545f, +0.120333f, 0.412856f, 0.048475f, 0.414502f, 0.299972f, 1.840257f, 0.784314f, 1.537611f, 0.177047f, 0.878526f, 0.274983f, 0.899015f, 0.358653f, 1.925397f, 0.886255f, 1.407067f, +0.423972f, 0.941855f, 0.102162f, 
1.298043f, 0.698661f, 2.775221f, 1.092689f, 3.183046f, 0.601203f, 1.931614f, 0.558546f, 2.713375f, 0.897900f, 3.121101f, 1.327191f, 3.130975f, +0.282684f, 0.388752f, 0.069716f, 0.590928f, 0.330601f, 0.812942f, 0.529194f, 1.028400f, 0.058419f, 0.116191f, 0.055548f, 0.180020f, 0.591850f, 1.273549f, 0.895361f, 1.409110f, +0.289705f, 0.573611f, 0.101635f, 0.963586f, 0.297440f, 1.053041f, 0.677275f, 1.472170f, 0.175458f, 0.502445f, 0.237327f, 0.860292f, 0.570474f, 1.767384f, 1.227658f, 2.161077f, +0.202572f, 0.475324f, 0.076415f, 0.589780f, 0.317157f, 1.330664f, 0.776517f, 1.374068f, 0.149611f, 0.507723f, 0.217595f, 0.642112f, 0.484886f, 1.780260f, 1.121999f, 1.607865f, +0.476809f, 0.724415f, 0.107588f, 1.233857f, 0.493482f, 1.340603f, 0.722720f, 1.900279f, 0.339398f, 0.745771f, 0.295267f, 1.294692f, 0.810973f, 1.927895f, 1.122484f, 2.390160f, +0.230052f, 0.301463f, 0.070189f, 0.568405f, 0.279227f, 0.654260f, 0.552945f, 1.026632f, 0.054683f, 0.103637f, 0.064326f, 0.199170f, 0.343024f, 0.703341f, 0.641985f, 0.965288f, +0.311947f, 0.588545f, 0.135388f, 1.226351f, 0.332394f, 1.121339f, 0.936339f, 1.944517f, 0.217310f, 0.592967f, 0.363636f, 1.259359f, 0.437472f, 1.291464f, 1.164676f, 1.958770f, +0.064795f, 0.144873f, 0.030238f, 0.222971f, 0.105284f, 0.420916f, 0.318900f, 0.539134f, 0.055043f, 0.177993f, 0.099038f, 0.279222f, 0.110456f, 0.386429f, 0.316195f, 0.432910f, +0.503707f, 0.729221f, 0.140608f, 1.540629f, 0.541048f, 1.400559f, 0.980274f, 2.462522f, 0.412404f, 0.863489f, 0.443856f, 1.859429f, 0.610140f, 1.382114f, 1.044761f, 2.125442f, +0.233699f, 0.373118f, 0.066410f, 0.597343f, 0.275159f, 0.785520f, 0.507505f, 1.046585f, 0.074228f, 0.171400f, 0.081327f, 0.279687f, 0.434310f, 1.084980f, 0.757062f, 1.264344f, +0.445382f, 1.023793f, 0.180038f, 1.811339f, 0.460361f, 1.892185f, 1.207844f, 2.786064f, 0.414584f, 1.378306f, 0.646148f, 2.485518f, 0.778474f, 2.799995f, 1.930329f, 3.605879f, +0.203889f, 0.555421f, 0.088621f, 0.725833f, 0.321374f, 1.565397f, 0.906639f, 
1.702468f, 0.231441f, 0.911846f, 0.387856f, 1.214562f, 0.433197f, 1.846492f, 1.155005f, 1.756419f, +0.656025f, 1.157131f, 0.170563f, 2.075746f, 0.683553f, 2.155852f, 1.153497f, 3.218479f, 0.717708f, 1.830891f, 0.719448f, 3.347636f, 0.990409f, 2.733444f, 1.579553f, 3.569178f, +0.069707f, 0.111102f, 0.021078f, 0.173801f, 0.127039f, 0.362051f, 0.249328f, 0.471349f, 0.030355f, 0.069972f, 0.035389f, 0.111568f, 0.167166f, 0.416896f, 0.310067f, 0.474708f, +0.123207f, 0.282731f, 0.052996f, 0.488783f, 0.197125f, 0.808840f, 0.550336f, 1.163711f, 0.157238f, 0.521851f, 0.260766f, 0.919545f, 0.277894f, 0.997815f, 0.733233f, 1.255622f, +0.051770f, 0.140787f, 0.023944f, 0.179776f, 0.126308f, 0.614188f, 0.379166f, 0.652696f, 0.080568f, 0.316884f, 0.143671f, 0.412433f, 0.141938f, 0.603974f, 0.402692f, 0.561376f, +0.245377f, 0.432070f, 0.067885f, 0.757358f, 0.395753f, 1.246029f, 0.710630f, 1.817669f, 0.368045f, 0.937288f, 0.392580f, 1.674573f, 0.478035f, 1.317082f, 0.811251f, 1.680452f, +0.398167f, 0.434019f, 0.112742f, 0.839099f, 0.455751f, 0.888293f, 0.837584f, 1.429224f, 0.087036f, 0.137212f, 0.095018f, 0.270384f, 0.766850f, 1.307938f, 1.331945f, 1.840593f, +0.546677f, 0.857957f, 0.220194f, 1.833072f, 0.549330f, 1.541534f, 1.436115f, 2.740989f, 0.350212f, 0.794912f, 0.543870f, 1.731081f, 0.990251f, 2.431719f, 2.446676f, 3.781761f, +0.315620f, 0.587014f, 0.136695f, 0.926380f, 0.483636f, 1.608373f, 1.359520f, 2.112359f, 0.246565f, 0.663235f, 0.411724f, 1.066824f, 0.694959f, 2.022443f, 1.846298f, 2.323185f, +0.999393f, 1.203519f, 0.258907f, 2.607183f, 1.012334f, 2.179845f, 1.702206f, 3.929924f, 0.752460f, 1.310549f, 0.751587f, 2.893715f, 1.563627f, 2.946345f, 2.484830f, 4.645885f, +0.302687f, 0.552404f, 0.097646f, 0.792808f, 0.417248f, 1.361577f, 0.873649f, 1.626273f, 0.078021f, 0.205933f, 0.097042f, 0.301245f, 0.565622f, 1.615186f, 1.119294f, 1.687331f, +0.446174f, 1.172353f, 0.204749f, 1.859430f, 0.539939f, 2.536787f, 1.608210f, 3.348462f, 0.337046f, 1.280843f, 0.596340f, 
2.070625f, 0.784162f, 3.223991f, 2.207391f, 3.722045f, +0.092961f, 0.289471f, 0.045870f, 0.339119f, 0.171551f, 0.955170f, 0.549417f, 0.931256f, 0.085635f, 0.385663f, 0.162918f, 0.460511f, 0.198602f, 0.967653f, 0.601129f, 0.825153f, +0.695880f, 1.403043f, 0.205392f, 2.256299f, 0.848907f, 3.060423f, 1.626261f, 4.095879f, 0.617825f, 1.801587f, 0.703078f, 2.953009f, 1.056377f, 3.332643f, 1.912601f, 3.901044f, +0.293859f, 0.653406f, 0.088294f, 0.796245f, 0.392947f, 1.562293f, 0.766316f, 1.584406f, 0.101213f, 0.325487f, 0.117252f, 0.404279f, 0.684406f, 2.381175f, 1.261432f, 2.112135f, +0.608792f, 1.948965f, 0.260207f, 2.624690f, 0.714667f, 4.090946f, 1.982592f, 4.584984f, 0.614519f, 2.845273f, 1.012682f, 3.905548f, 1.333562f, 6.680086f, 3.496379f, 6.548210f, +0.279556f, 1.060603f, 0.128478f, 1.055003f, 0.500443f, 3.394872f, 1.492779f, 2.810375f, 0.344114f, 1.888159f, 0.609748f, 1.914360f, 0.744378f, 4.418866f, 2.098505f, 3.199472f, +0.866144f, 2.127686f, 0.238107f, 2.905266f, 1.024967f, 4.502072f, 1.828825f, 5.116002f, 1.027552f, 3.650684f, 1.089114f, 5.080852f, 1.638767f, 6.298951f, 2.763470f, 6.260556f, +0.183280f, 0.406834f, 0.058598f, 0.484436f, 0.379356f, 1.505684f, 0.787223f, 1.492083f, 0.086547f, 0.277848f, 0.106687f, 0.337217f, 0.550835f, 1.913182f, 1.080307f, 1.658219f, +0.352153f, 1.125446f, 0.160161f, 1.480996f, 0.639888f, 3.656637f, 1.888903f, 4.004525f, 0.487347f, 2.252598f, 0.854577f, 3.021322f, 0.995423f, 4.977758f, 2.777081f, 4.767925f, +0.148426f, 0.562149f, 0.072585f, 0.546396f, 0.411276f, 2.785219f, 1.305419f, 2.252970f, 0.250485f, 1.372070f, 0.472288f, 1.359304f, 0.509995f, 3.022321f, 1.529882f, 2.138273f, +0.677428f, 1.661262f, 0.198162f, 2.216520f, 1.240854f, 5.441021f, 2.355908f, 6.041625f, 1.101834f, 3.907904f, 1.242686f, 5.314487f, 1.653946f, 6.346435f, 2.967800f, 6.163532f, +0.540536f, 0.820585f, 0.161831f, 1.207576f, 0.702677f, 1.907390f, 1.365445f, 2.335987f, 0.128128f, 0.281316f, 0.147900f, 0.421959f, 1.304676f, 3.099097f, 2.396054f, 
3.319651f, +0.806761f, 1.763336f, 0.343588f, 2.867716f, 0.920696f, 3.598249f, 2.545013f, 4.870036f, 0.560444f, 1.771640f, 0.920267f, 2.936706f, 1.831439f, 6.263488f, 4.784557f, 7.414530f, +0.467216f, 1.210199f, 0.213955f, 1.453732f, 0.813092f, 3.765855f, 2.416712f, 3.764709f, 0.395796f, 1.482733f, 0.698818f, 1.815410f, 1.289275f, 5.225378f, 3.621645f, 4.568904f, +1.424570f, 2.389220f, 0.390220f, 3.939683f, 1.638853f, 4.914702f, 2.913708f, 6.744384f, 1.163102f, 2.821260f, 1.228377f, 4.741681f, 2.793276f, 7.330266f, 4.693484f, 8.798148f, +0.149690f, 0.283178f, 0.046073f, 0.419618f, 0.223014f, 0.754368f, 0.445516f, 0.930288f, 0.045636f, 0.124861f, 0.054156f, 0.188584f, 0.300829f, 0.890471f, 0.567972f, 0.960462f, +0.187836f, 0.511608f, 0.082240f, 0.837802f, 0.245673f, 1.196467f, 0.698144f, 1.630592f, 0.167827f, 0.661110f, 0.283307f, 1.103473f, 0.355038f, 1.513098f, 0.953538f, 1.803590f, +0.043981f, 0.141963f, 0.020706f, 0.171714f, 0.087720f, 0.506279f, 0.268038f, 0.509637f, 0.047920f, 0.223706f, 0.086981f, 0.275799f, 0.101052f, 0.510370f, 0.291823f, 0.449348f, +0.314349f, 0.656981f, 0.088522f, 1.090841f, 0.414454f, 1.548824f, 0.757524f, 2.140181f, 0.330098f, 0.997783f, 0.358402f, 1.688607f, 0.513206f, 1.678284f, 0.886516f, 2.028339f, +0.128401f, 0.295949f, 0.036809f, 0.372360f, 0.185567f, 0.764777f, 0.345276f, 0.800795f, 0.052308f, 0.174368f, 0.057815f, 0.223613f, 0.321616f, 1.159898f, 0.565559f, 1.062265f, +0.226451f, 0.751474f, 0.092345f, 1.044890f, 0.287308f, 1.704794f, 0.760444f, 1.972736f, 0.270359f, 1.297575f, 0.425077f, 1.838965f, 0.533474f, 2.770042f, 1.334469f, 2.803560f, +0.116860f, 0.459574f, 0.051241f, 0.471997f, 0.226095f, 1.589879f, 0.643461f, 1.358901f, 0.170137f, 0.967698f, 0.287632f, 1.012996f, 0.334647f, 2.059244f, 0.900103f, 1.539424f, +0.345700f, 0.880280f, 0.090672f, 1.241029f, 0.442138f, 2.013094f, 0.752678f, 2.361920f, 0.485078f, 1.786431f, 0.490536f, 2.567034f, 0.703430f, 2.802697f, 1.131744f, 2.876098f, +0.078882f, 0.181504f, 0.024062f, 
0.223145f, 0.176462f, 0.726006f, 0.349374f, 0.742818f, 0.044057f, 0.146614f, 0.051816f, 0.183722f, 0.254965f, 0.917951f, 0.477085f, 0.821462f, +0.129024f, 0.427434f, 0.055987f, 0.580739f, 0.253386f, 1.500944f, 0.713638f, 1.697136f, 0.211192f, 1.011875f, 0.353330f, 1.401274f, 0.392232f, 2.033165f, 1.044030f, 2.010719f, +0.061114f, 0.239932f, 0.028515f, 0.240784f, 0.183023f, 1.284797f, 0.554257f, 1.073035f, 0.121987f, 0.692647f, 0.219446f, 0.708492f, 0.225836f, 1.387305f, 0.646361f, 1.013392f, +0.266322f, 0.676996f, 0.074328f, 0.932615f, 0.527233f, 2.396440f, 0.955059f, 2.747406f, 0.512340f, 1.883608f, 0.551307f, 2.644788f, 0.699294f, 2.781457f, 1.197189f, 2.789042f, +0.296466f, 0.466528f, 0.084684f, 0.708846f, 0.416528f, 1.172013f, 0.772240f, 1.481993f, 0.083117f, 0.189168f, 0.091539f, 0.292958f, 0.769569f, 1.894890f, 1.348439f, 2.095677f, +0.376679f, 0.853425f, 0.153057f, 1.433010f, 0.464602f, 1.882175f, 1.225305f, 2.630170f, 0.309498f, 1.014156f, 0.484874f, 1.735692f, 0.919631f, 3.260177f, 2.292199f, 3.984662f, +0.245153f, 0.658233f, 0.107110f, 0.816376f, 0.461102f, 2.213731f, 1.307591f, 2.284944f, 0.245634f, 0.953860f, 0.413782f, 1.205812f, 0.727544f, 3.056577f, 1.949885f, 2.759386f, +0.713696f, 1.240766f, 0.186522f, 2.112409f, 0.887377f, 2.758478f, 1.505234f, 3.908387f, 0.689202f, 1.732910f, 0.694464f, 3.007101f, 1.505006f, 4.094003f, 2.412734f, 5.073442f, +0.240658f, 0.397863f, 0.063952f, 0.589817f, 0.381752f, 1.128498f, 0.658440f, 1.392272f, 0.063728f, 0.152377f, 0.065294f, 0.230243f, 0.428692f, 1.108953f, 0.698804f, 1.196638f, +0.326071f, 0.776135f, 0.123260f, 1.271542f, 0.454082f, 1.932611f, 1.114099f, 2.634983f, 0.253054f, 0.871148f, 0.368817f, 1.454686f, 0.546295f, 2.034633f, 1.266754f, 2.426308f, +0.063915f, 0.180293f, 0.025979f, 0.218173f, 0.135731f, 0.684602f, 0.358080f, 0.689443f, 0.060489f, 0.246775f, 0.094794f, 0.304372f, 0.130167f, 0.574525f, 0.324547f, 0.506053f, +0.494698f, 0.903542f, 0.120277f, 1.500880f, 0.694461f, 2.267986f, 1.095898f, 
3.135292f, 0.451221f, 1.191927f, 0.422979f, 2.018047f, 0.715877f, 2.045878f, 1.067668f, 2.473681f, +0.263865f, 0.531493f, 0.065308f, 0.669011f, 0.406030f, 1.462374f, 0.652266f, 1.531914f, 0.093368f, 0.271998f, 0.089099f, 0.348968f, 0.585828f, 1.846371f, 0.889431f, 1.691693f, +0.502474f, 1.457203f, 0.176911f, 2.027058f, 0.678782f, 3.519828f, 1.551143f, 4.074811f, 0.521072f, 2.185532f, 0.707338f, 3.098755f, 1.049232f, 4.761147f, 2.266047f, 4.820856f, +0.217075f, 0.746046f, 0.082179f, 0.766547f, 0.447175f, 2.748007f, 1.098781f, 2.349800f, 0.274512f, 1.364485f, 0.400683f, 1.428978f, 0.550996f, 2.963038f, 1.279550f, 2.216035f, +0.695398f, 1.547470f, 0.157473f, 2.182592f, 0.946967f, 3.767980f, 1.391840f, 4.422816f, 0.847548f, 2.727759f, 0.739990f, 3.921392f, 1.254221f, 4.367132f, 1.742223f, 4.483457f, +0.196058f, 0.394238f, 0.051635f, 0.484897f, 0.466981f, 1.679023f, 0.798254f, 1.718651f, 0.095113f, 0.276608f, 0.096580f, 0.346769f, 0.561700f, 1.767303f, 0.907450f, 1.582226f, +0.346261f, 1.002461f, 0.129724f, 1.362601f, 0.724032f, 3.748058f, 1.760577f, 4.239820f, 0.492296f, 2.061312f, 0.711102f, 2.855807f, 0.933024f, 4.226590f, 2.144202f, 4.181747f, +0.137302f, 0.471076f, 0.055310f, 0.472954f, 0.437808f, 2.685844f, 1.144701f, 2.244134f, 0.238050f, 1.181227f, 0.369730f, 1.208775f, 0.449726f, 2.414314f, 1.111302f, 1.764364f, +0.647938f, 1.439394f, 0.156129f, 1.983741f, 1.365754f, 5.425046f, 2.136005f, 6.222269f, 1.082688f, 3.478584f, 1.005868f, 4.886432f, 1.508013f, 5.241852f, 2.229002f, 5.258436f, +0.495123f, 0.680900f, 0.122108f, 1.035013f, 0.740671f, 1.821296f, 1.185594f, 2.304004f, 0.120572f, 0.239812f, 0.114648f, 0.371551f, 1.139211f, 2.451365f, 1.723417f, 2.712297f, +0.679258f, 1.344919f, 0.238298f, 2.259276f, 0.892047f, 3.158159f, 2.031204f, 4.415164f, 0.484774f, 1.388206f, 0.655712f, 2.376899f, 1.469927f, 4.553977f, 3.163277f, 5.568397f, +0.370088f, 0.868390f, 0.139605f, 1.077493f, 0.741154f, 3.109590f, 1.814619f, 3.211020f, 0.322088f, 1.093045f, 0.468446f, 
1.382363f, 0.973522f, 3.574285f, 2.252673f, 3.228163f, +1.166735f, 1.772621f, 0.263263f, 3.019208f, 1.544579f, 4.196029f, 2.262081f, 5.947790f, 0.978641f, 2.150404f, 0.851391f, 3.733199f, 2.180799f, 5.184333f, 3.018487f, 6.427416f, +0.071176f, 0.093269f, 0.021716f, 0.175859f, 0.080048f, 0.187560f, 0.158516f, 0.294310f, 0.015816f, 0.029975f, 0.018605f, 0.057606f, 0.105376f, 0.216063f, 0.197215f, 0.296532f, +0.099120f, 0.187009f, 0.043019f, 0.389670f, 0.097863f, 0.330144f, 0.275676f, 0.572503f, 0.064550f, 0.176137f, 0.108016f, 0.374084f, 0.138020f, 0.407449f, 0.367448f, 0.617980f, +0.024064f, 0.053803f, 0.011230f, 0.082808f, 0.036230f, 0.144844f, 0.109739f, 0.185525f, 0.019110f, 0.061796f, 0.034384f, 0.096941f, 0.040730f, 0.142495f, 0.116597f, 0.159635f, +0.141717f, 0.205166f, 0.039560f, 0.433455f, 0.141047f, 0.365116f, 0.255551f, 0.641962f, 0.108469f, 0.227112f, 0.116741f, 0.489060f, 0.170445f, 0.386099f, 0.291858f, 0.593750f, +0.073057f, 0.116640f, 0.020760f, 0.186735f, 0.079702f, 0.227533f, 0.147003f, 0.303152f, 0.021692f, 0.050090f, 0.023767f, 0.081736f, 0.134807f, 0.336770f, 0.234987f, 0.392443f, +0.142992f, 0.328693f, 0.057802f, 0.581538f, 0.136950f, 0.562893f, 0.359313f, 0.828807f, 0.124431f, 0.413677f, 0.193932f, 0.745990f, 0.248160f, 0.892575f, 0.615345f, 1.149473f, +0.076509f, 0.208421f, 0.033255f, 0.272367f, 0.111741f, 0.544286f, 0.315237f, 0.591945f, 0.081189f, 0.319873f, 0.136059f, 0.426065f, 0.161404f, 0.687978f, 0.430340f, 0.654418f, +0.186493f, 0.328946f, 0.048487f, 0.590087f, 0.180052f, 0.567864f, 0.303838f, 0.847766f, 0.190733f, 0.486566f, 0.191196f, 0.889646f, 0.279554f, 0.771544f, 0.445846f, 1.007440f, +0.023719f, 0.037804f, 0.007172f, 0.059138f, 0.040053f, 0.114148f, 0.078609f, 0.148608f, 0.009656f, 0.022258f, 0.011257f, 0.035489f, 0.056477f, 0.140848f, 0.104756f, 0.160380f, +0.043055f, 0.098802f, 0.018520f, 0.170808f, 0.063829f, 0.261901f, 0.178198f, 0.376808f, 0.051367f, 0.170481f, 0.085188f, 0.300401f, 0.096423f, 0.346218f, 0.254415f, 
0.435671f, +0.021145f, 0.057503f, 0.009780f, 0.073428f, 0.047802f, 0.232443f, 0.143498f, 0.247016f, 0.030763f, 0.120995f, 0.054858f, 0.157479f, 0.057562f, 0.244939f, 0.163310f, 0.227664f, +0.075926f, 0.133693f, 0.021005f, 0.234345f, 0.113465f, 0.357245f, 0.203742f, 0.521138f, 0.106462f, 0.271122f, 0.113559f, 0.484391f, 0.146867f, 0.404647f, 0.249240f, 0.516285f, +0.106184f, 0.115745f, 0.030066f, 0.223773f, 0.112618f, 0.219500f, 0.206970f, 0.353166f, 0.021698f, 0.034208f, 0.023688f, 0.067408f, 0.203055f, 0.346330f, 0.352687f, 0.487373f, +0.149727f, 0.234983f, 0.060308f, 0.502054f, 0.139408f, 0.391208f, 0.364455f, 0.695603f, 0.089668f, 0.203529f, 0.139252f, 0.443226f, 0.269293f, 0.661291f, 0.665359f, 1.028427f, +0.101036f, 0.187914f, 0.043758f, 0.296551f, 0.143454f, 0.477068f, 0.403255f, 0.626559f, 0.073787f, 0.198479f, 0.123212f, 0.319257f, 0.220891f, 0.642829f, 0.586842f, 0.738419f, +0.242365f, 0.291868f, 0.062788f, 0.632274f, 0.227479f, 0.489827f, 0.382498f, 0.883083f, 0.170590f, 0.297115f, 0.170392f, 0.656035f, 0.376509f, 0.709457f, 0.598328f, 1.118693f, +0.082246f, 0.150100f, 0.026533f, 0.215423f, 0.105051f, 0.342807f, 0.219960f, 0.409450f, 0.019818f, 0.052310f, 0.024650f, 0.076521f, 0.152601f, 0.435767f, 0.301979f, 0.455232f, +0.124510f, 0.327158f, 0.057137f, 0.518895f, 0.139614f, 0.655945f, 0.415840f, 0.865822f, 0.087928f, 0.334143f, 0.155572f, 0.540180f, 0.217277f, 0.893310f, 0.611628f, 1.031312f, +0.030321f, 0.094416f, 0.014961f, 0.110609f, 0.051846f, 0.288671f, 0.166045f, 0.281444f, 0.026111f, 0.117594f, 0.049676f, 0.140416f, 0.064318f, 0.313377f, 0.194678f, 0.267228f, +0.171948f, 0.346684f, 0.050751f, 0.557518f, 0.194360f, 0.700693f, 0.372337f, 0.937764f, 0.142714f, 0.416155f, 0.162407f, 0.682126f, 0.259173f, 0.817636f, 0.469241f, 0.957089f, +0.080679f, 0.179392f, 0.024241f, 0.218608f, 0.099963f, 0.397435f, 0.194945f, 0.403060f, 0.025977f, 0.083539f, 0.030094f, 0.103762f, 0.186570f, 0.649113f, 0.343869f, 0.575772f, +0.171658f, 0.549541f, 0.073369f, 
0.740072f, 0.186717f, 1.068817f, 0.517980f, 1.197892f, 0.161983f, 0.749993f, 0.266935f, 1.029474f, 0.373352f, 1.870195f, 0.978866f, 1.833274f, +0.092131f, 0.349533f, 0.042341f, 0.347688f, 0.152818f, 1.036675f, 0.455843f, 0.858191f, 0.106017f, 0.581717f, 0.187855f, 0.589789f, 0.243578f, 1.445957f, 0.686680f, 1.046942f, +0.216247f, 0.531211f, 0.059447f, 0.725345f, 0.237112f, 1.041490f, 0.423073f, 1.183514f, 0.239828f, 0.852061f, 0.254197f, 1.185859f, 0.406242f, 1.561479f, 0.685051f, 1.551961f, +0.054771f, 0.121576f, 0.017511f, 0.144767f, 0.105042f, 0.416917f, 0.217979f, 0.413151f, 0.024178f, 0.077620f, 0.029804f, 0.094206f, 0.163442f, 0.567672f, 0.320544f, 0.492020f, +0.108079f, 0.345408f, 0.049155f, 0.454529f, 0.181969f, 1.039858f, 0.537158f, 1.138789f, 0.139825f, 0.646293f, 0.245187f, 0.866847f, 0.303337f, 1.516879f, 0.846264f, 1.452937f, +0.053242f, 0.201651f, 0.026037f, 0.196000f, 0.136699f, 0.925745f, 0.433892f, 0.748837f, 0.083998f, 0.460110f, 0.158377f, 0.455829f, 0.181645f, 1.076459f, 0.544898f, 0.761588f, +0.184092f, 0.451450f, 0.053851f, 0.602342f, 0.312447f, 1.370047f, 0.593217f, 1.521279f, 0.279914f, 0.992779f, 0.315697f, 1.350113f, 0.446274f, 1.712419f, 0.800783f, 1.663068f, +0.126601f, 0.192192f, 0.037903f, 0.282830f, 0.152493f, 0.413938f, 0.296326f, 0.506951f, 0.028054f, 0.061595f, 0.032383f, 0.092389f, 0.303405f, 0.720702f, 0.557208f, 0.771992f, +0.194058f, 0.424153f, 0.082647f, 0.689801f, 0.205205f, 0.801978f, 0.567233f, 1.085434f, 0.126025f, 0.398383f, 0.206937f, 0.660367f, 0.437410f, 1.495935f, 1.142716f, 1.770843f, +0.131355f, 0.340239f, 0.060152f, 0.408707f, 0.211812f, 0.981013f, 0.629559f, 0.980714f, 0.104025f, 0.389698f, 0.183666f, 0.477133f, 0.359900f, 1.458660f, 1.010979f, 1.275406f, +0.303413f, 0.508870f, 0.083111f, 0.839097f, 0.323426f, 0.969911f, 0.575017f, 1.330996f, 0.231583f, 0.561735f, 0.244579f, 0.944105f, 0.590709f, 1.550170f, 0.992556f, 1.860591f, +0.049383f, 0.093420f, 0.015199f, 0.138431f, 0.068170f, 0.230594f, 0.136185f, 
0.284369f, 0.014074f, 0.038507f, 0.016702f, 0.058160f, 0.098539f, 0.291682f, 0.186044f, 0.314608f, +0.063641f, 0.173338f, 0.027864f, 0.283856f, 0.077126f, 0.375614f, 0.219172f, 0.511901f, 0.053157f, 0.209396f, 0.089733f, 0.349507f, 0.119438f, 0.509018f, 0.320778f, 0.606742f, +0.017417f, 0.056218f, 0.008199f, 0.067999f, 0.032187f, 0.185768f, 0.098351f, 0.187000f, 0.017740f, 0.082816f, 0.032200f, 0.102100f, 0.039733f, 0.200674f, 0.114743f, 0.176681f, +0.094305f, 0.197094f, 0.026557f, 0.327252f, 0.115208f, 0.430532f, 0.210572f, 0.594914f, 0.092576f, 0.279830f, 0.100514f, 0.473572f, 0.152869f, 0.499913f, 0.264068f, 0.604185f, +0.042800f, 0.098649f, 0.012270f, 0.124120f, 0.057314f, 0.236209f, 0.106642f, 0.247333f, 0.016300f, 0.054335f, 0.018016f, 0.069681f, 0.106445f, 0.383889f, 0.187182f, 0.351576f, +0.077523f, 0.257257f, 0.031613f, 0.357704f, 0.091135f, 0.540766f, 0.241215f, 0.625758f, 0.086523f, 0.415263f, 0.136037f, 0.588524f, 0.181333f, 0.941562f, 0.453598f, 0.952955f, +0.046758f, 0.183886f, 0.020503f, 0.188857f, 0.083824f, 0.589442f, 0.238561f, 0.503808f, 0.063640f, 0.361968f, 0.107589f, 0.378912f, 0.132950f, 0.818107f, 0.357598f, 0.611590f, +0.104789f, 0.266832f, 0.027484f, 0.376183f, 0.124182f, 0.565411f, 0.211402f, 0.663385f, 0.137457f, 0.506221f, 0.139003f, 0.727421f, 0.211713f, 0.843532f, 0.340623f, 0.865624f, +0.028620f, 0.065853f, 0.008730f, 0.080961f, 0.059323f, 0.244070f, 0.117453f, 0.249722f, 0.014943f, 0.049728f, 0.017575f, 0.062314f, 0.091850f, 0.330688f, 0.171868f, 0.295928f, +0.048077f, 0.159270f, 0.020862f, 0.216395f, 0.087485f, 0.518220f, 0.246393f, 0.585958f, 0.073567f, 0.352477f, 0.123079f, 0.488120f, 0.145117f, 0.752225f, 0.386267f, 0.743920f, +0.026616f, 0.104495f, 0.012419f, 0.104866f, 0.073857f, 0.518471f, 0.223666f, 0.433015f, 0.049666f, 0.282004f, 0.089345f, 0.288455f, 0.097658f, 0.599911f, 0.279505f, 0.438220f, +0.087869f, 0.223365f, 0.024524f, 0.307703f, 0.161182f, 0.732621f, 0.291973f, 0.839916f, 0.158025f, 0.580974f, 0.170044f, 
0.815751f, 0.229086f, 0.911193f, 0.392194f, 0.913678f, +0.084303f, 0.132662f, 0.024081f, 0.201568f, 0.109748f, 0.308806f, 0.203473f, 0.390480f, 0.022095f, 0.050287f, 0.024334f, 0.077878f, 0.217283f, 0.535011f, 0.380723f, 0.591701f, +0.110006f, 0.249236f, 0.044699f, 0.418499f, 0.125722f, 0.509318f, 0.331569f, 0.711726f, 0.084497f, 0.276877f, 0.132377f, 0.473866f, 0.266666f, 0.945356f, 0.664671f, 1.155436f, +0.083680f, 0.224680f, 0.036561f, 0.278660f, 0.145837f, 0.700154f, 0.413562f, 0.722677f, 0.078381f, 0.304374f, 0.132036f, 0.384771f, 0.246577f, 1.035927f, 0.660850f, 0.935204f, +0.184553f, 0.320847f, 0.048232f, 0.546244f, 0.212618f, 0.660940f, 0.360658f, 0.936461f, 0.166607f, 0.418911f, 0.167879f, 0.726932f, 0.386416f, 1.051152f, 0.619479f, 1.302627f, +0.082191f, 0.135880f, 0.021841f, 0.201437f, 0.120806f, 0.357114f, 0.208364f, 0.440586f, 0.020347f, 0.048650f, 0.020847f, 0.073510f, 0.145371f, 0.376049f, 0.236966f, 0.405783f, +0.114369f, 0.272230f, 0.043233f, 0.445994f, 0.147576f, 0.628097f, 0.362081f, 0.856367f, 0.082975f, 0.285646f, 0.120933f, 0.476986f, 0.190254f, 0.708587f, 0.441164f, 0.844993f, +0.026203f, 0.073913f, 0.010650f, 0.089441f, 0.051559f, 0.260052f, 0.136020f, 0.261891f, 0.023182f, 0.094575f, 0.036329f, 0.116649f, 0.052984f, 0.233860f, 0.132107f, 0.205988f, +0.153639f, 0.280614f, 0.037355f, 0.466130f, 0.199845f, 0.652658f, 0.315366f, 0.902242f, 0.131005f, 0.346058f, 0.122805f, 0.585909f, 0.220754f, 0.630885f, 0.329235f, 0.762806f, +0.091054f, 0.183407f, 0.022537f, 0.230862f, 0.129826f, 0.467585f, 0.208558f, 0.489820f, 0.030120f, 0.087745f, 0.028743f, 0.112575f, 0.200723f, 0.632626f, 0.304747f, 0.579628f, +0.178077f, 0.516434f, 0.062697f, 0.718391f, 0.222899f, 1.155846f, 0.509367f, 1.338092f, 0.172635f, 0.724085f, 0.234347f, 1.026643f, 0.369212f, 1.675388f, 0.797393f, 1.696398f, +0.089918f, 0.309030f, 0.034041f, 0.317522f, 0.171631f, 1.054718f, 0.421725f, 0.901881f, 0.106300f, 0.528373f, 0.155158f, 0.553347f, 0.226617f, 1.218655f, 0.526260f, 
0.911423f, +0.218218f, 0.485602f, 0.049416f, 0.684905f, 0.275345f, 1.095596f, 0.404698f, 1.285999f, 0.248633f, 0.800205f, 0.217081f, 1.150365f, 0.390788f, 1.360702f, 0.542838f, 1.396947f, +0.073640f, 0.148078f, 0.019394f, 0.182129f, 0.162523f, 0.584348f, 0.277815f, 0.598140f, 0.033397f, 0.097126f, 0.033912f, 0.121761f, 0.209481f, 0.659099f, 0.338425f, 0.590077f, +0.133571f, 0.386700f, 0.050041f, 0.525625f, 0.258791f, 1.339668f, 0.629283f, 1.515439f, 0.177530f, 0.743341f, 0.256435f, 1.029849f, 0.357362f, 1.618849f, 0.821262f, 1.601673f, +0.061905f, 0.212392f, 0.024938f, 0.213239f, 0.182900f, 1.122048f, 0.478215f, 0.937518f, 0.100335f, 0.497872f, 0.155836f, 0.509483f, 0.201328f, 1.080811f, 0.497494f, 0.789849f, +0.221311f, 0.491643f, 0.053328f, 0.677572f, 0.432242f, 1.716949f, 0.676015f, 1.969259f, 0.345709f, 1.110734f, 0.321180f, 1.560269f, 0.511428f, 1.777723f, 0.755944f, 1.783347f, +0.145755f, 0.200444f, 0.035946f, 0.304689f, 0.202032f, 0.496792f, 0.323393f, 0.628460f, 0.033182f, 0.065996f, 0.031551f, 0.102251f, 0.332984f, 0.716518f, 0.503744f, 0.792787f, +0.205363f, 0.406614f, 0.072046f, 0.683055f, 0.249895f, 0.884717f, 0.569015f, 1.236851f, 0.137014f, 0.392354f, 0.185326f, 0.671792f, 0.441257f, 1.367056f, 0.949582f, 1.671574f, +0.130777f, 0.306860f, 0.049332f, 0.380751f, 0.242671f, 1.018154f, 0.594149f, 1.051364f, 0.106399f, 0.361079f, 0.154748f, 0.456652f, 0.341571f, 1.254078f, 0.790376f, 1.132637f, +0.312336f, 0.474532f, 0.070476f, 0.808244f, 0.383128f, 1.040811f, 0.561102f, 1.475330f, 0.244912f, 0.538155f, 0.213067f, 0.934261f, 0.579661f, 1.378007f, 0.802321f, 1.708422f, +0.232215f, 0.304297f, 0.070849f, 0.573750f, 0.280382f, 0.656966f, 0.555233f, 1.030879f, 0.047375f, 0.089786f, 0.055729f, 0.172552f, 0.306063f, 0.627554f, 0.572810f, 0.861276f, +0.364753f, 0.688174f, 0.158306f, 1.433946f, 0.386634f, 1.304318f, 1.089130f, 2.261821f, 0.218086f, 0.595086f, 0.364935f, 1.263859f, 0.452157f, 1.334816f, 1.203772f, 2.024522f, +0.079495f, 0.177742f, 0.037098f, 
0.273559f, 0.128497f, 0.513719f, 0.389211f, 0.658003f, 0.057961f, 0.187429f, 0.104288f, 0.294024f, 0.119788f, 0.419076f, 0.342909f, 0.469484f, +0.515465f, 0.746243f, 0.143890f, 1.576591f, 0.550788f, 1.425774f, 0.997923f, 2.506857f, 0.362222f, 0.758418f, 0.389847f, 1.633171f, 0.551914f, 1.250218f, 0.945059f, 1.922610f, +0.186872f, 0.298355f, 0.053103f, 0.477652f, 0.218877f, 0.624846f, 0.403698f, 0.832512f, 0.050943f, 0.117633f, 0.055815f, 0.191951f, 0.306978f, 0.766885f, 0.535106f, 0.893663f, +0.412547f, 0.948317f, 0.166765f, 1.677803f, 0.424198f, 1.743546f, 1.112962f, 2.567206f, 0.329598f, 1.095766f, 0.513694f, 1.976010f, 0.637391f, 2.292554f, 1.580496f, 2.952388f, +0.198162f, 0.539819f, 0.086132f, 0.705444f, 0.310717f, 1.513487f, 0.876574f, 1.646012f, 0.193062f, 0.760638f, 0.323540f, 1.013156f, 0.372162f, 1.586332f, 0.992272f, 1.508950f, +0.531820f, 0.938052f, 0.138270f, 1.682746f, 0.551245f, 1.738567f, 0.930227f, 2.595513f, 0.499370f, 1.273907f, 0.500581f, 2.329235f, 0.709708f, 1.958733f, 1.131877f, 2.557604f, +0.040615f, 0.064734f, 0.012281f, 0.101267f, 0.073634f, 0.209851f, 0.144515f, 0.273202f, 0.015180f, 0.034992f, 0.017697f, 0.055794f, 0.086096f, 0.214715f, 0.159695f, 0.244490f, +0.083158f, 0.190828f, 0.035769f, 0.329901f, 0.132354f, 0.543073f, 0.369508f, 0.781342f, 0.091087f, 0.302304f, 0.151060f, 0.532686f, 0.165794f, 0.595303f, 0.437452f, 0.749113f, +0.036663f, 0.099704f, 0.016957f, 0.127316f, 0.088984f, 0.432695f, 0.267122f, 0.459824f, 0.048972f, 0.192612f, 0.087327f, 0.250690f, 0.088853f, 0.378086f, 0.252084f, 0.351420f, +0.144946f, 0.255226f, 0.040100f, 0.447375f, 0.232553f, 0.732195f, 0.417582f, 1.068103f, 0.186596f, 0.475198f, 0.199035f, 0.848995f, 0.249604f, 0.687708f, 0.423591f, 0.877439f, +0.339413f, 0.369974f, 0.096106f, 0.715280f, 0.386473f, 0.753265f, 0.710264f, 1.211969f, 0.063678f, 0.100389f, 0.069518f, 0.197822f, 0.577822f, 0.985533f, 1.003622f, 1.386889f, +0.539818f, 0.847192f, 0.217432f, 1.810073f, 0.539608f, 1.514251f, 1.410698f, 
2.692478f, 0.296810f, 0.673700f, 0.460938f, 1.467117f, 0.864336f, 2.122515f, 2.135570f, 3.300893f, +0.327014f, 0.608204f, 0.141629f, 0.959821f, 0.498480f, 1.657738f, 1.401248f, 2.177194f, 0.219262f, 0.589793f, 0.366132f, 0.948691f, 0.636475f, 1.852244f, 1.690923f, 2.127677f, +0.863685f, 1.040094f, 0.223750f, 2.253154f, 0.870305f, 1.874017f, 1.463389f, 3.378562f, 0.558128f, 0.972084f, 0.557480f, 2.146378f, 1.194465f, 2.250733f, 1.898178f, 3.549023f, +0.217452f, 0.396849f, 0.070150f, 0.569557f, 0.298189f, 0.973059f, 0.624358f, 1.162226f, 0.048107f, 0.126977f, 0.059836f, 0.185746f, 0.359183f, 1.025681f, 0.710777f, 1.071495f, +0.371301f, 0.975620f, 0.170390f, 1.547399f, 0.446988f, 2.100075f, 1.331354f, 2.772019f, 0.240736f, 0.914849f, 0.425939f, 1.478954f, 0.576832f, 2.371577f, 1.623763f, 2.737947f, +0.081172f, 0.252762f, 0.040053f, 0.296114f, 0.149014f, 0.829690f, 0.477240f, 0.808917f, 0.064179f, 0.289032f, 0.122097f, 0.345126f, 0.153289f, 0.746874f, 0.463976f, 0.636887f, +0.506827f, 1.021872f, 0.149592f, 1.643319f, 0.615055f, 2.217354f, 1.178267f, 2.967568f, 0.386209f, 1.126189f, 0.439501f, 1.845955f, 0.680088f, 2.145532f, 1.231319f, 2.511465f, +0.167236f, 0.371856f, 0.050249f, 0.453147f, 0.222461f, 0.884469f, 0.433839f, 0.896988f, 0.049438f, 0.158985f, 0.057272f, 0.197471f, 0.344292f, 1.197855f, 0.634566f, 1.062514f, +0.401342f, 1.284841f, 0.171539f, 1.730309f, 0.468681f, 2.682858f, 1.300191f, 3.006850f, 0.347706f, 1.609905f, 0.572993f, 2.209828f, 0.777105f, 3.892679f, 2.037441f, 3.815831f, +0.193374f, 0.733640f, 0.088871f, 0.729766f, 0.344360f, 2.336048f, 1.027198f, 1.933850f, 0.204297f, 1.120984f, 0.362002f, 1.136539f, 0.455139f, 2.701854f, 1.283101f, 1.956272f, +0.499734f, 1.227598f, 0.137379f, 1.676233f, 0.588284f, 2.583980f, 1.049661f, 2.936348f, 0.508842f, 1.807813f, 0.539328f, 2.516030f, 0.835770f, 3.212459f, 1.409367f, 3.192878f, +0.076003f, 0.168708f, 0.024300f, 0.200888f, 0.156493f, 0.621127f, 0.324746f, 0.615516f, 0.030804f, 0.098891f, 0.037972f, 
0.120021f, 0.201911f, 0.701286f, 0.395992f, 0.607828f, +0.169162f, 0.540625f, 0.076936f, 0.711419f, 0.305777f, 1.747358f, 0.902630f, 1.913599f, 0.200928f, 0.928723f, 0.352333f, 1.245660f, 0.422669f, 2.113617f, 1.179183f, 2.024520f, +0.074811f, 0.283340f, 0.036585f, 0.275400f, 0.206214f, 1.396509f, 0.654537f, 1.129640f, 0.108360f, 0.593558f, 0.204312f, 0.588035f, 0.227218f, 1.346535f, 0.681608f, 0.952665f, +0.284799f, 0.698414f, 0.083310f, 0.931850f, 0.518948f, 2.275533f, 0.985284f, 2.526717f, 0.397577f, 1.410098f, 0.448401f, 1.917638f, 0.614634f, 2.358442f, 1.102885f, 2.290473f, +0.327938f, 0.497841f, 0.098182f, 0.732625f, 0.424083f, 1.151158f, 0.824081f, 1.409827f, 0.066718f, 0.146485f, 0.077014f, 0.219719f, 0.699666f, 1.661970f, 1.284945f, 1.780248f, +0.566978f, 1.239242f, 0.241468f, 2.015380f, 0.643673f, 2.515593f, 1.779259f, 3.404719f, 0.338052f, 1.068630f, 0.555093f, 1.771382f, 1.137717f, 3.890971f, 2.972237f, 4.606015f, +0.344527f, 0.892405f, 0.157771f, 1.071988f, 0.596449f, 2.762468f, 1.772795f, 2.761627f, 0.250500f, 0.938424f, 0.442283f, 1.148976f, 0.840372f, 3.405995f, 2.360653f, 2.978093f, +0.876208f, 1.469535f, 0.240012f, 2.423176f, 1.002748f, 3.007109f, 1.782781f, 4.126619f, 0.614006f, 1.489353f, 0.648464f, 2.503150f, 1.518653f, 3.985330f, 2.551761f, 4.783391f, +0.200458f, 0.379219f, 0.061699f, 0.561933f, 0.297092f, 1.004945f, 0.593502f, 1.239300f, 0.052453f, 0.143512f, 0.062246f, 0.216754f, 0.356099f, 1.054075f, 0.672324f, 1.136926f, +0.291382f, 0.793636f, 0.127576f, 1.299648f, 0.379115f, 1.846349f, 1.077353f, 2.516275f, 0.223448f, 0.880215f, 0.377201f, 1.469188f, 0.486834f, 2.074782f, 1.307505f, 2.473109f, +0.071588f, 0.231071f, 0.033702f, 0.279496f, 0.142035f, 0.819762f, 0.434005f, 0.825199f, 0.066945f, 0.312520f, 0.121513f, 0.385295f, 0.145390f, 0.734303f, 0.419864f, 0.646507f, +0.426776f, 0.891950f, 0.120182f, 1.480979f, 0.559748f, 2.091788f, 1.023086f, 2.890455f, 0.384646f, 1.162665f, 0.417627f, 1.967646f, 0.615885f, 2.014067f, 1.063886f, 
2.434158f, +0.136214f, 0.313957f, 0.039049f, 0.395019f, 0.195832f, 0.807081f, 0.364375f, 0.845092f, 0.047627f, 0.158764f, 0.052641f, 0.203602f, 0.301587f, 1.087664f, 0.530338f, 0.996112f, +0.278280f, 0.923467f, 0.113480f, 1.284040f, 0.351224f, 2.084049f, 0.929615f, 2.411598f, 0.285154f, 1.368583f, 0.448338f, 1.939600f, 0.579485f, 3.008950f, 1.449563f, 3.045359f, +0.150681f, 0.592580f, 0.066071f, 0.608599f, 0.290009f, 2.039316f, 0.825358f, 1.743045f, 0.188288f, 1.070934f, 0.318317f, 1.121065f, 0.381416f, 2.347042f, 1.025901f, 1.754572f, +0.371800f, 0.946742f, 0.097517f, 1.334728f, 0.473039f, 2.153788f, 0.805283f, 2.526994f, 0.447768f, 1.649027f, 0.452807f, 2.369589f, 0.668733f, 2.664450f, 1.075919f, 2.734231f, +0.060976f, 0.140302f, 0.018600f, 0.172491f, 0.135693f, 0.558276f, 0.268657f, 0.571204f, 0.029230f, 0.097271f, 0.034377f, 0.121891f, 0.174213f, 0.627221f, 0.325984f, 0.561292f, +0.115533f, 0.382739f, 0.050133f, 0.520014f, 0.225707f, 1.336985f, 0.635682f, 1.511746f, 0.162309f, 0.777663f, 0.271547f, 1.076930f, 0.310454f, 1.609264f, 0.826357f, 1.591498f, +0.057420f, 0.225427f, 0.026791f, 0.226227f, 0.171061f, 1.200829f, 0.518033f, 1.002906f, 0.098370f, 0.558549f, 0.176961f, 0.571326f, 0.187557f, 1.152156f, 0.536802f, 0.841621f, +0.208710f, 0.530546f, 0.058249f, 0.730868f, 0.411025f, 1.868235f, 0.744552f, 2.141844f, 0.344609f, 1.266946f, 0.370818f, 1.778928f, 0.484415f, 1.926770f, 0.829316f, 1.932024f, +0.335278f, 0.527603f, 0.095771f, 0.801643f, 0.468600f, 1.318530f, 0.868781f, 1.667261f, 0.080677f, 0.183615f, 0.088852f, 0.284358f, 0.769304f, 1.894237f, 1.347973f, 2.094954f, +0.493463f, 1.118017f, 0.200511f, 1.877294f, 0.605469f, 2.452851f, 1.596819f, 3.427639f, 0.347994f, 1.140299f, 0.545183f, 1.951580f, 1.064921f, 3.775244f, 2.654338f, 4.614189f, +0.336980f, 0.904788f, 0.147231f, 1.122168f, 0.630512f, 3.027057f, 1.788000f, 3.124434f, 0.289793f, 1.125338f, 0.488168f, 1.422584f, 0.883990f, 3.713844f, 2.369176f, 3.352747f, +0.818275f, 1.422577f, 0.213853f, 
2.421943f, 1.012098f, 3.146181f, 1.716794f, 4.457709f, 0.678208f, 1.705268f, 0.683387f, 2.959134f, 1.525264f, 4.149110f, 2.445210f, 5.141732f, +0.237338f, 0.392375f, 0.063070f, 0.581681f, 0.374522f, 1.107124f, 0.645969f, 1.365902f, 0.053942f, 0.128979f, 0.055268f, 0.194888f, 0.373709f, 0.966721f, 0.609177f, 1.043160f, +0.372505f, 0.886662f, 0.140813f, 1.452619f, 0.516040f, 2.196310f, 1.266115f, 2.994519f, 0.248122f, 0.854169f, 0.361629f, 1.426334f, 0.551657f, 2.054603f, 1.279188f, 2.450122f, +0.076615f, 0.216115f, 0.031141f, 0.261520f, 0.161850f, 0.816341f, 0.426986f, 0.822114f, 0.062231f, 0.253885f, 0.097526f, 0.313142f, 0.137920f, 0.608745f, 0.343878f, 0.536194f, +0.494612f, 0.903384f, 0.120256f, 1.500617f, 0.690717f, 2.255759f, 1.089989f, 3.118388f, 0.387207f, 1.022832f, 0.362972f, 1.731753f, 0.632679f, 1.808108f, 0.943585f, 2.186193f, +0.206145f, 0.415229f, 0.051022f, 0.522665f, 0.315556f, 1.136520f, 0.506925f, 1.190565f, 0.062606f, 0.182384f, 0.059744f, 0.233995f, 0.404559f, 1.275059f, 0.614220f, 1.168242f, +0.454734f, 1.318755f, 0.160103f, 1.834468f, 0.611086f, 3.168793f, 1.396447f, 3.668427f, 0.404737f, 1.697587f, 0.549417f, 2.406923f, 0.839337f, 3.808699f, 1.812733f, 3.856463f, +0.206129f, 0.708426f, 0.078035f, 0.727893f, 0.422411f, 2.595821f, 1.037930f, 2.219667f, 0.223728f, 1.112060f, 0.326558f, 1.164622f, 0.462485f, 2.487063f, 1.074006f, 1.860057f, +0.550783f, 1.225658f, 0.124725f, 1.728700f, 0.746123f, 2.968821f, 1.096641f, 3.484771f, 0.576159f, 1.854316f, 0.503042f, 2.665742f, 0.878096f, 3.057484f, 1.219752f, 3.138925f, +0.111610f, 0.224427f, 0.029394f, 0.276036f, 0.264450f, 0.950827f, 0.452050f, 0.973268f, 0.046471f, 0.135149f, 0.047189f, 0.169429f, 0.282645f, 0.889300f, 0.456625f, 0.796170f, +0.228336f, 0.661055f, 0.085544f, 0.898543f, 0.474959f, 2.458696f, 1.154924f, 2.781288f, 0.278630f, 1.166661f, 0.402470f, 1.616330f, 0.543856f, 2.463662f, 1.249847f, 2.437524f, +0.095002f, 0.325946f, 0.038270f, 0.327246f, 0.301347f, 1.848688f, 0.787907f, 
1.544656f, 0.141369f, 0.701486f, 0.219568f, 0.717845f, 0.275057f, 1.476622f, 0.679685f, 1.079105f, +0.373944f, 0.830716f, 0.090107f, 1.144875f, 0.784104f, 3.114618f, 1.226320f, 3.572318f, 0.536299f, 1.723084f, 0.498247f, 2.420448f, 0.769306f, 2.674106f, 1.137115f, 2.682567f, +0.412362f, 0.567086f, 0.101697f, 0.862008f, 0.613648f, 1.508949f, 0.982268f, 1.908874f, 0.086188f, 0.171423f, 0.081953f, 0.265592f, 0.838669f, 1.804656f, 1.268752f, 1.996750f, +0.655321f, 1.297525f, 0.229900f, 2.179659f, 0.856121f, 3.030970f, 1.949401f, 4.237352f, 0.401411f, 1.149487f, 0.542954f, 1.968163f, 1.253534f, 3.883570f, 2.697599f, 4.748653f, +0.374635f, 0.879060f, 0.141320f, 1.090733f, 0.746346f, 3.131377f, 1.827332f, 3.233517f, 0.279840f, 0.949670f, 0.407000f, 1.201038f, 0.871105f, 3.198261f, 2.015686f, 2.888552f, +0.985132f, 1.496711f, 0.222286f, 2.549266f, 1.297360f, 3.524431f, 1.900023f, 4.995813f, 0.709213f, 1.558379f, 0.616996f, 2.705417f, 1.627642f, 3.869332f, 2.252851f, 4.797109f, +0.167408f, 0.219374f, 0.051077f, 0.413628f, 0.158854f, 0.372213f, 0.314575f, 0.584059f, 0.033769f, 0.064000f, 0.039724f, 0.122995f, 0.227141f, 0.465732f, 0.425104f, 0.639186f, +0.224094f, 0.422794f, 0.097259f, 0.880975f, 0.186677f, 0.629761f, 0.525862f, 1.092070f, 0.132477f, 0.361487f, 0.221681f, 0.767736f, 0.285968f, 0.844209f, 0.761329f, 1.280415f, +0.050903f, 0.113812f, 0.023755f, 0.175166f, 0.064663f, 0.258514f, 0.195859f, 0.331121f, 0.036696f, 0.118664f, 0.066026f, 0.186150f, 0.078960f, 0.276241f, 0.226034f, 0.309468f, +0.311615f, 0.451127f, 0.086986f, 0.953098f, 0.261677f, 0.677377f, 0.474108f, 1.190994f, 0.216509f, 0.453325f, 0.233021f, 0.976187f, 0.343469f, 0.778041f, 0.588133f, 1.196486f, +0.169161f, 0.270078f, 0.048070f, 0.432381f, 0.155710f, 0.444519f, 0.287193f, 0.592253f, 0.045596f, 0.105285f, 0.049956f, 0.171802f, 0.286063f, 0.714633f, 0.498647f, 0.832774f, +0.318253f, 0.731564f, 0.128648f, 1.294314f, 0.257175f, 1.057045f, 0.674747f, 1.556399f, 0.251401f, 0.835794f, 0.391819f, 
1.507199f, 0.506177f, 1.820607f, 1.255134f, 2.344607f, +0.159326f, 0.434024f, 0.069251f, 0.567189f, 0.196333f, 0.956326f, 0.553880f, 1.040064f, 0.153478f, 0.604681f, 0.257203f, 0.805424f, 0.308032f, 1.312978f, 0.821286f, 1.248930f, +0.403693f, 0.712055f, 0.104958f, 1.277336f, 0.328846f, 1.037146f, 0.554929f, 1.548358f, 0.374794f, 0.956109f, 0.375702f, 1.748166f, 0.554580f, 1.530594f, 0.884472f, 1.998564f, +0.050982f, 0.081258f, 0.015416f, 0.127115f, 0.072640f, 0.207016f, 0.142563f, 0.269511f, 0.018840f, 0.043429f, 0.021965f, 0.069247f, 0.111253f, 0.277454f, 0.206357f, 0.315929f, +0.088957f, 0.204134f, 0.038264f, 0.352905f, 0.111269f, 0.456556f, 0.310642f, 0.656866f, 0.096341f, 0.319744f, 0.159774f, 0.563415f, 0.182574f, 0.655557f, 0.481729f, 0.824935f, +0.040876f, 0.111162f, 0.018905f, 0.141946f, 0.077968f, 0.379127f, 0.234052f, 0.402897f, 0.053984f, 0.212328f, 0.096266f, 0.276351f, 0.101979f, 0.433940f, 0.289324f, 0.403335f, +0.152569f, 0.268650f, 0.042209f, 0.470905f, 0.192374f, 0.605690f, 0.345435f, 0.883562f, 0.194199f, 0.494561f, 0.207145f, 0.883589f, 0.270465f, 0.745185f, 0.458993f, 0.950774f, +0.191110f, 0.208318f, 0.054113f, 0.402746f, 0.171016f, 0.333322f, 0.314294f, 0.536301f, 0.035451f, 0.055889f, 0.038702f, 0.110132f, 0.334925f, 0.571247f, 0.581732f, 0.803886f, +0.259028f, 0.406519f, 0.104333f, 0.868550f, 0.203488f, 0.571029f, 0.531979f, 1.015342f, 0.140819f, 0.319630f, 0.218687f, 0.696059f, 0.426952f, 1.048449f, 1.054898f, 1.630527f, +0.163543f, 0.304169f, 0.070830f, 0.480016f, 0.195918f, 0.651543f, 0.550734f, 0.855705f, 0.108421f, 0.291641f, 0.181045f, 0.469108f, 0.327676f, 0.953590f, 0.870537f, 1.095391f, +0.407795f, 0.491088f, 0.105645f, 1.063843f, 0.322939f, 0.695379f, 0.543010f, 1.253661f, 0.260557f, 0.453809f, 0.260255f, 1.002018f, 0.580575f, 1.093978f, 0.922617f, 1.725017f, +0.235787f, 0.430312f, 0.076065f, 0.617582f, 0.254104f, 0.829199f, 0.532051f, 0.990399f, 0.051576f, 0.136134f, 0.064150f, 0.199141f, 0.400933f, 1.144901f, 0.793395f, 
1.196040f, +0.343106f, 0.901534f, 0.157451f, 1.429893f, 0.324607f, 1.525098f, 0.966844f, 2.013071f, 0.219951f, 0.835860f, 0.389163f, 1.351260f, 0.548717f, 2.255986f, 1.544620f, 2.604499f, +0.078177f, 0.243433f, 0.038575f, 0.285186f, 0.112787f, 0.627980f, 0.361216f, 0.612257f, 0.061114f, 0.275230f, 0.116267f, 0.328646f, 0.151977f, 0.740480f, 0.460004f, 0.631434f, +0.460839f, 0.929150f, 0.136019f, 1.494209f, 0.439506f, 1.584477f, 0.841966f, 2.120565f, 0.347211f, 1.012473f, 0.395122f, 1.659561f, 0.636579f, 2.008271f, 1.152545f, 2.350792f, +0.227697f, 0.506292f, 0.068415f, 0.616971f, 0.238035f, 0.946391f, 0.464212f, 0.959786f, 0.066553f, 0.214025f, 0.077099f, 0.265836f, 0.482559f, 1.678913f, 0.889407f, 1.489219f, +0.465676f, 1.490799f, 0.199037f, 2.007674f, 0.427374f, 2.446407f, 1.185601f, 2.741845f, 0.398900f, 1.846939f, 0.657358f, 2.535191f, 0.928212f, 4.649604f, 2.433618f, 4.557812f, +0.233849f, 0.887195f, 0.107472f, 0.882511f, 0.327273f, 2.220136f, 0.976229f, 1.837895f, 0.244277f, 1.340351f, 0.432843f, 1.358950f, 0.566603f, 3.363535f, 1.597332f, 2.435361f, +0.570553f, 1.401566f, 0.156848f, 1.913780f, 0.527845f, 2.318506f, 0.941820f, 2.634672f, 0.574412f, 2.040769f, 0.608826f, 2.840247f, 0.982295f, 3.775661f, 1.656455f, 3.752647f, +0.143494f, 0.318520f, 0.045878f, 0.379276f, 0.232197f, 0.921602f, 0.481845f, 0.913277f, 0.057502f, 0.184604f, 0.070883f, 0.224049f, 0.392428f, 1.362997f, 0.769636f, 1.181355f, +0.272175f, 0.869844f, 0.123787f, 1.144644f, 0.386644f, 2.209472f, 1.141344f, 2.419678f, 0.319645f, 1.477454f, 0.560508f, 1.981652f, 0.700073f, 3.500816f, 1.953098f, 3.353242f, +0.125452f, 0.475137f, 0.061350f, 0.461823f, 0.271763f, 1.840422f, 0.862597f, 1.488721f, 0.179665f, 0.984143f, 0.338757f, 0.974986f, 0.392241f, 2.324488f, 1.176643f, 1.644561f, +0.450890f, 1.105722f, 0.131895f, 1.475296f, 0.645681f, 2.831248f, 1.225903f, 3.143774f, 0.622354f, 2.207320f, 0.701912f, 3.001806f, 1.001723f, 3.843760f, 1.797468f, 3.732984f, +0.277727f, 0.421616f, 0.083149f, 
0.620452f, 0.282253f, 0.766166f, 0.548476f, 0.938326f, 0.055866f, 0.122660f, 0.064488f, 0.183983f, 0.609979f, 1.448929f, 1.120234f, 1.552045f, +0.409200f, 0.894387f, 0.174272f, 1.454542f, 0.365087f, 1.426827f, 1.009184f, 1.931134f, 0.241233f, 0.762570f, 0.396112f, 1.264051f, 0.845282f, 2.890848f, 2.208263f, 3.422100f, +0.259155f, 0.671272f, 0.118676f, 0.806355f, 0.352591f, 1.633033f, 1.047988f, 1.632536f, 0.186306f, 0.697940f, 0.328942f, 0.854535f, 0.650737f, 2.637413f, 1.827958f, 2.306069f, +0.622250f, 1.043608f, 0.170448f, 1.720848f, 0.559643f, 1.678295f, 0.994986f, 2.303103f, 0.431134f, 1.045775f, 0.455330f, 1.757629f, 1.110232f, 2.913531f, 1.865500f, 3.496965f, +0.073437f, 0.138926f, 0.022603f, 0.205862f, 0.085535f, 0.289332f, 0.170874f, 0.356805f, 0.019000f, 0.051983f, 0.022547f, 0.078513f, 0.134295f, 0.397523f, 0.253553f, 0.428768f, +0.090970f, 0.247775f, 0.039830f, 0.405753f, 0.093018f, 0.453014f, 0.264335f, 0.617384f, 0.068976f, 0.271711f, 0.116437f, 0.453519f, 0.156464f, 0.666816f, 0.420220f, 0.794835f, +0.023294f, 0.075188f, 0.010966f, 0.090945f, 0.036321f, 0.209629f, 0.110983f, 0.211020f, 0.021538f, 0.100546f, 0.039094f, 0.123959f, 0.048701f, 0.245966f, 0.140640f, 0.216558f, +0.131106f, 0.274009f, 0.036920f, 0.454960f, 0.135138f, 0.505014f, 0.247000f, 0.697833f, 0.116834f, 0.353152f, 0.126851f, 0.597659f, 0.194770f, 0.636936f, 0.336447f, 0.769787f, +0.062659f, 0.144421f, 0.017963f, 0.181710f, 0.070796f, 0.291769f, 0.131726f, 0.305510f, 0.021662f, 0.072210f, 0.023942f, 0.092603f, 0.142814f, 0.515054f, 0.251137f, 0.471701f, +0.109090f, 0.362014f, 0.044486f, 0.503364f, 0.108205f, 0.642057f, 0.286397f, 0.742969f, 0.110526f, 0.530466f, 0.173777f, 0.751794f, 0.233854f, 1.214274f, 0.584977f, 1.228967f, +0.061564f, 0.242113f, 0.026995f, 0.248658f, 0.093120f, 0.654813f, 0.265018f, 0.559681f, 0.076063f, 0.432630f, 0.128592f, 0.452881f, 0.160423f, 0.987165f, 0.431493f, 0.737972f, +0.143417f, 0.365194f, 0.037616f, 0.514855f, 0.143400f, 0.652915f, 0.244119f, 
0.766051f, 0.170776f, 0.628930f, 0.172698f, 0.903749f, 0.265548f, 1.058029f, 0.427238f, 1.085738f, +0.038895f, 0.089496f, 0.011865f, 0.110028f, 0.068023f, 0.279864f, 0.134678f, 0.286344f, 0.018435f, 0.061348f, 0.021682f, 0.076876f, 0.114397f, 0.411865f, 0.214058f, 0.368572f, +0.062804f, 0.208057f, 0.027252f, 0.282679f, 0.096424f, 0.571173f, 0.271570f, 0.645833f, 0.087238f, 0.417978f, 0.145951f, 0.578829f, 0.173730f, 0.900544f, 0.462429f, 0.890601f, +0.032532f, 0.127718f, 0.015179f, 0.128172f, 0.076166f, 0.534674f, 0.230656f, 0.446548f, 0.055105f, 0.312889f, 0.099130f, 0.320047f, 0.109390f, 0.671979f, 0.313082f, 0.490864f, +0.111638f, 0.283786f, 0.031157f, 0.390937f, 0.172781f, 0.785345f, 0.312986f, 0.900362f, 0.182254f, 0.670052f, 0.196115f, 0.940825f, 0.266737f, 1.060951f, 0.456652f, 1.063844f, +0.095932f, 0.150962f, 0.027403f, 0.229373f, 0.105372f, 0.296491f, 0.195358f, 0.374909f, 0.022824f, 0.051946f, 0.025137f, 0.080447f, 0.226598f, 0.557947f, 0.397045f, 0.617068f, +0.120326f, 0.272617f, 0.048892f, 0.457758f, 0.116027f, 0.470042f, 0.306000f, 0.656842f, 0.083899f, 0.274919f, 0.131440f, 0.470515f, 0.267312f, 0.947648f, 0.666282f, 1.158236f, +0.085640f, 0.229942f, 0.037417f, 0.285186f, 0.125929f, 0.604579f, 0.357108f, 0.624027f, 0.072818f, 0.282772f, 0.122666f, 0.357463f, 0.231268f, 0.971611f, 0.619821f, 0.877141f, +0.196332f, 0.341325f, 0.051311f, 0.581107f, 0.190843f, 0.593249f, 0.323722f, 0.840553f, 0.160893f, 0.404546f, 0.162122f, 0.702004f, 0.376734f, 1.024813f, 0.603957f, 1.269987f, +0.284882f, 0.470976f, 0.075704f, 0.698204f, 0.353294f, 1.044373f, 0.609356f, 1.288484f, 0.064019f, 0.153073f, 0.065592f, 0.231295f, 0.461774f, 1.194529f, 0.752730f, 1.288981f, +0.381043f, 0.906983f, 0.144040f, 1.485911f, 0.414845f, 1.765617f, 1.017831f, 2.407298f, 0.250951f, 0.863908f, 0.365752f, 1.442597f, 0.580909f, 2.163550f, 1.347017f, 2.580042f, +0.081681f, 0.230406f, 0.033200f, 0.278813f, 0.135607f, 0.683977f, 0.357753f, 0.688813f, 0.065599f, 0.267626f, 0.102804f, 
0.330090f, 0.151368f, 0.668099f, 0.377407f, 0.588475f, +0.497844f, 0.909288f, 0.121042f, 1.510424f, 0.546375f, 1.784364f, 0.862210f, 2.466726f, 0.385350f, 1.017926f, 0.361231f, 1.723447f, 0.655556f, 1.873490f, 0.977705f, 2.265247f, +0.310698f, 0.625826f, 0.076900f, 0.787752f, 0.373770f, 1.346185f, 0.600442f, 1.410199f, 0.093297f, 0.271791f, 0.089031f, 0.348703f, 0.627690f, 1.978309f, 0.952988f, 1.812578f, +0.584073f, 1.693844f, 0.205640f, 2.356240f, 0.616841f, 3.198636f, 1.409598f, 3.702975f, 0.514002f, 2.155878f, 0.697741f, 3.056711f, 1.109797f, 5.035978f, 2.396851f, 5.099133f, +0.275940f, 0.948354f, 0.104464f, 0.974414f, 0.444398f, 2.730942f, 1.091957f, 2.335208f, 0.296128f, 1.471930f, 0.432235f, 1.541502f, 0.637341f, 3.427368f, 1.480065f, 2.563304f, +0.696110f, 1.549054f, 0.157635f, 2.184826f, 0.741087f, 2.948784f, 1.089240f, 3.461251f, 0.719983f, 2.317203f, 0.628614f, 3.331182f, 1.142450f, 3.977951f, 1.586963f, 4.083910f, +0.233262f, 0.469047f, 0.061434f, 0.576909f, 0.434357f, 1.561725f, 0.742488f, 1.598584f, 0.096031f, 0.279278f, 0.097513f, 0.350116f, 0.608109f, 1.913320f, 0.982425f, 1.712952f, +0.406685f, 1.177396f, 0.152362f, 1.600382f, 0.664817f, 3.441528f, 1.616591f, 3.893072f, 0.490677f, 2.054530f, 0.708763f, 2.846411f, 0.997164f, 4.517144f, 2.291604f, 4.469219f, +0.176354f, 0.605059f, 0.071042f, 0.607471f, 0.439622f, 2.696976f, 1.149446f, 2.253435f, 0.259470f, 1.287518f, 0.402999f, 1.317545f, 0.525621f, 2.821752f, 1.298844f, 2.062118f, +0.655359f, 1.455880f, 0.157917f, 2.006462f, 1.079961f, 4.289822f, 1.689033f, 4.920222f, 0.929315f, 2.985810f, 0.863377f, 4.194223f, 1.387937f, 4.824469f, 2.051517f, 4.839732f, +0.386584f, 0.531636f, 0.095340f, 0.808123f, 0.452113f, 1.111737f, 0.723698f, 1.406387f, 0.079890f, 0.158897f, 0.075965f, 0.246186f, 0.809384f, 1.741640f, 1.224449f, 1.927026f, +0.523556f, 1.036633f, 0.183675f, 1.741397f, 0.537534f, 1.903060f, 1.223974f, 2.660513f, 0.317089f, 0.908022f, 0.428899f, 1.554723f, 1.030964f, 3.194025f, 2.218629f, 
3.905509f, +0.311950f, 0.731972f, 0.117674f, 0.908227f, 0.488403f, 2.049147f, 1.195791f, 2.115986f, 0.230393f, 0.781865f, 0.335084f, 0.988816f, 0.746697f, 2.741498f, 1.727814f, 2.476020f, +0.774447f, 1.176618f, 0.174747f, 2.004068f, 0.801530f, 2.177449f, 1.173864f, 3.086492f, 0.551260f, 1.211304f, 0.479582f, 2.102880f, 1.317207f, 3.131347f, 1.823172f, 3.882172f, +0.143729f, 0.188344f, 0.043852f, 0.355121f, 0.174452f, 0.408760f, 0.345462f, 0.641406f, 0.034164f, 0.064749f, 0.040189f, 0.124435f, 0.214310f, 0.439424f, 0.401091f, 0.603081f, +0.257552f, 0.485918f, 0.111780f, 1.012506f, 0.274433f, 0.925806f, 0.773065f, 1.605443f, 0.179416f, 0.489569f, 0.300227f, 1.039759f, 0.361188f, 1.066265f, 0.961586f, 1.617210f, +0.045585f, 0.101922f, 0.021273f, 0.156866f, 0.074070f, 0.296125f, 0.224354f, 0.379295f, 0.038724f, 0.125223f, 0.069676f, 0.196440f, 0.077709f, 0.271863f, 0.222451f, 0.304563f, +0.373768f, 0.541106f, 0.104336f, 1.143198f, 0.401475f, 1.039261f, 0.727396f, 1.827274f, 0.306017f, 0.640738f, 0.329356f, 1.379759f, 0.452744f, 1.025575f, 0.775248f, 1.577149f, +0.196620f, 0.313917f, 0.055873f, 0.502565f, 0.231501f, 0.660885f, 0.426982f, 0.880529f, 0.062451f, 0.144205f, 0.068423f, 0.235310f, 0.365400f, 0.912831f, 0.636943f, 1.063737f, +0.495183f, 1.138272f, 0.200169f, 2.013879f, 0.511838f, 2.103765f, 1.342902f, 3.097595f, 0.460942f, 1.532425f, 0.718399f, 2.763444f, 0.865521f, 3.113085f, 2.146174f, 4.009081f, +0.193163f, 0.526203f, 0.083959f, 0.687650f, 0.304468f, 1.483049f, 0.858945f, 1.612909f, 0.219266f, 0.863878f, 0.367453f, 1.150670f, 0.410409f, 1.749356f, 1.094246f, 1.664022f, +0.655533f, 1.156263f, 0.170435f, 2.074189f, 0.683040f, 2.154235f, 1.152631f, 3.216064f, 0.717169f, 1.829518f, 0.718908f, 3.345124f, 0.989666f, 2.731393f, 1.578368f, 3.566500f, +0.050651f, 0.080730f, 0.015316f, 0.126289f, 0.092311f, 0.263077f, 0.181169f, 0.342495f, 0.022057f, 0.050844f, 0.025714f, 0.081069f, 0.121468f, 0.302928f, 0.225304f, 0.344936f, +0.118308f, 0.271488f, 0.050889f, 
0.469347f, 0.189286f, 0.776676f, 0.528452f, 1.117436f, 0.150985f, 0.501099f, 0.250397f, 0.882978f, 0.266844f, 0.958136f, 0.704076f, 1.205692f, +0.042359f, 0.115196f, 0.019592f, 0.147098f, 0.103349f, 0.502546f, 0.310245f, 0.534054f, 0.065923f, 0.259283f, 0.117555f, 0.337465f, 0.116138f, 0.494188f, 0.329494f, 0.459334f, +0.211764f, 0.372882f, 0.058586f, 0.653610f, 0.341540f, 1.075340f, 0.613283f, 1.568673f, 0.317628f, 0.808893f, 0.338802f, 1.445179f, 0.412551f, 1.136660f, 0.700121f, 1.450252f, +0.304007f, 0.331381f, 0.086080f, 0.640666f, 0.347974f, 0.678226f, 0.639509f, 1.091236f, 0.066453f, 0.104764f, 0.072548f, 0.206443f, 0.585502f, 0.998632f, 1.016962f, 1.405323f, +0.551587f, 0.865662f, 0.222172f, 1.849536f, 0.554264f, 1.555379f, 1.449014f, 2.765607f, 0.353358f, 0.802052f, 0.548754f, 1.746628f, 0.999145f, 2.453560f, 2.468651f, 3.815727f, +0.271360f, 0.504695f, 0.117526f, 0.796471f, 0.415814f, 1.382825f, 1.168869f, 1.816136f, 0.211988f, 0.570228f, 0.353986f, 0.917220f, 0.597503f, 1.738828f, 1.587385f, 1.997396f, +0.906275f, 1.091382f, 0.234784f, 2.364259f, 0.918010f, 1.976739f, 1.543603f, 3.563754f, 0.682350f, 1.188439f, 0.681558f, 2.624094f, 1.417936f, 2.671820f, 2.253307f, 4.213006f, +0.216654f, 0.395395f, 0.069892f, 0.567469f, 0.298654f, 0.974577f, 0.625332f, 1.164038f, 0.055845f, 0.147401f, 0.069460f, 0.215623f, 0.404855f, 1.156103f, 0.801157f, 1.207742f, +0.422030f, 1.108912f, 0.193669f, 1.758808f, 0.510721f, 2.399510f, 1.521182f, 3.167262f, 0.318807f, 1.211531f, 0.564070f, 1.958575f, 0.741728f, 3.049527f, 2.087939f, 3.520629f, +0.074927f, 0.233314f, 0.036971f, 0.273331f, 0.138270f, 0.769868f, 0.442831f, 0.750594f, 0.069022f, 0.310845f, 0.131312f, 0.371173f, 0.160073f, 0.779930f, 0.484511f, 0.665074f, +0.591580f, 1.192752f, 0.174608f, 1.918119f, 0.721671f, 2.601720f, 1.382513f, 3.481979f, 0.525224f, 1.531561f, 0.597699f, 2.510405f, 0.898044f, 2.833139f, 1.625936f, 3.316346f, +0.283246f, 0.629807f, 0.085105f, 0.767487f, 0.378755f, 1.505868f, 0.738639f, 
1.527182f, 0.097558f, 0.313731f, 0.113017f, 0.389678f, 0.659687f, 2.295174f, 1.215873f, 2.035851f, +0.775457f, 2.482522f, 0.331442f, 3.343237f, 0.910317f, 5.210902f, 2.525356f, 5.840190f, 0.782753f, 3.624207f, 1.289918f, 4.974749f, 1.698644f, 8.508857f, 4.453563f, 8.340877f, +0.303428f, 1.151169f, 0.139449f, 1.145091f, 0.543176f, 3.684764f, 1.620250f, 3.050357f, 0.373498f, 2.049392f, 0.661815f, 2.077830f, 0.807941f, 4.796199f, 2.277699f, 3.472679f, +0.991561f, 2.435775f, 0.272585f, 3.325948f, 1.173382f, 5.153971f, 2.093639f, 5.856797f, 1.176341f, 4.179302f, 1.246818f, 5.816558f, 1.876060f, 7.211038f, 3.163620f, 7.167084f, +0.152575f, 0.338676f, 0.048781f, 0.403277f, 0.315802f, 1.253435f, 0.655339f, 1.242112f, 0.072048f, 0.231299f, 0.088813f, 0.280723f, 0.458553f, 1.592665f, 0.899322f, 1.380416f, +0.387404f, 1.238105f, 0.176194f, 1.629245f, 0.703942f, 4.022671f, 2.077985f, 4.405384f, 0.536131f, 2.478086f, 0.940121f, 3.323760f, 1.095066f, 5.476039f, 3.055071f, 5.245201f, +0.139136f, 0.526964f, 0.068042f, 0.512197f, 0.385535f, 2.610895f, 1.223714f, 2.111959f, 0.234808f, 1.286194f, 0.442727f, 1.274226f, 0.478075f, 2.833156f, 1.434128f, 2.004440f, +0.669786f, 1.642522f, 0.195927f, 2.191516f, 1.226856f, 5.379643f, 2.329333f, 5.973473f, 1.089404f, 3.863821f, 1.228668f, 5.254537f, 1.635289f, 6.274844f, 2.934322f, 6.094005f, +0.472823f, 0.717790f, 0.141559f, 1.056303f, 0.614653f, 1.668451f, 1.194396f, 2.043357f, 0.112077f, 0.246076f, 0.129373f, 0.369100f, 1.141239f, 2.710872f, 2.095900f, 2.903797f, +0.932575f, 2.038327f, 0.397171f, 3.314934f, 1.064278f, 4.159393f, 2.941906f, 5.629514f, 0.647845f, 2.047926f, 1.063782f, 3.394683f, 2.117051f, 7.240274f, 5.530705f, 8.570820f, +0.460208f, 1.192045f, 0.210746f, 1.431925f, 0.800896f, 3.709365f, 2.380460f, 3.708236f, 0.389859f, 1.460491f, 0.688335f, 1.788178f, 1.269935f, 5.146994f, 3.567318f, 4.500367f, +1.480004f, 2.482191f, 0.405405f, 4.092987f, 1.702625f, 5.105947f, 3.027089f, 7.006828f, 1.208362f, 2.931044f, 1.276176f, 
4.926193f, 2.901970f, 7.615507f, 4.876121f, 9.140509f, +0.111484f, 0.210901f, 0.034313f, 0.312517f, 0.166093f, 0.561828f, 0.331805f, 0.692847f, 0.033988f, 0.092992f, 0.040334f, 0.140451f, 0.224047f, 0.663193f, 0.423006f, 0.715320f, +0.184869f, 0.503526f, 0.080941f, 0.824568f, 0.241792f, 1.177568f, 0.687116f, 1.604834f, 0.165176f, 0.650666f, 0.278832f, 1.086042f, 0.349430f, 1.489196f, 0.938476f, 1.775100f, +0.036885f, 0.119058f, 0.017365f, 0.144009f, 0.073567f, 0.424592f, 0.224791f, 0.427409f, 0.040188f, 0.187612f, 0.072947f, 0.231300f, 0.084748f, 0.428024f, 0.244738f, 0.376847f, +0.278060f, 0.581137f, 0.078303f, 0.964910f, 0.366608f, 1.370021f, 0.670072f, 1.893110f, 0.291990f, 0.882595f, 0.317026f, 1.493668f, 0.453959f, 1.484536f, 0.784173f, 1.794179f, +0.128777f, 0.296816f, 0.036917f, 0.373451f, 0.186111f, 0.767018f, 0.346287f, 0.803142f, 0.052461f, 0.174879f, 0.057984f, 0.224268f, 0.322559f, 1.163296f, 0.567216f, 1.065378f, +0.300130f, 0.995977f, 0.122391f, 1.384861f, 0.380788f, 2.259474f, 1.007866f, 2.614595f, 0.358324f, 1.719761f, 0.563382f, 2.437300f, 0.707048f, 3.671317f, 1.768658f, 3.715740f, +0.131977f, 0.519024f, 0.057870f, 0.533054f, 0.255343f, 1.795547f, 0.726699f, 1.534690f, 0.192146f, 1.092880f, 0.324840f, 1.144038f, 0.377937f, 2.325629f, 1.016542f, 1.738565f, +0.411789f, 1.048568f, 0.108006f, 1.478284f, 0.526664f, 2.397949f, 0.896572f, 2.813463f, 0.577813f, 2.127954f, 0.584315f, 3.057789f, 0.837909f, 3.338505f, 1.348106f, 3.425939f, +0.068327f, 0.157217f, 0.020843f, 0.193286f, 0.152850f, 0.628861f, 0.302625f, 0.643423f, 0.038162f, 0.126996f, 0.044883f, 0.159138f, 0.220849f, 0.795122f, 0.413247f, 0.711544f, +0.147690f, 0.489269f, 0.064086f, 0.664753f, 0.290043f, 1.718081f, 0.816878f, 1.942655f, 0.241744f, 1.158260f, 0.404444f, 1.603992f, 0.448974f, 2.327297f, 1.195067f, 2.301603f, +0.059610f, 0.234026f, 0.027813f, 0.234857f, 0.178517f, 1.253173f, 0.540614f, 1.046622f, 0.118984f, 0.675598f, 0.214044f, 0.691053f, 0.220277f, 1.353157f, 0.630451f, 
0.988448f, +0.273985f, 0.696475f, 0.076467f, 0.959449f, 0.542403f, 2.465392f, 0.982539f, 2.826457f, 0.527082f, 1.937805f, 0.567170f, 2.720886f, 0.719415f, 2.861487f, 1.231635f, 2.869291f, +0.269833f, 0.424618f, 0.077077f, 0.645167f, 0.379110f, 1.066725f, 0.702866f, 1.348858f, 0.075651f, 0.172174f, 0.083316f, 0.266641f, 0.700435f, 1.724663f, 1.227302f, 1.907412f, +0.453061f, 1.026480f, 0.184094f, 1.723592f, 0.558812f, 2.263836f, 1.473769f, 3.163508f, 0.372257f, 1.219803f, 0.583195f, 2.087650f, 1.106111f, 3.921266f, 2.757004f, 4.792660f, +0.251258f, 0.674624f, 0.109778f, 0.836705f, 0.472585f, 2.268857f, 1.340152f, 2.341844f, 0.251751f, 0.977613f, 0.424086f, 1.235839f, 0.745661f, 3.132691f, 1.998441f, 2.828100f, +0.771505f, 1.341268f, 0.201630f, 2.283513f, 0.959254f, 2.981913f, 1.627157f, 4.224964f, 0.745027f, 1.873274f, 0.750715f, 3.250674f, 1.626911f, 4.425616f, 2.608164f, 5.484388f, +0.195092f, 0.322533f, 0.051844f, 0.478143f, 0.309472f, 0.914832f, 0.533773f, 1.128663f, 0.051662f, 0.123526f, 0.052932f, 0.186650f, 0.347525f, 0.898987f, 0.566494f, 0.970070f, +0.349315f, 0.831462f, 0.132046f, 1.362184f, 0.486451f, 2.070377f, 1.193518f, 2.822818f, 0.271093f, 0.933248f, 0.395108f, 1.558384f, 0.585237f, 2.179672f, 1.357055f, 2.599268f, +0.058346f, 0.164582f, 0.023715f, 0.199160f, 0.123903f, 0.624943f, 0.326875f, 0.629362f, 0.055217f, 0.225270f, 0.086534f, 0.277848f, 0.118824f, 0.524459f, 0.296265f, 0.461953f, +0.476306f, 0.869949f, 0.115805f, 1.445079f, 0.668641f, 2.183665f, 1.055153f, 3.018725f, 0.434445f, 1.147612f, 0.407253f, 1.943018f, 0.689261f, 1.969814f, 1.027973f, 2.381712f, +0.288053f, 0.580214f, 0.071295f, 0.730338f, 0.443250f, 1.596427f, 0.712058f, 1.672342f, 0.101927f, 0.296931f, 0.097266f, 0.380957f, 0.639530f, 2.015626f, 0.970964f, 1.846768f, +0.724885f, 2.102208f, 0.255218f, 2.924299f, 0.979232f, 5.077817f, 2.237729f, 5.878454f, 0.751715f, 3.152918f, 1.020429f, 4.470363f, 1.513656f, 6.868584f, 3.269072f, 6.954721f, +0.266848f, 0.917104f, 0.101022f, 
0.942305f, 0.549706f, 3.378086f, 1.350715f, 2.888575f, 0.337453f, 1.677342f, 0.492554f, 1.756623f, 0.677332f, 3.642421f, 1.572932f, 2.724140f, +0.901632f, 2.006404f, 0.204175f, 2.829884f, 1.227810f, 4.885451f, 1.804618f, 5.734491f, 1.098905f, 3.536731f, 0.959450f, 5.084360f, 1.626185f, 5.662293f, 2.258914f, 5.813117f, +0.184850f, 0.371700f, 0.048683f, 0.457176f, 0.440284f, 1.583036f, 0.752620f, 1.620399f, 0.089675f, 0.260795f, 0.091059f, 0.326945f, 0.529589f, 1.666270f, 0.855573f, 1.491773f, +0.431422f, 1.249012f, 0.161629f, 1.697727f, 0.902104f, 4.669877f, 2.193584f, 5.282587f, 0.613375f, 2.568283f, 0.885995f, 3.558182f, 1.162497f, 5.266103f, 2.671560f, 5.210231f, +0.145772f, 0.500135f, 0.058722f, 0.502129f, 0.464815f, 2.851525f, 1.215314f, 2.382568f, 0.252734f, 1.254093f, 0.392537f, 1.283340f, 0.477468f, 2.563245f, 1.179854f, 1.873202f, +0.725559f, 1.611830f, 0.174833f, 2.221389f, 1.529368f, 6.074954f, 2.391894f, 6.967683f, 1.212391f, 3.895311f, 1.126369f, 5.471816f, 1.688669f, 5.869814f, 2.496031f, 5.888384f, +0.490516f, 0.674565f, 0.120972f, 1.025383f, 0.733780f, 1.804351f, 1.174563f, 2.282568f, 0.119451f, 0.237581f, 0.113581f, 0.368094f, 1.128612f, 2.428557f, 1.707382f, 2.687061f, +0.889283f, 1.760765f, 0.311979f, 2.957838f, 1.167866f, 4.134654f, 2.659248f, 5.780323f, 0.634665f, 1.817436f, 0.858456f, 3.111831f, 1.924425f, 5.962056f, 4.141355f, 7.290132f, +0.412864f, 0.968762f, 0.155741f, 1.202034f, 0.826820f, 3.469010f, 2.024360f, 3.582164f, 0.359317f, 1.219384f, 0.522591f, 1.542142f, 1.086046f, 3.987417f, 2.513047f, 3.601288f, +1.372833f, 2.085746f, 0.309767f, 3.552537f, 1.817421f, 4.937239f, 2.661668f, 6.998440f, 1.151513f, 2.530263f, 1.001786f, 4.392652f, 2.566027f, 6.100122f, 3.551689f, 7.562790f, +0.111522f, 0.146140f, 0.034026f, 0.275546f, 0.125423f, 0.293880f, 0.248371f, 0.461142f, 0.024782f, 0.046966f, 0.029151f, 0.090260f, 0.165108f, 0.338541f, 0.309008f, 0.464624f, +0.205238f, 0.387219f, 0.089075f, 0.806847f, 0.202635f, 0.683593f, 0.570813f, 
1.185420f, 0.133657f, 0.364708f, 0.223656f, 0.774575f, 0.285782f, 0.843661f, 0.760835f, 1.279584f, +0.042457f, 0.094929f, 0.019814f, 0.146104f, 0.063923f, 0.255560f, 0.193620f, 0.327336f, 0.033717f, 0.109032f, 0.060667f, 0.171041f, 0.071864f, 0.251415f, 0.205720f, 0.281657f, +0.263729f, 0.381803f, 0.073619f, 0.806637f, 0.262482f, 0.679462f, 0.475567f, 1.194659f, 0.201855f, 0.422643f, 0.217250f, 0.910117f, 0.317189f, 0.718510f, 0.543133f, 1.104939f, +0.154149f, 0.246110f, 0.043804f, 0.394009f, 0.168171f, 0.480091f, 0.310175f, 0.639648f, 0.045771f, 0.105689f, 0.050148f, 0.172461f, 0.284440f, 0.710580f, 0.495819f, 0.828051f, +0.398709f, 0.916507f, 0.161171f, 1.621523f, 0.381862f, 1.569535f, 1.001886f, 2.310992f, 0.346955f, 1.153470f, 0.540746f, 2.080070f, 0.691953f, 2.488799f, 1.715788f, 3.205115f, +0.181783f, 0.495202f, 0.079013f, 0.647138f, 0.265495f, 1.293209f, 0.748995f, 1.406446f, 0.192903f, 0.760010f, 0.323273f, 1.012320f, 0.383491f, 1.634620f, 1.022477f, 1.554882f, +0.467356f, 0.824346f, 0.121510f, 1.478772f, 0.451215f, 1.423082f, 0.761425f, 2.124523f, 0.477982f, 1.219346f, 0.479141f, 2.229475f, 0.700569f, 1.933509f, 1.117301f, 2.524668f, +0.043223f, 0.068891f, 0.013070f, 0.107769f, 0.072990f, 0.208014f, 0.143250f, 0.270810f, 0.017595f, 0.040560f, 0.020514f, 0.064672f, 0.102919f, 0.256670f, 0.190899f, 0.292263f, +0.103685f, 0.237932f, 0.044599f, 0.411335f, 0.153711f, 0.630705f, 0.429133f, 0.907420f, 0.123701f, 0.410547f, 0.205148f, 0.723419f, 0.232203f, 0.833755f, 0.612676f, 1.049174f, +0.043390f, 0.117999f, 0.020068f, 0.150677f, 0.098091f, 0.476982f, 0.294463f, 0.506887f, 0.063127f, 0.248287f, 0.112570f, 0.323153f, 0.118120f, 0.502624f, 0.335118f, 0.467175f, +0.164330f, 0.289359f, 0.045463f, 0.507205f, 0.245579f, 0.773205f, 0.440971f, 1.127928f, 0.230421f, 0.586805f, 0.245781f, 1.048394f, 0.317872f, 0.875800f, 0.539445f, 1.117424f, +0.203324f, 0.221632f, 0.057572f, 0.428486f, 0.215643f, 0.420305f, 0.396311f, 0.676252f, 0.041549f, 0.065502f, 0.045359f, 
0.129075f, 0.388815f, 0.663163f, 0.675335f, 0.933234f, +0.378875f, 0.594607f, 0.152606f, 1.270412f, 0.352762f, 0.989924f, 0.922227f, 1.760175f, 0.226900f, 0.515017f, 0.352368f, 1.121552f, 0.681426f, 1.673351f, 1.683644f, 2.602363f, +0.217855f, 0.405182f, 0.094353f, 0.639427f, 0.309317f, 1.028661f, 0.869503f, 1.350994f, 0.159100f, 0.427964f, 0.265672f, 0.688386f, 0.476289f, 1.386076f, 1.265356f, 1.592189f, +0.551194f, 0.663776f, 0.142795f, 1.437937f, 0.517340f, 1.113981f, 0.869890f, 2.008336f, 0.387962f, 0.675708f, 0.387512f, 1.491975f, 0.856270f, 1.613470f, 1.360736f, 2.544168f, +0.147639f, 0.269442f, 0.047628f, 0.386702f, 0.188576f, 0.615367f, 0.394847f, 0.734997f, 0.035576f, 0.093901f, 0.044249f, 0.137362f, 0.273932f, 0.782240f, 0.542077f, 0.817180f, +0.295361f, 0.776082f, 0.135541f, 1.230918f, 0.331190f, 1.556027f, 0.986451f, 2.053896f, 0.208581f, 0.792652f, 0.369046f, 1.281410f, 0.515423f, 2.119102f, 1.450900f, 2.446469f, +0.061290f, 0.190849f, 0.030242f, 0.223583f, 0.104800f, 0.583513f, 0.335639f, 0.568904f, 0.052781f, 0.237701f, 0.100414f, 0.283834f, 0.130011f, 0.633454f, 0.393516f, 0.540169f, +0.366596f, 0.739135f, 0.108202f, 1.188637f, 0.414378f, 1.493888f, 0.793829f, 1.999326f, 0.304268f, 0.887249f, 0.346253f, 1.454303f, 0.552561f, 1.743213f, 1.000428f, 2.040528f, +0.195027f, 0.433649f, 0.058599f, 0.528448f, 0.241642f, 0.960731f, 0.471245f, 0.974329f, 0.062796f, 0.201942f, 0.072746f, 0.250827f, 0.451001f, 1.569118f, 0.831243f, 1.391829f, +0.548359f, 1.755497f, 0.234377f, 2.364146f, 0.596463f, 3.414316f, 1.654678f, 3.826642f, 0.517451f, 2.395838f, 0.852720f, 3.288634f, 1.192665f, 5.974302f, 3.126969f, 5.856359f, +0.250785f, 0.951450f, 0.115256f, 0.946426f, 0.415979f, 2.821891f, 1.240831f, 2.336045f, 0.288584f, 1.583467f, 0.511353f, 1.605440f, 0.663034f, 3.935980f, 1.869184f, 2.849839f, +0.620854f, 1.525132f, 0.170676f, 2.082503f, 0.680759f, 2.990170f, 1.214663f, 3.397928f, 0.688558f, 2.446309f, 0.729811f, 3.404659f, 1.166342f, 4.483084f, 1.966814f, 
4.455758f, +0.114348f, 0.253821f, 0.036559f, 0.302237f, 0.219302f, 0.870420f, 0.455085f, 0.862557f, 0.050478f, 0.162052f, 0.062224f, 0.196679f, 0.341226f, 1.185159f, 0.669218f, 1.027217f, +0.298184f, 0.952965f, 0.135615f, 1.254024f, 0.502042f, 2.868916f, 1.481992f, 3.141861f, 0.385769f, 1.783090f, 0.676458f, 2.391589f, 0.836891f, 4.184995f, 2.334800f, 4.008580f, +0.125170f, 0.474068f, 0.061212f, 0.460783f, 0.321371f, 2.176368f, 1.020053f, 1.760469f, 0.197473f, 1.081690f, 0.372334f, 1.071626f, 0.427035f, 2.530688f, 1.281020f, 1.790446f, +0.456477f, 1.119422f, 0.133529f, 1.493576f, 0.774747f, 3.397190f, 1.470950f, 3.772187f, 0.694079f, 2.461711f, 0.782807f, 3.347761f, 1.106588f, 4.246142f, 1.985635f, 4.123769f, +0.277729f, 0.421619f, 0.083150f, 0.620456f, 0.334531f, 0.908072f, 0.650062f, 1.112118f, 0.061543f, 0.135123f, 0.071040f, 0.202677f, 0.665592f, 1.581032f, 1.222369f, 1.693550f, +0.562578f, 1.229625f, 0.239594f, 1.999741f, 0.594891f, 2.324945f, 1.644415f, 3.146687f, 0.365349f, 1.154916f, 0.599914f, 1.914412f, 1.268058f, 4.336733f, 3.312747f, 5.133695f, +0.324483f, 0.840487f, 0.148593f, 1.009622f, 0.523237f, 2.423382f, 1.555189f, 2.422645f, 0.256970f, 0.962664f, 0.453707f, 1.178655f, 0.889055f, 3.603306f, 2.497407f, 3.150616f, +0.790542f, 1.325859f, 0.216546f, 2.186263f, 0.842684f, 2.527098f, 1.498204f, 3.467905f, 0.603387f, 1.463597f, 0.637250f, 2.459862f, 1.539089f, 4.038961f, 2.586100f, 4.847762f, +0.092237f, 0.174490f, 0.028389f, 0.258563f, 0.127329f, 0.430704f, 0.254366f, 0.531146f, 0.026288f, 0.071924f, 0.031196f, 0.108631f, 0.184052f, 0.544805f, 0.347495f, 0.587627f, +0.157084f, 0.427849f, 0.068776f, 0.700639f, 0.190368f, 0.927124f, 0.540981f, 1.263521f, 0.131206f, 0.516849f, 0.221487f, 0.862685f, 0.294807f, 1.256404f, 0.791772f, 1.497614f, +0.036632f, 0.118240f, 0.017246f, 0.143020f, 0.067697f, 0.390719f, 0.206857f, 0.393310f, 0.037312f, 0.174183f, 0.067725f, 0.214744f, 0.083569f, 0.422070f, 0.241334f, 0.371606f, +0.209204f, 0.437230f, 0.058913f, 
0.725969f, 0.255574f, 0.955086f, 0.467129f, 1.319748f, 0.205370f, 0.620769f, 0.222979f, 1.050565f, 0.339123f, 1.108999f, 0.585804f, 1.340313f, +0.107653f, 0.248128f, 0.030861f, 0.312193f, 0.144160f, 0.594125f, 0.268231f, 0.622106f, 0.040998f, 0.136667f, 0.045314f, 0.175264f, 0.267736f, 0.965579f, 0.470810f, 0.884303f, +0.257676f, 0.855094f, 0.105079f, 1.188970f, 0.302923f, 1.797446f, 0.801772f, 2.079950f, 0.287593f, 1.380289f, 0.452173f, 1.956191f, 0.602730f, 3.129649f, 1.507709f, 3.167518f, +0.132436f, 0.520826f, 0.058070f, 0.534904f, 0.237417f, 1.669495f, 0.675683f, 1.426951f, 0.180249f, 1.025214f, 0.304728f, 1.073204f, 0.376559f, 2.317149f, 1.012835f, 1.732225f, +0.313042f, 0.797122f, 0.082106f, 1.123792f, 0.370976f, 1.689085f, 0.631534f, 1.981768f, 0.410632f, 1.512264f, 0.415253f, 2.173065f, 0.632461f, 2.519932f, 1.017562f, 2.585928f, +0.062172f, 0.143054f, 0.018965f, 0.175874f, 0.128869f, 0.530200f, 0.255146f, 0.542477f, 0.032461f, 0.108026f, 0.038178f, 0.135367f, 0.199528f, 0.718362f, 0.373353f, 0.642854f, +0.138016f, 0.457220f, 0.059888f, 0.621208f, 0.251144f, 1.487663f, 0.707323f, 1.682119f, 0.211189f, 1.011860f, 0.353324f, 1.401253f, 0.416589f, 2.159424f, 1.108864f, 2.135583f, +0.065108f, 0.255612f, 0.030378f, 0.256520f, 0.180668f, 1.268270f, 0.547127f, 1.059231f, 0.121491f, 0.689831f, 0.218554f, 0.705612f, 0.238889f, 1.467487f, 0.683718f, 1.071963f, +0.226708f, 0.576296f, 0.063272f, 0.793893f, 0.415859f, 1.890210f, 0.753310f, 2.167038f, 0.407714f, 1.498952f, 0.438723f, 2.104690f, 0.591056f, 2.350937f, 1.011886f, 2.357348f, +0.192431f, 0.302816f, 0.054967f, 0.460100f, 0.250512f, 0.704883f, 0.464448f, 0.891314f, 0.050435f, 0.114785f, 0.055545f, 0.177764f, 0.495972f, 1.221220f, 0.869042f, 1.350622f, +0.331828f, 0.751808f, 0.134833f, 1.262382f, 0.379233f, 1.536333f, 1.000160f, 2.146887f, 0.254880f, 0.835187f, 0.399308f, 1.429393f, 0.804385f, 2.851620f, 2.004947f, 3.485315f, +0.215088f, 0.577508f, 0.093974f, 0.716257f, 0.374852f, 1.799647f, 1.063002f, 
1.857539f, 0.201467f, 0.782349f, 0.339381f, 0.988998f, 0.633792f, 2.662703f, 1.698621f, 2.403809f, +0.500332f, 0.869831f, 0.130760f, 1.480891f, 0.576417f, 1.791836f, 0.977761f, 2.538788f, 0.451678f, 1.135686f, 0.455127f, 1.970745f, 1.047592f, 2.849720f, 1.679436f, 3.531480f, +0.167099f, 0.276253f, 0.044405f, 0.409535f, 0.245606f, 0.726036f, 0.423617f, 0.895739f, 0.041366f, 0.098908f, 0.042382f, 0.149451f, 0.295548f, 0.764532f, 0.481768f, 0.824984f, +0.307274f, 0.731395f, 0.116154f, 1.198244f, 0.396490f, 1.687496f, 0.972797f, 2.300786f, 0.222928f, 0.767439f, 0.324910f, 1.281508f, 0.511153f, 1.903749f, 1.185266f, 2.270228f, +0.059987f, 0.169212f, 0.024383f, 0.204763f, 0.118036f, 0.595351f, 0.311398f, 0.599561f, 0.053072f, 0.216516f, 0.083171f, 0.267051f, 0.121300f, 0.535389f, 0.302439f, 0.471581f, +0.370987f, 0.677589f, 0.090199f, 1.125548f, 0.482558f, 1.575950f, 0.761504f, 2.178612f, 0.316333f, 0.835613f, 0.296534f, 1.414773f, 0.533047f, 1.523375f, 0.794993f, 1.841920f, +0.249289f, 0.502132f, 0.061701f, 0.632054f, 0.355437f, 1.280156f, 0.570991f, 1.341031f, 0.082462f, 0.240228f, 0.078692f, 0.308208f, 0.549541f, 1.732003f, 0.834338f, 1.586906f, +0.644281f, 1.868451f, 0.226838f, 2.599128f, 0.806446f, 4.181834f, 1.842881f, 4.841198f, 0.624592f, 2.619727f, 0.847864f, 3.714379f, 1.335802f, 6.061528f, 2.884957f, 6.137544f, +0.277210f, 0.952718f, 0.104945f, 0.978897f, 0.529127f, 3.251623f, 1.300150f, 2.780438f, 0.327715f, 1.628938f, 0.478341f, 1.705932f, 0.698644f, 3.757031f, 1.622425f, 2.809856f, +0.709576f, 1.579019f, 0.160684f, 2.227089f, 0.895332f, 3.562522f, 1.315946f, 4.181651f, 0.808475f, 2.602007f, 0.705876f, 3.740613f, 1.270715f, 4.424563f, 1.765134f, 4.542418f, +0.174125f, 0.350134f, 0.045859f, 0.430651f, 0.384290f, 1.381711f, 0.656904f, 1.414321f, 0.078968f, 0.229657f, 0.080187f, 0.287908f, 0.495325f, 1.558463f, 0.800218f, 1.395257f, +0.417370f, 1.208329f, 0.156365f, 1.642428f, 0.808647f, 4.186081f, 1.966330f, 4.735315f, 0.554730f, 2.322730f, 0.801285f, 
3.217984f, 1.116655f, 5.058440f, 2.566210f, 5.004772f, +0.164829f, 0.565517f, 0.066399f, 0.567772f, 0.486992f, 2.987577f, 1.273299f, 2.496245f, 0.267153f, 1.325639f, 0.414931f, 1.356555f, 0.536057f, 2.877778f, 1.324633f, 2.103060f, +0.621519f, 1.380704f, 0.149763f, 1.902857f, 1.213884f, 4.821790f, 1.898485f, 5.530363f, 0.970871f, 3.119328f, 0.901985f, 4.381778f, 1.436267f, 4.992462f, 2.122953f, 5.008257f, +0.362139f, 0.498018f, 0.089311f, 0.757021f, 0.501963f, 1.234316f, 0.803493f, 1.561454f, 0.082442f, 0.163972f, 0.078391f, 0.254049f, 0.827323f, 1.780242f, 1.251588f, 1.969737f, +0.674276f, 1.335055f, 0.236550f, 2.242705f, 0.820492f, 2.904830f, 1.868273f, 4.061006f, 0.449862f, 1.288232f, 0.608490f, 2.205724f, 1.448798f, 4.488515f, 3.117806f, 5.488354f, +0.365885f, 0.858528f, 0.138020f, 1.065257f, 0.678941f, 2.848570f, 1.662299f, 2.941485f, 0.297682f, 1.010219f, 0.432949f, 1.277613f, 0.955640f, 3.508634f, 2.211297f, 3.168869f, +0.921677f, 1.400304f, 0.207968f, 2.385061f, 1.130577f, 3.071346f, 1.655764f, 4.353573f, 0.722715f, 1.588049f, 0.628743f, 2.756925f, 1.710532f, 4.066383f, 2.367581f, 5.041408f, +0.196952f, 0.258088f, 0.060090f, 0.486623f, 0.237805f, 0.557203f, 0.470918f, 0.874335f, 0.040181f, 0.076152f, 0.047266f, 0.146349f, 0.259586f, 0.532257f, 0.485826f, 0.730487f, +0.408822f, 0.771317f, 0.177433f, 1.607192f, 0.433346f, 1.461903f, 1.220716f, 2.535089f, 0.244435f, 0.666983f, 0.409026f, 1.416555f, 0.506785f, 1.496085f, 1.349209f, 2.269120f, +0.075923f, 0.169755f, 0.035431f, 0.261266f, 0.122723f, 0.490635f, 0.371721f, 0.628434f, 0.055357f, 0.179007f, 0.099602f, 0.280812f, 0.114405f, 0.400244f, 0.327500f, 0.448387f, +0.519248f, 0.751719f, 0.144946f, 1.588161f, 0.554830f, 1.436237f, 1.005246f, 2.525253f, 0.364880f, 0.763984f, 0.392708f, 1.645155f, 0.555964f, 1.259393f, 0.951994f, 1.936719f, +0.213435f, 0.340765f, 0.060652f, 0.545547f, 0.249989f, 0.713664f, 0.461081f, 0.950848f, 0.058185f, 0.134354f, 0.063749f, 0.219235f, 0.350614f, 0.875892f, 0.611168f, 
1.020692f, +0.622672f, 1.431328f, 0.251704f, 2.532366f, 0.640257f, 2.631594f, 1.679832f, 3.874773f, 0.497474f, 1.653877f, 0.775336f, 2.982460f, 0.962037f, 3.460231f, 2.385498f, 4.456141f, +0.254860f, 0.694274f, 0.110776f, 0.907288f, 0.399621f, 1.946532f, 1.127382f, 2.116975f, 0.248302f, 0.978275f, 0.416112f, 1.303044f, 0.478647f, 2.040219f, 1.276184f, 1.940696f, +0.721424f, 1.272485f, 0.187566f, 2.282677f, 0.747774f, 2.358400f, 1.261870f, 3.520862f, 0.677405f, 1.728079f, 0.679048f, 3.159651f, 0.962733f, 2.657059f, 1.535413f, 3.469439f, +0.040064f, 0.063856f, 0.012114f, 0.099892f, 0.072635f, 0.207003f, 0.142553f, 0.269494f, 0.014974f, 0.034517f, 0.017457f, 0.055036f, 0.084927f, 0.211800f, 0.157527f, 0.241171f, +0.108401f, 0.248754f, 0.046627f, 0.430044f, 0.172530f, 0.707925f, 0.481674f, 1.018521f, 0.118736f, 0.394070f, 0.196915f, 0.694385f, 0.216121f, 0.776010f, 0.570243f, 0.976509f, +0.040724f, 0.110749f, 0.018835f, 0.141420f, 0.098841f, 0.480627f, 0.296713f, 0.510761f, 0.054396f, 0.213949f, 0.097001f, 0.278460f, 0.098696f, 0.419969f, 0.280009f, 0.390349f, +0.169814f, 0.299016f, 0.046980f, 0.524132f, 0.272453f, 0.857820f, 0.489228f, 1.251361f, 0.218611f, 0.556729f, 0.233184f, 0.994660f, 0.292429f, 0.805700f, 0.496267f, 1.027985f, +0.351802f, 0.383479f, 0.099614f, 0.741389f, 0.400580f, 0.780760f, 0.736190f, 1.256209f, 0.066003f, 0.104054f, 0.072056f, 0.205043f, 0.598914f, 1.021507f, 1.040256f, 1.437513f, +0.739405f, 1.160425f, 0.297823f, 2.479312f, 0.739117f, 2.074116f, 1.932276f, 3.687969f, 0.406550f, 0.922787f, 0.631360f, 2.009555f, 1.183907f, 2.907274f, 2.925156f, 4.521334f, +0.381679f, 0.709874f, 0.165305f, 1.120270f, 0.581809f, 1.934855f, 1.635488f, 2.541145f, 0.255915f, 0.688386f, 0.427337f, 1.107279f, 0.742871f, 2.161874f, 1.973586f, 2.483351f, +1.063239f, 1.280407f, 0.275448f, 2.773744f, 1.071389f, 2.307007f, 1.801504f, 4.159177f, 0.687083f, 1.196683f, 0.686286f, 2.642297f, 1.470446f, 2.770763f, 2.336752f, 4.369022f, +0.211295f, 0.385613f, 0.068163f, 
0.553430f, 0.289746f, 0.945507f, 0.606680f, 1.129318f, 0.046745f, 0.123382f, 0.058141f, 0.180487f, 0.349013f, 0.996639f, 0.690652f, 1.041156f, +0.476779f, 1.252770f, 0.218794f, 1.986977f, 0.573966f, 2.696654f, 1.709558f, 3.559481f, 0.309124f, 1.174735f, 0.546938f, 1.899089f, 0.740696f, 3.045283f, 2.085034f, 3.515730f, +0.088817f, 0.276566f, 0.043825f, 0.324002f, 0.163048f, 0.907828f, 0.522186f, 0.885099f, 0.070223f, 0.316252f, 0.133596f, 0.377629f, 0.167725f, 0.817213f, 0.507672f, 0.696867f, +0.584912f, 1.179308f, 0.172640f, 1.896500f, 0.709814f, 2.558975f, 1.359799f, 3.424772f, 0.445710f, 1.299697f, 0.507213f, 2.130355f, 0.784867f, 2.476087f, 1.421024f, 2.898398f, +0.218830f, 0.486576f, 0.065751f, 0.592946f, 0.291092f, 1.157335f, 0.567681f, 1.173716f, 0.064690f, 0.208033f, 0.074941f, 0.258393f, 0.450508f, 1.567402f, 0.830334f, 1.390307f, +0.693993f, 2.221726f, 0.296623f, 2.992021f, 0.810436f, 4.639153f, 2.248269f, 5.199394f, 0.601247f, 2.783821f, 0.990810f, 3.821197f, 1.343757f, 6.731155f, 3.523108f, 6.598270f, +0.284929f, 1.080988f, 0.130948f, 1.075281f, 0.507401f, 3.442072f, 1.513534f, 2.849449f, 0.301024f, 1.651725f, 0.533396f, 1.674645f, 0.670629f, 3.981072f, 1.890598f, 2.882488f, +0.776640f, 1.907821f, 0.213502f, 2.605048f, 0.914257f, 4.015786f, 1.631287f, 4.563404f, 0.790796f, 2.809538f, 0.838174f, 3.910185f, 1.298877f, 4.992511f, 2.190310f, 4.962079f, +0.085892f, 0.190658f, 0.027461f, 0.227025f, 0.176853f, 0.701940f, 0.366998f, 0.695599f, 0.034811f, 0.111757f, 0.042912f, 0.135637f, 0.228181f, 0.792529f, 0.447513f, 0.686911f, +0.252632f, 0.807385f, 0.114898f, 1.062454f, 0.456656f, 2.609555f, 1.348014f, 2.857825f, 0.300072f, 1.386982f, 0.526185f, 1.860305f, 0.631226f, 3.156537f, 1.761026f, 3.023476f, +0.095202f, 0.360570f, 0.046557f, 0.350466f, 0.262422f, 1.777157f, 0.832945f, 1.437546f, 0.137896f, 0.755345f, 0.260001f, 0.748317f, 0.289151f, 1.713560f, 0.867395f, 1.212333f, +0.382263f, 0.937427f, 0.111820f, 1.250752f, 0.696544f, 3.054276f, 1.322471f, 
3.391420f, 0.533637f, 1.892667f, 0.601855f, 2.573900f, 0.824977f, 3.165558f, 1.480318f, 3.074327f, +0.389420f, 0.591176f, 0.116589f, 0.869977f, 0.503590f, 1.366976f, 0.978578f, 1.674140f, 0.079226f, 0.173948f, 0.091452f, 0.260912f, 0.830838f, 1.973554f, 1.525845f, 2.114006f, +0.889726f, 1.944673f, 0.378922f, 3.162624f, 1.010081f, 3.947580f, 2.792092f, 5.342837f, 0.530487f, 1.676941f, 0.871076f, 2.779731f, 1.785356f, 6.105884f, 4.664167f, 7.227963f, +0.460692f, 1.193300f, 0.210968f, 1.433434f, 0.797557f, 3.693899f, 2.370535f, 3.692775f, 0.334962f, 1.254836f, 0.591409f, 1.536381f, 1.123724f, 4.554406f, 3.156603f, 3.982227f, +1.235772f, 2.072578f, 0.338504f, 3.417559f, 1.414240f, 4.241117f, 2.514369f, 5.820032f, 0.865971f, 2.100529f, 0.914570f, 3.530351f, 2.141852f, 5.620764f, 3.598910f, 6.746319f, +0.202673f, 0.383409f, 0.062380f, 0.568142f, 0.300374f, 1.016048f, 0.600060f, 1.252993f, 0.053032f, 0.145098f, 0.062933f, 0.219149f, 0.360034f, 1.065721f, 0.679752f, 1.149487f, +0.389314f, 1.060372f, 0.170454f, 1.736452f, 0.506533f, 2.466895f, 1.439444f, 3.361979f, 0.298548f, 1.176050f, 0.503976f, 1.962972f, 0.650455f, 2.772102f, 1.746949f, 3.304304f, +0.081503f, 0.263074f, 0.038370f, 0.318207f, 0.161707f, 0.933301f, 0.494115f, 0.939492f, 0.076217f, 0.355805f, 0.138343f, 0.438659f, 0.165527f, 0.836006f, 0.478017f, 0.736050f, +0.512480f, 1.071070f, 0.144317f, 1.778387f, 0.672155f, 2.511857f, 1.228540f, 3.470911f, 0.461890f, 1.396149f, 0.501494f, 2.362785f, 0.739566f, 2.418528f, 1.277533f, 2.922982f, +0.185458f, 0.427458f, 0.053165f, 0.537824f, 0.266628f, 1.098853f, 0.496102f, 1.150605f, 0.064844f, 0.216159f, 0.071671f, 0.277207f, 0.410616f, 1.480871f, 0.722063f, 1.356221f, +0.500691f, 1.661532f, 0.204178f, 2.310286f, 0.631933f, 3.749689f, 1.672594f, 4.339027f, 0.513058f, 2.462399f, 0.806665f, 3.489792f, 1.042628f, 5.413803f, 2.608101f, 5.479310f, +0.231017f, 0.908514f, 0.101296f, 0.933073f, 0.444628f, 3.126577f, 1.265398f, 2.672349f, 0.288673f, 1.641903f, 0.488028f, 
1.718761f, 0.584769f, 3.598367f, 1.572860f, 2.690022f, +0.601226f, 1.530944f, 0.157692f, 2.158343f, 0.764935f, 3.482818f, 1.302195f, 4.086316f, 0.724071f, 2.666586f, 0.732218f, 3.831782f, 1.081385f, 4.308592f, 1.739833f, 4.421433f, +0.071701f, 0.164980f, 0.021872f, 0.202831f, 0.159560f, 0.656470f, 0.315911f, 0.671672f, 0.034371f, 0.114380f, 0.040424f, 0.143330f, 0.204855f, 0.737541f, 0.383321f, 0.660017f, +0.179530f, 0.594749f, 0.077903f, 0.808064f, 0.350733f, 2.077579f, 0.987805f, 2.349144f, 0.252216f, 1.208433f, 0.421964f, 1.673473f, 0.482423f, 2.500681f, 1.284099f, 2.473073f, +0.076031f, 0.298493f, 0.035474f, 0.299553f, 0.226506f, 1.590045f, 0.685939f, 1.327971f, 0.130254f, 0.739587f, 0.234318f, 0.756506f, 0.248348f, 1.525595f, 0.710792f, 1.114410f, +0.291484f, 0.740959f, 0.081351f, 1.020729f, 0.574036f, 2.609173f, 1.039840f, 2.991295f, 0.481280f, 1.769414f, 0.517884f, 2.484448f, 0.676533f, 2.690923f, 1.158221f, 2.698261f, +0.414263f, 0.651898f, 0.118333f, 0.990497f, 0.578994f, 1.629154f, 1.073451f, 2.060041f, 0.099684f, 0.226872f, 0.109784f, 0.351349f, 0.950539f, 2.340488f, 1.665534f, 2.588491f, +0.805733f, 1.825515f, 0.327397f, 3.065274f, 0.988619f, 4.005053f, 2.607309f, 5.596701f, 0.568209f, 1.861896f, 0.890183f, 3.186570f, 1.738819f, 6.164275f, 4.334042f, 7.534117f, +0.468855f, 1.258871f, 0.204848f, 1.561320f, 0.877258f, 4.211675f, 2.487722f, 4.347160f, 0.403201f, 1.565731f, 0.679210f, 1.979302f, 1.229933f, 5.167230f, 3.296337f, 4.664821f, +1.200817f, 2.087628f, 0.313828f, 3.554195f, 1.485251f, 4.617012f, 2.519390f, 6.541676f, 0.995269f, 2.502476f, 1.002868f, 4.342522f, 2.238321f, 6.088808f, 3.588340f, 7.545479f, +0.261192f, 0.431811f, 0.069409f, 0.640143f, 0.412163f, 1.218396f, 0.710893f, 1.503183f, 0.059364f, 0.141942f, 0.060823f, 0.214475f, 0.411269f, 1.063882f, 0.670403f, 1.148003f, +0.541738f, 1.289482f, 0.204786f, 2.112558f, 0.750482f, 3.194116f, 1.841324f, 4.354959f, 0.360846f, 1.242226f, 0.525920f, 2.074332f, 0.802280f, 2.988030f, 1.860335f, 
3.563237f, +0.094944f, 0.267818f, 0.038591f, 0.324086f, 0.200570f, 1.011640f, 0.529137f, 1.018794f, 0.077119f, 0.314624f, 0.120857f, 0.388057f, 0.170916f, 0.754379f, 0.426146f, 0.664471f, +0.646490f, 1.180782f, 0.157182f, 1.961404f, 0.902812f, 2.948424f, 1.424687f, 4.075938f, 0.506105f, 1.336909f, 0.474429f, 2.263515f, 0.826952f, 2.363316f, 1.233327f, 2.857497f, +0.305503f, 0.615362f, 0.075614f, 0.774580f, 0.467649f, 1.684301f, 0.751253f, 1.764395f, 0.092781f, 0.270289f, 0.088539f, 0.346776f, 0.599548f, 1.889614f, 0.910262f, 1.731313f, +0.890564f, 2.582686f, 0.313550f, 3.592673f, 1.196768f, 6.205851f, 2.734839f, 7.184349f, 0.792647f, 3.324601f, 1.075993f, 4.713784f, 1.643781f, 7.459060f, 3.550107f, 7.552603f, +0.343988f, 1.182222f, 0.130226f, 1.214708f, 0.704920f, 4.331911f, 1.732099f, 3.704183f, 0.373357f, 1.855807f, 0.544961f, 1.943524f, 0.771796f, 4.150415f, 1.792303f, 3.104065f, +0.969457f, 2.157333f, 0.219534f, 3.042758f, 1.313283f, 5.225548f, 1.930245f, 6.133694f, 1.014121f, 3.263862f, 0.885425f, 4.692087f, 1.545574f, 5.381609f, 2.146938f, 5.524956f, +0.142852f, 0.287251f, 0.037623f, 0.353307f, 0.338478f, 1.216992f, 0.578592f, 1.245715f, 0.059480f, 0.172981f, 0.060398f, 0.216857f, 0.361766f, 1.138242f, 0.584448f, 1.019042f, +0.386211f, 1.118121f, 0.144691f, 1.519812f, 0.803354f, 4.158683f, 1.953460f, 4.704322f, 0.471280f, 1.973312f, 0.680744f, 2.733890f, 0.919888f, 4.167084f, 2.114014f, 4.122873f, +0.136925f, 0.469779f, 0.055158f, 0.471652f, 0.434325f, 2.664477f, 1.135595f, 2.226282f, 0.203752f, 1.011037f, 0.316459f, 1.034616f, 0.396435f, 2.128225f, 0.979616f, 1.555293f, +0.568457f, 1.262828f, 0.136977f, 1.740402f, 1.191970f, 4.734741f, 1.864211f, 5.430522f, 0.815265f, 2.619376f, 0.757419f, 3.679487f, 1.169474f, 4.065090f, 1.728605f, 4.077951f, +0.554589f, 0.762678f, 0.136773f, 1.159321f, 0.825300f, 2.029396f, 1.321059f, 2.567258f, 0.115914f, 0.230547f, 0.110219f, 0.357196f, 1.127932f, 2.427095f, 1.706354f, 2.685444f, +1.164692f, 2.306071f, 0.408599f, 
3.873875f, 1.521572f, 5.386898f, 3.464642f, 7.530983f, 0.713422f, 2.042966f, 0.964984f, 3.497986f, 2.227887f, 6.902210f, 4.794403f, 8.439710f, +0.567365f, 1.331290f, 0.214023f, 1.651858f, 1.130303f, 4.742307f, 2.767400f, 4.896993f, 0.423803f, 1.438226f, 0.616380f, 1.818909f, 1.319243f, 4.843600f, 3.052652f, 4.374561f, +1.573591f, 2.390758f, 0.355067f, 4.072046f, 2.072326f, 5.629717f, 3.034983f, 7.980014f, 1.132854f, 2.489262f, 0.985553f, 4.321472f, 2.599898f, 6.180641f, 3.598570f, 7.662616f, +0.148224f, 0.194234f, 0.045223f, 0.366227f, 0.140650f, 0.329558f, 0.278525f, 0.517127f, 0.029899f, 0.056666f, 0.035172f, 0.108900f, 0.201111f, 0.412360f, 0.376388f, 0.565937f, +0.136090f, 0.256759f, 0.059064f, 0.535007f, 0.113367f, 0.382447f, 0.319351f, 0.663203f, 0.080452f, 0.219528f, 0.134625f, 0.466239f, 0.173666f, 0.512679f, 0.462348f, 0.777583f, +0.035985f, 0.080458f, 0.016793f, 0.123831f, 0.045713f, 0.182754f, 0.138461f, 0.234083f, 0.025942f, 0.083888f, 0.046677f, 0.131597f, 0.055820f, 0.195286f, 0.159793f, 0.218776f, +0.253532f, 0.367040f, 0.070773f, 0.775448f, 0.212902f, 0.551119f, 0.385738f, 0.969002f, 0.176153f, 0.368829f, 0.189588f, 0.794233f, 0.279449f, 0.633020f, 0.478509f, 0.973470f, +0.154229f, 0.246238f, 0.043827f, 0.394215f, 0.141966f, 0.405281f, 0.261842f, 0.539975f, 0.041571f, 0.095992f, 0.045547f, 0.156637f, 0.260812f, 0.651553f, 0.454631f, 0.759265f, +0.199019f, 0.457482f, 0.080450f, 0.809397f, 0.160824f, 0.661022f, 0.421952f, 0.973292f, 0.157213f, 0.522662f, 0.245024f, 0.942525f, 0.316537f, 1.138513f, 0.784896f, 1.466196f, +0.115983f, 0.315953f, 0.050412f, 0.412892f, 0.142923f, 0.696168f, 0.403203f, 0.757127f, 0.111726f, 0.440185f, 0.187234f, 0.586318f, 0.224235f, 0.955797f, 0.597864f, 0.909173f, +0.338215f, 0.596560f, 0.087934f, 1.070153f, 0.275508f, 0.868922f, 0.464920f, 1.297216f, 0.314002f, 0.801028f, 0.314764f, 1.464615f, 0.464628f, 1.282333f, 0.741011f, 1.674398f, +0.039597f, 0.063112f, 0.011973f, 0.098729f, 0.056418f, 0.160786f, 0.110726f, 
0.209325f, 0.014633f, 0.033731f, 0.017060f, 0.053783f, 0.086409f, 0.215495f, 0.160274f, 0.245378f, +0.047389f, 0.108747f, 0.020384f, 0.188001f, 0.059275f, 0.243218f, 0.165486f, 0.349927f, 0.051323f, 0.170334f, 0.085115f, 0.300144f, 0.097261f, 0.349230f, 0.256628f, 0.439461f, +0.025349f, 0.068935f, 0.011724f, 0.088026f, 0.048351f, 0.235110f, 0.145144f, 0.249851f, 0.033478f, 0.131672f, 0.059698f, 0.171375f, 0.063241f, 0.269102f, 0.179421f, 0.250123f, +0.108890f, 0.191737f, 0.030125f, 0.336088f, 0.137299f, 0.432285f, 0.246539f, 0.630604f, 0.138601f, 0.352971f, 0.147841f, 0.630623f, 0.193033f, 0.531844f, 0.327587f, 0.678574f, +0.182021f, 0.198411f, 0.051540f, 0.383592f, 0.162883f, 0.317470f, 0.299347f, 0.510795f, 0.033765f, 0.053231f, 0.036862f, 0.104894f, 0.318996f, 0.544080f, 0.554066f, 0.765655f, +0.169215f, 0.265567f, 0.068158f, 0.567399f, 0.132933f, 0.373037f, 0.347527f, 0.663295f, 0.091993f, 0.208806f, 0.142862f, 0.454716f, 0.278916f, 0.684922f, 0.689135f, 1.065178f, +0.124369f, 0.231310f, 0.053864f, 0.365036f, 0.148989f, 0.495477f, 0.418815f, 0.650736f, 0.082450f, 0.221783f, 0.137679f, 0.356741f, 0.249187f, 0.725173f, 0.662015f, 0.833009f, +0.356907f, 0.429806f, 0.092462f, 0.931087f, 0.282639f, 0.608603f, 0.475249f, 1.097218f, 0.228043f, 0.397179f, 0.227778f, 0.876977f, 0.508125f, 0.957461f, 0.807485f, 1.509754f, +0.243180f, 0.443803f, 0.078449f, 0.636945f, 0.262070f, 0.855195f, 0.548732f, 1.021449f, 0.053193f, 0.140402f, 0.066162f, 0.205384f, 0.413502f, 1.180795f, 0.818269f, 1.233538f, +0.242711f, 0.637741f, 0.111380f, 1.011500f, 0.229626f, 1.078847f, 0.683941f, 1.424038f, 0.155592f, 0.591284f, 0.275292f, 0.955875f, 0.388160f, 1.595875f, 1.092658f, 1.842411f, +0.064376f, 0.200460f, 0.031765f, 0.234842f, 0.092877f, 0.517124f, 0.297451f, 0.504177f, 0.050326f, 0.226644f, 0.095743f, 0.270631f, 0.125148f, 0.609765f, 0.378800f, 0.519968f, +0.436747f, 0.880576f, 0.128908f, 1.416095f, 0.416529f, 1.501645f, 0.797950f, 2.009707f, 0.329060f, 0.959543f, 0.374466f, 
1.572803f, 0.603300f, 1.903283f, 1.092292f, 2.227898f, +0.241818f, 0.537692f, 0.072658f, 0.655235f, 0.252798f, 1.005085f, 0.493002f, 1.019311f, 0.070681f, 0.227299f, 0.081881f, 0.282323f, 0.512487f, 1.783038f, 0.944568f, 1.581579f, +0.339213f, 1.085944f, 0.144984f, 1.462451f, 0.311313f, 1.782038f, 0.863628f, 1.997244f, 0.290571f, 1.345367f, 0.478840f, 1.846711f, 0.676138f, 3.386914f, 1.772722f, 3.320051f, +0.198294f, 0.752305f, 0.091132f, 0.748333f, 0.277514f, 1.882584f, 0.827802f, 1.558458f, 0.207136f, 1.136562f, 0.367033f, 1.152334f, 0.480455f, 2.852138f, 1.354471f, 2.065085f, +0.556805f, 1.367794f, 0.153068f, 1.867665f, 0.515125f, 2.262639f, 0.919126f, 2.571186f, 0.560571f, 1.991594f, 0.594155f, 2.771808f, 0.958625f, 3.684681f, 1.616540f, 3.662222f, +0.129822f, 0.288170f, 0.041507f, 0.343137f, 0.210072f, 0.833787f, 0.435932f, 0.826255f, 0.052023f, 0.167014f, 0.064129f, 0.202701f, 0.355035f, 1.233123f, 0.696301f, 1.068789f, +0.168895f, 0.539770f, 0.076814f, 0.710293f, 0.239926f, 1.371058f, 0.708245f, 1.501499f, 0.198352f, 0.916814f, 0.347815f, 1.229687f, 0.434420f, 2.172384f, 1.211968f, 2.080809f, +0.090622f, 0.343220f, 0.044317f, 0.333602f, 0.196311f, 1.329447f, 0.623105f, 1.075393f, 0.129783f, 0.710905f, 0.244704f, 0.704291f, 0.283339f, 1.679117f, 0.849960f, 1.187965f, +0.374849f, 0.919246f, 0.109651f, 1.226493f, 0.536790f, 2.353769f, 1.019159f, 2.613589f, 0.517396f, 1.835064f, 0.583538f, 2.495563f, 0.832787f, 3.195525f, 1.494332f, 3.103431f, +0.308122f, 0.467758f, 0.092249f, 0.688355f, 0.313144f, 0.850016f, 0.608502f, 1.041017f, 0.061980f, 0.136084f, 0.071545f, 0.204118f, 0.676736f, 1.607502f, 1.242834f, 1.721904f, +0.311383f, 0.680590f, 0.132614f, 1.106845f, 0.277816f, 1.085755f, 0.767946f, 1.469510f, 0.183568f, 0.580283f, 0.301424f, 0.961889f, 0.643223f, 2.199812f, 1.680394f, 2.604072f, +0.229565f, 0.594627f, 0.105126f, 0.714287f, 0.312333f, 1.446577f, 0.928331f, 1.446137f, 0.165034f, 0.618251f, 0.291384f, 0.756966f, 0.576438f, 2.336279f, 1.619246f, 
2.042768f, +0.634372f, 1.063938f, 0.173768f, 1.754371f, 0.570545f, 1.710989f, 1.014369f, 2.347969f, 0.439533f, 1.066147f, 0.464200f, 1.791868f, 1.131860f, 2.970289f, 1.901841f, 3.565088f, +0.076664f, 0.145030f, 0.023596f, 0.214907f, 0.089293f, 0.302044f, 0.178382f, 0.372482f, 0.019834f, 0.054267f, 0.023537f, 0.081963f, 0.140196f, 0.414989f, 0.264693f, 0.447607f, +0.065137f, 0.177414f, 0.028519f, 0.290531f, 0.066604f, 0.324371f, 0.189272f, 0.442065f, 0.049389f, 0.194553f, 0.083372f, 0.324733f, 0.112033f, 0.477459f, 0.300890f, 0.569124f, +0.019416f, 0.062671f, 0.009141f, 0.075804f, 0.030274f, 0.174730f, 0.092507f, 0.175889f, 0.017952f, 0.083807f, 0.032586f, 0.103322f, 0.040593f, 0.205018f, 0.117226f, 0.180505f, +0.125769f, 0.262853f, 0.035417f, 0.436437f, 0.129636f, 0.484453f, 0.236944f, 0.669423f, 0.112077f, 0.338774f, 0.121687f, 0.573327f, 0.186840f, 0.611004f, 0.322750f, 0.738447f, +0.067357f, 0.155250f, 0.019309f, 0.195334f, 0.076104f, 0.313646f, 0.141602f, 0.328417f, 0.023286f, 0.077624f, 0.025737f, 0.099546f, 0.153522f, 0.553673f, 0.269967f, 0.507069f, +0.080434f, 0.266920f, 0.032801f, 0.371141f, 0.079782f, 0.473401f, 0.211166f, 0.547806f, 0.081493f, 0.391124f, 0.128129f, 0.554313f, 0.172425f, 0.895309f, 0.431315f, 0.906142f, +0.052841f, 0.207807f, 0.023170f, 0.213424f, 0.079926f, 0.562030f, 0.227466f, 0.480378f, 0.065286f, 0.371329f, 0.110371f, 0.388711f, 0.137693f, 0.847290f, 0.370354f, 0.633406f, +0.141670f, 0.360743f, 0.037158f, 0.508580f, 0.141653f, 0.644958f, 0.241144f, 0.756715f, 0.168695f, 0.621266f, 0.170593f, 0.892735f, 0.262311f, 1.045135f, 0.422031f, 1.072506f, +0.035618f, 0.081956f, 0.010865f, 0.100759f, 0.062292f, 0.256287f, 0.123332f, 0.262221f, 0.016882f, 0.056180f, 0.019855f, 0.070399f, 0.104760f, 0.377167f, 0.196024f, 0.337522f, +0.039448f, 0.130682f, 0.017117f, 0.177554f, 0.060565f, 0.358759f, 0.170575f, 0.405653f, 0.054795f, 0.262536f, 0.091673f, 0.363567f, 0.109121f, 0.565640f, 0.290456f, 0.559395f, +0.023786f, 0.093384f, 0.011098f, 
0.093716f, 0.055690f, 0.390940f, 0.168650f, 0.326505f, 0.040292f, 0.228777f, 0.072482f, 0.234010f, 0.079983f, 0.491334f, 0.228918f, 0.358907f, +0.093943f, 0.238805f, 0.026219f, 0.328973f, 0.145395f, 0.660868f, 0.263377f, 0.757654f, 0.153366f, 0.563848f, 0.165031f, 0.791704f, 0.224459f, 0.892789f, 0.384273f, 0.895224f, +0.107730f, 0.169527f, 0.030773f, 0.257581f, 0.118330f, 0.332954f, 0.219384f, 0.421015f, 0.025631f, 0.058334f, 0.028228f, 0.090340f, 0.254465f, 0.626563f, 0.445874f, 0.692955f, +0.092680f, 0.209981f, 0.037659f, 0.352585f, 0.089369f, 0.362047f, 0.235694f, 0.505928f, 0.064623f, 0.211755f, 0.101241f, 0.362411f, 0.205896f, 0.729919f, 0.513199f, 0.892124f, +0.076787f, 0.206173f, 0.033549f, 0.255707f, 0.112912f, 0.542085f, 0.320195f, 0.559523f, 0.065291f, 0.253543f, 0.109986f, 0.320513f, 0.207363f, 0.871178f, 0.555751f, 0.786473f, +0.202599f, 0.352220f, 0.052948f, 0.599657f, 0.196935f, 0.612187f, 0.334055f, 0.867385f, 0.166029f, 0.417459f, 0.167297f, 0.724413f, 0.388760f, 1.057527f, 0.623236f, 1.310527f, +0.205142f, 0.339147f, 0.054514f, 0.502772f, 0.254405f, 0.752046f, 0.438793f, 0.927829f, 0.046100f, 0.110227f, 0.047233f, 0.166554f, 0.332520f, 0.860173f, 0.542036f, 0.928187f, +0.188199f, 0.447965f, 0.071142f, 0.733901f, 0.204895f, 0.872049f, 0.502714f, 1.188980f, 0.123946f, 0.426690f, 0.180647f, 0.712508f, 0.286915f, 1.068591f, 0.665301f, 1.274299f, +0.046962f, 0.132472f, 0.019088f, 0.160304f, 0.077967f, 0.393253f, 0.205691f, 0.396034f, 0.037717f, 0.153872f, 0.059107f, 0.189785f, 0.087029f, 0.384124f, 0.216991f, 0.338344f, +0.329425f, 0.601679f, 0.080094f, 0.999453f, 0.361538f, 1.180719f, 0.570527f, 1.632241f, 0.254987f, 0.673565f, 0.239028f, 1.140411f, 0.433784f, 1.239695f, 0.646951f, 1.498921f, +0.230384f, 0.464054f, 0.057022f, 0.584123f, 0.277153f, 0.998204f, 0.445231f, 1.045672f, 0.069180f, 0.201535f, 0.066017f, 0.258565f, 0.465436f, 1.466929f, 0.706646f, 1.344037f, +0.297055f, 0.861478f, 0.104587f, 1.198368f, 0.313721f, 1.626804f, 0.716912f, 
1.883308f, 0.261418f, 1.096465f, 0.354866f, 1.554622f, 0.564435f, 2.561264f, 1.219022f, 2.593384f, +0.163370f, 0.561471f, 0.061848f, 0.576900f, 0.263105f, 1.616849f, 0.646491f, 1.382555f, 0.175322f, 0.871453f, 0.255904f, 0.912643f, 0.377337f, 2.029166f, 0.876269f, 1.517599f, +0.474315f, 1.055494f, 0.107409f, 1.488696f, 0.504962f, 2.009241f, 0.742186f, 2.358426f, 0.490582f, 1.578895f, 0.428325f, 2.269799f, 0.778442f, 2.710495f, 1.081325f, 2.782693f, +0.147345f, 0.296285f, 0.038806f, 0.364419f, 0.274373f, 0.986502f, 0.469011f, 1.009786f, 0.060660f, 0.176413f, 0.061596f, 0.221160f, 0.384127f, 1.208597f, 0.620573f, 1.082029f, +0.176201f, 0.510120f, 0.066012f, 0.693383f, 0.288039f, 1.491079f, 0.700406f, 1.686716f, 0.212591f, 0.890147f, 0.307079f, 1.233239f, 0.432032f, 1.957102f, 0.992862f, 1.936338f, +0.088945f, 0.305164f, 0.035830f, 0.306381f, 0.221725f, 1.360232f, 0.579728f, 1.136530f, 0.130865f, 0.649365f, 0.203255f, 0.664510f, 0.265099f, 1.423163f, 0.655078f, 1.040038f, +0.380406f, 0.845072f, 0.091664f, 1.164660f, 0.626869f, 2.490047f, 0.980407f, 2.855965f, 0.539425f, 1.733127f, 0.501151f, 2.434556f, 0.805634f, 2.800385f, 1.190813f, 2.809244f, +0.299455f, 0.411814f, 0.073852f, 0.625985f, 0.350214f, 0.861170f, 0.560589f, 1.089410f, 0.061884f, 0.123084f, 0.058843f, 0.190700f, 0.626962f, 1.349103f, 0.948478f, 1.492706f, +0.278167f, 0.550766f, 0.097587f, 0.925210f, 0.285594f, 1.011102f, 0.650301f, 1.413539f, 0.168471f, 0.482435f, 0.227876f, 0.826029f, 0.547754f, 1.696996f, 1.178765f, 2.075010f, +0.192936f, 0.452713f, 0.072780f, 0.561724f, 0.302070f, 1.267365f, 0.739578f, 1.308705f, 0.142494f, 0.483571f, 0.207244f, 0.611568f, 0.461821f, 1.695574f, 1.068626f, 1.531380f, +0.551256f, 0.837523f, 0.124386f, 1.426506f, 0.570533f, 1.549920f, 0.835562f, 2.196981f, 0.392390f, 0.862213f, 0.341369f, 1.496841f, 0.937595f, 2.228909f, 1.297744f, 2.763350f, +0.098139f, 0.128602f, 0.029942f, 0.242478f, 0.119117f, 0.279104f, 0.235883f, 0.437955f, 0.023328f, 0.044211f, 0.027441f, 
0.084965f, 0.146332f, 0.300041f, 0.273867f, 0.411786f, +0.120619f, 0.227570f, 0.052350f, 0.474188f, 0.128525f, 0.433583f, 0.362050f, 0.751878f, 0.084026f, 0.229280f, 0.140605f, 0.486951f, 0.169155f, 0.499365f, 0.450340f, 0.757389f, +0.024852f, 0.055566f, 0.011598f, 0.085520f, 0.040381f, 0.161441f, 0.122313f, 0.206783f, 0.021112f, 0.068269f, 0.037986f, 0.107095f, 0.042365f, 0.148214f, 0.121276f, 0.166042f, +0.234516f, 0.339511f, 0.065464f, 0.717287f, 0.251901f, 0.652073f, 0.456396f, 1.146502f, 0.192007f, 0.402023f, 0.206651f, 0.865714f, 0.284069f, 0.643485f, 0.486420f, 0.989564f, +0.138245f, 0.220718f, 0.039285f, 0.353358f, 0.162770f, 0.464674f, 0.300215f, 0.619107f, 0.043910f, 0.101391f, 0.048109f, 0.165448f, 0.256916f, 0.641819f, 0.447840f, 0.747923f, +0.238806f, 0.548939f, 0.096533f, 0.971207f, 0.246837f, 1.014555f, 0.647624f, 1.493836f, 0.222292f, 0.739022f, 0.346453f, 1.332689f, 0.417403f, 1.501306f, 1.035007f, 1.933406f, +0.108440f, 0.295405f, 0.047134f, 0.386040f, 0.170926f, 0.832569f, 0.482203f, 0.905471f, 0.123094f, 0.484972f, 0.206284f, 0.645975f, 0.230399f, 0.982071f, 0.614299f, 0.934165f, +0.423537f, 0.747057f, 0.110117f, 1.340125f, 0.441309f, 1.391842f, 0.744710f, 2.077886f, 0.463360f, 1.182044f, 0.464484f, 2.161271f, 0.639419f, 1.764742f, 1.019777f, 2.304301f, +0.030338f, 0.048354f, 0.009174f, 0.075643f, 0.055291f, 0.157574f, 0.108514f, 0.205143f, 0.013211f, 0.030454f, 0.015402f, 0.048557f, 0.072755f, 0.181444f, 0.134949f, 0.206605f, +0.048604f, 0.111534f, 0.020906f, 0.192820f, 0.077763f, 0.319078f, 0.217101f, 0.459071f, 0.062028f, 0.205864f, 0.102869f, 0.362750f, 0.109626f, 0.393627f, 0.289252f, 0.495329f, +0.020258f, 0.055091f, 0.009369f, 0.070348f, 0.049425f, 0.240337f, 0.148371f, 0.255405f, 0.031527f, 0.123999f, 0.056219f, 0.161388f, 0.055541f, 0.236340f, 0.157576f, 0.219671f, +0.116554f, 0.205233f, 0.032245f, 0.359745f, 0.187982f, 0.591864f, 0.337549f, 0.863393f, 0.174821f, 0.445212f, 0.186476f, 0.795422f, 0.227067f, 0.625614f, 0.385344f, 
0.798215f, +0.223295f, 0.243401f, 0.063226f, 0.470572f, 0.255588f, 0.498160f, 0.469723f, 0.801518f, 0.048810f, 0.076950f, 0.053287f, 0.151633f, 0.430054f, 0.733500f, 0.746963f, 1.032216f, +0.277884f, 0.436113f, 0.111928f, 0.931779f, 0.279233f, 0.783586f, 0.730000f, 1.393288f, 0.178018f, 0.404066f, 0.276458f, 0.879935f, 0.503360f, 1.236081f, 1.243684f, 1.922328f, +0.159141f, 0.295982f, 0.068924f, 0.467096f, 0.243857f, 0.810968f, 0.685492f, 1.065086f, 0.124322f, 0.334414f, 0.207598f, 0.537910f, 0.350410f, 1.019749f, 0.930934f, 1.171388f, +0.611686f, 0.736624f, 0.158466f, 1.595747f, 0.619607f, 1.334192f, 1.041849f, 2.405341f, 0.460549f, 0.802132f, 0.460015f, 1.771122f, 0.957030f, 1.803334f, 1.520860f, 2.843551f, +0.172318f, 0.314480f, 0.055590f, 0.451341f, 0.237537f, 0.775138f, 0.497363f, 0.925828f, 0.044417f, 0.117236f, 0.055245f, 0.171497f, 0.322005f, 0.919516f, 0.637207f, 0.960588f, +0.230230f, 0.604945f, 0.105652f, 0.959483f, 0.278614f, 1.309005f, 0.829851f, 1.727837f, 0.173919f, 0.660927f, 0.307717f, 1.068461f, 0.404635f, 1.663608f, 1.139034f, 1.920609f, +0.047582f, 0.148165f, 0.023479f, 0.173577f, 0.087808f, 0.488902f, 0.281218f, 0.476661f, 0.043832f, 0.197401f, 0.083389f, 0.235712f, 0.101654f, 0.495291f, 0.307687f, 0.422353f, +0.432365f, 0.871741f, 0.127615f, 1.401887f, 0.527444f, 1.901507f, 1.010431f, 2.544857f, 0.383868f, 1.119365f, 0.436837f, 1.834768f, 0.656350f, 2.070643f, 1.188340f, 2.423803f, +0.231981f, 0.515818f, 0.069702f, 0.628579f, 0.310204f, 1.233319f, 0.604952f, 1.250776f, 0.079901f, 0.256949f, 0.092562f, 0.319150f, 0.540290f, 1.879769f, 0.995811f, 1.667381f, +0.435615f, 1.394561f, 0.186188f, 1.878070f, 0.511372f, 2.927234f, 1.418623f, 3.280738f, 0.439713f, 2.035905f, 0.724614f, 2.794575f, 0.954217f, 4.779866f, 2.501798f, 4.685504f, +0.198420f, 0.752783f, 0.091190f, 0.748809f, 0.355199f, 2.409576f, 1.059529f, 1.994718f, 0.244241f, 1.340158f, 0.432781f, 1.358754f, 0.528337f, 3.136376f, 1.489454f, 2.270887f, +0.746247f, 1.833160f, 0.205147f, 
2.503103f, 0.883086f, 3.878869f, 1.575669f, 4.407816f, 0.885312f, 3.145336f, 0.938353f, 4.377531f, 1.411920f, 5.427014f, 2.380935f, 5.393935f, +0.106451f, 0.236294f, 0.034035f, 0.281366f, 0.220335f, 0.874520f, 0.457229f, 0.866620f, 0.050268f, 0.161377f, 0.061965f, 0.195860f, 0.319932f, 1.111200f, 0.627455f, 0.963114f, +0.185391f, 0.592490f, 0.084317f, 0.779669f, 0.336869f, 1.925033f, 0.994411f, 2.108178f, 0.256563f, 1.185878f, 0.449891f, 1.590572f, 0.524039f, 2.620536f, 1.461991f, 2.510069f, +0.077509f, 0.293556f, 0.037904f, 0.285330f, 0.214770f, 1.454451f, 0.681695f, 1.176509f, 0.130804f, 0.716500f, 0.246630f, 0.709833f, 0.266321f, 1.578267f, 0.798910f, 1.116614f, +0.429416f, 1.053061f, 0.125613f, 1.405034f, 0.786567f, 3.449020f, 1.493392f, 3.829738f, 0.698444f, 2.477190f, 0.787729f, 3.368811f, 1.048424f, 4.022955f, 1.881265f, 3.907014f, +0.404538f, 0.614127f, 0.121115f, 0.903752f, 0.525885f, 1.427494f, 1.021901f, 1.748256f, 0.095891f, 0.210538f, 0.110689f, 0.315794f, 0.976422f, 2.319369f, 1.793211f, 2.484432f, +0.547268f, 1.196163f, 0.233074f, 1.945321f, 0.624556f, 2.440880f, 1.726415f, 3.303600f, 0.380178f, 1.201796f, 0.624265f, 1.992121f, 1.242361f, 4.248851f, 3.245615f, 5.029663f, +0.314381f, 0.814321f, 0.143966f, 0.978190f, 0.547115f, 2.533975f, 1.626162f, 2.533204f, 0.266324f, 0.997704f, 0.470222f, 1.221557f, 0.867530f, 3.516062f, 2.436939f, 3.074332f, +1.163585f, 1.951509f, 0.318731f, 3.217923f, 1.338611f, 4.014316f, 2.379909f, 5.508795f, 0.950019f, 2.304398f, 1.003335f, 3.872992f, 2.281540f, 5.987342f, 3.833626f, 7.186304f, +0.089752f, 0.169789f, 0.027625f, 0.251597f, 0.133716f, 0.452308f, 0.267125f, 0.557787f, 0.027363f, 0.074865f, 0.032471f, 0.113072f, 0.180372f, 0.533913f, 0.340547f, 0.575879f, +0.102082f, 0.278041f, 0.044695f, 0.455316f, 0.133515f, 0.650238f, 0.379417f, 0.886169f, 0.091208f, 0.359290f, 0.153967f, 0.599699f, 0.192951f, 0.822315f, 0.518214f, 0.980187f, +0.023710f, 0.076530f, 0.011162f, 0.092568f, 0.047288f, 0.272926f, 0.144495f, 
0.274736f, 0.025833f, 0.120596f, 0.046890f, 0.148678f, 0.054475f, 0.275131f, 0.157316f, 0.242236f, +0.205704f, 0.429916f, 0.057927f, 0.713826f, 0.271211f, 1.013520f, 0.495709f, 1.400493f, 0.216010f, 0.652930f, 0.234531f, 1.104992f, 0.335832f, 1.098237f, 0.580119f, 1.327306f, +0.106757f, 0.246062f, 0.030604f, 0.309593f, 0.154287f, 0.635860f, 0.287073f, 0.665807f, 0.043490f, 0.144975f, 0.048069f, 0.185919f, 0.267402f, 0.964377f, 0.470224f, 0.883202f, +0.170656f, 0.566319f, 0.069592f, 0.787442f, 0.216519f, 1.284753f, 0.573079f, 1.486677f, 0.203745f, 0.977868f, 0.320343f, 1.385866f, 0.402033f, 2.087537f, 1.005672f, 2.112797f, +0.087357f, 0.343547f, 0.038304f, 0.352834f, 0.169014f, 1.188490f, 0.481009f, 1.015826f, 0.127183f, 0.723388f, 0.215015f, 0.757250f, 0.250160f, 1.539356f, 0.672859f, 1.150773f, +0.313694f, 0.798781f, 0.082277f, 1.126131f, 0.401203f, 1.826716f, 0.682993f, 2.143247f, 0.440168f, 1.621038f, 0.445121f, 2.329370f, 0.638305f, 2.543214f, 1.026964f, 2.609820f, +0.048254f, 0.111029f, 0.014719f, 0.136501f, 0.107944f, 0.444110f, 0.213717f, 0.454394f, 0.026950f, 0.089686f, 0.031697f, 0.112385f, 0.155966f, 0.561525f, 0.291840f, 0.502502f, +0.071539f, 0.236995f, 0.031043f, 0.321997f, 0.140493f, 0.832214f, 0.395684f, 0.940994f, 0.117097f, 0.561044f, 0.195907f, 0.776951f, 0.217477f, 1.127309f, 0.578873f, 1.114863f, +0.033612f, 0.131960f, 0.015683f, 0.132428f, 0.100660f, 0.706624f, 0.304835f, 0.590157f, 0.067092f, 0.380948f, 0.120693f, 0.389663f, 0.124207f, 0.763003f, 0.355491f, 0.557354f, +0.177802f, 0.451976f, 0.049623f, 0.622633f, 0.351992f, 1.599912f, 0.637617f, 1.834224f, 0.342049f, 1.257535f, 0.368064f, 1.765714f, 0.466863f, 1.856957f, 0.799268f, 1.862021f, +0.233681f, 0.367728f, 0.066750f, 0.558728f, 0.328317f, 0.923807f, 0.608697f, 1.168140f, 0.065515f, 0.149107f, 0.072153f, 0.230916f, 0.606592f, 1.493595f, 1.062870f, 1.651859f, +0.269117f, 0.609726f, 0.109351f, 1.023808f, 0.331933f, 1.344712f, 0.875414f, 1.879114f, 0.221119f, 0.724560f, 0.346416f, 
1.240058f, 0.657026f, 2.329220f, 1.637652f, 2.846826f, +0.173736f, 0.466479f, 0.075907f, 0.578553f, 0.326776f, 1.568837f, 0.926669f, 1.619305f, 0.174077f, 0.675986f, 0.293241f, 0.854540f, 0.515599f, 2.166149f, 1.381853f, 1.955534f, +0.613963f, 1.067378f, 0.160457f, 1.817216f, 0.763373f, 2.373001f, 1.294888f, 3.362219f, 0.592891f, 1.490748f, 0.597418f, 2.586881f, 1.294693f, 3.521897f, 2.075572f, 4.364466f, +0.108339f, 0.179110f, 0.028790f, 0.265523f, 0.171857f, 0.508026f, 0.296416f, 0.626772f, 0.028689f, 0.068597f, 0.029394f, 0.103651f, 0.192988f, 0.499227f, 0.314587f, 0.538702f, +0.133051f, 0.316696f, 0.050295f, 0.518844f, 0.185285f, 0.788588f, 0.454601f, 1.075186f, 0.103257f, 0.355466f, 0.150493f, 0.593575f, 0.222912f, 0.830218f, 0.516890f, 0.990038f, +0.025870f, 0.072974f, 0.010515f, 0.088306f, 0.054937f, 0.277094f, 0.144934f, 0.279054f, 0.024483f, 0.099883f, 0.038368f, 0.123195f, 0.052685f, 0.232540f, 0.131361f, 0.204826f, +0.243056f, 0.443929f, 0.059095f, 0.737413f, 0.341203f, 1.114309f, 0.538437f, 1.540434f, 0.221694f, 0.585619f, 0.207818f, 0.991508f, 0.351725f, 1.005182f, 0.524567f, 1.215371f, +0.164719f, 0.331787f, 0.040769f, 0.417633f, 0.253466f, 0.912894f, 0.407180f, 0.956304f, 0.058285f, 0.169796f, 0.055620f, 0.217845f, 0.365706f, 1.152606f, 0.555232f, 1.056047f, +0.284312f, 0.824522f, 0.100101f, 1.146961f, 0.384072f, 1.991608f, 0.877676f, 2.305631f, 0.294836f, 1.236629f, 0.400230f, 1.753354f, 0.593682f, 2.693978f, 1.282187f, 2.727762f, +0.121836f, 0.418728f, 0.046124f, 0.430234f, 0.250983f, 1.542353f, 0.616704f, 1.318854f, 0.154073f, 0.765834f, 0.224888f, 0.802032f, 0.309253f, 1.663042f, 0.718163f, 1.243777f, +0.473778f, 1.054299f, 0.107287f, 1.487010f, 0.645173f, 2.567143f, 0.948267f, 3.013286f, 0.577438f, 1.858435f, 0.504159f, 2.671663f, 0.854507f, 2.975348f, 1.186985f, 3.054601f, +0.090047f, 0.181068f, 0.023715f, 0.222707f, 0.214478f, 0.771154f, 0.366628f, 0.789354f, 0.043684f, 0.127043f, 0.044358f, 0.159267f, 0.257982f, 0.811700f, 0.416780f, 
0.726696f, +0.144148f, 0.417323f, 0.054004f, 0.567249f, 0.301413f, 1.560311f, 0.732926f, 1.765031f, 0.204942f, 0.858121f, 0.296031f, 1.188869f, 0.388416f, 1.759524f, 0.892628f, 1.740856f, +0.056698f, 0.194527f, 0.022840f, 0.195302f, 0.180789f, 1.109096f, 0.472695f, 0.926696f, 0.098301f, 0.487777f, 0.152677f, 0.499153f, 0.185710f, 0.996970f, 0.458903f, 0.728579f, +0.324786f, 0.721512f, 0.078261f, 0.994372f, 0.684599f, 2.719365f, 1.070696f, 3.118982f, 0.542709f, 1.743679f, 0.504202f, 2.449379f, 0.755908f, 2.627537f, 1.117312f, 2.635850f, +0.293019f, 0.402964f, 0.072265f, 0.612533f, 0.438338f, 1.077864f, 0.701648f, 1.363536f, 0.071356f, 0.141924f, 0.067850f, 0.219888f, 0.674198f, 1.450746f, 1.019938f, 1.605168f, +0.364367f, 0.721440f, 0.127828f, 1.211918f, 0.478511f, 1.694096f, 1.089576f, 2.368377f, 0.260042f, 0.744660f, 0.351736f, 1.275013f, 0.788497f, 2.442839f, 1.696841f, 2.986993f, +0.196921f, 0.462064f, 0.074283f, 0.573326f, 0.394363f, 1.654591f, 0.965546f, 1.708561f, 0.171381f, 0.581602f, 0.249257f, 0.735545f, 0.518004f, 1.901852f, 1.198632f, 1.717683f, +0.753590f, 1.144930f, 0.170041f, 1.950096f, 0.997638f, 2.710201f, 1.461071f, 3.841658f, 0.632101f, 1.388939f, 0.549911f, 2.411261f, 1.408571f, 3.348543f, 1.949631f, 4.151447f, +0.051926f, 0.068044f, 0.015843f, 0.128296f, 0.058398f, 0.136833f, 0.115644f, 0.214711f, 0.011538f, 0.021868f, 0.013573f, 0.042026f, 0.076876f, 0.157627f, 0.143876f, 0.216333f, +0.065544f, 0.123661f, 0.028447f, 0.257672f, 0.064713f, 0.218310f, 0.182293f, 0.378572f, 0.042684f, 0.116472f, 0.071426f, 0.247366f, 0.091267f, 0.269429f, 0.242978f, 0.408644f, +0.015784f, 0.035291f, 0.007366f, 0.054316f, 0.023764f, 0.095007f, 0.071980f, 0.121690f, 0.012535f, 0.040534f, 0.022554f, 0.063586f, 0.026716f, 0.093466f, 0.076479f, 0.104709f, +0.112837f, 0.163356f, 0.031498f, 0.345122f, 0.112304f, 0.290710f, 0.203473f, 0.511139f, 0.086364f, 0.180829f, 0.092951f, 0.389396f, 0.135710f, 0.307417f, 0.232381f, 0.472752f, +0.073907f, 0.117998f, 0.021002f, 
0.188909f, 0.080630f, 0.230181f, 0.148714f, 0.306681f, 0.021945f, 0.050673f, 0.024044f, 0.082687f, 0.136376f, 0.340690f, 0.237722f, 0.397012f, +0.131117f, 0.301396f, 0.053002f, 0.533243f, 0.125577f, 0.516146f, 0.329473f, 0.759977f, 0.114097f, 0.379322f, 0.177826f, 0.684037f, 0.227551f, 0.818449f, 0.564242f, 1.054012f, +0.069589f, 0.189571f, 0.030247f, 0.247734f, 0.101635f, 0.495059f, 0.286726f, 0.538408f, 0.073846f, 0.290943f, 0.123753f, 0.387531f, 0.146806f, 0.625756f, 0.391419f, 0.595232f, +0.205906f, 0.363187f, 0.053534f, 0.651511f, 0.198794f, 0.626976f, 0.335466f, 0.936014f, 0.210588f, 0.537214f, 0.211098f, 0.982253f, 0.308654f, 0.851857f, 0.492256f, 1.112308f, +0.017654f, 0.028137f, 0.005338f, 0.044017f, 0.029812f, 0.084961f, 0.058508f, 0.110609f, 0.007187f, 0.016566f, 0.008379f, 0.026414f, 0.042036f, 0.104834f, 0.077970f, 0.119371f, +0.029047f, 0.066655f, 0.012494f, 0.115233f, 0.043061f, 0.176688f, 0.120219f, 0.254208f, 0.034654f, 0.115012f, 0.057471f, 0.202661f, 0.065050f, 0.233571f, 0.171637f, 0.293920f, +0.014150f, 0.038481f, 0.006545f, 0.049138f, 0.031989f, 0.155550f, 0.096028f, 0.165302f, 0.020587f, 0.080970f, 0.036710f, 0.105384f, 0.038520f, 0.163912f, 0.109286f, 0.152352f, +0.061676f, 0.108602f, 0.017063f, 0.190363f, 0.092170f, 0.290198f, 0.165505f, 0.423332f, 0.086481f, 0.220239f, 0.092246f, 0.393481f, 0.119303f, 0.328704f, 0.202463f, 0.419390f, +0.101837f, 0.111007f, 0.028836f, 0.214613f, 0.108008f, 0.210515f, 0.198497f, 0.338709f, 0.020810f, 0.032807f, 0.022719f, 0.064649f, 0.194743f, 0.332153f, 0.338250f, 0.467422f, +0.130158f, 0.204270f, 0.052426f, 0.436434f, 0.121187f, 0.340076f, 0.316819f, 0.604686f, 0.077948f, 0.176927f, 0.121052f, 0.385295f, 0.234095f, 0.574859f, 0.578395f, 0.894009f, +0.087122f, 0.162035f, 0.037732f, 0.255712f, 0.123699f, 0.411370f, 0.347722f, 0.540274f, 0.063625f, 0.171146f, 0.106244f, 0.275291f, 0.190472f, 0.554303f, 0.506026f, 0.636729f, +0.253686f, 0.305502f, 0.065721f, 0.661809f, 0.238105f, 0.512709f, 0.400366f, 
0.924334f, 0.178559f, 0.310994f, 0.178352f, 0.686680f, 0.394097f, 0.742598f, 0.626277f, 1.170950f, +0.080074f, 0.146134f, 0.025832f, 0.209731f, 0.102276f, 0.333750f, 0.214149f, 0.398632f, 0.019295f, 0.050928f, 0.023999f, 0.074499f, 0.148570f, 0.424254f, 0.294000f, 0.443205f, +0.109874f, 0.288702f, 0.050421f, 0.457901f, 0.123203f, 0.578841f, 0.366959f, 0.764048f, 0.077592f, 0.294866f, 0.137285f, 0.476684f, 0.191737f, 0.788305f, 0.539734f, 0.910085f, +0.026541f, 0.082645f, 0.013096f, 0.096821f, 0.045383f, 0.252685f, 0.145345f, 0.246359f, 0.022856f, 0.102934f, 0.043483f, 0.122912f, 0.056300f, 0.274311f, 0.170409f, 0.233915f, +0.182704f, 0.368370f, 0.053926f, 0.592394f, 0.206518f, 0.744525f, 0.395629f, 0.996425f, 0.151641f, 0.442187f, 0.172566f, 0.724796f, 0.275386f, 0.868783f, 0.498594f, 1.016959f, +0.108920f, 0.242187f, 0.032727f, 0.295131f, 0.134954f, 0.536555f, 0.263184f, 0.544149f, 0.035070f, 0.112782f, 0.040628f, 0.140083f, 0.251878f, 0.876331f, 0.464238f, 0.777317f, +0.210055f, 0.672463f, 0.089781f, 0.905613f, 0.228482f, 1.307892f, 0.633843f, 1.465838f, 0.198215f, 0.917753f, 0.326644f, 1.259748f, 0.456864f, 2.288524f, 1.197821f, 2.243344f, +0.111829f, 0.424268f, 0.051395f, 0.422028f, 0.185492f, 1.258331f, 0.553308f, 1.041684f, 0.128685f, 0.706096f, 0.228021f, 0.715894f, 0.295658f, 1.755123f, 0.833502f, 1.270793f, +0.318623f, 0.782698f, 0.087591f, 1.068741f, 0.349366f, 1.534556f, 0.623365f, 1.743818f, 0.353368f, 1.255447f, 0.374539f, 1.747272f, 0.598567f, 2.300720f, 1.009370f, 2.286696f, +0.054402f, 0.120759f, 0.017394f, 0.143793f, 0.104336f, 0.414114f, 0.216513f, 0.410374f, 0.024016f, 0.077099f, 0.029604f, 0.093573f, 0.162343f, 0.563856f, 0.318390f, 0.488713f, +0.097304f, 0.310974f, 0.044254f, 0.409217f, 0.163828f, 0.936192f, 0.483608f, 1.025261f, 0.125885f, 0.581863f, 0.220744f, 0.780430f, 0.273096f, 1.365659f, 0.761898f, 1.308091f, +0.047548f, 0.180084f, 0.023252f, 0.175037f, 0.122079f, 0.826734f, 0.387487f, 0.668747f, 0.075014f, 0.410900f, 0.141438f, 
0.407077f, 0.162217f, 0.961329f, 0.486620f, 0.680134f, +0.199565f, 0.489395f, 0.058377f, 0.652970f, 0.338709f, 1.485203f, 0.643078f, 1.649146f, 0.303441f, 1.076225f, 0.342232f, 1.463593f, 0.483785f, 1.856352f, 0.868091f, 1.802853f, +0.162034f, 0.245983f, 0.048511f, 0.361989f, 0.195173f, 0.529791f, 0.379262f, 0.648837f, 0.035906f, 0.078834f, 0.041446f, 0.118247f, 0.388323f, 0.922412f, 0.713159f, 0.988058f, +0.225125f, 0.492054f, 0.095877f, 0.800228f, 0.238055f, 0.930364f, 0.658039f, 1.259197f, 0.146200f, 0.462158f, 0.240065f, 0.766083f, 0.507434f, 1.735413f, 1.325649f, 2.054330f, +0.151154f, 0.391523f, 0.069219f, 0.470311f, 0.243739f, 1.128882f, 0.724452f, 1.128538f, 0.119704f, 0.448437f, 0.211350f, 0.549052f, 0.414148f, 1.678525f, 1.163365f, 1.467648f, +0.423822f, 0.710814f, 0.116094f, 1.172091f, 0.451777f, 1.354818f, 0.803211f, 1.859200f, 0.323486f, 0.784658f, 0.341640f, 1.318772f, 0.825131f, 2.165352f, 1.386450f, 2.598963f, +0.050636f, 0.095791f, 0.015585f, 0.141945f, 0.069901f, 0.236447f, 0.139641f, 0.291587f, 0.014431f, 0.039485f, 0.017126f, 0.059636f, 0.101040f, 0.299086f, 0.190767f, 0.322594f, +0.059148f, 0.161102f, 0.025897f, 0.263818f, 0.071681f, 0.349099f, 0.203701f, 0.475765f, 0.049404f, 0.194614f, 0.083398f, 0.324835f, 0.111006f, 0.473085f, 0.298133f, 0.563910f, +0.016057f, 0.051828f, 0.007559f, 0.062689f, 0.029674f, 0.171262f, 0.090671f, 0.172398f, 0.016355f, 0.076349f, 0.029686f, 0.094128f, 0.036630f, 0.185004f, 0.105783f, 0.162884f, +0.105535f, 0.220566f, 0.029719f, 0.366224f, 0.128928f, 0.481805f, 0.235649f, 0.665763f, 0.103601f, 0.313155f, 0.112485f, 0.529971f, 0.171075f, 0.559448f, 0.295516f, 0.676137f, +0.060857f, 0.140267f, 0.017446f, 0.176483f, 0.081494f, 0.335860f, 0.151631f, 0.351678f, 0.023176f, 0.077258f, 0.025616f, 0.099077f, 0.151351f, 0.545844f, 0.266150f, 0.499898f, +0.099910f, 0.331551f, 0.040743f, 0.461006f, 0.117454f, 0.696934f, 0.310876f, 0.806471f, 0.111510f, 0.535188f, 0.175324f, 0.758485f, 0.233700f, 1.213477f, 0.584593f, 
1.228160f, +0.059776f, 0.235079f, 0.026211f, 0.241434f, 0.107161f, 0.753542f, 0.304976f, 0.644067f, 0.081357f, 0.462740f, 0.137542f, 0.484401f, 0.169963f, 1.045867f, 0.457152f, 0.781856f, +0.162614f, 0.414075f, 0.042651f, 0.583768f, 0.192708f, 0.877417f, 0.328059f, 1.029455f, 0.213308f, 0.785565f, 0.215708f, 1.128827f, 0.328540f, 1.309012f, 0.528586f, 1.343294f, +0.029940f, 0.068891f, 0.009133f, 0.084696f, 0.062060f, 0.255328f, 0.122871f, 0.261241f, 0.015632f, 0.052022f, 0.018385f, 0.065189f, 0.096087f, 0.345942f, 0.179796f, 0.309579f, +0.045587f, 0.151022f, 0.019781f, 0.205188f, 0.082954f, 0.491382f, 0.233632f, 0.555612f, 0.069757f, 0.334222f, 0.116705f, 0.462841f, 0.137601f, 0.713268f, 0.366263f, 0.705394f, +0.025034f, 0.098284f, 0.011681f, 0.098633f, 0.069468f, 0.487655f, 0.210373f, 0.407279f, 0.046714f, 0.265243f, 0.084035f, 0.271311f, 0.091854f, 0.564255f, 0.262893f, 0.412174f, +0.100323f, 0.255023f, 0.027999f, 0.351314f, 0.184026f, 0.836457f, 0.333355f, 0.958959f, 0.180422f, 0.663318f, 0.194144f, 0.931369f, 0.261555f, 1.040338f, 0.447780f, 1.043176f, +0.113639f, 0.178826f, 0.032461f, 0.271709f, 0.147939f, 0.416264f, 0.274277f, 0.526360f, 0.029784f, 0.067786f, 0.032802f, 0.104978f, 0.292893f, 0.721184f, 0.513208f, 0.797602f, +0.134407f, 0.304519f, 0.054614f, 0.511327f, 0.153608f, 0.622291f, 0.405114f, 0.869595f, 0.103239f, 0.338292f, 0.161739f, 0.578975f, 0.325816f, 1.155047f, 0.812102f, 1.411725f, +0.101417f, 0.272303f, 0.044310f, 0.337725f, 0.176748f, 0.848558f, 0.501220f, 0.875855f, 0.094995f, 0.368888f, 0.160023f, 0.466326f, 0.298841f, 1.255501f, 0.800923f, 1.133429f, +0.271510f, 0.472021f, 0.070958f, 0.803618f, 0.312798f, 0.972355f, 0.530590f, 1.377695f, 0.245107f, 0.616290f, 0.246978f, 1.069442f, 0.568485f, 1.546425f, 0.911360f, 1.916388f, +0.063276f, 0.104611f, 0.016815f, 0.155082f, 0.093005f, 0.274933f, 0.160414f, 0.339196f, 0.015664f, 0.037454f, 0.016049f, 0.056594f, 0.111917f, 0.289511f, 0.182434f, 0.312402f, +0.079809f, 0.189966f, 0.030169f, 
0.311222f, 0.102981f, 0.438296f, 0.252666f, 0.597587f, 0.057902f, 0.199328f, 0.084389f, 0.332848f, 0.132762f, 0.494464f, 0.307851f, 0.589650f, +0.018137f, 0.051161f, 0.007372f, 0.061910f, 0.035688f, 0.180005f, 0.094151f, 0.181278f, 0.016046f, 0.065464f, 0.025147f, 0.080743f, 0.036675f, 0.161875f, 0.091443f, 0.142583f, +0.129093f, 0.235782f, 0.031387f, 0.391658f, 0.167916f, 0.548385f, 0.264981f, 0.758094f, 0.110075f, 0.290769f, 0.103185f, 0.492300f, 0.185485f, 0.530091f, 0.276635f, 0.640935f, +0.097207f, 0.195800f, 0.024059f, 0.246461f, 0.138598f, 0.499181f, 0.222651f, 0.522918f, 0.032155f, 0.093674f, 0.030685f, 0.120182f, 0.214286f, 0.675373f, 0.325339f, 0.618794f, +0.172316f, 0.499726f, 0.060669f, 0.695149f, 0.215688f, 1.118452f, 0.492887f, 1.294802f, 0.167050f, 0.700659f, 0.226765f, 0.993429f, 0.357267f, 1.621185f, 0.771596f, 1.641516f, +0.086307f, 0.296620f, 0.032674f, 0.304771f, 0.164739f, 1.012365f, 0.404790f, 0.865665f, 0.102031f, 0.507156f, 0.148927f, 0.531127f, 0.217517f, 1.169719f, 0.505128f, 0.874825f, +0.254254f, 0.565792f, 0.057576f, 0.798007f, 0.320814f, 1.276517f, 0.471528f, 1.498363f, 0.289692f, 0.932347f, 0.252929f, 1.340331f, 0.455321f, 1.585402f, 0.632480f, 1.627632f, +0.057841f, 0.116308f, 0.015233f, 0.143054f, 0.127654f, 0.458977f, 0.218210f, 0.469809f, 0.026232f, 0.076287f, 0.026637f, 0.095637f, 0.164537f, 0.517691f, 0.265816f, 0.463477f, +0.095093f, 0.275305f, 0.035626f, 0.374210f, 0.184242f, 0.953756f, 0.448008f, 1.078893f, 0.126389f, 0.529210f, 0.182565f, 0.733185f, 0.254418f, 1.152514f, 0.584685f, 1.140286f, +0.043717f, 0.149990f, 0.017611f, 0.150588f, 0.129163f, 0.792383f, 0.337712f, 0.662069f, 0.070856f, 0.351594f, 0.110050f, 0.359793f, 0.142176f, 0.763261f, 0.351327f, 0.557786f, +0.189715f, 0.421453f, 0.045714f, 0.580837f, 0.370532f, 1.471826f, 0.579503f, 1.688114f, 0.296353f, 0.952158f, 0.275326f, 1.337515f, 0.438413f, 1.523923f, 0.648020f, 1.528744f, +0.147517f, 0.202867f, 0.036381f, 0.308372f, 0.204474f, 0.502798f, 0.327302f, 
0.636057f, 0.033583f, 0.066794f, 0.031932f, 0.103487f, 0.337009f, 0.725180f, 0.509833f, 0.802370f, +0.188391f, 0.373011f, 0.066092f, 0.626606f, 0.229243f, 0.811602f, 0.521991f, 1.134635f, 0.125690f, 0.359929f, 0.170011f, 0.616274f, 0.404790f, 1.254080f, 0.871107f, 1.533432f, +0.119002f, 0.279231f, 0.044890f, 0.346468f, 0.220821f, 0.926479f, 0.540652f, 0.956699f, 0.096819f, 0.328567f, 0.140814f, 0.415535f, 0.310816f, 1.141161f, 0.719210f, 1.030655f, +0.345001f, 0.524160f, 0.077846f, 0.892772f, 0.423196f, 1.149661f, 0.619783f, 1.629622f, 0.270526f, 0.594436f, 0.235350f, 1.031968f, 0.640283f, 1.522122f, 0.886229f, 1.887091f, +0.154099f, 0.201933f, 0.047016f, 0.380743f, 0.186063f, 0.435966f, 0.368455f, 0.684096f, 0.031438f, 0.059583f, 0.036982f, 0.114506f, 0.203105f, 0.416448f, 0.380119f, 0.571547f, +0.219396f, 0.413931f, 0.095220f, 0.862508f, 0.232557f, 0.784538f, 0.655104f, 1.360469f, 0.131177f, 0.357940f, 0.219506f, 0.760202f, 0.271969f, 0.802882f, 0.724060f, 1.217735f, +0.047430f, 0.106048f, 0.022134f, 0.163217f, 0.076667f, 0.306507f, 0.232219f, 0.392592f, 0.034582f, 0.111828f, 0.062223f, 0.175427f, 0.071470f, 0.250038f, 0.204594f, 0.280114f, +0.373326f, 0.540467f, 0.104213f, 1.141849f, 0.398909f, 1.032619f, 0.722747f, 1.815595f, 0.262340f, 0.549285f, 0.282347f, 1.182826f, 0.399724f, 0.905473f, 0.684460f, 1.392453f, +0.171962f, 0.274549f, 0.048866f, 0.439539f, 0.201412f, 0.574989f, 0.371486f, 0.766084f, 0.046878f, 0.108247f, 0.051361f, 0.176635f, 0.282484f, 0.705694f, 0.492409f, 0.822356f, +0.344097f, 0.790970f, 0.139095f, 1.399419f, 0.353814f, 1.454253f, 0.928297f, 2.141250f, 0.274911f, 0.913954f, 0.428461f, 1.648146f, 0.531634f, 1.912169f, 1.318257f, 2.462522f, +0.163949f, 0.446620f, 0.071261f, 0.583650f, 0.257072f, 1.252186f, 0.725235f, 1.361831f, 0.159730f, 0.629316f, 0.267681f, 0.838237f, 0.307909f, 1.312455f, 0.820958f, 1.248433f, +0.534110f, 0.942091f, 0.138866f, 1.689990f, 0.553618f, 1.746052f, 0.934232f, 2.606687f, 0.501520f, 1.279391f, 0.502736f, 
2.339263f, 0.712764f, 1.967166f, 1.136750f, 2.568615f, +0.027498f, 0.043827f, 0.008315f, 0.068561f, 0.049853f, 0.142076f, 0.097841f, 0.184966f, 0.010277f, 0.023691f, 0.011982f, 0.037774f, 0.058290f, 0.145369f, 0.108118f, 0.165527f, +0.051031f, 0.117104f, 0.021950f, 0.202448f, 0.081220f, 0.333263f, 0.226753f, 0.479480f, 0.055896f, 0.185513f, 0.092700f, 0.326889f, 0.101741f, 0.365315f, 0.268448f, 0.459702f, +0.022317f, 0.060691f, 0.010322f, 0.077499f, 0.054166f, 0.263387f, 0.162601f, 0.279901f, 0.029810f, 0.117245f, 0.053157f, 0.152598f, 0.054086f, 0.230146f, 0.153447f, 0.213914f, +0.107101f, 0.188587f, 0.029630f, 0.330567f, 0.171835f, 0.541022f, 0.308553f, 0.789226f, 0.137877f, 0.351126f, 0.147068f, 0.627326f, 0.184433f, 0.508150f, 0.312993f, 0.648344f, +0.296098f, 0.322760f, 0.083841f, 0.623999f, 0.337153f, 0.657136f, 0.619623f, 1.057303f, 0.055552f, 0.087578f, 0.060647f, 0.172577f, 0.504083f, 0.859763f, 0.875543f, 1.209899f, +0.426850f, 0.669900f, 0.171930f, 1.431279f, 0.426684f, 1.197364f, 1.115481f, 2.129023f, 0.234697f, 0.532715f, 0.364477f, 1.160093f, 0.683456f, 1.678336f, 1.688659f, 2.610116f, +0.256494f, 0.477047f, 0.111087f, 0.752839f, 0.390985f, 1.300253f, 1.099073f, 1.707690f, 0.171979f, 0.462606f, 0.287177f, 0.744109f, 0.499221f, 1.452814f, 1.326281f, 1.668851f, +0.822324f, 0.990284f, 0.213035f, 2.145252f, 0.828627f, 1.784271f, 1.393308f, 3.216764f, 0.531399f, 0.925531f, 0.530783f, 2.043588f, 1.137263f, 2.142946f, 1.807275f, 3.379061f, +0.192572f, 0.351445f, 0.062124f, 0.504392f, 0.264072f, 0.861729f, 0.552924f, 1.029253f, 0.042603f, 0.112449f, 0.052990f, 0.164495f, 0.318088f, 0.908330f, 0.629456f, 0.948902f, +0.298043f, 0.783128f, 0.136772f, 1.242094f, 0.358796f, 1.685725f, 1.068674f, 2.225093f, 0.193239f, 0.734347f, 0.341900f, 1.187153f, 0.463022f, 1.903659f, 1.303391f, 2.197744f, +0.064631f, 0.201255f, 0.031891f, 0.235774f, 0.118649f, 0.660620f, 0.379991f, 0.644080f, 0.051101f, 0.230134f, 0.097217f, 0.274798f, 0.122052f, 0.594680f, 0.369429f, 
0.507105f, +0.489859f, 0.987660f, 0.144584f, 1.588302f, 0.594463f, 2.143119f, 1.138820f, 2.868216f, 0.373279f, 1.088485f, 0.424787f, 1.784153f, 0.657319f, 2.073701f, 1.190095f, 2.427382f, +0.205371f, 0.456649f, 0.061707f, 0.556476f, 0.273188f, 1.086152f, 0.532766f, 1.101525f, 0.060711f, 0.195238f, 0.070331f, 0.242500f, 0.422799f, 1.470998f, 0.779264f, 1.304795f, +0.446727f, 1.430138f, 0.190938f, 1.925981f, 0.521682f, 2.986249f, 1.447224f, 3.346880f, 0.387026f, 1.791962f, 0.637790f, 2.459727f, 0.864984f, 4.332883f, 2.267845f, 4.247344f, +0.213506f, 0.810018f, 0.098123f, 0.805741f, 0.380211f, 2.579252f, 1.134138f, 2.135181f, 0.225567f, 1.237689f, 0.399690f, 1.254864f, 0.502524f, 2.983141f, 1.416684f, 2.159938f, +0.669771f, 1.645294f, 0.184123f, 2.246580f, 0.788450f, 3.463193f, 1.406813f, 3.935455f, 0.681978f, 2.422931f, 0.722837f, 3.372122f, 1.120144f, 4.305515f, 1.888912f, 4.279271f, +0.068669f, 0.152428f, 0.021955f, 0.181503f, 0.141392f, 0.561191f, 0.293410f, 0.556121f, 0.027831f, 0.089348f, 0.034308f, 0.108440f, 0.182428f, 0.633615f, 0.357780f, 0.549176f, +0.138533f, 0.442738f, 0.063006f, 0.582608f, 0.250412f, 1.430977f, 0.739198f, 1.567119f, 0.164548f, 0.760567f, 0.288539f, 1.020118f, 0.346139f, 1.730921f, 0.965677f, 1.657956f, +0.060772f, 0.230166f, 0.029719f, 0.223717f, 0.167515f, 1.134432f, 0.531703f, 0.917645f, 0.088025f, 0.482168f, 0.165969f, 0.477681f, 0.184577f, 1.093836f, 0.553694f, 0.773882f, +0.280833f, 0.688688f, 0.082150f, 0.918874f, 0.511721f, 2.243847f, 0.971564f, 2.491533f, 0.392041f, 1.390463f, 0.442157f, 1.890936f, 0.606076f, 2.325602f, 1.087527f, 2.258578f, +0.381787f, 0.579589f, 0.114304f, 0.852926f, 0.493720f, 1.340184f, 0.959399f, 1.641327f, 0.077673f, 0.170539f, 0.089660f, 0.255798f, 0.814554f, 1.934873f, 1.495940f, 2.072573f, +0.598295f, 1.307693f, 0.254806f, 2.126703f, 0.679228f, 2.654547f, 1.877540f, 3.592785f, 0.356725f, 1.127657f, 0.585754f, 1.869227f, 1.200561f, 4.105896f, 3.136415f, 4.860437f, +0.360626f, 0.934105f, 0.165143f, 
1.122079f, 0.624320f, 2.891550f, 1.855633f, 2.890670f, 0.262205f, 0.982274f, 0.462950f, 1.202665f, 0.879641f, 3.565147f, 2.470960f, 3.117251f, +1.113311f, 1.867192f, 0.304960f, 3.078889f, 1.274093f, 3.820835f, 2.265203f, 5.243284f, 0.780156f, 1.892373f, 0.823939f, 3.180504f, 1.929601f, 5.063763f, 3.242269f, 6.077779f, +0.186969f, 0.353701f, 0.057547f, 0.524119f, 0.277100f, 0.937320f, 0.553564f, 1.155905f, 0.048923f, 0.133855f, 0.058057f, 0.202168f, 0.332136f, 0.983144f, 0.627082f, 1.060420f, +0.246337f, 0.670946f, 0.107854f, 1.098733f, 0.320506f, 1.560917f, 0.910802f, 2.127278f, 0.188905f, 0.744141f, 0.318888f, 1.242062f, 0.411573f, 1.754036f, 1.105375f, 2.090785f, +0.060033f, 0.193773f, 0.028262f, 0.234383f, 0.119109f, 0.687444f, 0.363952f, 0.692004f, 0.056139f, 0.262076f, 0.101900f, 0.323104f, 0.121923f, 0.615779f, 0.352094f, 0.542154f, +0.434435f, 0.907958f, 0.122339f, 1.507559f, 0.569794f, 2.129330f, 1.041447f, 2.942331f, 0.391549f, 1.183531f, 0.425122f, 2.002960f, 0.626939f, 2.050213f, 1.082979f, 2.477845f, +0.176175f, 0.406062f, 0.050504f, 0.510904f, 0.253283f, 1.043852f, 0.471270f, 1.093014f, 0.061599f, 0.205340f, 0.068084f, 0.263332f, 0.390063f, 1.406749f, 0.685922f, 1.288338f, +0.326230f, 1.082590f, 0.133034f, 1.505293f, 0.411743f, 2.443152f, 1.089797f, 2.827141f, 0.334288f, 1.604404f, 0.525591f, 2.273813f, 0.679336f, 3.527423f, 1.699337f, 3.570105f, +0.175221f, 0.689086f, 0.076831f, 0.707713f, 0.337239f, 2.371432f, 0.959773f, 2.026911f, 0.218951f, 1.245343f, 0.370157f, 1.303638f, 0.443533f, 2.729273f, 1.192976f, 2.040316f, +0.524821f, 1.336390f, 0.137652f, 1.884058f, 0.667726f, 3.040217f, 1.136711f, 3.567022f, 0.632055f, 2.327713f, 0.639167f, 3.344835f, 0.943961f, 3.761051f, 1.518733f, 3.859552f, +0.058024f, 0.133509f, 0.017700f, 0.164139f, 0.129123f, 0.531243f, 0.255648f, 0.543545f, 0.027814f, 0.092561f, 0.032713f, 0.115989f, 0.165778f, 0.596850f, 0.310200f, 0.534113f, +0.099649f, 0.330117f, 0.043240f, 0.448518f, 0.194675f, 1.153166f, 0.548284f, 
1.303899f, 0.139993f, 0.670744f, 0.234212f, 0.928865f, 0.267770f, 1.388010f, 0.712743f, 1.372686f, +0.049126f, 0.192866f, 0.022921f, 0.193550f, 0.146352f, 1.027377f, 0.443207f, 0.858043f, 0.084161f, 0.477870f, 0.151400f, 0.488802f, 0.160465f, 0.985734f, 0.459265f, 0.720055f, +0.216754f, 0.550994f, 0.060494f, 0.759037f, 0.426866f, 1.940241f, 0.773249f, 2.224396f, 0.357891f, 1.315777f, 0.385111f, 1.847492f, 0.503085f, 2.001032f, 0.861280f, 2.006489f, +0.411100f, 0.646920f, 0.117429f, 0.982935f, 0.574573f, 1.616715f, 1.065255f, 2.044312f, 0.098923f, 0.225139f, 0.108946f, 0.348666f, 0.943281f, 2.322618f, 1.652817f, 2.568727f, +0.548426f, 1.242546f, 0.222844f, 2.086394f, 0.672909f, 2.726060f, 1.774678f, 3.809423f, 0.386755f, 1.267309f, 0.605908f, 2.168955f, 1.183536f, 4.195746f, 2.949988f, 5.128135f, +0.371494f, 0.997458f, 0.162310f, 1.237102f, 0.695089f, 3.337093f, 1.971130f, 3.444444f, 0.319474f, 1.240597f, 0.538167f, 1.568287f, 0.974530f, 4.094221f, 2.611831f, 3.696141f, +1.095021f, 1.903702f, 0.286179f, 3.241060f, 1.354396f, 4.210240f, 2.297424f, 5.965335f, 0.907583f, 2.282001f, 0.914513f, 3.959933f, 2.041118f, 5.552366f, 3.272196f, 6.880700f, +0.166206f, 0.274778f, 0.044167f, 0.407347f, 0.262275f, 0.775312f, 0.452368f, 0.956533f, 0.037776f, 0.090323f, 0.038704f, 0.136479f, 0.261706f, 0.676989f, 0.426603f, 0.730518f, +0.236447f, 0.562806f, 0.089381f, 0.922045f, 0.327555f, 1.394102f, 0.803663f, 1.900762f, 0.157495f, 0.542181f, 0.229543f, 0.905361f, 0.350162f, 1.304153f, 0.811961f, 1.555208f, +0.048239f, 0.136072f, 0.019607f, 0.164661f, 0.101905f, 0.513992f, 0.268843f, 0.517626f, 0.039183f, 0.159853f, 0.061405f, 0.197163f, 0.086838f, 0.383283f, 0.216515f, 0.337603f, +0.378028f, 0.690450f, 0.091911f, 1.146911f, 0.527910f, 1.724060f, 0.833071f, 2.383362f, 0.295940f, 0.781743f, 0.277417f, 1.323567f, 0.483552f, 1.381924f, 0.721175f, 1.670891f, +0.200184f, 0.403222f, 0.049547f, 0.507552f, 0.306432f, 1.103656f, 0.492266f, 1.156138f, 0.060796f, 0.177110f, 0.058016f, 
0.227229f, 0.392860f, 1.238189f, 0.596459f, 1.134460f, +0.400253f, 1.160757f, 0.140921f, 1.614683f, 0.537873f, 2.789144f, 1.229140f, 3.228918f, 0.356246f, 1.494202f, 0.483592f, 2.118553f, 0.738777f, 3.352384f, 1.595552f, 3.394425f, +0.179970f, 0.618522f, 0.068132f, 0.635518f, 0.368804f, 2.266394f, 0.906210f, 1.937976f, 0.195335f, 0.970932f, 0.285116f, 1.016824f, 0.403793f, 2.171438f, 0.937708f, 1.624003f, +0.583736f, 1.298989f, 0.132188f, 1.832128f, 0.790763f, 3.146445f, 1.162253f, 3.693264f, 0.610630f, 1.965260f, 0.533139f, 2.825233f, 0.930632f, 3.240413f, 1.292730f, 3.326727f, +0.079741f, 0.160344f, 0.021001f, 0.197217f, 0.188940f, 0.679330f, 0.322972f, 0.695363f, 0.033202f, 0.096559f, 0.033714f, 0.121051f, 0.201939f, 0.635371f, 0.326241f, 0.568833f, +0.147868f, 0.428092f, 0.055398f, 0.581887f, 0.307578f, 1.592225f, 0.747917f, 1.801132f, 0.180438f, 0.755517f, 0.260635f, 1.046718f, 0.352195f, 1.595442f, 0.809387f, 1.578514f, +0.061026f, 0.209377f, 0.024584f, 0.210212f, 0.193575f, 1.187537f, 0.506126f, 0.992236f, 0.090811f, 0.450611f, 0.141043f, 0.461120f, 0.176688f, 0.948533f, 0.436607f, 0.693182f, +0.291585f, 0.647756f, 0.070261f, 0.892723f, 0.611410f, 2.428642f, 0.956230f, 2.785537f, 0.418183f, 1.343585f, 0.388511f, 1.887359f, 0.599871f, 2.085151f, 0.886672f, 2.091748f, +0.379627f, 0.522068f, 0.093624f, 0.793578f, 0.564934f, 1.389161f, 0.904291f, 1.757338f, 0.079346f, 0.157814f, 0.075447f, 0.244508f, 0.772092f, 1.661394f, 1.168033f, 1.838239f, +0.546831f, 1.082715f, 0.191840f, 1.818810f, 0.714388f, 2.529183f, 1.626672f, 3.535845f, 0.334956f, 0.959186f, 0.453066f, 1.642327f, 1.046007f, 3.240632f, 2.251003f, 3.962499f, +0.310092f, 0.727614f, 0.116973f, 0.902819f, 0.617764f, 2.591896f, 1.512515f, 2.676439f, 0.231628f, 0.786059f, 0.336881f, 0.994120f, 0.721029f, 2.647258f, 1.668419f, 2.390906f, +0.989811f, 1.503820f, 0.223342f, 2.561374f, 1.303522f, 3.541170f, 1.909047f, 5.019540f, 0.712581f, 1.565780f, 0.619926f, 2.718266f, 1.635372f, 3.887709f, 2.263551f, 
4.819892f, +0.323132f, 0.423436f, 0.098588f, 0.798385f, 0.306621f, 0.718447f, 0.607193f, 1.127351f, 0.065181f, 0.123533f, 0.076675f, 0.237406f, 0.438428f, 0.898958f, 0.820537f, 1.233759f, +0.273041f, 0.515142f, 0.118502f, 1.073400f, 0.227452f, 0.767315f, 0.640722f, 1.330603f, 0.161413f, 0.440445f, 0.270101f, 0.935427f, 0.348430f, 1.028603f, 0.927621f, 1.560087f, +0.065452f, 0.146342f, 0.030544f, 0.225232f, 0.083145f, 0.332404f, 0.251841f, 0.425764f, 0.047184f, 0.152581f, 0.084898f, 0.239356f, 0.101529f, 0.355198f, 0.290640f, 0.397922f, +0.419613f, 0.607477f, 0.117133f, 1.283420f, 0.352368f, 0.912141f, 0.638422f, 1.603765f, 0.291546f, 0.610437f, 0.313781f, 1.314510f, 0.462508f, 1.047691f, 0.791966f, 1.611160f, +0.320376f, 0.511503f, 0.091041f, 0.818890f, 0.294901f, 0.841878f, 0.543916f, 1.121673f, 0.086354f, 0.199400f, 0.094612f, 0.325377f, 0.541776f, 1.353450f, 0.944391f, 1.577197f, +0.380475f, 0.874594f, 0.153801f, 1.547369f, 0.307456f, 1.263711f, 0.806668f, 1.860694f, 0.300553f, 0.999202f, 0.468425f, 1.801875f, 0.605141f, 2.176558f, 1.500528f, 2.803006f, +0.201013f, 0.547585f, 0.087371f, 0.715593f, 0.247703f, 1.206546f, 0.698801f, 1.312194f, 0.193635f, 0.762895f, 0.324499f, 1.016162f, 0.388628f, 1.656515f, 1.036173f, 1.575710f, +0.533382f, 0.940807f, 0.138676f, 1.687688f, 0.434490f, 1.370335f, 0.733203f, 2.045778f, 0.495198f, 1.263264f, 0.496399f, 2.309776f, 0.732742f, 2.022306f, 1.168614f, 2.640614f, +0.078857f, 0.125686f, 0.023845f, 0.196616f, 0.112355f, 0.320203f, 0.220509f, 0.416867f, 0.029141f, 0.067174f, 0.033974f, 0.107107f, 0.172081f, 0.429153f, 0.319183f, 0.488665f, +0.086855f, 0.199311f, 0.037360f, 0.344568f, 0.108640f, 0.445770f, 0.303302f, 0.641347f, 0.094065f, 0.312189f, 0.155999f, 0.550103f, 0.178261f, 0.640069f, 0.470348f, 0.805445f, +0.042118f, 0.114539f, 0.019480f, 0.146260f, 0.080337f, 0.390647f, 0.241164f, 0.415139f, 0.055625f, 0.218780f, 0.099192f, 0.284748f, 0.105078f, 0.447126f, 0.298116f, 0.415591f, +0.164633f, 0.289892f, 0.045547f, 
0.508139f, 0.207585f, 0.653581f, 0.372748f, 0.953425f, 0.209554f, 0.533665f, 0.223524f, 0.953453f, 0.291850f, 0.804106f, 0.495286f, 1.025951f, +0.292258f, 0.318574f, 0.082754f, 0.615905f, 0.261528f, 0.509738f, 0.480639f, 0.820146f, 0.054214f, 0.085469f, 0.059186f, 0.168421f, 0.512189f, 0.873589f, 0.889623f, 1.229356f, +0.250048f, 0.392427f, 0.100716f, 0.838442f, 0.196434f, 0.551234f, 0.513538f, 0.980145f, 0.135937f, 0.308550f, 0.211107f, 0.671930f, 0.412152f, 1.012105f, 1.018330f, 1.574005f, +0.166607f, 0.309868f, 0.072157f, 0.489010f, 0.199589f, 0.663751f, 0.561053f, 0.871738f, 0.110452f, 0.297105f, 0.184437f, 0.477898f, 0.333816f, 0.971457f, 0.886848f, 1.115915f, +0.435064f, 0.523926f, 0.112710f, 1.134981f, 0.344533f, 0.741878f, 0.579321f, 1.337492f, 0.277981f, 0.484155f, 0.277658f, 1.069022f, 0.619397f, 1.167131f, 0.984312f, 1.840367f, +0.326398f, 0.595677f, 0.105296f, 0.854913f, 0.351753f, 1.147851f, 0.736513f, 1.370998f, 0.071396f, 0.188448f, 0.088803f, 0.275669f, 0.555007f, 1.584874f, 1.098288f, 1.655665f, +0.299813f, 0.787779f, 0.137584f, 1.249470f, 0.283649f, 1.332661f, 0.844847f, 1.759062f, 0.192198f, 0.730391f, 0.340058f, 1.180758f, 0.479480f, 1.971326f, 1.349720f, 2.275864f, +0.072091f, 0.224484f, 0.035572f, 0.262987f, 0.104007f, 0.579097f, 0.333099f, 0.564599f, 0.056357f, 0.253806f, 0.107217f, 0.303064f, 0.140147f, 0.682841f, 0.424197f, 0.582283f, +0.445045f, 0.897306f, 0.131357f, 1.443000f, 0.424443f, 1.530175f, 0.813111f, 2.047890f, 0.335312f, 0.977774f, 0.381581f, 1.602685f, 0.614762f, 1.939444f, 1.113045f, 2.270227f, +0.309271f, 0.687676f, 0.092925f, 0.838007f, 0.323314f, 1.285444f, 0.630520f, 1.303638f, 0.090396f, 0.290702f, 0.104721f, 0.361074f, 0.655441f, 2.280399f, 1.208046f, 2.022746f, +0.399265f, 1.278195f, 0.170652f, 1.721358f, 0.366426f, 2.097523f, 1.016521f, 2.350828f, 0.342013f, 1.583546f, 0.563612f, 2.173646f, 0.795839f, 3.986520f, 2.086558f, 3.907820f, +0.211591f, 0.802751f, 0.097243f, 0.798513f, 0.296123f, 2.008823f, 0.883312f, 
1.662963f, 0.221026f, 1.212776f, 0.391645f, 1.229605f, 0.512673f, 3.043392f, 1.445297f, 2.203562f, +0.540639f, 1.328081f, 0.148624f, 1.813439f, 0.500169f, 2.196946f, 0.892440f, 2.496534f, 0.544295f, 1.933770f, 0.576905f, 2.691331f, 0.930793f, 3.577701f, 1.569606f, 3.555893f, +0.159177f, 0.353331f, 0.050892f, 0.420727f, 0.257574f, 1.022323f, 0.534505f, 1.013089f, 0.063787f, 0.204779f, 0.078630f, 0.248535f, 0.435316f, 1.511958f, 0.853749f, 1.310464f, +0.190585f, 0.609090f, 0.086679f, 0.801513f, 0.270739f, 1.547136f, 0.799202f, 1.694329f, 0.223825f, 1.034557f, 0.392484f, 1.387610f, 0.490211f, 2.451373f, 1.367616f, 2.348038f, +0.092705f, 0.351110f, 0.045336f, 0.341271f, 0.200824f, 1.360008f, 0.637429f, 1.100113f, 0.132766f, 0.727247f, 0.250330f, 0.720480f, 0.289852f, 1.717716f, 0.869498f, 1.215273f, +0.348935f, 0.855695f, 0.102071f, 1.141701f, 0.499679f, 2.191043f, 0.948700f, 2.432901f, 0.481627f, 1.708198f, 0.543195f, 2.323035f, 0.775213f, 2.974605f, 1.391023f, 2.888878f, +0.304596f, 0.462406f, 0.091193f, 0.680479f, 0.309561f, 0.840291f, 0.601540f, 1.029106f, 0.061271f, 0.134527f, 0.070727f, 0.201783f, 0.668993f, 1.589110f, 1.228614f, 1.702202f, +0.283294f, 0.619195f, 0.120651f, 1.006998f, 0.252754f, 0.987810f, 0.698670f, 1.336948f, 0.167008f, 0.527936f, 0.274233f, 0.875118f, 0.585199f, 2.001370f, 1.528808f, 2.369162f, +0.189341f, 0.490438f, 0.086706f, 0.589131f, 0.257607f, 1.193111f, 0.765671f, 1.192748f, 0.136117f, 0.509922f, 0.240328f, 0.624333f, 0.475436f, 1.926922f, 1.335526f, 1.684839f, +0.476102f, 0.798495f, 0.130414f, 1.316672f, 0.428199f, 1.284113f, 0.761293f, 1.762172f, 0.329874f, 0.800153f, 0.348387f, 1.344814f, 0.849471f, 2.229229f, 1.427349f, 2.675631f, +0.085535f, 0.161812f, 0.026327f, 0.239776f, 0.099626f, 0.336996f, 0.199024f, 0.415584f, 0.022130f, 0.060547f, 0.026261f, 0.091447f, 0.156419f, 0.463010f, 0.295323f, 0.499403f, +0.066884f, 0.182172f, 0.029284f, 0.298322f, 0.068390f, 0.333070f, 0.194348f, 0.453920f, 0.050713f, 0.199770f, 0.085608f, 
0.333442f, 0.115037f, 0.490264f, 0.308959f, 0.584387f, +0.018074f, 0.058338f, 0.008509f, 0.070564f, 0.028182f, 0.162652f, 0.086113f, 0.163731f, 0.016711f, 0.078014f, 0.030333f, 0.096180f, 0.037787f, 0.190846f, 0.109123f, 0.168028f, +0.106532f, 0.222649f, 0.030000f, 0.369683f, 0.109808f, 0.410355f, 0.200703f, 0.567033f, 0.094935f, 0.286958f, 0.103075f, 0.485636f, 0.158263f, 0.517550f, 0.273384f, 0.625500f, +0.071609f, 0.165050f, 0.020528f, 0.207665f, 0.080908f, 0.333445f, 0.150541f, 0.349149f, 0.024756f, 0.082524f, 0.027362f, 0.105830f, 0.163214f, 0.588624f, 0.287009f, 0.539077f, +0.078698f, 0.261159f, 0.032093f, 0.363130f, 0.078060f, 0.463184f, 0.206609f, 0.535982f, 0.079734f, 0.382682f, 0.125364f, 0.542349f, 0.168704f, 0.875985f, 0.422006f, 0.886585f, +0.046870f, 0.184324f, 0.020552f, 0.189307f, 0.070894f, 0.498518f, 0.201762f, 0.426093f, 0.057908f, 0.329367f, 0.097899f, 0.344785f, 0.122133f, 0.751542f, 0.328502f, 0.561828f, +0.114344f, 0.291163f, 0.029991f, 0.410485f, 0.114331f, 0.520559f, 0.194632f, 0.610760f, 0.136157f, 0.501436f, 0.137689f, 0.720545f, 0.211717f, 0.843549f, 0.340630f, 0.865641f, +0.036303f, 0.083531f, 0.011074f, 0.102695f, 0.063490f, 0.261212f, 0.125702f, 0.267261f, 0.017206f, 0.057260f, 0.020237f, 0.071752f, 0.106773f, 0.384416f, 0.199792f, 0.344009f, +0.037002f, 0.122581f, 0.016056f, 0.166547f, 0.056810f, 0.336519f, 0.160001f, 0.380506f, 0.051398f, 0.246261f, 0.085990f, 0.341030f, 0.102357f, 0.530575f, 0.272450f, 0.524717f, +0.020227f, 0.079411f, 0.009438f, 0.079693f, 0.047357f, 0.332442f, 0.143414f, 0.277648f, 0.034262f, 0.194543f, 0.061636f, 0.198994f, 0.068015f, 0.417812f, 0.194663f, 0.305202f, +0.072692f, 0.184784f, 0.020288f, 0.254555f, 0.112505f, 0.511371f, 0.203798f, 0.586263f, 0.118673f, 0.436298f, 0.127699f, 0.612610f, 0.173683f, 0.690829f, 0.297345f, 0.692713f, +0.088527f, 0.139308f, 0.025287f, 0.211666f, 0.097237f, 0.273603f, 0.180277f, 0.345967f, 0.021062f, 0.047936f, 0.023196f, 0.074237f, 0.209105f, 0.514875f, 0.366394f, 
0.569432f, +0.070091f, 0.158802f, 0.028480f, 0.266649f, 0.067587f, 0.273805f, 0.178248f, 0.382617f, 0.048872f, 0.160143f, 0.076565f, 0.274080f, 0.155712f, 0.552015f, 0.388116f, 0.674685f, +0.052646f, 0.141353f, 0.023002f, 0.175314f, 0.077413f, 0.371656f, 0.219527f, 0.383611f, 0.044764f, 0.173830f, 0.075407f, 0.219745f, 0.142169f, 0.597283f, 0.381025f, 0.539209f, +0.126394f, 0.219738f, 0.033033f, 0.374104f, 0.122861f, 0.381922f, 0.208405f, 0.541131f, 0.103580f, 0.260438f, 0.104371f, 0.451935f, 0.242534f, 0.659753f, 0.388815f, 0.817591f, +0.471845f, 0.780071f, 0.125387f, 1.156424f, 0.585156f, 1.729779f, 1.009267f, 2.134095f, 0.106034f, 0.253532f, 0.108640f, 0.383090f, 0.764828f, 1.978480f, 1.246734f, 2.134919f, +0.398386f, 0.948264f, 0.150596f, 1.553541f, 0.433727f, 1.845978f, 1.064158f, 2.516865f, 0.262373f, 0.903229f, 0.382399f, 1.508257f, 0.607348f, 2.262023f, 1.408326f, 2.697472f, +0.090122f, 0.254219f, 0.036631f, 0.307629f, 0.149622f, 0.754667f, 0.394728f, 0.760003f, 0.072379f, 0.295285f, 0.113429f, 0.364205f, 0.167012f, 0.737148f, 0.416413f, 0.649294f, +0.575250f, 1.050665f, 0.139862f, 1.745267f, 0.631326f, 2.061799f, 0.996268f, 2.850257f, 0.445265f, 1.176195f, 0.417396f, 1.991412f, 0.757483f, 2.164784f, 1.129720f, 2.617451f, +0.504927f, 1.017055f, 0.124973f, 1.280208f, 0.607429f, 2.187739f, 0.975802f, 2.291772f, 0.151620f, 0.441698f, 0.144688f, 0.566690f, 1.020085f, 3.215030f, 1.548739f, 2.945693f, +0.599174f, 1.737639f, 0.210957f, 2.417161f, 0.632790f, 3.281337f, 1.446043f, 3.798717f, 0.527291f, 2.211619f, 0.715781f, 3.135742f, 1.138491f, 5.166184f, 2.458822f, 5.230972f, +0.298734f, 1.026692f, 0.113094f, 1.054905f, 0.481108f, 2.956531f, 1.182158f, 2.528107f, 0.320589f, 1.593518f, 0.467940f, 1.668837f, 0.689989f, 3.710485f, 1.602325f, 2.775046f, +0.789218f, 1.756247f, 0.178719f, 2.477057f, 0.840211f, 3.343197f, 1.234930f, 3.924209f, 0.816284f, 2.627139f, 0.712694f, 3.776742f, 1.295258f, 4.510020f, 1.799227f, 4.630151f, +0.309597f, 0.622542f, 0.081538f, 
0.765702f, 0.576501f, 2.072800f, 0.985467f, 2.121722f, 0.127457f, 0.370672f, 0.129424f, 0.464691f, 0.807113f, 2.539456f, 1.303924f, 2.273517f, +0.340727f, 0.986440f, 0.127651f, 1.340824f, 0.556994f, 2.883364f, 1.354404f, 3.261674f, 0.411096f, 1.721316f, 0.593812f, 2.384766f, 0.835439f, 3.784532f, 1.919940f, 3.744379f, +0.155925f, 0.534970f, 0.062812f, 0.537103f, 0.388697f, 2.384565f, 1.016297f, 1.992403f, 0.229414f, 1.138375f, 0.356317f, 1.164924f, 0.464734f, 2.494888f, 1.148389f, 1.823247f, +0.606821f, 1.348052f, 0.146221f, 1.857856f, 0.999976f, 3.972102f, 1.563937f, 4.555812f, 0.860486f, 2.764670f, 0.799432f, 3.883584f, 1.285141f, 4.467151f, 1.899574f, 4.481284f, +0.507293f, 0.697637f, 0.125109f, 1.060455f, 0.593283f, 1.458871f, 0.949669f, 1.845524f, 0.104835f, 0.208512f, 0.099684f, 0.323056f, 1.062110f, 2.285457f, 1.606776f, 2.528729f, +0.433684f, 0.858687f, 0.152145f, 1.442474f, 0.445263f, 1.576386f, 1.013870f, 2.203817f, 0.262659f, 0.752153f, 0.355276f, 1.287844f, 0.853991f, 2.645747f, 1.837785f, 3.235100f, +0.272696f, 0.639865f, 0.102867f, 0.793941f, 0.426945f, 1.791294f, 1.045320f, 1.849723f, 0.201402f, 0.683480f, 0.292919f, 0.864390f, 0.652737f, 2.396525f, 1.510396f, 2.164453f, +0.708981f, 1.077156f, 0.159975f, 1.834660f, 0.733774f, 1.993384f, 1.074634f, 2.825583f, 0.504661f, 1.108910f, 0.439041f, 1.925118f, 1.205860f, 2.866647f, 1.669055f, 3.554003f, +0.290119f, 0.380175f, 0.088516f, 0.716816f, 0.352134f, 0.825087f, 0.697320f, 1.294686f, 0.068961f, 0.130697f, 0.081122f, 0.251173f, 0.432588f, 0.886984f, 0.809608f, 1.217325f, +0.328164f, 0.619141f, 0.142426f, 1.290103f, 0.349674f, 1.179633f, 0.985015f, 2.045604f, 0.228607f, 0.623793f, 0.382539f, 1.324827f, 0.460214f, 1.358601f, 1.225222f, 2.060597f, +0.061296f, 0.137050f, 0.028605f, 0.210931f, 0.099599f, 0.398186f, 0.301679f, 0.510020f, 0.052071f, 0.168382f, 0.093690f, 0.264144f, 0.104491f, 0.365561f, 0.299120f, 0.409532f, +0.526334f, 0.761978f, 0.146924f, 1.609834f, 0.565351f, 1.463472f, 1.024308f, 
2.573139f, 0.430929f, 0.902277f, 0.463794f, 1.942955f, 0.637547f, 1.444199f, 1.091692f, 2.220917f, +0.389416f, 0.621731f, 0.110660f, 0.995359f, 0.458501f, 1.308921f, 0.845662f, 1.743937f, 0.123687f, 0.285605f, 0.135516f, 0.466045f, 0.723695f, 1.807914f, 1.261501f, 2.106791f, +0.619083f, 1.423077f, 0.250254f, 2.517769f, 0.639905f, 2.630146f, 1.678909f, 3.872641f, 0.576274f, 1.915851f, 0.898149f, 3.454882f, 1.082082f, 3.892006f, 2.683166f, 5.012189f, +0.254854f, 0.694256f, 0.110773f, 0.907265f, 0.401706f, 1.956690f, 1.133266f, 2.128023f, 0.289293f, 1.139774f, 0.484806f, 1.518159f, 0.541481f, 2.308048f, 1.443715f, 2.195460f, +0.905754f, 1.597615f, 0.235491f, 2.865919f, 0.943760f, 2.976519f, 1.592597f, 4.443654f, 0.990917f, 2.527855f, 0.993319f, 4.621977f, 1.367427f, 3.773982f, 2.180840f, 4.927854f, +0.081929f, 0.130582f, 0.024774f, 0.204275f, 0.149314f, 0.425531f, 0.293044f, 0.553993f, 0.035677f, 0.082241f, 0.041594f, 0.131130f, 0.196476f, 0.489993f, 0.364433f, 0.557941f, +0.120798f, 0.277202f, 0.051960f, 0.479224f, 0.193269f, 0.793021f, 0.539573f, 1.140951f, 0.154162f, 0.511644f, 0.255666f, 0.901560f, 0.272459f, 0.978300f, 0.718893f, 1.231065f, +0.045644f, 0.124127f, 0.021110f, 0.158502f, 0.111361f, 0.541508f, 0.334297f, 0.575459f, 0.071034f, 0.279385f, 0.126669f, 0.363628f, 0.125142f, 0.532502f, 0.355039f, 0.494945f, +0.238963f, 0.420774f, 0.066110f, 0.737558f, 0.385406f, 1.213454f, 0.692052f, 1.770150f, 0.358423f, 0.912785f, 0.382317f, 1.630794f, 0.465538f, 1.282650f, 0.790042f, 1.636520f, +0.486178f, 0.529955f, 0.137662f, 1.024573f, 0.556490f, 1.084641f, 1.022724f, 1.745140f, 0.106274f, 0.167542f, 0.116020f, 0.330150f, 0.936354f, 1.597044f, 1.626357f, 2.247437f, +0.556827f, 0.873887f, 0.224283f, 1.867108f, 0.559530f, 1.570156f, 1.462780f, 2.791882f, 0.356715f, 0.809671f, 0.553968f, 1.763222f, 1.008637f, 2.476870f, 2.492104f, 3.851978f, +0.289092f, 0.537674f, 0.125205f, 0.848516f, 0.442985f, 1.473186f, 1.245250f, 1.934812f, 0.225841f, 0.607489f, 0.377118f, 
0.977156f, 0.636547f, 1.852453f, 1.691114f, 2.127917f, +1.011112f, 1.217632f, 0.261943f, 2.637756f, 1.024205f, 2.205407f, 1.722167f, 3.976008f, 0.761284f, 1.325917f, 0.760401f, 2.927649f, 1.581963f, 2.980895f, 2.513969f, 4.700365f, +0.313634f, 0.572382f, 0.101178f, 0.821481f, 0.432338f, 1.410819f, 0.905245f, 1.685088f, 0.080842f, 0.213380f, 0.100552f, 0.312140f, 0.586078f, 1.673600f, 1.159773f, 1.748354f, +0.385650f, 1.013322f, 0.176975f, 1.607197f, 0.466696f, 2.192670f, 1.390055f, 2.894240f, 0.291325f, 1.107096f, 0.515446f, 1.789743f, 0.677790f, 2.786654f, 1.907956f, 3.217147f, +0.072256f, 0.224996f, 0.035653f, 0.263586f, 0.133341f, 0.742422f, 0.427044f, 0.723835f, 0.066561f, 0.299763f, 0.126631f, 0.357940f, 0.154367f, 0.752125f, 0.467238f, 0.641364f, +0.597444f, 1.204574f, 0.176338f, 1.937132f, 0.728824f, 2.627508f, 1.396217f, 3.516492f, 0.530430f, 1.546742f, 0.603623f, 2.535288f, 0.906946f, 2.861221f, 1.642052f, 3.349218f, +0.402323f, 0.894579f, 0.120884f, 1.090141f, 0.537984f, 2.138938f, 1.049165f, 2.169213f, 0.138571f, 0.445625f, 0.160529f, 0.553500f, 0.937021f, 3.260071f, 1.727028f, 2.891728f, +0.695287f, 2.225869f, 0.297176f, 2.997601f, 0.816205f, 4.672179f, 2.264275f, 5.236409f, 0.701829f, 3.249523f, 1.156562f, 4.460440f, 1.523032f, 7.629179f, 3.993137f, 7.478566f, +0.287108f, 1.089255f, 0.131949f, 1.083504f, 0.513963f, 3.486585f, 1.533107f, 2.886298f, 0.353410f, 1.939168f, 0.626220f, 1.966077f, 0.764487f, 4.538242f, 2.155196f, 3.285906f, +0.982560f, 2.413663f, 0.270110f, 3.295756f, 1.162731f, 5.107184f, 2.074633f, 5.803630f, 1.165662f, 4.141363f, 1.235500f, 5.763756f, 1.859029f, 7.145577f, 3.134901f, 7.102022f, +0.176993f, 0.392879f, 0.056588f, 0.467819f, 0.366344f, 1.454036f, 0.760220f, 1.440902f, 0.083578f, 0.268317f, 0.103027f, 0.325650f, 0.531940f, 1.847557f, 1.043250f, 1.601339f, +0.283683f, 0.906621f, 0.129020f, 1.193041f, 0.515473f, 2.945664f, 1.521637f, 3.225911f, 0.392590f, 1.814618f, 0.688419f, 2.433875f, 0.801879f, 4.009915f, 2.237123f, 
3.840881f, +0.107521f, 0.407224f, 0.052581f, 0.395813f, 0.297931f, 2.017632f, 0.945654f, 1.632067f, 0.181453f, 0.993937f, 0.342128f, 0.984689f, 0.369444f, 2.189390f, 1.108257f, 1.548979f, +0.542048f, 1.329269f, 0.158561f, 1.773562f, 0.992877f, 4.353666f, 1.885095f, 4.834243f, 0.881639f, 3.126934f, 0.994343f, 4.252419f, 1.323415f, 5.078139f, 2.374704f, 4.931788f, +0.542293f, 0.823252f, 0.162357f, 1.211501f, 0.704961f, 1.913590f, 1.369883f, 2.343579f, 0.128544f, 0.282231f, 0.148381f, 0.423330f, 1.308916f, 3.109169f, 2.403842f, 3.330440f, +0.675172f, 1.475722f, 0.287546f, 2.399968f, 0.770523f, 3.011345f, 2.129900f, 4.075693f, 0.469031f, 1.482671f, 0.770164f, 2.457705f, 1.532716f, 5.241861f, 4.004157f, 6.205159f, +0.351616f, 0.910767f, 0.161018f, 1.094044f, 0.611914f, 2.834092f, 1.818760f, 2.833230f, 0.297866f, 1.115869f, 0.525914f, 1.366234f, 0.970277f, 3.932495f, 2.725564f, 3.438448f, +1.184204f, 1.986089f, 0.324379f, 3.274944f, 1.362331f, 4.085449f, 2.422081f, 5.606411f, 0.966853f, 2.345232f, 1.021114f, 3.941622f, 2.321969f, 6.093437f, 3.901557f, 7.313645f, +0.135791f, 0.256884f, 0.041795f, 0.380655f, 0.202306f, 0.684322f, 0.404148f, 0.843907f, 0.041398f, 0.113267f, 0.049127f, 0.171073f, 0.272896f, 0.807788f, 0.515234f, 0.871280f, +0.142140f, 0.387146f, 0.062233f, 0.633985f, 0.185907f, 0.905396f, 0.528303f, 1.233908f, 0.126999f, 0.500278f, 0.214385f, 0.835025f, 0.268666f, 1.144997f, 0.721565f, 1.364820f, +0.029929f, 0.096604f, 0.014090f, 0.116849f, 0.059692f, 0.344515f, 0.182396f, 0.346801f, 0.032609f, 0.152229f, 0.059189f, 0.187677f, 0.068764f, 0.347299f, 0.198581f, 0.305775f, +0.236278f, 0.493815f, 0.066537f, 0.819922f, 0.311521f, 1.164160f, 0.569387f, 1.608649f, 0.248115f, 0.749975f, 0.269390f, 1.269228f, 0.385747f, 1.261468f, 0.666342f, 1.524584f, +0.153905f, 0.354732f, 0.044120f, 0.446321f, 0.222426f, 0.916682f, 0.413857f, 0.959855f, 0.062697f, 0.209002f, 0.069298f, 0.268029f, 0.385498f, 1.390284f, 0.677894f, 1.273260f, +0.226422f, 0.751377f, 0.092333f, 
1.044756f, 0.287271f, 1.704574f, 0.760346f, 1.972481f, 0.270324f, 1.297408f, 0.425022f, 1.838728f, 0.533406f, 2.769685f, 1.334297f, 2.803199f, +0.105073f, 0.413219f, 0.046073f, 0.424389f, 0.203290f, 1.429517f, 0.578558f, 1.221837f, 0.152976f, 0.870092f, 0.258620f, 0.910821f, 0.300893f, 1.851539f, 0.809315f, 1.384150f, +0.343334f, 0.874255f, 0.090051f, 1.232536f, 0.439112f, 1.999316f, 0.747527f, 2.345755f, 0.481758f, 1.774205f, 0.487179f, 2.549464f, 0.698616f, 2.783514f, 1.123998f, 2.856414f, +0.066691f, 0.153453f, 0.020344f, 0.188659f, 0.149190f, 0.613805f, 0.295379f, 0.628018f, 0.037248f, 0.123955f, 0.043808f, 0.155328f, 0.215561f, 0.776084f, 0.403353f, 0.694508f, +0.090996f, 0.301452f, 0.039485f, 0.409572f, 0.178703f, 1.058557f, 0.503301f, 1.196923f, 0.148945f, 0.713636f, 0.249189f, 0.988263f, 0.276626f, 1.433912f, 0.736313f, 1.418081f, +0.038759f, 0.152167f, 0.018084f, 0.152707f, 0.116074f, 0.814827f, 0.351513f, 0.680526f, 0.077365f, 0.439281f, 0.139174f, 0.449331f, 0.143227f, 0.879838f, 0.409926f, 0.642700f, +0.186565f, 0.474252f, 0.052069f, 0.653319f, 0.369340f, 1.678764f, 0.669042f, 1.924624f, 0.358907f, 1.319513f, 0.386204f, 1.852738f, 0.489872f, 1.948477f, 0.838660f, 1.953791f, +0.260395f, 0.409766f, 0.074381f, 0.622600f, 0.365849f, 1.029413f, 0.678282f, 1.301678f, 0.073005f, 0.166152f, 0.080402f, 0.257314f, 0.675935f, 1.664338f, 1.184374f, 1.840695f, +0.275987f, 0.625292f, 0.112143f, 1.049946f, 0.340407f, 1.379042f, 0.897763f, 1.927087f, 0.226765f, 0.743057f, 0.355260f, 1.271716f, 0.673800f, 2.388684f, 1.679461f, 2.919504f, +0.161523f, 0.433689f, 0.070572f, 0.537884f, 0.303806f, 1.458558f, 0.861530f, 1.505478f, 0.161841f, 0.628468f, 0.272628f, 0.794471f, 0.479355f, 2.013882f, 1.284718f, 1.818073f, +0.519403f, 0.902986f, 0.135744f, 1.537337f, 0.645802f, 2.007522f, 1.095456f, 2.844385f, 0.501577f, 1.261150f, 0.505406f, 2.188462f, 1.095290f, 2.979471f, 1.755902f, 3.692272f, +0.337912f, 0.558647f, 0.089796f, 0.828173f, 0.536025f, 1.584545f, 0.924528f, 
1.954914f, 0.089482f, 0.213955f, 0.091681f, 0.323289f, 0.601934f, 1.557101f, 0.981203f, 1.680221f, +0.381923f, 0.909078f, 0.144373f, 1.489342f, 0.531860f, 2.263644f, 1.304931f, 3.086324f, 0.296399f, 1.020365f, 0.431991f, 1.703857f, 0.639868f, 2.383141f, 1.483734f, 2.841905f, +0.067321f, 0.189900f, 0.027363f, 0.229797f, 0.142963f, 0.721079f, 0.377159f, 0.726178f, 0.063712f, 0.259923f, 0.099845f, 0.320590f, 0.137103f, 0.605137f, 0.341840f, 0.533016f, +0.575543f, 1.051200f, 0.139933f, 1.746156f, 0.807951f, 2.638624f, 1.274991f, 3.647666f, 0.524960f, 1.386714f, 0.492103f, 2.347839f, 0.832866f, 2.380218f, 1.242147f, 2.877933f, +0.489544f, 0.986069f, 0.121165f, 1.241205f, 0.753301f, 2.713117f, 1.210138f, 2.842133f, 0.173224f, 0.504633f, 0.165303f, 0.647434f, 1.086877f, 3.425542f, 1.650146f, 3.138569f, +0.777649f, 2.255227f, 0.273795f, 3.137157f, 1.050510f, 5.447430f, 2.400613f, 6.306345f, 0.806432f, 3.382418f, 1.094706f, 4.795760f, 1.623834f, 7.368546f, 3.507027f, 7.460953f, +0.302108f, 1.038286f, 0.114371f, 1.066817f, 0.622342f, 3.824452f, 1.529193f, 3.270259f, 0.382043f, 1.898978f, 0.557638f, 1.988735f, 0.766831f, 4.123715f, 1.780773f, 3.084097f, +1.068998f, 2.378843f, 0.242076f, 3.355182f, 1.455722f, 5.792315f, 2.139601f, 6.798959f, 1.302890f, 4.193238f, 1.137548f, 6.028147f, 1.928047f, 6.713359f, 2.678226f, 6.892179f, +0.256567f, 0.515910f, 0.067571f, 0.634549f, 0.611104f, 2.197214f, 1.044617f, 2.249072f, 0.124467f, 0.361977f, 0.126388f, 0.453791f, 0.735056f, 2.312740f, 1.187513f, 2.070543f, +0.377989f, 1.094317f, 0.141611f, 1.487457f, 0.790375f, 4.091496f, 1.921901f, 4.628320f, 0.537406f, 2.250192f, 0.776261f, 3.117488f, 1.018517f, 4.613877f, 2.340677f, 4.564925f, +0.134783f, 0.462432f, 0.054296f, 0.464276f, 0.429774f, 2.636561f, 1.123697f, 2.202956f, 0.233682f, 1.159552f, 0.362945f, 1.186595f, 0.441474f, 2.370013f, 1.090910f, 1.731990f, +0.702559f, 1.560734f, 0.169290f, 2.150969f, 1.480886f, 5.882374f, 2.316069f, 6.746802f, 1.173958f, 3.771827f, 1.090662f, 
5.298356f, 1.635137f, 5.683737f, 2.416905f, 5.701719f, +0.673126f, 0.925693f, 0.166007f, 1.407115f, 1.006953f, 2.476078f, 1.611832f, 3.132327f, 0.163920f, 0.326028f, 0.155866f, 0.505129f, 1.548774f, 3.332665f, 2.343009f, 3.687406f, +0.770333f, 1.525247f, 0.270249f, 2.562200f, 1.011653f, 3.581605f, 2.303548f, 5.007151f, 0.549773f, 1.574337f, 0.743630f, 2.695594f, 1.667015f, 5.164575f, 3.587410f, 6.315009f, +0.377424f, 0.885604f, 0.142373f, 1.098852f, 0.755846f, 3.171232f, 1.850590f, 3.274673f, 0.328473f, 1.114713f, 0.477733f, 1.409765f, 0.992820f, 3.645139f, 2.297329f, 3.292156f, +1.314284f, 1.996793f, 0.296556f, 3.401027f, 1.739911f, 4.726674f, 2.548152f, 6.699968f, 1.102403f, 2.422351f, 0.959061f, 4.205312f, 2.456591f, 5.839962f, 3.400216f, 7.240250f, +0.282075f, 0.369634f, 0.086062f, 0.696942f, 0.317235f, 0.743315f, 0.628210f, 1.166373f, 0.062680f, 0.118793f, 0.073733f, 0.228297f, 0.417612f, 0.856276f, 0.781579f, 1.175181f, +0.327685f, 0.618237f, 0.142218f, 1.288219f, 0.323528f, 1.091430f, 0.911364f, 1.892651f, 0.213398f, 0.582295f, 0.357091f, 1.236693f, 0.456282f, 1.346996f, 1.214756f, 2.042995f, +0.071538f, 0.159949f, 0.033385f, 0.246175f, 0.107707f, 0.430601f, 0.326237f, 0.551539f, 0.056812f, 0.183712f, 0.102220f, 0.288193f, 0.121086f, 0.423618f, 0.346625f, 0.474572f, +0.465361f, 0.673706f, 0.129904f, 1.423343f, 0.463160f, 1.198937f, 0.839156f, 2.108023f, 0.356182f, 0.745771f, 0.383346f, 1.605937f, 0.559693f, 1.267840f, 0.958379f, 1.949709f, +0.382560f, 0.610785f, 0.108712f, 0.977835f, 0.417359f, 1.191470f, 0.769779f, 1.587451f, 0.113592f, 0.262295f, 0.124455f, 0.428007f, 0.705912f, 1.763487f, 1.230502f, 2.055020f, +0.624612f, 1.435788f, 0.252489f, 2.540257f, 0.598220f, 2.458813f, 1.569541f, 3.620369f, 0.543536f, 1.807011f, 0.847125f, 3.258609f, 1.084004f, 3.898919f, 2.687931f, 5.021091f, +0.300533f, 0.818693f, 0.130628f, 1.069881f, 0.438929f, 2.137998f, 1.238275f, 2.325206f, 0.318916f, 1.256486f, 0.534450f, 1.673617f, 0.634006f, 2.702435f, 1.690409f, 
2.570608f, +0.809161f, 1.427240f, 0.210377f, 2.560287f, 0.781215f, 2.463867f, 1.318301f, 3.678314f, 0.827560f, 2.111127f, 0.829566f, 3.860024f, 1.212936f, 3.347600f, 1.934450f, 4.371108f, +0.087606f, 0.139631f, 0.026490f, 0.218431f, 0.147939f, 0.421613f, 0.290345f, 0.548891f, 0.035663f, 0.082209f, 0.041578f, 0.131080f, 0.208601f, 0.520231f, 0.386923f, 0.592373f, +0.132658f, 0.304418f, 0.057061f, 0.526275f, 0.196662f, 0.806943f, 0.549045f, 1.160981f, 0.158267f, 0.525267f, 0.262473f, 0.925564f, 0.297087f, 1.066731f, 0.783876f, 1.342345f, +0.058586f, 0.159323f, 0.027096f, 0.203446f, 0.132444f, 0.644024f, 0.397585f, 0.684403f, 0.085235f, 0.335239f, 0.151993f, 0.436323f, 0.159487f, 0.678647f, 0.452479f, 0.630783f, +0.232363f, 0.409154f, 0.064285f, 0.717189f, 0.347248f, 1.093312f, 0.623534f, 1.594891f, 0.325815f, 0.829742f, 0.347535f, 1.482429f, 0.449471f, 1.238381f, 0.762775f, 1.580038f, +0.407448f, 0.444136f, 0.115370f, 0.858658f, 0.432135f, 0.842262f, 0.794181f, 1.355163f, 0.083261f, 0.131262f, 0.090897f, 0.258658f, 0.779160f, 1.328934f, 1.353326f, 1.870139f, +0.479263f, 0.752157f, 0.193041f, 1.607026f, 0.446232f, 1.252218f, 1.166585f, 2.226560f, 0.287020f, 0.651478f, 0.445734f, 1.418724f, 0.861980f, 2.116730f, 2.129750f, 3.291897f, +0.290823f, 0.540894f, 0.125955f, 0.853598f, 0.412921f, 1.373203f, 1.160736f, 1.803498f, 0.212390f, 0.571307f, 0.354656f, 0.918956f, 0.635818f, 1.850330f, 1.689176f, 2.125479f, +0.770577f, 0.927968f, 0.199629f, 2.010255f, 0.723248f, 1.557361f, 1.216118f, 2.807680f, 0.542376f, 0.944649f, 0.541747f, 2.085801f, 1.197076f, 2.255653f, 1.902328f, 3.556781f, +0.267812f, 0.488756f, 0.086396f, 0.701461f, 0.342069f, 1.116250f, 0.716236f, 1.333254f, 0.064533f, 0.170333f, 0.080266f, 0.249169f, 0.496902f, 1.418950f, 0.983306f, 1.482330f, +0.338202f, 0.888649f, 0.155201f, 1.409456f, 0.379228f, 1.781720f, 1.129531f, 2.351802f, 0.238835f, 0.907622f, 0.422575f, 1.467272f, 0.590183f, 2.426467f, 1.661345f, 2.801317f, +0.074062f, 0.230620f, 0.036545f, 
0.270175f, 0.126639f, 0.705110f, 0.405582f, 0.687457f, 0.063780f, 0.287235f, 0.121339f, 0.342981f, 0.157103f, 0.765458f, 0.475520f, 0.652733f, +0.463919f, 0.935361f, 0.136928f, 1.504197f, 0.524387f, 1.890486f, 1.004575f, 2.530108f, 0.385045f, 1.122796f, 0.438176f, 1.840392f, 0.699255f, 2.206002f, 1.266022f, 2.582248f, +0.347118f, 0.771830f, 0.104297f, 0.940558f, 0.430087f, 1.709955f, 0.838746f, 1.734158f, 0.111767f, 0.359426f, 0.129478f, 0.446434f, 0.802715f, 2.792793f, 1.479487f, 2.477246f, +0.616089f, 1.972325f, 0.263325f, 2.656151f, 0.670135f, 3.836032f, 1.859053f, 4.299285f, 0.581363f, 2.691757f, 0.958043f, 3.694826f, 1.339975f, 6.712211f, 3.513193f, 6.579701f, +0.297347f, 1.128102f, 0.136655f, 1.122146f, 0.493212f, 3.345821f, 1.471211f, 2.769769f, 0.342164f, 1.877463f, 0.606294f, 1.903516f, 0.786136f, 4.666758f, 2.216228f, 3.378957f, +0.770906f, 1.893733f, 0.211925f, 2.585812f, 0.845288f, 3.712849f, 1.508228f, 4.219156f, 0.854973f, 3.037545f, 0.906196f, 4.227513f, 1.448229f, 5.566577f, 2.442164f, 5.532647f, +0.166216f, 0.368955f, 0.053143f, 0.439332f, 0.318778f, 1.265244f, 0.661513f, 1.253815f, 0.073375f, 0.235560f, 0.090449f, 0.285893f, 0.496006f, 1.722749f, 0.972776f, 1.493164f, +0.273605f, 0.874414f, 0.124437f, 1.150659f, 0.460660f, 2.632439f, 1.359835f, 2.882886f, 0.353971f, 1.636115f, 0.620699f, 2.194457f, 0.767908f, 3.840037f, 2.142349f, 3.678164f, +0.121206f, 0.459055f, 0.059273f, 0.446191f, 0.311194f, 2.107448f, 0.987751f, 1.704719f, 0.191220f, 1.047436f, 0.360543f, 1.037690f, 0.413512f, 2.450547f, 1.240453f, 1.733747f, +0.462906f, 1.135187f, 0.135410f, 1.514610f, 0.785658f, 3.445033f, 1.491665f, 3.825311f, 0.703854f, 2.496379f, 0.793831f, 3.394907f, 1.122172f, 4.305940f, 2.013598f, 4.181843f, +0.399143f, 0.605937f, 0.119500f, 0.891699f, 0.480777f, 1.305050f, 0.934248f, 1.598299f, 0.088447f, 0.194194f, 0.102096f, 0.291280f, 0.956567f, 2.272207f, 1.756748f, 2.433914f, +0.510370f, 1.115514f, 0.217359f, 1.814162f, 0.539685f, 2.109187f, 1.491811f, 
2.854670f, 0.331444f, 1.047738f, 0.544241f, 1.736752f, 1.150380f, 3.934279f, 3.005319f, 4.657281f, +0.310655f, 0.804669f, 0.142260f, 0.966597f, 0.500939f, 2.320108f, 1.488914f, 2.319402f, 0.246019f, 0.921640f, 0.434372f, 1.128426f, 0.851168f, 3.449749f, 2.390978f, 3.016350f, +0.792611f, 1.329329f, 0.217113f, 2.191986f, 0.844890f, 2.533713f, 1.502126f, 3.476983f, 0.604967f, 1.467429f, 0.638918f, 2.466301f, 1.543118f, 4.049534f, 2.592870f, 4.860451f, +0.140778f, 0.266318f, 0.043330f, 0.394635f, 0.194337f, 0.657368f, 0.388230f, 0.810667f, 0.040122f, 0.109776f, 0.047613f, 0.165800f, 0.280912f, 0.831515f, 0.530368f, 0.896873f, +0.151341f, 0.412206f, 0.066262f, 0.675024f, 0.183408f, 0.893228f, 0.521203f, 1.217325f, 0.126409f, 0.497953f, 0.213389f, 0.831145f, 0.284028f, 1.210469f, 0.762824f, 1.442860f, +0.037245f, 0.120219f, 0.017534f, 0.145414f, 0.068830f, 0.397258f, 0.210319f, 0.399893f, 0.037936f, 0.177098f, 0.068859f, 0.218338f, 0.084967f, 0.429134f, 0.245373f, 0.377825f, +0.222755f, 0.465551f, 0.062729f, 0.772993f, 0.272129f, 1.016951f, 0.497387f, 1.405233f, 0.218672f, 0.660979f, 0.237422f, 1.118614f, 0.361089f, 1.180833f, 0.623749f, 1.427130f, +0.161218f, 0.371588f, 0.046216f, 0.467529f, 0.215889f, 0.889740f, 0.401693f, 0.931644f, 0.061397f, 0.204667f, 0.067861f, 0.262470f, 0.400951f, 1.446016f, 0.705068f, 1.324300f, +0.243588f, 0.808340f, 0.099333f, 1.123961f, 0.286360f, 1.699168f, 0.757934f, 1.966226f, 0.271868f, 1.304820f, 0.427450f, 1.849233f, 0.569775f, 2.958530f, 1.425273f, 2.994328f, +0.132120f, 0.519585f, 0.057932f, 0.533630f, 0.236852f, 1.665518f, 0.674073f, 1.423552f, 0.179820f, 1.022772f, 0.304002f, 1.070648f, 0.375662f, 2.311629f, 1.010422f, 1.728099f, +0.327052f, 0.832795f, 0.085780f, 1.174084f, 0.387578f, 1.764676f, 0.659797f, 2.070457f, 0.429009f, 1.579941f, 0.433836f, 2.270315f, 0.660765f, 2.632705f, 1.063100f, 2.701654f, +0.076040f, 0.174964f, 0.023195f, 0.215105f, 0.157615f, 0.648466f, 0.312059f, 0.663482f, 0.039702f, 0.132122f, 0.046694f, 
0.165562f, 0.244035f, 0.878599f, 0.456633f, 0.786248f, +0.106554f, 0.352994f, 0.046237f, 0.479601f, 0.193894f, 1.148542f, 0.546085f, 1.298671f, 0.163047f, 0.781201f, 0.272782f, 1.081830f, 0.321625f, 1.667172f, 0.856092f, 1.648766f, +0.053047f, 0.208261f, 0.024751f, 0.209000f, 0.147200f, 1.033328f, 0.445773f, 0.863012f, 0.098985f, 0.562042f, 0.178068f, 0.574900f, 0.194635f, 1.195640f, 0.557062f, 0.873386f, +0.193438f, 0.491724f, 0.053987f, 0.677388f, 0.354831f, 1.612819f, 0.642760f, 1.849021f, 0.347881f, 1.278978f, 0.374340f, 1.795823f, 0.504317f, 2.005933f, 0.863390f, 2.011403f, +0.232694f, 0.366174f, 0.066468f, 0.556367f, 0.302927f, 0.852366f, 0.561625f, 1.077804f, 0.060987f, 0.138802f, 0.067167f, 0.214958f, 0.599745f, 1.476736f, 1.050872f, 1.633214f, +0.253290f, 0.573867f, 0.102920f, 0.963597f, 0.289475f, 1.172708f, 0.763439f, 1.638753f, 0.194554f, 0.637512f, 0.304798f, 1.091078f, 0.614000f, 2.176687f, 1.530408f, 2.660397f, +0.173262f, 0.465207f, 0.075700f, 0.576975f, 0.301959f, 1.449691f, 0.856293f, 1.496326f, 0.162291f, 0.630215f, 0.273386f, 0.796680f, 0.510546f, 2.144920f, 1.368311f, 1.936370f, +0.422081f, 0.733791f, 0.110309f, 1.249282f, 0.486267f, 1.511596f, 0.824841f, 2.141725f, 0.381036f, 0.958067f, 0.383946f, 1.662523f, 0.883750f, 2.404028f, 1.416775f, 2.979161f, +0.362667f, 0.599574f, 0.096375f, 0.888845f, 0.533058f, 1.575773f, 0.919410f, 1.944092f, 0.089780f, 0.214667f, 0.091986f, 0.324364f, 0.641450f, 1.659323f, 1.045618f, 1.790526f, +0.420975f, 1.002034f, 0.159135f, 1.641632f, 0.543204f, 2.311922f, 1.332762f, 3.152148f, 0.305419f, 1.051416f, 0.445137f, 1.755706f, 0.700295f, 2.608195f, 1.623852f, 3.110283f, +0.086730f, 0.244650f, 0.035253f, 0.296051f, 0.170659f, 0.860771f, 0.450225f, 0.866858f, 0.076732f, 0.313043f, 0.120250f, 0.386107f, 0.175378f, 0.774076f, 0.437273f, 0.681821f, +0.561723f, 1.025959f, 0.136573f, 1.704227f, 0.730656f, 2.386194f, 1.153016f, 3.298704f, 0.478969f, 1.265228f, 0.448991f, 2.142152f, 0.807103f, 2.306589f, 1.203723f, 
2.788908f, +0.530877f, 1.069324f, 0.131395f, 1.346000f, 0.756927f, 2.726177f, 1.215963f, 2.855815f, 0.175609f, 0.511581f, 0.167579f, 0.656348f, 1.170283f, 3.688416f, 1.776778f, 3.379421f, +0.866087f, 2.511702f, 0.304932f, 3.493930f, 1.084081f, 5.621514f, 2.477329f, 6.507877f, 0.839620f, 3.521620f, 1.139758f, 4.993127f, 1.795678f, 8.148329f, 3.878161f, 8.250515f, +0.393260f, 1.351558f, 0.148879f, 1.388697f, 0.750638f, 4.612864f, 1.844437f, 3.944424f, 0.464908f, 2.310867f, 0.678590f, 2.420092f, 0.991120f, 5.329851f, 2.301627f, 3.986158f, +1.054188f, 2.345885f, 0.238722f, 3.308697f, 1.330158f, 5.292696f, 1.955049f, 6.212511f, 1.201119f, 3.865698f, 1.048692f, 5.557279f, 1.887850f, 6.573395f, 2.622389f, 6.748488f, +0.302841f, 0.608959f, 0.079759f, 0.748995f, 0.668364f, 2.403092f, 1.142497f, 2.459809f, 0.137343f, 0.399422f, 0.139462f, 0.500734f, 0.861477f, 2.710503f, 1.391751f, 2.426651f, +0.458215f, 1.326580f, 0.171667f, 1.803162f, 0.887784f, 4.595748f, 2.158763f, 5.198732f, 0.609018f, 2.550041f, 0.879702f, 3.532909f, 1.225936f, 5.553479f, 2.817350f, 5.494559f, +0.190970f, 0.655206f, 0.076930f, 0.657819f, 0.564227f, 3.461398f, 1.475241f, 2.892142f, 0.309522f, 1.535881f, 0.480738f, 1.571700f, 0.621074f, 3.334185f, 1.534716f, 2.436600f, +0.754112f, 1.675260f, 0.181713f, 2.308807f, 1.472851f, 5.850457f, 2.303502f, 6.710195f, 1.177994f, 3.784796f, 1.094412f, 5.316574f, 1.742675f, 6.057539f, 2.575858f, 6.076704f, +0.622716f, 0.856368f, 0.153575f, 1.301737f, 0.863151f, 2.122470f, 1.381647f, 2.685000f, 0.141763f, 0.281959f, 0.134797f, 0.436850f, 1.422625f, 3.061217f, 2.152170f, 3.387064f, +0.731894f, 1.449138f, 0.256764f, 2.434348f, 0.890604f, 3.153052f, 2.027920f, 4.408025f, 0.488304f, 1.398314f, 0.660486f, 2.394206f, 1.572600f, 4.872066f, 3.384227f, 5.957341f, +0.419120f, 0.983442f, 0.158101f, 1.220250f, 0.777725f, 3.263031f, 1.904160f, 3.369466f, 0.340994f, 1.157203f, 0.495943f, 1.463503f, 1.094684f, 4.019133f, 2.533036f, 3.629933f, +1.105661f, 1.679831f, 0.249482f, 
2.861164f, 1.356262f, 3.684445f, 1.986286f, 5.222630f, 0.866983f, 1.905054f, 0.754252f, 3.307260f, 2.051986f, 4.878111f, 2.840195f, 6.047769f, +0.445386f, 0.583638f, 0.135888f, 1.100445f, 0.537770f, 1.260054f, 1.064930f, 1.977214f, 0.090865f, 0.172209f, 0.106888f, 0.330952f, 0.587024f, 1.203642f, 1.098642f, 1.651918f, +0.583587f, 1.101043f, 0.253282f, 2.294240f, 0.618594f, 2.086842f, 1.742552f, 3.618797f, 0.348926f, 0.952107f, 0.583877f, 2.022109f, 0.723427f, 2.135636f, 1.925973f, 3.239131f, +0.114375f, 0.255727f, 0.053375f, 0.393585f, 0.184876f, 0.739117f, 0.559979f, 0.946705f, 0.083392f, 0.269665f, 0.150045f, 0.423029f, 0.172345f, 0.602948f, 0.493362f, 0.675473f, +0.819179f, 1.185931f, 0.228671f, 2.505524f, 0.875314f, 2.265844f, 1.585902f, 3.983904f, 0.575644f, 1.205280f, 0.619546f, 2.595439f, 0.877103f, 1.986851f, 1.501891f, 3.055418f, +0.473585f, 0.756112f, 0.134578f, 1.210497f, 0.554693f, 1.583528f, 1.023078f, 2.109809f, 0.129104f, 0.298113f, 0.141450f, 0.486455f, 0.777966f, 1.943491f, 1.356102f, 2.264781f, +0.872140f, 2.004777f, 0.352548f, 3.546937f, 0.896770f, 3.685919f, 2.352843f, 5.427167f, 0.696783f, 2.316489f, 1.085968f, 4.177357f, 1.347469f, 4.846543f, 3.341227f, 6.241456f, +0.376715f, 1.026223f, 0.163741f, 1.341084f, 0.590689f, 2.877215f, 1.666411f, 3.129152f, 0.367021f, 1.446012f, 0.615065f, 1.926062f, 0.707499f, 3.015698f, 1.886359f, 2.868590f, +1.116737f, 1.969757f, 0.290345f, 3.533494f, 1.157525f, 3.650711f, 1.953326f, 5.450157f, 1.048597f, 2.674998f, 1.051139f, 4.891016f, 1.490272f, 4.113023f, 2.376759f, 5.370555f, +0.072602f, 0.115716f, 0.021953f, 0.181019f, 0.131625f, 0.375120f, 0.258328f, 0.488362f, 0.027135f, 0.062550f, 0.031635f, 0.099734f, 0.153901f, 0.383814f, 0.285462f, 0.437038f, +0.124000f, 0.284551f, 0.053337f, 0.491928f, 0.197358f, 0.809798f, 0.550988f, 1.165089f, 0.135823f, 0.450778f, 0.225251f, 0.794309f, 0.247221f, 0.887680f, 0.652302f, 1.117032f, +0.049162f, 0.133694f, 0.022738f, 0.170719f, 0.119319f, 0.580205f, 0.358187f, 
0.616582f, 0.065666f, 0.258275f, 0.117098f, 0.336152f, 0.119144f, 0.506979f, 0.338022f, 0.471223f, +0.214682f, 0.378021f, 0.059393f, 0.662617f, 0.344440f, 1.084471f, 0.618491f, 1.581994f, 0.276372f, 0.703827f, 0.294796f, 1.257467f, 0.369694f, 1.018581f, 0.627390f, 1.299597f, +0.630310f, 0.687065f, 0.178474f, 1.328318f, 0.717704f, 1.398858f, 1.319003f, 2.250700f, 0.118254f, 0.186429f, 0.129100f, 0.367368f, 1.073050f, 1.830194f, 1.863786f, 2.575536f, +0.836244f, 1.312405f, 0.336828f, 2.804027f, 0.835920f, 2.345762f, 2.185346f, 4.170981f, 0.459796f, 1.043644f, 0.714049f, 2.272745f, 1.338963f, 3.288038f, 3.308262f, 5.113492f, +0.455547f, 0.847259f, 0.197297f, 1.337080f, 0.694409f, 2.309315f, 1.952010f, 3.032944f, 0.305443f, 0.821612f, 0.510041f, 1.321576f, 0.886643f, 2.580271f, 2.355543f, 2.963964f, +1.328969f, 1.600412f, 0.344289f, 3.466972f, 1.339156f, 2.883586f, 2.251745f, 5.198659f, 0.858802f, 1.495764f, 0.857806f, 3.302673f, 1.837947f, 3.463246f, 2.920764f, 5.460950f, +0.342680f, 0.625390f, 0.110548f, 0.897558f, 0.469912f, 1.533433f, 0.983919f, 1.831538f, 0.075811f, 0.200101f, 0.094294f, 0.292716f, 0.566032f, 1.616359f, 1.120106f, 1.688556f, +0.488104f, 1.282527f, 0.223991f, 2.034173f, 0.587599f, 2.760707f, 1.750165f, 3.644027f, 0.316466f, 1.202638f, 0.559929f, 1.944197f, 0.758289f, 3.117616f, 2.134559f, 3.599237f, +0.095957f, 0.298798f, 0.047348f, 0.350047f, 0.176155f, 0.980805f, 0.564162f, 0.956249f, 0.075868f, 0.341674f, 0.144336f, 0.407985f, 0.181208f, 0.882905f, 0.548482f, 0.752885f, +0.661788f, 1.334305f, 0.195330f, 2.145758f, 0.803106f, 2.895303f, 1.538519f, 3.874892f, 0.504290f, 1.470518f, 0.573877f, 2.410349f, 0.888022f, 2.801521f, 1.607790f, 3.279336f, +0.348227f, 0.774296f, 0.104630f, 0.943563f, 0.463218f, 1.841682f, 0.903359f, 1.867749f, 0.102942f, 0.331046f, 0.119254f, 0.411184f, 0.716900f, 2.494228f, 1.321322f, 2.212415f, +0.697118f, 2.231729f, 0.297959f, 3.005493f, 0.814085f, 4.660040f, 2.258392f, 5.222804f, 0.603954f, 2.796355f, 0.995271f, 
3.838402f, 1.349807f, 6.761461f, 3.538971f, 6.627979f, +0.302045f, 1.145924f, 0.138814f, 1.139874f, 0.537881f, 3.648841f, 1.604454f, 3.020618f, 0.319107f, 1.750946f, 0.565437f, 1.775242f, 0.710915f, 4.220219f, 2.004168f, 3.055641f, +0.862192f, 2.117978f, 0.237020f, 2.892010f, 1.014968f, 4.458149f, 1.810983f, 5.066089f, 0.877907f, 3.119025f, 0.930504f, 4.340914f, 1.441956f, 5.542465f, 2.431585f, 5.508681f, +0.111628f, 0.247784f, 0.035690f, 0.295047f, 0.229843f, 0.912258f, 0.476960f, 0.904018f, 0.045242f, 0.145242f, 0.055769f, 0.176277f, 0.296550f, 1.029990f, 0.581599f, 0.892726f, +0.207253f, 0.662360f, 0.094260f, 0.871612f, 0.374630f, 2.140817f, 1.105878f, 2.344492f, 0.246172f, 1.137847f, 0.431669f, 1.526150f, 0.517843f, 2.589548f, 1.444704f, 2.480388f, +0.082422f, 0.312167f, 0.040307f, 0.303419f, 0.227194f, 1.538589f, 0.721129f, 1.244568f, 0.119385f, 0.653946f, 0.225098f, 0.647862f, 0.250335f, 1.483530f, 0.750955f, 1.049588f, +0.346584f, 0.849931f, 0.101383f, 1.134011f, 0.631531f, 2.769199f, 1.199036f, 3.074876f, 0.483830f, 1.716012f, 0.545680f, 2.333661f, 0.747976f, 2.870095f, 1.342150f, 2.787380f, +0.500377f, 0.759620f, 0.149808f, 1.117860f, 0.647078f, 1.756469f, 1.257405f, 2.151153f, 0.101800f, 0.223511f, 0.117509f, 0.335253f, 1.067570f, 2.535880f, 1.960605f, 2.716351f, +0.721658f, 1.577326f, 0.307344f, 2.565207f, 0.819277f, 3.201887f, 2.264669f, 4.333581f, 0.430279f, 1.360169f, 0.706531f, 2.254643f, 1.448104f, 4.952489f, 3.783111f, 5.862609f, +0.394339f, 1.021430f, 0.180582f, 1.226977f, 0.682685f, 3.161869f, 2.029109f, 3.160907f, 0.286717f, 1.074103f, 0.506229f, 1.315097f, 0.961874f, 3.898438f, 2.701959f, 3.408670f, +1.107762f, 1.857885f, 0.303440f, 3.063542f, 1.267742f, 3.801790f, 2.253912f, 5.217149f, 0.776267f, 1.882941f, 0.819832f, 3.164651f, 1.919983f, 5.038523f, 3.226108f, 6.047485f, +0.276565f, 0.523196f, 0.085123f, 0.775280f, 0.409887f, 1.386488f, 0.818835f, 1.709820f, 0.072367f, 0.197999f, 0.085878f, 0.299048f, 0.491298f, 1.454271f, 0.927582f, 
1.568577f, +0.335349f, 0.913388f, 0.146826f, 1.495752f, 0.436319f, 2.124945f, 1.239915f, 2.895956f, 0.257165f, 1.013031f, 0.434117f, 1.690873f, 0.560292f, 2.387846f, 1.504795f, 2.846276f, +0.074089f, 0.239144f, 0.034880f, 0.289262f, 0.146998f, 0.848404f, 0.449169f, 0.854031f, 0.069284f, 0.323439f, 0.125759f, 0.398757f, 0.150470f, 0.759959f, 0.434534f, 0.669095f, +0.487873f, 1.019642f, 0.137387f, 1.692997f, 0.639881f, 2.391249f, 1.169551f, 3.304254f, 0.439712f, 1.329112f, 0.477414f, 2.249335f, 0.704056f, 2.302401f, 1.216192f, 2.782633f, +0.248315f, 0.572336f, 0.071185f, 0.720108f, 0.356997f, 1.471286f, 0.664245f, 1.540580f, 0.086822f, 0.289422f, 0.095963f, 0.371161f, 0.549786f, 1.982782f, 0.966792f, 1.815885f, +0.423177f, 1.404306f, 0.172569f, 1.952625f, 0.534102f, 3.169190f, 1.413655f, 3.667290f, 0.433630f, 2.081188f, 0.681783f, 2.949528f, 0.881216f, 4.575677f, 2.204334f, 4.631043f, +0.206054f, 0.810343f, 0.090351f, 0.832248f, 0.396582f, 2.788727f, 1.128662f, 2.383581f, 0.257480f, 1.464483f, 0.435293f, 1.533036f, 0.521580f, 3.209537f, 1.402901f, 2.399345f, +0.561596f, 1.430030f, 0.147297f, 2.016074f, 0.714513f, 3.253244f, 1.216360f, 3.816962f, 0.676343f, 2.490815f, 0.683953f, 3.579206f, 1.010104f, 4.024587f, 1.625150f, 4.129989f, +0.078405f, 0.180406f, 0.023917f, 0.221796f, 0.174480f, 0.717852f, 0.345449f, 0.734475f, 0.037585f, 0.125075f, 0.044204f, 0.156732f, 0.224010f, 0.806504f, 0.419163f, 0.721730f, +0.123923f, 0.410533f, 0.053773f, 0.557777f, 0.242098f, 1.434077f, 0.681846f, 1.621529f, 0.174096f, 0.834137f, 0.291266f, 1.155137f, 0.332999f, 1.726130f, 0.886367f, 1.707072f, +0.055385f, 0.217437f, 0.025841f, 0.218209f, 0.164998f, 1.158266f, 0.499671f, 0.967358f, 0.094883f, 0.538751f, 0.170688f, 0.551076f, 0.180909f, 1.111318f, 0.517775f, 0.811790f, +0.222363f, 0.565252f, 0.062060f, 0.778679f, 0.437912f, 1.990448f, 0.793258f, 2.281956f, 0.367152f, 1.349825f, 0.395076f, 1.895299f, 0.516103f, 2.052812f, 0.883567f, 2.058410f, +0.447876f, 0.704792f, 0.127934f, 
1.070866f, 0.625973f, 1.761342f, 1.160550f, 2.227191f, 0.107772f, 0.245280f, 0.118692f, 0.379857f, 1.027665f, 2.530394f, 1.800674f, 2.798519f, +0.549880f, 1.245840f, 0.223435f, 2.091925f, 0.674693f, 2.733286f, 1.779383f, 3.819522f, 0.387780f, 1.270669f, 0.607514f, 2.174705f, 1.186673f, 4.206869f, 2.957808f, 5.141730f, +0.337675f, 0.906655f, 0.147535f, 1.124483f, 0.631812f, 3.033302f, 1.791689f, 3.130880f, 0.290390f, 1.127660f, 0.489175f, 1.425519f, 0.885814f, 3.721505f, 2.374064f, 3.359664f, +0.905705f, 1.574574f, 0.236702f, 2.680718f, 1.120236f, 3.482338f, 1.900226f, 4.933997f, 0.750672f, 1.887469f, 0.756404f, 3.275306f, 1.688232f, 4.592426f, 2.706471f, 5.691106f, +0.506835f, 0.837917f, 0.134686f, 1.242180f, 0.799792f, 2.364266f, 1.379469f, 2.916887f, 0.115194f, 0.275434f, 0.118025f, 0.416183f, 0.798056f, 2.064435f, 1.300899f, 2.227670f, +0.663579f, 1.579495f, 0.250843f, 2.587686f, 0.919270f, 3.912494f, 2.255450f, 5.334418f, 0.442003f, 1.521612f, 0.644203f, 2.540863f, 0.982718f, 3.660058f, 2.278737f, 4.364633f, +0.122731f, 0.346200f, 0.049885f, 0.418935f, 0.259271f, 1.307714f, 0.683998f, 1.316962f, 0.099690f, 0.406704f, 0.156228f, 0.501629f, 0.220937f, 0.975161f, 0.550865f, 0.858940f, +0.875181f, 1.598474f, 0.212784f, 2.655236f, 1.222175f, 3.991407f, 1.928660f, 5.517769f, 0.685136f, 1.809830f, 0.642254f, 3.064216f, 1.119480f, 3.199321f, 1.669607f, 3.868314f, +0.581673f, 1.171640f, 0.143968f, 1.474790f, 0.890396f, 3.206886f, 1.430375f, 3.359383f, 0.176654f, 0.514627f, 0.168577f, 0.660257f, 1.141532f, 3.597799f, 1.733126f, 3.296395f, +1.070346f, 3.104064f, 0.376848f, 4.317941f, 1.438364f, 7.458652f, 3.286933f, 8.634684f, 0.952662f, 3.995753f, 1.293209f, 5.665375f, 1.975618f, 8.964852f, 4.266782f, 9.077278f, +0.436302f, 1.499484f, 0.165173f, 1.540689f, 0.894092f, 5.494428f, 2.196927f, 4.698242f, 0.473552f, 2.353834f, 0.691207f, 2.465090f, 0.978917f, 5.264224f, 2.273287f, 3.937076f, +1.287717f, 2.865558f, 0.291605f, 4.041656f, 1.744416f, 6.941028f, 2.563920f, 
8.147307f, 1.347044f, 4.335345f, 1.176099f, 6.232438f, 2.052966f, 7.148322f, 2.851750f, 7.338728f, +0.222133f, 0.446670f, 0.058503f, 0.549386f, 0.526327f, 1.892402f, 0.899701f, 1.937067f, 0.092491f, 0.268983f, 0.093918f, 0.337209f, 0.562541f, 1.769948f, 0.908808f, 1.584594f, +0.379093f, 1.097513f, 0.142024f, 1.491801f, 0.788548f, 4.082037f, 1.917457f, 4.617619f, 0.462594f, 1.936943f, 0.668198f, 2.683503f, 0.902934f, 4.090283f, 2.075052f, 4.046886f, +0.141836f, 0.486630f, 0.057137f, 0.488570f, 0.449904f, 2.760050f, 1.176328f, 2.306137f, 0.211060f, 1.047302f, 0.327811f, 1.071727f, 0.410654f, 2.204564f, 1.014754f, 1.611080f, +0.616669f, 1.369929f, 0.148594f, 1.888007f, 1.293062f, 5.136299f, 2.022317f, 5.891090f, 0.884408f, 2.841528f, 0.821657f, 3.991548f, 1.268658f, 4.409854f, 1.875210f, 4.423806f, +0.852626f, 1.172543f, 0.210276f, 1.782344f, 1.268818f, 3.120000f, 2.031001f, 3.946911f, 0.178207f, 0.354444f, 0.169451f, 0.549155f, 1.734087f, 3.731424f, 2.623354f, 4.128609f, +1.130301f, 2.237977f, 0.396533f, 3.759487f, 1.476643f, 5.227833f, 3.362337f, 7.308607f, 0.692356f, 1.982641f, 0.936490f, 3.394697f, 2.162102f, 6.698401f, 4.652834f, 8.190502f, +0.581071f, 1.363451f, 0.219193f, 1.691763f, 1.157608f, 4.856871f, 2.834254f, 5.015293f, 0.434041f, 1.472970f, 0.631271f, 1.862850f, 1.351113f, 4.960611f, 3.126397f, 4.480241f, +1.687749f, 2.564198f, 0.380825f, 4.367457f, 2.222665f, 6.038130f, 3.255159f, 8.558932f, 1.215038f, 2.669848f, 1.057051f, 4.634978f, 2.788510f, 6.629022f, 3.859632f, 8.218508f, +}; + +static const float acceptor_me2x3acc2[16384] = { +0.028739f, 0.051428f, 0.000313f, 0.073324f, 0.034680f, 0.087951f, 0.021917f, 0.102035f, 0.000252f, 0.000765f, 0.000211f, 0.001303f, 0.086087f, 0.188764f, 0.066268f, 0.215400f, +0.034283f, 0.056280f, 0.000377f, 0.081658f, 0.058641f, 0.136431f, 0.037455f, 0.161069f, 0.023430f, 0.065190f, 0.019763f, 0.112954f, 0.091516f, 0.184088f, 0.071197f, 0.213767f, +0.000407f, 0.000736f, 0.000005f, 0.000961f, 0.000959f, 0.002458f, 
0.000722f, 0.002610f, 0.000251f, 0.000770f, 0.000250f, 0.001200f, 0.001973f, 0.004369f, 0.001807f, 0.004564f, +0.056136f, 0.110463f, 0.000789f, 0.143996f, 0.140495f, 0.391797f, 0.114664f, 0.415578f, 0.037582f, 0.125335f, 0.040506f, 0.195116f, 0.152611f, 0.367964f, 0.151713f, 0.383898f, +0.035291f, 0.086949f, 0.000364f, 0.110031f, 0.040326f, 0.140805f, 0.024134f, 0.144985f, 0.000333f, 0.001390f, 0.000263f, 0.002100f, 0.096583f, 0.291575f, 0.070407f, 0.295308f, +0.059092f, 0.133562f, 0.000615f, 0.171997f, 0.095713f, 0.306585f, 0.057892f, 0.321253f, 0.043382f, 0.166183f, 0.034652f, 0.255569f, 0.144117f, 0.399132f, 0.106178f, 0.411368f, +0.026513f, 0.065985f, 0.000325f, 0.076430f, 0.059155f, 0.208640f, 0.042131f, 0.196639f, 0.017575f, 0.074131f, 0.016530f, 0.102541f, 0.117347f, 0.357850f, 0.101800f, 0.331735f, +0.084513f, 0.228965f, 0.001124f, 0.264913f, 0.200288f, 0.768999f, 0.154800f, 0.723963f, 0.060777f, 0.279067f, 0.062035f, 0.385591f, 0.209909f, 0.696825f, 0.197614f, 0.645258f, +0.000302f, 0.001475f, 0.000005f, 0.001613f, 0.000400f, 0.002769f, 0.000416f, 0.002464f, 0.000003f, 0.000028f, 0.000005f, 0.000037f, 0.001070f, 0.006402f, 0.001353f, 0.005604f, +0.000430f, 0.001928f, 0.000008f, 0.002146f, 0.000808f, 0.005130f, 0.000848f, 0.004646f, 0.000376f, 0.002852f, 0.000521f, 0.003791f, 0.001358f, 0.007457f, 0.001737f, 0.006643f, +0.000177f, 0.000874f, 0.000004f, 0.000875f, 0.000458f, 0.003203f, 0.000566f, 0.002609f, 0.000140f, 0.001167f, 0.000228f, 0.001395f, 0.001014f, 0.006133f, 0.001527f, 0.004914f, +0.000826f, 0.004438f, 0.000019f, 0.004438f, 0.002270f, 0.017278f, 0.003045f, 0.014059f, 0.000707f, 0.006432f, 0.001252f, 0.007681f, 0.002656f, 0.017481f, 0.004340f, 0.013991f, +0.091342f, 0.202450f, 0.001093f, 0.214612f, 0.085940f, 0.269942f, 0.059752f, 0.232843f, 0.000775f, 0.002911f, 0.000711f, 0.003685f, 0.256620f, 0.696928f, 0.217328f, 0.591287f, +0.156262f, 0.317725f, 0.001890f, 0.342749f, 0.208397f, 0.600508f, 0.146437f, 0.527108f, 0.103177f, 0.355557f, 
0.095746f, 0.458055f, 0.391221f, 0.974695f, 0.334848f, 0.841529f, +0.064571f, 0.144564f, 0.000920f, 0.140269f, 0.118620f, 0.376366f, 0.098146f, 0.297145f, 0.038496f, 0.146072f, 0.042064f, 0.169259f, 0.293377f, 0.804820f, 0.295672f, 0.624993f, +0.163052f, 0.397389f, 0.002520f, 0.385156f, 0.318167f, 1.098934f, 0.285680f, 0.866659f, 0.105462f, 0.435623f, 0.125055f, 0.504214f, 0.415736f, 1.241521f, 0.454686f, 0.963052f, +0.026316f, 0.066875f, 0.000385f, 0.071194f, 0.027083f, 0.097539f, 0.023015f, 0.084491f, 0.000232f, 0.000998f, 0.000260f, 0.001268f, 0.068795f, 0.214218f, 0.071208f, 0.182519f, +0.047793f, 0.111420f, 0.000706f, 0.120706f, 0.069721f, 0.230352f, 0.059878f, 0.203055f, 0.032741f, 0.129366f, 0.037134f, 0.167367f, 0.111340f, 0.318055f, 0.116473f, 0.275767f, +0.000575f, 0.001475f, 0.000010f, 0.001438f, 0.001155f, 0.004202f, 0.001168f, 0.003331f, 0.000356f, 0.001547f, 0.000475f, 0.001800f, 0.002430f, 0.007643f, 0.002993f, 0.005961f, +0.074404f, 0.207915f, 0.001405f, 0.202370f, 0.158812f, 0.628929f, 0.174283f, 0.498102f, 0.049930f, 0.236471f, 0.072362f, 0.274867f, 0.176525f, 0.604427f, 0.235963f, 0.470846f, +0.054146f, 0.189449f, 0.000750f, 0.179006f, 0.052768f, 0.261648f, 0.042464f, 0.201163f, 0.000512f, 0.003036f, 0.000544f, 0.003425f, 0.129323f, 0.554431f, 0.126764f, 0.419274f, +0.138031f, 0.443047f, 0.001932f, 0.426005f, 0.190673f, 0.867340f, 0.155075f, 0.678594f, 0.101575f, 0.552570f, 0.109099f, 0.634505f, 0.293786f, 1.155453f, 0.291040f, 0.889184f, +0.062705f, 0.221615f, 0.001034f, 0.191664f, 0.119315f, 0.597617f, 0.114264f, 0.420552f, 0.041664f, 0.249567f, 0.052693f, 0.257758f, 0.242201f, 1.048876f, 0.282525f, 0.726005f, +0.187688f, 0.722103f, 0.003357f, 0.623820f, 0.379347f, 2.068363f, 0.394237f, 1.453927f, 0.135296f, 0.882213f, 0.185688f, 0.910157f, 0.406828f, 1.917883f, 0.514992f, 1.326040f, +0.016347f, 0.113386f, 0.000393f, 0.092596f, 0.018467f, 0.181521f, 0.025793f, 0.120620f, 0.000184f, 0.002160f, 0.000339f, 0.002107f, 0.050532f, 0.429464f, 
0.085970f, 0.280696f, +0.035459f, 0.225629f, 0.000862f, 0.187508f, 0.056779f, 0.512010f, 0.080149f, 0.346225f, 0.031026f, 0.334591f, 0.057839f, 0.332064f, 0.097679f, 0.761570f, 0.167950f, 0.506534f, +0.014776f, 0.103528f, 0.000423f, 0.077385f, 0.032592f, 0.323613f, 0.054173f, 0.196826f, 0.011674f, 0.138621f, 0.025625f, 0.123741f, 0.073868f, 0.634155f, 0.149554f, 0.379377f, +0.064742f, 0.493788f, 0.002010f, 0.368689f, 0.151681f, 1.639505f, 0.273598f, 0.996066f, 0.055491f, 0.717295f, 0.132184f, 0.639588f, 0.181625f, 1.697372f, 0.399048f, 1.014311f, +0.080868f, 0.254532f, 0.001301f, 0.201467f, 0.064889f, 0.289445f, 0.060664f, 0.186416f, 0.000688f, 0.003668f, 0.000849f, 0.003467f, 0.198274f, 0.764682f, 0.225784f, 0.484414f, +0.210619f, 0.608155f, 0.003425f, 0.489853f, 0.239556f, 0.980286f, 0.226344f, 0.642480f, 0.139400f, 0.682192f, 0.173941f, 0.656207f, 0.460188f, 1.628172f, 0.529620f, 1.049605f, +0.088118f, 0.280163f, 0.001687f, 0.202973f, 0.138058f, 0.622059f, 0.153596f, 0.366703f, 0.052660f, 0.283761f, 0.077371f, 0.245506f, 0.349402f, 1.361186f, 0.473493f, 0.789258f, +0.208946f, 0.723173f, 0.004342f, 0.523345f, 0.347723f, 1.705565f, 0.419819f, 1.004315f, 0.135468f, 0.794642f, 0.215996f, 0.686752f, 0.464936f, 1.971734f, 0.683739f, 1.142007f, +0.000383f, 0.000697f, 0.000006f, 0.001236f, 0.000447f, 0.001154f, 0.000387f, 0.001666f, 0.000005f, 0.000014f, 0.000005f, 0.000030f, 0.001259f, 0.002809f, 0.001326f, 0.003989f, +0.000625f, 0.001044f, 0.000009f, 0.001886f, 0.001036f, 0.002452f, 0.000905f, 0.003603f, 0.000582f, 0.001647f, 0.000671f, 0.003551f, 0.001833f, 0.003753f, 0.001952f, 0.005424f, +0.000008f, 0.000015f, 0.000000f, 0.000025f, 0.000019f, 0.000049f, 0.000020f, 0.000065f, 0.000007f, 0.000022f, 0.000009f, 0.000042f, 0.000044f, 0.000100f, 0.000055f, 0.000129f, +0.000883f, 0.001769f, 0.000017f, 0.002870f, 0.002141f, 0.006077f, 0.002392f, 0.008022f, 0.000805f, 0.002732f, 0.001188f, 0.005293f, 0.002638f, 0.006474f, 0.003590f, 0.008406f, +0.000702f, 0.001760f, 
0.000010f, 0.002771f, 0.000777f, 0.002760f, 0.000636f, 0.003537f, 0.000009f, 0.000038f, 0.000010f, 0.000072f, 0.002110f, 0.006482f, 0.002105f, 0.008171f, +0.001610f, 0.003703f, 0.000023f, 0.005935f, 0.002526f, 0.008233f, 0.002091f, 0.010736f, 0.001609f, 0.006271f, 0.001759f, 0.012004f, 0.004314f, 0.012158f, 0.004350f, 0.015595f, +0.000808f, 0.002046f, 0.000014f, 0.002949f, 0.001745f, 0.006265f, 0.001702f, 0.007348f, 0.000729f, 0.003128f, 0.000938f, 0.005385f, 0.003928f, 0.012188f, 0.004664f, 0.014062f, +0.001987f, 0.005479f, 0.000036f, 0.007889f, 0.004561f, 0.017820f, 0.004825f, 0.020880f, 0.001945f, 0.009089f, 0.002718f, 0.015629f, 0.005422f, 0.018317f, 0.006987f, 0.021110f, +0.000168f, 0.000834f, 0.000004f, 0.001135f, 0.000215f, 0.001516f, 0.000306f, 0.001679f, 0.000003f, 0.000022f, 0.000005f, 0.000035f, 0.000653f, 0.003977f, 0.001131f, 0.004332f, +0.000328f, 0.001494f, 0.000008f, 0.002069f, 0.000596f, 0.003849f, 0.000856f, 0.004338f, 0.000389f, 0.003008f, 0.000739f, 0.004975f, 0.001136f, 0.006346f, 0.001988f, 0.007036f, +0.000151f, 0.000757f, 0.000004f, 0.000943f, 0.000378f, 0.002687f, 0.000639f, 0.002724f, 0.000162f, 0.001376f, 0.000361f, 0.002047f, 0.000949f, 0.005836f, 0.001955f, 0.005820f, +0.000543f, 0.002967f, 0.000017f, 0.003693f, 0.001444f, 0.011187f, 0.002652f, 0.011329f, 0.000632f, 0.005852f, 0.001532f, 0.008698f, 0.001917f, 0.012839f, 0.004288f, 0.012789f, +0.002140f, 0.004828f, 0.000035f, 0.006369f, 0.001950f, 0.006234f, 0.001856f, 0.006693f, 0.000025f, 0.000094f, 0.000031f, 0.000149f, 0.006606f, 0.018258f, 0.007658f, 0.019279f, +0.005017f, 0.010381f, 0.000083f, 0.013937f, 0.006480f, 0.019001f, 0.006233f, 0.020758f, 0.004509f, 0.015811f, 0.005727f, 0.025351f, 0.013799f, 0.034985f, 0.016166f, 0.037592f, +0.002318f, 0.005281f, 0.000045f, 0.006378f, 0.004124f, 0.013316f, 0.004671f, 0.013084f, 0.001881f, 0.007263f, 0.002813f, 0.010475f, 0.011571f, 0.032301f, 0.015962f, 0.031219f, +0.004518f, 0.011204f, 0.000096f, 0.013515f, 0.008537f, 0.030008f, 
0.010493f, 0.029453f, 0.003977f, 0.016717f, 0.006455f, 0.024082f, 0.012655f, 0.038456f, 0.018944f, 0.037126f, +0.051204f, 0.124938f, 0.000859f, 0.177101f, 0.062222f, 0.215159f, 0.060612f, 0.248166f, 0.000471f, 0.001948f, 0.000606f, 0.003298f, 0.120455f, 0.360131f, 0.142925f, 0.408566f, +0.064977f, 0.145444f, 0.001101f, 0.209803f, 0.111920f, 0.355038f, 0.110186f, 0.416723f, 0.046539f, 0.176557f, 0.060508f, 0.304147f, 0.136214f, 0.373602f, 0.163346f, 0.431320f, +0.000826f, 0.002036f, 0.000016f, 0.002641f, 0.001960f, 0.006846f, 0.002272f, 0.007227f, 0.000534f, 0.002231f, 0.000818f, 0.003458f, 0.003143f, 0.009491f, 0.004437f, 0.009855f, +0.097248f, 0.260922f, 0.002106f, 0.338160f, 0.245088f, 0.931919f, 0.308324f, 0.982755f, 0.068231f, 0.310267f, 0.113356f, 0.480209f, 0.207619f, 0.682565f, 0.318142f, 0.707995f, +0.123843f, 0.416040f, 0.001966f, 0.523432f, 0.142502f, 0.678440f, 0.131459f, 0.694533f, 0.001224f, 0.006970f, 0.001491f, 0.010472f, 0.266168f, 1.095634f, 0.299083f, 1.103229f, +0.220589f, 0.679821f, 0.003540f, 0.870383f, 0.359788f, 1.571398f, 0.335440f, 1.637032f, 0.169718f, 0.886471f, 0.208965f, 1.355387f, 0.422487f, 1.595411f, 0.479789f, 1.634793f, +0.105928f, 0.359457f, 0.002002f, 0.413942f, 0.237989f, 1.144517f, 0.261266f, 1.072431f, 0.073587f, 0.423221f, 0.106686f, 0.582025f, 0.368181f, 1.530900f, 0.492330f, 1.410954f, +0.288361f, 1.065216f, 0.005913f, 1.225316f, 0.688157f, 3.602606f, 0.819830f, 3.371965f, 0.217329f, 1.360643f, 0.341926f, 1.869123f, 0.562453f, 2.545868f, 0.816190f, 2.343803f, +0.029090f, 0.193726f, 0.000802f, 0.210656f, 0.038800f, 0.366191f, 0.062123f, 0.324003f, 0.000342f, 0.003859f, 0.000723f, 0.005011f, 0.080916f, 0.660285f, 0.157807f, 0.574634f, +0.044089f, 0.269356f, 0.001228f, 0.298059f, 0.083355f, 0.721709f, 0.134884f, 0.649819f, 0.040332f, 0.417618f, 0.086190f, 0.551871f, 0.109287f, 0.818122f, 0.215410f, 0.724548f, +0.019421f, 0.130645f, 0.000637f, 0.130030f, 0.050577f, 0.482183f, 0.096370f, 0.390498f, 0.016041f, 0.182893f, 
0.040365f, 0.217385f, 0.087364f, 0.720122f, 0.202761f, 0.573629f, +0.077388f, 0.566718f, 0.002754f, 0.563426f, 0.214078f, 2.221724f, 0.442657f, 1.797280f, 0.069349f, 0.860709f, 0.189371f, 1.021902f, 0.195362f, 1.752986f, 0.492044f, 1.394835f, +0.171461f, 0.518168f, 0.003163f, 0.546113f, 0.162447f, 0.695738f, 0.174095f, 0.596641f, 0.001524f, 0.007807f, 0.002157f, 0.009826f, 0.378295f, 1.400827f, 0.493826f, 1.181600f, +0.312025f, 0.865059f, 0.005817f, 0.927784f, 0.419036f, 1.646399f, 0.453866f, 1.436787f, 0.215918f, 1.014543f, 0.308847f, 1.299435f, 0.613483f, 2.084043f, 0.809371f, 1.788885f, +0.137994f, 0.421254f, 0.003029f, 0.406369f, 0.255274f, 1.104375f, 0.325567f, 0.866861f, 0.086220f, 0.446086f, 0.145219f, 0.513900f, 0.492375f, 1.841731f, 0.764890f, 1.421929f, +0.297591f, 0.988932f, 0.007089f, 0.952935f, 0.584750f, 2.753875f, 0.809309f, 2.159218f, 0.201723f, 1.136130f, 0.368705f, 1.307397f, 0.595873f, 2.426320f, 1.004539f, 1.871195f, +0.047557f, 0.085104f, 0.000517f, 0.121339f, 0.078073f, 0.197998f, 0.049340f, 0.229704f, 0.000441f, 0.001336f, 0.000368f, 0.002275f, 0.126792f, 0.278017f, 0.097602f, 0.317246f, +0.046567f, 0.076447f, 0.000512f, 0.110918f, 0.108363f, 0.252109f, 0.069212f, 0.297636f, 0.033582f, 0.093436f, 0.028326f, 0.161897f, 0.110637f, 0.222552f, 0.086073f, 0.258432f, +0.000479f, 0.000866f, 0.000006f, 0.001130f, 0.001536f, 0.003934f, 0.001155f, 0.004178f, 0.000312f, 0.000956f, 0.000310f, 0.001490f, 0.002066f, 0.004576f, 0.001893f, 0.004779f, +0.078339f, 0.154152f, 0.001100f, 0.200948f, 0.266726f, 0.743814f, 0.217687f, 0.788962f, 0.055340f, 0.184561f, 0.059647f, 0.287315f, 0.189548f, 0.457024f, 0.188432f, 0.476814f, +0.067036f, 0.165163f, 0.000691f, 0.209008f, 0.104209f, 0.363863f, 0.062367f, 0.374666f, 0.000667f, 0.002786f, 0.000527f, 0.004210f, 0.163286f, 0.492947f, 0.119032f, 0.499258f, +0.092136f, 0.208250f, 0.000959f, 0.268179f, 0.203022f, 0.650316f, 0.122798f, 0.681428f, 0.071374f, 0.273414f, 0.057012f, 0.420478f, 0.199994f, 0.553885f, 
0.147345f, 0.570865f, +0.035811f, 0.089124f, 0.000439f, 0.103231f, 0.108695f, 0.383369f, 0.077414f, 0.361318f, 0.025048f, 0.105652f, 0.023559f, 0.146143f, 0.141066f, 0.430180f, 0.122377f, 0.398787f, +0.135380f, 0.366775f, 0.001801f, 0.424361f, 0.436472f, 1.675822f, 0.337344f, 1.577679f, 0.102731f, 0.471707f, 0.104857f, 0.651764f, 0.299270f, 0.993471f, 0.281741f, 0.919951f, +0.000415f, 0.002027f, 0.000007f, 0.002217f, 0.000748f, 0.005176f, 0.000777f, 0.004606f, 0.000005f, 0.000041f, 0.000007f, 0.000053f, 0.001308f, 0.007829f, 0.001655f, 0.006853f, +0.000485f, 0.002174f, 0.000009f, 0.002420f, 0.001240f, 0.007871f, 0.001301f, 0.007128f, 0.000447f, 0.003394f, 0.000620f, 0.004512f, 0.001363f, 0.007485f, 0.001743f, 0.006668f, +0.000173f, 0.000854f, 0.000004f, 0.000855f, 0.000609f, 0.004256f, 0.000753f, 0.003467f, 0.000144f, 0.001203f, 0.000235f, 0.001438f, 0.000882f, 0.005333f, 0.001328f, 0.004273f, +0.000957f, 0.005142f, 0.000022f, 0.005142f, 0.003578f, 0.027236f, 0.004800f, 0.022161f, 0.000864f, 0.007864f, 0.001530f, 0.009391f, 0.002739f, 0.018027f, 0.004476f, 0.014428f, +0.168930f, 0.374418f, 0.002022f, 0.396911f, 0.216224f, 0.679172f, 0.150335f, 0.585830f, 0.001512f, 0.005680f, 0.001388f, 0.007191f, 0.422406f, 1.147167f, 0.357730f, 0.973279f, +0.237216f, 0.482329f, 0.002869f, 0.520318f, 0.430383f, 1.240170f, 0.302421f, 1.088586f, 0.165276f, 0.569552f, 0.153371f, 0.733740f, 0.528584f, 1.316924f, 0.452419f, 1.137001f, +0.084913f, 0.190107f, 0.001209f, 0.184459f, 0.212210f, 0.673317f, 0.175583f, 0.531590f, 0.053418f, 0.202693f, 0.058369f, 0.234868f, 0.343371f, 0.941970f, 0.346058f, 0.731498f, +0.254301f, 0.619779f, 0.003930f, 0.600700f, 0.675066f, 2.331647f, 0.606138f, 1.838821f, 0.173560f, 0.716908f, 0.205804f, 0.829789f, 0.577083f, 1.723355f, 0.631149f, 1.336812f, +0.060412f, 0.153524f, 0.000884f, 0.163438f, 0.084583f, 0.304622f, 0.071876f, 0.263872f, 0.000561f, 0.002416f, 0.000630f, 0.003072f, 0.140562f, 0.437691f, 0.145492f, 0.372922f, +0.090059f, 0.209956f, 
0.001331f, 0.227455f, 0.178731f, 0.590510f, 0.153498f, 0.520534f, 0.065101f, 0.257227f, 0.073837f, 0.332786f, 0.186732f, 0.533416f, 0.195339f, 0.462495f, +0.000938f, 0.002408f, 0.000016f, 0.002347f, 0.002565f, 0.009331f, 0.002594f, 0.007398f, 0.000612f, 0.002664f, 0.000818f, 0.003100f, 0.003530f, 0.011104f, 0.004349f, 0.008660f, +0.144041f, 0.402512f, 0.002721f, 0.391778f, 0.418260f, 1.656399f, 0.459005f, 1.311843f, 0.101997f, 0.483062f, 0.147821f, 0.561496f, 0.304157f, 1.041446f, 0.406572f, 0.811283f, +0.142685f, 0.499232f, 0.001976f, 0.471713f, 0.189168f, 0.937989f, 0.152229f, 0.721155f, 0.001423f, 0.008441f, 0.001513f, 0.009524f, 0.303310f, 1.300342f, 0.297308f, 0.983348f, +0.298566f, 0.958325f, 0.004180f, 0.921463f, 0.561077f, 2.552250f, 0.456327f, 1.996843f, 0.231837f, 1.261195f, 0.249009f, 1.448206f, 0.565581f, 2.224413f, 0.560295f, 1.711808f, +0.117492f, 0.415250f, 0.001937f, 0.359129f, 0.304142f, 1.523361f, 0.291264f, 1.072012f, 0.082376f, 0.493434f, 0.104182f, 0.509627f, 0.403911f, 1.749175f, 0.471157f, 1.210734f, +0.417088f, 1.604688f, 0.007461f, 1.386280f, 1.146828f, 6.253006f, 1.191843f, 4.395464f, 0.317254f, 2.068696f, 0.435419f, 2.134223f, 0.804642f, 3.793268f, 1.018574f, 2.622698f, +0.031159f, 0.216123f, 0.000749f, 0.176496f, 0.047885f, 0.470696f, 0.066882f, 0.312774f, 0.000370f, 0.004345f, 0.000682f, 0.004237f, 0.085725f, 0.728566f, 0.145844f, 0.476188f, +0.055479f, 0.353013f, 0.001348f, 0.293370f, 0.120852f, 1.089795f, 0.170595f, 0.736928f, 0.051222f, 0.552386f, 0.095487f, 0.548214f, 0.136018f, 1.060489f, 0.233871f, 0.705351f, +0.020027f, 0.140314f, 0.000573f, 0.104882f, 0.060093f, 0.596676f, 0.099883f, 0.362906f, 0.016695f, 0.198245f, 0.036647f, 0.176965f, 0.089105f, 0.764959f, 0.180402f, 0.457628f, +0.104067f, 0.793717f, 0.003231f, 0.592632f, 0.331686f, 3.585154f, 0.598285f, 2.178127f, 0.094119f, 1.216619f, 0.224200f, 1.084818f, 0.259837f, 2.428296f, 0.570887f, 1.451095f, +0.207479f, 0.653042f, 0.003339f, 0.516895f, 0.226485f, 1.010265f, 
0.211738f, 0.650657f, 0.001862f, 0.009931f, 0.002298f, 0.009387f, 0.452756f, 1.746143f, 0.515576f, 1.106154f, +0.443557f, 1.280758f, 0.007214f, 1.031616f, 0.686326f, 2.808508f, 0.648472f, 1.840697f, 0.309776f, 1.515972f, 0.386534f, 1.458227f, 0.862556f, 3.051776f, 0.992697f, 1.967334f, +0.160755f, 0.511104f, 0.003078f, 0.370285f, 0.342633f, 1.543835f, 0.381196f, 0.910088f, 0.101371f, 0.546239f, 0.148940f, 0.472599f, 0.567315f, 2.210121f, 0.768797f, 1.281496f, +0.452080f, 1.564671f, 0.009395f, 1.132320f, 1.023492f, 5.020184f, 1.235700f, 2.956115f, 0.309279f, 1.814195f, 0.493126f, 1.567880f, 0.895310f, 3.796899f, 1.316652f, 2.199124f, +0.023864f, 0.043458f, 0.000355f, 0.077115f, 0.037942f, 0.097919f, 0.032822f, 0.141382f, 0.000301f, 0.000929f, 0.000344f, 0.001968f, 0.069898f, 0.155966f, 0.073649f, 0.221500f, +0.032016f, 0.053485f, 0.000482f, 0.096581f, 0.072153f, 0.170824f, 0.063080f, 0.250995f, 0.031425f, 0.088975f, 0.036282f, 0.191872f, 0.083565f, 0.171058f, 0.088988f, 0.247216f, +0.000368f, 0.000678f, 0.000007f, 0.001101f, 0.001143f, 0.002981f, 0.001177f, 0.003940f, 0.000326f, 0.001018f, 0.000444f, 0.001974f, 0.001745f, 0.003933f, 0.002188f, 0.005112f, +0.046480f, 0.093073f, 0.000894f, 0.151000f, 0.153264f, 0.434937f, 0.171217f, 0.574164f, 0.044690f, 0.151669f, 0.065932f, 0.293855f, 0.123551f, 0.303147f, 0.168120f, 0.393624f, +0.050260f, 0.126014f, 0.000709f, 0.198466f, 0.075668f, 0.268863f, 0.061987f, 0.344552f, 0.000681f, 0.002893f, 0.000737f, 0.005441f, 0.134494f, 0.413185f, 0.134203f, 0.520820f, +0.094646f, 0.217692f, 0.001349f, 0.348900f, 0.201977f, 0.658371f, 0.167221f, 0.858588f, 0.099791f, 0.389010f, 0.109109f, 0.744564f, 0.225698f, 0.636089f, 0.227607f, 0.815925f, +0.041133f, 0.104174f, 0.000690f, 0.150173f, 0.120913f, 0.433980f, 0.117875f, 0.509049f, 0.039159f, 0.168084f, 0.050415f, 0.289363f, 0.178007f, 0.552401f, 0.211375f, 0.637328f, +0.120013f, 0.330872f, 0.002185f, 0.476445f, 0.374728f, 1.464117f, 0.396436f, 1.715475f, 0.123953f, 0.579181f, 
0.173178f, 0.995979f, 0.291457f, 0.984588f, 0.375578f, 1.134704f, +0.008693f, 0.043205f, 0.000213f, 0.058811f, 0.015170f, 0.106854f, 0.021569f, 0.118351f, 0.000140f, 0.001179f, 0.000263f, 0.001917f, 0.030105f, 0.183347f, 0.052139f, 0.199745f, +0.013929f, 0.063509f, 0.000345f, 0.087974f, 0.034455f, 0.222643f, 0.049511f, 0.250947f, 0.017461f, 0.134939f, 0.033137f, 0.223223f, 0.042988f, 0.240174f, 0.075243f, 0.266267f, +0.005553f, 0.027878f, 0.000162f, 0.034734f, 0.018921f, 0.134624f, 0.032014f, 0.136481f, 0.006285f, 0.053483f, 0.014045f, 0.079578f, 0.031101f, 0.191327f, 0.064098f, 0.190785f, +0.023715f, 0.129614f, 0.000750f, 0.161311f, 0.085835f, 0.664832f, 0.157608f, 0.673256f, 0.029123f, 0.269767f, 0.070622f, 0.400945f, 0.074540f, 0.499183f, 0.166715f, 0.497218f, +0.149244f, 0.336616f, 0.002445f, 0.444108f, 0.185004f, 0.591352f, 0.176067f, 0.634828f, 0.001818f, 0.006950f, 0.002285f, 0.010951f, 0.409977f, 1.133039f, 0.475253f, 1.196392f, +0.287136f, 0.594121f, 0.004754f, 0.797661f, 0.504530f, 1.479453f, 0.485271f, 1.616221f, 0.272292f, 0.954877f, 0.345868f, 1.530997f, 0.702907f, 1.782102f, 0.823501f, 1.914921f, +0.114927f, 0.261840f, 0.002240f, 0.316196f, 0.278166f, 0.898143f, 0.315036f, 0.882513f, 0.098405f, 0.379978f, 0.147182f, 0.547976f, 0.510568f, 1.425329f, 0.704333f, 1.377557f, +0.265639f, 0.658824f, 0.005620f, 0.794710f, 0.682935f, 2.400403f, 0.839353f, 2.356019f, 0.246761f, 1.037240f, 0.400516f, 1.494174f, 0.662252f, 2.012556f, 0.991419f, 1.942950f, +0.095690f, 0.233483f, 0.001604f, 0.330964f, 0.158188f, 0.547003f, 0.154095f, 0.630917f, 0.000929f, 0.003842f, 0.001195f, 0.006504f, 0.200347f, 0.598991f, 0.237722f, 0.679550f, +0.099671f, 0.223105f, 0.001689f, 0.321829f, 0.233557f, 0.740899f, 0.229938f, 0.869623f, 0.075329f, 0.285778f, 0.097940f, 0.492298f, 0.185967f, 0.510061f, 0.223009f, 0.588861f, +0.001098f, 0.002705f, 0.000022f, 0.003510f, 0.003543f, 0.012375f, 0.004107f, 0.013065f, 0.000749f, 0.003129f, 0.001147f, 0.004848f, 0.003717f, 0.011224f, 
0.005248f, 0.011655f, +0.153258f, 0.411199f, 0.003319f, 0.532922f, 0.525452f, 1.997976f, 0.661027f, 2.106966f, 0.113463f, 0.515952f, 0.188503f, 0.798553f, 0.291212f, 0.957383f, 0.446234f, 0.993051f, +0.265663f, 0.892469f, 0.004218f, 1.122843f, 0.415864f, 1.979887f, 0.383635f, 2.026851f, 0.002771f, 0.015776f, 0.003376f, 0.023703f, 0.508176f, 2.091818f, 0.571018f, 2.106319f, +0.388414f, 1.197035f, 0.006233f, 1.532577f, 0.861846f, 3.764163f, 0.803522f, 3.921386f, 0.315334f, 1.647056f, 0.388256f, 2.518297f, 0.662103f, 2.500256f, 0.751904f, 2.561973f, +0.161573f, 0.548284f, 0.003053f, 0.631389f, 0.493839f, 2.374929f, 0.542140f, 2.225347f, 0.118439f, 0.681173f, 0.171711f, 0.936767f, 0.499826f, 2.078283f, 0.668366f, 1.915449f, +0.521647f, 1.926983f, 0.010697f, 2.216606f, 1.693552f, 8.865998f, 2.017598f, 8.298391f, 0.414848f, 2.597265f, 0.652685f, 3.567877f, 0.905580f, 4.098983f, 1.314110f, 3.773647f, +0.045137f, 0.300593f, 0.001244f, 0.326862f, 0.081901f, 0.772984f, 0.131135f, 0.683929f, 0.000560f, 0.006318f, 0.001184f, 0.008204f, 0.111744f, 0.911849f, 0.217931f, 0.793565f, +0.056153f, 0.343062f, 0.001564f, 0.379618f, 0.144427f, 1.250482f, 0.233710f, 1.125921f, 0.054204f, 0.561250f, 0.115834f, 0.741676f, 0.123884f, 0.927391f, 0.244180f, 0.821320f, +0.021427f, 0.144140f, 0.000703f, 0.143461f, 0.075913f, 0.723725f, 0.144645f, 0.586111f, 0.018675f, 0.212921f, 0.046993f, 0.253077f, 0.085787f, 0.707126f, 0.199102f, 0.563277f, +0.101263f, 0.741550f, 0.003604f, 0.737243f, 0.381079f, 3.954886f, 0.787972f, 3.199335f, 0.095751f, 1.188397f, 0.261468f, 1.410960f, 0.227516f, 2.041511f, 0.573030f, 1.624412f, +0.358106f, 1.082227f, 0.006606f, 1.140591f, 0.461562f, 1.976805f, 0.494658f, 1.695240f, 0.003359f, 0.017206f, 0.004754f, 0.021656f, 0.703198f, 2.603947f, 0.917955f, 2.196435f, +0.534922f, 1.483019f, 0.009973f, 1.590553f, 0.977289f, 3.839784f, 1.058520f, 3.350919f, 0.390590f, 1.835284f, 0.558697f, 2.350648f, 0.936060f, 3.179858f, 1.234948f, 2.729503f, +0.204931f, 0.625592f, 
0.004499f, 0.603488f, 0.515732f, 2.231179f, 0.657746f, 1.751328f, 0.135110f, 0.699033f, 0.227563f, 0.805300f, 0.650793f, 2.434296f, 1.010988f, 1.879426f, +0.524143f, 1.741791f, 0.012486f, 1.678389f, 1.401105f, 6.598486f, 1.939163f, 5.173643f, 0.374902f, 2.111492f, 0.685236f, 2.429791f, 0.934079f, 3.803449f, 1.574694f, 2.933247f, +0.000875f, 0.001566f, 0.000010f, 0.002233f, 0.001468f, 0.003722f, 0.000927f, 0.004318f, 0.000010f, 0.000029f, 0.000008f, 0.000050f, 0.002960f, 0.006490f, 0.002279f, 0.007406f, +0.001133f, 0.001860f, 0.000012f, 0.002699f, 0.002692f, 0.006264f, 0.001720f, 0.007395f, 0.000980f, 0.002725f, 0.000826f, 0.004722f, 0.003414f, 0.006867f, 0.002656f, 0.007974f, +0.000013f, 0.000023f, 0.000000f, 0.000030f, 0.000042f, 0.000108f, 0.000032f, 0.000115f, 0.000010f, 0.000031f, 0.000010f, 0.000048f, 0.000070f, 0.000156f, 0.000065f, 0.000163f, +0.002086f, 0.004105f, 0.000029f, 0.005351f, 0.007254f, 0.020229f, 0.005920f, 0.021457f, 0.001767f, 0.005893f, 0.001904f, 0.009173f, 0.006402f, 0.015436f, 0.006364f, 0.016104f, +0.001527f, 0.003763f, 0.000016f, 0.004762f, 0.002425f, 0.008466f, 0.001451f, 0.008717f, 0.000018f, 0.000076f, 0.000014f, 0.000115f, 0.004718f, 0.014243f, 0.003439f, 0.014426f, +0.002774f, 0.006271f, 0.000029f, 0.008075f, 0.006243f, 0.019999f, 0.003776f, 0.020955f, 0.002577f, 0.009871f, 0.002058f, 0.015180f, 0.007638f, 0.021153f, 0.005627f, 0.021802f, +0.001192f, 0.002966f, 0.000015f, 0.003436f, 0.003694f, 0.013030f, 0.002631f, 0.012280f, 0.000999f, 0.004216f, 0.000940f, 0.005831f, 0.005954f, 0.018158f, 0.005165f, 0.016833f, +0.004462f, 0.012089f, 0.000059f, 0.013987f, 0.014692f, 0.056409f, 0.011355f, 0.053105f, 0.004060f, 0.018640f, 0.004144f, 0.025755f, 0.012510f, 0.041530f, 0.011777f, 0.038456f, +0.000014f, 0.000069f, 0.000000f, 0.000075f, 0.000026f, 0.000179f, 0.000027f, 0.000160f, 0.000000f, 0.000002f, 0.000000f, 0.000002f, 0.000056f, 0.000337f, 0.000071f, 0.000295f, +0.000022f, 0.000097f, 0.000000f, 0.000109f, 0.000057f, 0.000360f, 
0.000060f, 0.000326f, 0.000024f, 0.000182f, 0.000033f, 0.000243f, 0.000078f, 0.000426f, 0.000099f, 0.000379f, +0.000009f, 0.000042f, 0.000000f, 0.000042f, 0.000031f, 0.000215f, 0.000038f, 0.000175f, 0.000009f, 0.000071f, 0.000014f, 0.000085f, 0.000055f, 0.000335f, 0.000083f, 0.000269f, +0.000047f, 0.000252f, 0.000001f, 0.000252f, 0.000179f, 0.001365f, 0.000241f, 0.001111f, 0.000051f, 0.000463f, 0.000090f, 0.000553f, 0.000171f, 0.001122f, 0.000279f, 0.000898f, +0.003783f, 0.008386f, 0.000045f, 0.008889f, 0.004946f, 0.015534f, 0.003439f, 0.013399f, 0.000041f, 0.000153f, 0.000037f, 0.000193f, 0.011998f, 0.032585f, 0.010161f, 0.027646f, +0.007022f, 0.014278f, 0.000085f, 0.015403f, 0.013011f, 0.037493f, 0.009143f, 0.032910f, 0.005866f, 0.020214f, 0.005443f, 0.026041f, 0.019845f, 0.049443f, 0.016986f, 0.042688f, +0.002778f, 0.006220f, 0.000040f, 0.006035f, 0.007091f, 0.022497f, 0.005867f, 0.017762f, 0.002095f, 0.007951f, 0.002290f, 0.009213f, 0.014248f, 0.039087f, 0.014360f, 0.030354f, +0.008240f, 0.020082f, 0.000127f, 0.019464f, 0.022338f, 0.077156f, 0.020057f, 0.060848f, 0.006742f, 0.027850f, 0.007995f, 0.032235f, 0.023715f, 0.070821f, 0.025937f, 0.054936f, +0.001094f, 0.002780f, 0.000016f, 0.002959f, 0.001564f, 0.005632f, 0.001329f, 0.004879f, 0.000012f, 0.000052f, 0.000014f, 0.000067f, 0.003228f, 0.010050f, 0.003341f, 0.008563f, +0.002155f, 0.005024f, 0.000032f, 0.005443f, 0.004368f, 0.014432f, 0.003751f, 0.012721f, 0.001868f, 0.007380f, 0.002118f, 0.009548f, 0.005667f, 0.016190f, 0.005929f, 0.014037f, +0.000025f, 0.000064f, 0.000000f, 0.000062f, 0.000069f, 0.000252f, 0.000070f, 0.000200f, 0.000019f, 0.000084f, 0.000026f, 0.000098f, 0.000118f, 0.000372f, 0.000146f, 0.000290f, +0.003773f, 0.010543f, 0.000071f, 0.010262f, 0.011189f, 0.044309f, 0.012278f, 0.035092f, 0.003203f, 0.015170f, 0.004642f, 0.017633f, 0.010104f, 0.034598f, 0.013507f, 0.026952f, +0.003197f, 0.011187f, 0.000044f, 0.010570f, 0.004329f, 0.021466f, 0.003484f, 0.016504f, 0.000038f, 0.000227f, 
0.000041f, 0.000256f, 0.008620f, 0.036956f, 0.008450f, 0.027947f, +0.008843f, 0.028384f, 0.000124f, 0.027293f, 0.016972f, 0.077201f, 0.013803f, 0.060401f, 0.008233f, 0.044786f, 0.008842f, 0.051427f, 0.021246f, 0.083560f, 0.021048f, 0.064304f, +0.003846f, 0.013593f, 0.000063f, 0.011756f, 0.010168f, 0.050928f, 0.009737f, 0.035838f, 0.003233f, 0.019366f, 0.004089f, 0.020001f, 0.016769f, 0.072622f, 0.019561f, 0.050267f, +0.013522f, 0.052024f, 0.000242f, 0.044943f, 0.037970f, 0.207029f, 0.039460f, 0.145528f, 0.012331f, 0.080408f, 0.016924f, 0.082955f, 0.033085f, 0.155969f, 0.041881f, 0.107839f, +0.001040f, 0.007211f, 0.000025f, 0.005889f, 0.001632f, 0.016039f, 0.002279f, 0.010658f, 0.000015f, 0.000174f, 0.000027f, 0.000170f, 0.003628f, 0.030831f, 0.006172f, 0.020151f, +0.002447f, 0.015568f, 0.000059f, 0.012938f, 0.005443f, 0.049083f, 0.007683f, 0.033190f, 0.002708f, 0.029207f, 0.005049f, 0.028986f, 0.007608f, 0.059317f, 0.013081f, 0.039453f, +0.000976f, 0.006839f, 0.000028f, 0.005112f, 0.002991f, 0.029701f, 0.004972f, 0.018065f, 0.000976f, 0.011585f, 0.002142f, 0.010341f, 0.005508f, 0.047289f, 0.011152f, 0.028290f, +0.005024f, 0.038314f, 0.000156f, 0.028608f, 0.016351f, 0.176740f, 0.029494f, 0.107377f, 0.005447f, 0.070411f, 0.012975f, 0.062783f, 0.015908f, 0.148667f, 0.034951f, 0.088840f, +0.004571f, 0.014386f, 0.000074f, 0.011387f, 0.005095f, 0.022729f, 0.004764f, 0.014638f, 0.000049f, 0.000262f, 0.000061f, 0.000248f, 0.012650f, 0.048786f, 0.014405f, 0.030905f, +0.012915f, 0.037293f, 0.000210f, 0.030038f, 0.020409f, 0.083515f, 0.019283f, 0.054736f, 0.010814f, 0.052922f, 0.013494f, 0.050906f, 0.031854f, 0.112700f, 0.036660f, 0.072652f, +0.005173f, 0.016448f, 0.000099f, 0.011916f, 0.011261f, 0.050738f, 0.012528f, 0.029910f, 0.003911f, 0.021076f, 0.005747f, 0.018234f, 0.023155f, 0.090206f, 0.031378f, 0.052304f, +0.014408f, 0.049868f, 0.000299f, 0.036088f, 0.033313f, 0.163399f, 0.040220f, 0.096217f, 0.011818f, 0.069322f, 0.018843f, 0.059910f, 0.036190f, 0.153476f, 
0.053221f, 0.088892f, +0.000322f, 0.000586f, 0.000005f, 0.001039f, 0.000522f, 0.001347f, 0.000452f, 0.001945f, 0.000005f, 0.000015f, 0.000006f, 0.000032f, 0.001194f, 0.002665f, 0.001259f, 0.003785f, +0.000570f, 0.000953f, 0.000009f, 0.001720f, 0.001312f, 0.003107f, 0.001147f, 0.004565f, 0.000671f, 0.001900f, 0.000775f, 0.004097f, 0.001887f, 0.003864f, 0.002010f, 0.005584f, +0.000007f, 0.000013f, 0.000000f, 0.000022f, 0.000023f, 0.000060f, 0.000024f, 0.000079f, 0.000008f, 0.000024f, 0.000010f, 0.000047f, 0.000044f, 0.000098f, 0.000055f, 0.000128f, +0.000906f, 0.001814f, 0.000017f, 0.002943f, 0.003051f, 0.008658f, 0.003408f, 0.011430f, 0.001044f, 0.003545f, 0.001541f, 0.006868f, 0.003054f, 0.007495f, 0.004156f, 0.009731f, +0.000838f, 0.002101f, 0.000012f, 0.003310f, 0.001289f, 0.004579f, 0.001056f, 0.005868f, 0.000014f, 0.000058f, 0.000015f, 0.000109f, 0.002845f, 0.008739f, 0.002838f, 0.011015f, +0.002086f, 0.004798f, 0.000030f, 0.007690f, 0.004547f, 0.014820f, 0.003764f, 0.019327f, 0.002637f, 0.010280f, 0.002883f, 0.019676f, 0.006310f, 0.017782f, 0.006363f, 0.022810f, +0.001002f, 0.002538f, 0.000017f, 0.003658f, 0.003008f, 0.010797f, 0.002933f, 0.012665f, 0.001144f, 0.004909f, 0.001472f, 0.008452f, 0.005500f, 0.017068f, 0.006531f, 0.019692f, +0.002895f, 0.007983f, 0.000053f, 0.011495f, 0.009233f, 0.036075f, 0.009768f, 0.042268f, 0.003585f, 0.016753f, 0.005009f, 0.028809f, 0.008918f, 0.030128f, 0.011492f, 0.034721f, +0.000216f, 0.001073f, 0.000005f, 0.001460f, 0.000385f, 0.002710f, 0.000547f, 0.003001f, 0.000004f, 0.000035f, 0.000008f, 0.000057f, 0.000948f, 0.005774f, 0.001642f, 0.006290f, +0.000457f, 0.002084f, 0.000011f, 0.002887f, 0.001155f, 0.007462f, 0.001659f, 0.008411f, 0.000687f, 0.005310f, 0.001304f, 0.008784f, 0.001789f, 0.009997f, 0.003132f, 0.011083f, +0.000201f, 0.001011f, 0.000006f, 0.001260f, 0.000701f, 0.004987f, 0.001186f, 0.005056f, 0.000273f, 0.002326f, 0.000611f, 0.003461f, 0.001431f, 0.008802f, 0.002949f, 0.008777f, +0.000852f, 0.004656f, 
0.000027f, 0.005795f, 0.003149f, 0.024391f, 0.005782f, 0.024700f, 0.001254f, 0.011619f, 0.003042f, 0.017269f, 0.003396f, 0.022743f, 0.007596f, 0.022654f, +0.002447f, 0.005519f, 0.000040f, 0.007281f, 0.003097f, 0.009901f, 0.002948f, 0.010629f, 0.000036f, 0.000137f, 0.000045f, 0.000215f, 0.008524f, 0.023559f, 0.009882f, 0.024876f, +0.006222f, 0.012874f, 0.000103f, 0.017285f, 0.011165f, 0.032739f, 0.010739f, 0.035766f, 0.007074f, 0.024807f, 0.008985f, 0.039775f, 0.019318f, 0.048976f, 0.022632f, 0.052627f, +0.002752f, 0.006271f, 0.000054f, 0.007573f, 0.006803f, 0.021967f, 0.007705f, 0.021584f, 0.002826f, 0.010910f, 0.004226f, 0.015734f, 0.015508f, 0.043293f, 0.021394f, 0.041842f, +0.006300f, 0.015626f, 0.000133f, 0.018849f, 0.016542f, 0.058143f, 0.020331f, 0.057068f, 0.007017f, 0.029495f, 0.011389f, 0.042489f, 0.019921f, 0.060540f, 0.029823f, 0.058446f, +0.002303f, 0.005620f, 0.000039f, 0.007966f, 0.003888f, 0.013445f, 0.003788f, 0.015508f, 0.000027f, 0.000111f, 0.000034f, 0.000188f, 0.006116f, 0.018284f, 0.007257f, 0.020743f, +0.003171f, 0.007097f, 0.000054f, 0.010238f, 0.007588f, 0.024071f, 0.007470f, 0.028253f, 0.002873f, 0.010900f, 0.003735f, 0.018777f, 0.007503f, 0.020579f, 0.008998f, 0.023759f, +0.000039f, 0.000095f, 0.000001f, 0.000123f, 0.000127f, 0.000444f, 0.000147f, 0.000469f, 0.000032f, 0.000132f, 0.000048f, 0.000204f, 0.000166f, 0.000501f, 0.000234f, 0.000520f, +0.005337f, 0.014318f, 0.000116f, 0.018557f, 0.018685f, 0.071049f, 0.023507f, 0.074925f, 0.004737f, 0.021540f, 0.007869f, 0.033337f, 0.012861f, 0.042280f, 0.019707f, 0.043856f, +0.007914f, 0.026586f, 0.000126f, 0.033449f, 0.012651f, 0.060233f, 0.011671f, 0.061661f, 0.000099f, 0.000563f, 0.000121f, 0.000847f, 0.019199f, 0.079031f, 0.021574f, 0.079579f, +0.015293f, 0.047132f, 0.000245f, 0.060344f, 0.034655f, 0.151360f, 0.032310f, 0.157682f, 0.014886f, 0.077752f, 0.018328f, 0.118880f, 0.033064f, 0.124856f, 0.037548f, 0.127938f, +0.007031f, 0.023860f, 0.000133f, 0.027476f, 0.021947f, 0.105546f, 
0.024094f, 0.098898f, 0.006179f, 0.035539f, 0.008959f, 0.048875f, 0.027586f, 0.114704f, 0.036888f, 0.105717f, +0.022482f, 0.083048f, 0.000461f, 0.095530f, 0.074539f, 0.390222f, 0.088801f, 0.365240f, 0.021435f, 0.134202f, 0.033725f, 0.184354f, 0.049499f, 0.224049f, 0.071829f, 0.206267f, +0.002002f, 0.013333f, 0.000055f, 0.014498f, 0.003710f, 0.035014f, 0.005940f, 0.030980f, 0.000030f, 0.000336f, 0.000063f, 0.000436f, 0.006286f, 0.051296f, 0.012260f, 0.044642f, +0.003292f, 0.020113f, 0.000092f, 0.022256f, 0.008647f, 0.074870f, 0.013993f, 0.067412f, 0.003810f, 0.039450f, 0.008142f, 0.052132f, 0.009211f, 0.068956f, 0.018156f, 0.061069f, +0.001388f, 0.009340f, 0.000046f, 0.009296f, 0.005023f, 0.047891f, 0.009572f, 0.038784f, 0.001451f, 0.016541f, 0.003651f, 0.019660f, 0.007050f, 0.058111f, 0.016362f, 0.046290f, +0.006498f, 0.047586f, 0.000231f, 0.047309f, 0.024974f, 0.259181f, 0.051639f, 0.209667f, 0.007367f, 0.091430f, 0.020116f, 0.108553f, 0.018517f, 0.166152f, 0.046637f, 0.132206f, +0.010487f, 0.031693f, 0.000193f, 0.033402f, 0.013804f, 0.059121f, 0.014794f, 0.050700f, 0.000118f, 0.000604f, 0.000167f, 0.000760f, 0.026118f, 0.096715f, 0.034094f, 0.081579f, +0.020706f, 0.057404f, 0.000386f, 0.061567f, 0.038632f, 0.151787f, 0.041844f, 0.132463f, 0.018126f, 0.085171f, 0.025928f, 0.109088f, 0.045953f, 0.156106f, 0.060626f, 0.133997f, +0.008767f, 0.026763f, 0.000192f, 0.025818f, 0.022532f, 0.097479f, 0.028737f, 0.076515f, 0.006930f, 0.035854f, 0.011672f, 0.041304f, 0.035311f, 0.132080f, 0.054854f, 0.101974f, +0.022207f, 0.073796f, 0.000529f, 0.071110f, 0.060624f, 0.285506f, 0.083904f, 0.223855f, 0.019044f, 0.107256f, 0.034807f, 0.123424f, 0.050192f, 0.204377f, 0.084616f, 0.157617f, +0.048474f, 0.086744f, 0.000527f, 0.123678f, 0.065505f, 0.166124f, 0.041397f, 0.192725f, 0.000420f, 0.001274f, 0.000351f, 0.002170f, 0.132829f, 0.291256f, 0.102249f, 0.332353f, +0.071728f, 0.117753f, 0.000789f, 0.170849f, 0.137395f, 0.319653f, 0.087754f, 0.377377f, 0.048394f, 0.134648f, 
0.040820f, 0.233304f, 0.175156f, 0.352333f, 0.136268f, 0.409137f, +0.000576f, 0.001042f, 0.000007f, 0.001359f, 0.001520f, 0.003895f, 0.001143f, 0.004136f, 0.000351f, 0.001075f, 0.000349f, 0.001676f, 0.002553f, 0.005656f, 0.002339f, 0.005907f, +0.085202f, 0.167657f, 0.001197f, 0.218553f, 0.238790f, 0.665910f, 0.194887f, 0.706329f, 0.056310f, 0.187795f, 0.060692f, 0.292350f, 0.211886f, 0.510884f, 0.210639f, 0.533007f, +0.061546f, 0.151637f, 0.000634f, 0.191891f, 0.078755f, 0.274984f, 0.047133f, 0.283148f, 0.000573f, 0.002393f, 0.000453f, 0.003616f, 0.154082f, 0.465160f, 0.112323f, 0.471116f, +0.127833f, 0.288932f, 0.001331f, 0.372080f, 0.231863f, 0.742701f, 0.140243f, 0.778232f, 0.092645f, 0.354898f, 0.074003f, 0.545791f, 0.285194f, 0.789844f, 0.210116f, 0.814059f, +0.038789f, 0.096535f, 0.000476f, 0.111816f, 0.096912f, 0.341812f, 0.069022f, 0.322151f, 0.025383f, 0.107064f, 0.023874f, 0.148096f, 0.157045f, 0.478910f, 0.136239f, 0.443960f, +0.132626f, 0.359312f, 0.001764f, 0.415726f, 0.351971f, 1.351382f, 0.272034f, 1.272239f, 0.094156f, 0.432331f, 0.096104f, 0.597358f, 0.301333f, 1.000318f, 0.283682f, 0.926292f, +0.000503f, 0.002459f, 0.000009f, 0.002689f, 0.000747f, 0.005169f, 0.000776f, 0.004600f, 0.000006f, 0.000046f, 0.000008f, 0.000060f, 0.001631f, 0.009762f, 0.002064f, 0.008546f, +0.000890f, 0.003987f, 0.000016f, 0.004437f, 0.001871f, 0.011879f, 0.001964f, 0.010758f, 0.000767f, 0.005822f, 0.001063f, 0.007739f, 0.002569f, 0.014105f, 0.003285f, 0.012565f, +0.000248f, 0.001222f, 0.000005f, 0.001223f, 0.000717f, 0.005015f, 0.000887f, 0.004085f, 0.000193f, 0.001611f, 0.000315f, 0.001926f, 0.001298f, 0.007845f, 0.001954f, 0.006286f, +0.001240f, 0.006657f, 0.000029f, 0.006657f, 0.003813f, 0.029023f, 0.005115f, 0.023615f, 0.001046f, 0.009524f, 0.001854f, 0.011374f, 0.003645f, 0.023987f, 0.005956f, 0.019197f, +0.171470f, 0.380046f, 0.002052f, 0.402877f, 0.180660f, 0.567463f, 0.125608f, 0.489474f, 0.001436f, 0.005394f, 0.001318f, 0.006829f, 0.440678f, 1.196790f, 
0.373204f, 1.015380f, +0.363868f, 0.739849f, 0.004401f, 0.798121f, 0.543416f, 1.565882f, 0.381848f, 1.374486f, 0.237181f, 0.817345f, 0.220098f, 1.052965f, 0.833345f, 2.076212f, 0.713265f, 1.792552f, +0.101684f, 0.227656f, 0.001448f, 0.220892f, 0.209183f, 0.663710f, 0.173078f, 0.524006f, 0.059847f, 0.227087f, 0.065393f, 0.263134f, 0.422626f, 1.159389f, 0.425932f, 0.900337f, +0.275428f, 0.671270f, 0.004257f, 0.650606f, 0.601846f, 2.078747f, 0.540394f, 1.639374f, 0.175866f, 0.726435f, 0.208538f, 0.840815f, 0.642407f, 1.918433f, 0.702593f, 1.488134f, +0.092645f, 0.235435f, 0.001355f, 0.250638f, 0.106771f, 0.384532f, 0.090731f, 0.333093f, 0.000805f, 0.003467f, 0.000903f, 0.004408f, 0.221551f, 0.689878f, 0.229321f, 0.587792f, +0.208709f, 0.486566f, 0.003085f, 0.527118f, 0.340950f, 1.126468f, 0.292815f, 0.992980f, 0.141148f, 0.557702f, 0.160087f, 0.721525f, 0.444777f, 1.270548f, 0.465280f, 1.101619f, +0.001697f, 0.004357f, 0.000030f, 0.004246f, 0.003820f, 0.013896f, 0.003863f, 0.011018f, 0.001037f, 0.004510f, 0.001384f, 0.005248f, 0.006565f, 0.020649f, 0.008086f, 0.016103f, +0.235701f, 0.658647f, 0.004452f, 0.641082f, 0.563377f, 2.231093f, 0.618258f, 1.766991f, 0.156147f, 0.739520f, 0.226299f, 0.859595f, 0.511545f, 1.751549f, 0.683790f, 1.364451f, +0.197093f, 0.689597f, 0.002730f, 0.651584f, 0.215089f, 1.066520f, 0.173089f, 0.819974f, 0.001839f, 0.010908f, 0.001955f, 0.012308f, 0.430617f, 1.846127f, 0.422096f, 1.396084f, +0.623236f, 2.000441f, 0.008725f, 1.923494f, 0.964081f, 4.385450f, 0.784092f, 3.431112f, 0.452761f, 2.463017f, 0.486296f, 2.828234f, 1.213441f, 4.772428f, 1.202100f, 3.672645f, +0.191471f, 0.676711f, 0.003156f, 0.585254f, 0.407988f, 2.043502f, 0.390714f, 1.438043f, 0.125594f, 0.752307f, 0.158840f, 0.776996f, 0.676536f, 2.929803f, 0.789171f, 2.027934f, +0.614754f, 2.365181f, 0.010997f, 2.043264f, 1.391394f, 7.586488f, 1.446009f, 5.332817f, 0.437476f, 2.852613f, 0.600418f, 2.942971f, 1.218955f, 5.746435f, 1.543041f, 3.973134f, +0.056877f, 0.394501f, 
0.001367f, 0.322168f, 0.071949f, 0.707238f, 0.100493f, 0.469954f, 0.000631f, 0.007420f, 0.001164f, 0.007236f, 0.160830f, 1.366870f, 0.273619f, 0.893381f, +0.153036f, 0.973772f, 0.003718f, 0.809248f, 0.274410f, 2.474513f, 0.387358f, 1.673285f, 0.132188f, 1.425548f, 0.246425f, 1.414781f, 0.385633f, 3.006659f, 0.663063f, 1.999784f, +0.043128f, 0.302168f, 0.001234f, 0.225865f, 0.106524f, 1.057705f, 0.177059f, 0.643310f, 0.033636f, 0.399414f, 0.073834f, 0.356539f, 0.197224f, 1.693156f, 0.399300f, 1.012912f, +0.202694f, 1.545942f, 0.006293f, 1.154285f, 0.531782f, 5.747959f, 0.959210f, 3.492118f, 0.171505f, 2.216943f, 0.408540f, 1.976774f, 0.520164f, 4.861167f, 1.142850f, 2.904923f, +0.316851f, 0.997293f, 0.005099f, 0.789376f, 0.284708f, 1.269974f, 0.266170f, 0.817922f, 0.002660f, 0.014188f, 0.003284f, 0.013411f, 0.710652f, 2.740769f, 0.809255f, 1.736234f, +1.023648f, 2.955754f, 0.016648f, 2.380780f, 1.303797f, 5.335254f, 1.231887f, 3.496727f, 0.668839f, 3.273142f, 0.834567f, 3.148464f, 2.045973f, 7.238776f, 2.354667f, 4.666493f, +0.289632f, 0.920855f, 0.005546f, 0.667142f, 0.508147f, 2.289610f, 0.565339f, 1.349721f, 0.170871f, 0.920742f, 0.251053f, 0.796614f, 1.050552f, 4.092697f, 1.423657f, 2.373073f, +0.736678f, 2.549677f, 0.015309f, 1.845148f, 1.372856f, 6.733798f, 1.657500f, 3.965170f, 0.471504f, 2.765786f, 0.751783f, 2.390273f, 1.499502f, 6.359202f, 2.205183f, 3.683182f, +0.030926f, 0.056317f, 0.000460f, 0.099933f, 0.040474f, 0.104453f, 0.035012f, 0.150815f, 0.000365f, 0.001126f, 0.000417f, 0.002386f, 0.093099f, 0.207737f, 0.098097f, 0.295025f, +0.062699f, 0.104744f, 0.000943f, 0.189141f, 0.116312f, 0.275373f, 0.101686f, 0.404610f, 0.057576f, 0.163018f, 0.066475f, 0.351542f, 0.168202f, 0.344309f, 0.179118f, 0.497601f, +0.000563f, 0.001036f, 0.000010f, 0.001683f, 0.001439f, 0.003752f, 0.001481f, 0.004958f, 0.000467f, 0.001456f, 0.000635f, 0.002824f, 0.002742f, 0.006180f, 0.003438f, 0.008033f, +0.064271f, 0.128699f, 0.001236f, 0.208800f, 0.174450f, 0.495062f, 
0.194885f, 0.653536f, 0.057814f, 0.196211f, 0.085295f, 0.380154f, 0.175594f, 0.430843f, 0.238939f, 0.559432f, +0.058668f, 0.147092f, 0.000827f, 0.231664f, 0.072705f, 0.258334f, 0.059559f, 0.331060f, 0.000743f, 0.003159f, 0.000804f, 0.005942f, 0.161357f, 0.495712f, 0.161007f, 0.624845f, +0.166953f, 0.384004f, 0.002379f, 0.615452f, 0.293273f, 0.955965f, 0.242807f, 1.246682f, 0.164687f, 0.641987f, 0.180064f, 1.228760f, 0.409196f, 1.153245f, 0.412658f, 1.479293f, +0.056645f, 0.143461f, 0.000951f, 0.206808f, 0.137065f, 0.491951f, 0.133621f, 0.577048f, 0.050452f, 0.216557f, 0.064954f, 0.372812f, 0.251955f, 0.781880f, 0.299185f, 0.902087f, +0.149479f, 0.412110f, 0.002722f, 0.593426f, 0.384192f, 1.501093f, 0.406448f, 1.758800f, 0.144438f, 0.674902f, 0.201799f, 1.160584f, 0.373112f, 1.260433f, 0.480801f, 1.452605f, +0.013408f, 0.066644f, 0.000328f, 0.090717f, 0.019261f, 0.135674f, 0.027386f, 0.150272f, 0.000202f, 0.001702f, 0.000379f, 0.002767f, 0.047729f, 0.290678f, 0.082661f, 0.316675f, +0.032468f, 0.148042f, 0.000803f, 0.205070f, 0.066111f, 0.427204f, 0.095000f, 0.481513f, 0.038080f, 0.294278f, 0.072265f, 0.486809f, 0.102992f, 0.575418f, 0.180269f, 0.637934f, +0.010105f, 0.050734f, 0.000294f, 0.063210f, 0.028343f, 0.201664f, 0.047957f, 0.204446f, 0.010701f, 0.091058f, 0.023912f, 0.135486f, 0.058171f, 0.357862f, 0.119891f, 0.356848f, +0.039034f, 0.213334f, 0.001234f, 0.265505f, 0.116292f, 0.900737f, 0.213533f, 0.912149f, 0.044846f, 0.415403f, 0.108747f, 0.617398f, 0.126098f, 0.844460f, 0.282030f, 0.841136f, +0.192601f, 0.434406f, 0.003155f, 0.573126f, 0.196527f, 0.628183f, 0.187033f, 0.674366f, 0.002195f, 0.008392f, 0.002759f, 0.013222f, 0.543792f, 1.502858f, 0.630373f, 1.586889f, +0.559976f, 1.158660f, 0.009271f, 1.555608f, 0.809928f, 2.374985f, 0.779011f, 2.594540f, 0.496808f, 1.742214f, 0.631051f, 2.793369f, 1.408934f, 3.572114f, 1.650656f, 3.838342f, +0.174979f, 0.398656f, 0.003411f, 0.481414f, 0.348614f, 1.125606f, 0.394822f, 1.106017f, 0.140170f, 0.541245f, 
0.209647f, 0.780542f, 0.798965f, 2.230432f, 1.102179f, 2.155675f, +0.365793f, 0.907219f, 0.007738f, 1.094337f, 0.774106f, 2.720851f, 0.951405f, 2.670542f, 0.317900f, 1.336270f, 0.515983f, 1.924935f, 0.937296f, 2.848405f, 1.403172f, 2.749890f, +0.121444f, 0.296324f, 0.002036f, 0.420043f, 0.165258f, 0.571453f, 0.160983f, 0.659118f, 0.001103f, 0.004562f, 0.001419f, 0.007723f, 0.261340f, 0.781347f, 0.310093f, 0.886432f, +0.191162f, 0.427899f, 0.003239f, 0.617245f, 0.368725f, 1.169686f, 0.363012f, 1.372907f, 0.135166f, 0.512783f, 0.175738f, 0.883350f, 0.366588f, 1.005461f, 0.439607f, 1.160797f, +0.001643f, 0.004051f, 0.000033f, 0.005256f, 0.004367f, 0.015252f, 0.005062f, 0.016102f, 0.001049f, 0.004383f, 0.001606f, 0.006791f, 0.005720f, 0.017273f, 0.008076f, 0.017937f, +0.207547f, 0.556859f, 0.004494f, 0.721700f, 0.585740f, 2.227211f, 0.736869f, 2.348706f, 0.143754f, 0.653694f, 0.238827f, 1.011740f, 0.405334f, 1.332570f, 0.621108f, 1.382216f, +0.303699f, 1.020246f, 0.004822f, 1.283603f, 0.391328f, 1.863073f, 0.361001f, 1.907267f, 0.002963f, 0.016873f, 0.003610f, 0.025351f, 0.597086f, 2.457800f, 0.670923f, 2.474839f, +0.671006f, 2.067940f, 0.010768f, 2.647607f, 1.225572f, 5.352762f, 1.142634f, 5.576338f, 0.509653f, 2.662025f, 0.627512f, 4.070151f, 1.175622f, 4.439428f, 1.335073f, 4.549011f, +0.217911f, 0.739465f, 0.004118f, 0.851549f, 0.548246f, 2.636581f, 0.601869f, 2.470519f, 0.149444f, 0.859492f, 0.216662f, 1.181997f, 0.692855f, 2.880901f, 0.926484f, 2.655182f, +0.636310f, 2.350553f, 0.013048f, 2.703838f, 1.700470f, 8.902214f, 2.025840f, 8.332289f, 0.473427f, 2.964019f, 0.744849f, 4.071689f, 1.135351f, 5.139012f, 1.647537f, 4.731128f, +0.068186f, 0.454093f, 0.001879f, 0.493776f, 0.101844f, 0.961200f, 0.163065f, 0.850462f, 0.000791f, 0.008929f, 0.001673f, 0.011595f, 0.173500f, 1.415792f, 0.338373f, 1.232138f, +0.128191f, 0.783172f, 0.003570f, 0.866627f, 0.271402f, 2.349854f, 0.439177f, 2.115785f, 0.115768f, 1.198710f, 0.247396f, 1.584061f, 0.290677f, 2.176002f, 
0.572937f, 1.927119f, +0.038188f, 0.256892f, 0.001252f, 0.255683f, 0.111369f, 1.061740f, 0.212202f, 0.859854f, 0.031139f, 0.355024f, 0.078355f, 0.421980f, 0.157144f, 1.295312f, 0.364715f, 1.031810f, +0.163228f, 1.195326f, 0.005809f, 1.188383f, 0.505638f, 5.247571f, 1.045527f, 4.245062f, 0.144399f, 1.792174f, 0.394310f, 2.127812f, 0.376938f, 3.382276f, 0.949368f, 2.691247f, +0.452597f, 1.367787f, 0.008349f, 1.441551f, 0.480185f, 2.056563f, 0.514616f, 1.763638f, 0.003972f, 0.020345f, 0.005622f, 0.025606f, 0.913459f, 3.382543f, 1.192430f, 2.853182f, +1.021669f, 2.832478f, 0.019047f, 3.037861f, 1.536460f, 6.036773f, 1.664168f, 5.268195f, 0.697933f, 3.279409f, 0.998317f, 4.200296f, 1.837532f, 6.242215f, 2.424263f, 5.358147f, +0.305569f, 0.932807f, 0.006708f, 0.899848f, 0.632999f, 2.738505f, 0.807305f, 2.149545f, 0.188479f, 0.975149f, 0.317450f, 1.123392f, 0.997367f, 3.730659f, 1.549381f, 2.880298f, +0.706856f, 2.348967f, 0.016839f, 2.263463f, 1.555355f, 7.324926f, 2.152650f, 5.743220f, 0.473010f, 2.664052f, 0.864556f, 3.065647f, 1.294719f, 5.271930f, 2.182671f, 4.065750f, +0.060585f, 0.108418f, 0.000659f, 0.154579f, 0.073111f, 0.185414f, 0.046204f, 0.215104f, 0.000532f, 0.001613f, 0.000444f, 0.002747f, 0.181486f, 0.397944f, 0.139704f, 0.454096f, +0.090159f, 0.148010f, 0.000991f, 0.214750f, 0.154220f, 0.358797f, 0.098501f, 0.423590f, 0.061618f, 0.171440f, 0.051974f, 0.297055f, 0.240675f, 0.484129f, 0.187240f, 0.562181f, +0.000728f, 0.001316f, 0.000009f, 0.001717f, 0.001715f, 0.004393f, 0.001290f, 0.004665f, 0.000449f, 0.001376f, 0.000446f, 0.002144f, 0.003526f, 0.007810f, 0.003230f, 0.008157f, +0.104947f, 0.206511f, 0.001474f, 0.269203f, 0.262657f, 0.732468f, 0.214366f, 0.776927f, 0.070259f, 0.234316f, 0.075727f, 0.364771f, 0.285308f, 0.687913f, 0.283628f, 0.717701f, +0.061109f, 0.150560f, 0.000630f, 0.190528f, 0.069828f, 0.243817f, 0.041791f, 0.251055f, 0.000576f, 0.002407f, 0.000456f, 0.003637f, 0.167241f, 0.504889f, 0.121916f, 0.511353f, +0.127646f, 0.288509f, 
0.001329f, 0.371535f, 0.206751f, 0.662261f, 0.125054f, 0.693944f, 0.093709f, 0.358974f, 0.074853f, 0.552060f, 0.311310f, 0.862173f, 0.229356f, 0.888605f, +0.038925f, 0.096874f, 0.000477f, 0.112208f, 0.086846f, 0.306308f, 0.061853f, 0.288689f, 0.025802f, 0.108833f, 0.024268f, 0.150542f, 0.172280f, 0.525366f, 0.149455f, 0.487026f, +0.129776f, 0.351593f, 0.001726f, 0.406794f, 0.307557f, 1.180856f, 0.237707f, 1.111700f, 0.093328f, 0.428529f, 0.095259f, 0.592104f, 0.322331f, 1.070026f, 0.303451f, 0.990841f, +0.000608f, 0.002969f, 0.000011f, 0.003247f, 0.000805f, 0.005574f, 0.000836f, 0.004960f, 0.000007f, 0.000056f, 0.000009f, 0.000074f, 0.002153f, 0.012886f, 0.002724f, 0.011280f, +0.001080f, 0.004841f, 0.000020f, 0.005388f, 0.002029f, 0.012882f, 0.002130f, 0.011666f, 0.000943f, 0.007162f, 0.001308f, 0.009520f, 0.003410f, 0.018724f, 0.004361f, 0.016679f, +0.000302f, 0.001491f, 0.000006f, 0.001493f, 0.000782f, 0.005465f, 0.000966f, 0.004452f, 0.000238f, 0.001992f, 0.000389f, 0.002381f, 0.001731f, 0.010466f, 0.002607f, 0.008386f, +0.001475f, 0.007922f, 0.000034f, 0.007922f, 0.004052f, 0.030842f, 0.005436f, 0.025095f, 0.001261f, 0.011481f, 0.002234f, 0.013710f, 0.004742f, 0.031204f, 0.007748f, 0.024973f, +0.146126f, 0.323874f, 0.001749f, 0.343330f, 0.137484f, 0.431845f, 0.095589f, 0.372495f, 0.001240f, 0.004656f, 0.001138f, 0.005895f, 0.410534f, 1.114925f, 0.347675f, 0.945924f, +0.311848f, 0.634076f, 0.003772f, 0.684017f, 0.415894f, 1.198420f, 0.292240f, 1.051938f, 0.205909f, 0.709577f, 0.191078f, 0.914131f, 0.780751f, 1.945176f, 0.668249f, 1.679419f, +0.087581f, 0.196080f, 0.001247f, 0.190254f, 0.160891f, 0.510486f, 0.133121f, 0.403034f, 0.052214f, 0.198126f, 0.057054f, 0.229576f, 0.397923f, 1.091622f, 0.401036f, 0.847712f, +0.231319f, 0.563767f, 0.003575f, 0.546412f, 0.451376f, 1.559032f, 0.405288f, 1.229509f, 0.149617f, 0.618008f, 0.177412f, 0.715317f, 0.589795f, 1.761317f, 0.645052f, 1.366258f, +0.067482f, 0.171491f, 0.000987f, 0.182565f, 0.069451f, 0.250124f, 
0.059017f, 0.216664f, 0.000594f, 0.002558f, 0.000667f, 0.003252f, 0.176414f, 0.549328f, 0.182601f, 0.468040f, +0.152887f, 0.356428f, 0.002260f, 0.386134f, 0.223034f, 0.736886f, 0.191547f, 0.649564f, 0.104737f, 0.413836f, 0.118791f, 0.535398f, 0.356173f, 1.017442f, 0.372591f, 0.882166f, +0.001250f, 0.003208f, 0.000022f, 0.003126f, 0.002511f, 0.009135f, 0.002539f, 0.007243f, 0.000773f, 0.003363f, 0.001032f, 0.003913f, 0.005283f, 0.016618f, 0.006508f, 0.012960f, +0.169198f, 0.472810f, 0.003196f, 0.460201f, 0.361147f, 1.430219f, 0.396328f, 1.132712f, 0.113544f, 0.537748f, 0.164555f, 0.625062f, 0.401427f, 1.374500f, 0.536593f, 1.070731f, +0.114048f, 0.399035f, 0.001580f, 0.377039f, 0.111144f, 0.551107f, 0.089441f, 0.423709f, 0.001078f, 0.006394f, 0.001146f, 0.007215f, 0.272393f, 1.167795f, 0.267003f, 0.883113f, +0.362684f, 1.164129f, 0.005077f, 1.119350f, 0.501003f, 2.278981f, 0.407468f, 1.783042f, 0.266894f, 1.451906f, 0.286663f, 1.667195f, 0.771939f, 3.036012f, 0.764724f, 2.336378f, +0.111978f, 0.395762f, 0.001846f, 0.342275f, 0.213074f, 1.067229f, 0.204052f, 0.751025f, 0.074404f, 0.445679f, 0.094099f, 0.460305f, 0.432525f, 1.873088f, 0.504535f, 1.296504f, +0.350574f, 1.348786f, 0.006271f, 1.165208f, 0.708566f, 3.863412f, 0.736379f, 2.715732f, 0.252713f, 1.647849f, 0.346839f, 1.700046f, 0.759898f, 3.582335f, 0.961933f, 2.476857f, +0.040025f, 0.277616f, 0.000962f, 0.226715f, 0.045214f, 0.444440f, 0.063152f, 0.295328f, 0.000450f, 0.005289f, 0.000830f, 0.005158f, 0.123724f, 1.051509f, 0.210490f, 0.687262f, +0.108306f, 0.689150f, 0.002632f, 0.572714f, 0.173423f, 1.563858f, 0.244805f, 1.057493f, 0.094764f, 1.021960f, 0.176659f, 1.014241f, 0.298345f, 2.326104f, 0.512980f, 1.547135f, +0.030674f, 0.214912f, 0.000878f, 0.160643f, 0.067657f, 0.671781f, 0.112456f, 0.408586f, 0.024233f, 0.287760f, 0.053194f, 0.256871f, 0.153342f, 1.316430f, 0.310456f, 0.787540f, +0.140572f, 1.072144f, 0.004364f, 0.800521f, 0.329340f, 3.559793f, 0.594053f, 2.162719f, 0.120485f, 1.557436f, 
0.287006f, 1.388714f, 0.394356f, 3.685439f, 0.866439f, 2.202335f, +0.157364f, 0.495306f, 0.002532f, 0.392044f, 0.126270f, 0.563245f, 0.118049f, 0.362756f, 0.001338f, 0.007138f, 0.001652f, 0.006747f, 0.385830f, 1.488031f, 0.439364f, 0.942644f, +0.511282f, 1.476312f, 0.008315f, 1.189129f, 0.581529f, 2.379668f, 0.549455f, 1.559635f, 0.338398f, 1.656039f, 0.422247f, 1.592959f, 1.117117f, 3.952428f, 1.285666f, 2.547941f, +0.145383f, 0.462229f, 0.002784f, 0.334876f, 0.227775f, 1.026310f, 0.253411f, 0.605008f, 0.086882f, 0.468166f, 0.127652f, 0.405051f, 0.576464f, 2.245765f, 0.781196f, 1.302164f, +0.360571f, 1.247954f, 0.007493f, 0.903119f, 0.600053f, 2.943235f, 0.724467f, 1.733113f, 0.233773f, 1.371286f, 0.372736f, 1.185105f, 0.802324f, 3.402554f, 1.179905f, 1.970723f, +0.001129f, 0.002056f, 0.000017f, 0.003648f, 0.001319f, 0.003405f, 0.001141f, 0.004917f, 0.000013f, 0.000042f, 0.000015f, 0.000088f, 0.003715f, 0.008290f, 0.003915f, 0.011774f, +0.002302f, 0.003846f, 0.000035f, 0.006944f, 0.003813f, 0.009028f, 0.003334f, 0.013265f, 0.002141f, 0.006063f, 0.002472f, 0.013074f, 0.006751f, 0.013819f, 0.007189f, 0.019971f, +0.000021f, 0.000038f, 0.000000f, 0.000062f, 0.000047f, 0.000124f, 0.000049f, 0.000163f, 0.000017f, 0.000054f, 0.000024f, 0.000106f, 0.000111f, 0.000249f, 0.000139f, 0.000324f, +0.002312f, 0.004630f, 0.000044f, 0.007512f, 0.005605f, 0.015905f, 0.006261f, 0.020997f, 0.002107f, 0.007151f, 0.003108f, 0.013854f, 0.006906f, 0.016945f, 0.009397f, 0.022002f, +0.001701f, 0.004266f, 0.000024f, 0.006719f, 0.001883f, 0.006690f, 0.001542f, 0.008574f, 0.000022f, 0.000093f, 0.000024f, 0.000175f, 0.005116f, 0.015716f, 0.005104f, 0.019810f, +0.004869f, 0.011200f, 0.000069f, 0.017950f, 0.007638f, 0.024898f, 0.006324f, 0.032470f, 0.004866f, 0.018967f, 0.005320f, 0.036303f, 0.013047f, 0.036769f, 0.013157f, 0.047165f, +0.001660f, 0.004205f, 0.000028f, 0.006062f, 0.003588f, 0.012877f, 0.003497f, 0.015104f, 0.001498f, 0.006430f, 0.001929f, 0.011069f, 0.008073f, 0.025053f, 
0.009586f, 0.028905f, +0.004272f, 0.011779f, 0.000078f, 0.016961f, 0.009806f, 0.038312f, 0.010374f, 0.044890f, 0.004182f, 0.019540f, 0.005842f, 0.033601f, 0.011658f, 0.039381f, 0.015022f, 0.045385f, +0.000473f, 0.002350f, 0.000012f, 0.003200f, 0.000607f, 0.004273f, 0.000863f, 0.004733f, 0.000007f, 0.000061f, 0.000014f, 0.000099f, 0.001840f, 0.011207f, 0.003187f, 0.012210f, +0.001152f, 0.005251f, 0.000028f, 0.007274f, 0.002094f, 0.013531f, 0.003009f, 0.015252f, 0.001368f, 0.010573f, 0.002596f, 0.017491f, 0.003993f, 0.022311f, 0.006990f, 0.024735f, +0.000360f, 0.001808f, 0.000010f, 0.002253f, 0.000902f, 0.006419f, 0.001527f, 0.006508f, 0.000386f, 0.003288f, 0.000863f, 0.004892f, 0.002267f, 0.013945f, 0.004672f, 0.013905f, +0.001357f, 0.007415f, 0.000043f, 0.009228f, 0.003610f, 0.027958f, 0.006628f, 0.028312f, 0.001579f, 0.014626f, 0.003829f, 0.021738f, 0.004791f, 0.032087f, 0.010716f, 0.031961f, +0.004794f, 0.010813f, 0.000079f, 0.014266f, 0.004368f, 0.013963f, 0.004157f, 0.014990f, 0.000055f, 0.000212f, 0.000070f, 0.000333f, 0.014797f, 0.040894f, 0.017153f, 0.043180f, +0.014018f, 0.029004f, 0.000232f, 0.038941f, 0.018105f, 0.053091f, 0.017414f, 0.057999f, 0.012598f, 0.044178f, 0.016002f, 0.070833f, 0.038556f, 0.097751f, 0.045170f, 0.105037f, +0.004402f, 0.010029f, 0.000086f, 0.012111f, 0.007832f, 0.025287f, 0.008870f, 0.024847f, 0.003572f, 0.013793f, 0.005343f, 0.019891f, 0.021973f, 0.061340f, 0.030311f, 0.059284f, +0.008973f, 0.022255f, 0.000190f, 0.026845f, 0.016958f, 0.059603f, 0.020841f, 0.058501f, 0.007899f, 0.033205f, 0.012822f, 0.047833f, 0.025135f, 0.076384f, 0.037628f, 0.073742f, +0.136094f, 0.332070f, 0.002282f, 0.470713f, 0.165378f, 0.571866f, 0.161100f, 0.659594f, 0.001252f, 0.005179f, 0.001611f, 0.008767f, 0.320153f, 0.957184f, 0.379877f, 1.085917f, +0.215439f, 0.482239f, 0.003651f, 0.695631f, 0.371087f, 1.177179f, 0.365337f, 1.381702f, 0.154307f, 0.585398f, 0.200624f, 1.008441f, 0.451636f, 1.238728f, 0.541596f, 1.430101f, +0.001861f, 0.004588f, 
0.000037f, 0.005952f, 0.004416f, 0.015427f, 0.005120f, 0.016286f, 0.001204f, 0.005029f, 0.001843f, 0.007791f, 0.007082f, 0.021387f, 0.009999f, 0.022208f, +0.229214f, 0.614994f, 0.004963f, 0.797044f, 0.577672f, 2.196536f, 0.726720f, 2.316357f, 0.160820f, 0.731300f, 0.267181f, 1.131854f, 0.489358f, 1.608808f, 0.749863f, 1.668746f, +0.270365f, 0.908265f, 0.004293f, 1.142717f, 0.311100f, 1.481116f, 0.286990f, 1.516250f, 0.002672f, 0.015216f, 0.003256f, 0.022861f, 0.581077f, 2.391901f, 0.652934f, 2.408483f, +0.600750f, 1.851421f, 0.009641f, 2.370395f, 0.979845f, 4.279534f, 0.913536f, 4.458283f, 0.462208f, 2.414210f, 0.569095f, 3.691251f, 1.150598f, 4.344933f, 1.306655f, 4.452183f, +0.196066f, 0.665336f, 0.003705f, 0.766183f, 0.440504f, 2.118437f, 0.483589f, 1.985010f, 0.136206f, 0.783359f, 0.197470f, 1.077296f, 0.681482f, 2.833611f, 0.911275f, 2.611597f, +0.558264f, 2.062248f, 0.011448f, 2.372201f, 1.332266f, 6.974614f, 1.587184f, 6.528095f, 0.420746f, 2.634193f, 0.661965f, 3.618605f, 1.088905f, 4.928778f, 1.580137f, 4.537581f, +0.073821f, 0.491625f, 0.002034f, 0.534587f, 0.098463f, 0.929296f, 0.157653f, 0.822233f, 0.000868f, 0.009793f, 0.001835f, 0.012716f, 0.205342f, 1.675626f, 0.400473f, 1.458267f, +0.139574f, 0.852718f, 0.003888f, 0.943583f, 0.263883f, 2.284759f, 0.427011f, 2.057173f, 0.127682f, 1.322081f, 0.272859f, 1.747092f, 0.345977f, 2.589978f, 0.681936f, 2.293747f, +0.041786f, 0.281096f, 0.001370f, 0.279773f, 0.108822f, 1.037464f, 0.207350f, 0.840194f, 0.034515f, 0.393512f, 0.086850f, 0.467726f, 0.187971f, 1.549413f, 0.436261f, 1.234220f, +0.174159f, 1.275376f, 0.006199f, 1.267968f, 0.481773f, 4.999900f, 0.996181f, 4.044707f, 0.156067f, 1.936991f, 0.426172f, 2.299750f, 0.439653f, 3.945024f, 1.107325f, 3.139021f, +0.345824f, 1.045107f, 0.006379f, 1.101470f, 0.327644f, 1.403253f, 0.351137f, 1.203381f, 0.003074f, 0.015747f, 0.004351f, 0.019819f, 0.762993f, 2.825366f, 0.996011f, 2.383203f, +0.785077f, 2.176549f, 0.014636f, 2.334370f, 1.054325f, 4.142457f, 
1.141959f, 3.615057f, 0.543264f, 2.552661f, 0.777080f, 3.269471f, 1.543567f, 5.243599f, 2.036434f, 4.500962f, +0.235976f, 0.720360f, 0.005180f, 0.694907f, 0.436528f, 1.888524f, 0.556733f, 1.482367f, 0.147440f, 0.762824f, 0.248330f, 0.878789f, 0.841979f, 3.149431f, 1.307991f, 2.431554f, +0.532275f, 1.768815f, 0.012680f, 1.704429f, 1.045891f, 4.925611f, 1.447539f, 3.862000f, 0.360804f, 2.032095f, 0.659469f, 2.338425f, 1.065786f, 4.339743f, 1.796729f, 3.346840f, +0.131161f, 0.234713f, 0.001427f, 0.334649f, 0.215324f, 0.546074f, 0.136079f, 0.633517f, 0.001215f, 0.003686f, 0.001014f, 0.006275f, 0.349688f, 0.766762f, 0.269183f, 0.874955f, +0.160214f, 0.263017f, 0.001761f, 0.381614f, 0.372822f, 0.867383f, 0.238123f, 1.024018f, 0.115539f, 0.321467f, 0.097456f, 0.557007f, 0.380648f, 0.765690f, 0.296136f, 0.889135f, +0.001120f, 0.002025f, 0.000015f, 0.002643f, 0.003591f, 0.009200f, 0.002701f, 0.009769f, 0.000730f, 0.002235f, 0.000725f, 0.003483f, 0.004831f, 0.010700f, 0.004425f, 0.011175f, +0.191598f, 0.377019f, 0.002692f, 0.491473f, 0.652349f, 1.819196f, 0.532411f, 1.929617f, 0.135349f, 0.451392f, 0.145882f, 0.702704f, 0.463590f, 1.117773f, 0.460861f, 1.166176f, +0.151859f, 0.374150f, 0.001564f, 0.473474f, 0.236069f, 0.824272f, 0.141282f, 0.848744f, 0.001511f, 0.006311f, 0.001194f, 0.009537f, 0.369897f, 1.116691f, 0.269648f, 1.130987f, +0.260373f, 0.588505f, 0.002711f, 0.757862f, 0.573731f, 1.837764f, 0.347023f, 1.925685f, 0.201700f, 0.772655f, 0.161114f, 1.188251f, 0.565175f, 1.565253f, 0.416391f, 1.613239f, +0.068780f, 0.171176f, 0.000843f, 0.198271f, 0.208765f, 0.736318f, 0.148684f, 0.693964f, 0.048108f, 0.202921f, 0.045249f, 0.280690f, 0.270938f, 0.826225f, 0.235043f, 0.765929f, +0.271966f, 0.736815f, 0.003618f, 0.852498f, 0.876828f, 3.366557f, 0.677691f, 3.169397f, 0.206376f, 0.947612f, 0.210647f, 1.309327f, 0.601203f, 1.995782f, 0.565988f, 1.848088f, +0.001093f, 0.005337f, 0.000020f, 0.005837f, 0.001969f, 0.013629f, 0.002045f, 0.012129f, 0.000013f, 0.000107f, 
0.000018f, 0.000140f, 0.003445f, 0.020616f, 0.004358f, 0.018046f, +0.001594f, 0.007143f, 0.000029f, 0.007950f, 0.004072f, 0.025856f, 0.004275f, 0.023417f, 0.001468f, 0.011151f, 0.002036f, 0.014821f, 0.004479f, 0.024589f, 0.005727f, 0.021903f, +0.000386f, 0.001906f, 0.000008f, 0.001908f, 0.001359f, 0.009503f, 0.001680f, 0.007741f, 0.000321f, 0.002686f, 0.000524f, 0.003212f, 0.001969f, 0.011906f, 0.002965f, 0.009539f, +0.002236f, 0.012009f, 0.000052f, 0.012008f, 0.008356f, 0.063601f, 0.011209f, 0.051750f, 0.002017f, 0.018363f, 0.003574f, 0.021929f, 0.006397f, 0.042098f, 0.010453f, 0.033692f, +0.353552f, 0.783613f, 0.004231f, 0.830688f, 0.452531f, 1.421428f, 0.314633f, 1.226074f, 0.003165f, 0.011888f, 0.002906f, 0.015049f, 0.884047f, 2.400888f, 0.748687f, 2.036960f, +0.619330f, 1.259276f, 0.007491f, 1.358460f, 1.123654f, 3.237866f, 0.789569f, 2.842106f, 0.431505f, 1.487000f, 0.400426f, 1.915665f, 1.380041f, 3.438258f, 1.181185f, 2.968511f, +0.150672f, 0.337333f, 0.002146f, 0.327311f, 0.376554f, 1.194759f, 0.311561f, 0.943273f, 0.094787f, 0.359666f, 0.103572f, 0.416758f, 0.609291f, 1.671467f, 0.614058f, 1.297998f, +0.471975f, 1.150290f, 0.007294f, 1.114881f, 1.252902f, 4.327464f, 1.124973f, 3.412793f, 0.322121f, 1.330559f, 0.381965f, 1.540061f, 1.071048f, 3.198493f, 1.171393f, 2.481080f, +0.202670f, 0.515039f, 0.002964f, 0.548298f, 0.283756f, 1.021936f, 0.241128f, 0.885230f, 0.001882f, 0.008107f, 0.002112f, 0.010306f, 0.471554f, 1.468353f, 0.488092f, 1.251069f, +0.376897f, 0.878666f, 0.005571f, 0.951897f, 0.747988f, 2.471284f, 0.642388f, 2.178433f, 0.272449f, 1.076494f, 0.309006f, 1.392710f, 0.781472f, 2.232347f, 0.817494f, 1.935540f, +0.002669f, 0.006850f, 0.000046f, 0.006675f, 0.007295f, 0.026539f, 0.007377f, 0.021042f, 0.001742f, 0.007578f, 0.002326f, 0.008818f, 0.010041f, 0.031584f, 0.012369f, 0.024631f, +0.428525f, 1.197476f, 0.008095f, 1.165542f, 1.244329f, 4.927804f, 1.365544f, 3.902745f, 0.303441f, 1.437114f, 0.439768f, 1.670457f, 0.904871f, 3.098311f, 
1.209555f, 2.413574f, +0.393174f, 1.375651f, 0.005446f, 1.299821f, 0.521258f, 2.584662f, 0.419474f, 1.987169f, 0.003922f, 0.023259f, 0.004168f, 0.026245f, 0.835781f, 3.583137f, 0.819244f, 2.709651f, +1.026310f, 3.294211f, 0.014367f, 3.167499f, 1.928685f, 8.773274f, 1.568608f, 6.864082f, 0.796933f, 4.335316f, 0.855961f, 4.978159f, 1.944165f, 7.646346f, 1.925995f, 5.884282f, +0.274493f, 0.970132f, 0.004525f, 0.839019f, 0.710554f, 3.558971f, 0.680470f, 2.504501f, 0.192453f, 1.152790f, 0.243397f, 1.190623f, 0.943642f, 4.086531f, 1.100747f, 2.828592f, +1.019199f, 3.921228f, 0.018232f, 3.387524f, 2.802398f, 15.279900f, 2.912398f, 10.740790f, 0.775245f, 5.055082f, 1.063993f, 5.215204f, 1.966230f, 9.269260f, 2.488994f, 6.408845f, +0.099807f, 0.692269f, 0.002399f, 0.565340f, 0.153382f, 1.507697f, 0.214232f, 1.001855f, 0.001184f, 0.013917f, 0.002184f, 0.013572f, 0.274589f, 2.333689f, 0.467157f, 1.525290f, +0.221684f, 1.410578f, 0.005386f, 1.172254f, 0.482904f, 4.354630f, 0.681669f, 2.944635f, 0.204673f, 2.207238f, 0.381551f, 2.190567f, 0.543503f, 4.237529f, 0.934509f, 2.818459f, +0.054388f, 0.381057f, 0.001556f, 0.284833f, 0.163197f, 1.620421f, 0.271258f, 0.985562f, 0.045340f, 0.538385f, 0.099524f, 0.480591f, 0.241986f, 2.077435f, 0.489926f, 1.242803f, +0.295605f, 2.254577f, 0.009178f, 1.683390f, 0.942165f, 10.183740f, 1.699446f, 6.187035f, 0.267347f, 3.455843f, 0.636846f, 3.081460f, 0.738075f, 6.897650f, 1.621623f, 4.121879f, +0.528192f, 1.662493f, 0.008500f, 1.315895f, 0.576579f, 2.571898f, 0.539036f, 1.656421f, 0.004739f, 0.025281f, 0.005851f, 0.023897f, 1.152611f, 4.445271f, 1.312535f, 2.816009f, +1.408641f, 4.067410f, 0.022909f, 3.276189f, 2.179624f, 8.919215f, 2.059408f, 5.845656f, 0.983781f, 4.814400f, 1.227547f, 4.631013f, 2.739292f, 9.691783f, 3.152593f, 6.247829f, +0.346975f, 1.103171f, 0.006645f, 0.799226f, 0.739543f, 3.332230f, 0.822777f, 1.964344f, 0.218800f, 1.179009f, 0.321473f, 1.020063f, 1.224498f, 4.770348f, 1.659380f, 2.765995f, +1.020609f, 3.532376f, 
0.021210f, 2.556307f, 2.310620f, 11.333490f, 2.789697f, 6.673681f, 0.698223f, 4.095698f, 1.113273f, 3.539622f, 2.021239f, 8.571822f, 2.972454f, 4.964708f, +0.092148f, 0.167806f, 0.001372f, 0.297766f, 0.146508f, 0.378101f, 0.126736f, 0.545925f, 0.001162f, 0.003586f, 0.001327f, 0.007600f, 0.269899f, 0.602240f, 0.284386f, 0.855289f, +0.154218f, 0.257635f, 0.002321f, 0.465226f, 0.347556f, 0.822852f, 0.303853f, 1.209028f, 0.151372f, 0.428590f, 0.174769f, 0.924238f, 0.402529f, 0.823978f, 0.428653f, 1.190827f, +0.001206f, 0.002218f, 0.000021f, 0.003603f, 0.003744f, 0.009759f, 0.003854f, 0.012897f, 0.001069f, 0.003332f, 0.001453f, 0.006463f, 0.005712f, 0.012875f, 0.007162f, 0.016736f, +0.159158f, 0.318704f, 0.003060f, 0.517060f, 0.524812f, 1.489332f, 0.586288f, 1.966080f, 0.153029f, 0.519350f, 0.225767f, 1.006230f, 0.423068f, 1.038048f, 0.575686f, 1.347864f, +0.159407f, 0.399669f, 0.002248f, 0.629460f, 0.239990f, 0.852734f, 0.196599f, 1.092792f, 0.002159f, 0.009175f, 0.002336f, 0.017257f, 0.426567f, 1.310471f, 0.425642f, 1.651849f, +0.374469f, 0.861307f, 0.005336f, 1.380435f, 0.799128f, 2.604870f, 0.661616f, 3.397032f, 0.394828f, 1.539132f, 0.431694f, 2.945892f, 0.892982f, 2.516707f, 0.900536f, 3.228235f, +0.110608f, 0.280128f, 0.001856f, 0.403823f, 0.325141f, 1.166992f, 0.316971f, 1.368856f, 0.105300f, 0.451986f, 0.135568f, 0.778111f, 0.478670f, 1.485432f, 0.568398f, 1.713804f, +0.337548f, 0.930610f, 0.006147f, 1.340049f, 1.053959f, 4.117975f, 1.115016f, 4.824947f, 0.348629f, 1.629005f, 0.487080f, 2.801292f, 0.819752f, 2.769254f, 1.056352f, 3.191468f, +0.032048f, 0.159288f, 0.000784f, 0.216826f, 0.055928f, 0.393950f, 0.079520f, 0.436339f, 0.000516f, 0.004348f, 0.000969f, 0.007068f, 0.110993f, 0.675965f, 0.192225f, 0.736421f, +0.064060f, 0.292093f, 0.001584f, 0.404612f, 0.158465f, 1.023983f, 0.227710f, 1.154160f, 0.080309f, 0.620614f, 0.152402f, 1.026649f, 0.197710f, 1.104609f, 0.346056f, 1.224618f, +0.017357f, 0.087143f, 0.000506f, 0.108574f, 0.059143f, 0.420813f, 
0.100071f, 0.426617f, 0.019647f, 0.167180f, 0.043902f, 0.248749f, 0.097216f, 0.598057f, 0.200361f, 0.596363f, +0.077536f, 0.423768f, 0.002451f, 0.527400f, 0.280633f, 2.173641f, 0.515294f, 2.201182f, 0.095218f, 0.881994f, 0.230894f, 1.310874f, 0.243706f, 1.632060f, 0.545069f, 1.625635f, +0.437313f, 0.986346f, 0.007164f, 1.301318f, 0.542096f, 1.732771f, 0.515908f, 1.860163f, 0.005328f, 0.020366f, 0.006696f, 0.032088f, 1.201310f, 3.320014f, 1.392579f, 3.505651f, +1.049580f, 2.171712f, 0.017376f, 2.915722f, 1.844227f, 5.407901f, 1.773828f, 5.907834f, 0.995319f, 3.490399f, 1.264266f, 5.596312f, 2.569363f, 6.514186f, 3.010173f, 6.999686f, +0.285518f, 0.650499f, 0.005566f, 0.785536f, 0.691059f, 2.231291f, 0.782656f, 2.192460f, 0.244472f, 0.943994f, 0.365649f, 1.361357f, 1.268424f, 3.540998f, 1.749801f, 3.422316f, +0.690261f, 1.711947f, 0.014602f, 2.065044f, 1.774599f, 6.237416f, 2.181048f, 6.122086f, 0.641205f, 2.695255f, 1.040736f, 3.882592f, 1.720853f, 5.229602f, 2.576189f, 5.048730f, +0.332726f, 0.811852f, 0.005579f, 1.150810f, 0.550041f, 1.902007f, 0.535811f, 2.193787f, 0.003231f, 0.013360f, 0.004156f, 0.022616f, 0.696635f, 2.082776f, 0.826591f, 2.362891f, +0.432339f, 0.967751f, 0.007326f, 1.395982f, 1.013087f, 3.213761f, 0.997390f, 3.772121f, 0.326752f, 1.239606f, 0.424830f, 2.135418f, 0.806658f, 2.212466f, 0.967334f, 2.554274f, +0.003236f, 0.007975f, 0.000065f, 0.010348f, 0.010445f, 0.036483f, 0.012108f, 0.038515f, 0.002208f, 0.009224f, 0.003381f, 0.014292f, 0.010957f, 0.033089f, 0.015471f, 0.034360f, +0.472575f, 1.267943f, 0.010233f, 1.643279f, 1.620246f, 6.160810f, 2.038294f, 6.496883f, 0.349866f, 1.590950f, 0.581254f, 2.462357f, 0.897958f, 2.952115f, 1.375976f, 3.062100f, +0.758747f, 2.548934f, 0.012048f, 3.206892f, 1.187726f, 5.654646f, 1.095679f, 5.788779f, 0.007914f, 0.045058f, 0.009641f, 0.067698f, 1.451374f, 5.974327f, 1.630855f, 6.015744f, +1.383863f, 4.264857f, 0.022208f, 5.460343f, 3.070627f, 13.411150f, 2.862829f, 13.971320f, 1.123489f, 5.868216f, 
1.383298f, 8.972316f, 2.358973f, 8.908041f, 2.678922f, 9.127928f, +0.391244f, 1.327658f, 0.007393f, 1.528895f, 1.195819f, 5.750839f, 1.312781f, 5.388630f, 0.286797f, 1.649445f, 0.415795f, 2.268362f, 1.210318f, 5.032519f, 1.618433f, 4.638220f, +1.321195f, 4.880548f, 0.027092f, 5.614088f, 4.289327f, 22.455270f, 5.110052f, 21.017670f, 1.050702f, 6.578199f, 1.653082f, 9.036506f, 2.293599f, 10.381660f, 3.328299f, 9.557668f, +0.149851f, 0.997958f, 0.004130f, 1.085170f, 0.271910f, 2.566275f, 0.435362f, 2.270619f, 0.001858f, 0.020975f, 0.003929f, 0.027238f, 0.370985f, 3.027304f, 0.723523f, 2.634606f, +0.232561f, 1.420815f, 0.006478f, 1.572217f, 0.598156f, 5.178967f, 0.967926f, 4.663089f, 0.224489f, 2.324460f, 0.479735f, 3.071707f, 0.513073f, 3.840859f, 1.011291f, 3.401557f, +0.060312f, 0.405726f, 0.001978f, 0.403817f, 0.213681f, 2.037146f, 0.407149f, 1.649790f, 0.052567f, 0.599332f, 0.132275f, 0.712363f, 0.241474f, 1.990423f, 0.560434f, 1.585517f, +0.298131f, 2.183227f, 0.010611f, 2.170546f, 1.121951f, 11.643740f, 2.319901f, 9.419293f, 0.281905f, 3.498809f, 0.769799f, 4.154064f, 0.669840f, 6.010496f, 1.687080f, 4.782499f, +0.944908f, 2.855591f, 0.017430f, 3.009593f, 1.217889f, 5.216046f, 1.305216f, 4.473102f, 0.008864f, 0.045400f, 0.012545f, 0.057142f, 1.855477f, 6.870838f, 2.422140f, 5.795567f, +1.760763f, 4.881541f, 0.032826f, 5.235502f, 3.216869f, 12.639130f, 3.484251f, 11.029960f, 1.285676f, 6.041065f, 1.839019f, 7.737449f, 3.081158f, 10.466900f, 4.064984f, 8.984497f, +0.458460f, 1.399536f, 0.010064f, 1.350086f, 1.153765f, 4.991460f, 1.471471f, 3.917966f, 0.302261f, 1.563834f, 0.509091f, 1.801569f, 1.455915f, 5.445861f, 2.261722f, 4.204539f, +1.226459f, 4.075673f, 0.029217f, 3.927316f, 3.278489f, 15.440010f, 4.537510f, 12.105970f, 0.877244f, 4.940747f, 1.603405f, 5.685545f, 2.185681f, 8.899812f, 3.684677f, 6.863598f, +0.027976f, 0.050063f, 0.000304f, 0.071378f, 0.046903f, 0.118949f, 0.029641f, 0.137996f, 0.000311f, 0.000942f, 0.000259f, 0.001605f, 0.094596f, 0.207421f, 
0.072818f, 0.236689f, +0.045168f, 0.074150f, 0.000497f, 0.107585f, 0.107340f, 0.249730f, 0.068558f, 0.294827f, 0.039052f, 0.108657f, 0.032940f, 0.188269f, 0.136103f, 0.273776f, 0.105885f, 0.317915f, +0.000349f, 0.000631f, 0.000005f, 0.000824f, 0.001143f, 0.002928f, 0.000859f, 0.003109f, 0.000273f, 0.000835f, 0.000271f, 0.001301f, 0.001909f, 0.004228f, 0.001749f, 0.004416f, +0.059124f, 0.116341f, 0.000831f, 0.151659f, 0.205580f, 0.573298f, 0.167783f, 0.608096f, 0.050074f, 0.166999f, 0.053971f, 0.259976f, 0.181434f, 0.437460f, 0.180366f, 0.456403f, +0.040090f, 0.098773f, 0.000413f, 0.124993f, 0.063645f, 0.222225f, 0.038090f, 0.228823f, 0.000478f, 0.001997f, 0.000378f, 0.003019f, 0.123847f, 0.373886f, 0.090283f, 0.378673f, +0.090853f, 0.205349f, 0.000946f, 0.264443f, 0.204448f, 0.654883f, 0.123661f, 0.686214f, 0.084380f, 0.323235f, 0.067401f, 0.497098f, 0.250115f, 0.692695f, 0.184272f, 0.713932f, +0.026525f, 0.066014f, 0.000325f, 0.076463f, 0.082221f, 0.289994f, 0.058558f, 0.273313f, 0.022244f, 0.093823f, 0.020921f, 0.129781f, 0.132519f, 0.404116f, 0.114962f, 0.374624f, +0.103872f, 0.281411f, 0.001382f, 0.325594f, 0.342003f, 1.313112f, 0.264331f, 1.236210f, 0.094501f, 0.433916f, 0.096457f, 0.599547f, 0.291220f, 0.966746f, 0.274162f, 0.895204f, +0.000430f, 0.002098f, 0.000008f, 0.002294f, 0.000790f, 0.005471f, 0.000821f, 0.004869f, 0.000006f, 0.000050f, 0.000008f, 0.000066f, 0.001717f, 0.010278f, 0.002173f, 0.008997f, +0.000828f, 0.003711f, 0.000015f, 0.004131f, 0.002161f, 0.013719f, 0.002268f, 0.012425f, 0.000915f, 0.006946f, 0.001268f, 0.009232f, 0.002951f, 0.016202f, 0.003774f, 0.014433f, +0.000222f, 0.001094f, 0.000005f, 0.001096f, 0.000797f, 0.005573f, 0.000985f, 0.004539f, 0.000221f, 0.001849f, 0.000361f, 0.002211f, 0.001434f, 0.008671f, 0.002160f, 0.006947f, +0.001272f, 0.006829f, 0.000029f, 0.006829f, 0.004853f, 0.036937f, 0.006510f, 0.030055f, 0.001375f, 0.012520f, 0.002437f, 0.014951f, 0.004614f, 0.030363f, 0.007539f, 0.024300f, +0.091755f, 0.203366f, 
0.001098f, 0.215583f, 0.119938f, 0.376734f, 0.083390f, 0.324957f, 0.000985f, 0.003699f, 0.000904f, 0.004683f, 0.290984f, 0.790251f, 0.246430f, 0.670465f, +0.212447f, 0.431966f, 0.002570f, 0.465989f, 0.393635f, 1.134279f, 0.276599f, 0.995638f, 0.177463f, 0.611549f, 0.164681f, 0.787844f, 0.600394f, 1.495833f, 0.513881f, 1.291467f, +0.057123f, 0.127890f, 0.000814f, 0.124091f, 0.145793f, 0.462584f, 0.120630f, 0.365214f, 0.043084f, 0.163482f, 0.047077f, 0.189432f, 0.292967f, 0.803696f, 0.295259f, 0.624120f, +0.177210f, 0.431895f, 0.002739f, 0.418599f, 0.480419f, 1.659343f, 0.431365f, 1.308617f, 0.145005f, 0.598958f, 0.171943f, 0.693266f, 0.510029f, 1.523112f, 0.557813f, 1.181482f, +0.042520f, 0.108054f, 0.000622f, 0.115031f, 0.060796f, 0.218955f, 0.051663f, 0.189665f, 0.000473f, 0.002039f, 0.000531f, 0.002592f, 0.125472f, 0.390702f, 0.129873f, 0.332887f, +0.104514f, 0.243655f, 0.001545f, 0.263962f, 0.211825f, 0.699851f, 0.181920f, 0.616918f, 0.090579f, 0.357894f, 0.102733f, 0.463024f, 0.274840f, 0.785107f, 0.287509f, 0.680721f, +0.000818f, 0.002099f, 0.000014f, 0.002046f, 0.002283f, 0.008307f, 0.002309f, 0.006586f, 0.000640f, 0.002784f, 0.000855f, 0.003240f, 0.003903f, 0.012277f, 0.004808f, 0.009574f, +0.130067f, 0.363462f, 0.002457f, 0.353769f, 0.385709f, 1.527490f, 0.423283f, 1.209748f, 0.110423f, 0.522969f, 0.160032f, 0.607882f, 0.348333f, 1.192707f, 0.465623f, 0.929114f, +0.102094f, 0.357209f, 0.001414f, 0.337519f, 0.138229f, 0.685410f, 0.111238f, 0.526965f, 0.001221f, 0.007241f, 0.001298f, 0.008171f, 0.275247f, 1.180032f, 0.269801f, 0.892367f, +0.352244f, 1.130621f, 0.004931f, 1.087131f, 0.676019f, 3.075103f, 0.549809f, 2.405915f, 0.327929f, 1.783932f, 0.352218f, 2.048454f, 0.846282f, 3.328401f, 0.838372f, 2.561387f, +0.104123f, 0.367998f, 0.001716f, 0.318263f, 0.275261f, 1.378706f, 0.263607f, 0.970216f, 0.087525f, 0.524272f, 0.110693f, 0.541478f, 0.453982f, 1.966011f, 0.529564f, 1.360822f, +0.382883f, 1.473090f, 0.006849f, 1.272593f, 1.075151f, 5.862190f, 
1.117353f, 4.120745f, 0.349171f, 2.276812f, 0.479223f, 2.348931f, 0.936823f, 4.416399f, 1.185898f, 3.053536f, +0.038589f, 0.267655f, 0.000928f, 0.218580f, 0.060563f, 0.595315f, 0.084590f, 0.395582f, 0.000549f, 0.006451f, 0.001012f, 0.006291f, 0.134648f, 1.144351f, 0.229075f, 0.747943f, +0.113289f, 0.720856f, 0.002753f, 0.599063f, 0.252026f, 2.272663f, 0.355760f, 1.536792f, 0.125402f, 1.352360f, 0.233773f, 1.342146f, 0.352265f, 2.746503f, 0.605691f, 1.826750f, +0.030719f, 0.215224f, 0.000879f, 0.160876f, 0.094134f, 0.934676f, 0.156464f, 0.568483f, 0.030702f, 0.364573f, 0.067394f, 0.325438f, 0.173343f, 1.488142f, 0.350951f, 0.890265f, +0.165350f, 1.261125f, 0.005134f, 0.941624f, 0.538211f, 5.817449f, 0.970807f, 3.534336f, 0.179292f, 2.317601f, 0.427090f, 2.066528f, 0.523612f, 4.893394f, 1.150426f, 2.924181f, +0.134832f, 0.424386f, 0.002170f, 0.335910f, 0.150311f, 0.670482f, 0.140524f, 0.431822f, 0.001450f, 0.007737f, 0.001791f, 0.007314f, 0.373164f, 1.439181f, 0.424941f, 0.911699f, +0.475284f, 1.372367f, 0.007730f, 1.105405f, 0.751046f, 3.073346f, 0.709622f, 2.014272f, 0.397963f, 1.947540f, 0.496572f, 1.873356f, 1.172214f, 4.147365f, 1.349076f, 2.673608f, +0.129390f, 0.411382f, 0.002478f, 0.298038f, 0.281642f, 1.269022f, 0.313341f, 0.748086f, 0.097823f, 0.527122f, 0.143727f, 0.456059f, 0.579130f, 2.256150f, 0.784809f, 1.308186f, +0.376924f, 1.304551f, 0.007833f, 0.944076f, 0.871475f, 4.274547f, 1.052164f, 2.517050f, 0.309158f, 1.813484f, 0.492933f, 1.567266f, 0.946734f, 4.014979f, 1.392276f, 2.325433f, +0.014387f, 0.026199f, 0.000214f, 0.046490f, 0.023360f, 0.060287f, 0.020208f, 0.087046f, 0.000218f, 0.000671f, 0.000248f, 0.001423f, 0.053444f, 0.119253f, 0.056313f, 0.169361f, +0.031825f, 0.053167f, 0.000479f, 0.096006f, 0.073247f, 0.173416f, 0.064037f, 0.254802f, 0.037452f, 0.106040f, 0.043240f, 0.228670f, 0.105353f, 0.215658f, 0.112191f, 0.311673f, +0.000275f, 0.000506f, 0.000005f, 0.000822f, 0.000872f, 0.002273f, 0.000898f, 0.003004f, 0.000292f, 0.000911f, 
0.000397f, 0.001767f, 0.001652f, 0.003724f, 0.002072f, 0.004841f, +0.035950f, 0.071989f, 0.000691f, 0.116793f, 0.121063f, 0.343558f, 0.135244f, 0.453534f, 0.041442f, 0.140646f, 0.061140f, 0.272499f, 0.121200f, 0.297379f, 0.164922f, 0.386134f, +0.030804f, 0.077232f, 0.000434f, 0.121637f, 0.047361f, 0.168284f, 0.038798f, 0.215659f, 0.000500f, 0.002126f, 0.000541f, 0.003998f, 0.104544f, 0.321175f, 0.104318f, 0.404841f, +0.095646f, 0.219992f, 0.001363f, 0.352586f, 0.208448f, 0.679466f, 0.172579f, 0.886097f, 0.120906f, 0.471322f, 0.132196f, 0.902107f, 0.289273f, 0.815264f, 0.291720f, 1.045757f, +0.031224f, 0.079078f, 0.000524f, 0.113996f, 0.093735f, 0.336433f, 0.091380f, 0.394629f, 0.035639f, 0.152973f, 0.045882f, 0.263349f, 0.171376f, 0.531824f, 0.203502f, 0.613587f, +0.094368f, 0.260171f, 0.001718f, 0.374638f, 0.300917f, 1.175729f, 0.318350f, 1.377577f, 0.116855f, 0.546016f, 0.163261f, 0.938948f, 0.290663f, 0.981906f, 0.374555f, 1.131612f, +0.009221f, 0.045832f, 0.000226f, 0.062387f, 0.016434f, 0.115760f, 0.023367f, 0.128216f, 0.000178f, 0.001500f, 0.000334f, 0.002438f, 0.040504f, 0.246674f, 0.070147f, 0.268736f, +0.024363f, 0.111085f, 0.000603f, 0.153877f, 0.061546f, 0.397704f, 0.088440f, 0.448263f, 0.036618f, 0.282975f, 0.069489f, 0.468111f, 0.095363f, 0.532795f, 0.166916f, 0.590679f, +0.007296f, 0.036628f, 0.000212f, 0.045636f, 0.025387f, 0.180637f, 0.042956f, 0.183128f, 0.009901f, 0.084248f, 0.022124f, 0.125354f, 0.051825f, 0.318818f, 0.106810f, 0.317915f, +0.032276f, 0.176402f, 0.001020f, 0.219542f, 0.119302f, 0.924053f, 0.219060f, 0.935761f, 0.047521f, 0.440184f, 0.115234f, 0.654229f, 0.128665f, 0.861646f, 0.287770f, 0.858254f, +0.083076f, 0.187376f, 0.001361f, 0.247211f, 0.105170f, 0.336170f, 0.100090f, 0.360884f, 0.001213f, 0.004639f, 0.001525f, 0.007308f, 0.289438f, 0.799910f, 0.335522f, 0.844636f, +0.263543f, 0.545304f, 0.004363f, 0.732120f, 0.472915f, 1.386747f, 0.454863f, 1.514945f, 0.299634f, 1.050759f, 0.380598f, 1.684729f, 0.818235f, 2.074496f, 
0.958614f, 2.229107f, +0.079235f, 0.180523f, 0.001545f, 0.217998f, 0.195854f, 0.632374f, 0.221814f, 0.621369f, 0.081341f, 0.314085f, 0.121658f, 0.452949f, 0.446443f, 1.246314f, 0.615872f, 1.204542f, +0.189710f, 0.470509f, 0.004013f, 0.567554f, 0.498093f, 1.750713f, 0.612175f, 1.718342f, 0.211284f, 0.888116f, 0.342934f, 1.279356f, 0.599843f, 1.822899f, 0.897990f, 1.759852f, +0.092796f, 0.226422f, 0.001556f, 0.320956f, 0.156664f, 0.541734f, 0.152611f, 0.624839f, 0.001080f, 0.004467f, 0.001390f, 0.007562f, 0.246413f, 0.736716f, 0.292381f, 0.835798f, +0.159374f, 0.356744f, 0.002701f, 0.514604f, 0.381393f, 1.209870f, 0.375483f, 1.420074f, 0.144412f, 0.547859f, 0.187759f, 0.943774f, 0.377136f, 1.034393f, 0.452257f, 1.194199f, +0.001318f, 0.003249f, 0.000026f, 0.004216f, 0.004346f, 0.015180f, 0.005038f, 0.016025f, 0.001079f, 0.004506f, 0.001651f, 0.006981f, 0.005662f, 0.017098f, 0.007994f, 0.017755f, +0.190680f, 0.511604f, 0.004129f, 0.663049f, 0.667649f, 2.538661f, 0.839912f, 2.677145f, 0.169250f, 0.769632f, 0.281185f, 1.191180f, 0.459523f, 1.510720f, 0.704144f, 1.567004f, +0.261911f, 0.879862f, 0.004159f, 1.106982f, 0.418702f, 1.993398f, 0.386253f, 2.040683f, 0.003275f, 0.018647f, 0.003990f, 0.028017f, 0.635407f, 2.615540f, 0.713983f, 2.633672f, +0.631394f, 1.945862f, 0.010132f, 2.491309f, 1.430761f, 6.248937f, 1.333937f, 6.509945f, 0.614565f, 3.210005f, 0.756685f, 4.907995f, 1.365044f, 5.154731f, 1.550186f, 5.281971f, +0.197290f, 0.669489f, 0.003728f, 0.770965f, 0.615822f, 2.961561f, 0.676054f, 2.775031f, 0.173390f, 0.997210f, 0.251379f, 1.371391f, 0.774056f, 3.218537f, 1.035066f, 2.966364f, +0.659806f, 2.437349f, 0.013530f, 2.803680f, 2.187614f, 11.452490f, 2.606195f, 10.719290f, 0.629101f, 3.938657f, 0.989773f, 5.410553f, 1.452723f, 6.575552f, 2.108083f, 6.053650f, +0.077020f, 0.512926f, 0.002123f, 0.557750f, 0.142725f, 1.347031f, 0.228521f, 1.191842f, 0.001145f, 0.012925f, 0.002421f, 0.016784f, 0.241833f, 1.973396f, 0.471640f, 1.717410f, +0.157990f, 0.965230f, 
0.004401f, 1.068085f, 0.414992f, 3.593097f, 0.671534f, 3.235188f, 0.182844f, 1.893247f, 0.390739f, 2.501872f, 0.442068f, 3.309314f, 0.871336f, 2.930808f, +0.045284f, 0.304632f, 0.001485f, 0.303198f, 0.163848f, 1.562058f, 0.312196f, 1.265039f, 0.047320f, 0.539513f, 0.119073f, 0.641263f, 0.229947f, 1.895415f, 0.533683f, 1.509836f, +0.221688f, 1.623431f, 0.007890f, 1.614002f, 0.852003f, 8.842192f, 1.761720f, 7.152958f, 0.251322f, 3.119228f, 0.686285f, 3.703395f, 0.631717f, 5.668412f, 1.591061f, 4.510306f, +0.320651f, 0.969034f, 0.005915f, 1.021294f, 0.422069f, 1.807661f, 0.452333f, 1.550188f, 0.003606f, 0.018471f, 0.005104f, 0.023248f, 0.798574f, 2.957121f, 1.042458f, 2.494338f, +0.789760f, 2.189533f, 0.014724f, 2.348296f, 1.473534f, 5.789538f, 1.596013f, 5.052438f, 0.691382f, 3.248627f, 0.988946f, 4.160871f, 1.752769f, 5.954271f, 2.312435f, 5.110983f, +0.227272f, 0.693790f, 0.004989f, 0.669276f, 0.584109f, 2.526992f, 0.744951f, 1.983522f, 0.179646f, 0.929451f, 0.302574f, 1.070747f, 0.915369f, 3.423944f, 1.421999f, 2.643495f, +0.602129f, 2.000946f, 0.014344f, 1.928111f, 1.643775f, 7.741337f, 2.275025f, 6.069713f, 0.516355f, 2.908176f, 0.943781f, 3.346572f, 1.360940f, 5.541573f, 2.294308f, 4.273701f, +0.125678f, 0.224900f, 0.001367f, 0.320657f, 0.169833f, 0.430707f, 0.107330f, 0.499676f, 0.001089f, 0.003304f, 0.000909f, 0.005625f, 0.344385f, 0.755133f, 0.265101f, 0.861686f, +0.231992f, 0.380851f, 0.002550f, 0.552581f, 0.444377f, 1.033858f, 0.283825f, 1.220555f, 0.156521f, 0.435493f, 0.132023f, 0.754579f, 0.566508f, 1.139556f, 0.440731f, 1.323278f, +0.001267f, 0.002289f, 0.000016f, 0.002988f, 0.003342f, 0.008561f, 0.002513f, 0.009091f, 0.000772f, 0.002364f, 0.000766f, 0.003684f, 0.005613f, 0.012432f, 0.005142f, 0.012985f, +0.195896f, 0.385475f, 0.002752f, 0.502495f, 0.549023f, 1.531054f, 0.448082f, 1.623985f, 0.129468f, 0.431777f, 0.139543f, 0.672168f, 0.487167f, 1.174620f, 0.484299f, 1.225485f, +0.131067f, 0.322922f, 0.001350f, 0.408646f, 0.167714f, 0.585599f, 
0.100373f, 0.602984f, 0.001220f, 0.005096f, 0.000965f, 0.007701f, 0.328128f, 0.990593f, 0.239200f, 1.003275f, +0.339600f, 0.767576f, 0.003536f, 0.988464f, 0.615967f, 1.973054f, 0.372569f, 2.067447f, 0.246121f, 0.942820f, 0.196597f, 1.449945f, 0.757643f, 2.098296f, 0.558191f, 2.162624f, +0.070035f, 0.174299f, 0.000859f, 0.201888f, 0.174980f, 0.617157f, 0.124622f, 0.581658f, 0.045830f, 0.193309f, 0.043105f, 0.267394f, 0.283552f, 0.864693f, 0.245986f, 0.801590f, +0.250464f, 0.678563f, 0.003332f, 0.785101f, 0.664698f, 2.552091f, 0.513738f, 2.402630f, 0.177813f, 0.816460f, 0.181493f, 1.128113f, 0.569068f, 1.889106f, 0.535736f, 1.749306f, +0.001246f, 0.006087f, 0.000022f, 0.006658f, 0.001849f, 0.012795f, 0.001920f, 0.011387f, 0.000014f, 0.000114f, 0.000019f, 0.000149f, 0.004038f, 0.024167f, 0.005109f, 0.021154f, +0.002748f, 0.012311f, 0.000050f, 0.013703f, 0.005777f, 0.036684f, 0.006065f, 0.033222f, 0.002368f, 0.017980f, 0.003283f, 0.023899f, 0.007934f, 0.043558f, 0.010145f, 0.038801f, +0.000520f, 0.002564f, 0.000011f, 0.002567f, 0.001505f, 0.010525f, 0.001861f, 0.008574f, 0.000404f, 0.003382f, 0.000660f, 0.004043f, 0.002724f, 0.016466f, 0.004101f, 0.013192f, +0.002721f, 0.014614f, 0.000063f, 0.014614f, 0.008371f, 0.063713f, 0.011229f, 0.051841f, 0.002297f, 0.020908f, 0.004069f, 0.024968f, 0.008002f, 0.052657f, 0.013074f, 0.042143f, +0.337359f, 0.747724f, 0.004037f, 0.792643f, 0.355440f, 1.116459f, 0.247128f, 0.963019f, 0.002825f, 0.010612f, 0.002594f, 0.013435f, 0.867014f, 2.354631f, 0.734262f, 1.997715f, +0.893062f, 1.815852f, 0.010801f, 1.958873f, 1.333738f, 3.843232f, 0.937190f, 3.373479f, 0.582128f, 2.006056f, 0.540200f, 2.584352f, 2.045327f, 5.095763f, 1.750607f, 4.399562f, +0.169619f, 0.379752f, 0.002416f, 0.368469f, 0.348936f, 1.107131f, 0.288710f, 0.874091f, 0.099830f, 0.378802f, 0.109082f, 0.438931f, 0.704980f, 1.933970f, 0.710495f, 1.501848f, +0.480551f, 1.171192f, 0.007427f, 1.135138f, 1.050064f, 3.626871f, 0.942846f, 2.860281f, 0.306840f, 1.267439f, 
0.363845f, 1.467003f, 1.120832f, 3.347165f, 1.225842f, 2.596406f, +0.292175f, 0.742495f, 0.004274f, 0.790442f, 0.336727f, 1.212707f, 0.286140f, 1.050480f, 0.002539f, 0.010934f, 0.002849f, 0.013900f, 0.698710f, 2.175682f, 0.723214f, 1.853729f, +0.821101f, 1.914245f, 0.012138f, 2.073783f, 1.341362f, 4.431740f, 1.151990f, 3.906573f, 0.555304f, 2.194106f, 0.629814f, 2.838616f, 1.749839f, 4.998578f, 1.830499f, 4.333980f, +0.004539f, 0.011651f, 0.000079f, 0.011353f, 0.010213f, 0.037156f, 0.010328f, 0.029459f, 0.002772f, 0.012058f, 0.003701f, 0.014031f, 0.017553f, 0.055212f, 0.021622f, 0.043058f, +0.659190f, 1.842049f, 0.012452f, 1.792926f, 1.575607f, 6.239734f, 1.729093f, 4.941774f, 0.436698f, 2.068227f, 0.632894f, 2.404043f, 1.430646f, 4.898586f, 1.912368f, 3.815982f, +0.510550f, 1.786328f, 0.007072f, 1.687861f, 0.557166f, 2.762708f, 0.448369f, 2.124056f, 0.004765f, 0.028256f, 0.005064f, 0.031884f, 1.115467f, 4.782199f, 1.093396f, 3.616409f, +2.013962f, 6.464340f, 0.028193f, 6.215688f, 3.115388f, 14.171400f, 2.533759f, 11.087490f, 1.463076f, 7.959136f, 1.571444f, 9.139321f, 3.921183f, 15.421900f, 3.884535f, 11.868000f, +0.420519f, 1.486226f, 0.006932f, 1.285363f, 0.896044f, 4.488038f, 0.858106f, 3.158299f, 0.275836f, 1.652253f, 0.348852f, 1.706477f, 1.485842f, 6.434575f, 1.733216f, 4.453848f, +1.412189f, 5.433201f, 0.025261f, 4.693707f, 3.196257f, 17.427390f, 3.321717f, 12.250340f, 1.004952f, 6.552912f, 1.379256f, 6.760479f, 2.800136f, 13.200490f, 3.544613f, 9.126930f, +0.171265f, 1.187905f, 0.004117f, 0.970100f, 0.216650f, 2.129606f, 0.302601f, 1.415109f, 0.001901f, 0.022342f, 0.003506f, 0.021789f, 0.484284f, 4.115865f, 0.823912f, 2.690113f, +0.574859f, 3.657832f, 0.013967f, 3.039823f, 1.030780f, 9.295144f, 1.455052f, 6.285448f, 0.496546f, 5.354863f, 0.925659f, 5.314418f, 1.448572f, 11.294070f, 2.490701f, 7.511896f, +0.110105f, 0.771433f, 0.003150f, 0.576632f, 0.271955f, 2.700312f, 0.452031f, 1.642366f, 0.085873f, 1.019701f, 0.188498f, 0.910241f, 0.503512f, 4.322614f, 
1.019410f, 2.585957f, +0.541253f, 4.128124f, 0.016804f, 3.082282f, 1.420015f, 15.348750f, 2.561375f, 9.324990f, 0.457968f, 5.919895f, 1.090924f, 5.278573f, 1.388991f, 12.980760f, 3.051748f, 7.757006f, +0.758287f, 2.386719f, 0.012202f, 1.889134f, 0.681362f, 3.039300f, 0.636997f, 1.957449f, 0.006365f, 0.033955f, 0.007859f, 0.032096f, 1.700730f, 6.559202f, 1.936706f, 4.155151f, +3.056059f, 8.824279f, 0.049701f, 7.107718f, 3.892432f, 15.928180f, 3.677747f, 10.439330f, 1.996790f, 9.771830f, 2.491564f, 9.399609f, 6.108166f, 21.611070f, 7.029759f, 13.931620f, +0.587680f, 1.868467f, 0.011254f, 1.353668f, 1.031060f, 4.645747f, 1.147104f, 2.738660f, 0.346706f, 1.868237f, 0.509401f, 1.616374f, 2.131629f, 8.304312f, 2.888681f, 4.815098f, +1.563441f, 5.411145f, 0.032490f, 3.915933f, 2.913593f, 14.291050f, 3.517690f, 8.415227f, 1.000666f, 5.869790f, 1.595499f, 5.072845f, 3.182374f, 13.496050f, 4.680029f, 7.816768f, +0.112259f, 0.204428f, 0.001671f, 0.362752f, 0.146917f, 0.379158f, 0.127090f, 0.547451f, 0.001324f, 0.004087f, 0.001513f, 0.008662f, 0.337945f, 0.754075f, 0.356085f, 1.070923f, +0.283915f, 0.474307f, 0.004272f, 0.856481f, 0.526691f, 1.246963f, 0.460464f, 1.832180f, 0.260719f, 0.738190f, 0.301016f, 1.591878f, 0.761662f, 1.559123f, 0.811092f, 2.253272f, +0.001733f, 0.003188f, 0.000031f, 0.005178f, 0.004429f, 0.011546f, 0.004559f, 0.015259f, 0.001437f, 0.004480f, 0.001954f, 0.008690f, 0.008438f, 0.019019f, 0.010581f, 0.024723f, +0.206891f, 0.414287f, 0.003978f, 0.672134f, 0.561561f, 1.593619f, 0.627341f, 2.103750f, 0.186107f, 0.631608f, 0.274566f, 1.223727f, 0.565244f, 1.386895f, 0.769150f, 1.800827f, +0.174921f, 0.438564f, 0.002466f, 0.690719f, 0.216773f, 0.770238f, 0.177580f, 0.987071f, 0.002217f, 0.009420f, 0.002398f, 0.017717f, 0.481096f, 1.477991f, 0.480052f, 1.863008f, +0.620967f, 1.428272f, 0.008849f, 2.289122f, 1.090806f, 3.555635f, 0.903102f, 4.636932f, 0.612538f, 2.387819f, 0.669733f, 4.570274f, 1.521972f, 4.289403f, 1.534847f, 5.502110f, +0.143193f, 0.362653f, 
0.002403f, 0.522787f, 0.346484f, 1.243599f, 0.337778f, 1.458713f, 0.127537f, 0.547433f, 0.164196f, 0.942428f, 0.636915f, 1.976506f, 0.756307f, 2.280376f, +0.395229f, 1.089636f, 0.007197f, 1.569042f, 1.015818f, 3.968952f, 1.074665f, 4.650339f, 0.381901f, 1.784468f, 0.533564f, 3.068632f, 0.986524f, 3.332637f, 1.271259f, 3.840746f, +0.046472f, 0.230978f, 0.001137f, 0.314412f, 0.066757f, 0.470226f, 0.094917f, 0.520822f, 0.000700f, 0.005899f, 0.001315f, 0.009589f, 0.165422f, 1.007448f, 0.286490f, 1.097551f, +0.140377f, 0.640070f, 0.003472f, 0.886636f, 0.285837f, 1.847047f, 0.410740f, 2.081857f, 0.164643f, 1.272333f, 0.312443f, 2.104754f, 0.445294f, 2.487864f, 0.779408f, 2.758155f, +0.029694f, 0.149081f, 0.000865f, 0.185744f, 0.083285f, 0.592591f, 0.140921f, 0.600764f, 0.031446f, 0.267574f, 0.070266f, 0.398126f, 0.170937f, 1.051579f, 0.352299f, 1.048600f, +0.119970f, 0.655686f, 0.003792f, 0.816034f, 0.357424f, 2.768432f, 0.656298f, 2.803509f, 0.137834f, 1.276749f, 0.334236f, 1.897584f, 0.387566f, 2.595465f, 0.866824f, 2.585248f, +0.530535f, 1.196606f, 0.008691f, 1.578720f, 0.541348f, 1.730378f, 0.515196f, 1.857594f, 0.006047f, 0.023116f, 0.007600f, 0.036420f, 1.497918f, 4.139738f, 1.736412f, 4.371209f, +1.924229f, 3.981473f, 0.031857f, 5.345491f, 2.783133f, 8.161094f, 2.676894f, 8.915545f, 1.707168f, 5.986721f, 2.168464f, 9.598775f, 4.841480f, 12.274760f, 5.672104f, 13.189590f, +0.408654f, 0.931041f, 0.007966f, 1.124317f, 0.814172f, 2.628797f, 0.922086f, 2.583048f, 0.327359f, 1.264050f, 0.489620f, 1.822918f, 1.865942f, 5.209062f, 2.574083f, 5.034472f, +0.893544f, 2.216117f, 0.018903f, 2.673201f, 1.890953f, 6.646383f, 2.324052f, 6.523490f, 0.776554f, 3.264185f, 1.260421f, 4.702152f, 2.289589f, 6.957966f, 3.427609f, 6.717317f, +0.396972f, 0.968610f, 0.006656f, 1.373016f, 0.540189f, 1.867938f, 0.526214f, 2.154492f, 0.003606f, 0.014912f, 0.004639f, 0.025244f, 0.854257f, 2.554030f, 1.013618f, 2.897525f, +0.779500f, 1.744840f, 0.013209f, 2.516934f, 1.503547f, 4.769620f, 
1.480250f, 5.598295f, 0.551167f, 2.090971f, 0.716605f, 3.602031f, 1.494833f, 4.099962f, 1.792584f, 4.733373f, +0.004555f, 0.011226f, 0.000091f, 0.014565f, 0.012102f, 0.042270f, 0.014029f, 0.044626f, 0.002908f, 0.012147f, 0.004452f, 0.018821f, 0.015851f, 0.047871f, 0.022382f, 0.049709f, +0.601622f, 1.614183f, 0.013027f, 2.092012f, 1.697900f, 6.456081f, 2.135984f, 6.808261f, 0.416703f, 1.894882f, 0.692295f, 2.932760f, 1.174953f, 3.862758f, 1.800425f, 4.006670f, +0.815395f, 2.739239f, 0.012947f, 3.446321f, 1.050669f, 5.002131f, 0.969244f, 5.120786f, 0.007957f, 0.045301f, 0.009693f, 0.068064f, 1.603106f, 6.598902f, 1.801350f, 6.644649f, +2.247420f, 6.926210f, 0.036066f, 8.867703f, 4.104843f, 17.928150f, 3.827057f, 18.676980f, 1.706995f, 8.915995f, 2.101742f, 13.632270f, 3.937544f, 14.869100f, 4.471596f, 15.236130f, +0.496044f, 1.683289f, 0.009373f, 1.938430f, 1.248004f, 6.001804f, 1.370070f, 5.623788f, 0.340188f, 1.956513f, 0.493201f, 2.690649f, 1.577187f, 6.557963f, 2.109009f, 6.044146f, +1.515023f, 5.596554f, 0.031067f, 6.437709f, 4.048737f, 21.195740f, 4.823427f, 19.838780f, 1.127208f, 7.057187f, 1.773450f, 9.694495f, 2.703217f, 12.235740f, 3.922706f, 11.264590f, +0.212807f, 1.417222f, 0.005865f, 1.541072f, 0.317854f, 2.999900f, 0.508926f, 2.654286f, 0.002469f, 0.027868f, 0.005221f, 0.036188f, 0.541494f, 4.418680f, 1.056060f, 3.845496f, +0.499093f, 3.049177f, 0.013901f, 3.374097f, 1.056666f, 9.148849f, 1.709880f, 8.237530f, 0.450726f, 4.667019f, 0.963206f, 6.167334f, 1.131712f, 8.471977f, 2.230655f, 7.502987f, +0.101049f, 0.679766f, 0.003314f, 0.676566f, 0.294694f, 2.809484f, 0.561510f, 2.275271f, 0.082397f, 0.939434f, 0.207337f, 1.116607f, 0.415822f, 3.427543f, 0.965078f, 2.730287f, +0.451766f, 3.308301f, 0.016079f, 3.289085f, 1.399452f, 14.523690f, 2.893701f, 11.749050f, 0.399652f, 4.960195f, 1.091330f, 5.889138f, 1.043250f, 9.361116f, 2.627562f, 7.448558f, +1.122664f, 3.392782f, 0.020709f, 3.575755f, 1.191093f, 5.101285f, 1.276499f, 4.374687f, 0.009853f, 
0.050465f, 0.013945f, 0.063517f, 2.265827f, 8.390367f, 2.957811f, 7.077293f, +3.161408f, 8.764692f, 0.058939f, 9.400221f, 4.754352f, 18.679920f, 5.149528f, 16.301670f, 2.159651f, 10.147660f, 3.089147f, 12.997210f, 5.685976f, 19.315630f, 7.501530f, 16.580010f, +0.642632f, 1.961756f, 0.014107f, 1.892441f, 1.331240f, 5.759260f, 1.697817f, 4.520638f, 0.396383f, 2.050804f, 0.667619f, 2.362568f, 2.097530f, 7.845826f, 3.258452f, 6.057460f, +1.554869f, 5.167019f, 0.037041f, 4.978936f, 3.421312f, 16.112630f, 4.735180f, 12.633350f, 1.040480f, 5.860111f, 1.901763f, 6.743499f, 2.847992f, 11.596660f, 4.801218f, 8.943424f, +0.015396f, 0.027551f, 0.000167f, 0.039281f, 0.018579f, 0.047117f, 0.011741f, 0.054662f, 0.000135f, 0.000410f, 0.000113f, 0.000698f, 0.046119f, 0.101125f, 0.035501f, 0.115394f, +0.029089f, 0.047754f, 0.000320f, 0.069287f, 0.049757f, 0.115762f, 0.031780f, 0.136666f, 0.019880f, 0.055313f, 0.016769f, 0.095842f, 0.077651f, 0.156199f, 0.060411f, 0.181382f, +0.000342f, 0.000618f, 0.000004f, 0.000806f, 0.000805f, 0.002063f, 0.000606f, 0.002190f, 0.000211f, 0.000646f, 0.000209f, 0.001007f, 0.001656f, 0.003667f, 0.001517f, 0.003830f, +0.045868f, 0.090257f, 0.000644f, 0.117656f, 0.114796f, 0.320129f, 0.093690f, 0.339560f, 0.030707f, 0.102409f, 0.033097f, 0.159425f, 0.124695f, 0.300656f, 0.123961f, 0.313675f, +0.015907f, 0.039192f, 0.000164f, 0.049596f, 0.018177f, 0.063467f, 0.010878f, 0.065352f, 0.000150f, 0.000626f, 0.000119f, 0.000947f, 0.043534f, 0.131426f, 0.031736f, 0.133109f, +0.042187f, 0.095352f, 0.000439f, 0.122791f, 0.068331f, 0.218875f, 0.041330f, 0.229347f, 0.030971f, 0.118640f, 0.024739f, 0.182454f, 0.102887f, 0.284946f, 0.075802f, 0.293681f, +0.018721f, 0.046592f, 0.000229f, 0.053967f, 0.041769f, 0.147320f, 0.029748f, 0.138846f, 0.012410f, 0.052344f, 0.011672f, 0.072404f, 0.082858f, 0.252677f, 0.071881f, 0.234237f, +0.058101f, 0.157408f, 0.000773f, 0.182122f, 0.137693f, 0.528669f, 0.106422f, 0.497708f, 0.041783f, 0.191852f, 0.042647f, 0.265085f, 0.144308f, 
0.479051f, 0.135855f, 0.443599f, +0.000123f, 0.000603f, 0.000002f, 0.000659f, 0.000163f, 0.001131f, 0.000170f, 0.001007f, 0.000001f, 0.000011f, 0.000002f, 0.000015f, 0.000437f, 0.002615f, 0.000553f, 0.002289f, +0.000278f, 0.001247f, 0.000005f, 0.001388f, 0.000523f, 0.003319f, 0.000549f, 0.003006f, 0.000243f, 0.001846f, 0.000337f, 0.002453f, 0.000879f, 0.004825f, 0.001124f, 0.004298f, +0.000113f, 0.000559f, 0.000002f, 0.000560f, 0.000293f, 0.002049f, 0.000362f, 0.001669f, 0.000089f, 0.000747f, 0.000146f, 0.000893f, 0.000649f, 0.003925f, 0.000977f, 0.003144f, +0.000515f, 0.002765f, 0.000012f, 0.002765f, 0.001414f, 0.010765f, 0.001897f, 0.008759f, 0.000440f, 0.004007f, 0.000780f, 0.004785f, 0.001655f, 0.010892f, 0.002704f, 0.008717f, +0.044706f, 0.099087f, 0.000535f, 0.105040f, 0.042063f, 0.132121f, 0.029245f, 0.113963f, 0.000379f, 0.001425f, 0.000348f, 0.001803f, 0.125601f, 0.341106f, 0.106370f, 0.289401f, +0.121134f, 0.246301f, 0.001465f, 0.265700f, 0.161550f, 0.465515f, 0.113518f, 0.408616f, 0.079983f, 0.275629f, 0.074223f, 0.355085f, 0.303275f, 0.755585f, 0.259575f, 0.652355f, +0.049507f, 0.110839f, 0.000705f, 0.107546f, 0.090948f, 0.288565f, 0.075250f, 0.227825f, 0.029516f, 0.111996f, 0.032251f, 0.129774f, 0.224936f, 0.617067f, 0.226696f, 0.479191f, +0.121718f, 0.296649f, 0.001881f, 0.287517f, 0.237510f, 0.820349f, 0.213259f, 0.646957f, 0.078727f, 0.325191f, 0.093353f, 0.376393f, 0.310345f, 0.926790f, 0.339421f, 0.718913f, +0.012459f, 0.031661f, 0.000182f, 0.033706f, 0.012822f, 0.046179f, 0.010896f, 0.040001f, 0.000110f, 0.000472f, 0.000123f, 0.000600f, 0.032570f, 0.101419f, 0.033712f, 0.086411f, +0.035838f, 0.083549f, 0.000530f, 0.090512f, 0.052280f, 0.172730f, 0.044900f, 0.152261f, 0.024551f, 0.097005f, 0.027845f, 0.125500f, 0.083489f, 0.238494f, 0.087337f, 0.206784f, +0.000426f, 0.001094f, 0.000007f, 0.001066f, 0.000857f, 0.003116f, 0.000866f, 0.002471f, 0.000264f, 0.001147f, 0.000352f, 0.001335f, 0.001802f, 0.005669f, 0.002220f, 0.004421f, +0.053726f, 
0.150132f, 0.001015f, 0.146128f, 0.114675f, 0.454139f, 0.125846f, 0.359671f, 0.036054f, 0.170752f, 0.052251f, 0.198477f, 0.127465f, 0.436446f, 0.170385f, 0.339990f, +0.021569f, 0.075465f, 0.000299f, 0.071306f, 0.021019f, 0.104225f, 0.016915f, 0.080132f, 0.000204f, 0.001209f, 0.000217f, 0.001364f, 0.051515f, 0.220853f, 0.050495f, 0.167014f, +0.087086f, 0.279524f, 0.001219f, 0.268772f, 0.120298f, 0.547216f, 0.097839f, 0.428134f, 0.064085f, 0.348623f, 0.068832f, 0.400317f, 0.185353f, 0.728990f, 0.183621f, 0.560998f, +0.039128f, 0.138289f, 0.000645f, 0.119599f, 0.074453f, 0.372916f, 0.071301f, 0.262426f, 0.025999f, 0.155731f, 0.032881f, 0.160842f, 0.151135f, 0.654503f, 0.176297f, 0.453030f, +0.114030f, 0.438713f, 0.002040f, 0.379001f, 0.230472f, 1.256632f, 0.239518f, 0.883332f, 0.082199f, 0.535987f, 0.112815f, 0.552965f, 0.247168f, 1.165208f, 0.312883f, 0.805634f, +0.005902f, 0.040934f, 0.000142f, 0.033428f, 0.006667f, 0.065532f, 0.009312f, 0.043545f, 0.000066f, 0.000780f, 0.000122f, 0.000761f, 0.018243f, 0.155042f, 0.031036f, 0.101335f, +0.020275f, 0.129012f, 0.000493f, 0.107215f, 0.032466f, 0.292762f, 0.045829f, 0.197968f, 0.017740f, 0.191316f, 0.033072f, 0.189871f, 0.055852f, 0.435459f, 0.096032f, 0.289632f, +0.008357f, 0.058548f, 0.000239f, 0.043764f, 0.018432f, 0.183013f, 0.030636f, 0.111311f, 0.006602f, 0.078394f, 0.014492f, 0.069979f, 0.041775f, 0.358634f, 0.084577f, 0.214549f, +0.035648f, 0.271888f, 0.001107f, 0.203007f, 0.083518f, 0.902739f, 0.150648f, 0.548450f, 0.030554f, 0.394955f, 0.072783f, 0.352168f, 0.100006f, 0.934601f, 0.219723f, 0.558497f, +0.034978f, 0.110095f, 0.000563f, 0.087142f, 0.028067f, 0.125196f, 0.026239f, 0.080632f, 0.000297f, 0.001587f, 0.000367f, 0.001500f, 0.085761f, 0.330754f, 0.097660f, 0.209528f, +0.144290f, 0.416632f, 0.002347f, 0.335586f, 0.164114f, 0.671570f, 0.155062f, 0.440147f, 0.095500f, 0.467353f, 0.119163f, 0.449551f, 0.315263f, 1.115420f, 0.362830f, 0.719058f, +0.059707f, 0.189831f, 0.001143f, 0.137529f, 0.093544f, 
0.421491f, 0.104072f, 0.248468f, 0.035681f, 0.192269f, 0.052425f, 0.166349f, 0.236746f, 0.922304f, 0.320826f, 0.534781f, +0.137843f, 0.477081f, 0.002865f, 0.345254f, 0.229395f, 1.125172f, 0.276957f, 0.662553f, 0.089369f, 0.524230f, 0.142494f, 0.453055f, 0.306721f, 1.300765f, 0.451067f, 0.753389f, +0.000319f, 0.000582f, 0.000005f, 0.001032f, 0.000373f, 0.000963f, 0.000323f, 0.001391f, 0.000004f, 0.000012f, 0.000004f, 0.000025f, 0.001051f, 0.002345f, 0.001107f, 0.003331f, +0.000827f, 0.001381f, 0.000012f, 0.002494f, 0.001370f, 0.003243f, 0.001197f, 0.004764f, 0.000769f, 0.002177f, 0.000888f, 0.004696f, 0.002425f, 0.004963f, 0.002582f, 0.007173f, +0.000011f, 0.000020f, 0.000000f, 0.000032f, 0.000025f, 0.000065f, 0.000026f, 0.000085f, 0.000009f, 0.000028f, 0.000012f, 0.000055f, 0.000058f, 0.000130f, 0.000072f, 0.000169f, +0.001125f, 0.002253f, 0.000022f, 0.003655f, 0.002727f, 0.007738f, 0.003046f, 0.010216f, 0.001025f, 0.003479f, 0.001512f, 0.006741f, 0.003360f, 0.008244f, 0.004572f, 0.010705f, +0.000493f, 0.001236f, 0.000007f, 0.001947f, 0.000546f, 0.001939f, 0.000447f, 0.002484f, 0.000006f, 0.000027f, 0.000007f, 0.000051f, 0.001482f, 0.004554f, 0.001479f, 0.005740f, +0.001791f, 0.004121f, 0.000026f, 0.006604f, 0.002810f, 0.009160f, 0.002327f, 0.011946f, 0.001790f, 0.006978f, 0.001957f, 0.013356f, 0.004800f, 0.013528f, 0.004841f, 0.017352f, +0.000889f, 0.002251f, 0.000015f, 0.003245f, 0.001921f, 0.006894f, 0.001873f, 0.008087f, 0.000802f, 0.003443f, 0.001033f, 0.005926f, 0.004322f, 0.013413f, 0.005133f, 0.015476f, +0.002129f, 0.005870f, 0.000039f, 0.008453f, 0.004887f, 0.019094f, 0.005170f, 0.022372f, 0.002084f, 0.009738f, 0.002912f, 0.016746f, 0.005810f, 0.019627f, 0.007487f, 0.022619f, +0.000107f, 0.000531f, 0.000003f, 0.000723f, 0.000137f, 0.000965f, 0.000195f, 0.001069f, 0.000002f, 0.000014f, 0.000003f, 0.000022f, 0.000416f, 0.002532f, 0.000720f, 0.002758f, +0.000330f, 0.001506f, 0.000008f, 0.002086f, 0.000601f, 0.003881f, 0.000863f, 0.004375f, 0.000392f, 
0.003033f, 0.000745f, 0.005017f, 0.001145f, 0.006400f, 0.002005f, 0.007095f, +0.000150f, 0.000755f, 0.000004f, 0.000941f, 0.000377f, 0.002680f, 0.000637f, 0.002717f, 0.000161f, 0.001372f, 0.000360f, 0.002042f, 0.000946f, 0.005821f, 0.001950f, 0.005804f, +0.000527f, 0.002881f, 0.000017f, 0.003586f, 0.001403f, 0.010864f, 0.002575f, 0.011001f, 0.000614f, 0.005683f, 0.001488f, 0.008447f, 0.001862f, 0.012468f, 0.004164f, 0.012419f, +0.001633f, 0.003683f, 0.000027f, 0.004859f, 0.001488f, 0.004756f, 0.001416f, 0.005105f, 0.000019f, 0.000072f, 0.000024f, 0.000114f, 0.005040f, 0.013928f, 0.005842f, 0.014706f, +0.006061f, 0.012542f, 0.000100f, 0.016839f, 0.007829f, 0.022957f, 0.007530f, 0.025080f, 0.005447f, 0.019103f, 0.006919f, 0.030629f, 0.016672f, 0.042269f, 0.019532f, 0.045419f, +0.002770f, 0.006311f, 0.000054f, 0.007621f, 0.004928f, 0.015912f, 0.005582f, 0.015636f, 0.002248f, 0.008679f, 0.003362f, 0.012517f, 0.013827f, 0.038599f, 0.019074f, 0.037305f, +0.005256f, 0.013036f, 0.000111f, 0.015725f, 0.009933f, 0.034913f, 0.012208f, 0.034268f, 0.004627f, 0.019450f, 0.007510f, 0.028018f, 0.014723f, 0.044743f, 0.022041f, 0.043195f, +0.030336f, 0.074020f, 0.000509f, 0.104924f, 0.036864f, 0.127472f, 0.035910f, 0.147027f, 0.000279f, 0.001154f, 0.000359f, 0.001954f, 0.071364f, 0.213361f, 0.084677f, 0.242056f, +0.060971f, 0.136478f, 0.001033f, 0.196870f, 0.105021f, 0.333153f, 0.103394f, 0.391035f, 0.043670f, 0.165673f, 0.056779f, 0.285398f, 0.127817f, 0.350572f, 0.153277f, 0.404732f, +0.000767f, 0.001889f, 0.000015f, 0.002452f, 0.001819f, 0.006353f, 0.002109f, 0.006707f, 0.000496f, 0.002071f, 0.000759f, 0.003209f, 0.002917f, 0.008808f, 0.004118f, 0.009146f, +0.087874f, 0.235771f, 0.001903f, 0.305564f, 0.221463f, 0.842090f, 0.278604f, 0.888026f, 0.061654f, 0.280360f, 0.102430f, 0.433921f, 0.187606f, 0.616772f, 0.287476f, 0.639750f, +0.061734f, 0.207388f, 0.000980f, 0.260921f, 0.071035f, 0.338189f, 0.065530f, 0.346211f, 0.000610f, 0.003474f, 0.000743f, 0.005220f, 0.132680f, 
0.546153f, 0.149087f, 0.549939f, +0.174159f, 0.536732f, 0.002795f, 0.687184f, 0.284060f, 1.240648f, 0.264836f, 1.292468f, 0.133995f, 0.699886f, 0.164982f, 1.070103f, 0.333561f, 1.259607f, 0.378803f, 1.290700f, +0.082716f, 0.280691f, 0.001563f, 0.323236f, 0.185839f, 0.893723f, 0.204016f, 0.837433f, 0.057462f, 0.330482f, 0.083309f, 0.454488f, 0.287502f, 1.195440f, 0.384447f, 1.101777f, +0.219235f, 0.809864f, 0.004496f, 0.931586f, 0.523193f, 2.738996f, 0.623302f, 2.563644f, 0.165231f, 1.034472f, 0.259960f, 1.421060f, 0.427623f, 1.935577f, 0.620535f, 1.781951f, +0.013142f, 0.087519f, 0.000362f, 0.095168f, 0.017529f, 0.165434f, 0.028065f, 0.146375f, 0.000154f, 0.001743f, 0.000327f, 0.002264f, 0.036555f, 0.298296f, 0.071293f, 0.259602f, +0.031547f, 0.192734f, 0.000879f, 0.213271f, 0.059644f, 0.516408f, 0.096514f, 0.464968f, 0.028859f, 0.298820f, 0.061672f, 0.394883f, 0.078199f, 0.585394f, 0.154133f, 0.518439f, +0.013744f, 0.092458f, 0.000451f, 0.092022f, 0.035794f, 0.341241f, 0.068201f, 0.276355f, 0.011352f, 0.129433f, 0.028566f, 0.153843f, 0.061827f, 0.509630f, 0.143494f, 0.405957f, +0.053323f, 0.390490f, 0.001898f, 0.388221f, 0.147508f, 1.530850f, 0.305007f, 1.238393f, 0.047784f, 0.593061f, 0.130484f, 0.704129f, 0.134611f, 1.207872f, 0.339036f, 0.961093f, +0.092807f, 0.280471f, 0.001712f, 0.295597f, 0.087928f, 0.376585f, 0.094233f, 0.322947f, 0.000825f, 0.004226f, 0.001168f, 0.005319f, 0.204761f, 0.758232f, 0.267295f, 0.639570f, +0.267498f, 0.741613f, 0.004987f, 0.795387f, 0.359239f, 1.411454f, 0.389098f, 1.231754f, 0.185106f, 0.869765f, 0.264774f, 1.114003f, 0.525938f, 1.786645f, 0.693872f, 1.533607f, +0.117007f, 0.357185f, 0.002569f, 0.344565f, 0.216449f, 0.936411f, 0.276052f, 0.735021f, 0.073107f, 0.378241f, 0.123133f, 0.435741f, 0.417490f, 1.561623f, 0.648558f, 1.205669f, +0.245677f, 0.816414f, 0.005853f, 0.786696f, 0.482741f, 2.273464f, 0.668126f, 1.782544f, 0.166533f, 0.937933f, 0.304384f, 1.079323f, 0.491924f, 2.003051f, 0.829298f, 1.544767f, +0.036121f, 
0.064639f, 0.000393f, 0.092161f, 0.059299f, 0.150387f, 0.037475f, 0.174468f, 0.000335f, 0.001015f, 0.000279f, 0.001728f, 0.096303f, 0.211163f, 0.074132f, 0.240959f, +0.056020f, 0.091965f, 0.000616f, 0.133433f, 0.130359f, 0.303285f, 0.083261f, 0.358052f, 0.040399f, 0.112403f, 0.034076f, 0.194760f, 0.133095f, 0.267727f, 0.103545f, 0.310890f, +0.000570f, 0.001031f, 0.000007f, 0.001345f, 0.001827f, 0.004681f, 0.001374f, 0.004971f, 0.000371f, 0.001137f, 0.000369f, 0.001772f, 0.002458f, 0.005444f, 0.002252f, 0.005686f, +0.090751f, 0.178575f, 0.001275f, 0.232786f, 0.308986f, 0.861664f, 0.252177f, 0.913964f, 0.064108f, 0.213802f, 0.069097f, 0.332836f, 0.219580f, 0.529434f, 0.218287f, 0.552360f, +0.042840f, 0.105549f, 0.000441f, 0.133569f, 0.066596f, 0.232530f, 0.039856f, 0.239434f, 0.000426f, 0.001780f, 0.000337f, 0.002690f, 0.104349f, 0.315022f, 0.076069f, 0.319055f, +0.093258f, 0.210785f, 0.000971f, 0.271444f, 0.205494f, 0.658233f, 0.124293f, 0.689724f, 0.072243f, 0.276742f, 0.057706f, 0.425597f, 0.202429f, 0.560628f, 0.149139f, 0.577815f, +0.035850f, 0.089221f, 0.000439f, 0.103344f, 0.108813f, 0.383788f, 0.077498f, 0.361712f, 0.025075f, 0.105768f, 0.023585f, 0.146303f, 0.141220f, 0.430649f, 0.122510f, 0.399222f, +0.131954f, 0.357493f, 0.001755f, 0.413621f, 0.425426f, 1.633410f, 0.328807f, 1.537751f, 0.100131f, 0.459769f, 0.102204f, 0.635269f, 0.291696f, 0.968328f, 0.274610f, 0.896668f, +0.000240f, 0.001174f, 0.000004f, 0.001284f, 0.000433f, 0.002998f, 0.000450f, 0.002668f, 0.000003f, 0.000024f, 0.000004f, 0.000031f, 0.000758f, 0.004534f, 0.000959f, 0.003969f, +0.000445f, 0.001995f, 0.000008f, 0.002220f, 0.001137f, 0.007220f, 0.001194f, 0.006539f, 0.000410f, 0.003114f, 0.000568f, 0.004139f, 0.001251f, 0.006866f, 0.001599f, 0.006116f, +0.000157f, 0.000774f, 0.000003f, 0.000775f, 0.000552f, 0.003862f, 0.000683f, 0.003146f, 0.000131f, 0.001092f, 0.000213f, 0.001305f, 0.000800f, 0.004838f, 0.001205f, 0.003876f, +0.000846f, 0.004543f, 0.000020f, 0.004542f, 0.003161f, 
0.024059f, 0.004240f, 0.019576f, 0.000763f, 0.006946f, 0.001352f, 0.008295f, 0.002420f, 0.015925f, 0.003954f, 0.012745f, +0.117224f, 0.259817f, 0.001403f, 0.275425f, 0.150042f, 0.471292f, 0.104320f, 0.406520f, 0.001049f, 0.003942f, 0.000963f, 0.004990f, 0.293117f, 0.796044f, 0.248236f, 0.675379f, +0.260717f, 0.530112f, 0.003153f, 0.571865f, 0.473020f, 1.363031f, 0.332381f, 1.196429f, 0.181649f, 0.625976f, 0.168566f, 0.806429f, 0.580950f, 1.447389f, 0.497239f, 1.249641f, +0.092303f, 0.206653f, 0.001315f, 0.200513f, 0.230680f, 0.731918f, 0.190864f, 0.577856f, 0.058067f, 0.220334f, 0.063449f, 0.255309f, 0.373256f, 1.023953f, 0.376176f, 0.795163f, +0.269144f, 0.655954f, 0.004160f, 0.635762f, 0.714468f, 2.467740f, 0.641517f, 1.946148f, 0.183690f, 0.758753f, 0.217816f, 0.878221f, 0.610766f, 1.823943f, 0.667988f, 1.414838f, +0.040551f, 0.103050f, 0.000593f, 0.109704f, 0.056774f, 0.204471f, 0.048245f, 0.177118f, 0.000377f, 0.001622f, 0.000423f, 0.002062f, 0.094349f, 0.293791f, 0.097658f, 0.250316f, +0.095744f, 0.223210f, 0.001415f, 0.241813f, 0.190013f, 0.627786f, 0.163187f, 0.553392f, 0.069211f, 0.273464f, 0.078497f, 0.353793f, 0.198519f, 0.567088f, 0.207670f, 0.491690f, +0.000987f, 0.002532f, 0.000017f, 0.002468f, 0.002697f, 0.009811f, 0.002727f, 0.007779f, 0.000644f, 0.002801f, 0.000860f, 0.003260f, 0.003712f, 0.011676f, 0.004572f, 0.009106f, +0.147463f, 0.412074f, 0.002785f, 0.401085f, 0.428197f, 1.695749f, 0.469909f, 1.343007f, 0.104420f, 0.494538f, 0.151333f, 0.574835f, 0.311383f, 1.066187f, 0.416231f, 0.830556f, +0.080583f, 0.281947f, 0.001116f, 0.266405f, 0.106835f, 0.529740f, 0.085973f, 0.407280f, 0.000804f, 0.004767f, 0.000854f, 0.005379f, 0.171297f, 0.734382f, 0.167908f, 0.555357f, +0.267066f, 0.857218f, 0.003739f, 0.824245f, 0.501882f, 2.282978f, 0.408183f, 1.786169f, 0.207378f, 1.128134f, 0.222738f, 1.295415f, 0.505910f, 1.989729f, 0.501182f, 1.531206f, +0.103946f, 0.367372f, 0.001713f, 0.317722f, 0.269075f, 1.347721f, 0.257682f, 0.948411f, 0.072879f, 
0.436542f, 0.092170f, 0.450868f, 0.357341f, 1.547498f, 0.416834f, 1.071139f, +0.359268f, 1.382233f, 0.006427f, 1.194102f, 0.987846f, 5.386165f, 1.026621f, 3.786130f, 0.273274f, 1.781917f, 0.375058f, 1.838360f, 0.693096f, 3.267415f, 0.877371f, 2.259119f, +0.015949f, 0.110620f, 0.000383f, 0.090338f, 0.024509f, 0.240920f, 0.034233f, 0.160090f, 0.000189f, 0.002224f, 0.000349f, 0.002169f, 0.043877f, 0.372908f, 0.074649f, 0.243731f, +0.044975f, 0.286179f, 0.001093f, 0.237827f, 0.097972f, 0.883469f, 0.138297f, 0.597409f, 0.041524f, 0.447806f, 0.077409f, 0.444423f, 0.110266f, 0.859712f, 0.189594f, 0.571810f, +0.016057f, 0.112504f, 0.000459f, 0.084094f, 0.048182f, 0.478414f, 0.080086f, 0.290978f, 0.013386f, 0.158953f, 0.029383f, 0.141890f, 0.071444f, 0.613342f, 0.144646f, 0.366926f, +0.081240f, 0.619619f, 0.002522f, 0.462641f, 0.258932f, 2.798767f, 0.467054f, 1.700364f, 0.073474f, 0.949759f, 0.175022f, 0.846868f, 0.202843f, 1.895660f, 0.445666f, 1.132804f, +0.127235f, 0.400475f, 0.002047f, 0.316983f, 0.138891f, 0.619540f, 0.129847f, 0.399012f, 0.001142f, 0.006090f, 0.001409f, 0.005756f, 0.277650f, 1.070813f, 0.316174f, 0.678343f, +0.430821f, 1.243984f, 0.007007f, 1.001995f, 0.666620f, 2.727868f, 0.629853f, 1.787846f, 0.300881f, 1.472444f, 0.375435f, 1.416357f, 0.837790f, 2.964152f, 0.964195f, 1.910847f, +0.154430f, 0.490993f, 0.002957f, 0.355715f, 0.329151f, 1.483088f, 0.366197f, 0.874278f, 0.097382f, 0.524746f, 0.143079f, 0.454003f, 0.544992f, 2.123157f, 0.738547f, 1.231072f, +0.422839f, 1.463467f, 0.008787f, 1.059081f, 0.957292f, 4.695476f, 1.155774f, 2.764912f, 0.289274f, 1.696851f, 0.461230f, 1.466469f, 0.837401f, 3.551314f, 1.231490f, 2.056883f, +0.028250f, 0.051445f, 0.000421f, 0.091287f, 0.044915f, 0.115916f, 0.038854f, 0.167366f, 0.000356f, 0.001099f, 0.000407f, 0.002330f, 0.082744f, 0.184630f, 0.087185f, 0.262209f, +0.060028f, 0.100281f, 0.000903f, 0.181084f, 0.135282f, 0.320285f, 0.118271f, 0.470600f, 0.058920f, 0.166823f, 0.068027f, 0.359748f, 0.156680f, 
0.320723f, 0.166848f, 0.463515f, +0.000683f, 0.001256f, 0.000012f, 0.002041f, 0.002120f, 0.005528f, 0.002183f, 0.007306f, 0.000605f, 0.001887f, 0.000823f, 0.003661f, 0.003236f, 0.007293f, 0.004057f, 0.009480f, +0.083919f, 0.168043f, 0.001614f, 0.272631f, 0.276719f, 0.785282f, 0.309133f, 1.036658f, 0.080688f, 0.273839f, 0.119040f, 0.530556f, 0.223072f, 0.547333f, 0.303543f, 0.710690f, +0.050060f, 0.125512f, 0.000706f, 0.197676f, 0.075366f, 0.267792f, 0.061740f, 0.343180f, 0.000678f, 0.002881f, 0.000734f, 0.005420f, 0.133959f, 0.411540f, 0.133668f, 0.518746f, +0.149308f, 0.343419f, 0.002128f, 0.550405f, 0.318627f, 1.038609f, 0.263798f, 1.354459f, 0.157425f, 0.613680f, 0.172124f, 1.174581f, 0.356048f, 1.003457f, 0.359060f, 1.287156f, +0.064178f, 0.162539f, 0.001077f, 0.234311f, 0.188657f, 0.677126f, 0.183917f, 0.794254f, 0.061099f, 0.262256f, 0.078661f, 0.451485f, 0.277739f, 0.861895f, 0.329803f, 0.994404f, +0.182314f, 0.502635f, 0.003320f, 0.723779f, 0.569258f, 2.224176f, 0.602236f, 2.606021f, 0.188300f, 0.879848f, 0.263079f, 1.513017f, 0.442760f, 1.495713f, 0.570550f, 1.723756f, +0.007847f, 0.039000f, 0.000192f, 0.053088f, 0.013694f, 0.096455f, 0.019470f, 0.106834f, 0.000126f, 0.001065f, 0.000237f, 0.001731f, 0.027176f, 0.165504f, 0.047065f, 0.180307f, +0.019914f, 0.090800f, 0.000493f, 0.125778f, 0.049261f, 0.318317f, 0.070786f, 0.358783f, 0.024965f, 0.192925f, 0.047376f, 0.319145f, 0.061460f, 0.343380f, 0.107575f, 0.380686f, +0.007852f, 0.039422f, 0.000229f, 0.049117f, 0.026755f, 0.190366f, 0.045270f, 0.192992f, 0.008888f, 0.075629f, 0.019860f, 0.112528f, 0.043978f, 0.270548f, 0.090639f, 0.269782f, +0.032651f, 0.178449f, 0.001032f, 0.222088f, 0.118175f, 0.915321f, 0.216990f, 0.926918f, 0.040096f, 0.371408f, 0.097230f, 0.552009f, 0.102625f, 0.687261f, 0.229529f, 0.684555f, +0.161411f, 0.364058f, 0.002644f, 0.480314f, 0.200087f, 0.639562f, 0.190421f, 0.686582f, 0.001966f, 0.007517f, 0.002472f, 0.011844f, 0.443401f, 1.225410f, 0.513998f, 1.293928f, +0.491856f, 
1.017713f, 0.008143f, 1.366372f, 0.864246f, 2.534262f, 0.831255f, 2.768542f, 0.466429f, 1.635679f, 0.592463f, 2.622556f, 1.204061f, 3.052692f, 1.410634f, 3.280208f, +0.194711f, 0.443613f, 0.003796f, 0.535704f, 0.471274f, 1.521649f, 0.533739f, 1.495168f, 0.166720f, 0.643765f, 0.249358f, 0.928390f, 0.865013f, 2.414815f, 1.193293f, 2.333879f, +0.438183f, 1.086757f, 0.009270f, 1.310905f, 1.126529f, 3.959558f, 1.384546f, 3.886346f, 0.407042f, 1.710968f, 0.660667f, 2.464699f, 1.092411f, 3.319790f, 1.635384f, 3.204972f, +0.080377f, 0.196119f, 0.001348f, 0.278000f, 0.132873f, 0.459467f, 0.129436f, 0.529952f, 0.000780f, 0.003227f, 0.001004f, 0.005463f, 0.168286f, 0.503135f, 0.199679f, 0.570802f, +0.132601f, 0.296816f, 0.002247f, 0.428157f, 0.310721f, 0.985682f, 0.305906f, 1.156935f, 0.100217f, 0.380195f, 0.130298f, 0.654947f, 0.247408f, 0.678578f, 0.296688f, 0.783413f, +0.001444f, 0.003560f, 0.000029f, 0.004619f, 0.004662f, 0.016283f, 0.005404f, 0.017191f, 0.000986f, 0.004117f, 0.001509f, 0.006379f, 0.004890f, 0.014769f, 0.006905f, 0.015336f, +0.196342f, 0.526796f, 0.004252f, 0.682737f, 0.673168f, 2.559648f, 0.846855f, 2.699277f, 0.145360f, 0.660996f, 0.241495f, 1.023042f, 0.373077f, 1.226523f, 0.571680f, 1.272219f, +0.187754f, 0.630741f, 0.002981f, 0.793555f, 0.293906f, 1.399260f, 0.271129f, 1.432451f, 0.001958f, 0.011150f, 0.002386f, 0.016752f, 0.359147f, 1.478366f, 0.403560f, 1.488614f, +0.434778f, 1.339920f, 0.006977f, 1.715514f, 0.964720f, 4.213475f, 0.899435f, 4.389465f, 0.352974f, 1.843658f, 0.434600f, 2.818894f, 0.741135f, 2.798701f, 0.841655f, 2.867784f, +0.178878f, 0.607010f, 0.003380f, 0.699016f, 0.546733f, 2.629304f, 0.600208f, 2.463701f, 0.131124f, 0.754132f, 0.190103f, 1.037103f, 0.553362f, 2.300885f, 0.739954f, 2.120611f, +0.562290f, 2.077122f, 0.011530f, 2.389311f, 1.825503f, 9.556782f, 2.174797f, 8.944952f, 0.447170f, 2.799629f, 0.703538f, 3.845864f, 0.976137f, 4.418351f, 1.416497f, 4.067667f, +0.028910f, 0.192533f, 0.000797f, 0.209359f, 0.052459f, 
0.495104f, 0.083993f, 0.438064f, 0.000359f, 0.004047f, 0.000758f, 0.005255f, 0.071573f, 0.584049f, 0.139587f, 0.508287f, +0.056965f, 0.348026f, 0.001587f, 0.385112f, 0.146517f, 1.268579f, 0.237092f, 1.142216f, 0.054988f, 0.569372f, 0.117510f, 0.752409f, 0.125676f, 0.940812f, 0.247714f, 0.833206f, +0.021499f, 0.144625f, 0.000705f, 0.143944f, 0.076169f, 0.726159f, 0.145132f, 0.588083f, 0.018738f, 0.213638f, 0.047151f, 0.253928f, 0.086076f, 0.709505f, 0.199772f, 0.565172f, +0.098924f, 0.724423f, 0.003521f, 0.720216f, 0.372278f, 3.863547f, 0.769773f, 3.125445f, 0.093540f, 1.160951f, 0.255429f, 1.378373f, 0.222262f, 1.994362f, 0.559795f, 1.586896f, +0.274814f, 0.830511f, 0.005069f, 0.875301f, 0.354207f, 1.517019f, 0.379605f, 1.300943f, 0.002578f, 0.013204f, 0.003649f, 0.016619f, 0.539641f, 1.998293f, 0.704448f, 1.685565f, +0.650177f, 1.802552f, 0.012121f, 1.933255f, 1.187857f, 4.667109f, 1.286591f, 4.072912f, 0.474747f, 2.230717f, 0.679074f, 2.857122f, 1.137745f, 3.864994f, 1.501031f, 3.317605f, +0.246359f, 0.752057f, 0.005408f, 0.725484f, 0.619989f, 2.682217f, 0.790711f, 2.105363f, 0.162423f, 0.840344f, 0.273566f, 0.968094f, 0.782352f, 2.926394f, 1.215361f, 2.259356f, +0.613484f, 2.038682f, 0.014615f, 1.964473f, 1.639925f, 7.723205f, 2.269696f, 6.055497f, 0.438804f, 2.471398f, 0.802035f, 2.843952f, 1.093294f, 4.451752f, 1.843103f, 3.433223f, +0.007730f, 0.013833f, 0.000084f, 0.019722f, 0.012960f, 0.032866f, 0.008190f, 0.038129f, 0.000086f, 0.000260f, 0.000072f, 0.000443f, 0.026137f, 0.057311f, 0.020120f, 0.065398f, +0.015845f, 0.026012f, 0.000174f, 0.037742f, 0.037656f, 0.087607f, 0.024051f, 0.103428f, 0.013700f, 0.038118f, 0.011556f, 0.066046f, 0.047746f, 0.096043f, 0.037145f, 0.111527f, +0.000178f, 0.000322f, 0.000002f, 0.000420f, 0.000583f, 0.001495f, 0.000439f, 0.001587f, 0.000139f, 0.000426f, 0.000138f, 0.000664f, 0.000975f, 0.002159f, 0.000893f, 0.002255f, +0.028096f, 0.055287f, 0.000395f, 0.072070f, 0.097695f, 0.272439f, 0.079733f, 0.288976f, 0.023796f, 
0.079360f, 0.025648f, 0.123544f, 0.086220f, 0.207887f, 0.085712f, 0.216889f, +0.011347f, 0.027956f, 0.000117f, 0.035377f, 0.018014f, 0.062897f, 0.010781f, 0.064765f, 0.000135f, 0.000565f, 0.000107f, 0.000854f, 0.035053f, 0.105823f, 0.025553f, 0.107177f, +0.032648f, 0.073793f, 0.000340f, 0.095028f, 0.073469f, 0.235334f, 0.044438f, 0.246593f, 0.030322f, 0.116156f, 0.024221f, 0.178633f, 0.089880f, 0.248922f, 0.066219f, 0.256553f, +0.013871f, 0.034522f, 0.000170f, 0.039986f, 0.042997f, 0.151651f, 0.030623f, 0.142928f, 0.011632f, 0.049064f, 0.010941f, 0.067868f, 0.069300f, 0.211330f, 0.060119f, 0.195908f, +0.050564f, 0.136988f, 0.000673f, 0.158496f, 0.166483f, 0.639208f, 0.128673f, 0.601773f, 0.046002f, 0.211225f, 0.046954f, 0.291853f, 0.141762f, 0.470601f, 0.133459f, 0.435775f, +0.000095f, 0.000463f, 0.000002f, 0.000506f, 0.000174f, 0.001207f, 0.000181f, 0.001074f, 0.000001f, 0.000011f, 0.000002f, 0.000015f, 0.000379f, 0.002268f, 0.000479f, 0.001985f, +0.000232f, 0.001040f, 0.000004f, 0.001157f, 0.000605f, 0.003844f, 0.000635f, 0.003481f, 0.000256f, 0.001946f, 0.000355f, 0.002587f, 0.000827f, 0.004539f, 0.001057f, 0.004044f, +0.000090f, 0.000446f, 0.000002f, 0.000447f, 0.000325f, 0.002272f, 0.000402f, 0.001851f, 0.000090f, 0.000754f, 0.000147f, 0.000901f, 0.000585f, 0.003535f, 0.000880f, 0.002832f, +0.000483f, 0.002592f, 0.000011f, 0.002592f, 0.001842f, 0.014019f, 0.002471f, 0.011407f, 0.000522f, 0.004752f, 0.000925f, 0.005674f, 0.001751f, 0.011523f, 0.002861f, 0.009223f, +0.030523f, 0.067651f, 0.000365f, 0.071715f, 0.039898f, 0.125323f, 0.027740f, 0.108099f, 0.000328f, 0.001230f, 0.000301f, 0.001558f, 0.096798f, 0.262883f, 0.081977f, 0.223035f, +0.089728f, 0.182443f, 0.001085f, 0.196813f, 0.166254f, 0.479069f, 0.116823f, 0.420513f, 0.074952f, 0.258291f, 0.069554f, 0.332750f, 0.253580f, 0.631773f, 0.217040f, 0.545458f, +0.035110f, 0.078605f, 0.000500f, 0.076270f, 0.089609f, 0.284318f, 0.074142f, 0.224471f, 0.026481f, 0.100481f, 0.028935f, 0.116431f, 0.180066f, 
0.493975f, 0.181475f, 0.383602f, +0.101388f, 0.247101f, 0.001567f, 0.239494f, 0.274863f, 0.949364f, 0.246798f, 0.748703f, 0.082962f, 0.342684f, 0.098374f, 0.396641f, 0.291805f, 0.871422f, 0.319144f, 0.675965f, +0.008535f, 0.021691f, 0.000125f, 0.023092f, 0.012204f, 0.043954f, 0.010371f, 0.038074f, 0.000095f, 0.000409f, 0.000107f, 0.000520f, 0.025188f, 0.078430f, 0.026071f, 0.066824f, +0.026638f, 0.062100f, 0.000394f, 0.067276f, 0.053988f, 0.178372f, 0.046366f, 0.157234f, 0.023086f, 0.091217f, 0.026184f, 0.118011f, 0.070049f, 0.200101f, 0.073278f, 0.173496f, +0.000303f, 0.000779f, 0.000005f, 0.000759f, 0.000847f, 0.003081f, 0.000856f, 0.002443f, 0.000237f, 0.001033f, 0.000317f, 0.001202f, 0.001448f, 0.004553f, 0.001783f, 0.003551f, +0.044906f, 0.125487f, 0.000848f, 0.122140f, 0.133168f, 0.527373f, 0.146140f, 0.417671f, 0.038124f, 0.180557f, 0.055252f, 0.209874f, 0.120264f, 0.411787f, 0.160758f, 0.320781f, +0.020994f, 0.073453f, 0.000291f, 0.069404f, 0.028424f, 0.140942f, 0.022874f, 0.108360f, 0.000251f, 0.001489f, 0.000267f, 0.001680f, 0.056599f, 0.242652f, 0.055480f, 0.183499f, +0.091963f, 0.295181f, 0.001287f, 0.283827f, 0.176494f, 0.802843f, 0.143543f, 0.628133f, 0.085615f, 0.465746f, 0.091956f, 0.534807f, 0.220946f, 0.868974f, 0.218881f, 0.668723f, +0.039560f, 0.139815f, 0.000652f, 0.120919f, 0.104581f, 0.523815f, 0.100153f, 0.368617f, 0.033254f, 0.199188f, 0.042056f, 0.205725f, 0.172482f, 0.746951f, 0.201199f, 0.517020f, +0.135412f, 0.520978f, 0.002422f, 0.450070f, 0.380242f, 2.073243f, 0.395167f, 1.457357f, 0.123489f, 0.805225f, 0.169484f, 0.830731f, 0.331320f, 1.561919f, 0.419409f, 1.079924f, +0.006187f, 0.042911f, 0.000149f, 0.035043f, 0.009709f, 0.095441f, 0.013561f, 0.063420f, 0.000088f, 0.001034f, 0.000162f, 0.001009f, 0.021587f, 0.183463f, 0.036726f, 0.119911f, +0.023060f, 0.146730f, 0.000560f, 0.121939f, 0.051300f, 0.462601f, 0.072415f, 0.312814f, 0.025526f, 0.275273f, 0.047585f, 0.273194f, 0.071704f, 0.559051f, 0.123288f, 0.371835f, +0.009099f, 
0.063753f, 0.000260f, 0.047654f, 0.027884f, 0.276865f, 0.046347f, 0.168393f, 0.009094f, 0.107992f, 0.019963f, 0.096400f, 0.051347f, 0.440809f, 0.103957f, 0.263709f, +0.045593f, 0.347735f, 0.001416f, 0.259638f, 0.148403f, 1.604069f, 0.267684f, 0.974537f, 0.049437f, 0.639042f, 0.117763f, 0.569812f, 0.144378f, 1.349275f, 0.317212f, 0.806296f, +0.032587f, 0.102567f, 0.000524f, 0.081184f, 0.036328f, 0.162044f, 0.033962f, 0.104364f, 0.000351f, 0.001870f, 0.000433f, 0.001768f, 0.090187f, 0.347826f, 0.102701f, 0.220342f, +0.145841f, 0.421113f, 0.002372f, 0.339195f, 0.230459f, 0.943060f, 0.217748f, 0.618082f, 0.122116f, 0.597605f, 0.152374f, 0.574842f, 0.359695f, 1.272624f, 0.413966f, 0.820400f, +0.057778f, 0.183699f, 0.001106f, 0.133087f, 0.125765f, 0.566673f, 0.139920f, 0.334053f, 0.043682f, 0.235382f, 0.064180f, 0.203650f, 0.258606f, 1.007468f, 0.350451f, 0.584161f, +0.156675f, 0.542260f, 0.003256f, 0.392423f, 0.362245f, 1.776794f, 0.437351f, 1.046258f, 0.128507f, 0.753808f, 0.204897f, 0.651463f, 0.393527f, 1.668900f, 0.578725f, 0.966609f, +0.004425f, 0.008059f, 0.000066f, 0.014300f, 0.007185f, 0.018543f, 0.006216f, 0.026774f, 0.000067f, 0.000206f, 0.000076f, 0.000438f, 0.016439f, 0.036680f, 0.017321f, 0.052093f, +0.012428f, 0.020763f, 0.000187f, 0.037493f, 0.028605f, 0.067723f, 0.025008f, 0.099506f, 0.014626f, 0.041411f, 0.016886f, 0.089301f, 0.041143f, 0.084219f, 0.043813f, 0.121715f, +0.000156f, 0.000288f, 0.000003f, 0.000467f, 0.000496f, 0.001292f, 0.000510f, 0.001707f, 0.000166f, 0.000518f, 0.000226f, 0.001004f, 0.000939f, 0.002117f, 0.001177f, 0.002751f, +0.019018f, 0.038083f, 0.000366f, 0.061785f, 0.064044f, 0.181746f, 0.071546f, 0.239925f, 0.021923f, 0.074404f, 0.032344f, 0.144155f, 0.064116f, 0.157317f, 0.087246f, 0.204270f, +0.009706f, 0.024334f, 0.000137f, 0.038325f, 0.014922f, 0.053022f, 0.012224f, 0.067949f, 0.000158f, 0.000670f, 0.000171f, 0.001260f, 0.032939f, 0.101195f, 0.032868f, 0.127556f, +0.038262f, 0.088004f, 0.000545f, 0.141047f, 0.083386f, 
0.271810f, 0.069037f, 0.354469f, 0.048367f, 0.188545f, 0.052883f, 0.360874f, 0.115719f, 0.326133f, 0.116698f, 0.418339f, +0.018177f, 0.046035f, 0.000305f, 0.066362f, 0.054568f, 0.195854f, 0.053197f, 0.229732f, 0.020747f, 0.089053f, 0.026710f, 0.153308f, 0.099766f, 0.309599f, 0.118468f, 0.357198f, +0.051138f, 0.140986f, 0.000931f, 0.203015f, 0.163066f, 0.637123f, 0.172513f, 0.746504f, 0.063323f, 0.295884f, 0.088471f, 0.508812f, 0.157509f, 0.532091f, 0.202970f, 0.613217f, +0.002265f, 0.011259f, 0.000055f, 0.015325f, 0.004037f, 0.028436f, 0.005740f, 0.031496f, 0.000044f, 0.000368f, 0.000082f, 0.000599f, 0.009950f, 0.060595f, 0.017232f, 0.066015f, +0.007598f, 0.034646f, 0.000188f, 0.047992f, 0.019195f, 0.124039f, 0.027583f, 0.139808f, 0.011421f, 0.088256f, 0.021673f, 0.145998f, 0.029743f, 0.166172f, 0.052059f, 0.184225f, +0.003311f, 0.016625f, 0.000096f, 0.020713f, 0.011523f, 0.081986f, 0.019497f, 0.083117f, 0.004494f, 0.038238f, 0.010041f, 0.056894f, 0.023522f, 0.144703f, 0.048478f, 0.144292f, +0.013636f, 0.074528f, 0.000431f, 0.092754f, 0.050404f, 0.390403f, 0.092551f, 0.395350f, 0.020077f, 0.185973f, 0.048685f, 0.276405f, 0.054360f, 0.364037f, 0.121580f, 0.362604f, +0.030764f, 0.069388f, 0.000504f, 0.091546f, 0.038946f, 0.124489f, 0.037065f, 0.133641f, 0.000449f, 0.001718f, 0.000565f, 0.002706f, 0.107184f, 0.296219f, 0.124249f, 0.312782f, +0.123910f, 0.256385f, 0.002051f, 0.344220f, 0.222350f, 0.652005f, 0.213862f, 0.712280f, 0.140878f, 0.494034f, 0.178945f, 0.792107f, 0.384708f, 0.975363f, 0.450711f, 1.048057f, +0.054214f, 0.123516f, 0.001057f, 0.149156f, 0.134005f, 0.432677f, 0.151767f, 0.425147f, 0.055654f, 0.214900f, 0.083240f, 0.309912f, 0.305461f, 0.852740f, 0.421386f, 0.824159f, +0.120827f, 0.299669f, 0.002556f, 0.361477f, 0.317237f, 1.115034f, 0.389896f, 1.094417f, 0.134567f, 0.565643f, 0.218416f, 0.814825f, 0.382042f, 1.161009f, 0.571932f, 1.120854f, +0.022491f, 0.054877f, 0.000377f, 0.077789f, 0.037970f, 0.131298f, 0.036988f, 0.151440f, 0.000262f, 
0.001083f, 0.000337f, 0.001833f, 0.059722f, 0.178556f, 0.070863f, 0.202570f, +0.049042f, 0.109777f, 0.000831f, 0.158353f, 0.117362f, 0.372300f, 0.115543f, 0.436984f, 0.044438f, 0.168587f, 0.057777f, 0.290417f, 0.116052f, 0.318302f, 0.139168f, 0.367478f, +0.000590f, 0.001455f, 0.000012f, 0.001888f, 0.001946f, 0.006798f, 0.002256f, 0.007176f, 0.000483f, 0.002018f, 0.000739f, 0.003126f, 0.002535f, 0.007657f, 0.003580f, 0.007951f, +0.079484f, 0.213259f, 0.001721f, 0.276388f, 0.278305f, 1.058225f, 0.350112f, 1.115952f, 0.070551f, 0.320816f, 0.117210f, 0.496536f, 0.191549f, 0.629734f, 0.293518f, 0.653196f, +0.065024f, 0.218443f, 0.001032f, 0.274830f, 0.103951f, 0.494901f, 0.095895f, 0.506640f, 0.000813f, 0.004630f, 0.000991f, 0.006956f, 0.157752f, 0.649360f, 0.177260f, 0.653861f, +0.199024f, 0.613363f, 0.003194f, 0.785295f, 0.450996f, 1.969751f, 0.420476f, 2.052025f, 0.193719f, 1.011838f, 0.238518f, 1.547068f, 0.430281f, 1.624842f, 0.488640f, 1.664950f, +0.090499f, 0.307103f, 0.001710f, 0.353651f, 0.282485f, 1.358505f, 0.310115f, 1.272941f, 0.079536f, 0.457433f, 0.115310f, 0.629074f, 0.355069f, 1.476383f, 0.474798f, 1.360709f, +0.281735f, 1.040740f, 0.005777f, 1.197162f, 0.934104f, 4.890174f, 1.112836f, 4.577102f, 0.268624f, 1.681793f, 0.422630f, 2.310288f, 0.620308f, 2.807738f, 0.900144f, 2.584888f, +0.014908f, 0.099284f, 0.000411f, 0.107960f, 0.027626f, 0.260736f, 0.044233f, 0.230697f, 0.000222f, 0.002502f, 0.000469f, 0.003249f, 0.046810f, 0.381978f, 0.091292f, 0.332428f, +0.038827f, 0.237212f, 0.001081f, 0.262489f, 0.101987f, 0.883028f, 0.165034f, 0.795069f, 0.044935f, 0.465278f, 0.096027f, 0.614852f, 0.108641f, 0.813286f, 0.214137f, 0.720266f, +0.016195f, 0.108947f, 0.000531f, 0.108435f, 0.058598f, 0.558648f, 0.111653f, 0.452423f, 0.016923f, 0.192949f, 0.042585f, 0.229339f, 0.082237f, 0.677868f, 0.190864f, 0.539971f, +0.073802f, 0.540454f, 0.002627f, 0.537315f, 0.283639f, 2.943639f, 0.586491f, 2.381279f, 0.083667f, 1.038417f, 0.228470f, 1.232891f, 0.210304f, 
1.887061f, 0.529677f, 1.501518f, +0.093565f, 0.282761f, 0.001726f, 0.298010f, 0.123158f, 0.527470f, 0.131989f, 0.452340f, 0.001052f, 0.005390f, 0.001489f, 0.006784f, 0.233021f, 0.862878f, 0.304186f, 0.727840f, +0.292588f, 0.811173f, 0.005455f, 0.869991f, 0.545911f, 2.144894f, 0.591287f, 1.871815f, 0.256141f, 1.203544f, 0.366382f, 1.541509f, 0.649362f, 2.205924f, 0.856705f, 1.893505f, +0.122530f, 0.374046f, 0.002690f, 0.360830f, 0.314913f, 1.362389f, 0.401629f, 1.069385f, 0.096853f, 0.501099f, 0.163128f, 0.577277f, 0.493507f, 1.845967f, 0.766649f, 1.425200f, +0.302183f, 1.004192f, 0.007199f, 0.967638f, 0.824942f, 3.885054f, 1.141740f, 3.046136f, 0.259137f, 1.459492f, 0.473644f, 1.679505f, 0.682999f, 2.781085f, 1.151417f, 2.144792f, +0.050497f, 0.090365f, 0.000549f, 0.128840f, 0.068239f, 0.173058f, 0.043125f, 0.200769f, 0.000438f, 0.001327f, 0.000365f, 0.002260f, 0.138373f, 0.303412f, 0.106517f, 0.346225f, +0.118349f, 0.194288f, 0.001301f, 0.281894f, 0.226695f, 0.527414f, 0.144791f, 0.622655f, 0.079848f, 0.222163f, 0.067351f, 0.384942f, 0.288999f, 0.581334f, 0.224835f, 0.675058f, +0.000940f, 0.001700f, 0.000012f, 0.002218f, 0.002481f, 0.006356f, 0.001866f, 0.006749f, 0.000573f, 0.001755f, 0.000569f, 0.002735f, 0.004167f, 0.009229f, 0.003817f, 0.009640f, +0.135374f, 0.266383f, 0.001902f, 0.347249f, 0.379403f, 1.058034f, 0.309647f, 1.122254f, 0.089469f, 0.298379f, 0.096431f, 0.464502f, 0.336657f, 0.811721f, 0.334675f, 0.846871f, +0.053945f, 0.132910f, 0.000556f, 0.168193f, 0.069028f, 0.241024f, 0.041312f, 0.248179f, 0.000502f, 0.002097f, 0.000397f, 0.003170f, 0.135053f, 0.407714f, 0.098451f, 0.412933f, +0.177463f, 0.401109f, 0.001848f, 0.516538f, 0.321883f, 1.031051f, 0.194692f, 1.080378f, 0.128614f, 0.492686f, 0.102735f, 0.757692f, 0.395919f, 1.096498f, 0.291692f, 1.130114f, +0.053259f, 0.132548f, 0.000653f, 0.153528f, 0.133065f, 0.469324f, 0.094770f, 0.442328f, 0.034852f, 0.147004f, 0.032780f, 0.203342f, 0.215630f, 0.657564f, 0.187062f, 0.609577f, +0.177299f, 
0.480342f, 0.002359f, 0.555758f, 0.470527f, 1.806577f, 0.363666f, 1.700776f, 0.125871f, 0.577956f, 0.128476f, 0.798570f, 0.402833f, 1.337263f, 0.379237f, 1.238301f, +0.000400f, 0.001953f, 0.000007f, 0.002136f, 0.000593f, 0.004106f, 0.000616f, 0.003654f, 0.000004f, 0.000037f, 0.000006f, 0.000048f, 0.001296f, 0.007755f, 0.001640f, 0.006788f, +0.001119f, 0.005016f, 0.000020f, 0.005583f, 0.002354f, 0.014946f, 0.002471f, 0.013535f, 0.000965f, 0.007326f, 0.001337f, 0.009737f, 0.003232f, 0.017746f, 0.004133f, 0.015808f, +0.000308f, 0.001520f, 0.000007f, 0.001522f, 0.000893f, 0.006240f, 0.001103f, 0.005083f, 0.000240f, 0.002005f, 0.000391f, 0.002397f, 0.001615f, 0.009762f, 0.002431f, 0.007822f, +0.001502f, 0.008066f, 0.000035f, 0.008065f, 0.004620f, 0.035163f, 0.006197f, 0.028611f, 0.001268f, 0.011539f, 0.002246f, 0.013780f, 0.004416f, 0.029061f, 0.007216f, 0.023259f, +0.163196f, 0.361708f, 0.001953f, 0.383437f, 0.171942f, 0.540082f, 0.119547f, 0.465855f, 0.001367f, 0.005134f, 0.001255f, 0.006499f, 0.419414f, 1.139041f, 0.355195f, 0.966385f, +0.548504f, 1.115266f, 0.006634f, 1.203107f, 0.819159f, 2.360449f, 0.575606f, 2.071934f, 0.357533f, 1.232086f, 0.331782f, 1.587265f, 1.256205f, 3.129732f, 1.075194f, 2.702137f, +0.151603f, 0.339417f, 0.002159f, 0.329332f, 0.311874f, 0.989538f, 0.258045f, 0.781250f, 0.089227f, 0.338568f, 0.097496f, 0.392311f, 0.630101f, 1.728554f, 0.635030f, 1.342330f, +0.399812f, 0.974418f, 0.006179f, 0.944422f, 0.873641f, 3.017514f, 0.784437f, 2.379720f, 0.255288f, 1.054494f, 0.302715f, 1.220529f, 0.932519f, 2.784802f, 1.019886f, 2.160178f, +0.085291f, 0.216747f, 0.001248f, 0.230743f, 0.098296f, 0.354009f, 0.083529f, 0.306653f, 0.000741f, 0.003192f, 0.000832f, 0.004058f, 0.203965f, 0.635118f, 0.211118f, 0.541134f, +0.304325f, 0.709476f, 0.004499f, 0.768606f, 0.497149f, 1.642535f, 0.426962f, 1.447892f, 0.205812f, 0.813201f, 0.233428f, 1.052075f, 0.648542f, 1.852622f, 0.678437f, 1.606302f, +0.002448f, 0.006284f, 0.000043f, 0.006123f, 0.005509f, 
0.020040f, 0.005571f, 0.015889f, 0.001495f, 0.006504f, 0.001996f, 0.007568f, 0.009468f, 0.029779f, 0.011662f, 0.023224f, +0.330956f, 0.924827f, 0.006252f, 0.900164f, 0.791056f, 3.132748f, 0.868116f, 2.481088f, 0.219251f, 1.038383f, 0.317753f, 1.206984f, 0.718276f, 2.459405f, 0.960132f, 1.915869f, +0.152668f, 0.534160f, 0.002115f, 0.504715f, 0.166607f, 0.826123f, 0.134074f, 0.635149f, 0.001425f, 0.008449f, 0.001514f, 0.009534f, 0.333554f, 1.430005f, 0.326954f, 1.081403f, +0.764614f, 2.454231f, 0.010704f, 2.359829f, 1.182778f, 5.380267f, 0.961959f, 4.209443f, 0.555467f, 3.021740f, 0.596610f, 3.469806f, 1.488704f, 5.855030f, 1.474790f, 4.505767f, +0.232333f, 0.821129f, 0.003830f, 0.710154f, 0.495058f, 2.479609f, 0.474097f, 1.744937f, 0.152397f, 0.912858f, 0.192738f, 0.942816f, 0.820917f, 3.555056f, 0.957589f, 2.460719f, +0.726279f, 2.794256f, 0.012992f, 2.413940f, 1.643812f, 8.962779f, 1.708335f, 6.300261f, 0.516840f, 3.370115f, 0.709342f, 3.476865f, 1.440090f, 6.788916f, 1.822969f, 4.693914f, +0.039928f, 0.276944f, 0.000960f, 0.226166f, 0.050509f, 0.496489f, 0.070547f, 0.329913f, 0.000443f, 0.005209f, 0.000817f, 0.005080f, 0.112904f, 0.959558f, 0.192084f, 0.627163f, +0.170158f, 1.082718f, 0.004134f, 0.899787f, 0.305111f, 2.751361f, 0.430695f, 1.860491f, 0.146978f, 1.585038f, 0.273995f, 1.573067f, 0.428777f, 3.343043f, 0.737247f, 2.223519f, +0.047428f, 0.332296f, 0.001357f, 0.248385f, 0.117145f, 1.163163f, 0.194713f, 0.707452f, 0.036990f, 0.439238f, 0.081196f, 0.392087f, 0.216888f, 1.861972f, 0.439113f, 1.113905f, +0.217025f, 1.655249f, 0.006738f, 1.235899f, 0.569382f, 6.154371f, 1.027032f, 3.739030f, 0.183631f, 2.373693f, 0.437426f, 2.116543f, 0.556942f, 5.204878f, 1.223656f, 3.110317f, +0.266502f, 0.838818f, 0.004288f, 0.663941f, 0.239466f, 1.068169f, 0.223874f, 0.687950f, 0.002237f, 0.011934f, 0.002762f, 0.011280f, 0.597726f, 2.305247f, 0.680660f, 1.460338f, +1.363671f, 3.937560f, 0.022178f, 3.171598f, 1.736877f, 7.107454f, 1.641080f, 4.658227f, 0.891005f, 
4.360375f, 1.111783f, 4.194283f, 2.725579f, 9.643265f, 3.136811f, 6.216552f, +0.381614f, 1.213301f, 0.007308f, 0.879014f, 0.669525f, 3.016747f, 0.744880f, 1.778367f, 0.225136f, 1.213152f, 0.330783f, 1.049603f, 1.384188f, 5.392460f, 1.875784f, 3.126716f, +0.945037f, 3.270817f, 0.019639f, 2.367022f, 1.761149f, 8.638358f, 2.126300f, 5.086663f, 0.604862f, 3.548050f, 0.964414f, 3.066329f, 1.923615f, 8.157813f, 2.828887f, 4.724918f, +0.050212f, 0.091438f, 0.000748f, 0.162254f, 0.065714f, 0.169592f, 0.056845f, 0.244867f, 0.000592f, 0.001828f, 0.000677f, 0.003874f, 0.151158f, 0.337287f, 0.159272f, 0.479008f, +0.161234f, 0.269355f, 0.002426f, 0.486390f, 0.299104f, 0.708141f, 0.261494f, 1.040482f, 0.148060f, 0.419212f, 0.170945f, 0.904016f, 0.432542f, 0.885414f, 0.460614f, 1.279617f, +0.001432f, 0.002635f, 0.000025f, 0.004279f, 0.003660f, 0.009542f, 0.003768f, 0.012610f, 0.001188f, 0.003703f, 0.001615f, 0.007182f, 0.006973f, 0.015718f, 0.008744f, 0.020431f, +0.159158f, 0.318704f, 0.003060f, 0.517061f, 0.431999f, 1.225942f, 0.482602f, 1.618377f, 0.143168f, 0.485884f, 0.211219f, 0.941391f, 0.434832f, 1.066913f, 0.591694f, 1.385344f, +0.080145f, 0.200941f, 0.001130f, 0.316474f, 0.099321f, 0.352907f, 0.081363f, 0.452256f, 0.001016f, 0.004316f, 0.001099f, 0.008117f, 0.220428f, 0.677186f, 0.219950f, 0.853592f, +0.361232f, 0.830861f, 0.005148f, 1.331638f, 0.634548f, 2.068400f, 0.525357f, 2.697417f, 0.356328f, 1.389052f, 0.389600f, 2.658640f, 0.885369f, 2.495250f, 0.892858f, 3.200712f, +0.121220f, 0.307004f, 0.002034f, 0.442565f, 0.293316f, 1.052769f, 0.285946f, 1.234874f, 0.107967f, 0.463430f, 0.139000f, 0.797812f, 0.539180f, 1.673211f, 0.640251f, 1.930453f, +0.311448f, 0.858654f, 0.005671f, 1.236435f, 0.800484f, 3.127608f, 0.846856f, 3.664554f, 0.300945f, 1.406194f, 0.420458f, 2.418139f, 0.777400f, 2.626180f, 1.001775f, 3.026579f, +0.016601f, 0.082510f, 0.000406f, 0.112314f, 0.023847f, 0.167974f, 0.033906f, 0.186048f, 0.000250f, 0.002107f, 0.000470f, 0.003425f, 0.059092f, 
0.359881f, 0.102340f, 0.392068f, +0.063667f, 0.290299f, 0.001575f, 0.402126f, 0.129639f, 0.837713f, 0.186288f, 0.944210f, 0.074672f, 0.577057f, 0.141706f, 0.954594f, 0.201960f, 1.128351f, 0.353494f, 1.250939f, +0.019598f, 0.098395f, 0.000571f, 0.122594f, 0.054969f, 0.391118f, 0.093010f, 0.396512f, 0.020755f, 0.176603f, 0.046377f, 0.262769f, 0.112821f, 0.694056f, 0.232522f, 0.692090f, +0.073707f, 0.402840f, 0.002330f, 0.501354f, 0.219594f, 1.700867f, 0.403216f, 1.722418f, 0.084683f, 0.784408f, 0.205348f, 1.165837f, 0.238112f, 1.594600f, 0.532559f, 1.588323f, +0.285698f, 0.644382f, 0.004680f, 0.850155f, 0.291520f, 0.931823f, 0.277437f, 1.000330f, 0.003256f, 0.012448f, 0.004093f, 0.019613f, 0.806642f, 2.229284f, 0.935073f, 2.353934f, +1.315621f, 2.722186f, 0.021781f, 3.654783f, 1.902865f, 5.579848f, 1.830228f, 6.095677f, 1.167213f, 4.093201f, 1.482608f, 6.562810f, 3.310184f, 8.392414f, 3.878093f, 9.017897f, +0.406598f, 0.926357f, 0.007926f, 1.118660f, 0.810075f, 2.615570f, 0.917447f, 2.570052f, 0.325712f, 1.257690f, 0.487157f, 1.813746f, 1.856554f, 5.182854f, 2.561132f, 5.009141f, +0.827578f, 2.052513f, 0.017507f, 2.475853f, 1.751354f, 6.155715f, 2.152480f, 6.041895f, 0.719225f, 3.023207f, 1.167371f, 4.355017f, 2.120561f, 6.444296f, 3.174567f, 6.221413f, +0.139911f, 0.341383f, 0.002346f, 0.483915f, 0.190388f, 0.658348f, 0.185462f, 0.759343f, 0.001271f, 0.005256f, 0.001635f, 0.008897f, 0.301080f, 0.900159f, 0.357246f, 1.021222f, +0.348811f, 0.780782f, 0.005911f, 1.126280f, 0.672808f, 2.134313f, 0.662383f, 2.505130f, 0.246637f, 0.935670f, 0.320667f, 1.611840f, 0.668909f, 1.834654f, 0.802147f, 2.118093f, +0.002966f, 0.007310f, 0.000059f, 0.009485f, 0.007880f, 0.027526f, 0.009135f, 0.029060f, 0.001894f, 0.007910f, 0.002899f, 0.012256f, 0.010322f, 0.031173f, 0.014575f, 0.032370f, +0.364684f, 0.978467f, 0.007897f, 1.268113f, 1.029214f, 3.913475f, 1.294767f, 4.126955f, 0.252592f, 1.148618f, 0.419648f, 1.777748f, 0.712220f, 2.341483f, 1.091361f, 2.428718f, +0.294383f, 
0.988949f, 0.004674f, 1.244227f, 0.379324f, 1.805922f, 0.349927f, 1.848760f, 0.002873f, 0.016355f, 0.003500f, 0.024573f, 0.578770f, 2.382406f, 0.650342f, 2.398921f, +1.030172f, 3.174834f, 0.016532f, 4.064776f, 1.881577f, 8.217902f, 1.754245f, 8.561150f, 0.782452f, 4.086911f, 0.963396f, 6.248757f, 1.804890f, 6.815694f, 2.049689f, 6.983933f, +0.330888f, 1.122844f, 0.006252f, 1.293037f, 0.832486f, 4.003525f, 0.913910f, 3.751368f, 0.226924f, 1.305099f, 0.328992f, 1.794807f, 1.052068f, 4.374513f, 1.406822f, 4.031770f, +0.940727f, 3.475084f, 0.019291f, 3.997385f, 2.513993f, 13.161130f, 2.995024f, 12.318550f, 0.699920f, 4.382039f, 1.101193f, 6.019630f, 1.678516f, 7.597572f, 2.435737f, 6.994553f, +0.059901f, 0.398917f, 0.001651f, 0.433778f, 0.089469f, 0.844405f, 0.143251f, 0.747123f, 0.000695f, 0.007844f, 0.001470f, 0.010186f, 0.152419f, 1.243761f, 0.297258f, 1.082422f, +0.178364f, 1.089703f, 0.004968f, 1.205822f, 0.377627f, 3.269581f, 0.611070f, 2.943897f, 0.161079f, 1.667882f, 0.344227f, 2.204058f, 0.404447f, 3.027683f, 0.797183f, 2.681389f, +0.052552f, 0.353525f, 0.001724f, 0.351861f, 0.153261f, 1.461125f, 0.292024f, 1.183298f, 0.042852f, 0.488571f, 0.107830f, 0.580712f, 0.216256f, 1.782559f, 0.501907f, 1.419938f, +0.218705f, 1.601584f, 0.007784f, 1.592281f, 0.677490f, 7.031073f, 1.400872f, 5.687840f, 0.193476f, 2.401284f, 0.528324f, 2.850995f, 0.505049f, 4.531817f, 1.272031f, 3.605927f, +0.476377f, 1.439649f, 0.008788f, 1.517289f, 0.505413f, 2.164613f, 0.541653f, 1.856298f, 0.004181f, 0.021414f, 0.005917f, 0.026952f, 0.961452f, 3.560260f, 1.255079f, 3.003087f, +1.703188f, 4.721922f, 0.031753f, 5.064309f, 2.561377f, 10.063690f, 2.774275f, 8.782419f, 1.163498f, 5.466984f, 1.664258f, 7.002162f, 3.063283f, 10.406170f, 4.041401f, 8.932374f, +0.503825f, 1.538022f, 0.011060f, 1.483679f, 1.043696f, 4.515275f, 1.331093f, 3.544192f, 0.310766f, 1.607835f, 0.523415f, 1.852260f, 1.644469f, 6.151148f, 2.554634f, 4.749064f, +1.134737f, 3.770868f, 0.027032f, 3.633606f, 2.496859f, 
11.758930f, 3.455714f, 9.219766f, 0.759338f, 4.276684f, 1.387898f, 4.921376f, 2.078452f, 8.463190f, 3.503908f, 6.526872f, +0.080934f, 0.144832f, 0.000880f, 0.206498f, 0.097667f, 0.247690f, 0.061723f, 0.287353f, 0.000711f, 0.002155f, 0.000593f, 0.003670f, 0.242442f, 0.531603f, 0.186627f, 0.606615f, +0.114702f, 0.188301f, 0.001261f, 0.273208f, 0.196201f, 0.456468f, 0.125314f, 0.538898f, 0.078391f, 0.218110f, 0.066122f, 0.377919f, 0.306191f, 0.615917f, 0.238210f, 0.715217f, +0.001175f, 0.002124f, 0.000015f, 0.002772f, 0.002769f, 0.007093f, 0.002082f, 0.007532f, 0.000725f, 0.002222f, 0.000720f, 0.003462f, 0.005693f, 0.012609f, 0.005215f, 0.013169f, +0.157937f, 0.310783f, 0.002219f, 0.405128f, 0.395278f, 1.102306f, 0.322604f, 1.169214f, 0.105735f, 0.352627f, 0.113963f, 0.548952f, 0.429366f, 1.035254f, 0.426838f, 1.080083f, +0.078451f, 0.193287f, 0.000808f, 0.244598f, 0.089645f, 0.313009f, 0.053650f, 0.322302f, 0.000740f, 0.003090f, 0.000585f, 0.004669f, 0.214702f, 0.648170f, 0.156514f, 0.656469f, +0.156061f, 0.352735f, 0.001625f, 0.454243f, 0.252776f, 0.809688f, 0.152892f, 0.848425f, 0.114570f, 0.438886f, 0.091517f, 0.674955f, 0.380611f, 1.054103f, 0.280414f, 1.086419f, +0.060394f, 0.150304f, 0.000740f, 0.174095f, 0.134746f, 0.475252f, 0.095967f, 0.447915f, 0.040033f, 0.168859f, 0.037653f, 0.233574f, 0.267300f, 0.815131f, 0.231886f, 0.755645f, +0.187688f, 0.508488f, 0.002497f, 0.588323f, 0.444801f, 1.707803f, 0.343782f, 1.607787f, 0.134974f, 0.619756f, 0.137767f, 0.856325f, 0.466169f, 1.547517f, 0.438864f, 1.432996f, +0.000721f, 0.003523f, 0.000013f, 0.003853f, 0.000955f, 0.006613f, 0.000992f, 0.005885f, 0.000008f, 0.000067f, 0.000011f, 0.000087f, 0.002555f, 0.015290f, 0.003232f, 0.013384f, +0.001221f, 0.005471f, 0.000022f, 0.006089f, 0.002292f, 0.014556f, 0.002406f, 0.013182f, 0.001066f, 0.008093f, 0.001478f, 0.010757f, 0.003854f, 0.021158f, 0.004928f, 0.018847f, +0.000433f, 0.002138f, 0.000009f, 0.002141f, 0.001121f, 0.007837f, 0.001386f, 0.006384f, 0.000342f, 
0.002856f, 0.000558f, 0.003415f, 0.002483f, 0.015008f, 0.003738f, 0.012025f, +0.001972f, 0.010589f, 0.000046f, 0.010589f, 0.005416f, 0.041225f, 0.007266f, 0.033543f, 0.001686f, 0.015345f, 0.002987f, 0.018326f, 0.006338f, 0.041708f, 0.010356f, 0.033381f, +0.209801f, 0.465004f, 0.002511f, 0.492938f, 0.197393f, 0.620025f, 0.137242f, 0.534812f, 0.001780f, 0.006685f, 0.001634f, 0.008463f, 0.589427f, 1.600761f, 0.499177f, 1.358117f, +0.426402f, 0.866997f, 0.005157f, 0.935284f, 0.568668f, 1.638647f, 0.399592f, 1.438357f, 0.281547f, 0.970233f, 0.261269f, 1.249927f, 1.067552f, 2.659717f, 0.913724f, 2.296337f, +0.151971f, 0.340241f, 0.002164f, 0.330132f, 0.279180f, 0.885803f, 0.230993f, 0.699350f, 0.090603f, 0.343791f, 0.099001f, 0.398363f, 0.690481f, 1.894197f, 0.695883f, 1.470962f, +0.374144f, 0.911860f, 0.005782f, 0.883790f, 0.730075f, 2.521643f, 0.655530f, 1.988658f, 0.241996f, 0.999593f, 0.286954f, 1.156983f, 0.953958f, 2.848827f, 1.043334f, 2.209843f, +0.081931f, 0.208209f, 0.001198f, 0.221655f, 0.084321f, 0.303678f, 0.071653f, 0.263055f, 0.000721f, 0.003106f, 0.000809f, 0.003948f, 0.214187f, 0.666946f, 0.221698f, 0.568253f, +0.176777f, 0.412123f, 0.002613f, 0.446470f, 0.257885f, 0.852030f, 0.221477f, 0.751063f, 0.121103f, 0.478501f, 0.137353f, 0.619058f, 0.411828f, 1.176425f, 0.430812f, 1.020011f, +0.001834f, 0.004707f, 0.000032f, 0.004587f, 0.003685f, 0.013405f, 0.003726f, 0.010628f, 0.001134f, 0.004935f, 0.001515f, 0.005742f, 0.007752f, 0.024384f, 0.009549f, 0.019016f, +0.231421f, 0.646685f, 0.004371f, 0.629440f, 0.493959f, 1.956182f, 0.542078f, 1.549266f, 0.155299f, 0.735505f, 0.225070f, 0.854929f, 0.549051f, 1.879972f, 0.733926f, 1.464492f, +0.133068f, 0.465583f, 0.001843f, 0.439919f, 0.129680f, 0.643017f, 0.104358f, 0.494372f, 0.001258f, 0.007460f, 0.001337f, 0.008418f, 0.317820f, 1.362551f, 0.311532f, 1.030393f, +0.403005f, 1.293549f, 0.005642f, 1.243793f, 0.556701f, 2.532344f, 0.452768f, 1.981269f, 0.296566f, 1.613320f, 0.318532f, 1.852544f, 0.857758f, 
3.373537f, 0.849741f, 2.596122f, +0.157904f, 0.558075f, 0.002603f, 0.482652f, 0.300461f, 1.504929f, 0.287740f, 1.059041f, 0.104919f, 0.628464f, 0.132692f, 0.649089f, 0.609915f, 2.641294f, 0.711458f, 1.828236f, +0.460802f, 1.772872f, 0.008243f, 1.531572f, 0.931354f, 5.078146f, 0.967911f, 3.569612f, 0.332172f, 2.165966f, 0.455892f, 2.234574f, 0.998825f, 4.708693f, 1.264385f, 3.255630f, +0.043161f, 0.299370f, 0.001038f, 0.244480f, 0.048757f, 0.479266f, 0.068100f, 0.318469f, 0.000485f, 0.005703f, 0.000895f, 0.005562f, 0.133418f, 1.133904f, 0.226984f, 0.741115f, +0.111227f, 0.707738f, 0.002703f, 0.588162f, 0.178101f, 1.606040f, 0.251408f, 1.086016f, 0.097321f, 1.049525f, 0.181424f, 1.041598f, 0.306392f, 2.388846f, 0.526816f, 1.588866f, +0.039977f, 0.280089f, 0.001144f, 0.209362f, 0.088175f, 0.875514f, 0.146561f, 0.532500f, 0.031583f, 0.375030f, 0.069327f, 0.334773f, 0.199846f, 1.715668f, 0.404609f, 1.026380f, +0.170770f, 1.302459f, 0.005302f, 0.972487f, 0.400088f, 4.324500f, 0.721666f, 2.627309f, 0.146367f, 1.892001f, 0.348660f, 1.687034f, 0.479071f, 4.477136f, 1.052565f, 2.675435f, +0.205343f, 0.646320f, 0.003304f, 0.511574f, 0.164769f, 0.734972f, 0.154041f, 0.473356f, 0.001746f, 0.009314f, 0.002156f, 0.008804f, 0.503466f, 1.941717f, 0.573322f, 1.230047f, +0.635375f, 1.834627f, 0.010333f, 1.477743f, 0.722672f, 2.957237f, 0.682813f, 1.938174f, 0.420530f, 2.057976f, 0.524731f, 1.979585f, 1.388252f, 4.911721f, 1.597710f, 3.166351f, +0.229276f, 0.728959f, 0.004391f, 0.528117f, 0.359214f, 1.618545f, 0.399643f, 0.954130f, 0.137017f, 0.738322f, 0.201314f, 0.638786f, 0.909114f, 3.541689f, 1.231987f, 2.053581f, +0.530045f, 1.834512f, 0.011015f, 1.327598f, 0.882087f, 4.326601f, 1.064977f, 2.547702f, 0.343650f, 2.015811f, 0.547928f, 1.742123f, 1.179428f, 5.001806f, 1.734478f, 2.896993f, +0.001478f, 0.002692f, 0.000022f, 0.004777f, 0.001728f, 0.004459f, 0.001495f, 0.006438f, 0.000018f, 0.000055f, 0.000020f, 0.000116f, 0.004865f, 0.010856f, 0.005126f, 0.015417f, +0.002871f, 
0.004795f, 0.000043f, 0.008659f, 0.004755f, 0.011258f, 0.004157f, 0.016542f, 0.002670f, 0.007560f, 0.003083f, 0.016303f, 0.008418f, 0.017232f, 0.008965f, 0.024904f, +0.000033f, 0.000060f, 0.000001f, 0.000098f, 0.000075f, 0.000196f, 0.000077f, 0.000259f, 0.000028f, 0.000086f, 0.000038f, 0.000167f, 0.000175f, 0.000394f, 0.000219f, 0.000513f, +0.003411f, 0.006830f, 0.000066f, 0.011081f, 0.008268f, 0.023462f, 0.009236f, 0.030973f, 0.003108f, 0.010548f, 0.004585f, 0.020437f, 0.010187f, 0.024996f, 0.013862f, 0.032456f, +0.002141f, 0.005368f, 0.000030f, 0.008454f, 0.002369f, 0.008419f, 0.001941f, 0.010789f, 0.000027f, 0.000117f, 0.000030f, 0.000220f, 0.006437f, 0.019776f, 0.006423f, 0.024928f, +0.005835f, 0.013422f, 0.000083f, 0.021512f, 0.009154f, 0.029838f, 0.007579f, 0.038912f, 0.005831f, 0.022730f, 0.006375f, 0.043505f, 0.015635f, 0.044064f, 0.015767f, 0.056522f, +0.002525f, 0.006395f, 0.000042f, 0.009219f, 0.005456f, 0.019583f, 0.005319f, 0.022971f, 0.002278f, 0.009779f, 0.002933f, 0.016834f, 0.012278f, 0.038101f, 0.014579f, 0.043959f, +0.006056f, 0.016697f, 0.000110f, 0.024044f, 0.013901f, 0.054312f, 0.014706f, 0.063636f, 0.005928f, 0.027699f, 0.008282f, 0.047633f, 0.016526f, 0.055827f, 0.021295f, 0.064338f, +0.000550f, 0.002734f, 0.000013f, 0.003721f, 0.000706f, 0.004970f, 0.001003f, 0.005504f, 0.000008f, 0.000071f, 0.000016f, 0.000115f, 0.002140f, 0.013034f, 0.003707f, 0.014200f, +0.001276f, 0.005816f, 0.000032f, 0.008056f, 0.002319f, 0.014987f, 0.003333f, 0.016893f, 0.001515f, 0.011711f, 0.002876f, 0.019373f, 0.004423f, 0.024712f, 0.007742f, 0.027397f, +0.000506f, 0.002542f, 0.000015f, 0.003167f, 0.001268f, 0.009023f, 0.002146f, 0.009147f, 0.000543f, 0.004621f, 0.001214f, 0.006876f, 0.003186f, 0.019601f, 0.006567f, 0.019545f, +0.001778f, 0.009715f, 0.000056f, 0.012091f, 0.004729f, 0.036630f, 0.008684f, 0.037094f, 0.002069f, 0.019163f, 0.005017f, 0.028481f, 0.006278f, 0.042040f, 0.014040f, 0.041874f, +0.006747f, 0.015217f, 0.000111f, 0.020077f, 0.006148f, 
0.019651f, 0.005851f, 0.021096f, 0.000078f, 0.000298f, 0.000098f, 0.000469f, 0.020824f, 0.057551f, 0.024140f, 0.060769f, +0.018787f, 0.038874f, 0.000311f, 0.052192f, 0.024266f, 0.071156f, 0.023340f, 0.077734f, 0.016884f, 0.059210f, 0.021447f, 0.094934f, 0.051675f, 0.131013f, 0.060540f, 0.140777f, +0.007487f, 0.017058f, 0.000146f, 0.020599f, 0.013321f, 0.043010f, 0.015086f, 0.042262f, 0.006076f, 0.023460f, 0.009087f, 0.033832f, 0.037372f, 0.104330f, 0.051555f, 0.100833f, +0.014226f, 0.035283f, 0.000301f, 0.042560f, 0.026885f, 0.094496f, 0.033042f, 0.092748f, 0.012524f, 0.052644f, 0.020328f, 0.075835f, 0.039849f, 0.121100f, 0.059656f, 0.116912f, +0.177968f, 0.434240f, 0.002984f, 0.615541f, 0.216261f, 0.747817f, 0.210666f, 0.862537f, 0.001638f, 0.006772f, 0.002107f, 0.011464f, 0.418658f, 1.251688f, 0.496757f, 1.420029f, +0.268300f, 0.600564f, 0.004547f, 0.866314f, 0.462139f, 1.466016f, 0.454978f, 1.720723f, 0.192169f, 0.729034f, 0.249850f, 1.255877f, 0.562452f, 1.542668f, 0.674485f, 1.780997f, +0.002942f, 0.007251f, 0.000059f, 0.009407f, 0.006980f, 0.024380f, 0.008091f, 0.025739f, 0.001903f, 0.007947f, 0.002913f, 0.012314f, 0.011192f, 0.033800f, 0.015803f, 0.035098f, +0.337668f, 0.905981f, 0.007312f, 1.174169f, 0.851001f, 3.235838f, 1.070572f, 3.412353f, 0.236913f, 1.077319f, 0.393599f, 1.667396f, 0.720901f, 2.370024f, 1.104664f, 2.458322f, +0.339765f, 1.141408f, 0.005395f, 1.436041f, 0.390956f, 1.861304f, 0.360658f, 1.905456f, 0.003358f, 0.019121f, 0.004091f, 0.028729f, 0.730234f, 3.005879f, 0.820536f, 3.026717f, +0.718981f, 2.215792f, 0.011538f, 2.836903f, 1.172685f, 5.121773f, 1.093326f, 5.335701f, 0.553174f, 2.889342f, 0.681097f, 4.417712f, 1.377043f, 5.200043f, 1.563813f, 5.328401f, +0.297785f, 1.010511f, 0.005627f, 1.163678f, 0.669037f, 3.217480f, 0.734475f, 3.014831f, 0.206870f, 1.189765f, 0.299918f, 1.636197f, 1.035034f, 4.303685f, 1.384044f, 3.966491f, +0.790342f, 2.919556f, 0.016207f, 3.358362f, 1.886110f, 9.874069f, 2.247001f, 9.241925f, 0.595657f, 
3.729268f, 0.937154f, 5.122915f, 1.541579f, 6.977747f, 2.237025f, 6.423924f, +0.085741f, 0.571003f, 0.002363f, 0.620903f, 0.114362f, 1.079342f, 0.183108f, 0.954993f, 0.001008f, 0.011374f, 0.002131f, 0.014770f, 0.238497f, 1.946177f, 0.465134f, 1.693722f, +0.154385f, 0.943205f, 0.004300f, 1.043713f, 0.291886f, 2.527209f, 0.472324f, 2.275474f, 0.141232f, 1.462375f, 0.301813f, 1.932487f, 0.382691f, 2.864818f, 0.754301f, 2.537152f, +0.058655f, 0.394577f, 0.001924f, 0.392720f, 0.152755f, 1.456299f, 0.291059f, 1.179389f, 0.048449f, 0.552376f, 0.121912f, 0.656551f, 0.263857f, 2.174926f, 0.612384f, 1.732486f, +0.227877f, 1.668750f, 0.008110f, 1.659057f, 0.630370f, 6.542058f, 1.303440f, 5.292247f, 0.204204f, 2.534432f, 0.557619f, 3.009079f, 0.575259f, 5.161817f, 1.448865f, 4.107212f, +0.486038f, 1.468848f, 0.008966f, 1.548063f, 0.460487f, 1.972204f, 0.493506f, 1.691294f, 0.004321f, 0.022131f, 0.006115f, 0.027855f, 1.072350f, 3.970915f, 1.399845f, 3.349476f, +1.050809f, 2.913265f, 0.019590f, 3.124506f, 1.411192f, 5.544592f, 1.528488f, 4.838677f, 0.727148f, 3.416684f, 1.040106f, 4.376118f, 2.066032f, 7.018447f, 2.725724f, 6.024443f, +0.400825f, 1.223595f, 0.008799f, 1.180361f, 0.741482f, 3.207824f, 0.945659f, 2.517930f, 0.250440f, 1.295724f, 0.421810f, 1.492701f, 1.430176f, 5.349585f, 2.221737f, 4.130208f, +0.842752f, 2.800567f, 0.020077f, 2.698625f, 1.655961f, 7.798727f, 2.291891f, 6.114711f, 0.571262f, 3.217419f, 1.044139f, 3.702432f, 1.687460f, 6.871121f, 2.844764f, 5.299058f, +0.170010f, 0.304233f, 0.001849f, 0.433767f, 0.279100f, 0.707814f, 0.176383f, 0.821156f, 0.001575f, 0.004777f, 0.001315f, 0.008134f, 0.453261f, 0.993866f, 0.348911f, 1.134106f, +0.197772f, 0.324673f, 0.002174f, 0.471072f, 0.460220f, 1.070716f, 0.293944f, 1.264069f, 0.142624f, 0.396826f, 0.120301f, 0.687581f, 0.469880f, 0.945183f, 0.365557f, 1.097568f, +0.001755f, 0.003173f, 0.000023f, 0.004140f, 0.005626f, 0.014412f, 0.004231f, 0.015304f, 0.001143f, 0.003501f, 0.001135f, 0.005457f, 0.007568f, 
0.016762f, 0.006932f, 0.017507f, +0.279774f, 0.550527f, 0.003930f, 0.717654f, 0.952567f, 2.656409f, 0.777432f, 2.817646f, 0.197638f, 0.659127f, 0.213018f, 1.026096f, 0.676939f, 1.632184f, 0.672954f, 1.702862f, +0.189163f, 0.466059f, 0.001949f, 0.589782f, 0.294059f, 1.026753f, 0.175988f, 1.057235f, 0.001883f, 0.007861f, 0.001488f, 0.011880f, 0.460761f, 1.391003f, 0.335887f, 1.408811f, +0.308878f, 0.698138f, 0.003216f, 0.899044f, 0.680611f, 2.180121f, 0.411669f, 2.284421f, 0.239274f, 0.916592f, 0.191128f, 1.409610f, 0.670461f, 1.856843f, 0.493960f, 1.913769f, +0.103545f, 0.257697f, 0.001269f, 0.298487f, 0.314286f, 1.108493f, 0.223837f, 1.044731f, 0.072425f, 0.305489f, 0.068120f, 0.422565f, 0.407884f, 1.243843f, 0.353846f, 1.153071f, +0.381643f, 1.033954f, 0.005077f, 1.196290f, 1.230431f, 4.724207f, 0.950987f, 4.447537f, 0.289603f, 1.329760f, 0.295596f, 1.837347f, 0.843653f, 2.800632f, 0.794237f, 2.593377f, +0.001258f, 0.006144f, 0.000022f, 0.006720f, 0.002267f, 0.015691f, 0.002355f, 0.013964f, 0.000015f, 0.000123f, 0.000020f, 0.000161f, 0.003966f, 0.023734f, 0.005018f, 0.020776f, +0.001748f, 0.007832f, 0.000032f, 0.008717f, 0.004464f, 0.028349f, 0.004687f, 0.025674f, 0.001610f, 0.012226f, 0.002232f, 0.016250f, 0.004910f, 0.026959f, 0.006279f, 0.024015f, +0.000537f, 0.002652f, 0.000011f, 0.002655f, 0.001891f, 0.013222f, 0.002338f, 0.010770f, 0.000447f, 0.003738f, 0.000730f, 0.004468f, 0.002740f, 0.016565f, 0.004126f, 0.013273f, +0.002900f, 0.015574f, 0.000067f, 0.015574f, 0.010837f, 0.082486f, 0.014538f, 0.067117f, 0.002616f, 0.023816f, 0.004635f, 0.028441f, 0.008297f, 0.054598f, 0.013556f, 0.043697f, +0.492533f, 1.091653f, 0.005894f, 1.157232f, 0.630422f, 1.980194f, 0.438316f, 1.708046f, 0.004409f, 0.016561f, 0.004048f, 0.020965f, 1.231567f, 3.344681f, 1.042996f, 2.837692f, +0.821676f, 1.670704f, 0.009938f, 1.802292f, 1.490772f, 4.295734f, 1.047535f, 3.770672f, 0.572486f, 1.972829f, 0.531252f, 2.541547f, 1.830924f, 4.561597f, 1.567099f, 3.938375f, +0.253681f, 
0.567955f, 0.003613f, 0.551081f, 0.633990f, 2.011570f, 0.524563f, 1.588154f, 0.159589f, 0.605555f, 0.174380f, 0.701679f, 1.025840f, 2.814186f, 1.033865f, 2.185390f, +0.740712f, 1.805253f, 0.011448f, 1.749681f, 1.966291f, 6.791474f, 1.765521f, 5.356000f, 0.505534f, 2.088165f, 0.599451f, 2.416955f, 1.680890f, 5.019680f, 1.838372f, 3.893780f, +0.238754f, 0.606738f, 0.003492f, 0.645918f, 0.334277f, 1.203884f, 0.284058f, 1.042838f, 0.002217f, 0.009550f, 0.002488f, 0.012141f, 0.555511f, 1.729782f, 0.574994f, 1.473812f, +0.422844f, 0.985781f, 0.006251f, 1.067939f, 0.839172f, 2.772549f, 0.720699f, 2.443998f, 0.305662f, 1.207726f, 0.346676f, 1.562490f, 0.876738f, 2.504484f, 0.917152f, 2.171495f, +0.003799f, 0.009753f, 0.000066f, 0.009504f, 0.010387f, 0.037786f, 0.010503f, 0.029959f, 0.002480f, 0.010789f, 0.003312f, 0.012555f, 0.014296f, 0.044968f, 0.017610f, 0.035069f, +0.568702f, 1.589189f, 0.010742f, 1.546809f, 1.651368f, 6.539765f, 1.812234f, 5.179393f, 0.402701f, 1.907217f, 0.583623f, 2.216889f, 1.200868f, 4.111816f, 1.605220f, 3.203091f, +0.445116f, 1.557387f, 0.006166f, 1.471540f, 0.590121f, 2.926120f, 0.474890f, 2.249693f, 0.004440f, 0.026331f, 0.004719f, 0.029712f, 0.946195f, 4.056503f, 0.927473f, 3.067621f, +1.106528f, 3.551694f, 0.015490f, 3.415077f, 2.079435f, 9.459011f, 1.691213f, 7.400592f, 0.859223f, 4.674173f, 0.922864f, 5.367262f, 2.096125f, 8.243999f, 2.076534f, 6.344209f, +0.375571f, 1.327368f, 0.006191f, 1.147975f, 0.972205f, 4.869507f, 0.931042f, 3.426744f, 0.263321f, 1.577287f, 0.333024f, 1.629051f, 1.291124f, 5.591332f, 1.506080f, 3.870177f, +1.299856f, 5.001016f, 0.023252f, 4.320346f, 3.574094f, 19.487520f, 3.714385f, 13.698480f, 0.988725f, 6.447099f, 1.356985f, 6.651314f, 2.507670f, 11.821740f, 3.174388f, 8.173648f, +0.104430f, 0.724336f, 0.002511f, 0.591528f, 0.160487f, 1.577537f, 0.224156f, 1.048263f, 0.001239f, 0.014561f, 0.002285f, 0.014201f, 0.287308f, 2.441791f, 0.488796f, 1.595944f, +0.220900f, 1.405589f, 0.005367f, 1.168107f, 0.481196f, 
4.339227f, 0.679258f, 2.934219f, 0.203949f, 2.199431f, 0.380201f, 2.182819f, 0.541581f, 4.222540f, 0.931204f, 2.808489f, +0.068776f, 0.481868f, 0.001968f, 0.360187f, 0.206371f, 2.049111f, 0.343020f, 1.246297f, 0.057334f, 0.680817f, 0.125853f, 0.607734f, 0.306005f, 2.627029f, 0.619537f, 1.571592f, +0.348438f, 2.657531f, 0.010818f, 1.984257f, 1.110555f, 12.003850f, 2.003183f, 7.292824f, 0.315129f, 4.073494f, 0.750668f, 3.632199f, 0.869989f, 8.130445f, 1.911450f, 4.858570f, +0.668757f, 2.104921f, 0.010761f, 1.666085f, 0.730020f, 3.256340f, 0.682486f, 2.097233f, 0.006000f, 0.032009f, 0.007408f, 0.030256f, 1.459347f, 5.628262f, 1.661832f, 3.565415f, +1.698526f, 4.904443f, 0.027623f, 3.950396f, 2.628169f, 10.754700f, 2.483214f, 7.048634f, 1.186234f, 5.805155f, 1.480164f, 5.584030f, 3.303011f, 11.686260f, 3.801365f, 7.533570f, +0.530942f, 1.688073f, 0.010167f, 1.222976f, 1.131648f, 5.098977f, 1.259014f, 3.005839f, 0.334807f, 1.804119f, 0.491918f, 1.560900f, 1.873726f, 7.299585f, 2.539183f, 4.232526f, +1.455738f, 5.038378f, 0.030252f, 3.646170f, 3.295735f, 16.165440f, 3.979064f, 9.518954f, 0.995905f, 5.841868f, 1.587909f, 5.048714f, 2.882979f, 12.226350f, 4.239737f, 7.081374f, +0.117076f, 0.213201f, 0.001743f, 0.378319f, 0.186141f, 0.480386f, 0.161020f, 0.693610f, 0.001476f, 0.004557f, 0.001687f, 0.009656f, 0.342913f, 0.765159f, 0.361318f, 1.086664f, +0.186601f, 0.311733f, 0.002808f, 0.562914f, 0.420535f, 0.995634f, 0.367656f, 1.462899f, 0.183157f, 0.518585f, 0.211467f, 1.118308f, 0.487052f, 0.996996f, 0.518661f, 1.440876f, +0.001852f, 0.003406f, 0.000033f, 0.005532f, 0.005748f, 0.014985f, 0.005918f, 0.019804f, 0.001641f, 0.005116f, 0.002231f, 0.009924f, 0.008771f, 0.019770f, 0.010998f, 0.025698f, +0.227802f, 0.456160f, 0.004380f, 0.740067f, 0.751163f, 2.131678f, 0.839153f, 2.814047f, 0.219030f, 0.743345f, 0.323140f, 1.440215f, 0.605536f, 1.485757f, 0.823978f, 1.929195f, +0.194633f, 0.487989f, 0.002744f, 0.768560f, 0.293024f, 1.041173f, 0.240044f, 1.334280f, 0.002636f, 
0.011203f, 0.002852f, 0.021071f, 0.520831f, 1.600063f, 0.519701f, 2.016879f, +0.435433f, 1.001529f, 0.006205f, 1.605171f, 0.929227f, 3.028945f, 0.769327f, 3.950072f, 0.459106f, 1.789705f, 0.501974f, 3.425486f, 1.038360f, 2.926429f, 1.047144f, 3.753795f, +0.163219f, 0.413370f, 0.002739f, 0.595899f, 0.479793f, 1.722066f, 0.467736f, 2.019945f, 0.155386f, 0.666970f, 0.200050f, 1.148215f, 0.706347f, 2.191970f, 0.838754f, 2.528966f, +0.464294f, 1.280045f, 0.008455f, 1.843225f, 1.449711f, 5.664235f, 1.533694f, 6.636667f, 0.479536f, 2.240680f, 0.669974f, 3.853150f, 1.127561f, 3.809082f, 1.453002f, 4.389833f, +0.036165f, 0.179751f, 0.000885f, 0.244680f, 0.063113f, 0.444557f, 0.089736f, 0.492392f, 0.000582f, 0.004907f, 0.001094f, 0.007976f, 0.125251f, 0.762800f, 0.216919f, 0.831023f, +0.068845f, 0.313909f, 0.001703f, 0.434831f, 0.170300f, 1.100462f, 0.244717f, 1.240362f, 0.086307f, 0.666967f, 0.163785f, 1.103327f, 0.212477f, 1.187110f, 0.371903f, 1.316082f, +0.023672f, 0.118848f, 0.000689f, 0.148076f, 0.080661f, 0.573915f, 0.136480f, 0.581831f, 0.026795f, 0.228004f, 0.059875f, 0.339250f, 0.132585f, 0.815646f, 0.273257f, 0.813335f, +0.098569f, 0.538719f, 0.003115f, 0.670463f, 0.356757f, 2.763264f, 0.655073f, 2.798276f, 0.121046f, 1.121243f, 0.293527f, 1.666462f, 0.309814f, 2.074773f, 0.692925f, 2.066606f, +0.597159f, 1.346873f, 0.009782f, 1.776974f, 0.740242f, 2.366129f, 0.704482f, 2.540085f, 0.007275f, 0.027810f, 0.009144f, 0.043817f, 1.640410f, 4.533539f, 1.901591f, 4.787029f, +1.364924f, 2.824199f, 0.022597f, 3.791746f, 2.398321f, 7.032695f, 2.306772f, 7.682831f, 1.294361f, 4.539084f, 1.644112f, 7.277714f, 3.341324f, 8.471362f, 3.914574f, 9.102728f, +0.471197f, 1.073535f, 0.009185f, 1.296391f, 1.140472f, 3.682356f, 1.291636f, 3.618272f, 0.403459f, 1.557897f, 0.603439f, 2.246682f, 2.093312f, 5.843799f, 2.887741f, 5.647934f, +1.061839f, 2.633513f, 0.022463f, 3.176687f, 2.729892f, 9.595109f, 3.355138f, 9.417694f, 0.986375f, 4.146150f, 1.600980f, 5.972649f, 2.647214f, 
8.044773f, 3.962989f, 7.766535f, +0.422173f, 1.030100f, 0.007079f, 1.460180f, 0.697908f, 2.413320f, 0.679852f, 2.783539f, 0.004099f, 0.016951f, 0.005273f, 0.028696f, 0.883910f, 2.642684f, 1.048802f, 2.998103f, +0.522424f, 1.169397f, 0.008853f, 1.686857f, 1.224180f, 3.883399f, 1.205211f, 4.558102f, 0.394836f, 1.497897f, 0.513350f, 2.580367f, 0.974738f, 2.673468f, 1.168893f, 3.086497f, +0.004962f, 0.012230f, 0.000099f, 0.015868f, 0.016016f, 0.055945f, 0.018567f, 0.059062f, 0.003386f, 0.014145f, 0.005184f, 0.021917f, 0.016802f, 0.050741f, 0.023724f, 0.052690f, +0.675493f, 1.812384f, 0.014627f, 2.348885f, 2.315963f, 8.806197f, 2.913515f, 9.286575f, 0.500094f, 2.274088f, 0.830838f, 3.519668f, 1.283532f, 4.219722f, 1.966805f, 4.376933f, +0.925182f, 3.108055f, 0.014691f, 3.910340f, 1.448260f, 6.895022f, 1.336022f, 7.058577f, 0.009650f, 0.054941f, 0.011756f, 0.082548f, 1.769741f, 7.284826f, 1.988591f, 7.335328f, +1.607011f, 4.952567f, 0.025789f, 6.340826f, 3.565767f, 15.573710f, 3.324462f, 16.224200f, 1.304652f, 6.814469f, 1.606356f, 10.419100f, 2.739358f, 10.344470f, 3.110899f, 10.599810f, +0.576568f, 1.956539f, 0.010895f, 2.253098f, 1.762252f, 8.474880f, 1.934615f, 7.941101f, 0.422645f, 2.430750f, 0.612748f, 3.342832f, 1.783618f, 7.416307f, 2.385048f, 6.835239f, +1.814868f, 6.704194f, 0.037216f, 7.711826f, 5.892060f, 30.845820f, 7.019453f, 28.871050f, 1.443303f, 9.036182f, 2.270766f, 12.413050f, 3.150616f, 14.260830f, 4.571938f, 13.128950f, +0.168876f, 1.124656f, 0.004654f, 1.222939f, 0.306430f, 2.892081f, 0.490634f, 2.558889f, 0.002094f, 0.023638f, 0.004428f, 0.030696f, 0.418084f, 3.411640f, 0.815379f, 2.969087f, +0.249597f, 1.524897f, 0.006952f, 1.687390f, 0.641974f, 5.558352f, 1.038832f, 5.004683f, 0.240934f, 2.494738f, 0.514878f, 3.296725f, 0.550659f, 4.122221f, 1.085373f, 3.650738f, +0.082146f, 0.552602f, 0.002694f, 0.550001f, 0.291036f, 2.774607f, 0.554539f, 2.247026f, 0.071597f, 0.816295f, 0.180160f, 0.970244f, 0.328889f, 2.710971f, 0.763316f, 2.159485f, 
+0.378497f, 2.771750f, 0.013471f, 2.755650f, 1.424390f, 14.782490f, 2.945265f, 11.958410f, 0.357897f, 4.441966f, 0.977310f, 5.273855f, 0.850406f, 7.630716f, 2.141858f, 6.071694f, +1.288569f, 3.894161f, 0.023770f, 4.104172f, 1.660831f, 7.113105f, 1.779919f, 6.099955f, 0.012088f, 0.061912f, 0.017108f, 0.077924f, 2.530308f, 9.369741f, 3.303064f, 7.903397f, +2.286727f, 6.339726f, 0.042632f, 6.799419f, 4.177793f, 16.414610f, 4.525046f, 14.324770f, 1.669725f, 7.845615f, 2.388360f, 10.048730f, 4.001543f, 13.593500f, 5.279252f, 11.668290f, +0.755599f, 2.306610f, 0.016587f, 2.225110f, 1.901548f, 8.226547f, 2.425167f, 6.457295f, 0.498164f, 2.577394f, 0.839045f, 2.969211f, 2.399529f, 8.975457f, 3.727599f, 6.929604f, +1.884164f, 6.261307f, 0.044886f, 6.033392f, 5.036622f, 23.719910f, 6.970810f, 18.597960f, 1.347678f, 7.590289f, 2.463252f, 8.734494f, 3.357782f, 13.672450f, 5.660634f, 10.544290f, +0.051774f, 0.092650f, 0.000563f, 0.132098f, 0.086803f, 0.220136f, 0.054857f, 0.255387f, 0.000575f, 0.001744f, 0.000480f, 0.002970f, 0.175067f, 0.383871f, 0.134763f, 0.438036f, +0.079608f, 0.130689f, 0.000875f, 0.189618f, 0.189186f, 0.440148f, 0.120834f, 0.519631f, 0.068830f, 0.191507f, 0.058057f, 0.331824f, 0.239880f, 0.482530f, 0.186622f, 0.560324f, +0.000781f, 0.001411f, 0.000010f, 0.001842f, 0.002556f, 0.006548f, 0.001922f, 0.006953f, 0.000610f, 0.001867f, 0.000605f, 0.002910f, 0.004270f, 0.009457f, 0.003911f, 0.009878f, +0.123265f, 0.242556f, 0.001732f, 0.316190f, 0.428609f, 1.195256f, 0.349807f, 1.267805f, 0.104399f, 0.348173f, 0.112523f, 0.542018f, 0.378267f, 0.912050f, 0.376041f, 0.951545f, +0.071300f, 0.175670f, 0.000734f, 0.222304f, 0.113194f, 0.395233f, 0.067744f, 0.406967f, 0.000851f, 0.003552f, 0.000672f, 0.005369f, 0.220266f, 0.664966f, 0.160570f, 0.673479f, +0.153884f, 0.347815f, 0.001602f, 0.447907f, 0.346288f, 1.109225f, 0.209453f, 1.162291f, 0.142921f, 0.547488f, 0.114162f, 0.841972f, 0.423639f, 1.173270f, 0.312115f, 1.209240f, +0.057015f, 0.141895f, 0.000699f, 
0.164355f, 0.176731f, 0.623335f, 0.125870f, 0.587480f, 0.047812f, 0.201671f, 0.044970f, 0.278960f, 0.284845f, 0.868636f, 0.247108f, 0.805245f, +0.208116f, 0.563832f, 0.002769f, 0.652356f, 0.685232f, 2.630931f, 0.529609f, 2.476853f, 0.189340f, 0.869387f, 0.193259f, 1.201244f, 0.583483f, 1.936959f, 0.549306f, 1.793618f, +0.000706f, 0.003448f, 0.000013f, 0.003772f, 0.001299f, 0.008993f, 0.001350f, 0.008004f, 0.000010f, 0.000083f, 0.000014f, 0.000108f, 0.002823f, 0.016894f, 0.003572f, 0.014788f, +0.001297f, 0.005810f, 0.000023f, 0.006466f, 0.003382f, 0.021476f, 0.003551f, 0.019450f, 0.001432f, 0.010873f, 0.001985f, 0.014452f, 0.004620f, 0.025363f, 0.005907f, 0.022593f, +0.000441f, 0.002174f, 0.000009f, 0.002176f, 0.001583f, 0.011071f, 0.001957f, 0.009018f, 0.000439f, 0.003674f, 0.000717f, 0.004392f, 0.002849f, 0.017225f, 0.004290f, 0.013801f, +0.002355f, 0.012646f, 0.000054f, 0.012646f, 0.008986f, 0.068399f, 0.012055f, 0.055654f, 0.002547f, 0.023184f, 0.004512f, 0.027687f, 0.008544f, 0.056225f, 0.013960f, 0.044998f, +0.182506f, 0.404508f, 0.002184f, 0.428808f, 0.238565f, 0.749346f, 0.165868f, 0.646359f, 0.001959f, 0.007357f, 0.001798f, 0.009314f, 0.578784f, 1.571857f, 0.490164f, 1.333594f, +0.402434f, 0.818263f, 0.004867f, 0.882711f, 0.745653f, 2.148638f, 0.523955f, 1.886013f, 0.336163f, 1.158443f, 0.311951f, 1.492394f, 1.137313f, 2.833521f, 0.973433f, 2.446396f, +0.137319f, 0.307437f, 0.001956f, 0.298304f, 0.350475f, 1.112014f, 0.289983f, 0.877946f, 0.103571f, 0.392997f, 0.113170f, 0.455379f, 0.704269f, 1.932021f, 0.709779f, 1.500334f, +0.397086f, 0.967772f, 0.006137f, 0.937981f, 1.076503f, 3.718189f, 0.966585f, 2.932298f, 0.324921f, 1.342121f, 0.385284f, 1.553444f, 1.142854f, 3.412929f, 1.249927f, 2.647419f, +0.071518f, 0.181746f, 0.001046f, 0.193483f, 0.102259f, 0.368283f, 0.086897f, 0.319017f, 0.000796f, 0.003430f, 0.000894f, 0.004360f, 0.211044f, 0.657161f, 0.218446f, 0.559916f, +0.167415f, 0.390298f, 0.002475f, 0.422826f, 0.339312f, 1.121056f, 0.291408f, 
0.988209f, 0.145094f, 0.573292f, 0.164563f, 0.741694f, 0.440252f, 1.257622f, 0.460546f, 1.090412f, +0.001663f, 0.004268f, 0.000029f, 0.004159f, 0.004642f, 0.016886f, 0.004694f, 0.013388f, 0.001301f, 0.005660f, 0.001738f, 0.006587f, 0.007934f, 0.024957f, 0.009773f, 0.019463f, +0.246457f, 0.688704f, 0.004655f, 0.670338f, 0.730859f, 2.894355f, 0.802055f, 2.292285f, 0.209234f, 0.990944f, 0.303237f, 1.151843f, 0.660038f, 2.259994f, 0.882283f, 1.760527f, +0.165026f, 0.577399f, 0.002286f, 0.545571f, 0.223436f, 1.107908f, 0.179806f, 0.851794f, 0.001974f, 0.011704f, 0.002098f, 0.013207f, 0.444914f, 1.907423f, 0.436111f, 1.442437f, +0.542242f, 1.740467f, 0.007591f, 1.673520f, 1.040658f, 4.733785f, 0.846372f, 3.703644f, 0.504810f, 2.746168f, 0.542201f, 3.153371f, 1.302758f, 5.123709f, 1.290582f, 3.942975f, +0.203410f, 0.718905f, 0.003353f, 0.621745f, 0.537737f, 2.693377f, 0.514969f, 1.895370f, 0.170985f, 1.024194f, 0.216246f, 1.057807f, 0.886878f, 3.840707f, 1.034532f, 2.658439f, +0.697216f, 2.682442f, 0.012472f, 2.317344f, 1.957809f, 10.674830f, 2.034657f, 7.503724f, 0.635827f, 4.145990f, 0.872648f, 4.277316f, 1.705920f, 8.042098f, 2.159475f, 5.560375f, +0.057649f, 0.399857f, 0.001386f, 0.326543f, 0.090477f, 0.889358f, 0.126371f, 0.590973f, 0.000820f, 0.009637f, 0.001512f, 0.009399f, 0.201154f, 1.709580f, 0.342223f, 1.117374f, +0.161180f, 1.025591f, 0.003916f, 0.852312f, 0.358567f, 3.233411f, 0.506155f, 2.186457f, 0.178414f, 1.924059f, 0.332599f, 1.909527f, 0.501182f, 3.907563f, 0.861742f, 2.598992f, +0.055463f, 0.388592f, 0.001587f, 0.290465f, 0.169960f, 1.687576f, 0.282499f, 1.026407f, 0.055434f, 0.658245f, 0.121681f, 0.587585f, 0.312975f, 2.686870f, 0.633650f, 1.607391f, +0.278281f, 2.122443f, 0.008640f, 1.584731f, 0.905796f, 9.790629f, 1.633845f, 5.948205f, 0.301744f, 3.900469f, 0.718782f, 3.477918f, 0.881227f, 8.235467f, 1.936140f, 4.921328f, +0.243744f, 0.767188f, 0.003922f, 0.607244f, 0.271727f, 1.212071f, 0.254034f, 0.780630f, 0.002622f, 0.013987f, 0.003237f, 
0.013221f, 0.674591f, 2.601695f, 0.768191f, 1.648133f, +0.818256f, 2.362691f, 0.013307f, 1.903084f, 1.293013f, 5.291125f, 1.221698f, 3.467805f, 0.685140f, 3.352919f, 0.854908f, 3.225202f, 2.018104f, 7.140174f, 2.322593f, 4.602929f, +0.282692f, 0.898789f, 0.005413f, 0.651155f, 0.615333f, 2.772568f, 0.684589f, 1.634424f, 0.213724f, 1.151659f, 0.314016f, 0.996400f, 1.265287f, 4.929252f, 1.714656f, 2.858133f, +0.767612f, 2.656744f, 0.015952f, 1.922630f, 1.774776f, 8.705200f, 2.142753f, 5.126023f, 0.629606f, 3.693197f, 1.003867f, 3.191770f, 1.928042f, 8.176586f, 2.835397f, 4.735792f, +0.026099f, 0.047527f, 0.000389f, 0.084335f, 0.042376f, 0.109363f, 0.036657f, 0.157905f, 0.000395f, 0.001218f, 0.000451f, 0.002581f, 0.096950f, 0.216330f, 0.102154f, 0.307227f, +0.054981f, 0.091851f, 0.000827f, 0.165860f, 0.126542f, 0.299593f, 0.110630f, 0.440196f, 0.064702f, 0.183194f, 0.074702f, 0.395051f, 0.182008f, 0.372571f, 0.193820f, 0.538446f, +0.000603f, 0.001109f, 0.000011f, 0.001802f, 0.001912f, 0.004984f, 0.001968f, 0.006586f, 0.000641f, 0.001998f, 0.000871f, 0.003874f, 0.003623f, 0.008165f, 0.004542f, 0.010614f, +0.073468f, 0.147116f, 0.001413f, 0.238678f, 0.247404f, 0.702094f, 0.276385f, 0.926840f, 0.084691f, 0.287424f, 0.124946f, 0.556879f, 0.247684f, 0.607722f, 0.337033f, 0.789103f, +0.053701f, 0.134639f, 0.000757f, 0.212051f, 0.082565f, 0.293372f, 0.067637f, 0.375961f, 0.000872f, 0.003706f, 0.000943f, 0.006970f, 0.182254f, 0.559907f, 0.181858f, 0.705763f, +0.158795f, 0.365239f, 0.002263f, 0.585377f, 0.346073f, 1.128075f, 0.286522f, 1.471132f, 0.200733f, 0.782506f, 0.219476f, 1.497712f, 0.480262f, 1.353532f, 0.484325f, 1.736205f, +0.065786f, 0.166610f, 0.001104f, 0.240180f, 0.197492f, 0.708836f, 0.192529f, 0.831449f, 0.075087f, 0.322301f, 0.096670f, 0.554854f, 0.361075f, 1.120507f, 0.428760f, 1.292775f, +0.185331f, 0.510953f, 0.003375f, 0.735757f, 0.590976f, 2.309029f, 0.625211f, 2.705442f, 0.229493f, 1.072328f, 0.320631f, 1.844012f, 0.570837f, 1.928379f, 0.735594f, 
2.222388f, +0.014857f, 0.073845f, 0.000364f, 0.100519f, 0.026479f, 0.186513f, 0.037648f, 0.206582f, 0.000287f, 0.002417f, 0.000539f, 0.003929f, 0.065260f, 0.397444f, 0.113022f, 0.432990f, +0.037383f, 0.170453f, 0.000925f, 0.236114f, 0.094438f, 0.610250f, 0.135705f, 0.687829f, 0.056187f, 0.434206f, 0.106627f, 0.718284f, 0.146328f, 0.817537f, 0.256121f, 0.906357f, +0.014206f, 0.071325f, 0.000414f, 0.088866f, 0.049436f, 0.351746f, 0.083647f, 0.356598f, 0.019280f, 0.164053f, 0.043081f, 0.244096f, 0.100916f, 0.620822f, 0.207987f, 0.619063f, +0.058584f, 0.320187f, 0.001852f, 0.398489f, 0.216544f, 1.677244f, 0.397616f, 1.698496f, 0.086255f, 0.798975f, 0.209161f, 1.187487f, 0.233539f, 1.563970f, 0.522329f, 1.557813f, +0.161972f, 0.365323f, 0.002653f, 0.481982f, 0.205048f, 0.655421f, 0.195142f, 0.703608f, 0.002366f, 0.009044f, 0.002973f, 0.014249f, 0.564311f, 1.559564f, 0.654159f, 1.646766f, +0.489339f, 1.012503f, 0.008101f, 1.359378f, 0.878094f, 2.574870f, 0.844575f, 2.812903f, 0.556350f, 1.951018f, 0.706683f, 3.128153f, 1.519273f, 3.851860f, 1.779926f, 4.138937f, +0.186704f, 0.425370f, 0.003640f, 0.513673f, 0.461496f, 1.490077f, 0.522665f, 1.464145f, 0.191665f, 0.740084f, 0.286666f, 1.067294f, 1.051963f, 2.936716f, 1.451192f, 2.838287f, +0.416679f, 1.033423f, 0.008815f, 1.246571f, 1.094008f, 3.845254f, 1.344577f, 3.774155f, 0.464063f, 1.950652f, 0.753218f, 2.809970f, 1.317492f, 4.003803f, 1.972340f, 3.865327f, +0.168111f, 0.410191f, 0.002819f, 0.581450f, 0.283816f, 0.981417f, 0.276473f, 1.131973f, 0.001957f, 0.008093f, 0.002518f, 0.013700f, 0.446407f, 1.334652f, 0.529683f, 1.514151f, +0.274967f, 0.615488f, 0.004660f, 0.887843f, 0.658014f, 2.087383f, 0.647819f, 2.450046f, 0.249153f, 0.945218f, 0.323939f, 1.628288f, 0.650672f, 1.784633f, 0.780277f, 2.060345f, +0.002886f, 0.007114f, 0.000058f, 0.009231f, 0.009515f, 0.033235f, 0.011030f, 0.035087f, 0.002362f, 0.009865f, 0.003615f, 0.015285f, 0.012396f, 0.037436f, 0.017503f, 0.038873f, +0.389153f, 1.044118f, 0.008427f, 
1.353198f, 1.362584f, 5.181079f, 1.714151f, 5.463707f, 0.345417f, 1.570719f, 0.573863f, 2.431045f, 0.937826f, 3.083184f, 1.437066f, 3.198052f, +0.455982f, 1.531827f, 0.007240f, 1.927239f, 0.728954f, 3.470477f, 0.672461f, 3.552799f, 0.005702f, 0.032465f, 0.006947f, 0.048778f, 1.106234f, 4.553617f, 1.243033f, 4.585185f, +1.046867f, 3.226286f, 0.016800f, 4.130649f, 2.372236f, 10.360890f, 2.211700f, 10.793640f, 1.018964f, 5.322264f, 1.254602f, 8.137572f, 2.263276f, 8.546666f, 2.570245f, 8.757633f, +0.415119f, 1.408674f, 0.007844f, 1.622191f, 1.295753f, 6.231432f, 1.422489f, 5.838953f, 0.364829f, 2.098235f, 0.528927f, 2.885549f, 1.628695f, 6.772138f, 2.177887f, 6.241540f, +1.294074f, 4.780361f, 0.026536f, 5.498843f, 4.290557f, 22.461710f, 5.111517f, 21.023700f, 1.233853f, 7.724867f, 1.941236f, 10.611690f, 2.849218f, 12.896600f, 4.134572f, 11.872990f, +0.123930f, 0.825328f, 0.003415f, 0.897453f, 0.229652f, 2.167454f, 0.367703f, 1.917744f, 0.001843f, 0.020797f, 0.003896f, 0.027007f, 0.389123f, 3.175311f, 0.758896f, 2.763414f, +0.242101f, 1.479104f, 0.006743f, 1.636717f, 0.635928f, 5.506006f, 1.029048f, 4.957552f, 0.280187f, 2.901183f, 0.598762f, 3.833831f, 0.677418f, 5.071141f, 1.335222f, 4.491125f, +0.088063f, 0.592407f, 0.002888f, 0.589619f, 0.318630f, 3.037676f, 0.607117f, 2.460073f, 0.092022f, 1.049172f, 0.231557f, 1.247040f, 0.447170f, 3.685943f, 1.037834f, 2.936121f, +0.401849f, 2.942753f, 0.014302f, 2.925660f, 1.544405f, 16.028020f, 3.193425f, 12.965990f, 0.455564f, 5.654146f, 1.244011f, 6.713051f, 1.145097f, 10.274990f, 2.884076f, 8.175718f, +0.624332f, 1.886783f, 0.011517f, 1.988537f, 0.821799f, 3.519652f, 0.880726f, 3.018333f, 0.007022f, 0.035965f, 0.009938f, 0.045266f, 1.554883f, 5.757736f, 2.029744f, 4.856663f, +1.464447f, 4.060036f, 0.027302f, 4.354429f, 2.732365f, 10.735500f, 2.959476f, 9.368701f, 1.282024f, 6.023908f, 1.833796f, 7.715474f, 3.250148f, 11.040960f, 4.287932f, 9.477262f, +0.534811f, 1.632612f, 0.011740f, 1.574927f, 1.374512f, 5.946464f, 
1.753004f, 4.667581f, 0.422739f, 2.187164f, 0.712010f, 2.519659f, 2.154026f, 8.057151f, 3.346217f, 6.220616f, +1.320747f, 4.389004f, 0.031464f, 4.229242f, 3.605561f, 16.980350f, 4.990185f, 13.313700f, 1.132607f, 6.378980f, 2.070150f, 7.340585f, 2.985174f, 12.155240f, 5.032482f, 9.374209f, +0.134803f, 0.241230f, 0.001466f, 0.343940f, 0.182164f, 0.461980f, 0.115123f, 0.535957f, 0.001168f, 0.003544f, 0.000975f, 0.006034f, 0.369390f, 0.809963f, 0.284349f, 0.924252f, +0.236979f, 0.389038f, 0.002605f, 0.564460f, 0.453930f, 1.056083f, 0.289927f, 1.246793f, 0.159886f, 0.444855f, 0.134862f, 0.770800f, 0.578686f, 1.164053f, 0.450206f, 1.351724f, +0.001642f, 0.002968f, 0.000021f, 0.003873f, 0.004332f, 0.011098f, 0.003258f, 0.011784f, 0.001000f, 0.003064f, 0.000993f, 0.004776f, 0.007276f, 0.016116f, 0.006665f, 0.016832f, +0.236708f, 0.465785f, 0.003325f, 0.607185f, 0.663407f, 1.850033f, 0.541436f, 1.962326f, 0.156441f, 0.521733f, 0.168615f, 0.812208f, 0.588663f, 1.419341f, 0.585198f, 1.480802f, +0.135102f, 0.332863f, 0.001392f, 0.421227f, 0.172877f, 0.603627f, 0.103463f, 0.621548f, 0.001258f, 0.005253f, 0.000994f, 0.007938f, 0.338230f, 1.021090f, 0.246564f, 1.034163f, +0.333374f, 0.753504f, 0.003471f, 0.970344f, 0.604675f, 1.936883f, 0.365739f, 2.029546f, 0.241609f, 0.925536f, 0.192993f, 1.423364f, 0.743754f, 2.059829f, 0.547959f, 2.122978f, +0.087248f, 0.217138f, 0.001070f, 0.251508f, 0.217986f, 0.768842f, 0.155252f, 0.724617f, 0.057093f, 0.240820f, 0.053700f, 0.333113f, 0.353243f, 1.077216f, 0.306444f, 0.998603f, +0.290846f, 0.787965f, 0.003869f, 0.911679f, 0.771864f, 2.963553f, 0.596566f, 2.789995f, 0.206482f, 0.948094f, 0.210755f, 1.309994f, 0.660817f, 2.193678f, 0.622110f, 2.031339f, +0.001187f, 0.005799f, 0.000021f, 0.006343f, 0.001761f, 0.012190f, 0.001829f, 0.010848f, 0.000013f, 0.000109f, 0.000018f, 0.000142f, 0.003847f, 0.023023f, 0.004867f, 0.020153f, +0.002493f, 0.011170f, 0.000045f, 0.012432f, 0.005241f, 0.033282f, 0.005502f, 0.030142f, 0.002148f, 0.016313f, 
0.002978f, 0.021683f, 0.007198f, 0.039519f, 0.009204f, 0.035203f, +0.000598f, 0.002953f, 0.000013f, 0.002956f, 0.001733f, 0.012119f, 0.002143f, 0.009872f, 0.000466f, 0.003894f, 0.000760f, 0.004655f, 0.003136f, 0.018958f, 0.004722f, 0.015190f, +0.002920f, 0.015684f, 0.000067f, 0.015684f, 0.008984f, 0.068378f, 0.012051f, 0.055638f, 0.002465f, 0.022439f, 0.004367f, 0.026796f, 0.008588f, 0.056513f, 0.014032f, 0.045229f, +0.388910f, 0.861981f, 0.004654f, 0.913764f, 0.409754f, 1.287062f, 0.284891f, 1.110174f, 0.003257f, 0.012234f, 0.002990f, 0.015488f, 0.999500f, 2.714435f, 0.846462f, 2.302979f, +0.980469f, 1.993575f, 0.011859f, 2.150594f, 1.464275f, 4.219381f, 1.028916f, 3.703652f, 0.639102f, 2.202394f, 0.593071f, 2.837290f, 2.245509f, 5.594501f, 1.921945f, 4.830161f, +0.236321f, 0.529088f, 0.003366f, 0.513369f, 0.486155f, 1.542509f, 0.402245f, 1.217826f, 0.139088f, 0.527765f, 0.151979f, 0.611541f, 0.982212f, 2.694501f, 0.989896f, 2.092447f, +0.624085f, 1.521011f, 0.009645f, 1.474189f, 1.363704f, 4.710168f, 1.224462f, 3.714608f, 0.398489f, 1.646006f, 0.472520f, 1.905177f, 1.455609f, 4.346918f, 1.591984f, 3.371917f, +0.284825f, 0.723816f, 0.004166f, 0.770557f, 0.328255f, 1.182199f, 0.278942f, 1.024053f, 0.002475f, 0.010658f, 0.002777f, 0.013551f, 0.681132f, 2.120948f, 0.705020f, 1.807094f, +0.762302f, 1.777165f, 0.011269f, 1.925279f, 1.245307f, 4.114382f, 1.069496f, 3.626822f, 0.515539f, 2.036986f, 0.584713f, 2.635341f, 1.624532f, 4.640628f, 1.699416f, 4.023622f, +0.005347f, 0.013727f, 0.000093f, 0.013376f, 0.012033f, 0.043776f, 0.012169f, 0.034708f, 0.003265f, 0.014206f, 0.004361f, 0.016531f, 0.020681f, 0.065049f, 0.025474f, 0.050729f, +0.723924f, 2.022943f, 0.013674f, 1.968995f, 1.730335f, 6.852491f, 1.898894f, 5.427068f, 0.479583f, 2.271332f, 0.695045f, 2.640126f, 1.571139f, 5.379639f, 2.100167f, 4.190721f, +0.478300f, 1.673491f, 0.006625f, 1.581243f, 0.521971f, 2.588195f, 0.420047f, 1.989885f, 0.004464f, 0.026471f, 0.004744f, 0.029870f, 1.045006f, 4.480120f, 
1.024329f, 3.387970f, +1.796838f, 5.767424f, 0.025154f, 5.545580f, 2.779520f, 12.643590f, 2.260596f, 9.892159f, 1.305343f, 7.101067f, 1.402028f, 8.154017f, 3.498443f, 13.759280f, 3.465746f, 10.588520f, +0.476123f, 1.682748f, 0.007848f, 1.455325f, 1.014527f, 5.081486f, 0.971572f, 3.575917f, 0.312310f, 1.870728f, 0.394980f, 1.932122f, 1.682313f, 7.285411f, 1.962397f, 5.042775f, +1.490399f, 5.734105f, 0.026660f, 4.953656f, 3.373274f, 18.392560f, 3.505682f, 12.928790f, 1.060609f, 6.915828f, 1.455643f, 7.134890f, 2.955214f, 13.931560f, 3.740922f, 9.632401f, +0.148289f, 1.028539f, 0.003565f, 0.839954f, 0.187585f, 1.843904f, 0.262005f, 1.225262f, 0.001646f, 0.019344f, 0.003035f, 0.018866f, 0.419314f, 3.563692f, 0.713378f, 2.329215f, +0.474019f, 3.016189f, 0.011517f, 2.506588f, 0.849964f, 7.664624f, 1.199812f, 5.182878f, 0.409444f, 4.415532f, 0.763284f, 4.382182f, 1.194469f, 9.312908f, 2.053791f, 6.194186f, +0.115218f, 0.807253f, 0.003296f, 0.603406f, 0.284583f, 2.825694f, 0.473020f, 1.718625f, 0.089860f, 1.067048f, 0.197251f, 0.952505f, 0.526891f, 4.523323f, 1.066744f, 2.706030f, +0.527943f, 4.026610f, 0.016391f, 3.006486f, 1.385095f, 14.971310f, 2.498389f, 9.095681f, 0.446706f, 5.774320f, 1.064097f, 5.148769f, 1.354835f, 12.661550f, 2.976703f, 7.566255f, +0.794481f, 2.500639f, 0.012785f, 1.979303f, 0.713884f, 3.184368f, 0.667402f, 2.050880f, 0.006669f, 0.035576f, 0.008234f, 0.033628f, 1.781907f, 6.872278f, 2.029147f, 4.353480f, +3.049349f, 8.804904f, 0.049592f, 7.092112f, 3.883886f, 15.893210f, 3.669672f, 10.416410f, 1.992406f, 9.750374f, 2.486093f, 9.378970f, 6.094754f, 21.563610f, 7.014324f, 13.901030f, +0.744154f, 2.365959f, 0.014250f, 1.714092f, 1.305587f, 5.882709f, 1.452529f, 3.467847f, 0.439019f, 2.365668f, 0.645033f, 2.046745f, 2.699190f, 10.515390f, 3.657812f, 6.097151f, +1.845351f, 6.386850f, 0.038349f, 4.622030f, 3.438954f, 16.867920f, 4.151978f, 9.932609f, 1.181099f, 6.928195f, 1.883189f, 5.987549f, 3.756200f, 15.929570f, 5.523903f, 9.226239f, +0.118026f, 
0.214930f, 0.001757f, 0.381387f, 0.154465f, 0.398636f, 0.133619f, 0.575574f, 0.001392f, 0.004297f, 0.001591f, 0.009107f, 0.355306f, 0.792812f, 0.374377f, 1.125937f, +0.284276f, 0.474909f, 0.004278f, 0.857570f, 0.527361f, 1.248548f, 0.461049f, 1.834509f, 0.261050f, 0.739128f, 0.301399f, 1.593901f, 0.762630f, 1.561104f, 0.812123f, 2.256136f, +0.002202f, 0.004051f, 0.000039f, 0.006580f, 0.005628f, 0.014671f, 0.005793f, 0.019388f, 0.001826f, 0.005693f, 0.002482f, 0.011042f, 0.010722f, 0.024167f, 0.013444f, 0.031414f, +0.245045f, 0.490688f, 0.004712f, 0.796085f, 0.665121f, 1.887505f, 0.743032f, 2.491712f, 0.220427f, 0.748085f, 0.325200f, 1.449400f, 0.669483f, 1.642658f, 0.910993f, 2.132926f, +0.176736f, 0.443115f, 0.002492f, 0.697887f, 0.219022f, 0.778230f, 0.179422f, 0.997314f, 0.002240f, 0.009517f, 0.002423f, 0.017901f, 0.486088f, 1.493328f, 0.485034f, 1.882340f, +0.597514f, 1.374327f, 0.008515f, 2.202663f, 1.049606f, 3.421340f, 0.868992f, 4.461797f, 0.589403f, 2.297632f, 0.644437f, 4.397656f, 1.464488f, 4.127394f, 1.476876f, 5.294298f, +0.174855f, 0.442839f, 0.002934f, 0.638381f, 0.423096f, 1.518573f, 0.412465f, 1.781252f, 0.155737f, 0.668477f, 0.200502f, 1.150810f, 0.777744f, 2.413535f, 0.923535f, 2.784595f, +0.449863f, 1.240260f, 0.008192f, 1.785936f, 1.156238f, 4.517592f, 1.223220f, 5.293169f, 0.434692f, 2.031140f, 0.607320f, 3.492818f, 1.122895f, 3.793317f, 1.446988f, 4.371664f, +0.043396f, 0.215691f, 0.001062f, 0.293602f, 0.062339f, 0.439103f, 0.088635f, 0.486351f, 0.000654f, 0.005508f, 0.001228f, 0.008954f, 0.154473f, 0.940769f, 0.267528f, 1.024909f, +0.124840f, 0.569225f, 0.003088f, 0.788499f, 0.254199f, 1.642608f, 0.365278f, 1.851429f, 0.146420f, 1.131506f, 0.277861f, 1.871791f, 0.396007f, 2.212498f, 0.693140f, 2.452871f, +0.033512f, 0.168250f, 0.000976f, 0.209627f, 0.093994f, 0.668786f, 0.159041f, 0.678010f, 0.035489f, 0.301979f, 0.079301f, 0.449318f, 0.192916f, 1.186791f, 0.397598f, 1.183429f, +0.126206f, 0.689769f, 0.003989f, 0.858453f, 0.376004f, 
2.912340f, 0.690414f, 2.949241f, 0.144999f, 1.343117f, 0.351610f, 1.996224f, 0.407712f, 2.730382f, 0.911883f, 2.719634f, +0.599494f, 1.352141f, 0.009820f, 1.783924f, 0.611712f, 1.955294f, 0.582161f, 2.099046f, 0.006833f, 0.026120f, 0.008588f, 0.041154f, 1.692618f, 4.677824f, 1.962112f, 4.939382f, +2.070731f, 4.284602f, 0.034282f, 5.752470f, 2.995027f, 8.782438f, 2.880699f, 9.594329f, 1.837143f, 6.442520f, 2.333560f, 10.329580f, 5.210086f, 13.209290f, 6.103949f, 14.193780f, +0.558083f, 1.271488f, 0.010879f, 1.535437f, 1.111883f, 3.590047f, 1.259258f, 3.527570f, 0.447062f, 1.726265f, 0.668655f, 2.489489f, 2.548247f, 7.113818f, 3.515327f, 6.875386f, +1.137456f, 2.821055f, 0.024063f, 3.402911f, 2.407131f, 8.460659f, 2.958453f, 8.304220f, 0.988532f, 4.155216f, 1.604481f, 5.985708f, 2.914583f, 8.857296f, 4.363251f, 8.550956f, +0.416808f, 1.017010f, 0.006989f, 1.441624f, 0.567181f, 1.961277f, 0.552508f, 2.262150f, 0.003786f, 0.015658f, 0.004871f, 0.026505f, 0.896944f, 2.681652f, 1.064267f, 3.042312f, +0.779450f, 1.744728f, 0.013208f, 2.516773f, 1.503451f, 4.769313f, 1.480155f, 5.597936f, 0.551131f, 2.090837f, 0.716559f, 3.601800f, 1.494737f, 4.099699f, 1.792469f, 4.733069f, +0.005780f, 0.014245f, 0.000115f, 0.018483f, 0.015357f, 0.053640f, 0.017802f, 0.056628f, 0.003690f, 0.015414f, 0.005649f, 0.023883f, 0.020114f, 0.060746f, 0.028402f, 0.063079f, +0.711619f, 1.909312f, 0.015409f, 2.474506f, 2.008336f, 7.636480f, 2.526517f, 8.053051f, 0.492891f, 2.241333f, 0.818871f, 3.468972f, 1.389775f, 4.569006f, 2.129606f, 4.739230f, +0.822758f, 2.763973f, 0.013064f, 3.477440f, 1.060156f, 5.047298f, 0.977995f, 5.167024f, 0.008028f, 0.045710f, 0.009781f, 0.068679f, 1.617581f, 6.658487f, 1.817615f, 6.704647f, +2.159652f, 6.655723f, 0.034657f, 8.521395f, 3.944538f, 17.228010f, 3.677600f, 17.947600f, 1.640333f, 8.567801f, 2.019664f, 13.099890f, 3.783772f, 14.288420f, 4.296968f, 14.641120f, +0.604918f, 2.052743f, 0.011431f, 2.363883f, 1.521921f, 7.319102f, 1.670778f, 6.858118f, 0.414853f, 
2.385936f, 0.601451f, 3.281203f, 1.923353f, 7.997330f, 2.571902f, 7.370738f, +1.722149f, 6.361689f, 0.035314f, 7.317842f, 4.602261f, 24.093520f, 5.482863f, 22.551040f, 1.281315f, 8.022012f, 2.015908f, 11.019880f, 3.072788f, 13.908550f, 4.459000f, 12.804630f, +0.198457f, 1.321657f, 0.005469f, 1.437156f, 0.296421f, 2.797614f, 0.474608f, 2.475305f, 0.002303f, 0.025989f, 0.004869f, 0.033748f, 0.504980f, 4.120724f, 0.984849f, 3.586190f, +0.443259f, 2.708066f, 0.012346f, 2.996637f, 0.938457f, 8.125368f, 1.518596f, 7.315998f, 0.400303f, 4.144920f, 0.855452f, 5.477394f, 1.005107f, 7.524218f, 1.981112f, 6.663628f, +0.113890f, 0.766148f, 0.003735f, 0.762541f, 0.332142f, 3.166501f, 0.632864f, 2.564402f, 0.092868f, 1.058813f, 0.233685f, 1.258500f, 0.468663f, 3.863100f, 1.087715f, 3.077240f, +0.474616f, 3.475632f, 0.016892f, 3.455443f, 1.470235f, 15.258280f, 3.040062f, 12.343300f, 0.419866f, 5.211077f, 1.146528f, 6.187004f, 1.096017f, 9.834592f, 2.760461f, 7.825298f, +1.266897f, 3.828666f, 0.023370f, 4.035146f, 1.344118f, 5.756667f, 1.440496f, 4.936720f, 0.011119f, 0.056949f, 0.015736f, 0.071677f, 2.556927f, 9.468311f, 3.337812f, 7.986541f, +3.397565f, 9.419414f, 0.063342f, 10.102420f, 5.109502f, 20.075310f, 5.534197f, 17.519400f, 2.320977f, 10.905680f, 3.319906f, 13.968100f, 6.110718f, 20.758510f, 8.061894f, 17.818540f, +0.876448f, 2.675523f, 0.019240f, 2.580989f, 1.815600f, 7.854714f, 2.315552f, 6.165431f, 0.540604f, 2.796970f, 0.910526f, 3.222167f, 2.860697f, 10.700460f, 4.444009f, 8.261411f, +1.976665f, 6.568699f, 0.047089f, 6.329595f, 4.349427f, 20.483570f, 6.019714f, 16.060460f, 1.322736f, 7.449810f, 2.417663f, 8.572838f, 3.620580f, 14.742530f, 6.103666f, 11.369550f, +}; + +static const float acceptor_me2x3acc3[16384] = { +0.036350f, 0.044079f, 0.017386f, 0.046962f, 0.031088f, 0.045756f, 0.024587f, 0.034370f, 0.025195f, 0.034486f, 0.017536f, 0.034052f, 0.076383f, 0.099866f, 0.073811f, 0.086068f, +0.047550f, 0.057744f, 0.028229f, 0.064311f, 0.043775f, 0.064522f, 0.042972f, 
0.050664f, 0.064784f, 0.088804f, 0.055968f, 0.091663f, 0.081233f, 0.106361f, 0.097434f, 0.095823f, +0.243824f, 0.254641f, 0.117527f, 0.282971f, 0.147646f, 0.187151f, 0.117680f, 0.146631f, 0.160177f, 0.188823f, 0.112355f, 0.194472f, 0.391222f, 0.440520f, 0.380999f, 0.395997f, +0.023217f, 0.027583f, 0.011848f, 0.035175f, 0.032216f, 0.046454f, 0.027185f, 0.041768f, 0.034881f, 0.046775f, 0.025903f, 0.055285f, 0.052301f, 0.066993f, 0.053925f, 0.069110f, +0.845246f, 1.058768f, 0.485224f, 0.886728f, 0.769988f, 1.170633f, 0.730911f, 0.691241f, 0.573267f, 0.810543f, 0.478904f, 0.629150f, 1.393519f, 1.882007f, 1.616261f, 1.275036f, +0.529473f, 0.664187f, 0.377275f, 0.581493f, 0.519190f, 0.790481f, 0.611732f, 0.487938f, 0.705887f, 0.999501f, 0.731951f, 0.811009f, 0.709682f, 0.959845f, 1.021687f, 0.679778f, +2.480488f, 2.675926f, 1.435063f, 2.337585f, 1.599884f, 2.094809f, 1.530535f, 1.290199f, 1.594530f, 1.941649f, 1.342452f, 1.571999f, 3.122639f, 3.632029f, 3.650020f, 2.566577f, +0.344638f, 0.422943f, 0.211095f, 0.423992f, 0.509371f, 0.758703f, 0.515904f, 0.536249f, 0.506656f, 0.701830f, 0.451605f, 0.652073f, 0.609120f, 0.805956f, 0.753799f, 0.653580f, +0.000004f, 0.000005f, 0.000008f, 0.000022f, 0.000003f, 0.000004f, 0.000009f, 0.000013f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000003f, 0.000011f, 0.000013f, +0.000003f, 0.000004f, 0.000008f, 0.000020f, 0.000002f, 0.000004f, 0.000010f, 0.000013f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000002f, 0.000009f, 0.000009f, +0.000022f, 0.000023f, 0.000046f, 0.000113f, 0.000011f, 0.000014f, 0.000038f, 0.000048f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000011f, 0.000012f, 0.000046f, 0.000049f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.441540f, 0.580346f, 0.242142f, 0.576465f, 0.392887f, 0.626763f, 0.356277f, 0.438944f, 0.287594f, 0.426675f, 0.229515f, 
0.392801f, 0.643154f, 0.911428f, 0.712614f, 0.732353f, +0.289517f, 0.381084f, 0.197074f, 0.395705f, 0.277302f, 0.443015f, 0.312126f, 0.324331f, 0.370682f, 0.550742f, 0.367189f, 0.530015f, 0.342855f, 0.486571f, 0.471526f, 0.408705f, +1.929791f, 2.184471f, 1.066560f, 2.263273f, 1.215789f, 1.670371f, 1.111102f, 1.220176f, 1.191355f, 1.522222f, 0.958182f, 1.461696f, 2.146398f, 2.619611f, 2.396761f, 2.195528f, +0.175285f, 0.225716f, 0.102565f, 0.268371f, 0.253054f, 0.395503f, 0.244844f, 0.331544f, 0.247474f, 0.359707f, 0.210726f, 0.396378f, 0.273716f, 0.380021f, 0.323589f, 0.365504f, +0.019208f, 0.024062f, 0.010894f, 0.022668f, 0.014719f, 0.022378f, 0.013803f, 0.014864f, 0.014026f, 0.019833f, 0.011577f, 0.017316f, 0.037260f, 0.050324f, 0.042695f, 0.038350f, +0.016945f, 0.021258f, 0.011929f, 0.020935f, 0.013977f, 0.021281f, 0.016270f, 0.014776f, 0.024323f, 0.034443f, 0.024918f, 0.031436f, 0.026723f, 0.036145f, 0.038009f, 0.028794f, +0.137628f, 0.148482f, 0.078666f, 0.145900f, 0.074667f, 0.097772f, 0.070571f, 0.067735f, 0.095254f, 0.115998f, 0.079231f, 0.105638f, 0.203849f, 0.237119f, 0.235411f, 0.188478f, +0.013467f, 0.016528f, 0.008149f, 0.018637f, 0.016742f, 0.024939f, 0.016753f, 0.019827f, 0.021315f, 0.029529f, 0.018771f, 0.030860f, 0.028004f, 0.037056f, 0.034239f, 0.033802f, +0.439889f, 0.569215f, 0.299447f, 0.421534f, 0.359023f, 0.563863f, 0.404129f, 0.294408f, 0.314317f, 0.459095f, 0.311371f, 0.315099f, 0.669467f, 0.934012f, 0.920758f, 0.559527f, +0.185830f, 0.240811f, 0.157017f, 0.186422f, 0.163258f, 0.256777f, 0.228102f, 0.140151f, 0.261010f, 0.381786f, 0.320939f, 0.273924f, 0.229928f, 0.321250f, 0.392521f, 0.201176f, +1.378923f, 1.536710f, 0.946001f, 1.187005f, 0.796836f, 1.077804f, 0.903945f, 0.586975f, 0.933870f, 1.174734f, 0.932331f, 0.840985f, 1.602435f, 1.925409f, 2.221119f, 1.203082f, +0.196878f, 0.249591f, 0.142997f, 0.221244f, 0.260703f, 0.401141f, 0.313111f, 0.250703f, 0.304928f, 0.436346f, 0.322300f, 0.358478f, 0.321212f, 0.439051f, 0.471370f, 
0.314825f, +0.000025f, 0.000032f, 0.000061f, 0.000131f, 0.000016f, 0.000024f, 0.000064f, 0.000070f, 0.000000f, 0.000001f, 0.000001f, 0.000002f, 0.000015f, 0.000020f, 0.000074f, 0.000068f, +0.000014f, 0.000018f, 0.000043f, 0.000077f, 0.000010f, 0.000015f, 0.000048f, 0.000044f, 0.000000f, 0.000001f, 0.000002f, 0.000002f, 0.000007f, 0.000009f, 0.000042f, 0.000033f, +0.000152f, 0.000164f, 0.000372f, 0.000706f, 0.000067f, 0.000089f, 0.000274f, 0.000269f, 0.000002f, 0.000003f, 0.000008f, 0.000011f, 0.000069f, 0.000081f, 0.000344f, 0.000281f, +0.000001f, 0.000001f, 0.000003f, 0.000006f, 0.000001f, 0.000002f, 0.000005f, 0.000006f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000004f, 0.000004f, +0.425449f, 0.577669f, 0.276672f, 0.507379f, 0.339174f, 0.558950f, 0.364721f, 0.346135f, 0.291949f, 0.447446f, 0.276285f, 0.364236f, 0.572068f, 0.837472f, 0.751632f, 0.595026f, +0.188132f, 0.255814f, 0.151858f, 0.234878f, 0.161443f, 0.266440f, 0.215484f, 0.172479f, 0.253771f, 0.389496f, 0.298090f, 0.331444f, 0.205662f, 0.301513f, 0.335404f, 0.223942f, +1.986234f, 2.322637f, 1.301736f, 2.127841f, 1.121130f, 1.591204f, 1.214981f, 1.027785f, 1.291850f, 1.705156f, 1.232074f, 1.447805f, 2.039324f, 2.571151f, 2.700340f, 1.905447f, +0.185394f, 0.246620f, 0.128638f, 0.259279f, 0.239795f, 0.387161f, 0.275128f, 0.286980f, 0.275760f, 0.414061f, 0.278443f, 0.403453f, 0.267243f, 0.383291f, 0.374643f, 0.325972f, +0.004930f, 0.005477f, 0.002425f, 0.006531f, 0.004481f, 0.006043f, 0.003645f, 0.005080f, 0.003602f, 0.004518f, 0.002579f, 0.004992f, 0.009529f, 0.011416f, 0.009473f, 0.011011f, +0.004830f, 0.005374f, 0.002950f, 0.006698f, 0.004725f, 0.006382f, 0.004772f, 0.005608f, 0.006937f, 0.008713f, 0.006165f, 0.010065f, 0.007590f, 0.009106f, 0.009365f, 0.009181f, +0.023858f, 0.022830f, 0.011830f, 0.028393f, 0.015354f, 0.017832f, 0.012589f, 0.015636f, 0.016524f, 0.017848f, 0.011923f, 0.020572f, 0.035215f, 0.036332f, 0.035279f, 0.036551f, +0.002212f, 0.002408f, 0.001161f, 
0.003436f, 0.003262f, 0.004309f, 0.002831f, 0.004336f, 0.003503f, 0.004304f, 0.002676f, 0.005694f, 0.004583f, 0.005379f, 0.004861f, 0.006210f, +0.164161f, 0.188414f, 0.096944f, 0.176597f, 0.158929f, 0.221392f, 0.155193f, 0.146303f, 0.117379f, 0.152066f, 0.100872f, 0.132097f, 0.248967f, 0.308086f, 0.297050f, 0.233591f, +0.077016f, 0.088522f, 0.056453f, 0.086734f, 0.080259f, 0.111965f, 0.097279f, 0.077346f, 0.108247f, 0.140439f, 0.115466f, 0.127530f, 0.094960f, 0.117680f, 0.140633f, 0.093272f, +0.347589f, 0.343578f, 0.206866f, 0.335893f, 0.238258f, 0.285842f, 0.234473f, 0.197025f, 0.235562f, 0.262825f, 0.204015f, 0.238139f, 0.402523f, 0.428984f, 0.484009f, 0.339256f, +0.047018f, 0.052870f, 0.029626f, 0.059315f, 0.073853f, 0.100792f, 0.076947f, 0.079727f, 0.072872f, 0.092492f, 0.066818f, 0.096172f, 0.076444f, 0.092678f, 0.097317f, 0.084110f, +0.000000f, 0.000001f, 0.000001f, 0.000003f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, +0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, +0.000002f, 0.000002f, 0.000004f, 0.000010f, 0.000001f, 0.000001f, 0.000003f, 0.000004f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000004f, 0.000004f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.068159f, 0.082084f, 0.038451f, 0.091249f, 0.064454f, 0.094212f, 0.060126f, 0.073841f, 0.046803f, 0.063623f, 0.038423f, 0.065550f, 0.091328f, 0.118587f, 0.104096f, 0.106639f, +0.033472f, 0.040369f, 0.023438f, 0.046911f, 0.034071f, 0.049874f, 0.039450f, 0.040863f, 0.045180f, 0.061506f, 0.046039f, 0.066243f, 0.036463f, 0.047414f, 0.051587f, 0.044571f, +0.214933f, 0.222926f, 0.122199f, 0.258484f, 0.143906f, 0.181158f, 0.135290f, 
0.148098f, 0.139887f, 0.163771f, 0.115737f, 0.175994f, 0.219909f, 0.245919f, 0.252607f, 0.230662f, +0.019007f, 0.022426f, 0.011441f, 0.029841f, 0.029162f, 0.041761f, 0.029025f, 0.039178f, 0.028291f, 0.037677f, 0.024781f, 0.046465f, 0.027303f, 0.034733f, 0.033204f, 0.037386f, +0.022710f, 0.028287f, 0.010148f, 0.033733f, 0.015326f, 0.023169f, 0.011325f, 0.019480f, 0.020172f, 0.028361f, 0.013118f, 0.031346f, 0.048390f, 0.064985f, 0.043689f, 0.062689f, +0.018143f, 0.022631f, 0.010063f, 0.028212f, 0.013179f, 0.019953f, 0.012088f, 0.017537f, 0.031678f, 0.044603f, 0.025570f, 0.051533f, 0.031429f, 0.042269f, 0.035222f, 0.042625f, +0.151193f, 0.162189f, 0.068091f, 0.201740f, 0.072241f, 0.094057f, 0.053798f, 0.082486f, 0.127289f, 0.154129f, 0.083422f, 0.177682f, 0.245993f, 0.284514f, 0.223831f, 0.286277f, +0.014170f, 0.017292f, 0.006756f, 0.024683f, 0.015515f, 0.022979f, 0.012232f, 0.023126f, 0.027283f, 0.037580f, 0.018930f, 0.049717f, 0.032368f, 0.042587f, 0.031181f, 0.049175f, +0.387044f, 0.497985f, 0.207595f, 0.466834f, 0.278207f, 0.434453f, 0.246743f, 0.287149f, 0.336411f, 0.488571f, 0.262579f, 0.424485f, 0.647045f, 0.897597f, 0.701182f, 0.680674f, +0.148070f, 0.190789f, 0.098578f, 0.186966f, 0.114566f, 0.179168f, 0.126122f, 0.123791f, 0.252986f, 0.367945f, 0.245098f, 0.334181f, 0.201249f, 0.279582f, 0.270698f, 0.221631f, +1.127349f, 1.249205f, 0.609382f, 1.221471f, 0.573742f, 0.771633f, 0.512824f, 0.531960f, 0.928732f, 1.161627f, 0.730556f, 1.052700f, 1.439090f, 1.719308f, 1.571660f, 1.359925f, +0.154169f, 0.194336f, 0.088228f, 0.218065f, 0.179794f, 0.275074f, 0.170140f, 0.217621f, 0.290458f, 0.413276f, 0.241895f, 0.429794f, 0.276300f, 0.375516f, 0.319471f, 0.340856f, +0.000007f, 0.000009f, 0.000013f, 0.000046f, 0.000004f, 0.000006f, 0.000012f, 0.000022f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000005f, 0.000006f, 0.000018f, 0.000026f, +0.000004f, 0.000004f, 0.000008f, 0.000024f, 0.000002f, 0.000003f, 0.000008f, 0.000012f, 0.000000f, 0.000000f, 0.000000f, 
0.000001f, 0.000002f, 0.000003f, 0.000009f, 0.000011f, +0.000039f, 0.000042f, 0.000076f, 0.000229f, 0.000015f, 0.000020f, 0.000049f, 0.000077f, 0.000001f, 0.000001f, 0.000002f, 0.000004f, 0.000020f, 0.000023f, 0.000077f, 0.000100f, +0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, +0.310022f, 0.418551f, 0.158851f, 0.465362f, 0.217669f, 0.356674f, 0.184423f, 0.279597f, 0.258785f, 0.394362f, 0.192961f, 0.406375f, 0.457913f, 0.666544f, 0.474045f, 0.599492f, +0.124150f, 0.167853f, 0.078958f, 0.195091f, 0.093828f, 0.153969f, 0.098675f, 0.126171f, 0.203709f, 0.310881f, 0.188536f, 0.334881f, 0.149082f, 0.217321f, 0.191566f, 0.204324f, +1.344863f, 1.563696f, 0.694464f, 1.813422f, 0.668547f, 0.943464f, 0.570855f, 0.771420f, 1.064009f, 1.396434f, 0.799557f, 1.500913f, 1.516780f, 1.901461f, 1.582466f, 1.783799f, +0.120233f, 0.159031f, 0.065732f, 0.211646f, 0.136962f, 0.219874f, 0.123815f, 0.206311f, 0.217544f, 0.324790f, 0.173074f, 0.400609f, 0.190382f, 0.271501f, 0.210289f, 0.292288f, +0.041624f, 0.050475f, 0.019908f, 0.053776f, 0.044059f, 0.064847f, 0.034845f, 0.048710f, 0.035672f, 0.048827f, 0.024828f, 0.048212f, 0.097631f, 0.127647f, 0.094344f, 0.110010f, +0.046012f, 0.055876f, 0.027315f, 0.062231f, 0.052425f, 0.077271f, 0.051463f, 0.060675f, 0.077511f, 0.106249f, 0.066963f, 0.109670f, 0.087740f, 0.114881f, 0.105239f, 0.103499f, +0.209913f, 0.219226f, 0.101181f, 0.243616f, 0.157318f, 0.199411f, 0.125389f, 0.156236f, 0.170505f, 0.200998f, 0.119600f, 0.207011f, 0.375953f, 0.423327f, 0.366129f, 0.380542f, +0.021525f, 0.025573f, 0.010985f, 0.032612f, 0.036966f, 0.053303f, 0.031193f, 0.047926f, 0.039985f, 0.053620f, 0.029694f, 0.063374f, 0.054124f, 0.069329f, 0.055805f, 0.071519f, +0.737929f, 0.924342f, 0.423617f, 0.774144f, 0.831970f, 1.264867f, 0.789747f, 0.746884f, 0.618818f, 0.874947f, 0.516956f, 0.679140f, 1.357974f, 1.834002f, 1.575035f, 
1.242513f, +0.390616f, 0.490001f, 0.278333f, 0.428994f, 0.474050f, 0.721755f, 0.558547f, 0.445516f, 0.643896f, 0.911724f, 0.667670f, 0.739785f, 0.584410f, 0.790414f, 0.841340f, 0.559784f, +1.628124f, 1.756404f, 0.941936f, 1.534326f, 1.299664f, 1.701715f, 1.243329f, 1.048092f, 1.294067f, 1.575777f, 1.089489f, 1.275782f, 2.287805f, 2.661011f, 2.674192f, 1.880406f, +0.243606f, 0.298955f, 0.149211f, 0.299696f, 0.445605f, 0.663724f, 0.451321f, 0.469118f, 0.442803f, 0.613380f, 0.394690f, 0.569893f, 0.480588f, 0.635890f, 0.594739f, 0.515667f, +0.000025f, 0.000030f, 0.000051f, 0.000141f, 0.000022f, 0.000032f, 0.000073f, 0.000105f, 0.000000f, 0.000001f, 0.000001f, 0.000003f, 0.000018f, 0.000024f, 0.000075f, 0.000089f, +0.000017f, 0.000021f, 0.000045f, 0.000104f, 0.000016f, 0.000024f, 0.000069f, 0.000083f, 0.000001f, 0.000001f, 0.000002f, 0.000004f, 0.000010f, 0.000014f, 0.000053f, 0.000053f, +0.000105f, 0.000110f, 0.000217f, 0.000535f, 0.000064f, 0.000082f, 0.000221f, 0.000281f, 0.000002f, 0.000002f, 0.000006f, 0.000010f, 0.000058f, 0.000066f, 0.000243f, 0.000258f, +0.000001f, 0.000001f, 0.000002f, 0.000005f, 0.000001f, 0.000002f, 0.000004f, 0.000006f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000003f, 0.000003f, +0.537709f, 0.706748f, 0.294881f, 0.702023f, 0.592158f, 0.944654f, 0.536980f, 0.661574f, 0.433043f, 0.642464f, 0.345591f, 0.591458f, 0.874258f, 1.238931f, 0.968677f, 0.995509f, +0.297939f, 0.392169f, 0.202807f, 0.407216f, 0.353182f, 0.564239f, 0.397534f, 0.413079f, 0.471659f, 0.700769f, 0.467214f, 0.674395f, 0.393831f, 0.558916f, 0.541633f, 0.469471f, +1.766879f, 2.000059f, 0.976521f, 2.072208f, 1.377675f, 1.892787f, 1.259049f, 1.382647f, 1.348688f, 1.723250f, 1.084722f, 1.654730f, 2.193581f, 2.677196f, 2.449447f, 2.243791f, +0.172829f, 0.222553f, 0.101128f, 0.264610f, 0.308799f, 0.482627f, 0.298779f, 0.404579f, 0.301699f, 0.438523f, 0.256898f, 0.483229f, 0.301243f, 0.418239f, 0.356132f, 0.402262f, +0.027198f, 0.034071f, 0.015425f, 
0.032096f, 0.025793f, 0.039216f, 0.024189f, 0.026047f, 0.024556f, 0.034722f, 0.020267f, 0.030316f, 0.058888f, 0.079536f, 0.067479f, 0.060611f, +0.020275f, 0.025435f, 0.014273f, 0.025048f, 0.020697f, 0.031514f, 0.024093f, 0.021881f, 0.035984f, 0.050955f, 0.036864f, 0.046507f, 0.035690f, 0.048274f, 0.050763f, 0.038456f, +0.146509f, 0.158064f, 0.083742f, 0.155315f, 0.098374f, 0.128815f, 0.092978f, 0.089242f, 0.125376f, 0.152680f, 0.104286f, 0.139044f, 0.242223f, 0.281756f, 0.279726f, 0.223958f, +0.015438f, 0.018947f, 0.009342f, 0.021365f, 0.023754f, 0.035383f, 0.023769f, 0.028131f, 0.030214f, 0.041855f, 0.026607f, 0.043742f, 0.035835f, 0.047418f, 0.043813f, 0.043253f, +0.474865f, 0.614474f, 0.323257f, 0.455051f, 0.479669f, 0.753344f, 0.539932f, 0.393341f, 0.419536f, 0.612778f, 0.415603f, 0.420580f, 0.806683f, 1.125451f, 1.109480f, 0.674209f, +0.169518f, 0.219674f, 0.143235f, 0.170059f, 0.184319f, 0.289901f, 0.257527f, 0.158230f, 0.294397f, 0.430622f, 0.361991f, 0.308962f, 0.234121f, 0.327109f, 0.399680f, 0.204845f, +1.119143f, 1.247204f, 0.767781f, 0.963381f, 0.800400f, 1.082625f, 0.907987f, 0.589600f, 0.937143f, 1.178851f, 0.935599f, 0.843933f, 1.451688f, 1.744278f, 2.012170f, 1.089903f, +0.172074f, 0.218147f, 0.124982f, 0.193371f, 0.282005f, 0.433919f, 0.338695f, 0.271188f, 0.329526f, 0.471546f, 0.348300f, 0.387396f, 0.313370f, 0.428333f, 0.459863f, 0.307139f, +0.000196f, 0.000247f, 0.000477f, 0.001016f, 0.000152f, 0.000232f, 0.000613f, 0.000675f, 0.000004f, 0.000005f, 0.000014f, 0.000021f, 0.000131f, 0.000178f, 0.000644f, 0.000592f, +0.000093f, 0.000117f, 0.000281f, 0.000504f, 0.000078f, 0.000119f, 0.000389f, 0.000361f, 0.000004f, 0.000005f, 0.000016f, 0.000020f, 0.000050f, 0.000069f, 0.000308f, 0.000239f, +0.000885f, 0.000959f, 0.002173f, 0.004121f, 0.000487f, 0.000640f, 0.001977f, 0.001940f, 0.000016f, 0.000020f, 0.000059f, 0.000080f, 0.000452f, 0.000527f, 0.002240f, 0.001834f, +0.000007f, 0.000008f, 0.000017f, 0.000041f, 0.000008f, 0.000013f, 0.000036f, 
0.000044f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000005f, 0.000006f, 0.000025f, 0.000025f, +0.640649f, 0.869866f, 0.416618f, 0.764022f, 0.632103f, 1.041690f, 0.679715f, 0.645077f, 0.543569f, 0.833081f, 0.514404f, 0.678156f, 0.961542f, 1.407637f, 1.263356f, 1.000129f, +0.239393f, 0.325516f, 0.193235f, 0.298876f, 0.254250f, 0.419604f, 0.339355f, 0.271630f, 0.399267f, 0.612809f, 0.468996f, 0.521473f, 0.292112f, 0.428254f, 0.476390f, 0.318076f, +2.248652f, 2.629500f, 1.473720f, 2.408968f, 1.570869f, 2.229512f, 1.702369f, 1.440080f, 1.808331f, 2.386875f, 1.724657f, 2.026636f, 2.577063f, 3.249126f, 3.412380f, 2.407886f, +0.226027f, 0.300672f, 0.156832f, 0.316106f, 0.361825f, 0.584184f, 0.415137f, 0.433020f, 0.415691f, 0.624171f, 0.419735f, 0.608180f, 0.363679f, 0.521604f, 0.509835f, 0.443601f, +0.049872f, 0.055413f, 0.024538f, 0.066070f, 0.056101f, 0.075657f, 0.045643f, 0.063601f, 0.045059f, 0.056511f, 0.032262f, 0.062448f, 0.107606f, 0.128909f, 0.106967f, 0.124333f, +0.041288f, 0.045942f, 0.025215f, 0.057262f, 0.049995f, 0.067519f, 0.050487f, 0.059334f, 0.073327f, 0.092098f, 0.065167f, 0.106389f, 0.072426f, 0.086890f, 0.089365f, 0.087607f, +0.181463f, 0.173645f, 0.089979f, 0.215953f, 0.144530f, 0.167861f, 0.118503f, 0.147186f, 0.155393f, 0.167844f, 0.112128f, 0.193460f, 0.298966f, 0.308452f, 0.299511f, 0.310310f, +0.018116f, 0.019721f, 0.009510f, 0.028145f, 0.033064f, 0.043685f, 0.028702f, 0.043957f, 0.035478f, 0.043593f, 0.027103f, 0.057662f, 0.041904f, 0.049181f, 0.044445f, 0.056779f, +1.266162f, 1.453215f, 0.747718f, 1.362077f, 1.517095f, 2.113355f, 1.481436f, 1.396571f, 1.119390f, 1.450184f, 0.961972f, 1.259746f, 2.143413f, 2.652389f, 2.557375f, 2.011039f, +0.501966f, 0.576958f, 0.367942f, 0.565302f, 0.647410f, 0.903167f, 0.784702f, 0.623912f, 0.872337f, 1.131761f, 0.930509f, 1.027731f, 0.690847f, 0.856135f, 1.023118f, 0.678563f, +2.015591f, 1.992334f, 1.199571f, 1.947770f, 1.709922f, 2.051422f, 1.682756f, 1.414000f, 1.688946f, 1.884415f, 1.462755f, 
1.707420f, 2.605397f, 2.776670f, 3.132833f, 2.195894f, +0.293614f, 0.330155f, 0.185003f, 0.370404f, 0.570782f, 0.778987f, 0.594696f, 0.616179f, 0.562657f, 0.714144f, 0.515917f, 0.742561f, 0.532847f, 0.646002f, 0.678337f, 0.586278f, +0.000025f, 0.000028f, 0.000053f, 0.000147f, 0.000023f, 0.000032f, 0.000081f, 0.000116f, 0.000000f, 0.000001f, 0.000002f, 0.000003f, 0.000017f, 0.000020f, 0.000072f, 0.000085f, +0.000013f, 0.000015f, 0.000035f, 0.000081f, 0.000013f, 0.000018f, 0.000057f, 0.000069f, 0.000001f, 0.000001f, 0.000002f, 0.000003f, 0.000007f, 0.000009f, 0.000038f, 0.000038f, +0.000077f, 0.000074f, 0.000164f, 0.000403f, 0.000050f, 0.000059f, 0.000177f, 0.000225f, 0.000001f, 0.000002f, 0.000004f, 0.000008f, 0.000039f, 0.000041f, 0.000169f, 0.000179f, +0.000001f, 0.000001f, 0.000001f, 0.000004f, 0.000001f, 0.000001f, 0.000003f, 0.000005f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000002f, +0.733306f, 0.883131f, 0.413690f, 0.981734f, 0.858233f, 1.254481f, 0.800601f, 0.983222f, 0.622605f, 0.846357f, 0.511134f, 0.871988f, 1.096774f, 1.424123f, 1.250105f, 1.280642f, +0.304309f, 0.367015f, 0.213089f, 0.426498f, 0.383369f, 0.561182f, 0.443898f, 0.459787f, 0.507879f, 0.691400f, 0.517532f, 0.744649f, 0.370031f, 0.481168f, 0.523507f, 0.452316f, +1.738541f, 1.803199f, 0.988438f, 2.090819f, 1.440640f, 1.813564f, 1.354381f, 1.482602f, 1.399051f, 1.637923f, 1.157525f, 1.760169f, 1.985508f, 2.220348f, 2.280741f, 2.082598f, +0.165565f, 0.195348f, 0.099658f, 0.259934f, 0.314382f, 0.450212f, 0.312913f, 0.422368f, 0.304699f, 0.405800f, 0.266899f, 0.500443f, 0.265466f, 0.337707f, 0.322844f, 0.363502f, +0.039616f, 0.049345f, 0.017703f, 0.058845f, 0.033087f, 0.050021f, 0.024449f, 0.042057f, 0.043509f, 0.061172f, 0.028294f, 0.067610f, 0.094222f, 0.126536f, 0.085070f, 0.122065f, +0.026744f, 0.033361f, 0.014835f, 0.041588f, 0.024044f, 0.036402f, 0.022053f, 0.031995f, 0.057739f, 0.081296f, 0.046605f, 0.093926f, 0.051714f, 0.069550f, 0.057955f, 
0.070136f, +0.198291f, 0.212713f, 0.089302f, 0.264585f, 0.117260f, 0.152671f, 0.087323f, 0.133890f, 0.206414f, 0.249937f, 0.135278f, 0.288130f, 0.360116f, 0.416508f, 0.327673f, 0.419089f, +0.020013f, 0.024423f, 0.009542f, 0.034861f, 0.027120f, 0.040167f, 0.021382f, 0.040425f, 0.047644f, 0.065627f, 0.033058f, 0.086820f, 0.051028f, 0.067139f, 0.049157f, 0.077524f, +0.514755f, 0.662304f, 0.276094f, 0.620873f, 0.457932f, 0.715115f, 0.406142f, 0.472651f, 0.553204f, 0.803420f, 0.431791f, 0.698034f, 0.960556f, 1.332506f, 1.040923f, 1.010477f, +0.166412f, 0.214422f, 0.110789f, 0.210126f, 0.159355f, 0.249212f, 0.175427f, 0.172186f, 0.351549f, 0.511295f, 0.340588f, 0.464377f, 0.252462f, 0.350729f, 0.339584f, 0.278031f, +1.127243f, 1.249088f, 0.609325f, 1.221357f, 0.710016f, 0.954909f, 0.634629f, 0.658310f, 1.148216f, 1.436150f, 0.903206f, 1.301481f, 1.606180f, 1.918934f, 1.754143f, 1.517823f, +0.166008f, 0.209260f, 0.095004f, 0.234811f, 0.239607f, 0.366585f, 0.226742f, 0.290019f, 0.386714f, 0.550233f, 0.322057f, 0.572225f, 0.332094f, 0.451344f, 0.383982f, 0.409686f, +0.000067f, 0.000084f, 0.000129f, 0.000437f, 0.000046f, 0.000070f, 0.000146f, 0.000256f, 0.000002f, 0.000002f, 0.000004f, 0.000011f, 0.000049f, 0.000066f, 0.000191f, 0.000280f, +0.000029f, 0.000036f, 0.000069f, 0.000197f, 0.000021f, 0.000032f, 0.000084f, 0.000124f, 0.000001f, 0.000002f, 0.000005f, 0.000010f, 0.000017f, 0.000023f, 0.000083f, 0.000102f, +0.000281f, 0.000303f, 0.000544f, 0.001649f, 0.000136f, 0.000178f, 0.000436f, 0.000684f, 0.000006f, 0.000008f, 0.000018f, 0.000039f, 0.000158f, 0.000183f, 0.000616f, 0.000806f, +0.000002f, 0.000003f, 0.000004f, 0.000016f, 0.000002f, 0.000003f, 0.000008f, 0.000015f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000002f, 0.000007f, 0.000011f, +0.575148f, 0.776489f, 0.294698f, 0.863332f, 0.499777f, 0.818937f, 0.423443f, 0.641966f, 0.593608f, 0.904599f, 0.442618f, 0.932153f, 0.948237f, 1.380266f, 0.981644f, 1.241415f, +0.194629f, 0.263143f, 0.123783f, 
0.305843f, 0.182048f, 0.298736f, 0.191451f, 0.244802f, 0.394861f, 0.602601f, 0.365451f, 0.649121f, 0.260877f, 0.380285f, 0.335218f, 0.357543f, +1.875786f, 2.181010f, 0.968625f, 2.529324f, 1.154064f, 1.628633f, 0.985424f, 1.331646f, 1.834951f, 2.408240f, 1.378887f, 2.588420f, 2.361430f, 2.960329f, 2.463695f, 2.777144f, +0.180594f, 0.238869f, 0.098732f, 0.317899f, 0.254607f, 0.408738f, 0.230167f, 0.383524f, 0.404017f, 0.603192f, 0.321428f, 0.744000f, 0.319191f, 0.455194f, 0.352567f, 0.490046f, +0.000585f, 0.000709f, 0.000280f, 0.000755f, 0.000573f, 0.000844f, 0.000453f, 0.000634f, 0.000446f, 0.000611f, 0.000311f, 0.000603f, 0.002206f, 0.002884f, 0.002131f, 0.002485f, +0.000790f, 0.000959f, 0.000469f, 0.001068f, 0.000834f, 0.001229f, 0.000818f, 0.000965f, 0.001186f, 0.001625f, 0.001024f, 0.001678f, 0.002423f, 0.003172f, 0.002906f, 0.002858f, +0.003562f, 0.003720f, 0.001717f, 0.004133f, 0.002473f, 0.003135f, 0.001971f, 0.002456f, 0.002578f, 0.003039f, 0.001808f, 0.003130f, 0.010262f, 0.011555f, 0.009993f, 0.010387f, +0.000437f, 0.000519f, 0.000223f, 0.000662f, 0.000695f, 0.001003f, 0.000587f, 0.000902f, 0.000724f, 0.000970f, 0.000537f, 0.001147f, 0.001768f, 0.002265f, 0.001823f, 0.002336f, +0.016567f, 0.020752f, 0.009510f, 0.017380f, 0.017304f, 0.026308f, 0.016426f, 0.015535f, 0.012381f, 0.017506f, 0.010343f, 0.013588f, 0.049044f, 0.066236f, 0.056883f, 0.044874f, +0.010718f, 0.013445f, 0.007637f, 0.011771f, 0.012051f, 0.018348f, 0.014199f, 0.011326f, 0.015746f, 0.022295f, 0.016327f, 0.018091f, 0.025797f, 0.034890f, 0.037138f, 0.024710f, +0.044160f, 0.047639f, 0.025548f, 0.041616f, 0.032658f, 0.042761f, 0.031243f, 0.026337f, 0.031280f, 0.038090f, 0.026335f, 0.030838f, 0.099823f, 0.116107f, 0.116682f, 0.082047f, +0.007908f, 0.009704f, 0.004843f, 0.009728f, 0.013401f, 0.019960f, 0.013573f, 0.014108f, 0.012810f, 0.017744f, 0.011418f, 0.016486f, 0.025096f, 0.033205f, 0.031057f, 0.026928f, +0.000002f, 0.000003f, 0.000005f, 0.000012f, 0.000002f, 0.000003f, 0.000006f, 
0.000009f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000003f, 0.000003f, 0.000011f, 0.000013f, +0.000002f, 0.000002f, 0.000005f, 0.000011f, 0.000002f, 0.000002f, 0.000007f, 0.000008f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000002f, 0.000009f, 0.000009f, +0.000011f, 0.000012f, 0.000023f, 0.000057f, 0.000006f, 0.000008f, 0.000022f, 0.000028f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000010f, 0.000011f, 0.000042f, 0.000044f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, +0.009855f, 0.012953f, 0.005404f, 0.012866f, 0.010055f, 0.016040f, 0.009118f, 0.011233f, 0.007073f, 0.010494f, 0.005645f, 0.009661f, 0.025776f, 0.036528f, 0.028560f, 0.029351f, +0.006674f, 0.008785f, 0.004543f, 0.009122f, 0.007330f, 0.011710f, 0.008250f, 0.008573f, 0.009416f, 0.013990f, 0.009327f, 0.013463f, 0.014192f, 0.020141f, 0.019518f, 0.016918f, +0.039123f, 0.044286f, 0.021623f, 0.045884f, 0.028262f, 0.038829f, 0.025828f, 0.028364f, 0.026614f, 0.034005f, 0.021405f, 0.032653f, 0.078136f, 0.095363f, 0.087250f, 0.079924f, +0.004580f, 0.005898f, 0.002680f, 0.007012f, 0.007581f, 0.011849f, 0.007335f, 0.009933f, 0.007125f, 0.010356f, 0.006067f, 0.011412f, 0.012842f, 0.017829f, 0.015182f, 0.017148f, +0.000367f, 0.000459f, 0.000208f, 0.000433f, 0.000322f, 0.000490f, 0.000302f, 0.000325f, 0.000295f, 0.000417f, 0.000244f, 0.000364f, 0.001277f, 0.001725f, 0.001464f, 0.001315f, +0.000334f, 0.000419f, 0.000235f, 0.000413f, 0.000316f, 0.000481f, 0.000368f, 0.000334f, 0.000528f, 0.000748f, 0.000541f, 0.000683f, 0.000946f, 0.001280f, 0.001346f, 0.001019f, +0.002387f, 0.002575f, 0.001364f, 0.002530f, 0.001485f, 0.001944f, 0.001403f, 0.001347f, 0.001820f, 0.002216f, 0.001514f, 0.002018f, 0.006347f, 0.007383f, 0.007330f, 0.005869f, +0.000301f, 0.000369f, 0.000182f, 0.000417f, 0.000429f, 0.000639f, 0.000429f, 0.000508f, 0.000525f, 0.000727f, 0.000462f, 
0.000760f, 0.001124f, 0.001487f, 0.001374f, 0.001356f, +0.010235f, 0.013244f, 0.006967f, 0.009808f, 0.009578f, 0.015043f, 0.010781f, 0.007854f, 0.008058f, 0.011770f, 0.007983f, 0.008079f, 0.027970f, 0.039022f, 0.038468f, 0.023376f, +0.004466f, 0.005787f, 0.003773f, 0.004480f, 0.004498f, 0.007075f, 0.006285f, 0.003862f, 0.006911f, 0.010110f, 0.008498f, 0.007253f, 0.009921f, 0.013862f, 0.016937f, 0.008681f, +0.029142f, 0.032476f, 0.019993f, 0.025086f, 0.019309f, 0.026118f, 0.021905f, 0.014224f, 0.021747f, 0.027357f, 0.021712f, 0.019584f, 0.060810f, 0.073066f, 0.084288f, 0.045655f, +0.005362f, 0.006798f, 0.003895f, 0.006026f, 0.008142f, 0.012528f, 0.009779f, 0.007830f, 0.009152f, 0.013096f, 0.009673f, 0.010759f, 0.015710f, 0.021473f, 0.023054f, 0.015398f, +0.000017f, 0.000021f, 0.000041f, 0.000086f, 0.000012f, 0.000018f, 0.000048f, 0.000053f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000018f, 0.000024f, 0.000088f, 0.000081f, +0.000010f, 0.000012f, 0.000029f, 0.000052f, 0.000007f, 0.000011f, 0.000037f, 0.000035f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000008f, 0.000011f, 0.000052f, 0.000040f, +0.000091f, 0.000098f, 0.000223f, 0.000423f, 0.000046f, 0.000061f, 0.000188f, 0.000184f, 0.000001f, 0.000002f, 0.000005f, 0.000007f, 0.000075f, 0.000087f, 0.000370f, 0.000303f, +0.000001f, 0.000001f, 0.000002f, 0.000005f, 0.000001f, 0.000001f, 0.000004f, 0.000005f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000005f, 0.000005f, +0.011272f, 0.015306f, 0.007331f, 0.013443f, 0.010304f, 0.016981f, 0.011080f, 0.010516f, 0.008524f, 0.013063f, 0.008066f, 0.010634f, 0.027217f, 0.039844f, 0.035760f, 0.028309f, +0.005148f, 0.007000f, 0.004156f, 0.006427f, 0.005066f, 0.008360f, 0.006761f, 0.005412f, 0.007652f, 0.011745f, 0.008989f, 0.009994f, 0.010106f, 0.014816f, 0.016481f, 0.011004f, +0.047801f, 0.055897f, 0.031328f, 0.051209f, 0.030937f, 0.043909f, 0.033527f, 0.028361f, 0.034258f, 0.045219f, 0.032673f, 0.038394f, 0.088128f, 0.111110f, 0.116693f, 
0.082342f, +0.005750f, 0.007649f, 0.003990f, 0.008042f, 0.008528f, 0.013769f, 0.009785f, 0.010206f, 0.009425f, 0.014152f, 0.009517f, 0.013789f, 0.014884f, 0.021347f, 0.020866f, 0.018155f, +0.000472f, 0.000525f, 0.000232f, 0.000625f, 0.000492f, 0.000664f, 0.000400f, 0.000558f, 0.000380f, 0.000477f, 0.000272f, 0.000527f, 0.001639f, 0.001963f, 0.001629f, 0.001893f, +0.000478f, 0.000532f, 0.000292f, 0.000663f, 0.000536f, 0.000724f, 0.000541f, 0.000636f, 0.000756f, 0.000950f, 0.000672f, 0.001097f, 0.001348f, 0.001617f, 0.001663f, 0.001631f, +0.002075f, 0.001986f, 0.001029f, 0.002470f, 0.001531f, 0.001779f, 0.001256f, 0.001560f, 0.001584f, 0.001711f, 0.001143f, 0.001972f, 0.005500f, 0.005675f, 0.005510f, 0.005709f, +0.000248f, 0.000270f, 0.000130f, 0.000385f, 0.000419f, 0.000554f, 0.000364f, 0.000557f, 0.000433f, 0.000532f, 0.000331f, 0.000703f, 0.000923f, 0.001083f, 0.000979f, 0.001250f, +0.019160f, 0.021991f, 0.011315f, 0.020612f, 0.021269f, 0.029628f, 0.020769f, 0.019579f, 0.015096f, 0.019557f, 0.012973f, 0.016989f, 0.052178f, 0.064568f, 0.062256f, 0.048956f, +0.009284f, 0.010671f, 0.006805f, 0.010455f, 0.011093f, 0.015476f, 0.013446f, 0.010691f, 0.014379f, 0.018655f, 0.015338f, 0.016940f, 0.020555f, 0.025473f, 0.030441f, 0.020189f, +0.036850f, 0.036424f, 0.021931f, 0.035610f, 0.028962f, 0.034746f, 0.028502f, 0.023950f, 0.027518f, 0.030703f, 0.023833f, 0.027819f, 0.076626f, 0.081663f, 0.092138f, 0.064582f, +0.006424f, 0.007224f, 0.004048f, 0.008104f, 0.011570f, 0.015791f, 0.012055f, 0.012490f, 0.010971f, 0.013925f, 0.010060f, 0.014479f, 0.018755f, 0.022738f, 0.023876f, 0.020636f, +0.000002f, 0.000002f, 0.000003f, 0.000009f, 0.000001f, 0.000002f, 0.000004f, 0.000006f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000002f, 0.000007f, 0.000008f, +0.000001f, 0.000001f, 0.000003f, 0.000006f, 0.000001f, 0.000001f, 0.000004f, 0.000005f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000004f, 0.000004f, +0.000006f, 0.000005f, 0.000012f, 
0.000029f, 0.000003f, 0.000004f, 0.000012f, 0.000015f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000005f, 0.000005f, 0.000020f, 0.000021f, +0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.009059f, 0.010910f, 0.005111f, 0.012128f, 0.009823f, 0.014358f, 0.009163f, 0.011253f, 0.006855f, 0.009318f, 0.005627f, 0.009600f, 0.021796f, 0.028302f, 0.024844f, 0.025451f, +0.004595f, 0.005542f, 0.003217f, 0.006440f, 0.005363f, 0.007850f, 0.006210f, 0.006432f, 0.006834f, 0.009304f, 0.006964f, 0.010020f, 0.008988f, 0.011687f, 0.012716f, 0.010987f, +0.025948f, 0.026913f, 0.014753f, 0.031206f, 0.019920f, 0.025077f, 0.018728f, 0.020501f, 0.018609f, 0.021786f, 0.015396f, 0.023412f, 0.047672f, 0.053310f, 0.054760f, 0.050003f, +0.002957f, 0.003489f, 0.001780f, 0.004643f, 0.005203f, 0.007450f, 0.005178f, 0.006990f, 0.004850f, 0.006460f, 0.004249f, 0.007966f, 0.007628f, 0.009704f, 0.009277f, 0.010445f, +0.000348f, 0.000434f, 0.000156f, 0.000517f, 0.000270f, 0.000408f, 0.000199f, 0.000343f, 0.000341f, 0.000479f, 0.000222f, 0.000530f, 0.001333f, 0.001790f, 0.001203f, 0.001727f, +0.000287f, 0.000359f, 0.000159f, 0.000447f, 0.000239f, 0.000362f, 0.000220f, 0.000319f, 0.000553f, 0.000779f, 0.000446f, 0.000900f, 0.000894f, 0.001203f, 0.001002f, 0.001213f, +0.002107f, 0.002260f, 0.000949f, 0.002811f, 0.001154f, 0.001503f, 0.000859f, 0.001318f, 0.001954f, 0.002366f, 0.001281f, 0.002728f, 0.006155f, 0.007118f, 0.005600f, 0.007163f, +0.000254f, 0.000311f, 0.000121f, 0.000443f, 0.000319f, 0.000473f, 0.000252f, 0.000476f, 0.000540f, 0.000744f, 0.000375f, 0.000984f, 0.001044f, 0.001373f, 0.001005f, 0.001586f, +0.007236f, 0.009310f, 0.003881f, 0.008728f, 0.005964f, 0.009313f, 0.005289f, 0.006156f, 0.006931f, 0.010065f, 0.005409f, 0.008745f, 0.021722f, 0.030133f, 0.023539f, 0.022851f, +0.002859f, 0.003684f, 0.001904f, 0.003610f, 0.002537f, 0.003967f, 0.002792f, 
0.002741f, 0.005383f, 0.007829f, 0.005215f, 0.007111f, 0.006978f, 0.009694f, 0.009386f, 0.007685f, +0.019144f, 0.021214f, 0.010348f, 0.020743f, 0.011172f, 0.015025f, 0.009986f, 0.010358f, 0.017379f, 0.021737f, 0.013671f, 0.019699f, 0.043882f, 0.052427f, 0.047925f, 0.041468f, +0.003374f, 0.004253f, 0.001931f, 0.004773f, 0.004512f, 0.006903f, 0.004270f, 0.005461f, 0.007005f, 0.009967f, 0.005834f, 0.010365f, 0.010859f, 0.014758f, 0.012555f, 0.013396f, +0.000004f, 0.000005f, 0.000007f, 0.000024f, 0.000002f, 0.000004f, 0.000007f, 0.000013f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000004f, 0.000006f, 0.000017f, 0.000025f, +0.000002f, 0.000002f, 0.000005f, 0.000013f, 0.000001f, 0.000002f, 0.000005f, 0.000008f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000003f, 0.000009f, 0.000011f, +0.000019f, 0.000020f, 0.000036f, 0.000110f, 0.000008f, 0.000011f, 0.000027f, 0.000042f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000017f, 0.000020f, 0.000066f, 0.000087f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, +0.006600f, 0.008911f, 0.003382f, 0.009908f, 0.005314f, 0.008707f, 0.004502f, 0.006825f, 0.006071f, 0.009252f, 0.004527f, 0.009533f, 0.017506f, 0.025482f, 0.018122f, 0.022918f, +0.002730f, 0.003691f, 0.001736f, 0.004290f, 0.002366f, 0.003882f, 0.002488f, 0.003181f, 0.004936f, 0.007533f, 0.004568f, 0.008114f, 0.005886f, 0.008581f, 0.007564f, 0.008068f, +0.026007f, 0.030239f, 0.013430f, 0.035068f, 0.014824f, 0.020920f, 0.012658f, 0.017105f, 0.022673f, 0.029757f, 0.017038f, 0.031983f, 0.052669f, 0.066027f, 0.054950f, 0.061941f, +0.002997f, 0.003964f, 0.001638f, 0.005275f, 0.003914f, 0.006283f, 0.003538f, 0.005896f, 0.005974f, 0.008920f, 0.004753f, 0.011002f, 0.008520f, 0.012151f, 0.009411f, 0.013081f, +0.048876f, 0.059269f, 0.023377f, 0.063145f, 0.038519f, 0.056692f, 0.030464f, 0.042585f, 0.032304f, 0.044217f, 0.022484f, 
0.043660f, 0.111628f, 0.145947f, 0.107869f, 0.125782f, +0.079289f, 0.096289f, 0.047071f, 0.107239f, 0.067262f, 0.099141f, 0.066029f, 0.077848f, 0.103012f, 0.141205f, 0.088994f, 0.145752f, 0.147225f, 0.192767f, 0.176588f, 0.173668f, +0.295126f, 0.308219f, 0.142255f, 0.342511f, 0.164678f, 0.208740f, 0.131255f, 0.163546f, 0.184877f, 0.217940f, 0.129681f, 0.224460f, 0.514680f, 0.579535f, 0.501230f, 0.520962f, +0.026937f, 0.032002f, 0.013746f, 0.040811f, 0.034443f, 0.049665f, 0.029064f, 0.044654f, 0.038590f, 0.051750f, 0.028658f, 0.061164f, 0.065953f, 0.084481f, 0.068001f, 0.087150f, +0.828230f, 1.037454f, 0.475456f, 0.868877f, 0.695237f, 1.056988f, 0.659953f, 0.624135f, 0.535644f, 0.757347f, 0.447474f, 0.587859f, 1.484100f, 2.004341f, 1.721321f, 1.357916f, +0.643406f, 0.807109f, 0.458458f, 0.706620f, 0.581365f, 0.885145f, 0.684990f, 0.546371f, 0.817952f, 1.158179f, 0.848154f, 0.939762f, 0.937321f, 1.267726f, 1.349405f, 0.897824f, +2.187975f, 2.360366f, 1.265832f, 2.061925f, 1.300394f, 1.702671f, 1.244027f, 1.048680f, 1.341185f, 1.633153f, 1.129158f, 1.322235f, 2.993709f, 3.482067f, 3.499315f, 2.460606f, +0.291395f, 0.357602f, 0.178482f, 0.358488f, 0.396857f, 0.591113f, 0.401946f, 0.417797f, 0.408490f, 0.565850f, 0.364106f, 0.525733f, 0.559761f, 0.740648f, 0.692717f, 0.600619f, +0.000058f, 0.000070f, 0.000119f, 0.000328f, 0.000037f, 0.000055f, 0.000127f, 0.000181f, 0.000001f, 0.000001f, 0.000002f, 0.000005f, 0.000041f, 0.000054f, 0.000169f, 0.000202f, +0.000060f, 0.000073f, 0.000152f, 0.000355f, 0.000042f, 0.000061f, 0.000175f, 0.000211f, 0.000002f, 0.000002f, 0.000006f, 0.000010f, 0.000034f, 0.000045f, 0.000176f, 0.000177f, +0.000293f, 0.000307f, 0.000606f, 0.001493f, 0.000134f, 0.000170f, 0.000458f, 0.000584f, 0.000004f, 0.000005f, 0.000012f, 0.000021f, 0.000158f, 0.000178f, 0.000659f, 0.000701f, +0.000002f, 0.000002f, 0.000004f, 0.000013f, 0.000002f, 0.000003f, 0.000007f, 0.000011f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000006f, 
0.000008f, +0.582809f, 0.766025f, 0.319614f, 0.760904f, 0.477865f, 0.762326f, 0.433337f, 0.533883f, 0.361982f, 0.537037f, 0.288881f, 0.494401f, 0.922686f, 1.307559f, 1.022335f, 1.050653f, +0.473920f, 0.623808f, 0.322597f, 0.647742f, 0.418278f, 0.668235f, 0.470805f, 0.489215f, 0.578606f, 0.859666f, 0.573153f, 0.827312f, 0.609991f, 0.865684f, 0.838915f, 0.727147f, +2.293001f, 2.595614f, 1.267299f, 2.689247f, 1.331169f, 1.828891f, 1.216547f, 1.335972f, 1.349851f, 1.724737f, 1.085657f, 1.656158f, 2.771957f, 3.383086f, 3.095287f, 2.835406f, +0.199642f, 0.257081f, 0.116817f, 0.305663f, 0.265583f, 0.415085f, 0.256966f, 0.347959f, 0.268774f, 0.390666f, 0.228862f, 0.430494f, 0.338836f, 0.470432f, 0.400574f, 0.452461f, +0.046959f, 0.058826f, 0.026633f, 0.055417f, 0.033157f, 0.050413f, 0.031096f, 0.033484f, 0.032698f, 0.046235f, 0.026987f, 0.040368f, 0.099004f, 0.133718f, 0.113448f, 0.101901f, +0.051375f, 0.064451f, 0.036167f, 0.063470f, 0.039047f, 0.059454f, 0.045453f, 0.041280f, 0.070319f, 0.099575f, 0.072038f, 0.090883f, 0.088059f, 0.119107f, 0.125248f, 0.094884f, +0.302882f, 0.326769f, 0.173122f, 0.321087f, 0.151418f, 0.198273f, 0.143113f, 0.137361f, 0.199894f, 0.243427f, 0.166269f, 0.221686f, 0.487595f, 0.567175f, 0.563090f, 0.450827f, +0.028408f, 0.034865f, 0.017191f, 0.039315f, 0.032544f, 0.048477f, 0.032565f, 0.038541f, 0.042877f, 0.059398f, 0.037759f, 0.062077f, 0.064208f, 0.084962f, 0.078503f, 0.077500f, +0.783695f, 1.014099f, 0.533488f, 0.750995f, 0.589396f, 0.925676f, 0.663446f, 0.483320f, 0.533978f, 0.779933f, 0.528972f, 0.535306f, 1.296330f, 1.808585f, 1.782921f, 1.083446f, +0.410575f, 0.532053f, 0.346917f, 0.411885f, 0.332380f, 0.522775f, 0.464395f, 0.285335f, 0.549904f, 0.804358f, 0.676163f, 0.577110f, 0.552143f, 0.771443f, 0.942592f, 0.483100f, +2.211472f, 2.464526f, 1.517166f, 1.903680f, 1.177583f, 1.592805f, 1.335871f, 0.867445f, 1.428166f, 1.796519f, 1.425813f, 1.286117f, 2.793216f, 3.356193f, 3.871648f, 2.097100f, +0.302657f, 0.383693f, 0.219827f, 
0.340115f, 0.369301f, 0.568241f, 0.443540f, 0.355136f, 0.446994f, 0.639640f, 0.472460f, 0.525493f, 0.536695f, 0.733587f, 0.787587f, 0.526025f, +0.000672f, 0.000845f, 0.001636f, 0.003480f, 0.000388f, 0.000593f, 0.001565f, 0.001723f, 0.000010f, 0.000014f, 0.000036f, 0.000055f, 0.000437f, 0.000592f, 0.002150f, 0.001974f, +0.000467f, 0.000589f, 0.001413f, 0.002536f, 0.000291f, 0.000445f, 0.001455f, 0.001351f, 0.000014f, 0.000020f, 0.000061f, 0.000079f, 0.000247f, 0.000336f, 0.001510f, 0.001170f, +0.003631f, 0.003933f, 0.008914f, 0.016905f, 0.001487f, 0.001955f, 0.006037f, 0.005925f, 0.000052f, 0.000063f, 0.000185f, 0.000253f, 0.001804f, 0.002106f, 0.008946f, 0.007324f, +0.000025f, 0.000030f, 0.000064f, 0.000149f, 0.000023f, 0.000034f, 0.000099f, 0.000120f, 0.000001f, 0.000001f, 0.000003f, 0.000005f, 0.000017f, 0.000023f, 0.000090f, 0.000091f, +1.021033f, 1.386346f, 0.663984f, 1.217659f, 0.750060f, 1.236081f, 0.806557f, 0.765455f, 0.668115f, 1.023962f, 0.632268f, 0.833539f, 1.492187f, 2.184467f, 1.960562f, 1.552069f, +0.559925f, 0.761361f, 0.451963f, 0.699052f, 0.442759f, 0.730714f, 0.590965f, 0.473026f, 0.720209f, 1.105403f, 0.845988f, 0.940648f, 0.665279f, 0.975338f, 1.084968f, 0.724411f, +4.291018f, 5.017776f, 2.812244f, 4.596942f, 2.231860f, 3.167647f, 2.418692f, 2.046037f, 2.661295f, 3.512731f, 2.538153f, 2.982572f, 4.788490f, 6.037263f, 6.340607f, 4.474138f, +0.383918f, 0.510705f, 0.266385f, 0.536921f, 0.457578f, 0.738781f, 0.524998f, 0.547614f, 0.544534f, 0.817632f, 0.549831f, 0.796684f, 0.601493f, 0.862688f, 0.843224f, 0.733677f, +0.062173f, 0.069081f, 0.030590f, 0.082367f, 0.052073f, 0.070224f, 0.042365f, 0.059033f, 0.043321f, 0.054332f, 0.031018f, 0.060040f, 0.130622f, 0.156482f, 0.129847f, 0.150927f, +0.075539f, 0.084053f, 0.046132f, 0.104764f, 0.068102f, 0.091973f, 0.068772f, 0.080824f, 0.103463f, 0.129949f, 0.091950f, 0.150113f, 0.129025f, 0.154793f, 0.159201f, 0.156070f, +0.270865f, 0.259196f, 0.134308f, 0.322348f, 0.160624f, 0.186554f, 0.131699f, 
0.163576f, 0.178885f, 0.193218f, 0.129079f, 0.222707f, 0.434533f, 0.448320f, 0.435324f, 0.451021f, +0.024070f, 0.026201f, 0.012636f, 0.037394f, 0.032707f, 0.043214f, 0.028392f, 0.043483f, 0.036353f, 0.044668f, 0.027772f, 0.059084f, 0.054212f, 0.063627f, 0.057499f, 0.073457f, +1.508767f, 1.731662f, 0.890987f, 1.623061f, 1.345969f, 1.874971f, 1.314331f, 1.239040f, 1.028707f, 1.332703f, 0.884042f, 1.157693f, 2.486993f, 3.077555f, 2.967311f, 2.333399f, +0.877823f, 1.008965f, 0.643444f, 0.988582f, 0.842948f, 1.175951f, 1.021706f, 0.812352f, 1.176504f, 1.526385f, 1.254960f, 1.386081f, 1.176385f, 1.457839f, 1.742182f, 1.155467f, +2.875770f, 2.842588f, 1.711504f, 2.779006f, 1.816423f, 2.179193f, 1.787565f, 1.502070f, 1.858423f, 2.073507f, 1.609535f, 1.878751f, 3.619605f, 3.857550f, 4.352357f, 3.050694f, +0.372879f, 0.419284f, 0.234948f, 0.470399f, 0.539697f, 0.736563f, 0.562308f, 0.582622f, 0.551077f, 0.699446f, 0.505298f, 0.727278f, 0.658914f, 0.798841f, 0.838826f, 0.724987f, +0.000062f, 0.000070f, 0.000132f, 0.000364f, 0.000043f, 0.000058f, 0.000150f, 0.000213f, 0.000001f, 0.000001f, 0.000003f, 0.000006f, 0.000041f, 0.000049f, 0.000173f, 0.000206f, +0.000048f, 0.000054f, 0.000127f, 0.000294f, 0.000036f, 0.000048f, 0.000155f, 0.000186f, 0.000001f, 0.000002f, 0.000005f, 0.000009f, 0.000025f, 0.000031f, 0.000135f, 0.000135f, +0.000228f, 0.000219f, 0.000486f, 0.001193f, 0.000111f, 0.000129f, 0.000391f, 0.000496f, 0.000003f, 0.000004f, 0.000010f, 0.000018f, 0.000113f, 0.000117f, 0.000486f, 0.000515f, +0.000001f, 0.000002f, 0.000003f, 0.000010f, 0.000002f, 0.000002f, 0.000006f, 0.000009f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000005f, 0.000006f, +0.843841f, 1.016250f, 0.476048f, 1.129716f, 0.735309f, 1.074802f, 0.685931f, 0.842396f, 0.552542f, 0.751114f, 0.453615f, 0.773861f, 1.228933f, 1.595726f, 1.400740f, 1.434957f, +0.513913f, 0.619810f, 0.359861f, 0.720264f, 0.482037f, 0.705614f, 0.558144f, 0.578122f, 0.661472f, 0.900495f, 0.674045f, 
0.969846f, 0.608482f, 0.791237f, 0.860860f, 0.743793f, +2.395406f, 2.484494f, 1.361895f, 2.880784f, 1.477877f, 1.860442f, 1.389389f, 1.520925f, 1.486637f, 1.740462f, 1.229990f, 1.870362f, 2.663798f, 2.978864f, 3.059888f, 2.794055f, +0.203049f, 0.239575f, 0.122221f, 0.318784f, 0.287065f, 0.411092f, 0.285723f, 0.385668f, 0.288191f, 0.383815f, 0.252440f, 0.473332f, 0.317014f, 0.403282f, 0.385533f, 0.434086f, +0.090902f, 0.113226f, 0.040622f, 0.135024f, 0.056527f, 0.085457f, 0.041770f, 0.071851f, 0.076995f, 0.108252f, 0.050070f, 0.119644f, 0.210520f, 0.282719f, 0.190071f, 0.272730f, +0.090062f, 0.112341f, 0.049955f, 0.140046f, 0.060284f, 0.091269f, 0.055292f, 0.080218f, 0.149950f, 0.211129f, 0.121037f, 0.243932f, 0.169570f, 0.228055f, 0.190032f, 0.229976f, +0.544789f, 0.584411f, 0.245350f, 0.726925f, 0.239862f, 0.312299f, 0.178624f, 0.273880f, 0.437361f, 0.529580f, 0.286636f, 0.610507f, 0.963391f, 1.114253f, 0.876598f, 1.121156f, +0.048942f, 0.059725f, 0.023336f, 0.085253f, 0.049378f, 0.073135f, 0.038931f, 0.073603f, 0.089856f, 0.123772f, 0.062347f, 0.163743f, 0.121510f, 0.159872f, 0.117055f, 0.184603f, +1.128999f, 1.452613f, 0.605550f, 1.361744f, 0.747795f, 1.167771f, 0.663223f, 0.771831f, 0.935740f, 1.358979f, 0.730372f, 1.180719f, 2.051402f, 2.845756f, 2.223038f, 2.158017f, +0.535643f, 0.690177f, 0.356605f, 0.676349f, 0.381897f, 0.597241f, 0.420415f, 0.412647f, 0.872680f, 1.269231f, 0.845471f, 1.152761f, 0.791268f, 1.099256f, 1.064326f, 0.871406f, +2.960255f, 3.280231f, 1.600149f, 3.207407f, 1.388252f, 1.867078f, 1.240853f, 1.287155f, 2.325477f, 2.908630f, 1.829260f, 2.635884f, 4.107156f, 4.906898f, 4.485512f, 3.881220f, +0.388043f, 0.489144f, 0.222071f, 0.548869f, 0.417003f, 0.637991f, 0.394613f, 0.504737f, 0.697136f, 0.991916f, 0.580578f, 1.031561f, 0.755870f, 1.027292f, 0.873972f, 0.932475f, +0.000305f, 0.000382f, 0.000586f, 0.001991f, 0.000156f, 0.000236f, 0.000494f, 0.000868f, 0.000006f, 0.000008f, 0.000016f, 0.000038f, 0.000218f, 0.000294f, 0.000846f, 
0.001241f, +0.000192f, 0.000241f, 0.000458f, 0.001314f, 0.000106f, 0.000160f, 0.000416f, 0.000617f, 0.000007f, 0.000010f, 0.000024f, 0.000050f, 0.000112f, 0.000151f, 0.000538f, 0.000666f, +0.001534f, 0.001652f, 0.002967f, 0.008988f, 0.000553f, 0.000723f, 0.001770f, 0.002774f, 0.000027f, 0.000032f, 0.000075f, 0.000163f, 0.000837f, 0.000972f, 0.003271f, 0.004278f, +0.000010f, 0.000012f, 0.000020f, 0.000076f, 0.000008f, 0.000012f, 0.000028f, 0.000054f, 0.000000f, 0.000001f, 0.000001f, 0.000003f, 0.000008f, 0.000010f, 0.000031f, 0.000051f, +1.218191f, 1.644639f, 0.624184f, 1.828578f, 0.788135f, 1.291441f, 0.667758f, 1.012363f, 0.969644f, 1.477639f, 0.723005f, 1.522648f, 1.955634f, 2.846646f, 2.024531f, 2.560282f, +0.604981f, 0.817948f, 0.384764f, 0.950677f, 0.421316f, 0.691371f, 0.443080f, 0.566549f, 0.946577f, 1.444578f, 0.876074f, 1.556098f, 0.789596f, 1.151010f, 1.014605f, 1.082177f, +4.757045f, 5.531101f, 2.456458f, 6.414434f, 2.179078f, 3.075147f, 1.860655f, 2.514384f, 3.588851f, 4.710106f, 2.696868f, 5.062509f, 5.831288f, 7.310202f, 6.083818f, 6.857845f, +0.407660f, 0.539204f, 0.222869f, 0.717599f, 0.427910f, 0.686953f, 0.386835f, 0.644578f, 0.703347f, 1.050089f, 0.559569f, 1.295220f, 0.701583f, 1.000519f, 0.774944f, 1.077124f, +0.075643f, 0.091728f, 0.036179f, 0.097727f, 0.064694f, 0.095218f, 0.051165f, 0.071523f, 0.052429f, 0.071764f, 0.036492f, 0.070861f, 0.158951f, 0.207820f, 0.153599f, 0.179106f, +0.080989f, 0.098353f, 0.048080f, 0.109537f, 0.074559f, 0.109896f, 0.073192f, 0.086293f, 0.110343f, 0.151254f, 0.095328f, 0.156125f, 0.138359f, 0.181159f, 0.165954f, 0.163210f, +0.455765f, 0.475984f, 0.219686f, 0.528941f, 0.275986f, 0.349830f, 0.219972f, 0.274089f, 0.299409f, 0.352954f, 0.210019f, 0.363514f, 0.731287f, 0.823437f, 0.712178f, 0.740213f, +0.039308f, 0.046699f, 0.020059f, 0.059553f, 0.054543f, 0.078649f, 0.046026f, 0.070715f, 0.059055f, 0.079193f, 0.043856f, 0.093600f, 0.088548f, 0.113423f, 0.091297f, 0.117006f, +2.226413f, 2.788840f, 1.278100f, 
2.335679f, 2.028182f, 3.083498f, 1.925249f, 1.820758f, 1.510010f, 2.135005f, 1.261453f, 1.657207f, 3.670587f, 4.957286f, 4.257299f, 3.358498f, +1.141498f, 1.431931f, 0.813373f, 1.253649f, 1.119329f, 1.704211f, 1.318843f, 1.051953f, 1.521833f, 2.154839f, 1.578024f, 1.748466f, 1.530015f, 2.069345f, 2.202671f, 1.465543f, +5.868896f, 6.331307f, 3.395395f, 5.530785f, 3.785366f, 4.956370f, 3.621285f, 3.052643f, 3.772697f, 4.593989f, 3.176274f, 3.719389f, 7.388242f, 8.593471f, 8.636039f, 6.072585f, +0.738564f, 0.906372f, 0.452378f, 0.908619f, 1.091589f, 1.625910f, 1.105589f, 1.149188f, 1.085769f, 1.504031f, 0.967795f, 1.397400f, 1.305351f, 1.727173f, 1.615401f, 1.400629f, +0.000043f, 0.000052f, 0.000088f, 0.000243f, 0.000030f, 0.000044f, 0.000102f, 0.000146f, 0.000001f, 0.000001f, 0.000002f, 0.000004f, 0.000028f, 0.000036f, 0.000115f, 0.000137f, +0.000029f, 0.000036f, 0.000074f, 0.000173f, 0.000022f, 0.000033f, 0.000093f, 0.000112f, 0.000001f, 0.000001f, 0.000003f, 0.000005f, 0.000015f, 0.000020f, 0.000079f, 0.000080f, +0.000216f, 0.000227f, 0.000448f, 0.001102f, 0.000107f, 0.000137f, 0.000367f, 0.000468f, 0.000003f, 0.000004f, 0.000009f, 0.000016f, 0.000107f, 0.000121f, 0.000448f, 0.000476f, +0.000001f, 0.000002f, 0.000003f, 0.000009f, 0.000002f, 0.000002f, 0.000006f, 0.000009f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000004f, 0.000005f, +0.970578f, 1.275696f, 0.532268f, 1.267167f, 0.863630f, 1.377728f, 0.783157f, 0.964871f, 0.632179f, 0.937902f, 0.504513f, 0.863441f, 1.413759f, 2.003470f, 1.566444f, 1.609834f, +0.520888f, 0.685630f, 0.354568f, 0.711937f, 0.498911f, 0.797054f, 0.561564f, 0.583523f, 0.666917f, 0.990873f, 0.660631f, 0.953581f, 0.616851f, 0.875419f, 0.848350f, 0.735325f, +3.810373f, 4.313239f, 2.105922f, 4.468833f, 2.400575f, 3.298148f, 2.193871f, 2.409238f, 2.352330f, 3.005628f, 1.891931f, 2.886118f, 4.238063f, 5.172422f, 4.732404f, 4.335070f, +0.313479f, 0.403669f, 0.183427f, 0.479953f, 0.452560f, 0.707313f, 0.437876f, 
0.592930f, 0.442581f, 0.643296f, 0.376860f, 0.708879f, 0.489511f, 0.679626f, 0.578704f, 0.653664f, +0.041395f, 0.051856f, 0.023478f, 0.048851f, 0.031719f, 0.048227f, 0.029747f, 0.032032f, 0.030228f, 0.042742f, 0.024948f, 0.037318f, 0.080297f, 0.108452f, 0.092012f, 0.082647f, +0.029889f, 0.037497f, 0.021041f, 0.036926f, 0.024653f, 0.037538f, 0.028698f, 0.026063f, 0.042903f, 0.060753f, 0.043952f, 0.055449f, 0.047136f, 0.063756f, 0.067043f, 0.050790f, +0.266418f, 0.287429f, 0.152280f, 0.282431f, 0.144539f, 0.189266f, 0.136611f, 0.131121f, 0.184391f, 0.224547f, 0.153373f, 0.204492f, 0.394608f, 0.459012f, 0.455706f, 0.364852f, +0.023612f, 0.028979f, 0.014288f, 0.032677f, 0.029354f, 0.043726f, 0.029373f, 0.034763f, 0.037373f, 0.051773f, 0.032912f, 0.054108f, 0.049100f, 0.064972f, 0.060032f, 0.059265f, +1.199936f, 1.552713f, 0.816836f, 1.149867f, 0.979348f, 1.538115f, 1.102389f, 0.803091f, 0.857400f, 1.252326f, 0.849361f, 0.859532f, 1.826181f, 2.547811f, 2.511657f, 1.526285f, +0.414896f, 0.537651f, 0.350567f, 0.416219f, 0.364501f, 0.573297f, 0.509275f, 0.312910f, 0.582749f, 0.852402f, 0.716550f, 0.611581f, 0.513352f, 0.717245f, 0.876370f, 0.449159f, +3.378714f, 3.765333f, 2.317945f, 2.908466f, 1.952453f, 2.640897f, 2.214896f, 1.438239f, 2.288221f, 2.878398f, 2.284451f, 2.060629f, 3.926377f, 4.717746f, 5.442311f, 2.947859f, +0.436931f, 0.553919f, 0.317354f, 0.491009f, 0.578578f, 0.890254f, 0.694887f, 0.556386f, 0.676727f, 0.968384f, 0.715282f, 0.795571f, 0.712866f, 0.974388f, 1.046114f, 0.698693f, +0.000283f, 0.000356f, 0.000689f, 0.001467f, 0.000178f, 0.000271f, 0.000716f, 0.000788f, 0.000004f, 0.000006f, 0.000016f, 0.000024f, 0.000169f, 0.000230f, 0.000834f, 0.000766f, +0.000130f, 0.000164f, 0.000393f, 0.000705f, 0.000088f, 0.000134f, 0.000439f, 0.000408f, 0.000004f, 0.000006f, 0.000018f, 0.000023f, 0.000063f, 0.000086f, 0.000386f, 0.000299f, +0.001527f, 0.001654f, 0.003749f, 0.007109f, 0.000679f, 0.000892f, 0.002755f, 0.002704f, 0.000023f, 0.000028f, 0.000082f, 
0.000111f, 0.000698f, 0.000815f, 0.003461f, 0.002834f, +0.000010f, 0.000012f, 0.000025f, 0.000059f, 0.000010f, 0.000015f, 0.000043f, 0.000052f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000006f, 0.000008f, 0.000033f, 0.000033f, +0.968501f, 1.315018f, 0.629822f, 1.155010f, 0.772103f, 1.272407f, 0.830260f, 0.787950f, 0.664600f, 1.018575f, 0.628942f, 0.829154f, 1.302269f, 1.906440f, 1.711032f, 1.354530f, +0.350530f, 0.476635f, 0.282943f, 0.437627f, 0.300803f, 0.496434f, 0.401491f, 0.321365f, 0.472828f, 0.725713f, 0.555404f, 0.617549f, 0.383192f, 0.561782f, 0.624927f, 0.417252f, +4.061437f, 4.749312f, 2.661781f, 4.350994f, 2.292479f, 3.253682f, 2.484385f, 2.101608f, 2.641566f, 3.486690f, 2.519337f, 2.960461f, 4.169995f, 5.257472f, 5.521635f, 3.896246f, +0.343361f, 0.456754f, 0.238244f, 0.480200f, 0.444115f, 0.717045f, 0.509552f, 0.531503f, 0.510724f, 0.766865f, 0.515692f, 0.747218f, 0.494949f, 0.709877f, 0.693861f, 0.603719f, +0.010920f, 0.012134f, 0.005373f, 0.014467f, 0.009926f, 0.013386f, 0.008075f, 0.011252f, 0.007980f, 0.010008f, 0.005713f, 0.011059f, 0.021109f, 0.025288f, 0.020984f, 0.024390f, +0.008757f, 0.009744f, 0.005348f, 0.012145f, 0.008567f, 0.011570f, 0.008652f, 0.010168f, 0.012578f, 0.015798f, 0.011178f, 0.018249f, 0.013761f, 0.016510f, 0.016980f, 0.016646f, +0.047473f, 0.045428f, 0.023539f, 0.056496f, 0.030551f, 0.035483f, 0.025049f, 0.031112f, 0.032879f, 0.035513f, 0.023724f, 0.040933f, 0.070070f, 0.072293f, 0.070198f, 0.072729f, +0.003986f, 0.004339f, 0.002093f, 0.006193f, 0.005878f, 0.007766f, 0.005103f, 0.007815f, 0.006314f, 0.007758f, 0.004823f, 0.010261f, 0.008260f, 0.009695f, 0.008761f, 0.011193f, +0.460295f, 0.528296f, 0.271822f, 0.495163f, 0.445623f, 0.620765f, 0.435149f, 0.410221f, 0.329120f, 0.426380f, 0.282837f, 0.370387f, 0.698081f, 0.863848f, 0.832903f, 0.654968f, +0.176749f, 0.203154f, 0.129557f, 0.199050f, 0.184191f, 0.256955f, 0.223251f, 0.177506f, 0.248423f, 0.322301f, 0.264989f, 0.292676f, 0.217930f, 0.270070f, 0.322745f, 
0.214054f, +0.875442f, 0.865340f, 0.521016f, 0.845985f, 0.600080f, 0.719926f, 0.590546f, 0.496229f, 0.593290f, 0.661954f, 0.513834f, 0.599779f, 1.013799f, 1.080444f, 1.219032f, 0.854455f, +0.107259f, 0.120608f, 0.067583f, 0.135311f, 0.168475f, 0.229930f, 0.175533f, 0.181874f, 0.166237f, 0.210993f, 0.152427f, 0.219389f, 0.174386f, 0.211419f, 0.222001f, 0.191873f, +0.000005f, 0.000006f, 0.000011f, 0.000031f, 0.000004f, 0.000005f, 0.000014f, 0.000019f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000003f, 0.000004f, 0.000013f, 0.000016f, +0.000003f, 0.000003f, 0.000007f, 0.000016f, 0.000002f, 0.000003f, 0.000009f, 0.000011f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000002f, 0.000007f, 0.000007f, +0.000019f, 0.000018f, 0.000041f, 0.000100f, 0.000010f, 0.000012f, 0.000036f, 0.000045f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000009f, 0.000009f, 0.000037f, 0.000040f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, +0.159487f, 0.192072f, 0.089973f, 0.213517f, 0.150817f, 0.220450f, 0.140690f, 0.172782f, 0.109516f, 0.148874f, 0.089908f, 0.153382f, 0.213702f, 0.277485f, 0.243578f, 0.249528f, +0.064104f, 0.077314f, 0.044888f, 0.089844f, 0.065253f, 0.095518f, 0.075555f, 0.078260f, 0.086529f, 0.117795f, 0.088173f, 0.126867f, 0.069833f, 0.090808f, 0.098798f, 0.085363f, +0.451753f, 0.468555f, 0.256842f, 0.543292f, 0.302468f, 0.380765f, 0.284358f, 0.311279f, 0.294020f, 0.344220f, 0.243261f, 0.369911f, 0.462212f, 0.516881f, 0.530940f, 0.484814f, +0.036184f, 0.042693f, 0.021780f, 0.056808f, 0.055516f, 0.079501f, 0.055256f, 0.074584f, 0.053857f, 0.071728f, 0.047176f, 0.088456f, 0.051977f, 0.066121f, 0.063211f, 0.071172f, +0.032611f, 0.040619f, 0.014573f, 0.048439f, 0.022007f, 0.033270f, 0.016262f, 0.027973f, 0.028967f, 0.040726f, 0.018837f, 0.045012f, 0.069486f, 0.093317f, 0.062737f, 0.090020f, +0.021324f, 0.026599f, 0.011828f, 
0.033158f, 0.015490f, 0.023451f, 0.014207f, 0.020612f, 0.037232f, 0.052423f, 0.030053f, 0.060568f, 0.036939f, 0.049680f, 0.041397f, 0.050098f, +0.195018f, 0.209201f, 0.087828f, 0.260217f, 0.093181f, 0.121321f, 0.069391f, 0.106396f, 0.164186f, 0.198805f, 0.107603f, 0.229185f, 0.317297f, 0.366984f, 0.288711f, 0.369258f, +0.016555f, 0.020202f, 0.007893f, 0.028837f, 0.018126f, 0.026846f, 0.014291f, 0.027018f, 0.031874f, 0.043904f, 0.022116f, 0.058083f, 0.037815f, 0.049754f, 0.036429f, 0.057451f, +0.703495f, 0.905143f, 0.377327f, 0.848522f, 0.505672f, 0.789667f, 0.448483f, 0.521926f, 0.611465f, 0.888033f, 0.477266f, 0.771548f, 1.176077f, 1.631483f, 1.274476f, 1.237200f, +0.220282f, 0.283833f, 0.146653f, 0.278147f, 0.170438f, 0.266545f, 0.187629f, 0.184162f, 0.376363f, 0.547385f, 0.364629f, 0.497154f, 0.299394f, 0.415929f, 0.402712f, 0.329717f, +1.840585f, 2.039535f, 0.994918f, 1.994256f, 0.936729f, 1.259819f, 0.837271f, 0.868514f, 1.516310f, 1.896550f, 1.192755f, 1.718708f, 2.349554f, 2.807058f, 2.565998f, 2.220304f, +0.227982f, 0.287380f, 0.130470f, 0.322469f, 0.265875f, 0.406774f, 0.251599f, 0.321813f, 0.429522f, 0.611144f, 0.357708f, 0.635570f, 0.408586f, 0.555304f, 0.472427f, 0.504051f, +0.000052f, 0.000065f, 0.000100f, 0.000342f, 0.000029f, 0.000044f, 0.000092f, 0.000162f, 0.000001f, 0.000001f, 0.000003f, 0.000007f, 0.000034f, 0.000046f, 0.000133f, 0.000196f, +0.000022f, 0.000027f, 0.000052f, 0.000149f, 0.000013f, 0.000020f, 0.000051f, 0.000076f, 0.000001f, 0.000001f, 0.000003f, 0.000006f, 0.000012f, 0.000016f, 0.000056f, 0.000069f, +0.000263f, 0.000283f, 0.000508f, 0.001538f, 0.000103f, 0.000134f, 0.000329f, 0.000515f, 0.000005f, 0.000006f, 0.000013f, 0.000029f, 0.000132f, 0.000153f, 0.000515f, 0.000674f, +0.000002f, 0.000002f, 0.000003f, 0.000012f, 0.000001f, 0.000002f, 0.000005f, 0.000009f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000001f, 0.000005f, 0.000008f, +0.470253f, 0.634874f, 0.240951f, 0.705879f, 0.330169f, 0.541016f, 0.279740f, 
0.424104f, 0.392535f, 0.598183f, 0.292690f, 0.616404f, 0.694579f, 1.011039f, 0.719050f, 0.909331f, +0.154132f, 0.208390f, 0.098027f, 0.242206f, 0.116487f, 0.191153f, 0.122505f, 0.156642f, 0.252905f, 0.385960f, 0.234068f, 0.415756f, 0.185086f, 0.269804f, 0.237830f, 0.253669f, +1.832371f, 2.130530f, 0.946205f, 2.470782f, 0.910894f, 1.285467f, 0.777787f, 1.051058f, 1.449708f, 1.902637f, 1.089394f, 2.044989f, 2.066608f, 2.590735f, 2.156104f, 2.430420f, +0.148377f, 0.196255f, 0.081118f, 0.261187f, 0.169021f, 0.271341f, 0.152796f, 0.254603f, 0.268465f, 0.400815f, 0.213585f, 0.494381f, 0.234945f, 0.335051f, 0.259512f, 0.360705f, +0.309617f, 0.375455f, 0.148085f, 0.400008f, 0.327728f, 0.482353f, 0.259191f, 0.362322f, 0.265340f, 0.363192f, 0.184680f, 0.358621f, 0.726214f, 0.949485f, 0.701762f, 0.818296f, +0.280126f, 0.340185f, 0.166301f, 0.378870f, 0.319170f, 0.470438f, 0.313317f, 0.369400f, 0.471898f, 0.646861f, 0.407682f, 0.667689f, 0.534174f, 0.699416f, 0.640714f, 0.630119f, +1.402535f, 1.464757f, 0.676043f, 1.627722f, 1.051118f, 1.332362f, 0.837786f, 1.043893f, 1.139232f, 1.342966f, 0.799107f, 1.383146f, 2.511931f, 2.828462f, 2.446291f, 2.542591f, +0.130263f, 0.154759f, 0.066476f, 0.197357f, 0.223707f, 0.322576f, 0.188773f, 0.290033f, 0.241977f, 0.324495f, 0.179699f, 0.383525f, 0.327545f, 0.419560f, 0.337715f, 0.432815f, +6.947791f, 8.702914f, 3.988465f, 7.288769f, 7.833216f, 11.909050f, 7.435672f, 7.032106f, 5.826327f, 8.237849f, 4.867278f, 6.394279f, 12.785670f, 17.267600f, 14.829360f, 11.698580f, +3.010170f, 3.776050f, 2.144892f, 3.305917f, 3.653135f, 5.562004f, 4.304287f, 3.433242f, 4.962000f, 7.025939f, 5.145213f, 5.700945f, 4.503587f, 6.091100f, 6.483545f, 4.313815f, +13.769440f, 14.854330f, 7.966178f, 12.976170f, 10.991570f, 14.391820f, 10.515130f, 8.863966f, 10.944240f, 13.326730f, 9.214072f, 10.789600f, 19.348530f, 22.504810f, 22.616290f, 15.903050f, +1.866041f, 2.290021f, 1.142970f, 2.295700f, 3.413380f, 5.084189f, 3.457158f, 3.593489f, 3.391912f, 4.698549f, 
3.023363f, 4.365437f, 3.681352f, 4.870977f, 4.555757f, 3.950057f, +0.000961f, 0.001170f, 0.001974f, 0.005452f, 0.000833f, 0.001231f, 0.002830f, 0.004045f, 0.000018f, 0.000024f, 0.000053f, 0.000106f, 0.000695f, 0.000913f, 0.002886f, 0.003441f, +0.000553f, 0.000674f, 0.001410f, 0.003285f, 0.000516f, 0.000764f, 0.002177f, 0.002624f, 0.000020f, 0.000028f, 0.000075f, 0.000125f, 0.000325f, 0.000428f, 0.001677f, 0.001686f, +0.003649f, 0.003827f, 0.007555f, 0.018599f, 0.002241f, 0.002852f, 0.007670f, 0.009772f, 0.000064f, 0.000076f, 0.000193f, 0.000342f, 0.002017f, 0.002280f, 0.008435f, 0.008964f, +0.000024f, 0.000029f, 0.000053f, 0.000162f, 0.000034f, 0.000050f, 0.000124f, 0.000195f, 0.000001f, 0.000001f, 0.000003f, 0.000007f, 0.000019f, 0.000024f, 0.000084f, 0.000110f, +4.224910f, 5.553086f, 2.316954f, 5.515960f, 4.652723f, 7.422377f, 4.219181f, 5.198149f, 3.402521f, 5.047993f, 2.715395f, 4.647225f, 6.869253f, 9.734575f, 7.611129f, 7.821954f, +1.916048f, 2.522040f, 1.304252f, 2.618806f, 2.271315f, 3.628624f, 2.556547f, 2.656515f, 3.033243f, 4.506650f, 3.004656f, 4.337041f, 2.532731f, 3.594390f, 3.483244f, 3.019174f, +12.470200f, 14.115930f, 6.892046f, 14.625140f, 9.723295f, 13.358830f, 8.886060f, 9.758383f, 9.518709f, 12.162280f, 7.655701f, 11.678680f, 15.481760f, 18.895000f, 17.287600f, 15.836130f, +1.104810f, 1.422671f, 0.646460f, 1.691521f, 1.974000f, 3.085199f, 1.909953f, 2.586277f, 1.928617f, 2.803265f, 1.642225f, 3.089053f, 1.925700f, 2.673600f, 2.276580f, 2.571467f, +0.209508f, 0.262451f, 0.118824f, 0.247244f, 0.198685f, 0.302087f, 0.186333f, 0.200646f, 0.189160f, 0.267472f, 0.156122f, 0.233531f, 0.453624f, 0.612681f, 0.519803f, 0.466900f, +0.127832f, 0.160368f, 0.089991f, 0.157928f, 0.130493f, 0.198693f, 0.151903f, 0.137957f, 0.226874f, 0.321265f, 0.232422f, 0.293220f, 0.225022f, 0.304364f, 0.320055f, 0.242464f, +1.013751f, 1.093701f, 0.579442f, 1.074683f, 0.680686f, 0.891319f, 0.643349f, 0.617495f, 0.867523f, 1.056450f, 0.721592f, 0.962097f, 1.676029f, 
1.949571f, 1.935529f, 1.549646f, +0.096754f, 0.118746f, 0.058550f, 0.133900f, 0.148869f, 0.221754f, 0.148965f, 0.176301f, 0.189353f, 0.262314f, 0.166749f, 0.274141f, 0.224581f, 0.297175f, 0.274582f, 0.271074f, +4.630146f, 5.991390f, 3.151894f, 4.436947f, 4.676983f, 7.345436f, 5.264580f, 3.835246f, 4.090663f, 5.974858f, 4.052312f, 4.100838f, 7.865519f, 10.973640f, 10.817920f, 6.573840f, +1.352852f, 1.753122f, 1.143096f, 1.357167f, 1.470967f, 2.313573f, 2.055209f, 1.262768f, 2.349454f, 3.436605f, 2.888893f, 2.465694f, 1.868417f, 2.610513f, 3.189669f, 1.634778f, +9.801819f, 10.923420f, 6.724476f, 8.437606f, 7.010163f, 9.481977f, 7.952448f, 5.163907f, 8.207805f, 10.324760f, 8.194282f, 7.391436f, 12.714350f, 15.276950f, 17.623240f, 9.545726f, +1.365028f, 1.730513f, 0.991455f, 1.533973f, 2.237087f, 3.442190f, 2.686799f, 2.151279f, 2.614062f, 3.740676f, 2.762991f, 3.073132f, 2.485900f, 3.397875f, 3.647999f, 2.436474f, +0.007855f, 0.009879f, 0.019133f, 0.040705f, 0.006102f, 0.009315f, 0.024580f, 0.027062f, 0.000153f, 0.000218f, 0.000544f, 0.000832f, 0.005247f, 0.007116f, 0.025825f, 0.023718f, +0.003049f, 0.003841f, 0.009219f, 0.016543f, 0.002550f, 0.003898f, 0.012749f, 0.011839f, 0.000117f, 0.000166f, 0.000515f, 0.000665f, 0.001656f, 0.002249f, 0.010117f, 0.007837f, +0.031865f, 0.034515f, 0.078224f, 0.148340f, 0.017528f, 0.023044f, 0.071153f, 0.069827f, 0.000590f, 0.000721f, 0.002108f, 0.002873f, 0.016255f, 0.018983f, 0.080623f, 0.065999f, +0.000219f, 0.000270f, 0.000569f, 0.001330f, 0.000276f, 0.000413f, 0.001186f, 0.001435f, 0.000009f, 0.000013f, 0.000035f, 0.000059f, 0.000157f, 0.000208f, 0.000823f, 0.000831f, +5.212938f, 7.078063f, 3.390005f, 6.216820f, 5.143397f, 8.476192f, 5.530812f, 5.248964f, 4.422999f, 6.778751f, 4.185687f, 5.518129f, 7.824029f, 11.453890f, 10.279880f, 8.138012f, +1.594347f, 2.167921f, 1.286933f, 1.990499f, 1.693291f, 2.794545f, 2.260090f, 1.809042f, 2.659100f, 4.081278f, 3.123489f, 3.472986f, 1.945456f, 2.852151f, 3.172737f, 2.118374f, 
+16.435430f, 19.219060f, 10.771440f, 17.607180f, 11.481510f, 16.295540f, 12.442640f, 10.525560f, 13.217110f, 17.445710f, 12.605540f, 14.812710f, 18.835790f, 23.747910f, 24.941130f, 17.599270f, +1.496322f, 1.990477f, 1.038239f, 2.092653f, 2.395314f, 3.867350f, 2.748248f, 2.866636f, 2.751912f, 4.132069f, 2.778683f, 4.026205f, 2.407588f, 3.453066f, 3.375158f, 2.936679f, +0.394890f, 0.438765f, 0.194291f, 0.523148f, 0.444215f, 0.599057f, 0.361402f, 0.503594f, 0.356777f, 0.447460f, 0.255449f, 0.494464f, 0.852030f, 1.020708f, 0.846974f, 0.984478f, +0.267580f, 0.297741f, 0.163412f, 0.371104f, 0.324006f, 0.437579f, 0.327194f, 0.384532f, 0.475218f, 0.596868f, 0.422334f, 0.689484f, 0.469379f, 0.563117f, 0.579154f, 0.567764f, +1.290638f, 1.235035f, 0.639963f, 1.535946f, 1.027952f, 1.193896f, 0.842838f, 1.046844f, 1.105216f, 1.193776f, 0.797498f, 1.375967f, 2.126369f, 2.193835f, 2.130241f, 2.207051f, +0.116704f, 0.127041f, 0.061266f, 0.181310f, 0.212998f, 0.281417f, 0.184895f, 0.283170f, 0.228551f, 0.280828f, 0.174601f, 0.371456f, 0.269945f, 0.316827f, 0.286316f, 0.365774f, +12.690050f, 14.564790f, 7.493976f, 13.651360f, 15.205020f, 21.181020f, 14.847630f, 13.997080f, 11.219040f, 14.534410f, 9.641323f, 12.625750f, 21.482270f, 26.583460f, 25.631180f, 20.155550f, +4.117730f, 4.732900f, 3.018298f, 4.637284f, 5.310836f, 7.408858f, 6.437068f, 5.118074f, 7.155954f, 9.284063f, 7.633153f, 8.430681f, 5.667156f, 7.023041f, 8.392842f, 5.566384f, +18.145680f, 17.936300f, 10.799330f, 17.535110f, 15.393850f, 18.468260f, 15.149280f, 12.729760f, 15.205010f, 16.964750f, 13.168690f, 15.371320f, 23.455500f, 24.997420f, 28.203830f, 19.768890f, +2.394157f, 2.692115f, 1.508537f, 3.020309f, 4.654214f, 6.351938f, 4.849207f, 5.024387f, 4.587963f, 5.823204f, 4.206836f, 6.054916f, 4.344888f, 5.267568f, 5.531228f, 4.780569f, +0.001041f, 0.001161f, 0.002199f, 0.006055f, 0.000959f, 0.001299f, 0.003351f, 0.004775f, 0.000020f, 0.000026f, 0.000063f, 0.000124f, 0.000693f, 0.000833f, 0.002958f, 0.003516f, 
+0.000449f, 0.000501f, 0.001177f, 0.002733f, 0.000445f, 0.000604f, 0.001930f, 0.002320f, 0.000017f, 0.000022f, 0.000066f, 0.000110f, 0.000243f, 0.000293f, 0.001287f, 0.001290f, +0.002852f, 0.002740f, 0.006073f, 0.014904f, 0.001861f, 0.002170f, 0.006553f, 0.008322f, 0.000053f, 0.000057f, 0.000164f, 0.000289f, 0.001450f, 0.001502f, 0.006238f, 0.006608f, +0.000019f, 0.000020f, 0.000042f, 0.000127f, 0.000028f, 0.000037f, 0.000103f, 0.000162f, 0.000001f, 0.000001f, 0.000003f, 0.000006f, 0.000013f, 0.000016f, 0.000060f, 0.000079f, +6.133343f, 7.386472f, 3.460087f, 8.211186f, 7.178230f, 10.492430f, 6.696197f, 8.223635f, 5.207445f, 7.078894f, 4.275104f, 7.293275f, 9.173376f, 11.911310f, 10.455830f, 10.711240f, +2.083226f, 2.512493f, 1.458752f, 2.919700f, 2.624446f, 3.841714f, 3.038812f, 3.147585f, 3.476812f, 4.733155f, 3.542897f, 5.097679f, 2.533138f, 3.293956f, 3.583798f, 3.096442f, +13.061520f, 13.547290f, 7.426055f, 15.708150f, 10.823410f, 13.625160f, 10.175360f, 11.138670f, 10.510960f, 12.305580f, 8.696391f, 13.224010f, 14.916960f, 16.681290f, 17.135020f, 15.646390f, +1.126632f, 1.329297f, 0.678151f, 1.768793f, 2.139302f, 3.063592f, 2.129303f, 2.874120f, 2.073407f, 2.761376f, 1.816189f, 3.405406f, 1.806438f, 2.298019f, 2.196884f, 2.473548f, +0.203340f, 0.253277f, 0.090867f, 0.302038f, 0.169831f, 0.256749f, 0.125494f, 0.215871f, 0.223325f, 0.313986f, 0.145229f, 0.347028f, 0.483624f, 0.649486f, 0.436647f, 0.626539f, +0.112357f, 0.140152f, 0.062321f, 0.174715f, 0.101012f, 0.152930f, 0.092648f, 0.134414f, 0.242566f, 0.341532f, 0.195795f, 0.394595f, 0.217257f, 0.292189f, 0.243474f, 0.294651f, +0.914231f, 0.980723f, 0.411731f, 1.219880f, 0.540631f, 0.703898f, 0.402605f, 0.617305f, 0.951680f, 1.152345f, 0.623708f, 1.328439f, 1.660333f, 1.920331f, 1.510751f, 1.932228f, +0.083575f, 0.101988f, 0.039849f, 0.145580f, 0.113251f, 0.167738f, 0.089289f, 0.168812f, 0.198960f, 0.274055f, 0.138050f, 0.362559f, 0.213093f, 0.280370f, 0.205280f, 0.323740f, +3.344346f, 4.302962f, 1.793774f, 
4.033789f, 2.975166f, 4.646077f, 2.638691f, 3.070796f, 3.594147f, 5.219791f, 2.805333f, 4.535102f, 6.240695f, 8.657247f, 6.762838f, 6.565035f, +0.884918f, 1.140219f, 0.589135f, 1.117374f, 0.847392f, 1.325221f, 0.932862f, 0.915625f, 1.869413f, 2.718888f, 1.811128f, 2.469391f, 1.342504f, 1.865052f, 1.805788f, 1.478472f, +6.578467f, 7.289539f, 3.555952f, 7.127703f, 4.143573f, 5.572745f, 3.703626f, 3.841826f, 6.700861f, 8.381214f, 5.271012f, 7.595297f, 9.373489f, 11.198690f, 10.236990f, 8.857850f, +0.877490f, 1.106111f, 0.502173f, 1.241168f, 1.266520f, 1.937704f, 1.198516f, 1.532986f, 2.044099f, 2.908435f, 1.702335f, 3.024681f, 1.755387f, 2.385723f, 2.029661f, 2.165527f, +0.001790f, 0.002239f, 0.003436f, 0.011679f, 0.001225f, 0.001859f, 0.003888f, 0.006838f, 0.000043f, 0.000060f, 0.000119f, 0.000290f, 0.001314f, 0.001772f, 0.005095f, 0.007475f, +0.000629f, 0.000788f, 0.001500f, 0.004298f, 0.000464f, 0.000705f, 0.001826f, 0.002709f, 0.000029f, 0.000042f, 0.000102f, 0.000210f, 0.000376f, 0.000507f, 0.001808f, 0.002237f, +0.006749f, 0.007269f, 0.013054f, 0.039546f, 0.003270f, 0.004274f, 0.010458f, 0.016395f, 0.000152f, 0.000185f, 0.000428f, 0.000932f, 0.003782f, 0.004392f, 0.014780f, 0.019328f, +0.000044f, 0.000054f, 0.000091f, 0.000340f, 0.000049f, 0.000073f, 0.000167f, 0.000323f, 0.000002f, 0.000003f, 0.000007f, 0.000018f, 0.000035f, 0.000046f, 0.000145f, 0.000233f, +3.118375f, 4.210014f, 1.597812f, 4.680868f, 2.709721f, 4.440161f, 2.295847f, 3.480652f, 3.218460f, 4.904609f, 2.399814f, 5.054003f, 5.141208f, 7.483608f, 5.322334f, 6.730781f, +0.863704f, 1.167747f, 0.549310f, 1.357239f, 0.807872f, 1.325700f, 0.849603f, 1.086355f, 1.752274f, 2.674158f, 1.621761f, 2.880600f, 1.157690f, 1.687589f, 1.487594f, 1.586667f, +9.135415f, 10.621910f, 4.717375f, 12.318260f, 5.620499f, 7.931733f, 4.799191f, 6.485356f, 8.936540f, 11.728560f, 6.715427f, 12.606070f, 11.500590f, 14.417330f, 11.998630f, 13.525190f, +0.796626f, 1.053683f, 0.435518f, 1.402294f, 1.123106f, 1.802998f, 
1.015299f, 1.691778f, 1.782172f, 2.660762f, 1.417861f, 3.281886f, 1.407994f, 2.007922f, 1.555221f, 2.161660f, +0.034844f, 0.042253f, 0.016665f, 0.045017f, 0.034170f, 0.050291f, 0.027024f, 0.037777f, 0.026612f, 0.036426f, 0.018522f, 0.035968f, 0.131474f, 0.171895f, 0.127047f, 0.148144f, +0.038531f, 0.046792f, 0.022874f, 0.052113f, 0.040673f, 0.059949f, 0.039927f, 0.047074f, 0.057846f, 0.079294f, 0.049975f, 0.081847f, 0.118198f, 0.154761f, 0.141772f, 0.139428f, +0.190693f, 0.199153f, 0.091917f, 0.221311f, 0.132403f, 0.167830f, 0.105531f, 0.131493f, 0.138040f, 0.162727f, 0.096828f, 0.167596f, 0.549415f, 0.618648f, 0.535058f, 0.556122f, +0.021196f, 0.025182f, 0.010817f, 0.032114f, 0.033724f, 0.048629f, 0.028458f, 0.043723f, 0.035090f, 0.047056f, 0.026059f, 0.055617f, 0.085739f, 0.109826f, 0.088402f, 0.113295f, +1.249919f, 1.565669f, 0.717532f, 1.311262f, 1.305569f, 1.984892f, 1.239310f, 1.172048f, 0.934119f, 1.320752f, 0.780358f, 1.025178f, 3.700241f, 4.997336f, 4.291694f, 3.385631f, +0.661879f, 0.830281f, 0.471621f, 0.726908f, 0.744180f, 1.133035f, 0.876826f, 0.699385f, 0.972336f, 1.376779f, 1.008238f, 1.117138f, 1.593005f, 2.154539f, 2.293355f, 1.525879f, +2.992750f, 3.228549f, 1.731427f, 2.820335f, 2.213291f, 2.897974f, 2.117354f, 1.784871f, 2.119884f, 2.581369f, 1.784753f, 2.089930f, 6.765085f, 7.868660f, 7.907637f, 5.560397f, +0.485391f, 0.595676f, 0.297307f, 0.597153f, 0.822583f, 1.225228f, 0.833133f, 0.865988f, 0.786299f, 1.089198f, 0.700863f, 1.011977f, 1.540456f, 2.038252f, 1.906349f, 1.652895f, +0.000681f, 0.000829f, 0.001399f, 0.003864f, 0.000547f, 0.000809f, 0.001859f, 0.002656f, 0.000011f, 0.000015f, 0.000034f, 0.000067f, 0.000793f, 0.001041f, 0.003291f, 0.003923f, +0.000479f, 0.000584f, 0.001222f, 0.002846f, 0.000414f, 0.000613f, 0.001747f, 0.002106f, 0.000016f, 0.000021f, 0.000058f, 0.000097f, 0.000454f, 0.000596f, 0.002336f, 0.002349f, +0.003125f, 0.003277f, 0.006469f, 0.015926f, 0.001778f, 0.002262f, 0.006085f, 0.007752f, 0.000049f, 0.000058f, 
0.000147f, 0.000261f, 0.002778f, 0.003141f, 0.011620f, 0.012348f, +0.000025f, 0.000030f, 0.000055f, 0.000166f, 0.000033f, 0.000047f, 0.000118f, 0.000186f, 0.000001f, 0.000001f, 0.000003f, 0.000006f, 0.000031f, 0.000040f, 0.000138f, 0.000181f, +0.620495f, 0.815559f, 0.340282f, 0.810107f, 0.633072f, 1.009924f, 0.574082f, 0.707285f, 0.445343f, 0.660712f, 0.355407f, 0.608257f, 1.622938f, 2.299902f, 1.798214f, 1.848024f, +0.343938f, 0.452715f, 0.234118f, 0.470085f, 0.377725f, 0.603448f, 0.425159f, 0.441784f, 0.485236f, 0.720941f, 0.480663f, 0.693808f, 0.731364f, 1.037934f, 1.005839f, 0.871832f, +2.212654f, 2.504664f, 1.222893f, 2.595016f, 1.598373f, 2.196003f, 1.460743f, 1.604141f, 1.505187f, 1.923213f, 1.210591f, 1.846742f, 4.419077f, 5.393344f, 4.934532f, 4.520228f, +0.234609f, 0.302107f, 0.137277f, 0.359198f, 0.388354f, 0.606966f, 0.375754f, 0.508811f, 0.364985f, 0.530509f, 0.310786f, 0.584594f, 0.657834f, 0.913323f, 0.777697f, 0.878433f, +0.022636f, 0.028356f, 0.012838f, 0.026713f, 0.019888f, 0.030238f, 0.018651f, 0.020084f, 0.018214f, 0.025754f, 0.015032f, 0.022486f, 0.078842f, 0.106488f, 0.090345f, 0.081150f, +0.016881f, 0.021177f, 0.011884f, 0.020855f, 0.015965f, 0.024308f, 0.018584f, 0.016878f, 0.026700f, 0.037808f, 0.027352f, 0.034507f, 0.047802f, 0.064656f, 0.067989f, 0.051507f, +0.132326f, 0.142761f, 0.075635f, 0.140279f, 0.082316f, 0.107788f, 0.077801f, 0.074674f, 0.100917f, 0.122895f, 0.083941f, 0.111919f, 0.351937f, 0.409376f, 0.406428f, 0.325399f, +0.015115f, 0.018550f, 0.009147f, 0.020918f, 0.021546f, 0.032094f, 0.021559f, 0.025516f, 0.026362f, 0.036519f, 0.023215f, 0.038166f, 0.056438f, 0.074681f, 0.069004f, 0.068122f, +0.799688f, 1.034793f, 0.544374f, 0.766320f, 0.748370f, 1.175352f, 0.842392f, 0.613682f, 0.629639f, 0.919657f, 0.623736f, 0.631205f, 2.185367f, 3.048932f, 3.005667f, 1.826485f, +0.285580f, 0.370075f, 0.241302f, 0.286491f, 0.287677f, 0.452466f, 0.401938f, 0.246960f, 0.441995f, 0.646517f, 0.543478f, 0.463863f, 0.634487f, 0.886493f, 
1.083165f, 0.555147f, +2.045274f, 2.279310f, 1.403147f, 1.760613f, 1.355181f, 1.833024f, 1.537341f, 0.998269f, 1.526315f, 1.919981f, 1.523800f, 1.374504f, 4.267860f, 5.128055f, 5.915637f, 3.204239f, +0.340881f, 0.432152f, 0.247591f, 0.383071f, 0.517569f, 0.796380f, 0.621614f, 0.497717f, 0.581767f, 0.832499f, 0.614912f, 0.683935f, 0.998656f, 1.365022f, 1.465504f, 0.978800f, +0.005345f, 0.006722f, 0.013019f, 0.027697f, 0.003847f, 0.005872f, 0.015495f, 0.017060f, 0.000093f, 0.000132f, 0.000330f, 0.000504f, 0.005744f, 0.007789f, 0.028268f, 0.025962f, +0.002536f, 0.003194f, 0.007667f, 0.013758f, 0.001965f, 0.003004f, 0.009823f, 0.009122f, 0.000087f, 0.000123f, 0.000382f, 0.000493f, 0.002216f, 0.003009f, 0.013536f, 0.010484f, +0.026195f, 0.028373f, 0.064306f, 0.121945f, 0.013350f, 0.017550f, 0.054191f, 0.053181f, 0.000432f, 0.000528f, 0.001544f, 0.002105f, 0.021497f, 0.025105f, 0.106620f, 0.087281f, +0.000215f, 0.000265f, 0.000560f, 0.001309f, 0.000251f, 0.000376f, 0.001081f, 0.001308f, 0.000008f, 0.000011f, 0.000031f, 0.000052f, 0.000248f, 0.000330f, 0.001303f, 0.001315f, +0.735012f, 0.997990f, 0.477983f, 0.876557f, 0.671872f, 1.107228f, 0.722479f, 0.685662f, 0.555777f, 0.851792f, 0.525958f, 0.693387f, 1.774652f, 2.597980f, 2.331690f, 1.845870f, +0.274756f, 0.373600f, 0.221779f, 0.343025f, 0.270346f, 0.446169f, 0.360840f, 0.288826f, 0.408386f, 0.626805f, 0.479707f, 0.533383f, 0.539332f, 0.790692f, 0.879567f, 0.587270f, +2.799702f, 3.273879f, 1.834866f, 2.999304f, 1.811983f, 2.571721f, 1.963666f, 1.661118f, 2.006502f, 2.648448f, 1.913658f, 2.248731f, 5.161616f, 6.507694f, 6.834676f, 4.822769f, +0.305051f, 0.405793f, 0.211663f, 0.426623f, 0.452412f, 0.730441f, 0.519072f, 0.541432f, 0.499981f, 0.750736f, 0.504845f, 0.731502f, 0.789588f, 1.132461f, 1.106910f, 0.963108f, +0.029955f, 0.033283f, 0.014738f, 0.039684f, 0.031218f, 0.042100f, 0.025399f, 0.035391f, 0.024119f, 0.030250f, 0.017269f, 0.033427f, 0.103973f, 0.124557f, 0.103356f, 0.120135f, +0.024808f, 0.027605f, 
0.015151f, 0.034407f, 0.027831f, 0.037586f, 0.028104f, 0.033030f, 0.039265f, 0.049317f, 0.034896f, 0.056969f, 0.070007f, 0.083988f, 0.086380f, 0.084681f, +0.118282f, 0.113186f, 0.058650f, 0.140763f, 0.087279f, 0.101369f, 0.071562f, 0.088883f, 0.090268f, 0.097501f, 0.065135f, 0.112381f, 0.313489f, 0.323436f, 0.314060f, 0.325384f, +0.012800f, 0.013934f, 0.006720f, 0.019886f, 0.021644f, 0.028596f, 0.018788f, 0.028774f, 0.022340f, 0.027450f, 0.017067f, 0.036309f, 0.047629f, 0.055901f, 0.050518f, 0.064538f, +1.538826f, 1.766161f, 0.908737f, 1.655396f, 1.708196f, 2.379564f, 1.668045f, 1.572491f, 1.212422f, 1.570709f, 1.041922f, 1.364444f, 4.190611f, 5.185716f, 4.999952f, 3.931805f, +0.610290f, 0.701465f, 0.447343f, 0.687293f, 0.729232f, 1.017312f, 0.883875f, 0.702764f, 0.945188f, 1.226277f, 1.008218f, 1.113559f, 1.351184f, 1.674460f, 2.001053f, 1.327158f, +2.658387f, 2.627714f, 1.582129f, 2.568938f, 2.089377f, 2.506660f, 2.056182f, 1.727786f, 1.985199f, 2.214954f, 1.719332f, 2.006913f, 5.527910f, 5.891303f, 6.646979f, 4.659062f, +0.419773f, 0.472014f, 0.264495f, 0.529557f, 0.756019f, 1.031792f, 0.787693f, 0.816148f, 0.716892f, 0.909905f, 0.657339f, 0.946111f, 1.225494f, 1.485740f, 1.560107f, 1.348380f, +0.000497f, 0.000555f, 0.001051f, 0.002892f, 0.000425f, 0.000575f, 0.001483f, 0.002113f, 0.000009f, 0.000011f, 0.000027f, 0.000053f, 0.000532f, 0.000640f, 0.002273f, 0.002702f, +0.000262f, 0.000293f, 0.000687f, 0.001596f, 0.000241f, 0.000326f, 0.001044f, 0.001255f, 0.000009f, 0.000011f, 0.000034f, 0.000057f, 0.000228f, 0.000275f, 0.001209f, 0.001212f, +0.001646f, 0.001581f, 0.003505f, 0.008602f, 0.000995f, 0.001160f, 0.003504f, 0.004450f, 0.000027f, 0.000029f, 0.000084f, 0.000149f, 0.001346f, 0.001394f, 0.005792f, 0.006135f, +0.000013f, 0.000014f, 0.000029f, 0.000087f, 0.000018f, 0.000024f, 0.000066f, 0.000104f, 0.000000f, 0.000001f, 0.000002f, 0.000003f, 0.000015f, 0.000017f, 0.000067f, 0.000088f, +0.607169f, 0.731222f, 0.342530f, 0.812864f, 0.658346f, 0.962305f, 
0.614136f, 0.754224f, 0.459419f, 0.624525f, 0.377165f, 0.643438f, 1.460873f, 1.896892f, 1.665106f, 1.705780f, +0.252058f, 0.303997f, 0.176500f, 0.353266f, 0.294189f, 0.430639f, 0.340638f, 0.352831f, 0.374902f, 0.510372f, 0.382028f, 0.549679f, 0.493054f, 0.641141f, 0.697556f, 0.602696f, +1.562156f, 1.620255f, 0.888156f, 1.878694f, 1.199277f, 1.509723f, 1.127470f, 1.234209f, 1.120329f, 1.311612f, 0.926920f, 1.409504f, 2.870004f, 3.209459f, 3.296755f, 3.010344f, +0.161261f, 0.190270f, 0.097067f, 0.253177f, 0.283690f, 0.406259f, 0.282364f, 0.381133f, 0.264487f, 0.352245f, 0.231676f, 0.434399f, 0.415951f, 0.529142f, 0.505854f, 0.569559f, +0.014329f, 0.017848f, 0.006403f, 0.021284f, 0.011087f, 0.016762f, 0.008193f, 0.014093f, 0.014025f, 0.019718f, 0.009120f, 0.021793f, 0.054824f, 0.073626f, 0.049498f, 0.071024f, +0.009677f, 0.012071f, 0.005368f, 0.015048f, 0.008060f, 0.012203f, 0.007393f, 0.010725f, 0.018618f, 0.026215f, 0.015028f, 0.030288f, 0.030101f, 0.040483f, 0.033734f, 0.040824f, +0.077833f, 0.083494f, 0.035053f, 0.103854f, 0.042642f, 0.055519f, 0.031755f, 0.048689f, 0.072206f, 0.087430f, 0.047322f, 0.100791f, 0.227391f, 0.262999f, 0.206905f, 0.264629f, +0.008515f, 0.010391f, 0.004060f, 0.014833f, 0.010690f, 0.015834f, 0.008428f, 0.015935f, 0.018066f, 0.024885f, 0.012535f, 0.032921f, 0.034927f, 0.045954f, 0.033647f, 0.053063f, +0.376732f, 0.484717f, 0.202064f, 0.454396f, 0.310496f, 0.484877f, 0.275381f, 0.320477f, 0.360819f, 0.524018f, 0.281629f, 0.455282f, 1.130902f, 1.568815f, 1.225522f, 1.189677f, +0.121836f, 0.156986f, 0.081113f, 0.153841f, 0.108089f, 0.169039f, 0.118991f, 0.116793f, 0.229377f, 0.333608f, 0.222226f, 0.302995f, 0.297344f, 0.413081f, 0.399955f, 0.327459f, +0.895292f, 0.992064f, 0.483945f, 0.970039f, 0.522443f, 0.702641f, 0.466973f, 0.484398f, 0.812723f, 1.016527f, 0.639302f, 0.921206f, 2.052165f, 2.451761f, 2.241213f, 1.939274f, +0.142922f, 0.180158f, 0.081792f, 0.202156f, 0.191114f, 0.292394f, 0.180853f, 0.231323f, 0.296709f, 0.422171f, 
0.247100f, 0.439044f, 0.459939f, 0.625097f, 0.531803f, 0.567402f, +0.000795f, 0.000994f, 0.001525f, 0.005183f, 0.000504f, 0.000765f, 0.001599f, 0.002812f, 0.000017f, 0.000024f, 0.000047f, 0.000115f, 0.000938f, 0.001265f, 0.003637f, 0.005337f, +0.000341f, 0.000428f, 0.000813f, 0.002331f, 0.000233f, 0.000354f, 0.000918f, 0.001361f, 0.000014f, 0.000020f, 0.000049f, 0.000102f, 0.000328f, 0.000442f, 0.001577f, 0.001952f, +0.003619f, 0.003897f, 0.006999f, 0.021204f, 0.001624f, 0.002123f, 0.005195f, 0.008144f, 0.000073f, 0.000088f, 0.000204f, 0.000445f, 0.003262f, 0.003788f, 0.012748f, 0.016671f, +0.000028f, 0.000035f, 0.000058f, 0.000218f, 0.000029f, 0.000044f, 0.000099f, 0.000192f, 0.000001f, 0.000002f, 0.000004f, 0.000010f, 0.000036f, 0.000048f, 0.000149f, 0.000241f, +0.286771f, 0.387160f, 0.146937f, 0.430460f, 0.230864f, 0.378294f, 0.195602f, 0.296546f, 0.263771f, 0.401960f, 0.196678f, 0.414204f, 0.760577f, 1.107105f, 0.787372f, 0.995734f, +0.097079f, 0.131252f, 0.061741f, 0.152551f, 0.084125f, 0.138048f, 0.088471f, 0.113124f, 0.175523f, 0.267867f, 0.162449f, 0.288545f, 0.209326f, 0.305138f, 0.268977f, 0.286890f, +1.014972f, 1.180126f, 0.524114f, 1.368595f, 0.578529f, 0.816429f, 0.493990f, 0.667550f, 0.884845f, 1.161295f, 0.664923f, 1.248181f, 2.055497f, 2.576806f, 2.144513f, 2.417353f, +0.105925f, 0.140104f, 0.057909f, 0.186458f, 0.138353f, 0.222107f, 0.125072f, 0.208406f, 0.211185f, 0.315298f, 0.168015f, 0.388900f, 0.301172f, 0.429497f, 0.332664f, 0.462382f, +0.177993f, 0.215843f, 0.085131f, 0.229958f, 0.140275f, 0.206458f, 0.110940f, 0.155082f, 0.117641f, 0.161025f, 0.081880f, 0.158998f, 0.406517f, 0.531499f, 0.392830f, 0.458062f, +0.236337f, 0.287007f, 0.140305f, 0.319645f, 0.200488f, 0.295508f, 0.196812f, 0.232041f, 0.307046f, 0.420887f, 0.265263f, 0.434439f, 0.438830f, 0.574578f, 0.526353f, 0.517649f, +0.965408f, 1.008238f, 0.465341f, 1.120412f, 0.538689f, 0.682824f, 0.429358f, 0.534986f, 0.604765f, 0.712918f, 0.424209f, 0.734248f, 1.683607f, 1.895760f, 
1.639612f, 1.704157f, +0.079810f, 0.094818f, 0.040729f, 0.120917f, 0.102048f, 0.147149f, 0.086113f, 0.132304f, 0.114337f, 0.153328f, 0.084910f, 0.181221f, 0.195408f, 0.250303f, 0.201476f, 0.258211f, +3.817794f, 4.782230f, 2.191652f, 4.005161f, 3.204754f, 4.872273f, 3.042109f, 2.877001f, 2.469094f, 3.491055f, 2.062666f, 2.709783f, 6.841082f, 9.239177f, 7.934572f, 6.259424f, +2.427481f, 3.045107f, 1.729698f, 2.665979f, 2.193408f, 3.339527f, 2.584371f, 2.061380f, 3.086019f, 4.369645f, 3.199964f, 3.545591f, 3.536378f, 4.782951f, 5.091113f, 3.387363f, +9.059423f, 9.773217f, 5.241244f, 8.537504f, 5.384347f, 7.049997f, 5.150957f, 4.342115f, 5.553247f, 6.762153f, 4.675338f, 5.474780f, 12.395600f, 14.417680f, 14.489090f, 10.188270f, +1.092810f, 1.341106f, 0.669358f, 1.344432f, 1.488323f, 2.216839f, 1.507411f, 1.566855f, 1.531952f, 2.122093f, 1.365498f, 1.971644f, 2.099261f, 2.777635f, 2.597883f, 2.252488f, +0.001096f, 0.001334f, 0.002251f, 0.006218f, 0.000708f, 0.001046f, 0.002404f, 0.003436f, 0.000016f, 0.000022f, 0.000047f, 0.000093f, 0.000772f, 0.001014f, 0.003206f, 0.003822f, +0.000926f, 0.001129f, 0.002361f, 0.005499f, 0.000644f, 0.000952f, 0.002713f, 0.003271f, 0.000026f, 0.000036f, 0.000097f, 0.000162f, 0.000530f, 0.000697f, 0.002733f, 0.002748f, +0.004984f, 0.005226f, 0.010318f, 0.025401f, 0.002278f, 0.002900f, 0.007799f, 0.009936f, 0.000068f, 0.000080f, 0.000204f, 0.000360f, 0.002682f, 0.003032f, 0.011218f, 0.011921f, +0.000030f, 0.000035f, 0.000065f, 0.000197f, 0.000031f, 0.000045f, 0.000113f, 0.000177f, 0.000001f, 0.000001f, 0.000003f, 0.000006f, 0.000022f, 0.000029f, 0.000099f, 0.000130f, +2.241948f, 2.946745f, 1.229492f, 2.927044f, 1.838249f, 2.932514f, 1.666960f, 2.053741f, 1.392470f, 2.065874f, 1.111266f, 1.901862f, 3.549385f, 5.029914f, 3.932717f, 4.041651f, +1.492153f, 1.964080f, 1.015708f, 2.039438f, 1.316962f, 2.103962f, 1.482346f, 1.540310f, 1.821761f, 2.706687f, 1.804591f, 2.604820f, 1.920576f, 2.725635f, 2.641353f, 2.289447f, +7.923192f, 8.968837f, 
4.379000f, 9.292375f, 4.599696f, 6.319517f, 4.203634f, 4.616294f, 4.664250f, 5.959623f, 3.751360f, 5.722657f, 9.578170f, 11.689850f, 10.695400f, 9.797409f, +0.624818f, 0.804583f, 0.365601f, 0.956629f, 0.831193f, 1.299087f, 0.804225f, 1.089005f, 0.841180f, 1.222664f, 0.716268f, 1.347313f, 1.060450f, 1.472305f, 1.253673f, 1.416062f, +0.177101f, 0.221854f, 0.100444f, 0.209000f, 0.125047f, 0.190126f, 0.117273f, 0.126281f, 0.123318f, 0.174371f, 0.101780f, 0.152244f, 0.373380f, 0.504300f, 0.427853f, 0.384307f, +0.158584f, 0.198946f, 0.111639f, 0.195919f, 0.120530f, 0.183523f, 0.140305f, 0.127424f, 0.217061f, 0.307368f, 0.222368f, 0.280537f, 0.271819f, 0.367660f, 0.386615f, 0.292888f, +1.026053f, 1.106972f, 0.586473f, 1.087723f, 0.512949f, 0.671676f, 0.484812f, 0.465330f, 0.677168f, 0.824639f, 0.563257f, 0.750990f, 1.651791f, 1.921378f, 1.907539f, 1.527236f, +0.087166f, 0.106978f, 0.052748f, 0.120631f, 0.099855f, 0.148744f, 0.099920f, 0.118255f, 0.131561f, 0.182254f, 0.115856f, 0.190471f, 0.197009f, 0.260691f, 0.240871f, 0.237794f, +3.741116f, 4.840989f, 2.546702f, 3.585013f, 2.813592f, 4.418887f, 3.167080f, 2.307218f, 2.549044f, 3.723155f, 2.525145f, 2.555384f, 6.188271f, 8.633617f, 8.511103f, 5.172030f, +1.604190f, 2.078823f, 1.355464f, 1.609307f, 1.298665f, 2.042573f, 1.814473f, 1.114854f, 2.148570f, 3.142766f, 2.641885f, 2.254871f, 2.157320f, 3.014163f, 3.682871f, 1.887555f, +9.482695f, 10.567780f, 6.505543f, 8.162897f, 5.049426f, 6.829877f, 5.728155f, 3.719567f, 6.123915f, 7.703394f, 6.113826f, 5.514815f, 11.977190f, 14.391210f, 16.601460f, 8.992273f, +1.175455f, 1.490182f, 0.853763f, 1.320937f, 1.434287f, 2.206927f, 1.722616f, 1.379272f, 1.736030f, 2.484228f, 1.834936f, 2.040904f, 2.084411f, 2.849096f, 3.058824f, 2.042968f, +0.013173f, 0.016568f, 0.032089f, 0.068269f, 0.007620f, 0.011632f, 0.030693f, 0.033793f, 0.000198f, 0.000282f, 0.000704f, 0.001076f, 0.008570f, 0.011620f, 0.042175f, 0.038733f, +0.007505f, 0.009453f, 0.022693f, 0.040718f, 0.004673f, 0.007144f, 
0.023365f, 0.021696f, 0.000222f, 0.000316f, 0.000978f, 0.001261f, 0.003969f, 0.005390f, 0.024248f, 0.018782f, +0.063989f, 0.069311f, 0.157087f, 0.297889f, 0.026208f, 0.034454f, 0.106385f, 0.104403f, 0.000914f, 0.001117f, 0.003264f, 0.004450f, 0.031785f, 0.037120f, 0.157650f, 0.129054f, +0.000391f, 0.000482f, 0.001017f, 0.002377f, 0.000367f, 0.000549f, 0.001578f, 0.001909f, 0.000013f, 0.000018f, 0.000048f, 0.000081f, 0.000273f, 0.000362f, 0.001433f, 0.001446f, +4.067536f, 5.522850f, 2.645143f, 4.850842f, 2.988049f, 4.924232f, 3.213117f, 3.049378f, 2.661599f, 4.079205f, 2.518794f, 3.320609f, 5.944492f, 8.702362f, 7.810380f, 6.183048f, +1.825705f, 2.482511f, 1.473682f, 2.279343f, 1.443671f, 2.382581f, 1.926914f, 1.542358f, 2.348332f, 3.604301f, 2.758448f, 3.067100f, 2.169225f, 3.180209f, 3.537670f, 2.362032f, +15.354960f, 17.955580f, 10.063320f, 16.449670f, 7.986476f, 11.335090f, 8.655035f, 7.321528f, 9.523162f, 12.569940f, 9.082511f, 10.672820f, 17.135110f, 21.603710f, 22.689190f, 16.010230f, +1.244319f, 1.655251f, 0.863384f, 1.740219f, 1.483058f, 2.394468f, 1.701576f, 1.774876f, 1.764893f, 2.650034f, 1.782062f, 2.582140f, 1.949505f, 2.796064f, 2.732979f, 2.377928f, +0.241019f, 0.267798f, 0.118584f, 0.319301f, 0.201864f, 0.272228f, 0.164231f, 0.228847f, 0.167938f, 0.210623f, 0.120242f, 0.232749f, 0.506368f, 0.606614f, 0.503363f, 0.585082f, +0.239678f, 0.266694f, 0.146372f, 0.332407f, 0.216081f, 0.291823f, 0.218207f, 0.256446f, 0.328280f, 0.412316f, 0.291748f, 0.476295f, 0.409387f, 0.491144f, 0.505131f, 0.495197f, +0.943189f, 0.902555f, 0.467680f, 1.122459f, 0.559314f, 0.649605f, 0.458593f, 0.569594f, 0.622900f, 0.672813f, 0.449470f, 0.775496f, 1.513102f, 1.561111f, 1.515858f, 1.570515f, +0.075914f, 0.082637f, 0.039852f, 0.117939f, 0.103157f, 0.136293f, 0.089546f, 0.137142f, 0.114655f, 0.140881f, 0.087590f, 0.186345f, 0.170980f, 0.200674f, 0.181349f, 0.231676f, +7.403310f, 8.497022f, 4.371947f, 7.964131f, 6.604479f, 9.200220f, 6.449239f, 6.079793f, 5.047719f, 
6.539386f, 4.337867f, 5.680635f, 12.203320f, 15.101130f, 14.560180f, 11.449660f, +3.525489f, 4.052182f, 2.584185f, 3.970317f, 3.385426f, 4.722823f, 4.103350f, 3.262548f, 4.725047f, 6.130229f, 5.040140f, 5.566744f, 4.724568f, 5.854937f, 6.996906f, 4.640557f, +12.675190f, 12.528940f, 7.543589f, 12.248690f, 8.006030f, 9.604968f, 7.878834f, 6.620495f, 8.191150f, 9.139148f, 7.094156f, 8.280747f, 15.953700f, 17.002460f, 19.183360f, 13.446180f, +1.488583f, 1.673840f, 0.937943f, 1.877898f, 2.154545f, 2.940462f, 2.244813f, 2.325907f, 2.199974f, 2.792284f, 2.017219f, 2.903392f, 2.630477f, 3.189085f, 3.348710f, 2.894246f, +0.001260f, 0.001406f, 0.002663f, 0.007332f, 0.000865f, 0.001171f, 0.003022f, 0.004305f, 0.000019f, 0.000024f, 0.000058f, 0.000116f, 0.000817f, 0.000983f, 0.003488f, 0.004145f, +0.000797f, 0.000891f, 0.002092f, 0.004856f, 0.000589f, 0.000799f, 0.002554f, 0.003069f, 0.000024f, 0.000030f, 0.000090f, 0.000151f, 0.000420f, 0.000506f, 0.002227f, 0.002232f, +0.004135f, 0.003973f, 0.008806f, 0.021610f, 0.002009f, 0.002342f, 0.007074f, 0.008984f, 0.000059f, 0.000064f, 0.000183f, 0.000323f, 0.002047f, 0.002120f, 0.008807f, 0.009329f, +0.000024f, 0.000026f, 0.000054f, 0.000163f, 0.000027f, 0.000035f, 0.000099f, 0.000156f, 0.000001f, 0.000001f, 0.000003f, 0.000006f, 0.000017f, 0.000020f, 0.000076f, 0.000099f, +3.455431f, 4.161424f, 1.949360f, 4.626055f, 3.011003f, 4.401187f, 2.808808f, 3.449512f, 2.262594f, 3.075724f, 1.857499f, 3.168871f, 5.032335f, 6.534312f, 5.735866f, 5.875980f, +1.722425f, 2.077345f, 1.206106f, 2.414027f, 1.615587f, 2.364927f, 1.870668f, 1.937627f, 2.216982f, 3.018086f, 2.259121f, 3.250525f, 2.039380f, 2.651900f, 2.885246f, 2.492885f, +8.810837f, 9.138522f, 5.009354f, 10.596160f, 5.435962f, 6.843119f, 5.110484f, 5.594300f, 5.468181f, 6.401808f, 4.524178f, 6.879608f, 9.798044f, 10.956930f, 11.254950f, 10.277160f, +0.676464f, 0.798150f, 0.407183f, 1.062037f, 0.956365f, 1.369565f, 0.951895f, 1.284863f, 0.960118f, 1.278691f, 0.841010f, 1.576917f, 
1.056140f, 1.343544f, 1.284415f, 1.446167f, +0.228433f, 0.284531f, 0.102080f, 0.339311f, 0.142050f, 0.214750f, 0.104965f, 0.180559f, 0.193486f, 0.272033f, 0.125825f, 0.300661f, 0.529028f, 0.710461f, 0.477641f, 0.685360f, +0.185240f, 0.231065f, 0.102748f, 0.288048f, 0.123993f, 0.187723f, 0.113725f, 0.164994f, 0.308420f, 0.434253f, 0.248950f, 0.501722f, 0.348774f, 0.469066f, 0.390861f, 0.473018f, +1.229731f, 1.319169f, 0.553819f, 1.640859f, 0.541431f, 0.704941f, 0.403202f, 0.618219f, 0.987239f, 1.195401f, 0.647012f, 1.378074f, 2.174626f, 2.515159f, 1.978710f, 2.530742f, +0.100062f, 0.122107f, 0.047710f, 0.174299f, 0.100954f, 0.149525f, 0.079594f, 0.150482f, 0.183711f, 0.253051f, 0.127469f, 0.334772f, 0.248427f, 0.326859f, 0.239318f, 0.377420f, +3.591150f, 4.620509f, 1.926149f, 4.331472f, 2.378605f, 3.714476f, 2.109598f, 2.455060f, 2.976427f, 4.322674f, 2.323185f, 3.755662f, 6.525153f, 9.051854f, 7.071096f, 6.864277f, +1.394519f, 1.796840f, 0.928402f, 1.760840f, 0.994248f, 1.554886f, 1.094529f, 1.074305f, 2.271976f, 3.304378f, 2.201140f, 3.001154f, 2.060024f, 2.861856f, 2.770917f, 2.268661f, +8.457960f, 9.372188f, 4.571901f, 9.164115f, 3.966475f, 5.334564f, 3.545332f, 3.677625f, 6.644290f, 8.310457f, 5.226512f, 7.531175f, 11.734850f, 14.019860f, 12.815880f, 11.089310f, +1.004205f, 1.265840f, 0.574689f, 1.420400f, 1.079148f, 1.651036f, 1.021205f, 1.306193f, 1.804094f, 2.566945f, 1.502458f, 2.669542f, 1.956089f, 2.658494f, 2.261722f, 2.413121f, +0.003991f, 0.004991f, 0.007659f, 0.026031f, 0.002033f, 0.003086f, 0.006452f, 0.011348f, 0.000073f, 0.000103f, 0.000204f, 0.000499f, 0.002852f, 0.003845f, 0.011058f, 0.016223f, +0.002059f, 0.002579f, 0.004905f, 0.014060f, 0.001129f, 0.001716f, 0.004448f, 0.006598f, 0.000074f, 0.000105f, 0.000257f, 0.000530f, 0.001196f, 0.001615f, 0.005757f, 0.007124f, +0.018012f, 0.019399f, 0.034839f, 0.105541f, 0.006497f, 0.008493f, 0.020780f, 0.032577f, 0.000313f, 0.000380f, 0.000881f, 0.001918f, 0.009828f, 0.011412f, 0.038407f, 0.050226f, 
+0.000105f, 0.000129f, 0.000216f, 0.000807f, 0.000087f, 0.000130f, 0.000295f, 0.000571f, 0.000004f, 0.000006f, 0.000012f, 0.000034f, 0.000081f, 0.000107f, 0.000334f, 0.000539f, +3.233650f, 4.365643f, 1.656877f, 4.853903f, 2.092079f, 3.428091f, 1.772542f, 2.687287f, 2.573889f, 3.922348f, 1.919196f, 4.041823f, 5.191171f, 7.556333f, 5.374056f, 6.796191f, +1.314403f, 1.777102f, 0.835951f, 2.065475f, 0.915366f, 1.502096f, 0.962650f, 1.230904f, 2.056567f, 3.138541f, 1.903389f, 3.380832f, 1.715502f, 2.500725f, 2.204366f, 2.351175f, +11.342570f, 13.188210f, 5.857114f, 15.294410f, 5.195736f, 7.332300f, 4.436497f, 5.995231f, 8.557162f, 11.230650f, 6.430341f, 12.070910f, 13.903970f, 17.430250f, 14.506100f, 16.351670f, +0.880394f, 1.164482f, 0.481314f, 1.549750f, 0.924129f, 1.483566f, 0.835421f, 1.392050f, 1.518970f, 2.267805f, 1.208463f, 2.797197f, 1.515161f, 2.160751f, 1.673594f, 2.326191f, +0.021046f, 0.025521f, 0.010066f, 0.027190f, 0.018000f, 0.026492f, 0.014236f, 0.019900f, 0.014587f, 0.019967f, 0.010153f, 0.019715f, 0.044225f, 0.057821f, 0.042736f, 0.049832f, +0.031997f, 0.038857f, 0.018995f, 0.043276f, 0.029457f, 0.043418f, 0.028917f, 0.034093f, 0.043594f, 0.059757f, 0.037662f, 0.061682f, 0.054663f, 0.071572f, 0.065565f, 0.064481f, +0.140039f, 0.146252f, 0.067501f, 0.162524f, 0.084800f, 0.107490f, 0.067589f, 0.084217f, 0.091997f, 0.108450f, 0.064531f, 0.111694f, 0.224697f, 0.253011f, 0.218825f, 0.227439f, +0.016690f, 0.019829f, 0.008517f, 0.025287f, 0.023160f, 0.033395f, 0.019543f, 0.030026f, 0.025075f, 0.033626f, 0.018622f, 0.039743f, 0.037598f, 0.048160f, 0.038766f, 0.049682f, +0.439727f, 0.550810f, 0.252431f, 0.461308f, 0.400576f, 0.609006f, 0.380246f, 0.359609f, 0.298235f, 0.421674f, 0.249143f, 0.327307f, 0.724959f, 0.979089f, 0.840838f, 0.663320f, +0.320138f, 0.401591f, 0.228114f, 0.351591f, 0.313920f, 0.477953f, 0.369875f, 0.295024f, 0.426804f, 0.604333f, 0.442563f, 0.490364f, 0.429099f, 0.580356f, 0.617748f, 0.411017f, +1.280099f, 1.380958f, 0.740589f, 
1.206351f, 0.825648f, 1.081062f, 0.789859f, 0.665830f, 0.822885f, 1.002021f, 0.692795f, 0.811257f, 1.611492f, 1.874371f, 1.883656f, 1.324526f, +0.222616f, 0.273196f, 0.136354f, 0.273873f, 0.329024f, 0.490077f, 0.333244f, 0.346385f, 0.327269f, 0.453341f, 0.291710f, 0.421200f, 0.393455f, 0.520599f, 0.486909f, 0.422174f, +0.000023f, 0.000029f, 0.000048f, 0.000133f, 0.000016f, 0.000024f, 0.000056f, 0.000080f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000015f, 0.000020f, 0.000063f, 0.000075f, +0.000023f, 0.000028f, 0.000058f, 0.000135f, 0.000017f, 0.000025f, 0.000072f, 0.000087f, 0.000001f, 0.000001f, 0.000002f, 0.000004f, 0.000012f, 0.000016f, 0.000062f, 0.000062f, +0.000131f, 0.000137f, 0.000271f, 0.000666f, 0.000065f, 0.000083f, 0.000222f, 0.000283f, 0.000002f, 0.000002f, 0.000006f, 0.000010f, 0.000065f, 0.000073f, 0.000271f, 0.000288f, +0.000001f, 0.000001f, 0.000002f, 0.000007f, 0.000001f, 0.000002f, 0.000005f, 0.000007f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000003f, 0.000005f, +0.241574f, 0.317516f, 0.132480f, 0.315394f, 0.214955f, 0.342912f, 0.194925f, 0.240153f, 0.157347f, 0.233441f, 0.125572f, 0.214908f, 0.351880f, 0.498657f, 0.389883f, 0.400682f, +0.184097f, 0.242322f, 0.125315f, 0.251619f, 0.176330f, 0.281702f, 0.198473f, 0.206234f, 0.235708f, 0.350204f, 0.233486f, 0.337024f, 0.218013f, 0.309399f, 0.299832f, 0.259885f, +1.047359f, 1.185582f, 0.578856f, 1.228350f, 0.659847f, 0.906564f, 0.603031f, 0.662229f, 0.646586f, 0.826159f, 0.520036f, 0.793309f, 1.164919f, 1.421746f, 1.300798f, 1.191583f, +0.119074f, 0.153333f, 0.069674f, 0.182309f, 0.171903f, 0.268671f, 0.166326f, 0.225223f, 0.168113f, 0.244354f, 0.143149f, 0.269266f, 0.185939f, 0.258154f, 0.219819f, 0.248292f, +0.010686f, 0.013386f, 0.006060f, 0.012610f, 0.008188f, 0.012449f, 0.007679f, 0.008269f, 0.007803f, 0.011033f, 0.006440f, 0.009633f, 0.020728f, 0.027996f, 0.023752f, 0.021334f, +0.010956f, 0.013744f, 0.007713f, 0.013535f, 0.009037f, 0.013759f, 0.010519f, 
0.009554f, 0.015726f, 0.022269f, 0.016111f, 0.020325f, 0.017278f, 0.023370f, 0.024575f, 0.018617f, +0.075950f, 0.081939f, 0.043411f, 0.080515f, 0.041205f, 0.053955f, 0.038945f, 0.037380f, 0.052566f, 0.064013f, 0.043723f, 0.058296f, 0.112494f, 0.130854f, 0.129911f, 0.104011f, +0.009302f, 0.011416f, 0.005629f, 0.012873f, 0.011564f, 0.017226f, 0.011572f, 0.013695f, 0.014723f, 0.020396f, 0.012966f, 0.021316f, 0.019343f, 0.025596f, 0.023650f, 0.023348f, +0.219882f, 0.284526f, 0.149681f, 0.210707f, 0.179460f, 0.281851f, 0.202007f, 0.147162f, 0.157114f, 0.229482f, 0.155641f, 0.157505f, 0.334638f, 0.466873f, 0.460248f, 0.279683f, +0.107958f, 0.139899f, 0.091219f, 0.108302f, 0.094845f, 0.149174f, 0.132516f, 0.081421f, 0.151634f, 0.221799f, 0.186449f, 0.159136f, 0.133576f, 0.186630f, 0.228035f, 0.116873f, +0.683741f, 0.761980f, 0.469076f, 0.588578f, 0.395112f, 0.534431f, 0.448222f, 0.291052f, 0.463061f, 0.582493f, 0.462298f, 0.417004f, 0.794570f, 0.954717f, 1.101345f, 0.596550f, +0.122190f, 0.154906f, 0.088749f, 0.137313f, 0.161802f, 0.248963f, 0.194328f, 0.155595f, 0.189249f, 0.270813f, 0.200031f, 0.222485f, 0.199356f, 0.272491f, 0.292550f, 0.195392f, +0.000144f, 0.000181f, 0.000350f, 0.000745f, 0.000090f, 0.000138f, 0.000363f, 0.000400f, 0.000002f, 0.000003f, 0.000008f, 0.000012f, 0.000086f, 0.000117f, 0.000423f, 0.000389f, +0.000094f, 0.000118f, 0.000283f, 0.000509f, 0.000063f, 0.000097f, 0.000317f, 0.000294f, 0.000003f, 0.000004f, 0.000013f, 0.000017f, 0.000046f, 0.000062f, 0.000279f, 0.000216f, +0.000856f, 0.000927f, 0.002102f, 0.003986f, 0.000381f, 0.000500f, 0.001545f, 0.001516f, 0.000013f, 0.000016f, 0.000046f, 0.000062f, 0.000391f, 0.000457f, 0.001941f, 0.001589f, +0.000008f, 0.000009f, 0.000020f, 0.000046f, 0.000008f, 0.000011f, 0.000033f, 0.000040f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000005f, 0.000006f, 0.000025f, 0.000026f, +0.223652f, 0.303672f, 0.145442f, 0.266722f, 0.178298f, 0.293831f, 0.191728f, 0.181958f, 0.153473f, 0.235215f, 0.145239f, 
0.191473f, 0.300727f, 0.440246f, 0.395121f, 0.312796f, +0.114943f, 0.156294f, 0.092780f, 0.143503f, 0.098637f, 0.162786f, 0.131653f, 0.105379f, 0.155046f, 0.237969f, 0.182123f, 0.202501f, 0.125653f, 0.184215f, 0.204921f, 0.136821f, +1.035765f, 1.211189f, 0.678819f, 1.109609f, 0.584638f, 0.829767f, 0.633578f, 0.535961f, 0.673663f, 0.889190f, 0.642492f, 0.754989f, 1.063449f, 1.340782f, 1.408151f, 0.993637f, +0.121008f, 0.160970f, 0.083962f, 0.169233f, 0.156516f, 0.252702f, 0.179577f, 0.187313f, 0.179990f, 0.270260f, 0.181741f, 0.263336f, 0.174431f, 0.250176f, 0.244531f, 0.212763f, +0.004201f, 0.004668f, 0.002067f, 0.005566f, 0.003818f, 0.005150f, 0.003107f, 0.004329f, 0.003070f, 0.003850f, 0.002198f, 0.004255f, 0.008121f, 0.009728f, 0.008073f, 0.009383f, +0.004784f, 0.005323f, 0.002921f, 0.006634f, 0.004680f, 0.006321f, 0.004726f, 0.005554f, 0.006871f, 0.008630f, 0.006106f, 0.009969f, 0.007518f, 0.009019f, 0.009276f, 0.009093f, +0.020169f, 0.019300f, 0.010001f, 0.024003f, 0.012980f, 0.015075f, 0.010642f, 0.013218f, 0.013969f, 0.015088f, 0.010079f, 0.017391f, 0.029770f, 0.030714f, 0.029824f, 0.030899f, +0.002340f, 0.002548f, 0.001229f, 0.003636f, 0.003451f, 0.004560f, 0.002996f, 0.004588f, 0.003707f, 0.004555f, 0.002832f, 0.006025f, 0.004850f, 0.005692f, 0.005144f, 0.006571f, +0.125703f, 0.144273f, 0.074232f, 0.135225f, 0.121696f, 0.169526f, 0.118835f, 0.112028f, 0.089880f, 0.116441f, 0.077240f, 0.101150f, 0.190640f, 0.235910f, 0.227459f, 0.178867f, +0.068541f, 0.078780f, 0.050240f, 0.077189f, 0.071427f, 0.099644f, 0.086574f, 0.068834f, 0.096335f, 0.124984f, 0.102759f, 0.113496f, 0.084510f, 0.104729f, 0.125156f, 0.083007f, +0.264025f, 0.260979f, 0.157134f, 0.255141f, 0.180978f, 0.217123f, 0.178103f, 0.149658f, 0.178931f, 0.199639f, 0.154968f, 0.180888f, 0.305752f, 0.325852f, 0.367649f, 0.257696f, +0.044703f, 0.050266f, 0.028167f, 0.056394f, 0.070216f, 0.095828f, 0.073157f, 0.075800f, 0.069283f, 0.087936f, 0.063527f, 0.091435f, 0.072679f, 0.088113f, 0.092524f, 
0.079967f, +0.000004f, 0.000004f, 0.000008f, 0.000023f, 0.000003f, 0.000004f, 0.000010f, 0.000015f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000002f, 0.000003f, 0.000010f, 0.000012f, +0.000003f, 0.000003f, 0.000008f, 0.000018f, 0.000002f, 0.000003f, 0.000010f, 0.000012f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000002f, 0.000007f, 0.000007f, +0.000016f, 0.000015f, 0.000034f, 0.000084f, 0.000008f, 0.000010f, 0.000030f, 0.000038f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000007f, 0.000008f, 0.000031f, 0.000033f, +0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, +0.054888f, 0.066102f, 0.030964f, 0.073482f, 0.051904f, 0.075868f, 0.048419f, 0.059463f, 0.037690f, 0.051235f, 0.030942f, 0.052787f, 0.073546f, 0.095497f, 0.083828f, 0.085876f, +0.031327f, 0.037782f, 0.021936f, 0.043906f, 0.031888f, 0.046679f, 0.036923f, 0.038245f, 0.042286f, 0.057565f, 0.043089f, 0.061999f, 0.034127f, 0.044377f, 0.048282f, 0.041716f, +0.171696f, 0.178082f, 0.097617f, 0.206487f, 0.114958f, 0.144716f, 0.108075f, 0.118306f, 0.111747f, 0.130826f, 0.092455f, 0.140591f, 0.175671f, 0.196449f, 0.201792f, 0.184261f, +0.019005f, 0.022423f, 0.011439f, 0.029837f, 0.029158f, 0.041755f, 0.029021f, 0.039173f, 0.028287f, 0.037673f, 0.024778f, 0.046459f, 0.027299f, 0.034728f, 0.033200f, 0.037381f, +0.013775f, 0.017158f, 0.006156f, 0.020461f, 0.009296f, 0.014054f, 0.006869f, 0.011816f, 0.012236f, 0.017203f, 0.007957f, 0.019013f, 0.029352f, 0.039418f, 0.026500f, 0.038025f, +0.012790f, 0.015954f, 0.007094f, 0.019889f, 0.009291f, 0.014066f, 0.008522f, 0.012363f, 0.022332f, 0.031444f, 0.018026f, 0.036329f, 0.022157f, 0.029799f, 0.024830f, 0.030050f, +0.090974f, 0.097590f, 0.040971f, 0.121388f, 0.043468f, 0.056595f, 0.032370f, 0.049633f, 0.076591f, 0.092740f, 0.050196f, 0.106912f, 0.148016f, 0.171194f, 0.134681f, 0.172255f, +0.010672f, 0.013023f, 0.005088f, 
0.018590f, 0.011685f, 0.017306f, 0.009212f, 0.017417f, 0.020547f, 0.028303f, 0.014257f, 0.037443f, 0.024378f, 0.032074f, 0.023484f, 0.037035f, +0.210946f, 0.271411f, 0.113143f, 0.254433f, 0.151628f, 0.236785f, 0.134480f, 0.156502f, 0.183351f, 0.266281f, 0.143110f, 0.231352f, 0.352652f, 0.489208f, 0.382157f, 0.370980f, +0.093793f, 0.120853f, 0.062443f, 0.118432f, 0.072571f, 0.113492f, 0.079890f, 0.078414f, 0.160251f, 0.233070f, 0.155255f, 0.211683f, 0.127479f, 0.177098f, 0.171470f, 0.140390f, +0.609503f, 0.675384f, 0.329463f, 0.660390f, 0.310194f, 0.417184f, 0.277259f, 0.287605f, 0.502120f, 0.628035f, 0.394976f, 0.569144f, 0.778046f, 0.929546f, 0.849720f, 0.735245f, +0.104328f, 0.131509f, 0.059705f, 0.147567f, 0.121668f, 0.186146f, 0.115136f, 0.147267f, 0.196556f, 0.279669f, 0.163693f, 0.290847f, 0.186975f, 0.254116f, 0.216190f, 0.230661f, +0.000044f, 0.000054f, 0.000083f, 0.000284f, 0.000024f, 0.000037f, 0.000076f, 0.000134f, 0.000001f, 0.000001f, 0.000002f, 0.000006f, 0.000029f, 0.000039f, 0.000111f, 0.000163f, +0.000026f, 0.000032f, 0.000061f, 0.000175f, 0.000015f, 0.000023f, 0.000060f, 0.000089f, 0.000001f, 0.000001f, 0.000003f, 0.000007f, 0.000014f, 0.000019f, 0.000066f, 0.000082f, +0.000241f, 0.000259f, 0.000466f, 0.001411f, 0.000094f, 0.000123f, 0.000302f, 0.000473f, 0.000004f, 0.000005f, 0.000012f, 0.000027f, 0.000121f, 0.000140f, 0.000473f, 0.000618f, +0.000002f, 0.000002f, 0.000004f, 0.000016f, 0.000002f, 0.000003f, 0.000006f, 0.000012f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000002f, 0.000006f, 0.000010f, +0.177699f, 0.239905f, 0.091050f, 0.266736f, 0.124764f, 0.204438f, 0.105708f, 0.160259f, 0.148330f, 0.226040f, 0.110601f, 0.232926f, 0.262466f, 0.382049f, 0.271713f, 0.343616f, +0.082704f, 0.111818f, 0.052600f, 0.129963f, 0.062505f, 0.102569f, 0.065734f, 0.084051f, 0.135704f, 0.207099f, 0.125596f, 0.223086f, 0.099314f, 0.144772f, 0.127615f, 0.136114f, +0.764670f, 0.889096f, 0.394863f, 1.031087f, 0.380127f, 0.536441f, 0.324580f, 
0.438619f, 0.604981f, 0.793993f, 0.454617f, 0.853398f, 0.862420f, 1.081145f, 0.899768f, 1.014243f, +0.085567f, 0.113178f, 0.046780f, 0.150623f, 0.097473f, 0.156479f, 0.088116f, 0.146826f, 0.154821f, 0.231146f, 0.123172f, 0.285104f, 0.135490f, 0.193220f, 0.149657f, 0.208015f, +0.031166f, 0.037793f, 0.014906f, 0.040264f, 0.032989f, 0.048553f, 0.026090f, 0.036471f, 0.026709f, 0.036559f, 0.018590f, 0.036098f, 0.073100f, 0.095574f, 0.070639f, 0.082369f, +0.040040f, 0.048624f, 0.023770f, 0.054154f, 0.045620f, 0.067242f, 0.044784f, 0.052800f, 0.067450f, 0.092459f, 0.058272f, 0.095436f, 0.076352f, 0.099970f, 0.091580f, 0.090066f, +0.155911f, 0.162827f, 0.075151f, 0.180943f, 0.116846f, 0.148110f, 0.093131f, 0.116043f, 0.126641f, 0.149289f, 0.088831f, 0.153755f, 0.279235f, 0.314422f, 0.271938f, 0.282643f, +0.020011f, 0.023774f, 0.010212f, 0.030318f, 0.034365f, 0.049553f, 0.028999f, 0.044554f, 0.037172f, 0.049848f, 0.027605f, 0.058916f, 0.050317f, 0.064452f, 0.051879f, 0.066488f, +0.496452f, 0.621864f, 0.284994f, 0.520817f, 0.559720f, 0.850957f, 0.531313f, 0.502477f, 0.416318f, 0.588633f, 0.347790f, 0.456901f, 0.913596f, 1.233851f, 1.059627f, 0.835918f, +0.305425f, 0.383135f, 0.217630f, 0.335433f, 0.370663f, 0.564346f, 0.436732f, 0.348352f, 0.503467f, 0.712883f, 0.522056f, 0.578443f, 0.456954f, 0.618030f, 0.657850f, 0.437699f, +1.086565f, 1.172176f, 0.628622f, 1.023968f, 0.867360f, 1.135678f, 0.829764f, 0.699468f, 0.863625f, 1.051631f, 0.727095f, 0.851422f, 1.526819f, 1.775886f, 1.784683f, 1.254932f, +0.203489f, 0.249724f, 0.124639f, 0.250343f, 0.372225f, 0.554424f, 0.376999f, 0.391865f, 0.369884f, 0.512371f, 0.329694f, 0.476045f, 0.401447f, 0.531174f, 0.496799f, 0.430749f, +0.000190f, 0.000232f, 0.000391f, 0.001079f, 0.000165f, 0.000244f, 0.000560f, 0.000801f, 0.000004f, 0.000005f, 0.000011f, 0.000021f, 0.000138f, 0.000181f, 0.000571f, 0.000681f, +0.000156f, 0.000190f, 0.000397f, 0.000924f, 0.000145f, 0.000215f, 0.000612f, 0.000738f, 0.000006f, 0.000008f, 0.000021f, 
0.000035f, 0.000092f, 0.000120f, 0.000471f, 0.000474f, +0.000798f, 0.000837f, 0.001652f, 0.004067f, 0.000490f, 0.000623f, 0.001677f, 0.002137f, 0.000014f, 0.000017f, 0.000042f, 0.000075f, 0.000441f, 0.000499f, 0.001844f, 0.001960f, +0.000007f, 0.000009f, 0.000016f, 0.000049f, 0.000010f, 0.000015f, 0.000038f, 0.000059f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000006f, 0.000007f, 0.000025f, 0.000033f, +0.380443f, 0.500042f, 0.208636f, 0.496699f, 0.418966f, 0.668367f, 0.379927f, 0.468081f, 0.306389f, 0.454559f, 0.244515f, 0.418471f, 0.618559f, 0.876575f, 0.685363f, 0.704348f, +0.244998f, 0.322483f, 0.166770f, 0.334856f, 0.290424f, 0.463978f, 0.326896f, 0.339678f, 0.387849f, 0.576248f, 0.384194f, 0.554560f, 0.323850f, 0.459601f, 0.445389f, 0.386050f, +1.240093f, 1.403752f, 0.685376f, 1.454390f, 0.966928f, 1.328462f, 0.883670f, 0.970418f, 0.946583f, 1.209472f, 0.761317f, 1.161381f, 1.539576f, 1.879004f, 1.719157f, 1.574816f, +0.151827f, 0.195509f, 0.088839f, 0.232455f, 0.271274f, 0.423980f, 0.262473f, 0.355416f, 0.265038f, 0.385235f, 0.225681f, 0.424509f, 0.264637f, 0.367416f, 0.312856f, 0.353380f, +0.019566f, 0.024511f, 0.011097f, 0.023090f, 0.018555f, 0.028212f, 0.017402f, 0.018739f, 0.017666f, 0.024979f, 0.014580f, 0.021810f, 0.042364f, 0.057219f, 0.048545f, 0.043604f, +0.016952f, 0.021267f, 0.011934f, 0.020943f, 0.017305f, 0.026349f, 0.020144f, 0.018295f, 0.030087f, 0.042604f, 0.030822f, 0.038885f, 0.029841f, 0.040363f, 0.042444f, 0.032154f, +0.104555f, 0.112801f, 0.059762f, 0.110840f, 0.070204f, 0.091928f, 0.066353f, 0.063687f, 0.089474f, 0.108959f, 0.074423f, 0.099228f, 0.172861f, 0.201073f, 0.199625f, 0.159826f, +0.013790f, 0.016924f, 0.008345f, 0.019084f, 0.021218f, 0.031606f, 0.021231f, 0.025128f, 0.026988f, 0.037387f, 0.023766f, 0.039072f, 0.032009f, 0.042355f, 0.039135f, 0.038635f, +0.306958f, 0.397202f, 0.208956f, 0.294150f, 0.310063f, 0.486969f, 0.349018f, 0.254259f, 0.271192f, 0.396106f, 0.268650f, 0.271867f, 0.521448f, 0.727503f, 0.717179f, 
0.435816f, +0.127356f, 0.165036f, 0.107609f, 0.127762f, 0.138475f, 0.217796f, 0.193474f, 0.118875f, 0.221174f, 0.323517f, 0.271956f, 0.232117f, 0.175890f, 0.245750f, 0.300271f, 0.153896f, +0.717628f, 0.799745f, 0.492324f, 0.617749f, 0.513240f, 0.694211f, 0.582229f, 0.378069f, 0.600924f, 0.755915f, 0.599934f, 0.541155f, 0.930866f, 1.118484f, 1.290264f, 0.698879f, +0.138107f, 0.175085f, 0.100311f, 0.155200f, 0.226337f, 0.348264f, 0.271837f, 0.217656f, 0.264478f, 0.378463f, 0.279546f, 0.310924f, 0.251511f, 0.343780f, 0.369087f, 0.246510f, +0.001443f, 0.001815f, 0.003515f, 0.007477f, 0.001121f, 0.001711f, 0.004515f, 0.004971f, 0.000028f, 0.000040f, 0.000100f, 0.000153f, 0.000964f, 0.001307f, 0.004744f, 0.004357f, +0.000795f, 0.001002f, 0.002405f, 0.004315f, 0.000665f, 0.001017f, 0.003326f, 0.003088f, 0.000031f, 0.000043f, 0.000134f, 0.000173f, 0.000432f, 0.000587f, 0.002639f, 0.002044f, +0.006464f, 0.007002f, 0.015869f, 0.030093f, 0.003556f, 0.004675f, 0.014434f, 0.014165f, 0.000120f, 0.000146f, 0.000428f, 0.000583f, 0.003298f, 0.003851f, 0.016355f, 0.013389f, +0.000061f, 0.000076f, 0.000159f, 0.000373f, 0.000077f, 0.000116f, 0.000332f, 0.000402f, 0.000003f, 0.000004f, 0.000010f, 0.000017f, 0.000044f, 0.000058f, 0.000231f, 0.000233f, +0.435520f, 0.591343f, 0.283221f, 0.519390f, 0.429710f, 0.708151f, 0.462077f, 0.438529f, 0.369523f, 0.566337f, 0.349697f, 0.461017f, 0.653665f, 0.956925f, 0.858841f, 0.679897f, +0.189143f, 0.257189f, 0.152674f, 0.236141f, 0.200882f, 0.331527f, 0.268123f, 0.214614f, 0.315459f, 0.484178f, 0.370551f, 0.412013f, 0.230797f, 0.338361f, 0.376394f, 0.251311f, +1.516405f, 1.773234f, 0.993820f, 1.624515f, 1.059334f, 1.503497f, 1.148012f, 0.971135f, 1.219469f, 1.609617f, 1.163042f, 1.366686f, 1.737873f, 2.191086f, 2.301178f, 1.623786f, +0.190783f, 0.253788f, 0.132377f, 0.266816f, 0.305406f, 0.493092f, 0.350405f, 0.365500f, 0.350872f, 0.526845f, 0.354286f, 0.513347f, 0.306971f, 0.440271f, 0.430337f, 0.374431f, +0.054962f, 0.061068f, 0.027042f, 
0.072813f, 0.061827f, 0.083378f, 0.050301f, 0.070091f, 0.049657f, 0.062278f, 0.035554f, 0.068821f, 0.118587f, 0.142064f, 0.117884f, 0.137022f, +0.052884f, 0.058844f, 0.032296f, 0.073344f, 0.064035f, 0.086482f, 0.064665f, 0.075998f, 0.093920f, 0.117963f, 0.083469f, 0.136267f, 0.092766f, 0.111293f, 0.114462f, 0.112211f, +0.198380f, 0.189833f, 0.098367f, 0.236085f, 0.158003f, 0.183510f, 0.129550f, 0.160907f, 0.169879f, 0.183491f, 0.122581f, 0.211495f, 0.326837f, 0.337207f, 0.327432f, 0.339238f, +0.024789f, 0.026985f, 0.013013f, 0.038512f, 0.045243f, 0.059776f, 0.039274f, 0.060148f, 0.048546f, 0.059650f, 0.037087f, 0.078901f, 0.057339f, 0.067297f, 0.060816f, 0.077694f, +1.253790f, 1.439016f, 0.740412f, 1.348768f, 1.502272f, 2.092706f, 1.466961f, 1.382926f, 1.108452f, 1.436014f, 0.952573f, 1.247437f, 2.122470f, 2.626473f, 2.532387f, 1.991389f, +0.577700f, 0.664006f, 0.423455f, 0.650592f, 0.745088f, 1.039432f, 0.903094f, 0.718045f, 1.003951f, 1.302516f, 1.070900f, 1.182790f, 0.795079f, 0.985304f, 1.177481f, 0.780941f, +1.979902f, 1.957057f, 1.178331f, 1.913282f, 1.679646f, 2.015099f, 1.652960f, 1.388964f, 1.659041f, 1.851049f, 1.436855f, 1.677188f, 2.559265f, 2.727505f, 3.077362f, 2.157013f, +0.360997f, 0.405924f, 0.227461f, 0.455410f, 0.701775f, 0.957762f, 0.731176f, 0.757590f, 0.691785f, 0.878038f, 0.634318f, 0.912976f, 0.655134f, 0.794258f, 0.834013f, 0.720827f, +0.000285f, 0.000318f, 0.000602f, 0.001657f, 0.000263f, 0.000355f, 0.000917f, 0.001307f, 0.000006f, 0.000007f, 0.000017f, 0.000034f, 0.000190f, 0.000228f, 0.000810f, 0.000962f, +0.000174f, 0.000195f, 0.000457f, 0.001062f, 0.000173f, 0.000235f, 0.000750f, 0.000902f, 0.000007f, 0.000008f, 0.000026f, 0.000043f, 0.000094f, 0.000114f, 0.000500f, 0.000501f, +0.000862f, 0.000828f, 0.001836f, 0.004506f, 0.000563f, 0.000656f, 0.001981f, 0.002516f, 0.000016f, 0.000017f, 0.000050f, 0.000087f, 0.000438f, 0.000454f, 0.001886f, 0.001998f, +0.000008f, 0.000008f, 0.000017f, 0.000053f, 0.000012f, 0.000015f, 0.000043f, 
0.000068f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000006f, 0.000007f, 0.000025f, 0.000033f, +0.763660f, 0.919686f, 0.430814f, 1.022371f, 0.893758f, 1.306407f, 0.833740f, 1.023921f, 0.648377f, 0.881390f, 0.532291f, 0.908082f, 1.142173f, 1.483072f, 1.301851f, 1.333652f, +0.368318f, 0.444213f, 0.257910f, 0.516208f, 0.464006f, 0.679221f, 0.537267f, 0.556498f, 0.614706f, 0.836829f, 0.626390f, 0.901278f, 0.447863f, 0.582377f, 0.633621f, 0.547456f, +1.795996f, 1.862791f, 1.021104f, 2.159916f, 1.488249f, 1.873498f, 1.399140f, 1.531599f, 1.445286f, 1.692052f, 1.195778f, 1.818339f, 2.051125f, 2.293725f, 2.356114f, 2.151423f, +0.214079f, 0.252589f, 0.128860f, 0.336101f, 0.406504f, 0.582135f, 0.404604f, 0.546132f, 0.393983f, 0.524709f, 0.345107f, 0.647085f, 0.343254f, 0.436663f, 0.417445f, 0.470016f, +0.031075f, 0.038706f, 0.013886f, 0.046158f, 0.025954f, 0.039237f, 0.019178f, 0.032990f, 0.034129f, 0.047984f, 0.022194f, 0.053033f, 0.073908f, 0.099256f, 0.066729f, 0.095749f, +0.024382f, 0.030414f, 0.013524f, 0.037914f, 0.021920f, 0.033187f, 0.020105f, 0.029168f, 0.052638f, 0.074114f, 0.042488f, 0.085629f, 0.047146f, 0.063406f, 0.052835f, 0.063941f, +0.154295f, 0.165516f, 0.069488f, 0.205879f, 0.091242f, 0.118797f, 0.067948f, 0.104182f, 0.160615f, 0.194481f, 0.105263f, 0.224200f, 0.280214f, 0.324094f, 0.254969f, 0.326102f, +0.019492f, 0.023786f, 0.009294f, 0.033953f, 0.026413f, 0.039121f, 0.020825f, 0.039371f, 0.046402f, 0.063917f, 0.032197f, 0.084558f, 0.049699f, 0.065389f, 0.047877f, 0.075504f, +0.362806f, 0.466800f, 0.194595f, 0.437599f, 0.322756f, 0.504022f, 0.286254f, 0.333130f, 0.389905f, 0.566261f, 0.304332f, 0.491983f, 0.677012f, 0.939168f, 0.733656f, 0.712197f, +0.136317f, 0.175645f, 0.090753f, 0.172125f, 0.130536f, 0.204143f, 0.143702f, 0.141047f, 0.287973f, 0.418830f, 0.278995f, 0.380397f, 0.206806f, 0.287301f, 0.278172f, 0.227751f, +0.788129f, 0.873318f, 0.426018f, 0.853929f, 0.496418f, 0.667639f, 0.443710f, 0.460267f, 0.802792f, 1.004105f, 0.631490f, 
0.909949f, 1.122984f, 1.341651f, 1.226435f, 1.061208f, +0.145276f, 0.183127f, 0.083139f, 0.205487f, 0.209684f, 0.320805f, 0.198425f, 0.253800f, 0.338419f, 0.481518f, 0.281837f, 0.500764f, 0.290621f, 0.394978f, 0.336029f, 0.358523f, +0.000538f, 0.000673f, 0.001033f, 0.003510f, 0.000368f, 0.000559f, 0.001169f, 0.002055f, 0.000013f, 0.000018f, 0.000036f, 0.000087f, 0.000395f, 0.000533f, 0.001532f, 0.002247f, +0.000269f, 0.000336f, 0.000640f, 0.001835f, 0.000198f, 0.000301f, 0.000780f, 0.001156f, 0.000013f, 0.000018f, 0.000044f, 0.000090f, 0.000160f, 0.000216f, 0.000772f, 0.000955f, +0.002240f, 0.002413f, 0.004334f, 0.013128f, 0.001085f, 0.001419f, 0.003472f, 0.005442f, 0.000050f, 0.000061f, 0.000142f, 0.000309f, 0.001255f, 0.001458f, 0.004906f, 0.006416f, +0.000020f, 0.000025f, 0.000042f, 0.000156f, 0.000023f, 0.000034f, 0.000077f, 0.000148f, 0.000001f, 0.000001f, 0.000003f, 0.000008f, 0.000016f, 0.000021f, 0.000066f, 0.000107f, +0.426317f, 0.575557f, 0.218439f, 0.639928f, 0.370450f, 0.607020f, 0.313868f, 0.475845f, 0.440000f, 0.670516f, 0.328082f, 0.690940f, 0.702861f, 1.023094f, 0.727623f, 0.920174f, +0.167669f, 0.226692f, 0.106636f, 0.263478f, 0.156830f, 0.257355f, 0.164932f, 0.210892f, 0.340165f, 0.519129f, 0.314829f, 0.559205f, 0.224740f, 0.327608f, 0.288784f, 0.308016f, +1.379246f, 1.603674f, 0.712220f, 1.859786f, 0.848572f, 1.197517f, 0.724572f, 0.979146f, 1.349220f, 1.770754f, 1.013881f, 1.903239f, 1.736335f, 2.176699f, 1.811529f, 2.042005f, +0.166207f, 0.219839f, 0.090866f, 0.292573f, 0.234323f, 0.376175f, 0.211830f, 0.352970f, 0.371830f, 0.555137f, 0.295820f, 0.684728f, 0.293762f, 0.418930f, 0.324479f, 0.451006f, +0.009544f, 0.011573f, 0.004565f, 0.012330f, 0.009359f, 0.013775f, 0.007402f, 0.010347f, 0.007289f, 0.009977f, 0.005073f, 0.009851f, 0.036011f, 0.047082f, 0.034798f, 0.040577f, +0.014986f, 0.018199f, 0.008897f, 0.020268f, 0.015819f, 0.023316f, 0.015529f, 0.018308f, 0.022498f, 0.030840f, 0.019437f, 0.031833f, 0.045971f, 0.060192f, 0.055140f, 
0.054228f, +0.057681f, 0.060240f, 0.027803f, 0.066942f, 0.040050f, 0.050765f, 0.031921f, 0.039774f, 0.041755f, 0.049222f, 0.029289f, 0.050695f, 0.166188f, 0.187130f, 0.161846f, 0.168217f, +0.008860f, 0.010526f, 0.004521f, 0.013424f, 0.014097f, 0.020327f, 0.011896f, 0.018276f, 0.014668f, 0.019670f, 0.010893f, 0.023248f, 0.035839f, 0.045908f, 0.036952f, 0.047358f, +0.243025f, 0.304417f, 0.139511f, 0.254952f, 0.253845f, 0.385927f, 0.240962f, 0.227884f, 0.181623f, 0.256797f, 0.151727f, 0.199328f, 0.719446f, 0.971643f, 0.834444f, 0.658276f, +0.182739f, 0.229233f, 0.130210f, 0.200693f, 0.205461f, 0.312821f, 0.242083f, 0.193094f, 0.268453f, 0.380116f, 0.278365f, 0.308431f, 0.439814f, 0.594848f, 0.633174f, 0.421281f, +0.642610f, 0.693242f, 0.371776f, 0.605589f, 0.475243f, 0.622260f, 0.454643f, 0.383252f, 0.455187f, 0.554278f, 0.383227f, 0.448755f, 1.452616f, 1.689578f, 1.697947f, 1.193942f, +0.144029f, 0.176754f, 0.088219f, 0.177192f, 0.244083f, 0.363559f, 0.247214f, 0.256963f, 0.233317f, 0.323195f, 0.207966f, 0.300282f, 0.457096f, 0.604806f, 0.565667f, 0.490460f, +0.000367f, 0.000447f, 0.000754f, 0.002082f, 0.000295f, 0.000436f, 0.001001f, 0.001431f, 0.000006f, 0.000008f, 0.000018f, 0.000036f, 0.000427f, 0.000561f, 0.001773f, 0.002114f, +0.000367f, 0.000447f, 0.000935f, 0.002177f, 0.000317f, 0.000469f, 0.001337f, 0.001611f, 0.000012f, 0.000016f, 0.000044f, 0.000074f, 0.000347f, 0.000456f, 0.001787f, 0.001797f, +0.001859f, 0.001949f, 0.003849f, 0.009475f, 0.001058f, 0.001346f, 0.003620f, 0.004612f, 0.000029f, 0.000034f, 0.000088f, 0.000155f, 0.001653f, 0.001869f, 0.006913f, 0.007347f, +0.000021f, 0.000025f, 0.000045f, 0.000137f, 0.000027f, 0.000039f, 0.000097f, 0.000153f, 0.000001f, 0.000001f, 0.000002f, 0.000005f, 0.000026f, 0.000033f, 0.000114f, 0.000149f, +0.152037f, 0.199832f, 0.083377f, 0.198496f, 0.155118f, 0.247456f, 0.140664f, 0.173302f, 0.109120f, 0.161891f, 0.087084f, 0.149038f, 0.397659f, 0.563532f, 0.440607f, 0.452811f, +0.119667f, 0.157514f, 0.081457f, 
0.163557f, 0.131422f, 0.209958f, 0.147926f, 0.153710f, 0.168829f, 0.250838f, 0.167237f, 0.241397f, 0.254464f, 0.361129f, 0.349963f, 0.303337f, +0.598732f, 0.677748f, 0.330908f, 0.702197f, 0.432511f, 0.594226f, 0.395269f, 0.434071f, 0.407295f, 0.520410f, 0.327579f, 0.499718f, 1.195777f, 1.459408f, 1.335256f, 1.223148f, +0.087729f, 0.112969f, 0.051333f, 0.134318f, 0.145221f, 0.226968f, 0.140509f, 0.190264f, 0.136482f, 0.198378f, 0.116215f, 0.218602f, 0.245989f, 0.341526f, 0.290811f, 0.328479f, +0.005752f, 0.007206f, 0.003262f, 0.006788f, 0.005054f, 0.007684f, 0.004740f, 0.005104f, 0.004628f, 0.006545f, 0.003820f, 0.005714f, 0.020036f, 0.027061f, 0.022959f, 0.020622f, +0.006091f, 0.007642f, 0.004288f, 0.007525f, 0.005761f, 0.008772f, 0.006706f, 0.006090f, 0.009635f, 0.013643f, 0.009870f, 0.012452f, 0.017249f, 0.023331f, 0.024534f, 0.018586f, +0.037136f, 0.040065f, 0.021226f, 0.039368f, 0.023101f, 0.030250f, 0.021834f, 0.020957f, 0.028322f, 0.034489f, 0.023557f, 0.031409f, 0.098768f, 0.114888f, 0.114061f, 0.091321f, +0.005862f, 0.007194f, 0.003547f, 0.008112f, 0.008356f, 0.012447f, 0.008361f, 0.009896f, 0.010224f, 0.014163f, 0.009003f, 0.014802f, 0.021888f, 0.028963f, 0.026761f, 0.026419f, +0.144259f, 0.186670f, 0.098202f, 0.138239f, 0.135001f, 0.212026f, 0.151962f, 0.110704f, 0.113583f, 0.165900f, 0.112518f, 0.113865f, 0.394227f, 0.550008f, 0.542204f, 0.329487f, +0.073153f, 0.094797f, 0.061811f, 0.073387f, 0.073690f, 0.115902f, 0.102959f, 0.063260f, 0.113220f, 0.165609f, 0.139215f, 0.118821f, 0.162528f, 0.227081f, 0.277460f, 0.142204f, +0.407457f, 0.454082f, 0.279533f, 0.350747f, 0.269978f, 0.365173f, 0.306267f, 0.198874f, 0.304071f, 0.382497f, 0.303570f, 0.273827f, 0.850239f, 1.021606f, 1.178507f, 0.638345f, +0.093846f, 0.118973f, 0.068163f, 0.105461f, 0.142489f, 0.219246f, 0.171132f, 0.137023f, 0.160163f, 0.229190f, 0.169287f, 0.188289f, 0.274933f, 0.375795f, 0.403458f, 0.269467f, +0.002671f, 0.003360f, 0.006507f, 0.013844f, 0.001923f, 0.002935f, 0.007745f, 
0.008527f, 0.000047f, 0.000066f, 0.000165f, 0.000252f, 0.002871f, 0.003893f, 0.014130f, 0.012977f, +0.001800f, 0.002267f, 0.005442f, 0.009765f, 0.001395f, 0.002132f, 0.006972f, 0.006474f, 0.000062f, 0.000088f, 0.000271f, 0.000350f, 0.001573f, 0.002136f, 0.009607f, 0.007441f, +0.014460f, 0.015662f, 0.035497f, 0.067314f, 0.007369f, 0.009688f, 0.029913f, 0.029356f, 0.000239f, 0.000292f, 0.000852f, 0.001162f, 0.011866f, 0.013858f, 0.058855f, 0.048179f, +0.000164f, 0.000202f, 0.000427f, 0.000998f, 0.000192f, 0.000287f, 0.000824f, 0.000998f, 0.000006f, 0.000009f, 0.000023f, 0.000039f, 0.000189f, 0.000251f, 0.000994f, 0.001003f, +0.167093f, 0.226876f, 0.108661f, 0.199270f, 0.152739f, 0.251710f, 0.164243f, 0.155874f, 0.126347f, 0.193641f, 0.119568f, 0.157630f, 0.403437f, 0.590607f, 0.530070f, 0.419627f, +0.088694f, 0.120602f, 0.071592f, 0.110732f, 0.087270f, 0.144028f, 0.116482f, 0.093236f, 0.131831f, 0.202339f, 0.154854f, 0.172181f, 0.174101f, 0.255243f, 0.283933f, 0.189576f, +0.702884f, 0.821929f, 0.460655f, 0.752995f, 0.454910f, 0.645648f, 0.492992f, 0.417035f, 0.503746f, 0.664911f, 0.480437f, 0.564559f, 1.295858f, 1.633800f, 1.715891f, 1.210788f, +0.105834f, 0.140785f, 0.073434f, 0.148012f, 0.156959f, 0.253418f, 0.180086f, 0.187844f, 0.173463f, 0.260459f, 0.175150f, 0.253786f, 0.273938f, 0.392894f, 0.384030f, 0.334139f, +0.011345f, 0.012605f, 0.005582f, 0.015029f, 0.011823f, 0.015944f, 0.009619f, 0.013404f, 0.009134f, 0.011456f, 0.006540f, 0.012660f, 0.039377f, 0.047172f, 0.039143f, 0.045498f, +0.013341f, 0.014845f, 0.008148f, 0.018503f, 0.014967f, 0.020213f, 0.015114f, 0.017763f, 0.021116f, 0.026522f, 0.018766f, 0.030637f, 0.037648f, 0.045167f, 0.046453f, 0.045540f, +0.049471f, 0.047339f, 0.024530f, 0.058873f, 0.036504f, 0.042397f, 0.029930f, 0.037175f, 0.037754f, 0.040779f, 0.027242f, 0.047003f, 0.131115f, 0.135275f, 0.131354f, 0.136090f, +0.007398f, 0.008053f, 0.003884f, 0.011494f, 0.012509f, 0.016528f, 0.010859f, 0.016631f, 0.012912f, 0.015865f, 0.009864f, 
0.020986f, 0.027529f, 0.032310f, 0.029198f, 0.037301f, +0.413703f, 0.474820f, 0.244308f, 0.445042f, 0.459237f, 0.639730f, 0.448443f, 0.422754f, 0.325952f, 0.422275f, 0.280114f, 0.366822f, 1.126618f, 1.394145f, 1.344204f, 1.057039f, +0.232980f, 0.267786f, 0.170774f, 0.262376f, 0.278387f, 0.388362f, 0.337422f, 0.268282f, 0.360828f, 0.468135f, 0.384891f, 0.425105f, 0.515819f, 0.639230f, 0.763908f, 0.506647f, +0.789272f, 0.780165f, 0.469732f, 0.762714f, 0.620333f, 0.744224f, 0.610478f, 0.512977f, 0.589403f, 0.657617f, 0.510467f, 0.595850f, 1.641229f, 1.749120f, 1.973479f, 1.383269f, +0.172228f, 0.193662f, 0.108519f, 0.217271f, 0.310186f, 0.423332f, 0.323181f, 0.334856f, 0.294132f, 0.373323f, 0.269698f, 0.388178f, 0.502806f, 0.609582f, 0.640093f, 0.553224f, +0.000370f, 0.000413f, 0.000783f, 0.002155f, 0.000316f, 0.000428f, 0.001105f, 0.001574f, 0.000006f, 0.000008f, 0.000020f, 0.000039f, 0.000397f, 0.000477f, 0.001693f, 0.002013f, +0.000277f, 0.000310f, 0.000727f, 0.001688f, 0.000255f, 0.000345f, 0.001105f, 0.001327f, 0.000009f, 0.000012f, 0.000036f, 0.000060f, 0.000241f, 0.000291f, 0.001279f, 0.001282f, +0.001354f, 0.001301f, 0.002884f, 0.007076f, 0.000819f, 0.000955f, 0.002883f, 0.003661f, 0.000022f, 0.000024f, 0.000069f, 0.000122f, 0.001107f, 0.001147f, 0.004765f, 0.005047f, +0.000015f, 0.000016f, 0.000033f, 0.000099f, 0.000020f, 0.000027f, 0.000075f, 0.000118f, 0.000001f, 0.000001f, 0.000002f, 0.000004f, 0.000017f, 0.000020f, 0.000076f, 0.000100f, +0.205707f, 0.247736f, 0.116048f, 0.275396f, 0.223046f, 0.326026f, 0.208068f, 0.255529f, 0.155650f, 0.211588f, 0.127782f, 0.217995f, 0.494940f, 0.642663f, 0.564134f, 0.577914f, +0.121262f, 0.146249f, 0.084912f, 0.169952f, 0.141531f, 0.207175f, 0.163876f, 0.169742f, 0.180361f, 0.245534f, 0.183789f, 0.264443f, 0.237202f, 0.308445f, 0.335585f, 0.289949f, +0.584486f, 0.606224f, 0.332306f, 0.702919f, 0.448713f, 0.564867f, 0.421847f, 0.461783f, 0.419175f, 0.490744f, 0.346810f, 0.527370f, 1.073821f, 1.200829f, 1.233491f, 
1.126330f, +0.083380f, 0.098378f, 0.050188f, 0.130905f, 0.146681f, 0.210055f, 0.145996f, 0.197064f, 0.136752f, 0.182127f, 0.119787f, 0.224605f, 0.215066f, 0.273591f, 0.261551f, 0.294489f, +0.005958f, 0.007422f, 0.002663f, 0.008851f, 0.004611f, 0.006970f, 0.003407f, 0.005860f, 0.005832f, 0.008200f, 0.003793f, 0.009063f, 0.022798f, 0.030616f, 0.020583f, 0.029535f, +0.005714f, 0.007128f, 0.003169f, 0.008885f, 0.004759f, 0.007206f, 0.004365f, 0.006333f, 0.010994f, 0.015479f, 0.008874f, 0.017884f, 0.017774f, 0.023905f, 0.019919f, 0.024106f, +0.035743f, 0.038343f, 0.016097f, 0.047693f, 0.019582f, 0.025496f, 0.014583f, 0.022360f, 0.033159f, 0.040151f, 0.021732f, 0.046286f, 0.104425f, 0.120778f, 0.095018f, 0.121526f, +0.005404f, 0.006595f, 0.002577f, 0.009413f, 0.006784f, 0.010048f, 0.005349f, 0.010113f, 0.011465f, 0.015792f, 0.007955f, 0.020892f, 0.022166f, 0.029164f, 0.021353f, 0.033675f, +0.111207f, 0.143084f, 0.059647f, 0.134133f, 0.091655f, 0.143131f, 0.081290f, 0.094601f, 0.106510f, 0.154685f, 0.083134f, 0.134394f, 0.333830f, 0.463098f, 0.361761f, 0.351180f, +0.051069f, 0.065803f, 0.034000f, 0.064485f, 0.045307f, 0.070855f, 0.049877f, 0.048955f, 0.096147f, 0.139837f, 0.093149f, 0.127005f, 0.124636f, 0.173149f, 0.167647f, 0.137259f, +0.291860f, 0.323407f, 0.157763f, 0.316227f, 0.170314f, 0.229057f, 0.152230f, 0.157911f, 0.264943f, 0.331382f, 0.208409f, 0.300308f, 0.668994f, 0.799260f, 0.730623f, 0.632193f, +0.064386f, 0.081161f, 0.036847f, 0.091070f, 0.086096f, 0.131722f, 0.081473f, 0.104210f, 0.133666f, 0.190186f, 0.111318f, 0.197788f, 0.207201f, 0.281604f, 0.239575f, 0.255612f, +0.000650f, 0.000813f, 0.001247f, 0.004239f, 0.000412f, 0.000625f, 0.001308f, 0.002300f, 0.000014f, 0.000019f, 0.000038f, 0.000094f, 0.000767f, 0.001034f, 0.002975f, 0.004365f, +0.000397f, 0.000497f, 0.000945f, 0.002708f, 0.000271f, 0.000411f, 0.001066f, 0.001581f, 0.000017f, 0.000023f, 0.000057f, 0.000118f, 0.000381f, 0.000514f, 0.001832f, 0.002267f, +0.003269f, 0.003520f, 0.006322f, 
0.019153f, 0.001467f, 0.001918f, 0.004692f, 0.007356f, 0.000066f, 0.000080f, 0.000185f, 0.000402f, 0.002947f, 0.003422f, 0.011515f, 0.015058f, +0.000036f, 0.000044f, 0.000073f, 0.000272f, 0.000037f, 0.000054f, 0.000124f, 0.000239f, 0.000002f, 0.000002f, 0.000005f, 0.000013f, 0.000045f, 0.000059f, 0.000186f, 0.000300f, +0.106679f, 0.144023f, 0.054661f, 0.160131f, 0.085881f, 0.140725f, 0.072764f, 0.110315f, 0.098123f, 0.149529f, 0.073164f, 0.154084f, 0.282934f, 0.411843f, 0.292902f, 0.370412f, +0.051280f, 0.069332f, 0.032614f, 0.080583f, 0.044438f, 0.072921f, 0.046733f, 0.059756f, 0.092717f, 0.141496f, 0.085811f, 0.152419f, 0.110573f, 0.161184f, 0.142082f, 0.151545f, +0.416970f, 0.484819f, 0.215316f, 0.562246f, 0.237671f, 0.335405f, 0.202941f, 0.274243f, 0.363512f, 0.477083f, 0.273164f, 0.512777f, 0.844439f, 1.058603f, 0.881008f, 0.993096f, +0.060135f, 0.079540f, 0.032876f, 0.105856f, 0.078545f, 0.126094f, 0.071006f, 0.118316f, 0.119894f, 0.179000f, 0.095385f, 0.220785f, 0.170980f, 0.243833f, 0.188859f, 0.262502f, +0.033065f, 0.040096f, 0.015814f, 0.042718f, 0.026058f, 0.038352f, 0.020609f, 0.028809f, 0.021853f, 0.029912f, 0.015210f, 0.029536f, 0.075516f, 0.098733f, 0.072973f, 0.085091f, +0.062341f, 0.075707f, 0.037010f, 0.084316f, 0.052885f, 0.077949f, 0.051915f, 0.061208f, 0.080993f, 0.111022f, 0.069971f, 0.114597f, 0.115755f, 0.151562f, 0.138842f, 0.136546f, +0.198052f, 0.206838f, 0.095464f, 0.229851f, 0.110511f, 0.140080f, 0.088082f, 0.109752f, 0.124067f, 0.146254f, 0.087026f, 0.150630f, 0.345389f, 0.388912f, 0.336364f, 0.349605f, +0.022626f, 0.026881f, 0.011546f, 0.034280f, 0.028930f, 0.041716f, 0.024413f, 0.037508f, 0.032414f, 0.043468f, 0.024072f, 0.051376f, 0.055398f, 0.070960f, 0.057118f, 0.073202f, +0.503442f, 0.630620f, 0.289007f, 0.528150f, 0.422602f, 0.642493f, 0.401155f, 0.379382f, 0.325593f, 0.460356f, 0.271998f, 0.357332f, 0.902115f, 1.218345f, 1.046310f, 0.825413f, +0.454544f, 0.570194f, 0.323885f, 0.499203f, 0.410714f, 0.625324f, 0.483922f, 
0.385992f, 0.577855f, 0.818214f, 0.599191f, 0.663910f, 0.662185f, 0.895605f, 0.953308f, 0.634282f, +1.319309f, 1.423258f, 0.763274f, 1.243303f, 0.784114f, 1.026680f, 0.750126f, 0.632335f, 0.808710f, 0.984761f, 0.680862f, 0.797283f, 1.805152f, 2.099623f, 2.110023f, 1.483701f, +0.219923f, 0.269892f, 0.134706f, 0.270561f, 0.299519f, 0.446130f, 0.303360f, 0.315323f, 0.308299f, 0.427062f, 0.274801f, 0.396785f, 0.422467f, 0.558987f, 0.522813f, 0.453304f, +0.000400f, 0.000488f, 0.000823f, 0.002272f, 0.000259f, 0.000382f, 0.000878f, 0.001255f, 0.000006f, 0.000008f, 0.000017f, 0.000034f, 0.000282f, 0.000370f, 0.001171f, 0.001396f, +0.000480f, 0.000586f, 0.001225f, 0.002853f, 0.000334f, 0.000494f, 0.001408f, 0.001697f, 0.000014f, 0.000019f, 0.000050f, 0.000084f, 0.000275f, 0.000362f, 0.001418f, 0.001426f, +0.002011f, 0.002109f, 0.004163f, 0.010250f, 0.000919f, 0.001170f, 0.003147f, 0.004009f, 0.000027f, 0.000032f, 0.000082f, 0.000145f, 0.001082f, 0.001223f, 0.004526f, 0.004810f, +0.000017f, 0.000020f, 0.000036f, 0.000110f, 0.000017f, 0.000025f, 0.000063f, 0.000099f, 0.000001f, 0.000001f, 0.000002f, 0.000004f, 0.000012f, 0.000016f, 0.000055f, 0.000072f, +0.372567f, 0.489689f, 0.204317f, 0.486416f, 0.305480f, 0.487324f, 0.277015f, 0.341290f, 0.231400f, 0.343307f, 0.184670f, 0.316051f, 0.589836f, 0.835870f, 0.653538f, 0.671641f, +0.352107f, 0.463469f, 0.239679f, 0.481251f, 0.310767f, 0.496478f, 0.349793f, 0.363471f, 0.429886f, 0.638704f, 0.425834f, 0.614666f, 0.453203f, 0.643175f, 0.623287f, 0.540247f, +1.454077f, 1.645975f, 0.803641f, 1.705352f, 0.844144f, 1.159768f, 0.771458f, 0.847190f, 0.855991f, 1.093720f, 0.688456f, 1.050231f, 1.757801f, 2.145341f, 1.962836f, 1.798036f, +0.158461f, 0.204051f, 0.092721f, 0.242612f, 0.210800f, 0.329463f, 0.203960f, 0.276184f, 0.213333f, 0.310081f, 0.181653f, 0.341693f, 0.268942f, 0.373393f, 0.317945f, 0.359129f, +0.030523f, 0.038237f, 0.017312f, 0.036021f, 0.021552f, 0.032768f, 0.020212f, 0.021765f, 0.021254f, 0.030053f, 0.017542f, 
0.026239f, 0.064352f, 0.086916f, 0.073741f, 0.066236f, +0.038811f, 0.048689f, 0.027322f, 0.047948f, 0.029498f, 0.044914f, 0.034338f, 0.031185f, 0.053122f, 0.075224f, 0.054421f, 0.068657f, 0.066524f, 0.089979f, 0.094618f, 0.071680f, +0.195295f, 0.210697f, 0.111627f, 0.207033f, 0.097633f, 0.127844f, 0.092277f, 0.088569f, 0.128889f, 0.156959f, 0.107208f, 0.142941f, 0.314396f, 0.365708f, 0.363074f, 0.290688f, +0.022927f, 0.028138f, 0.013874f, 0.031729f, 0.026265f, 0.039124f, 0.026282f, 0.031105f, 0.034604f, 0.047938f, 0.030473f, 0.050099f, 0.051819f, 0.068569f, 0.063356f, 0.062547f, +0.457711f, 0.592276f, 0.311579f, 0.438612f, 0.344232f, 0.540634f, 0.387480f, 0.282279f, 0.311866f, 0.455514f, 0.308942f, 0.312641f, 0.757111f, 1.056289f, 1.041300f, 0.632778f, +0.278695f, 0.361153f, 0.235484f, 0.279584f, 0.225616f, 0.354855f, 0.315227f, 0.193683f, 0.373270f, 0.545991f, 0.458973f, 0.391738f, 0.374790f, 0.523649f, 0.639823f, 0.327924f, +1.281242f, 1.427851f, 0.878988f, 1.102919f, 0.682246f, 0.922809f, 0.773952f, 0.502564f, 0.827425f, 1.040834f, 0.826061f, 0.745127f, 1.618281f, 1.944449f, 2.243083f, 1.214979f, +0.219475f, 0.278240f, 0.159411f, 0.246639f, 0.267803f, 0.412067f, 0.321639f, 0.257531f, 0.324143f, 0.463844f, 0.342611f, 0.381068f, 0.389192f, 0.531970f, 0.571129f, 0.381454f, +0.004466f, 0.005617f, 0.010878f, 0.023143f, 0.002583f, 0.003943f, 0.010405f, 0.011456f, 0.000067f, 0.000096f, 0.000238f, 0.000365f, 0.002905f, 0.003939f, 0.014297f, 0.013131f, +0.003613f, 0.004551f, 0.010924f, 0.019601f, 0.002250f, 0.003439f, 0.011247f, 0.010444f, 0.000107f, 0.000152f, 0.000471f, 0.000607f, 0.001911f, 0.002595f, 0.011672f, 0.009041f, +0.023956f, 0.025948f, 0.058810f, 0.111523f, 0.009812f, 0.012899f, 0.039828f, 0.039086f, 0.000342f, 0.000418f, 0.001222f, 0.001666f, 0.011900f, 0.013897f, 0.059021f, 0.048315f, +0.000202f, 0.000249f, 0.000526f, 0.001230f, 0.000190f, 0.000284f, 0.000816f, 0.000988f, 0.000007f, 0.000009f, 0.000025f, 0.000042f, 0.000141f, 0.000188f, 0.000741f, 
0.000748f, +0.627138f, 0.851520f, 0.407831f, 0.747908f, 0.460701f, 0.759224f, 0.495402f, 0.470157f, 0.410369f, 0.628937f, 0.388351f, 0.511975f, 0.916529f, 1.341740f, 1.204214f, 0.953310f, +0.399710f, 0.543508f, 0.322640f, 0.499028f, 0.316070f, 0.521630f, 0.421869f, 0.337676f, 0.514132f, 0.789107f, 0.603920f, 0.671495f, 0.474919f, 0.696259f, 0.774519f, 0.517131f, +2.614502f, 3.057313f, 1.713490f, 2.800900f, 1.359864f, 1.930036f, 1.473700f, 1.246643f, 1.621517f, 2.140294f, 1.546487f, 1.817270f, 2.917610f, 3.678482f, 3.863309f, 2.726076f, +0.292788f, 0.389480f, 0.203154f, 0.409473f, 0.348963f, 0.563418f, 0.400380f, 0.417628f, 0.415278f, 0.623552f, 0.419318f, 0.607576f, 0.458718f, 0.657913f, 0.643069f, 0.559526f, +0.061907f, 0.068786f, 0.030459f, 0.082014f, 0.051850f, 0.069923f, 0.042184f, 0.058781f, 0.043136f, 0.054100f, 0.030885f, 0.059783f, 0.130064f, 0.155812f, 0.129292f, 0.150282f, +0.087418f, 0.097272f, 0.053387f, 0.121240f, 0.078812f, 0.106437f, 0.079587f, 0.093534f, 0.119734f, 0.150385f, 0.106410f, 0.173720f, 0.149316f, 0.179136f, 0.184237f, 0.180614f, +0.267545f, 0.256019f, 0.132662f, 0.318397f, 0.158655f, 0.184267f, 0.130085f, 0.161571f, 0.176692f, 0.190851f, 0.127497f, 0.219977f, 0.429207f, 0.442825f, 0.429989f, 0.445493f, +0.029758f, 0.032393f, 0.015622f, 0.046231f, 0.040437f, 0.053426f, 0.035102f, 0.053759f, 0.044944f, 0.055224f, 0.034335f, 0.073046f, 0.067023f, 0.078663f, 0.071088f, 0.090816f, +1.349875f, 1.549296f, 0.797155f, 1.452132f, 1.204221f, 1.677513f, 1.175916f, 1.108553f, 0.920371f, 1.192353f, 0.790941f, 1.035773f, 2.225081f, 2.753450f, 2.654816f, 2.087663f, +0.912790f, 1.049156f, 0.669075f, 1.027961f, 0.876525f, 1.222793f, 1.062404f, 0.844711f, 1.223369f, 1.587186f, 1.304950f, 1.441294f, 1.223245f, 1.515910f, 1.811579f, 1.201493f, +2.552297f, 2.522848f, 1.518990f, 2.466417f, 1.612108f, 1.934073f, 1.586495f, 1.333114f, 1.649384f, 1.840274f, 1.428491f, 1.667425f, 3.212464f, 3.423644f, 3.862794f, 2.707545f, +0.414220f, 0.465770f, 0.260996f, 
0.522552f, 0.599533f, 0.818226f, 0.624651f, 0.647217f, 0.612174f, 0.776993f, 0.561320f, 0.807911f, 0.731968f, 0.887409f, 0.931827f, 0.805366f, +0.000637f, 0.000710f, 0.001345f, 0.003704f, 0.000437f, 0.000592f, 0.001527f, 0.002175f, 0.000010f, 0.000012f, 0.000030f, 0.000058f, 0.000413f, 0.000496f, 0.001762f, 0.002094f, +0.000572f, 0.000639f, 0.001500f, 0.003484f, 0.000423f, 0.000573f, 0.001833f, 0.002202f, 0.000017f, 0.000021f, 0.000065f, 0.000108f, 0.000301f, 0.000363f, 0.001598f, 0.001601f, +0.002307f, 0.002216f, 0.004913f, 0.012057f, 0.001121f, 0.001307f, 0.003947f, 0.005012f, 0.000033f, 0.000036f, 0.000102f, 0.000180f, 0.001142f, 0.001183f, 0.004914f, 0.005205f, +0.000018f, 0.000020f, 0.000042f, 0.000126f, 0.000021f, 0.000027f, 0.000077f, 0.000120f, 0.000001f, 0.000001f, 0.000002f, 0.000004f, 0.000013f, 0.000015f, 0.000058f, 0.000076f, +0.793983f, 0.956205f, 0.447920f, 1.062967f, 0.691863f, 1.011297f, 0.645403f, 0.792623f, 0.519895f, 0.706735f, 0.426813f, 0.728138f, 1.156321f, 1.501443f, 1.317977f, 1.350173f, +0.561995f, 0.677799f, 0.393530f, 0.787652f, 0.527136f, 0.771632f, 0.610364f, 0.632212f, 0.723360f, 0.984746f, 0.737109f, 1.060586f, 0.665412f, 0.865266f, 0.941403f, 0.813382f, +2.235811f, 2.318964f, 1.271159f, 2.688851f, 1.379413f, 1.736489f, 1.296821f, 1.419593f, 1.387589f, 1.624504f, 1.148042f, 1.745749f, 2.486322f, 2.780397f, 2.856022f, 2.607901f, +0.237216f, 0.279888f, 0.142787f, 0.372425f, 0.335369f, 0.480266f, 0.333801f, 0.450563f, 0.336685f, 0.448399f, 0.294917f, 0.552978f, 0.370357f, 0.471141f, 0.450406f, 0.507128f, +0.064424f, 0.080246f, 0.028789f, 0.095695f, 0.040062f, 0.060565f, 0.029603f, 0.050923f, 0.054568f, 0.076721f, 0.035486f, 0.084795f, 0.149201f, 0.200370f, 0.134708f, 0.193291f, +0.074184f, 0.092536f, 0.041148f, 0.115356f, 0.049656f, 0.075178f, 0.045544f, 0.066076f, 0.123514f, 0.173907f, 0.099698f, 0.200927f, 0.139675f, 0.187849f, 0.156530f, 0.189432f, +0.383011f, 0.410867f, 0.172492f, 0.511060f, 0.168634f, 0.219560f, 0.125581f, 
0.192550f, 0.307484f, 0.372318f, 0.201518f, 0.429214f, 0.677307f, 0.783369f, 0.616287f, 0.788222f, +0.043068f, 0.052556f, 0.020535f, 0.075020f, 0.043451f, 0.064357f, 0.034258f, 0.064769f, 0.079071f, 0.108916f, 0.054864f, 0.144089f, 0.106925f, 0.140683f, 0.103005f, 0.162445f, +0.718958f, 0.925038f, 0.385620f, 0.867172f, 0.476203f, 0.743648f, 0.422347f, 0.491509f, 0.595888f, 0.865410f, 0.465108f, 0.751893f, 1.306352f, 1.812204f, 1.415652f, 1.374246f, +0.396440f, 0.510814f, 0.263930f, 0.500579f, 0.282649f, 0.442030f, 0.311158f, 0.305408f, 0.645887f, 0.939383f, 0.625750f, 0.853182f, 0.585633f, 0.813581f, 0.787729f, 0.644945f, +1.870012f, 2.072143f, 1.010824f, 2.026139f, 0.876967f, 1.179445f, 0.783855f, 0.813104f, 1.469019f, 1.837400f, 1.155555f, 1.665104f, 2.594516f, 3.099718f, 2.833526f, 2.451791f, +0.306819f, 0.386757f, 0.175587f, 0.433980f, 0.329716f, 0.504448f, 0.312013f, 0.399086f, 0.551212f, 0.784289f, 0.459052f, 0.815636f, 0.597652f, 0.812260f, 0.691033f, 0.737291f, +0.002214f, 0.002768f, 0.004249f, 0.014440f, 0.001128f, 0.001712f, 0.003579f, 0.006295f, 0.000041f, 0.000057f, 0.000113f, 0.000277f, 0.001582f, 0.002133f, 0.006134f, 0.008999f, +0.001622f, 0.002031f, 0.003864f, 0.011075f, 0.000889f, 0.001352f, 0.003504f, 0.005197f, 0.000058f, 0.000083f, 0.000203f, 0.000417f, 0.000942f, 0.001272f, 0.004535f, 0.005612f, +0.011034f, 0.011884f, 0.021343f, 0.064656f, 0.003980f, 0.005203f, 0.012730f, 0.019957f, 0.000192f, 0.000233f, 0.000539f, 0.001175f, 0.006021f, 0.006991f, 0.023529f, 0.030769f, +0.000089f, 0.000109f, 0.000183f, 0.000683f, 0.000074f, 0.000110f, 0.000250f, 0.000483f, 0.000004f, 0.000005f, 0.000011f, 0.000028f, 0.000068f, 0.000090f, 0.000283f, 0.000456f, +0.815838f, 1.101436f, 0.418024f, 1.224622f, 0.527824f, 0.864895f, 0.447206f, 0.677993f, 0.649383f, 0.989594f, 0.484206f, 1.019737f, 1.309713f, 1.906435f, 1.355854f, 1.714654f, +0.470894f, 0.636659f, 0.299485f, 0.739970f, 0.327936f, 0.538136f, 0.344876f, 0.440980f, 0.736779f, 1.124403f, 0.681902f, 
1.211206f, 0.614590f, 0.895901f, 0.789729f, 0.842324f, +3.160323f, 3.674564f, 1.631938f, 4.261402f, 1.447661f, 2.042961f, 1.236118f, 1.670421f, 2.384238f, 3.129139f, 1.791653f, 3.363256f, 3.873992f, 4.856502f, 4.041759f, 4.555981f, +0.338983f, 0.448367f, 0.185323f, 0.596709f, 0.355822f, 0.571225f, 0.321667f, 0.535988f, 0.584857f, 0.873185f, 0.465301f, 1.077020f, 0.583391f, 0.831966f, 0.644393f, 0.895666f, +0.131035f, 0.158899f, 0.062672f, 0.169290f, 0.112068f, 0.164943f, 0.088632f, 0.123898f, 0.090822f, 0.124315f, 0.063213f, 0.122751f, 0.275347f, 0.360001f, 0.266076f, 0.310260f, +0.165189f, 0.200605f, 0.098067f, 0.223418f, 0.152075f, 0.224150f, 0.149286f, 0.176008f, 0.225062f, 0.308506f, 0.194435f, 0.318440f, 0.282204f, 0.369501f, 0.338489f, 0.332892f, +0.716717f, 0.748513f, 0.345468f, 0.831791f, 0.434003f, 0.550128f, 0.345919f, 0.431020f, 0.470839f, 0.555041f, 0.330267f, 0.571647f, 1.149991f, 1.294903f, 1.119940f, 1.164028f, +0.079260f, 0.094165f, 0.040448f, 0.120084f, 0.109982f, 0.158589f, 0.092807f, 0.142590f, 0.119079f, 0.159686f, 0.088431f, 0.188735f, 0.178549f, 0.228708f, 0.184093f, 0.235933f, +2.296289f, 2.876369f, 1.318213f, 2.408985f, 2.091837f, 3.180274f, 1.985674f, 1.877903f, 1.557403f, 2.202013f, 1.301044f, 1.709219f, 3.785789f, 5.112872f, 4.390916f, 3.463905f, +1.386233f, 1.738933f, 0.987758f, 1.522429f, 1.359310f, 2.069589f, 1.601600f, 1.277489f, 1.848111f, 2.616831f, 1.916349f, 2.123333f, 1.858047f, 2.513008f, 2.674919f, 1.779752f, +5.495001f, 5.927953f, 3.179081f, 5.178430f, 3.544208f, 4.640610f, 3.390581f, 2.858166f, 3.532347f, 4.301316f, 2.973921f, 3.482435f, 6.917552f, 8.045999f, 8.085855f, 5.685714f, +0.886691f, 1.088155f, 0.543108f, 1.090853f, 1.310520f, 1.952004f, 1.327328f, 1.379670f, 1.303533f, 1.805681f, 1.161897f, 1.677665f, 1.567153f, 2.073577f, 1.939388f, 1.681541f, +0.000122f, 0.000149f, 0.000251f, 0.000693f, 0.000086f, 0.000126f, 0.000291f, 0.000415f, 0.000002f, 0.000003f, 0.000005f, 0.000011f, 0.000079f, 0.000104f, 0.000329f, 
0.000392f, +0.000098f, 0.000119f, 0.000250f, 0.000582f, 0.000074f, 0.000109f, 0.000311f, 0.000375f, 0.000003f, 0.000004f, 0.000011f, 0.000018f, 0.000052f, 0.000068f, 0.000266f, 0.000267f, +0.000560f, 0.000587f, 0.001159f, 0.002854f, 0.000278f, 0.000354f, 0.000951f, 0.001212f, 0.000008f, 0.000009f, 0.000024f, 0.000042f, 0.000277f, 0.000313f, 0.001160f, 0.001232f, +0.000004f, 0.000005f, 0.000010f, 0.000030f, 0.000005f, 0.000007f, 0.000018f, 0.000029f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000003f, 0.000004f, 0.000014f, 0.000018f, +0.856945f, 1.126341f, 0.469951f, 1.118811f, 0.762519f, 1.216428f, 0.691467f, 0.851906f, 0.558165f, 0.828095f, 0.445445f, 0.762352f, 1.248239f, 1.768909f, 1.383049f, 1.421359f, +0.541511f, 0.712775f, 0.368606f, 0.740123f, 0.518664f, 0.828610f, 0.583797f, 0.606626f, 0.693321f, 1.030103f, 0.686786f, 0.991335f, 0.641273f, 0.910078f, 0.881937f, 0.764437f, +3.054081f, 3.457137f, 1.687934f, 3.581848f, 1.924103f, 2.643524f, 1.758426f, 1.931047f, 1.885434f, 2.409064f, 1.516416f, 2.313275f, 3.396882f, 4.145787f, 3.793105f, 3.474635f, +0.322177f, 0.414869f, 0.188516f, 0.493270f, 0.465117f, 0.726939f, 0.450026f, 0.609382f, 0.454861f, 0.661146f, 0.387316f, 0.728548f, 0.503093f, 0.698484f, 0.594761f, 0.671801f, +0.071499f, 0.089567f, 0.040551f, 0.084377f, 0.054786f, 0.083299f, 0.051380f, 0.055327f, 0.052210f, 0.073825f, 0.043091f, 0.064457f, 0.138691f, 0.187321f, 0.158925f, 0.142750f, +0.060786f, 0.076257f, 0.042792f, 0.075097f, 0.050137f, 0.076340f, 0.058363f, 0.053005f, 0.087252f, 0.123553f, 0.089386f, 0.112768f, 0.095861f, 0.129661f, 0.136346f, 0.103291f, +0.417736f, 0.450681f, 0.238771f, 0.442844f, 0.226634f, 0.296764f, 0.214203f, 0.205595f, 0.289120f, 0.352084f, 0.240485f, 0.320639f, 0.618736f, 0.719719f, 0.714535f, 0.572079f, +0.047472f, 0.058262f, 0.028728f, 0.065698f, 0.059018f, 0.087912f, 0.059056f, 0.069893f, 0.075140f, 0.104092f, 0.066170f, 0.108785f, 0.098718f, 0.130628f, 0.120697f, 0.119155f, +1.233989f, 1.596777f, 0.840017f, 
1.182500f, 1.007141f, 1.581765f, 1.133674f, 0.825882f, 0.881732f, 1.287866f, 0.873466f, 0.883925f, 1.878006f, 2.620116f, 2.582936f, 1.569599f, +0.502380f, 0.651020f, 0.424487f, 0.503982f, 0.441360f, 0.694182f, 0.616660f, 0.378890f, 0.705627f, 1.032138f, 0.867640f, 0.740538f, 0.621597f, 0.868482f, 1.061159f, 0.543868f, +3.154243f, 3.515176f, 2.163948f, 2.715237f, 1.822738f, 2.465444f, 2.067745f, 1.342687f, 2.136199f, 2.687167f, 2.132679f, 1.923727f, 3.665521f, 4.404313f, 5.080741f, 2.752012f, +0.523034f, 0.663075f, 0.379893f, 0.587768f, 0.692594f, 1.065690f, 0.831823f, 0.666028f, 0.810084f, 1.159216f, 0.856237f, 0.952348f, 0.853345f, 1.166402f, 1.252264f, 0.836379f, +0.000805f, 0.001012f, 0.001961f, 0.004172f, 0.000505f, 0.000771f, 0.002035f, 0.002241f, 0.000013f, 0.000018f, 0.000045f, 0.000069f, 0.000482f, 0.000653f, 0.002371f, 0.002178f, +0.000435f, 0.000548f, 0.001317f, 0.002362f, 0.000294f, 0.000450f, 0.001471f, 0.001366f, 0.000014f, 0.000019f, 0.000060f, 0.000077f, 0.000212f, 0.000288f, 0.001294f, 0.001003f, +0.003943f, 0.004271f, 0.009680f, 0.018357f, 0.001753f, 0.002304f, 0.007114f, 0.006982f, 0.000059f, 0.000072f, 0.000211f, 0.000288f, 0.001802f, 0.002105f, 0.008938f, 0.007317f, +0.000032f, 0.000040f, 0.000084f, 0.000196f, 0.000033f, 0.000049f, 0.000141f, 0.000171f, 0.000001f, 0.000002f, 0.000004f, 0.000007f, 0.000021f, 0.000027f, 0.000109f, 0.000110f, +0.852619f, 1.157675f, 0.554463f, 1.016812f, 0.679720f, 1.120162f, 0.730919f, 0.693671f, 0.585080f, 0.896702f, 0.553688f, 0.729945f, 1.146452f, 1.678333f, 1.506306f, 1.192459f, +0.363346f, 0.494061f, 0.293287f, 0.453628f, 0.311801f, 0.514584f, 0.416170f, 0.333115f, 0.490115f, 0.752246f, 0.575710f, 0.640128f, 0.397202f, 0.582322f, 0.647776f, 0.432507f, +3.245825f, 3.795562f, 2.127246f, 3.477233f, 1.832106f, 2.600282f, 1.985474f, 1.679566f, 2.111090f, 2.786498f, 2.013407f, 2.365946f, 3.332582f, 4.201674f, 4.412789f, 3.113807f, +0.351859f, 0.468059f, 0.244141f, 0.492086f, 0.455107f, 0.734793f, 0.522165f, 
0.544658f, 0.523365f, 0.785846f, 0.528456f, 0.765713f, 0.507200f, 0.727447f, 0.711035f, 0.618662f, +0.017202f, 0.019113f, 0.008463f, 0.022789f, 0.015635f, 0.021085f, 0.012720f, 0.017725f, 0.012570f, 0.015764f, 0.009000f, 0.017420f, 0.033251f, 0.039834f, 0.033054f, 0.038420f, +0.016241f, 0.018072f, 0.009918f, 0.022525f, 0.015890f, 0.021460f, 0.016046f, 0.018858f, 0.023328f, 0.029300f, 0.020732f, 0.033846f, 0.025523f, 0.030621f, 0.031493f, 0.030873f, +0.067885f, 0.064960f, 0.033661f, 0.080787f, 0.043687f, 0.050739f, 0.035819f, 0.044489f, 0.047015f, 0.050783f, 0.033925f, 0.058533f, 0.100198f, 0.103377f, 0.100380f, 0.104000f, +0.007309f, 0.007956f, 0.003837f, 0.011355f, 0.010778f, 0.014240f, 0.009356f, 0.014329f, 0.011576f, 0.014224f, 0.008844f, 0.018815f, 0.015146f, 0.017776f, 0.016064f, 0.020523f, +0.431695f, 0.495470f, 0.254933f, 0.464397f, 0.417934f, 0.582194f, 0.408111f, 0.384732f, 0.308671f, 0.399887f, 0.265263f, 0.347374f, 0.654706f, 0.810173f, 0.781151f, 0.614272f, +0.195181f, 0.224340f, 0.143067f, 0.219807f, 0.203399f, 0.283751f, 0.246533f, 0.196017f, 0.274329f, 0.355912f, 0.292623f, 0.323197f, 0.240656f, 0.298234f, 0.356402f, 0.236377f, +0.745347f, 0.736746f, 0.443590f, 0.720267f, 0.510905f, 0.612941f, 0.502788f, 0.422487f, 0.505124f, 0.563584f, 0.437476f, 0.510649f, 0.863143f, 0.919884f, 1.037878f, 0.727479f, +0.117095f, 0.131667f, 0.073780f, 0.147719f, 0.183924f, 0.251014f, 0.191630f, 0.198553f, 0.181481f, 0.230342f, 0.166405f, 0.239508f, 0.190378f, 0.230806f, 0.242359f, 0.209468f, +0.000014f, 0.000015f, 0.000029f, 0.000079f, 0.000010f, 0.000014f, 0.000035f, 0.000050f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, 0.000008f, 0.000010f, 0.000035f, 0.000041f, +0.000008f, 0.000009f, 0.000021f, 0.000050f, 0.000007f, 0.000009f, 0.000028f, 0.000034f, 0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000004f, 0.000005f, 0.000021f, 0.000021f, +0.000045f, 0.000043f, 0.000096f, 0.000235f, 0.000024f, 0.000028f, 0.000084f, 0.000106f, 0.000001f, 0.000001f, 0.000002f, 
0.000004f, 0.000021f, 0.000021f, 0.000088f, 0.000094f, +0.000000f, 0.000000f, 0.000001f, 0.000002f, 0.000000f, 0.000001f, 0.000002f, 0.000002f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000001f, 0.000001f, +0.128046f, 0.154208f, 0.072236f, 0.171425f, 0.121086f, 0.176992f, 0.112955f, 0.138720f, 0.087927f, 0.119526f, 0.072184f, 0.123145f, 0.171574f, 0.222783f, 0.195560f, 0.200337f, +0.060600f, 0.073087f, 0.042434f, 0.084932f, 0.061685f, 0.090296f, 0.071424f, 0.073981f, 0.081798f, 0.111355f, 0.083353f, 0.119931f, 0.066016f, 0.085843f, 0.093397f, 0.080696f, +0.329256f, 0.341502f, 0.187197f, 0.395973f, 0.220451f, 0.277517f, 0.207252f, 0.226873f, 0.214293f, 0.250882f, 0.177299f, 0.269606f, 0.336879f, 0.376724f, 0.386971f, 0.353352f, +0.033816f, 0.039899f, 0.020355f, 0.053091f, 0.051882f, 0.074298f, 0.051640f, 0.069703f, 0.050333f, 0.067034f, 0.044089f, 0.082668f, 0.048575f, 0.061794f, 0.059075f, 0.066514f, +0.090023f, 0.112131f, 0.040229f, 0.133719f, 0.060751f, 0.091843f, 0.044891f, 0.077221f, 0.079964f, 0.112426f, 0.052001f, 0.124257f, 0.191819f, 0.257605f, 0.173187f, 0.248503f, +0.069310f, 0.086456f, 0.038444f, 0.107777f, 0.050348f, 0.076225f, 0.046178f, 0.066996f, 0.121019f, 0.170394f, 0.097684f, 0.196867f, 0.120067f, 0.161478f, 0.134555f, 0.162838f, +0.488719f, 0.524263f, 0.220098f, 0.652109f, 0.233513f, 0.304033f, 0.173896f, 0.266631f, 0.411453f, 0.498209f, 0.269656f, 0.574342f, 0.795153f, 0.919670f, 0.723517f, 0.925367f, +0.053196f, 0.064916f, 0.025364f, 0.092662f, 0.058244f, 0.086266f, 0.045921f, 0.086819f, 0.102422f, 0.141080f, 0.071066f, 0.186641f, 0.121514f, 0.159878f, 0.117058f, 0.184609f, +1.156272f, 1.487703f, 0.620178f, 1.394640f, 0.831128f, 1.297905f, 0.737132f, 0.857843f, 1.005011f, 1.459581f, 0.784439f, 1.268125f, 1.933013f, 2.681523f, 2.094743f, 2.033475f, +0.426302f, 0.549291f, 0.283811f, 0.538285f, 0.329842f, 0.515834f, 0.363111f, 0.356401f, 0.728359f, 1.059331f, 0.705650f, 0.962122f, 0.579405f, 0.804930f, 0.779352f, 
0.638087f, +2.746283f, 3.043131f, 1.484487f, 2.975570f, 1.397666f, 1.879739f, 1.249268f, 1.295884f, 2.262441f, 2.829786f, 1.779675f, 2.564434f, 3.505700f, 4.188328f, 3.828650f, 3.312850f, +0.436176f, 0.549818f, 0.249616f, 0.616951f, 0.508675f, 0.778243f, 0.481362f, 0.615696f, 0.821766f, 1.169246f, 0.684370f, 1.215978f, 0.781711f, 1.062413f, 0.903851f, 0.964355f, +0.000238f, 0.000298f, 0.000457f, 0.001553f, 0.000132f, 0.000200f, 0.000418f, 0.000735f, 0.000005f, 0.000006f, 0.000013f, 0.000031f, 0.000157f, 0.000211f, 0.000607f, 0.000890f, +0.000117f, 0.000146f, 0.000278f, 0.000796f, 0.000069f, 0.000105f, 0.000273f, 0.000406f, 0.000004f, 0.000006f, 0.000015f, 0.000031f, 0.000062f, 0.000084f, 0.000300f, 0.000371f, +0.001083f, 0.001167f, 0.002096f, 0.006349f, 0.000424f, 0.000554f, 0.001356f, 0.002127f, 0.000020f, 0.000024f, 0.000056f, 0.000121f, 0.000544f, 0.000632f, 0.002126f, 0.002780f, +0.000008f, 0.000010f, 0.000017f, 0.000065f, 0.000008f, 0.000011f, 0.000026f, 0.000050f, 0.000000f, 0.000000f, 0.000001f, 0.000003f, 0.000006f, 0.000008f, 0.000025f, 0.000040f, +0.661657f, 0.893281f, 0.339024f, 0.993186f, 0.464555f, 0.761221f, 0.393600f, 0.596723f, 0.552305f, 0.841657f, 0.411821f, 0.867293f, 0.977288f, 1.422553f, 1.011718f, 1.279448f, +0.255349f, 0.345238f, 0.162400f, 0.401260f, 0.192983f, 0.316682f, 0.202952f, 0.259507f, 0.418984f, 0.639415f, 0.387778f, 0.688777f, 0.306630f, 0.446981f, 0.394010f, 0.420251f, +2.340477f, 2.721314f, 1.208583f, 3.155916f, 1.163480f, 1.641920f, 0.993464f, 1.342510f, 1.851704f, 2.430228f, 1.391477f, 2.612053f, 2.639667f, 3.309131f, 2.753980f, 3.104362f, +0.243013f, 0.321430f, 0.132856f, 0.427775f, 0.276824f, 0.444405f, 0.250252f, 0.416991f, 0.439695f, 0.656460f, 0.349813f, 0.809702f, 0.384795f, 0.548751f, 0.425031f, 0.590767f, +0.265164f, 0.321549f, 0.126824f, 0.342577f, 0.280674f, 0.413098f, 0.221977f, 0.310301f, 0.227243f, 0.311047f, 0.158165f, 0.307131f, 0.621947f, 0.813162f, 0.601006f, 0.700808f, +0.282476f, 0.343039f, 0.167696f, 
0.382049f, 0.321848f, 0.474386f, 0.315946f, 0.372500f, 0.475857f, 0.652288f, 0.411103f, 0.673292f, 0.538657f, 0.705284f, 0.646090f, 0.635406f, +1.090418f, 1.138793f, 0.525598f, 1.265492f, 0.817205f, 1.035861f, 0.651347f, 0.811587f, 0.885710f, 1.044105f, 0.621275f, 1.075344f, 1.952932f, 2.199022f, 1.901899f, 1.976769f, +0.129859f, 0.154279f, 0.066270f, 0.196745f, 0.223014f, 0.321576f, 0.188188f, 0.289134f, 0.241227f, 0.323490f, 0.179143f, 0.382337f, 0.326530f, 0.418260f, 0.336668f, 0.431474f, +3.542748f, 4.437703f, 2.033758f, 3.716616f, 3.994235f, 6.072541f, 3.791523f, 3.585741f, 2.970902f, 4.200562f, 2.481874f, 3.260507f, 6.519543f, 8.804924f, 7.561637f, 5.965224f, +1.807277f, 2.267103f, 1.287772f, 1.984840f, 2.193306f, 3.339372f, 2.584251f, 2.061285f, 2.979136f, 4.218305f, 3.089135f, 3.422791f, 2.703909f, 3.657036f, 3.892657f, 2.589972f, +6.373824f, 6.876020f, 3.687516f, 6.006625f, 5.087961f, 6.661924f, 4.867418f, 4.103099f, 5.066050f, 6.168897f, 4.265162f, 4.994468f, 8.956366f, 10.417400f, 10.469000f, 7.361467f, +1.107588f, 1.359241f, 0.678409f, 1.362611f, 2.026010f, 3.017718f, 2.051994f, 2.132914f, 2.013267f, 2.788821f, 1.794516f, 2.591103f, 2.185064f, 2.891166f, 2.704067f, 2.344554f, +0.001355f, 0.001650f, 0.002784f, 0.007689f, 0.001175f, 0.001737f, 0.003992f, 0.005706f, 0.000025f, 0.000035f, 0.000075f, 0.000149f, 0.000981f, 0.001288f, 0.004071f, 0.004853f, +0.000919f, 0.001120f, 0.002342f, 0.005456f, 0.000857f, 0.001269f, 0.003615f, 0.004358f, 0.000033f, 0.000046f, 0.000124f, 0.000208f, 0.000541f, 0.000711f, 0.002784f, 0.002800f, +0.004673f, 0.004899f, 0.009673f, 0.023814f, 0.002869f, 0.003651f, 0.009821f, 0.012512f, 0.000082f, 0.000097f, 0.000247f, 0.000438f, 0.002582f, 0.002919f, 0.010801f, 0.011478f, +0.000040f, 0.000048f, 0.000088f, 0.000266f, 0.000056f, 0.000082f, 0.000204f, 0.000321f, 0.000002f, 0.000002f, 0.000005f, 0.000011f, 0.000031f, 0.000040f, 0.000138f, 0.000180f, +1.844220f, 2.423983f, 1.011376f, 2.407777f, 2.030965f, 3.239950f, 1.841719f, 
2.269049f, 1.485238f, 2.203505f, 1.185300f, 2.028565f, 2.998505f, 4.249250f, 3.322342f, 3.414370f, +0.984785f, 1.296245f, 0.670343f, 1.345980f, 1.167381f, 1.864993f, 1.313981f, 1.365361f, 1.558987f, 2.316269f, 1.544294f, 2.229096f, 1.301740f, 1.847398f, 1.790273f, 1.551756f, +4.941503f, 5.593647f, 2.731076f, 5.795430f, 3.853001f, 5.293635f, 3.521234f, 3.866905f, 3.771931f, 4.819485f, 3.033686f, 4.627853f, 6.134879f, 7.487426f, 6.850470f, 6.275303f, +0.561366f, 0.722875f, 0.328474f, 0.859481f, 1.003011f, 1.567624f, 0.970468f, 1.314116f, 0.979952f, 1.424370f, 0.834433f, 1.569582f, 0.978470f, 1.358486f, 1.156755f, 1.306591f, +0.178905f, 0.224114f, 0.101467f, 0.211129f, 0.169663f, 0.257961f, 0.159115f, 0.171337f, 0.161529f, 0.228401f, 0.133317f, 0.199418f, 0.387362f, 0.523185f, 0.443875f, 0.398699f, +0.128529f, 0.161242f, 0.090482f, 0.158789f, 0.131204f, 0.199776f, 0.152731f, 0.138709f, 0.228111f, 0.323016f, 0.233689f, 0.294819f, 0.226249f, 0.306023f, 0.321800f, 0.243786f, +0.785856f, 0.847832f, 0.449181f, 0.833090f, 0.527666f, 0.690947f, 0.498722f, 0.478680f, 0.672500f, 0.818956f, 0.559375f, 0.745814f, 1.299251f, 1.511300f, 1.500415f, 1.201280f, +0.096173f, 0.118033f, 0.058199f, 0.133096f, 0.147975f, 0.220423f, 0.148070f, 0.175242f, 0.188216f, 0.260739f, 0.165748f, 0.272495f, 0.223233f, 0.295391f, 0.272933f, 0.269447f, +2.354076f, 3.046165f, 1.602498f, 2.255850f, 2.377889f, 3.734594f, 2.676638f, 1.949930f, 2.079790f, 3.037760f, 2.060292f, 2.084964f, 3.999017f, 5.579262f, 5.500090f, 3.342297f, +0.809872f, 1.049489f, 0.684303f, 0.812455f, 0.880580f, 1.384998f, 1.230331f, 0.755944f, 1.406478f, 2.057290f, 1.729407f, 1.476064f, 1.118509f, 1.562758f, 1.909464f, 0.978644f, +4.524004f, 5.041676f, 3.103664f, 3.894355f, 3.235522f, 4.376382f, 3.670431f, 2.383388f, 3.788291f, 4.765366f, 3.782050f, 3.411498f, 5.868277f, 7.051039f, 8.133958f, 4.405805f, +0.807850f, 1.024151f, 0.586762f, 0.907835f, 1.323951f, 2.037155f, 1.590100f, 1.273169f, 1.547053f, 2.213805f, 1.635192f, 
1.818739f, 1.471204f, 2.010928f, 2.158957f, 1.441953f, +0.011046f, 0.013892f, 0.026907f, 0.057244f, 0.008582f, 0.013100f, 0.034567f, 0.038058f, 0.000216f, 0.000306f, 0.000765f, 0.001170f, 0.007380f, 0.010007f, 0.036318f, 0.033354f, +0.005049f, 0.006359f, 0.015266f, 0.027393f, 0.004223f, 0.006455f, 0.021111f, 0.019604f, 0.000194f, 0.000276f, 0.000853f, 0.001100f, 0.002742f, 0.003724f, 0.016753f, 0.012976f, +0.040680f, 0.044063f, 0.099865f, 0.189379f, 0.022378f, 0.029419f, 0.090838f, 0.089146f, 0.000753f, 0.000921f, 0.002691f, 0.003668f, 0.020752f, 0.024235f, 0.102928f, 0.084258f, +0.000358f, 0.000441f, 0.000931f, 0.002177f, 0.000452f, 0.000675f, 0.001941f, 0.002349f, 0.000015f, 0.000021f, 0.000057f, 0.000096f, 0.000257f, 0.000341f, 0.001347f, 0.001360f, +2.268873f, 3.080648f, 1.475462f, 2.705801f, 2.238606f, 3.689167f, 2.407224f, 2.284553f, 1.925061f, 2.950375f, 1.821774f, 2.401704f, 3.405321f, 4.985176f, 4.474201f, 3.541979f, +0.817053f, 1.110992f, 0.659513f, 1.020069f, 0.867759f, 1.432117f, 1.158226f, 0.927078f, 1.362706f, 2.091528f, 1.600691f, 1.779797f, 0.996986f, 1.461639f, 1.625929f, 1.085601f, +6.493803f, 7.593641f, 4.255903f, 6.956772f, 4.536458f, 6.438529f, 4.916211f, 4.158755f, 5.222214f, 6.892972f, 4.980574f, 5.852651f, 7.442210f, 9.383036f, 9.854490f, 6.953648f, +0.758082f, 1.008436f, 0.526003f, 1.060201f, 1.213538f, 1.959316f, 1.392345f, 1.452324f, 1.394201f, 2.093431f, 1.407764f, 2.039797f, 1.219757f, 1.749427f, 1.709957f, 1.487810f, +0.307528f, 0.341696f, 0.151307f, 0.407411f, 0.345941f, 0.466527f, 0.281448f, 0.392183f, 0.277847f, 0.348468f, 0.198936f, 0.385073f, 0.663534f, 0.794895f, 0.659597f, 0.766680f, +0.245359f, 0.273015f, 0.149842f, 0.340286f, 0.297099f, 0.401241f, 0.300022f, 0.352599f, 0.435754f, 0.547302f, 0.387262f, 0.632226f, 0.430400f, 0.516354f, 0.531059f, 0.520615f, +0.912438f, 0.873129f, 0.452433f, 1.085863f, 0.726728f, 0.844045f, 0.595858f, 0.740084f, 0.781351f, 0.843960f, 0.563805f, 0.972763f, 1.503272f, 1.550968f, 1.506009f, 
1.560311f, +0.105793f, 0.115164f, 0.055538f, 0.164359f, 0.193084f, 0.255107f, 0.167609f, 0.256696f, 0.207184f, 0.254573f, 0.158277f, 0.336728f, 0.244707f, 0.287206f, 0.259548f, 0.331577f, +5.884053f, 6.753321f, 3.474765f, 6.329786f, 7.050182f, 9.821097f, 6.884466f, 6.490088f, 5.201981f, 6.739235f, 4.470436f, 5.854240f, 9.960779f, 12.326070f, 11.884530f, 9.345616f, +2.248076f, 2.583929f, 1.647841f, 2.531727f, 2.899453f, 4.044869f, 3.514320f, 2.794215f, 3.906796f, 5.068638f, 4.167323f, 4.602734f, 3.093986f, 3.834232f, 4.582076f, 3.038970f, +7.637949f, 7.549820f, 4.545697f, 7.380948f, 6.479639f, 7.773731f, 6.376693f, 5.358263f, 6.400151f, 7.140869f, 5.543016f, 6.470158f, 9.872983f, 10.522010f, 11.871670f, 8.321199f, +1.292198f, 1.453015f, 0.814203f, 1.630152f, 2.512019f, 3.428332f, 2.617263f, 2.711813f, 2.476262f, 3.142958f, 2.270556f, 3.268020f, 2.345067f, 2.843065f, 2.985370f, 2.580217f, +0.001335f, 0.001489f, 0.002821f, 0.007765f, 0.001230f, 0.001665f, 0.004298f, 0.006124f, 0.000026f, 0.000033f, 0.000080f, 0.000159f, 0.000889f, 0.001069f, 0.003794f, 0.004509f, +0.000678f, 0.000757f, 0.001777f, 0.004127f, 0.000672f, 0.000911f, 0.002915f, 0.003503f, 0.000026f, 0.000033f, 0.000099f, 0.000166f, 0.000367f, 0.000442f, 0.001943f, 0.001948f, +0.003320f, 0.003190f, 0.007071f, 0.017352f, 0.002167f, 0.002526f, 0.007629f, 0.009689f, 0.000062f, 0.000067f, 0.000191f, 0.000336f, 0.001688f, 0.001748f, 0.007263f, 0.007693f, +0.000028f, 0.000030f, 0.000062f, 0.000189f, 0.000041f, 0.000055f, 0.000154f, 0.000242f, 0.000001f, 0.000001f, 0.000004f, 0.000008f, 0.000020f, 0.000023f, 0.000090f, 0.000118f, +2.434514f, 2.931919f, 1.373416f, 3.259274f, 2.849262f, 4.164769f, 2.657928f, 3.264215f, 2.066996f, 2.809832f, 1.696921f, 2.894926f, 3.641197f, 4.727967f, 4.150243f, 4.251624f, +0.973624f, 1.174248f, 0.681768f, 1.364562f, 1.226571f, 1.795477f, 1.420230f, 1.471067f, 1.624936f, 2.212104f, 1.655821f, 2.382470f, 1.183896f, 1.539475f, 1.674937f, 1.447164f, +4.706509f, 4.881550f, 2.675861f, 
5.660183f, 3.900042f, 4.909610f, 3.666527f, 4.013643f, 3.787456f, 4.434120f, 3.133606f, 4.765061f, 5.375090f, 6.010839f, 6.174332f, 5.637927f, +0.520547f, 0.614187f, 0.313332f, 0.817251f, 0.988440f, 1.415498f, 0.983820f, 1.327955f, 0.957994f, 1.275863f, 0.839150f, 1.573429f, 0.834644f, 1.061774f, 1.015045f, 1.142875f, +0.277518f, 0.345670f, 0.124015f, 0.412220f, 0.231785f, 0.350409f, 0.171273f, 0.294619f, 0.304793f, 0.428525f, 0.198207f, 0.473622f, 0.660047f, 0.886414f, 0.595933f, 0.855096f, +0.180554f, 0.225220f, 0.100149f, 0.280762f, 0.162324f, 0.245754f, 0.148882f, 0.215999f, 0.389796f, 0.548831f, 0.314636f, 0.634101f, 0.349125f, 0.469539f, 0.391254f, 0.473494f, +1.132696f, 1.215077f, 0.510119f, 1.511383f, 0.669820f, 0.872101f, 0.498812f, 0.764816f, 1.179094f, 1.427709f, 0.772749f, 1.645882f, 2.057086f, 2.379213f, 1.871760f, 2.393953f, +0.132772f, 0.162024f, 0.063306f, 0.231276f, 0.179917f, 0.266478f, 0.141850f, 0.268184f, 0.316079f, 0.435380f, 0.219314f, 0.575983f, 0.338532f, 0.445412f, 0.326120f, 0.514312f, +2.717583f, 3.496545f, 1.457603f, 3.277817f, 2.417590f, 3.775356f, 2.144175f, 2.495299f, 2.920568f, 4.241551f, 2.279586f, 3.685179f, 5.071127f, 7.034793f, 5.495415f, 5.334683f, +0.846671f, 1.090937f, 0.563672f, 1.069080f, 0.810767f, 1.267943f, 0.892542f, 0.876050f, 1.788615f, 2.601374f, 1.732849f, 2.362661f, 1.284479f, 1.784443f, 1.727740f, 1.414570f, +4.852736f, 5.377272f, 2.623118f, 5.257891f, 3.056588f, 4.110845f, 2.732053f, 2.833999f, 4.943022f, 6.182568f, 3.888266f, 5.602821f, 6.914539f, 8.260934f, 7.551515f, 6.534168f, +0.829998f, 1.046245f, 0.474994f, 1.173993f, 1.197973f, 1.832831f, 1.133650f, 1.450017f, 1.933467f, 2.751024f, 1.610200f, 2.860978f, 1.660382f, 2.256602f, 1.919811f, 2.048323f, +0.004024f, 0.005032f, 0.007724f, 0.026249f, 0.002754f, 0.004179f, 0.008739f, 0.015370f, 0.000096f, 0.000135f, 0.000267f, 0.000653f, 0.002953f, 0.003982f, 0.011452f, 0.016801f, +0.001666f, 0.002086f, 0.003969f, 0.011375f, 0.001227f, 0.001865f, 0.004833f, 
0.007170f, 0.000078f, 0.000110f, 0.000270f, 0.000556f, 0.000994f, 0.001342f, 0.004784f, 0.005919f, +0.013771f, 0.014831f, 0.026637f, 0.080691f, 0.006672f, 0.008721f, 0.021338f, 0.033452f, 0.000310f, 0.000377f, 0.000873f, 0.001901f, 0.007717f, 0.008961f, 0.030157f, 0.039436f, +0.000116f, 0.000142f, 0.000238f, 0.000889f, 0.000129f, 0.000192f, 0.000437f, 0.000844f, 0.000006f, 0.000008f, 0.000018f, 0.000048f, 0.000091f, 0.000121f, 0.000378f, 0.000610f, +2.169210f, 2.928578f, 1.111473f, 3.256115f, 1.884942f, 3.088674f, 1.597041f, 2.421218f, 2.238831f, 3.411753f, 1.669363f, 3.515675f, 3.576337f, 5.205761f, 3.702332f, 4.682079f, +0.707421f, 0.956449f, 0.449915f, 1.111654f, 0.661692f, 1.085822f, 0.695872f, 0.889785f, 1.435210f, 2.190284f, 1.328312f, 2.359371f, 0.948212f, 1.382229f, 1.218422f, 1.299568f, +5.768886f, 6.707586f, 2.978956f, 7.778807f, 3.549266f, 5.008778f, 3.030621f, 4.095411f, 5.643299f, 7.406420f, 4.240698f, 7.960556f, 7.262460f, 9.104343f, 7.576968f, 8.540965f, +0.645047f, 0.853192f, 0.352649f, 1.135470f, 0.909405f, 1.459930f, 0.822111f, 1.369872f, 1.443066f, 2.154481f, 1.148075f, 2.657419f, 1.140085f, 1.625861f, 1.259299f, 1.750347f, +0.062672f, 0.075999f, 0.029975f, 0.080969f, 0.061459f, 0.090456f, 0.048606f, 0.067946f, 0.047865f, 0.065517f, 0.033315f, 0.064693f, 0.236474f, 0.309177f, 0.228512f, 0.266458f, +0.081601f, 0.099096f, 0.048443f, 0.110365f, 0.086136f, 0.126960f, 0.084557f, 0.099692f, 0.122507f, 0.167928f, 0.105836f, 0.173335f, 0.250319f, 0.327753f, 0.300245f, 0.295280f, +0.311366f, 0.325179f, 0.150083f, 0.361358f, 0.216189f, 0.274034f, 0.172312f, 0.214703f, 0.225394f, 0.265702f, 0.158101f, 0.273651f, 0.897090f, 1.010134f, 0.873648f, 0.908040f, +0.044378f, 0.052723f, 0.022647f, 0.067235f, 0.070607f, 0.101813f, 0.059582f, 0.091542f, 0.073467f, 0.098521f, 0.054559f, 0.116443f, 0.179510f, 0.229939f, 0.185084f, 0.237203f, +1.338541f, 1.676678f, 0.768406f, 1.404233f, 1.398136f, 2.125624f, 1.327179f, 1.255148f, 1.000350f, 1.414396f, 0.835686f, 
1.097865f, 3.962594f, 5.351655f, 4.595982f, 3.625678f, +0.834580f, 1.046923f, 0.594679f, 0.916577f, 0.938356f, 1.428673f, 1.105612f, 0.881873f, 1.226044f, 1.736016f, 1.271314f, 1.408628f, 2.008661f, 2.716714f, 2.891750f, 1.924021f, +2.909447f, 3.138682f, 1.683233f, 2.741832f, 2.151685f, 2.817309f, 2.058418f, 1.735189f, 2.060877f, 2.509517f, 1.735075f, 2.031757f, 6.576779f, 7.649637f, 7.687529f, 5.405624f, +0.605069f, 0.742545f, 0.370611f, 0.744387f, 1.025399f, 1.527320f, 1.038550f, 1.079505f, 0.980168f, 1.357750f, 0.873668f, 1.261490f, 1.920269f, 2.540803f, 2.376377f, 2.060432f, +0.002017f, 0.002456f, 0.004144f, 0.011445f, 0.001621f, 0.002395f, 0.005505f, 0.007869f, 0.000033f, 0.000046f, 0.000100f, 0.000198f, 0.002349f, 0.003083f, 0.009748f, 0.011622f, +0.001671f, 0.002038f, 0.004261f, 0.009926f, 0.001445f, 0.002139f, 0.006094f, 0.007346f, 0.000054f, 0.000075f, 0.000201f, 0.000337f, 0.001582f, 0.002079f, 0.008149f, 0.008194f, +0.008403f, 0.008811f, 0.017396f, 0.042826f, 0.004780f, 0.006083f, 0.016363f, 0.020846f, 0.000132f, 0.000156f, 0.000396f, 0.000702f, 0.007470f, 0.008445f, 0.031246f, 0.033205f, +0.000086f, 0.000103f, 0.000189f, 0.000573f, 0.000112f, 0.000163f, 0.000407f, 0.000640f, 0.000003f, 0.000004f, 0.000010f, 0.000021f, 0.000108f, 0.000138f, 0.000476f, 0.000624f, +0.568840f, 0.747665f, 0.311953f, 0.742666f, 0.580369f, 0.925849f, 0.526290f, 0.648404f, 0.408268f, 0.605708f, 0.325820f, 0.557620f, 1.487829f, 2.108437f, 1.648514f, 1.694177f, +0.371254f, 0.488671f, 0.252712f, 0.507421f, 0.407724f, 0.651375f, 0.458926f, 0.476872f, 0.523774f, 0.778199f, 0.518838f, 0.748911f, 0.789450f, 1.120369f, 1.085725f, 0.941074f, +1.841430f, 2.084449f, 1.017724f, 2.159642f, 1.330209f, 1.827573f, 1.215670f, 1.335009f, 1.252657f, 1.600550f, 1.007486f, 1.536908f, 3.677675f, 4.488486f, 4.106650f, 3.761855f, +0.250356f, 0.322386f, 0.146492f, 0.383309f, 0.414422f, 0.647708f, 0.400976f, 0.542964f, 0.389484f, 0.566119f, 0.331647f, 0.623834f, 0.701990f, 0.974628f, 0.829899f, 
0.937397f, +0.040595f, 0.050853f, 0.023024f, 0.047907f, 0.035667f, 0.054229f, 0.033449f, 0.036018f, 0.032664f, 0.046187f, 0.026959f, 0.040326f, 0.141396f, 0.190975f, 0.162025f, 0.145534f, +0.035645f, 0.044718f, 0.025094f, 0.044037f, 0.033711f, 0.051330f, 0.039242f, 0.035639f, 0.056379f, 0.079836f, 0.057758f, 0.072867f, 0.100939f, 0.136530f, 0.143568f, 0.108763f, +0.215433f, 0.232423f, 0.123137f, 0.228381f, 0.134015f, 0.175484f, 0.126663f, 0.121573f, 0.164298f, 0.200079f, 0.136661f, 0.182210f, 0.572971f, 0.666485f, 0.661685f, 0.529766f, +0.031553f, 0.038725f, 0.019094f, 0.043667f, 0.044978f, 0.066999f, 0.045007f, 0.053266f, 0.055032f, 0.076237f, 0.048462f, 0.079674f, 0.117819f, 0.155903f, 0.144050f, 0.142210f, +0.853891f, 1.104931f, 0.581272f, 0.818261f, 0.799094f, 1.255018f, 0.899489f, 0.655278f, 0.672316f, 0.981991f, 0.666013f, 0.673989f, 2.333492f, 3.255590f, 3.209392f, 1.950285f, +0.359046f, 0.465277f, 0.303377f, 0.360191f, 0.361682f, 0.568863f, 0.505336f, 0.310490f, 0.555698f, 0.812834f, 0.683288f, 0.583192f, 0.797709f, 1.114543f, 1.361811f, 0.697959f, +1.982548f, 2.209407f, 1.360115f, 1.706618f, 1.313620f, 1.776808f, 1.490193f, 0.967654f, 1.479505f, 1.861098f, 1.477067f, 1.332350f, 4.136971f, 4.970785f, 5.734214f, 3.105970f, +0.423690f, 0.537132f, 0.307737f, 0.476129f, 0.643301f, 0.989842f, 0.772621f, 0.618625f, 0.723094f, 1.034734f, 0.764290f, 0.850081f, 1.241256f, 1.696622f, 1.821513f, 1.216577f, +0.015785f, 0.019853f, 0.038451f, 0.081804f, 0.011362f, 0.017344f, 0.045765f, 0.050387f, 0.000275f, 0.000390f, 0.000974f, 0.001490f, 0.016965f, 0.023004f, 0.083491f, 0.076678f, +0.008819f, 0.011108f, 0.026664f, 0.047845f, 0.006833f, 0.010445f, 0.034161f, 0.031722f, 0.000302f, 0.000429f, 0.001328f, 0.001713f, 0.007706f, 0.010464f, 0.047071f, 0.036461f, +0.070234f, 0.076075f, 0.172417f, 0.326961f, 0.035794f, 0.047056f, 0.145297f, 0.142590f, 0.001159f, 0.001417f, 0.004140f, 0.005644f, 0.057637f, 0.067311f, 0.285871f, 0.234018f, +0.000740f, 0.000912f, 0.001924f, 
0.004499f, 0.000865f, 0.001293f, 0.003715f, 0.004496f, 0.000028f, 0.000039f, 0.000106f, 0.000178f, 0.000853f, 0.001133f, 0.004479f, 0.004521f, +0.671859f, 0.912241f, 0.436914f, 0.801242f, 0.614144f, 1.012094f, 0.660403f, 0.626749f, 0.508024f, 0.778605f, 0.480767f, 0.633811f, 1.622172f, 2.374758f, 2.131348f, 1.687271f, +0.295713f, 0.402097f, 0.238695f, 0.369190f, 0.290967f, 0.480201f, 0.388363f, 0.310857f, 0.439536f, 0.674615f, 0.516297f, 0.574067f, 0.580470f, 0.851003f, 0.946657f, 0.632064f, +2.323196f, 2.716669f, 1.522574f, 2.488826f, 1.503585f, 2.134017f, 1.629453f, 1.378398f, 1.664998f, 2.197685f, 1.587956f, 1.866000f, 4.283115f, 5.400092f, 5.671422f, 4.001940f, +0.324578f, 0.431769f, 0.225212f, 0.453933f, 0.481372f, 0.777199f, 0.552299f, 0.576091f, 0.531987f, 0.798793f, 0.537162f, 0.778327f, 0.840132f, 1.204953f, 1.177767f, 1.024759f, +0.048993f, 0.054436f, 0.024105f, 0.064906f, 0.051060f, 0.068858f, 0.041541f, 0.057885f, 0.039448f, 0.049475f, 0.028245f, 0.054672f, 0.170053f, 0.203719f, 0.169044f, 0.196488f, +0.047775f, 0.053160f, 0.029177f, 0.066259f, 0.053595f, 0.072382f, 0.054123f, 0.063607f, 0.075616f, 0.094973f, 0.067201f, 0.109710f, 0.134817f, 0.161741f, 0.166347f, 0.163076f, +0.175619f, 0.168053f, 0.087081f, 0.208999f, 0.129588f, 0.150508f, 0.106252f, 0.131970f, 0.134025f, 0.144765f, 0.096710f, 0.166858f, 0.465455f, 0.480223f, 0.466302f, 0.483116f, +0.024369f, 0.026528f, 0.012793f, 0.037860f, 0.041206f, 0.054442f, 0.035769f, 0.054781f, 0.042532f, 0.052260f, 0.032492f, 0.069125f, 0.090678f, 0.106427f, 0.096178f, 0.122869f, +1.498507f, 1.719886f, 0.884927f, 1.612023f, 1.663440f, 2.317217f, 1.624340f, 1.531290f, 1.180656f, 1.529555f, 1.014622f, 1.328694f, 4.080812f, 5.049844f, 4.868948f, 3.828787f, +0.699754f, 0.804294f, 0.512920f, 0.788045f, 0.836132f, 1.166443f, 1.013445f, 0.805784f, 1.083746f, 1.406041f, 1.156016f, 1.276799f, 1.549259f, 1.919924f, 2.294393f, 1.521710f, +2.350054f, 2.322939f, 1.398626f, 2.270980f, 1.847041f, 2.215926f, 1.817696f, 
1.527389f, 1.754946f, 1.958053f, 1.519916f, 1.774142f, 4.886756f, 5.208001f, 5.876030f, 4.118681f, +0.475824f, 0.535042f, 0.299813f, 0.600269f, 0.856969f, 1.169567f, 0.892873f, 0.925128f, 0.812618f, 1.031403f, 0.745113f, 1.072444f, 1.389133f, 1.684130f, 1.768426f, 1.528428f, +0.001339f, 0.001494f, 0.002830f, 0.007791f, 0.001143f, 0.001548f, 0.003995f, 0.005692f, 0.000023f, 0.000029f, 0.000072f, 0.000142f, 0.001434f, 0.001725f, 0.006123f, 0.007277f, +0.000831f, 0.000928f, 0.002179f, 0.005061f, 0.000764f, 0.001035f, 0.003312f, 0.003980f, 0.000028f, 0.000036f, 0.000109f, 0.000181f, 0.000723f, 0.000871f, 0.003834f, 0.003843f, +0.004025f, 0.003867f, 0.008571f, 0.021034f, 0.002433f, 0.002837f, 0.008568f, 0.010881f, 0.000066f, 0.000072f, 0.000206f, 0.000363f, 0.003291f, 0.003409f, 0.014162f, 0.015002f, +0.000040f, 0.000044f, 0.000091f, 0.000274f, 0.000056f, 0.000074f, 0.000208f, 0.000325f, 0.000002f, 0.000002f, 0.000005f, 0.000011f, 0.000046f, 0.000054f, 0.000210f, 0.000275f, +0.506151f, 0.609565f, 0.285542f, 0.677624f, 0.548814f, 0.802202f, 0.511960f, 0.628740f, 0.382983f, 0.520620f, 0.314414f, 0.536386f, 1.217821f, 1.581298f, 1.388074f, 1.421982f, +0.247407f, 0.298387f, 0.173243f, 0.346748f, 0.288760f, 0.422693f, 0.334352f, 0.346320f, 0.367984f, 0.500954f, 0.374978f, 0.539535f, 0.483955f, 0.629309f, 0.684684f, 0.591574f, +1.182186f, 1.226153f, 0.672126f, 1.421731f, 0.907572f, 1.142506f, 0.853231f, 0.934007f, 0.847826f, 0.992583f, 0.701461f, 1.066664f, 2.171920f, 2.428808f, 2.494871f, 2.278125f, +0.156482f, 0.184631f, 0.094191f, 0.245674f, 0.275282f, 0.394219f, 0.273996f, 0.369838f, 0.256649f, 0.341806f, 0.224810f, 0.421525f, 0.403623f, 0.513460f, 0.490863f, 0.552679f, +0.041071f, 0.051157f, 0.018353f, 0.061006f, 0.031780f, 0.048045f, 0.023483f, 0.040395f, 0.040200f, 0.056519f, 0.026142f, 0.062467f, 0.157141f, 0.211034f, 0.141877f, 0.203578f, +0.032659f, 0.040738f, 0.018115f, 0.050785f, 0.027202f, 0.041183f, 0.024950f, 0.036197f, 0.062836f, 0.088472f, 0.050720f, 
0.102218f, 0.101589f, 0.136628f, 0.113848f, 0.137779f, +0.202524f, 0.217254f, 0.091208f, 0.270233f, 0.110955f, 0.144462f, 0.082628f, 0.126691f, 0.187882f, 0.227497f, 0.123133f, 0.262261f, 0.591680f, 0.684334f, 0.538375f, 0.688574f, +0.028411f, 0.034670f, 0.013546f, 0.049489f, 0.035668f, 0.052828f, 0.028121f, 0.053166f, 0.060276f, 0.083027f, 0.041823f, 0.109840f, 0.116534f, 0.153325f, 0.112261f, 0.177043f, +0.642924f, 0.827210f, 0.344839f, 0.775464f, 0.529888f, 0.827484f, 0.469961f, 0.546920f, 0.615767f, 0.894281f, 0.480624f, 0.776976f, 1.929979f, 2.677314f, 2.091455f, 2.030283f, +0.244818f, 0.315449f, 0.162988f, 0.309129f, 0.217195f, 0.339667f, 0.239101f, 0.234684f, 0.460912f, 0.670354f, 0.446542f, 0.608840f, 0.597485f, 0.830047f, 0.803672f, 0.657998f, +1.387019f, 1.536943f, 0.749745f, 1.502821f, 0.809389f, 1.088558f, 0.723452f, 0.750447f, 1.259101f, 1.574842f, 0.990431f, 1.427167f, 3.179291f, 3.798360f, 3.472171f, 3.004397f, +0.283916f, 0.357887f, 0.162480f, 0.401585f, 0.379650f, 0.580844f, 0.359266f, 0.459526f, 0.589415f, 0.838646f, 0.490868f, 0.872166f, 0.913674f, 1.241762f, 1.056433f, 1.127151f, +0.003751f, 0.004691f, 0.007199f, 0.024466f, 0.002378f, 0.003609f, 0.007546f, 0.013272f, 0.000079f, 0.000112f, 0.000222f, 0.000542f, 0.004428f, 0.005970f, 0.017171f, 0.025191f, +0.001898f, 0.002377f, 0.004521f, 0.012959f, 0.001295f, 0.001968f, 0.005101f, 0.007567f, 0.000079f, 0.000112f, 0.000274f, 0.000564f, 0.001821f, 0.002459f, 0.008767f, 0.010848f, +0.015507f, 0.016701f, 0.029994f, 0.090863f, 0.006960f, 0.009098f, 0.022261f, 0.034899f, 0.000311f, 0.000378f, 0.000876f, 0.001908f, 0.013979f, 0.016232f, 0.054628f, 0.071438f, +0.000157f, 0.000192f, 0.000321f, 0.001198f, 0.000161f, 0.000239f, 0.000545f, 0.001054f, 0.000007f, 0.000010f, 0.000021f, 0.000058f, 0.000198f, 0.000262f, 0.000820f, 0.001322f, +0.418952f, 0.565613f, 0.214665f, 0.628872f, 0.337276f, 0.552662f, 0.285761f, 0.433233f, 0.385351f, 0.587236f, 0.287333f, 0.605123f, 1.111149f, 1.617403f, 1.150295f, 
1.454697f, +0.166991f, 0.225776f, 0.106205f, 0.262412f, 0.144709f, 0.237464f, 0.152184f, 0.194592f, 0.301928f, 0.460774f, 0.279439f, 0.496345f, 0.360074f, 0.524887f, 0.462683f, 0.493498f, +1.346089f, 1.565121f, 0.695098f, 1.815076f, 0.767264f, 1.082775f, 0.655146f, 0.885327f, 1.173511f, 1.540148f, 0.881843f, 1.655379f, 2.726068f, 3.417444f, 2.844123f, 3.205973f, +0.180131f, 0.238257f, 0.098478f, 0.317084f, 0.235278f, 0.377707f, 0.212693f, 0.354407f, 0.359134f, 0.536183f, 0.285720f, 0.661349f, 0.512161f, 0.730387f, 0.565716f, 0.786309f, +0.413275f, 0.501156f, 0.197663f, 0.533929f, 0.325699f, 0.479367f, 0.257586f, 0.360079f, 0.273145f, 0.373876f, 0.190113f, 0.369170f, 0.943875f, 1.234065f, 0.912095f, 1.063556f, +0.646110f, 0.784636f, 0.383573f, 0.873864f, 0.548106f, 0.807876f, 0.538055f, 0.634365f, 0.839418f, 1.150644f, 0.725190f, 1.187694f, 1.199698f, 1.570812f, 1.438973f, 1.415178f, +2.034871f, 2.125147f, 0.980839f, 2.361585f, 1.135439f, 1.439245f, 0.904993f, 1.127634f, 1.274713f, 1.502677f, 0.894140f, 1.547635f, 3.548678f, 3.995850f, 3.455946f, 3.591992f, +0.215703f, 0.256266f, 0.110077f, 0.326804f, 0.275806f, 0.397700f, 0.232737f, 0.357579f, 0.309020f, 0.414401f, 0.229488f, 0.489786f, 0.528131f, 0.676496f, 0.544529f, 0.697869f, +5.277797f, 6.611053f, 3.029784f, 5.536817f, 4.430318f, 6.735530f, 4.205474f, 3.977226f, 3.413327f, 4.826105f, 2.851472f, 3.746059f, 9.457252f, 12.772430f, 10.968910f, 8.653157f, +3.951263f, 4.956586f, 2.815467f, 4.339471f, 3.570257f, 5.435820f, 4.206636f, 3.355353f, 5.023179f, 7.112567f, 5.208651f, 5.771236f, 5.756240f, 7.785313f, 8.286915f, 5.513684f, +11.369230f, 12.265020f, 6.577563f, 10.714240f, 6.757152f, 8.847479f, 6.464256f, 5.449189f, 6.969114f, 8.486245f, 5.867372f, 6.870641f, 15.556010f, 18.093640f, 18.183260f, 12.785890f, +1.758524f, 2.158075f, 1.077115f, 2.163427f, 2.394973f, 3.567285f, 2.425690f, 2.521346f, 2.465181f, 3.414822f, 2.197326f, 3.172722f, 3.378080f, 4.469704f, 4.180451f, 3.624650f, +0.004191f, 0.005102f, 
0.008609f, 0.023777f, 0.002706f, 0.003998f, 0.009191f, 0.013137f, 0.000060f, 0.000082f, 0.000179f, 0.000356f, 0.002953f, 0.003877f, 0.012257f, 0.014614f, +0.004169f, 0.005083f, 0.010630f, 0.024760f, 0.002897f, 0.004287f, 0.012215f, 0.014725f, 0.000117f, 0.000161f, 0.000435f, 0.000728f, 0.002388f, 0.003140f, 0.012304f, 0.012372f, +0.017301f, 0.018141f, 0.035817f, 0.088174f, 0.007909f, 0.010065f, 0.027074f, 0.034492f, 0.000234f, 0.000278f, 0.000706f, 0.001250f, 0.009310f, 0.010525f, 0.038939f, 0.041381f, +0.000132f, 0.000157f, 0.000289f, 0.000878f, 0.000138f, 0.000200f, 0.000501f, 0.000787f, 0.000004f, 0.000006f, 0.000013f, 0.000028f, 0.000100f, 0.000128f, 0.000442f, 0.000579f, +2.653185f, 3.487261f, 1.455015f, 3.463946f, 2.175435f, 3.470419f, 1.972727f, 2.430455f, 1.647889f, 2.444814f, 1.315104f, 2.250716f, 4.200442f, 5.952542f, 4.654087f, 4.783004f, +2.079195f, 2.736787f, 1.415307f, 2.841792f, 1.835081f, 2.931702f, 2.065530f, 2.146298f, 2.538477f, 3.771550f, 2.514553f, 3.629606f, 2.676168f, 3.797953f, 3.680513f, 3.190161f, +8.512017f, 9.635371f, 4.704432f, 9.982953f, 4.941529f, 6.789163f, 4.516033f, 4.959361f, 5.010882f, 6.402522f, 4.030149f, 6.147945f, 10.289990f, 12.558600f, 11.490240f, 10.525520f, +0.860714f, 1.108347f, 0.503632f, 1.317798f, 1.145005f, 1.789548f, 1.107855f, 1.500152f, 1.158762f, 1.684273f, 0.986691f, 1.855982f, 1.460816f, 2.028165f, 1.726989f, 1.950687f, +0.410004f, 0.513613f, 0.232537f, 0.483853f, 0.289496f, 0.440158f, 0.271498f, 0.292352f, 0.285491f, 0.403684f, 0.235629f, 0.352459f, 0.864407f, 1.167500f, 0.990517f, 0.889706f, +0.432281f, 0.542304f, 0.304316f, 0.534054f, 0.328550f, 0.500262f, 0.382456f, 0.347343f, 0.591683f, 0.837851f, 0.606150f, 0.764711f, 0.740947f, 1.002201f, 1.053868f, 0.798378f, +2.156393f, 2.326456f, 1.232555f, 2.286002f, 1.078034f, 1.411621f, 1.018901f, 0.977955f, 1.423162f, 1.733095f, 1.183764f, 1.578310f, 3.471470f, 4.038043f, 4.008958f, 3.209699f, +0.234897f, 0.288288f, 0.142147f, 0.325080f, 0.269093f, 0.400838f, 
0.269266f, 0.318678f, 0.354534f, 0.491143f, 0.312212f, 0.513287f, 0.530906f, 0.702517f, 0.649106f, 0.640815f, +5.156721f, 6.672776f, 3.510351f, 4.941551f, 3.878231f, 6.090955f, 4.365476f, 3.180249f, 3.513579f, 5.131965f, 3.480638f, 3.522319f, 8.529858f, 11.900500f, 11.731630f, 7.129081f, +2.603565f, 3.373883f, 2.199888f, 2.611869f, 2.107704f, 3.315050f, 2.944848f, 1.809383f, 3.487081f, 5.100639f, 4.287720f, 3.659605f, 3.501282f, 4.891919f, 5.977217f, 3.063460f, +11.865740f, 13.223510f, 8.140413f, 10.214270f, 6.318369f, 8.546254f, 7.167665f, 4.654310f, 7.662881f, 9.639290f, 7.650257f, 6.900712f, 14.987110f, 18.007780f, 20.773470f, 11.252070f, +1.886001f, 2.390975f, 1.369851f, 2.119424f, 2.301293f, 3.540984f, 2.763913f, 2.213022f, 2.785436f, 3.985909f, 2.944129f, 3.274602f, 3.344409f, 4.571335f, 4.907840f, 3.277914f, +0.050226f, 0.063169f, 0.122345f, 0.260287f, 0.029054f, 0.044350f, 0.117024f, 0.128843f, 0.000757f, 0.001074f, 0.002682f, 0.004102f, 0.032673f, 0.044305f, 0.160800f, 0.147678f, +0.033693f, 0.042437f, 0.101872f, 0.182794f, 0.020980f, 0.032071f, 0.104888f, 0.097398f, 0.000998f, 0.001419f, 0.004390f, 0.005663f, 0.017819f, 0.024198f, 0.108854f, 0.084317f, +0.221477f, 0.239895f, 0.543701f, 1.031040f, 0.090709f, 0.119251f, 0.368214f, 0.361354f, 0.003162f, 0.003867f, 0.011298f, 0.015402f, 0.110013f, 0.128478f, 0.545651f, 0.446676f, +0.001736f, 0.002139f, 0.004512f, 0.010551f, 0.001629f, 0.002437f, 0.007003f, 0.008474f, 0.000057f, 0.000079f, 0.000214f, 0.000360f, 0.001211f, 0.001609f, 0.006358f, 0.006418f, +4.799606f, 6.516847f, 3.121213f, 5.723891f, 3.525835f, 5.810489f, 3.791410f, 3.598202f, 3.140631f, 4.813376f, 2.972124f, 3.918248f, 7.014374f, 10.268600f, 9.216083f, 7.295866f, +2.536558f, 3.449097f, 2.047472f, 3.166824f, 2.005776f, 3.310259f, 2.677174f, 2.142888f, 3.262674f, 5.007665f, 3.832472f, 4.261300f, 3.013830f, 4.418450f, 4.915091f, 3.281709f, +16.448000f, 19.233760f, 10.779680f, 17.620650f, 8.554996f, 12.141980f, 9.271147f, 7.842713f, 10.201070f, 
13.464730f, 9.729053f, 11.432570f, 18.354870f, 23.141570f, 24.304330f, 17.149930f, +1.709107f, 2.273534f, 1.185883f, 2.390240f, 2.037022f, 3.288870f, 2.337163f, 2.437843f, 2.424130f, 3.639896f, 2.447712f, 3.546642f, 2.677700f, 3.840472f, 3.753824f, 3.266151f, +0.508870f, 0.565409f, 0.250370f, 0.674148f, 0.426200f, 0.574762f, 0.346745f, 0.483170f, 0.354572f, 0.444694f, 0.253871f, 0.491409f, 1.069107f, 1.280759f, 1.062763f, 1.235299f, +0.595832f, 0.662992f, 0.363877f, 0.826353f, 0.537170f, 0.725463f, 0.542455f, 0.637517f, 0.816093f, 1.025003f, 0.725275f, 1.184052f, 1.017721f, 1.220967f, 1.255740f, 1.231043f, +1.807775f, 1.729893f, 0.896385f, 2.151374f, 1.072016f, 1.245074f, 0.878968f, 1.091719f, 1.193889f, 1.289555f, 0.861483f, 1.486363f, 2.900106f, 2.992122f, 2.905387f, 3.010146f, +0.186569f, 0.203093f, 0.097942f, 0.289851f, 0.253522f, 0.334958f, 0.220073f, 0.337046f, 0.281782f, 0.346234f, 0.215266f, 0.457970f, 0.420207f, 0.493185f, 0.445690f, 0.569377f, +9.306487f, 10.681360f, 5.495848f, 10.011480f, 8.302299f, 11.565330f, 8.107152f, 7.642733f, 6.345342f, 8.220473f, 5.453008f, 7.140963f, 15.340440f, 18.983190f, 18.303180f, 14.393040f, +5.218182f, 5.997755f, 3.824930f, 5.876586f, 5.010870f, 6.990392f, 6.073491f, 4.828995f, 6.993684f, 9.073535f, 7.460062f, 8.239505f, 6.992975f, 8.666067f, 10.356330f, 6.868628f, +14.464540f, 14.297640f, 8.608516f, 13.977840f, 9.136240f, 10.960900f, 8.991087f, 7.555109f, 9.347493f, 10.429320f, 8.095637f, 9.449739f, 18.205880f, 19.402690f, 21.891470f, 15.344370f, +2.178193f, 2.449274f, 1.372460f, 2.747863f, 3.152673f, 4.302678f, 3.284758f, 3.403421f, 3.219146f, 4.085853f, 2.951728f, 4.248434f, 3.849087f, 4.666479f, 4.900053f, 4.235052f, +0.004382f, 0.004888f, 0.009260f, 0.025494f, 0.003007f, 0.004071f, 0.010506f, 0.014969f, 0.000066f, 0.000083f, 0.000203f, 0.000402f, 0.002841f, 0.003417f, 0.012128f, 0.014414f, +0.003265f, 0.003647f, 0.008563f, 0.019883f, 0.002411f, 0.003269f, 0.010458f, 0.012567f, 0.000097f, 0.000122f, 0.000369f, 
0.000616f, 0.001721f, 0.002072f, 0.009118f, 0.009139f, +0.013052f, 0.012540f, 0.027796f, 0.068211f, 0.006341f, 0.007394f, 0.022330f, 0.028357f, 0.000186f, 0.000202f, 0.000578f, 0.001020f, 0.006461f, 0.006692f, 0.027799f, 0.029448f, +0.000097f, 0.000106f, 0.000219f, 0.000661f, 0.000108f, 0.000143f, 0.000402f, 0.000630f, 0.000003f, 0.000004f, 0.000010f, 0.000023f, 0.000067f, 0.000079f, 0.000307f, 0.000401f, +3.718466f, 4.478201f, 2.097749f, 4.978200f, 3.240207f, 4.736215f, 3.022620f, 3.712096f, 2.434828f, 3.309855f, 1.998896f, 3.410092f, 5.415407f, 7.031718f, 6.172492f, 6.323272f, +2.182437f, 2.632148f, 1.528224f, 3.058748f, 2.047067f, 2.996535f, 2.370272f, 2.455115f, 2.809078f, 3.824135f, 2.862471f, 4.118651f, 2.584043f, 3.360151f, 3.655817f, 3.158667f, +8.607343f, 8.927460f, 4.893659f, 10.351440f, 5.310414f, 6.685071f, 4.992453f, 5.465095f, 5.341889f, 6.253954f, 4.419688f, 6.720718f, 9.571750f, 10.703870f, 10.995010f, 10.039800f, +0.847364f, 0.999792f, 0.510051f, 1.330347f, 1.197978f, 1.715567f, 1.192378f, 1.609465f, 1.202678f, 1.601734f, 1.053479f, 1.975303f, 1.322959f, 1.682972f, 1.608905f, 1.811522f, +0.845225f, 1.052795f, 0.377708f, 1.255483f, 0.525600f, 0.794595f, 0.388383f, 0.668085f, 0.715918f, 1.006551f, 0.465563f, 1.112476f, 1.957457f, 2.628779f, 1.767319f, 2.535902f, +0.807025f, 1.006670f, 0.447637f, 1.254928f, 0.540196f, 0.817844f, 0.495463f, 0.718821f, 1.343679f, 1.891892f, 1.084591f, 2.185829f, 1.519488f, 2.043563f, 1.702847f, 2.060777f, +4.130608f, 4.431027f, 1.860253f, 5.511567f, 1.818642f, 2.367863f, 1.354336f, 2.076569f, 3.316088f, 4.015294f, 2.173283f, 4.628886f, 7.304466f, 8.448302f, 6.646395f, 8.500643f, +0.430970f, 0.525918f, 0.205487f, 0.750708f, 0.434810f, 0.644006f, 0.342814f, 0.648130f, 0.791248f, 1.089896f, 0.549013f, 1.441871f, 1.069980f, 1.407789f, 1.030749f, 1.625557f, +7.911370f, 10.179070f, 4.243343f, 9.542314f, 5.240112f, 8.183059f, 4.647485f, 5.408544f, 6.557124f, 9.522932f, 5.118020f, 8.273793f, 14.375030f, 19.941400f, 15.577760f, 
15.122130f, +3.617284f, 4.660877f, 2.408210f, 4.567494f, 2.579008f, 4.033264f, 2.839132f, 2.786672f, 5.893344f, 8.571322f, 5.709601f, 7.784782f, 5.343556f, 7.423452f, 7.187564f, 5.884748f, +16.915090f, 18.743450f, 9.143352f, 18.327330f, 7.932559f, 10.668600f, 7.090315f, 7.354888f, 13.287920f, 16.620090f, 10.452510f, 15.061610f, 23.468550f, 28.038330f, 25.630500f, 22.177540f, +2.575157f, 3.246088f, 1.473719f, 3.642438f, 2.767341f, 4.233875f, 2.618753f, 3.349569f, 4.626373f, 6.582611f, 3.852865f, 6.845708f, 5.016147f, 6.817377f, 5.799903f, 6.188149f, +0.024318f, 0.030411f, 0.046673f, 0.158622f, 0.012389f, 0.018804f, 0.039317f, 0.069151f, 0.000446f, 0.000629f, 0.001245f, 0.003041f, 0.017377f, 0.023430f, 0.067383f, 0.098859f, +0.014773f, 0.018501f, 0.035194f, 0.100880f, 0.008101f, 0.012314f, 0.031913f, 0.047340f, 0.000532f, 0.000752f, 0.001845f, 0.003802f, 0.008583f, 0.011589f, 0.041309f, 0.051115f, +0.099639f, 0.107311f, 0.192725f, 0.583830f, 0.035940f, 0.046980f, 0.114950f, 0.180208f, 0.001731f, 0.002104f, 0.004871f, 0.010609f, 0.054367f, 0.063130f, 0.212462f, 0.277839f, +0.000748f, 0.000917f, 0.001532f, 0.005723f, 0.000618f, 0.000920f, 0.002094f, 0.004048f, 0.000030f, 0.000041f, 0.000089f, 0.000238f, 0.000573f, 0.000757f, 0.002371f, 0.003824f, +6.098357f, 8.233190f, 3.124713f, 9.154001f, 3.945464f, 6.465054f, 3.342846f, 5.067970f, 4.854111f, 7.397176f, 3.619422f, 7.622495f, 9.790056f, 14.250530f, 10.134960f, 12.816970f, +2.918692f, 3.946138f, 1.856269f, 4.586483f, 2.032613f, 3.335475f, 2.137610f, 2.733280f, 4.566702f, 6.969276f, 4.226565f, 7.507295f, 3.809353f, 5.552975f, 4.894896f, 5.220894f, +19.418780f, 22.578560f, 10.027530f, 26.184420f, 8.895234f, 12.553090f, 7.595397f, 10.263990f, 14.650080f, 19.227170f, 11.008910f, 20.665710f, 23.803950f, 29.841040f, 24.834810f, 27.994470f, +1.932682f, 2.556324f, 1.056604f, 3.402083f, 2.028690f, 3.256792f, 1.833954f, 3.055892f, 3.334514f, 4.978391f, 2.652873f, 6.140537f, 3.326151f, 4.743382f, 3.673952f, 5.106563f, +}; + 
+static const float acceptor_me2x3acc4[16384] = { +0.034748f, 0.053536f, 0.000903f, 0.143367f, 0.038723f, 0.115762f, 0.066831f, 0.172192f, 0.000570f, 0.001446f, 0.000683f, 0.004695f, 0.103045f, 0.319651f, 0.153655f, 0.431369f, +0.031342f, 0.057415f, 0.000667f, 0.141258f, 0.049473f, 0.175853f, 0.069935f, 0.240318f, 0.019452f, 0.058702f, 0.019117f, 0.175139f, 0.108159f, 0.398928f, 0.132097f, 0.494599f, +0.001270f, 0.002731f, 0.000050f, 0.006952f, 0.001982f, 0.008271f, 0.005189f, 0.011695f, 0.000587f, 0.002081f, 0.001069f, 0.006425f, 0.005728f, 0.024805f, 0.012958f, 0.031821f, +0.055800f, 0.093433f, 0.001475f, 0.254089f, 0.136888f, 0.444747f, 0.240457f, 0.671810f, 0.045152f, 0.124550f, 0.055143f, 0.410743f, 0.239982f, 0.809049f, 0.364211f, 1.108744f, +0.031725f, 0.047173f, 0.000821f, 0.104089f, 0.043776f, 0.126302f, 0.075239f, 0.154798f, 0.000747f, 0.001829f, 0.000892f, 0.004895f, 0.122122f, 0.365610f, 0.181345f, 0.406533f, +0.035930f, 0.063523f, 0.000761f, 0.128773f, 0.070226f, 0.240910f, 0.098859f, 0.271266f, 0.032022f, 0.093267f, 0.031341f, 0.229277f, 0.160948f, 0.572919f, 0.195754f, 0.585271f, +0.017237f, 0.035780f, 0.000676f, 0.075049f, 0.033314f, 0.134182f, 0.086870f, 0.156333f, 0.011451f, 0.039159f, 0.020760f, 0.099604f, 0.100942f, 0.421881f, 0.227413f, 0.445931f, +0.061330f, 0.099109f, 0.001615f, 0.222078f, 0.186295f, 0.584149f, 0.325885f, 0.727045f, 0.071266f, 0.189723f, 0.086673f, 0.515528f, 0.342379f, 1.113986f, 0.517458f, 1.257883f, +0.004423f, 0.007235f, 0.000101f, 0.018111f, 0.004154f, 0.013185f, 0.006277f, 0.018334f, 0.000114f, 0.000307f, 0.000120f, 0.000931f, 0.015410f, 0.050752f, 0.020118f, 0.064025f, +0.005178f, 0.010071f, 0.000096f, 0.023162f, 0.006889f, 0.025998f, 0.008526f, 0.033212f, 0.005044f, 0.016160f, 0.004340f, 0.045071f, 0.020994f, 0.082213f, 0.022449f, 0.095285f, +0.002654f, 0.006061f, 0.000092f, 0.014423f, 0.003492f, 0.015471f, 0.008005f, 0.020450f, 0.001927f, 0.007249f, 0.003071f, 0.020920f, 0.014068f, 0.064682f, 0.027865f, 0.077567f, 
+0.011676f, 0.020757f, 0.000270f, 0.052768f, 0.024141f, 0.083276f, 0.037128f, 0.117591f, 0.014828f, 0.043427f, 0.015855f, 0.133877f, 0.058997f, 0.211174f, 0.078393f, 0.270532f, +0.065421f, 0.088787f, 0.001523f, 0.230012f, 0.084845f, 0.223429f, 0.131239f, 0.321508f, 0.001515f, 0.003387f, 0.001629f, 0.010641f, 0.255104f, 0.697073f, 0.340922f, 0.910024f, +0.077558f, 0.125152f, 0.001479f, 0.297870f, 0.142476f, 0.446105f, 0.180505f, 0.589759f, 0.068001f, 0.180770f, 0.059896f, 0.521745f, 0.351936f, 1.143423f, 0.385224f, 1.371411f, +0.051527f, 0.097624f, 0.001820f, 0.240414f, 0.093602f, 0.344106f, 0.219662f, 0.470700f, 0.033676f, 0.105110f, 0.054945f, 0.313901f, 0.305678f, 1.166054f, 0.619776f, 1.447082f, +0.105237f, 0.155220f, 0.002494f, 0.408352f, 0.300451f, 0.859873f, 0.473005f, 1.256520f, 0.120302f, 0.292315f, 0.131674f, 0.932566f, 0.595132f, 1.767350f, 0.809483f, 2.343045f, +0.043054f, 0.125193f, 0.001437f, 0.200584f, 0.037671f, 0.212547f, 0.083547f, 0.189157f, 0.000584f, 0.002795f, 0.000900f, 0.005431f, 0.094915f, 0.555685f, 0.181868f, 0.448661f, +0.040118f, 0.138704f, 0.001097f, 0.204170f, 0.049721f, 0.333559f, 0.090318f, 0.272725f, 0.020586f, 0.117250f, 0.025998f, 0.209295f, 0.102920f, 0.716435f, 0.161523f, 0.531438f, +0.001965f, 0.007975f, 0.000099f, 0.012147f, 0.002408f, 0.018966f, 0.008102f, 0.016045f, 0.000751f, 0.005025f, 0.001758f, 0.009282f, 0.006589f, 0.053856f, 0.019156f, 0.041335f, +0.062594f, 0.197808f, 0.002127f, 0.321844f, 0.120565f, 0.739290f, 0.272143f, 0.668136f, 0.041876f, 0.218012f, 0.065718f, 0.430155f, 0.200121f, 1.273318f, 0.390276f, 1.044022f, +0.051858f, 0.145530f, 0.001724f, 0.192122f, 0.056183f, 0.305933f, 0.124084f, 0.224335f, 0.001009f, 0.004666f, 0.001550f, 0.007470f, 0.148396f, 0.838482f, 0.283164f, 0.557812f, +0.060673f, 0.202451f, 0.001652f, 0.245544f, 0.093110f, 0.602838f, 0.168431f, 0.406123f, 0.044708f, 0.245758f, 0.056227f, 0.361459f, 0.202043f, 1.357374f, 0.315771f, 0.829620f, +0.035189f, 0.137859f, 0.001775f, 0.173005f, 
0.053399f, 0.405931f, 0.178929f, 0.282959f, 0.019328f, 0.124745f, 0.045027f, 0.189840f, 0.153194f, 1.208391f, 0.443497f, 0.764190f, +0.090760f, 0.276809f, 0.003071f, 0.371096f, 0.216460f, 1.280997f, 0.486572f, 0.953900f, 0.087195f, 0.438109f, 0.136270f, 0.712247f, 0.376656f, 2.312943f, 0.731505f, 1.562580f, +0.045130f, 0.139328f, 0.001319f, 0.208679f, 0.033281f, 0.199370f, 0.064624f, 0.165862f, 0.000960f, 0.004882f, 0.001296f, 0.008868f, 0.116891f, 0.726591f, 0.196100f, 0.548404f, +0.054583f, 0.200362f, 0.001307f, 0.275703f, 0.057016f, 0.406110f, 0.090679f, 0.310397f, 0.043957f, 0.265821f, 0.048604f, 0.443564f, 0.164518f, 1.215922f, 0.226060f, 0.843146f, +0.033823f, 0.145773f, 0.001500f, 0.207546f, 0.034937f, 0.292173f, 0.102923f, 0.231062f, 0.020304f, 0.144162f, 0.041586f, 0.248904f, 0.133278f, 1.156537f, 0.339224f, 0.829794f, +0.107861f, 0.361900f, 0.003208f, 0.550441f, 0.175103f, 1.139995f, 0.346055f, 0.963106f, 0.113253f, 0.626001f, 0.155610f, 1.154623f, 0.405160f, 2.737056f, 0.691798f, 2.097866f, +0.081419f, 0.208546f, 0.002436f, 0.323237f, 0.082907f, 0.412052f, 0.164791f, 0.354749f, 0.001559f, 0.006578f, 0.002154f, 0.012364f, 0.236017f, 1.217171f, 0.405308f, 0.950699f, +0.099715f, 0.303683f, 0.002443f, 0.432441f, 0.143826f, 0.849924f, 0.234148f, 0.672256f, 0.072285f, 0.362665f, 0.081815f, 0.626259f, 0.336371f, 2.062579f, 0.473123f, 1.480089f, +0.080091f, 0.286386f, 0.003635f, 0.421960f, 0.114234f, 0.792587f, 0.344482f, 0.648657f, 0.043278f, 0.254939f, 0.090735f, 0.455512f, 0.353209f, 2.542926f, 0.920252f, 1.888100f, +0.118573f, 0.330074f, 0.003610f, 0.519534f, 0.265796f, 1.435677f, 0.537707f, 1.255187f, 0.112069f, 0.513936f, 0.157622f, 0.980969f, 0.498480f, 2.793865f, 0.871260f, 2.216052f, +0.004387f, 0.011568f, 0.000139f, 0.027394f, 0.005143f, 0.026314f, 0.010857f, 0.034613f, 0.000060f, 0.000259f, 0.000087f, 0.000742f, 0.010342f, 0.054906f, 0.018863f, 0.065523f, +0.004439f, 0.013917f, 0.000116f, 0.030279f, 0.007371f, 0.044842f, 0.012745f, 0.054190f, 
0.002280f, 0.011776f, 0.002741f, 0.031068f, 0.012177f, 0.076868f, 0.018191f, 0.084276f, +0.000190f, 0.000701f, 0.000009f, 0.001577f, 0.000313f, 0.002233f, 0.001001f, 0.002792f, 0.000073f, 0.000442f, 0.000162f, 0.001207f, 0.000683f, 0.005060f, 0.001889f, 0.005740f, +0.007728f, 0.022146f, 0.000250f, 0.053258f, 0.019943f, 0.110897f, 0.042851f, 0.148134f, 0.005175f, 0.024431f, 0.007731f, 0.071249f, 0.026419f, 0.152441f, 0.049045f, 0.184739f, +0.006234f, 0.015866f, 0.000197f, 0.030958f, 0.009050f, 0.044688f, 0.019026f, 0.048434f, 0.000121f, 0.000509f, 0.000178f, 0.001205f, 0.019077f, 0.097751f, 0.034651f, 0.096116f, +0.007921f, 0.023967f, 0.000205f, 0.042964f, 0.016286f, 0.095620f, 0.028043f, 0.095211f, 0.005842f, 0.029122f, 0.006994f, 0.063307f, 0.028204f, 0.171832f, 0.041960f, 0.155227f, +0.004023f, 0.014291f, 0.000193f, 0.026508f, 0.008179f, 0.056383f, 0.026087f, 0.058090f, 0.002212f, 0.012944f, 0.004904f, 0.029116f, 0.018727f, 0.133955f, 0.051606f, 0.125209f, +0.013220f, 0.036565f, 0.000426f, 0.072454f, 0.042246f, 0.226720f, 0.090395f, 0.249532f, 0.012713f, 0.057928f, 0.018913f, 0.139193f, 0.058669f, 0.326712f, 0.108461f, 0.326231f, +0.005292f, 0.014816f, 0.000147f, 0.032799f, 0.005229f, 0.028406f, 0.009665f, 0.034929f, 0.000113f, 0.000520f, 0.000145f, 0.001395f, 0.014657f, 0.082623f, 0.023407f, 0.092171f, +0.006950f, 0.023136f, 0.000158f, 0.047055f, 0.009727f, 0.062831f, 0.014726f, 0.070979f, 0.005603f, 0.030724f, 0.005897f, 0.075776f, 0.022401f, 0.150140f, 0.029300f, 0.153878f, +0.003771f, 0.014740f, 0.000159f, 0.031019f, 0.005219f, 0.039584f, 0.014637f, 0.046269f, 0.002266f, 0.014591f, 0.004418f, 0.037235f, 0.015891f, 0.125054f, 0.038502f, 0.132614f, +0.015325f, 0.046630f, 0.000434f, 0.104826f, 0.033334f, 0.196802f, 0.062709f, 0.245744f, 0.016106f, 0.080735f, 0.021066f, 0.220096f, 0.061557f, 0.377111f, 0.100051f, 0.427214f, +0.012751f, 0.029619f, 0.000363f, 0.067854f, 0.017397f, 0.078410f, 0.032916f, 0.099776f, 0.000244f, 0.000935f, 0.000321f, 0.002598f, 
0.039526f, 0.184855f, 0.064613f, 0.213406f, +0.016958f, 0.046835f, 0.000396f, 0.098573f, 0.032772f, 0.175623f, 0.050786f, 0.205314f, 0.012305f, 0.055985f, 0.013257f, 0.142889f, 0.061170f, 0.340149f, 0.081901f, 0.360770f, +0.011927f, 0.038677f, 0.000515f, 0.084227f, 0.022793f, 0.143415f, 0.065429f, 0.173479f, 0.006451f, 0.034462f, 0.012875f, 0.091011f, 0.056247f, 0.367231f, 0.139499f, 0.403008f, +0.022501f, 0.056801f, 0.000652f, 0.132142f, 0.067578f, 0.331019f, 0.130136f, 0.427748f, 0.021287f, 0.088525f, 0.028499f, 0.249745f, 0.101150f, 0.514115f, 0.168290f, 0.602723f, +0.057900f, 0.090182f, 0.001353f, 0.218290f, 0.056994f, 0.172248f, 0.088483f, 0.231589f, 0.000790f, 0.002027f, 0.000853f, 0.005950f, 0.100644f, 0.315616f, 0.134995f, 0.384985f, +0.060025f, 0.111161f, 0.001149f, 0.247203f, 0.083693f, 0.300743f, 0.106422f, 0.371487f, 0.031006f, 0.094596f, 0.027411f, 0.255103f, 0.121416f, 0.452721f, 0.133389f, 0.507343f, +0.002512f, 0.005461f, 0.000089f, 0.012566f, 0.003463f, 0.014611f, 0.008157f, 0.018674f, 0.000967f, 0.003464f, 0.001584f, 0.009666f, 0.006642f, 0.029078f, 0.013516f, 0.033717f, +0.108103f, 0.182989f, 0.002571f, 0.449802f, 0.234251f, 0.769401f, 0.370141f, 1.050505f, 0.072806f, 0.203029f, 0.079982f, 0.605198f, 0.272512f, 0.928766f, 0.372027f, 1.150468f, +0.077081f, 0.115868f, 0.001794f, 0.231091f, 0.093950f, 0.274028f, 0.145250f, 0.303571f, 0.001510f, 0.003740f, 0.001623f, 0.009045f, 0.173918f, 0.526372f, 0.232311f, 0.529033f, +0.100336f, 0.179331f, 0.001912f, 0.328594f, 0.173225f, 0.600749f, 0.219354f, 0.611428f, 0.074429f, 0.219149f, 0.065526f, 0.486951f, 0.263446f, 0.948031f, 0.288223f, 0.875383f, +0.049720f, 0.104338f, 0.001755f, 0.197815f, 0.084883f, 0.345634f, 0.199103f, 0.363984f, 0.027493f, 0.095045f, 0.044834f, 0.218518f, 0.170671f, 0.721111f, 0.345874f, 0.688956f, +0.173247f, 0.283029f, 0.004103f, 0.573235f, 0.464844f, 1.473518f, 0.731453f, 1.657697f, 0.167557f, 0.450950f, 0.183307f, 1.107573f, 0.566900f, 1.864676f, 0.770705f, 1.903163f, 
+0.068183f, 0.112754f, 0.001395f, 0.255132f, 0.056568f, 0.181513f, 0.076891f, 0.228134f, 0.001460f, 0.003978f, 0.001380f, 0.010914f, 0.139246f, 0.463628f, 0.163527f, 0.528660f, +0.091748f, 0.180398f, 0.001537f, 0.375018f, 0.107819f, 0.411354f, 0.120036f, 0.474991f, 0.074381f, 0.240935f, 0.057573f, 0.607385f, 0.218042f, 0.863197f, 0.209729f, 0.904278f, +0.048575f, 0.112141f, 0.001508f, 0.241212f, 0.056449f, 0.252863f, 0.116410f, 0.302112f, 0.029355f, 0.111644f, 0.042088f, 0.291213f, 0.150923f, 0.701512f, 0.268902f, 0.760399f, +0.209275f, 0.376114f, 0.004358f, 0.864246f, 0.382213f, 1.332879f, 0.528768f, 1.701208f, 0.221207f, 0.654941f, 0.212763f, 1.825000f, 0.619824f, 2.242863f, 0.740851f, 2.597122f, +0.119703f, 0.164232f, 0.002507f, 0.384569f, 0.137129f, 0.365062f, 0.190801f, 0.474822f, 0.002307f, 0.005215f, 0.002232f, 0.014809f, 0.273597f, 0.755783f, 0.328900f, 0.891834f, +0.163105f, 0.266074f, 0.002798f, 0.572406f, 0.264666f, 0.837758f, 0.301621f, 1.001079f, 0.119027f, 0.319878f, 0.094307f, 0.834502f, 0.433822f, 1.424887f, 0.427146f, 1.544732f, +0.111933f, 0.214390f, 0.003556f, 0.477221f, 0.179609f, 0.667509f, 0.379148f, 0.825317f, 0.060889f, 0.192126f, 0.089363f, 0.518614f, 0.389221f, 1.500981f, 0.709873f, 1.683689f, +0.223875f, 0.333816f, 0.004772f, 0.793791f, 0.564579f, 1.633467f, 0.799524f, 2.157533f, 0.213010f, 0.523242f, 0.209722f, 1.508841f, 0.742089f, 2.227872f, 0.907956f, 2.669687f, +0.048066f, 0.074055f, 0.001249f, 0.198315f, 0.060726f, 0.181539f, 0.104806f, 0.270035f, 0.000979f, 0.002484f, 0.001174f, 0.008067f, 0.165131f, 0.512242f, 0.246232f, 0.691271f, +0.040667f, 0.074498f, 0.000865f, 0.183287f, 0.072776f, 0.258684f, 0.102876f, 0.353513f, 0.031350f, 0.094608f, 0.030810f, 0.282267f, 0.162583f, 0.599661f, 0.198566f, 0.743472f, +0.001383f, 0.002974f, 0.000054f, 0.007570f, 0.002447f, 0.010211f, 0.006406f, 0.014438f, 0.000794f, 0.002815f, 0.001446f, 0.008690f, 0.007226f, 0.031293f, 0.016348f, 0.040144f, +0.067005f, 0.112194f, 0.001771f, 0.305108f, 
0.186351f, 0.605451f, 0.327344f, 0.914561f, 0.067345f, 0.185766f, 0.082245f, 0.612624f, 0.333840f, 1.125471f, 0.506655f, 1.542378f, +0.054657f, 0.081271f, 0.001414f, 0.179326f, 0.085502f, 0.246690f, 0.146955f, 0.302347f, 0.001598f, 0.003915f, 0.001910f, 0.010475f, 0.243740f, 0.729712f, 0.361941f, 0.811388f, +0.058065f, 0.102657f, 0.001230f, 0.208103f, 0.128662f, 0.441375f, 0.181121f, 0.496990f, 0.064278f, 0.187213f, 0.062910f, 0.460225f, 0.301323f, 1.072603f, 0.366484f, 1.095726f, +0.023378f, 0.048527f, 0.000917f, 0.101787f, 0.051224f, 0.206322f, 0.133573f, 0.240380f, 0.019291f, 0.065969f, 0.034973f, 0.167798f, 0.158605f, 0.662877f, 0.357321f, 0.700664f, +0.091723f, 0.148223f, 0.002415f, 0.332128f, 0.315864f, 0.990429f, 0.552541f, 1.232711f, 0.132385f, 0.352435f, 0.161005f, 0.957658f, 0.593199f, 1.930070f, 0.896537f, 2.179383f, +0.006819f, 0.011155f, 0.000155f, 0.027924f, 0.007261f, 0.023047f, 0.010972f, 0.032047f, 0.000218f, 0.000587f, 0.000229f, 0.001783f, 0.027524f, 0.090652f, 0.035934f, 0.114359f, +0.007489f, 0.014565f, 0.000139f, 0.033498f, 0.011295f, 0.042626f, 0.013979f, 0.054455f, 0.009060f, 0.029030f, 0.007796f, 0.080965f, 0.035175f, 0.137744f, 0.037613f, 0.159644f, +0.003221f, 0.007356f, 0.000111f, 0.017506f, 0.004805f, 0.021289f, 0.011015f, 0.028141f, 0.002905f, 0.010929f, 0.004630f, 0.031540f, 0.019781f, 0.090952f, 0.039182f, 0.109070f, +0.015627f, 0.027781f, 0.000362f, 0.070625f, 0.036631f, 0.126359f, 0.056337f, 0.178427f, 0.024650f, 0.072194f, 0.026357f, 0.222561f, 0.091477f, 0.327431f, 0.121551f, 0.419466f, +0.095542f, 0.129665f, 0.002224f, 0.335913f, 0.140476f, 0.369926f, 0.217289f, 0.532313f, 0.002748f, 0.006144f, 0.002955f, 0.019303f, 0.431605f, 1.179362f, 0.576798f, 1.539650f, +0.106246f, 0.171445f, 0.002026f, 0.408052f, 0.221274f, 0.692827f, 0.280335f, 0.915931f, 0.115707f, 0.307590f, 0.101916f, 0.887778f, 0.558527f, 1.814631f, 0.611356f, 2.176452f, +0.059241f, 0.112239f, 0.002092f, 0.276405f, 0.122004f, 0.448516f, 0.286312f, 0.613521f, 
0.048091f, 0.150103f, 0.078464f, 0.448266f, 0.407140f, 1.553093f, 0.825493f, 1.927401f, +0.133416f, 0.196781f, 0.003161f, 0.517691f, 0.431827f, 1.235862f, 0.679832f, 1.805948f, 0.189438f, 0.460304f, 0.207346f, 1.468499f, 0.874062f, 2.595684f, 1.188877f, 3.441199f, +0.098037f, 0.285071f, 0.003272f, 0.456743f, 0.097248f, 0.548692f, 0.215676f, 0.488310f, 0.001650f, 0.007906f, 0.002545f, 0.015361f, 0.250380f, 1.465869f, 0.479758f, 1.183545f, +0.085690f, 0.296261f, 0.002343f, 0.436093f, 0.120401f, 0.807715f, 0.218706f, 0.660406f, 0.054615f, 0.311068f, 0.068973f, 0.555267f, 0.254670f, 1.772783f, 0.399680f, 1.315017f, +0.003522f, 0.014297f, 0.000178f, 0.021775f, 0.004893f, 0.038544f, 0.016465f, 0.032608f, 0.001673f, 0.011190f, 0.003914f, 0.020667f, 0.013684f, 0.111843f, 0.039781f, 0.085842f, +0.123728f, 0.391001f, 0.004203f, 0.636179f, 0.270180f, 1.656716f, 0.609860f, 1.497263f, 0.102816f, 0.535269f, 0.161352f, 1.056127f, 0.458268f, 2.915840f, 0.893715f, 2.390764f, +0.147069f, 0.412726f, 0.004889f, 0.544859f, 0.180639f, 0.983634f, 0.398955f, 0.721281f, 0.003555f, 0.016437f, 0.005459f, 0.026314f, 0.487554f, 2.754826f, 0.930332f, 1.832686f, +0.161406f, 0.538568f, 0.004394f, 0.653204f, 0.280811f, 1.818109f, 0.507972f, 1.224834f, 0.147728f, 0.812054f, 0.185791f, 1.194362f, 0.622670f, 4.183233f, 0.973163f, 2.556772f, +0.078563f, 0.307789f, 0.003962f, 0.386256f, 0.135162f, 1.027470f, 0.452896f, 0.716210f, 0.053600f, 0.345938f, 0.124867f, 0.526458f, 0.396235f, 3.125486f, 1.147098f, 1.976566f, +0.223440f, 0.681473f, 0.007559f, 0.913597f, 0.604149f, 3.575324f, 1.358045f, 2.662378f, 0.266636f, 1.339699f, 0.416702f, 2.177989f, 1.074250f, 6.596684f, 2.086306f, 4.456592f, +0.114540f, 0.353616f, 0.003347f, 0.529629f, 0.095762f, 0.573656f, 0.185945f, 0.477243f, 0.003026f, 0.015391f, 0.004085f, 0.027955f, 0.343690f, 2.136368f, 0.576585f, 1.612452f, +0.129946f, 0.477003f, 0.003110f, 0.656367f, 0.153888f, 1.096095f, 0.244744f, 0.837765f, 0.129985f, 0.786052f, 0.143726f, 1.311654f, 
0.453745f, 3.353545f, 0.623478f, 2.325417f, +0.067579f, 0.291259f, 0.002996f, 0.414685f, 0.079139f, 0.661825f, 0.233140f, 0.523396f, 0.050390f, 0.357776f, 0.103206f, 0.617721f, 0.308499f, 2.677043f, 0.785203f, 1.920729f, +0.237639f, 0.797337f, 0.007069f, 1.212732f, 0.437368f, 2.847447f, 0.864367f, 2.405620f, 0.309928f, 1.713113f, 0.425841f, 3.159738f, 1.034123f, 6.986022f, 1.765736f, 5.354563f, +0.195735f, 0.501354f, 0.005856f, 0.777079f, 0.225962f, 1.123040f, 0.449134f, 0.966860f, 0.004655f, 0.019643f, 0.006433f, 0.036920f, 0.657324f, 3.389908f, 1.128812f, 2.647764f, +0.224863f, 0.684821f, 0.005510f, 0.975177f, 0.367699f, 2.172877f, 0.598612f, 1.718659f, 0.202470f, 1.015826f, 0.229165f, 1.754154f, 0.878755f, 5.388393f, 1.236012f, 3.866665f, +0.151578f, 0.542007f, 0.006880f, 0.798592f, 0.245102f, 1.700590f, 0.739127f, 1.391771f, 0.101737f, 0.599305f, 0.213298f, 1.070805f, 0.774423f, 5.575450f, 2.017683f, 4.139721f, +0.247451f, 0.688834f, 0.007534f, 1.084222f, 0.628856f, 3.396720f, 1.272180f, 2.969694f, 0.290500f, 1.332204f, 0.408582f, 2.542825f, 1.205160f, 6.754638f, 2.106417f, 5.357679f, +0.036548f, 0.096374f, 0.001161f, 0.228224f, 0.048574f, 0.248534f, 0.102545f, 0.326916f, 0.000616f, 0.002675f, 0.000904f, 0.007683f, 0.099811f, 0.529920f, 0.182052f, 0.632388f, +0.034688f, 0.108758f, 0.000903f, 0.236618f, 0.065302f, 0.397278f, 0.112916f, 0.480098f, 0.022129f, 0.114302f, 0.026603f, 0.301566f, 0.110239f, 0.695906f, 0.164689f, 0.762973f, +0.001248f, 0.004596f, 0.000060f, 0.010346f, 0.002324f, 0.016601f, 0.007444f, 0.020758f, 0.000594f, 0.003600f, 0.001322f, 0.009829f, 0.005187f, 0.038446f, 0.014354f, 0.043614f, +0.055887f, 0.160162f, 0.001807f, 0.385161f, 0.163511f, 0.909242f, 0.351333f, 1.214542f, 0.046485f, 0.219465f, 0.069442f, 0.640018f, 0.221347f, 1.277185f, 0.410910f, 1.547782f, +0.064688f, 0.164627f, 0.002047f, 0.321223f, 0.106455f, 0.525684f, 0.223806f, 0.569742f, 0.001565f, 0.006563f, 0.002288f, 0.015528f, 0.229317f, 1.175016f, 0.416529f, 1.155369f, 
+0.077091f, 0.233270f, 0.001998f, 0.418168f, 0.179700f, 1.055092f, 0.309434f, 1.050583f, 0.070625f, 0.352060f, 0.084550f, 0.765334f, 0.318017f, 1.937496f, 0.473120f, 1.750265f, +0.032859f, 0.116740f, 0.001577f, 0.216533f, 0.075741f, 0.522139f, 0.241587f, 0.537948f, 0.022439f, 0.131334f, 0.049760f, 0.295410f, 0.177211f, 1.267632f, 0.488353f, 1.184868f, +0.119080f, 0.329354f, 0.003835f, 0.652608f, 0.431393f, 2.315159f, 0.923075f, 2.548111f, 0.142236f, 0.648089f, 0.211597f, 1.557277f, 0.612200f, 3.409179f, 1.131773f, 3.404161f, +0.049141f, 0.137581f, 0.001367f, 0.304565f, 0.055047f, 0.299039f, 0.101746f, 0.367704f, 0.001300f, 0.005994f, 0.001670f, 0.016091f, 0.157676f, 0.888814f, 0.251800f, 0.991527f, +0.060538f, 0.201523f, 0.001379f, 0.409859f, 0.096056f, 0.620445f, 0.145420f, 0.700907f, 0.060614f, 0.332406f, 0.063798f, 0.819822f, 0.226042f, 1.515019f, 0.295660f, 1.552737f, +0.027569f, 0.107753f, 0.001164f, 0.226753f, 0.043257f, 0.328054f, 0.121304f, 0.383456f, 0.020576f, 0.132488f, 0.040117f, 0.338096f, 0.134579f, 1.059049f, 0.326062f, 1.123078f, +0.123532f, 0.375874f, 0.003498f, 0.844982f, 0.304622f, 1.798483f, 0.573068f, 2.245746f, 0.161264f, 0.808348f, 0.210920f, 2.203670f, 0.574838f, 3.521594f, 0.934313f, 3.989481f, +0.112157f, 0.260519f, 0.003194f, 0.596819f, 0.173478f, 0.781881f, 0.328230f, 0.994930f, 0.002670f, 0.010217f, 0.003512f, 0.028383f, 0.402761f, 1.883615f, 0.658390f, 2.174535f, +0.139912f, 0.386412f, 0.003263f, 0.813280f, 0.306536f, 1.642707f, 0.475037f, 1.920426f, 0.126099f, 0.573728f, 0.135860f, 1.464324f, 0.584675f, 3.251192f, 0.782823f, 3.448284f, +0.082588f, 0.267809f, 0.003568f, 0.583215f, 0.178929f, 1.125826f, 0.513627f, 1.361829f, 0.055485f, 0.296402f, 0.110733f, 0.782757f, 0.451203f, 2.945847f, 1.119027f, 3.232841f, +0.171800f, 0.433695f, 0.004979f, 1.008952f, 0.584972f, 2.865365f, 1.126486f, 3.702668f, 0.201879f, 0.839562f, 0.270283f, 2.368544f, 0.894720f, 4.547585f, 1.488608f, 5.331366f, +0.112315f, 0.174936f, 0.002624f, 0.423442f, 
0.125340f, 0.378802f, 0.194589f, 0.509301f, 0.001903f, 0.004884f, 0.002054f, 0.014336f, 0.226171f, 0.709267f, 0.303368f, 0.865157f, +0.109220f, 0.202267f, 0.002090f, 0.449806f, 0.172647f, 0.620391f, 0.219534f, 0.766327f, 0.070078f, 0.213797f, 0.061952f, 0.576560f, 0.255941f, 0.954321f, 0.281180f, 1.069463f, +0.003836f, 0.008340f, 0.000136f, 0.019190f, 0.005995f, 0.025295f, 0.014122f, 0.032329f, 0.001834f, 0.006571f, 0.003004f, 0.018335f, 0.011750f, 0.051442f, 0.023912f, 0.059649f, +0.182035f, 0.308137f, 0.004329f, 0.757427f, 0.447197f, 1.468827f, 0.706619f, 2.005471f, 0.152281f, 0.424653f, 0.167289f, 1.265825f, 0.531614f, 1.811832f, 0.725748f, 2.244328f, +0.186226f, 0.279935f, 0.004333f, 0.558310f, 0.257328f, 0.750561f, 0.397840f, 0.831481f, 0.004532f, 0.011223f, 0.004871f, 0.027144f, 0.486776f, 1.473256f, 0.650211f, 1.480701f, +0.227385f, 0.406406f, 0.004334f, 0.744671f, 0.445055f, 1.543465f, 0.563572f, 1.570903f, 0.209509f, 0.616881f, 0.184448f, 1.370716f, 0.691654f, 2.488973f, 0.756704f, 2.298240f, +0.094566f, 0.198447f, 0.003338f, 0.376238f, 0.183031f, 0.745277f, 0.429318f, 0.784845f, 0.064950f, 0.224536f, 0.105918f, 0.516234f, 0.376059f, 1.588903f, 0.762101f, 1.518051f, +0.363345f, 0.593586f, 0.008605f, 1.202224f, 1.105246f, 3.503541f, 1.739155f, 3.941460f, 0.436489f, 1.174731f, 0.477517f, 2.885242f, 1.377373f, 4.530528f, 1.872550f, 4.624039f, +0.147420f, 0.243786f, 0.003016f, 0.551624f, 0.138659f, 0.444923f, 0.188474f, 0.559201f, 0.003921f, 0.010682f, 0.003705f, 0.029311f, 0.348782f, 1.161290f, 0.409601f, 1.324180f, +0.186074f, 0.365866f, 0.003118f, 0.760576f, 0.247905f, 0.945813f, 0.275995f, 1.092131f, 0.187375f, 0.606943f, 0.145032f, 1.530071f, 0.512300f, 2.028117f, 0.492768f, 2.124639f, +0.082681f, 0.190876f, 0.002566f, 0.410569f, 0.108928f, 0.487946f, 0.224635f, 0.582982f, 0.062063f, 0.236037f, 0.088982f, 0.615682f, 0.297602f, 1.383298f, 0.530243f, 1.499415f, +0.392785f, 0.705923f, 0.008179f, 1.622093f, 0.813284f, 2.836143f, 1.125131f, 3.619886f, 
0.515698f, 1.526855f, 0.496012f, 4.254597f, 1.347718f, 4.876787f, 1.610876f, 5.647072f, +0.245151f, 0.336346f, 0.005134f, 0.787594f, 0.318389f, 0.847607f, 0.443003f, 1.102448f, 0.005869f, 0.013266f, 0.005678f, 0.037670f, 0.649131f, 1.793157f, 0.780341f, 2.115949f, +0.313334f, 0.511144f, 0.005374f, 1.099626f, 0.576420f, 1.824561f, 0.656902f, 2.180261f, 0.284018f, 0.763276f, 0.225032f, 1.991249f, 0.965484f, 3.171128f, 0.950626f, 3.437847f, +0.180466f, 0.345655f, 0.005734f, 0.769410f, 0.328295f, 1.220097f, 0.693021f, 1.508545f, 0.121937f, 0.384753f, 0.178959f, 1.038580f, 0.726988f, 2.803536f, 1.325903f, 3.144798f, +0.398010f, 0.593467f, 0.008483f, 1.411221f, 1.137922f, 3.292290f, 1.611459f, 4.348559f, 0.470377f, 1.155443f, 0.463115f, 3.331882f, 1.528401f, 4.588510f, 1.870020f, 5.498469f, +0.005820f, 0.008967f, 0.000151f, 0.024014f, 0.009671f, 0.028912f, 0.016691f, 0.043006f, 0.000129f, 0.000328f, 0.000155f, 0.001065f, 0.022861f, 0.070915f, 0.034089f, 0.095700f, +0.008098f, 0.014834f, 0.000172f, 0.036497f, 0.019059f, 0.067747f, 0.026942f, 0.092582f, 0.006804f, 0.020533f, 0.006687f, 0.061262f, 0.037013f, 0.136516f, 0.045205f, 0.169255f, +0.000211f, 0.000453f, 0.000008f, 0.001153f, 0.000490f, 0.002045f, 0.001283f, 0.002891f, 0.000132f, 0.000467f, 0.000240f, 0.001442f, 0.001258f, 0.005447f, 0.002846f, 0.006988f, +0.010701f, 0.017918f, 0.000283f, 0.048728f, 0.039143f, 0.127175f, 0.068759f, 0.192104f, 0.011723f, 0.032337f, 0.014317f, 0.106641f, 0.060956f, 0.205501f, 0.092511f, 0.281625f, +0.007071f, 0.010513f, 0.000183f, 0.023198f, 0.014547f, 0.041972f, 0.025003f, 0.051441f, 0.000225f, 0.000552f, 0.000269f, 0.001477f, 0.036049f, 0.107923f, 0.053530f, 0.120002f, +0.012352f, 0.021838f, 0.000262f, 0.044269f, 0.035997f, 0.123488f, 0.050674f, 0.139048f, 0.014903f, 0.043407f, 0.014586f, 0.106708f, 0.073284f, 0.260864f, 0.089131f, 0.266487f, +0.003803f, 0.007894f, 0.000149f, 0.016557f, 0.010959f, 0.044140f, 0.028576f, 0.051426f, 0.003420f, 0.011696f, 0.006200f, 0.029750f, 
0.029496f, 0.123276f, 0.066451f, 0.130303f, +0.015649f, 0.025289f, 0.000412f, 0.056666f, 0.070879f, 0.222251f, 0.123989f, 0.276618f, 0.024619f, 0.065540f, 0.029941f, 0.178090f, 0.115712f, 0.376487f, 0.174882f, 0.425119f, +0.000850f, 0.001390f, 0.000019f, 0.003481f, 0.001190f, 0.003779f, 0.001799f, 0.005254f, 0.000030f, 0.000080f, 0.000031f, 0.000242f, 0.003923f, 0.012919f, 0.005121f, 0.016298f, +0.001535f, 0.002986f, 0.000029f, 0.006867f, 0.003045f, 0.011492f, 0.003769f, 0.014681f, 0.002024f, 0.006486f, 0.001742f, 0.018089f, 0.008243f, 0.032281f, 0.008815f, 0.037414f, +0.000505f, 0.001153f, 0.000017f, 0.002744f, 0.000990f, 0.004389f, 0.002271f, 0.005801f, 0.000496f, 0.001867f, 0.000791f, 0.005388f, 0.003545f, 0.016299f, 0.007022f, 0.019546f, +0.002569f, 0.004567f, 0.000059f, 0.011611f, 0.007921f, 0.027323f, 0.012182f, 0.038582f, 0.004417f, 0.012937f, 0.004723f, 0.039882f, 0.017195f, 0.061546f, 0.022848f, 0.078846f, +0.012238f, 0.016609f, 0.000285f, 0.043028f, 0.023666f, 0.062322f, 0.036607f, 0.089680f, 0.000384f, 0.000858f, 0.000413f, 0.002695f, 0.063208f, 0.172716f, 0.084471f, 0.225479f, +0.022380f, 0.036113f, 0.000427f, 0.085952f, 0.061301f, 0.191940f, 0.077664f, 0.253748f, 0.026565f, 0.070619f, 0.023399f, 0.203823f, 0.134506f, 0.437005f, 0.147229f, 0.524139f, +0.009542f, 0.018078f, 0.000337f, 0.044520f, 0.025845f, 0.095014f, 0.060653f, 0.129969f, 0.008443f, 0.026352f, 0.013775f, 0.078696f, 0.074974f, 0.285999f, 0.152013f, 0.354927f, +0.022540f, 0.033245f, 0.000534f, 0.087461f, 0.095952f, 0.274608f, 0.151058f, 0.401281f, 0.034883f, 0.084761f, 0.038181f, 0.270412f, 0.168827f, 0.501363f, 0.229635f, 0.664677f, +0.009921f, 0.028847f, 0.000331f, 0.046219f, 0.012943f, 0.073027f, 0.028705f, 0.064990f, 0.000182f, 0.000872f, 0.000281f, 0.001694f, 0.028967f, 0.169592f, 0.055505f, 0.136929f, +0.014259f, 0.049299f, 0.000390f, 0.072568f, 0.026351f, 0.176776f, 0.047866f, 0.144536f, 0.009906f, 0.056420f, 0.012510f, 0.100711f, 0.048451f, 0.337271f, 0.076039f, 0.250181f, 
+0.000448f, 0.001819f, 0.000023f, 0.002771f, 0.000819f, 0.006450f, 0.002756f, 0.005457f, 0.000232f, 0.001552f, 0.000543f, 0.002866f, 0.001991f, 0.016271f, 0.005787f, 0.012488f, +0.016513f, 0.052185f, 0.000561f, 0.084908f, 0.047427f, 0.290815f, 0.107053f, 0.262825f, 0.014957f, 0.077866f, 0.023472f, 0.153636f, 0.069927f, 0.444928f, 0.136372f, 0.364807f, +0.015899f, 0.044618f, 0.000529f, 0.058903f, 0.025684f, 0.139857f, 0.056725f, 0.102554f, 0.000419f, 0.001937f, 0.000643f, 0.003101f, 0.060260f, 0.340488f, 0.114986f, 0.226514f, +0.028693f, 0.095742f, 0.000781f, 0.116121f, 0.065656f, 0.425092f, 0.118769f, 0.286378f, 0.028624f, 0.157346f, 0.035999f, 0.231424f, 0.126555f, 0.850222f, 0.197791f, 0.519652f, +0.010680f, 0.041839f, 0.000539f, 0.052506f, 0.024165f, 0.183697f, 0.080971f, 0.128048f, 0.007942f, 0.051255f, 0.018501f, 0.078002f, 0.061580f, 0.485744f, 0.178275f, 0.307186f, +0.031859f, 0.097166f, 0.001078f, 0.130263f, 0.113295f, 0.670473f, 0.254671f, 0.499270f, 0.041437f, 0.208200f, 0.064759f, 0.338478f, 0.175117f, 1.075348f, 0.340096f, 0.726484f, +0.011932f, 0.036837f, 0.000349f, 0.055173f, 0.013120f, 0.078597f, 0.025477f, 0.065387f, 0.000344f, 0.001748f, 0.000464f, 0.003174f, 0.040934f, 0.254442f, 0.068671f, 0.192043f, +0.022260f, 0.081713f, 0.000533f, 0.112438f, 0.034672f, 0.246954f, 0.055142f, 0.188751f, 0.024270f, 0.146767f, 0.026836f, 0.244904f, 0.088866f, 0.656795f, 0.122109f, 0.455435f, +0.008852f, 0.038152f, 0.000392f, 0.054320f, 0.013634f, 0.114020f, 0.040166f, 0.090171f, 0.007194f, 0.051081f, 0.014735f, 0.088194f, 0.046201f, 0.400914f, 0.117592f, 0.287648f, +0.032650f, 0.109550f, 0.000971f, 0.166623f, 0.079035f, 0.514549f, 0.156196f, 0.434708f, 0.046413f, 0.256546f, 0.063772f, 0.473184f, 0.162443f, 1.097383f, 0.277367f, 0.841109f, +0.020953f, 0.053668f, 0.000627f, 0.083184f, 0.031813f, 0.158113f, 0.063234f, 0.136125f, 0.000543f, 0.002292f, 0.000751f, 0.004308f, 0.080447f, 0.414876f, 0.138150f, 0.324048f, +0.039583f, 0.120549f, 0.000970f, 0.171660f, 
0.085129f, 0.503062f, 0.138590f, 0.397902f, 0.038847f, 0.194901f, 0.043969f, 0.336560f, 0.176852f, 1.084433f, 0.248752f, 0.778180f, +0.020403f, 0.072956f, 0.000926f, 0.107493f, 0.043391f, 0.301062f, 0.130850f, 0.246390f, 0.014926f, 0.087925f, 0.031293f, 0.157100f, 0.119177f, 0.858012f, 0.310503f, 0.637066f, +0.034936f, 0.097253f, 0.001064f, 0.153076f, 0.116772f, 0.630737f, 0.236231f, 0.551443f, 0.044704f, 0.205007f, 0.062875f, 0.391304f, 0.194532f, 1.090306f, 0.340009f, 0.864815f, +0.002725f, 0.007186f, 0.000087f, 0.017018f, 0.004764f, 0.024374f, 0.010057f, 0.032061f, 0.000050f, 0.000217f, 0.000073f, 0.000624f, 0.008509f, 0.045177f, 0.015520f, 0.053912f, +0.004253f, 0.013336f, 0.000111f, 0.029014f, 0.010531f, 0.064070f, 0.018210f, 0.077427f, 0.002958f, 0.015276f, 0.003556f, 0.040304f, 0.015454f, 0.097559f, 0.023088f, 0.106961f, +0.000117f, 0.000431f, 0.000006f, 0.000970f, 0.000287f, 0.002047f, 0.000918f, 0.002560f, 0.000061f, 0.000368f, 0.000135f, 0.001004f, 0.000556f, 0.004121f, 0.001539f, 0.004675f, +0.005496f, 0.015751f, 0.000178f, 0.037880f, 0.021150f, 0.117609f, 0.045445f, 0.157100f, 0.004983f, 0.023525f, 0.007444f, 0.068606f, 0.024888f, 0.143606f, 0.046203f, 0.174032f, +0.005153f, 0.013114f, 0.000163f, 0.025589f, 0.011153f, 0.055077f, 0.023449f, 0.059693f, 0.000136f, 0.000570f, 0.000199f, 0.001348f, 0.020885f, 0.107015f, 0.037936f, 0.105226f, +0.010099f, 0.030557f, 0.000262f, 0.054778f, 0.030960f, 0.181781f, 0.053312f, 0.181004f, 0.010084f, 0.050267f, 0.012072f, 0.109274f, 0.047628f, 0.290172f, 0.070858f, 0.262131f, +0.003291f, 0.011694f, 0.000158f, 0.021690f, 0.009978f, 0.068788f, 0.031827f, 0.070871f, 0.002450f, 0.014339f, 0.005433f, 0.032252f, 0.020294f, 0.145170f, 0.055927f, 0.135692f, +0.012511f, 0.034604f, 0.000403f, 0.068567f, 0.059612f, 0.319920f, 0.127555f, 0.352110f, 0.016288f, 0.074217f, 0.024231f, 0.178334f, 0.073538f, 0.409513f, 0.135949f, 0.408910f, +0.003772f, 0.010561f, 0.000105f, 0.023379f, 0.005558f, 0.030191f, 0.010272f, 0.037123f, 
0.000109f, 0.000502f, 0.000140f, 0.001346f, 0.013838f, 0.078004f, 0.022098f, 0.087018f, +0.007642f, 0.025438f, 0.000174f, 0.051736f, 0.015947f, 0.103007f, 0.024143f, 0.116365f, 0.008340f, 0.045734f, 0.008778f, 0.112795f, 0.032622f, 0.218644f, 0.042669f, 0.224087f, +0.002661f, 0.010401f, 0.000112f, 0.021887f, 0.005491f, 0.041646f, 0.015400f, 0.048680f, 0.002165f, 0.013938f, 0.004221f, 0.035570f, 0.014851f, 0.116871f, 0.035982f, 0.123937f, +0.012507f, 0.038055f, 0.000354f, 0.085549f, 0.040563f, 0.239481f, 0.076308f, 0.299038f, 0.017795f, 0.089201f, 0.023275f, 0.243176f, 0.066538f, 0.407626f, 0.108147f, 0.461784f, +0.008847f, 0.020550f, 0.000252f, 0.047077f, 0.017997f, 0.081116f, 0.034052f, 0.103219f, 0.000230f, 0.000878f, 0.000302f, 0.002440f, 0.036322f, 0.169870f, 0.059376f, 0.196106f, +0.018148f, 0.050122f, 0.000423f, 0.105492f, 0.052295f, 0.280247f, 0.081042f, 0.327626f, 0.017828f, 0.081114f, 0.019208f, 0.207027f, 0.086707f, 0.482148f, 0.116092f, 0.511377f, +0.008192f, 0.026563f, 0.000354f, 0.057847f, 0.023342f, 0.146866f, 0.067004f, 0.177653f, 0.005998f, 0.032044f, 0.011971f, 0.084622f, 0.051166f, 0.334055f, 0.126896f, 0.366600f, +0.017873f, 0.045120f, 0.000518f, 0.104967f, 0.080042f, 0.392070f, 0.154138f, 0.506638f, 0.022892f, 0.095202f, 0.030649f, 0.268580f, 0.106421f, 0.540906f, 0.177060f, 0.634131f, +0.013235f, 0.020614f, 0.000309f, 0.049898f, 0.019426f, 0.058708f, 0.030158f, 0.078934f, 0.000244f, 0.000627f, 0.000264f, 0.001841f, 0.030471f, 0.095555f, 0.040871f, 0.116557f, +0.021164f, 0.039195f, 0.000405f, 0.087162f, 0.044001f, 0.158112f, 0.055950f, 0.195305f, 0.014801f, 0.045156f, 0.013085f, 0.121774f, 0.056702f, 0.211423f, 0.062293f, 0.236932f, +0.000568f, 0.001236f, 0.000020f, 0.002843f, 0.001168f, 0.004930f, 0.002752f, 0.006300f, 0.000296f, 0.001061f, 0.000485f, 0.002961f, 0.001991f, 0.008715f, 0.004051f, 0.010105f, +0.028292f, 0.047890f, 0.000673f, 0.117718f, 0.091412f, 0.300244f, 0.144440f, 0.409939f, 0.025796f, 0.071936f, 0.028339f, 0.214430f, 
0.094462f, 0.321943f, 0.128958f, 0.398793f, +0.023444f, 0.035240f, 0.000546f, 0.070285f, 0.042606f, 0.124271f, 0.065871f, 0.137669f, 0.000622f, 0.001540f, 0.000668f, 0.003724f, 0.070060f, 0.212041f, 0.093583f, 0.213113f, +0.047072f, 0.084131f, 0.000897f, 0.154157f, 0.121175f, 0.420237f, 0.153443f, 0.427708f, 0.047272f, 0.139190f, 0.041618f, 0.309281f, 0.163698f, 0.589081f, 0.179094f, 0.543940f, +0.014969f, 0.031413f, 0.000528f, 0.059557f, 0.038106f, 0.155162f, 0.089381f, 0.163400f, 0.011206f, 0.038740f, 0.018274f, 0.089068f, 0.068058f, 0.287556f, 0.137923f, 0.274733f, +0.060328f, 0.098556f, 0.001429f, 0.199612f, 0.241357f, 0.765081f, 0.379786f, 0.860711f, 0.078992f, 0.212592f, 0.086417f, 0.522145f, 0.261463f, 0.860017f, 0.355461f, 0.877767f, +0.017883f, 0.029573f, 0.000366f, 0.066917f, 0.022123f, 0.070986f, 0.030070f, 0.089219f, 0.000518f, 0.001412f, 0.000490f, 0.003875f, 0.048373f, 0.161060f, 0.056808f, 0.183651f, +0.037118f, 0.072984f, 0.000622f, 0.151721f, 0.065041f, 0.248146f, 0.072411f, 0.286535f, 0.040740f, 0.131965f, 0.031534f, 0.332677f, 0.116838f, 0.462544f, 0.112383f, 0.484557f, +0.012612f, 0.029116f, 0.000391f, 0.062627f, 0.021853f, 0.097891f, 0.045066f, 0.116957f, 0.010318f, 0.039243f, 0.014794f, 0.102362f, 0.051900f, 0.241238f, 0.092471f, 0.261488f, +0.062844f, 0.112944f, 0.001309f, 0.259526f, 0.171139f, 0.596807f, 0.236760f, 0.761729f, 0.089931f, 0.266264f, 0.086498f, 0.741946f, 0.246526f, 0.892066f, 0.294663f, 1.032967f, +0.030559f, 0.041927f, 0.000640f, 0.098177f, 0.052199f, 0.138964f, 0.072630f, 0.180745f, 0.000797f, 0.001802f, 0.000771f, 0.005118f, 0.092512f, 0.255554f, 0.111212f, 0.301557f, +0.064229f, 0.104777f, 0.001102f, 0.225406f, 0.155403f, 0.491903f, 0.177101f, 0.587799f, 0.063456f, 0.170534f, 0.050277f, 0.444892f, 0.226268f, 0.743177f, 0.222786f, 0.805684f, +0.028287f, 0.054179f, 0.000899f, 0.120600f, 0.067679f, 0.251527f, 0.142869f, 0.310992f, 0.020832f, 0.065733f, 0.030574f, 0.177435f, 0.130279f, 0.502405f, 0.237607f, 0.563561f, 
+0.065436f, 0.097571f, 0.001395f, 0.232017f, 0.246057f, 0.711905f, 0.348452f, 0.940306f, 0.084290f, 0.207053f, 0.082989f, 0.597065f, 0.287289f, 0.862488f, 0.351502f, 1.033530f, +0.068702f, 0.105849f, 0.001785f, 0.283457f, 0.080701f, 0.241255f, 0.139281f, 0.358862f, 0.001501f, 0.003809f, 0.001801f, 0.012371f, 0.174331f, 0.540782f, 0.259951f, 0.729786f, +0.065544f, 0.120069f, 0.001395f, 0.295407f, 0.109056f, 0.387642f, 0.154162f, 0.529745f, 0.054212f, 0.163605f, 0.053280f, 0.488120f, 0.193543f, 0.713851f, 0.236378f, 0.885047f, +0.002346f, 0.005046f, 0.000092f, 0.012846f, 0.003860f, 0.016110f, 0.010108f, 0.022779f, 0.001446f, 0.005125f, 0.002633f, 0.015822f, 0.009057f, 0.039221f, 0.020489f, 0.050315f, +0.088589f, 0.148335f, 0.002342f, 0.403393f, 0.229077f, 0.744266f, 0.402396f, 1.124248f, 0.095534f, 0.263524f, 0.116672f, 0.869057f, 0.326007f, 1.099065f, 0.494768f, 1.506190f, +0.077551f, 0.115313f, 0.002006f, 0.254439f, 0.112795f, 0.325435f, 0.193864f, 0.398858f, 0.002433f, 0.005960f, 0.002907f, 0.015947f, 0.255435f, 0.764723f, 0.379307f, 0.850318f, +0.092898f, 0.164241f, 0.001968f, 0.332945f, 0.191389f, 0.656561f, 0.269425f, 0.739291f, 0.110340f, 0.321373f, 0.107992f, 0.790029f, 0.356074f, 1.267495f, 0.433074f, 1.294821f, +0.039379f, 0.081743f, 0.001546f, 0.171458f, 0.080226f, 0.323135f, 0.209197f, 0.376477f, 0.034866f, 0.119229f, 0.063208f, 0.303270f, 0.197331f, 0.824729f, 0.444567f, 0.871743f, +0.120381f, 0.194535f, 0.003169f, 0.435899f, 0.385438f, 1.208588f, 0.674248f, 1.504237f, 0.186423f, 0.496294f, 0.226725f, 1.348561f, 0.575037f, 1.870974f, 0.869086f, 2.112654f, +0.005848f, 0.009565f, 0.000133f, 0.023946f, 0.005789f, 0.018375f, 0.008748f, 0.025551f, 0.000200f, 0.000540f, 0.000211f, 0.001640f, 0.017433f, 0.057417f, 0.022760f, 0.072433f, +0.007241f, 0.014084f, 0.000135f, 0.032391f, 0.010155f, 0.038323f, 0.012568f, 0.048957f, 0.009400f, 0.030118f, 0.008088f, 0.084000f, 0.025122f, 0.098377f, 0.026863f, 0.114018f, +0.003280f, 0.007489f, 0.000113f, 0.017822f, 
0.004548f, 0.020152f, 0.010426f, 0.026637f, 0.003173f, 0.011938f, 0.005058f, 0.034452f, 0.014875f, 0.068392f, 0.029463f, 0.082016f, +0.012396f, 0.022037f, 0.000287f, 0.056021f, 0.027015f, 0.093191f, 0.041549f, 0.131592f, 0.020979f, 0.061443f, 0.022432f, 0.189418f, 0.053594f, 0.191834f, 0.071214f, 0.245756f, +0.141021f, 0.191387f, 0.003283f, 0.495810f, 0.192782f, 0.507665f, 0.298195f, 0.730515f, 0.004352f, 0.009731f, 0.004681f, 0.030570f, 0.470531f, 1.285729f, 0.628820f, 1.678511f, +0.176831f, 0.285344f, 0.003372f, 0.679139f, 0.342411f, 1.072117f, 0.433805f, 1.417360f, 0.206625f, 0.549281f, 0.181998f, 1.585355f, 0.686597f, 2.230722f, 0.751539f, 2.675507f, +0.103809f, 0.196678f, 0.003667f, 0.484350f, 0.198775f, 0.730745f, 0.466475f, 0.999580f, 0.090419f, 0.282217f, 0.147525f, 0.842808f, 0.526953f, 2.010137f, 1.068419f, 2.494596f, +0.182154f, 0.268667f, 0.004316f, 0.706808f, 0.548168f, 1.568825f, 0.862991f, 2.292503f, 0.277509f, 0.674301f, 0.303742f, 2.151212f, 0.881428f, 2.617557f, 1.198895f, 3.470197f, +0.130382f, 0.379123f, 0.004352f, 0.607434f, 0.120250f, 0.678470f, 0.266688f, 0.603806f, 0.002355f, 0.011281f, 0.003631f, 0.021919f, 0.245947f, 1.439914f, 0.471263f, 1.162589f, +0.128502f, 0.444281f, 0.003513f, 0.653976f, 0.167875f, 1.126198f, 0.304942f, 0.920805f, 0.087876f, 0.500514f, 0.110979f, 0.893436f, 0.282081f, 1.963595f, 0.442699f, 1.456558f, +0.005561f, 0.022573f, 0.000282f, 0.034380f, 0.007184f, 0.056583f, 0.024171f, 0.047869f, 0.002835f, 0.018956f, 0.006631f, 0.035012f, 0.015958f, 0.130430f, 0.046392f, 0.100108f, +0.152207f, 0.481002f, 0.005171f, 0.782617f, 0.309027f, 1.894924f, 0.697547f, 1.712545f, 0.135709f, 0.706514f, 0.212972f, 1.394006f, 0.416393f, 2.649398f, 0.812050f, 2.172302f, +0.194158f, 0.544872f, 0.006454f, 0.719313f, 0.221727f, 1.207371f, 0.489701f, 0.885344f, 0.005036f, 0.023283f, 0.007733f, 0.037273f, 0.475412f, 2.686219f, 0.907163f, 1.787044f, +0.240274f, 0.801731f, 0.006542f, 0.972383f, 0.388666f, 2.516416f, 0.703076f, 1.695272f, 
0.235956f, 1.297037f, 0.296751f, 1.907670f, 0.684636f, 4.599538f, 1.070010f, 2.811215f, +0.123134f, 0.482405f, 0.006210f, 0.605389f, 0.196964f, 1.497278f, 0.659982f, 1.043695f, 0.090137f, 0.581751f, 0.209984f, 0.885323f, 0.458697f, 3.618184f, 1.327926f, 2.288150f, +0.272858f, 0.832194f, 0.009231f, 1.115656f, 0.685952f, 4.059429f, 1.541927f, 3.022869f, 0.349360f, 1.755342f, 0.545984f, 2.853712f, 0.968935f, 5.949973f, 1.881773f, 4.019687f, +0.091391f, 0.282149f, 0.002671f, 0.422588f, 0.071041f, 0.425570f, 0.137945f, 0.354046f, 0.002591f, 0.013177f, 0.003497f, 0.023932f, 0.202547f, 1.259029f, 0.339800f, 0.950269f, +0.116913f, 0.429163f, 0.002799f, 0.590538f, 0.128730f, 0.916902f, 0.204732f, 0.700804f, 0.125479f, 0.758807f, 0.138744f, 1.266190f, 0.301527f, 2.228531f, 0.414320f, 1.545309f, +0.064015f, 0.275900f, 0.002838f, 0.392817f, 0.069700f, 0.582893f, 0.205335f, 0.460974f, 0.051214f, 0.363631f, 0.104895f, 0.627832f, 0.215843f, 1.873014f, 0.549373f, 1.343852f, +0.175390f, 0.588478f, 0.005217f, 0.895061f, 0.300130f, 1.953969f, 0.593144f, 1.650779f, 0.245430f, 1.356603f, 0.337221f, 2.502177f, 0.563734f, 3.808305f, 0.962560f, 2.918944f, +0.268814f, 0.688538f, 0.008042f, 1.067205f, 0.288531f, 1.434010f, 0.573499f, 1.234584f, 0.006859f, 0.028944f, 0.009479f, 0.054403f, 0.666770f, 3.438624f, 1.145034f, 2.685814f, +0.348222f, 1.060511f, 0.008532f, 1.510154f, 0.529425f, 3.128579f, 0.861900f, 2.474581f, 0.336416f, 1.687858f, 0.380772f, 2.914637f, 1.005123f, 6.163265f, 1.413756f, 4.422707f, +0.247141f, 0.883718f, 0.011217f, 1.302068f, 0.371560f, 2.577997f, 1.120475f, 2.109846f, 0.177978f, 1.048421f, 0.373143f, 1.873260f, 0.932611f, 6.714328f, 2.429829f, 4.985328f, +0.314351f, 0.875064f, 0.009571f, 1.377347f, 0.742763f, 4.011981f, 1.502615f, 3.507605f, 0.395958f, 1.815828f, 0.556907f, 3.465935f, 1.130794f, 6.337835f, 1.976438f, 5.027077f, +0.055903f, 0.147414f, 0.001776f, 0.349092f, 0.069081f, 0.353459f, 0.145838f, 0.464931f, 0.001011f, 0.004391f, 0.001483f, 0.012609f, 
0.112764f, 0.598692f, 0.205678f, 0.714458f, +0.059829f, 0.187583f, 0.001557f, 0.408114f, 0.104722f, 0.637092f, 0.181077f, 0.769907f, 0.040953f, 0.211527f, 0.049232f, 0.558080f, 0.140438f, 0.886540f, 0.209803f, 0.971980f, +0.002267f, 0.008346f, 0.000109f, 0.018788f, 0.003924f, 0.028030f, 0.012569f, 0.035048f, 0.001157f, 0.007015f, 0.002576f, 0.019151f, 0.006957f, 0.051567f, 0.019253f, 0.058498f, +0.079074f, 0.226610f, 0.002557f, 0.544960f, 0.215101f, 1.196120f, 0.462183f, 1.597747f, 0.070570f, 0.333170f, 0.105420f, 0.971612f, 0.231318f, 1.334717f, 0.429420f, 1.617503f, +0.098223f, 0.249969f, 0.003108f, 0.487745f, 0.150288f, 0.742136f, 0.315959f, 0.804336f, 0.002550f, 0.010691f, 0.003727f, 0.025298f, 0.257178f, 1.317779f, 0.467137f, 1.295745f, +0.131990f, 0.399392f, 0.003421f, 0.715964f, 0.286063f, 1.679594f, 0.492585f, 1.672416f, 0.129741f, 0.646749f, 0.155322f, 1.405951f, 0.402164f, 2.450162f, 0.598309f, 2.213388f, +0.059233f, 0.210441f, 0.002844f, 0.390333f, 0.126946f, 0.875127f, 0.404910f, 0.901624f, 0.043401f, 0.254020f, 0.096244f, 0.571367f, 0.235948f, 1.687789f, 0.650217f, 1.577593f, +0.167250f, 0.462584f, 0.005386f, 0.916600f, 0.563345f, 3.023307f, 1.205419f, 3.327512f, 0.214346f, 0.976654f, 0.318872f, 2.346779f, 0.635088f, 3.536640f, 1.174087f, 3.531434f, +0.045096f, 0.126257f, 0.001255f, 0.279497f, 0.046968f, 0.255152f, 0.086814f, 0.313740f, 0.001280f, 0.005902f, 0.001644f, 0.015844f, 0.106875f, 0.602452f, 0.170674f, 0.672072f, +0.062644f, 0.208535f, 0.001427f, 0.424118f, 0.092417f, 0.596939f, 0.139911f, 0.674352f, 0.067298f, 0.369063f, 0.070834f, 0.910229f, 0.172765f, 1.157936f, 0.225974f, 1.186763f, +0.030036f, 0.117396f, 0.001268f, 0.247045f, 0.043818f, 0.332309f, 0.122878f, 0.388431f, 0.024053f, 0.154874f, 0.046895f, 0.395223f, 0.108297f, 0.852224f, 0.262384f, 0.903748f, +0.104862f, 0.319067f, 0.002969f, 0.717278f, 0.240422f, 1.419450f, 0.452293f, 1.772451f, 0.146877f, 0.736236f, 0.192104f, 2.007083f, 0.360411f, 2.207968f, 0.585795f, 2.501323f, 
+0.177158f, 0.411504f, 0.005045f, 0.942708f, 0.254773f, 1.148284f, 0.482044f, 1.461171f, 0.004525f, 0.017315f, 0.005952f, 0.048103f, 0.469889f, 2.197559f, 0.768125f, 2.536968f, +0.249198f, 0.688241f, 0.005812f, 1.448538f, 0.507627f, 2.720343f, 0.786667f, 3.180249f, 0.240979f, 1.096413f, 0.259634f, 2.798374f, 0.769162f, 4.277067f, 1.029833f, 4.536348f, +0.154875f, 0.502211f, 0.006691f, 1.093677f, 0.311972f, 1.962934f, 0.895535f, 2.374418f, 0.111639f, 0.596377f, 0.222801f, 1.574948f, 0.624951f, 4.080229f, 1.549940f, 4.477739f, +0.251015f, 0.633667f, 0.007275f, 1.474169f, 0.794668f, 3.892519f, 1.530301f, 5.029972f, 0.316480f, 1.316159f, 0.423715f, 3.713104f, 0.965556f, 4.907626f, 1.606464f, 5.753460f, +0.142540f, 0.222013f, 0.003331f, 0.537393f, 0.147898f, 0.446976f, 0.229610f, 0.600962f, 0.002592f, 0.006650f, 0.002798f, 0.019521f, 0.212007f, 0.664847f, 0.284368f, 0.810974f, +0.156299f, 0.289453f, 0.002991f, 0.643693f, 0.229713f, 0.825454f, 0.292098f, 1.019627f, 0.107600f, 0.328272f, 0.095124f, 0.885271f, 0.270524f, 1.008699f, 0.297201f, 1.130401f, +0.005779f, 0.012566f, 0.000205f, 0.028913f, 0.008399f, 0.035435f, 0.019782f, 0.045289f, 0.002966f, 0.010623f, 0.004856f, 0.029641f, 0.013077f, 0.057248f, 0.026611f, 0.066381f, +0.213695f, 0.361729f, 0.005082f, 0.889162f, 0.488104f, 1.603190f, 0.771257f, 2.188924f, 0.191807f, 0.534876f, 0.210711f, 1.594384f, 0.460947f, 1.570984f, 0.629274f, 1.945988f, +0.234609f, 0.352664f, 0.005459f, 0.703363f, 0.301414f, 0.879152f, 0.466001f, 0.973936f, 0.006126f, 0.015170f, 0.006585f, 0.036691f, 0.452947f, 1.370869f, 0.605023f, 1.377797f, +0.323013f, 0.577323f, 0.006156f, 1.057848f, 0.587824f, 2.038589f, 0.744358f, 2.074828f, 0.319330f, 0.940241f, 0.281132f, 2.089226f, 0.725707f, 2.611516f, 0.793960f, 2.411393f, +0.141438f, 0.296807f, 0.004993f, 0.562720f, 0.254524f, 1.036386f, 0.597012f, 1.091410f, 0.104228f, 0.360326f, 0.169972f, 0.828430f, 0.415431f, 1.755258f, 0.841892f, 1.676989f, +0.423414f, 0.691719f, 0.010028f, 1.400978f, 
1.197509f, 3.796009f, 1.884336f, 4.270484f, 0.545756f, 1.468803f, 0.597055f, 3.607508f, 1.185527f, 3.899498f, 1.611734f, 3.979983f, +0.112246f, 0.185620f, 0.002296f, 0.420010f, 0.098161f, 0.314974f, 0.133426f, 0.395875f, 0.003203f, 0.008727f, 0.003027f, 0.023945f, 0.196148f, 0.653087f, 0.230351f, 0.744693f, +0.159756f, 0.314118f, 0.002677f, 0.653001f, 0.197893f, 0.755007f, 0.220316f, 0.871807f, 0.172608f, 0.559111f, 0.133602f, 1.409489f, 0.324869f, 1.286109f, 0.312484f, 1.347318f, +0.074739f, 0.172542f, 0.002320f, 0.371132f, 0.091550f, 0.410099f, 0.188797f, 0.489972f, 0.060194f, 0.228929f, 0.086303f, 0.597142f, 0.198698f, 0.923574f, 0.354023f, 1.001101f, +0.276639f, 0.497183f, 0.005760f, 1.142443f, 0.532568f, 1.857208f, 0.736776f, 2.370431f, 0.389702f, 1.153812f, 0.374826f, 3.215110f, 0.701086f, 2.536916f, 0.837981f, 2.937619f, +0.321283f, 0.440799f, 0.006728f, 1.032181f, 0.387958f, 1.032813f, 0.539802f, 1.343339f, 0.008253f, 0.018654f, 0.007984f, 0.052970f, 0.628347f, 1.735744f, 0.755356f, 2.048200f, +0.463037f, 0.755356f, 0.007942f, 1.625000f, 0.791992f, 2.506922f, 0.902575f, 2.995648f, 0.450332f, 1.210233f, 0.356805f, 3.157279f, 1.053822f, 3.461273f, 1.037604f, 3.752396f, +0.280786f, 0.537802f, 0.008921f, 1.197120f, 0.474917f, 1.765012f, 1.002536f, 2.182286f, 0.203561f, 0.642304f, 0.298753f, 1.733797f, 0.835450f, 3.221805f, 1.523718f, 3.613981f, +0.482492f, 0.719436f, 0.010284f, 1.710767f, 1.282573f, 3.710801f, 1.816305f, 4.901341f, 0.611816f, 1.502875f, 0.602370f, 4.333752f, 1.368506f, 4.108479f, 1.674386f, 4.923241f, +0.051370f, 0.079146f, 0.001334f, 0.211948f, 0.057246f, 0.171137f, 0.098801f, 0.254563f, 0.000842f, 0.002137f, 0.001010f, 0.006941f, 0.152338f, 0.472559f, 0.227157f, 0.637719f, +0.088653f, 0.162402f, 0.001886f, 0.399558f, 0.139938f, 0.497414f, 0.197817f, 0.679758f, 0.055020f, 0.166043f, 0.054074f, 0.495394f, 0.305936f, 1.128395f, 0.373646f, 1.399008f, +0.002172f, 0.004671f, 0.000086f, 0.011890f, 0.003390f, 0.014146f, 0.008876f, 0.020003f, 
0.001005f, 0.003560f, 0.001829f, 0.010989f, 0.009797f, 0.042427f, 0.022164f, 0.054427f, +0.097919f, 0.163957f, 0.002589f, 0.445876f, 0.240212f, 0.780443f, 0.421955f, 1.178895f, 0.079233f, 0.218560f, 0.096764f, 0.720773f, 0.421121f, 1.419722f, 0.639119f, 1.945627f, +0.048214f, 0.071691f, 0.001247f, 0.158188f, 0.066528f, 0.191947f, 0.114344f, 0.235253f, 0.001135f, 0.002780f, 0.001356f, 0.007439f, 0.185594f, 0.555632f, 0.275597f, 0.617824f, +0.104476f, 0.184710f, 0.002214f, 0.374440f, 0.204199f, 0.700505f, 0.287458f, 0.788773f, 0.093112f, 0.271195f, 0.091131f, 0.666678f, 0.467997f, 1.665902f, 0.569201f, 1.701816f, +0.030307f, 0.062911f, 0.001189f, 0.131957f, 0.058576f, 0.235931f, 0.152742f, 0.274877f, 0.020134f, 0.068852f, 0.036502f, 0.175133f, 0.177485f, 0.741787f, 0.399857f, 0.784072f, +0.110635f, 0.178786f, 0.002913f, 0.400610f, 0.336060f, 1.053758f, 0.587871f, 1.311531f, 0.128558f, 0.342246f, 0.156350f, 0.929972f, 0.617625f, 2.009541f, 0.933452f, 2.269120f, +0.005320f, 0.008703f, 0.000121f, 0.021786f, 0.004997f, 0.015860f, 0.007551f, 0.022054f, 0.000137f, 0.000369f, 0.000144f, 0.001120f, 0.018536f, 0.061050f, 0.024200f, 0.077016f, +0.011917f, 0.023179f, 0.000222f, 0.053308f, 0.015855f, 0.059835f, 0.019623f, 0.076438f, 0.011608f, 0.037193f, 0.009988f, 0.103732f, 0.048318f, 0.189215f, 0.051667f, 0.219299f, +0.003694f, 0.008435f, 0.000127f, 0.020072f, 0.004859f, 0.021531f, 0.011140f, 0.028461f, 0.002682f, 0.010089f, 0.004274f, 0.029115f, 0.019578f, 0.090019f, 0.038779f, 0.107951f, +0.016671f, 0.029637f, 0.000386f, 0.075344f, 0.034470f, 0.118904f, 0.053013f, 0.167900f, 0.021172f, 0.062006f, 0.022638f, 0.191153f, 0.084238f, 0.301520f, 0.111932f, 0.386273f, +0.090127f, 0.122316f, 0.002098f, 0.316875f, 0.116886f, 0.307805f, 0.180800f, 0.442923f, 0.002087f, 0.004666f, 0.002245f, 0.014660f, 0.351442f, 0.960317f, 0.469668f, 1.253688f, +0.204432f, 0.329883f, 0.003898f, 0.785145f, 0.375548f, 1.175873f, 0.475788f, 1.554527f, 0.179241f, 0.476486f, 0.157878f, 1.375250f, 
0.927655f, 3.013910f, 1.015399f, 3.614856f, +0.082128f, 0.155601f, 0.002901f, 0.383191f, 0.149191f, 0.548465f, 0.350115f, 0.750240f, 0.053676f, 0.167534f, 0.087576f, 0.500320f, 0.487215f, 1.858553f, 0.987850f, 2.306479f, +0.172090f, 0.253824f, 0.004078f, 0.667758f, 0.491313f, 1.406109f, 0.773482f, 2.054727f, 0.196724f, 0.478008f, 0.215321f, 1.524981f, 0.973190f, 2.890061f, 1.323708f, 3.831467f, +0.081725f, 0.237638f, 0.002728f, 0.380746f, 0.071507f, 0.403453f, 0.158587f, 0.359054f, 0.001108f, 0.005306f, 0.001708f, 0.010309f, 0.180165f, 1.054789f, 0.345218f, 0.851639f, +0.145702f, 0.503747f, 0.003983f, 0.741510f, 0.180579f, 1.211426f, 0.328020f, 0.990489f, 0.074764f, 0.425830f, 0.094419f, 0.760121f, 0.373786f, 2.601962f, 0.586621f, 1.930086f, +0.004315f, 0.017515f, 0.000219f, 0.026676f, 0.005288f, 0.041651f, 0.017793f, 0.035237f, 0.001650f, 0.011037f, 0.003861f, 0.020384f, 0.014471f, 0.118274f, 0.042069f, 0.090778f, +0.141032f, 0.445686f, 0.004791f, 0.725155f, 0.271647f, 1.665712f, 0.613171f, 1.505394f, 0.094353f, 0.491208f, 0.148070f, 0.969192f, 0.450898f, 2.868945f, 0.879342f, 2.352314f, +0.101191f, 0.283975f, 0.003364f, 0.374889f, 0.109630f, 0.596970f, 0.242127f, 0.437748f, 0.001969f, 0.009105f, 0.003024f, 0.014576f, 0.289567f, 1.636138f, 0.552540f, 1.088463f, +0.226522f, 0.755846f, 0.006167f, 0.916731f, 0.347623f, 2.250680f, 0.628831f, 1.516250f, 0.166916f, 0.917531f, 0.209923f, 1.349497f, 0.754324f, 5.067717f, 1.178924f, 3.097364f, +0.079442f, 0.311230f, 0.004006f, 0.390574f, 0.120554f, 0.916428f, 0.403950f, 0.638807f, 0.043635f, 0.281624f, 0.101653f, 0.428583f, 0.345851f, 2.728057f, 1.001236f, 1.725231f, +0.210217f, 0.641143f, 0.007112f, 0.859530f, 0.501362f, 2.967034f, 1.126993f, 2.209413f, 0.201961f, 1.014744f, 0.315627f, 1.649699f, 0.872407f, 5.357218f, 1.694305f, 3.619233f, +0.069702f, 0.215191f, 0.002037f, 0.322302f, 0.051402f, 0.307924f, 0.099811f, 0.256172f, 0.001483f, 0.007541f, 0.002002f, 0.013696f, 0.180537f, 1.122211f, 0.302874f, 0.847004f, 
+0.161298f, 0.592089f, 0.003861f, 0.814728f, 0.168489f, 1.200092f, 0.267965f, 0.917252f, 0.129897f, 0.785525f, 0.143629f, 1.310774f, 0.486167f, 3.593166f, 0.668028f, 2.491575f, +0.060438f, 0.260484f, 0.002680f, 0.370868f, 0.062430f, 0.522090f, 0.183916f, 0.412889f, 0.036281f, 0.257605f, 0.074310f, 0.444771f, 0.238156f, 2.066636f, 0.606165f, 1.482773f, +0.197741f, 0.663470f, 0.005882f, 1.009122f, 0.321016f, 2.089949f, 0.634422f, 1.765659f, 0.207626f, 1.147645f, 0.285279f, 2.116766f, 0.742778f, 5.017836f, 1.268272f, 3.846011f, +0.144018f, 0.368888f, 0.004308f, 0.571761f, 0.146651f, 0.728863f, 0.291492f, 0.627501f, 0.002758f, 0.011636f, 0.003811f, 0.021870f, 0.417481f, 2.153005f, 0.716934f, 1.681653f, +0.337476f, 1.027783f, 0.008269f, 1.463550f, 0.486763f, 2.876474f, 0.792447f, 2.275176f, 0.244640f, 1.227400f, 0.276895f, 2.119506f, 1.138412f, 6.980571f, 1.601233f, 5.009199f, +0.163906f, 0.586090f, 0.007439f, 0.863544f, 0.233780f, 1.622035f, 0.704985f, 1.327482f, 0.088569f, 0.521736f, 0.185691f, 0.932208f, 0.722845f, 5.204118f, 1.883303f, 3.864011f, +0.248959f, 0.693031f, 0.007580f, 1.090827f, 0.558072f, 3.014382f, 1.128982f, 2.635422f, 0.235302f, 1.079074f, 0.330948f, 2.059666f, 1.046622f, 5.866068f, 1.829319f, 4.652879f, +0.008856f, 0.023354f, 0.000281f, 0.055305f, 0.010383f, 0.053124f, 0.021919f, 0.069878f, 0.000120f, 0.000522f, 0.000176f, 0.001499f, 0.020878f, 0.110846f, 0.038081f, 0.132280f, +0.017146f, 0.053757f, 0.000446f, 0.116957f, 0.028471f, 0.173210f, 0.049230f, 0.209319f, 0.008806f, 0.045486f, 0.010587f, 0.120006f, 0.047035f, 0.296917f, 0.070267f, 0.325532f, +0.000445f, 0.001637f, 0.000021f, 0.003685f, 0.000730f, 0.005215f, 0.002338f, 0.006521f, 0.000170f, 0.001032f, 0.000379f, 0.002818f, 0.001595f, 0.011819f, 0.004413f, 0.013407f, +0.018518f, 0.053070f, 0.000599f, 0.127624f, 0.047790f, 0.265748f, 0.102686f, 0.354980f, 0.012401f, 0.058546f, 0.018525f, 0.170736f, 0.063310f, 0.365302f, 0.117529f, 0.442698f, +0.012938f, 0.032928f, 0.000409f, 0.064249f, 
0.018781f, 0.092743f, 0.039485f, 0.100516f, 0.000252f, 0.001057f, 0.000368f, 0.002500f, 0.039591f, 0.202866f, 0.071913f, 0.199474f, +0.031451f, 0.095168f, 0.000815f, 0.170602f, 0.064667f, 0.379685f, 0.111353f, 0.378062f, 0.023197f, 0.115636f, 0.027771f, 0.251377f, 0.111993f, 0.682308f, 0.166614f, 0.616373f, +0.009659f, 0.034315f, 0.000464f, 0.063649f, 0.019638f, 0.135380f, 0.062639f, 0.139479f, 0.005310f, 0.031081f, 0.011776f, 0.069909f, 0.044964f, 0.321639f, 0.123911f, 0.300639f, +0.032568f, 0.090076f, 0.001049f, 0.178483f, 0.104068f, 0.558505f, 0.222681f, 0.614702f, 0.031318f, 0.142700f, 0.046591f, 0.342890f, 0.144526f, 0.804827f, 0.267185f, 0.803643f, +0.008693f, 0.024338f, 0.000242f, 0.053878f, 0.008589f, 0.046662f, 0.015876f, 0.057376f, 0.000185f, 0.000854f, 0.000238f, 0.002292f, 0.024077f, 0.135721f, 0.038450f, 0.151406f, +0.021844f, 0.072716f, 0.000498f, 0.147890f, 0.030572f, 0.197474f, 0.046284f, 0.223083f, 0.017608f, 0.096564f, 0.018533f, 0.238159f, 0.070405f, 0.471879f, 0.092088f, 0.483627f, +0.007167f, 0.028014f, 0.000303f, 0.058951f, 0.009920f, 0.075229f, 0.027817f, 0.087934f, 0.004307f, 0.027731f, 0.008397f, 0.070766f, 0.030201f, 0.237664f, 0.073173f, 0.252033f, +0.029881f, 0.090920f, 0.000846f, 0.204393f, 0.064995f, 0.383730f, 0.122272f, 0.479159f, 0.031405f, 0.157420f, 0.041075f, 0.429149f, 0.120025f, 0.735301f, 0.195082f, 0.832994f, +0.023989f, 0.055722f, 0.000683f, 0.127653f, 0.032729f, 0.147513f, 0.061925f, 0.187708f, 0.000460f, 0.001759f, 0.000605f, 0.004888f, 0.074361f, 0.347767f, 0.121557f, 0.401479f, +0.061041f, 0.168583f, 0.001424f, 0.354816f, 0.117963f, 0.632156f, 0.182807f, 0.739030f, 0.044291f, 0.201517f, 0.047720f, 0.514332f, 0.220184f, 1.224373f, 0.294805f, 1.298596f, +0.025961f, 0.084183f, 0.001122f, 0.183327f, 0.049611f, 0.312156f, 0.142412f, 0.377592f, 0.014042f, 0.075011f, 0.028023f, 0.198093f, 0.122427f, 0.799313f, 0.303632f, 0.877184f, +0.050246f, 0.126841f, 0.001456f, 0.295085f, 0.150908f, 0.739193f, 0.290606f, 0.955196f, 
0.047535f, 0.197684f, 0.063641f, 0.557701f, 0.225877f, 1.148062f, 0.375807f, 1.345931f, +0.127782f, 0.199026f, 0.002986f, 0.481751f, 0.125782f, 0.380139f, 0.195276f, 0.511099f, 0.001743f, 0.004473f, 0.001882f, 0.013131f, 0.222113f, 0.696541f, 0.297924f, 0.849634f, +0.253458f, 0.469385f, 0.004851f, 1.043829f, 0.353398f, 1.269904f, 0.449372f, 1.568625f, 0.130926f, 0.399438f, 0.115746f, 1.077188f, 0.512685f, 1.911640f, 0.563242f, 2.142285f, +0.006413f, 0.013944f, 0.000227f, 0.032086f, 0.008842f, 0.037306f, 0.020827f, 0.047680f, 0.002469f, 0.008845f, 0.004044f, 0.024682f, 0.016959f, 0.074245f, 0.034512f, 0.086090f, +0.283186f, 0.479359f, 0.006735f, 1.178306f, 0.613645f, 2.015529f, 0.969624f, 2.751912f, 0.190724f, 0.531856f, 0.209521f, 1.585381f, 0.713874f, 2.433002f, 0.974564f, 3.013776f, +0.174874f, 0.262870f, 0.004069f, 0.524276f, 0.213143f, 0.621687f, 0.329529f, 0.688712f, 0.003426f, 0.008485f, 0.003683f, 0.020521f, 0.394568f, 1.194181f, 0.527043f, 1.200216f, +0.435532f, 0.778429f, 0.008301f, 1.426341f, 0.751924f, 2.607695f, 0.952158f, 2.654051f, 0.323075f, 0.951268f, 0.284430f, 2.113730f, 1.143550f, 4.115159f, 1.251101f, 3.799810f, +0.130506f, 0.273866f, 0.004607f, 0.519226f, 0.222802f, 0.907221f, 0.522607f, 0.955387f, 0.072163f, 0.249473f, 0.117681f, 0.573567f, 0.447979f, 1.892776f, 0.907851f, 1.808375f, +0.466543f, 0.762177f, 0.011049f, 1.543681f, 1.251792f, 3.968081f, 1.969753f, 4.464063f, 0.451220f, 1.214377f, 0.493633f, 2.982617f, 1.526621f, 5.021444f, 2.075455f, 5.125087f, +0.122437f, 0.202472f, 0.002505f, 0.458143f, 0.101579f, 0.325944f, 0.138073f, 0.409663f, 0.002622f, 0.007143f, 0.002478f, 0.019599f, 0.250046f, 0.832541f, 0.293647f, 0.949318f, +0.315223f, 0.619803f, 0.005282f, 1.288471f, 0.370440f, 1.413313f, 0.412415f, 1.631953f, 0.255556f, 0.827795f, 0.197805f, 2.086826f, 0.749140f, 2.965734f, 0.720579f, 3.106879f, +0.100919f, 0.232980f, 0.003132f, 0.501134f, 0.117276f, 0.525340f, 0.241850f, 0.627659f, 0.060988f, 0.231947f, 0.087441f, 0.605016f, 
0.313553f, 1.457440f, 0.558663f, 1.579780f, +0.446068f, 0.801683f, 0.009288f, 1.842133f, 0.814683f, 2.841020f, 1.127065f, 3.626110f, 0.471502f, 1.396001f, 0.453503f, 3.889971f, 1.321149f, 4.780644f, 1.579118f, 5.535743f, +0.246178f, 0.337756f, 0.005155f, 0.790894f, 0.282016f, 0.750777f, 0.392395f, 0.976506f, 0.004745f, 0.010725f, 0.004590f, 0.030455f, 0.562673f, 1.554324f, 0.676407f, 1.834123f, +0.641797f, 1.046969f, 0.011008f, 2.252348f, 1.041430f, 3.296475f, 1.186840f, 3.939125f, 0.468359f, 1.258679f, 0.371088f, 3.283665f, 1.707037f, 5.606756f, 1.680767f, 6.078332f, +0.266331f, 0.510116f, 0.008462f, 1.135492f, 0.427357f, 1.588259f, 0.902139f, 1.963745f, 0.144879f, 0.457142f, 0.212629f, 1.233982f, 0.926106f, 3.571408f, 1.689060f, 4.006140f, +0.546510f, 0.814892f, 0.011648f, 1.937755f, 1.378216f, 3.987519f, 1.951749f, 5.266839f, 0.519987f, 1.277306f, 0.511959f, 3.683292f, 1.811542f, 5.438547f, 2.216448f, 6.517078f, +0.065496f, 0.100910f, 0.001701f, 0.270230f, 0.082747f, 0.247371f, 0.142812f, 0.367959f, 0.001334f, 0.003385f, 0.001600f, 0.010992f, 0.225013f, 0.697999f, 0.335525f, 0.941950f, +0.106026f, 0.194227f, 0.002256f, 0.477858f, 0.189738f, 0.674427f, 0.268214f, 0.921661f, 0.081733f, 0.246658f, 0.080327f, 0.735912f, 0.423879f, 1.563407f, 0.517692f, 1.938345f, +0.002180f, 0.004688f, 0.000086f, 0.011934f, 0.003857f, 0.016097f, 0.010100f, 0.022762f, 0.001252f, 0.004438f, 0.002280f, 0.013700f, 0.011392f, 0.049334f, 0.025773f, 0.063288f, +0.108376f, 0.181466f, 0.002865f, 0.493493f, 0.301411f, 0.979280f, 0.529458f, 1.479246f, 0.108926f, 0.300465f, 0.133027f, 0.990882f, 0.539965f, 1.820380f, 0.819483f, 2.494700f, +0.076562f, 0.113843f, 0.001981f, 0.251196f, 0.119769f, 0.345557f, 0.205851f, 0.423520f, 0.002239f, 0.005484f, 0.002675f, 0.014673f, 0.341425f, 1.022161f, 0.506998f, 1.136571f, +0.155621f, 0.275133f, 0.003297f, 0.557743f, 0.344830f, 1.182939f, 0.485428f, 1.331996f, 0.172273f, 0.501754f, 0.168606f, 1.233461f, 0.807583f, 2.874709f, 0.982223f, 2.936684f, 
+0.037887f, 0.078646f, 0.001487f, 0.164961f, 0.083017f, 0.334375f, 0.216474f, 0.389572f, 0.031264f, 0.106912f, 0.056678f, 0.271941f, 0.257042f, 1.074289f, 0.579091f, 1.135529f, +0.152508f, 0.246452f, 0.004015f, 0.552232f, 0.525190f, 1.646796f, 0.918716f, 2.049640f, 0.220118f, 0.585997f, 0.267705f, 1.592308f, 0.986318f, 3.209146f, 1.490680f, 3.623682f, +0.007560f, 0.012367f, 0.000172f, 0.030960f, 0.008051f, 0.025553f, 0.012165f, 0.035531f, 0.000242f, 0.000651f, 0.000254f, 0.001976f, 0.030517f, 0.100508f, 0.039841f, 0.126793f, +0.015886f, 0.030898f, 0.000296f, 0.071061f, 0.023961f, 0.090426f, 0.029655f, 0.115518f, 0.019220f, 0.061583f, 0.016538f, 0.171755f, 0.074618f, 0.292204f, 0.079790f, 0.338662f, +0.004132f, 0.009436f, 0.000143f, 0.022456f, 0.006163f, 0.027309f, 0.014129f, 0.036098f, 0.003727f, 0.014020f, 0.005940f, 0.040458f, 0.025375f, 0.116670f, 0.050261f, 0.139911f, +0.020566f, 0.036562f, 0.000476f, 0.092946f, 0.048208f, 0.166296f, 0.074142f, 0.234820f, 0.032441f, 0.095011f, 0.034688f, 0.292903f, 0.120388f, 0.430918f, 0.159968f, 0.552042f, +0.121320f, 0.164649f, 0.002824f, 0.426543f, 0.178377f, 0.469733f, 0.275914f, 0.675932f, 0.003490f, 0.007802f, 0.003753f, 0.024511f, 0.548052f, 1.497556f, 0.732419f, 1.955049f, +0.258129f, 0.416532f, 0.004922f, 0.991375f, 0.537592f, 1.683245f, 0.681083f, 2.225283f, 0.281114f, 0.747300f, 0.247609f, 2.156884f, 1.356960f, 4.408703f, 1.485310f, 5.287758f, +0.087031f, 0.164891f, 0.003074f, 0.406070f, 0.179237f, 0.658921f, 0.420625f, 0.901332f, 0.070652f, 0.220519f, 0.115273f, 0.658553f, 0.598135f, 2.281671f, 1.212743f, 2.831572f, +0.201090f, 0.296598f, 0.004765f, 0.780288f, 0.650868f, 1.862747f, 1.024673f, 2.722006f, 0.285529f, 0.693790f, 0.312521f, 2.213388f, 1.317426f, 3.912331f, 1.791928f, 5.186730f, +0.171525f, 0.498758f, 0.005725f, 0.799113f, 0.170145f, 0.959987f, 0.377345f, 0.854343f, 0.002888f, 0.013832f, 0.004452f, 0.026875f, 0.438063f, 2.564671f, 0.839380f, 2.070720f, +0.286848f, 0.991741f, 0.007842f, 1.459831f, 
0.403044f, 2.703844f, 0.732124f, 2.210724f, 0.182824f, 1.041307f, 0.230888f, 1.858769f, 0.852513f, 5.934431f, 1.337937f, 4.402048f, +0.007129f, 0.028940f, 0.000361f, 0.044077f, 0.009905f, 0.078021f, 0.033329f, 0.066005f, 0.003387f, 0.022650f, 0.007923f, 0.041835f, 0.027700f, 0.226395f, 0.080525f, 0.173762f, +0.256951f, 0.812012f, 0.008729f, 1.321187f, 0.561097f, 3.440588f, 1.266528f, 3.109445f, 0.213523f, 1.111621f, 0.335087f, 2.193314f, 0.951710f, 6.055478f, 1.856025f, 4.965025f, +0.264513f, 0.742313f, 0.008793f, 0.979963f, 0.324890f, 1.769126f, 0.717545f, 1.297269f, 0.006395f, 0.029563f, 0.009819f, 0.047327f, 0.876896f, 4.954725f, 1.673260f, 3.296199f, +0.555432f, 1.853330f, 0.015122f, 2.247819f, 0.966333f, 6.256512f, 1.748044f, 4.214921f, 0.508364f, 2.794457f, 0.639347f, 4.110062f, 2.142742f, 14.395420f, 3.348867f, 8.798411f, +0.163480f, 0.640469f, 0.008245f, 0.803749f, 0.281254f, 2.138033f, 0.942419f, 1.490340f, 0.111535f, 0.719853f, 0.259832f, 1.095491f, 0.824514f, 6.503736f, 2.386965f, 4.112981f, +0.477018f, 1.454864f, 0.016139f, 1.950421f, 1.289786f, 7.632889f, 2.899264f, 5.683860f, 0.569237f, 2.860098f, 0.889609f, 4.649746f, 2.293396f, 14.083130f, 4.454013f, 9.514291f, +0.163057f, 0.503403f, 0.004765f, 0.753973f, 0.136325f, 0.816648f, 0.264709f, 0.679396f, 0.004308f, 0.021911f, 0.005816f, 0.039796f, 0.489273f, 3.041305f, 0.820819f, 2.295465f, +0.353942f, 1.299248f, 0.008472f, 1.787794f, 0.419156f, 2.985512f, 0.666626f, 2.281880f, 0.354048f, 2.141027f, 0.391476f, 3.572646f, 1.235899f, 9.134291f, 1.698212f, 6.333906f, +0.111305f, 0.479715f, 0.004935f, 0.683003f, 0.130345f, 1.090051f, 0.383990f, 0.862054f, 0.082994f, 0.589270f, 0.169984f, 1.017411f, 0.508109f, 4.409193f, 1.293260f, 3.163514f, +0.401558f, 1.347330f, 0.011944f, 2.049259f, 0.739059f, 4.811579f, 1.460596f, 4.064985f, 0.523712f, 2.894796f, 0.719581f, 5.339285f, 1.747448f, 11.804890f, 2.983719f, 9.048072f, +0.319125f, 0.817405f, 0.009547f, 1.266945f, 0.368407f, 1.830998f, 0.732265f, 1.576364f, 
0.007590f, 0.032025f, 0.010488f, 0.060195f, 1.071697f, 5.526889f, 1.840410f, 4.316900f, +0.701452f, 2.136273f, 0.017187f, 3.042026f, 1.147023f, 6.778208f, 1.867347f, 5.361292f, 0.631596f, 3.168830f, 0.714872f, 5.472015f, 2.741242f, 16.808890f, 3.855693f, 12.061910f, +0.285922f, 1.022392f, 0.012977f, 1.506390f, 0.462338f, 3.207837f, 1.394222f, 2.625310f, 0.191907f, 1.130475f, 0.402346f, 2.019868f, 1.460800f, 10.517020f, 3.805973f, 7.808792f, +0.478885f, 1.333079f, 0.014581f, 2.098260f, 1.217005f, 6.573562f, 2.462009f, 5.747151f, 0.562194f, 2.578171f, 0.790715f, 4.921046f, 2.332307f, 13.072030f, 4.076480f, 10.368540f, +0.068008f, 0.179333f, 0.002161f, 0.424679f, 0.090386f, 0.462472f, 0.190817f, 0.608325f, 0.001146f, 0.004978f, 0.001682f, 0.014296f, 0.185729f, 0.986077f, 0.338762f, 1.176748f, +0.123499f, 0.387210f, 0.003214f, 0.842431f, 0.232496f, 1.414429f, 0.402014f, 1.709295f, 0.078788f, 0.406949f, 0.094715f, 1.073668f, 0.392485f, 2.477635f, 0.586342f, 2.716415f, +0.002688f, 0.009894f, 0.000130f, 0.022274f, 0.005004f, 0.035740f, 0.016026f, 0.044690f, 0.001278f, 0.007751f, 0.002846f, 0.021161f, 0.011167f, 0.082770f, 0.030903f, 0.093895f, +0.123441f, 0.353758f, 0.003992f, 0.850729f, 0.361157f, 2.008296f, 0.776009f, 2.682631f, 0.102675f, 0.484745f, 0.153381f, 1.413646f, 0.488902f, 2.820993f, 0.907601f, 3.418677f, +0.123741f, 0.314913f, 0.003915f, 0.614463f, 0.203636f, 1.005572f, 0.428115f, 1.089852f, 0.002994f, 0.012553f, 0.004377f, 0.029704f, 0.438657f, 2.247671f, 0.796771f, 2.210088f, +0.282148f, 0.853759f, 0.007312f, 1.530478f, 0.657694f, 3.861588f, 1.132513f, 3.845086f, 0.258484f, 1.288525f, 0.309449f, 2.801089f, 1.163927f, 7.091150f, 1.731599f, 6.405891f, +0.072721f, 0.258361f, 0.003491f, 0.479218f, 0.167627f, 1.155567f, 0.534665f, 1.190554f, 0.049661f, 0.290661f, 0.110127f, 0.653784f, 0.392194f, 2.805447f, 1.080793f, 2.622279f, +0.270381f, 0.747826f, 0.008708f, 1.481800f, 0.979513f, 5.256758f, 2.095916f, 5.785693f, 0.322959f, 1.471539f, 0.480449f, 3.535925f, 
1.390049f, 7.740819f, 2.569782f, 7.729423f, +0.074403f, 0.208307f, 0.002070f, 0.461134f, 0.083345f, 0.452767f, 0.154052f, 0.556732f, 0.001968f, 0.009075f, 0.002529f, 0.024363f, 0.238733f, 1.345731f, 0.381244f, 1.501246f, +0.175373f, 0.583794f, 0.003996f, 1.187322f, 0.278265f, 1.797371f, 0.421268f, 2.030461f, 0.175593f, 0.962948f, 0.184817f, 2.374946f, 0.654823f, 4.388866f, 0.856499f, 4.498131f, +0.048294f, 0.188755f, 0.002038f, 0.397210f, 0.075774f, 0.574662f, 0.212492f, 0.671712f, 0.036044f, 0.232082f, 0.070274f, 0.592252f, 0.235746f, 1.855169f, 0.571173f, 1.967329f, +0.222012f, 0.675518f, 0.006286f, 1.518599f, 0.547465f, 3.232227f, 1.029915f, 4.036046f, 0.289822f, 1.452760f, 0.379064f, 3.960428f, 1.033096f, 6.328996f, 1.679144f, 7.169880f, +0.194484f, 0.451747f, 0.005538f, 1.034901f, 0.300816f, 1.355804f, 0.569160f, 1.725236f, 0.004630f, 0.017716f, 0.006090f, 0.049217f, 0.698398f, 3.266243f, 1.141667f, 3.770707f, +0.464193f, 1.282018f, 0.010827f, 2.698257f, 1.017007f, 5.450084f, 1.576052f, 6.371486f, 0.418363f, 1.903483f, 0.450750f, 4.858256f, 1.939803f, 10.786630f, 2.597207f, 11.440530f, +0.165690f, 0.537281f, 0.007159f, 1.170050f, 0.358970f, 2.258642f, 1.030444f, 2.732114f, 0.111314f, 0.594645f, 0.222154f, 1.570374f, 0.905207f, 5.909986f, 2.245002f, 6.485756f, +0.353612f, 0.892665f, 0.010249f, 2.076706f, 1.204036f, 5.897723f, 2.318625f, 7.621128f, 0.415523f, 1.728053f, 0.556317f, 4.875126f, 1.841584f, 9.360204f, 3.063972f, 10.973440f, +0.228469f, 0.355850f, 0.005338f, 0.861353f, 0.254962f, 0.770548f, 0.395827f, 1.036006f, 0.003872f, 0.009935f, 0.004179f, 0.029162f, 0.460071f, 1.442771f, 0.617102f, 1.759878f, +0.425086f, 0.787227f, 0.008135f, 1.750652f, 0.671945f, 2.414572f, 0.854429f, 2.982555f, 0.272743f, 0.832102f, 0.241119f, 2.243980f, 0.996125f, 3.714232f, 1.094355f, 4.162365f, +0.009027f, 0.019628f, 0.000320f, 0.045163f, 0.014110f, 0.059531f, 0.033234f, 0.076086f, 0.004317f, 0.015465f, 0.007070f, 0.043152f, 0.027654f, 0.121068f, 0.056276f, 0.140382f, 
+0.439532f, 0.744011f, 0.010453f, 1.828843f, 1.079777f, 3.546552f, 1.706164f, 4.842304f, 0.367689f, 1.025344f, 0.403928f, 3.056394f, 1.283608f, 4.374752f, 1.752352f, 5.419034f, +0.389419f, 0.585374f, 0.009062f, 1.167486f, 0.538099f, 1.569504f, 0.831927f, 1.738717f, 0.009477f, 0.023469f, 0.010187f, 0.056761f, 1.017902f, 3.080736f, 1.359661f, 3.096305f, +0.909755f, 1.626011f, 0.017339f, 2.979391f, 1.780645f, 6.175327f, 2.254823f, 6.285104f, 0.838234f, 2.468109f, 0.737966f, 5.484169f, 2.767272f, 9.958257f, 3.027535f, 9.195146f, +0.228787f, 0.480110f, 0.008077f, 0.910245f, 0.442814f, 1.803075f, 1.038666f, 1.898804f, 0.157135f, 0.543229f, 0.256250f, 1.248944f, 0.909812f, 3.844090f, 1.843780f, 3.672677f, +0.901869f, 1.473358f, 0.021359f, 2.984075f, 2.743363f, 8.696243f, 4.316809f, 9.783213f, 1.083423f, 2.915835f, 1.185260f, 7.161545f, 3.418819f, 11.245360f, 4.647912f, 11.477460f, +0.244000f, 0.403500f, 0.004992f, 0.913016f, 0.229500f, 0.736410f, 0.311951f, 0.925556f, 0.006490f, 0.017680f, 0.006133f, 0.048513f, 0.577284f, 1.922098f, 0.677946f, 2.191703f, +0.589260f, 1.158626f, 0.009874f, 2.408598f, 0.785068f, 2.995209f, 0.874024f, 3.458570f, 0.593380f, 1.922073f, 0.459288f, 4.845439f, 1.622354f, 6.422654f, 1.560501f, 6.728320f, +0.158329f, 0.365516f, 0.004914f, 0.786215f, 0.208591f, 0.934387f, 0.430162f, 1.116375f, 0.118847f, 0.451995f, 0.170396f, 1.178993f, 0.569890f, 2.648930f, 1.015382f, 2.871286f, +0.771682f, 1.386885f, 0.016068f, 3.186829f, 1.597811f, 5.572000f, 2.210477f, 7.111771f, 1.013160f, 2.999720f, 0.974484f, 8.358749f, 2.647781f, 9.581130f, 3.164791f, 11.094460f, +0.464705f, 0.637574f, 0.009732f, 1.492953f, 0.603533f, 1.606712f, 0.839751f, 2.089787f, 0.011126f, 0.025147f, 0.010763f, 0.071407f, 1.230485f, 3.399085f, 1.479205f, 4.010964f, +1.136418f, 1.853848f, 0.019492f, 3.988188f, 2.090592f, 6.617425f, 2.382492f, 7.907496f, 1.030092f, 2.768295f, 0.816158f, 7.221979f, 3.501674f, 11.501230f, 3.447785f, 12.468580f, +0.395786f, 0.758065f, 0.012575f, 1.687414f, 
0.719993f, 2.675827f, 1.519883f, 3.308429f, 0.267423f, 0.843812f, 0.392480f, 2.277738f, 1.594376f, 6.148508f, 2.907872f, 6.896939f, +0.895543f, 1.335328f, 0.019088f, 3.175316f, 2.560380f, 7.407813f, 3.625861f, 9.784469f, 1.058371f, 2.599802f, 1.042031f, 7.496895f, 3.438976f, 10.324370f, 4.207636f, 12.371820f, +0.027051f, 0.041678f, 0.000703f, 0.111610f, 0.044949f, 0.134375f, 0.077577f, 0.199880f, 0.000600f, 0.001524f, 0.000720f, 0.004948f, 0.106251f, 0.329596f, 0.158435f, 0.444790f, +0.072011f, 0.131915f, 0.001532f, 0.324550f, 0.169487f, 0.602446f, 0.239588f, 0.823293f, 0.060505f, 0.182594f, 0.059464f, 0.544775f, 0.329141f, 1.213983f, 0.401987f, 1.505122f, +0.001132f, 0.002435f, 0.000045f, 0.006198f, 0.002635f, 0.010995f, 0.006899f, 0.015547f, 0.000709f, 0.002512f, 0.001291f, 0.007755f, 0.006764f, 0.029293f, 0.015303f, 0.037578f, +0.059036f, 0.098851f, 0.001561f, 0.268823f, 0.215946f, 0.701605f, 0.379330f, 1.059806f, 0.064673f, 0.178397f, 0.078983f, 0.588324f, 0.336286f, 1.133719f, 0.510368f, 1.553682f, +0.033782f, 0.050231f, 0.000874f, 0.110836f, 0.069504f, 0.200533f, 0.119459f, 0.245777f, 0.001077f, 0.002637f, 0.001286f, 0.007057f, 0.172235f, 0.515637f, 0.255759f, 0.573352f, +0.112914f, 0.199628f, 0.002392f, 0.404683f, 0.329068f, 1.128867f, 0.463240f, 1.271110f, 0.136240f, 0.396807f, 0.133341f, 0.975471f, 0.669924f, 2.384690f, 0.814794f, 2.436100f, +0.021020f, 0.043634f, 0.000825f, 0.091523f, 0.060578f, 0.243996f, 0.157963f, 0.284274f, 0.018906f, 0.064652f, 0.034275f, 0.164450f, 0.163047f, 0.681442f, 0.367329f, 0.720288f, +0.088752f, 0.143422f, 0.002337f, 0.321370f, 0.401975f, 1.260442f, 0.703177f, 1.568775f, 0.139620f, 0.371695f, 0.169804f, 1.009994f, 0.656233f, 2.135159f, 0.991803f, 2.410965f, +0.003215f, 0.005258f, 0.000073f, 0.013164f, 0.004502f, 0.014289f, 0.006803f, 0.019869f, 0.000112f, 0.000302f, 0.000118f, 0.000916f, 0.014834f, 0.048858f, 0.019367f, 0.061635f, +0.011107f, 0.021603f, 0.000207f, 0.049684f, 0.022033f, 0.083153f, 0.027270f, 0.106227f, 
0.014647f, 0.046930f, 0.012603f, 0.130889f, 0.059647f, 0.233577f, 0.063781f, 0.270713f, +0.002209f, 0.005045f, 0.000076f, 0.012006f, 0.004334f, 0.019203f, 0.009935f, 0.025382f, 0.002172f, 0.008170f, 0.003461f, 0.023576f, 0.015510f, 0.071314f, 0.030721f, 0.085520f, +0.011533f, 0.020503f, 0.000267f, 0.052122f, 0.035556f, 0.122650f, 0.054683f, 0.173190f, 0.019829f, 0.058073f, 0.021202f, 0.179028f, 0.077185f, 0.276274f, 0.102560f, 0.353930f, +0.053006f, 0.071937f, 0.001234f, 0.186360f, 0.102501f, 0.269924f, 0.158549f, 0.388412f, 0.001662f, 0.003716f, 0.001787f, 0.011673f, 0.273760f, 0.748050f, 0.365854f, 0.976575f, +0.185456f, 0.299262f, 0.003536f, 0.712265f, 0.507991f, 1.590563f, 0.643582f, 2.102756f, 0.220138f, 0.585203f, 0.193901f, 1.689035f, 1.114624f, 3.621362f, 1.220052f, 4.343428f, +0.047813f, 0.090588f, 0.001689f, 0.223087f, 0.129509f, 0.476109f, 0.303927f, 0.651266f, 0.042306f, 0.132047f, 0.069025f, 0.394342f, 0.375690f, 1.433125f, 0.761728f, 1.778519f, +0.115877f, 0.170913f, 0.002746f, 0.449636f, 0.493287f, 1.411758f, 0.776590f, 2.062982f, 0.179335f, 0.435756f, 0.196288f, 1.390184f, 0.867941f, 2.577505f, 1.180551f, 3.417099f, +0.059203f, 0.172150f, 0.001976f, 0.275819f, 0.077239f, 0.435794f, 0.171299f, 0.387836f, 0.001086f, 0.005204f, 0.001675f, 0.010110f, 0.172866f, 1.012057f, 0.331232f, 0.817137f, +0.162810f, 0.562895f, 0.004451f, 0.828576f, 0.300872f, 2.018416f, 0.546530f, 1.650303f, 0.113102f, 0.644193f, 0.142836f, 1.149908f, 0.553207f, 3.850932f, 0.868205f, 2.856548f, +0.003094f, 0.012560f, 0.000157f, 0.019130f, 0.005654f, 0.044536f, 0.019025f, 0.037677f, 0.001602f, 0.010715f, 0.003748f, 0.019790f, 0.013745f, 0.112337f, 0.039957f, 0.086221f, +0.116972f, 0.369653f, 0.003974f, 0.601446f, 0.335946f, 2.059988f, 0.758310f, 1.861722f, 0.105946f, 0.551566f, 0.166264f, 1.088282f, 0.495329f, 3.151651f, 0.965992f, 2.584111f, +0.097535f, 0.273716f, 0.003242f, 0.361346f, 0.157561f, 0.857970f, 0.347986f, 0.629134f, 0.002570f, 0.011881f, 0.003946f, 0.019021f, 
0.369674f, 2.088768f, 0.705398f, 1.389582f, +0.336789f, 1.123775f, 0.009169f, 1.362975f, 0.770643f, 4.989518f, 1.394051f, 3.361365f, 0.335977f, 1.846852f, 0.422544f, 2.716334f, 1.485436f, 9.979487f, 2.321570f, 6.099414f, +0.075799f, 0.296958f, 0.003823f, 0.372664f, 0.171512f, 1.303799f, 0.574699f, 0.908828f, 0.056366f, 0.363788f, 0.131310f, 0.553622f, 0.437071f, 3.447598f, 1.265318f, 2.180271f, +0.231987f, 0.707542f, 0.007849f, 0.948545f, 0.824986f, 4.882228f, 1.854458f, 3.635570f, 0.301738f, 1.516068f, 0.471560f, 2.464716f, 1.275163f, 7.830437f, 2.476500f, 5.290092f, +0.057937f, 0.178869f, 0.001693f, 0.267901f, 0.063708f, 0.381640f, 0.123705f, 0.317498f, 0.001668f, 0.008486f, 0.002252f, 0.015412f, 0.198759f, 1.235480f, 0.333444f, 0.932495f, +0.206807f, 0.759143f, 0.004950f, 1.044598f, 0.322112f, 2.294300f, 0.512288f, 1.753574f, 0.225477f, 1.363522f, 0.249313f, 2.275254f, 0.825604f, 6.101882f, 1.134438f, 4.231171f, +0.049730f, 0.214331f, 0.002205f, 0.305158f, 0.076594f, 0.640542f, 0.225643f, 0.506566f, 0.040416f, 0.286962f, 0.082778f, 0.495457f, 0.259547f, 2.252257f, 0.660609f, 1.615953f, +0.188184f, 0.631406f, 0.005598f, 0.960354f, 0.455526f, 2.965664f, 0.900253f, 2.505493f, 0.267507f, 1.478634f, 0.367555f, 2.727256f, 0.936259f, 6.324903f, 1.598637f, 4.847836f, +0.116519f, 0.298452f, 0.003486f, 0.462588f, 0.176915f, 0.879274f, 0.351645f, 0.756995f, 0.003020f, 0.012745f, 0.004174f, 0.023955f, 0.447369f, 2.307143f, 0.768260f, 1.802046f, +0.421160f, 1.282645f, 0.010320f, 1.826471f, 0.905778f, 5.352594f, 1.474600f, 4.233688f, 0.413330f, 2.073752f, 0.467828f, 3.581008f, 1.881717f, 11.538410f, 2.646729f, 8.279866f, +0.131271f, 0.469394f, 0.005958f, 0.691603f, 0.279176f, 1.937009f, 0.841882f, 1.585258f, 0.096033f, 0.565703f, 0.201339f, 1.010767f, 0.766775f, 5.520387f, 1.997757f, 4.098838f, +0.230613f, 0.641961f, 0.007022f, 1.010443f, 0.770805f, 4.163448f, 1.559344f, 3.640030f, 0.295085f, 1.353233f, 0.415031f, 2.582964f, 1.284091f, 7.197025f, 2.244374f, 5.708574f, 
+0.017297f, 0.045611f, 0.000550f, 0.108012f, 0.030235f, 0.154702f, 0.063830f, 0.203491f, 0.000318f, 0.001380f, 0.000466f, 0.003963f, 0.054007f, 0.286733f, 0.098506f, 0.342177f, +0.051652f, 0.161946f, 0.001344f, 0.352337f, 0.127891f, 0.778045f, 0.221139f, 0.940244f, 0.035916f, 0.185512f, 0.043177f, 0.489443f, 0.187674f, 1.184727f, 0.280370f, 1.298904f, +0.000860f, 0.003164f, 0.000041f, 0.007123f, 0.002105f, 0.015033f, 0.006741f, 0.018798f, 0.000446f, 0.002702f, 0.000992f, 0.007376f, 0.004083f, 0.030264f, 0.011299f, 0.034332f, +0.041408f, 0.118668f, 0.001339f, 0.285376f, 0.159339f, 0.886041f, 0.342368f, 1.183551f, 0.037540f, 0.177234f, 0.056080f, 0.516863f, 0.187502f, 1.081897f, 0.348080f, 1.311118f, +0.033622f, 0.085565f, 0.001064f, 0.166957f, 0.072771f, 0.359352f, 0.152992f, 0.389470f, 0.000887f, 0.003718f, 0.001296f, 0.008797f, 0.136267f, 0.698228f, 0.247513f, 0.686554f, +0.126066f, 0.381466f, 0.003267f, 0.683830f, 0.386495f, 2.269271f, 0.665524f, 2.259574f, 0.125882f, 0.627512f, 0.150702f, 1.364130f, 0.594571f, 3.622386f, 0.884556f, 3.272334f, +0.024846f, 0.088271f, 0.001193f, 0.163728f, 0.075324f, 0.519261f, 0.240255f, 0.534982f, 0.018493f, 0.108239f, 0.041010f, 0.243463f, 0.153196f, 1.095847f, 0.422173f, 1.024299f, +0.096895f, 0.267993f, 0.003121f, 0.531023f, 0.461672f, 2.477658f, 0.987864f, 2.726960f, 0.126148f, 0.574783f, 0.187663f, 1.381131f, 0.569523f, 3.171523f, 1.052876f, 3.166854f, +0.019481f, 0.054540f, 0.000542f, 0.120737f, 0.028701f, 0.155915f, 0.053049f, 0.191716f, 0.000562f, 0.002590f, 0.000722f, 0.006953f, 0.071463f, 0.402836f, 0.114123f, 0.449389f, +0.075507f, 0.251354f, 0.001720f, 0.511204f, 0.157574f, 1.017802f, 0.238553f, 1.149794f, 0.082403f, 0.451894f, 0.086732f, 1.114520f, 0.322335f, 2.160407f, 0.421609f, 2.214192f, +0.015900f, 0.062143f, 0.000671f, 0.130772f, 0.032811f, 0.248833f, 0.092011f, 0.290856f, 0.012934f, 0.083281f, 0.025217f, 0.212525f, 0.088736f, 0.698291f, 0.214991f, 0.740509f, +0.076666f, 0.233274f, 0.002171f, 0.524411f, 
0.248648f, 1.468014f, 0.467767f, 1.833093f, 0.109086f, 0.546803f, 0.142676f, 1.490662f, 0.407875f, 2.498739f, 0.662940f, 2.830727f, +0.052326f, 0.121542f, 0.001490f, 0.278439f, 0.106446f, 0.479764f, 0.201403f, 0.610491f, 0.001358f, 0.005195f, 0.001786f, 0.014433f, 0.214828f, 1.004700f, 0.351178f, 1.159874f, +0.205373f, 0.567203f, 0.004790f, 1.193788f, 0.591790f, 3.171369f, 0.917095f, 3.707527f, 0.201746f, 0.917912f, 0.217364f, 2.342786f, 0.981203f, 5.456160f, 1.313735f, 5.786919f, +0.056054f, 0.181767f, 0.002422f, 0.395839f, 0.159724f, 1.004988f, 0.458498f, 1.215660f, 0.041046f, 0.219270f, 0.081917f, 0.579062f, 0.350122f, 2.285902f, 0.868336f, 2.508602f, +0.125480f, 0.316764f, 0.003637f, 0.736923f, 0.561935f, 2.752523f, 1.082124f, 3.556852f, 0.160713f, 0.668363f, 0.215168f, 1.885563f, 0.747129f, 3.797427f, 1.243051f, 4.451917f, +0.091829f, 0.143027f, 0.002146f, 0.346204f, 0.134780f, 0.407334f, 0.209246f, 0.547662f, 0.001696f, 0.004352f, 0.001831f, 0.012775f, 0.211414f, 0.662988f, 0.283573f, 0.808706f, +0.280958f, 0.520312f, 0.005377f, 1.157081f, 0.584114f, 2.098959f, 0.742745f, 2.592701f, 0.196483f, 0.599445f, 0.173702f, 1.616558f, 0.752723f, 2.806664f, 0.826951f, 3.145296f, +0.004562f, 0.009920f, 0.000162f, 0.022825f, 0.009379f, 0.039571f, 0.022091f, 0.050575f, 0.002378f, 0.008519f, 0.003894f, 0.023771f, 0.015979f, 0.069955f, 0.032518f, 0.081116f, +0.233001f, 0.394408f, 0.005541f, 0.969490f, 0.752837f, 2.472709f, 1.189563f, 3.376127f, 0.212450f, 0.592441f, 0.233388f, 1.765975f, 0.777959f, 2.651415f, 1.062052f, 3.284326f, +0.167211f, 0.251352f, 0.003891f, 0.501302f, 0.303886f, 0.886360f, 0.469822f, 0.981921f, 0.004435f, 0.010984f, 0.004767f, 0.026565f, 0.499702f, 1.512376f, 0.667477f, 1.520019f, +0.642371f, 1.148113f, 0.012243f, 2.103725f, 1.653630f, 5.734835f, 2.093984f, 5.836782f, 0.645111f, 1.899476f, 0.567944f, 4.220659f, 2.233935f, 8.038998f, 2.444037f, 7.422961f, +0.123527f, 0.259222f, 0.004361f, 0.491462f, 0.314450f, 1.280397f, 0.737576f, 1.348376f, 
0.092472f, 0.319685f, 0.150801f, 0.734992f, 0.561617f, 2.372914f, 1.138145f, 2.267102f, +0.510749f, 0.834396f, 0.012096f, 1.689950f, 2.043371f, 6.477323f, 3.215339f, 7.286943f, 0.668760f, 1.799846f, 0.731621f, 4.420579f, 2.213592f, 7.281063f, 3.009396f, 7.431344f, +0.100959f, 0.166954f, 0.002065f, 0.377773f, 0.124892f, 0.400749f, 0.169761f, 0.503681f, 0.002927f, 0.007973f, 0.002766f, 0.021879f, 0.273086f, 0.909255f, 0.320705f, 1.036793f, +0.400935f, 0.788333f, 0.006718f, 1.638818f, 0.702543f, 2.680360f, 0.782149f, 3.095014f, 0.440056f, 1.425425f, 0.340612f, 3.593418f, 1.262029f, 4.996181f, 1.213913f, 5.233959f, +0.082375f, 0.190170f, 0.002557f, 0.409051f, 0.142736f, 0.639385f, 0.294353f, 0.763916f, 0.067396f, 0.256318f, 0.096628f, 0.668583f, 0.338988f, 1.575665f, 0.603981f, 1.707930f, +0.421122f, 0.756850f, 0.008769f, 1.739114f, 1.146817f, 3.999264f, 1.586554f, 5.104424f, 0.602637f, 1.784260f, 0.579632f, 4.971859f, 1.651995f, 5.977826f, 1.974566f, 6.922019f, +0.197583f, 0.271083f, 0.004138f, 0.634772f, 0.337499f, 0.898481f, 0.469593f, 1.168619f, 0.005156f, 0.011654f, 0.004988f, 0.033092f, 0.598143f, 1.652306f, 0.719046f, 1.949743f, +0.794552f, 1.296160f, 0.013628f, 2.788432f, 1.922443f, 6.085177f, 2.190865f, 7.271485f, 0.784998f, 2.109624f, 0.621966f, 5.503626f, 2.799094f, 9.193611f, 2.756018f, 9.966871f, +0.211599f, 0.405285f, 0.006723f, 0.902144f, 0.506270f, 1.881534f, 1.068720f, 2.326354f, 0.155834f, 0.491709f, 0.228707f, 1.327291f, 0.974546f, 3.758213f, 1.777407f, 4.215683f, +0.502197f, 0.748817f, 0.010704f, 1.780633f, 1.888389f, 5.463577f, 2.674227f, 7.216462f, 0.646895f, 1.589044f, 0.636907f, 4.582231f, 2.204824f, 6.619242f, 2.697633f, 7.931919f, +0.101998f, 0.157147f, 0.002650f, 0.420830f, 0.119811f, 0.358175f, 0.206782f, 0.532778f, 0.002228f, 0.005656f, 0.002674f, 0.018367f, 0.258817f, 0.802862f, 0.385932f, 1.083463f, +0.186183f, 0.341066f, 0.003961f, 0.839124f, 0.309781f, 1.101124f, 0.437908f, 1.504779f, 0.153994f, 0.464731f, 0.151345f, 1.386539f, 
0.549772f, 2.027744f, 0.671448f, 2.514040f, +0.004030f, 0.008667f, 0.000159f, 0.022064f, 0.006630f, 0.027671f, 0.017361f, 0.039127f, 0.002485f, 0.008804f, 0.004523f, 0.027177f, 0.015557f, 0.067369f, 0.035194f, 0.086424f, +0.156116f, 0.261403f, 0.004127f, 0.710878f, 0.403690f, 1.311582f, 0.709121f, 1.981203f, 0.168354f, 0.464395f, 0.205604f, 1.531494f, 0.574506f, 1.936825f, 0.871904f, 2.654280f, +0.118357f, 0.175989f, 0.003062f, 0.388321f, 0.172146f, 0.496674f, 0.295873f, 0.608732f, 0.003713f, 0.009096f, 0.004437f, 0.024338f, 0.389841f, 1.167109f, 0.578893f, 1.297743f, +0.271270f, 0.479596f, 0.005748f, 0.972226f, 0.558871f, 1.917210f, 0.786742f, 2.158789f, 0.322202f, 0.938433f, 0.315345f, 2.306947f, 1.039763f, 3.701186f, 1.264612f, 3.780979f, +0.069534f, 0.144338f, 0.002729f, 0.302751f, 0.141659f, 0.570573f, 0.369389f, 0.664761f, 0.061564f, 0.210528f, 0.111610f, 0.535498f, 0.348435f, 1.456260f, 0.784991f, 1.539275f, +0.218079f, 0.352414f, 0.005742f, 0.789664f, 0.698250f, 2.189446f, 1.221450f, 2.725034f, 0.337718f, 0.899072f, 0.410729f, 2.443016f, 1.041721f, 3.389407f, 1.574413f, 3.827228f, +0.007064f, 0.011555f, 0.000161f, 0.028926f, 0.006993f, 0.022197f, 0.010568f, 0.030866f, 0.000242f, 0.000653f, 0.000254f, 0.001981f, 0.021059f, 0.069360f, 0.027494f, 0.087499f, +0.016736f, 0.032551f, 0.000312f, 0.074865f, 0.023470f, 0.088575f, 0.029048f, 0.113154f, 0.021726f, 0.069612f, 0.018694f, 0.194148f, 0.058063f, 0.227377f, 0.062088f, 0.263528f, +0.004584f, 0.010467f, 0.000158f, 0.024908f, 0.006356f, 0.028164f, 0.014572f, 0.037228f, 0.004435f, 0.016685f, 0.007069f, 0.048150f, 0.020789f, 0.095585f, 0.041177f, 0.114626f, +0.017774f, 0.031598f, 0.000411f, 0.080328f, 0.038737f, 0.133625f, 0.059576f, 0.188687f, 0.030082f, 0.088102f, 0.032166f, 0.271603f, 0.076848f, 0.275069f, 0.102113f, 0.352386f, +0.195101f, 0.264781f, 0.004542f, 0.685947f, 0.266711f, 0.702348f, 0.412549f, 1.010659f, 0.006021f, 0.013463f, 0.006475f, 0.042294f, 0.650974f, 1.778789f, 0.869964f, 2.322198f, 
+0.468080f, 0.755320f, 0.008925f, 1.797715f, 0.906378f, 2.837946f, 1.148304f, 3.751820f, 0.546947f, 1.453974f, 0.481758f, 4.196512f, 1.817455f, 5.904830f, 1.989361f, 7.082198f, +0.166161f, 0.314812f, 0.005869f, 0.775273f, 0.318167f, 1.169664f, 0.746661f, 1.599974f, 0.144729f, 0.451729f, 0.236135f, 1.349037f, 0.843464f, 3.217515f, 1.710159f, 3.992962f, +0.299131f, 0.441203f, 0.007088f, 1.160713f, 0.900196f, 2.576308f, 1.417194f, 3.764724f, 0.455722f, 1.107330f, 0.498802f, 3.532697f, 1.447471f, 4.298524f, 1.968813f, 5.698721f, +0.248539f, 0.722697f, 0.008296f, 1.157911f, 0.229224f, 1.293321f, 0.508370f, 1.150995f, 0.004489f, 0.021505f, 0.006922f, 0.041782f, 0.468832f, 2.744811f, 0.898337f, 2.216165f, +0.468678f, 1.620392f, 0.012814f, 2.385199f, 0.612277f, 4.107500f, 1.112194f, 3.358385f, 0.320504f, 1.825490f, 0.404764f, 3.258565f, 1.028813f, 7.161679f, 1.614624f, 5.312397f, +0.012264f, 0.049783f, 0.000621f, 0.075823f, 0.015843f, 0.124790f, 0.053308f, 0.105571f, 0.006252f, 0.041807f, 0.014624f, 0.077216f, 0.035195f, 0.287656f, 0.102315f, 0.220782f, +0.344398f, 1.088357f, 0.011700f, 1.770816f, 0.699231f, 4.287616f, 1.578330f, 3.874950f, 0.307067f, 1.598618f, 0.481888f, 3.154198f, 0.942166f, 5.994754f, 1.837413f, 4.915236f, +0.380470f, 1.067726f, 0.012647f, 1.409556f, 0.434493f, 2.365950f, 0.959613f, 1.734909f, 0.009869f, 0.045625f, 0.015153f, 0.073040f, 0.931611f, 5.263882f, 1.777665f, 3.501870f, +0.900863f, 3.005941f, 0.024527f, 3.645769f, 1.457231f, 9.434829f, 2.636053f, 6.356106f, 0.884671f, 4.862997f, 1.112611f, 7.152453f, 2.566915f, 17.245110f, 4.011801f, 10.540130f, +0.279167f, 1.093697f, 0.014079f, 1.372523f, 0.446551f, 3.394589f, 1.496294f, 2.366237f, 0.204357f, 1.318930f, 0.476070f, 2.007182f, 1.039946f, 8.203052f, 3.010638f, 5.187633f, +0.634673f, 1.935697f, 0.021472f, 2.595037f, 1.595536f, 9.442302f, 3.586548f, 7.031247f, 0.812619f, 4.082957f, 1.269969f, 6.637784f, 2.253761f, 13.839740f, 4.377037f, 9.349862f, +0.141751f, 0.437624f, 0.004143f, 0.655452f, 
0.110188f, 0.660077f, 0.213958f, 0.549139f, 0.004018f, 0.020437f, 0.005425f, 0.037120f, 0.314159f, 1.952805f, 0.527043f, 1.473906f, +0.346955f, 1.273597f, 0.008305f, 1.752499f, 0.382023f, 2.721027f, 0.607570f, 2.079730f, 0.372376f, 2.251859f, 0.411741f, 3.757587f, 0.894822f, 6.613461f, 1.229549f, 4.585910f, +0.114875f, 0.495102f, 0.005093f, 0.704910f, 0.125077f, 1.046000f, 0.368472f, 0.827218f, 0.091904f, 0.652536f, 0.188234f, 1.126643f, 0.387331f, 3.361121f, 0.985850f, 2.411541f, +0.322906f, 1.083431f, 0.009605f, 1.647875f, 0.552561f, 3.597402f, 1.092022f, 3.039207f, 0.451854f, 2.497608f, 0.620849f, 4.606694f, 1.037876f, 7.011374f, 1.772144f, 5.373994f, +0.477511f, 1.223094f, 0.014285f, 1.895746f, 0.512536f, 2.547325f, 1.018743f, 2.193072f, 0.012185f, 0.051416f, 0.016838f, 0.096640f, 1.184426f, 6.108250f, 2.033999f, 4.770986f, +1.183519f, 3.604410f, 0.028999f, 5.132634f, 1.799382f, 10.633250f, 2.929383f, 8.410478f, 1.143393f, 5.736607f, 1.294150f, 9.906118f, 3.416162f, 20.947390f, 4.805001f, 15.031670f, +0.507921f, 1.816209f, 0.023053f, 2.675998f, 0.763627f, 5.298275f, 2.302789f, 4.336136f, 0.365779f, 2.154706f, 0.766879f, 3.849908f, 1.916694f, 13.799230f, 4.993762f, 10.245800f, +0.662820f, 1.845102f, 0.020182f, 2.904182f, 1.566141f, 8.459394f, 3.168313f, 7.395901f, 0.834891f, 3.828733f, 1.174257f, 7.308038f, 2.384317f, 13.363530f, 4.167384f, 10.599760f, +0.113339f, 0.298867f, 0.003601f, 0.707749f, 0.140054f, 0.716603f, 0.295671f, 0.942602f, 0.002049f, 0.008902f, 0.003008f, 0.025563f, 0.228619f, 1.213789f, 0.416992f, 1.448491f, +0.232080f, 0.727646f, 0.006040f, 1.583099f, 0.406222f, 2.471319f, 0.702408f, 2.986516f, 0.158858f, 0.820527f, 0.190973f, 2.164824f, 0.544766f, 3.438942f, 0.813839f, 3.770366f, +0.005318f, 0.019576f, 0.000256f, 0.044069f, 0.009205f, 0.065747f, 0.029481f, 0.082211f, 0.002713f, 0.016455f, 0.006042f, 0.044921f, 0.016319f, 0.120957f, 0.045160f, 0.137215f, +0.190291f, 0.545340f, 0.006154f, 1.311451f, 0.517644f, 2.878475f, 1.112248f, 3.844995f, 
0.169826f, 0.801776f, 0.253695f, 2.338193f, 0.556668f, 3.212009f, 1.033402f, 3.892537f, +0.204710f, 0.520973f, 0.006477f, 1.016531f, 0.313222f, 1.546720f, 0.658505f, 1.676355f, 0.005315f, 0.022283f, 0.007768f, 0.052725f, 0.535998f, 2.746446f, 0.973581f, 2.700523f, +0.526328f, 1.592630f, 0.013641f, 2.855003f, 1.140716f, 6.697604f, 1.964250f, 6.668982f, 0.517359f, 2.579000f, 0.619367f, 5.606415f, 1.603685f, 9.770345f, 2.385836f, 8.826180f, +0.142827f, 0.507432f, 0.006857f, 0.941204f, 0.306103f, 2.110180f, 0.976352f, 2.174070f, 0.104652f, 0.612514f, 0.232072f, 1.377729f, 0.568938f, 4.069737f, 1.567858f, 3.804023f, +0.413755f, 1.144372f, 0.013325f, 2.267547f, 1.393641f, 7.479260f, 2.982048f, 8.231824f, 0.530265f, 2.416114f, 0.788847f, 5.805621f, 1.571125f, 8.749180f, 2.904537f, 8.736301f, +0.074392f, 0.208277f, 0.002070f, 0.461067f, 0.077480f, 0.420907f, 0.143211f, 0.517555f, 0.002111f, 0.009736f, 0.002713f, 0.026136f, 0.176305f, 0.993823f, 0.281549f, 1.108671f, +0.197722f, 0.658191f, 0.004505f, 1.338629f, 0.291692f, 1.884098f, 0.441595f, 2.128435f, 0.212411f, 1.164860f, 0.223570f, 2.872926f, 0.545293f, 3.654753f, 0.713234f, 3.745741f, +0.057326f, 0.224058f, 0.002420f, 0.471502f, 0.083629f, 0.634234f, 0.234520f, 0.741345f, 0.045907f, 0.295586f, 0.089502f, 0.754308f, 0.206691f, 1.626523f, 0.500777f, 1.724860f, +0.205331f, 0.624764f, 0.005814f, 1.404502f, 0.470771f, 2.779425f, 0.885635f, 3.470637f, 0.287601f, 1.441624f, 0.376159f, 3.930069f, 0.705722f, 4.323423f, 1.147046f, 4.897842f, +0.334701f, 0.777445f, 0.009531f, 1.781037f, 0.481336f, 2.169427f, 0.910715f, 2.760556f, 0.008549f, 0.032713f, 0.011245f, 0.090879f, 0.887751f, 4.151799f, 1.451200f, 4.793035f, +0.900798f, 2.487844f, 0.021010f, 5.236153f, 1.834961f, 9.833456f, 2.843632f, 11.495920f, 0.871086f, 3.963300f, 0.938520f, 10.115520f, 2.780357f, 15.460680f, 3.722625f, 16.397930f, +0.338529f, 1.097745f, 0.014626f, 2.390588f, 0.681917f, 4.290633f, 1.957484f, 5.190065f, 0.244023f, 1.303577f, 0.487004f, 3.442564f, 
1.366035f, 8.918672f, 3.387899f, 9.787559f, +0.562916f, 1.421035f, 0.016315f, 3.305911f, 1.782090f, 8.729203f, 3.431789f, 11.280010f, 0.709726f, 2.951564f, 0.950206f, 8.326855f, 2.165317f, 11.005640f, 3.602590f, 12.902470f, +0.315911f, 0.492045f, 0.007382f, 1.191019f, 0.327784f, 0.990630f, 0.508882f, 1.331906f, 0.005744f, 0.014739f, 0.006200f, 0.043265f, 0.469869f, 1.473495f, 0.630243f, 1.797355f, +0.662780f, 1.227417f, 0.012684f, 2.729557f, 0.974093f, 3.500311f, 1.238632f, 4.323694f, 0.456273f, 1.392028f, 0.403369f, 3.753965f, 1.147149f, 4.277354f, 1.260272f, 4.793429f, +0.014818f, 0.032220f, 0.000525f, 0.074138f, 0.021536f, 0.090862f, 0.050725f, 0.116129f, 0.007604f, 0.027239f, 0.012452f, 0.076005f, 0.033530f, 0.146793f, 0.068234f, 0.170212f, +0.562173f, 0.951609f, 0.013370f, 2.339139f, 1.284068f, 4.217548f, 2.028964f, 5.758451f, 0.504591f, 1.407111f, 0.554322f, 4.194381f, 1.212623f, 4.132823f, 1.655445f, 5.119355f, +0.534516f, 0.803483f, 0.012438f, 1.602488f, 0.686720f, 2.002994f, 1.061701f, 2.218943f, 0.013957f, 0.034563f, 0.015002f, 0.083593f, 1.031960f, 3.123284f, 1.378440f, 3.139069f, +1.408065f, 2.516644f, 0.026836f, 4.611326f, 2.562416f, 8.886530f, 3.244775f, 9.044503f, 1.392009f, 4.098657f, 1.225500f, 9.107265f, 3.163471f, 11.384010f, 3.460997f, 10.511640f, +0.372821f, 0.782366f, 0.013162f, 1.483296f, 0.670910f, 2.731851f, 1.573690f, 2.876891f, 0.274740f, 0.949797f, 0.448035f, 2.183691f, 1.095052f, 4.626756f, 2.219177f, 4.420442f, +1.145062f, 1.870655f, 0.027119f, 3.788743f, 3.238492f, 10.265760f, 5.095917f, 11.548910f, 1.475918f, 3.972166f, 1.614648f, 9.755985f, 3.206087f, 10.545630f, 4.358702f, 10.763290f, +0.202416f, 0.334733f, 0.004141f, 0.757415f, 0.177016f, 0.568001f, 0.240611f, 0.713892f, 0.005777f, 0.015737f, 0.005459f, 0.043181f, 0.353719f, 1.177728f, 0.415398f, 1.342923f, +0.551211f, 1.083812f, 0.009236f, 2.253072f, 0.682797f, 2.605024f, 0.760165f, 3.008023f, 0.595556f, 1.929121f, 0.460972f, 4.863206f, 1.120908f, 4.437506f, 1.078173f, 4.648695f, 
+0.155934f, 0.359988f, 0.004840f, 0.774324f, 0.191008f, 0.855623f, 0.393902f, 1.022270f, 0.125588f, 0.477633f, 0.180061f, 1.245867f, 0.414559f, 1.926930f, 0.738627f, 2.088680f, +0.592156f, 1.064237f, 0.012330f, 2.445438f, 1.139980f, 3.975419f, 1.577095f, 5.073990f, 0.834171f, 2.469776f, 0.802327f, 6.882054f, 1.500699f, 5.430357f, 1.793728f, 6.288077f, +0.663545f, 0.910382f, 0.013896f, 2.131764f, 0.801250f, 2.133068f, 1.114853f, 2.774398f, 0.017045f, 0.038526f, 0.016489f, 0.109399f, 1.297726f, 3.584831f, 1.560037f, 4.230146f, +1.829725f, 2.984846f, 0.031384f, 6.421307f, 3.129616f, 9.906284f, 3.566590f, 11.837520f, 1.779519f, 4.782325f, 1.409941f, 12.476220f, 4.164255f, 13.677470f, 4.100169f, 14.827860f, +0.670932f, 1.285065f, 0.021317f, 2.860491f, 1.134804f, 4.217458f, 2.395537f, 5.214523f, 0.486404f, 1.534771f, 0.713863f, 4.142871f, 1.996289f, 7.698433f, 3.640892f, 8.635528f, +1.182827f, 1.763694f, 0.025211f, 4.193938f, 3.144224f, 9.097016f, 4.452666f, 12.015620f, 1.499864f, 3.684293f, 1.476708f, 10.624180f, 3.354887f, 10.071920f, 4.104752f, 12.069310f, +0.021178f, 0.032628f, 0.000550f, 0.087376f, 0.023600f, 0.070552f, 0.040731f, 0.104944f, 0.000347f, 0.000881f, 0.000417f, 0.002861f, 0.062802f, 0.194815f, 0.093646f, 0.262902f, +0.020440f, 0.037443f, 0.000435f, 0.092122f, 0.032264f, 0.114683f, 0.045608f, 0.156724f, 0.012685f, 0.038283f, 0.012467f, 0.114217f, 0.070536f, 0.260161f, 0.086147f, 0.322554f, +0.000745f, 0.001601f, 0.000029f, 0.004076f, 0.001162f, 0.004850f, 0.003043f, 0.006858f, 0.000344f, 0.001220f, 0.000627f, 0.003767f, 0.003359f, 0.014546f, 0.007599f, 0.018660f, +0.037789f, 0.063274f, 0.000999f, 0.172072f, 0.092702f, 0.301187f, 0.162840f, 0.454957f, 0.030578f, 0.084346f, 0.037343f, 0.278160f, 0.162518f, 0.547897f, 0.246648f, 0.750853f, +0.023057f, 0.034285f, 0.000596f, 0.075649f, 0.031815f, 0.091794f, 0.054682f, 0.112504f, 0.000543f, 0.001330f, 0.000649f, 0.003558f, 0.088756f, 0.265718f, 0.131797f, 0.295459f, +0.027943f, 0.049401f, 0.000592f, 0.100145f, 
0.054614f, 0.187353f, 0.076882f, 0.210961f, 0.024903f, 0.072532f, 0.024373f, 0.178306f, 0.125168f, 0.445553f, 0.152235f, 0.455158f, +0.012053f, 0.025020f, 0.000473f, 0.052480f, 0.023296f, 0.093831f, 0.060746f, 0.109321f, 0.008008f, 0.027383f, 0.014517f, 0.069652f, 0.070587f, 0.295014f, 0.159026f, 0.311831f, +0.049529f, 0.080038f, 0.001304f, 0.179343f, 0.150446f, 0.471741f, 0.263176f, 0.587140f, 0.057552f, 0.153215f, 0.069994f, 0.416326f, 0.276495f, 0.899622f, 0.417884f, 1.015830f, +0.002411f, 0.003943f, 0.000055f, 0.009872f, 0.002264f, 0.007187f, 0.003421f, 0.009993f, 0.000062f, 0.000167f, 0.000065f, 0.000507f, 0.008399f, 0.027663f, 0.010965f, 0.034897f, +0.003020f, 0.005874f, 0.000056f, 0.013509f, 0.004018f, 0.015163f, 0.004973f, 0.019370f, 0.002942f, 0.009425f, 0.002531f, 0.026287f, 0.012245f, 0.047950f, 0.013093f, 0.055573f, +0.001392f, 0.003178f, 0.000048f, 0.007564f, 0.001831f, 0.008114f, 0.004198f, 0.010725f, 0.001011f, 0.003802f, 0.001611f, 0.010971f, 0.007378f, 0.033922f, 0.014613f, 0.040679f, +0.007071f, 0.012571f, 0.000164f, 0.031959f, 0.014621f, 0.050436f, 0.022487f, 0.071219f, 0.008980f, 0.026301f, 0.009602f, 0.081082f, 0.035731f, 0.127897f, 0.047479f, 0.163846f, +0.040074f, 0.054387f, 0.000933f, 0.140895f, 0.051972f, 0.136863f, 0.080391f, 0.196941f, 0.000928f, 0.002075f, 0.000998f, 0.006518f, 0.156265f, 0.426996f, 0.208834f, 0.557440f, +0.050836f, 0.082032f, 0.000969f, 0.195243f, 0.093388f, 0.292406f, 0.118315f, 0.386566f, 0.044572f, 0.118488f, 0.039260f, 0.341985f, 0.230681f, 0.749472f, 0.252500f, 0.898910f, +0.030369f, 0.057538f, 0.001073f, 0.141695f, 0.055167f, 0.202810f, 0.129465f, 0.277421f, 0.019848f, 0.061950f, 0.032384f, 0.185007f, 0.180161f, 0.687250f, 0.365284f, 0.852882f, +0.071630f, 0.105650f, 0.001697f, 0.277944f, 0.204502f, 0.585272f, 0.321951f, 0.855250f, 0.081884f, 0.198964f, 0.089624f, 0.634751f, 0.405076f, 1.202946f, 0.550974f, 1.594793f, +0.039850f, 0.115875f, 0.001330f, 0.185656f, 0.034868f, 0.196729f, 0.077329f, 0.175079f, 
0.000540f, 0.002587f, 0.000833f, 0.005027f, 0.087851f, 0.514328f, 0.168332f, 0.415269f, +0.039734f, 0.137374f, 0.001086f, 0.202212f, 0.049245f, 0.330360f, 0.089452f, 0.270110f, 0.020388f, 0.116125f, 0.025748f, 0.207288f, 0.101933f, 0.709564f, 0.159974f, 0.526341f, +0.001750f, 0.007103f, 0.000089f, 0.010818f, 0.002144f, 0.016890f, 0.007215f, 0.014289f, 0.000669f, 0.004475f, 0.001566f, 0.008266f, 0.005868f, 0.047962f, 0.017059f, 0.036812f, +0.064376f, 0.203439f, 0.002187f, 0.331006f, 0.123997f, 0.760335f, 0.279890f, 0.687155f, 0.043068f, 0.224218f, 0.067588f, 0.442400f, 0.205818f, 1.309565f, 0.401386f, 1.073742f, +0.057238f, 0.160628f, 0.001903f, 0.212053f, 0.062011f, 0.337672f, 0.136957f, 0.247609f, 0.001114f, 0.005150f, 0.001711f, 0.008245f, 0.163791f, 0.925470f, 0.312540f, 0.615681f, +0.071659f, 0.239107f, 0.001951f, 0.290002f, 0.109968f, 0.711988f, 0.198927f, 0.479656f, 0.052803f, 0.290255f, 0.066408f, 0.426904f, 0.238625f, 1.603139f, 0.372945f, 0.979831f, +0.037370f, 0.146404f, 0.001885f, 0.183729f, 0.056709f, 0.431093f, 0.190021f, 0.300499f, 0.020526f, 0.132478f, 0.047818f, 0.201608f, 0.162690f, 1.283295f, 0.470988f, 0.811559f, +0.111311f, 0.339490f, 0.003766f, 0.455128f, 0.265475f, 1.571068f, 0.596752f, 1.169902f, 0.106940f, 0.537315f, 0.167127f, 0.873529f, 0.461946f, 2.836690f, 0.897148f, 1.916413f, +0.037357f, 0.115331f, 0.001092f, 0.172737f, 0.027549f, 0.165031f, 0.053493f, 0.137295f, 0.000795f, 0.004041f, 0.001073f, 0.007340f, 0.096758f, 0.601446f, 0.162324f, 0.453949f, +0.048347f, 0.177470f, 0.001157f, 0.244203f, 0.050502f, 0.359710f, 0.080319f, 0.274933f, 0.038935f, 0.235450f, 0.043051f, 0.392885f, 0.145722f, 1.076999f, 0.200232f, 0.746814f, +0.026938f, 0.116100f, 0.001194f, 0.165300f, 0.027826f, 0.232701f, 0.081973f, 0.184029f, 0.016171f, 0.114817f, 0.033121f, 0.198239f, 0.106149f, 0.921121f, 0.270174f, 0.660887f, +0.099208f, 0.332869f, 0.002951f, 0.506286f, 0.161057f, 1.048546f, 0.318295f, 0.885847f, 0.104168f, 0.575784f, 0.143127f, 1.062001f, 
0.372658f, 2.517494f, 0.636304f, 1.929579f, +0.075742f, 0.194005f, 0.002266f, 0.300700f, 0.077127f, 0.383322f, 0.153301f, 0.330014f, 0.001450f, 0.006119f, 0.002004f, 0.011502f, 0.219561f, 1.132304f, 0.377048f, 0.884412f, +0.099261f, 0.302299f, 0.002432f, 0.430469f, 0.143170f, 0.846048f, 0.233080f, 0.669190f, 0.071955f, 0.361011f, 0.081442f, 0.623404f, 0.334837f, 2.053173f, 0.470965f, 1.473340f, +0.071688f, 0.256338f, 0.003254f, 0.377688f, 0.102248f, 0.709429f, 0.308339f, 0.580600f, 0.038737f, 0.228191f, 0.081215f, 0.407720f, 0.316151f, 2.276124f, 0.823700f, 1.690002f, +0.122568f, 0.341193f, 0.003732f, 0.537037f, 0.274750f, 1.484044f, 0.555822f, 1.297474f, 0.115844f, 0.531251f, 0.162933f, 1.014017f, 0.515274f, 2.887989f, 0.900612f, 2.290710f, +0.004101f, 0.010815f, 0.000130f, 0.025610f, 0.004808f, 0.024600f, 0.010150f, 0.032358f, 0.000056f, 0.000242f, 0.000082f, 0.000694f, 0.009668f, 0.051330f, 0.017634f, 0.061255f, +0.004440f, 0.013922f, 0.000116f, 0.030289f, 0.007373f, 0.044858f, 0.012750f, 0.054209f, 0.002281f, 0.011780f, 0.002742f, 0.031079f, 0.012181f, 0.076895f, 0.018198f, 0.084306f, +0.000171f, 0.000630f, 0.000008f, 0.001419f, 0.000281f, 0.002008f, 0.000901f, 0.002511f, 0.000066f, 0.000398f, 0.000146f, 0.001085f, 0.000614f, 0.004551f, 0.001699f, 0.005163f, +0.008027f, 0.023005f, 0.000260f, 0.055324f, 0.020717f, 0.115199f, 0.044513f, 0.153880f, 0.005376f, 0.025379f, 0.008030f, 0.074012f, 0.027444f, 0.158355f, 0.050948f, 0.191905f, +0.006950f, 0.017688f, 0.000220f, 0.034513f, 0.010089f, 0.049820f, 0.021210f, 0.053995f, 0.000135f, 0.000568f, 0.000198f, 0.001343f, 0.021268f, 0.108975f, 0.038630f, 0.107152f, +0.009449f, 0.028591f, 0.000245f, 0.051253f, 0.019427f, 0.114066f, 0.033453f, 0.113579f, 0.006969f, 0.034740f, 0.008343f, 0.075520f, 0.033645f, 0.204981f, 0.050055f, 0.185172f, +0.004315f, 0.015330f, 0.000207f, 0.028434f, 0.008773f, 0.060479f, 0.027983f, 0.062310f, 0.002372f, 0.013885f, 0.005261f, 0.031231f, 0.020087f, 0.143686f, 0.055355f, 0.134305f, 
+0.016377f, 0.045296f, 0.000527f, 0.089752f, 0.052332f, 0.280850f, 0.111977f, 0.309109f, 0.015749f, 0.071758f, 0.023429f, 0.172425f, 0.072676f, 0.404715f, 0.134357f, 0.404120f, +0.004425f, 0.012387f, 0.000123f, 0.027422f, 0.004372f, 0.023750f, 0.008081f, 0.029203f, 0.000094f, 0.000434f, 0.000121f, 0.001166f, 0.012255f, 0.069079f, 0.019570f, 0.077062f, +0.006218f, 0.020699f, 0.000142f, 0.042097f, 0.008702f, 0.056211f, 0.013175f, 0.063501f, 0.005012f, 0.027487f, 0.005276f, 0.067792f, 0.020041f, 0.134321f, 0.026213f, 0.137665f, +0.003034f, 0.011858f, 0.000128f, 0.024953f, 0.004199f, 0.031843f, 0.011775f, 0.037221f, 0.001823f, 0.011738f, 0.003554f, 0.029954f, 0.012784f, 0.100598f, 0.030972f, 0.106680f, +0.014237f, 0.043320f, 0.000403f, 0.097385f, 0.030968f, 0.182832f, 0.058257f, 0.228300f, 0.014963f, 0.075004f, 0.019571f, 0.204472f, 0.057187f, 0.350341f, 0.092949f, 0.396888f, +0.011981f, 0.027830f, 0.000341f, 0.063756f, 0.016347f, 0.073676f, 0.030929f, 0.093751f, 0.000230f, 0.000879f, 0.000302f, 0.002441f, 0.037139f, 0.173692f, 0.060712f, 0.200519f, +0.017050f, 0.047089f, 0.000398f, 0.099109f, 0.032950f, 0.176577f, 0.051062f, 0.206429f, 0.012372f, 0.056289f, 0.013329f, 0.143666f, 0.061503f, 0.341997f, 0.082346f, 0.362730f, +0.010783f, 0.034966f, 0.000466f, 0.076147f, 0.020607f, 0.129657f, 0.059152f, 0.156836f, 0.005832f, 0.031156f, 0.011640f, 0.082280f, 0.050851f, 0.332001f, 0.126116f, 0.364346f, +0.023492f, 0.059304f, 0.000681f, 0.137965f, 0.070556f, 0.345605f, 0.135871f, 0.446597f, 0.022225f, 0.092426f, 0.029755f, 0.260750f, 0.105607f, 0.536770f, 0.175706f, 0.629282f, +0.045805f, 0.071343f, 0.001070f, 0.172689f, 0.045088f, 0.136266f, 0.069999f, 0.183210f, 0.000625f, 0.001604f, 0.000675f, 0.004707f, 0.079619f, 0.249684f, 0.106795f, 0.304561f, +0.050812f, 0.094100f, 0.000972f, 0.209261f, 0.070847f, 0.254584f, 0.090088f, 0.314470f, 0.026247f, 0.080077f, 0.023204f, 0.215949f, 0.102780f, 0.383235f, 0.112916f, 0.429474f, +0.001912f, 0.004157f, 0.000068f, 0.009565f, 
0.002636f, 0.011121f, 0.006209f, 0.014214f, 0.000736f, 0.002637f, 0.001205f, 0.007358f, 0.005056f, 0.022133f, 0.010288f, 0.025664f, +0.095027f, 0.160855f, 0.002260f, 0.395395f, 0.205916f, 0.676336f, 0.325369f, 0.923438f, 0.064000f, 0.178471f, 0.070307f, 0.531994f, 0.239549f, 0.816424f, 0.327027f, 1.011310f, +0.072717f, 0.109308f, 0.001692f, 0.218007f, 0.088630f, 0.258513f, 0.137027f, 0.286384f, 0.001425f, 0.003528f, 0.001531f, 0.008533f, 0.164071f, 0.496571f, 0.219158f, 0.499080f, +0.101286f, 0.181028f, 0.001930f, 0.331704f, 0.174865f, 0.606435f, 0.221430f, 0.617215f, 0.075133f, 0.221223f, 0.066146f, 0.491560f, 0.265939f, 0.957005f, 0.290951f, 0.883668f, +0.045131f, 0.094707f, 0.001593f, 0.179555f, 0.077048f, 0.313729f, 0.180724f, 0.330385f, 0.024955f, 0.086271f, 0.040696f, 0.198347f, 0.154917f, 0.654547f, 0.313947f, 0.625360f, +0.181607f, 0.296686f, 0.004301f, 0.600895f, 0.487274f, 1.544620f, 0.766748f, 1.737687f, 0.175643f, 0.472710f, 0.192152f, 1.161017f, 0.594255f, 1.954654f, 0.807894f, 1.994998f, +0.048239f, 0.079773f, 0.000987f, 0.180506f, 0.040022f, 0.128420f, 0.054400f, 0.161405f, 0.001033f, 0.002814f, 0.000976f, 0.007722f, 0.098517f, 0.328017f, 0.115695f, 0.374026f, +0.069458f, 0.136572f, 0.001164f, 0.283911f, 0.081625f, 0.311419f, 0.090874f, 0.359596f, 0.056311f, 0.182402f, 0.043586f, 0.459825f, 0.165071f, 0.653490f, 0.158777f, 0.684591f, +0.033067f, 0.076338f, 0.001026f, 0.164201f, 0.038427f, 0.172132f, 0.079244f, 0.205657f, 0.019983f, 0.075999f, 0.028651f, 0.198238f, 0.102738f, 0.477542f, 0.183050f, 0.517628f, +0.164521f, 0.295681f, 0.003426f, 0.679425f, 0.300476f, 1.047840f, 0.415690f, 1.337401f, 0.173902f, 0.514880f, 0.167263f, 1.434720f, 0.487273f, 1.763222f, 0.582419f, 2.041722f, +0.095178f, 0.130584f, 0.001993f, 0.305777f, 0.109034f, 0.290267f, 0.151709f, 0.377539f, 0.001835f, 0.004147f, 0.001775f, 0.011775f, 0.217542f, 0.600936f, 0.261514f, 0.709112f, +0.138772f, 0.226380f, 0.002380f, 0.487011f, 0.225182f, 0.712776f, 0.256623f, 0.851732f, 
0.101270f, 0.272156f, 0.080238f, 0.710006f, 0.369102f, 1.212313f, 0.363422f, 1.314279f, +0.085633f, 0.164016f, 0.002721f, 0.365091f, 0.137407f, 0.510668f, 0.290062f, 0.631397f, 0.046582f, 0.146983f, 0.068366f, 0.396759f, 0.297768f, 1.148304f, 0.543078f, 1.288082f, +0.197795f, 0.294929f, 0.004216f, 0.701319f, 0.498809f, 1.443178f, 0.706384f, 1.906194f, 0.188196f, 0.462287f, 0.185290f, 1.333070f, 0.655640f, 1.968339f, 0.802185f, 2.358685f, +0.045023f, 0.069367f, 0.001170f, 0.185761f, 0.056882f, 0.170047f, 0.098172f, 0.252941f, 0.000917f, 0.002327f, 0.001100f, 0.007556f, 0.154678f, 0.479817f, 0.230646f, 0.647513f, +0.040761f, 0.074670f, 0.000867f, 0.183711f, 0.072944f, 0.259282f, 0.103114f, 0.354330f, 0.031422f, 0.094827f, 0.030881f, 0.282920f, 0.162959f, 0.601048f, 0.199025f, 0.745192f, +0.001246f, 0.002680f, 0.000049f, 0.006823f, 0.002205f, 0.009203f, 0.005774f, 0.013012f, 0.000716f, 0.002537f, 0.001303f, 0.007832f, 0.006513f, 0.028203f, 0.014734f, 0.036180f, +0.069740f, 0.116774f, 0.001844f, 0.317564f, 0.193959f, 0.630170f, 0.340708f, 0.951900f, 0.070094f, 0.193351f, 0.085603f, 0.637636f, 0.347470f, 1.171421f, 0.527341f, 1.605349f, +0.061053f, 0.090781f, 0.001579f, 0.200309f, 0.095507f, 0.275555f, 0.164150f, 0.337724f, 0.001785f, 0.004373f, 0.002133f, 0.011701f, 0.272260f, 0.815095f, 0.404292f, 0.906328f, +0.069402f, 0.122701f, 0.001470f, 0.248736f, 0.153783f, 0.527555f, 0.216486f, 0.594030f, 0.076828f, 0.223767f, 0.075193f, 0.550086f, 0.360158f, 1.282033f, 0.438041f, 1.309671f, +0.025125f, 0.052155f, 0.000986f, 0.109396f, 0.055053f, 0.221744f, 0.143557f, 0.258349f, 0.020733f, 0.070900f, 0.037587f, 0.180341f, 0.170460f, 0.712427f, 0.384031f, 0.753039f, +0.113844f, 0.183972f, 0.002997f, 0.412231f, 0.392044f, 1.229302f, 0.685804f, 1.530018f, 0.164314f, 0.437436f, 0.199837f, 1.188628f, 0.736268f, 2.395567f, 1.112765f, 2.705011f, +0.005712f, 0.009344f, 0.000130f, 0.023392f, 0.006083f, 0.019307f, 0.009191f, 0.026846f, 0.000183f, 0.000492f, 0.000192f, 0.001493f, 
0.023057f, 0.075940f, 0.030102f, 0.095800f, +0.006713f, 0.013056f, 0.000125f, 0.030027f, 0.010125f, 0.038210f, 0.012531f, 0.048813f, 0.008121f, 0.026022f, 0.006988f, 0.072576f, 0.031530f, 0.123473f, 0.033716f, 0.143104f, +0.002596f, 0.005929f, 0.000090f, 0.014110f, 0.003873f, 0.017160f, 0.008878f, 0.022682f, 0.002342f, 0.008809f, 0.003732f, 0.025422f, 0.015944f, 0.073309f, 0.031581f, 0.087913f, +0.014546f, 0.025860f, 0.000337f, 0.065740f, 0.034097f, 0.117619f, 0.052440f, 0.166086f, 0.022945f, 0.067201f, 0.024535f, 0.207167f, 0.085150f, 0.304784f, 0.113144f, 0.390454f, +0.089949f, 0.122074f, 0.002094f, 0.316248f, 0.132252f, 0.348269f, 0.204568f, 0.501149f, 0.002587f, 0.005785f, 0.002782f, 0.018173f, 0.406337f, 1.110318f, 0.543030f, 1.449513f, +0.107033f, 0.172715f, 0.002041f, 0.411073f, 0.222912f, 0.697956f, 0.282411f, 0.922712f, 0.116564f, 0.309867f, 0.102671f, 0.894350f, 0.562663f, 1.828066f, 0.615883f, 2.192565f, +0.053663f, 0.101670f, 0.001895f, 0.250378f, 0.110516f, 0.406283f, 0.259353f, 0.555751f, 0.043563f, 0.135969f, 0.071076f, 0.406056f, 0.368803f, 1.406851f, 0.747763f, 1.745914f, +0.139568f, 0.205856f, 0.003307f, 0.541564f, 0.451739f, 1.292851f, 0.711181f, 1.889226f, 0.198174f, 0.481530f, 0.216907f, 1.536216f, 0.914368f, 2.715378f, 1.243699f, 3.599883f, +0.139462f, 0.405527f, 0.004655f, 0.649738f, 0.138340f, 0.780540f, 0.306810f, 0.694644f, 0.002348f, 0.011246f, 0.003620f, 0.021851f, 0.356177f, 2.085267f, 0.682478f, 1.683648f, +0.130436f, 0.450967f, 0.003566f, 0.663818f, 0.183273f, 1.229499f, 0.332913f, 1.005266f, 0.083134f, 0.473506f, 0.104990f, 0.845224f, 0.387657f, 2.698520f, 0.608390f, 2.001711f, +0.004821f, 0.019568f, 0.000244f, 0.029804f, 0.006698f, 0.052756f, 0.022536f, 0.044631f, 0.002290f, 0.015316f, 0.005358f, 0.028288f, 0.018730f, 0.153083f, 0.054449f, 0.117494f, +0.195574f, 0.618049f, 0.006644f, 1.005598f, 0.427069f, 2.618743f, 0.963995f, 2.366700f, 0.162519f, 0.846091f, 0.255046f, 1.669403f, 0.724377f, 4.609021f, 1.412681f, 3.779042f, 
+0.249486f, 0.700140f, 0.008293f, 0.924288f, 0.306432f, 1.668617f, 0.676779f, 1.223567f, 0.006031f, 0.027883f, 0.009261f, 0.044638f, 0.827077f, 4.673232f, 1.578197f, 3.108932f, +0.292985f, 0.977613f, 0.007977f, 1.185702f, 0.509731f, 3.300248f, 0.922076f, 2.223329f, 0.268157f, 1.474048f, 0.337249f, 2.168017f, 1.130275f, 7.593442f, 1.766494f, 4.641075f, +0.128231f, 0.502373f, 0.006467f, 0.630448f, 0.220611f, 1.677038f, 0.739218f, 1.168998f, 0.087486f, 0.564641f, 0.203808f, 0.859286f, 0.646735f, 5.101424f, 1.872296f, 3.226155f, +0.421175f, 1.284548f, 0.014249f, 1.722093f, 1.138795f, 6.739335f, 2.559858f, 5.018472f, 0.502598f, 2.525277f, 0.785466f, 4.105417f, 2.024917f, 12.434470f, 3.932598f, 8.400489f, +0.145719f, 0.449876f, 0.004259f, 0.673803f, 0.121829f, 0.729814f, 0.236563f, 0.607156f, 0.003850f, 0.019581f, 0.005197f, 0.035565f, 0.437248f, 2.717923f, 0.733541f, 2.051389f, +0.176899f, 0.649360f, 0.004234f, 0.893534f, 0.209493f, 1.492150f, 0.333178f, 1.140477f, 0.176952f, 1.070079f, 0.195659f, 1.785598f, 0.617699f, 4.565292f, 0.848762f, 3.165668f, +0.082722f, 0.356526f, 0.003668f, 0.507610f, 0.096873f, 0.810130f, 0.285383f, 0.640682f, 0.061681f, 0.437948f, 0.126332f, 0.756144f, 0.377629f, 3.276929f, 0.961156f, 2.351136f, +0.335936f, 1.127149f, 0.009992f, 1.714369f, 0.618282f, 4.025272f, 1.221906f, 3.400686f, 0.438127f, 2.421728f, 0.601987f, 4.466739f, 1.461880f, 9.875737f, 2.496120f, 7.569437f, +0.279856f, 0.716822f, 0.008372f, 1.111044f, 0.323074f, 1.605690f, 0.642158f, 1.382389f, 0.006656f, 0.028085f, 0.009197f, 0.052787f, 0.939822f, 4.846792f, 1.613943f, 3.785695f, +0.344023f, 1.047724f, 0.008429f, 1.491946f, 0.562552f, 3.324337f, 0.915830f, 2.629418f, 0.309763f, 1.554136f, 0.350605f, 2.683722f, 1.344428f, 8.243831f, 1.891004f, 5.915703f, +0.208522f, 0.745627f, 0.009464f, 1.098605f, 0.337181f, 2.339465f, 1.016802f, 1.914631f, 0.139957f, 0.824452f, 0.293430f, 1.473084f, 1.065357f, 7.670029f, 2.775684f, 5.694927f, +0.393129f, 1.094359f, 0.011970f, 1.722516f, 
0.999071f, 5.396407f, 2.021127f, 4.717985f, 0.461520f, 2.116487f, 0.649118f, 4.039814f, 1.914651f, 10.731170f, 3.346488f, 8.511804f, +0.052513f, 0.138472f, 0.001669f, 0.327918f, 0.069792f, 0.357100f, 0.147340f, 0.469721f, 0.000885f, 0.003844f, 0.001299f, 0.011039f, 0.143411f, 0.761404f, 0.261577f, 0.908631f, +0.053332f, 0.167212f, 0.001388f, 0.363794f, 0.100401f, 0.610804f, 0.173605f, 0.738139f, 0.034023f, 0.175736f, 0.040902f, 0.463651f, 0.169490f, 1.069937f, 0.253205f, 1.173051f, +0.001726f, 0.006354f, 0.000083f, 0.014303f, 0.003213f, 0.022951f, 0.010291f, 0.028697f, 0.000821f, 0.004978f, 0.001828f, 0.013588f, 0.007171f, 0.053150f, 0.019844f, 0.060295f, +0.089226f, 0.255706f, 0.002886f, 0.614930f, 0.261054f, 1.451651f, 0.560921f, 1.939079f, 0.074216f, 0.350387f, 0.110868f, 1.021822f, 0.353392f, 2.039091f, 0.656038f, 2.471114f, +0.110838f, 0.282074f, 0.003507f, 0.550387f, 0.182401f, 0.900711f, 0.383472f, 0.976202f, 0.002682f, 0.011244f, 0.003920f, 0.026606f, 0.392913f, 2.013283f, 0.713684f, 1.979620f, +0.141340f, 0.427685f, 0.003663f, 0.766683f, 0.329468f, 1.934437f, 0.567325f, 1.926171f, 0.129486f, 0.645478f, 0.155017f, 1.403187f, 0.583062f, 3.552265f, 0.867433f, 3.208989f, +0.054170f, 0.192456f, 0.002601f, 0.356974f, 0.124866f, 0.860791f, 0.398276f, 0.886853f, 0.036993f, 0.216516f, 0.082034f, 0.487009f, 0.292148f, 2.089801f, 0.805091f, 1.953357f, +0.226714f, 0.627051f, 0.007301f, 1.242487f, 0.821320f, 4.407785f, 1.757424f, 4.851297f, 0.270801f, 1.233884f, 0.402856f, 2.964869f, 1.165555f, 6.490668f, 2.154760f, 6.481113f, +0.063146f, 0.176789f, 0.001757f, 0.391362f, 0.070734f, 0.384261f, 0.130743f, 0.472495f, 0.001670f, 0.007702f, 0.002146f, 0.020677f, 0.202612f, 1.142115f, 0.323560f, 1.274100f, +0.083240f, 0.277095f, 0.001897f, 0.563555f, 0.132077f, 0.853111f, 0.199952f, 0.963746f, 0.083344f, 0.457058f, 0.087723f, 1.127254f, 0.310808f, 2.083150f, 0.406532f, 2.135012f, +0.034086f, 0.133223f, 0.001439f, 0.280351f, 0.053482f, 0.405597f, 0.149977f, 0.474095f, 
0.025440f, 0.163804f, 0.049599f, 0.418012f, 0.166390f, 1.309380f, 0.403134f, 1.388543f, +0.176383f, 0.536685f, 0.004994f, 1.206494f, 0.434949f, 2.567934f, 0.818245f, 3.206550f, 0.230258f, 1.154186f, 0.301158f, 3.146473f, 0.820772f, 5.028249f, 1.334043f, 5.696313f, +0.161969f, 0.376222f, 0.004612f, 0.861880f, 0.250524f, 1.129133f, 0.474005f, 1.436801f, 0.003856f, 0.014754f, 0.005072f, 0.040988f, 0.581636f, 2.720173f, 0.950796f, 3.140297f, +0.216204f, 0.597116f, 0.005043f, 1.256747f, 0.473684f, 2.538445f, 0.734066f, 2.967599f, 0.194858f, 0.886571f, 0.209942f, 2.262794f, 0.903488f, 5.024009f, 1.209682f, 5.328570f, +0.114756f, 0.372118f, 0.004958f, 0.810370f, 0.248620f, 1.564322f, 0.713679f, 1.892246f, 0.077096f, 0.411847f, 0.153863f, 1.087631f, 0.626941f, 4.093221f, 1.554875f, 4.491996f, +0.275680f, 0.695932f, 0.007990f, 1.619024f, 0.938680f, 4.597934f, 1.807627f, 5.941520f, 0.323947f, 1.347210f, 0.433711f, 3.800705f, 1.435720f, 7.297324f, 2.388708f, 8.555025f, +0.136561f, 0.212699f, 0.003191f, 0.514849f, 0.152396f, 0.460573f, 0.236594f, 0.619243f, 0.002314f, 0.005938f, 0.002498f, 0.017431f, 0.274994f, 0.862375f, 0.368855f, 1.051916f, +0.142099f, 0.263157f, 0.002720f, 0.585214f, 0.224620f, 0.807152f, 0.285621f, 0.997019f, 0.091174f, 0.278158f, 0.080602f, 0.750126f, 0.332988f, 1.241607f, 0.365825f, 1.391410f, +0.004487f, 0.009757f, 0.000159f, 0.022450f, 0.007014f, 0.029592f, 0.016520f, 0.037821f, 0.002146f, 0.007687f, 0.003514f, 0.021450f, 0.013746f, 0.060181f, 0.027974f, 0.069782f, +0.245935f, 0.416301f, 0.005849f, 1.023305f, 0.604175f, 1.984428f, 0.954662f, 2.709448f, 0.205736f, 0.573718f, 0.226012f, 1.710166f, 0.718226f, 2.447836f, 0.980506f, 3.032150f, +0.270012f, 0.405882f, 0.006283f, 0.809502f, 0.373103f, 1.088250f, 0.576835f, 1.205578f, 0.006571f, 0.016273f, 0.007063f, 0.039357f, 0.705785f, 2.136096f, 0.942751f, 2.146892f, +0.352783f, 0.630531f, 0.006724f, 1.155342f, 0.690495f, 2.394655f, 0.874370f, 2.437224f, 0.325048f, 0.957078f, 0.286167f, 2.126639f, 
1.073087f, 3.861591f, 1.174011f, 3.565673f, +0.131925f, 0.276846f, 0.004657f, 0.524875f, 0.255339f, 1.039707f, 0.598926f, 1.094907f, 0.090609f, 0.313242f, 0.147761f, 0.720179f, 0.524625f, 2.216617f, 1.063178f, 2.117775f, +0.585383f, 0.956324f, 0.013864f, 1.936896f, 1.780656f, 5.644538f, 2.801945f, 6.350066f, 0.703225f, 1.892604f, 0.769326f, 4.648400f, 2.219079f, 7.299112f, 3.016856f, 7.449766f, +0.160301f, 0.265087f, 0.003279f, 0.599824f, 0.150775f, 0.483799f, 0.204942f, 0.608063f, 0.004264f, 0.011615f, 0.004029f, 0.031872f, 0.379258f, 1.262762f, 0.445391f, 1.439884f, +0.216506f, 0.425702f, 0.003628f, 0.884966f, 0.288449f, 1.100499f, 0.321133f, 1.270746f, 0.218020f, 0.706207f, 0.168751f, 1.780309f, 0.596085f, 2.359809f, 0.573359f, 2.472117f, +0.086504f, 0.199702f, 0.002685f, 0.429554f, 0.113965f, 0.510509f, 0.235022f, 0.609939f, 0.064933f, 0.246951f, 0.093097f, 0.644151f, 0.311364f, 1.447262f, 0.554761f, 1.568748f, +0.474585f, 0.852936f, 0.009882f, 1.959905f, 0.982656f, 3.426788f, 1.359446f, 4.373750f, 0.623095f, 1.844832f, 0.599309f, 5.140643f, 1.628389f, 5.892409f, 1.946351f, 6.823110f, +0.299585f, 0.411029f, 0.006274f, 0.962473f, 0.389084f, 1.035810f, 0.541369f, 1.347238f, 0.007173f, 0.016212f, 0.006938f, 0.046035f, 0.793265f, 2.191312f, 0.953609f, 2.585777f, +0.409729f, 0.668394f, 0.007028f, 1.437919f, 0.753751f, 2.385876f, 0.858994f, 2.851003f, 0.371394f, 0.998093f, 0.294261f, 2.603844f, 1.262509f, 4.146705f, 1.243080f, 4.495478f, +0.212194f, 0.406424f, 0.006742f, 0.904679f, 0.386012f, 1.434601f, 0.814860f, 1.773760f, 0.143375f, 0.452396f, 0.210422f, 1.221171f, 0.854799f, 3.296421f, 1.559007f, 3.697680f, +0.540454f, 0.805862f, 0.011519f, 1.916283f, 1.545173f, 4.470566f, 2.188183f, 5.904863f, 0.638720f, 1.568963f, 0.628859f, 4.524327f, 2.075400f, 6.230690f, 2.539281f, 7.466313f, +0.012855f, 0.019806f, 0.000334f, 0.053039f, 0.021360f, 0.063857f, 0.036866f, 0.094986f, 0.000285f, 0.000724f, 0.000342f, 0.002352f, 0.050492f, 0.156629f, 0.075291f, 0.211371f, 
+0.019138f, 0.035059f, 0.000407f, 0.086256f, 0.045045f, 0.160112f, 0.063675f, 0.218807f, 0.016080f, 0.048528f, 0.015804f, 0.144785f, 0.087476f, 0.322641f, 0.106836f, 0.400017f, +0.000447f, 0.000962f, 0.000018f, 0.002449f, 0.001041f, 0.004345f, 0.002726f, 0.006144f, 0.000280f, 0.000993f, 0.000510f, 0.003065f, 0.002673f, 0.011577f, 0.006048f, 0.014851f, +0.026263f, 0.043975f, 0.000694f, 0.119588f, 0.096065f, 0.312114f, 0.168748f, 0.471462f, 0.028770f, 0.079361f, 0.035136f, 0.261720f, 0.149599f, 0.504343f, 0.227041f, 0.691166f, +0.018623f, 0.027691f, 0.000482f, 0.061099f, 0.038315f, 0.110546f, 0.065853f, 0.135487f, 0.000593f, 0.001454f, 0.000709f, 0.003890f, 0.094946f, 0.284251f, 0.140990f, 0.316067f, +0.034812f, 0.061546f, 0.000738f, 0.124764f, 0.101452f, 0.348030f, 0.142817f, 0.391884f, 0.042003f, 0.122336f, 0.041109f, 0.300738f, 0.206538f, 0.735201f, 0.251202f, 0.751051f, +0.009637f, 0.020004f, 0.000378f, 0.041959f, 0.027772f, 0.111859f, 0.072418f, 0.130325f, 0.008667f, 0.029640f, 0.015713f, 0.075391f, 0.074748f, 0.312404f, 0.168400f, 0.330213f, +0.045800f, 0.074012f, 0.001206f, 0.165842f, 0.207438f, 0.650446f, 0.362871f, 0.809559f, 0.072050f, 0.191812f, 0.087627f, 0.521203f, 0.338646f, 1.101839f, 0.511815f, 1.244168f, +0.001679f, 0.002747f, 0.000038f, 0.006876f, 0.002351f, 0.007464f, 0.003553f, 0.010378f, 0.000058f, 0.000158f, 0.000061f, 0.000478f, 0.007748f, 0.025519f, 0.010116f, 0.032193f, +0.003245f, 0.006310f, 0.000060f, 0.014513f, 0.006436f, 0.024290f, 0.007966f, 0.031030f, 0.004279f, 0.013709f, 0.003682f, 0.038235f, 0.017424f, 0.068231f, 0.018631f, 0.079079f, +0.000960f, 0.002191f, 0.000033f, 0.005215f, 0.001882f, 0.008341f, 0.004316f, 0.011026f, 0.000943f, 0.003549f, 0.001504f, 0.010241f, 0.006737f, 0.030977f, 0.013345f, 0.037148f, +0.005639f, 0.010025f, 0.000131f, 0.025485f, 0.017385f, 0.059970f, 0.026738f, 0.084682f, 0.009695f, 0.028395f, 0.010367f, 0.087536f, 0.037740f, 0.135085f, 0.050147f, 0.173055f, +0.027168f, 0.036871f, 0.000632f, 0.095518f, 
0.052537f, 0.138349f, 0.081264f, 0.199079f, 0.000852f, 0.001904f, 0.000916f, 0.005983f, 0.140315f, 0.383411f, 0.187517f, 0.500540f, +0.053161f, 0.085783f, 0.001014f, 0.204170f, 0.145615f, 0.455933f, 0.184482f, 0.602752f, 0.063102f, 0.167748f, 0.055581f, 0.484160f, 0.319505f, 1.038059f, 0.349726f, 1.245038f, +0.020380f, 0.038613f, 0.000720f, 0.095091f, 0.055203f, 0.202941f, 0.129549f, 0.277602f, 0.018033f, 0.056285f, 0.029422f, 0.168088f, 0.160138f, 0.610869f, 0.324686f, 0.758094f, +0.055598f, 0.082005f, 0.001317f, 0.215737f, 0.236681f, 0.677367f, 0.372611f, 0.989827f, 0.086046f, 0.209077f, 0.094180f, 0.667016f, 0.416441f, 1.236697f, 0.566433f, 1.639538f, +0.033277f, 0.096762f, 0.001111f, 0.155033f, 0.043414f, 0.244952f, 0.096284f, 0.217995f, 0.000611f, 0.002925f, 0.000941f, 0.005683f, 0.097165f, 0.568859f, 0.186179f, 0.459297f, +0.051180f, 0.176947f, 0.001399f, 0.260464f, 0.094579f, 0.634492f, 0.171802f, 0.518775f, 0.035554f, 0.202503f, 0.044901f, 0.361475f, 0.173901f, 1.210546f, 0.272922f, 0.897960f, +0.001446f, 0.005871f, 0.000073f, 0.008942f, 0.002643f, 0.020818f, 0.008893f, 0.017612f, 0.000749f, 0.005009f, 0.001752f, 0.009251f, 0.006425f, 0.052511f, 0.018678f, 0.040303f, +0.061548f, 0.194502f, 0.002091f, 0.316465f, 0.176766f, 1.083912f, 0.399003f, 0.979590f, 0.055746f, 0.290220f, 0.087484f, 0.572626f, 0.260630f, 1.658317f, 0.508280f, 1.359692f, +0.063596f, 0.178471f, 0.002114f, 0.235608f, 0.102734f, 0.559421f, 0.226897f, 0.410213f, 0.001676f, 0.007747f, 0.002573f, 0.012402f, 0.241038f, 1.361937f, 0.459940f, 0.906048f, +0.122812f, 0.409791f, 0.003344f, 0.497017f, 0.281019f, 1.819458f, 0.508349f, 1.225742f, 0.122516f, 0.673466f, 0.154083f, 0.990528f, 0.541673f, 3.639081f, 0.846574f, 2.224188f, +0.041102f, 0.161025f, 0.002073f, 0.202076f, 0.093002f, 0.706982f, 0.311629f, 0.492810f, 0.030564f, 0.197263f, 0.071203f, 0.300200f, 0.237001f, 1.869452f, 0.686116f, 1.182246f, +0.141599f, 0.431866f, 0.004791f, 0.578969f, 0.503552f, 2.979994f, 1.131916f, 2.219064f, 
0.184174f, 0.925371f, 0.287829f, 1.504403f, 0.778329f, 4.779509f, 1.511595f, 3.228943f, +0.035794f, 0.110505f, 0.001046f, 0.165509f, 0.039359f, 0.235776f, 0.076425f, 0.196150f, 0.001031f, 0.005242f, 0.001392f, 0.009522f, 0.122793f, 0.763277f, 0.206001f, 0.576093f, +0.071454f, 0.262293f, 0.001710f, 0.360921f, 0.111294f, 0.792708f, 0.177001f, 0.605881f, 0.077905f, 0.471113f, 0.086141f, 0.786127f, 0.285256f, 2.108272f, 0.391962f, 1.461919f, +0.025550f, 0.110119f, 0.001133f, 0.156784f, 0.039352f, 0.329098f, 0.115931f, 0.260263f, 0.020765f, 0.147435f, 0.042530f, 0.254556f, 0.133350f, 1.157164f, 0.339408f, 0.830244f, +0.108833f, 0.365162f, 0.003237f, 0.555403f, 0.263445f, 1.715136f, 0.520644f, 1.449005f, 0.154708f, 0.855140f, 0.212569f, 1.577257f, 0.541468f, 3.657889f, 0.924542f, 2.803655f, +0.070638f, 0.180933f, 0.002113f, 0.280438f, 0.107252f, 0.533049f, 0.213180f, 0.458918f, 0.001831f, 0.007726f, 0.002530f, 0.014523f, 0.271212f, 1.398677f, 0.465748f, 1.092468f, +0.142793f, 0.434876f, 0.003499f, 0.619259f, 0.307101f, 1.814778f, 0.499958f, 1.435417f, 0.140138f, 0.703098f, 0.158615f, 1.214128f, 0.637990f, 3.912058f, 0.897364f, 2.807259f, +0.066182f, 0.236652f, 0.003004f, 0.348683f, 0.140751f, 0.976574f, 0.424448f, 0.799233f, 0.048416f, 0.285208f, 0.101508f, 0.509594f, 0.386582f, 2.783191f, 1.007201f, 2.066494f, +0.130875f, 0.364319f, 0.003985f, 0.573436f, 0.437439f, 2.362796f, 0.884942f, 2.065751f, 0.167464f, 0.767972f, 0.235534f, 1.465856f, 0.728734f, 4.084379f, 1.273703f, 3.239669f, +0.009233f, 0.024347f, 0.000293f, 0.057656f, 0.016139f, 0.082579f, 0.034072f, 0.108622f, 0.000170f, 0.000737f, 0.000249f, 0.002115f, 0.028828f, 0.153057f, 0.052582f, 0.182652f, +0.015420f, 0.048346f, 0.000401f, 0.105184f, 0.038179f, 0.232271f, 0.066017f, 0.280692f, 0.010722f, 0.055381f, 0.012890f, 0.146114f, 0.056027f, 0.353678f, 0.083699f, 0.387763f, +0.000382f, 0.001405f, 0.000018f, 0.003162f, 0.000934f, 0.006674f, 0.002992f, 0.008345f, 0.000198f, 0.001199f, 0.000440f, 0.003274f, 
0.001813f, 0.013435f, 0.005016f, 0.015241f, +0.020691f, 0.059298f, 0.000669f, 0.142601f, 0.079621f, 0.442749f, 0.171079f, 0.591413f, 0.018759f, 0.088563f, 0.028023f, 0.258273f, 0.093693f, 0.540617f, 0.173933f, 0.655157f, +0.020819f, 0.052983f, 0.000659f, 0.103382f, 0.045061f, 0.222516f, 0.094735f, 0.241166f, 0.000549f, 0.002302f, 0.000803f, 0.005447f, 0.084378f, 0.432353f, 0.153264f, 0.425124f, +0.043657f, 0.132103f, 0.001131f, 0.236813f, 0.133845f, 0.785858f, 0.230474f, 0.782500f, 0.043593f, 0.217310f, 0.052189f, 0.472404f, 0.205902f, 1.254448f, 0.306326f, 1.133223f, +0.012794f, 0.045456f, 0.000614f, 0.084313f, 0.038789f, 0.267398f, 0.123721f, 0.275494f, 0.009523f, 0.055739f, 0.021119f, 0.125373f, 0.078890f, 0.564315f, 0.217401f, 0.527471f, +0.056166f, 0.155344f, 0.001809f, 0.307812f, 0.267612f, 1.436193f, 0.572623f, 1.580703f, 0.073122f, 0.333177f, 0.108780f, 0.800583f, 0.330128f, 1.838398f, 0.610308f, 1.835691f, +0.011429f, 0.031999f, 0.000318f, 0.070837f, 0.016839f, 0.091476f, 0.031124f, 0.112481f, 0.000329f, 0.001520f, 0.000423f, 0.004079f, 0.041928f, 0.236346f, 0.066957f, 0.263659f, +0.024776f, 0.082475f, 0.000565f, 0.167738f, 0.051704f, 0.333965f, 0.078275f, 0.377275f, 0.027038f, 0.148277f, 0.028459f, 0.365700f, 0.105766f, 0.708880f, 0.138340f, 0.726528f, +0.007758f, 0.030321f, 0.000327f, 0.063807f, 0.016009f, 0.121411f, 0.044894f, 0.141916f, 0.006311f, 0.040635f, 0.012304f, 0.103696f, 0.043296f, 0.340712f, 0.104899f, 0.361311f, +0.042107f, 0.128120f, 0.001192f, 0.288020f, 0.136564f, 0.806272f, 0.256910f, 1.006783f, 0.059913f, 0.300318f, 0.078361f, 0.818710f, 0.224015f, 1.372372f, 0.364104f, 1.554709f, +0.030125f, 0.069975f, 0.000858f, 0.160305f, 0.061284f, 0.276213f, 0.115953f, 0.351476f, 0.000782f, 0.002991f, 0.001028f, 0.008309f, 0.123683f, 0.578434f, 0.202183f, 0.667772f, +0.066127f, 0.182630f, 0.001542f, 0.384380f, 0.190547f, 1.021129f, 0.295289f, 1.193763f, 0.064959f, 0.295553f, 0.069988f, 0.754339f, 0.315931f, 1.756794f, 0.423001f, 1.863293f, 
+0.026838f, 0.087029f, 0.001160f, 0.189525f, 0.076475f, 0.481181f, 0.219526f, 0.582050f, 0.019653f, 0.104985f, 0.039221f, 0.277251f, 0.167636f, 1.094474f, 0.415753f, 1.201101f, +0.067627f, 0.170719f, 0.001960f, 0.397164f, 0.302854f, 1.483469f, 0.583209f, 1.916961f, 0.086616f, 0.360213f, 0.115964f, 1.016222f, 0.402664f, 2.046619f, 0.669941f, 2.399355f, +0.037944f, 0.059100f, 0.000887f, 0.143054f, 0.055692f, 0.168313f, 0.086462f, 0.226298f, 0.000701f, 0.001798f, 0.000757f, 0.005279f, 0.087358f, 0.273951f, 0.117174f, 0.334163f, +0.064927f, 0.120240f, 0.001243f, 0.267392f, 0.134984f, 0.485052f, 0.171642f, 0.599151f, 0.045406f, 0.138527f, 0.040141f, 0.373573f, 0.173948f, 0.648597f, 0.191101f, 0.726852f, +0.001568f, 0.003409f, 0.000056f, 0.007844f, 0.003223f, 0.013598f, 0.007591f, 0.017379f, 0.000817f, 0.002927f, 0.001338f, 0.008168f, 0.005491f, 0.024039f, 0.011174f, 0.027874f, +0.090127f, 0.152561f, 0.002143f, 0.375009f, 0.291205f, 0.956469f, 0.460135f, 1.305921f, 0.082178f, 0.229162f, 0.090277f, 0.683097f, 0.300923f, 1.025595f, 0.410812f, 1.270411f, +0.080150f, 0.120481f, 0.001865f, 0.240290f, 0.145662f, 0.424860f, 0.225200f, 0.470665f, 0.002126f, 0.005265f, 0.002285f, 0.012733f, 0.239523f, 0.724929f, 0.319942f, 0.728592f, +0.172202f, 0.307777f, 0.003282f, 0.563950f, 0.443292f, 1.537351f, 0.561339f, 1.564680f, 0.172936f, 0.509197f, 0.152250f, 1.131442f, 0.598856f, 2.155033f, 0.655179f, 1.989891f, +0.049241f, 0.103333f, 0.001738f, 0.195910f, 0.125348f, 0.510400f, 0.294017f, 0.537498f, 0.036862f, 0.127435f, 0.060113f, 0.292987f, 0.223875f, 0.945906f, 0.453695f, 0.903727f, +0.229178f, 0.374402f, 0.005428f, 0.758298f, 0.916881f, 2.906440f, 1.442755f, 3.269725f, 0.300079f, 0.807609f, 0.328286f, 1.983558f, 0.993261f, 3.267086f, 1.350347f, 3.334519f, +0.045852f, 0.075825f, 0.000938f, 0.171572f, 0.056722f, 0.182007f, 0.077100f, 0.228755f, 0.001329f, 0.003621f, 0.001256f, 0.009937f, 0.124027f, 0.412953f, 0.145653f, 0.470877f, +0.101837f, 0.200236f, 0.001706f, 0.416258f, 
0.178445f, 0.680808f, 0.198665f, 0.786130f, 0.111774f, 0.362056f, 0.086515f, 0.912724f, 0.320554f, 1.269024f, 0.308332f, 1.329419f, +0.031113f, 0.071827f, 0.000966f, 0.154498f, 0.053911f, 0.241495f, 0.111177f, 0.288530f, 0.025455f, 0.096811f, 0.036496f, 0.252523f, 0.128035f, 0.595127f, 0.228123f, 0.645083f, +0.179041f, 0.321777f, 0.003728f, 0.739390f, 0.487573f, 1.700299f, 0.674528f, 2.170161f, 0.256213f, 0.758584f, 0.246432f, 2.113801f, 0.702350f, 2.541491f, 0.839493f, 2.942917f, +0.088056f, 0.120813f, 0.001844f, 0.282897f, 0.150412f, 0.400424f, 0.209283f, 0.520816f, 0.002298f, 0.005194f, 0.002223f, 0.014748f, 0.266573f, 0.736380f, 0.320456f, 0.868938f, +0.198039f, 0.323062f, 0.003397f, 0.695005f, 0.479161f, 1.516705f, 0.546064f, 1.812387f, 0.195658f, 0.525815f, 0.155023f, 1.371756f, 0.697662f, 2.291469f, 0.686926f, 2.484201f, +0.078425f, 0.150211f, 0.002492f, 0.334363f, 0.187639f, 0.697355f, 0.396101f, 0.862220f, 0.057757f, 0.182243f, 0.084766f, 0.491936f, 0.361197f, 1.392911f, 0.658763f, 1.562464f, +0.209515f, 0.312404f, 0.004466f, 0.742876f, 0.787831f, 2.279390f, 1.115681f, 3.010690f, 0.269883f, 0.662945f, 0.265716f, 1.911695f, 0.919847f, 2.761531f, 1.125446f, 3.309177f, +0.056729f, 0.087402f, 0.001474f, 0.234057f, 0.066637f, 0.199210f, 0.115008f, 0.296320f, 0.001239f, 0.003146f, 0.001487f, 0.010215f, 0.143949f, 0.446536f, 0.214648f, 0.602600f, +0.057912f, 0.106089f, 0.001232f, 0.261010f, 0.096358f, 0.342506f, 0.136212f, 0.468063f, 0.047900f, 0.144555f, 0.047076f, 0.431285f, 0.171007f, 0.630732f, 0.208855f, 0.781995f, +0.001864f, 0.004009f, 0.000073f, 0.010206f, 0.003067f, 0.012799f, 0.008030f, 0.018098f, 0.001149f, 0.004072f, 0.002092f, 0.012570f, 0.007196f, 0.031161f, 0.016279f, 0.039974f, +0.081282f, 0.136099f, 0.002149f, 0.370119f, 0.210181f, 0.682875f, 0.369204f, 1.031513f, 0.087654f, 0.241787f, 0.107048f, 0.797372f, 0.299116f, 1.008407f, 0.453956f, 1.381950f, +0.076362f, 0.113545f, 0.001975f, 0.250538f, 0.111066f, 0.320446f, 0.190892f, 0.392744f, 
0.002396f, 0.005868f, 0.002863f, 0.015702f, 0.251519f, 0.753000f, 0.373492f, 0.837283f, +0.097882f, 0.173051f, 0.002074f, 0.350806f, 0.201656f, 0.691782f, 0.283878f, 0.778950f, 0.116260f, 0.338612f, 0.113785f, 0.832410f, 0.375175f, 1.335490f, 0.456307f, 1.364281f, +0.037309f, 0.077445f, 0.001464f, 0.162443f, 0.076008f, 0.306144f, 0.198197f, 0.356681f, 0.033032f, 0.112960f, 0.059885f, 0.287324f, 0.186954f, 0.781363f, 0.421191f, 0.825905f, +0.131712f, 0.212846f, 0.003468f, 0.476931f, 0.421720f, 1.322354f, 0.737716f, 1.645832f, 0.203971f, 0.543010f, 0.248067f, 1.475502f, 0.629165f, 2.047091f, 0.950894f, 2.311521f, +0.004318f, 0.007064f, 0.000098f, 0.017683f, 0.004275f, 0.013570f, 0.006460f, 0.018868f, 0.000148f, 0.000399f, 0.000156f, 0.001211f, 0.012874f, 0.042400f, 0.016807f, 0.053489f, +0.005722f, 0.011129f, 0.000107f, 0.025595f, 0.008024f, 0.030282f, 0.009931f, 0.038685f, 0.007428f, 0.023799f, 0.006391f, 0.066376f, 0.019851f, 0.077737f, 0.021227f, 0.090096f, +0.002330f, 0.005321f, 0.000080f, 0.012663f, 0.003231f, 0.014318f, 0.007408f, 0.018926f, 0.002255f, 0.008483f, 0.003594f, 0.024479f, 0.010569f, 0.048594f, 0.020934f, 0.058274f, +0.010171f, 0.018082f, 0.000235f, 0.045968f, 0.022168f, 0.076468f, 0.034093f, 0.107978f, 0.017215f, 0.050417f, 0.018407f, 0.155427f, 0.043977f, 0.157411f, 0.058435f, 0.201656f, +0.117036f, 0.158835f, 0.002725f, 0.411481f, 0.159993f, 0.421320f, 0.247477f, 0.606266f, 0.003612f, 0.008076f, 0.003884f, 0.025371f, 0.390502f, 1.067047f, 0.521867f, 1.393023f, +0.157035f, 0.253400f, 0.002994f, 0.603110f, 0.304078f, 0.952094f, 0.385241f, 1.258687f, 0.183493f, 0.487789f, 0.161623f, 1.407875f, 0.609732f, 1.980993f, 0.667404f, 2.375985f, +0.082893f, 0.157051f, 0.002928f, 0.386763f, 0.158725f, 0.583513f, 0.372489f, 0.798183f, 0.072201f, 0.225355f, 0.117801f, 0.672998f, 0.420781f, 1.605131f, 0.853152f, 1.991980f, +0.167977f, 0.247758f, 0.003980f, 0.651800f, 0.505506f, 1.446729f, 0.795827f, 2.114085f, 0.255911f, 0.621823f, 0.280103f, 1.983790f, 
0.812829f, 2.413841f, 1.105589f, 3.200123f, +0.163500f, 0.475424f, 0.005458f, 0.761728f, 0.150794f, 0.850807f, 0.334429f, 0.757178f, 0.002953f, 0.014147f, 0.004554f, 0.027486f, 0.308419f, 1.805664f, 0.590968f, 1.457896f, +0.172431f, 0.596158f, 0.004714f, 0.877537f, 0.225263f, 1.511188f, 0.409187f, 1.235582f, 0.117916f, 0.671615f, 0.148917f, 1.198857f, 0.378510f, 2.634850f, 0.594036f, 1.954482f, +0.006709f, 0.027236f, 0.000340f, 0.041482f, 0.008668f, 0.068271f, 0.029164f, 0.057757f, 0.003420f, 0.022872f, 0.008001f, 0.042244f, 0.019255f, 0.157372f, 0.055975f, 0.120786f, +0.212087f, 0.670234f, 0.007205f, 1.090506f, 0.430602f, 2.640406f, 0.971970f, 2.386277f, 0.189098f, 0.984464f, 0.296757f, 1.942423f, 0.580206f, 3.691699f, 1.131519f, 3.026908f, +0.290344f, 0.814802f, 0.009651f, 1.075659f, 0.331570f, 1.805502f, 0.732299f, 1.323942f, 0.007531f, 0.034817f, 0.011564f, 0.055739f, 0.710930f, 4.016970f, 1.356571f, 2.672345f, +0.384474f, 1.282890f, 0.010468f, 1.555959f, 0.621924f, 4.026641f, 1.125027f, 2.712689f, 0.377564f, 2.075453f, 0.474845f, 3.052558f, 1.095520f, 7.359949f, 1.712175f, 4.498365f, +0.177169f, 0.694097f, 0.008935f, 0.871049f, 0.283396f, 2.154321f, 0.949598f, 1.501694f, 0.129692f, 0.837037f, 0.302130f, 1.273825f, 0.659985f, 5.205933f, 1.910652f, 3.292246f, +0.453391f, 1.382802f, 0.015339f, 1.853814f, 1.139802f, 6.745290f, 2.562120f, 5.022906f, 0.580510f, 2.916739f, 0.907226f, 4.741828f, 1.610017f, 9.886685f, 3.126820f, 6.679253f, +0.102494f, 0.316427f, 0.002995f, 0.473928f, 0.079672f, 0.477272f, 0.154703f, 0.397058f, 0.002905f, 0.014777f, 0.003922f, 0.026840f, 0.227155f, 1.411987f, 0.381082f, 1.065716f, +0.140301f, 0.515016f, 0.003358f, 0.708673f, 0.154482f, 1.100326f, 0.245689f, 0.840998f, 0.150581f, 0.910604f, 0.166499f, 1.519488f, 0.361847f, 2.674343f, 0.497204f, 1.854445f, +0.069076f, 0.297712f, 0.003063f, 0.423874f, 0.075211f, 0.628976f, 0.221568f, 0.497419f, 0.055263f, 0.392380f, 0.113188f, 0.677468f, 0.232908f, 2.021095f, 0.592807f, 1.450098f, 
+0.218564f, 0.733337f, 0.006501f, 1.115389f, 0.374009f, 2.434956f, 0.739152f, 2.057133f, 0.305845f, 1.690543f, 0.420231f, 3.118111f, 0.702502f, 4.745755f, 1.199503f, 3.637469f, +0.338807f, 0.867817f, 0.010136f, 1.345081f, 0.363658f, 1.807393f, 0.722825f, 1.556041f, 0.008645f, 0.036481f, 0.011947f, 0.068569f, 0.840381f, 4.333962f, 1.443175f, 3.385138f, +0.469634f, 1.430273f, 0.011507f, 2.036691f, 0.714016f, 4.219402f, 1.162414f, 3.337378f, 0.453712f, 2.276354f, 0.513534f, 3.930866f, 1.355574f, 8.312176f, 1.906682f, 5.964747f, +0.299706f, 1.071679f, 0.013603f, 1.579009f, 0.450589f, 3.126320f, 1.358792f, 2.558597f, 0.215833f, 1.271414f, 0.452508f, 2.271691f, 1.130972f, 8.142423f, 2.946638f, 6.045676f, +0.440245f, 1.225516f, 0.013405f, 1.928956f, 1.040230f, 5.618725f, 2.104392f, 4.912354f, 0.554534f, 2.543042f, 0.779941f, 4.853995f, 1.583662f, 8.876052f, 2.767975f, 7.040354f, +0.070807f, 0.186714f, 0.002250f, 0.442159f, 0.087497f, 0.447690f, 0.184718f, 0.588881f, 0.001280f, 0.005561f, 0.001879f, 0.015970f, 0.142827f, 0.758302f, 0.260511f, 0.904930f, +0.081087f, 0.254235f, 0.002110f, 0.553125f, 0.141931f, 0.863464f, 0.245417f, 1.043471f, 0.055504f, 0.286687f, 0.066725f, 0.756376f, 0.190338f, 1.201546f, 0.284351f, 1.317343f, +0.002763f, 0.010171f, 0.000133f, 0.022896f, 0.004782f, 0.034159f, 0.015317f, 0.042713f, 0.001410f, 0.008549f, 0.003139f, 0.023339f, 0.008479f, 0.062843f, 0.023463f, 0.071291f, +0.111288f, 0.318931f, 0.003599f, 0.766975f, 0.302733f, 1.683417f, 0.650475f, 2.248666f, 0.099319f, 0.468902f, 0.148368f, 1.367444f, 0.325556f, 1.878477f, 0.604364f, 2.276470f, +0.148356f, 0.377556f, 0.004694f, 0.736695f, 0.226997f, 1.120930f, 0.477228f, 1.214878f, 0.003852f, 0.016149f, 0.005630f, 0.038210f, 0.388445f, 1.990388f, 0.705568f, 1.957108f, +0.213324f, 0.645503f, 0.005529f, 1.157151f, 0.462339f, 2.714581f, 0.796123f, 2.702980f, 0.209689f, 1.045285f, 0.251033f, 2.272315f, 0.649983f, 3.959982f, 0.966994f, 3.577306f, +0.086081f, 0.305827f, 0.004132f, 0.567259f, 
0.184486f, 1.271795f, 0.588442f, 1.310301f, 0.063073f, 0.369159f, 0.139868f, 0.830350f, 0.342896f, 2.452809f, 0.944940f, 2.292665f, +0.280699f, 0.776362f, 0.009040f, 1.538344f, 0.945470f, 5.074062f, 2.023074f, 5.584614f, 0.359741f, 1.639134f, 0.535168f, 3.938635f, 1.065879f, 5.935598f, 1.970489f, 5.926860f, +0.051083f, 0.143017f, 0.001421f, 0.316600f, 0.053203f, 0.289023f, 0.098338f, 0.355388f, 0.001449f, 0.006685f, 0.001863f, 0.017947f, 0.121063f, 0.682425f, 0.193330f, 0.761288f, +0.075931f, 0.252764f, 0.001730f, 0.514071f, 0.112018f, 0.723546f, 0.169585f, 0.817379f, 0.081572f, 0.447339f, 0.085857f, 1.103284f, 0.209407f, 1.403527f, 0.273902f, 1.438469f, +0.032736f, 0.127949f, 0.001382f, 0.269253f, 0.047757f, 0.362181f, 0.133924f, 0.423347f, 0.026215f, 0.168795f, 0.051111f, 0.430750f, 0.118032f, 0.928832f, 0.285970f, 0.984988f, +0.131987f, 0.401599f, 0.003737f, 0.902815f, 0.302612f, 1.786617f, 0.569287f, 2.230929f, 0.184870f, 0.926677f, 0.241795f, 2.526252f, 0.453639f, 2.779100f, 0.737322f, 3.148338f, +0.225528f, 0.523857f, 0.006422f, 1.200095f, 0.324333f, 1.461799f, 0.613657f, 1.860113f, 0.005760f, 0.022043f, 0.007577f, 0.061236f, 0.598182f, 2.797557f, 0.977845f, 3.229633f, +0.339458f, 0.937524f, 0.007918f, 1.973202f, 0.691490f, 3.705658f, 1.071600f, 4.332143f, 0.328262f, 1.493537f, 0.353674f, 3.811952f, 1.047755f, 5.826232f, 1.402841f, 6.179425f, +0.189701f, 0.615141f, 0.008196f, 1.339609f, 0.382125f, 2.404333f, 1.096911f, 2.908346f, 0.136742f, 0.730483f, 0.272902f, 1.929102f, 0.765482f, 4.997738f, 1.898470f, 5.484635f, +0.355072f, 0.896350f, 0.010291f, 2.085279f, 1.124094f, 5.506144f, 2.164679f, 7.115123f, 0.447676f, 1.861766f, 0.599364f, 5.252353f, 1.365823f, 6.942058f, 2.272415f, 8.138528f, +0.152777f, 0.237957f, 0.003570f, 0.575986f, 0.158519f, 0.479076f, 0.246099f, 0.644120f, 0.002778f, 0.007128f, 0.002998f, 0.020923f, 0.227232f, 0.712593f, 0.304790f, 0.869215f, +0.179258f, 0.331972f, 0.003431f, 0.738247f, 0.263457f, 0.946708f, 0.335005f, 1.169404f, 
0.123406f, 0.376494f, 0.109097f, 1.015313f, 0.310263f, 1.156871f, 0.340858f, 1.296451f, +0.005960f, 0.012958f, 0.000211f, 0.029817f, 0.008661f, 0.036543f, 0.020401f, 0.046705f, 0.003058f, 0.010955f, 0.005008f, 0.030568f, 0.013485f, 0.059038f, 0.027443f, 0.068456f, +0.254503f, 0.430806f, 0.006053f, 1.058960f, 0.581315f, 1.909341f, 0.918539f, 2.606928f, 0.228435f, 0.637018f, 0.250949f, 1.898853f, 0.548971f, 1.870985f, 0.749442f, 2.317601f, +0.299862f, 0.450752f, 0.006978f, 0.898992f, 0.385248f, 1.123675f, 0.595612f, 1.244822f, 0.007830f, 0.019390f, 0.008416f, 0.046896f, 0.578927f, 1.752155f, 0.773301f, 1.761010f, +0.441774f, 0.789586f, 0.008420f, 1.446783f, 0.803947f, 2.788109f, 1.018034f, 2.837672f, 0.436737f, 1.285935f, 0.384495f, 2.857363f, 0.992525f, 3.571683f, 1.085872f, 3.297981f, +0.173937f, 0.365007f, 0.006141f, 0.692021f, 0.313008f, 1.274526f, 0.734194f, 1.342194f, 0.128178f, 0.443121f, 0.209028f, 1.018786f, 0.510889f, 2.158581f, 1.035342f, 2.062327f, +0.601340f, 0.982393f, 0.014242f, 1.989695f, 1.700725f, 5.391163f, 2.676170f, 6.065021f, 0.775093f, 2.086021f, 0.847948f, 5.123449f, 1.683707f, 5.538139f, 2.289014f, 5.652446f, +0.107593f, 0.177926f, 0.002201f, 0.402601f, 0.094092f, 0.301918f, 0.127896f, 0.379466f, 0.003070f, 0.008365f, 0.002902f, 0.022953f, 0.188018f, 0.626016f, 0.220803f, 0.713825f, +0.163861f, 0.322189f, 0.002746f, 0.669780f, 0.202978f, 0.774406f, 0.225977f, 0.894207f, 0.177043f, 0.573477f, 0.137035f, 1.445705f, 0.333217f, 1.319155f, 0.320513f, 1.381936f, +0.068931f, 0.159133f, 0.002139f, 0.342290f, 0.084435f, 0.378228f, 0.174124f, 0.451894f, 0.055516f, 0.211138f, 0.079596f, 0.550735f, 0.183256f, 0.851799f, 0.326510f, 0.923300f, +0.294650f, 0.529553f, 0.006135f, 1.216823f, 0.567241f, 1.978125f, 0.784745f, 2.524762f, 0.415074f, 1.228933f, 0.399229f, 3.424434f, 0.746731f, 2.702086f, 0.892539f, 3.128878f, +0.346104f, 0.474854f, 0.007248f, 1.111926f, 0.417931f, 1.112607f, 0.581506f, 1.447123f, 0.008891f, 0.020095f, 0.008601f, 0.057062f, 
0.676893f, 1.869844f, 0.813714f, 2.206440f, +0.533752f, 0.870713f, 0.009155f, 1.873168f, 0.912945f, 2.889776f, 1.040415f, 3.453139f, 0.519106f, 1.395058f, 0.411296f, 3.639454f, 1.214760f, 3.989875f, 1.196066f, 4.325457f, +0.291036f, 0.557433f, 0.009247f, 1.240818f, 0.492253f, 1.829441f, 1.039131f, 2.261946f, 0.210991f, 0.665750f, 0.309658f, 1.797086f, 0.865946f, 3.339411f, 1.579339f, 3.745902f, +0.577549f, 0.861173f, 0.012310f, 2.047808f, 1.535255f, 4.441873f, 2.174139f, 5.866963f, 0.732351f, 1.798959f, 0.721044f, 5.187552f, 1.638118f, 4.917897f, 2.004260f, 5.893177f, +0.094125f, 0.145017f, 0.002445f, 0.388349f, 0.104891f, 0.313573f, 0.181032f, 0.466432f, 0.001543f, 0.003916f, 0.001851f, 0.012718f, 0.279128f, 0.865865f, 0.416217f, 1.168486f, +0.084524f, 0.154838f, 0.001798f, 0.380947f, 0.133420f, 0.474244f, 0.188603f, 0.648095f, 0.052457f, 0.158309f, 0.051555f, 0.472318f, 0.291686f, 1.075835f, 0.356242f, 1.333843f, +0.003213f, 0.006910f, 0.000127f, 0.017591f, 0.005015f, 0.020929f, 0.013131f, 0.029594f, 0.001486f, 0.005266f, 0.002706f, 0.016258f, 0.014495f, 0.062770f, 0.032791f, 0.080523f, +0.120348f, 0.201513f, 0.003182f, 0.548009f, 0.295235f, 0.959213f, 0.518609f, 1.448934f, 0.097382f, 0.268624f, 0.118929f, 0.885874f, 0.517584f, 1.744925f, 0.785516f, 2.391295f, +0.105395f, 0.156715f, 0.002726f, 0.345794f, 0.145429f, 0.419590f, 0.249953f, 0.514257f, 0.002481f, 0.006077f, 0.002965f, 0.016262f, 0.405703f, 1.214597f, 0.602447f, 1.350546f, +0.118837f, 0.210100f, 0.002518f, 0.425909f, 0.232268f, 0.796795f, 0.326971f, 0.897196f, 0.105911f, 0.308473f, 0.103657f, 0.758318f, 0.532327f, 1.894893f, 0.647442f, 1.935744f, +0.053494f, 0.111042f, 0.002099f, 0.232913f, 0.103390f, 0.416433f, 0.269599f, 0.485177f, 0.035538f, 0.121529f, 0.064428f, 0.309121f, 0.313273f, 1.309302f, 0.705774f, 1.383939f, +0.162225f, 0.262154f, 0.004271f, 0.587417f, 0.492767f, 1.545130f, 0.861999f, 1.923105f, 0.188505f, 0.501837f, 0.229257f, 1.363623f, 0.905626f, 2.946601f, 1.368725f, 3.327223f, 
+0.011812f, 0.019321f, 0.000269f, 0.048368f, 0.011094f, 0.035213f, 0.016764f, 0.048963f, 0.000304f, 0.000819f, 0.000319f, 0.002486f, 0.041154f, 0.135540f, 0.053728f, 0.170987f, +0.013767f, 0.026777f, 0.000256f, 0.061584f, 0.018316f, 0.069124f, 0.022669f, 0.088305f, 0.013410f, 0.042967f, 0.011539f, 0.119836f, 0.055820f, 0.218590f, 0.059689f, 0.253344f, +0.006621f, 0.015121f, 0.000228f, 0.035982f, 0.008711f, 0.038599f, 0.019971f, 0.051021f, 0.004808f, 0.018086f, 0.007663f, 0.052193f, 0.035098f, 0.161373f, 0.069519f, 0.193520f, +0.024827f, 0.044137f, 0.000575f, 0.112204f, 0.051333f, 0.177076f, 0.078949f, 0.250043f, 0.031530f, 0.092341f, 0.033713f, 0.284671f, 0.125450f, 0.449035f, 0.166693f, 0.575251f, +0.183165f, 0.248582f, 0.004264f, 0.643982f, 0.237548f, 0.625551f, 0.367439f, 0.900149f, 0.004242f, 0.009484f, 0.004562f, 0.029793f, 0.714234f, 1.951647f, 0.954504f, 2.547863f, +0.216185f, 0.348849f, 0.004122f, 0.830285f, 0.397139f, 1.243476f, 0.503142f, 1.643900f, 0.189546f, 0.503880f, 0.166955f, 1.454315f, 0.980988f, 3.187186f, 1.073776f, 3.822681f, +0.134770f, 0.255337f, 0.004760f, 0.628808f, 0.244820f, 0.900018f, 0.574531f, 1.231128f, 0.088081f, 0.274919f, 0.143710f, 0.821015f, 0.799509f, 3.049845f, 1.621039f, 3.784882f, +0.234596f, 0.346018f, 0.005559f, 0.910301f, 0.669768f, 1.916836f, 1.054427f, 2.801046f, 0.268178f, 0.651630f, 0.293530f, 2.078885f, 1.326672f, 3.939791f, 1.804506f, 5.223135f, +0.124801f, 0.362896f, 0.004166f, 0.581434f, 0.109197f, 0.616111f, 0.242177f, 0.548309f, 0.001691f, 0.008103f, 0.002608f, 0.015743f, 0.275129f, 1.610763f, 0.527180f, 1.300533f, +0.115777f, 0.400284f, 0.003165f, 0.589214f, 0.143491f, 0.962616f, 0.260649f, 0.787057f, 0.059408f, 0.338370f, 0.075027f, 0.604003f, 0.297015f, 2.067555f, 0.466137f, 1.533673f, +0.005320f, 0.021597f, 0.000269f, 0.032893f, 0.006520f, 0.051359f, 0.021939f, 0.043449f, 0.002035f, 0.013609f, 0.004760f, 0.025135f, 0.017844f, 0.145839f, 0.051873f, 0.111934f, +0.144465f, 0.456535f, 0.004908f, 0.742807f, 
0.278260f, 1.706261f, 0.628098f, 1.542040f, 0.096649f, 0.503166f, 0.151674f, 0.992785f, 0.461874f, 2.938784f, 0.900748f, 2.409576f, +0.184356f, 0.517364f, 0.006128f, 0.682998f, 0.199732f, 1.087600f, 0.441123f, 0.797518f, 0.003588f, 0.016588f, 0.005509f, 0.026556f, 0.527552f, 2.980826f, 1.006655f, 1.983036f, +0.214743f, 0.716540f, 0.005847f, 0.869059f, 0.329545f, 2.133639f, 0.596130f, 1.437402f, 0.158236f, 0.869817f, 0.199007f, 1.279319f, 0.715097f, 4.804183f, 1.117617f, 2.936293f, +0.116864f, 0.457840f, 0.005894f, 0.574561f, 0.177343f, 1.348128f, 0.594239f, 0.939728f, 0.064190f, 0.414288f, 0.149538f, 0.630475f, 0.508770f, 4.013158f, 1.472887f, 2.537932f, +0.256900f, 0.783522f, 0.008691f, 1.050406f, 0.612699f, 3.625924f, 1.377265f, 2.700058f, 0.246811f, 1.240088f, 0.385719f, 2.016048f, 1.066143f, 6.546897f, 2.070560f, 4.422957f, +0.128974f, 0.398180f, 0.003769f, 0.596374f, 0.095113f, 0.569770f, 0.184686f, 0.474010f, 0.002743f, 0.013953f, 0.003704f, 0.025342f, 0.334058f, 2.076493f, 0.560426f, 1.567261f, +0.155301f, 0.570077f, 0.003717f, 0.784438f, 0.162225f, 1.155476f, 0.258003f, 0.883151f, 0.125068f, 0.756321f, 0.138290f, 1.262043f, 0.468093f, 3.459582f, 0.643192f, 2.398945f, +0.090299f, 0.389182f, 0.004004f, 0.554105f, 0.093275f, 0.780040f, 0.274783f, 0.616886f, 0.054207f, 0.384881f, 0.111025f, 0.664521f, 0.355823f, 3.087706f, 0.905655f, 2.215372f, +0.245432f, 0.823486f, 0.007300f, 1.252504f, 0.398439f, 2.594006f, 0.787433f, 2.191504f, 0.257702f, 1.424437f, 0.354083f, 2.627292f, 0.921922f, 6.228046f, 1.574156f, 4.773598f, +0.243936f, 0.624817f, 0.007298f, 0.968441f, 0.248396f, 1.234537f, 0.493724f, 1.062852f, 0.004671f, 0.019708f, 0.006454f, 0.037044f, 0.707123f, 3.646729f, 1.214332f, 2.848360f, +0.297434f, 0.905838f, 0.007288f, 1.289901f, 0.429009f, 2.535183f, 0.698424f, 2.005228f, 0.215613f, 1.081771f, 0.244042f, 1.868029f, 1.003340f, 6.152333f, 1.411248f, 4.414862f, +0.224166f, 0.801566f, 0.010174f, 1.181025f, 0.319729f, 2.218374f, 0.964172f, 1.815529f, 
0.121131f, 0.713551f, 0.253959f, 1.274933f, 0.988598f, 7.117406f, 2.575697f, 5.284609f, +0.282856f, 0.787391f, 0.008612f, 1.239350f, 0.634056f, 3.424809f, 1.282700f, 2.994251f, 0.267340f, 1.225996f, 0.376008f, 2.340102f, 1.189126f, 6.664769f, 2.078391f, 5.286396f, +0.010688f, 0.028185f, 0.000340f, 0.066744f, 0.012530f, 0.064112f, 0.026453f, 0.084332f, 0.000145f, 0.000630f, 0.000213f, 0.001809f, 0.025197f, 0.133774f, 0.045958f, 0.159641f, +0.010767f, 0.033758f, 0.000280f, 0.073446f, 0.017879f, 0.108771f, 0.030915f, 0.131447f, 0.005530f, 0.028564f, 0.006648f, 0.075361f, 0.029537f, 0.186457f, 0.044126f, 0.204426f, +0.000433f, 0.001595f, 0.000021f, 0.003590f, 0.000711f, 0.005082f, 0.002279f, 0.006354f, 0.000166f, 0.001006f, 0.000369f, 0.002746f, 0.001554f, 0.011517f, 0.004300f, 0.013065f, +0.014991f, 0.042962f, 0.000485f, 0.103315f, 0.038688f, 0.215131f, 0.083127f, 0.287366f, 0.010039f, 0.047395f, 0.014996f, 0.138216f, 0.051251f, 0.295722f, 0.095143f, 0.358377f, +0.018629f, 0.047409f, 0.000589f, 0.092506f, 0.027041f, 0.133532f, 0.056850f, 0.144724f, 0.000363f, 0.001522f, 0.000530f, 0.003600f, 0.057004f, 0.292087f, 0.103541f, 0.287203f, +0.023563f, 0.071299f, 0.000611f, 0.127814f, 0.048448f, 0.284457f, 0.083425f, 0.283242f, 0.017379f, 0.086633f, 0.020806f, 0.188330f, 0.083904f, 0.511181f, 0.124826f, 0.461782f, +0.011229f, 0.039894f, 0.000539f, 0.073997f, 0.022831f, 0.157389f, 0.072822f, 0.162154f, 0.006174f, 0.036133f, 0.013690f, 0.081275f, 0.052274f, 0.373928f, 0.144055f, 0.349514f, +0.031453f, 0.086994f, 0.001013f, 0.172378f, 0.100508f, 0.539399f, 0.215063f, 0.593673f, 0.030247f, 0.137818f, 0.044997f, 0.331159f, 0.139582f, 0.777294f, 0.258045f, 0.776150f, +0.012712f, 0.035590f, 0.000354f, 0.078787f, 0.012560f, 0.068234f, 0.023216f, 0.083902f, 0.000271f, 0.001248f, 0.000348f, 0.003351f, 0.035208f, 0.198468f, 0.056226f, 0.221404f, +0.016621f, 0.055330f, 0.000379f, 0.112531f, 0.023263f, 0.150260f, 0.035218f, 0.169746f, 0.013398f, 0.073477f, 0.014102f, 0.181218f, 
0.053572f, 0.359057f, 0.070071f, 0.367996f, +0.008463f, 0.033077f, 0.000357f, 0.069607f, 0.011713f, 0.088827f, 0.032845f, 0.103828f, 0.005085f, 0.032743f, 0.009914f, 0.083557f, 0.035660f, 0.280623f, 0.086399f, 0.297589f, +0.029310f, 0.089183f, 0.000830f, 0.200488f, 0.063753f, 0.376398f, 0.119935f, 0.470004f, 0.030805f, 0.154412f, 0.040290f, 0.420949f, 0.117732f, 0.721252f, 0.191355f, 0.817079f, +0.032112f, 0.074589f, 0.000914f, 0.170874f, 0.043811f, 0.197459f, 0.082892f, 0.251262f, 0.000615f, 0.002355f, 0.000810f, 0.006542f, 0.099538f, 0.465515f, 0.162714f, 0.537413f, +0.042516f, 0.117422f, 0.000992f, 0.247138f, 0.082164f, 0.440312f, 0.127329f, 0.514751f, 0.030850f, 0.140361f, 0.033238f, 0.358244f, 0.153363f, 0.852804f, 0.205338f, 0.904502f, +0.028059f, 0.090988f, 0.001212f, 0.198148f, 0.053622f, 0.337390f, 0.153925f, 0.408116f, 0.015177f, 0.081075f, 0.030289f, 0.214107f, 0.132324f, 0.863929f, 0.328177f, 0.948095f, +0.045115f, 0.113890f, 0.001308f, 0.264955f, 0.135499f, 0.663716f, 0.260933f, 0.857664f, 0.042681f, 0.177499f, 0.057143f, 0.500755f, 0.202813f, 1.030836f, 0.337434f, 1.208502f, +0.191721f, 0.298614f, 0.004480f, 0.722810f, 0.188721f, 0.570353f, 0.292988f, 0.766843f, 0.002616f, 0.006712f, 0.002823f, 0.019702f, 0.333254f, 1.045077f, 0.447000f, 1.274774f, +0.197879f, 0.366455f, 0.003787f, 0.814932f, 0.275903f, 0.991431f, 0.350831f, 1.224647f, 0.102216f, 0.311847f, 0.090364f, 0.840975f, 0.400260f, 1.492443f, 0.439731f, 1.672511f, +0.007769f, 0.016893f, 0.000275f, 0.038871f, 0.010712f, 0.045195f, 0.025231f, 0.057764f, 0.002992f, 0.010716f, 0.004899f, 0.029901f, 0.020546f, 0.089947f, 0.041810f, 0.104297f, +0.285005f, 0.482438f, 0.006778f, 1.185875f, 0.617586f, 2.028476f, 0.975853f, 2.769590f, 0.191949f, 0.535273f, 0.210867f, 1.595565f, 0.718460f, 2.448631f, 0.980824f, 3.033136f, +0.313024f, 0.470537f, 0.007284f, 0.938452f, 0.381526f, 1.112817f, 0.589857f, 1.232793f, 0.006133f, 0.015188f, 0.006592f, 0.036733f, 0.706275f, 2.137579f, 0.943406f, 2.148382f, 
+0.405661f, 0.725040f, 0.007731f, 1.328514f, 0.700353f, 2.428843f, 0.886853f, 2.472020f, 0.300917f, 0.886024f, 0.264922f, 1.968757f, 1.065118f, 3.832916f, 1.165293f, 3.539196f, +0.188624f, 0.395829f, 0.006659f, 0.750456f, 0.322024f, 1.311238f, 0.755342f, 1.380854f, 0.104300f, 0.360572f, 0.170088f, 0.828996f, 0.647479f, 2.735695f, 1.312149f, 2.613706f, +0.560174f, 0.915141f, 0.013267f, 1.853486f, 1.503017f, 4.764444f, 2.365067f, 5.359967f, 0.541777f, 1.458094f, 0.592701f, 3.581205f, 1.833003f, 6.029209f, 2.491983f, 6.153653f, +0.222589f, 0.368093f, 0.004554f, 0.832899f, 0.184670f, 0.592563f, 0.251016f, 0.744762f, 0.004766f, 0.012985f, 0.004504f, 0.035630f, 0.454580f, 1.513550f, 0.533847f, 1.725850f, +0.298194f, 0.586321f, 0.004997f, 1.218867f, 0.350429f, 1.336965f, 0.390136f, 1.543795f, 0.241751f, 0.783077f, 0.187120f, 1.974095f, 0.708672f, 2.805524f, 0.681653f, 2.939045f, +0.148143f, 0.342000f, 0.004598f, 0.735633f, 0.172154f, 0.771166f, 0.355021f, 0.921364f, 0.089526f, 0.340484f, 0.128358f, 0.888125f, 0.460276f, 2.139430f, 0.820082f, 2.319018f, +0.543966f, 0.977628f, 0.011326f, 2.246425f, 0.993481f, 3.464537f, 1.374422f, 4.421930f, 0.574982f, 1.702381f, 0.553033f, 4.743701f, 1.611100f, 5.829849f, 1.925687f, 6.750669f, +0.409679f, 0.562078f, 0.008579f, 1.316171f, 0.469319f, 1.249410f, 0.653007f, 1.625059f, 0.007897f, 0.017848f, 0.007639f, 0.050682f, 0.936375f, 2.586638f, 1.125646f, 3.052266f, +0.555754f, 0.906605f, 0.009532f, 1.950383f, 0.901809f, 2.854528f, 1.027724f, 3.411019f, 0.405567f, 1.089932f, 0.321337f, 2.843435f, 1.478181f, 4.855077f, 1.455432f, 5.263429f, +0.357876f, 0.685455f, 0.011370f, 1.525788f, 0.574250f, 2.134182f, 1.212225f, 2.638732f, 0.194677f, 0.614273f, 0.285715f, 1.658131f, 1.244431f, 4.798987f, 2.269630f, 5.383147f, +0.610059f, 0.909648f, 0.013003f, 2.163079f, 1.538476f, 4.451192f, 2.178700f, 5.879272f, 0.580452f, 1.425832f, 0.571491f, 4.111588f, 2.022190f, 6.070946f, 2.474178f, 7.274890f, +0.118107f, 0.181966f, 0.003068f, 0.487296f, 
0.149214f, 0.446075f, 0.257528f, 0.663527f, 0.002405f, 0.006104f, 0.002886f, 0.019821f, 0.405757f, 1.258675f, 0.605039f, 1.698583f, +0.099486f, 0.182247f, 0.002117f, 0.448382f, 0.178034f, 0.632826f, 0.251669f, 0.864810f, 0.076691f, 0.231443f, 0.075372f, 0.690518f, 0.397732f, 1.466970f, 0.485759f, 1.818781f, +0.003174f, 0.006826f, 0.000125f, 0.017377f, 0.005616f, 0.023438f, 0.014706f, 0.033142f, 0.001824f, 0.006462f, 0.003320f, 0.019948f, 0.016588f, 0.071833f, 0.037526f, 0.092150f, +0.131090f, 0.219500f, 0.003466f, 0.596924f, 0.364584f, 1.184527f, 0.640427f, 1.789282f, 0.131755f, 0.363440f, 0.160908f, 1.198561f, 0.653137f, 2.201914f, 0.991239f, 3.017566f, +0.164712f, 0.244915f, 0.004261f, 0.540408f, 0.257664f, 0.743411f, 0.442856f, 0.911137f, 0.004816f, 0.011797f, 0.005755f, 0.031567f, 0.734523f, 2.199020f, 1.090726f, 2.445156f, +0.174208f, 0.307994f, 0.003691f, 0.624359f, 0.386016f, 1.324228f, 0.543407f, 1.491088f, 0.192849f, 0.561683f, 0.188744f, 1.380784f, 0.904040f, 3.218060f, 1.099538f, 3.287437f, +0.065814f, 0.136616f, 0.002583f, 0.286555f, 0.144208f, 0.580843f, 0.376038f, 0.676727f, 0.054308f, 0.185717f, 0.098456f, 0.472390f, 0.446509f, 1.866151f, 1.005941f, 1.972532f, +0.220081f, 0.355649f, 0.005794f, 0.796913f, 0.757889f, 2.376452f, 1.325777f, 2.957786f, 0.317647f, 0.845638f, 0.386319f, 2.297821f, 1.423333f, 4.631042f, 2.151165f, 5.229249f, +0.016520f, 0.027023f, 0.000376f, 0.067647f, 0.017590f, 0.055832f, 0.026581f, 0.077635f, 0.000528f, 0.001423f, 0.000555f, 0.004319f, 0.066679f, 0.219609f, 0.087052f, 0.277042f, +0.018061f, 0.035129f, 0.000336f, 0.080793f, 0.027242f, 0.102809f, 0.033716f, 0.131337f, 0.021852f, 0.070016f, 0.018803f, 0.195276f, 0.084836f, 0.332220f, 0.090716f, 0.385040f, +0.007290f, 0.016648f, 0.000252f, 0.039618f, 0.010873f, 0.048181f, 0.024928f, 0.063686f, 0.006575f, 0.024735f, 0.010479f, 0.071379f, 0.044768f, 0.205838f, 0.088674f, 0.246842f, +0.030142f, 0.053586f, 0.000698f, 0.136226f, 0.070656f, 0.243730f, 0.108666f, 0.344163f, 
0.047547f, 0.139253f, 0.050840f, 0.429291f, 0.176447f, 0.631572f, 0.234456f, 0.809096f, +0.242651f, 0.329315f, 0.005649f, 0.853128f, 0.356772f, 0.939512f, 0.551856f, 1.351930f, 0.006980f, 0.015605f, 0.007506f, 0.049025f, 1.096159f, 2.995259f, 1.464910f, 3.910292f, +0.268645f, 0.433501f, 0.005123f, 1.031763f, 0.559493f, 1.751819f, 0.708830f, 2.315940f, 0.292567f, 0.777744f, 0.257697f, 2.244754f, 1.412242f, 4.588310f, 1.545820f, 5.503177f, +0.140554f, 0.266296f, 0.004964f, 0.655796f, 0.289465f, 1.064145f, 0.679303f, 1.455635f, 0.114101f, 0.356134f, 0.186164f, 1.063552f, 0.965977f, 3.684857f, 1.958558f, 4.572937f, +0.269788f, 0.397923f, 0.006393f, 1.046853f, 0.873221f, 2.499107f, 1.374727f, 3.651911f, 0.383073f, 0.930806f, 0.419286f, 2.969535f, 1.767491f, 5.248880f, 2.404095f, 6.958644f, +0.257785f, 0.749584f, 0.008605f, 1.200988f, 0.255711f, 1.442767f, 0.567113f, 1.283994f, 0.004340f, 0.020788f, 0.006691f, 0.040390f, 0.658365f, 3.854449f, 1.261506f, 3.112089f, +0.224323f, 0.775567f, 0.006133f, 1.141626f, 0.315191f, 2.114476f, 0.572540f, 1.728843f, 0.142973f, 0.814329f, 0.180560f, 1.453606f, 0.666687f, 4.640879f, 1.046302f, 3.442516f, +0.008651f, 0.035119f, 0.000438f, 0.053488f, 0.012020f, 0.094680f, 0.040446f, 0.080099f, 0.004110f, 0.027487f, 0.009615f, 0.050767f, 0.033614f, 0.274735f, 0.097719f, 0.210864f, +0.259037f, 0.818601f, 0.008800f, 1.331908f, 0.565650f, 3.468510f, 1.276806f, 3.134679f, 0.215256f, 1.120642f, 0.337807f, 2.211113f, 0.959433f, 6.104620f, 1.871087f, 5.005318f, +0.474274f, 1.330970f, 0.015765f, 1.757078f, 0.582529f, 3.172053f, 1.286562f, 2.326009f, 0.011465f, 0.053006f, 0.017605f, 0.084858f, 1.572279f, 8.883849f, 3.000164f, 5.910103f, +0.518206f, 1.729118f, 0.014109f, 2.097169f, 0.901569f, 5.837197f, 1.630889f, 3.932434f, 0.474293f, 2.607170f, 0.596498f, 3.834603f, 1.999134f, 13.430630f, 3.124423f, 8.208736f, +0.236681f, 0.927248f, 0.011936f, 1.163639f, 0.407189f, 3.095367f, 1.364400f, 2.157661f, 0.161476f, 1.042178f, 0.376176f, 1.586013f, 
1.193702f, 9.415873f, 3.455761f, 5.954624f, +0.573715f, 1.749780f, 0.019410f, 2.345792f, 1.551239f, 9.180155f, 3.486975f, 6.836038f, 0.684627f, 3.439870f, 1.069942f, 5.592298f, 2.758291f, 16.937930f, 5.356888f, 11.442940f, +0.296934f, 0.916720f, 0.008678f, 1.373018f, 0.248254f, 1.487154f, 0.482048f, 1.237211f, 0.007845f, 0.039901f, 0.010591f, 0.072471f, 0.890988f, 5.538353f, 1.494748f, 4.180146f, +0.335385f, 1.231128f, 0.008028f, 1.694060f, 0.397180f, 2.828980f, 0.631675f, 2.162240f, 0.335485f, 2.028772f, 0.370951f, 3.385331f, 1.171100f, 8.655377f, 1.609174f, 6.001817f, +0.163663f, 0.705375f, 0.007257f, 1.004291f, 0.191659f, 1.602815f, 0.564621f, 1.267568f, 0.122034f, 0.866466f, 0.249945f, 1.496005f, 0.747126f, 6.483298f, 1.901615f, 4.651645f, +0.490511f, 1.645789f, 0.014590f, 2.503209f, 0.902774f, 5.877436f, 1.784146f, 4.965456f, 0.639724f, 3.536048f, 0.878982f, 6.522038f, 2.134541f, 14.419900f, 3.644669f, 11.052390f, +0.531967f, 1.362576f, 0.015914f, 2.111937f, 0.614117f, 3.052188f, 1.220651f, 2.627724f, 0.012651f, 0.053385f, 0.017483f, 0.100342f, 1.786468f, 9.213063f, 3.067876f, 7.196069f, +0.608431f, 1.852979f, 0.014908f, 2.638619f, 0.994915f, 5.879341f, 1.619715f, 4.650324f, 0.547839f, 2.748607f, 0.620072f, 4.746365f, 2.377722f, 14.579840f, 3.344384f, 10.462370f, +0.384846f, 1.376122f, 0.017467f, 2.027575f, 0.622298f, 4.317692f, 1.876598f, 3.533622f, 0.258304f, 1.521599f, 0.541551f, 2.718708f, 1.966211f, 14.155720f, 5.122774f, 10.510500f, +0.535468f, 1.490592f, 0.016304f, 2.346184f, 1.360803f, 7.350275f, 2.752913f, 6.426218f, 0.628622f, 2.882800f, 0.884143f, 5.502503f, 2.607886f, 14.616580f, 4.558145f, 11.593660f, +0.080775f, 0.212998f, 0.002567f, 0.504403f, 0.107355f, 0.549292f, 0.226639f, 0.722525f, 0.001361f, 0.005913f, 0.001998f, 0.016980f, 0.220595f, 1.171192f, 0.402358f, 1.397657f, +0.076326f, 0.239307f, 0.001986f, 0.520645f, 0.143689f, 0.874156f, 0.248456f, 1.056391f, 0.048693f, 0.251506f, 0.058537f, 0.663556f, 0.242566f, 1.531246f, 0.362376f, 
1.678819f, +0.002578f, 0.009489f, 0.000124f, 0.021361f, 0.004799f, 0.034276f, 0.015370f, 0.042859f, 0.001226f, 0.007434f, 0.002730f, 0.020294f, 0.010710f, 0.079379f, 0.029637f, 0.090049f, +0.098346f, 0.281840f, 0.003180f, 0.677779f, 0.287735f, 1.600016f, 0.618249f, 2.137262f, 0.081802f, 0.386198f, 0.122199f, 1.126257f, 0.389510f, 2.247496f, 0.723089f, 2.723672f, +0.175341f, 0.446230f, 0.005548f, 0.870691f, 0.288551f, 1.424890f, 0.606637f, 1.544314f, 0.004243f, 0.017788f, 0.006202f, 0.042090f, 0.621574f, 3.184937f, 1.129021f, 3.131683f, +0.208035f, 0.629498f, 0.005392f, 1.128459f, 0.484934f, 2.847243f, 0.835030f, 2.835076f, 0.190587f, 0.950061f, 0.228164f, 2.065311f, 0.858192f, 5.228478f, 1.276750f, 4.723220f, +0.083204f, 0.295605f, 0.003994f, 0.548299f, 0.191790f, 1.322146f, 0.611739f, 1.362176f, 0.056820f, 0.332560f, 0.126002f, 0.748029f, 0.448730f, 3.209862f, 1.236593f, 3.000289f, +0.256995f, 0.710801f, 0.008277f, 1.408436f, 0.931017f, 4.996497f, 1.992148f, 5.499245f, 0.306969f, 1.398683f, 0.456662f, 3.360863f, 1.321228f, 7.357573f, 2.442553f, 7.346742f, +0.107078f, 0.299786f, 0.002979f, 0.663643f, 0.119946f, 0.651601f, 0.221704f, 0.801222f, 0.002832f, 0.013061f, 0.003639f, 0.035062f, 0.343574f, 1.936714f, 0.548669f, 2.160525f, +0.131329f, 0.437177f, 0.002992f, 0.889132f, 0.208380f, 1.345970f, 0.315469f, 1.520521f, 0.131494f, 0.721108f, 0.138402f, 1.778490f, 0.490368f, 3.286625f, 0.641393f, 3.368448f, +0.056120f, 0.219342f, 0.002369f, 0.461577f, 0.088053f, 0.667784f, 0.246926f, 0.780561f, 0.041885f, 0.269690f, 0.081661f, 0.688225f, 0.273948f, 2.155792f, 0.663729f, 2.286128f, +0.214320f, 0.652115f, 0.006068f, 1.465986f, 0.528498f, 3.120245f, 0.994233f, 3.896215f, 0.279781f, 1.402429f, 0.365932f, 3.823217f, 0.997304f, 6.109725f, 1.620969f, 6.921476f, +0.256208f, 0.595121f, 0.007296f, 1.363354f, 0.396288f, 1.786105f, 0.749799f, 2.272786f, 0.006099f, 0.023339f, 0.008023f, 0.064837f, 0.920053f, 4.302871f, 1.504005f, 4.967440f, +0.318199f, 0.878809f, 0.007422f, 
1.849625f, 0.697147f, 3.735971f, 1.080366f, 4.367582f, 0.286783f, 1.304816f, 0.308984f, 3.330280f, 1.329714f, 7.394115f, 1.780356f, 7.842356f, +0.176247f, 0.571515f, 0.007615f, 1.244602f, 0.381842f, 2.402555f, 1.096100f, 2.906195f, 0.118407f, 0.632534f, 0.236309f, 1.670433f, 0.962884f, 6.286551f, 2.388046f, 6.899008f, +0.312476f, 0.788820f, 0.009057f, 1.835120f, 1.063969f, 5.211633f, 2.048896f, 6.734552f, 0.367185f, 1.527026f, 0.491600f, 4.307996f, 1.627350f, 8.271319f, 2.707536f, 9.696888f, +0.337360f, 0.525452f, 0.007883f, 1.271883f, 0.376481f, 1.137800f, 0.584483f, 1.529778f, 0.005717f, 0.014670f, 0.006171f, 0.043061f, 0.679346f, 2.130412f, 0.911219f, 2.598656f, +0.326613f, 0.604862f, 0.006251f, 1.345106f, 0.516286f, 1.855226f, 0.656497f, 2.291634f, 0.209561f, 0.639342f, 0.185263f, 1.724152f, 0.765368f, 2.853814f, 0.840843f, 3.198135f, +0.010763f, 0.023402f, 0.000382f, 0.053847f, 0.016823f, 0.070978f, 0.039625f, 0.090717f, 0.005147f, 0.018439f, 0.008429f, 0.051450f, 0.032972f, 0.144348f, 0.067098f, 0.167377f, +0.435348f, 0.736927f, 0.010353f, 1.811432f, 1.069498f, 3.512789f, 1.689921f, 4.796205f, 0.364189f, 1.015583f, 0.400082f, 3.027297f, 1.271388f, 4.333105f, 1.735669f, 5.367445f, +0.686016f, 1.031218f, 0.015963f, 2.056689f, 0.947937f, 2.764900f, 1.465555f, 3.062993f, 0.016695f, 0.041343f, 0.017945f, 0.099992f, 1.793176f, 5.427146f, 2.395233f, 5.454574f, +0.833935f, 1.490497f, 0.015894f, 2.731084f, 1.632243f, 5.660666f, 2.066902f, 5.761294f, 0.768374f, 2.262413f, 0.676462f, 5.027110f, 2.536643f, 9.128320f, 2.775216f, 8.428808f, +0.325435f, 0.682926f, 0.011489f, 1.294767f, 0.629875f, 2.564762f, 1.477437f, 2.700930f, 0.223515f, 0.772709f, 0.364500f, 1.776545f, 1.294151f, 5.467978f, 2.622661f, 5.224153f, +1.065713f, 1.741024f, 0.025239f, 3.526195f, 3.241753f, 10.276100f, 5.101049f, 11.560540f, 1.280249f, 3.445559f, 1.400587f, 8.462591f, 4.039919f, 13.288310f, 5.492304f, 13.562590f, +0.436562f, 0.721937f, 0.008931f, 1.633558f, 0.410618f, 1.317576f, 0.558138f, 
1.655995f, 0.011611f, 0.031633f, 0.010973f, 0.086800f, 1.032870f, 3.438996f, 1.212974f, 3.921370f, +0.548597f, 1.078674f, 0.009192f, 2.242390f, 0.730893f, 2.788520f, 0.813710f, 3.219906f, 0.552433f, 1.789437f, 0.427594f, 4.511072f, 1.510401f, 5.979449f, 1.452816f, 6.264023f, +0.228735f, 0.528054f, 0.007100f, 1.135830f, 0.301348f, 1.349892f, 0.621448f, 1.612807f, 0.171696f, 0.652990f, 0.246168f, 1.703270f, 0.823310f, 3.826862f, 1.466905f, 4.148096f, +0.926134f, 1.664471f, 0.019284f, 3.824675f, 1.917613f, 6.687239f, 2.652905f, 8.535197f, 1.215945f, 3.600116f, 1.169528f, 10.031760f, 3.177736f, 11.498800f, 3.798225f, 13.315020f, +0.761090f, 1.044214f, 0.015938f, 2.445147f, 0.988462f, 2.631460f, 1.375338f, 3.422636f, 0.018222f, 0.041185f, 0.017627f, 0.116950f, 2.015279f, 5.566996f, 2.422630f, 6.569126f, +0.968472f, 1.579877f, 0.016612f, 3.398794f, 1.781634f, 5.639470f, 2.030396f, 6.738889f, 0.877860f, 2.359183f, 0.695542f, 6.154681f, 2.984180f, 9.801525f, 2.938255f, 10.625920f, +0.523401f, 1.002493f, 0.016629f, 2.231499f, 0.952145f, 3.538612f, 2.009949f, 4.375189f, 0.353650f, 1.115889f, 0.519030f, 3.012164f, 2.108462f, 8.131014f, 3.845477f, 9.120766f, +0.983840f, 1.466988f, 0.020970f, 3.488393f, 2.812826f, 8.138200f, 3.983360f, 10.749190f, 1.162723f, 2.856134f, 1.144772f, 8.236065f, 3.778049f, 11.342320f, 4.622496f, 13.591640f, +0.055140f, 0.084953f, 0.001432f, 0.227500f, 0.091622f, 0.273903f, 0.158130f, 0.407425f, 0.001224f, 0.003106f, 0.001468f, 0.010086f, 0.216577f, 0.671831f, 0.322946f, 0.906637f, +0.076377f, 0.139914f, 0.001625f, 0.344231f, 0.179765f, 0.638979f, 0.254116f, 0.873218f, 0.064174f, 0.193667f, 0.063070f, 0.577811f, 0.349101f, 1.287600f, 0.426364f, 1.596394f, +0.001863f, 0.004007f, 0.000073f, 0.010201f, 0.004336f, 0.018097f, 0.011354f, 0.025589f, 0.001167f, 0.004135f, 0.002124f, 0.012764f, 0.011133f, 0.048212f, 0.025186f, 0.061848f, +0.080719f, 0.135157f, 0.002134f, 0.367557f, 0.295259f, 0.959291f, 0.518651f, 1.449052f, 0.088426f, 0.243919f, 0.107992f, 
0.804403f, 0.459798f, 1.550112f, 0.697816f, 2.124318f, +0.082151f, 0.122153f, 0.002125f, 0.269531f, 0.169021f, 0.487658f, 0.290501f, 0.597681f, 0.002618f, 0.006413f, 0.003128f, 0.017160f, 0.418841f, 1.253929f, 0.621956f, 1.394281f, +0.142879f, 0.252605f, 0.003027f, 0.512075f, 0.416394f, 1.428440f, 0.586172f, 1.608431f, 0.172395f, 0.502110f, 0.168726f, 1.234336f, 0.847705f, 3.017527f, 1.031021f, 3.082580f, +0.041275f, 0.085678f, 0.001620f, 0.179712f, 0.118949f, 0.479102f, 0.310171f, 0.558191f, 0.037123f, 0.126949f, 0.067301f, 0.322908f, 0.320152f, 1.338054f, 0.721273f, 1.414330f, +0.144772f, 0.233951f, 0.003812f, 0.524220f, 0.655704f, 2.056039f, 1.147025f, 2.558993f, 0.227748f, 0.606311f, 0.276985f, 1.647506f, 1.070450f, 3.482881f, 1.617833f, 3.932776f, +0.007939f, 0.012987f, 0.000181f, 0.032512f, 0.011119f, 0.035292f, 0.016802f, 0.049074f, 0.000277f, 0.000745f, 0.000291f, 0.002262f, 0.036639f, 0.120670f, 0.047833f, 0.152228f, +0.014274f, 0.027763f, 0.000266f, 0.063852f, 0.028317f, 0.106865f, 0.035046f, 0.136519f, 0.018823f, 0.060313f, 0.016197f, 0.168214f, 0.076656f, 0.300184f, 0.081969f, 0.347911f, +0.004406f, 0.010061f, 0.000152f, 0.023942f, 0.008643f, 0.038295f, 0.019814f, 0.050620f, 0.004331f, 0.016292f, 0.006903f, 0.047017f, 0.030932f, 0.142219f, 0.061267f, 0.170549f, +0.019107f, 0.033967f, 0.000442f, 0.086351f, 0.058905f, 0.203197f, 0.090594f, 0.286927f, 0.032850f, 0.096210f, 0.035126f, 0.296597f, 0.127873f, 0.457707f, 0.169913f, 0.586361f, +0.119837f, 0.162637f, 0.002790f, 0.421332f, 0.231739f, 0.610255f, 0.358455f, 0.878139f, 0.003757f, 0.008400f, 0.004040f, 0.026390f, 0.618928f, 1.691225f, 0.827138f, 2.207883f, +0.218173f, 0.352057f, 0.004160f, 0.837920f, 0.597609f, 1.871164f, 0.757120f, 2.473716f, 0.258974f, 0.688443f, 0.228108f, 1.987009f, 1.311261f, 4.260229f, 1.435288f, 5.109679f, +0.087284f, 0.165370f, 0.003083f, 0.407250f, 0.236422f, 0.869147f, 0.554825f, 1.188899f, 0.077231f, 0.241054f, 0.126007f, 0.719879f, 0.685830f, 2.616198f, 1.390550f, 
3.246723f, +0.175731f, 0.259194f, 0.004164f, 0.681886f, 0.748083f, 2.140969f, 1.177720f, 3.128568f, 0.271967f, 0.660835f, 0.297676f, 2.108252f, 1.316255f, 3.908856f, 1.790337f, 5.182123f, +0.100576f, 0.292453f, 0.003357f, 0.468570f, 0.131215f, 0.740340f, 0.291008f, 0.658867f, 0.001845f, 0.008840f, 0.002845f, 0.017176f, 0.293670f, 1.719313f, 0.562706f, 1.388176f, +0.143920f, 0.497586f, 0.003935f, 0.732440f, 0.265963f, 1.784230f, 0.483119f, 1.458827f, 0.099979f, 0.569451f, 0.126264f, 1.016491f, 0.489021f, 3.404129f, 0.767472f, 2.525118f, +0.004244f, 0.017229f, 0.000215f, 0.026241f, 0.007756f, 0.061091f, 0.026097f, 0.051683f, 0.002198f, 0.014698f, 0.005141f, 0.027146f, 0.018854f, 0.154095f, 0.054809f, 0.118271f, +0.133295f, 0.421235f, 0.004528f, 0.685371f, 0.382824f, 2.347437f, 0.864124f, 2.121506f, 0.120730f, 0.628531f, 0.189465f, 1.240140f, 0.564447f, 3.591429f, 1.100786f, 2.944695f, +0.197680f, 0.554755f, 0.006571f, 0.732358f, 0.319337f, 1.738891f, 0.705282f, 1.275098f, 0.005209f, 0.024081f, 0.007998f, 0.038551f, 0.749237f, 4.233415f, 1.429666f, 2.816338f, +0.355180f, 1.185142f, 0.009670f, 1.437405f, 0.812726f, 5.261986f, 1.470177f, 3.544923f, 0.354324f, 1.947705f, 0.445618f, 2.864668f, 1.566552f, 10.524450f, 2.448346f, 6.432491f, +0.124045f, 0.485972f, 0.006256f, 0.609865f, 0.280680f, 2.133670f, 0.940496f, 1.487299f, 0.092243f, 0.595341f, 0.214889f, 0.906004f, 0.715268f, 5.642004f, 2.070697f, 3.568019f, +0.315387f, 0.961905f, 0.010670f, 1.289550f, 1.121571f, 6.637404f, 2.521141f, 4.942568f, 0.410214f, 2.061099f, 0.641087f, 3.350789f, 1.733588f, 10.645500f, 3.366809f, 7.191896f, +0.119261f, 0.368192f, 0.003485f, 0.551460f, 0.131139f, 0.785585f, 0.254640f, 0.653553f, 0.003434f, 0.017467f, 0.004636f, 0.031726f, 0.409135f, 2.543170f, 0.686377f, 1.919492f, +0.221511f, 0.813119f, 0.005302f, 1.118870f, 0.345015f, 2.457425f, 0.548712f, 1.878254f, 0.241508f, 1.460469f, 0.267039f, 2.437026f, 0.884305f, 6.535729f, 1.215098f, 4.532009f, +0.082655f, 0.356238f, 0.003665f, 
0.507201f, 0.127306f, 1.064642f, 0.375040f, 0.841960f, 0.067175f, 0.476958f, 0.137586f, 0.823497f, 0.431391f, 3.743466f, 1.097996f, 2.685867f, +0.259838f, 0.871822f, 0.007729f, 1.326022f, 0.628974f, 4.094880f, 1.243036f, 3.459493f, 0.369364f, 2.041644f, 0.507507f, 3.765695f, 1.292753f, 8.733195f, 2.207339f, 6.693715f, +0.219553f, 0.562363f, 0.006568f, 0.871639f, 0.333355f, 1.656787f, 0.662593f, 1.426379f, 0.005691f, 0.024015f, 0.007865f, 0.045138f, 0.842962f, 4.347273f, 1.447607f, 3.395535f, +0.412934f, 1.257591f, 0.010118f, 1.790794f, 0.888085f, 5.248040f, 1.445797f, 4.150990f, 0.405257f, 2.033245f, 0.458690f, 3.511059f, 1.844961f, 11.313030f, 2.595029f, 8.118132f, +0.199722f, 0.714160f, 0.009065f, 1.052242f, 0.424754f, 2.947067f, 1.280884f, 2.411895f, 0.146109f, 0.860691f, 0.306328f, 1.537834f, 1.166611f, 8.399009f, 3.039493f, 6.236189f, +0.291478f, 0.811391f, 0.008875f, 1.277126f, 0.974241f, 5.262293f, 1.970896f, 4.600731f, 0.372966f, 1.710387f, 0.524569f, 3.264677f, 1.622996f, 9.096512f, 2.836724f, 7.215219f, +0.023222f, 0.061236f, 0.000738f, 0.145013f, 0.040593f, 0.207698f, 0.085696f, 0.273201f, 0.000427f, 0.001853f, 0.000626f, 0.005321f, 0.072507f, 0.384959f, 0.132251f, 0.459396f, +0.036084f, 0.113135f, 0.000939f, 0.246141f, 0.089344f, 0.543540f, 0.154487f, 0.656852f, 0.025091f, 0.129598f, 0.030163f, 0.341923f, 0.131108f, 0.827647f, 0.195866f, 0.907410f, +0.000932f, 0.003430f, 0.000045f, 0.007722f, 0.002282f, 0.016297f, 0.007308f, 0.020378f, 0.000483f, 0.002929f, 0.001076f, 0.007996f, 0.004426f, 0.032808f, 0.012249f, 0.037218f, +0.037291f, 0.106868f, 0.001206f, 0.257000f, 0.143495f, 0.797939f, 0.308325f, 1.065867f, 0.033808f, 0.159611f, 0.050504f, 0.465469f, 0.168858f, 0.974320f, 0.313469f, 1.180749f, +0.053853f, 0.137052f, 0.001704f, 0.267418f, 0.116560f, 0.575583f, 0.245050f, 0.623824f, 0.001420f, 0.005955f, 0.002076f, 0.014090f, 0.218261f, 1.118368f, 0.396448f, 1.099668f, +0.105069f, 0.317932f, 0.002723f, 0.569935f, 0.322123f, 1.891316f, 0.554679f, 
1.883234f, 0.104916f, 0.522997f, 0.125602f, 1.136930f, 0.495543f, 3.019065f, 0.737230f, 2.727315f, +0.032133f, 0.114162f, 0.001543f, 0.211752f, 0.097417f, 0.671566f, 0.310725f, 0.691899f, 0.023918f, 0.139987f, 0.053039f, 0.314874f, 0.198131f, 1.417272f, 0.546001f, 1.324738f, +0.104104f, 0.287933f, 0.003353f, 0.570532f, 0.496021f, 2.662000f, 1.061363f, 2.929851f, 0.135533f, 0.617548f, 0.201626f, 1.483890f, 0.611896f, 3.407490f, 1.131212f, 3.402474f, +0.031691f, 0.088725f, 0.000882f, 0.196411f, 0.046689f, 0.253638f, 0.086299f, 0.311878f, 0.000913f, 0.004213f, 0.001174f, 0.011310f, 0.116254f, 0.655322f, 0.185652f, 0.731052f, +0.063916f, 0.212766f, 0.001456f, 0.432724f, 0.133383f, 0.861549f, 0.201930f, 0.973278f, 0.069752f, 0.382520f, 0.073417f, 0.943419f, 0.272850f, 1.828742f, 0.356884f, 1.874270f, +0.020885f, 0.081627f, 0.000881f, 0.171774f, 0.043098f, 0.326851f, 0.120859f, 0.382051f, 0.016990f, 0.109393f, 0.033124f, 0.279160f, 0.116557f, 0.917231f, 0.282399f, 0.972686f, +0.083659f, 0.254549f, 0.002369f, 0.572239f, 0.271326f, 1.601903f, 0.510430f, 2.000279f, 0.119035f, 0.596674f, 0.155688f, 1.626616f, 0.445074f, 2.726634f, 0.723402f, 3.088900f, +0.077919f, 0.180990f, 0.002219f, 0.414628f, 0.158511f, 0.714425f, 0.299912f, 0.909092f, 0.002022f, 0.007736f, 0.002659f, 0.021492f, 0.319904f, 1.496117f, 0.522946f, 1.727189f, +0.159134f, 0.439499f, 0.003712f, 0.925011f, 0.458550f, 2.457346f, 0.710614f, 2.872789f, 0.156324f, 0.711247f, 0.168425f, 1.815315f, 0.760289f, 4.227723f, 1.017952f, 4.484013f, +0.067399f, 0.218555f, 0.002912f, 0.475952f, 0.192051f, 1.208387f, 0.551293f, 1.461697f, 0.049354f, 0.263648f, 0.098497f, 0.696258f, 0.420983f, 2.748543f, 1.044078f, 3.016316f, +0.125338f, 0.316405f, 0.003633f, 0.736089f, 0.561299f, 2.749410f, 1.080900f, 3.552829f, 0.160531f, 0.667607f, 0.214925f, 1.883430f, 0.746284f, 3.793132f, 1.241645f, 4.446882f, +0.153272f, 0.238728f, 0.003581f, 0.577853f, 0.224964f, 0.679885f, 0.349254f, 0.914109f, 0.002831f, 0.007264f, 0.003056f, 
0.021324f, 0.352873f, 1.106601f, 0.473315f, 1.349820f, +0.244015f, 0.451897f, 0.004670f, 1.004939f, 0.507310f, 1.822971f, 0.645083f, 2.251791f, 0.170648f, 0.520625f, 0.150862f, 1.403999f, 0.653749f, 2.437621f, 0.718216f, 2.731727f, +0.006148f, 0.013369f, 0.000218f, 0.030762f, 0.012640f, 0.053331f, 0.029773f, 0.068162f, 0.003205f, 0.011481f, 0.005249f, 0.032036f, 0.021536f, 0.094280f, 0.043825f, 0.109322f, +0.260869f, 0.441582f, 0.006204f, 1.085446f, 0.842880f, 2.768458f, 1.331841f, 3.779928f, 0.237860f, 0.663300f, 0.261302f, 1.977194f, 0.871007f, 2.968538f, 1.189078f, 3.677147f, +0.332967f, 0.500516f, 0.007748f, 0.998242f, 0.605127f, 1.765006f, 0.935554f, 1.955297f, 0.008832f, 0.021872f, 0.009493f, 0.052898f, 0.995055f, 3.011591f, 1.329144f, 3.026810f, +0.665599f, 1.189628f, 0.012686f, 2.179794f, 1.713424f, 5.942204f, 2.169701f, 6.047837f, 0.668438f, 1.968160f, 0.588481f, 4.373276f, 2.314712f, 8.329684f, 2.532412f, 7.691372f, +0.198616f, 0.416796f, 0.007012f, 0.790209f, 0.505596f, 2.058718f, 1.185930f, 2.168020f, 0.148684f, 0.514013f, 0.242469f, 1.181775f, 0.903010f, 3.815349f, 1.829994f, 3.645217f, +0.682218f, 1.114521f, 0.016157f, 2.257302f, 2.729373f, 8.651895f, 4.294795f, 9.733322f, 0.893277f, 2.404092f, 0.977241f, 5.904659f, 2.956740f, 9.725466f, 4.019713f, 9.926201f, +0.204182f, 0.337654f, 0.004177f, 0.764023f, 0.252587f, 0.810490f, 0.343332f, 1.018664f, 0.005919f, 0.016126f, 0.005594f, 0.044249f, 0.552300f, 1.838913f, 0.648606f, 2.096850f, +0.421929f, 0.829613f, 0.007070f, 1.724632f, 0.739331f, 2.820713f, 0.823105f, 3.257079f, 0.463099f, 1.500065f, 0.358448f, 3.781581f, 1.328113f, 5.257798f, 1.277478f, 5.508027f, +0.134520f, 0.310552f, 0.004175f, 0.667988f, 0.233090f, 1.044129f, 0.480684f, 1.247491f, 0.110058f, 0.418572f, 0.157796f, 1.091810f, 0.553575f, 2.573092f, 0.986312f, 2.789083f, +0.571298f, 1.026750f, 0.011896f, 2.359299f, 1.555783f, 5.425439f, 2.152334f, 6.924710f, 0.817543f, 2.420544f, 0.786334f, 6.744870f, 2.241111f, 8.109575f, 2.678714f, 
9.390476f, +0.365786f, 0.501857f, 0.007660f, 1.175156f, 0.624813f, 1.663362f, 0.869360f, 2.163469f, 0.009545f, 0.021575f, 0.009234f, 0.061263f, 1.107344f, 3.058922f, 1.331174f, 3.609568f, +0.765404f, 1.248610f, 0.013128f, 2.686139f, 1.851918f, 5.861942f, 2.110493f, 7.004731f, 0.756200f, 2.032233f, 0.599150f, 5.301726f, 2.696410f, 8.856344f, 2.654913f, 9.601237f, +0.316307f, 0.605835f, 0.010050f, 1.348559f, 0.756791f, 2.812588f, 1.597563f, 3.477522f, 0.232946f, 0.735026f, 0.341880f, 1.984085f, 1.456788f, 5.617918f, 2.656934f, 6.301762f, +0.623636f, 0.929893f, 0.013292f, 2.211219f, 2.345033f, 6.784760f, 3.320899f, 8.961521f, 0.803324f, 1.973301f, 0.790922f, 5.690290f, 2.737986f, 8.219883f, 3.349965f, 9.849988f, +0.161370f, 0.248620f, 0.004192f, 0.665793f, 0.189552f, 0.566667f, 0.327148f, 0.842904f, 0.003525f, 0.008948f, 0.004230f, 0.029058f, 0.409473f, 1.270203f, 0.610581f, 1.714140f, +0.153272f, 0.280776f, 0.003261f, 0.690794f, 0.255022f, 0.906482f, 0.360500f, 1.238783f, 0.126773f, 0.382582f, 0.124592f, 1.141445f, 0.452591f, 1.669305f, 0.552758f, 2.069640f, +0.005148f, 0.011072f, 0.000203f, 0.028187f, 0.008470f, 0.035349f, 0.022178f, 0.049983f, 0.003174f, 0.011246f, 0.005778f, 0.034718f, 0.019873f, 0.086061f, 0.044959f, 0.110403f, +0.165676f, 0.277410f, 0.004380f, 0.754409f, 0.428410f, 1.391897f, 0.752544f, 2.102523f, 0.178663f, 0.492833f, 0.218194f, 1.625276f, 0.609686f, 2.055427f, 0.925295f, 2.816816f, +0.223397f, 0.332176f, 0.005779f, 0.732949f, 0.324922f, 0.937464f, 0.558454f, 1.148971f, 0.007008f, 0.017168f, 0.008375f, 0.045937f, 0.735819f, 2.202899f, 1.092650f, 2.449469f, +0.266425f, 0.471030f, 0.005645f, 0.954862f, 0.548890f, 1.882969f, 0.772691f, 2.120233f, 0.316448f, 0.921673f, 0.309713f, 2.265746f, 1.021193f, 3.635084f, 1.242026f, 3.713451f, +0.105973f, 0.219978f, 0.004159f, 0.461408f, 0.215895f, 0.869582f, 0.562967f, 1.013129f, 0.093826f, 0.320855f, 0.170099f, 0.816125f, 0.531033f, 2.219413f, 1.196366f, 2.345932f, +0.276106f, 0.446185f, 0.007269f, 
0.999781f, 0.884043f, 2.772022f, 1.546459f, 3.450123f, 0.427580f, 1.138301f, 0.520018f, 3.093064f, 1.318906f, 4.291274f, 1.993340f, 4.845592f, +0.013541f, 0.022151f, 0.000308f, 0.055452f, 0.013406f, 0.042552f, 0.020258f, 0.059169f, 0.000464f, 0.001251f, 0.000488f, 0.003798f, 0.040371f, 0.132962f, 0.052706f, 0.167735f, +0.016694f, 0.032470f, 0.000311f, 0.074678f, 0.023411f, 0.088353f, 0.028975f, 0.112871f, 0.021671f, 0.069438f, 0.018648f, 0.193663f, 0.057918f, 0.226808f, 0.061933f, 0.262869f, +0.007095f, 0.016202f, 0.000245f, 0.038555f, 0.009839f, 0.043595f, 0.022555f, 0.057625f, 0.006865f, 0.025827f, 0.010942f, 0.074531f, 0.032179f, 0.147954f, 0.063738f, 0.177428f, +0.022855f, 0.040631f, 0.000529f, 0.103292f, 0.049811f, 0.171826f, 0.076608f, 0.242630f, 0.038682f, 0.113289f, 0.041361f, 0.349250f, 0.098817f, 0.353706f, 0.131305f, 0.453127f, +0.342361f, 0.464635f, 0.007970f, 1.203693f, 0.468021f, 1.232473f, 0.723936f, 1.773492f, 0.010566f, 0.023624f, 0.011363f, 0.074216f, 1.142322f, 3.121399f, 1.526602f, 4.074966f, +0.427401f, 0.689678f, 0.008150f, 1.641481f, 0.827608f, 2.591310f, 1.048509f, 3.425763f, 0.499413f, 1.327614f, 0.439890f, 3.831807f, 1.659506f, 5.391662f, 1.816473f, 6.466709f, +0.235435f, 0.446059f, 0.008316f, 1.098489f, 0.450813f, 1.657303f, 1.057948f, 2.267011f, 0.205068f, 0.640057f, 0.334581f, 1.911457f, 1.195109f, 4.558915f, 2.423134f, 5.657650f, +0.352100f, 0.519329f, 0.008343f, 1.366247f, 1.059599f, 3.032509f, 1.668145f, 4.431364f, 0.536419f, 1.303412f, 0.587128f, 4.158251f, 1.703783f, 5.059687f, 2.317441f, 6.707825f, +0.327716f, 0.952929f, 0.010939f, 1.526789f, 0.302248f, 1.705338f, 0.670323f, 1.517670f, 0.005919f, 0.028355f, 0.009127f, 0.055093f, 0.618189f, 3.619232f, 1.184523f, 2.922174f, +0.321565f, 1.111769f, 0.008791f, 1.636512f, 0.420090f, 2.818201f, 0.763089f, 2.304225f, 0.219901f, 1.252489f, 0.277713f, 2.235737f, 0.705880f, 4.913707f, 1.107812f, 3.644894f, +0.013057f, 0.053004f, 0.000661f, 0.080728f, 0.016868f, 0.132862f, 0.056756f, 
0.112400f, 0.006656f, 0.044511f, 0.015570f, 0.082210f, 0.037472f, 0.306262f, 0.108933f, 0.235062f, +0.304610f, 0.962620f, 0.010349f, 1.566235f, 0.618449f, 3.792271f, 1.395987f, 3.427280f, 0.271592f, 1.413931f, 0.426216f, 2.789796f, 0.833318f, 5.302185f, 1.625138f, 4.347383f, +0.598516f, 1.679634f, 0.019895f, 2.217365f, 0.683499f, 3.721864f, 1.509562f, 2.729176f, 0.015524f, 0.071772f, 0.023838f, 0.114900f, 1.465512f, 8.280586f, 2.796436f, 5.508774f, +0.737401f, 2.460514f, 0.020077f, 2.984245f, 1.192817f, 7.722882f, 2.157741f, 5.202792f, 0.724148f, 3.980608f, 0.910728f, 5.854642f, 2.101149f, 14.115990f, 3.283860f, 8.627622f, +0.354598f, 1.389210f, 0.017883f, 1.743374f, 0.567208f, 4.311796f, 1.900587f, 3.005586f, 0.259573f, 1.675301f, 0.604703f, 2.549515f, 1.320936f, 10.419490f, 3.824102f, 6.589314f, +0.669706f, 2.042546f, 0.022658f, 2.738280f, 1.683608f, 9.963509f, 3.784523f, 7.419365f, 0.857474f, 4.308332f, 1.340070f, 7.004183f, 2.378166f, 14.603680f, 4.618646f, 9.865965f, +0.226474f, 0.699190f, 0.006619f, 1.047212f, 0.176047f, 1.054601f, 0.341840f, 0.877357f, 0.006420f, 0.032653f, 0.008667f, 0.059306f, 0.501931f, 3.119988f, 0.842055f, 2.354852f, +0.288441f, 1.058807f, 0.006904f, 1.456942f, 0.317595f, 2.262129f, 0.505104f, 1.728985f, 0.309575f, 1.872085f, 0.342301f, 3.123874f, 0.743912f, 5.498108f, 1.022187f, 3.812501f, +0.148195f, 0.638711f, 0.006571f, 0.909377f, 0.161357f, 1.349404f, 0.475352f, 1.067161f, 0.118562f, 0.841811f, 0.242833f, 1.453438f, 0.499680f, 4.336049f, 1.271806f, 3.111035f, +0.346059f, 1.161114f, 0.010293f, 1.766029f, 0.592180f, 3.855339f, 1.170321f, 3.257121f, 0.484253f, 2.676688f, 0.665364f, 4.936998f, 1.112292f, 7.514095f, 1.899208f, 5.759314f, +0.698362f, 1.788778f, 0.020892f, 2.772533f, 0.749585f, 3.725469f, 1.489914f, 3.207373f, 0.017820f, 0.075196f, 0.024626f, 0.141337f, 1.732227f, 8.933331f, 2.974728f, 6.977579f, +0.900663f, 2.742971f, 0.022069f, 3.905956f, 1.369338f, 8.091951f, 2.229273f, 6.400409f, 0.870127f, 4.365582f, 0.984853f, 
7.538598f, 2.599713f, 15.941050f, 3.656626f, 11.439170f, +0.599804f, 2.144759f, 0.027223f, 3.160083f, 0.901767f, 6.256728f, 2.719361f, 5.120539f, 0.431947f, 2.544490f, 0.905607f, 4.546352f, 2.263422f, 16.295490f, 5.897128f, 12.099260f, +0.650237f, 1.810075f, 0.019798f, 2.849049f, 1.536409f, 8.298802f, 3.108166f, 7.255498f, 0.819042f, 3.756049f, 1.151965f, 7.169303f, 2.339053f, 13.109840f, 4.088271f, 10.398530f, +0.118105f, 0.311436f, 0.003753f, 0.737513f, 0.145944f, 0.746739f, 0.308106f, 0.982243f, 0.002135f, 0.009276f, 0.003134f, 0.026638f, 0.238233f, 1.264835f, 0.434528f, 1.509407f, +0.125840f, 0.394549f, 0.003275f, 0.858399f, 0.220264f, 1.340016f, 0.380864f, 1.619369f, 0.086137f, 0.444912f, 0.103551f, 1.173826f, 0.295387f, 1.864686f, 0.441286f, 2.044394f, +0.004475f, 0.016472f, 0.000216f, 0.037080f, 0.007745f, 0.055320f, 0.024806f, 0.069173f, 0.002283f, 0.013846f, 0.005084f, 0.037797f, 0.013731f, 0.101774f, 0.037998f, 0.115454f, +0.133012f, 0.381186f, 0.004302f, 0.916689f, 0.361827f, 2.012021f, 0.777449f, 2.687608f, 0.118707f, 0.560433f, 0.177330f, 1.634370f, 0.389105f, 2.245157f, 0.722336f, 2.720839f, +0.254496f, 0.647674f, 0.008053f, 1.263753f, 0.389398f, 1.922885f, 0.818655f, 2.084046f, 0.006607f, 0.027702f, 0.009658f, 0.065547f, 0.666353f, 3.414385f, 1.210357f, 3.357294f, +0.340477f, 1.030260f, 0.008824f, 1.846879f, 0.737920f, 4.332628f, 1.270658f, 4.314113f, 0.334676f, 1.668335f, 0.400664f, 3.626746f, 1.037411f, 6.320360f, 1.543379f, 5.709587f, +0.143373f, 0.509373f, 0.006883f, 0.944803f, 0.307273f, 2.118249f, 0.980085f, 2.182383f, 0.105052f, 0.614856f, 0.232959f, 1.382997f, 0.571114f, 4.085299f, 1.573853f, 3.818569f, +0.345036f, 0.954307f, 0.011112f, 1.890938f, 1.162176f, 6.237056f, 2.486770f, 6.864629f, 0.442195f, 2.014830f, 0.657830f, 4.841386f, 1.310182f, 7.296059f, 2.422132f, 7.285319f, +0.093931f, 0.262980f, 0.002613f, 0.582163f, 0.097829f, 0.531455f, 0.180825f, 0.653487f, 0.002665f, 0.012293f, 0.003425f, 0.033001f, 0.222610f, 1.254843f, 0.355496f, 
1.399856f, +0.129905f, 0.432437f, 0.002960f, 0.879491f, 0.191644f, 1.237869f, 0.290132f, 1.398400f, 0.139556f, 0.765323f, 0.146888f, 1.887537f, 0.358262f, 2.401204f, 0.468601f, 2.460984f, +0.058446f, 0.228432f, 0.002467f, 0.480706f, 0.085262f, 0.646615f, 0.239098f, 0.755817f, 0.046803f, 0.301357f, 0.091249f, 0.769034f, 0.210726f, 1.658276f, 0.510553f, 1.758533f, +0.173906f, 0.529147f, 0.004924f, 1.189549f, 0.398722f, 2.354047f, 0.750092f, 2.939472f, 0.243585f, 1.220990f, 0.318589f, 3.328590f, 0.597714f, 3.661743f, 0.971496f, 4.148250f, +0.386848f, 0.898572f, 0.011016f, 2.058525f, 0.556329f, 2.507427f, 1.052606f, 3.190655f, 0.009881f, 0.037810f, 0.012997f, 0.105038f, 1.026063f, 4.798655f, 1.677299f, 5.539796f, +0.541753f, 1.496225f, 0.012636f, 3.149097f, 1.103572f, 5.913980f, 1.710201f, 6.913809f, 0.523883f, 2.383585f, 0.564439f, 6.083619f, 1.672146f, 9.298273f, 2.238840f, 9.861945f, +0.315933f, 1.024474f, 0.013650f, 2.231023f, 0.636401f, 4.004245f, 1.826828f, 4.843643f, 0.227735f, 1.216567f, 0.454498f, 3.212782f, 1.274856f, 8.323376f, 3.161767f, 9.134267f, +0.436422f, 1.101710f, 0.012649f, 2.563031f, 1.381632f, 6.767642f, 2.660623f, 8.745250f, 0.550241f, 2.288311f, 0.736683f, 6.455706f, 1.678743f, 8.532534f, 2.793042f, 10.003120f, +0.409265f, 0.637447f, 0.009563f, 1.542972f, 0.424646f, 1.283367f, 0.659260f, 1.725493f, 0.007442f, 0.019095f, 0.008032f, 0.056049f, 0.608718f, 1.908922f, 0.816484f, 2.328484f, +0.446786f, 0.827413f, 0.008551f, 1.840018f, 0.656644f, 2.359590f, 0.834972f, 2.914640f, 0.307578f, 0.938378f, 0.271915f, 2.530581f, 0.773303f, 2.883402f, 0.849560f, 3.231292f, +0.015501f, 0.033704f, 0.000550f, 0.077553f, 0.022528f, 0.095046f, 0.053062f, 0.121478f, 0.007954f, 0.028493f, 0.013026f, 0.079506f, 0.035075f, 0.153554f, 0.071377f, 0.178052f, +0.488528f, 0.826947f, 0.011618f, 2.032708f, 1.115853f, 3.665042f, 1.763167f, 5.004085f, 0.438489f, 1.222777f, 0.481705f, 3.644910f, 1.053768f, 3.591417f, 1.438579f, 4.448712f, +0.826135f, 1.241844f, 0.019224f, 
2.476768f, 1.061378f, 3.095779f, 1.640940f, 3.429545f, 0.021572f, 0.053420f, 0.023187f, 0.129200f, 1.594973f, 4.827273f, 2.130483f, 4.851669f, +1.132410f, 2.023963f, 0.021582f, 3.708572f, 2.060774f, 7.146823f, 2.609549f, 7.273870f, 1.119497f, 3.296267f, 0.985585f, 7.324345f, 2.544162f, 9.155376f, 2.783441f, 8.453790f, +0.465272f, 0.976374f, 0.016426f, 1.851119f, 0.837280f, 3.409287f, 1.963928f, 3.590293f, 0.342869f, 1.185325f, 0.559138f, 2.725196f, 1.366600f, 5.774083f, 2.769482f, 5.516609f, +1.187132f, 1.939383f, 0.028115f, 3.927942f, 3.357474f, 10.642930f, 5.283141f, 11.973220f, 1.530144f, 4.118104f, 1.673971f, 10.114420f, 3.323879f, 10.933080f, 4.518841f, 11.158740f, +0.317742f, 0.525447f, 0.006500f, 1.188950f, 0.277870f, 0.891617f, 0.377698f, 1.120629f, 0.009068f, 0.024703f, 0.008569f, 0.067784f, 0.555250f, 1.848734f, 0.652070f, 2.108049f, +0.450233f, 0.885267f, 0.007544f, 1.840327f, 0.557714f, 2.127805f, 0.620909f, 2.456977f, 0.486455f, 1.575721f, 0.376526f, 3.972306f, 0.915567f, 3.624590f, 0.880660f, 3.797091f, +0.197645f, 0.456282f, 0.006135f, 0.981451f, 0.242102f, 1.084496f, 0.499268f, 1.295720f, 0.159182f, 0.605397f, 0.228226f, 1.579128f, 0.525451f, 2.442370f, 0.936204f, 2.647388f, +0.623513f, 1.120592f, 0.012983f, 2.574932f, 1.200345f, 4.185930f, 1.660607f, 5.342674f, 0.878343f, 2.600558f, 0.844813f, 7.246480f, 1.580166f, 5.717911f, 1.888712f, 6.621050f, +0.953460f, 1.308144f, 0.019967f, 3.063171f, 1.151331f, 3.065045f, 1.601953f, 3.986583f, 0.024493f, 0.055359f, 0.023693f, 0.157198f, 1.864726f, 5.151109f, 2.241646f, 6.078375f, +1.368070f, 2.231744f, 0.023466f, 4.801158f, 2.339988f, 7.406847f, 2.666710f, 8.850816f, 1.330531f, 3.575705f, 1.054201f, 9.328364f, 3.113579f, 10.226530f, 3.065663f, 11.086670f, +0.778443f, 1.490985f, 0.024733f, 3.318859f, 1.316646f, 4.893268f, 2.779400f, 6.050104f, 0.564346f, 1.780705f, 0.828253f, 4.806729f, 2.316176f, 8.932038f, 4.224312f, 10.019290f, +1.140074f, 1.699945f, 0.024300f, 4.042348f, 3.030576f, 8.768203f, 4.291724f, 
11.581320f, 1.445651f, 3.551124f, 1.423332f, 10.240170f, 3.233625f, 9.707871f, 3.956386f, 11.633060f, +}; + +static const float acceptor_me2x3acc5[16384] = { +0.008602f, 0.120562f, 0.000004f, 0.088841f, 0.006477f, 0.082328f, 0.000037f, 0.118493f, 0.001347f, 0.025650f, 0.000001f, 0.015608f, 0.008022f, 0.078301f, 0.000010f, 0.090715f, +0.010717f, 0.107252f, 0.000027f, 0.114039f, 0.009805f, 0.088994f, 0.000306f, 0.184821f, 0.014729f, 0.200307f, 0.000034f, 0.175875f, 0.015004f, 0.104573f, 0.000102f, 0.174814f, +0.000185f, 0.002733f, 0.000003f, 0.002511f, 0.000165f, 0.002207f, 0.000029f, 0.003960f, 0.000166f, 0.003334f, 0.000002f, 0.002529f, 0.000169f, 0.001734f, 0.000006f, 0.002506f, +0.016295f, 0.164076f, 0.000076f, 0.163631f, 0.022160f, 0.202370f, 0.001262f, 0.394198f, 0.024095f, 0.329686f, 0.000100f, 0.271508f, 0.044644f, 0.313057f, 0.000552f, 0.490855f, +0.008946f, 0.158159f, 0.000028f, 0.102085f, 0.007099f, 0.113822f, 0.000273f, 0.143496f, 0.001548f, 0.037199f, 0.000004f, 0.019827f, 0.006010f, 0.073990f, 0.000050f, 0.075084f, +0.039041f, 0.492823f, 0.000677f, 0.458992f, 0.037644f, 0.430968f, 0.007995f, 0.783978f, 0.059318f, 1.017524f, 0.000920f, 0.782561f, 0.039372f, 0.346120f, 0.001816f, 0.506814f, +0.005822f, 0.108483f, 0.000565f, 0.087313f, 0.005463f, 0.092327f, 0.006493f, 0.145141f, 0.005778f, 0.146299f, 0.000501f, 0.097233f, 0.003822f, 0.049596f, 0.000987f, 0.062757f, +0.030575f, 0.388328f, 0.000968f, 0.339225f, 0.043822f, 0.504779f, 0.016995f, 0.861261f, 0.049981f, 0.862619f, 0.001415f, 0.622252f, 0.060339f, 0.533702f, 0.005083f, 0.732984f, +0.000359f, 0.005415f, 0.000002f, 0.003942f, 0.000256f, 0.003504f, 0.000022f, 0.004983f, 0.000085f, 0.001739f, 0.000001f, 0.001045f, 0.000352f, 0.003695f, 0.000006f, 0.004230f, +0.000553f, 0.005954f, 0.000021f, 0.006254f, 0.000480f, 0.004682f, 0.000224f, 0.009606f, 0.001147f, 0.016780f, 0.000039f, 0.014555f, 0.000814f, 0.006100f, 0.000083f, 0.010074f, +0.000241f, 0.003826f, 0.000051f, 0.003473f, 0.000203f, 0.002928f, 
0.000532f, 0.005191f, 0.000326f, 0.007042f, 0.000062f, 0.005279f, 0.000231f, 0.002551f, 0.000131f, 0.003641f, +0.000820f, 0.008879f, 0.000057f, 0.008748f, 0.001057f, 0.010378f, 0.000902f, 0.019971f, 0.001830f, 0.026922f, 0.000114f, 0.021903f, 0.002361f, 0.017801f, 0.000438f, 0.027573f, +0.021364f, 0.239581f, 0.000096f, 0.128936f, 0.017000f, 0.172901f, 0.000940f, 0.181746f, 0.003462f, 0.052760f, 0.000014f, 0.023447f, 0.022598f, 0.176486f, 0.000271f, 0.149328f, +0.047887f, 0.383459f, 0.001195f, 0.297774f, 0.046302f, 0.336264f, 0.014157f, 0.510028f, 0.068124f, 0.741289f, 0.001521f, 0.475352f, 0.076044f, 0.424067f, 0.005050f, 0.517738f, +0.014583f, 0.172370f, 0.002037f, 0.115672f, 0.013723f, 0.147108f, 0.023477f, 0.192819f, 0.013550f, 0.217648f, 0.001693f, 0.120609f, 0.015074f, 0.124085f, 0.005601f, 0.130917f, +0.098077f, 0.790184f, 0.004471f, 0.575534f, 0.140961f, 1.030003f, 0.078699f, 1.465299f, 0.150112f, 1.643474f, 0.006120f, 0.988473f, 0.304774f, 1.710042f, 0.036958f, 1.958200f, +0.023448f, 0.425815f, 0.000002f, 0.192385f, 0.012998f, 0.214069f, 0.000013f, 0.188907f, 0.002727f, 0.067285f, 0.000000f, 0.025103f, 0.014411f, 0.182256f, 0.000003f, 0.129462f, +0.030761f, 0.398883f, 0.000014f, 0.260041f, 0.020720f, 0.243668f, 0.000114f, 0.310269f, 0.031400f, 0.553300f, 0.000013f, 0.297863f, 0.028383f, 0.256310f, 0.000034f, 0.262706f, +0.000456f, 0.008735f, 0.000001f, 0.004921f, 0.000299f, 0.005193f, 0.000009f, 0.005715f, 0.000304f, 0.007914f, 0.000001f, 0.003682f, 0.000274f, 0.003654f, 0.000002f, 0.003236f, +0.038821f, 0.506482f, 0.000032f, 0.309696f, 0.038868f, 0.459901f, 0.000391f, 0.549263f, 0.042634f, 0.755866f, 0.000031f, 0.381658f, 0.070094f, 0.636866f, 0.000153f, 0.612247f, +0.042977f, 0.984438f, 0.000024f, 0.389587f, 0.025107f, 0.521579f, 0.000170f, 0.403164f, 0.005525f, 0.171969f, 0.000003f, 0.056199f, 0.019026f, 0.303508f, 0.000028f, 0.188840f, +0.197487f, 3.230102f, 0.000605f, 1.844500f, 0.140188f, 2.079541f, 0.005261f, 2.319395f, 0.222858f, 4.953288f, 
0.000611f, 2.335689f, 0.131253f, 1.495054f, 0.001070f, 1.342229f, +0.025313f, 0.611129f, 0.000434f, 0.301575f, 0.017488f, 0.382910f, 0.003672f, 0.369067f, 0.018658f, 0.612116f, 0.000286f, 0.249434f, 0.010951f, 0.184127f, 0.000499f, 0.142852f, +0.128371f, 2.112536f, 0.000718f, 1.131464f, 0.135454f, 2.021639f, 0.009282f, 2.114878f, 0.155856f, 3.485356f, 0.000780f, 1.541498f, 0.166957f, 1.913410f, 0.002485f, 1.611210f, +0.012231f, 0.238836f, 0.000015f, 0.106602f, 0.006425f, 0.113788f, 0.000096f, 0.099199f, 0.002146f, 0.056951f, 0.000002f, 0.020990f, 0.007899f, 0.107416f, 0.000026f, 0.075377f, +0.019831f, 0.276523f, 0.000134f, 0.178091f, 0.012659f, 0.160084f, 0.001046f, 0.201374f, 0.030548f, 0.578822f, 0.000184f, 0.307833f, 0.019227f, 0.186706f, 0.000345f, 0.189050f, +0.007420f, 0.152711f, 0.000280f, 0.084993f, 0.004609f, 0.086040f, 0.002131f, 0.093531f, 0.007465f, 0.208790f, 0.000252f, 0.095957f, 0.004683f, 0.067119f, 0.000470f, 0.058730f, +0.024396f, 0.342259f, 0.000300f, 0.206747f, 0.023147f, 0.294523f, 0.003492f, 0.347495f, 0.040430f, 0.770787f, 0.000445f, 0.384484f, 0.046285f, 0.452216f, 0.001517f, 0.429475f, +0.062235f, 0.904301f, 0.000050f, 0.298389f, 0.036458f, 0.480456f, 0.000356f, 0.309648f, 0.007491f, 0.147907f, 0.000005f, 0.040301f, 0.043384f, 0.439010f, 0.000092f, 0.227747f, +0.146893f, 1.524084f, 0.000648f, 0.725647f, 0.104564f, 0.983940f, 0.005649f, 0.915019f, 0.155205f, 2.188273f, 0.000612f, 0.860354f, 0.153727f, 1.110782f, 0.001804f, 0.831482f, +0.038448f, 0.588839f, 0.000949f, 0.242278f, 0.026636f, 0.369973f, 0.008052f, 0.297325f, 0.026534f, 0.552221f, 0.000586f, 0.187624f, 0.026192f, 0.279358f, 0.001720f, 0.180711f, +0.249707f, 2.606741f, 0.002011f, 1.164096f, 0.264217f, 2.501532f, 0.026066f, 2.181934f, 0.283858f, 4.026763f, 0.002045f, 1.484930f, 0.511382f, 3.717760f, 0.010958f, 2.610235f, +0.000101f, 0.001933f, 0.000000f, 0.001329f, 0.000074f, 0.001281f, 0.000000f, 0.001721f, 0.000012f, 0.000312f, 0.000000f, 0.000177f, 0.000087f, 0.001166f, 
0.000000f, 0.001261f, +0.000144f, 0.001961f, 0.000000f, 0.001946f, 0.000127f, 0.001579f, 0.000001f, 0.003061f, 0.000150f, 0.002782f, 0.000000f, 0.002280f, 0.000187f, 0.001776f, 0.000000f, 0.002771f, +0.000002f, 0.000050f, 0.000000f, 0.000043f, 0.000002f, 0.000039f, 0.000000f, 0.000066f, 0.000002f, 0.000047f, 0.000000f, 0.000033f, 0.000002f, 0.000030f, 0.000000f, 0.000040f, +0.000221f, 0.003041f, 0.000000f, 0.002831f, 0.000292f, 0.003639f, 0.000005f, 0.006617f, 0.000248f, 0.004640f, 0.000000f, 0.003567f, 0.000563f, 0.005389f, 0.000002f, 0.007887f, +0.000092f, 0.002219f, 0.000000f, 0.001337f, 0.000071f, 0.001550f, 0.000001f, 0.001824f, 0.000012f, 0.000396f, 0.000000f, 0.000197f, 0.000057f, 0.000964f, 0.000000f, 0.000913f, +0.000458f, 0.007888f, 0.000002f, 0.006857f, 0.000428f, 0.006694f, 0.000028f, 0.011366f, 0.000528f, 0.012368f, 0.000002f, 0.008879f, 0.000429f, 0.005145f, 0.000006f, 0.007033f, +0.000069f, 0.001748f, 0.000002f, 0.001314f, 0.000063f, 0.001444f, 0.000022f, 0.002119f, 0.000052f, 0.001791f, 0.000001f, 0.001111f, 0.000042f, 0.000742f, 0.000003f, 0.000877f, +0.000363f, 0.006299f, 0.000003f, 0.005137f, 0.000505f, 0.007946f, 0.000059f, 0.012656f, 0.000451f, 0.010627f, 0.000004f, 0.007156f, 0.000666f, 0.008041f, 0.000017f, 0.010309f, +0.000063f, 0.001291f, 0.000000f, 0.000877f, 0.000043f, 0.000811f, 0.000001f, 0.001076f, 0.000011f, 0.000315f, 0.000000f, 0.000177f, 0.000057f, 0.000818f, 0.000000f, 0.000874f, +0.000110f, 0.001619f, 0.000001f, 0.001587f, 0.000093f, 0.001235f, 0.000013f, 0.002366f, 0.000174f, 0.003465f, 0.000002f, 0.002805f, 0.000151f, 0.001540f, 0.000005f, 0.002374f, +0.000048f, 0.001047f, 0.000003f, 0.000887f, 0.000040f, 0.000778f, 0.000031f, 0.001287f, 0.000050f, 0.001464f, 0.000003f, 0.001025f, 0.000043f, 0.000649f, 0.000007f, 0.000864f, +0.000165f, 0.002447f, 0.000003f, 0.002250f, 0.000207f, 0.002775f, 0.000053f, 0.004985f, 0.000280f, 0.005634f, 0.000005f, 0.004279f, 0.000442f, 0.004556f, 0.000025f, 0.006587f, +0.000256f, 0.003928f, 
0.000000f, 0.001973f, 0.000198f, 0.002751f, 0.000003f, 0.002699f, 0.000032f, 0.000657f, 0.000000f, 0.000272f, 0.000252f, 0.002687f, 0.000001f, 0.002122f, +0.000656f, 0.007170f, 0.000005f, 0.005197f, 0.000615f, 0.006102f, 0.000057f, 0.008639f, 0.000708f, 0.010527f, 0.000005f, 0.006301f, 0.000967f, 0.007365f, 0.000019f, 0.008393f, +0.000201f, 0.003246f, 0.000008f, 0.002033f, 0.000184f, 0.002688f, 0.000095f, 0.003289f, 0.000142f, 0.003112f, 0.000005f, 0.001610f, 0.000193f, 0.002170f, 0.000022f, 0.002137f, +0.001361f, 0.014975f, 0.000019f, 0.010182f, 0.001899f, 0.018943f, 0.000321f, 0.025155f, 0.001582f, 0.023655f, 0.000020f, 0.013280f, 0.003929f, 0.030101f, 0.000144f, 0.032175f, +0.048065f, 0.526778f, 0.000004f, 0.266123f, 0.027016f, 0.268534f, 0.000026f, 0.264972f, 0.004941f, 0.073584f, 0.000000f, 0.030697f, 0.029348f, 0.224004f, 0.000006f, 0.177918f, +0.062734f, 0.490950f, 0.000027f, 0.357880f, 0.042847f, 0.304108f, 0.000226f, 0.432986f, 0.056609f, 0.602015f, 0.000022f, 0.362382f, 0.057507f, 0.313418f, 0.000066f, 0.359197f, +0.000683f, 0.007895f, 0.000002f, 0.004974f, 0.000454f, 0.004760f, 0.000013f, 0.005856f, 0.000403f, 0.006324f, 0.000001f, 0.003289f, 0.000408f, 0.003281f, 0.000003f, 0.003249f, +0.070282f, 0.553397f, 0.000055f, 0.378366f, 0.071352f, 0.509538f, 0.000686f, 0.680450f, 0.068233f, 0.730084f, 0.000048f, 0.412199f, 0.126075f, 0.691333f, 0.000263f, 0.743139f, +0.076511f, 1.057705f, 0.000041f, 0.468043f, 0.045323f, 0.568246f, 0.000294f, 0.491136f, 0.008695f, 0.163336f, 0.000004f, 0.059684f, 0.033652f, 0.323977f, 0.000047f, 0.225394f, +0.349789f, 3.452846f, 0.001025f, 2.204672f, 0.251779f, 2.254072f, 0.009036f, 2.811121f, 0.348941f, 4.680688f, 0.000915f, 2.467946f, 0.230965f, 1.587759f, 0.001800f, 1.593892f, +0.032924f, 0.479731f, 0.000540f, 0.264707f, 0.023064f, 0.304791f, 0.004632f, 0.328483f, 0.021453f, 0.424770f, 0.000315f, 0.193544f, 0.014151f, 0.143598f, 0.000617f, 0.124573f, +0.201845f, 2.004687f, 0.001080f, 1.200571f, 0.215963f, 1.945295f, 
0.014153f, 2.275472f, 0.216635f, 2.923780f, 0.001037f, 1.445922f, 0.260807f, 1.803920f, 0.003712f, 1.698502f, +0.033638f, 0.396442f, 0.000039f, 0.197856f, 0.017918f, 0.191521f, 0.000256f, 0.186694f, 0.005218f, 0.083567f, 0.000005f, 0.034440f, 0.021583f, 0.177140f, 0.000067f, 0.138993f, +0.054265f, 0.456663f, 0.000350f, 0.328859f, 0.035124f, 0.268072f, 0.002775f, 0.377060f, 0.073893f, 0.845016f, 0.000426f, 0.502503f, 0.052270f, 0.306330f, 0.000897f, 0.346826f, +0.014909f, 0.185199f, 0.000538f, 0.115254f, 0.009392f, 0.105805f, 0.004152f, 0.128608f, 0.013261f, 0.223837f, 0.000428f, 0.115029f, 0.009348f, 0.080868f, 0.000898f, 0.079123f, +0.059261f, 0.501765f, 0.000698f, 0.338914f, 0.057016f, 0.437829f, 0.008225f, 0.577615f, 0.086819f, 0.998931f, 0.000915f, 0.557165f, 0.111701f, 0.658656f, 0.003500f, 0.699447f, +0.123079f, 1.079335f, 0.000094f, 0.398228f, 0.073112f, 0.581482f, 0.000683f, 0.419041f, 0.013096f, 0.156059f, 0.000009f, 0.047547f, 0.085240f, 0.520577f, 0.000173f, 0.301973f, +0.289025f, 1.809827f, 0.001219f, 0.963514f, 0.208620f, 1.184776f, 0.010779f, 1.231976f, 0.269957f, 2.297126f, 0.001019f, 1.009868f, 0.300506f, 1.310460f, 0.003372f, 1.096863f, +0.055554f, 0.513486f, 0.001311f, 0.236238f, 0.039026f, 0.327145f, 0.011282f, 0.293972f, 0.033892f, 0.425696f, 0.000715f, 0.161726f, 0.037599f, 0.242025f, 0.002361f, 0.175060f, +0.436161f, 2.747941f, 0.003360f, 1.372153f, 0.467969f, 2.673960f, 0.044148f, 2.607925f, 0.438301f, 3.752500f, 0.003020f, 1.547302f, 0.887419f, 3.893656f, 0.018184f, 3.056753f, +0.016101f, 0.225655f, 0.000007f, 0.166283f, 0.010485f, 0.133276f, 0.000059f, 0.191823f, 0.002168f, 0.041288f, 0.000001f, 0.025124f, 0.012140f, 0.118493f, 0.000015f, 0.137278f, +0.019346f, 0.193609f, 0.000049f, 0.205861f, 0.015309f, 0.138948f, 0.000477f, 0.288567f, 0.022866f, 0.310968f, 0.000052f, 0.273038f, 0.021899f, 0.152627f, 0.000148f, 0.255145f, +0.000426f, 0.006296f, 0.000006f, 0.005785f, 0.000328f, 0.004398f, 0.000057f, 0.007892f, 0.000329f, 0.006605f, 
0.000004f, 0.005012f, 0.000314f, 0.003231f, 0.000012f, 0.004667f, +0.028736f, 0.289346f, 0.000134f, 0.288562f, 0.033800f, 0.308668f, 0.001924f, 0.601256f, 0.036542f, 0.500003f, 0.000152f, 0.411769f, 0.063654f, 0.446359f, 0.000787f, 0.699866f, +0.012635f, 0.223371f, 0.000040f, 0.144177f, 0.008672f, 0.139038f, 0.000333f, 0.175286f, 0.001881f, 0.045182f, 0.000005f, 0.024082f, 0.006863f, 0.084488f, 0.000057f, 0.085737f, +0.053179f, 0.671292f, 0.000922f, 0.625209f, 0.044350f, 0.507735f, 0.009420f, 0.923626f, 0.069487f, 1.191965f, 0.001078f, 0.916721f, 0.043361f, 0.381185f, 0.002000f, 0.558158f, +0.010122f, 0.188602f, 0.000982f, 0.151797f, 0.008215f, 0.138831f, 0.009763f, 0.218245f, 0.008639f, 0.218738f, 0.000750f, 0.145377f, 0.005372f, 0.069713f, 0.001387f, 0.088214f, +0.040686f, 0.516739f, 0.001288f, 0.451398f, 0.050436f, 0.580958f, 0.019560f, 0.991240f, 0.057197f, 0.987163f, 0.001620f, 0.712093f, 0.064917f, 0.574194f, 0.005468f, 0.788597f, +0.000738f, 0.011118f, 0.000005f, 0.008093f, 0.000455f, 0.006223f, 0.000038f, 0.008848f, 0.000150f, 0.003070f, 0.000001f, 0.001845f, 0.000584f, 0.006134f, 0.000011f, 0.007021f, +0.001096f, 0.011790f, 0.000042f, 0.012384f, 0.000822f, 0.008018f, 0.000384f, 0.016451f, 0.001954f, 0.028575f, 0.000067f, 0.024786f, 0.001303f, 0.009766f, 0.000132f, 0.016128f, +0.000609f, 0.009669f, 0.000130f, 0.008777f, 0.000444f, 0.006400f, 0.001162f, 0.011347f, 0.000709f, 0.015306f, 0.000135f, 0.011473f, 0.000471f, 0.005213f, 0.000268f, 0.007440f, +0.001586f, 0.017175f, 0.000111f, 0.016921f, 0.001768f, 0.017363f, 0.001510f, 0.033413f, 0.003044f, 0.044787f, 0.000190f, 0.036437f, 0.003692f, 0.027840f, 0.000685f, 0.043124f, +0.036557f, 0.409966f, 0.000165f, 0.220633f, 0.025160f, 0.255895f, 0.001391f, 0.268986f, 0.005095f, 0.077642f, 0.000021f, 0.034505f, 0.031265f, 0.244170f, 0.000375f, 0.206597f, +0.079031f, 0.632848f, 0.001973f, 0.491437f, 0.066093f, 0.479992f, 0.020208f, 0.728026f, 0.096689f, 1.052124f, 0.002159f, 0.674674f, 0.101469f, 0.565852f, 
0.006739f, 0.690842f, +0.030718f, 0.363083f, 0.004291f, 0.243655f, 0.025001f, 0.268012f, 0.042772f, 0.351291f, 0.024547f, 0.394273f, 0.003067f, 0.218487f, 0.025673f, 0.211327f, 0.009540f, 0.222961f, +0.158125f, 1.273972f, 0.007208f, 0.927903f, 0.196564f, 1.436292f, 0.109742f, 2.043291f, 0.208135f, 2.278732f, 0.008486f, 1.370551f, 0.397281f, 2.229089f, 0.048176f, 2.552569f, +0.056658f, 1.028896f, 0.000005f, 0.464860f, 0.027164f, 0.447379f, 0.000027f, 0.394795f, 0.005666f, 0.139820f, 0.000000f, 0.052165f, 0.028154f, 0.356059f, 0.000006f, 0.252918f, +0.071688f, 0.929572f, 0.000032f, 0.606009f, 0.041763f, 0.491142f, 0.000230f, 0.625386f, 0.062932f, 1.108910f, 0.000025f, 0.596969f, 0.053479f, 0.482939f, 0.000064f, 0.494989f, +0.001357f, 0.025982f, 0.000003f, 0.014638f, 0.000770f, 0.013360f, 0.000024f, 0.014701f, 0.000778f, 0.020245f, 0.000002f, 0.009418f, 0.000659f, 0.008787f, 0.000004f, 0.007783f, +0.088380f, 1.153063f, 0.000073f, 0.705056f, 0.076533f, 0.905576f, 0.000770f, 1.081535f, 0.083473f, 1.479898f, 0.000061f, 0.747242f, 0.129021f, 1.172263f, 0.000282f, 1.126947f, +0.078359f, 1.794891f, 0.000043f, 0.710321f, 0.039593f, 0.822511f, 0.000269f, 0.635775f, 0.008663f, 0.269648f, 0.000004f, 0.088120f, 0.028047f, 0.447413f, 0.000041f, 0.278377f, +0.347276f, 5.680052f, 0.001064f, 3.243506f, 0.213216f, 3.162828f, 0.008002f, 3.527628f, 0.337026f, 7.490797f, 0.000924f, 3.532234f, 0.186610f, 2.125599f, 0.001521f, 1.908320f, +0.056813f, 1.371618f, 0.000974f, 0.676855f, 0.033947f, 0.743309f, 0.007129f, 0.716435f, 0.036013f, 1.181496f, 0.000552f, 0.481453f, 0.019872f, 0.334123f, 0.000906f, 0.259224f, +0.220524f, 3.629038f, 0.001234f, 1.943696f, 0.201256f, 3.003745f, 0.013792f, 3.142278f, 0.230255f, 5.149119f, 0.001152f, 2.277344f, 0.231889f, 2.657565f, 0.003452f, 2.237836f, +0.032417f, 0.633031f, 0.000040f, 0.282546f, 0.014729f, 0.260852f, 0.000220f, 0.227407f, 0.004892f, 0.129814f, 0.000005f, 0.047846f, 0.016926f, 0.230188f, 0.000055f, 0.161531f, +0.050695f, 0.706874f, 
0.000342f, 0.455253f, 0.027988f, 0.353940f, 0.002312f, 0.445231f, 0.067156f, 1.272492f, 0.000405f, 0.676744f, 0.039738f, 0.385885f, 0.000713f, 0.390730f, +0.024208f, 0.498250f, 0.000914f, 0.277305f, 0.013007f, 0.242799f, 0.006013f, 0.263939f, 0.020946f, 0.585845f, 0.000707f, 0.269248f, 0.012352f, 0.177054f, 0.001240f, 0.154926f, +0.060923f, 0.854707f, 0.000750f, 0.516300f, 0.049996f, 0.636141f, 0.007542f, 0.750556f, 0.086830f, 1.655372f, 0.000957f, 0.825733f, 0.093452f, 0.913055f, 0.003062f, 0.867140f, +0.137481f, 1.997666f, 0.000110f, 0.659164f, 0.069659f, 0.917984f, 0.000680f, 0.591631f, 0.014231f, 0.280994f, 0.000010f, 0.076564f, 0.077486f, 0.784103f, 0.000164f, 0.406772f, +0.312965f, 3.247172f, 0.001381f, 1.546044f, 0.192686f, 1.813164f, 0.010411f, 1.686159f, 0.284380f, 4.009555f, 0.001122f, 1.576419f, 0.264811f, 1.913435f, 0.003108f, 1.432312f, +0.104553f, 1.601243f, 0.002581f, 0.658830f, 0.062648f, 0.870165f, 0.018938f, 0.699299f, 0.062053f, 1.291432f, 0.001370f, 0.438780f, 0.057587f, 0.614199f, 0.003781f, 0.397313f, +0.519730f, 5.425572f, 0.004186f, 2.422905f, 0.475642f, 4.503246f, 0.046924f, 3.927908f, 0.508097f, 7.207789f, 0.003660f, 2.657981f, 0.860561f, 6.256299f, 0.018440f, 4.392540f, +0.010795f, 0.206582f, 0.000002f, 0.142097f, 0.006822f, 0.118399f, 0.000012f, 0.159069f, 0.001104f, 0.028706f, 0.000000f, 0.016305f, 0.007560f, 0.100752f, 0.000003f, 0.108957f, +0.014795f, 0.202168f, 0.000011f, 0.200656f, 0.011361f, 0.140795f, 0.000107f, 0.272943f, 0.013280f, 0.246609f, 0.000009f, 0.202119f, 0.015554f, 0.148024f, 0.000032f, 0.230983f, +0.000328f, 0.006620f, 0.000001f, 0.005678f, 0.000245f, 0.004487f, 0.000013f, 0.007517f, 0.000192f, 0.005275f, 0.000001f, 0.003736f, 0.000225f, 0.003155f, 0.000003f, 0.004255f, +0.022273f, 0.306228f, 0.000031f, 0.285075f, 0.025423f, 0.317006f, 0.000438f, 0.576404f, 0.021510f, 0.401888f, 0.000027f, 0.308943f, 0.045824f, 0.438762f, 0.000171f, 0.642170f, +0.007415f, 0.178996f, 0.000007f, 0.107846f, 0.004939f, 0.108118f, 
0.000057f, 0.127234f, 0.000838f, 0.027497f, 0.000001f, 0.013681f, 0.003741f, 0.062882f, 0.000009f, 0.059565f, +0.035598f, 0.613575f, 0.000187f, 0.533425f, 0.028808f, 0.450341f, 0.001850f, 0.764702f, 0.035326f, 0.827418f, 0.000166f, 0.594005f, 0.026958f, 0.323600f, 0.000376f, 0.442305f, +0.006823f, 0.173590f, 0.000200f, 0.130416f, 0.005374f, 0.123997f, 0.001931f, 0.181954f, 0.004422f, 0.152899f, 0.000116f, 0.094857f, 0.003363f, 0.059595f, 0.000263f, 0.070392f, +0.027603f, 0.478706f, 0.000264f, 0.390346f, 0.033205f, 0.522265f, 0.003894f, 0.831795f, 0.029471f, 0.694531f, 0.000252f, 0.467661f, 0.040907f, 0.494052f, 0.001042f, 0.633374f, +0.007354f, 0.151331f, 0.000015f, 0.102834f, 0.004404f, 0.082195f, 0.000113f, 0.109094f, 0.001135f, 0.031733f, 0.000002f, 0.017806f, 0.005411f, 0.077553f, 0.000030f, 0.082854f, +0.012457f, 0.183043f, 0.000144f, 0.179477f, 0.009065f, 0.120807f, 0.001282f, 0.231362f, 0.016874f, 0.336937f, 0.000174f, 0.272811f, 0.013761f, 0.140825f, 0.000423f, 0.217091f, +0.006969f, 0.151159f, 0.000450f, 0.128082f, 0.004936f, 0.097092f, 0.003905f, 0.160688f, 0.006166f, 0.181740f, 0.000356f, 0.127164f, 0.005012f, 0.075701f, 0.000861f, 0.100848f, +0.018280f, 0.270266f, 0.000385f, 0.248553f, 0.019774f, 0.265141f, 0.005105f, 0.476268f, 0.026641f, 0.535243f, 0.000502f, 0.406479f, 0.039519f, 0.406894f, 0.002216f, 0.588325f, +0.025065f, 0.383805f, 0.000034f, 0.192808f, 0.016740f, 0.232474f, 0.000280f, 0.228104f, 0.002653f, 0.055204f, 0.000003f, 0.022900f, 0.019909f, 0.212311f, 0.000072f, 0.167685f, +0.061805f, 0.675776f, 0.000467f, 0.489850f, 0.050157f, 0.497377f, 0.004638f, 0.704191f, 0.057426f, 0.853248f, 0.000388f, 0.510734f, 0.073701f, 0.561207f, 0.001480f, 0.639573f, +0.024190f, 0.390418f, 0.001022f, 0.244562f, 0.019105f, 0.279656f, 0.009884f, 0.342160f, 0.014681f, 0.321978f, 0.000555f, 0.166550f, 0.018777f, 0.211054f, 0.002110f, 0.207855f, +0.125333f, 1.378813f, 0.001728f, 0.937432f, 0.151189f, 1.508467f, 0.025526f, 2.003158f, 0.125290f, 1.873024f, 
0.001545f, 1.051567f, 0.292471f, 2.240724f, 0.010726f, 2.395137f, +0.100808f, 1.104836f, 0.000008f, 0.558152f, 0.049007f, 0.487126f, 0.000047f, 0.480664f, 0.008912f, 0.132724f, 0.000001f, 0.055369f, 0.049767f, 0.379853f, 0.000010f, 0.301702f, +0.126900f, 0.993102f, 0.000055f, 0.723927f, 0.074963f, 0.532056f, 0.000395f, 0.757535f, 0.098479f, 1.047279f, 0.000038f, 0.630409f, 0.094052f, 0.512589f, 0.000108f, 0.587459f, +0.001765f, 0.020384f, 0.000004f, 0.012841f, 0.001014f, 0.010628f, 0.000030f, 0.013077f, 0.000894f, 0.014040f, 0.000002f, 0.007304f, 0.000851f, 0.006849f, 0.000005f, 0.006783f, +0.138885f, 1.093567f, 0.000109f, 0.747688f, 0.121952f, 0.870877f, 0.001173f, 1.162992f, 0.115958f, 1.240735f, 0.000081f, 0.700508f, 0.201431f, 1.104547f, 0.000421f, 1.187317f, +0.121086f, 1.673918f, 0.000064f, 0.740722f, 0.062038f, 0.777817f, 0.000403f, 0.672269f, 0.011834f, 0.222305f, 0.000006f, 0.081232f, 0.043059f, 0.414545f, 0.000061f, 0.288403f, +0.533903f, 5.270271f, 0.001565f, 3.365115f, 0.332390f, 2.975744f, 0.011929f, 3.711140f, 0.458042f, 6.144178f, 0.001200f, 3.239588f, 0.285030f, 1.959426f, 0.002222f, 1.966995f, +0.064141f, 0.934582f, 0.001052f, 0.515685f, 0.038863f, 0.513562f, 0.007804f, 0.553484f, 0.035942f, 0.711659f, 0.000527f, 0.324263f, 0.022290f, 0.226182f, 0.000972f, 0.196215f, +0.300971f, 2.989191f, 0.001610f, 1.790173f, 0.278521f, 2.508791f, 0.018252f, 2.934611f, 0.277801f, 3.749299f, 0.001329f, 1.854174f, 0.314424f, 2.174768f, 0.004475f, 2.047678f, +0.077389f, 0.912062f, 0.000090f, 0.455191f, 0.035654f, 0.381095f, 0.000509f, 0.371489f, 0.010324f, 0.165339f, 0.000011f, 0.068140f, 0.040146f, 0.329495f, 0.000125f, 0.258539f, +0.120407f, 1.013273f, 0.000777f, 0.729695f, 0.067407f, 0.514463f, 0.005326f, 0.723625f, 0.141005f, 1.612480f, 0.000814f, 0.958889f, 0.093771f, 0.549553f, 0.001609f, 0.622202f, +0.042223f, 0.524487f, 0.001524f, 0.326400f, 0.023005f, 0.259164f, 0.010169f, 0.315017f, 0.032296f, 0.545162f, 0.001043f, 0.280156f, 0.021405f, 0.185166f, 
0.002055f, 0.181169f, +0.128455f, 1.087634f, 0.001513f, 0.734636f, 0.106893f, 0.820840f, 0.015421f, 1.082911f, 0.161844f, 1.862157f, 0.001705f, 1.038639f, 0.195762f, 1.154328f, 0.006134f, 1.225818f, +0.236001f, 2.069598f, 0.000180f, 0.763592f, 0.121252f, 0.964357f, 0.001133f, 0.694957f, 0.021595f, 0.257345f, 0.000015f, 0.078406f, 0.132149f, 0.807055f, 0.000268f, 0.468151f, +0.534504f, 3.346979f, 0.002255f, 1.781862f, 0.333691f, 1.895066f, 0.017241f, 1.970562f, 0.429348f, 3.653416f, 0.001620f, 1.606124f, 0.449322f, 1.959424f, 0.005042f, 1.640049f, +0.131128f, 1.212017f, 0.003095f, 0.557609f, 0.079671f, 0.667871f, 0.023031f, 0.600148f, 0.068798f, 0.864128f, 0.001452f, 0.328290f, 0.071754f, 0.461878f, 0.004505f, 0.334084f, +0.787978f, 4.964491f, 0.006070f, 2.478963f, 0.731234f, 4.178248f, 0.068984f, 4.075063f, 0.680986f, 5.830240f, 0.004691f, 2.404035f, 1.296238f, 5.687396f, 0.026561f, 4.464946f, +0.000070f, 0.000975f, 0.000000f, 0.000719f, 0.000062f, 0.000785f, 0.000000f, 0.001129f, 0.000011f, 0.000204f, 0.000000f, 0.000124f, 0.000079f, 0.000767f, 0.000000f, 0.000889f, +0.000165f, 0.001654f, 0.000000f, 0.001759f, 0.000178f, 0.001618f, 0.000006f, 0.003360f, 0.000223f, 0.003034f, 0.000001f, 0.002664f, 0.000280f, 0.001954f, 0.000002f, 0.003266f, +0.000002f, 0.000024f, 0.000000f, 0.000022f, 0.000002f, 0.000023f, 0.000000f, 0.000041f, 0.000001f, 0.000029f, 0.000000f, 0.000022f, 0.000002f, 0.000018f, 0.000000f, 0.000027f, +0.000153f, 0.001545f, 0.000001f, 0.001541f, 0.000246f, 0.002246f, 0.000014f, 0.004375f, 0.000223f, 0.003049f, 0.000001f, 0.002511f, 0.000509f, 0.003571f, 0.000006f, 0.005600f, +0.000064f, 0.001138f, 0.000000f, 0.000735f, 0.000060f, 0.000965f, 0.000002f, 0.001217f, 0.000011f, 0.000263f, 0.000000f, 0.000140f, 0.000052f, 0.000645f, 0.000000f, 0.000654f, +0.000536f, 0.006763f, 0.000009f, 0.006299f, 0.000609f, 0.006969f, 0.000129f, 0.012677f, 0.000799f, 0.013710f, 0.000012f, 0.010544f, 0.000654f, 0.005753f, 0.000030f, 0.008424f, +0.000045f, 0.000844f, 
0.000004f, 0.000680f, 0.000050f, 0.000847f, 0.000060f, 0.001331f, 0.000044f, 0.001118f, 0.000004f, 0.000743f, 0.000036f, 0.000468f, 0.000009f, 0.000592f, +0.000256f, 0.003254f, 0.000008f, 0.002842f, 0.000433f, 0.004984f, 0.000168f, 0.008503f, 0.000411f, 0.007096f, 0.000012f, 0.005119f, 0.000612f, 0.005416f, 0.000052f, 0.007439f, +0.000005f, 0.000072f, 0.000000f, 0.000053f, 0.000004f, 0.000055f, 0.000000f, 0.000078f, 0.000001f, 0.000023f, 0.000000f, 0.000014f, 0.000006f, 0.000060f, 0.000000f, 0.000068f, +0.000014f, 0.000151f, 0.000001f, 0.000159f, 0.000014f, 0.000140f, 0.000007f, 0.000288f, 0.000029f, 0.000419f, 0.000001f, 0.000364f, 0.000025f, 0.000188f, 0.000003f, 0.000310f, +0.000003f, 0.000055f, 0.000001f, 0.000050f, 0.000003f, 0.000050f, 0.000009f, 0.000088f, 0.000005f, 0.000100f, 0.000001f, 0.000075f, 0.000004f, 0.000045f, 0.000002f, 0.000064f, +0.000013f, 0.000138f, 0.000001f, 0.000136f, 0.000019f, 0.000190f, 0.000017f, 0.000366f, 0.000028f, 0.000411f, 0.000002f, 0.000334f, 0.000044f, 0.000335f, 0.000008f, 0.000519f, +0.000170f, 0.001909f, 0.000001f, 0.001027f, 0.000160f, 0.001623f, 0.000009f, 0.001706f, 0.000027f, 0.000413f, 0.000000f, 0.000183f, 0.000218f, 0.001703f, 0.000003f, 0.001441f, +0.000728f, 0.005827f, 0.000018f, 0.004525f, 0.000829f, 0.006021f, 0.000253f, 0.009132f, 0.001016f, 0.011059f, 0.000023f, 0.007092f, 0.001400f, 0.007805f, 0.000093f, 0.009529f, +0.000126f, 0.001485f, 0.000018f, 0.000997f, 0.000139f, 0.001494f, 0.000238f, 0.001958f, 0.000115f, 0.001841f, 0.000014f, 0.001020f, 0.000157f, 0.001295f, 0.000058f, 0.001366f, +0.000910f, 0.007331f, 0.000041f, 0.005340f, 0.001541f, 0.011260f, 0.000860f, 0.016019f, 0.001367f, 0.014971f, 0.000056f, 0.009004f, 0.003425f, 0.019216f, 0.000415f, 0.022005f, +0.000290f, 0.005262f, 0.000000f, 0.002377f, 0.000189f, 0.003117f, 0.000000f, 0.002751f, 0.000033f, 0.000816f, 0.000000f, 0.000305f, 0.000216f, 0.002728f, 0.000000f, 0.001938f, +0.000725f, 0.009402f, 0.000000f, 0.006129f, 0.000575f, 0.006767f, 
0.000003f, 0.008617f, 0.000727f, 0.012804f, 0.000000f, 0.006893f, 0.000810f, 0.007317f, 0.000001f, 0.007500f, +0.000006f, 0.000117f, 0.000000f, 0.000066f, 0.000005f, 0.000082f, 0.000000f, 0.000090f, 0.000004f, 0.000104f, 0.000000f, 0.000048f, 0.000004f, 0.000059f, 0.000000f, 0.000052f, +0.000559f, 0.007289f, 0.000000f, 0.004457f, 0.000659f, 0.007799f, 0.000007f, 0.009314f, 0.000602f, 0.010680f, 0.000000f, 0.005393f, 0.001222f, 0.011101f, 0.000003f, 0.010672f, +0.000472f, 0.010822f, 0.000000f, 0.004283f, 0.000325f, 0.006756f, 0.000002f, 0.005222f, 0.000060f, 0.001856f, 0.000000f, 0.000607f, 0.000253f, 0.004041f, 0.000000f, 0.002514f, +0.004141f, 0.067726f, 0.000013f, 0.038674f, 0.003464f, 0.051377f, 0.000130f, 0.057303f, 0.004588f, 0.101968f, 0.000013f, 0.048082f, 0.003333f, 0.037967f, 0.000027f, 0.034086f, +0.000301f, 0.007267f, 0.000005f, 0.003586f, 0.000245f, 0.005365f, 0.000051f, 0.005171f, 0.000218f, 0.007146f, 0.000003f, 0.002912f, 0.000158f, 0.002652f, 0.000007f, 0.002057f, +0.001643f, 0.027045f, 0.000009f, 0.014485f, 0.002043f, 0.030497f, 0.000140f, 0.031903f, 0.001959f, 0.043809f, 0.000010f, 0.019376f, 0.002589f, 0.029669f, 0.000039f, 0.024984f, +0.000249f, 0.004868f, 0.000000f, 0.002173f, 0.000154f, 0.002733f, 0.000002f, 0.002383f, 0.000043f, 0.001140f, 0.000000f, 0.000420f, 0.000195f, 0.002652f, 0.000001f, 0.001861f, +0.000771f, 0.010750f, 0.000005f, 0.006923f, 0.000580f, 0.007333f, 0.000048f, 0.009224f, 0.001166f, 0.022093f, 0.000007f, 0.011749f, 0.000905f, 0.008791f, 0.000016f, 0.008901f, +0.000164f, 0.003367f, 0.000006f, 0.001874f, 0.000120f, 0.002235f, 0.000055f, 0.002430f, 0.000162f, 0.004519f, 0.000005f, 0.002077f, 0.000125f, 0.001792f, 0.000013f, 0.001568f, +0.000579f, 0.008124f, 0.000007f, 0.004907f, 0.000647f, 0.008238f, 0.000098f, 0.009719f, 0.000942f, 0.017963f, 0.000010f, 0.008960f, 0.001331f, 0.013001f, 0.000044f, 0.012347f, +0.000758f, 0.011008f, 0.000001f, 0.003632f, 0.000523f, 0.006891f, 0.000005f, 0.004441f, 0.000090f, 0.001768f, 
0.000000f, 0.000482f, 0.000640f, 0.006473f, 0.000001f, 0.003358f, +0.003410f, 0.035384f, 0.000015f, 0.016847f, 0.002861f, 0.026917f, 0.000155f, 0.025032f, 0.003538f, 0.049880f, 0.000014f, 0.019611f, 0.004323f, 0.031235f, 0.000051f, 0.023381f, +0.000506f, 0.007753f, 0.000012f, 0.003190f, 0.000413f, 0.005740f, 0.000125f, 0.004613f, 0.000343f, 0.007139f, 0.000008f, 0.002425f, 0.000418f, 0.004455f, 0.000027f, 0.002882f, +0.003540f, 0.036952f, 0.000029f, 0.016502f, 0.004413f, 0.041784f, 0.000435f, 0.036446f, 0.003951f, 0.056044f, 0.000028f, 0.020667f, 0.008780f, 0.063832f, 0.000188f, 0.044817f, +0.000059f, 0.001127f, 0.000000f, 0.000775f, 0.000051f, 0.000880f, 0.000000f, 0.001182f, 0.000007f, 0.000179f, 0.000000f, 0.000102f, 0.000062f, 0.000823f, 0.000000f, 0.000890f, +0.000160f, 0.002181f, 0.000000f, 0.002164f, 0.000167f, 0.002069f, 0.000002f, 0.004011f, 0.000164f, 0.003037f, 0.000000f, 0.002489f, 0.000251f, 0.002392f, 0.000001f, 0.003732f, +0.000002f, 0.000032f, 0.000000f, 0.000027f, 0.000002f, 0.000029f, 0.000000f, 0.000049f, 0.000001f, 0.000029f, 0.000000f, 0.000020f, 0.000002f, 0.000023f, 0.000000f, 0.000031f, +0.000150f, 0.002065f, 0.000000f, 0.001922f, 0.000234f, 0.002912f, 0.000004f, 0.005294f, 0.000166f, 0.003093f, 0.000000f, 0.002378f, 0.000463f, 0.004431f, 0.000002f, 0.006486f, +0.000048f, 0.001151f, 0.000000f, 0.000693f, 0.000043f, 0.000947f, 0.000001f, 0.001115f, 0.000006f, 0.000202f, 0.000000f, 0.000100f, 0.000036f, 0.000606f, 0.000000f, 0.000574f, +0.000453f, 0.007802f, 0.000002f, 0.006783f, 0.000499f, 0.007802f, 0.000032f, 0.013248f, 0.000513f, 0.012012f, 0.000002f, 0.008624f, 0.000514f, 0.006165f, 0.000007f, 0.008426f, +0.000039f, 0.000981f, 0.000001f, 0.000737f, 0.000041f, 0.000955f, 0.000015f, 0.001401f, 0.000029f, 0.000986f, 0.000001f, 0.000612f, 0.000028f, 0.000504f, 0.000002f, 0.000596f, +0.000219f, 0.003805f, 0.000002f, 0.003102f, 0.000360f, 0.005655f, 0.000042f, 0.009007f, 0.000267f, 0.006302f, 0.000002f, 0.004243f, 0.000487f, 0.005882f, 
0.000012f, 0.007541f, +0.000060f, 0.001241f, 0.000000f, 0.000843f, 0.000049f, 0.000918f, 0.000001f, 0.001219f, 0.000011f, 0.000297f, 0.000000f, 0.000167f, 0.000066f, 0.000953f, 0.000000f, 0.001018f, +0.000202f, 0.002969f, 0.000002f, 0.002911f, 0.000200f, 0.002669f, 0.000028f, 0.005112f, 0.000312f, 0.006239f, 0.000003f, 0.005051f, 0.000334f, 0.003422f, 0.000010f, 0.005275f, +0.000050f, 0.001089f, 0.000003f, 0.000923f, 0.000048f, 0.000953f, 0.000038f, 0.001578f, 0.000051f, 0.001495f, 0.000003f, 0.001046f, 0.000054f, 0.000817f, 0.000009f, 0.001089f, +0.000185f, 0.002740f, 0.000004f, 0.002520f, 0.000273f, 0.003662f, 0.000071f, 0.006577f, 0.000308f, 0.006194f, 0.000006f, 0.004704f, 0.000600f, 0.006179f, 0.000034f, 0.008934f, +0.000147f, 0.002256f, 0.000000f, 0.001133f, 0.000134f, 0.001861f, 0.000002f, 0.001826f, 0.000018f, 0.000370f, 0.000000f, 0.000154f, 0.000175f, 0.001869f, 0.000001f, 0.001476f, +0.000718f, 0.007853f, 0.000005f, 0.005693f, 0.000794f, 0.007875f, 0.000073f, 0.011149f, 0.000762f, 0.011321f, 0.000005f, 0.006776f, 0.001283f, 0.009770f, 0.000026f, 0.011135f, +0.000125f, 0.002016f, 0.000005f, 0.001263f, 0.000134f, 0.001967f, 0.000070f, 0.002407f, 0.000087f, 0.001898f, 0.000003f, 0.000982f, 0.000145f, 0.001633f, 0.000016f, 0.001608f, +0.000910f, 0.010015f, 0.000013f, 0.006809f, 0.001496f, 0.014927f, 0.000253f, 0.019823f, 0.001039f, 0.015532f, 0.000013f, 0.008720f, 0.003182f, 0.024382f, 0.000117f, 0.026062f, +0.000731f, 0.008016f, 0.000000f, 0.004049f, 0.000484f, 0.004815f, 0.000000f, 0.004751f, 0.000074f, 0.001099f, 0.000000f, 0.000459f, 0.000541f, 0.004128f, 0.000000f, 0.003279f, +0.001821f, 0.014248f, 0.000001f, 0.010386f, 0.001465f, 0.010400f, 0.000008f, 0.014807f, 0.001613f, 0.017154f, 0.000001f, 0.010326f, 0.002021f, 0.011017f, 0.000002f, 0.012626f, +0.000011f, 0.000130f, 0.000000f, 0.000082f, 0.000009f, 0.000092f, 0.000000f, 0.000114f, 0.000007f, 0.000102f, 0.000000f, 0.000053f, 0.000008f, 0.000065f, 0.000000f, 0.000065f, +0.001245f, 0.009806f, 
0.000001f, 0.006705f, 0.001490f, 0.010639f, 0.000014f, 0.014208f, 0.001187f, 0.012702f, 0.000001f, 0.007172f, 0.002706f, 0.014838f, 0.000006f, 0.015950f, +0.001036f, 0.014317f, 0.000001f, 0.006336f, 0.000723f, 0.009064f, 0.000005f, 0.007834f, 0.000116f, 0.002171f, 0.000000f, 0.000793f, 0.000552f, 0.005312f, 0.000001f, 0.003695f, +0.009031f, 0.089143f, 0.000026f, 0.056918f, 0.007659f, 0.068572f, 0.000275f, 0.085518f, 0.008845f, 0.118645f, 0.000023f, 0.062557f, 0.007222f, 0.049649f, 0.000056f, 0.049841f, +0.000482f, 0.007024f, 0.000008f, 0.003876f, 0.000398f, 0.005258f, 0.000080f, 0.005667f, 0.000308f, 0.006106f, 0.000005f, 0.002782f, 0.000251f, 0.002547f, 0.000011f, 0.002209f, +0.003182f, 0.031601f, 0.000017f, 0.018925f, 0.004011f, 0.036133f, 0.000263f, 0.042266f, 0.003353f, 0.045251f, 0.000016f, 0.022379f, 0.004980f, 0.034442f, 0.000071f, 0.032429f, +0.000844f, 0.009950f, 0.000001f, 0.004966f, 0.000530f, 0.005664f, 0.000008f, 0.005521f, 0.000129f, 0.002059f, 0.000000f, 0.000849f, 0.000656f, 0.005385f, 0.000002f, 0.004225f, +0.002598f, 0.021859f, 0.000017f, 0.015742f, 0.001981f, 0.015120f, 0.000157f, 0.021268f, 0.003473f, 0.039713f, 0.000020f, 0.023616f, 0.003030f, 0.017760f, 0.000052f, 0.020108f, +0.000405f, 0.005028f, 0.000015f, 0.003129f, 0.000300f, 0.003384f, 0.000133f, 0.004114f, 0.000353f, 0.005966f, 0.000011f, 0.003066f, 0.000307f, 0.002659f, 0.000030f, 0.002602f, +0.001732f, 0.014665f, 0.000020f, 0.009905f, 0.001964f, 0.015078f, 0.000283f, 0.019893f, 0.002491f, 0.028665f, 0.000026f, 0.015988f, 0.003954f, 0.023316f, 0.000124f, 0.024760f, +0.001845f, 0.016178f, 0.000001f, 0.005969f, 0.001291f, 0.010270f, 0.000012f, 0.007401f, 0.000193f, 0.002297f, 0.000000f, 0.000700f, 0.001547f, 0.009451f, 0.000003f, 0.005482f, +0.008262f, 0.051737f, 0.000035f, 0.027544f, 0.007027f, 0.039909f, 0.000363f, 0.041499f, 0.007577f, 0.064474f, 0.000029f, 0.028344f, 0.010405f, 0.045374f, 0.000117f, 0.037978f, +0.000901f, 0.008325f, 0.000021f, 0.003830f, 0.000746f, 0.006250f, 
0.000216f, 0.005616f, 0.000539f, 0.006776f, 0.000011f, 0.002574f, 0.000738f, 0.004752f, 0.000046f, 0.003438f, +0.007613f, 0.047964f, 0.000059f, 0.023951f, 0.009625f, 0.054997f, 0.000908f, 0.053638f, 0.007511f, 0.064308f, 0.000052f, 0.026517f, 0.018761f, 0.082317f, 0.000384f, 0.064624f, +0.023781f, 0.333295f, 0.000011f, 0.245602f, 0.016065f, 0.204207f, 0.000091f, 0.293913f, 0.003021f, 0.057538f, 0.000001f, 0.035012f, 0.019309f, 0.188469f, 0.000024f, 0.218348f, +0.029034f, 0.290563f, 0.000074f, 0.308950f, 0.023834f, 0.216322f, 0.000743f, 0.449256f, 0.032379f, 0.440337f, 0.000074f, 0.386627f, 0.035392f, 0.246666f, 0.000240f, 0.412348f, +0.000455f, 0.006727f, 0.000006f, 0.006181f, 0.000364f, 0.004874f, 0.000063f, 0.008748f, 0.000332f, 0.006659f, 0.000004f, 0.005053f, 0.000361f, 0.003718f, 0.000014f, 0.005370f, +0.042448f, 0.427418f, 0.000197f, 0.426261f, 0.051796f, 0.473001f, 0.002949f, 0.921360f, 0.050931f, 0.696888f, 0.000212f, 0.573911f, 0.101257f, 0.710043f, 0.001252f, 1.113306f, +0.021978f, 0.388527f, 0.000069f, 0.250778f, 0.015647f, 0.250878f, 0.000601f, 0.316283f, 0.003087f, 0.074150f, 0.000009f, 0.039522f, 0.012854f, 0.158253f, 0.000107f, 0.160593f, +0.093986f, 1.186412f, 0.001630f, 1.104968f, 0.081311f, 0.930884f, 0.017270f, 1.693382f, 0.115873f, 1.987662f, 0.001797f, 1.528677f, 0.082525f, 0.725477f, 0.003807f, 1.062296f, +0.012736f, 0.237314f, 0.001236f, 0.191002f, 0.010724f, 0.181216f, 0.012744f, 0.284876f, 0.010256f, 0.259689f, 0.000890f, 0.172595f, 0.007280f, 0.094462f, 0.001879f, 0.119530f, +0.070776f, 0.898912f, 0.002241f, 0.785246f, 0.091016f, 1.048397f, 0.035298f, 1.788790f, 0.093880f, 1.620279f, 0.002659f, 1.168793f, 0.121611f, 1.075645f, 0.010244f, 1.477288f, +0.001136f, 0.017128f, 0.000008f, 0.012469f, 0.000728f, 0.009945f, 0.000062f, 0.014141f, 0.000218f, 0.004462f, 0.000001f, 0.002682f, 0.000970f, 0.010177f, 0.000018f, 0.011648f, +0.001715f, 0.018455f, 0.000065f, 0.019386f, 0.001334f, 0.013021f, 0.000624f, 0.026715f, 0.002886f, 0.042205f, 
0.000099f, 0.036608f, 0.002197f, 0.016462f, 0.000223f, 0.027187f, +0.000678f, 0.010775f, 0.000145f, 0.009781f, 0.000514f, 0.007399f, 0.001344f, 0.013118f, 0.000746f, 0.016095f, 0.000142f, 0.012065f, 0.000566f, 0.006257f, 0.000321f, 0.008929f, +0.002444f, 0.026463f, 0.000170f, 0.026072f, 0.002826f, 0.027753f, 0.002413f, 0.053406f, 0.004425f, 0.065109f, 0.000276f, 0.052971f, 0.006126f, 0.046193f, 0.001136f, 0.071551f, +0.068374f, 0.766772f, 0.000309f, 0.412657f, 0.048816f, 0.496496f, 0.002699f, 0.521895f, 0.008991f, 0.137016f, 0.000036f, 0.060891f, 0.062970f, 0.491786f, 0.000756f, 0.416108f, +0.150192f, 1.202675f, 0.003750f, 0.933935f, 0.130298f, 0.946275f, 0.039840f, 1.435260f, 0.173373f, 1.886560f, 0.003871f, 1.209757f, 0.207657f, 1.158021f, 0.013791f, 1.413814f, +0.041562f, 0.491255f, 0.005805f, 0.329667f, 0.035091f, 0.376174f, 0.060033f, 0.493063f, 0.031337f, 0.503330f, 0.003915f, 0.278920f, 0.037406f, 0.307907f, 0.013899f, 0.324859f, +0.295781f, 2.383040f, 0.013483f, 1.735698f, 0.381426f, 2.787073f, 0.212950f, 3.964934f, 0.367342f, 4.021788f, 0.014977f, 2.418917f, 0.800264f, 4.490169f, 0.097044f, 5.141772f, +0.080402f, 1.460075f, 0.000007f, 0.659669f, 0.039988f, 0.658589f, 0.000040f, 0.581179f, 0.007586f, 0.187209f, 0.000001f, 0.069845f, 0.043024f, 0.544114f, 0.000009f, 0.386499f, +0.103366f, 1.340347f, 0.000046f, 0.873803f, 0.062468f, 0.734641f, 0.000344f, 0.935441f, 0.085617f, 1.508638f, 0.000034f, 0.812157f, 0.083039f, 0.749877f, 0.000099f, 0.768588f, +0.001394f, 0.026672f, 0.000004f, 0.015027f, 0.000820f, 0.014228f, 0.000025f, 0.015656f, 0.000754f, 0.019609f, 0.000002f, 0.009122f, 0.000729f, 0.009714f, 0.000005f, 0.008604f, +0.125433f, 1.636474f, 0.000103f, 1.000644f, 0.112678f, 1.333262f, 0.001133f, 1.592323f, 0.111778f, 1.981721f, 0.000082f, 1.000627f, 0.197187f, 1.791617f, 0.000431f, 1.722358f, +0.130950f, 2.999523f, 0.000073f, 1.187050f, 0.068638f, 1.425905f, 0.000466f, 1.102179f, 0.013659f, 0.425174f, 0.000007f, 0.138945f, 0.050474f, 0.805169f, 
0.000074f, 0.500969f, +0.589684f, 9.644877f, 0.001807f, 5.507559f, 0.375577f, 5.571272f, 0.014096f, 6.213863f, 0.539960f, 12.001270f, 0.001480f, 5.659115f, 0.341228f, 3.886778f, 0.002782f, 3.489471f, +0.068682f, 1.658172f, 0.001178f, 0.818261f, 0.042573f, 0.932180f, 0.008940f, 0.898478f, 0.041078f, 1.347669f, 0.000630f, 0.549167f, 0.025871f, 0.434977f, 0.001180f, 0.337471f, +0.368572f, 6.065380f, 0.002062f, 3.248589f, 0.348940f, 5.207915f, 0.023912f, 5.448106f, 0.363103f, 8.119961f, 0.001817f, 3.591284f, 0.417359f, 4.783155f, 0.006212f, 4.027714f, +0.047982f, 0.936977f, 0.000059f, 0.418209f, 0.022615f, 0.400527f, 0.000338f, 0.349174f, 0.006832f, 0.181293f, 0.000007f, 0.066820f, 0.026979f, 0.366903f, 0.000088f, 0.257468f, +0.076242f, 1.063106f, 0.000514f, 0.684679f, 0.043666f, 0.552203f, 0.003608f, 0.694631f, 0.095296f, 1.805692f, 0.000575f, 0.960314f, 0.064359f, 0.624966f, 0.001155f, 0.632811f, +0.025920f, 0.533498f, 0.000978f, 0.296922f, 0.014448f, 0.269692f, 0.006679f, 0.293173f, 0.021161f, 0.591866f, 0.000714f, 0.272015f, 0.014243f, 0.204153f, 0.001430f, 0.178638f, +0.090185f, 1.265242f, 0.001111f, 0.764290f, 0.076776f, 0.976886f, 0.011583f, 1.152587f, 0.121277f, 2.312099f, 0.001336f, 1.153321f, 0.148974f, 1.455516f, 0.004882f, 1.382322f, +0.247049f, 3.589733f, 0.000197f, 1.184494f, 0.129853f, 1.711233f, 0.001269f, 1.102871f, 0.024128f, 0.476422f, 0.000017f, 0.129814f, 0.149944f, 1.517317f, 0.000318f, 0.787145f, +0.571433f, 5.928908f, 0.002521f, 2.822872f, 0.364968f, 3.434322f, 0.019719f, 3.193760f, 0.489918f, 6.907490f, 0.001933f, 2.715787f, 0.520679f, 3.762247f, 0.006110f, 2.816250f, +0.135912f, 2.081510f, 0.003355f, 0.856436f, 0.084481f, 1.173431f, 0.025539f, 0.943016f, 0.076110f, 1.583972f, 0.001680f, 0.538174f, 0.080613f, 0.859795f, 0.005293f, 0.556184f, +0.934048f, 9.750728f, 0.007524f, 4.354396f, 0.886761f, 8.395597f, 0.087482f, 7.322970f, 0.861575f, 12.222170f, 0.006207f, 4.507109f, 1.665471f, 12.108020f, 0.035687f, 8.501024f, +0.020501f, 0.392336f, 
0.000003f, 0.269869f, 0.013440f, 0.233264f, 0.000023f, 0.313391f, 0.001978f, 0.051439f, 0.000000f, 0.029218f, 0.015461f, 0.206055f, 0.000006f, 0.222836f, +0.028550f, 0.390130f, 0.000022f, 0.387213f, 0.022742f, 0.281850f, 0.000214f, 0.546391f, 0.024180f, 0.449013f, 0.000017f, 0.368009f, 0.032323f, 0.307605f, 0.000066f, 0.480000f, +0.000451f, 0.009096f, 0.000002f, 0.007801f, 0.000350f, 0.006395f, 0.000018f, 0.010714f, 0.000249f, 0.006838f, 0.000001f, 0.004843f, 0.000332f, 0.004668f, 0.000004f, 0.006295f, +0.042305f, 0.581652f, 0.000059f, 0.541474f, 0.050093f, 0.624627f, 0.000862f, 1.135742f, 0.038550f, 0.720242f, 0.000048f, 0.553671f, 0.093729f, 0.897451f, 0.000350f, 1.313508f, +0.016584f, 0.400332f, 0.000016f, 0.241202f, 0.011458f, 0.250847f, 0.000133f, 0.295199f, 0.001769f, 0.058025f, 0.000002f, 0.028869f, 0.009009f, 0.151449f, 0.000023f, 0.143461f, +0.080896f, 1.394358f, 0.000424f, 1.212217f, 0.067914f, 1.061653f, 0.004362f, 1.802742f, 0.075744f, 1.774133f, 0.000355f, 1.273654f, 0.065972f, 0.791915f, 0.000920f, 1.082411f, +0.011039f, 0.280855f, 0.000324f, 0.211003f, 0.009019f, 0.208115f, 0.003241f, 0.305389f, 0.006751f, 0.233409f, 0.000177f, 0.144805f, 0.005860f, 0.103832f, 0.000457f, 0.122643f, +0.061743f, 1.070773f, 0.000591f, 0.873128f, 0.077050f, 1.211865f, 0.009037f, 1.930098f, 0.062199f, 1.465801f, 0.000533f, 0.986994f, 0.098535f, 1.190050f, 0.002510f, 1.525644f, +0.014567f, 0.299773f, 0.000030f, 0.203705f, 0.009050f, 0.168907f, 0.000231f, 0.224182f, 0.002121f, 0.059310f, 0.000004f, 0.033281f, 0.011543f, 0.165435f, 0.000064f, 0.176743f, +0.025073f, 0.368426f, 0.000289f, 0.361247f, 0.018928f, 0.252245f, 0.002676f, 0.483083f, 0.032045f, 0.639881f, 0.000331f, 0.518098f, 0.029828f, 0.305240f, 0.000916f, 0.470547f, +0.009987f, 0.216611f, 0.000645f, 0.183542f, 0.007337f, 0.144334f, 0.005805f, 0.238872f, 0.008337f, 0.245727f, 0.000482f, 0.171936f, 0.007734f, 0.116820f, 0.001329f, 0.155624f, +0.036216f, 0.535438f, 0.000763f, 0.492422f, 0.040639f, 0.544917f, 
0.010493f, 0.978822f, 0.049800f, 1.000515f, 0.000939f, 0.759821f, 0.084311f, 0.868089f, 0.004728f, 1.255162f, +0.060278f, 0.923023f, 0.000082f, 0.463689f, 0.041762f, 0.579976f, 0.000698f, 0.569075f, 0.006020f, 0.125263f, 0.000007f, 0.051963f, 0.051561f, 0.549842f, 0.000187f, 0.434270f, +0.151027f, 1.651333f, 0.001140f, 1.197002f, 0.127144f, 1.260815f, 0.011756f, 1.785075f, 0.132402f, 1.967260f, 0.000894f, 1.177554f, 0.193941f, 1.476789f, 0.003895f, 1.683007f, +0.042085f, 0.679224f, 0.001778f, 0.425474f, 0.034481f, 0.504711f, 0.017839f, 0.617515f, 0.024098f, 0.528523f, 0.000910f, 0.273390f, 0.035179f, 0.395404f, 0.003953f, 0.389411f, +0.301454f, 3.316340f, 0.004156f, 2.254725f, 0.377232f, 3.763779f, 0.063691f, 4.998083f, 0.284332f, 4.250615f, 0.003506f, 2.386411f, 0.757530f, 5.803716f, 0.027780f, 6.203661f, +0.129744f, 1.421973f, 0.000010f, 0.718367f, 0.065432f, 0.650384f, 0.000062f, 0.641755f, 0.010822f, 0.161175f, 0.000001f, 0.067238f, 0.068977f, 0.526469f, 0.000014f, 0.418153f, +0.165953f, 1.298726f, 0.000071f, 0.946713f, 0.101696f, 0.721797f, 0.000536f, 1.027687f, 0.121512f, 1.292232f, 0.000047f, 0.777858f, 0.132451f, 0.721867f, 0.000152f, 0.827305f, +0.001643f, 0.018979f, 0.000004f, 0.011955f, 0.000980f, 0.010265f, 0.000029f, 0.012631f, 0.000786f, 0.012334f, 0.000002f, 0.006416f, 0.000854f, 0.006867f, 0.000005f, 0.006801f, +0.178772f, 1.407638f, 0.000140f, 0.962423f, 0.162843f, 1.162885f, 0.001566f, 1.552948f, 0.140832f, 1.506883f, 0.000099f, 0.850773f, 0.279213f, 1.531066f, 0.000583f, 1.645798f, +0.183526f, 2.537102f, 0.000097f, 1.122688f, 0.097543f, 1.222970f, 0.000633f, 1.057016f, 0.016923f, 0.317912f, 0.000008f, 0.116168f, 0.070280f, 0.676612f, 0.000099f, 0.470726f, +0.822236f, 8.116466f, 0.002409f, 5.182435f, 0.531026f, 4.754053f, 0.019058f, 5.928923f, 0.665571f, 8.927964f, 0.001744f, 4.707370f, 0.472703f, 3.249577f, 0.003685f, 3.262129f, +0.070327f, 1.024716f, 0.001153f, 0.565419f, 0.044203f, 0.584135f, 0.008876f, 0.629543f, 0.037183f, 0.736228f, 
0.000545f, 0.335458f, 0.026318f, 0.267059f, 0.001148f, 0.231676f, +0.456227f, 4.531163f, 0.002441f, 2.713632f, 0.437974f, 3.945073f, 0.028702f, 4.614675f, 0.397323f, 5.362419f, 0.001901f, 2.651924f, 0.513258f, 3.550037f, 0.007306f, 3.342579f, +0.103890f, 1.224385f, 0.000121f, 0.611065f, 0.049652f, 0.530714f, 0.000709f, 0.517338f, 0.013077f, 0.209423f, 0.000014f, 0.086308f, 0.058036f, 0.476329f, 0.000180f, 0.373752f, +0.164239f, 1.382134f, 0.001059f, 0.995325f, 0.095381f, 0.727967f, 0.007536f, 1.023933f, 0.181473f, 2.075260f, 0.001047f, 1.234089f, 0.137739f, 0.807230f, 0.002364f, 0.913944f, +0.041004f, 0.509343f, 0.001480f, 0.316975f, 0.023175f, 0.261087f, 0.010245f, 0.317354f, 0.029593f, 0.499524f, 0.000955f, 0.256702f, 0.022385f, 0.193643f, 0.002149f, 0.189463f, +0.172463f, 1.460255f, 0.002031f, 0.986320f, 0.148878f, 1.143243f, 0.021478f, 1.508248f, 0.205021f, 2.358939f, 0.002160f, 1.315725f, 0.283034f, 1.668933f, 0.008869f, 1.772294f, +0.384629f, 3.372989f, 0.000293f, 1.244486f, 0.204999f, 1.630427f, 0.001915f, 1.174955f, 0.033208f, 0.395730f, 0.000023f, 0.120568f, 0.231929f, 1.416433f, 0.000471f, 0.821634f, +0.885136f, 5.542582f, 0.003734f, 2.950754f, 0.573242f, 3.255501f, 0.029617f, 3.385194f, 0.670846f, 5.708378f, 0.002531f, 2.509532f, 0.801274f, 3.494230f, 0.008992f, 2.924690f, +0.154599f, 1.428958f, 0.003649f, 0.657416f, 0.097442f, 0.816842f, 0.028169f, 0.734013f, 0.076532f, 0.961266f, 0.001616f, 0.365194f, 0.091101f, 0.586412f, 0.005720f, 0.424161f, +1.284385f, 8.092000f, 0.009893f, 4.040650f, 1.236436f, 7.064961f, 0.116645f, 6.890487f, 1.047307f, 8.966483f, 0.007215f, 3.697230f, 2.275253f, 9.982943f, 0.046622f, 7.837207f, +0.033462f, 0.468986f, 0.000015f, 0.345591f, 0.025195f, 0.320254f, 0.000142f, 0.460938f, 0.005239f, 0.099778f, 0.000002f, 0.060715f, 0.031207f, 0.304591f, 0.000038f, 0.352880f, +0.033557f, 0.335829f, 0.000085f, 0.357081f, 0.030702f, 0.278659f, 0.000957f, 0.578717f, 0.046120f, 0.627206f, 0.000105f, 0.550703f, 0.046982f, 0.327442f, 
0.000318f, 0.547381f, +0.000824f, 0.012165f, 0.000012f, 0.011178f, 0.000733f, 0.009824f, 0.000128f, 0.017631f, 0.000739f, 0.014840f, 0.000009f, 0.011260f, 0.000751f, 0.007721f, 0.000028f, 0.011154f, +0.059606f, 0.600177f, 0.000277f, 0.598553f, 0.081061f, 0.740257f, 0.004615f, 1.441949f, 0.088137f, 1.205971f, 0.000366f, 0.993158f, 0.163305f, 1.145141f, 0.002019f, 1.795514f, +0.031243f, 0.552327f, 0.000098f, 0.356504f, 0.024792f, 0.397494f, 0.000952f, 0.501123f, 0.005408f, 0.129908f, 0.000015f, 0.069241f, 0.020988f, 0.258389f, 0.000175f, 0.262211f, +0.109745f, 1.385344f, 0.001903f, 1.290245f, 0.105819f, 1.211467f, 0.022475f, 2.203793f, 0.166745f, 2.860302f, 0.002586f, 2.199811f, 0.110676f, 0.972957f, 0.005106f, 1.424674f, +0.023268f, 0.433556f, 0.002258f, 0.348947f, 0.021835f, 0.368988f, 0.025948f, 0.580058f, 0.023092f, 0.584686f, 0.002004f, 0.388594f, 0.015275f, 0.198210f, 0.003943f, 0.250811f, +0.100406f, 1.275229f, 0.003179f, 1.113979f, 0.143907f, 1.657641f, 0.055810f, 2.828290f, 0.164131f, 2.832749f, 0.004648f, 2.043411f, 0.198148f, 1.752619f, 0.016691f, 2.407043f, +0.001455f, 0.021936f, 0.000010f, 0.015969f, 0.001039f, 0.014196f, 0.000088f, 0.020184f, 0.000344f, 0.007043f, 0.000002f, 0.004234f, 0.001426f, 0.014970f, 0.000026f, 0.017133f, +0.001804f, 0.019414f, 0.000069f, 0.020393f, 0.001564f, 0.015266f, 0.000731f, 0.031321f, 0.003742f, 0.054715f, 0.000128f, 0.047460f, 0.002654f, 0.019890f, 0.000270f, 0.032848f, +0.001116f, 0.017735f, 0.000238f, 0.016099f, 0.000942f, 0.013572f, 0.002465f, 0.024064f, 0.001512f, 0.032647f, 0.000289f, 0.024472f, 0.001069f, 0.011828f, 0.000608f, 0.016880f, +0.003124f, 0.033821f, 0.000218f, 0.033321f, 0.004026f, 0.039532f, 0.003437f, 0.076073f, 0.006970f, 0.102551f, 0.000435f, 0.083433f, 0.008992f, 0.067806f, 0.001668f, 0.105030f, +0.073133f, 0.820138f, 0.000330f, 0.441377f, 0.058194f, 0.591875f, 0.003217f, 0.622154f, 0.011852f, 0.180609f, 0.000048f, 0.080264f, 0.077358f, 0.604149f, 0.000929f, 0.511180f, +0.131951f, 1.056615f, 
0.003294f, 0.820512f, 0.127585f, 0.926572f, 0.039010f, 1.405375f, 0.187714f, 2.042612f, 0.004191f, 1.309825f, 0.209537f, 1.168509f, 0.013916f, 1.426619f, +0.057130f, 0.675266f, 0.007980f, 0.453151f, 0.053760f, 0.576302f, 0.091971f, 0.755377f, 0.053084f, 0.852644f, 0.006632f, 0.472493f, 0.059055f, 0.486110f, 0.021944f, 0.512873f, +0.315709f, 2.543596f, 0.014391f, 1.852640f, 0.453754f, 3.315575f, 0.253331f, 4.716789f, 0.483208f, 5.290335f, 0.019700f, 3.181888f, 0.981065f, 5.504616f, 0.118969f, 6.303434f, +0.076210f, 1.383946f, 0.000006f, 0.625273f, 0.042244f, 0.695748f, 0.000042f, 0.613970f, 0.008862f, 0.218685f, 0.000001f, 0.081588f, 0.046839f, 0.592354f, 0.000010f, 0.420765f, +0.080477f, 1.043539f, 0.000036f, 0.680307f, 0.054206f, 0.637471f, 0.000299f, 0.811712f, 0.082148f, 1.447517f, 0.000033f, 0.779254f, 0.074254f, 0.670547f, 0.000089f, 0.687279f, +0.001697f, 0.032490f, 0.000004f, 0.018304f, 0.001113f, 0.019316f, 0.000034f, 0.021255f, 0.001132f, 0.029437f, 0.000003f, 0.013695f, 0.001020f, 0.013590f, 0.000007f, 0.012037f, +0.118646f, 1.547923f, 0.000097f, 0.946499f, 0.118789f, 1.405560f, 0.001195f, 1.678669f, 0.130300f, 2.310095f, 0.000096f, 1.166432f, 0.214224f, 1.946404f, 0.000468f, 1.871161f, +0.125398f, 2.872369f, 0.000069f, 1.136729f, 0.073257f, 1.521851f, 0.000497f, 1.176343f, 0.016120f, 0.501767f, 0.000008f, 0.163975f, 0.055514f, 0.885569f, 0.000082f, 0.550994f, +0.463826f, 7.586342f, 0.001421f, 4.332064f, 0.329252f, 4.884091f, 0.012357f, 5.447422f, 0.523413f, 11.633490f, 0.001435f, 5.485691f, 0.308267f, 3.511341f, 0.002513f, 3.152411f, +0.084523f, 2.040633f, 0.001449f, 1.006995f, 0.058393f, 1.278583f, 0.012262f, 1.232356f, 0.062300f, 2.043928f, 0.000955f, 0.832889f, 0.036567f, 0.614822f, 0.001668f, 0.477001f, +0.352214f, 5.796186f, 0.001971f, 3.104411f, 0.371645f, 5.546792f, 0.025468f, 5.802612f, 0.427624f, 9.562807f, 0.002140f, 4.229424f, 0.458081f, 5.249843f, 0.006818f, 4.420695f, +0.041395f, 0.808343f, 0.000050f, 0.360795f, 0.021745f, 0.385117f, 
0.000325f, 0.335739f, 0.007264f, 0.192750f, 0.000008f, 0.071042f, 0.026733f, 0.363551f, 0.000087f, 0.255116f, +0.054027f, 0.753340f, 0.000364f, 0.485178f, 0.034487f, 0.436121f, 0.002849f, 0.548608f, 0.083222f, 1.576905f, 0.000502f, 0.838638f, 0.052381f, 0.508650f, 0.000940f, 0.515035f, +0.028738f, 0.591489f, 0.001085f, 0.329198f, 0.017853f, 0.333254f, 0.008253f, 0.362269f, 0.028914f, 0.808695f, 0.000976f, 0.371667f, 0.018137f, 0.259967f, 0.001821f, 0.227476f, +0.077642f, 1.089273f, 0.000956f, 0.657993f, 0.073669f, 0.937348f, 0.011114f, 1.105938f, 0.128674f, 2.453108f, 0.001418f, 1.223659f, 0.147306f, 1.439222f, 0.004827f, 1.366847f, +0.177998f, 2.586402f, 0.000142f, 0.853428f, 0.104275f, 1.374158f, 0.001019f, 0.885630f, 0.021424f, 0.423031f, 0.000015f, 0.115266f, 0.124082f, 1.255619f, 0.000263f, 0.651382f, +0.338179f, 3.508781f, 0.001492f, 1.670601f, 0.240730f, 2.265250f, 0.013006f, 2.106578f, 0.357316f, 5.037893f, 0.001410f, 1.980726f, 0.353915f, 2.557269f, 0.004153f, 1.914257f, +0.125846f, 1.927346f, 0.003106f, 0.793005f, 0.087184f, 1.210967f, 0.026356f, 0.973181f, 0.086850f, 1.807489f, 0.001917f, 0.614117f, 0.085731f, 0.914373f, 0.005629f, 0.591490f, +0.671583f, 7.010795f, 0.005410f, 3.130820f, 0.710608f, 6.727836f, 0.070104f, 5.868283f, 0.763432f, 10.829930f, 0.005500f, 3.993699f, 1.375355f, 9.998865f, 0.029471f, 7.020191f, +0.000383f, 0.007329f, 0.000000f, 0.005041f, 0.000280f, 0.004856f, 0.000000f, 0.006525f, 0.000046f, 0.001184f, 0.000000f, 0.000673f, 0.000332f, 0.004421f, 0.000000f, 0.004781f, +0.000438f, 0.005986f, 0.000000f, 0.005941f, 0.000389f, 0.004820f, 0.000004f, 0.009344f, 0.000457f, 0.008490f, 0.000000f, 0.006959f, 0.000570f, 0.005421f, 0.000001f, 0.008459f, +0.000011f, 0.000218f, 0.000000f, 0.000187f, 0.000009f, 0.000171f, 0.000000f, 0.000287f, 0.000007f, 0.000202f, 0.000000f, 0.000143f, 0.000009f, 0.000129f, 0.000000f, 0.000174f, +0.000789f, 0.010843f, 0.000001f, 0.010094f, 0.001041f, 0.012977f, 0.000018f, 0.023596f, 0.000886f, 0.016546f, 
0.000001f, 0.012720f, 0.002007f, 0.019215f, 0.000008f, 0.028122f, +0.000313f, 0.007555f, 0.000000f, 0.004552f, 0.000241f, 0.005276f, 0.000003f, 0.006209f, 0.000041f, 0.001350f, 0.000000f, 0.000671f, 0.000195f, 0.003283f, 0.000000f, 0.003110f, +0.001254f, 0.021614f, 0.000007f, 0.018791f, 0.001173f, 0.018342f, 0.000075f, 0.031145f, 0.001447f, 0.033892f, 0.000007f, 0.024331f, 0.001175f, 0.014099f, 0.000016f, 0.019271f, +0.000268f, 0.006812f, 0.000008f, 0.005117f, 0.000244f, 0.005626f, 0.000088f, 0.008255f, 0.000202f, 0.006976f, 0.000005f, 0.004328f, 0.000163f, 0.002892f, 0.000013f, 0.003416f, +0.001163f, 0.020166f, 0.000011f, 0.016444f, 0.001617f, 0.025437f, 0.000190f, 0.040513f, 0.001444f, 0.034020f, 0.000012f, 0.022908f, 0.002131f, 0.025741f, 0.000054f, 0.033000f, +0.000248f, 0.005097f, 0.000001f, 0.003463f, 0.000171f, 0.003201f, 0.000004f, 0.004248f, 0.000044f, 0.001243f, 0.000000f, 0.000697f, 0.000225f, 0.003231f, 0.000001f, 0.003451f, +0.000350f, 0.005145f, 0.000004f, 0.005045f, 0.000295f, 0.003926f, 0.000042f, 0.007519f, 0.000552f, 0.011013f, 0.000006f, 0.008917f, 0.000478f, 0.004896f, 0.000015f, 0.007547f, +0.000218f, 0.004733f, 0.000014f, 0.004010f, 0.000179f, 0.003515f, 0.000141f, 0.005817f, 0.000224f, 0.006617f, 0.000013f, 0.004630f, 0.000194f, 0.002932f, 0.000033f, 0.003905f, +0.000614f, 0.009085f, 0.000013f, 0.008355f, 0.000768f, 0.010304f, 0.000198f, 0.018509f, 0.001041f, 0.020920f, 0.000020f, 0.015887f, 0.001643f, 0.016916f, 0.000092f, 0.024459f, +0.000856f, 0.013106f, 0.000001f, 0.006584f, 0.000661f, 0.009178f, 0.000011f, 0.009006f, 0.000105f, 0.002192f, 0.000000f, 0.000909f, 0.000841f, 0.008967f, 0.000003f, 0.007082f, +0.001761f, 0.019260f, 0.000013f, 0.013961f, 0.001653f, 0.016389f, 0.000153f, 0.023204f, 0.001903f, 0.028276f, 0.000013f, 0.016926f, 0.002598f, 0.019782f, 0.000052f, 0.022545f, +0.000768f, 0.012394f, 0.000032f, 0.007764f, 0.000701f, 0.010265f, 0.000363f, 0.012559f, 0.000542f, 0.011886f, 0.000020f, 0.006148f, 0.000737f, 0.008287f, 
0.000083f, 0.008161f, +0.004272f, 0.046992f, 0.000059f, 0.031949f, 0.005958f, 0.059440f, 0.001006f, 0.078933f, 0.004965f, 0.074227f, 0.000061f, 0.041673f, 0.012328f, 0.094453f, 0.000452f, 0.100962f, +0.159710f, 1.750391f, 0.000012f, 0.884281f, 0.089769f, 0.892292f, 0.000086f, 0.880455f, 0.016417f, 0.244506f, 0.000001f, 0.102001f, 0.097520f, 0.744327f, 0.000020f, 0.591189f, +0.167794f, 1.313134f, 0.000072f, 0.957216f, 0.114602f, 0.813393f, 0.000604f, 1.158100f, 0.151412f, 1.610197f, 0.000058f, 0.969257f, 0.153814f, 0.838294f, 0.000176f, 0.960737f, +0.002599f, 0.030023f, 0.000006f, 0.018913f, 0.001728f, 0.018099f, 0.000051f, 0.022269f, 0.001532f, 0.024046f, 0.000003f, 0.012509f, 0.001551f, 0.012476f, 0.000010f, 0.012357f, +0.219604f, 1.729144f, 0.000172f, 1.182242f, 0.222947f, 1.592101f, 0.002145f, 2.126134f, 0.213201f, 2.281218f, 0.000150f, 1.287956f, 0.393933f, 2.160139f, 0.000823f, 2.322011f, +0.228236f, 3.155193f, 0.000121f, 1.396198f, 0.135201f, 1.695108f, 0.000877f, 1.465087f, 0.025937f, 0.487239f, 0.000012f, 0.178042f, 0.100384f, 0.966440f, 0.000141f, 0.672363f, +0.839909f, 8.290918f, 0.002461f, 5.293824f, 0.604568f, 5.412442f, 0.021698f, 6.750020f, 0.837871f, 11.239190f, 0.002196f, 5.925992f, 0.554589f, 3.812501f, 0.004323f, 3.827228f, +0.112398f, 1.637717f, 0.001843f, 0.903661f, 0.078738f, 1.040500f, 0.015811f, 1.121383f, 0.073236f, 1.450089f, 0.001074f, 0.660725f, 0.048310f, 0.490219f, 0.002107f, 0.425269f, +0.566194f, 5.623335f, 0.003030f, 3.367714f, 0.605796f, 5.456735f, 0.039699f, 6.382914f, 0.607681f, 8.201478f, 0.002908f, 4.055949f, 0.731590f, 5.060165f, 0.010413f, 4.764457f, +0.116397f, 1.371782f, 0.000136f, 0.684627f, 0.062001f, 0.662706f, 0.000886f, 0.646004f, 0.018055f, 0.289160f, 0.000019f, 0.119170f, 0.074681f, 0.612945f, 0.000232f, 0.480947f, +0.151144f, 1.271934f, 0.000975f, 0.915966f, 0.097829f, 0.746655f, 0.007729f, 1.050219f, 0.205813f, 2.353608f, 0.001187f, 1.399613f, 0.145585f, 0.853216f, 0.002498f, 0.966010f, +0.059039f, 0.733372f, 
0.002131f, 0.456393f, 0.037190f, 0.418979f, 0.016441f, 0.509275f, 0.052510f, 0.886374f, 0.001695f, 0.455503f, 0.037018f, 0.320230f, 0.003554f, 0.313318f, +0.192824f, 1.632645f, 0.002271f, 1.102760f, 0.185518f, 1.424608f, 0.026764f, 1.879445f, 0.282493f, 3.250326f, 0.002976f, 1.812906f, 0.363454f, 2.143136f, 0.011389f, 2.275864f, +0.359895f, 3.156085f, 0.000275f, 1.164458f, 0.213786f, 1.700312f, 0.001997f, 1.225318f, 0.038293f, 0.456332f, 0.000026f, 0.139032f, 0.249251f, 1.522219f, 0.000506f, 0.882998f, +0.680286f, 4.259843f, 0.002870f, 2.267851f, 0.491036f, 2.788643f, 0.025370f, 2.899737f, 0.635406f, 5.406813f, 0.002397f, 2.376957f, 0.707311f, 3.084469f, 0.007938f, 2.581718f, +0.185904f, 1.718307f, 0.004388f, 0.790536f, 0.130594f, 1.094744f, 0.037752f, 0.983736f, 0.113415f, 1.424530f, 0.002394f, 0.541192f, 0.125820f, 0.809900f, 0.007900f, 0.585814f, +1.199293f, 7.555896f, 0.009238f, 3.772952f, 1.286754f, 7.352474f, 0.121392f, 7.170900f, 1.205178f, 10.318090f, 0.008303f, 4.254550f, 2.440099f, 10.706220f, 0.050000f, 8.405024f, +0.087034f, 1.219811f, 0.000040f, 0.898866f, 0.056679f, 0.720443f, 0.000320f, 1.036924f, 0.011719f, 0.223186f, 0.000005f, 0.135809f, 0.065625f, 0.640528f, 0.000080f, 0.742076f, +0.084179f, 0.842437f, 0.000214f, 0.895748f, 0.066612f, 0.604594f, 0.002077f, 1.255617f, 0.099496f, 1.353093f, 0.000227f, 1.188050f, 0.095289f, 0.664113f, 0.000645f, 1.110191f, +0.002637f, 0.038949f, 0.000038f, 0.035789f, 0.002031f, 0.027204f, 0.000354f, 0.048824f, 0.002036f, 0.040862f, 0.000026f, 0.031004f, 0.001943f, 0.019987f, 0.000074f, 0.028874f, +0.146069f, 1.470788f, 0.000679f, 1.466806f, 0.171813f, 1.569007f, 0.009781f, 3.056274f, 0.185749f, 2.541591f, 0.000772f, 2.093087f, 0.323563f, 2.268913f, 0.004001f, 3.557524f, +0.061318f, 1.083997f, 0.000192f, 0.699675f, 0.042084f, 0.674737f, 0.001616f, 0.850644f, 0.009127f, 0.219262f, 0.000026f, 0.116867f, 0.033303f, 0.410010f, 0.000278f, 0.416074f, +0.207733f, 2.622260f, 0.003602f, 2.442250f, 0.173242f, 1.983360f, 
0.036796f, 3.607952f, 0.271437f, 4.656163f, 0.004210f, 3.580977f, 0.169380f, 1.489019f, 0.007814f, 2.180329f, +0.056214f, 1.047435f, 0.005454f, 0.843027f, 0.045625f, 0.771020f, 0.054220f, 1.212062f, 0.047977f, 1.214795f, 0.004164f, 0.807377f, 0.029836f, 0.387164f, 0.007701f, 0.489911f, +0.185664f, 2.358075f, 0.005879f, 2.059901f, 0.230157f, 2.651134f, 0.089260f, 4.523403f, 0.261011f, 4.504802f, 0.007392f, 3.249552f, 0.296243f, 2.620267f, 0.024954f, 3.598667f, +0.004153f, 0.062583f, 0.000029f, 0.045559f, 0.002563f, 0.035029f, 0.000217f, 0.049807f, 0.000844f, 0.017280f, 0.000005f, 0.010387f, 0.003290f, 0.034531f, 0.000060f, 0.039522f, +0.004964f, 0.053421f, 0.000190f, 0.056114f, 0.003723f, 0.036333f, 0.001741f, 0.074543f, 0.008854f, 0.129479f, 0.000302f, 0.112311f, 0.005904f, 0.044251f, 0.000600f, 0.073079f, +0.003921f, 0.062285f, 0.000838f, 0.056539f, 0.002862f, 0.041228f, 0.007487f, 0.073096f, 0.004568f, 0.098605f, 0.000873f, 0.073913f, 0.003036f, 0.033585f, 0.001725f, 0.047930f, +0.008396f, 0.090913f, 0.000585f, 0.089570f, 0.009360f, 0.091911f, 0.007991f, 0.176867f, 0.016112f, 0.237073f, 0.001005f, 0.192876f, 0.019543f, 0.147368f, 0.003624f, 0.228269f, +0.173902f, 1.950198f, 0.000785f, 1.049546f, 0.119686f, 1.217290f, 0.006617f, 1.279562f, 0.024236f, 0.369343f, 0.000098f, 0.164139f, 0.148725f, 1.161514f, 0.001786f, 0.982776f, +0.302616f, 2.423231f, 0.007555f, 1.881756f, 0.253075f, 1.837931f, 0.077380f, 2.787677f, 0.370231f, 4.028677f, 0.008267f, 2.583389f, 0.388533f, 2.166700f, 0.025803f, 2.645298f, +0.167226f, 1.976593f, 0.023359f, 1.326434f, 0.136105f, 1.459031f, 0.232845f, 1.912395f, 0.133631f, 2.146388f, 0.016694f, 1.189421f, 0.139760f, 1.150442f, 0.051933f, 1.213782f, +0.707321f, 5.698726f, 0.032243f, 4.150692f, 0.879270f, 6.424811f, 0.490896f, 9.140038f, 0.931027f, 10.193210f, 0.037958f, 6.130739f, 1.777116f, 9.971146f, 0.215502f, 11.418140f, +0.255893f, 4.646945f, 0.000021f, 2.099511f, 0.122683f, 2.020562f, 0.000122f, 1.783066f, 0.025590f, 0.631487f, 
0.000002f, 0.235599f, 0.127157f, 1.608119f, 0.000028f, 1.142289f, +0.260618f, 3.379430f, 0.000117f, 2.203128f, 0.151828f, 1.785531f, 0.000836f, 2.273571f, 0.228786f, 4.031407f, 0.000092f, 2.170261f, 0.194422f, 1.755708f, 0.000233f, 1.799517f, +0.007016f, 0.134293f, 0.000018f, 0.075657f, 0.003978f, 0.069054f, 0.000123f, 0.075985f, 0.004023f, 0.104638f, 0.000009f, 0.048679f, 0.003407f, 0.045416f, 0.000023f, 0.040226f, +0.375351f, 4.897059f, 0.000308f, 2.994374f, 0.325037f, 3.845984f, 0.003270f, 4.593281f, 0.354510f, 6.285129f, 0.000260f, 3.173539f, 0.547951f, 4.978604f, 0.001197f, 4.786143f, +0.317716f, 7.277588f, 0.000176f, 2.880077f, 0.160533f, 3.334966f, 0.001089f, 2.577822f, 0.035125f, 1.093320f, 0.000017f, 0.357292f, 0.113721f, 1.814088f, 0.000168f, 1.128710f, +1.133414f, 18.538130f, 0.003473f, 10.585920f, 0.695880f, 10.322600f, 0.026117f, 11.513210f, 1.099959f, 24.447910f, 0.003015f, 11.528250f, 0.609045f, 6.937374f, 0.004965f, 6.228234f, +0.263618f, 6.364472f, 0.004520f, 3.140688f, 0.157519f, 3.449041f, 0.033077f, 3.324341f, 0.167103f, 5.482283f, 0.002562f, 2.233999f, 0.092210f, 1.550369f, 0.004206f, 1.202832f, +0.840797f, 13.836530f, 0.004705f, 7.410783f, 0.767335f, 11.452460f, 0.052585f, 11.980650f, 0.877900f, 19.632190f, 0.004393f, 8.682894f, 0.884129f, 10.132570f, 0.013160f, 8.532258f, +0.152464f, 2.977270f, 0.000186f, 1.328871f, 0.069272f, 1.226837f, 0.001035f, 1.069538f, 0.023008f, 0.610541f, 0.000025f, 0.225029f, 0.079608f, 1.082620f, 0.000258f, 0.759711f, +0.191920f, 2.676085f, 0.001295f, 1.723496f, 0.105957f, 1.339948f, 0.008754f, 1.685556f, 0.254240f, 4.817398f, 0.001534f, 2.562017f, 0.150442f, 1.460885f, 0.002700f, 1.479224f, +0.130295f, 2.681759f, 0.004918f, 1.492554f, 0.070009f, 1.306834f, 0.032363f, 1.420613f, 0.112740f, 3.153231f, 0.003806f, 1.449190f, 0.066485f, 0.952971f, 0.006676f, 0.833868f, +0.269438f, 3.780045f, 0.003319f, 2.283398f, 0.221114f, 2.813411f, 0.033358f, 3.319424f, 0.384016f, 7.321081f, 0.004231f, 3.651901f, 0.413304f, 
4.038094f, 0.013543f, 3.835029f, +0.546416f, 7.939687f, 0.000436f, 2.619836f, 0.276859f, 3.648513f, 0.002705f, 2.351427f, 0.056560f, 1.116807f, 0.000040f, 0.304304f, 0.307968f, 3.116403f, 0.000653f, 1.616709f, +1.001246f, 10.388430f, 0.004417f, 4.946142f, 0.616447f, 5.800721f, 0.033306f, 5.394401f, 0.909797f, 12.827470f, 0.003590f, 5.043320f, 0.847191f, 6.121511f, 0.009942f, 4.582289f, +0.475552f, 7.283118f, 0.011738f, 2.996635f, 0.284948f, 3.957874f, 0.086139f, 3.180704f, 0.282244f, 5.873969f, 0.006231f, 1.995754f, 0.261928f, 2.793634f, 0.017198f, 1.807147f, +1.942427f, 20.277410f, 0.015646f, 9.055308f, 1.777655f, 16.830320f, 0.175371f, 14.680070f, 1.898951f, 26.938220f, 0.013680f, 9.933878f, 3.216240f, 23.382140f, 0.068917f, 16.416580f, +0.056883f, 1.088580f, 0.000008f, 0.748780f, 0.035947f, 0.623901f, 0.000061f, 0.838213f, 0.005817f, 0.151265f, 0.000001f, 0.085920f, 0.039836f, 0.530911f, 0.000015f, 0.574148f, +0.062753f, 0.857523f, 0.000048f, 0.851110f, 0.048187f, 0.597201f, 0.000454f, 1.157724f, 0.056330f, 1.046022f, 0.000039f, 0.857313f, 0.065976f, 0.627865f, 0.000135f, 0.979746f, +0.001979f, 0.039924f, 0.000009f, 0.034243f, 0.001479f, 0.027059f, 0.000078f, 0.045332f, 0.001160f, 0.031809f, 0.000004f, 0.022529f, 0.001355f, 0.019028f, 0.000016f, 0.025659f, +0.110365f, 1.517399f, 0.000155f, 1.412583f, 0.125973f, 1.570806f, 0.002169f, 2.856154f, 0.106587f, 1.991408f, 0.000134f, 1.530853f, 0.227063f, 2.174119f, 0.000849f, 3.182036f, +0.035079f, 0.846770f, 0.000033f, 0.510183f, 0.023363f, 0.511470f, 0.000271f, 0.601901f, 0.003966f, 0.130079f, 0.000003f, 0.064718f, 0.017695f, 0.297473f, 0.000045f, 0.281783f, +0.135552f, 2.336435f, 0.000711f, 2.031233f, 0.109699f, 1.714855f, 0.007046f, 2.911913f, 0.134517f, 3.150729f, 0.000631f, 2.261914f, 0.102654f, 1.232236f, 0.001432f, 1.684254f, +0.036937f, 0.939777f, 0.001084f, 0.706043f, 0.029092f, 0.671292f, 0.010455f, 0.985059f, 0.023942f, 0.827762f, 0.000628f, 0.513536f, 0.018209f, 0.322633f, 0.001421f, 0.381085f, +0.122792f, 
2.129497f, 0.001176f, 1.736431f, 0.147712f, 2.323267f, 0.017324f, 3.700193f, 0.131101f, 3.089584f, 0.001123f, 2.080365f, 0.181973f, 2.197762f, 0.004636f, 2.817530f, +0.040353f, 0.830410f, 0.000084f, 0.564288f, 0.024166f, 0.451037f, 0.000618f, 0.598639f, 0.006227f, 0.174129f, 0.000012f, 0.097710f, 0.029694f, 0.425561f, 0.000165f, 0.454650f, +0.055022f, 0.808507f, 0.000635f, 0.792753f, 0.040040f, 0.533608f, 0.005662f, 1.021930f, 0.074532f, 1.488257f, 0.000770f, 1.205011f, 0.060784f, 0.622030f, 0.001867f, 0.958898f, +0.043764f, 0.949245f, 0.002827f, 0.804327f, 0.030995f, 0.609719f, 0.024522f, 1.009087f, 0.038721f, 1.141290f, 0.002237f, 0.798563f, 0.031472f, 0.475389f, 0.005408f, 0.633302f, +0.094326f, 1.394578f, 0.001988f, 1.282541f, 0.102034f, 1.368137f, 0.026344f, 2.457553f, 0.137469f, 2.761867f, 0.002592f, 2.097443f, 0.203918f, 2.099586f, 0.011436f, 3.035774f, +0.116229f, 1.779769f, 0.000159f, 0.894084f, 0.077625f, 1.078020f, 0.001298f, 1.057757f, 0.012302f, 0.255988f, 0.000015f, 0.106192f, 0.092322f, 0.984521f, 0.000335f, 0.777583f, +0.230696f, 2.522436f, 0.001742f, 1.828438f, 0.187217f, 1.856532f, 0.017311f, 2.628497f, 0.214351f, 3.184877f, 0.001447f, 1.906390f, 0.275101f, 2.094787f, 0.005525f, 2.387302f, +0.128373f, 2.071868f, 0.005423f, 1.297843f, 0.101389f, 1.484080f, 0.052455f, 1.815776f, 0.077908f, 1.708669f, 0.002943f, 0.883847f, 0.099648f, 1.120019f, 0.011198f, 1.103045f, +0.546520f, 6.012353f, 0.007534f, 4.087699f, 0.659265f, 6.577716f, 0.111309f, 8.734830f, 0.546332f, 8.167377f, 0.006736f, 4.585388f, 1.275326f, 9.770743f, 0.046769f, 10.444060f, +0.465479f, 5.101561f, 0.000036f, 2.577259f, 0.226291f, 2.249298f, 0.000216f, 2.219458f, 0.041150f, 0.612852f, 0.000003f, 0.255664f, 0.229800f, 1.753963f, 0.000048f, 1.393102f, +0.471662f, 3.691165f, 0.000203f, 2.690694f, 0.278623f, 1.977547f, 0.001468f, 2.815609f, 0.366026f, 3.892528f, 0.000141f, 2.343104f, 0.349574f, 1.905194f, 0.000400f, 2.183471f, +0.009325f, 0.107715f, 0.000022f, 0.067854f, 0.005361f, 
0.056163f, 0.000158f, 0.069103f, 0.004726f, 0.074194f, 0.000010f, 0.038595f, 0.004499f, 0.036191f, 0.000029f, 0.035843f, +0.603040f, 4.748285f, 0.000474f, 3.246474f, 0.529517f, 3.781362f, 0.005094f, 5.049732f, 0.503492f, 5.387295f, 0.000354f, 3.041620f, 0.874615f, 4.795963f, 0.001827f, 5.155353f, +0.501940f, 6.938935f, 0.000266f, 3.070534f, 0.257168f, 3.224305f, 0.001669f, 2.786776f, 0.049055f, 0.921526f, 0.000023f, 0.336735f, 0.178493f, 1.718424f, 0.000252f, 1.195526f, +1.781498f, 17.585550f, 0.005220f, 11.228530f, 1.109100f, 9.929297f, 0.039805f, 12.383120f, 1.528370f, 20.501550f, 0.004006f, 10.809680f, 0.951071f, 6.538103f, 0.007414f, 6.563358f, +0.304281f, 4.433594f, 0.004989f, 2.446374f, 0.184363f, 2.436303f, 0.037022f, 2.625688f, 0.170507f, 3.376060f, 0.002500f, 1.538282f, 0.105742f, 1.072989f, 0.004612f, 0.930829f, +1.173193f, 11.651950f, 0.006277f, 6.978144f, 1.085683f, 9.779338f, 0.071148f, 11.439200f, 1.082875f, 14.614870f, 0.005182f, 7.227621f, 1.225633f, 8.477305f, 0.017446f, 7.981905f, +0.372120f, 4.385581f, 0.000434f, 2.188750f, 0.171441f, 1.832463f, 0.002449f, 1.786278f, 0.049642f, 0.795022f, 0.000052f, 0.327648f, 0.193037f, 1.584352f, 0.000599f, 1.243164f, +0.466037f, 3.921870f, 0.003006f, 2.824282f, 0.260898f, 1.991226f, 0.020613f, 2.800788f, 0.545758f, 6.241099f, 0.003149f, 3.711376f, 0.362940f, 2.127043f, 0.006228f, 2.408233f, +0.232345f, 2.886136f, 0.008386f, 1.796105f, 0.126589f, 1.426123f, 0.055960f, 1.733471f, 0.177720f, 2.999908f, 0.005737f, 1.541636f, 0.117785f, 1.018928f, 0.011310f, 0.996932f, +0.580818f, 4.917810f, 0.006841f, 3.321704f, 0.483324f, 3.711482f, 0.069726f, 4.896451f, 0.731789f, 8.419866f, 0.007709f, 4.696274f, 0.885153f, 5.219370f, 0.027736f, 5.542615f, +0.958966f, 8.409609f, 0.000731f, 3.102780f, 0.492694f, 3.918571f, 0.004603f, 2.823890f, 0.087750f, 1.045697f, 0.000060f, 0.318596f, 0.536972f, 3.279388f, 0.001090f, 1.902284f, +1.748256f, 10.947300f, 0.007375f, 5.828113f, 1.091437f, 6.198381f, 0.056390f, 6.445314f, 1.404312f, 
11.949590f, 0.005298f, 5.253310f, 1.469644f, 6.408884f, 0.016492f, 5.364272f, +0.609769f, 5.636092f, 0.014392f, 2.592979f, 0.370486f, 3.105718f, 0.107100f, 2.790794f, 0.319924f, 4.018347f, 0.006754f, 1.526607f, 0.333669f, 2.147816f, 0.020951f, 1.553551f, +3.010858f, 18.969280f, 0.023192f, 9.472097f, 2.794038f, 15.965050f, 0.263588f, 15.570780f, 2.602042f, 22.277300f, 0.017926f, 9.185796f, 4.952913f, 21.731490f, 0.101490f, 17.060520f, +0.019918f, 0.279155f, 0.000009f, 0.205706f, 0.017671f, 0.224620f, 0.000100f, 0.323293f, 0.003062f, 0.058311f, 0.000001f, 0.035483f, 0.022498f, 0.219594f, 0.000028f, 0.254408f, +0.038096f, 0.381252f, 0.000097f, 0.405378f, 0.041070f, 0.372765f, 0.001280f, 0.774155f, 0.051406f, 0.699096f, 0.000117f, 0.613824f, 0.064602f, 0.450242f, 0.000437f, 0.752665f, +0.000530f, 0.007832f, 0.000008f, 0.007197f, 0.000556f, 0.007453f, 0.000097f, 0.013376f, 0.000467f, 0.009381f, 0.000006f, 0.007118f, 0.000585f, 0.006021f, 0.000022f, 0.008698f, +0.041317f, 0.416025f, 0.000192f, 0.414899f, 0.066210f, 0.604632f, 0.003769f, 1.177765f, 0.059983f, 0.820749f, 0.000249f, 0.675914f, 0.137106f, 0.961429f, 0.001695f, 1.507465f, +0.016543f, 0.292460f, 0.000052f, 0.188771f, 0.015469f, 0.248010f, 0.000594f, 0.312667f, 0.002811f, 0.067536f, 0.000008f, 0.035997f, 0.013460f, 0.165715f, 0.000112f, 0.168166f, +0.110832f, 1.399058f, 0.001922f, 1.303017f, 0.125924f, 1.441644f, 0.026746f, 2.622511f, 0.165335f, 2.836110f, 0.002564f, 2.181205f, 0.135379f, 1.190118f, 0.006245f, 1.742657f, +0.013327f, 0.248314f, 0.001293f, 0.199855f, 0.014736f, 0.249021f, 0.017512f, 0.391467f, 0.012985f, 0.328784f, 0.001127f, 0.218517f, 0.010596f, 0.137499f, 0.002735f, 0.173988f, +0.061913f, 0.786345f, 0.001960f, 0.686913f, 0.104563f, 1.204436f, 0.040551f, 2.055025f, 0.099369f, 1.715007f, 0.002814f, 1.237125f, 0.147990f, 1.308973f, 0.012466f, 1.797740f, +0.001429f, 0.021535f, 0.000010f, 0.015677f, 0.001201f, 0.016422f, 0.000102f, 0.023350f, 0.000331f, 0.006788f, 0.000002f, 0.004081f, 0.001696f, 
0.017801f, 0.000031f, 0.020373f, +0.003378f, 0.036352f, 0.000129f, 0.038185f, 0.003451f, 0.033683f, 0.001614f, 0.069107f, 0.006878f, 0.100589f, 0.000235f, 0.087251f, 0.006019f, 0.045109f, 0.000611f, 0.074497f, +0.001186f, 0.018833f, 0.000253f, 0.017095f, 0.001179f, 0.016983f, 0.003084f, 0.030111f, 0.001577f, 0.034038f, 0.000301f, 0.025514f, 0.001375f, 0.015212f, 0.000781f, 0.021710f, +0.003571f, 0.038667f, 0.000249f, 0.038096f, 0.005423f, 0.053257f, 0.004630f, 0.102484f, 0.007824f, 0.115114f, 0.000488f, 0.093653f, 0.012452f, 0.093895f, 0.002309f, 0.145441f, +0.042879f, 0.480855f, 0.000194f, 0.258784f, 0.040205f, 0.408908f, 0.002223f, 0.429827f, 0.006822f, 0.103968f, 0.000028f, 0.046204f, 0.054935f, 0.429032f, 0.000660f, 0.363011f, +0.147554f, 1.181550f, 0.003684f, 0.917530f, 0.168114f, 1.220908f, 0.051402f, 1.851809f, 0.206094f, 2.242613f, 0.004602f, 1.438076f, 0.283802f, 1.582653f, 0.018848f, 1.932242f, +0.036231f, 0.428241f, 0.005061f, 0.287380f, 0.040174f, 0.430657f, 0.068728f, 0.564475f, 0.033053f, 0.530901f, 0.004129f, 0.294199f, 0.045361f, 0.373392f, 0.016856f, 0.393950f, +0.215561f, 1.736722f, 0.009826f, 1.264949f, 0.365066f, 2.667531f, 0.203816f, 3.794871f, 0.323929f, 3.546486f, 0.013207f, 2.133045f, 0.811331f, 4.552264f, 0.098386f, 5.212878f, +0.069309f, 1.258625f, 0.000006f, 0.568653f, 0.045270f, 0.745586f, 0.000045f, 0.657950f, 0.007913f, 0.195267f, 0.000001f, 0.072851f, 0.051594f, 0.652494f, 0.000011f, 0.463484f, +0.139590f, 1.810065f, 0.000063f, 1.180023f, 0.110789f, 1.302910f, 0.000610f, 1.659035f, 0.139899f, 2.465142f, 0.000056f, 1.327081f, 0.156000f, 1.408746f, 0.000187f, 1.443898f, +0.001670f, 0.031961f, 0.000004f, 0.018006f, 0.001290f, 0.022390f, 0.000040f, 0.024637f, 0.001093f, 0.028431f, 0.000002f, 0.013226f, 0.001215f, 0.016192f, 0.000008f, 0.014342f, +0.125656f, 1.639386f, 0.000103f, 1.002425f, 0.148243f, 1.754082f, 0.001491f, 2.094910f, 0.135491f, 2.402121f, 0.000100f, 1.212899f, 0.274801f, 2.496798f, 0.000600f, 2.400278f, +0.101450f, 
2.323818f, 0.000056f, 0.919642f, 0.069836f, 1.450783f, 0.000474f, 1.121409f, 0.012804f, 0.398562f, 0.000006f, 0.130248f, 0.054398f, 0.867766f, 0.000080f, 0.539917f, +0.715690f, 11.705830f, 0.002193f, 6.684436f, 0.598642f, 8.880186f, 0.022467f, 9.904427f, 0.792953f, 17.624340f, 0.002173f, 8.310636f, 0.576123f, 6.562377f, 0.004696f, 5.891570f, +0.073965f, 1.785715f, 0.001268f, 0.881200f, 0.060211f, 1.318392f, 0.012644f, 1.270726f, 0.053527f, 1.756087f, 0.000821f, 0.715595f, 0.038758f, 0.651650f, 0.001768f, 0.505574f, +0.331836f, 5.460834f, 0.001857f, 2.924797f, 0.412585f, 6.157819f, 0.028274f, 6.441820f, 0.395559f, 8.845758f, 0.001980f, 3.912289f, 0.522729f, 5.990750f, 0.007781f, 5.044585f, +0.062092f, 1.212519f, 0.000076f, 0.541194f, 0.038435f, 0.680697f, 0.000574f, 0.593421f, 0.010697f, 0.283870f, 0.000012f, 0.104627f, 0.048569f, 0.660505f, 0.000158f, 0.463498f, +0.154565f, 2.155221f, 0.001043f, 1.388040f, 0.116257f, 1.470199f, 0.009605f, 1.849403f, 0.233761f, 4.429342f, 0.001410f, 2.355638f, 0.181505f, 1.762535f, 0.003257f, 1.784660f, +0.046627f, 0.959677f, 0.001760f, 0.534116f, 0.034131f, 0.637122f, 0.015778f, 0.692592f, 0.046059f, 1.288237f, 0.001555f, 0.592059f, 0.035641f, 0.510875f, 0.003579f, 0.447026f, +0.135627f, 1.902761f, 0.001671f, 1.149394f, 0.151635f, 1.929376f, 0.022876f, 2.276389f, 0.220684f, 4.207239f, 0.002431f, 2.098655f, 0.311664f, 3.045043f, 0.010212f, 2.891916f, +0.159454f, 2.316941f, 0.000127f, 0.764515f, 0.110070f, 1.450521f, 0.001075f, 0.934845f, 0.018843f, 0.372070f, 0.000013f, 0.101380f, 0.134632f, 1.362370f, 0.000286f, 0.706762f, +0.577796f, 5.994922f, 0.002549f, 2.854302f, 0.484647f, 4.560494f, 0.026185f, 4.241048f, 0.599394f, 8.451022f, 0.002365f, 3.322651f, 0.732394f, 5.292025f, 0.008595f, 3.961373f, +0.121940f, 1.867516f, 0.003010f, 0.768388f, 0.099542f, 1.382628f, 0.030092f, 1.111135f, 0.082624f, 1.719544f, 0.001824f, 0.584237f, 0.100614f, 1.073115f, 0.006606f, 0.694177f, +0.700605f, 7.313769f, 0.005643f, 3.266119f, 0.873520f, 
8.270234f, 0.086176f, 7.213623f, 0.781947f, 11.092580f, 0.005633f, 4.090556f, 1.737829f, 12.634060f, 0.037238f, 8.870358f, +0.016431f, 0.314450f, 0.000002f, 0.216294f, 0.014146f, 0.245529f, 0.000024f, 0.329869f, 0.001918f, 0.049884f, 0.000000f, 0.028335f, 0.017238f, 0.229743f, 0.000006f, 0.248453f, +0.035847f, 0.489845f, 0.000028f, 0.486181f, 0.037501f, 0.464761f, 0.000354f, 0.900978f, 0.036736f, 0.682163f, 0.000025f, 0.559097f, 0.056459f, 0.537289f, 0.000116f, 0.838408f, +0.000502f, 0.010133f, 0.000002f, 0.008692f, 0.000511f, 0.009357f, 0.000027f, 0.015676f, 0.000336f, 0.009217f, 0.000001f, 0.006528f, 0.000515f, 0.007235f, 0.000006f, 0.009757f, +0.039404f, 0.541761f, 0.000055f, 0.504338f, 0.061275f, 0.764059f, 0.001055f, 1.389268f, 0.043446f, 0.811713f, 0.000055f, 0.623988f, 0.121446f, 1.162842f, 0.000454f, 1.701934f, +0.011946f, 0.288365f, 0.000011f, 0.173741f, 0.010839f, 0.237297f, 0.000126f, 0.279253f, 0.001542f, 0.050573f, 0.000001f, 0.025162f, 0.009027f, 0.151759f, 0.000023f, 0.143755f, +0.091286f, 1.573446f, 0.000479f, 1.367911f, 0.100646f, 1.573338f, 0.006465f, 2.671610f, 0.103421f, 2.422389f, 0.000485f, 1.739038f, 0.103563f, 1.243145f, 0.001445f, 1.699165f, +0.011053f, 0.281214f, 0.000324f, 0.211272f, 0.011860f, 0.273665f, 0.004262f, 0.401579f, 0.008179f, 0.282782f, 0.000215f, 0.175435f, 0.008163f, 0.144627f, 0.000637f, 0.170830f, +0.051685f, 0.896335f, 0.000495f, 0.730888f, 0.084704f, 1.332260f, 0.009934f, 2.121849f, 0.062999f, 1.484664f, 0.000540f, 0.999695f, 0.114744f, 1.385811f, 0.002923f, 1.776609f, +0.017527f, 0.360682f, 0.000037f, 0.245094f, 0.014300f, 0.266895f, 0.000366f, 0.354237f, 0.003088f, 0.086345f, 0.000006f, 0.048451f, 0.019321f, 0.276900f, 0.000107f, 0.295828f, +0.047259f, 0.694444f, 0.000546f, 0.680913f, 0.046854f, 0.624414f, 0.006625f, 1.195835f, 0.073085f, 1.459374f, 0.000755f, 1.181625f, 0.078212f, 0.800377f, 0.002402f, 1.233832f, +0.016703f, 0.362281f, 0.001079f, 0.306973f, 0.016116f, 0.317025f, 0.012750f, 0.524677f, 0.016871f, 
0.497277f, 0.000975f, 0.347945f, 0.017994f, 0.271798f, 0.003092f, 0.362083f, +0.050639f, 0.748672f, 0.001067f, 0.688526f, 0.074626f, 1.000634f, 0.019267f, 1.797415f, 0.084254f, 1.692726f, 0.001589f, 1.285506f, 0.163997f, 1.688544f, 0.009197f, 2.441453f, +0.036173f, 0.553907f, 0.000049f, 0.278261f, 0.032913f, 0.457085f, 0.000550f, 0.448493f, 0.004371f, 0.090955f, 0.000005f, 0.037731f, 0.043044f, 0.459017f, 0.000156f, 0.362535f, +0.141983f, 1.552442f, 0.001072f, 1.125319f, 0.156977f, 1.556662f, 0.014515f, 2.203937f, 0.150611f, 2.237807f, 0.001017f, 1.339497f, 0.253640f, 1.931367f, 0.005094f, 2.201063f, +0.035106f, 0.566593f, 0.001483f, 0.354920f, 0.037774f, 0.552920f, 0.019543f, 0.676500f, 0.024323f, 0.533460f, 0.000919f, 0.275944f, 0.040823f, 0.458843f, 0.004587f, 0.451889f, +0.210231f, 2.312784f, 0.002898f, 1.572424f, 0.345499f, 3.447166f, 0.058333f, 4.577639f, 0.239929f, 3.586807f, 0.002958f, 2.013731f, 0.734923f, 5.630513f, 0.026951f, 6.018523f, +0.178847f, 1.960126f, 0.000014f, 0.990237f, 0.118453f, 1.177401f, 0.000113f, 1.161781f, 0.018050f, 0.268826f, 0.000001f, 0.112146f, 0.132270f, 1.009558f, 0.000027f, 0.801851f, +0.358372f, 2.804569f, 0.000154f, 2.044405f, 0.288414f, 2.047038f, 0.001519f, 2.914550f, 0.317504f, 3.376520f, 0.000122f, 2.032493f, 0.397898f, 2.168561f, 0.000455f, 2.485306f, +0.003148f, 0.036366f, 0.000008f, 0.022908f, 0.002466f, 0.025832f, 0.000073f, 0.031784f, 0.001822f, 0.028597f, 0.000004f, 0.014876f, 0.002275f, 0.018304f, 0.000015f, 0.018128f, +0.286381f, 2.254940f, 0.000225f, 1.541736f, 0.342590f, 2.446487f, 0.003295f, 3.267104f, 0.272977f, 2.920812f, 0.000192f, 1.649065f, 0.622221f, 3.411958f, 0.001300f, 3.667637f, +0.227362f, 3.143109f, 0.000120f, 1.390851f, 0.158701f, 1.989753f, 0.001030f, 1.719750f, 0.025368f, 0.476551f, 0.000012f, 0.174136f, 0.121120f, 1.166076f, 0.000171f, 0.811251f, +1.595783f, 15.752310f, 0.004676f, 10.057990f, 1.353491f, 12.117230f, 0.048576f, 15.111760f, 1.562974f, 20.965730f, 0.004096f, 11.054420f, 1.276237f, 
8.773441f, 0.009949f, 8.807330f, +0.121109f, 1.764647f, 0.001986f, 0.973699f, 0.099970f, 1.321082f, 0.020075f, 1.423775f, 0.077478f, 1.534076f, 0.001136f, 0.698993f, 0.063049f, 0.639775f, 0.002750f, 0.555011f, +0.656831f, 6.523526f, 0.003514f, 3.906822f, 0.828101f, 7.459156f, 0.054268f, 8.725209f, 0.692145f, 9.341435f, 0.003312f, 4.619701f, 1.027955f, 7.110029f, 0.014632f, 6.694530f, +0.214984f, 2.533666f, 0.000251f, 1.264499f, 0.134937f, 1.442294f, 0.001928f, 1.405943f, 0.032742f, 0.524368f, 0.000034f, 0.216105f, 0.167067f, 1.371209f, 0.000518f, 1.075921f, +0.532432f, 4.480611f, 0.003435f, 3.226652f, 0.406079f, 3.099282f, 0.032084f, 4.359340f, 0.711835f, 8.140288f, 0.004107f, 4.840761f, 0.621167f, 3.640407f, 0.010660f, 4.121661f, +0.117948f, 1.465124f, 0.004257f, 0.911778f, 0.087549f, 0.986304f, 0.038702f, 1.198866f, 0.102998f, 1.738602f, 0.003325f, 0.893458f, 0.089573f, 0.774873f, 0.008601f, 0.758146f, +0.414743f, 3.511649f, 0.004885f, 2.371922f, 0.470191f, 3.610631f, 0.067832f, 4.763402f, 0.596568f, 6.864031f, 0.006285f, 3.828490f, 0.946863f, 5.583251f, 0.029670f, 5.929033f, +0.396978f, 3.481285f, 0.000303f, 1.284443f, 0.277868f, 2.209978f, 0.002596f, 1.592605f, 0.041471f, 0.494202f, 0.000028f, 0.150570f, 0.333001f, 2.033698f, 0.000676f, 1.179693f, +1.431168f, 8.961746f, 0.006037f, 4.771045f, 1.217253f, 6.912901f, 0.062891f, 7.188299f, 1.312453f, 11.167950f, 0.004952f, 4.909682f, 1.802302f, 7.859551f, 0.020226f, 6.578488f, +0.221802f, 2.050110f, 0.005235f, 0.943187f, 0.183598f, 1.539067f, 0.053075f, 1.383004f, 0.132856f, 1.668711f, 0.002805f, 0.633959f, 0.181821f, 1.170377f, 0.011416f, 0.846553f, +1.540534f, 9.705809f, 0.011866f, 4.846487f, 1.947645f, 11.128780f, 0.183740f, 10.853950f, 1.519952f, 13.013020f, 0.010471f, 5.365775f, 3.796395f, 16.657130f, 0.077792f, 13.076840f, +0.086537f, 1.212838f, 0.000040f, 0.893728f, 0.058461f, 0.743095f, 0.000330f, 1.069527f, 0.010994f, 0.209378f, 0.000005f, 0.127407f, 0.070265f, 0.685824f, 0.000086f, 0.794553f, +0.085044f, 
0.851095f, 0.000216f, 0.904954f, 0.069812f, 0.633635f, 0.002176f, 1.315928f, 0.094842f, 1.289801f, 0.000216f, 1.132478f, 0.103668f, 0.722514f, 0.000702f, 1.207820f, +0.001896f, 0.028015f, 0.000027f, 0.025742f, 0.001515f, 0.020299f, 0.000264f, 0.036430f, 0.001381f, 0.027731f, 0.000018f, 0.021041f, 0.001505f, 0.015481f, 0.000057f, 0.022365f, +0.145252f, 1.462557f, 0.000675f, 1.458597f, 0.177237f, 1.618534f, 0.010089f, 3.152748f, 0.174279f, 2.384640f, 0.000724f, 1.963833f, 0.346486f, 2.429654f, 0.004284f, 3.809556f, +0.071797f, 1.269254f, 0.000225f, 0.819252f, 0.051118f, 0.819577f, 0.001963f, 1.033244f, 0.010083f, 0.242237f, 0.000028f, 0.129112f, 0.041992f, 0.516986f, 0.000350f, 0.524632f, +0.247147f, 3.119800f, 0.004286f, 2.905635f, 0.213815f, 2.447862f, 0.045413f, 4.452933f, 0.304702f, 5.226774f, 0.004726f, 4.019825f, 0.217008f, 1.907722f, 0.010011f, 2.793425f, +0.047616f, 0.887217f, 0.004620f, 0.714076f, 0.040091f, 0.677490f, 0.047643f, 1.065031f, 0.038343f, 0.970869f, 0.003328f, 0.645259f, 0.027215f, 0.353152f, 0.007025f, 0.446873f, +0.217421f, 2.761408f, 0.006885f, 2.412234f, 0.279597f, 3.220617f, 0.108433f, 5.495063f, 0.288394f, 4.977408f, 0.008167f, 3.590468f, 0.373581f, 3.304323f, 0.031469f, 4.538148f, +0.004307f, 0.064904f, 0.000030f, 0.047248f, 0.002757f, 0.037686f, 0.000233f, 0.053584f, 0.000826f, 0.016908f, 0.000005f, 0.010164f, 0.003674f, 0.038564f, 0.000067f, 0.044138f, +0.005231f, 0.056292f, 0.000200f, 0.059131f, 0.004069f, 0.039717f, 0.001903f, 0.081486f, 0.008803f, 0.128734f, 0.000301f, 0.111665f, 0.006700f, 0.050214f, 0.000680f, 0.082927f, +0.002942f, 0.046728f, 0.000628f, 0.042417f, 0.002227f, 0.032086f, 0.005827f, 0.056888f, 0.003234f, 0.069798f, 0.000618f, 0.052320f, 0.002453f, 0.027133f, 0.001394f, 0.038723f, +0.008709f, 0.094295f, 0.000607f, 0.092902f, 0.010071f, 0.098892f, 0.008598f, 0.190303f, 0.015768f, 0.232006f, 0.000983f, 0.188754f, 0.021829f, 0.164600f, 0.004048f, 0.254961f, +0.218953f, 2.455414f, 0.000988f, 1.321439f, 0.156323f, 
1.589916f, 0.008642f, 1.671251f, 0.028792f, 0.438764f, 0.000116f, 0.194990f, 0.201648f, 1.574832f, 0.002421f, 1.332491f, +0.387140f, 3.100065f, 0.009665f, 2.407350f, 0.335861f, 2.439157f, 0.102693f, 3.699584f, 0.446893f, 4.862875f, 0.009978f, 3.118319f, 0.535264f, 2.984962f, 0.035548f, 3.644304f, +0.152311f, 1.800301f, 0.021275f, 1.208129f, 0.128599f, 1.378563f, 0.220003f, 1.806924f, 0.114839f, 1.844551f, 0.014347f, 1.022158f, 0.137080f, 1.128384f, 0.050937f, 1.190509f, +0.890666f, 7.175890f, 0.040601f, 5.226591f, 1.148562f, 8.392530f, 0.641242f, 11.939340f, 1.106153f, 12.110540f, 0.045098f, 7.283925f, 2.409783f, 13.520950f, 0.292222f, 15.483080f, +0.244449f, 4.439133f, 0.000020f, 2.005620f, 0.121577f, 2.002337f, 0.000121f, 1.766984f, 0.023065f, 0.569180f, 0.000002f, 0.212354f, 0.130808f, 1.654294f, 0.000028f, 1.175089f, +0.252968f, 3.280229f, 0.000114f, 2.138457f, 0.152878f, 1.797888f, 0.000842f, 2.289305f, 0.209530f, 3.692088f, 0.000084f, 1.987592f, 0.203221f, 1.835173f, 0.000243f, 1.880965f, +0.004849f, 0.092803f, 0.000012f, 0.052283f, 0.002852f, 0.049503f, 0.000088f, 0.054472f, 0.002623f, 0.068227f, 0.000006f, 0.031740f, 0.002535f, 0.033797f, 0.000017f, 0.029935f, +0.358608f, 4.678624f, 0.000295f, 2.860809f, 0.322144f, 3.811753f, 0.003240f, 4.552398f, 0.319570f, 5.665674f, 0.000235f, 2.860759f, 0.563752f, 5.122175f, 0.001232f, 4.924164f, +0.357421f, 8.187070f, 0.000198f, 3.240001f, 0.187345f, 3.891947f, 0.001271f, 3.008350f, 0.037283f, 1.160494f, 0.000018f, 0.379244f, 0.137767f, 2.197674f, 0.000203f, 1.367374f, +1.295566f, 21.190300f, 0.003970f, 12.100400f, 0.825163f, 12.240380f, 0.030969f, 13.652180f, 1.186321f, 26.367410f, 0.003251f, 12.433370f, 0.749695f, 8.539457f, 0.006111f, 7.666553f, +0.214534f, 5.179471f, 0.003678f, 2.555923f, 0.132981f, 2.911762f, 0.027925f, 2.806488f, 0.128311f, 4.209583f, 0.001968f, 1.715381f, 0.080810f, 1.358696f, 0.003686f, 1.054125f, +0.945985f, 15.567560f, 0.005293f, 8.337912f, 0.895598f, 13.366770f, 0.061374f, 13.983250f, 
0.931950f, 20.840900f, 0.004664f, 9.217482f, 1.071205f, 12.276570f, 0.015945f, 10.337640f, +0.151914f, 2.966531f, 0.000185f, 1.324078f, 0.071602f, 1.268096f, 0.001070f, 1.105507f, 0.021630f, 0.573985f, 0.000024f, 0.211555f, 0.085418f, 1.161638f, 0.000277f, 0.815160f, +0.194303f, 2.709322f, 0.001311f, 1.744902f, 0.111282f, 1.407288f, 0.009194f, 1.770265f, 0.242862f, 4.601803f, 0.001465f, 2.447357f, 0.164018f, 1.592725f, 0.002943f, 1.612719f, +0.093916f, 1.933003f, 0.003545f, 1.075828f, 0.052348f, 0.977164f, 0.024199f, 1.062241f, 0.076673f, 2.144486f, 0.002588f, 0.985581f, 0.051606f, 0.739701f, 0.005182f, 0.647253f, +0.268498f, 3.766863f, 0.003307f, 2.275436f, 0.228578f, 2.908375f, 0.034483f, 3.431469f, 0.361066f, 6.883554f, 0.003978f, 3.433654f, 0.443523f, 4.333346f, 0.014533f, 4.115433f, +0.660982f, 9.604382f, 0.000527f, 3.169131f, 0.347424f, 4.578428f, 0.003394f, 2.950747f, 0.064555f, 1.274673f, 0.000046f, 0.347319f, 0.401177f, 4.059604f, 0.000851f, 2.106017f, +1.230658f, 12.768690f, 0.005429f, 6.079429f, 0.786007f, 7.396268f, 0.042467f, 6.878186f, 1.055104f, 14.876190f, 0.004163f, 5.848807f, 1.121351f, 8.102497f, 0.013159f, 6.065167f, +0.416146f, 6.373316f, 0.010271f, 2.622297f, 0.258671f, 3.592895f, 0.078196f, 2.887392f, 0.233038f, 4.849918f, 0.005144f, 1.647820f, 0.246828f, 2.632581f, 0.016207f, 1.702964f, +2.349974f, 24.531870f, 0.018929f, 10.955230f, 2.231003f, 21.122500f, 0.220096f, 18.423870f, 2.167637f, 30.749770f, 0.015616f, 11.339440f, 4.190161f, 30.462570f, 0.089786f, 21.387740f, +0.072723f, 1.391724f, 0.000010f, 0.957299f, 0.047674f, 0.827452f, 0.000081f, 1.111685f, 0.007016f, 0.182468f, 0.000001f, 0.103643f, 0.054844f, 0.730935f, 0.000020f, 0.790461f, +0.081519f, 1.113958f, 0.000063f, 1.105627f, 0.064937f, 0.804781f, 0.000612f, 1.560136f, 0.069043f, 1.282089f, 0.000048f, 1.050793f, 0.092294f, 0.878320f, 0.000189f, 1.370567f, +0.001831f, 0.036924f, 0.000008f, 0.031670f, 0.001419f, 0.025961f, 0.000075f, 0.043492f, 0.001013f, 0.027757f, 0.000004f, 
0.019660f, 0.001349f, 0.018951f, 0.000015f, 0.025555f, +0.141116f, 1.940192f, 0.000198f, 1.806172f, 0.167092f, 2.083541f, 0.002877f, 3.788447f, 0.128589f, 2.402480f, 0.000162f, 1.846857f, 0.312648f, 2.993590f, 0.001169f, 4.381412f, +0.052814f, 1.274878f, 0.000050f, 0.768120f, 0.036489f, 0.798836f, 0.000424f, 0.940076f, 0.005633f, 0.184784f, 0.000005f, 0.091936f, 0.028690f, 0.482297f, 0.000072f, 0.456859f, +0.207367f, 3.574268f, 0.001088f, 3.107372f, 0.174089f, 2.721419f, 0.011182f, 4.621110f, 0.194162f, 4.547776f, 0.000911f, 3.264857f, 0.169112f, 2.029978f, 0.002359f, 2.774629f, +0.040230f, 1.023552f, 0.001180f, 0.768982f, 0.032870f, 0.758457f, 0.011813f, 1.112966f, 0.024604f, 0.850639f, 0.000646f, 0.527729f, 0.021357f, 0.378406f, 0.001667f, 0.446963f, +0.184895f, 3.206509f, 0.001771f, 2.614646f, 0.230731f, 3.629017f, 0.027061f, 5.779820f, 0.186258f, 4.389449f, 0.001595f, 2.955626f, 0.295070f, 3.563692f, 0.007517f, 4.568652f, +0.053810f, 1.107351f, 0.000112f, 0.752478f, 0.033430f, 0.623935f, 0.000855f, 0.828118f, 0.007834f, 0.219088f, 0.000015f, 0.122938f, 0.042641f, 0.611109f, 0.000237f, 0.652881f, +0.074551f, 1.095486f, 0.000861f, 1.074140f, 0.056280f, 0.750032f, 0.007958f, 1.436411f, 0.095283f, 1.902636f, 0.000984f, 1.540525f, 0.088691f, 0.907608f, 0.002724f, 1.399135f, +0.042217f, 0.915700f, 0.002727f, 0.775903f, 0.031017f, 0.610154f, 0.024539f, 1.009806f, 0.035243f, 1.038785f, 0.002036f, 0.726840f, 0.032693f, 0.493843f, 0.005618f, 0.657885f, +0.125800f, 1.859893f, 0.002652f, 1.710474f, 0.141164f, 1.892819f, 0.036447f, 3.400028f, 0.172984f, 3.475381f, 0.003261f, 2.639307f, 0.292864f, 3.015385f, 0.016424f, 4.359921f, +0.188166f, 2.881324f, 0.000257f, 1.447459f, 0.130365f, 1.810463f, 0.002180f, 1.776433f, 0.018792f, 0.391024f, 0.000023f, 0.162210f, 0.160953f, 1.716396f, 0.000584f, 1.355624f, +0.379489f, 4.149336f, 0.002865f, 3.007729f, 0.319476f, 3.168075f, 0.029541f, 4.485392f, 0.332689f, 4.943172f, 0.002246f, 2.958863f, 0.487320f, 3.710755f, 0.009787f, 
4.228924f, +0.150343f, 2.426456f, 0.006351f, 1.519962f, 0.123178f, 1.803027f, 0.063728f, 2.206008f, 0.086089f, 1.888091f, 0.003252f, 0.976657f, 0.125673f, 1.412537f, 0.014122f, 1.391129f, +0.884885f, 9.734753f, 0.012198f, 6.618498f, 1.107324f, 11.048160f, 0.186958f, 14.671330f, 0.834626f, 12.477220f, 0.010290f, 7.005050f, 2.223647f, 17.036170f, 0.081546f, 18.210170f, +0.403293f, 4.420010f, 0.000031f, 2.232946f, 0.203386f, 2.021630f, 0.000194f, 1.994810f, 0.033639f, 0.500991f, 0.000002f, 0.208999f, 0.214404f, 1.636457f, 0.000044f, 1.299772f, +0.415223f, 3.249480f, 0.000179f, 2.368725f, 0.254450f, 1.805974f, 0.001340f, 2.571326f, 0.304030f, 3.233231f, 0.000117f, 1.946241f, 0.331401f, 1.806149f, 0.000379f, 2.069959f, +0.005844f, 0.067511f, 0.000014f, 0.042528f, 0.003486f, 0.036516f, 0.000103f, 0.044930f, 0.002795f, 0.043876f, 0.000006f, 0.022824f, 0.003036f, 0.024426f, 0.000019f, 0.024192f, +0.522539f, 4.114426f, 0.000410f, 2.813095f, 0.475978f, 3.399031f, 0.004579f, 4.539157f, 0.411642f, 4.404512f, 0.000289f, 2.486749f, 0.816119f, 4.475198f, 0.001705f, 4.810552f, +0.512132f, 7.079840f, 0.000271f, 3.132886f, 0.272197f, 3.412724f, 0.001766f, 2.949628f, 0.047224f, 0.887141f, 0.000022f, 0.324170f, 0.196117f, 1.888100f, 0.000276f, 1.313571f, +1.846912f, 18.231260f, 0.005412f, 11.640820f, 1.192795f, 10.678590f, 0.042809f, 13.317590f, 1.495010f, 20.054050f, 0.003918f, 10.573730f, 1.061788f, 7.299222f, 0.008277f, 7.327417f, +0.224588f, 3.272417f, 0.003682f, 1.805658f, 0.141163f, 1.865428f, 0.028347f, 2.010436f, 0.118743f, 2.351134f, 0.001741f, 1.071281f, 0.084047f, 0.852849f, 0.003666f, 0.739855f, +1.197161f, 11.889990f, 0.006406f, 7.120701f, 1.149265f, 10.352060f, 0.075314f, 12.109130f, 1.042596f, 14.071240f, 0.004989f, 6.958775f, 1.346813f, 9.315468f, 0.019170f, 8.771086f, +0.336282f, 3.963213f, 0.000392f, 1.977955f, 0.160719f, 1.717868f, 0.002296f, 1.674572f, 0.042327f, 0.677882f, 0.000044f, 0.279371f, 0.187856f, 1.541828f, 0.000583f, 1.209797f, +0.427928f, 3.601171f, 
0.002761f, 2.593335f, 0.248516f, 1.896730f, 0.019635f, 2.667873f, 0.472830f, 5.407121f, 0.002728f, 3.215437f, 0.358880f, 2.103249f, 0.006159f, 2.381294f, +0.151892f, 1.886771f, 0.005482f, 1.174178f, 0.085848f, 0.967150f, 0.037950f, 1.175583f, 0.109621f, 1.850397f, 0.003539f, 0.950909f, 0.082920f, 0.717315f, 0.007962f, 0.701830f, +0.524944f, 4.444718f, 0.006183f, 3.002158f, 0.453153f, 3.479800f, 0.065374f, 4.590800f, 0.624041f, 7.180129f, 0.006574f, 4.004797f, 0.861499f, 5.079893f, 0.026995f, 5.394501f, +1.052104f, 9.226384f, 0.000802f, 3.404134f, 0.560748f, 4.459825f, 0.005238f, 3.213941f, 0.090836f, 1.082471f, 0.000062f, 0.329800f, 0.634412f, 3.874472f, 0.001287f, 2.247476f, +1.948906f, 12.203740f, 0.008221f, 6.497015f, 1.262174f, 7.168010f, 0.065212f, 7.453570f, 1.477080f, 12.568790f, 0.005573f, 5.525524f, 1.764258f, 7.693647f, 0.019799f, 6.439626f, +0.483953f, 4.473174f, 0.011423f, 2.057959f, 0.305031f, 2.557020f, 0.088179f, 2.297735f, 0.239574f, 3.009122f, 0.005057f, 1.143193f, 0.285179f, 1.835688f, 0.017906f, 1.327783f, +3.303681f, 20.814150f, 0.025447f, 10.393310f, 3.180348f, 18.172410f, 0.300033f, 17.723630f, 2.693871f, 23.063490f, 0.018559f, 9.509973f, 5.852382f, 25.678010f, 0.119921f, 20.158780f, +0.005545f, 0.077720f, 0.000003f, 0.057271f, 0.004175f, 0.053072f, 0.000024f, 0.076386f, 0.000868f, 0.016535f, 0.000000f, 0.010062f, 0.005172f, 0.050477f, 0.000006f, 0.058479f, +0.005449f, 0.054528f, 0.000014f, 0.057979f, 0.004985f, 0.045246f, 0.000155f, 0.093966f, 0.007488f, 0.101839f, 0.000017f, 0.089417f, 0.007628f, 0.053166f, 0.000052f, 0.088878f, +0.000188f, 0.002775f, 0.000003f, 0.002550f, 0.000167f, 0.002241f, 0.000029f, 0.004022f, 0.000169f, 0.003385f, 0.000002f, 0.002569f, 0.000171f, 0.001761f, 0.000006f, 0.002544f, +0.008632f, 0.086920f, 0.000040f, 0.086685f, 0.011740f, 0.107207f, 0.000668f, 0.208829f, 0.012764f, 0.174653f, 0.000053f, 0.143833f, 0.023650f, 0.165844f, 0.000292f, 0.260033f, +0.007607f, 0.134479f, 0.000024f, 0.086800f, 0.006036f, 0.096780f, 
0.000232f, 0.122012f, 0.001317f, 0.031629f, 0.000004f, 0.016858f, 0.005110f, 0.062912f, 0.000043f, 0.063842f, +0.026180f, 0.330479f, 0.000454f, 0.307793f, 0.025243f, 0.289000f, 0.005362f, 0.525723f, 0.039778f, 0.682335f, 0.000617f, 0.524773f, 0.026402f, 0.232102f, 0.001218f, 0.339861f, +0.007799f, 0.145310f, 0.000757f, 0.116952f, 0.007318f, 0.123669f, 0.008697f, 0.194411f, 0.007739f, 0.195962f, 0.000672f, 0.130240f, 0.005119f, 0.066432f, 0.001321f, 0.084061f, +0.021364f, 0.271339f, 0.000676f, 0.237029f, 0.030620f, 0.352707f, 0.011875f, 0.601794f, 0.034923f, 0.602742f, 0.000989f, 0.434790f, 0.042161f, 0.372916f, 0.003551f, 0.512162f, +0.000195f, 0.002932f, 0.000001f, 0.002134f, 0.000139f, 0.001897f, 0.000012f, 0.002698f, 0.000046f, 0.000941f, 0.000000f, 0.000566f, 0.000191f, 0.002001f, 0.000004f, 0.002290f, +0.000236f, 0.002542f, 0.000009f, 0.002671f, 0.000205f, 0.001999f, 0.000096f, 0.004102f, 0.000490f, 0.007166f, 0.000017f, 0.006215f, 0.000348f, 0.002605f, 0.000035f, 0.004302f, +0.000205f, 0.003263f, 0.000044f, 0.002962f, 0.000173f, 0.002497f, 0.000453f, 0.004428f, 0.000278f, 0.006007f, 0.000053f, 0.004503f, 0.000197f, 0.002176f, 0.000112f, 0.003106f, +0.000365f, 0.003951f, 0.000025f, 0.003892f, 0.000470f, 0.004618f, 0.000401f, 0.008886f, 0.000814f, 0.011979f, 0.000051f, 0.009746f, 0.001050f, 0.007920f, 0.000195f, 0.012269f, +0.018109f, 0.203080f, 0.000082f, 0.109292f, 0.014410f, 0.146558f, 0.000797f, 0.154056f, 0.002935f, 0.044722f, 0.000012f, 0.019875f, 0.019155f, 0.149597f, 0.000230f, 0.126577f, +0.032013f, 0.256346f, 0.000799f, 0.199065f, 0.030953f, 0.224796f, 0.009464f, 0.340958f, 0.045541f, 0.495559f, 0.001017f, 0.317777f, 0.050836f, 0.283492f, 0.003376f, 0.346112f, +0.019473f, 0.230169f, 0.002720f, 0.154460f, 0.018325f, 0.196437f, 0.031349f, 0.257476f, 0.018094f, 0.290630f, 0.002260f, 0.161053f, 0.020129f, 0.165694f, 0.007480f, 0.174817f, +0.068318f, 0.550421f, 0.003114f, 0.400902f, 0.098190f, 0.717473f, 0.054819f, 1.020689f, 0.104564f, 1.144801f, 
0.004263f, 0.688544f, 0.212297f, 1.191170f, 0.025744f, 1.364030f, +0.014425f, 0.261958f, 0.000001f, 0.118354f, 0.007996f, 0.131693f, 0.000008f, 0.116214f, 0.001677f, 0.041393f, 0.000000f, 0.015443f, 0.008866f, 0.112123f, 0.000002f, 0.079644f, +0.014925f, 0.193531f, 0.000007f, 0.126167f, 0.010053f, 0.118223f, 0.000055f, 0.150537f, 0.015235f, 0.268451f, 0.000006f, 0.144518f, 0.013771f, 0.124357f, 0.000016f, 0.127460f, +0.000442f, 0.008466f, 0.000001f, 0.004769f, 0.000290f, 0.005033f, 0.000009f, 0.005538f, 0.000295f, 0.007670f, 0.000001f, 0.003568f, 0.000266f, 0.003541f, 0.000002f, 0.003136f, +0.019626f, 0.256052f, 0.000016f, 0.156567f, 0.019650f, 0.232503f, 0.000198f, 0.277680f, 0.021554f, 0.382128f, 0.000016f, 0.192947f, 0.035436f, 0.321968f, 0.000077f, 0.309521f, +0.034873f, 0.798797f, 0.000019f, 0.316121f, 0.020372f, 0.423222f, 0.000138f, 0.327137f, 0.004483f, 0.139540f, 0.000002f, 0.045601f, 0.015438f, 0.246274f, 0.000023f, 0.153230f, +0.126381f, 2.067081f, 0.000387f, 1.180375f, 0.089713f, 1.330788f, 0.003367f, 1.484281f, 0.142617f, 3.169823f, 0.000391f, 1.494708f, 0.083995f, 0.956749f, 0.000685f, 0.858950f, +0.032357f, 0.781184f, 0.000555f, 0.385492f, 0.022354f, 0.489460f, 0.004694f, 0.471764f, 0.023849f, 0.782446f, 0.000366f, 0.318842f, 0.013998f, 0.235363f, 0.000638f, 0.182603f, +0.085599f, 1.408657f, 0.000479f, 0.754470f, 0.090322f, 1.348046f, 0.006190f, 1.410219f, 0.103926f, 2.324065f, 0.000520f, 1.027884f, 0.111328f, 1.275878f, 0.001657f, 1.074369f, +0.006320f, 0.123410f, 0.000008f, 0.055083f, 0.003320f, 0.058796f, 0.000050f, 0.051257f, 0.001109f, 0.029427f, 0.000001f, 0.010846f, 0.004081f, 0.055503f, 0.000013f, 0.038949f, +0.008082f, 0.112687f, 0.000055f, 0.072575f, 0.005159f, 0.065236f, 0.000426f, 0.082063f, 0.012449f, 0.235879f, 0.000075f, 0.125446f, 0.007835f, 0.076086f, 0.000141f, 0.077041f, +0.006040f, 0.124307f, 0.000228f, 0.069184f, 0.003752f, 0.070036f, 0.001734f, 0.076134f, 0.006076f, 0.169954f, 0.000205f, 0.078109f, 0.003812f, 0.054634f, 
0.000383f, 0.047806f, +0.010359f, 0.145331f, 0.000128f, 0.087790f, 0.009829f, 0.125061f, 0.001483f, 0.147554f, 0.017168f, 0.327294f, 0.000189f, 0.163261f, 0.019654f, 0.192021f, 0.000644f, 0.182365f, +0.050343f, 0.731501f, 0.000040f, 0.241371f, 0.029492f, 0.388647f, 0.000288f, 0.250479f, 0.006059f, 0.119644f, 0.000004f, 0.032600f, 0.035094f, 0.355121f, 0.000074f, 0.184228f, +0.093712f, 0.972309f, 0.000413f, 0.462936f, 0.066708f, 0.627718f, 0.003604f, 0.583748f, 0.099015f, 1.396038f, 0.000391f, 0.548874f, 0.098073f, 0.708638f, 0.001151f, 0.530455f, +0.048995f, 0.750363f, 0.001209f, 0.308736f, 0.033943f, 0.471459f, 0.010261f, 0.378883f, 0.033813f, 0.703700f, 0.000746f, 0.239091f, 0.033377f, 0.355988f, 0.002192f, 0.230281f, +0.165991f, 1.732819f, 0.001337f, 0.773827f, 0.175637f, 1.662882f, 0.017327f, 1.450431f, 0.188693f, 2.676773f, 0.001359f, 0.987100f, 0.339939f, 2.471364f, 0.007284f, 1.735142f, +0.000080f, 0.001532f, 0.000000f, 0.001054f, 0.000058f, 0.001015f, 0.000000f, 0.001364f, 0.000010f, 0.000247f, 0.000000f, 0.000141f, 0.000069f, 0.000924f, 0.000000f, 0.000999f, +0.000090f, 0.001226f, 0.000000f, 0.001217f, 0.000080f, 0.000987f, 0.000001f, 0.001913f, 0.000094f, 0.001739f, 0.000000f, 0.001425f, 0.000117f, 0.001110f, 0.000000f, 0.001732f, +0.000003f, 0.000063f, 0.000000f, 0.000054f, 0.000003f, 0.000049f, 0.000000f, 0.000082f, 0.000002f, 0.000058f, 0.000000f, 0.000041f, 0.000003f, 0.000037f, 0.000000f, 0.000050f, +0.000144f, 0.001980f, 0.000000f, 0.001844f, 0.000190f, 0.002370f, 0.000003f, 0.004310f, 0.000162f, 0.003022f, 0.000000f, 0.002323f, 0.000367f, 0.003510f, 0.000001f, 0.005137f, +0.000096f, 0.002320f, 0.000000f, 0.001398f, 0.000074f, 0.001620f, 0.000001f, 0.001907f, 0.000013f, 0.000414f, 0.000000f, 0.000206f, 0.000060f, 0.001008f, 0.000000f, 0.000955f, +0.000377f, 0.006503f, 0.000002f, 0.005653f, 0.000353f, 0.005518f, 0.000023f, 0.009371f, 0.000435f, 0.010197f, 0.000002f, 0.007320f, 0.000353f, 0.004242f, 0.000005f, 0.005798f, +0.000113f, 0.002879f, 
0.000003f, 0.002163f, 0.000103f, 0.002378f, 0.000037f, 0.003489f, 0.000085f, 0.002949f, 0.000002f, 0.001829f, 0.000069f, 0.001223f, 0.000005f, 0.001444f, +0.000312f, 0.005412f, 0.000003f, 0.004413f, 0.000434f, 0.006826f, 0.000051f, 0.010872f, 0.000387f, 0.009129f, 0.000003f, 0.006147f, 0.000572f, 0.006908f, 0.000015f, 0.008856f, +0.000042f, 0.000859f, 0.000000f, 0.000584f, 0.000029f, 0.000540f, 0.000001f, 0.000716f, 0.000007f, 0.000209f, 0.000000f, 0.000118f, 0.000038f, 0.000545f, 0.000000f, 0.000582f, +0.000058f, 0.000850f, 0.000001f, 0.000833f, 0.000049f, 0.000648f, 0.000007f, 0.001242f, 0.000091f, 0.001819f, 0.000001f, 0.001473f, 0.000079f, 0.000809f, 0.000002f, 0.001247f, +0.000051f, 0.001098f, 0.000003f, 0.000931f, 0.000041f, 0.000816f, 0.000033f, 0.001350f, 0.000052f, 0.001535f, 0.000003f, 0.001074f, 0.000045f, 0.000680f, 0.000008f, 0.000906f, +0.000091f, 0.001338f, 0.000002f, 0.001231f, 0.000113f, 0.001518f, 0.000029f, 0.002727f, 0.000153f, 0.003082f, 0.000003f, 0.002341f, 0.000242f, 0.002492f, 0.000014f, 0.003603f, +0.000267f, 0.004093f, 0.000000f, 0.002056f, 0.000206f, 0.002866f, 0.000003f, 0.002812f, 0.000033f, 0.000685f, 0.000000f, 0.000284f, 0.000263f, 0.002800f, 0.000001f, 0.002212f, +0.000539f, 0.005893f, 0.000004f, 0.004272f, 0.000506f, 0.005015f, 0.000047f, 0.007100f, 0.000582f, 0.008652f, 0.000004f, 0.005179f, 0.000795f, 0.006053f, 0.000016f, 0.006898f, +0.000330f, 0.005328f, 0.000014f, 0.003338f, 0.000301f, 0.004413f, 0.000156f, 0.005399f, 0.000233f, 0.005110f, 0.000009f, 0.002643f, 0.000317f, 0.003563f, 0.000036f, 0.003509f, +0.001166f, 0.012825f, 0.000016f, 0.008719f, 0.001626f, 0.016222f, 0.000275f, 0.021542f, 0.001355f, 0.020258f, 0.000017f, 0.011373f, 0.003365f, 0.025778f, 0.000123f, 0.027554f, +0.026737f, 0.293036f, 0.000002f, 0.148039f, 0.015028f, 0.149380f, 0.000014f, 0.147398f, 0.002748f, 0.040933f, 0.000000f, 0.017076f, 0.016326f, 0.124609f, 0.000003f, 0.098972f, +0.027523f, 0.215389f, 0.000012f, 0.157009f, 0.018798f, 0.133418f, 
0.000099f, 0.189959f, 0.024836f, 0.264115f, 0.000010f, 0.158984f, 0.025230f, 0.137502f, 0.000029f, 0.157586f, +0.000599f, 0.006919f, 0.000001f, 0.004358f, 0.000398f, 0.004171f, 0.000012f, 0.005132f, 0.000353f, 0.005542f, 0.000001f, 0.002883f, 0.000357f, 0.002875f, 0.000002f, 0.002848f, +0.032129f, 0.252978f, 0.000025f, 0.172965f, 0.032618f, 0.232929f, 0.000314f, 0.311059f, 0.031192f, 0.333748f, 0.000022f, 0.188431f, 0.057633f, 0.316034f, 0.000120f, 0.339716f, +0.056138f, 0.776059f, 0.000030f, 0.343412f, 0.033254f, 0.416933f, 0.000216f, 0.360356f, 0.006379f, 0.119843f, 0.000003f, 0.043792f, 0.024691f, 0.237708f, 0.000035f, 0.165376f, +0.202409f, 1.998023f, 0.000593f, 1.275755f, 0.145695f, 1.304341f, 0.005229f, 1.626683f, 0.201918f, 2.708526f, 0.000529f, 1.428101f, 0.133650f, 0.918772f, 0.001042f, 0.922321f, +0.038056f, 0.554498f, 0.000624f, 0.305962f, 0.026659f, 0.352293f, 0.005353f, 0.379678f, 0.024796f, 0.490971f, 0.000364f, 0.223708f, 0.016357f, 0.165978f, 0.000713f, 0.143988f, +0.121703f, 1.208731f, 0.000651f, 0.723887f, 0.130215f, 1.172920f, 0.008533f, 1.372002f, 0.130620f, 1.762900f, 0.000625f, 0.871822f, 0.157254f, 1.087678f, 0.002238f, 1.024116f, +0.015717f, 0.185230f, 0.000018f, 0.092444f, 0.008372f, 0.089485f, 0.000120f, 0.087229f, 0.002438f, 0.039045f, 0.000003f, 0.016091f, 0.010084f, 0.082765f, 0.000031f, 0.064942f, +0.019996f, 0.168275f, 0.000129f, 0.121181f, 0.012943f, 0.098782f, 0.001023f, 0.138943f, 0.027229f, 0.311380f, 0.000157f, 0.185167f, 0.019261f, 0.112879f, 0.000331f, 0.127802f, +0.010974f, 0.136315f, 0.000396f, 0.084832f, 0.006913f, 0.077878f, 0.003056f, 0.094661f, 0.009760f, 0.164754f, 0.000315f, 0.084666f, 0.006881f, 0.059523f, 0.000661f, 0.058238f, +0.022754f, 0.192657f, 0.000268f, 0.130129f, 0.021892f, 0.168108f, 0.003158f, 0.221780f, 0.033335f, 0.383549f, 0.000351f, 0.213929f, 0.042889f, 0.252897f, 0.001344f, 0.268559f, +0.090026f, 0.789479f, 0.000069f, 0.291283f, 0.053477f, 0.425324f, 0.000500f, 0.306507f, 0.009579f, 0.114149f, 
0.000007f, 0.034778f, 0.062349f, 0.380775f, 0.000127f, 0.220878f, +0.166730f, 1.044034f, 0.000703f, 0.555822f, 0.120347f, 0.683461f, 0.006218f, 0.710689f, 0.155730f, 1.325141f, 0.000588f, 0.582562f, 0.173353f, 0.755964f, 0.001945f, 0.632746f, +0.064014f, 0.591677f, 0.001511f, 0.272211f, 0.044968f, 0.376962f, 0.012999f, 0.338737f, 0.039053f, 0.490519f, 0.000824f, 0.186353f, 0.043325f, 0.278879f, 0.002720f, 0.201718f, +0.262171f, 1.651751f, 0.002019f, 0.824784f, 0.281290f, 1.607282f, 0.026537f, 1.567590f, 0.263457f, 2.255579f, 0.001815f, 0.930063f, 0.533416f, 2.340426f, 0.010930f, 1.837374f, +0.019864f, 0.278401f, 0.000009f, 0.205151f, 0.012936f, 0.164429f, 0.000073f, 0.236661f, 0.002675f, 0.050938f, 0.000001f, 0.030996f, 0.014978f, 0.146190f, 0.000018f, 0.169367f, +0.018824f, 0.188385f, 0.000048f, 0.200306f, 0.014896f, 0.135199f, 0.000464f, 0.280780f, 0.022249f, 0.302577f, 0.000051f, 0.265670f, 0.021308f, 0.148508f, 0.000144f, 0.248259f, +0.000828f, 0.012237f, 0.000012f, 0.011244f, 0.000638f, 0.008547f, 0.000111f, 0.015339f, 0.000640f, 0.012838f, 0.000008f, 0.009741f, 0.000610f, 0.006279f, 0.000023f, 0.009071f, +0.029134f, 0.293357f, 0.000135f, 0.292563f, 0.034269f, 0.312947f, 0.001951f, 0.609591f, 0.037049f, 0.506934f, 0.000154f, 0.417478f, 0.064536f, 0.452547f, 0.000798f, 0.709568f, +0.020561f, 0.363488f, 0.000064f, 0.234617f, 0.014112f, 0.226254f, 0.000542f, 0.285240f, 0.003061f, 0.073524f, 0.000009f, 0.039188f, 0.011167f, 0.137485f, 0.000093f, 0.139519f, +0.068249f, 0.861524f, 0.001184f, 0.802383f, 0.056917f, 0.651618f, 0.012089f, 1.185366f, 0.089179f, 1.529748f, 0.001383f, 1.176504f, 0.055648f, 0.489206f, 0.002567f, 0.716331f, +0.025948f, 0.483484f, 0.002518f, 0.389132f, 0.021060f, 0.355894f, 0.025027f, 0.559474f, 0.022146f, 0.560736f, 0.001922f, 0.372676f, 0.013772f, 0.178711f, 0.003555f, 0.226137f, +0.054407f, 0.691014f, 0.001723f, 0.603637f, 0.067446f, 0.776893f, 0.026157f, 1.325546f, 0.076487f, 1.320095f, 0.002166f, 0.952254f, 0.086811f, 0.767847f, 
0.007313f, 1.054559f, +0.000764f, 0.011521f, 0.000005f, 0.008387f, 0.000472f, 0.006448f, 0.000040f, 0.009169f, 0.000155f, 0.003181f, 0.000001f, 0.001912f, 0.000606f, 0.006357f, 0.000011f, 0.007275f, +0.000895f, 0.009635f, 0.000034f, 0.010121f, 0.000671f, 0.006553f, 0.000314f, 0.013445f, 0.001597f, 0.023353f, 0.000055f, 0.020257f, 0.001065f, 0.007981f, 0.000108f, 0.013181f, +0.000994f, 0.015783f, 0.000212f, 0.014327f, 0.000725f, 0.010447f, 0.001897f, 0.018523f, 0.001158f, 0.024987f, 0.000221f, 0.018730f, 0.000769f, 0.008510f, 0.000437f, 0.012146f, +0.001351f, 0.014626f, 0.000094f, 0.014410f, 0.001506f, 0.014786f, 0.001286f, 0.028453f, 0.002592f, 0.038139f, 0.000162f, 0.031029f, 0.003144f, 0.023708f, 0.000583f, 0.036723f, +0.059305f, 0.665065f, 0.000268f, 0.357921f, 0.040816f, 0.415126f, 0.002256f, 0.436362f, 0.008265f, 0.125955f, 0.000033f, 0.055975f, 0.050719f, 0.396105f, 0.000609f, 0.335151f, +0.101113f, 0.809673f, 0.002524f, 0.628750f, 0.084560f, 0.614107f, 0.025855f, 0.931445f, 0.123705f, 1.346100f, 0.002762f, 0.863186f, 0.129820f, 0.723958f, 0.008622f, 0.883872f, +0.078502f, 0.927888f, 0.010965f, 0.622678f, 0.063893f, 0.684924f, 0.109306f, 0.897751f, 0.062732f, 1.007596f, 0.007837f, 0.558360f, 0.065609f, 0.540061f, 0.024379f, 0.569795f, +0.210799f, 1.698362f, 0.009609f, 1.237010f, 0.262044f, 1.914754f, 0.146299f, 2.723959f, 0.277469f, 3.037831f, 0.011312f, 1.827113f, 0.529625f, 2.971650f, 0.064225f, 3.402889f, +0.066708f, 1.211395f, 0.000005f, 0.547314f, 0.031982f, 0.526733f, 0.000032f, 0.464821f, 0.006671f, 0.164620f, 0.000000f, 0.061418f, 0.033148f, 0.419215f, 0.000007f, 0.297779f, +0.066566f, 0.863159f, 0.000030f, 0.562713f, 0.038779f, 0.456052f, 0.000214f, 0.580705f, 0.058436f, 1.029684f, 0.000024f, 0.554319f, 0.049658f, 0.448435f, 0.000059f, 0.459625f, +0.002518f, 0.048191f, 0.000006f, 0.027149f, 0.001427f, 0.024780f, 0.000044f, 0.027267f, 0.001444f, 0.037549f, 0.000003f, 0.017468f, 0.001223f, 0.016297f, 0.000008f, 0.014435f, +0.085511f, 1.115631f, 
0.000070f, 0.682168f, 0.074049f, 0.876179f, 0.000745f, 1.046425f, 0.080763f, 1.431856f, 0.000059f, 0.722985f, 0.124832f, 1.134208f, 0.000273f, 1.090363f, +0.121686f, 2.787335f, 0.000067f, 1.103077f, 0.061485f, 1.277300f, 0.000417f, 0.987312f, 0.013453f, 0.418744f, 0.000007f, 0.136844f, 0.043556f, 0.694800f, 0.000064f, 0.432299f, +0.425323f, 6.956597f, 0.001303f, 3.972458f, 0.261135f, 3.873648f, 0.009801f, 4.320434f, 0.412769f, 9.174293f, 0.001131f, 4.326075f, 0.228549f, 2.603310f, 0.001863f, 2.337200f, +0.138985f, 3.355496f, 0.002383f, 1.655843f, 0.083048f, 1.818414f, 0.017439f, 1.752669f, 0.088101f, 2.890386f, 0.001351f, 1.177815f, 0.048615f, 0.817390f, 0.002217f, 0.634161f, +0.281423f, 4.631226f, 0.001575f, 2.480463f, 0.256835f, 3.833252f, 0.017601f, 4.010043f, 0.293842f, 6.571088f, 0.001470f, 2.906251f, 0.295927f, 3.391473f, 0.004405f, 2.855832f, +0.032057f, 0.626005f, 0.000039f, 0.279411f, 0.014565f, 0.257957f, 0.000218f, 0.224883f, 0.004838f, 0.128373f, 0.000005f, 0.047315f, 0.016739f, 0.227633f, 0.000054f, 0.159738f, +0.039537f, 0.551301f, 0.000267f, 0.355058f, 0.021828f, 0.276043f, 0.001803f, 0.347242f, 0.052376f, 0.992434f, 0.000316f, 0.527802f, 0.030992f, 0.300957f, 0.000556f, 0.304735f, +0.037712f, 0.776197f, 0.001423f, 0.431999f, 0.020263f, 0.378245f, 0.009367f, 0.411176f, 0.032631f, 0.912659f, 0.001102f, 0.419448f, 0.019243f, 0.275824f, 0.001932f, 0.241351f, +0.049509f, 0.694582f, 0.000610f, 0.419574f, 0.040630f, 0.516964f, 0.006129f, 0.609943f, 0.070563f, 1.345247f, 0.000777f, 0.671036f, 0.075944f, 0.741999f, 0.002489f, 0.704686f, +0.212838f, 3.092631f, 0.000170f, 1.020467f, 0.107841f, 1.421153f, 0.001053f, 0.915917f, 0.022031f, 0.435014f, 0.000016f, 0.118531f, 0.119958f, 1.213887f, 0.000255f, 0.629733f, +0.382116f, 3.964643f, 0.001686f, 1.887646f, 0.235261f, 2.213787f, 0.012711f, 2.058720f, 0.347215f, 4.895477f, 0.001370f, 1.924733f, 0.323322f, 2.336214f, 0.003794f, 1.748785f, +0.254985f, 3.905123f, 0.006294f, 1.606761f, 0.152785f, 2.122166f, 
0.046187f, 1.705457f, 0.151336f, 3.149554f, 0.003341f, 1.070100f, 0.140443f, 1.497914f, 0.009222f, 0.968971f, +0.661206f, 6.902464f, 0.005326f, 3.082443f, 0.605117f, 5.729071f, 0.059697f, 4.997122f, 0.646406f, 9.169818f, 0.004657f, 3.381509f, 1.094814f, 7.959322f, 0.023460f, 5.588231f, +0.016373f, 0.313344f, 0.000002f, 0.215534f, 0.010347f, 0.179588f, 0.000018f, 0.241277f, 0.001674f, 0.043541f, 0.000000f, 0.024732f, 0.011467f, 0.152821f, 0.000004f, 0.165266f, +0.017698f, 0.241844f, 0.000014f, 0.240036f, 0.013590f, 0.168427f, 0.000128f, 0.326509f, 0.015887f, 0.295006f, 0.000011f, 0.241785f, 0.018607f, 0.177074f, 0.000038f, 0.276314f, +0.000784f, 0.015819f, 0.000003f, 0.013568f, 0.000586f, 0.010722f, 0.000031f, 0.017962f, 0.000460f, 0.012604f, 0.000002f, 0.008927f, 0.000537f, 0.007540f, 0.000006f, 0.010167f, +0.027763f, 0.381705f, 0.000039f, 0.355338f, 0.031689f, 0.395139f, 0.000546f, 0.718472f, 0.026812f, 0.500943f, 0.000034f, 0.385089f, 0.057118f, 0.546904f, 0.000214f, 0.800447f, +0.014835f, 0.358104f, 0.000014f, 0.215760f, 0.009880f, 0.216304f, 0.000115f, 0.254548f, 0.001677f, 0.055011f, 0.000001f, 0.027370f, 0.007484f, 0.125803f, 0.000019f, 0.119168f, +0.056167f, 0.968116f, 0.000295f, 0.841653f, 0.045454f, 0.710560f, 0.002920f, 1.206568f, 0.055738f, 1.305523f, 0.000261f, 0.937237f, 0.042535f, 0.510584f, 0.000593f, 0.697880f, +0.021503f, 0.547094f, 0.000631f, 0.411025f, 0.016936f, 0.390794f, 0.006087f, 0.573455f, 0.013938f, 0.481884f, 0.000366f, 0.298956f, 0.010600f, 0.187822f, 0.000827f, 0.221850f, +0.045382f, 0.787025f, 0.000435f, 0.641754f, 0.054592f, 0.858638f, 0.006403f, 1.367526f, 0.048453f, 1.141856f, 0.000415f, 0.768866f, 0.067254f, 0.812254f, 0.001713f, 1.041309f, +0.009369f, 0.192795f, 0.000020f, 0.131010f, 0.005611f, 0.104716f, 0.000143f, 0.138985f, 0.001446f, 0.040427f, 0.000003f, 0.022685f, 0.006894f, 0.098802f, 0.000038f, 0.105555f, +0.012516f, 0.183914f, 0.000144f, 0.180331f, 0.009108f, 0.121382f, 0.001288f, 0.232462f, 0.016954f, 0.338540f, 
0.000175f, 0.274109f, 0.013827f, 0.141496f, 0.000425f, 0.218124f, +0.013987f, 0.303371f, 0.000903f, 0.257056f, 0.009906f, 0.194861f, 0.007837f, 0.322496f, 0.012375f, 0.364747f, 0.000715f, 0.255214f, 0.010058f, 0.151930f, 0.001728f, 0.202398f, +0.019138f, 0.282952f, 0.000403f, 0.260220f, 0.020702f, 0.277587f, 0.005345f, 0.498623f, 0.027892f, 0.560367f, 0.000526f, 0.425559f, 0.041374f, 0.425994f, 0.002320f, 0.615941f, +0.049990f, 0.765475f, 0.000068f, 0.384543f, 0.033386f, 0.463654f, 0.000558f, 0.454939f, 0.005291f, 0.110100f, 0.000006f, 0.045673f, 0.039708f, 0.423441f, 0.000144f, 0.334437f, +0.097216f, 1.062959f, 0.000734f, 0.770507f, 0.078894f, 0.782346f, 0.007295f, 1.107654f, 0.090328f, 1.342113f, 0.000610f, 0.803357f, 0.115928f, 0.882747f, 0.002328f, 1.006014f, +0.076003f, 1.226654f, 0.003211f, 0.768391f, 0.060027f, 0.878653f, 0.031056f, 1.075034f, 0.046125f, 1.011622f, 0.001743f, 0.523284f, 0.058997f, 0.663110f, 0.006630f, 0.653060f, +0.205419f, 2.259846f, 0.002832f, 1.536432f, 0.247796f, 2.472347f, 0.041837f, 3.283136f, 0.205348f, 3.069849f, 0.002532f, 1.723497f, 0.479353f, 3.672501f, 0.017579f, 3.925580f, +0.107323f, 1.176235f, 0.000008f, 0.594222f, 0.052174f, 0.518606f, 0.000050f, 0.511726f, 0.009488f, 0.141301f, 0.000001f, 0.058947f, 0.052983f, 0.404400f, 0.000011f, 0.321199f, +0.106550f, 0.833842f, 0.000046f, 0.607833f, 0.062942f, 0.446732f, 0.000332f, 0.636052f, 0.082686f, 0.879330f, 0.000032f, 0.529312f, 0.078969f, 0.430387f, 0.000090f, 0.493250f, +0.002959f, 0.034187f, 0.000007f, 0.021536f, 0.001701f, 0.017825f, 0.000050f, 0.021932f, 0.001500f, 0.023548f, 0.000003f, 0.012249f, 0.001428f, 0.011486f, 0.000009f, 0.011376f, +0.121508f, 0.956742f, 0.000095f, 0.654139f, 0.106694f, 0.761915f, 0.001026f, 1.017481f, 0.101450f, 1.085497f, 0.000071f, 0.612862f, 0.176228f, 0.966349f, 0.000368f, 1.038763f, +0.170030f, 2.350538f, 0.000090f, 1.040132f, 0.087115f, 1.092221f, 0.000565f, 0.944010f, 0.016617f, 0.312163f, 0.000008f, 0.114068f, 0.060464f, 0.582110f, 
0.000085f, 0.404980f, +0.591274f, 5.836594f, 0.001733f, 3.726717f, 0.368107f, 3.295506f, 0.013211f, 4.109924f, 0.507262f, 6.804407f, 0.001329f, 3.587701f, 0.315658f, 2.169978f, 0.002461f, 2.178360f, +0.141887f, 2.067393f, 0.002326f, 1.140748f, 0.085969f, 1.136052f, 0.017263f, 1.224363f, 0.079508f, 1.574263f, 0.001166f, 0.717304f, 0.049308f, 0.500337f, 0.002151f, 0.434047f, +0.347305f, 3.449371f, 0.001858f, 2.065767f, 0.321399f, 2.895015f, 0.021062f, 3.386390f, 0.320568f, 4.326497f, 0.001534f, 2.139620f, 0.362829f, 2.509569f, 0.005164f, 2.362914f, +0.069202f, 0.815567f, 0.000081f, 0.407032f, 0.031882f, 0.340775f, 0.000455f, 0.332186f, 0.009232f, 0.147847f, 0.000010f, 0.060931f, 0.035898f, 0.294635f, 0.000111f, 0.231186f, +0.084915f, 0.714587f, 0.000548f, 0.514600f, 0.047537f, 0.362813f, 0.003756f, 0.510319f, 0.099440f, 1.137163f, 0.000574f, 0.676234f, 0.066130f, 0.387559f, 0.001135f, 0.438794f, +0.059478f, 0.738826f, 0.002147f, 0.459787f, 0.032406f, 0.365075f, 0.014325f, 0.443754f, 0.045495f, 0.767950f, 0.001469f, 0.394645f, 0.030152f, 0.260837f, 0.002895f, 0.255206f, +0.094393f, 0.799229f, 0.001112f, 0.539834f, 0.078548f, 0.603180f, 0.011332f, 0.795758f, 0.118928f, 1.368374f, 0.001253f, 0.763226f, 0.143853f, 0.848238f, 0.004508f, 0.900771f, +0.330370f, 2.897166f, 0.000252f, 1.068928f, 0.169736f, 1.349973f, 0.001586f, 0.972849f, 0.030230f, 0.360249f, 0.000021f, 0.109758f, 0.184991f, 1.129771f, 0.000375f, 0.655350f, +0.590108f, 3.695164f, 0.002489f, 1.967228f, 0.368405f, 2.092208f, 0.019034f, 2.175558f, 0.474013f, 4.033479f, 0.001788f, 1.773208f, 0.496065f, 2.163262f, 0.005567f, 1.810662f, +0.289172f, 2.672811f, 0.006825f, 1.229671f, 0.175696f, 1.472828f, 0.050790f, 1.323482f, 0.151718f, 1.905626f, 0.003203f, 0.723965f, 0.158236f, 1.018561f, 0.009936f, 0.736742f, +0.906473f, 5.711041f, 0.006982f, 2.851744f, 0.841195f, 4.806564f, 0.079358f, 4.687863f, 0.783391f, 6.706979f, 0.005397f, 2.765548f, 1.491163f, 6.542654f, 0.030555f, 5.136375f, +0.003088f, 0.043274f, 
0.000001f, 0.031889f, 0.002739f, 0.034821f, 0.000015f, 0.050117f, 0.000475f, 0.009039f, 0.000000f, 0.005501f, 0.003488f, 0.034041f, 0.000004f, 0.039438f, +0.005786f, 0.057907f, 0.000015f, 0.061571f, 0.006238f, 0.056618f, 0.000194f, 0.117583f, 0.007808f, 0.106182f, 0.000018f, 0.093231f, 0.009812f, 0.068385f, 0.000066f, 0.114319f, +0.000113f, 0.001671f, 0.000002f, 0.001536f, 0.000119f, 0.001590f, 0.000021f, 0.002854f, 0.000100f, 0.002002f, 0.000001f, 0.001519f, 0.000125f, 0.001285f, 0.000005f, 0.001856f, +0.005597f, 0.056360f, 0.000026f, 0.056208f, 0.008970f, 0.081912f, 0.000511f, 0.159556f, 0.008126f, 0.111190f, 0.000034f, 0.091568f, 0.018574f, 0.130248f, 0.000230f, 0.204221f, +0.003768f, 0.066610f, 0.000012f, 0.042994f, 0.003523f, 0.056486f, 0.000135f, 0.071212f, 0.000640f, 0.015382f, 0.000002f, 0.008199f, 0.003066f, 0.037743f, 0.000026f, 0.038301f, +0.024732f, 0.312202f, 0.000429f, 0.290770f, 0.028100f, 0.321705f, 0.005968f, 0.585217f, 0.036895f, 0.632882f, 0.000572f, 0.486739f, 0.030210f, 0.265576f, 0.001394f, 0.388876f, +0.004178f, 0.077851f, 0.000405f, 0.062658f, 0.004620f, 0.078073f, 0.005490f, 0.122732f, 0.004071f, 0.103080f, 0.000353f, 0.068509f, 0.003322f, 0.043108f, 0.000858f, 0.054549f, +0.012323f, 0.156513f, 0.000390f, 0.136722f, 0.020812f, 0.239729f, 0.008071f, 0.409029f, 0.019778f, 0.341353f, 0.000560f, 0.246236f, 0.029456f, 0.260536f, 0.002481f, 0.357820f, +0.000179f, 0.002693f, 0.000001f, 0.001960f, 0.000150f, 0.002053f, 0.000013f, 0.002920f, 0.000041f, 0.000849f, 0.000000f, 0.000510f, 0.000212f, 0.002226f, 0.000004f, 0.002547f, +0.000414f, 0.004453f, 0.000016f, 0.004678f, 0.000423f, 0.004126f, 0.000198f, 0.008466f, 0.000843f, 0.012323f, 0.000029f, 0.010689f, 0.000737f, 0.005526f, 0.000075f, 0.009126f, +0.000204f, 0.003241f, 0.000044f, 0.002942f, 0.000203f, 0.002923f, 0.000531f, 0.005183f, 0.000271f, 0.005858f, 0.000052f, 0.004391f, 0.000237f, 0.002618f, 0.000134f, 0.003737f, +0.000390f, 0.004225f, 0.000027f, 0.004163f, 0.000593f, 0.005819f, 
0.000506f, 0.011198f, 0.000855f, 0.012578f, 0.000053f, 0.010233f, 0.001361f, 0.010260f, 0.000252f, 0.015892f, +0.009932f, 0.111380f, 0.000045f, 0.059942f, 0.009313f, 0.094715f, 0.000515f, 0.099560f, 0.001580f, 0.024082f, 0.000006f, 0.010702f, 0.012725f, 0.099376f, 0.000153f, 0.084084f, +0.033487f, 0.268148f, 0.000836f, 0.208230f, 0.038153f, 0.277080f, 0.011666f, 0.420261f, 0.046772f, 0.508952f, 0.001044f, 0.326366f, 0.064408f, 0.359177f, 0.004277f, 0.438515f, +0.011552f, 0.136544f, 0.001614f, 0.091631f, 0.012809f, 0.137315f, 0.021914f, 0.179983f, 0.010539f, 0.169278f, 0.001317f, 0.093805f, 0.014463f, 0.119056f, 0.005374f, 0.125611f, +0.043634f, 0.351553f, 0.001989f, 0.256055f, 0.073898f, 0.539970f, 0.041257f, 0.768170f, 0.065571f, 0.717891f, 0.002673f, 0.431778f, 0.164232f, 0.921484f, 0.019916f, 1.055208f, +0.012272f, 0.222855f, 0.000001f, 0.100687f, 0.008016f, 0.132015f, 0.000008f, 0.116498f, 0.001401f, 0.034574f, 0.000000f, 0.012899f, 0.009135f, 0.115532f, 0.000002f, 0.082066f, +0.024216f, 0.314014f, 0.000011f, 0.204713f, 0.019220f, 0.226032f, 0.000106f, 0.287813f, 0.024270f, 0.427659f, 0.000010f, 0.230225f, 0.027063f, 0.244393f, 0.000032f, 0.250491f, +0.000407f, 0.007790f, 0.000001f, 0.004389f, 0.000314f, 0.005457f, 0.000010f, 0.006005f, 0.000266f, 0.006930f, 0.000001f, 0.003224f, 0.000296f, 0.003947f, 0.000002f, 0.003496f, +0.019444f, 0.253673f, 0.000016f, 0.155112f, 0.022939f, 0.271421f, 0.000231f, 0.324159f, 0.020965f, 0.371696f, 0.000015f, 0.187680f, 0.042522f, 0.386346f, 0.000093f, 0.371411f, +0.026391f, 0.604521f, 0.000015f, 0.239237f, 0.018167f, 0.377409f, 0.000123f, 0.291725f, 0.003331f, 0.103683f, 0.000002f, 0.033883f, 0.014151f, 0.225742f, 0.000021f, 0.140455f, +0.182416f, 2.983603f, 0.000559f, 1.703741f, 0.152583f, 2.263397f, 0.005727f, 2.524458f, 0.202109f, 4.492122f, 0.000554f, 2.118229f, 0.146843f, 1.672630f, 0.001197f, 1.501654f, +0.026487f, 0.639461f, 0.000454f, 0.315556f, 0.021562f, 0.472114f, 0.004528f, 0.455045f, 0.019168f, 0.628852f, 
0.000294f, 0.256253f, 0.013879f, 0.233355f, 0.000633f, 0.181045f, +0.075440f, 1.241468f, 0.000422f, 0.664924f, 0.093797f, 1.399921f, 0.006428f, 1.464486f, 0.089927f, 2.010998f, 0.000450f, 0.889421f, 0.118837f, 1.361939f, 0.001769f, 1.146838f, +0.008868f, 0.173163f, 0.000011f, 0.077290f, 0.005489f, 0.097212f, 0.000082f, 0.084748f, 0.001528f, 0.040540f, 0.000002f, 0.014942f, 0.006936f, 0.094329f, 0.000023f, 0.066194f, +0.021628f, 0.301570f, 0.000146f, 0.194222f, 0.016267f, 0.205718f, 0.001344f, 0.258779f, 0.032709f, 0.619778f, 0.000197f, 0.329614f, 0.025397f, 0.246624f, 0.000456f, 0.249720f, +0.009166f, 0.188663f, 0.000346f, 0.105002f, 0.006710f, 0.125251f, 0.003102f, 0.136156f, 0.009055f, 0.253254f, 0.000306f, 0.116393f, 0.007007f, 0.100433f, 0.000704f, 0.087881f, +0.016927f, 0.237476f, 0.000209f, 0.143451f, 0.018925f, 0.240797f, 0.002855f, 0.284106f, 0.027543f, 0.525088f, 0.000303f, 0.261924f, 0.038897f, 0.380039f, 0.001275f, 0.360928f, +0.042186f, 0.612982f, 0.000034f, 0.202264f, 0.029121f, 0.383757f, 0.000284f, 0.247327f, 0.004985f, 0.098437f, 0.000004f, 0.026822f, 0.035619f, 0.360436f, 0.000076f, 0.186985f, +0.149774f, 1.553980f, 0.000661f, 0.739881f, 0.125628f, 1.182153f, 0.006788f, 1.099348f, 0.155373f, 2.190640f, 0.000613f, 0.861284f, 0.189848f, 1.371778f, 0.002228f, 1.026851f, +0.044409f, 0.680126f, 0.001096f, 0.279838f, 0.036252f, 0.503536f, 0.010959f, 0.404662f, 0.030091f, 0.626237f, 0.000664f, 0.212772f, 0.036642f, 0.390815f, 0.002406f, 0.252811f, +0.161984f, 1.690989f, 0.001305f, 0.755147f, 0.201963f, 1.912130f, 0.019924f, 1.667835f, 0.180791f, 2.564674f, 0.001302f, 0.945762f, 0.401797f, 2.921074f, 0.008610f, 2.050883f, +0.003212f, 0.061478f, 0.000000f, 0.042288f, 0.002766f, 0.048003f, 0.000005f, 0.064493f, 0.000375f, 0.009753f, 0.000000f, 0.005540f, 0.003370f, 0.044917f, 0.000001f, 0.048575f, +0.006867f, 0.093833f, 0.000005f, 0.093131f, 0.007184f, 0.089028f, 0.000068f, 0.172589f, 0.007037f, 0.130673f, 0.000005f, 0.107099f, 0.010815f, 0.102921f, 
0.000022f, 0.160603f, +0.000135f, 0.002727f, 0.000001f, 0.002339f, 0.000138f, 0.002518f, 0.000007f, 0.004219f, 0.000091f, 0.002481f, 0.000000f, 0.001757f, 0.000139f, 0.001947f, 0.000002f, 0.002626f, +0.006732f, 0.092564f, 0.000009f, 0.086170f, 0.010469f, 0.130546f, 0.000180f, 0.237368f, 0.007423f, 0.138688f, 0.000009f, 0.106613f, 0.020750f, 0.198681f, 0.000078f, 0.290789f, +0.003431f, 0.082831f, 0.000003f, 0.049906f, 0.003114f, 0.068162f, 0.000036f, 0.080214f, 0.000443f, 0.014527f, 0.000000f, 0.007228f, 0.002593f, 0.043592f, 0.000007f, 0.041293f, +0.025691f, 0.442826f, 0.000135f, 0.384981f, 0.028326f, 0.442796f, 0.001819f, 0.751890f, 0.029106f, 0.681750f, 0.000137f, 0.489430f, 0.029147f, 0.349867f, 0.000407f, 0.478208f, +0.004370f, 0.111194f, 0.000128f, 0.083539f, 0.004690f, 0.108209f, 0.001685f, 0.158787f, 0.003234f, 0.111814f, 0.000085f, 0.069368f, 0.003228f, 0.057187f, 0.000252f, 0.067547f, +0.012974f, 0.225004f, 0.000124f, 0.183472f, 0.021263f, 0.334432f, 0.002494f, 0.532640f, 0.015814f, 0.372690f, 0.000135f, 0.250950f, 0.028804f, 0.347875f, 0.000734f, 0.445976f, +0.002764f, 0.056877f, 0.000006f, 0.038649f, 0.002255f, 0.042087f, 0.000058f, 0.055860f, 0.000487f, 0.013616f, 0.000001f, 0.007640f, 0.003047f, 0.043665f, 0.000017f, 0.046650f, +0.007302f, 0.107294f, 0.000084f, 0.105204f, 0.007239f, 0.096474f, 0.001024f, 0.184761f, 0.011292f, 0.225479f, 0.000117f, 0.182566f, 0.012084f, 0.123661f, 0.000371f, 0.190632f, +0.003626f, 0.078641f, 0.000234f, 0.066635f, 0.003498f, 0.068817f, 0.002768f, 0.113893f, 0.003662f, 0.107945f, 0.000212f, 0.075529f, 0.003906f, 0.059000f, 0.000671f, 0.078598f, +0.006978f, 0.103174f, 0.000147f, 0.094885f, 0.010284f, 0.137896f, 0.002655f, 0.247700f, 0.011611f, 0.233273f, 0.000219f, 0.177154f, 0.022600f, 0.232697f, 0.001267f, 0.336454f, +0.010567f, 0.161813f, 0.000014f, 0.081288f, 0.009615f, 0.133528f, 0.000161f, 0.131018f, 0.001277f, 0.026571f, 0.000002f, 0.011022f, 0.012574f, 0.134092f, 0.000046f, 0.105907f, +0.040639f, 0.444344f, 
0.000307f, 0.322092f, 0.044931f, 0.445552f, 0.004155f, 0.630818f, 0.043108f, 0.640512f, 0.000291f, 0.383395f, 0.072597f, 0.552802f, 0.001458f, 0.629995f, +0.014117f, 0.227845f, 0.000596f, 0.142725f, 0.015190f, 0.222347f, 0.007859f, 0.272042f, 0.009781f, 0.214521f, 0.000370f, 0.110966f, 0.016416f, 0.184515f, 0.001845f, 0.181719f, +0.053671f, 0.590442f, 0.000740f, 0.401432f, 0.088204f, 0.880044f, 0.014892f, 1.168648f, 0.061253f, 0.915694f, 0.000755f, 0.514096f, 0.187622f, 1.437442f, 0.006881f, 1.536499f, +0.028008f, 0.306961f, 0.000002f, 0.155074f, 0.018550f, 0.184384f, 0.000018f, 0.181938f, 0.002827f, 0.042099f, 0.000000f, 0.017562f, 0.020714f, 0.158099f, 0.000004f, 0.125572f, +0.054987f, 0.430323f, 0.000024f, 0.313686f, 0.044253f, 0.314090f, 0.000233f, 0.447198f, 0.048717f, 0.518081f, 0.000019f, 0.311858f, 0.061052f, 0.332736f, 0.000070f, 0.381336f, +0.000679f, 0.007839f, 0.000002f, 0.004938f, 0.000532f, 0.005569f, 0.000016f, 0.006852f, 0.000393f, 0.006165f, 0.000001f, 0.003207f, 0.000490f, 0.003946f, 0.000003f, 0.003908f, +0.039193f, 0.308603f, 0.000031f, 0.210997f, 0.046886f, 0.334818f, 0.000451f, 0.447125f, 0.037359f, 0.399732f, 0.000026f, 0.225686f, 0.085155f, 0.466949f, 0.000178f, 0.501940f, +0.052312f, 0.723172f, 0.000028f, 0.320010f, 0.036514f, 0.457806f, 0.000237f, 0.395683f, 0.005837f, 0.109646f, 0.000003f, 0.040066f, 0.027868f, 0.268293f, 0.000039f, 0.186654f, +0.359737f, 3.551042f, 0.001054f, 2.267371f, 0.305117f, 2.731586f, 0.010951f, 3.406643f, 0.352341f, 4.726301f, 0.000923f, 2.491996f, 0.287702f, 1.977796f, 0.002243f, 1.985435f, +0.038358f, 0.558898f, 0.000629f, 0.308390f, 0.031663f, 0.418413f, 0.006358f, 0.450937f, 0.024539f, 0.485872f, 0.000360f, 0.221385f, 0.019969f, 0.202629f, 0.000871f, 0.175783f, +0.132070f, 1.311691f, 0.000707f, 0.785548f, 0.166507f, 1.499819f, 0.010912f, 1.754386f, 0.139170f, 1.878291f, 0.000666f, 0.928888f, 0.206692f, 1.429620f, 0.002942f, 1.346075f, +0.027155f, 0.320029f, 0.000032f, 0.159720f, 0.017044f, 0.182177f, 
0.000243f, 0.177586f, 0.004136f, 0.066233f, 0.000004f, 0.027296f, 0.021102f, 0.173199f, 0.000065f, 0.135900f, +0.065892f, 0.554507f, 0.000425f, 0.399321f, 0.050255f, 0.383558f, 0.003971f, 0.539499f, 0.088094f, 1.007417f, 0.000508f, 0.599078f, 0.076874f, 0.450526f, 0.001319f, 0.510084f, +0.020508f, 0.254746f, 0.000740f, 0.158534f, 0.015222f, 0.171492f, 0.006729f, 0.208451f, 0.017909f, 0.302297f, 0.000578f, 0.155349f, 0.015574f, 0.134730f, 0.001495f, 0.131822f, +0.045781f, 0.387631f, 0.000539f, 0.261823f, 0.051902f, 0.398557f, 0.007488f, 0.525805f, 0.065852f, 0.757682f, 0.000694f, 0.422605f, 0.104519f, 0.616304f, 0.003275f, 0.654472f, +0.092891f, 0.814601f, 0.000071f, 0.300552f, 0.065019f, 0.517123f, 0.000607f, 0.372661f, 0.009704f, 0.115640f, 0.000007f, 0.035233f, 0.077920f, 0.475874f, 0.000158f, 0.276041f, +0.328114f, 2.054600f, 0.001384f, 1.093826f, 0.279071f, 1.584875f, 0.014419f, 1.648013f, 0.300897f, 2.560401f, 0.001135f, 1.125610f, 0.413202f, 1.801907f, 0.004637f, 1.508206f, +0.071443f, 0.660351f, 0.001686f, 0.303806f, 0.059138f, 0.495742f, 0.017096f, 0.445473f, 0.042794f, 0.537501f, 0.000903f, 0.204202f, 0.058566f, 0.376985f, 0.003677f, 0.272679f, +0.315024f, 1.984742f, 0.002427f, 0.991059f, 0.398274f, 2.275726f, 0.037573f, 2.219526f, 0.310815f, 2.661034f, 0.002141f, 1.097248f, 0.776325f, 3.406219f, 0.015908f, 2.674086f, +0.028455f, 0.398803f, 0.000013f, 0.293874f, 0.019223f, 0.244343f, 0.000108f, 0.351680f, 0.003615f, 0.068847f, 0.000001f, 0.041894f, 0.023105f, 0.225512f, 0.000028f, 0.261263f, +0.027399f, 0.274197f, 0.000070f, 0.291549f, 0.022491f, 0.204138f, 0.000701f, 0.423953f, 0.030555f, 0.415535f, 0.000070f, 0.364851f, 0.033399f, 0.232772f, 0.000226f, 0.389123f, +0.000858f, 0.012681f, 0.000012f, 0.011652f, 0.000686f, 0.009188f, 0.000120f, 0.016490f, 0.000625f, 0.012552f, 0.000008f, 0.009524f, 0.000681f, 0.007007f, 0.000026f, 0.010123f, +0.041739f, 0.420277f, 0.000194f, 0.419139f, 0.050930f, 0.465098f, 0.002899f, 0.905967f, 0.050080f, 0.685245f, 
0.000208f, 0.564323f, 0.099565f, 0.698180f, 0.001231f, 1.094706f, +0.034685f, 0.613180f, 0.000109f, 0.395783f, 0.024695f, 0.395940f, 0.000948f, 0.499163f, 0.004871f, 0.117025f, 0.000014f, 0.062374f, 0.020287f, 0.249757f, 0.000169f, 0.253451f, +0.116984f, 1.476710f, 0.002029f, 1.375339f, 0.101206f, 1.158659f, 0.021496f, 2.107729f, 0.144226f, 2.474015f, 0.002237f, 1.902724f, 0.102718f, 0.902992f, 0.004739f, 1.322226f, +0.031665f, 0.590014f, 0.003072f, 0.474872f, 0.026661f, 0.450542f, 0.031683f, 0.708263f, 0.025499f, 0.645643f, 0.002213f, 0.429107f, 0.018099f, 0.234852f, 0.004672f, 0.297178f, +0.091793f, 1.165835f, 0.002907f, 1.018417f, 0.118043f, 1.359708f, 0.045779f, 2.319953f, 0.121757f, 2.101405f, 0.003448f, 1.515854f, 0.157722f, 1.395047f, 0.013286f, 1.915954f, +0.001142f, 0.017213f, 0.000008f, 0.012531f, 0.000731f, 0.009995f, 0.000062f, 0.014211f, 0.000219f, 0.004484f, 0.000001f, 0.002696f, 0.000974f, 0.010228f, 0.000018f, 0.011706f, +0.001359f, 0.014628f, 0.000052f, 0.015365f, 0.001057f, 0.010321f, 0.000494f, 0.021174f, 0.002287f, 0.033452f, 0.000078f, 0.029016f, 0.001741f, 0.013048f, 0.000177f, 0.021549f, +0.001074f, 0.017060f, 0.000229f, 0.015486f, 0.000813f, 0.011714f, 0.002127f, 0.020769f, 0.001180f, 0.025482f, 0.000226f, 0.019101f, 0.000895f, 0.009906f, 0.000509f, 0.014137f, +0.002018f, 0.021855f, 0.000141f, 0.021532f, 0.002334f, 0.022921f, 0.001993f, 0.044107f, 0.003655f, 0.053773f, 0.000228f, 0.043748f, 0.005059f, 0.038150f, 0.000938f, 0.059093f, +0.107576f, 1.206389f, 0.000486f, 0.649247f, 0.076804f, 0.781154f, 0.004246f, 0.821115f, 0.014146f, 0.215573f, 0.000057f, 0.095802f, 0.099073f, 0.773743f, 0.001190f, 0.654677f, +0.186363f, 1.492322f, 0.004653f, 1.158860f, 0.161678f, 1.174171f, 0.049435f, 1.780921f, 0.215127f, 2.340911f, 0.004803f, 1.501109f, 0.257668f, 1.436913f, 0.017112f, 1.754310f, +0.103012f, 1.217588f, 0.014389f, 0.817088f, 0.086974f, 0.932356f, 0.148793f, 1.222067f, 0.077669f, 1.247516f, 0.009703f, 0.691311f, 0.092711f, 0.763154f, 
0.034450f, 0.805170f, +0.382423f, 3.081097f, 0.017433f, 2.244130f, 0.493156f, 3.603483f, 0.275329f, 5.126371f, 0.474946f, 5.199878f, 0.019364f, 3.127483f, 1.034683f, 5.805460f, 0.125471f, 6.647936f, +0.091809f, 1.667222f, 0.000007f, 0.753258f, 0.045661f, 0.752025f, 0.000045f, 0.663633f, 0.008663f, 0.213769f, 0.000001f, 0.079754f, 0.049128f, 0.621309f, 0.000011f, 0.441333f, +0.093087f, 1.207059f, 0.000042f, 0.786910f, 0.056256f, 0.661587f, 0.000310f, 0.842419f, 0.077103f, 1.358615f, 0.000031f, 0.731395f, 0.074782f, 0.675307f, 0.000089f, 0.692158f, +0.002507f, 0.047979f, 0.000006f, 0.027030f, 0.001474f, 0.025593f, 0.000045f, 0.028162f, 0.001356f, 0.035273f, 0.000003f, 0.016410f, 0.001311f, 0.017473f, 0.000009f, 0.015476f, +0.117702f, 1.535608f, 0.000097f, 0.938968f, 0.105733f, 1.251085f, 0.001064f, 1.494178f, 0.104889f, 1.859575f, 0.000077f, 0.938952f, 0.185034f, 1.681189f, 0.000404f, 1.616199f, +0.197224f, 4.517593f, 0.000109f, 1.787820f, 0.103376f, 2.147561f, 0.000702f, 1.659996f, 0.020572f, 0.640356f, 0.000010f, 0.209265f, 0.076020f, 1.212668f, 0.000112f, 0.754512f, +0.700434f, 11.456320f, 0.002146f, 6.541953f, 0.446116f, 6.617633f, 0.016743f, 7.380911f, 0.641372f, 14.255270f, 0.001758f, 6.721974f, 0.405315f, 4.616768f, 0.003304f, 4.144842f, +0.162955f, 3.934202f, 0.002794f, 1.941418f, 0.101009f, 2.211705f, 0.021211f, 2.131741f, 0.097462f, 3.197499f, 0.001495f, 1.302962f, 0.061381f, 1.032033f, 0.002800f, 0.800688f, +0.456174f, 7.506995f, 0.002552f, 4.020712f, 0.431876f, 6.445729f, 0.029596f, 6.743009f, 0.449406f, 10.049910f, 0.002249f, 4.444858f, 0.516557f, 5.920012f, 0.007689f, 4.985019f, +0.046019f, 0.898640f, 0.000056f, 0.401098f, 0.021690f, 0.384140f, 0.000324f, 0.334887f, 0.006552f, 0.173875f, 0.000007f, 0.064086f, 0.025875f, 0.351891f, 0.000084f, 0.246933f, +0.057670f, 0.804131f, 0.000389f, 0.517890f, 0.033029f, 0.417685f, 0.002729f, 0.525417f, 0.072082f, 1.365822f, 0.000435f, 0.726379f, 0.048681f, 0.472723f, 0.000874f, 0.478657f, +0.039163f, 0.806050f, 
0.001478f, 0.448613f, 0.021829f, 0.407471f, 0.010091f, 0.442948f, 0.031972f, 0.894237f, 0.001079f, 0.410981f, 0.021519f, 0.308451f, 0.002161f, 0.269900f, +0.071080f, 0.997203f, 0.000876f, 0.602377f, 0.060512f, 0.769935f, 0.009129f, 0.908414f, 0.095585f, 1.822286f, 0.001053f, 0.908992f, 0.117414f, 1.147168f, 0.003847f, 1.089480f, +0.370930f, 5.389782f, 0.000296f, 1.778451f, 0.194967f, 2.569320f, 0.001905f, 1.655899f, 0.036227f, 0.715321f, 0.000026f, 0.194908f, 0.225132f, 2.278166f, 0.000478f, 1.181853f, +0.676656f, 7.020649f, 0.002985f, 3.342672f, 0.432172f, 4.066713f, 0.023350f, 3.781855f, 0.580131f, 8.179425f, 0.002289f, 3.215868f, 0.616556f, 4.455022f, 0.007235f, 3.334830f, +0.321470f, 4.923340f, 0.007935f, 2.025705f, 0.199821f, 2.775485f, 0.060406f, 2.230490f, 0.180020f, 3.746526f, 0.003974f, 1.272929f, 0.190672f, 2.033650f, 0.012520f, 1.315528f, +1.152476f, 12.030940f, 0.009283f, 5.372671f, 1.094130f, 10.358910f, 0.107939f, 9.035447f, 1.063054f, 15.080320f, 0.007658f, 5.561096f, 2.054942f, 14.939470f, 0.044033f, 10.488990f, +0.030159f, 0.577153f, 0.000004f, 0.396995f, 0.019771f, 0.343147f, 0.000034f, 0.461020f, 0.002910f, 0.075670f, 0.000000f, 0.042981f, 0.022744f, 0.303121f, 0.000008f, 0.327807f, +0.033123f, 0.452622f, 0.000025f, 0.449237f, 0.026385f, 0.326998f, 0.000249f, 0.633913f, 0.028054f, 0.520937f, 0.000019f, 0.426957f, 0.037501f, 0.356878f, 0.000077f, 0.556887f, +0.001045f, 0.021078f, 0.000005f, 0.018079f, 0.000810f, 0.014820f, 0.000043f, 0.024828f, 0.000578f, 0.015846f, 0.000002f, 0.011223f, 0.000770f, 0.010818f, 0.000009f, 0.014588f, +0.051142f, 0.703153f, 0.000072f, 0.654582f, 0.060557f, 0.755104f, 0.001043f, 1.372986f, 0.046602f, 0.870692f, 0.000059f, 0.669327f, 0.113308f, 1.084918f, 0.000424f, 1.587884f, +0.032179f, 0.776766f, 0.000031f, 0.468005f, 0.022232f, 0.486720f, 0.000258f, 0.572775f, 0.003432f, 0.112586f, 0.000003f, 0.056015f, 0.017480f, 0.293857f, 0.000044f, 0.278358f, +0.123791f, 2.133721f, 0.000649f, 1.854999f, 0.103925f, 1.624598f, 
0.006675f, 2.758651f, 0.115908f, 2.714873f, 0.000544f, 1.949013f, 0.100955f, 1.211830f, 0.001408f, 1.656362f, +0.033742f, 0.858468f, 0.000990f, 0.644956f, 0.027568f, 0.636129f, 0.009908f, 0.933461f, 0.020635f, 0.713443f, 0.000542f, 0.442613f, 0.017912f, 0.317375f, 0.001398f, 0.374874f, +0.098450f, 1.707343f, 0.000943f, 1.392199f, 0.122855f, 1.932312f, 0.014409f, 3.077532f, 0.099175f, 2.337213f, 0.000849f, 1.573757f, 0.157113f, 1.897529f, 0.004002f, 2.432631f, +0.017999f, 0.370395f, 0.000038f, 0.251694f, 0.011182f, 0.208698f, 0.000286f, 0.276995f, 0.002621f, 0.073282f, 0.000005f, 0.041121f, 0.014263f, 0.204408f, 0.000079f, 0.218380f, +0.024432f, 0.359017f, 0.000282f, 0.352022f, 0.018444f, 0.245804f, 0.002608f, 0.470747f, 0.031227f, 0.623540f, 0.000322f, 0.504868f, 0.029066f, 0.297445f, 0.000893f, 0.458531f, +0.019439f, 0.421624f, 0.001256f, 0.357256f, 0.014281f, 0.280938f, 0.011299f, 0.464954f, 0.016227f, 0.478297f, 0.000938f, 0.334665f, 0.015053f, 0.227384f, 0.002587f, 0.302916f, +0.036773f, 0.543669f, 0.000775f, 0.499992f, 0.041264f, 0.553294f, 0.010654f, 0.993869f, 0.050565f, 1.015895f, 0.000953f, 0.771501f, 0.085608f, 0.881433f, 0.004801f, 1.274457f, +0.116597f, 1.785404f, 0.000159f, 0.896914f, 0.080780f, 1.121848f, 0.001351f, 1.100761f, 0.011644f, 0.242297f, 0.000014f, 0.100513f, 0.099734f, 1.063560f, 0.000362f, 0.840009f, +0.230395f, 2.519139f, 0.001739f, 1.826048f, 0.193960f, 1.923397f, 0.017935f, 2.723165f, 0.201982f, 3.001092f, 0.001364f, 1.796381f, 0.295861f, 2.252869f, 0.005942f, 2.567459f, +0.128239f, 2.069710f, 0.005417f, 1.296491f, 0.105068f, 1.537939f, 0.054358f, 1.881673f, 0.073431f, 1.610497f, 0.002774f, 0.833066f, 0.107196f, 1.204861f, 0.012046f, 1.186600f, +0.479179f, 5.271525f, 0.006606f, 3.584023f, 0.599634f, 5.982758f, 0.101241f, 7.944760f, 0.451964f, 6.756613f, 0.005572f, 3.793347f, 1.204141f, 9.225362f, 0.044159f, 9.861100f, +0.133964f, 1.468220f, 0.000010f, 0.741731f, 0.067560f, 0.671536f, 0.000064f, 0.662628f, 0.011174f, 0.166417f, 
0.000001f, 0.069424f, 0.071220f, 0.543591f, 0.000015f, 0.431753f, +0.135138f, 1.057575f, 0.000058f, 0.770924f, 0.082813f, 0.587772f, 0.000436f, 0.836863f, 0.098950f, 1.052286f, 0.000038f, 0.633423f, 0.107857f, 0.587828f, 0.000123f, 0.673688f, +0.002672f, 0.030870f, 0.000006f, 0.019446f, 0.001594f, 0.016697f, 0.000047f, 0.020544f, 0.001278f, 0.020062f, 0.000003f, 0.010436f, 0.001388f, 0.011169f, 0.000009f, 0.011062f, +0.151689f, 1.194385f, 0.000119f, 0.816619f, 0.138173f, 0.986711f, 0.001329f, 1.317680f, 0.119496f, 1.278594f, 0.000084f, 0.721883f, 0.236913f, 1.299114f, 0.000495f, 1.396464f, +0.249939f, 3.455213f, 0.000132f, 1.528959f, 0.132842f, 1.665530f, 0.000862f, 1.439523f, 0.023047f, 0.432956f, 0.000011f, 0.158206f, 0.095712f, 0.921460f, 0.000135f, 0.641069f, +0.883134f, 8.717606f, 0.002588f, 5.566268f, 0.570357f, 5.106158f, 0.020470f, 6.368044f, 0.714866f, 9.589207f, 0.001874f, 5.056018f, 0.507713f, 3.490254f, 0.003958f, 3.503736f, +0.150880f, 2.198430f, 0.002474f, 1.213052f, 0.094834f, 1.253206f, 0.019043f, 1.350623f, 0.079772f, 1.579507f, 0.001170f, 0.719693f, 0.056463f, 0.572949f, 0.002463f, 0.497039f, +0.510588f, 5.071072f, 0.002732f, 3.036974f, 0.490161f, 4.415146f, 0.032121f, 5.164535f, 0.444666f, 6.001376f, 0.002128f, 2.967913f, 0.574415f, 3.973041f, 0.008176f, 3.740862f, +0.090098f, 1.061835f, 0.000105f, 0.529939f, 0.043060f, 0.460256f, 0.000615f, 0.448656f, 0.011340f, 0.181620f, 0.000012f, 0.074850f, 0.050331f, 0.413091f, 0.000156f, 0.324132f, +0.112334f, 0.945328f, 0.000725f, 0.680765f, 0.065237f, 0.497902f, 0.005154f, 0.700332f, 0.124121f, 1.419400f, 0.000716f, 0.844070f, 0.094208f, 0.552115f, 0.001617f, 0.625103f, +0.056019f, 0.695859f, 0.002022f, 0.433048f, 0.031662f, 0.356694f, 0.013996f, 0.433566f, 0.040429f, 0.682444f, 0.001305f, 0.350704f, 0.030582f, 0.264553f, 0.002936f, 0.258842f, +0.122911f, 1.040689f, 0.001448f, 0.702927f, 0.106102f, 0.814762f, 0.015307f, 1.074892f, 0.146113f, 1.681159f, 0.001539f, 0.937685f, 0.201712f, 1.189409f, 
0.006321f, 1.263071f, +0.522196f, 4.579373f, 0.000398f, 1.689589f, 0.278319f, 2.213565f, 0.002600f, 1.595190f, 0.045085f, 0.537268f, 0.000031f, 0.163691f, 0.314881f, 1.923034f, 0.000639f, 1.115500f, +0.947752f, 5.934670f, 0.003998f, 3.159494f, 0.613794f, 3.485798f, 0.031712f, 3.624666f, 0.718303f, 6.112195f, 0.002710f, 2.687059f, 0.857957f, 3.741415f, 0.009628f, 3.131585f, +0.330651f, 3.056208f, 0.007804f, 1.406060f, 0.208406f, 1.747034f, 0.060246f, 1.569882f, 0.163684f, 2.055923f, 0.003455f, 0.781064f, 0.194843f, 1.254197f, 0.012234f, 0.907182f, +1.432978f, 9.028179f, 0.011038f, 4.508120f, 1.379482f, 7.882320f, 0.130140f, 7.687661f, 1.168472f, 10.003830f, 0.008050f, 4.124970f, 2.538482f, 11.137890f, 0.052016f, 8.743909f, +0.039881f, 0.558942f, 0.000018f, 0.411878f, 0.030028f, 0.381682f, 0.000169f, 0.549350f, 0.006244f, 0.118916f, 0.000003f, 0.072361f, 0.037192f, 0.363015f, 0.000046f, 0.420566f, +0.030630f, 0.306531f, 0.000078f, 0.325929f, 0.028023f, 0.254349f, 0.000874f, 0.528229f, 0.042097f, 0.572488f, 0.000096f, 0.502659f, 0.042883f, 0.298876f, 0.000290f, 0.499628f, +0.000921f, 0.013603f, 0.000013f, 0.012499f, 0.000820f, 0.010985f, 0.000143f, 0.019715f, 0.000827f, 0.016594f, 0.000011f, 0.012591f, 0.000839f, 0.008634f, 0.000032f, 0.012472f, +0.047218f, 0.475439f, 0.000219f, 0.474152f, 0.064214f, 0.586405f, 0.003655f, 1.142260f, 0.069819f, 0.955327f, 0.000290f, 0.786744f, 0.129364f, 0.907139f, 0.001600f, 1.422341f, +0.048359f, 0.854909f, 0.000152f, 0.551809f, 0.038374f, 0.615254f, 0.001474f, 0.775654f, 0.008370f, 0.201075f, 0.000023f, 0.107173f, 0.032485f, 0.399942f, 0.000271f, 0.405857f, +0.130095f, 1.642218f, 0.002256f, 1.529485f, 0.125440f, 1.436100f, 0.026643f, 2.612426f, 0.197663f, 3.390666f, 0.003066f, 2.607705f, 0.131198f, 1.153365f, 0.006053f, 1.688841f, +0.033790f, 0.629614f, 0.003279f, 0.506744f, 0.031709f, 0.535847f, 0.037682f, 0.842365f, 0.033534f, 0.849087f, 0.002910f, 0.564320f, 0.022182f, 0.287842f, 0.005726f, 0.364230f, +0.103298f, 1.311958f, 
0.003271f, 1.146064f, 0.148052f, 1.705384f, 0.057418f, 2.909750f, 0.168858f, 2.914337f, 0.004782f, 2.102265f, 0.203855f, 1.803098f, 0.017172f, 2.476370f, +0.001333f, 0.020096f, 0.000009f, 0.014629f, 0.000951f, 0.013005f, 0.000080f, 0.018491f, 0.000315f, 0.006452f, 0.000002f, 0.003878f, 0.001307f, 0.013714f, 0.000024f, 0.015696f, +0.001266f, 0.013621f, 0.000048f, 0.014308f, 0.001097f, 0.010711f, 0.000513f, 0.021976f, 0.002625f, 0.038389f, 0.000090f, 0.033299f, 0.001862f, 0.013955f, 0.000189f, 0.023047f, +0.000960f, 0.015243f, 0.000205f, 0.013837f, 0.000810f, 0.011666f, 0.002118f, 0.020683f, 0.001300f, 0.028061f, 0.000248f, 0.021034f, 0.000919f, 0.010166f, 0.000522f, 0.014508f, +0.001902f, 0.020594f, 0.000133f, 0.020290f, 0.002451f, 0.024072f, 0.002093f, 0.046322f, 0.004244f, 0.062445f, 0.000265f, 0.050803f, 0.005476f, 0.041288f, 0.001015f, 0.063954f, +0.068670f, 0.770088f, 0.000310f, 0.414441f, 0.054643f, 0.555755f, 0.003021f, 0.584186f, 0.011128f, 0.169588f, 0.000045f, 0.075366f, 0.072637f, 0.567280f, 0.000872f, 0.479985f, +0.094889f, 0.759836f, 0.002369f, 0.590049f, 0.091749f, 0.666319f, 0.028053f, 1.010637f, 0.134989f, 1.468889f, 0.003014f, 0.941925f, 0.150683f, 0.840302f, 0.010007f, 1.025914f, +0.050329f, 0.594887f, 0.007030f, 0.399211f, 0.047361f, 0.507703f, 0.081023f, 0.665461f, 0.046766f, 0.751150f, 0.005842f, 0.416250f, 0.052025f, 0.428246f, 0.019332f, 0.451824f, +0.197038f, 1.587486f, 0.008982f, 1.156252f, 0.283193f, 2.069286f, 0.158106f, 2.943799f, 0.301576f, 3.301755f, 0.012295f, 1.985851f, 0.612293f, 3.435490f, 0.074250f, 3.934041f, +0.115556f, 2.098468f, 0.000009f, 0.948097f, 0.064054f, 1.054957f, 0.000064f, 0.930959f, 0.013437f, 0.331590f, 0.000001f, 0.123712f, 0.071021f, 0.898182f, 0.000015f, 0.638003f, +0.093455f, 1.211829f, 0.000042f, 0.790019f, 0.062947f, 0.740275f, 0.000347f, 0.942615f, 0.095396f, 1.680956f, 0.000038f, 0.904923f, 0.086229f, 0.778685f, 0.000103f, 0.798115f, +0.002415f, 0.046221f, 0.000006f, 0.026040f, 0.001583f, 0.027479f, 
0.000049f, 0.030238f, 0.001610f, 0.041877f, 0.000004f, 0.019482f, 0.001450f, 0.019333f, 0.000010f, 0.017124f, +0.119576f, 1.560057f, 0.000098f, 0.953919f, 0.119720f, 1.416579f, 0.001204f, 1.691828f, 0.131322f, 2.328204f, 0.000096f, 1.175576f, 0.215903f, 1.961663f, 0.000472f, 1.885830f, +0.246940f, 5.656400f, 0.000137f, 2.238498f, 0.144260f, 2.996899f, 0.000979f, 2.316507f, 0.031744f, 0.988101f, 0.000016f, 0.322907f, 0.109322f, 1.743903f, 0.000161f, 1.085042f, +0.699527f, 11.441470f, 0.002144f, 6.533476f, 0.496567f, 7.366025f, 0.018637f, 8.215623f, 0.789395f, 17.545240f, 0.002163f, 8.273338f, 0.464918f, 5.295689f, 0.003790f, 4.754363f, +0.156164f, 3.770251f, 0.002678f, 1.860513f, 0.107887f, 2.362296f, 0.022655f, 2.276888f, 0.115105f, 3.776340f, 0.001765f, 1.538837f, 0.067561f, 1.135938f, 0.003082f, 0.881301f, +0.461014f, 7.586657f, 0.002580f, 4.063378f, 0.486448f, 7.260223f, 0.033336f, 7.595067f, 0.559719f, 12.516800f, 0.002801f, 5.535914f, 0.599584f, 6.871545f, 0.008925f, 5.786269f, +0.048247f, 0.942153f, 0.000059f, 0.420519f, 0.025345f, 0.448868f, 0.000379f, 0.391316f, 0.008466f, 0.224657f, 0.000009f, 0.082803f, 0.031158f, 0.423732f, 0.000101f, 0.297347f, +0.048227f, 0.672460f, 0.000325f, 0.433089f, 0.030784f, 0.389298f, 0.002543f, 0.489708f, 0.074287f, 1.407605f, 0.000448f, 0.748601f, 0.046757f, 0.454040f, 0.000839f, 0.459740f, +0.031426f, 0.646812f, 0.001186f, 0.359988f, 0.019523f, 0.364424f, 0.009025f, 0.396152f, 0.031618f, 0.884333f, 0.001067f, 0.406430f, 0.019833f, 0.284282f, 0.001991f, 0.248752f, +0.060149f, 0.843861f, 0.000741f, 0.509748f, 0.057071f, 0.726164f, 0.008610f, 0.856771f, 0.099684f, 1.900425f, 0.001098f, 0.947970f, 0.114118f, 1.114966f, 0.003739f, 1.058897f, +0.212641f, 3.089767f, 0.000170f, 1.019522f, 0.124569f, 1.641596f, 0.001217f, 1.057991f, 0.025594f, 0.505362f, 0.000018f, 0.137699f, 0.148231f, 1.499987f, 0.000315f, 0.778154f, +0.309405f, 3.210227f, 0.001365f, 1.528453f, 0.220247f, 2.072505f, 0.011900f, 1.927334f, 0.326913f, 4.609229f, 
0.001290f, 1.812190f, 0.323801f, 2.339676f, 0.003800f, 1.751377f, +0.141051f, 2.160205f, 0.003481f, 0.888815f, 0.097717f, 1.357274f, 0.029540f, 1.090759f, 0.097343f, 2.025867f, 0.002149f, 0.688314f, 0.096088f, 1.024846f, 0.006309f, 0.662953f, +0.533258f, 5.566796f, 0.004295f, 2.485971f, 0.564246f, 5.342117f, 0.055665f, 4.659605f, 0.606189f, 8.599308f, 0.004367f, 3.171125f, 1.092076f, 7.939419f, 0.023401f, 5.574257f, +0.000622f, 0.011898f, 0.000000f, 0.008184f, 0.000454f, 0.007884f, 0.000001f, 0.010593f, 0.000074f, 0.001922f, 0.000000f, 0.001092f, 0.000539f, 0.007177f, 0.000000f, 0.007762f, +0.000545f, 0.007443f, 0.000000f, 0.007387f, 0.000484f, 0.005993f, 0.000005f, 0.011618f, 0.000569f, 0.010557f, 0.000000f, 0.008652f, 0.000708f, 0.006740f, 0.000001f, 0.010517f, +0.000016f, 0.000333f, 0.000000f, 0.000285f, 0.000014f, 0.000261f, 0.000001f, 0.000437f, 0.000011f, 0.000308f, 0.000000f, 0.000218f, 0.000014f, 0.000196f, 0.000000f, 0.000264f, +0.000851f, 0.011700f, 0.000001f, 0.010892f, 0.001123f, 0.014004f, 0.000019f, 0.025463f, 0.000956f, 0.017855f, 0.000001f, 0.013726f, 0.002165f, 0.020734f, 0.000008f, 0.030347f, +0.000660f, 0.015930f, 0.000001f, 0.009598f, 0.000508f, 0.011125f, 0.000006f, 0.013092f, 0.000087f, 0.002845f, 0.000000f, 0.001416f, 0.000412f, 0.006922f, 0.000001f, 0.006556f, +0.002025f, 0.034903f, 0.000011f, 0.030343f, 0.001895f, 0.029618f, 0.000122f, 0.050293f, 0.002337f, 0.054729f, 0.000011f, 0.039290f, 0.001897f, 0.022767f, 0.000026f, 0.031119f, +0.000530f, 0.013475f, 0.000016f, 0.010123f, 0.000482f, 0.011128f, 0.000173f, 0.016330f, 0.000399f, 0.013801f, 0.000010f, 0.008562f, 0.000323f, 0.005722f, 0.000025f, 0.006758f, +0.001630f, 0.028261f, 0.000016f, 0.023045f, 0.002266f, 0.035648f, 0.000266f, 0.056776f, 0.002023f, 0.047678f, 0.000017f, 0.032104f, 0.002987f, 0.036075f, 0.000076f, 0.046248f, +0.000309f, 0.006360f, 0.000001f, 0.004322f, 0.000214f, 0.003994f, 0.000005f, 0.005301f, 0.000055f, 0.001551f, 0.000000f, 0.000870f, 0.000281f, 0.004031f, 
0.000002f, 0.004307f, +0.000335f, 0.004917f, 0.000004f, 0.004822f, 0.000282f, 0.003752f, 0.000040f, 0.007186f, 0.000527f, 0.010525f, 0.000005f, 0.008522f, 0.000457f, 0.004679f, 0.000014f, 0.007213f, +0.000255f, 0.005541f, 0.000017f, 0.004695f, 0.000209f, 0.004115f, 0.000166f, 0.006811f, 0.000263f, 0.007747f, 0.000015f, 0.005421f, 0.000227f, 0.003432f, 0.000039f, 0.004573f, +0.000510f, 0.007535f, 0.000011f, 0.006930f, 0.000637f, 0.008547f, 0.000165f, 0.015353f, 0.000864f, 0.017353f, 0.000016f, 0.013178f, 0.001363f, 0.014032f, 0.000076f, 0.020288f, +0.001095f, 0.016764f, 0.000001f, 0.008422f, 0.000845f, 0.011740f, 0.000014f, 0.011519f, 0.000135f, 0.002804f, 0.000000f, 0.001163f, 0.001076f, 0.011470f, 0.000004f, 0.009059f, +0.001726f, 0.018867f, 0.000013f, 0.013676f, 0.001619f, 0.016055f, 0.000150f, 0.022731f, 0.001864f, 0.027699f, 0.000013f, 0.016580f, 0.002545f, 0.019379f, 0.000051f, 0.022085f, +0.000922f, 0.014874f, 0.000039f, 0.009317f, 0.000842f, 0.012318f, 0.000435f, 0.015072f, 0.000650f, 0.014264f, 0.000025f, 0.007378f, 0.000885f, 0.009945f, 0.000099f, 0.009794f, +0.003632f, 0.039951f, 0.000050f, 0.027162f, 0.005065f, 0.050534f, 0.000855f, 0.067107f, 0.004221f, 0.063105f, 0.000052f, 0.035429f, 0.010481f, 0.080301f, 0.000384f, 0.085835f, +0.224501f, 2.460486f, 0.000017f, 1.243014f, 0.126187f, 1.254275f, 0.000120f, 1.237636f, 0.023077f, 0.343696f, 0.000002f, 0.143380f, 0.137081f, 1.046284f, 0.000028f, 0.831021f, +0.180639f, 1.413659f, 0.000078f, 1.030494f, 0.123375f, 0.875661f, 0.000650f, 1.246756f, 0.163003f, 1.733463f, 0.000063f, 1.043457f, 0.165589f, 0.902468f, 0.000189f, 1.034284f, +0.003428f, 0.039596f, 0.000008f, 0.024943f, 0.002278f, 0.023870f, 0.000067f, 0.029370f, 0.002020f, 0.031713f, 0.000004f, 0.016497f, 0.002045f, 0.016454f, 0.000013f, 0.016296f, +0.205180f, 1.615568f, 0.000161f, 1.104588f, 0.208304f, 1.487527f, 0.002004f, 1.986482f, 0.199197f, 2.131379f, 0.000140f, 1.203358f, 0.368059f, 2.018253f, 0.000769f, 2.169493f, +0.416665f, 5.760081f, 
0.000221f, 2.548882f, 0.246821f, 3.094567f, 0.001602f, 2.674644f, 0.047350f, 0.889498f, 0.000022f, 0.325031f, 0.183260f, 1.764321f, 0.000258f, 1.227457f, +1.174314f, 11.591910f, 0.003441f, 7.401534f, 0.845274f, 7.567380f, 0.030337f, 9.437507f, 1.171465f, 15.714020f, 0.003070f, 8.285397f, 0.775396f, 5.330430f, 0.006044f, 5.351020f, +0.192516f, 2.805091f, 0.003156f, 1.547796f, 0.134863f, 1.782175f, 0.027082f, 1.920712f, 0.125440f, 2.483721f, 0.001839f, 1.131693f, 0.082746f, 0.839650f, 0.003609f, 0.728404f, +0.687030f, 6.823463f, 0.003676f, 4.086449f, 0.735085f, 6.621307f, 0.048172f, 7.745151f, 0.737371f, 9.951831f, 0.003529f, 4.921566f, 0.887725f, 6.140102f, 0.012636f, 5.781284f, +0.125768f, 1.482223f, 0.000147f, 0.739746f, 0.066993f, 0.716060f, 0.000957f, 0.698013f, 0.019509f, 0.312440f, 0.000020f, 0.128764f, 0.080693f, 0.662292f, 0.000250f, 0.519668f, +0.125075f, 1.052550f, 0.000807f, 0.757980f, 0.080956f, 0.617872f, 0.006396f, 0.869076f, 0.170315f, 1.947656f, 0.000983f, 1.158207f, 0.120475f, 0.706053f, 0.002067f, 0.799392f, +0.059851f, 0.743461f, 0.002160f, 0.462672f, 0.037702f, 0.424743f, 0.016667f, 0.516281f, 0.053233f, 0.898569f, 0.001718f, 0.461769f, 0.037527f, 0.324636f, 0.003603f, 0.317628f, +0.138483f, 1.172542f, 0.001631f, 0.791987f, 0.133237f, 1.023133f, 0.019221f, 1.349790f, 0.202882f, 2.334338f, 0.002137f, 1.302003f, 0.261028f, 1.539169f, 0.008179f, 1.634493f, +0.398574f, 3.495274f, 0.000304f, 1.289604f, 0.236762f, 1.883047f, 0.002212f, 1.357004f, 0.042408f, 0.505374f, 0.000029f, 0.153974f, 0.276038f, 1.685815f, 0.000560f, 0.977896f, +0.576997f, 3.613065f, 0.002434f, 1.923520f, 0.416481f, 2.365239f, 0.021518f, 2.459466f, 0.538932f, 4.585889f, 0.002033f, 2.016060f, 0.599919f, 2.616150f, 0.006732f, 2.189732f, +0.193164f, 1.785413f, 0.004559f, 0.821409f, 0.135694f, 1.137498f, 0.039226f, 1.022155f, 0.117845f, 1.480164f, 0.002488f, 0.562328f, 0.130734f, 0.841529f, 0.008209f, 0.608692f, +0.882808f, 5.561946f, 0.006800f, 2.777295f, 0.947188f, 5.412206f, 
0.089357f, 5.278548f, 0.887140f, 7.595216f, 0.006112f, 3.131803f, 1.796173f, 7.880921f, 0.036805f, 6.186995f, +0.146804f, 2.057506f, 0.000068f, 1.516155f, 0.095602f, 1.215201f, 0.000539f, 1.749023f, 0.019766f, 0.376457f, 0.000008f, 0.229074f, 0.110692f, 1.080406f, 0.000136f, 1.251691f, +0.108744f, 1.088268f, 0.000277f, 1.157135f, 0.086050f, 0.781020f, 0.002683f, 1.622017f, 0.128530f, 1.747937f, 0.000293f, 1.534734f, 0.123095f, 0.857907f, 0.000834f, 1.434154f, +0.004173f, 0.061639f, 0.000059f, 0.056637f, 0.003213f, 0.043052f, 0.000561f, 0.077266f, 0.003221f, 0.064665f, 0.000041f, 0.049065f, 0.003075f, 0.031630f, 0.000116f, 0.045694f, +0.163763f, 1.648948f, 0.000761f, 1.644484f, 0.192625f, 1.759064f, 0.010966f, 3.426487f, 0.208250f, 2.849460f, 0.000866f, 2.346628f, 0.362757f, 2.543751f, 0.004485f, 3.988455f, +0.134323f, 2.374616f, 0.000421f, 1.532717f, 0.092189f, 1.478086f, 0.003540f, 1.863431f, 0.019994f, 0.480319f, 0.000056f, 0.256010f, 0.072954f, 0.898172f, 0.000609f, 0.911456f, +0.348514f, 4.399373f, 0.006044f, 4.097370f, 0.290649f, 3.327489f, 0.061732f, 6.053072f, 0.455391f, 7.811659f, 0.007063f, 6.007817f, 0.284169f, 2.498132f, 0.013109f, 3.657944f, +0.115536f, 2.152773f, 0.011210f, 1.732657f, 0.093773f, 1.584663f, 0.111438f, 2.491128f, 0.098606f, 2.496745f, 0.008557f, 1.659386f, 0.061322f, 0.795731f, 0.015828f, 1.006904f, +0.270335f, 3.433455f, 0.008560f, 2.999301f, 0.335119f, 3.860160f, 0.129966f, 6.586261f, 0.380043f, 6.559177f, 0.010763f, 4.731481f, 0.431342f, 3.815216f, 0.036335f, 5.239808f, +0.005384f, 0.081143f, 0.000037f, 0.059070f, 0.003323f, 0.045417f, 0.000281f, 0.064578f, 0.001094f, 0.022404f, 0.000007f, 0.013468f, 0.004266f, 0.044772f, 0.000078f, 0.051242f, +0.004929f, 0.053046f, 0.000188f, 0.055720f, 0.003696f, 0.036078f, 0.001728f, 0.074020f, 0.008792f, 0.128570f, 0.000300f, 0.111522f, 0.005863f, 0.043940f, 0.000595f, 0.072566f, +0.004770f, 0.075767f, 0.001019f, 0.068777f, 0.003481f, 0.050152f, 0.009107f, 0.088919f, 0.005557f, 0.119948f, 
0.001062f, 0.089912f, 0.003693f, 0.040854f, 0.002099f, 0.058305f, +0.007236f, 0.078348f, 0.000504f, 0.077190f, 0.008066f, 0.079207f, 0.006886f, 0.152422f, 0.013886f, 0.204306f, 0.000866f, 0.166218f, 0.016842f, 0.126999f, 0.003123f, 0.196719f, +0.231101f, 2.591637f, 0.001043f, 1.394751f, 0.159052f, 1.617668f, 0.008793f, 1.700422f, 0.032208f, 0.490824f, 0.000130f, 0.218126f, 0.197642f, 1.543547f, 0.002373f, 1.306020f, +0.307991f, 2.466266f, 0.007689f, 1.915174f, 0.257570f, 1.870571f, 0.078754f, 2.837184f, 0.376806f, 4.100223f, 0.008413f, 2.629268f, 0.395433f, 2.205179f, 0.026262f, 2.692276f, +0.208500f, 2.464440f, 0.029124f, 1.653814f, 0.169697f, 1.819137f, 0.290313f, 2.384397f, 0.166613f, 2.676142f, 0.020815f, 1.482985f, 0.174255f, 1.434385f, 0.064751f, 1.513358f, +0.624770f, 5.033631f, 0.028480f, 3.666267f, 0.776651f, 5.674976f, 0.433604f, 8.073310f, 0.822368f, 9.003568f, 0.033528f, 5.415224f, 1.569710f, 8.807420f, 0.190351f, 10.085530f, +0.549140f, 9.972235f, 0.000045f, 4.505501f, 0.263275f, 4.336078f, 0.000262f, 3.826419f, 0.054915f, 1.355157f, 0.000004f, 0.505591f, 0.272876f, 3.450985f, 0.000059f, 2.451326f, +0.428330f, 5.554156f, 0.000193f, 3.620882f, 0.249531f, 2.934553f, 0.001375f, 3.736656f, 0.376014f, 6.625692f, 0.000151f, 3.566864f, 0.319536f, 2.885539f, 0.000382f, 2.957540f, +0.014126f, 0.270385f, 0.000036f, 0.152328f, 0.008009f, 0.139033f, 0.000247f, 0.152989f, 0.008100f, 0.210678f, 0.000018f, 0.098011f, 0.006860f, 0.091440f, 0.000046f, 0.080991f, +0.535391f, 6.985035f, 0.000440f, 4.271095f, 0.463624f, 5.485809f, 0.004664f, 6.551733f, 0.505664f, 8.964940f, 0.000371f, 4.526652f, 0.781583f, 7.101348f, 0.001708f, 6.826827f, +0.885485f, 20.282860f, 0.000491f, 8.026864f, 0.447412f, 9.294653f, 0.003036f, 7.184470f, 0.097893f, 3.047116f, 0.000049f, 0.995783f, 0.316945f, 5.055918f, 0.000467f, 3.145749f, +2.419245f, 39.569210f, 0.007413f, 22.595390f, 1.485339f, 22.033360f, 0.055746f, 24.574680f, 2.347837f, 52.183490f, 0.006435f, 24.606770f, 1.299992f, 14.807660f, 
0.010597f, 13.294020f, +0.689321f, 16.642160f, 0.011819f, 8.212438f, 0.411889f, 9.018737f, 0.086493f, 8.692666f, 0.436951f, 14.335370f, 0.006700f, 5.841579f, 0.241116f, 4.053988f, 0.010997f, 3.145229f, +1.557547f, 25.631690f, 0.008715f, 13.728220f, 1.421462f, 21.215280f, 0.097411f, 22.193740f, 1.626279f, 36.367940f, 0.008138f, 16.084760f, 1.637817f, 18.770240f, 0.024379f, 15.805710f, +0.251498f, 4.911180f, 0.000307f, 2.192050f, 0.114268f, 2.023739f, 0.001707f, 1.764266f, 0.037953f, 1.007123f, 0.000041f, 0.371198f, 0.131318f, 1.785845f, 0.000426f, 1.253187f, +0.242458f, 3.380782f, 0.001636f, 2.177346f, 0.133859f, 1.692798f, 0.011060f, 2.129415f, 0.321190f, 6.085970f, 0.001938f, 3.236676f, 0.190057f, 1.845582f, 0.003411f, 1.868750f, +0.201652f, 4.150429f, 0.007611f, 2.309954f, 0.108349f, 2.022524f, 0.050087f, 2.198614f, 0.174482f, 4.880103f, 0.005890f, 2.242841f, 0.102895f, 1.474867f, 0.010331f, 1.290537f, +0.295416f, 4.144506f, 0.003639f, 2.503557f, 0.242434f, 3.084672f, 0.036574f, 3.639474f, 0.421041f, 8.026960f, 0.004638f, 4.004008f, 0.453153f, 4.427436f, 0.014849f, 4.204792f, +0.923837f, 13.423790f, 0.000737f, 4.429409f, 0.468091f, 6.168614f, 0.004573f, 3.975604f, 0.095627f, 1.888209f, 0.000068f, 0.514493f, 0.520688f, 5.268965f, 0.001105f, 2.733402f, +1.296470f, 13.451520f, 0.005719f, 6.404539f, 0.798209f, 7.511095f, 0.043126f, 6.984970f, 1.178055f, 16.609720f, 0.004648f, 6.530371f, 1.096990f, 7.926473f, 0.012873f, 5.933403f, +0.754354f, 11.553000f, 0.018619f, 4.753473f, 0.452004f, 6.278259f, 0.136640f, 5.045457f, 0.447715f, 9.317705f, 0.009883f, 3.165807f, 0.415488f, 4.431460f, 0.027281f, 2.866624f, +2.182854f, 22.787270f, 0.017583f, 10.176140f, 1.997687f, 18.913520f, 0.197078f, 16.497120f, 2.133996f, 30.272540f, 0.015374f, 11.163460f, 3.614335f, 26.276300f, 0.077448f, 18.448560f, +0.130699f, 2.501222f, 0.000018f, 1.720467f, 0.082594f, 1.433532f, 0.000141f, 1.925956f, 0.013365f, 0.347561f, 0.000002f, 0.197417f, 0.091530f, 1.219871f, 0.000034f, 1.319215f, 
+0.110428f, 1.508993f, 0.000085f, 1.497709f, 0.084796f, 1.050901f, 0.000799f, 2.037261f, 0.099125f, 1.840696f, 0.000068f, 1.508625f, 0.116099f, 1.104861f, 0.000238f, 1.724070f, +0.004267f, 0.086065f, 0.000018f, 0.073819f, 0.003189f, 0.058333f, 0.000168f, 0.097723f, 0.002502f, 0.068572f, 0.000010f, 0.048567f, 0.002920f, 0.041019f, 0.000033f, 0.055314f, +0.168551f, 2.317394f, 0.000237f, 2.157319f, 0.192388f, 2.398958f, 0.003312f, 4.361962f, 0.162781f, 3.041308f, 0.000205f, 2.337942f, 0.346774f, 3.320346f, 0.001297f, 4.859652f, +0.104678f, 2.526819f, 0.000099f, 1.522420f, 0.069716f, 1.526261f, 0.000810f, 1.796114f, 0.011833f, 0.388164f, 0.000010f, 0.193123f, 0.052804f, 0.887679f, 0.000133f, 0.840861f, +0.309788f, 5.339641f, 0.001625f, 4.642139f, 0.250704f, 3.919095f, 0.016103f, 6.654828f, 0.307421f, 7.200611f, 0.001442f, 5.169333f, 0.234604f, 2.816128f, 0.003273f, 3.849159f, +0.103414f, 2.631111f, 0.003035f, 1.976720f, 0.081450f, 1.879428f, 0.029272f, 2.757888f, 0.067031f, 2.317500f, 0.001759f, 1.437755f, 0.050980f, 0.903281f, 0.003979f, 1.066932f, +0.243550f, 4.223709f, 0.002332f, 3.444090f, 0.292976f, 4.608037f, 0.034361f, 7.339075f, 0.260030f, 6.127974f, 0.002227f, 4.126259f, 0.360930f, 4.359108f, 0.009194f, 5.588375f, +0.071270f, 1.466652f, 0.000149f, 0.996633f, 0.042682f, 0.796611f, 0.001091f, 1.057303f, 0.010998f, 0.307543f, 0.000021f, 0.172573f, 0.052445f, 0.751616f, 0.000291f, 0.802993f, +0.074425f, 1.093624f, 0.000859f, 1.072314f, 0.054160f, 0.721783f, 0.007658f, 1.382310f, 0.100815f, 2.013086f, 0.001041f, 1.629954f, 0.082220f, 0.841386f, 0.002525f, 1.297050f, +0.072520f, 1.572963f, 0.004685f, 1.332823f, 0.051361f, 1.010345f, 0.040634f, 1.672125f, 0.064163f, 1.891195f, 0.003707f, 1.323273f, 0.052151f, 0.787752f, 0.008962f, 1.049424f, +0.110733f, 1.637139f, 0.002334f, 1.505616f, 0.119780f, 1.606099f, 0.030926f, 2.884999f, 0.161380f, 3.242243f, 0.003043f, 2.462255f, 0.239386f, 2.464770f, 0.013425f, 3.563791f, +0.210403f, 3.221828f, 0.000287f, 1.618515f, 
0.140520f, 1.951486f, 0.002349f, 1.914806f, 0.022270f, 0.463403f, 0.000027f, 0.192235f, 0.167127f, 1.782230f, 0.000607f, 1.407621f, +0.319837f, 3.497103f, 0.002415f, 2.534945f, 0.259558f, 2.573895f, 0.024000f, 3.644146f, 0.297176f, 4.415510f, 0.002007f, 2.643018f, 0.381400f, 2.904211f, 0.007660f, 3.309754f, +0.218030f, 3.518895f, 0.009210f, 2.204279f, 0.172200f, 2.520586f, 0.089090f, 3.083944f, 0.132320f, 2.902032f, 0.004999f, 1.501141f, 0.169243f, 1.902260f, 0.019018f, 1.873429f, +0.657587f, 7.234214f, 0.009065f, 4.918423f, 0.793243f, 7.914473f, 0.133930f, 10.509970f, 0.657361f, 9.827193f, 0.008105f, 5.517254f, 1.534505f, 11.756400f, 0.056274f, 12.566560f, +0.926036f, 10.149180f, 0.000072f, 5.127266f, 0.450189f, 4.474811f, 0.000429f, 4.415446f, 0.081864f, 1.219223f, 0.000006f, 0.508624f, 0.457169f, 3.489379f, 0.000095f, 2.771473f, +0.718636f, 5.623942f, 0.000309f, 4.099601f, 0.424517f, 3.013034f, 0.002236f, 4.289925f, 0.557686f, 5.930744f, 0.000215f, 3.570006f, 0.532618f, 2.902796f, 0.000609f, 3.326785f, +0.017405f, 0.201052f, 0.000042f, 0.126651f, 0.010006f, 0.104830f, 0.000295f, 0.128982f, 0.008822f, 0.138484f, 0.000019f, 0.072038f, 0.008397f, 0.067550f, 0.000054f, 0.066902f, +0.797411f, 6.278744f, 0.000626f, 4.292872f, 0.700191f, 5.000165f, 0.006735f, 6.677352f, 0.665777f, 7.123718f, 0.000468f, 4.021989f, 1.156520f, 6.341790f, 0.002416f, 6.817018f, +1.296869f, 17.928230f, 0.000687f, 7.933385f, 0.664450f, 8.330684f, 0.004312f, 7.200235f, 0.126743f, 2.380960f, 0.000060f, 0.870026f, 0.461175f, 4.439917f, 0.000650f, 3.088898f, +3.525166f, 34.797660f, 0.010330f, 22.218610f, 2.194647f, 19.647740f, 0.078765f, 24.503290f, 3.024286f, 40.567750f, 0.007926f, 21.389810f, 1.881946f, 12.937370f, 0.014670f, 12.987340f, +0.737607f, 10.747470f, 0.012094f, 5.930250f, 0.446913f, 5.905837f, 0.089744f, 6.364923f, 0.413325f, 8.183900f, 0.006061f, 3.728947f, 0.256328f, 2.601031f, 0.011180f, 2.256420f, +2.014756f, 20.010200f, 0.010780f, 11.983750f, 1.864472f, 16.794310f, 0.122184f, 
19.644830f, 1.859650f, 25.098500f, 0.008899f, 12.412180f, 2.104812f, 14.558300f, 0.029960f, 13.707530f, +0.569054f, 6.706524f, 0.000664f, 3.347084f, 0.262171f, 2.802241f, 0.003745f, 2.731615f, 0.075913f, 1.215764f, 0.000079f, 0.501046f, 0.295196f, 2.422826f, 0.000916f, 1.901073f, +0.545808f, 4.593178f, 0.003521f, 3.307715f, 0.305555f, 2.332065f, 0.024141f, 3.280200f, 0.639176f, 7.309389f, 0.003688f, 4.346653f, 0.425065f, 2.491129f, 0.007294f, 2.820451f, +0.333357f, 4.140882f, 0.012032f, 2.576960f, 0.181623f, 2.046129f, 0.080289f, 2.487096f, 0.254984f, 4.304116f, 0.008231f, 2.211861f, 0.168992f, 1.461906f, 0.016226f, 1.430348f, +0.590362f, 4.998623f, 0.006954f, 3.376289f, 0.491266f, 3.772472f, 0.070872f, 4.976913f, 0.743814f, 8.558227f, 0.007836f, 4.773447f, 0.899698f, 5.305138f, 0.028192f, 5.633696f, +1.503064f, 13.181060f, 0.001146f, 4.863237f, 0.772240f, 6.141892f, 0.007214f, 4.426110f, 0.137537f, 1.639005f, 0.000094f, 0.499361f, 0.841640f, 5.140050f, 0.001708f, 2.981604f, +2.098597f, 13.141080f, 0.008853f, 6.996037f, 1.310156f, 7.440505f, 0.067691f, 7.736921f, 1.685728f, 14.344230f, 0.006360f, 6.306046f, 1.764153f, 7.693191f, 0.019797f, 6.439244f, +0.896696f, 8.288160f, 0.021165f, 3.813107f, 0.544819f, 4.567116f, 0.157496f, 4.104005f, 0.470465f, 5.909184f, 0.009932f, 2.244954f, 0.490678f, 3.158473f, 0.030809f, 2.284575f, +3.136700f, 19.762120f, 0.024161f, 9.867994f, 2.910818f, 16.632330f, 0.274605f, 16.221580f, 2.710797f, 23.208400f, 0.018675f, 9.569727f, 5.159926f, 22.639780f, 0.105732f, 17.773580f, +0.023662f, 0.331634f, 0.000011f, 0.244378f, 0.020993f, 0.266847f, 0.000118f, 0.384070f, 0.003637f, 0.069274f, 0.000001f, 0.042153f, 0.026728f, 0.260876f, 0.000033f, 0.302235f, +0.034661f, 0.346877f, 0.000088f, 0.368828f, 0.037367f, 0.339155f, 0.001165f, 0.704355f, 0.046771f, 0.636063f, 0.000106f, 0.558480f, 0.058777f, 0.409647f, 0.000398f, 0.684802f, +0.000591f, 0.008730f, 0.000008f, 0.008022f, 0.000620f, 0.008307f, 0.000108f, 0.014909f, 0.000521f, 0.010456f, 
0.000007f, 0.007933f, 0.000652f, 0.006711f, 0.000025f, 0.009695f, +0.032625f, 0.328505f, 0.000152f, 0.327616f, 0.052281f, 0.477434f, 0.002976f, 0.929995f, 0.047365f, 0.648085f, 0.000197f, 0.533721f, 0.108263f, 0.759171f, 0.001339f, 1.190336f, +0.025524f, 0.451229f, 0.000080f, 0.291250f, 0.023866f, 0.382648f, 0.000917f, 0.482407f, 0.004337f, 0.104200f, 0.000012f, 0.055539f, 0.020767f, 0.255678f, 0.000173f, 0.259459f, +0.130962f, 1.653165f, 0.002271f, 1.539680f, 0.148796f, 1.703485f, 0.031603f, 3.098829f, 0.195364f, 3.351223f, 0.003030f, 2.577370f, 0.159967f, 1.406275f, 0.007380f, 2.059170f, +0.019291f, 0.359449f, 0.001872f, 0.289302f, 0.021331f, 0.360472f, 0.025349f, 0.566671f, 0.018797f, 0.475935f, 0.001631f, 0.316316f, 0.015339f, 0.199038f, 0.003959f, 0.251859f, +0.063493f, 0.806403f, 0.002010f, 0.704435f, 0.107230f, 1.235158f, 0.041586f, 2.107444f, 0.101903f, 1.758753f, 0.002886f, 1.268681f, 0.151765f, 1.342362f, 0.012784f, 1.843596f, +0.001305f, 0.019666f, 0.000009f, 0.014316f, 0.001097f, 0.014996f, 0.000093f, 0.021323f, 0.000303f, 0.006199f, 0.000002f, 0.003726f, 0.001549f, 0.016255f, 0.000028f, 0.018604f, +0.002362f, 0.025423f, 0.000090f, 0.026705f, 0.002414f, 0.023557f, 0.001129f, 0.048331f, 0.004811f, 0.070349f, 0.000164f, 0.061021f, 0.004210f, 0.031548f, 0.000428f, 0.052101f, +0.001016f, 0.016135f, 0.000217f, 0.014647f, 0.001010f, 0.014550f, 0.002642f, 0.025798f, 0.001351f, 0.029162f, 0.000258f, 0.021860f, 0.001178f, 0.013033f, 0.000669f, 0.018601f, +0.002168f, 0.023469f, 0.000151f, 0.023123f, 0.003292f, 0.032325f, 0.002810f, 0.062204f, 0.004749f, 0.069870f, 0.000296f, 0.056845f, 0.007558f, 0.056991f, 0.001402f, 0.088278f, +0.040133f, 0.450065f, 0.000181f, 0.242213f, 0.037630f, 0.382725f, 0.002080f, 0.402304f, 0.006386f, 0.097311f, 0.000026f, 0.043246f, 0.051417f, 0.401560f, 0.000617f, 0.339766f, +0.105769f, 0.846959f, 0.002641f, 0.657704f, 0.120507f, 0.875171f, 0.036846f, 1.327414f, 0.147732f, 1.607551f, 0.003299f, 1.030842f, 0.203435f, 1.134478f, 
0.013511f, 1.385070f, +0.031816f, 0.376057f, 0.004444f, 0.252361f, 0.035278f, 0.378179f, 0.060353f, 0.495691f, 0.029025f, 0.466208f, 0.003626f, 0.258349f, 0.039834f, 0.327893f, 0.014802f, 0.345945f, +0.134103f, 1.080436f, 0.006113f, 0.786941f, 0.227112f, 1.659504f, 0.126797f, 2.360836f, 0.201520f, 2.206313f, 0.008216f, 1.326994f, 0.504739f, 2.832020f, 0.061207f, 3.242996f, +0.104756f, 1.902333f, 0.000009f, 0.859483f, 0.068423f, 1.126906f, 0.000068f, 0.994450f, 0.011960f, 0.295133f, 0.000001f, 0.110110f, 0.077981f, 0.986205f, 0.000017f, 0.700527f, +0.161583f, 2.095241f, 0.000073f, 1.365936f, 0.128244f, 1.508184f, 0.000706f, 1.920417f, 0.161940f, 2.853526f, 0.000065f, 1.536163f, 0.180578f, 1.630694f, 0.000216f, 1.671384f, +0.002368f, 0.045322f, 0.000006f, 0.025533f, 0.001829f, 0.031750f, 0.000056f, 0.034937f, 0.001550f, 0.040317f, 0.000003f, 0.018756f, 0.001723f, 0.022961f, 0.000012f, 0.020338f, +0.126236f, 1.646948f, 0.000104f, 1.007049f, 0.148927f, 1.762172f, 0.001498f, 2.104573f, 0.136116f, 2.413200f, 0.000100f, 1.218493f, 0.276068f, 2.508314f, 0.000603f, 2.411348f, +0.199141f, 4.561515f, 0.000110f, 1.805202f, 0.137083f, 2.847800f, 0.000930f, 2.201258f, 0.025134f, 0.782354f, 0.000012f, 0.255670f, 0.106781f, 1.703372f, 0.000157f, 1.059824f, +1.075924f, 17.597830f, 0.003297f, 10.048970f, 0.899961f, 13.349920f, 0.033776f, 14.889700f, 1.192077f, 26.495340f, 0.003267f, 12.493700f, 0.866108f, 9.865471f, 0.007060f, 8.857022f, +0.136219f, 3.288704f, 0.002336f, 1.622883f, 0.110890f, 2.428048f, 0.023286f, 2.340262f, 0.098579f, 3.234139f, 0.001512f, 1.317893f, 0.071379f, 1.200127f, 0.003256f, 0.931102f, +0.432950f, 7.124826f, 0.002422f, 3.816023f, 0.538305f, 8.034192f, 0.036889f, 8.404732f, 0.516091f, 11.541180f, 0.002583f, 5.104417f, 0.682012f, 7.816214f, 0.010152f, 6.581739f, +0.072139f, 1.408709f, 0.000088f, 0.628761f, 0.044654f, 0.790836f, 0.000667f, 0.689439f, 0.012428f, 0.329802f, 0.000014f, 0.121556f, 0.056427f, 0.767377f, 0.000183f, 0.538494f, +0.137529f, 1.917672f, 
0.000928f, 1.235050f, 0.103443f, 1.308154f, 0.008547f, 1.645561f, 0.207995f, 3.941139f, 0.001255f, 2.095999f, 0.161500f, 1.568268f, 0.002898f, 1.587954f, +0.050824f, 1.046077f, 0.001918f, 0.582202f, 0.037204f, 0.694482f, 0.017199f, 0.754946f, 0.050206f, 1.404218f, 0.001695f, 0.645363f, 0.038850f, 0.556869f, 0.003901f, 0.487272f, +0.104734f, 1.469350f, 0.001290f, 0.887585f, 0.117096f, 1.489903f, 0.017665f, 1.757873f, 0.170417f, 3.248915f, 0.001877f, 1.620624f, 0.240673f, 2.351444f, 0.007886f, 2.233196f, +0.189877f, 2.759002f, 0.000151f, 0.910380f, 0.131070f, 1.727273f, 0.001280f, 1.113209f, 0.022439f, 0.443059f, 0.000016f, 0.120723f, 0.160319f, 1.622303f, 0.000340f, 0.841609f, +0.526940f, 5.467265f, 0.002325f, 2.603074f, 0.441989f, 4.159091f, 0.023880f, 3.867762f, 0.546637f, 7.707185f, 0.002157f, 3.030200f, 0.667930f, 4.826235f, 0.007838f, 3.612704f, +0.136235f, 2.086444f, 0.003363f, 0.858466f, 0.111212f, 1.544713f, 0.033619f, 1.241393f, 0.092310f, 1.921126f, 0.002038f, 0.652727f, 0.112409f, 1.198916f, 0.007381f, 0.775555f, +0.554522f, 5.788772f, 0.004467f, 2.585099f, 0.691382f, 6.545804f, 0.068207f, 5.709508f, 0.618903f, 8.779660f, 0.004459f, 3.237633f, 1.375474f, 9.999726f, 0.029473f, 7.020796f, +0.026591f, 0.508872f, 0.000004f, 0.350028f, 0.022893f, 0.397338f, 0.000039f, 0.533825f, 0.003104f, 0.080728f, 0.000000f, 0.045854f, 0.027897f, 0.371792f, 0.000010f, 0.402070f, +0.044428f, 0.607107f, 0.000034f, 0.602567f, 0.046478f, 0.576018f, 0.000438f, 1.116660f, 0.045530f, 0.845463f, 0.000031f, 0.692937f, 0.069974f, 0.665909f, 0.000143f, 1.039112f, +0.000763f, 0.015386f, 0.000003f, 0.013196f, 0.000777f, 0.014207f, 0.000041f, 0.023800f, 0.000511f, 0.013995f, 0.000002f, 0.009912f, 0.000782f, 0.010985f, 0.000009f, 0.014814f, +0.042384f, 0.582738f, 0.000060f, 0.542485f, 0.065909f, 0.821849f, 0.001135f, 1.494347f, 0.046732f, 0.873108f, 0.000059f, 0.671184f, 0.130632f, 1.250795f, 0.000488f, 1.830661f, +0.025107f, 0.606060f, 0.000024f, 0.365154f, 0.022781f, 0.498732f, 
0.000265f, 0.586911f, 0.003240f, 0.106290f, 0.000003f, 0.052882f, 0.018973f, 0.318954f, 0.000048f, 0.302131f, +0.146936f, 2.532652f, 0.000771f, 2.201819f, 0.162002f, 2.532478f, 0.010406f, 4.300280f, 0.166468f, 3.899128f, 0.000781f, 2.799192f, 0.166698f, 2.000993f, 0.002326f, 2.735011f, +0.021795f, 0.554518f, 0.000640f, 0.416603f, 0.023386f, 0.539634f, 0.008405f, 0.791863f, 0.016128f, 0.557611f, 0.000423f, 0.345937f, 0.016095f, 0.285187f, 0.001256f, 0.336855f, +0.072202f, 1.252139f, 0.000691f, 1.021018f, 0.118328f, 1.861107f, 0.013878f, 2.964126f, 0.088007f, 2.074008f, 0.000754f, 1.396529f, 0.160292f, 1.935914f, 0.004083f, 2.481842f, +0.021802f, 0.448667f, 0.000046f, 0.304883f, 0.017789f, 0.332002f, 0.000455f, 0.440649f, 0.003841f, 0.107408f, 0.000007f, 0.060270f, 0.024034f, 0.344448f, 0.000133f, 0.367992f, +0.045023f, 0.661587f, 0.000520f, 0.648696f, 0.044637f, 0.594870f, 0.006312f, 1.139255f, 0.069627f, 1.390324f, 0.000719f, 1.125717f, 0.074512f, 0.762507f, 0.002288f, 1.175454f, +0.019494f, 0.422816f, 0.001259f, 0.358266f, 0.018809f, 0.369997f, 0.014881f, 0.612347f, 0.019690f, 0.580368f, 0.001138f, 0.406085f, 0.021000f, 0.317214f, 0.003609f, 0.422584f, +0.041869f, 0.619013f, 0.000883f, 0.569283f, 0.061702f, 0.827339f, 0.015931f, 1.486129f, 0.069662f, 1.399570f, 0.001313f, 1.062875f, 0.135595f, 1.396113f, 0.007604f, 2.018628f, +0.046120f, 0.706222f, 0.000063f, 0.354777f, 0.041964f, 0.582775f, 0.000702f, 0.571821f, 0.005573f, 0.115967f, 0.000007f, 0.048107f, 0.054880f, 0.585238f, 0.000199f, 0.462226f, +0.138640f, 1.515894f, 0.001047f, 1.098826f, 0.153282f, 1.520015f, 0.014173f, 2.152052f, 0.147065f, 2.185125f, 0.000993f, 1.307963f, 0.247669f, 1.885899f, 0.004974f, 2.149245f, +0.041994f, 0.677767f, 0.001774f, 0.424561f, 0.045186f, 0.661412f, 0.023378f, 0.809239f, 0.029096f, 0.638133f, 0.001099f, 0.330088f, 0.048833f, 0.548875f, 0.005488f, 0.540557f, +0.178160f, 1.959960f, 0.002456f, 1.332545f, 0.292792f, 2.921288f, 0.049434f, 3.879303f, 0.203327f, 3.039626f, 
0.002507f, 1.706529f, 0.622807f, 4.771557f, 0.022840f, 5.100374f, +0.250596f, 2.746484f, 0.000019f, 1.387497f, 0.165973f, 1.649748f, 0.000158f, 1.627861f, 0.025291f, 0.376673f, 0.000002f, 0.157137f, 0.185333f, 1.414569f, 0.000038f, 1.123535f, +0.384571f, 3.009600f, 0.000165f, 2.193864f, 0.309499f, 2.196689f, 0.001630f, 3.127622f, 0.340716f, 3.623364f, 0.000131f, 2.181081f, 0.426986f, 2.327096f, 0.000489f, 2.666997f, +0.004139f, 0.047807f, 0.000010f, 0.030116f, 0.003241f, 0.033960f, 0.000096f, 0.041784f, 0.002395f, 0.037594f, 0.000005f, 0.019556f, 0.002991f, 0.024062f, 0.000019f, 0.023831f, +0.266714f, 2.100082f, 0.000209f, 1.435857f, 0.319063f, 2.278474f, 0.003069f, 3.042735f, 0.254230f, 2.720225f, 0.000179f, 1.535815f, 0.579490f, 3.177642f, 0.001211f, 3.415762f, +0.413741f, 5.719648f, 0.000219f, 2.530990f, 0.288796f, 3.620838f, 0.001874f, 3.129501f, 0.046163f, 0.867199f, 0.000022f, 0.316883f, 0.220408f, 2.121957f, 0.000311f, 1.476268f, +2.223992f, 21.953500f, 0.006517f, 14.017500f, 1.886318f, 16.887400f, 0.067699f, 21.060790f, 2.178268f, 29.219270f, 0.005709f, 15.406190f, 1.778651f, 12.227270f, 0.013865f, 12.274500f, +0.206772f, 3.012820f, 0.003390f, 1.662418f, 0.170682f, 2.255512f, 0.034274f, 2.430843f, 0.132280f, 2.619161f, 0.001940f, 1.193406f, 0.107645f, 1.092302f, 0.004695f, 0.947582f, +0.794459f, 7.890426f, 0.004251f, 4.725434f, 1.001616f, 9.022103f, 0.065638f, 10.553440f, 0.837173f, 11.298780f, 0.004006f, 5.587686f, 1.243347f, 8.599821f, 0.017698f, 8.097261f, +0.231548f, 2.728884f, 0.000270f, 1.361928f, 0.145334f, 1.553422f, 0.002076f, 1.514271f, 0.035265f, 0.564770f, 0.000037f, 0.232755f, 0.179940f, 1.476860f, 0.000558f, 1.158820f, +0.439187f, 3.695920f, 0.002833f, 2.661567f, 0.334962f, 2.556503f, 0.026465f, 3.595887f, 0.587171f, 6.714677f, 0.003388f, 3.992997f, 0.512382f, 3.002861f, 0.008793f, 3.399833f, +0.119188f, 1.480524f, 0.004302f, 0.921362f, 0.088469f, 0.996672f, 0.039109f, 1.211468f, 0.104081f, 1.756878f, 0.003360f, 0.902849f, 0.090515f, 
0.783018f, 0.008691f, 0.766115f, +0.296909f, 2.513941f, 0.003497f, 1.698026f, 0.336603f, 2.584801f, 0.048560f, 3.410053f, 0.427075f, 4.913864f, 0.004499f, 2.740763f, 0.677846f, 3.996972f, 0.021240f, 4.244512f, +0.438235f, 3.843079f, 0.000334f, 1.417929f, 0.306745f, 2.439651f, 0.002866f, 1.758117f, 0.045781f, 0.545562f, 0.000031f, 0.166218f, 0.367608f, 2.245051f, 0.000746f, 1.302293f, +1.209985f, 7.576733f, 0.005104f, 4.033694f, 1.029130f, 5.844531f, 0.053171f, 6.077367f, 1.109617f, 9.441974f, 0.004187f, 4.150904f, 1.523761f, 6.644879f, 0.017100f, 5.561801f, +0.229726f, 2.123354f, 0.005422f, 0.976885f, 0.190157f, 1.594053f, 0.054971f, 1.432414f, 0.137602f, 1.728330f, 0.002905f, 0.656609f, 0.188317f, 1.212191f, 0.011824f, 0.876798f, +1.130366f, 7.121635f, 0.008707f, 3.556109f, 1.429084f, 8.165742f, 0.134819f, 7.964084f, 1.115265f, 9.548301f, 0.007683f, 3.937136f, 2.785604f, 12.222170f, 0.057080f, 9.595131f, +0.098398f, 1.379080f, 0.000045f, 1.016230f, 0.066474f, 0.844950f, 0.000375f, 1.216125f, 0.012500f, 0.238077f, 0.000005f, 0.144870f, 0.079897f, 0.779829f, 0.000098f, 0.903461f, +0.074060f, 0.741164f, 0.000189f, 0.788066f, 0.060795f, 0.551792f, 0.001895f, 1.145957f, 0.082592f, 1.123205f, 0.000188f, 0.986202f, 0.090278f, 0.629191f, 0.000611f, 1.051812f, +0.002023f, 0.029887f, 0.000029f, 0.027462f, 0.001616f, 0.021655f, 0.000282f, 0.038864f, 0.001474f, 0.029584f, 0.000019f, 0.022447f, 0.001605f, 0.016516f, 0.000061f, 0.023859f, +0.109778f, 1.105370f, 0.000510f, 1.102377f, 0.133952f, 1.223254f, 0.007625f, 2.382781f, 0.131716f, 1.802261f, 0.000548f, 1.484223f, 0.261867f, 1.836281f, 0.003238f, 2.879183f, +0.106025f, 1.874356f, 0.000332f, 1.209820f, 0.075487f, 1.210300f, 0.002899f, 1.525831f, 0.014891f, 0.357720f, 0.000042f, 0.190665f, 0.062012f, 0.763453f, 0.000517f, 0.774744f, +0.279517f, 3.528415f, 0.004847f, 3.286200f, 0.241820f, 2.768471f, 0.051361f, 5.036155f, 0.344610f, 5.911351f, 0.005345f, 4.546321f, 0.245431f, 2.157586f, 0.011322f, 3.159293f, +0.065972f, 
1.229247f, 0.006401f, 0.989358f, 0.055546f, 0.938668f, 0.066010f, 1.475609f, 0.053125f, 1.345146f, 0.004610f, 0.894011f, 0.037707f, 0.489295f, 0.009733f, 0.619146f, +0.213409f, 2.710455f, 0.006758f, 2.367723f, 0.274438f, 3.161191f, 0.106432f, 5.393669f, 0.283073f, 4.885566f, 0.008017f, 3.524217f, 0.366688f, 3.243352f, 0.030888f, 4.454410f, +0.003764f, 0.056728f, 0.000026f, 0.041297f, 0.002410f, 0.032939f, 0.000204f, 0.046834f, 0.000722f, 0.014778f, 0.000004f, 0.008884f, 0.003211f, 0.033707f, 0.000059f, 0.038578f, +0.003502f, 0.037682f, 0.000134f, 0.039581f, 0.002724f, 0.026586f, 0.001274f, 0.054546f, 0.005893f, 0.086173f, 0.000201f, 0.074747f, 0.004485f, 0.033613f, 0.000455f, 0.055510f, +0.002412f, 0.038319f, 0.000515f, 0.034784f, 0.001826f, 0.026312f, 0.004778f, 0.046651f, 0.002652f, 0.057237f, 0.000507f, 0.042904f, 0.002011f, 0.022250f, 0.001143f, 0.031754f, +0.005059f, 0.054781f, 0.000353f, 0.053971f, 0.005850f, 0.057451f, 0.004995f, 0.110556f, 0.009160f, 0.134784f, 0.000571f, 0.109656f, 0.012681f, 0.095624f, 0.002352f, 0.148119f, +0.196148f, 2.199672f, 0.000885f, 1.183806f, 0.140042f, 1.424320f, 0.007742f, 1.497183f, 0.025793f, 0.393065f, 0.000104f, 0.174681f, 0.180645f, 1.410807f, 0.002169f, 1.193707f, +0.265614f, 2.126933f, 0.006631f, 1.651666f, 0.230432f, 1.673488f, 0.070457f, 2.538258f, 0.306610f, 3.336384f, 0.006846f, 2.139456f, 0.367241f, 2.047961f, 0.024389f, 2.500331f, +0.128018f, 1.513157f, 0.017882f, 1.015436f, 0.108087f, 1.158686f, 0.184913f, 1.518724f, 0.096523f, 1.550349f, 0.012058f, 0.859126f, 0.115216f, 0.948409f, 0.042813f, 1.000625f, +0.530342f, 4.272847f, 0.024175f, 3.112147f, 0.683905f, 4.997288f, 0.381824f, 7.109221f, 0.658653f, 7.211161f, 0.026853f, 4.337175f, 1.434893f, 8.050978f, 0.174002f, 9.219319f, +0.353632f, 6.421863f, 0.000029f, 2.901427f, 0.175878f, 2.896677f, 0.000175f, 2.556204f, 0.033367f, 0.823404f, 0.000002f, 0.307201f, 0.189233f, 2.393181f, 0.000041f, 1.699940f, +0.280271f, 3.634266f, 0.000126f, 2.369261f, 0.169379f, 
1.991934f, 0.000933f, 2.536391f, 0.232144f, 4.090576f, 0.000093f, 2.202114f, 0.225155f, 2.033244f, 0.000269f, 2.083979f, +0.006581f, 0.125960f, 0.000017f, 0.070963f, 0.003871f, 0.067190f, 0.000119f, 0.073934f, 0.003560f, 0.092603f, 0.000008f, 0.043080f, 0.003441f, 0.045872f, 0.000023f, 0.040631f, +0.344819f, 4.498723f, 0.000283f, 2.750806f, 0.309757f, 3.665185f, 0.003116f, 4.377351f, 0.307282f, 5.447819f, 0.000226f, 2.750758f, 0.542075f, 4.925219f, 0.001184f, 4.734822f, +0.671521f, 15.381840f, 0.000372f, 6.087302f, 0.351982f, 7.312174f, 0.002389f, 5.652077f, 0.070047f, 2.180331f, 0.000035f, 0.712522f, 0.258837f, 4.128981f, 0.000382f, 2.569017f, +1.864185f, 30.490640f, 0.005712f, 17.411210f, 1.187324f, 17.612630f, 0.044561f, 19.644070f, 1.706993f, 37.939960f, 0.004678f, 17.890330f, 1.078733f, 12.287390f, 0.008794f, 11.031370f, +0.378166f, 9.129998f, 0.006484f, 4.505397f, 0.234410f, 5.132644f, 0.049224f, 4.947074f, 0.226177f, 7.420350f, 0.003468f, 3.023750f, 0.142446f, 2.395010f, 0.006497f, 1.858135f, +1.181333f, 19.440540f, 0.006610f, 10.412270f, 1.118410f, 16.692230f, 0.076643f, 17.462080f, 1.163806f, 26.025820f, 0.005824f, 11.510660f, 1.337706f, 15.330800f, 0.019911f, 12.909490f, +0.168929f, 3.298788f, 0.000206f, 1.472377f, 0.079621f, 1.410125f, 0.001189f, 1.229326f, 0.024053f, 0.638272f, 0.000026f, 0.235250f, 0.094985f, 1.291744f, 0.000308f, 0.906459f, +0.165476f, 2.307363f, 0.001116f, 1.486025f, 0.094772f, 1.198501f, 0.007830f, 1.507626f, 0.206831f, 3.919071f, 0.001248f, 2.084263f, 0.139684f, 1.356426f, 0.002507f, 1.373453f, +0.097983f, 2.016711f, 0.003698f, 1.122416f, 0.054615f, 1.019480f, 0.025247f, 1.108240f, 0.079994f, 2.237352f, 0.002700f, 1.028262f, 0.053840f, 0.771734f, 0.005406f, 0.675282f, +0.198452f, 2.784156f, 0.002444f, 1.681815f, 0.168946f, 2.149633f, 0.025487f, 2.536260f, 0.266870f, 5.087758f, 0.002940f, 2.537875f, 0.327816f, 3.202854f, 0.010742f, 3.041791f, +0.753354f, 10.946590f, 0.000601f, 3.612017f, 0.395976f, 5.218264f, 0.003868f, 3.363114f, 
0.073577f, 1.452809f, 0.000052f, 0.395857f, 0.457241f, 4.626934f, 0.000970f, 2.400333f, +1.074228f, 11.145650f, 0.004739f, 5.306669f, 0.686097f, 6.456124f, 0.037069f, 6.003896f, 0.920989f, 12.985270f, 0.003634f, 5.105362f, 0.978815f, 7.072584f, 0.011487f, 5.294220f, +0.445001f, 6.815232f, 0.010984f, 2.804123f, 0.276607f, 3.842021f, 0.083618f, 3.087600f, 0.249197f, 5.186203f, 0.005501f, 1.762077f, 0.263942f, 2.815120f, 0.017331f, 1.821045f, +1.780250f, 18.584400f, 0.014340f, 8.299260f, 1.690122f, 16.001590f, 0.166736f, 13.957210f, 1.642118f, 23.294840f, 0.011830f, 8.590324f, 3.174305f, 23.077270f, 0.068019f, 16.202530f, +0.112643f, 2.155673f, 0.000016f, 1.482782f, 0.073844f, 1.281659f, 0.000126f, 1.721915f, 0.010868f, 0.282629f, 0.000001f, 0.160535f, 0.084949f, 1.132162f, 0.000031f, 1.224364f, +0.096703f, 1.321442f, 0.000074f, 1.311560f, 0.077032f, 0.954679f, 0.000726f, 1.850726f, 0.081903f, 1.520890f, 0.000056f, 1.246513f, 0.109485f, 1.041915f, 0.000224f, 1.625847f, +0.002660f, 0.053659f, 0.000011f, 0.046023f, 0.002062f, 0.037728f, 0.000109f, 0.063204f, 0.001472f, 0.040338f, 0.000006f, 0.028570f, 0.001961f, 0.027540f, 0.000022f, 0.037138f, +0.145283f, 1.997482f, 0.000204f, 1.859504f, 0.172026f, 2.145063f, 0.002962f, 3.900311f, 0.132386f, 2.473420f, 0.000166f, 1.901390f, 0.321880f, 3.081984f, 0.001204f, 4.510785f, +0.106242f, 2.564575f, 0.000101f, 1.545168f, 0.073402f, 1.606957f, 0.000852f, 1.891078f, 0.011332f, 0.371716f, 0.000010f, 0.184940f, 0.057713f, 0.970200f, 0.000146f, 0.919029f, +0.319474f, 5.506598f, 0.001675f, 4.787287f, 0.268205f, 4.192679f, 0.017227f, 7.119386f, 0.299130f, 7.006406f, 0.001403f, 5.029912f, 0.260538f, 3.127430f, 0.003635f, 4.274656f, +0.075928f, 1.931801f, 0.002228f, 1.451337f, 0.062037f, 1.431474f, 0.022295f, 2.100556f, 0.046436f, 1.605453f, 0.001219f, 0.996008f, 0.040307f, 0.714184f, 0.003146f, 0.843575f, +0.247218f, 4.287334f, 0.002367f, 3.495971f, 0.308503f, 4.852257f, 0.036182f, 7.728036f, 0.249041f, 5.869010f, 0.002133f, 3.951886f, 
0.394530f, 4.764913f, 0.010050f, 6.108617f, +0.064067f, 1.318432f, 0.000134f, 0.895914f, 0.039803f, 0.742868f, 0.001018f, 0.985972f, 0.009328f, 0.260850f, 0.000017f, 0.146372f, 0.050769f, 0.727597f, 0.000282f, 0.777332f, +0.067980f, 0.998916f, 0.000785f, 0.979452f, 0.051319f, 0.683915f, 0.007256f, 1.309787f, 0.086884f, 1.734913f, 0.000897f, 1.404724f, 0.080873f, 0.827600f, 0.002484f, 1.275798f, +0.047160f, 1.022896f, 0.003046f, 0.866733f, 0.034648f, 0.681581f, 0.027412f, 1.128018f, 0.039369f, 1.160389f, 0.002275f, 0.811927f, 0.036521f, 0.551654f, 0.006276f, 0.734900f, +0.099554f, 1.471867f, 0.002099f, 1.353621f, 0.111713f, 1.497924f, 0.028843f, 2.690687f, 0.136895f, 2.750320f, 0.002581f, 2.088674f, 0.231764f, 2.386291f, 0.012997f, 3.450319f, +0.229625f, 3.516160f, 0.000313f, 1.766375f, 0.159088f, 2.209358f, 0.002660f, 2.167831f, 0.022932f, 0.477177f, 0.000028f, 0.197949f, 0.196415f, 2.094566f, 0.000713f, 1.654306f, +0.354671f, 3.877973f, 0.002678f, 2.811026f, 0.298583f, 2.960885f, 0.027609f, 4.192051f, 0.310932f, 4.619892f, 0.002100f, 2.765356f, 0.455450f, 3.468075f, 0.009147f, 3.952356f, +0.172133f, 2.778143f, 0.007271f, 1.740262f, 0.141032f, 2.064354f, 0.072964f, 2.525743f, 0.098566f, 2.161748f, 0.003724f, 1.118212f, 0.143888f, 1.617268f, 0.016169f, 1.592757f, +0.717747f, 7.896047f, 0.009894f, 5.368392f, 0.898172f, 8.961379f, 0.151645f, 11.900200f, 0.676982f, 10.120510f, 0.008347f, 5.681932f, 1.803643f, 13.818370f, 0.066144f, 14.770620f, +0.540862f, 5.927737f, 0.000042f, 2.994635f, 0.272764f, 2.711236f, 0.000260f, 2.675267f, 0.045113f, 0.671886f, 0.000003f, 0.280292f, 0.287541f, 2.194675f, 0.000059f, 1.743142f, +0.426478f, 3.337560f, 0.000183f, 2.432932f, 0.261347f, 1.854927f, 0.001377f, 2.641025f, 0.312271f, 3.320871f, 0.000120f, 1.998996f, 0.340384f, 1.855106f, 0.000389f, 2.126067f, +0.007354f, 0.084947f, 0.000018f, 0.053512f, 0.004386f, 0.045947f, 0.000129f, 0.056533f, 0.003517f, 0.055207f, 0.000008f, 0.028718f, 0.003821f, 0.030735f, 0.000024f, 0.030440f, 
+0.465792f, 3.667610f, 0.000366f, 2.507600f, 0.424288f, 3.029905f, 0.004081f, 4.046216f, 0.366939f, 3.926193f, 0.000258f, 2.216694f, 0.727491f, 3.989203f, 0.001520f, 4.288138f, +0.892000f, 12.331220f, 0.000473f, 5.456662f, 0.474095f, 5.944066f, 0.003077f, 5.137474f, 0.082252f, 1.545167f, 0.000039f, 0.564619f, 0.341584f, 3.288573f, 0.000481f, 2.287895f, +2.463646f, 24.319170f, 0.007219f, 15.528010f, 1.591102f, 14.244450f, 0.057104f, 17.764690f, 1.994234f, 26.750640f, 0.005227f, 14.104580f, 1.416348f, 9.736629f, 0.011041f, 9.774239f, +0.367008f, 5.347572f, 0.006017f, 2.950690f, 0.230679f, 3.048361f, 0.046322f, 3.285324f, 0.194042f, 3.842072f, 0.002845f, 1.750618f, 0.137344f, 1.393671f, 0.005990f, 1.209023f, +1.385936f, 13.764870f, 0.007416f, 8.243534f, 1.330488f, 11.984430f, 0.087190f, 14.018560f, 1.206998f, 16.290080f, 0.005776f, 8.056075f, 1.559187f, 10.784380f, 0.022193f, 10.154160f, +0.346666f, 4.085599f, 0.000404f, 2.039036f, 0.165683f, 1.770917f, 0.002367f, 1.726284f, 0.043635f, 0.698815f, 0.000046f, 0.287998f, 0.193657f, 1.589441f, 0.000601f, 1.247156f, +0.337853f, 2.843162f, 0.002179f, 2.047465f, 0.196206f, 1.497488f, 0.015502f, 2.106314f, 0.373305f, 4.268979f, 0.002154f, 2.538621f, 0.283340f, 1.660537f, 0.004862f, 1.880056f, +0.146909f, 1.824875f, 0.005303f, 1.135659f, 0.083032f, 0.935422f, 0.036705f, 1.137018f, 0.106025f, 1.789694f, 0.003423f, 0.919714f, 0.080200f, 0.693783f, 0.007701f, 0.678806f, +0.359691f, 3.045515f, 0.004237f, 2.057075f, 0.310500f, 2.384354f, 0.044794f, 3.145610f, 0.427592f, 4.919815f, 0.004505f, 2.744082f, 0.590298f, 3.480736f, 0.018497f, 3.696305f, +1.111658f, 9.748637f, 0.000848f, 3.596823f, 0.592489f, 4.712271f, 0.005535f, 3.395864f, 0.095977f, 1.143743f, 0.000065f, 0.348468f, 0.670323f, 4.093784f, 0.001360f, 2.374693f, +1.577077f, 9.875405f, 0.006653f, 5.257458f, 1.021365f, 5.800435f, 0.052770f, 6.031514f, 1.195270f, 10.170810f, 0.004510f, 4.471317f, 1.427657f, 6.225787f, 0.016021f, 5.211019f, +0.479757f, 4.434388f, 0.011324f, 
2.040115f, 0.302386f, 2.534849f, 0.087414f, 2.277812f, 0.237497f, 2.983031f, 0.005014f, 1.133281f, 0.282707f, 1.819771f, 0.017751f, 1.316270f, +2.320164f, 14.617710f, 0.017872f, 7.299190f, 2.233548f, 12.762430f, 0.210712f, 12.447250f, 1.891897f, 16.197410f, 0.013034f, 6.678823f, 4.110108f, 18.033580f, 0.084220f, 14.157440f, +}; + +static const float acceptor_me2x3acc6[64] = { +0.198561f, 0.337610f, 0.053207f, 0.494507f, 0.281041f, 0.889447f, 0.497835f, 1.051663f, 0.054833f, 0.146701f, 0.059791f, 0.204920f, 0.431595f, 1.165403f, 0.729493f, 1.225165f, +0.318682f, 0.805716f, 0.125296f, 1.247968f, 0.425639f, 2.003059f, 1.106270f, 2.504460f, 0.277875f, 1.105458f, 0.444579f, 1.632886f, 0.703515f, 2.824722f, 1.744707f, 3.140203f, +0.121459f, 0.355438f, 0.052579f, 0.416660f, 0.260773f, 1.420445f, 0.746255f, 1.344131f, 0.122075f, 0.562122f, 0.215047f, 0.628406f, 0.355410f, 1.651741f, 0.970475f, 1.389698f, +0.534367f, 0.995770f, 0.131584f, 1.612177f, 0.696320f, 2.415218f, 1.133477f, 3.156525f, 0.518371f, 1.519946f, 0.519427f, 2.346789f, 1.001111f, 2.962646f, 1.554947f, 3.442660f, +}; + +static const float acceptor_me2x3acc7[256] = { +0.081108f, 0.167573f, 0.002337f, 0.363544f, 0.096275f, 0.397918f, 0.186669f, 0.465951f, 0.001415f, 0.004990f, 0.001895f, 0.012787f, 0.191144f, 0.781838f, 0.312855f, 0.859362f, +0.106486f, 0.277527f, 0.002589f, 0.530254f, 0.178124f, 0.928697f, 0.291412f, 0.957738f, 0.070281f, 0.312724f, 0.079420f, 0.705780f, 0.291511f, 1.504128f, 0.402592f, 1.456027f, +0.003287f, 0.010233f, 0.000151f, 0.019997f, 0.005449f, 0.033928f, 0.016796f, 0.035788f, 0.001596f, 0.008479f, 0.003397f, 0.019573f, 0.011741f, 0.072355f, 0.030554f, 0.071640f, +0.136216f, 0.300112f, 0.003885f, 0.664942f, 0.346218f, 1.525973f, 0.664408f, 1.824904f, 0.115029f, 0.432688f, 0.152476f, 1.132412f, 0.449239f, 1.959528f, 0.727757f, 2.199667f, +0.157231f, 0.313885f, 0.004443f, 0.562003f, 0.229863f, 0.918001f, 0.437029f, 0.887163f, 0.003917f, 0.013349f, 0.005144f, 0.028233f, 0.465016f, 
1.837885f, 0.746333f, 1.667212f, +0.267623f, 0.673953f, 0.006381f, 1.062730f, 0.551359f, 2.777670f, 0.884509f, 2.364105f, 0.252278f, 1.084667f, 0.279548f, 2.020313f, 0.919431f, 4.583978f, 1.245120f, 3.662190f, +0.094987f, 0.285687f, 0.004267f, 0.460769f, 0.193895f, 1.166641f, 0.586098f, 1.015600f, 0.065845f, 0.338115f, 0.137479f, 0.644149f, 0.425748f, 2.535123f, 1.086372f, 2.071556f, +0.326828f, 0.695776f, 0.009140f, 1.272283f, 1.023115f, 4.357276f, 1.925271f, 4.300530f, 0.394195f, 1.432757f, 0.512374f, 3.094677f, 1.352704f, 5.701262f, 2.148795f, 5.281901f, +0.057108f, 0.111893f, 0.001336f, 0.257086f, 0.061805f, 0.242256f, 0.097290f, 0.300431f, 0.001612f, 0.005392f, 0.001753f, 0.014634f, 0.151666f, 0.588321f, 0.201536f, 0.684852f, +0.105920f, 0.261792f, 0.002091f, 0.529736f, 0.161543f, 0.798747f, 0.214563f, 0.872378f, 0.113136f, 0.477410f, 0.103795f, 1.141098f, 0.326767f, 1.598953f, 0.366378f, 1.639244f, +0.038181f, 0.112707f, 0.001420f, 0.233266f, 0.057697f, 0.340719f, 0.144395f, 0.380619f, 0.029990f, 0.151144f, 0.051842f, 0.369505f, 0.153674f, 0.898096f, 0.324659f, 0.941737f, +0.187401f, 0.391559f, 0.004339f, 0.918799f, 0.434288f, 1.815277f, 0.676619f, 2.299108f, 0.256113f, 0.913623f, 0.275617f, 2.532323f, 0.696500f, 2.881136f, 0.916035f, 3.425251f, +0.232940f, 0.409618f, 0.005813f, 0.875147f, 0.306761f, 1.079138f, 0.515071f, 1.244432f, 0.005656f, 0.016981f, 0.006560f, 0.042855f, 0.677246f, 2.357764f, 0.959925f, 2.552155f, +0.388069f, 0.860830f, 0.008171f, 1.619737f, 0.720186f, 3.195905f, 1.020324f, 3.245741f, 0.356594f, 1.350495f, 0.348960f, 3.001574f, 1.310623f, 5.755779f, 1.567457f, 5.487020f, +0.197520f, 0.523291f, 0.007836f, 1.007094f, 0.363197f, 1.924930f, 0.969550f, 1.999559f, 0.133470f, 0.603706f, 0.246104f, 1.372401f, 0.870313f, 4.564836f, 1.961226f, 4.450991f, +0.385616f, 0.723116f, 0.009524f, 1.577815f, 1.087389f, 4.079232f, 1.807083f, 4.804181f, 0.453372f, 1.451507f, 0.520424f, 3.741071f, 1.568960f, 5.824824f, 2.201048f, 6.439262f, +}; + +static const 
float acceptor_me2x3acc8[64] = { +0.134444f, 0.409251f, 0.002027f, 0.474269f, 0.184336f, 0.805301f, 0.161375f, 0.751628f, 0.001443f, 0.007759f, 0.001694f, 0.010525f, 0.386007f, 1.386879f, 0.409270f, 1.281481f, +0.372889f, 1.094436f, 0.005800f, 1.222804f, 0.718837f, 3.027880f, 0.649183f, 2.724678f, 0.308629f, 1.600430f, 0.373846f, 2.093096f, 0.975473f, 3.379250f, 1.066943f, 3.010408f, +0.089765f, 0.290835f, 0.001698f, 0.279494f, 0.240649f, 1.118979f, 0.264289f, 0.866080f, 0.068014f, 0.389342f, 0.100188f, 0.437969f, 0.484599f, 1.853180f, 0.644566f, 1.419979f, +0.379547f, 1.332811f, 0.007810f, 1.355863f, 1.060978f, 5.346963f, 1.267546f, 4.380911f, 0.311496f, 1.932612f, 0.499147f, 2.301325f, 0.956094f, 3.962763f, 1.383397f, 3.214281f, +}; + +static const float acceptor_me2x3acc9[256] = { +0.080864f, 1.061148f, 0.000011f, 0.632647f, 0.047418f, 0.576001f, 0.000076f, 0.661114f, 0.009049f, 0.167492f, 0.000001f, 0.080486f, 0.051563f, 0.472722f, 0.000019f, 0.445638f, +0.091072f, 0.893180f, 0.000061f, 0.745645f, 0.064861f, 0.588840f, 0.000540f, 0.946364f, 0.088873f, 1.229428f, 0.000052f, 0.827251f, 0.086646f, 0.593677f, 0.000166f, 0.783673f, +0.001797f, 0.027297f, 0.000007f, 0.019437f, 0.001261f, 0.017732f, 0.000064f, 0.024308f, 0.001179f, 0.025278f, 0.000004f, 0.014508f, 0.001114f, 0.011825f, 0.000013f, 0.013314f, +0.101349f, 0.983407f, 0.000137f, 0.783197f, 0.107646f, 0.966884f, 0.001807f, 1.482449f, 0.105833f, 1.448493f, 0.000126f, 0.929812f, 0.191886f, 1.300784f, 0.000743f, 1.638076f, +0.154788f, 2.479552f, 0.000105f, 1.241938f, 0.092410f, 1.370311f, 0.000774f, 1.321338f, 0.018062f, 0.408120f, 0.000011f, 0.164761f, 0.066491f, 0.744129f, 0.000128f, 0.589341f, +0.610440f, 7.308212f, 0.002132f, 5.125611f, 0.442626f, 4.905333f, 0.019136f, 6.623243f, 0.621187f, 10.489910f, 0.001904f, 5.929898f, 0.391247f, 3.272414f, 0.003899f, 3.629058f, +0.099005f, 1.836332f, 0.002115f, 1.098519f, 0.070736f, 1.214508f, 0.018706f, 1.398699f, 0.067780f, 1.773278f, 0.001270f, 0.855015f, 
0.041357f, 0.535914f, 0.002521f, 0.506924f, +0.339500f, 4.021317f, 0.002391f, 2.690590f, 0.367127f, 4.025388f, 0.032003f, 5.185070f, 0.369690f, 6.176573f, 0.002284f, 3.330946f, 0.433020f, 3.583319f, 0.008701f, 3.791018f, +0.054050f, 0.613302f, 0.000071f, 0.393464f, 0.030405f, 0.319364f, 0.000496f, 0.394443f, 0.008783f, 0.140572f, 0.000010f, 0.072689f, 0.036114f, 0.286286f, 0.000136f, 0.290417f, +0.078085f, 0.662187f, 0.000531f, 0.594866f, 0.053349f, 0.418797f, 0.004490f, 0.724286f, 0.110652f, 1.323586f, 0.000660f, 0.958366f, 0.077845f, 0.461199f, 0.001510f, 0.655115f, +0.033487f, 0.439955f, 0.001393f, 0.337108f, 0.022544f, 0.274172f, 0.011605f, 0.404438f, 0.031925f, 0.591623f, 0.001165f, 0.365381f, 0.021758f, 0.199711f, 0.002582f, 0.241966f, +0.079155f, 0.664122f, 0.001085f, 0.569156f, 0.080653f, 0.626402f, 0.013686f, 1.033485f, 0.120029f, 1.420490f, 0.001444f, 0.981210f, 0.157035f, 0.920483f, 0.006143f, 1.247354f, +0.260466f, 2.581281f, 0.000283f, 1.096237f, 0.154217f, 1.414753f, 0.002073f, 1.156691f, 0.027955f, 0.390769f, 0.000027f, 0.133761f, 0.180139f, 1.247216f, 0.000558f, 0.837533f, +0.515311f, 3.816698f, 0.002889f, 2.269680f, 0.370563f, 2.540647f, 0.025710f, 2.908632f, 0.482302f, 5.038710f, 0.002372f, 2.415110f, 0.531751f, 2.751541f, 0.008505f, 2.587283f, +0.170667f, 1.958368f, 0.005852f, 0.993328f, 0.120930f, 1.284526f, 0.051320f, 1.254322f, 0.107464f, 1.739365f, 0.003232f, 0.711099f, 0.114782f, 0.920173f, 0.011229f, 0.738005f, +0.770290f, 5.644597f, 0.008706f, 3.202243f, 0.826094f, 5.603657f, 0.115564f, 6.120130f, 0.771476f, 7.974125f, 0.007649f, 3.646240f, 1.581806f, 8.098064f, 0.051009f, 7.264300f, +}; + +#endif diff --git a/splice_site_new.cpp b/splice_site_new.cpp new file mode 100644 index 0000000..ae12a47 --- /dev/null +++ b/splice_site_new.cpp @@ -0,0 +1,1157 @@ +/* + * Copyright 2013, Daehwan Kim + * + * This file is part of Bowtie 2. 
+ * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "edit.h" +#include "splice_site.h" +#include "aligner_report.h" +#include "aligner_result.h" + +#if defined(NEW_PROB_MODEL) + +#include "splice_site_mem.h" + +#else + +float donor_prob[4][donor_len] = { + {0.340f, 0.604f, 0.092f, 0.001f, 0.001f, 0.526f, 0.713f, 0.071f, 0.160f}, + {0.363f, 0.129f, 0.033f, 0.001f, 0.001f, 0.028f, 0.076f, 0.055f, 0.165f}, + {0.183f, 0.125f, 0.803f, 1.000f, 0.001f, 0.419f, 0.118f, 0.814f, 0.209f}, + {0.114f, 0.142f, 0.073f, 0.001f, 1.000f, 0.025f, 0.093f, 0.059f, 0.462f} +}; + +float acceptor_prob[4][acceptor_len] = { + {0.090f, 0.084f, 0.075f, 0.068f, 0.076f, 0.080f, 0.097f, 0.092f, 0.076f, 0.078f, 0.237f, 0.042f, 1.000f, 0.001f, 0.239f}, + {0.310f, 0.310f, 0.307f, 0.293f, 0.326f, 0.330f, 0.373f, 0.385f, 0.410f, 0.352f, 0.309f, 0.708f, 0.001f, 0.001f, 0.138f}, + {0.125f, 0.115f, 0.106f, 0.104f, 0.110f, 0.113f, 0.113f, 0.085f, 0.066f, 0.064f, 0.212f, 0.003f, 0.001f, 1.000f, 0.520f}, + {0.463f, 0.440f, 0.470f, 0.494f, 0.471f, 0.463f, 0.408f, 0.429f, 0.445f, 0.504f, 0.240f, 0.246f, 0.001f, 0.001f, 0.104f} +}; + +float donor_prob_sum[1 << (donor_len << 1)]; +float acceptor_prob_sum1[1 << (acceptor_len1 << 1)]; +float acceptor_prob_sum2[1 << (acceptor_len2 << 1)]; + +#endif + +void init_junction_prob() +{ +#if !defined(NEW_PROB_MODEL) + for(size_t i = 0; i < donor_len; i++) { + ASSERT_ONLY(float sum = 0.0f); + 
for(size_t j = 0; j < 4; j++) { + float prob = donor_prob[j][i]; + assert_gt(prob, 0.0f); + ASSERT_ONLY(sum += prob); + donor_prob[j][i] = log(prob / background_prob[j]); + } + assert_range(0.9f, 1.05f, sum); + } + for(size_t i = 0; i < acceptor_len; i++) { + ASSERT_ONLY(float sum = 0.0f); + for(size_t j = 0; j < 4; j++) { + float prob = acceptor_prob[j][i]; + assert_gt(prob, 0.0f); + ASSERT_ONLY(sum += prob); + acceptor_prob[j][i] = log(prob / background_prob[j]); + } + assert_range(0.9f, 1.05f, sum); + } + + const size_t donor_elms = 1 << (donor_len << 1); + for(size_t i = 0; i < donor_elms; i++) { + float sum = 0.0f; + for(size_t j = 0; j < donor_len; j++) { + int base = (i >> (j << 1)) & 0x3; + sum += donor_prob[base][donor_len - j - 1]; + } + donor_prob_sum[i] = exp(-sum); + } + + const size_t acceptor_elms1 = 1 << (acceptor_len1 << 1); + for(size_t i = 0; i < acceptor_elms1; i++) { + float sum = 0.0f; + for(size_t j = 0; j < acceptor_len1; j++) { + int base = (i >> (j << 1)) & 0x3; + sum += acceptor_prob[base][acceptor_len1 - j - 1]; + } + acceptor_prob_sum1[i] = exp(-sum); + } + + const size_t acceptor_elms2 = 1 << (acceptor_len2 << 1); + for(size_t i = 0; i < acceptor_elms2; i++) { + float sum = 0.0f; + for(size_t j = 0; j < acceptor_len2; j++) { + int base = (i >> (j << 1)) & 0x3; + sum += acceptor_prob[base][acceptor_len - j - 1]; + } + acceptor_prob_sum2[i] = exp(-sum); + } +#endif +} + +ostream& operator<<(ostream& out, const SpliceSite& s) +{ + out << s.ref() << "\t" + << s.left() << "\t" + << s.right() << "\t" + << (s.fw() ? 
"+" : "-") << endl; + return out; +} + +#if defined(SPLIT_DB) + +static const uint64_t ref_segment_size = 1 << 24; + +SpliceSiteDB::SpliceSiteDB( + const BitPairReference& refs, + const EList& refnames, + bool threadSafe, + bool write, + bool read) : +_numRefs(refs.numRefs()), +_refnames(refnames), +_write(write), +_read(read), +_threadSafe(threadSafe), +_empty(true) +{ + assert_gt(_numRefs, 0); + assert_eq(_numRefs, _refnames.size()); + for(uint64_t i = 0; i < _numRefs; i++) { + uint64_t reflen = refs.approxLen(i); + assert_gt(reflen, 0); + uint64_t numsegs = (reflen + ref_segment_size - 1) / ref_segment_size; + assert_gt(numsegs, 0); + + _fwIndex.expand(); + _bwIndex.expand(); + for(uint64_t j = 0; j < numsegs; j++) { + _fwIndex.back().push_back(new RedBlack(16 << 10, CA_CAT)); + _bwIndex.back().push_back(new RedBlack(16 << 10, CA_CAT)); + } + + _pool.expand(); + _pool.back().resize(numsegs); + _spliceSites.expand(); + _spliceSites.back().resize(numsegs); + + _mutex.expand(); + for(uint64_t j = 0; j < numsegs; j++) { + _mutex.back().push_back(MUTEX_T()); + } + } + + donorstr.resize(donor_exonic_len + donor_intronic_len); + acceptorstr.resize(acceptor_intronic_len + acceptor_exonic_len); +} + +SpliceSiteDB::~SpliceSiteDB() { + assert_eq(_fwIndex.size(), _bwIndex.size()); + assert_eq(_fwIndex.size(), _pool.size()); + for(uint64_t i = 0; i < _numRefs; i++) { + assert_eq(_fwIndex[i].size(), _bwIndex[i].size()); + for(uint64_t j = 0; j < _fwIndex[i].size(); j++) { + delete _fwIndex[i][j]; + delete _bwIndex[i][j]; + } + + _fwIndex[i].clear(); + _bwIndex[i].clear(); + + ELList& pool = _pool[i]; + for(size_t j = 0; j < pool.size(); j++) { + for(size_t k = 0; k < pool[j].size(); k++) { + delete pool[j][k]; + } + } + } +} + +#if 0 +size_t SpliceSiteDB::size(uint64_t ref) const { + if(!_read) return 0; + + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + assert_lt(ref, _fwIndex.size()); + assert_eq(_fwIndex.size(), _bwIndex.size()); + ThreadSafe 
t(const_cast(&_mutex[ref]), _threadSafe && _write); + return _fwIndex.size(); +} + +bool SpliceSiteDB::empty(uint64_t ref) const { + return size(ref) == 0; +} +#endif + +bool SpliceSiteDB::addSpliceSite( + const Read& rd, + const AlnRes& rs, + uint32_t minAnchorLen) +{ + if(!_write) return false; + if(rs.trimmed5p(true) + rs.trimmed3p(true) > 0) return false; + + _empty = false; + + Coord coord = rs.refcoord(); + uint64_t ref = coord.ref(); + assert_lt(ref, _numRefs); + const EList& edits = rs.ned(); + if(!coord.orient()) { + Edit::invertPoss(const_cast&>(edits), rd.length(), false); + } + SpliceSitePos ssp; + uint32_t refoff = coord.off(); + uint32_t leftAnchorLen = 0, rightAnchorLen = 0; + size_t eidx = 0; + size_t last_eidx = 0; + for(size_t i = 0; i < rd.length(); i++, refoff++) { + while(eidx < edits.size() && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + refoff++; + } else if(edits[eidx].isRefGap()) { + assert_gt(refoff, 0); + refoff--; + } + if(edits[eidx].isSpliced()) { + assert_gt(refoff, 0); + if(ssp.inited()) { + assert(edits[last_eidx].isSpliced()); + assert_lt(edits[last_eidx].pos, edits[eidx].pos); + rightAnchorLen = edits[eidx].pos - edits[last_eidx].pos; + if(leftAnchorLen >= minAnchorLen && rightAnchorLen >= minAnchorLen) { + bool added = false; + uint64_t ref_seg = ssp.left() / ref_segment_size; + uint64_t ref_seg2 = ssp.right() / ref_segment_size; + assert_leq(ref_seg, ref_seg2); + assert_lt(ref, _mutex.size()); + assert_lt(ref_seg, _mutex[ref].size()); + assert_lt(ref_seg2, _mutex[ref].size()); + ThreadSafe t(&_mutex[ref][ref_seg], _threadSafe && _write); + ThreadSafe t2(&_mutex[ref][ref_seg2], _threadSafe && _write && ref_seg != ref_seg2); + assert_lt(ref, _fwIndex.size()); + assert_lt(ref_seg, _fwIndex[ref].size()); + assert(_fwIndex[ref][ref_seg] != NULL); + Node *cur = _fwIndex[ref][ref_seg]->add(pool(ref, ref_seg), ssp, &added); + if(added) { + assert_lt(ref, _spliceSites.size()); + assert_lt(ref_seg, _spliceSites[ref].size()); 
+ _spliceSites[ref][ref_seg].expand(); + _spliceSites[ref][ref_seg].back().init(ssp.ref(), ssp.left(), ssp.right(), ssp.fw(), ssp.canonical()); + _spliceSites[ref][ref_seg].back()._readid = rd.rdid; + assert(cur != NULL); + cur->payload = _spliceSites[ref][ref_seg].size() - 1; + + SpliceSitePos rssp(ssp.ref(), ssp.right(), ssp.left(), ssp.fw(), ssp.canonical()); + assert_lt(ref, _bwIndex.size()); + assert_lt(ref_seg2, _bwIndex[ref].size()); + assert(_bwIndex[ref][ref_seg2] != NULL); + cur = _bwIndex[ref][ref_seg2]->add(pool(ref, ref_seg2), rssp, &added); + assert(added); + assert(cur != NULL); + if(ref_seg != ref_seg2) { + _spliceSites[ref][ref_seg2].expand(); + _spliceSites[ref][ref_seg2].back().init(ssp.ref(), ssp.left(), ssp.right(), ssp.fw(), ssp.canonical()); + _spliceSites[ref][ref_seg2].back()._readid = rd.rdid; + } + cur->payload = _spliceSites[ref][ref_seg2].size() - 1; + + } else { + assert(cur != NULL); + assert_lt(ref, _spliceSites.size()); + assert_lt(ref_seg, _spliceSites[ref].size()); + assert_lt(cur->payload, _spliceSites[ref][ref_seg].size()); + if(rd.rdid < _spliceSites[ref][ref_seg][cur->payload]._readid) { + _spliceSites[ref][ref_seg][cur->payload]._readid = rd.rdid; + } + if(ref_seg != ref_seg2) { + SpliceSitePos rssp(ssp.ref(), ssp.right(), ssp.left(), ssp.fw(), ssp.canonical()); + cur = _bwIndex[ref][ref_seg2]->add(pool(ref, ref_seg2), rssp, &added); + assert(cur != NULL); + assert(!added); + if(rd.rdid < _spliceSites[ref][ref_seg2][cur->payload]._readid) { + _spliceSites[ref][ref_seg2][cur->payload]._readid = rd.rdid; + } + } + } + } + leftAnchorLen = rightAnchorLen; + rightAnchorLen = 0; + } else { + leftAnchorLen = edits[eidx].pos; + } + bool fw = (edits[eidx].splDir == EDIT_SPL_FW || edits[eidx].splDir == EDIT_SPL_UNKNOWN); + bool canonical = (edits[eidx].splDir != EDIT_SPL_UNKNOWN); + ssp.init(coord.ref(), refoff - 1, refoff + edits[eidx].splLen, fw, canonical); + refoff += edits[eidx].splLen; + last_eidx = eidx; + } + eidx++; + } + } + 
if(ssp.inited()) { + assert(edits[last_eidx].isSpliced()); + assert_lt(edits[last_eidx].pos, rd.length()); + rightAnchorLen = rd.length() - edits[last_eidx].pos; + if(leftAnchorLen >= minAnchorLen && rightAnchorLen >= minAnchorLen) { + bool added = false; + uint64_t ref_seg = ssp.left() / ref_segment_size; + uint64_t ref_seg2 = ssp.right() / ref_segment_size; + assert_leq(ref_seg, ref_seg2); + assert_lt(ref, _mutex.size()); + assert_lt(ref_seg, _mutex[ref].size()); + assert_lt(ref_seg2, _mutex[ref].size()); + ThreadSafe t(&_mutex[ref][ref_seg], _threadSafe && _write); + ThreadSafe t2(&_mutex[ref][ref_seg2], _threadSafe && _write && ref_seg != ref_seg2); + assert_lt(ref, _fwIndex.size()); + assert_lt(ref_seg, _fwIndex[ref].size()); + assert(_fwIndex[ref][ref_seg] != NULL); + Node *cur = _fwIndex[ref][ref_seg]->add(pool(ref, ref_seg), ssp, &added); + if(added) { + assert_lt(ref, _spliceSites.size()); + assert_lt(ref_seg, _spliceSites[ref].size()); + _spliceSites[ref][ref_seg].expand(); + _spliceSites[ref][ref_seg].back().init(ssp.ref(), ssp.left(), ssp.right(), ssp.fw(), ssp.canonical()); + _spliceSites[ref][ref_seg].back()._readid = rd.rdid; + assert(cur != NULL); + cur->payload = _spliceSites[ref][ref_seg].size() - 1; + + SpliceSitePos rssp(ssp.ref(), ssp.right(), ssp.left(), ssp.fw(), ssp.canonical()); + assert_lt(ref, _bwIndex.size()); + assert_lt(ref_seg2, _bwIndex[ref].size()); + assert(_bwIndex[ref][ref_seg2] != NULL); + cur = _bwIndex[ref][ref_seg2]->add(pool(ref, ref_seg2), rssp, &added); + assert(added); + assert(cur != NULL); + if(ref_seg != ref_seg2) { + _spliceSites[ref][ref_seg2].expand(); + _spliceSites[ref][ref_seg2].back().init(ssp.ref(), ssp.left(), ssp.right(), ssp.fw(), ssp.canonical()); + _spliceSites[ref][ref_seg2].back()._readid = rd.rdid; + } + cur->payload = _spliceSites[ref][ref_seg2].size() - 1; + + } else { + assert(cur != NULL); + assert_lt(ref, _spliceSites.size()); + assert_lt(ref_seg, _spliceSites[ref].size()); + 
assert_lt(cur->payload, _spliceSites[ref][ref_seg].size()); + if(rd.rdid < _spliceSites[ref][ref_seg][cur->payload]._readid) { + _spliceSites[ref][ref_seg][cur->payload]._readid = rd.rdid; + } + if(ref_seg != ref_seg2) { + SpliceSitePos rssp(ssp.ref(), ssp.right(), ssp.left(), ssp.fw(), ssp.canonical()); + cur = _bwIndex[ref][ref_seg2]->add(pool(ref, ref_seg2), rssp, &added); + assert(cur != NULL); + assert(!added); + if(rd.rdid < _spliceSites[ref][ref_seg2][cur->payload]._readid) { + _spliceSites[ref][ref_seg2][cur->payload]._readid = rd.rdid; + } + } + } + } + } + if(!coord.orient()) { + Edit::invertPoss(const_cast&>(edits), rd.length(), false); + } + + return true; +} + +bool SpliceSiteDB::getSpliceSite(SpliceSite& ss) const +{ + if(!_read) return false; + + uint64_t ref = ss.ref(); + uint64_t ref_seg = ss.left() / ref_segment_size; + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + assert_lt(ref_seg, _mutex[ref].size()); + ThreadSafe t(const_cast(&_mutex[ref][ref_seg]), _threadSafe && _write); + + assert_lt(ref, _fwIndex.size()); + assert_lt(ref_seg, _fwIndex[ref].size()); + assert(_fwIndex[ref][ref_seg] != NULL); + const Node *cur = _fwIndex[ref][ref_seg]->lookup(ss); + if(cur == NULL) return false; + assert(cur != NULL); + assert_lt(ref, _spliceSites.size()); + assert_lt(ref_seg, _spliceSites[ref].size()); + ss = _spliceSites[ref][ref_seg][cur->payload]; + return true; +} + +void SpliceSiteDB::getLeftSpliceSites(uint32_t ref, uint32_t left, uint32_t range, EList& spliceSites) const +{ + if(!_read) return; + spliceSites.clear(); + assert_lt(ref, _numRefs); + assert_gt(range, 0); + assert_geq(left + 1, range); + uint32_t begin = left + 1 - range, end = left; + for(uint32_t i = begin / ref_segment_size; i <= end / ref_segment_size; i++) { + assert_lt(ref, _mutex.size()); + assert_lt(i, _mutex[ref].size()); + ThreadSafe t(const_cast(&_mutex[ref][i]), _threadSafe && _write); + assert_lt(ref, _bwIndex.size()); + assert_lt(i, _bwIndex[ref].size()); + 
assert(_bwIndex[ref][i] != NULL); + const Node *cur = _bwIndex[ref][i]->root(); + if(cur != NULL) getSpliceSites_recur(cur, _spliceSites[ref][i], ref, begin, end, spliceSites); + } +} + +void SpliceSiteDB::getRightSpliceSites(uint32_t ref, uint32_t right, uint32_t range, EList& spliceSites) const +{ + if(!_read) return; + spliceSites.clear(); + assert_lt(ref, _numRefs); + assert_gt(range, 0); + assert_gt(right + range, range); + uint32_t begin = right, end = right + range - 1; + for(uint32_t i = begin / ref_segment_size; i <= end / ref_segment_size; i++) { + assert_lt(ref, _mutex.size()); + assert_lt(i, _mutex[ref].size()); + ThreadSafe t(const_cast(&_mutex[ref][i]), _threadSafe && _write); + assert_lt(ref, _fwIndex.size()); + assert_lt(i, _fwIndex[ref].size()); + assert(_fwIndex[ref][i] != NULL); + const Node *cur = _fwIndex[ref][i]->root(); + if(cur != NULL) getSpliceSites_recur(cur, _spliceSites[ref][i], ref, begin, end, spliceSites); + } + +} + +void SpliceSiteDB::getSpliceSites_recur( + const RedBlackNode *node, + const EList& spliceSites_db, + uint32_t ref, + uint32_t left, + uint32_t right, + EList& spliceSites) const +{ + assert(node != NULL); + bool goleft = true, goright = true; + if((node->key.ref() > ref) || + (node->key.ref() == ref && node->key.left() > right)) { + goright = false; + } + + if((node->key.ref() < ref) || + (node->key.ref() == ref && node->key.left() < left)) { + goleft = false; + } + + if(goleft && node->left != NULL) { + getSpliceSites_recur( + node->left, + spliceSites_db, + ref, + left, + right, + spliceSites); + } + + if(node->key.ref() == ref && + node->key.left() >= left && node->key.left() <= right) { + assert_lt(node->payload, spliceSites_db.size()); +#ifndef NDEBUG + const SpliceSite& ss = spliceSites_db[node->payload]; + assert_eq(ss.ref(), node->key.ref()); + assert(ss.left() == node->key.left() || + ss.right() == node->key.left()); +#endif + spliceSites.push_back(spliceSites_db[node->payload]); + } + + if(goright && 
node->right != NULL) { + getSpliceSites_recur( + node->right, + spliceSites_db, + ref, + left, + right, + spliceSites); + } +} + +void SpliceSiteDB::print(ofstream& out) +{ + EList ss_list; + for(uint64_t i = 0; i < _fwIndex.size(); i++) { + for(uint64_t j = 0; j < _fwIndex[i].size(); j++) { + assert(_fwIndex[i][j] != NULL); + const Node *root = _fwIndex[i][j]->root(); + if(root != NULL) print_recur(root, out, ss_list); + } + } + print_impl(out, ss_list); +} + +void SpliceSiteDB::print_recur( + const RedBlackNode *node, + ofstream& out, + EList& ss_list) +{ + if(node == NULL) return; + print_recur(node->left, out, ss_list); + print_impl(out, ss_list, &(node->key)); + print_recur(node->right, out, ss_list); +} + +void SpliceSiteDB::print_impl( + ofstream& out, + EList& ss_list, + const SpliceSitePos* ss) +{ + size_t i = 0; + while(i < ss_list.size()) { + const SpliceSitePos& tmp_ss = ss_list[i]; + bool do_print = true; + if(ss != NULL) { + if(tmp_ss.ref() == ss->ref()) { + assert_leq(tmp_ss.left(), ss->left()); + if(ss->left() < tmp_ss.left() + 10) { + do_print = false; + if((int)ss->left() - (int)tmp_ss.left() == (int)ss->right() - (int)tmp_ss.right()) { + if(!tmp_ss.canonical() && ss->canonical()) { + ss_list.erase(i); + ss_list.push_back(*ss); + } + return; + } + } + } + } + + if(!do_print) { + i++; + continue; + } + + assert_lt(tmp_ss.ref(), _refnames.size()); + out << _refnames[tmp_ss.ref()] << "\t" + << tmp_ss.left() << "\t" + << tmp_ss.right() << "\t" + << (tmp_ss.canonical() ? (tmp_ss.fw() ? 
"+" : "-") : ".") << endl; + + ss_list.erase(i); + } + + if(ss != NULL) ss_list.push_back(*ss); +} + +void SpliceSiteDB::read(ifstream& in, bool novel) +{ + _empty = false; + assert_eq(_numRefs, _refnames.size()); + while(!in.eof()) { + string refname; + uint32_t left = 0, right = 0; + char fw = 0; + in >> refname >> left >> right >> fw; + uint32_t ref = 0; + for(; ref < _refnames.size(); ref++) { + if(_refnames[ref] == refname) break; + } + if(ref >= _numRefs) continue; + uint64_t ref_seg = left / ref_segment_size; + assert_lt(ref, _spliceSites.size()); + assert_lt(ref_seg, _spliceSites[ref].size()); + + _spliceSites[ref][ref_seg].expand(); + _spliceSites[ref][ref_seg].back().init(ref, left, right, fw == '+' || fw == '.', fw != '.'); + _spliceSites[ref][ref_seg].back()._fromfile = true; + assert_gt(_spliceSites[ref][ref_seg].size(), 0); + + bool added = false; + assert_lt(ref, _fwIndex.size()); + assert_lt(ref_seg, _fwIndex[ref].size()); + assert(_fwIndex[ref][ref_seg] != NULL); + Node *cur = _fwIndex[ref][ref_seg]->add(pool(ref, ref_seg), _spliceSites[ref][ref_seg].back(), &added); + assert(added); + assert(cur != NULL); + cur->payload = _spliceSites[ref][ref_seg].size() - 1; + + added = false; + uint64_t ref_seg2 = right / ref_segment_size; + SpliceSitePos rssp(ref, right, left, fw == '+' || fw == '.', fw != '.'); + assert_lt(ref, _bwIndex.size()); + assert_lt(ref_seg2, _bwIndex.size()); + assert(_bwIndex[ref][ref_seg2] != NULL); + cur = _bwIndex[ref][ref_seg2]->add(pool(ref, ref_seg2), rssp, &added); + assert(added); + assert(cur != NULL); + if(ref_seg != ref_seg2) { + _spliceSites[ref][ref_seg2].expand(); + _spliceSites[ref][ref_seg2].back().init(ref, left, right, fw == '+' || fw == '.', fw != '.'); + _spliceSites[ref][ref_seg2].back()._fromfile = true; + } + cur->payload = _spliceSites[ref][ref_seg2].size() - 1; + } +} + +Pool& SpliceSiteDB::pool(uint64_t ref, uint64_t ref_seg) { + assert_lt(ref, _numRefs); + assert_lt(ref, _pool.size()); + assert_lt(ref_seg, 
_pool[ref].size()); + if(_pool[ref][ref_seg].size() <= 0 || _pool[ref][ref_seg].back()->full()) { + _pool[ref][ref_seg].push_back(new Pool(1 << 18 /* 256KB */, 16 << 10 /* 16KB */, CA_CAT)); + } + assert(_pool[ref][ref_seg].back() != NULL); + return *_pool[ref][ref_seg].back(); +} + +#else + +SpliceSiteDB::SpliceSiteDB( + const BitPairReference& refs, + const EList& refnames, + bool threadSafe, + bool write, + bool read) : +_numRefs(refs.numRefs()), +_refnames(refnames), +_write(write), +_read(read), +_threadSafe(threadSafe), +_empty(true) +{ + assert_gt(_numRefs, 0); + assert_eq(_numRefs, _refnames.size()); + for(uint64_t i = 0; i < _numRefs; i++) { + _fwIndex.push_back(new RedBlack(16 << 10, CA_CAT)); + _bwIndex.push_back(new RedBlack(16 << 10, CA_CAT)); + _pool.expand(); + _spliceSites.expand(); + _mutex.push_back(MUTEX_T()); + } + + donorstr.resize(donor_exonic_len + donor_intronic_len); + acceptorstr.resize(acceptor_intronic_len + acceptor_exonic_len); +} + +SpliceSiteDB::~SpliceSiteDB() { + assert_eq(_fwIndex.size(), _bwIndex.size()); + assert_eq(_fwIndex.size(), _pool.size()); + for(uint64_t i = 0; i < _numRefs; i++) { + delete _fwIndex[i]; + delete _bwIndex[i]; + + EList& pool = _pool[i]; + for(size_t j = 0; j < pool.size(); j++) { + delete pool[j]; + } + } +} + +size_t SpliceSiteDB::size(uint64_t ref) const { + if(!_read) return 0; + + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + assert_lt(ref, _fwIndex.size()); + assert_eq(_fwIndex.size(), _bwIndex.size()); + ThreadSafe t(const_cast(&_mutex[ref]), _threadSafe && _write); + return _fwIndex.size(); +} + +bool SpliceSiteDB::empty(uint64_t ref) const { + return size(ref) == 0; +} + +bool SpliceSiteDB::addSpliceSite( + const Read& rd, + const AlnRes& rs, + uint32_t minAnchorLen) +{ + if(!_write) return false; + if(rs.trimmed5p(true) + rs.trimmed3p(true) > 0) return false; + + _empty = false; + + Coord coord = rs.refcoord(); + uint64_t ref = coord.ref(); + assert_lt(ref, _numRefs); + const 
EList& edits = rs.ned(); + if(!coord.orient()) { + Edit::invertPoss(const_cast&>(edits), rd.length(), false); + } + SpliceSitePos ssp; + uint32_t refoff = coord.off(); + uint32_t leftAnchorLen = 0, rightAnchorLen = 0; + size_t eidx = 0; + size_t last_eidx = 0; + uint32_t mm = 0; + for(size_t i = 0; i < rd.length(); i++, refoff++) { + while(eidx < edits.size() && edits[eidx].pos == i) { + if(edits[eidx].isReadGap()) { + refoff++; + } else if(edits[eidx].isRefGap()) { + assert_gt(refoff, 0); + refoff--; + } + if(edits[eidx].isGap() || edits[eidx].isMismatch()) mm++; + if(edits[eidx].isSpliced()) { + assert_gt(refoff, 0); + if(ssp.inited()) { + assert(edits[last_eidx].isSpliced()); + assert_lt(edits[last_eidx].pos, edits[eidx].pos); + rightAnchorLen = edits[eidx].pos - edits[last_eidx].pos; + uint32_t minLeftAnchorLen = minAnchorLen + mm * 2 + (edits[eidx].splDir == EDIT_SPL_UNKNOWN ? 6 : 0); + uint32_t mm2 = 0; + for(size_t j = eidx + 1; j < edits.size(); j++) { + if(edits[j].isGap() || edits[j].isMismatch()) mm2++; + } + uint32_t minRightAnchorLen = minAnchorLen + mm2 * 2 + (edits[eidx].splDir == EDIT_SPL_UNKNOWN ? 
6 : 0); + if(leftAnchorLen >= minLeftAnchorLen && rightAnchorLen >= minRightAnchorLen) { + bool added = false; + assert_lt(ref, _mutex.size()); + ThreadSafe t(&_mutex[ref], _threadSafe && _write); + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + Node *cur = _fwIndex[ref]->add(pool(ref), ssp, &added); + if(added) { + assert_lt(ref, _spliceSites.size()); + _spliceSites[ref].expand(); + _spliceSites[ref].back().init(ssp.ref(), ssp.left(), ssp.right(), ssp.fw(), ssp.canonical()); + _spliceSites[ref].back()._readid = rd.rdid; + _spliceSites[ref].back()._leftext = leftAnchorLen; + _spliceSites[ref].back()._rightext = rightAnchorLen; + _spliceSites[ref].back()._numreads = 1; + assert(cur != NULL); + cur->payload = _spliceSites[ref].size() - 1; + + SpliceSitePos rssp(ssp.ref(), ssp.right(), ssp.left(), ssp.fw(), ssp.canonical()); + assert_lt(ref, _bwIndex.size()); + assert(_bwIndex[ref] != NULL); + cur = _bwIndex[ref]->add(pool(ref), rssp, &added); + assert(added); + assert(cur != NULL); + cur->payload = _spliceSites[ref].size() - 1; + assert_eq(_fwIndex[ref]->size(), _bwIndex[ref]->size()); + } else { + assert(cur != NULL); + assert_lt(ref, _spliceSites.size()); + assert_lt(cur->payload, _spliceSites[ref].size()); + if(leftAnchorLen > _spliceSites[ref][cur->payload]._leftext) _spliceSites[ref][cur->payload]._leftext = leftAnchorLen; + if(rightAnchorLen > _spliceSites[ref][cur->payload]._rightext) _spliceSites[ref][cur->payload]._rightext = rightAnchorLen; + _spliceSites[ref][cur->payload]._numreads += 1; + if(rd.rdid < _spliceSites[ref][cur->payload]._readid) { + _spliceSites[ref][cur->payload]._readid = rd.rdid; + } + } + } + leftAnchorLen = rightAnchorLen; + rightAnchorLen = 0; + } else { + leftAnchorLen = edits[eidx].pos; + } + bool fw = (edits[eidx].splDir == EDIT_SPL_FW || edits[eidx].splDir == EDIT_SPL_UNKNOWN); + bool canonical = (edits[eidx].splDir != EDIT_SPL_UNKNOWN); + ssp.init(coord.ref(), refoff - 1, refoff + edits[eidx].splLen, fw, 
canonical); + refoff += edits[eidx].splLen; + last_eidx = eidx; + } + eidx++; + } + } + if(ssp.inited()) { + assert(edits[last_eidx].isSpliced()); + assert_lt(edits[last_eidx].pos, rd.length()); + rightAnchorLen = rd.length() - edits[last_eidx].pos; + uint32_t minLeftAnchorLen = minAnchorLen + mm * 2 + (edits[last_eidx].splDir == EDIT_SPL_UNKNOWN ? 6 : 0); + uint32_t mm2 = 0; + for(size_t j = last_eidx + 1; j < edits.size(); j++) { + if(edits[j].isGap() || edits[j].isMismatch()) mm2++; + } + uint32_t minRightAnchorLen = minAnchorLen + mm2 * 2 + (edits[last_eidx].splDir == EDIT_SPL_UNKNOWN ? 6 : 0); + if(leftAnchorLen >= minLeftAnchorLen && rightAnchorLen >= minRightAnchorLen) { + bool added = false; + assert_lt(ref, _mutex.size()); + ThreadSafe t(&_mutex[ref], _threadSafe && _write); + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + Node *cur = _fwIndex[ref]->add(pool(ref), ssp, &added); + if(added) { + assert_lt(ref, _spliceSites.size()); + _spliceSites[ref].expand(); + _spliceSites[ref].back().init(ssp.ref(), ssp.left(), ssp.right(), ssp.fw(), ssp.canonical()); + _spliceSites[ref].back()._readid = rd.rdid; + _spliceSites[ref].back()._leftext = leftAnchorLen; + _spliceSites[ref].back()._rightext = rightAnchorLen; + _spliceSites[ref].back()._numreads = 1; + assert(cur != NULL); + cur->payload = _spliceSites[ref].size() - 1; + + SpliceSitePos rssp(ssp.ref(), ssp.right(), ssp.left(), ssp.fw(), ssp.canonical()); + assert_lt(ref, _bwIndex.size()); + assert(_bwIndex[ref] != NULL); + cur = _bwIndex[ref]->add(pool(ref), rssp, &added); + assert(added); + assert(cur != NULL); + cur->payload = _spliceSites[ref].size() - 1; + assert_eq(_fwIndex[ref]->size(), _bwIndex[ref]->size()); + } else { + assert(cur != NULL); + assert_lt(ref, _spliceSites.size()); + assert_lt(cur->payload, _spliceSites[ref].size()); + if(leftAnchorLen > _spliceSites[ref][cur->payload]._leftext) _spliceSites[ref][cur->payload]._leftext = leftAnchorLen; + if(rightAnchorLen > 
_spliceSites[ref][cur->payload]._rightext) _spliceSites[ref][cur->payload]._rightext = rightAnchorLen; + _spliceSites[ref][cur->payload]._numreads += 1; + if(rd.rdid < _spliceSites[ref][cur->payload]._readid) { + _spliceSites[ref][cur->payload]._readid = rd.rdid; + } + } + } + } + if(!coord.orient()) { + Edit::invertPoss(const_cast&>(edits), rd.length(), false); + } + + return true; +} + +bool SpliceSiteDB::getSpliceSite(SpliceSite& ss) const +{ + if(!_read) return false; + + uint64_t ref = ss.ref(); + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + ThreadSafe t(const_cast(&_mutex[ref]), _threadSafe && _write); + + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + const Node *cur = _fwIndex[ref]->lookup(ss); + if(cur == NULL) return false; + assert(cur != NULL); + assert_lt(ref, _spliceSites.size()); + ss = _spliceSites[ref][cur->payload]; + return true; +} + +void SpliceSiteDB::getLeftSpliceSites(uint32_t ref, uint32_t left, uint32_t range, EList& spliceSites) const +{ + if(!_read) return; + + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + ThreadSafe t(const_cast(&_mutex[ref]), _threadSafe && _write); + assert_gt(range, 0); + assert_geq(left + 1, range); + assert_lt(ref, _bwIndex.size()); + assert(_bwIndex[ref] != NULL); + const Node *cur = _bwIndex[ref]->root(); + if(cur != NULL) getSpliceSites_recur(cur, ref, left + 1 - range, left, spliceSites); +} + +void SpliceSiteDB::getRightSpliceSites(uint32_t ref, uint32_t right, uint32_t range, EList& spliceSites) const +{ + if(!_read) return; + + assert_lt(ref, _numRefs); + assert_lt(ref, _mutex.size()); + ThreadSafe t(const_cast(&_mutex[ref]), _threadSafe && _write); + assert_gt(range, 0); + assert_gt(right + range, range); + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + const Node *cur = _fwIndex[ref]->root(); + if(cur != NULL) getSpliceSites_recur(cur, ref, right, right + range - 1, spliceSites); + +} + +void SpliceSiteDB::getSpliceSites_recur( + 
const RedBlackNode *node, + uint32_t ref, + uint32_t left, + uint32_t right, + EList& spliceSites) const +{ + assert(node != NULL); + bool goleft = true, goright = true; + if((node->key.ref() > ref) || + (node->key.ref() == ref && node->key.left() > right)) { + goright = false; + } + + if((node->key.ref() < ref) || + (node->key.ref() == ref && node->key.left() < left)) { + goleft = false; + } + + if(goleft && node->left != NULL) { + getSpliceSites_recur( + node->left, + ref, + left, + right, + spliceSites); + } + + if(node->key.ref() == ref && + node->key.left() >= left && node->key.left() <= right) { + assert_lt(ref, _spliceSites.size()); + assert_lt(node->payload, _spliceSites[ref].size()); +#ifndef NDEBUG + const SpliceSite& ss = _spliceSites[ref][node->payload]; + assert_eq(ss.ref(), node->key.ref()); + assert(ss.left() == node->key.left() || + ss.right() == node->key.left()); +#endif + spliceSites.push_back(_spliceSites[ref][node->payload]); + } + + if(goright && node->right != NULL) { + getSpliceSites_recur( + node->right, + ref, + left, + right, + spliceSites); + } +} + +void calculate_splicesite_read_dist_impl(const RedBlackNode *node, + const EList &spliceSites, + EList& splicesite_read_dist) { + if(node == NULL) return; + calculate_splicesite_read_dist_impl(node->left, spliceSites, splicesite_read_dist); + assert_lt(node->payload, spliceSites.size()); + const SpliceSite& ss = spliceSites[node->payload]; + if(ss.numreads() < splicesite_read_dist.size()) + splicesite_read_dist[ss.numreads()] += 1; + else + splicesite_read_dist.back() += 1; + calculate_splicesite_read_dist_impl(node->right, spliceSites, splicesite_read_dist); +} + +uint32_t calculate_splicesite_read_dist(const EList* >& fwIndex, + const ELList &spliceSites, + EList& splicesite_read_dist) { + for(size_t i = 0; i < fwIndex.size(); i++) { + assert(fwIndex[i] != NULL); + const RedBlackNode *root = fwIndex[i]->root(); + assert_lt(i, spliceSites.size()); + if(root != NULL) 
calculate_splicesite_read_dist_impl(root, spliceSites[i], splicesite_read_dist); + } + + for(size_t i = 1; i < splicesite_read_dist.size(); i++) { + splicesite_read_dist[i] += splicesite_read_dist[i-1]; + } + + for(size_t i = 0; i < splicesite_read_dist.size(); i++) { + float cmf_i = float(splicesite_read_dist[i]) / splicesite_read_dist.back(); + if(cmf_i > 0.7) + return i; + } + + return 0; +} + +void SpliceSiteDB::print(ofstream& out) +{ + EList splicesite_read_dist; + for(size_t i = 0; i < 100; i++) { + splicesite_read_dist.push_back(0); + } + uint32_t numreads_cutoff = calculate_splicesite_read_dist(_fwIndex, _spliceSites, splicesite_read_dist); + + EList ss_list; + for(size_t i = 0; i < _fwIndex.size(); i++) { + assert(_fwIndex[i] != NULL); + const Node *root = _fwIndex[i]->root(); + if(root != NULL) print_recur(root, out, numreads_cutoff, ss_list); + } + print_impl(out, ss_list); +} + +void SpliceSiteDB::print_recur( + const RedBlackNode *node, + ofstream& out, + const uint32_t numreads_cutoff, + EList& ss_list) +{ + if(node == NULL) return; + print_recur(node->left, out, numreads_cutoff, ss_list); + const SpliceSitePos& ssp = node->key; + assert_lt(ssp.ref(), _spliceSites.size()); + assert_lt(node->payload, _spliceSites[ssp.ref()].size()); + const SpliceSite& ss = _spliceSites[ssp.ref()][node->payload]; + if(ss.numreads() >= numreads_cutoff) print_impl(out, ss_list, &ss); + print_recur(node->right, out, numreads_cutoff, ss_list); +} + +void SpliceSiteDB::print_impl( + ofstream& out, + EList& ss_list, + const SpliceSite* ss) +{ + size_t i = 0; + while(i < ss_list.size()) { + const SpliceSite& tmp_ss = ss_list[i]; + bool do_print = true; + if(ss != NULL) { + if(tmp_ss.ref() == ss->ref()) { + assert_leq(tmp_ss.left(), ss->left()); + if(ss->left() < tmp_ss.left() + 10) { + do_print = false; + if(abs(((int)ss->left() - (int)tmp_ss.left()) - ((int)ss->right() - (int)tmp_ss.right())) <= 10) { + if(tmp_ss.numreads() < ss->numreads()) { + ss_list.erase(i); + 
ss_list.push_back(*ss); + } + return; + } + } + } + } + + if(!do_print) { + i++; + continue; + } + + assert_lt(tmp_ss.ref(), _refnames.size()); + out << _refnames[tmp_ss.ref()] << "\t" + << tmp_ss.left() << "\t" + << tmp_ss.right() << "\t" + << (tmp_ss.canonical() ? (tmp_ss.fw() ? "+" : "-") : ".") << endl; + + ss_list.erase(i); + } + + if(ss != NULL) ss_list.push_back(*ss); +} + +void SpliceSiteDB::read(ifstream& in, bool novel) +{ + _empty = false; + assert_eq(_numRefs, _refnames.size()); + while(!in.eof()) { + string refname; + uint32_t left = 0, right = 0; + char fw = 0; + in >> refname >> left >> right >> fw; + uint32_t ref = 0; + for(; ref < _refnames.size(); ref++) { + if(_refnames[ref] == refname) break; + } + if(ref >= _numRefs) continue; + assert_lt(ref, _spliceSites.size()); + _spliceSites[ref].expand(); + _spliceSites[ref].back().init(ref, left, right, fw == '+' || fw == '.', fw != '.'); + _spliceSites[ref].back()._fromfile = true; + assert_gt(_spliceSites[ref].size(), 0); + + bool added = false; + assert_lt(ref, _fwIndex.size()); + assert(_fwIndex[ref] != NULL); + Node *cur = _fwIndex[ref]->add(pool(ref), _spliceSites[ref].back(), &added); + assert(added); + assert(cur != NULL); + cur->payload = _spliceSites[ref].size() - 1; + + added = false; + SpliceSitePos rssp(ref, right, left, fw == '+' || fw == '.', fw != '.'); + assert_lt(ref, _bwIndex.size()); + assert(_bwIndex[ref] != NULL); + cur = _bwIndex[ref]->add(pool(ref), rssp, &added); + assert(added); + assert(cur != NULL); + cur->payload = _spliceSites[ref].size() - 1; + } +} + +Pool& SpliceSiteDB::pool(uint64_t ref) { + assert_lt(ref, _numRefs); + assert_lt(ref, _pool.size()); + EList& pool = _pool[ref]; + if(pool.size() <= 0 || pool.back()->full()) { + pool.push_back(new Pool(1 << 20 /* 1MB */, 16 << 10 /* 16KB */, CA_CAT)); + } + assert(pool.back() != NULL); + return *pool.back(); +} + +#endif + +float SpliceSiteDB::probscore( + int64_t donor_seq, + int64_t acceptor_seq) +{ + float probscore = 
0.0f; +#if defined(NEW_PROB_MODEL) + float donor_probscore = 0.0f; + assert_leq(donor_seq, 0x3ffff); + int64_t donor_exonic_seq = (donor_seq >> 4) & (~0xff); + int64_t donor_intronic_seq = donor_seq & 0xff; + int64_t donor_rest_seq = donor_exonic_seq | donor_intronic_seq; + int donor_seq3 = (donor_seq >> 10) & 0x3; + int donor_seq4 = (donor_seq >> 8) & 0x3; + donor_probscore = donor_cons1[donor_seq3] * donor_cons2[donor_seq4] / (background_bp_prob[donor_seq3] * background_bp_prob[donor_seq4]) * donor_me2x5[donor_rest_seq]; + + float acceptor_probscore = 0.0f; + assert_leq(acceptor_seq, 0x3fffffffffff); + int64_t acceptor_intronic_seq = (acceptor_seq >> 4) & (~0x3f); + int64_t acceptor_exonic_seq = acceptor_seq & 0x3f; + int64_t acceptor_rest_seq = acceptor_intronic_seq | acceptor_exonic_seq; + int acceptor_seq18 = (acceptor_seq >> 8) & 0x3; + int acceptor_seq19 = (acceptor_seq >> 6) & 0x3; + acceptor_probscore = acceptor_cons1[acceptor_seq18] * acceptor_cons2[acceptor_seq19] / (background_bp_prob[acceptor_seq18] * background_bp_prob[acceptor_seq19]); + + int64_t acceptor_seq1 = acceptor_rest_seq >> 28 & 0x3fff; // [0, 7] + acceptor_probscore *= acceptor_me2x3acc1[acceptor_seq1]; + int64_t acceptor_seq2 = (acceptor_rest_seq >> 14) & 0x3fff; // [7, 7] + acceptor_probscore *= acceptor_me2x3acc2[acceptor_seq2]; + int64_t acceptor_seq3 = acceptor_rest_seq & 0x3fff; // [14, 7] + acceptor_probscore *= acceptor_me2x3acc3[acceptor_seq3]; + int64_t acceptor_seq4 = (acceptor_rest_seq >> 20) & 0x3fff; // [4, 7] + acceptor_probscore *= acceptor_me2x3acc4[acceptor_seq4]; + int64_t acceptor_seq5 = (acceptor_rest_seq >> 6) & 0x3fff; // [11, 7] + acceptor_probscore *= acceptor_me2x3acc5[acceptor_seq5]; + int64_t acceptor_seq6 = acceptor_seq1 & 0x3f; // [4, 3] + acceptor_probscore /= acceptor_me2x3acc6[acceptor_seq6]; + int64_t acceptor_seq7 = acceptor_seq4 & 0xff; // [7, 4] + acceptor_probscore /= acceptor_me2x3acc7[acceptor_seq7]; + int64_t acceptor_seq8 = acceptor_seq2 & 0x3f; // 
[11, 3] + acceptor_probscore /= acceptor_me2x3acc8[acceptor_seq8]; + int64_t acceptor_seq9 = acceptor_seq5 & 0xff; // [14, 4] + acceptor_probscore /= acceptor_me2x3acc9[acceptor_seq9]; + + donor_probscore /= (1.0f + donor_probscore); + acceptor_probscore /= (1.0f + acceptor_probscore); + probscore = (donor_probscore + acceptor_probscore) / 2.0; + +#else + assert_lt(donor_seq, (int)(1 << (donor_len << 1))); + probscore = donor_prob_sum[donor_seq]; + + int acceptor_seq1 = acceptor_seq >> (acceptor_len2 << 1); + assert_lt(acceptor_seq1, (int)(1 << (acceptor_len1 << 1))); + probscore *= acceptor_prob_sum1[acceptor_seq1]; + + int acceptor_seq2 = acceptor_seq % (1 << (acceptor_len2 << 1)); + probscore *= acceptor_prob_sum2[acceptor_seq2]; + + probscore = 1.0 / (1.0 + probscore); +#endif + return probscore; +} + diff --git a/spliced_aligner.h b/spliced_aligner.h new file mode 100644 index 0000000..0217bf5 --- /dev/null +++ b/spliced_aligner.h @@ -0,0 +1,2054 @@ +/* + * Copyright 2015, Daehwan Kim + * + * This file is part of HISAT 2. + * + * HISAT 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * HISAT 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HISAT 2. If not, see . 
+ */ + +#ifndef SPLICED_ALIGNER_H_ +#define SPLICED_ALIGNER_H_ + +#include "hi_aligner.h" + +/** + * With a hierarchical indexing, SplicedAligner provides several alignment strategies + * , which enable effective alignment of RNA-seq reads + */ +template +class SplicedAligner : public HI_Aligner { + +public: + /** + * Initialize with index. + */ + SplicedAligner( + const GFM& gfm, + bool anchorStop, + uint64_t threads_rids_mindist = 0) : + HI_Aligner(gfm, + anchorStop, + threads_rids_mindist) + { + } + + ~SplicedAligner() { + } + + /** + * Given a partial alignment of a read, try to further extend + * the alignment bidirectionally using a combination of + * local search, extension, and global search + */ + virtual + void hybridSearch( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + bool fw, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink); + + /** + * Given a partial alignment of a read, try to further extend + * the alignment bidirectionally using a combination of + * local search, extension, and global search + */ + virtual + int64_t hybridSearch_recur( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + const GenomeHit& hit, + index_t hitoff, + index_t hitlen, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink, + bool alignMate = false, + index_t dep = 0); +}; + +/** + * Given a partial alignment of a read, try to further extend + * the alignment 
bidirectionally using a combination of + * local search, extension, and global search + */ +template +void SplicedAligner::hybridSearch( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + bool fw, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink) +{ + assert_lt(rdi, 2); + assert(this->_rds[rdi] != NULL); + him.localatts++; + + const ReportingParams& rp = sink.reportingParams(); + + // before further alignment using local search, extend the partial alignments directly + // by comparing with the corresponding genomic sequences + // this extension is performed without any mismatches allowed + for(index_t hi = 0; hi < this->_genomeHits.size(); hi++) { + GenomeHit& genomeHit = this->_genomeHits[hi]; + index_t leftext = (index_t)INDEX_MAX, rightext = (index_t)INDEX_MAX; + genomeHit.extend( + *(this->_rds[rdi]), + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + INDEX_MAX, + tpol, + gpol, + leftext, + rightext); + } + + // for the candidate alignments, examine the longest (best) one first + this->_genomeHits_done.resize(this->_genomeHits.size()); + this->_genomeHits_done.fill(false); + for(size_t hi = 0; hi < this->_genomeHits.size(); hi++) { + index_t hj = 0; + for(; hj < this->_genomeHits.size(); hj++) { + if(!this->_genomeHits_done[hj]) break; + } + if(hj >= this->_genomeHits.size()) break; + for(index_t hk = hj + 1; hk < this->_genomeHits.size(); hk++) { + if(this->_genomeHits_done[hk]) continue; + GenomeHit& genomeHit_j = this->_genomeHits[hj]; + GenomeHit& genomeHit_k = this->_genomeHits[hk]; + if(genomeHit_k.hitcount() > genomeHit_j.hitcount() || + (genomeHit_k.hitcount() == genomeHit_j.hitcount() && 
genomeHit_k.len() > genomeHit_j.len())) { + hj = hk; + } + } + + // given a candidate partial alignment, extend it bidirectionally + him.anchoratts++; + GenomeHit& genomeHit = this->_genomeHits[hj]; + + int64_t maxsc = std::numeric_limits::min(); + maxsc = hybridSearch_recur(sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + genomeHit, + genomeHit.rdoff(), + genomeHit.len(), + wlm, + prm, + swm, + him, + rnd, + sink); + + if(rp.bowtie2_dp == 2 || (rp.bowtie2_dp == 1 && maxsc < this->_minsc[rdi])) { + const Read& rd = *this->_rds[rdi]; + // Initialize the aligner with a new read + swa.initRead(rd.patFw, // fw version of query + rd.patRc, // rc version of query + rd.qual, // fw version of qualities + rd.qualRev, // rc version of qualities + 0, // off of first char in 'rd' to consider + rd.length(), // off of last char (excl) in 'rd' to consider + sc); // scoring scheme + + bool found = genomeHit.len() >= rd.length(); + if(!found) { + DynProgFramer dpframe(false); // trimToRef + size_t tlen = ref.approxLen(genomeHit.ref()); + size_t readGaps = 10, refGaps = 10, nceil = 0, maxhalf = 10; + index_t refoff = genomeHit.refoff() > genomeHit.rdoff() ? 
genomeHit.refoff() - genomeHit.rdoff() : 0; + DPRect rect; + dpframe.frameSeedExtensionRect(refoff, // ref offset implied by seed hit assuming no gaps + rd.length(), // length of read sequence used in DP table + tlen, // length of reference + readGaps, // max # of read gaps permitted in opp mate alignment + refGaps, // max # of ref gaps permitted in opp mate alignment + (size_t)nceil, // # Ns permitted + maxhalf, // max width in either direction + rect); // DP rectangle + assert(rect.repOk()); + + size_t cminlen = 2000, cpow2 = 4, nwindow = 10, nsInLeftShift = 0; + swa.initRef(fw, // whether to align forward or revcomp read + genomeHit.ref(), // reference aligned against + rect, // DP rectangle + ref, // Reference strings + tlen, // length of reference sequence + sc, // scoring scheme + this->_minsc[rdi], // minimum score permitted + true, // use 8-bit SSE if possible? + cminlen, // minimum length for using checkpointing scheme + cpow2, // interval b/t checkpointed diags; 1 << this + false, // triangular mini-fills? + true, // this is a seed extension - not finding a mate + nwindow, + nsInLeftShift); + + // Now fill the dynamic programming matrix and return true iff + // there is at least one valid alignment + TAlScore bestCell = std::numeric_limits::min(); + found = swa.align(rnd, bestCell); + if(found) { + SwResult res; + res.reset(); + res.alres.init_raw_edits(&(this->_rawEdits)); + found = swa.nextAlignment(res, this->_minsc[rdi], rnd); + if(found) { + if(!fw) res.alres.invertEdits(); + + const Coord& coord = res.alres.refcoord(); + assert_geq(genomeHit._joinedOff + coord.off(), genomeHit.refoff()); + index_t joinedOff = genomeHit._joinedOff + coord.off() - genomeHit.refoff(); + genomeHit.init(fw, + 0, // rdoff + rd.length(), + 0, // trim5 + 0, // trim3 + coord.ref(), + coord.off(), + joinedOff, + this->_sharedVars, + genomeHit.repeat(), // repeat? 
+ &res.alres.ned(), + NULL, + res.alres.score().score()); + + genomeHit.replace_edits_with_alts(rd, + altdb.alts(), + ssdb, + sc, + this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + (index_t)tpol.minAnchorLen(), + (index_t)tpol.minAnchorLen_noncan(), + ref); + + } + } + } + + if(found) { + hybridSearch_recur(sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + genomeHit, + genomeHit.rdoff(), + genomeHit.len(), + wlm, + prm, + swm, + him, + rnd, + sink); + } + } + this->_genomeHits_done[hj] = true; + } +} + + +/** + * Given a partial alignment of a read, try to further extend + * the alignment bidirectionally using a combination of + * local search, extension, and global search + */ +template +int64_t SplicedAligner::hybridSearch_recur( + const Scoring& sc, + const PairedEndPolicy& pepol, // paired-end policy + const TranscriptomePolicy& tpol, + const GraphPolicy& gpol, + const GFM& gfm, + const ALTDB& altdb, + const RepeatDB& repeatdb, + const BitPairReference& ref, + SwAligner& swa, + SpliceSiteDB& ssdb, + index_t rdi, + const GenomeHit& hit, + index_t hitoff, + index_t hitlen, + WalkMetrics& wlm, + PerReadMetrics& prm, + SwMetrics& swm, + HIMetrics& him, + RandomSource& rnd, + AlnSinkWrap& sink, + bool alignMate, + index_t dep) +{ + const ReportingParams& rp = sink.reportingParams(); + int64_t maxsc = numeric_limits::min(); + him.localsearchrecur++; + assert_lt(rdi, 2); + assert(this->_rds[rdi] != NULL); + const Read& rd = *(this->_rds[rdi]); + index_t rdlen = (index_t)rd.length(); + + TAlScore cushion = 0; + if(tpol.no_spliced_alignment()) { + cushion = alignMate ? 
rdlen * 0.03 * sc.mm(255) : 0; + } + + if(hit.score() + cushion < this->_minsc[rdi]) return maxsc; + if(dep >= 128) return maxsc; + + // if it's already examined, just return + if(hitoff == hit.rdoff() - hit.trim5() && hitlen == hit.len() + hit.trim5() + hit.trim3()) { + if(this->isSearched(hit, rdi)) return maxsc; + this->addSearched(hit, rdi); + } + + // for effective use of memory allocation and deallocation + if(this->_coords.size() <= dep) { + this->_coords.expand(); + assert_leq(this->_local_genomeHits.size(), dep); + this->_local_genomeHits.expand(); + assert_leq(this->_spliceSites.size(), dep); + this->_spliceSites.expand(); + } + EList& coords = this->_coords[dep]; + EList& spliceSites = this->_spliceSites[dep]; + + // daehwan - for debugging purposes +#if 0 + cout << rd.name << "\t" + << (hit.fw() ? "+" : "-") << "\t" + << hitoff << "\t" + << hitoff + hitlen << "\t" + << "( " << hit.rdoff() << "\t" + << hit.rdoff() + hit.len() << " )" << "\t" + << hit.refoff() << "\t" + << hit.getRightOff() << "\t" + << hit.score() << "\t" + << "dep: " << dep << "\t"; + Edit::print(cout, hit.edits()); + cout << endl; +#endif + + assert_leq(hitoff + hitlen, rdlen); + // if this is a full alignment, report it + if(hitoff == 0 && hitlen == rdlen) { + if(!this->redundant(sink, rdi, hit)) { + bool another_spliced = false; + if(!ssdb.empty()) { + int64_t best_score = hit.score(); + this->_local_genomeHits[dep].clear(); + this->_anchors_added.clear(); + + this->_local_genomeHits[dep].expand(); + this->_local_genomeHits[dep].back() = hit; + this->_anchors_added.push_back(0); + + index_t fragoff = 0, fraglen = 0, left = 0, right = 0; + hit.getLeft(fragoff, fraglen, left); + const index_t minMatchLen = (index_t)this->_minK; + index_t min_left_anchor = rdlen, min_right_anchor = rdlen; + // make use of a list of known or novel splice sites to further align the read + if(fraglen >= minMatchLen && + left >= minMatchLen && + hit.trim5() == 0 && + !hit.repeat() && + 
!tpol.no_spliced_alignment()) { + spliceSites.clear(); + ssdb.getLeftSpliceSites(hit.ref(), left + minMatchLen, minMatchLen, spliceSites); + for(size_t si = 0; si < spliceSites.size(); si++) { + const SpliceSite& ss = spliceSites[si]; + if(!ss._fromfile && ss._readid + this->_thread_rids_mindist > rd.rdid) continue; + if(left + fraglen - 1 < ss.right()) continue; + index_t frag2off = ss.left() - (ss.right() - left); + if(frag2off + 1 < hitoff) continue; + GenomeHit tempHit; + if(fragoff + ss.right() < left + 1) continue; + index_t readoff = fragoff + ss.right() - left - 1; + index_t joinedOff = 0; + bool success = gfm.textOffToJoined(hit.ref(), ss.left(), joinedOff); + if(!success) { + continue; + } +#ifndef NDEBUG + index_t debug_tid = 0, debug_toff = 0, debug_tlen = 0; + bool debug_straddled = false; + gfm.joinedToTextOff(1, // qlen + joinedOff, + debug_tid, + debug_toff, + debug_tlen, + false, + debug_straddled); + assert_eq(hit.ref(), debug_tid); + assert_eq(ss.left(), debug_toff); +#endif + tempHit.init(hit.fw(), + readoff + 1, // rdoff + 0, // len + 0, // trim5 + 0, // trim3 + hit.ref(), + ss.left() + 1, + joinedOff + 1, + this->_sharedVars, + gfm.repeat()); + index_t leftext = readoff + 1, rightext = 0; + tempHit.extend(rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext); + if(tempHit.len() <= 0) + continue; + if(!tempHit.compatibleWith( + hit, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + tpol.no_spliced_alignment())) + continue; + int64_t minsc = max(this->_minsc[rdi], best_score); + bool combined = tempHit.combineWith( + hit, + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + sc, + minsc, + rnd, + (index_t)this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + 1, + 1, + gpol.maxAltsTried(), + &ss, + tpol.no_spliced_alignment()); + if(rdi == 0) minsc = max(minsc, 
sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + index_t leftAnchorLen = 0, nedits = 0; + tempHit.getLeftAnchor(leftAnchorLen, nedits); + if(combined && + tempHit.score() >= minsc && + nedits <= leftAnchorLen / 4) { // prevent (short) anchors from having many mismatches + if(this->isSearched(tempHit, rdi)) continue; + if(!this->redundant(sink, rdi, tempHit)) { + another_spliced = true; + if(tempHit.score() > best_score) + best_score = tempHit.score(); + this->_local_genomeHits[dep].expand(); + this->_local_genomeHits[dep].back() = tempHit; + this->_anchors_added.push_back(1); + index_t temp_fragoff = 0, temp_fraglen = 0, temp_left = 0; + tempHit.getLeft(temp_fragoff, temp_fraglen, temp_left); + if(temp_fraglen < min_left_anchor) + min_left_anchor = temp_fraglen; + } + } + } + } + + size_t num_local_genomeHits = this->_local_genomeHits[dep].size(); + for(size_t i = 0; i < num_local_genomeHits; i++) { + this->_local_genomeHits[dep][i].getRight(fragoff, fraglen, right); + if(this->_local_genomeHits[dep][i].score() < best_score) continue; + // make use of a list of known or novel splice sites to further align the read + if(fraglen >= minMatchLen && + this->_local_genomeHits[dep][i].trim3() == 0 && + !hit.repeat() && + !tpol.no_spliced_alignment()) { + spliceSites.clear(); + assert_gt(fraglen, 0); + ssdb.getRightSpliceSites(this->_local_genomeHits[dep][i].ref(), right + fraglen - minMatchLen, minMatchLen, spliceSites); + for(size_t si = 0; si < spliceSites.size(); si++) { + const GenomeHit& canHit = this->_local_genomeHits[dep][i]; + const SpliceSite& ss = spliceSites[si]; + if(!ss._fromfile && ss._readid + this->_thread_rids_mindist > rd.rdid) continue; + if(right > ss.left()) continue; + GenomeHit tempHit; + index_t readoff = fragoff + ss.left() - right + 1; + if(readoff >= rdlen) + continue; + index_t joinedOff = 0; + bool success = gfm.textOffToJoined(canHit.ref(), ss.right(), joinedOff); + if(!success) { + continue; + } +#ifndef NDEBUG + index_t 
debug_tid = 0, debug_toff = 0, debug_tlen = 0; + bool debug_straddled = false; + gfm.joinedToTextOff(1, // qlen + joinedOff, + debug_tid, + debug_toff, + debug_tlen, + false, + debug_straddled); + assert_eq(canHit.ref(), debug_tid); + assert_eq(ss.right(), debug_toff); +#endif + tempHit.init(canHit.fw(), + readoff, + 0, // len + 0, // trim5 + 0, // trim3 + canHit.ref(), + ss.right(), + joinedOff, + this->_sharedVars, + gfm.repeat()); + index_t leftext = 0, rightext = rdlen - readoff; + tempHit.extend(rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext); + if(tempHit.len() <= 0) + continue; + if(!canHit.compatibleWith(tempHit, (index_t)tpol.minIntronLen(), (index_t)tpol.maxIntronLen(), tpol.no_spliced_alignment())) continue; + GenomeHit combinedHit = canHit; + int64_t minsc = max(this->_minsc[rdi], best_score); + bool combined = combinedHit.combineWith( + tempHit, + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + sc, + minsc, + rnd, + (index_t)this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + 1, + 1, + gpol.maxAltsTried(), + &ss, + tpol.no_spliced_alignment()); + if(rdi == 0) minsc = max(minsc, sink.bestUnp1()); + else minsc = max(minsc, sink.bestUnp2()); + index_t rightAnchorLen = 0, nedits = 0; + combinedHit.getRightAnchor(rightAnchorLen, nedits); + if(combined && + combinedHit.score() >= minsc && + nedits <= rightAnchorLen / 4) { // prevent (short) anchors from having many mismatches + if(this->isSearched(combinedHit, rdi)) continue; + if(!this->redundant(sink, rdi, combinedHit)) { + another_spliced = true; + if(combinedHit.score() > best_score) + best_score = tempHit.score(); + this->_local_genomeHits[dep].expand(); + this->_local_genomeHits[dep].back() = combinedHit; + this->_anchors_added.push_back(this->_anchors_added[i] + 1); + + index_t temp_fragoff = 0, temp_fraglen = 0, temp_right = 0; + 
combinedHit.getLeft(temp_fragoff, temp_fraglen, temp_right); + if(temp_fraglen < min_right_anchor) + min_right_anchor = temp_fraglen; + } + } + } + } + } + + assert_eq(this->_local_genomeHits[dep].size(), this->_anchors_added.size()); + for(size_t i = 0; i < this->_local_genomeHits[dep].size(); i++) { + const GenomeHit& canHit = this->_local_genomeHits[dep][i]; + if(!rp.secondary && canHit.score() < best_score) continue; + // if(min(min_left_anchor, min_right_anchor) <= this->_minK_local) { + + // daehwan - for debugging purposes + // if(this->_anchors_added[i] < this->_anchors_added.back()) continue; + + //} + if(i > 0 && !this->isSearched(canHit, rdi)) { + this->addSearched(canHit, rdi); + } + if(!this->redundant(sink, rdi, canHit)) { + this->reportHit(sc, pepol, tpol, gpol, gfm, altdb, repeatdb, ref, ssdb, sink, rdi, canHit, alignMate); + maxsc = max(maxsc, canHit.score()); + } + } + } + else { + this->reportHit(sc, pepol, tpol, gpol, gfm, altdb, repeatdb, ref, ssdb, sink, rdi, hit, alignMate); + maxsc = max(maxsc, hit.score()); + } + return maxsc; + } + } else if(hitoff > 0 && (hitoff + hitlen == rdlen || hitoff + hitoff < rdlen - hitlen)) { + // Decide which side to extend first (left or right) + if(!ssdb.empty()) { + // extend the partial alignment in the left direction + index_t fragoff = 0, fraglen = 0, left = 0; + hit.getLeft(fragoff, fraglen, left); + const index_t minMatchLen = (index_t)this->_minK_local; + // make use of a list of known or novel splice sites to further align the read + if(fraglen >= minMatchLen && + left >= minMatchLen && + !hit.repeat() && + !tpol.no_spliced_alignment()) { + spliceSites.clear(); + ssdb.getLeftSpliceSites(hit.ref(), left + minMatchLen, minMatchLen + min(minMatchLen, fragoff), spliceSites); + for(size_t si = 0; si < spliceSites.size(); si++) { + const SpliceSite& ss = spliceSites[si]; + if(!ss._fromfile && ss._readid + this->_thread_rids_mindist > rd.rdid) continue; + if(left + fraglen - 1 < ss.right()) continue; + 
if(fragoff + ss.right() < left + 1) continue; + index_t readoff = fragoff + ss.right() - left - 1; + index_t joinedOff = 0; + bool success = gfm.textOffToJoined(hit.ref(), ss.left(), joinedOff); + if(!success) { + continue; + } +#ifndef NDEBUG + index_t debug_tid = 0, debug_toff = 0, debug_tlen = 0; + bool debug_straddled = false; + gfm.joinedToTextOff(1, // qlen + joinedOff, + debug_tid, + debug_toff, + debug_tlen, + false, + debug_straddled); + assert_eq(hit.ref(), debug_tid); + assert_eq(ss.left(), debug_toff); +#endif + GenomeHit tempHit; + tempHit.init(hit.fw(), + readoff + 1, // rdoff + 0, // len + 0, // trim5 + 0, // trim3 + hit.ref(), + ss.left() + 1, + joinedOff + 1, + this->_sharedVars, + gfm.repeat()); + index_t leftext = readoff + 1, rightext = 0; + tempHit.extend(rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext); + if(tempHit.len() <= 0) + continue; + if(!tempHit.compatibleWith(hit, (index_t)tpol.minIntronLen(), (index_t)tpol.maxIntronLen(), tpol.no_spliced_alignment())) continue; + int64_t minsc = this->_minsc[rdi]; + bool combined = tempHit.combineWith( + hit, + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + sc, + minsc, + rnd, + (index_t)this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + 1, + 1, + gpol.maxAltsTried(), + &ss, + tpol.no_spliced_alignment()); + if(!rp.secondary) { + if(rdi == 0) minsc = max(minsc, sink.bestUnp1() - cushion); + else minsc = max(minsc, sink.bestUnp2() - cushion); + } + if(combined && + tempHit.score() >= minsc && + // soft-clipping might be better + tempHit.score() + sc.sc(0) * hit.rdoff() >= hit.score()) { + assert_eq(tempHit.trim5(), 0); + assert_leq(tempHit.rdoff() + tempHit.len() + tempHit.trim3(), rdlen); + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + tempHit, + 
tempHit.rdoff(), + tempHit.len() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } + } + } + } + + bool use_localindex = true; + if(hitoff == hit.rdoff() && hitoff <= this->_minK) { + index_t leftext = (index_t)INDEX_MAX, rightext = (index_t)0; + GenomeHit tempHit = hit; + tempHit.extend( + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext, + 1); + if(tempHit.rdoff() == 0) { + use_localindex = false; + } + } + + // Choose a local index based on the genomic location of the partial alignment + const HGFM* hGFM = (const HGFM*)(&gfm); + const LocalGFM* lGFM = hGFM->getLocalGFM(hit.ref(), hit.refoff()); + assert_leq(lGFM->_localOffset, hit.refoff()); + bool success = false, first = true; + index_t count = 0; + // Use at most two local indexes + const index_t max_count = 2; + int64_t prev_score = hit.score(); + this->_local_genomeHits[dep].clear(); + while(!success && count++ < max_count && use_localindex) { + if(him.localindexatts >= this->max_localindexatts) break; + if(first) { + first = false; + } else { + lGFM = hGFM->prevLocalGFM(lGFM); + if(lGFM == NULL || lGFM->empty()) break; + } + // local index search + index_t extlen = 0; + local_index_t top = (local_index_t)INDEX_MAX, bot = (local_index_t)INDEX_MAX; + local_index_t node_top = (local_index_t)INDEX_MAX, node_bot = (local_index_t)INDEX_MAX; + index_t extoff = hitoff - 1; + if(extoff > 0) extoff -= 1; + if(extoff < tpol.minAnchorLen()) { + extoff = tpol.minAnchorLen(); + } + index_t nelt = (index_t)INDEX_MAX; + index_t max_nelt = std::max(5, extlen); + bool no_extension = false; + bool uniqueStop= false; + index_t minUniqueLen = (index_t)this->_minK_local; + for(; extoff < rdlen; extoff++) { + extlen = 0; + uniqueStop = true; + him.localindexatts++; + this->_local_node_iedge_count.clear(); + nelt = this->localGFMSearch( + 
*lGFM, // BWT index + rd, // read to align + sc, // scoring scheme + sink.reportingParams(), + hit.fw(), + extoff, + extlen, + top, + bot, + node_top, + node_bot, + this->_local_node_iedge_count, + rnd, + uniqueStop, + minUniqueLen); + if(extoff + 1 - extlen >= hitoff) { + no_extension = true; + break; + } + if(nelt <= max_nelt) break; + } + assert_leq(node_top, node_bot); + assert_eq(nelt, (index_t)(node_bot - node_top)); + assert_leq(extlen, extoff + 1); + if(nelt > 0 && + nelt <= max_nelt && + extlen >= tpol.minAnchorLen() && + !no_extension) { + assert_leq(nelt, max_nelt); + coords.clear(); + bool straddled = false; + // get genomic locations for this local search + this->getGenomeCoords_local( + *lGFM, + altdb, + ref, + rnd, + top, + bot, + node_top, + node_bot, + this->_local_node_iedge_count, + hit.fw(), + extoff + 1 - extlen, + extlen, + coords, + wlm, + prm, + him, + true, // reject straddled? + straddled); + assert_leq(coords.size(), nelt); + coords.sort(); + for(int ri = (int)coords.size() - 1; ri >= 0; ri--) { + const Coord& coord = coords[ri]; + GenomeHit tempHit; + tempHit.init(coord.orient(), + extoff + 1 - extlen, + extlen, + 0, // trim5 + 0, // trim3 + (index_t)coord.ref(), + (index_t)coord.off(), + (index_t)coord.joinedOff(), + this->_sharedVars, + gfm.repeat()); + if(!tempHit.adjustWithALT(*this->_rds[rdi], gfm, altdb, ref, gpol)) continue; + // check if the partial alignment is compatible with the new alignment using the local index + if(!tempHit.compatibleWith(hit, (index_t)tpol.minIntronLen(), (index_t)tpol.maxIntronLen(), tpol.no_spliced_alignment())) { + if(count == 1) continue; + else break; + } + if(uniqueStop) { + assert_eq(coords.size(), 1); + index_t leftext = (index_t)INDEX_MAX, rightext = (index_t)0; + tempHit.extend( + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext); + } + // combine the partial alignment and the 
new alignment + int64_t minsc = this->_minsc[rdi]; + bool combined = tempHit.combineWith( + hit, + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + sc, + minsc, + rnd, + (index_t)this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + tpol.minAnchorLen(), + tpol.minAnchorLen_noncan(), + gpol.maxAltsTried(), + NULL, // splice sites + tpol.no_spliced_alignment()); + if(!rp.secondary) { + if(rdi == 0) minsc = max(minsc, sink.bestUnp1() - cushion); + else minsc = max(minsc, sink.bestUnp2() - cushion); + } + if(combined && tempHit.score() >= minsc) { + assert_eq(tempHit.trim5(), 0); + assert_leq(tempHit.rdoff() + tempHit.len() + tempHit.trim3(), rdlen); + if(tempHit.score() >= prev_score - sc.mmpMax) { + // extend the new partial alignment recursively + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff(), + tempHit.len() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } else { + this->_local_genomeHits[dep].push_back(tempHit); + } + } + } + } + if(maxsc >= prev_score - sc.mmpMax) success = true; + if(!success && + (him.localindexatts >= this->max_localindexatts || count == max_count || hGFM->prevLocalGFM(lGFM) == NULL)) { + for(index_t ti = 0; ti < this->_local_genomeHits[dep].size(); ti++) { + GenomeHit& tempHit = this->_local_genomeHits[dep][ti]; + int64_t minsc = this->_minsc[rdi]; + if(!rp.secondary) { + if(rdi == 0) minsc = max(minsc, sink.bestUnp1() - cushion); + else minsc = max(minsc, sink.bestUnp2() - cushion); + } + if(tempHit.score() >= minsc) { + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff(), + tempHit.len() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } + 
} + } + } // while(!success && count++ < 2) + + if(!success) { + if(hitoff > this->_minK && + him.localindexatts < this->max_localindexatts) { + index_t extlen = 0; + index_t top = (index_t)INDEX_MAX, bot = (index_t)INDEX_MAX; + index_t node_top = (index_t)INDEX_MAX, node_bot = (index_t)INDEX_MAX; + this->_node_iedge_count.clear(); + index_t extoff = hitoff - 1; + bool uniqueStop = true; + // perform global search for long introns + index_t nelt = this->globalGFMSearch( + gfm, // GFM index + rd, // read to align + sc, // scoring scheme + sink.reportingParams(), + hit.fw(), + extoff, + extlen, + top, + bot, + node_top, + node_bot, + this->_node_iedge_count, + rnd, + uniqueStop); + if(nelt > 0 && nelt <= 5 && extlen >= this->_minK) { + coords.clear(); + bool straddled = false; + this->getGenomeCoords( + gfm, + altdb, + ref, + rnd, + top, + bot, + node_top, + node_bot, + this->_node_iedge_count, + hit.fw(), + bot - top, + extoff + 1 - extlen, + extlen, + coords, + wlm, + prm, + him, + true, // reject straddled? 
+ straddled); + assert_leq(coords.size(), nelt); + if(coords.size() > 1) coords.sort(); + for(int ri = (int)coords.size() - 1; ri >= 0; ri--) { + const Coord& coord = coords[ri]; + GenomeHit tempHit; + tempHit.init(coord.orient(), + extoff + 1 - extlen, + extlen, + 0, // trim5 + 0, // trim3 + (index_t)coord.ref(), + (index_t)coord.off(), + (index_t)coord.joinedOff(), + this->_sharedVars, + gfm.repeat()); + if(!tempHit.adjustWithALT(*this->_rds[rdi], gfm, altdb, ref, gpol)) continue; + if(!tempHit.compatibleWith(hit, (index_t)tpol.minIntronLen(), (index_t)tpol.maxIntronLen(), tpol.no_spliced_alignment())) continue; + if(uniqueStop) { + assert_eq(coords.size(), 1); + index_t leftext = (index_t)INDEX_MAX, rightext = (index_t)0; + tempHit.extend( + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext); + } + int64_t minsc = this->_minsc[rdi]; + bool combined = tempHit.combineWith( + hit, + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + sc, + minsc, + rnd, + (index_t)this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + tpol.minAnchorLen(), + tpol.minAnchorLen_noncan(), + gpol.maxAltsTried(), + NULL, // splice sites + tpol.no_spliced_alignment()); + if(!rp.secondary) { + if(rdi == 0) minsc = max(minsc, sink.bestUnp1() - cushion); + else minsc = max(minsc, sink.bestUnp2() - cushion); + } + if(combined && tempHit.score() >= minsc) { + assert_eq(tempHit.trim5(), 0); + assert_leq(tempHit.rdoff() + tempHit.len() + tempHit.trim3(), rdlen); + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff(), + tempHit.len() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } + } + } + } + GenomeHit tempHit = hit; + index_t trimMax = 
(index_t)((tempHit.score() - max(maxsc, this->_minsc[rdi])) / sc.sc(0)); + if(tempHit.rdoff() < trimMax) { + index_t trim5 = tempHit.rdoff(); + GenomeHit trimedHit = tempHit; + trimedHit.trim5(trim5, + rd, + ssdb, + sc, + (index_t)this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + tpol.minAnchorLen(), + tpol.minAnchorLen_noncan(), + ref); + assert_leq(trimedHit.len() + trimedHit.trim5() + trimedHit.trim3(), rdlen); + int64_t tmp_score = trimedHit.score(); + if(tmp_score > maxsc && tmp_score >= this->_minsc[rdi]) { + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + trimedHit, + 0, + trimedHit.len() + trimedHit.trim5() + trimedHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + // return maxsc; + } + } + // extend the partial alignment directly comparing with the corresponding genomic sequence + // with mismatches or a gap allowed + int64_t minsc = this->_minsc[rdi]; + assert_geq(tempHit.score(), minsc); + index_t mm = (index_t)((tempHit.score() - minsc) / sc.mmpMax); + index_t leftext = (index_t)INDEX_MAX, rightext = (index_t)0; + index_t num_mismatch_allowed = 1; + if(hitoff <= this->_minK_local) { + num_mismatch_allowed = min(tempHit.rdoff(), mm); + } + him.localextatts++; + tempHit.extend( + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext, + num_mismatch_allowed); + if(!rp.secondary) { + if(rdi == 0) minsc = max(minsc, sink.bestUnp1() - cushion); + else minsc = max(minsc, sink.bestUnp2() - cushion); + } + if(tempHit.score() >= minsc && leftext >= min((index_t)this->_minK_local, hit.rdoff())) { + assert_eq(tempHit.trim5(), 0); + assert_leq(tempHit.rdoff() + tempHit.len() + tempHit.trim3(), rdlen); + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, 
+ gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff(), + tempHit.len() + tempHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } else if(hitoff > this->_minK_local) { + // skip some bases of a read + index_t jumplen = hitoff > this->_minK ? (index_t)this->_minK : (index_t)this->_minK_local; + assert_leq(hitoff, hit.rdoff()); + int64_t expected_score = hit.score() - (hit.rdoff() - hitoff) / jumplen * sc.mmpMax - sc.mmpMax; + if(expected_score >= minsc) { + assert_lt(hitlen + jumplen, rdlen); + assert_eq(hit.trim5(), 0); + assert_leq(hitoff + hitlen, rdlen); + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + hit, + hitoff - jumplen, + hitlen + jumplen, + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } + } + } + } else { + // extend the partial alignment in the right direction + assert_lt(hitoff + hitlen, rdlen); + if(!ssdb.empty()) { + index_t fragoff = 0, fraglen = 0, right = 0; + hit.getRight(fragoff, fraglen, right); + const index_t minMatchLen = (index_t)this->_minK_local; + // make use of a list of known or novel splice sites to further align the read + if(fraglen >= minMatchLen && + !hit.repeat() && + !tpol.no_spliced_alignment()) { + spliceSites.clear(); + assert_gt(fraglen, 0); + assert_leq(fragoff + fraglen, rdlen); + index_t right_unmapped_len = rdlen - fragoff - fraglen; + ssdb.getRightSpliceSites(hit.ref(), right + fraglen - minMatchLen, minMatchLen + min(minMatchLen, right_unmapped_len), spliceSites); + for(size_t si = 0; si < spliceSites.size(); si++) { + const SpliceSite& ss = spliceSites[si]; + if(!ss._fromfile && ss._readid + this->_thread_rids_mindist > rd.rdid) continue; + if(right > ss.left()) continue; + GenomeHit tempHit; + assert_leq(right, ss.left()); + index_t readoff = fragoff + ss.left() - right + 1; + 
if(readoff >= rdlen) + continue; + index_t joinedOff = 0; + bool success = gfm.textOffToJoined(hit.ref(), ss.right(), joinedOff); + if(!success) { + continue; + } +#ifndef NDEBUG + index_t debug_tid = 0, debug_toff = 0, debug_tlen = 0; + bool debug_straddled = false; + gfm.joinedToTextOff(1, // qlen + joinedOff, + debug_tid, + debug_toff, + debug_tlen, + false, + debug_straddled); + assert_eq(hit.ref(), debug_tid); + assert_eq(ss.right(), debug_toff); +#endif + tempHit.init(hit.fw(), + readoff, + 0, // len + 0, // trim5 + 0, // trim3 + hit.ref(), + ss.right(), + joinedOff, + this->_sharedVars, + gfm.repeat()); + index_t leftext = 0, rightext = rdlen - readoff; + tempHit.extend(rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext); + if(tempHit.len() <= 0) + continue; + if(!hit.compatibleWith(tempHit, (index_t)tpol.minIntronLen(), (index_t)tpol.maxIntronLen(), tpol.no_spliced_alignment())) continue; + GenomeHit combinedHit = hit; + int64_t minsc = this->_minsc[rdi]; + bool combined = combinedHit.combineWith( + tempHit, + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + sc, + minsc, + rnd, + (index_t)this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + 1, + 1, + gpol.maxAltsTried(), + &ss, + tpol.no_spliced_alignment()); + if(!rp.secondary) { + if(rdi == 0) minsc = max(minsc, sink.bestUnp1() - cushion); + else minsc = max(minsc, sink.bestUnp2() - cushion); + } + if(combined && combinedHit.score() >= minsc && + // soft-clipping might be better + combinedHit.score() + sc.sc(0) * (rdlen - hit.rdoff() - hit.len() - hit.trim5()) >= hit.score()) { + assert_leq(combinedHit.trim5(), combinedHit.rdoff()); + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + combinedHit, + combinedHit.rdoff() - combinedHit.trim5(), + combinedHit.len() + 
combinedHit.trim5(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } + } + } + } + + bool use_localindex = true; + if(hit.len() == hitlen && hitoff + hitlen + this->_minK > rdlen) { + index_t leftext = (index_t)0, rightext = (index_t)INDEX_MAX; + GenomeHit tempHit = hit; + tempHit.extend( + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext, + 1); + if(tempHit.rdoff() + tempHit.len()== rdlen) { + use_localindex = false; + } + } + + // Choose a local index based on the genomic location of the partial alignment + const HGFM* hGFM = (const HGFM*)(&gfm); + const LocalGFM* lGFM = hGFM->getLocalGFM(hit.ref(), hit.refoff()); + bool success = false, first = true; + index_t count = 0; + // Use at most two local indexes + const index_t max_count = 2; + int64_t prev_score = hit.score(); + this->_local_genomeHits[dep].clear(); + while(!success && count++ < max_count && use_localindex) { + if(him.localindexatts >= this->max_localindexatts) break; + if(first) { + first = false; + } else { + lGFM = hGFM->nextLocalGFM(lGFM); + if(lGFM == NULL || lGFM->empty()) break; + } + // local index search + index_t extlen = 0; + local_index_t top = (local_index_t)INDEX_MAX, bot = (local_index_t)INDEX_MAX; + local_index_t node_top = (local_index_t)INDEX_MAX, node_bot = (local_index_t)INDEX_MAX; + index_t extoff = hitoff + hitlen + (index_t)this->_minK_local; + if(extoff + 1 < rdlen) extoff += 1; + if(extoff >= rdlen) { + extoff = rdlen - 1; + } + index_t nelt = (index_t)INDEX_MAX; + index_t max_nelt = std::max(5, extlen); + bool no_extension = false; + bool uniqueStop; + index_t minUniqueLen = (index_t)this->_minK_local; + index_t maxHitLen = max(extoff - hitoff - hitlen, (index_t)this->_minK_local); + for(; maxHitLen < extoff + 1 && extoff < rdlen;) { + extlen = 0; + uniqueStop = false; + him.localindexatts++; + 
this->_local_node_iedge_count.clear(); + nelt = this->localGFMSearch( + *lGFM, // GFM index + rd, // read to align + sc, // scoring scheme + sink.reportingParams(), + hit.fw(), + extoff, + extlen, + top, + bot, + node_top, + node_bot, + this->_local_node_iedge_count, + rnd, + uniqueStop, + minUniqueLen, + maxHitLen); + if(extoff < hitoff + hitlen) { + no_extension = true; + break; + } + if(nelt <= max_nelt) break; + if(extoff + 1 < rdlen) extoff++; + else { + if(extlen < maxHitLen) break; + else maxHitLen++; + } + } + assert_leq(node_top, node_bot); + assert_eq(nelt, (index_t)(node_bot - node_top)); + assert_leq(extlen, extoff + 1); + assert_leq(extoff, rdlen); + if(nelt > 0 && + nelt <= max_nelt && + extlen >= tpol.minAnchorLen() && + !no_extension) { + assert_leq(nelt, max_nelt); + coords.clear(); + bool straddled = false; + // get genomic locations for this local search + this->getGenomeCoords_local( + *lGFM, + altdb, + ref, + rnd, + top, + bot, + node_top, + node_bot, + this->_local_node_iedge_count, + hit.fw(), + extoff + 1 - extlen, + extlen, + coords, + wlm, + prm, + him, + true, // reject straddled? 
+ straddled); + assert_leq(coords.size(), nelt); + if(coords.size() > 1) coords.sort(); + for(index_t ri = 0; ri < coords.size(); ri++) { + const Coord& coord = coords[ri]; + GenomeHit tempHit; + tempHit.init(coord.orient(), + extoff + 1 - extlen, + extlen, + 0, // trim5 + 0, // trim3 + (index_t)coord.ref(), + (index_t)coord.off(), + (index_t)coord.joinedOff(), + this->_sharedVars, + gfm.repeat()); + if(!tempHit.adjustWithALT(*this->_rds[rdi], gfm, altdb, ref, gpol)) continue; + // check if the partial alignment is compatible with the new alignment using the local index + if(!hit.compatibleWith(tempHit, (index_t)tpol.minIntronLen(), (index_t)tpol.maxIntronLen(), tpol.no_spliced_alignment())) { + if(count == 1) continue; + else break; + } + index_t leftext = (index_t)0, rightext = (index_t)INDEX_MAX; + tempHit.extend( + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext); + GenomeHit combinedHit = hit; + int64_t minsc = this->_minsc[rdi]; + // combine the partial alignment and the new alignment + bool combined = combinedHit.combineWith( + tempHit, + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + sc, + minsc, + rnd, + (index_t)this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + tpol.minAnchorLen(), + tpol.minAnchorLen_noncan(), + gpol.maxAltsTried(), + NULL, // splice sites + tpol.no_spliced_alignment()); + if(!rp.secondary) { + if(rdi == 0) minsc = max(minsc, sink.bestUnp1() - cushion); + else minsc = max(minsc, sink.bestUnp2() - cushion); + } + if(combined && combinedHit.score() >= minsc) { + assert_leq(combinedHit.trim5(), combinedHit.rdoff()); + if(combinedHit.score() >= prev_score - sc.mmpMax) { + // extend the new partial alignment recursively + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + combinedHit, + 
combinedHit.rdoff() - combinedHit.trim5(), + combinedHit.len() + combinedHit.trim5(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } else { + this->_local_genomeHits[dep].push_back(combinedHit); + } + } + } + } + // int64_t minsc = (rdi == 0 ? sink.bestUnp1() : sink.bestUnp2()); + if(maxsc >= prev_score - sc.mmpMax) success = true; + if(!success && + (him.localindexatts >= this->max_localindexatts || count == max_count || hGFM->nextLocalGFM(lGFM) == NULL) ) { + for(index_t ti = 0; ti < this->_local_genomeHits[dep].size(); ti++) { + GenomeHit& tempHit = this->_local_genomeHits[dep][ti]; + int64_t minsc = this->_minsc[rdi]; + if(!rp.secondary) { + if(rdi == 0) minsc = max(minsc, sink.bestUnp1() - cushion); + else minsc = max(minsc, sink.bestUnp2() - cushion); + } + if(tempHit.score() >= minsc) { + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff() - tempHit.trim5(), + tempHit.len() + tempHit.trim5(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } + } + } + } // while(!success && count++ < 2) + + if(!success) { + // perform global search for long introns + if(hitoff + hitlen + this->_minK + 1 < rdlen && + him.localindexatts < this->max_localindexatts) { + index_t extlen = 0; + index_t top = (index_t)INDEX_MAX, bot = (index_t)INDEX_MAX; + index_t node_top = (index_t)INDEX_MAX, node_bot = (index_t)INDEX_MAX; + this->_node_iedge_count.clear(); + index_t extoff = hitoff + hitlen + (index_t)this->_minK + 1; + bool uniqueStop = true; + index_t nelt = this->globalGFMSearch( + gfm, // GFM index + rd, // read to align + sc, // scoring scheme + sink.reportingParams(), + hit.fw(), + extoff, + extlen, + top, + bot, + node_top, + node_bot, + this->_node_iedge_count, + rnd, + uniqueStop); + if(nelt > 0 && nelt <= 5 && extlen >= this->_minK) { + coords.clear(); + 
bool straddled = false; + this->getGenomeCoords( + gfm, + altdb, + ref, + rnd, + top, + bot, + node_top, + node_bot, + this->_node_iedge_count, + hit.fw(), + bot - top, + extoff + 1 - extlen, + extlen, + coords, + wlm, + prm, + him, + true, // reject straddled + straddled); + assert_leq(coords.size(), nelt); + coords.sort(); + for(index_t ri = 0; ri < coords.size(); ri++) { + const Coord& coord = coords[ri]; + GenomeHit tempHit; + tempHit.init(coord.orient(), + extoff + 1 - extlen, + extlen, + 0, // trim5 + 0, // trim3 + (index_t)coord.ref(), + (index_t)coord.off(), + (index_t)coord.joinedOff(), + this->_sharedVars, + gfm.repeat()); + if(!tempHit.adjustWithALT(*this->_rds[rdi], gfm, altdb, ref, gpol)) continue; + if(!hit.compatibleWith(tempHit, (index_t)tpol.minIntronLen(), (index_t)tpol.maxIntronLen(), tpol.no_spliced_alignment())) continue; + index_t leftext = (index_t)0, rightext = (index_t)INDEX_MAX; + tempHit.extend( + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + leftext, + rightext); + GenomeHit combinedHit = hit; + int64_t minsc = this->_minsc[rdi]; + bool combined = combinedHit.combineWith( + tempHit, + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + sc, + minsc, + rnd, + (index_t)this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + tpol.minAnchorLen(), + tpol.minAnchorLen_noncan(), + gpol.maxAltsTried(), + NULL, // splice sites + tpol.no_spliced_alignment()); + if(!rp.secondary) { + if(rdi == 0) minsc = max(minsc, sink.bestUnp1() - cushion); + else minsc = max(minsc, sink.bestUnp2() - cushion); + } + if(combined && combinedHit.score() >= minsc) { + assert_leq(combinedHit.trim5(), combinedHit.rdoff()); + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + combinedHit, + combinedHit.rdoff() - combinedHit.trim5(), + combinedHit.len() 
+ combinedHit.trim5(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } + } + } + } + GenomeHit tempHit = hit; + assert(tempHit.trim5() == 0 || hitoff == 0); + index_t trimLen = rdlen - hitoff - tempHit.len() - tempHit.trim5(); + index_t trimMax = (index_t)((tempHit.score() - max(maxsc, this->_minsc[rdi])) / sc.sc(0)); + if(trimLen < trimMax) { + index_t trim3 = rdlen - hitoff - tempHit.len() - tempHit.trim5(); + GenomeHit trimedHit = tempHit; + trimedHit.trim3(trim3, + rd, + ssdb, + sc, + (index_t)this->_minK_local, + (index_t)tpol.minIntronLen(), + (index_t)tpol.maxIntronLen(), + tpol.minAnchorLen(), + tpol.minAnchorLen_noncan(), + ref); + assert_leq(trimedHit.trim5(), trimedHit.rdoff()); + assert_leq(trimedHit.len() + trimedHit.trim5() + trimedHit.trim3(), rdlen); + int64_t tmp_score = trimedHit.score(); + if(tmp_score > maxsc && tmp_score >= this->_minsc[rdi]) { + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + trimedHit, + trimedHit.rdoff() - trimedHit.trim5(), + trimedHit.len() + trimedHit.trim5() + trimedHit.trim3(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + // return maxsc; + } + } + // extend the partial alignment directly comparing with the corresponding genomic sequence + // with mismatches or a gap allowed + int64_t minsc = this->_minsc[rdi]; + assert_geq(tempHit.score(), minsc); + index_t leftext = (index_t)0, rightext = (index_t)INDEX_MAX; + index_t mm = (index_t)((tempHit.score() - minsc) / sc.mmpMax); + index_t num_mismatch_allowed = 1; + if(rdlen - hitoff - hitlen <= this->_minK_local) { + num_mismatch_allowed = min(rdlen - tempHit.rdoff() - tempHit.len(), mm); + } + him.localextatts++; + tempHit.extend( + rd, + gfm, + ref, + altdb, + repeatdb, + ssdb, + swa, + swm, + prm, + sc, + this->_minsc[rdi], + rnd, + (index_t)this->_minK_local, + tpol, + gpol, + 
leftext, + rightext, + num_mismatch_allowed); + if(!rp.secondary) { + if(rdi == 0) minsc = max(minsc, sink.bestUnp1() - cushion); + else minsc = max(minsc, sink.bestUnp2() - cushion); + } + + if(tempHit.score() >= minsc && rightext >= min((index_t)this->_minK_local, rdlen - hit.len() - hit.rdoff())) { + assert_eq(tempHit.trim3(), 0); + assert_leq(tempHit.trim5(), tempHit.rdoff()); + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + tempHit, + tempHit.rdoff() - tempHit.trim5(), + tempHit.len() + tempHit.trim5(), + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } else if(hitoff + hitlen + this->_minK_local < rdlen) { + // skip some bases of a read + index_t jumplen = hitoff + hitlen + this->_minK < rdlen ? (index_t)this->_minK : (index_t)this->_minK_local; + assert_lt(hitoff + hitlen + jumplen, rdlen); + assert_leq(hit.len(), hitlen); + int64_t expected_score = hit.score() - (hitlen - hit.len()) / jumplen * sc.mmpMax - sc.mmpMax; + if(expected_score >= minsc) { + assert_eq(hit.trim3(), 0); + int64_t tmp_maxsc = hybridSearch_recur( + sc, + pepol, + tpol, + gpol, + gfm, + altdb, + repeatdb, + ref, + swa, + ssdb, + rdi, + hit, + hitoff, + hitlen + jumplen, + wlm, + prm, + swm, + him, + rnd, + sink, + alignMate, + dep + 1); + maxsc = max(maxsc, tmp_maxsc); + } + } + } + } + + return maxsc; +} + +#endif /*SPLICED_ALIGNER_H_*/ diff --git a/sse_util.cpp b/sse_util.cpp new file mode 100644 index 0000000..d6310cf --- /dev/null +++ b/sse_util.cpp @@ -0,0 +1,33 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#include "sse_util.h" +#include "aligner_swsse.h" +#include "limit.h" + +/** + * Given a column of filled-in cells, save the checkpointed cells in cs_. + */ +void Checkpointer::commitCol( + __m128i *pvH, + __m128i *pvE, + __m128i *pvF, + size_t coli) +{ +} diff --git a/sse_util.h b/sse_util.h new file mode 100644 index 0000000..b5781f1 --- /dev/null +++ b/sse_util.h @@ -0,0 +1,574 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef SSE_UTIL_H_ +#define SSE_UTIL_H_ + +#include "assert_helpers.h" +#include "ds.h" +#include "limit.h" +#include +#include + +class EList_m128i { +public: + + /** + * Allocate initial default of S elements. + */ + explicit EList_m128i(int cat = 0) : + cat_(cat), last_alloc_(NULL), list_(NULL), sz_(0), cur_(0) + { + assert_geq(cat, 0); + } + + /** + * Destructor. + */ + ~EList_m128i() { free(); } + + /** + * Return number of elements. 
+ */ + inline size_t size() const { return cur_; } + + /** + * Return number of elements allocated. + */ + inline size_t capacity() const { return sz_; } + + /** + * Ensure that there is sufficient capacity to expand to include + * 'thresh' more elements without having to expand. + */ + inline void ensure(size_t thresh) { + if(list_ == NULL) lazyInit(); + expandCopy(cur_ + thresh); + } + + /** + * Ensure that there is sufficient capacity to include 'newsz' elements. + * If there isn't enough capacity right now, expand capacity to exactly + * equal 'newsz'. + */ + inline void reserveExact(size_t newsz) { + if(list_ == NULL) lazyInitExact(newsz); + expandCopyExact(newsz); + } + + /** + * Return true iff there are no elements. + */ + inline bool empty() const { return cur_ == 0; } + + /** + * Return true iff list hasn't been initialized yet. + */ + inline bool null() const { return list_ == NULL; } + + /** + * If size is less than requested size, resize up to at least sz + * and set cur_ to requested sz. + */ + void resize(size_t sz) { + if(sz > 0 && list_ == NULL) lazyInit(); + if(sz <= cur_) { + cur_ = sz; + return; + } + if(sz_ < sz) { + expandCopy(sz); + } + cur_ = sz; + } + + /** + * Zero out contents of vector. + */ + void zero() { + if(cur_ > 0) { + memset(list_, 0, cur_ * sizeof(__m128i)); + } + } + + /** + * If size is less than requested size, resize up to at least sz + * and set cur_ to requested sz. Do not copy the elements over. + */ + void resizeNoCopy(size_t sz) { + if(sz > 0 && list_ == NULL) lazyInit(); + if(sz <= cur_) { + cur_ = sz; + return; + } + if(sz_ < sz) { + expandNoCopy(sz); + } + cur_ = sz; + } + + /** + * If size is less than requested size, resize up to exactly sz and set + * cur_ to requested sz. + */ + void resizeExact(size_t sz) { + if(sz > 0 && list_ == NULL) lazyInitExact(sz); + if(sz <= cur_) { + cur_ = sz; + return; + } + if(sz_ < sz) expandCopyExact(sz); + cur_ = sz; + } + + /** + * Make the stack empty. 
+ */ + void clear() { + cur_ = 0; // re-use stack memory + // Don't clear heap; re-use it + } + + /** + * Return a reference to the ith element. + */ + inline __m128i& operator[](size_t i) { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline __m128i operator[](size_t i) const { + assert_lt(i, cur_); + return list_[i]; + } + + /** + * Return a reference to the ith element. + */ + inline __m128i& get(size_t i) { + return operator[](i); + } + + /** + * Return a reference to the ith element. + */ + inline __m128i get(size_t i) const { + return operator[](i); + } + + /** + * Return a pointer to the beginning of the buffer. + */ + __m128i *ptr() { return list_; } + + /** + * Return a const pointer to the beginning of the buffer. + */ + const __m128i *ptr() const { return list_; } + + /** + * Return memory category. + */ + int cat() const { return cat_; } + +private: + + /** + * Initialize memory for EList. + */ + void lazyInit() { + assert(list_ == NULL); + list_ = alloc(sz_); + } + + /** + * Initialize exactly the prescribed number of elements for EList. + */ + void lazyInitExact(size_t sz) { + assert_gt(sz, 0); + assert(list_ == NULL); + sz_ = sz; + list_ = alloc(sz); + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + __m128i *alloc(size_t sz) { + __m128i* last_alloc_; + try { + last_alloc_ = new __m128i[sz + 2]; + } catch(std::bad_alloc& e) { + std::cerr << "Error: Out of memory allocating " << sz << " __m128i's for DP matrix: '" << e.what() << "'" << std::endl; + throw e; + } + __m128i* tmp = last_alloc_; + size_t tmpint = (size_t)tmp; + // Align it! 
+ if((tmpint & 0xf) != 0) { + tmpint += 15; + tmpint &= (~0xf); + tmp = reinterpret_cast<__m128i*>(tmpint); + } + assert_eq(0, (tmpint & 0xf)); // should be 16-byte aligned + assert(tmp != NULL); + gMemTally.add(cat_, sz); + return tmp; + } + + /** + * Allocate a T array of length sz_ and store in list_. Also, + * tally into the global memory tally. + */ + void free() { + if(list_ != NULL) { + delete[] last_alloc_; + gMemTally.del(cat_, sz_); + list_ = NULL; + sz_ = cur_ = 0; + } + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. Size + * increases quadratically with number of expansions. Copy old contents + * into new buffer using operator=. + */ + void expandCopy(size_t thresh) { + if(thresh <= sz_) return; + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) newsz *= 2; + expandCopyExact(newsz); + } + + /** + * Expand the list_ buffer until it has exactly 'newsz' elements. Copy + * old contents into new buffer using operator=. + */ + void expandCopyExact(size_t newsz) { + if(newsz <= sz_) return; + __m128i* tmp = alloc(newsz); + assert(tmp != NULL); + size_t cur = cur_; + if(list_ != NULL) { + for(size_t i = 0; i < cur_; i++) { + // Note: operator= is used + tmp[i] = list_[i]; + } + free(); + } + list_ = tmp; + sz_ = newsz; + cur_ = cur; + } + + /** + * Expand the list_ buffer until it has at least 'thresh' elements. + * Size increases quadratically with number of expansions. Don't copy old + * contents into the new buffer. + */ + void expandNoCopy(size_t thresh) { + assert(list_ != NULL); + if(thresh <= sz_) return; + size_t newsz = (sz_ * 2)+1; + while(newsz < thresh) newsz *= 2; + expandNoCopyExact(newsz); + } + + /** + * Expand the list_ buffer until it has exactly 'newsz' elements. Don't + * copy old contents into the new buffer. 
+ */ + void expandNoCopyExact(size_t newsz) { + assert(list_ != NULL); + assert_gt(newsz, 0); + free(); + __m128i* tmp = alloc(newsz); + assert(tmp != NULL); + list_ = tmp; + sz_ = newsz; + assert_gt(sz_, 0); + } + + int cat_; // memory category, for accounting purposes + __m128i* last_alloc_; // what new[] originally returns + __m128i *list_; // list ptr, aligned version of what new[] returns + size_t sz_; // capacity + size_t cur_; // occupancy (AKA size) +}; + +struct CpQuad { + CpQuad() { reset(); } + + void reset() { sc[0] = sc[1] = sc[2] = sc[3] = 0; } + + bool operator==(const CpQuad& o) const { + return sc[0] == o.sc[0] && + sc[1] == o.sc[1] && + sc[2] == o.sc[2] && + sc[3] == o.sc[3]; + } + + int16_t sc[4]; +}; + +/** + * Encapsulates a collection of checkpoints. Assumes the scheme is to + * checkpoint adjacent pairs of anti-diagonals. + */ +class Checkpointer { + +public: + + Checkpointer() { reset(); } + + /** + * Set the checkpointer up for a new rectangle. + */ + void init( + size_t nrow, // # of rows + size_t ncol, // # of columns + size_t perpow2, // checkpoint every 1 << perpow2 diags (& next) + int64_t perfectScore, // what is a perfect score? for sanity checks + bool is8, // 8-bit? + bool doTri, // triangle shaped? + bool local, // is alignment local? for sanity checks + bool debug) // gather debug checkpoints? + { + assert_gt(perpow2, 0); + nrow_ = nrow; + ncol_ = ncol; + perpow2_ = perpow2; + per_ = 1 << perpow2; + lomask_ = ~(0xffffffff << perpow2); + perf_ = perfectScore; + local_ = local; + ndiag_ = (ncol + nrow - 1 + 1) / per_; + locol_ = MAX_SIZE_T; + hicol_ = MIN_SIZE_T; +// debug_ = debug; + debug_ = true; + commitMap_.clear(); + firstCommit_ = true; + size_t perword = (is8 ? 
16 : 8); + is8_ = is8; + niter_ = ((nrow_ + perword - 1) / perword); + if(doTri) { + // Save a pair of anti-diagonals every per_ anti-diagonals for + // backtrace purposes + qdiag1s_.resize(ndiag_ * nrow_); + qdiag2s_.resize(ndiag_ * nrow_); + } else { + // Save every per_ columns and rows for backtrace purposes + qrows_.resize((nrow_ / per_) * ncol_); + qcols_.resize((ncol_ / per_) * (niter_ << 2)); + } + if(debug_) { + // Save all columns for debug purposes + qcolsD_.resize(ncol_ * (niter_ << 2)); + } + } + + /** + * Return true iff we've been collecting debug cells. + */ + bool debug() const { return debug_; } + + /** + * Check whether the given score matches the saved score at row, col, hef. + */ + int64_t debugCell(size_t row, size_t col, int hef) const { + assert(debug_); + const __m128i* ptr = qcolsD_.ptr() + hef; + // Fast forward to appropriate column + ptr += ((col * niter_) << 2); + size_t mod = row % niter_; // which m128i + size_t div = row / niter_; // offset into m128i + // Fast forward to appropriate word + ptr += (mod << 2); + // Extract score + int16_t sc = (is8_ ? ((uint8_t*)ptr)[div] : ((int16_t*)ptr)[div]); + int64_t asc = MIN_I64; + // Convert score + if(is8_) { + if(local_) { + asc = sc; + } else { + if(sc == 0) asc = MIN_I64; + else asc = sc - 0xff; + } + } else { + if(local_) { + asc = sc + 0x8000; + } else { + if(sc != MIN_I16) asc = sc - 0x7fff; + } + } + return asc; + } + + /** + * Return true iff the given row/col is checkpointed. + */ + bool isCheckpointed(size_t row, size_t col) const { + assert_leq(col, hicol_); + assert_geq(col, locol_); + size_t mod = (row + col) & lomask_; + assert_lt(mod, per_); + return mod >= per_ - 2; + } + + /** + * Return the checkpointed H, E, or F score from the given cell. 
+ */ + inline int64_t scoreTriangle(size_t row, size_t col, int hef) const { + assert(isCheckpointed(row, col)); + bool diag1 = ((row + col) & lomask_) == per_ - 2; + size_t off = (row + col) >> perpow2_; + if(diag1) { + if(qdiag1s_[off * nrow_ + row].sc[hef] == MIN_I16) { + return MIN_I64; + } else { + return qdiag1s_[off * nrow_ + row].sc[hef]; + } + } else { + if(qdiag2s_[off * nrow_ + row].sc[hef] == MIN_I16) { + return MIN_I64; + } else { + return qdiag2s_[off * nrow_ + row].sc[hef]; + } + } + } + + /** + * Return the checkpointed H, E, or F score from the given cell. + */ + inline int64_t scoreSquare(size_t row, size_t col, int hef) const { + // Is it in a checkpointed row? Note that checkpointed rows don't + // necessarily have the horizontal contributions calculated, so we want + // to use the column info in that case. + if((row & lomask_) == lomask_ && hef != 1) { + int64_t sc = qrows_[(row >> perpow2_) * ncol_ + col].sc[hef]; + if(sc == MIN_I16) return MIN_I64; + return sc; + } + hef--; + if(hef == -1) hef = 2; + // It must be in a checkpointed column + assert_eq(lomask_, (col & lomask_)); + // Fast forward to appropriate column + const __m128i* ptr = qcols_.ptr() + hef; + ptr += (((col >> perpow2_) * niter_) << 2); + size_t mod = row % niter_; // which m128i + size_t div = row / niter_; // offset into m128i + // Fast forward to appropriate word + ptr += (mod << 2); + // Extract score + int16_t sc = (is8_ ? ((uint8_t*)ptr)[div] : ((int16_t*)ptr)[div]); + int64_t asc = MIN_I64; + // Convert score + if(is8_) { + if(local_) { + asc = sc; + } else { + if(sc == 0) asc = MIN_I64; + else asc = sc - 0xff; + } + } else { + if(local_) { + asc = sc + 0x8000; + } else { + if(sc != MIN_I16) asc = sc - 0x7fff; + } + } + return asc; + } + + /** + * Given a column of filled-in cells, save the checkpointed cells in cs_. + */ + void commitCol(__m128i *pvH, __m128i *pvE, __m128i *pvF, size_t coli); + + /** + * Reset the state of the Checkpointer. 
+ */ + void reset() { + perpow2_ = per_ = lomask_ = nrow_ = ncol_ = 0; + local_ = false; + niter_ = ndiag_ = locol_ = hicol_ = 0; + perf_ = 0; + firstCommit_ = true; + is8_ = debug_ = false; + } + + /** + * Return true iff the Checkpointer has been initialized. + */ + bool inited() const { + return nrow_ > 0; + } + + size_t per() const { return per_; } + size_t perpow2() const { return perpow2_; } + size_t lomask() const { return lomask_; } + size_t locol() const { return locol_; } + size_t hicol() const { return hicol_; } + size_t nrow() const { return nrow_; } + size_t ncol() const { return ncol_; } + + const CpQuad* qdiag1sPtr() const { return qdiag1s_.ptr(); } + const CpQuad* qdiag2sPtr() const { return qdiag2s_.ptr(); } + + size_t perpow2_; // 1 << perpow2_ - 2 is the # of uncheckpointed + // anti-diags between checkpointed anti-diag pairs + size_t per_; // 1 << perpow2_ + size_t lomask_; // mask for extracting low bits + size_t nrow_; // # rows in current rectangle + size_t ncol_; // # cols in current rectangle + int64_t perf_; // perfect score + bool local_; // local alignment? + + size_t ndiag_; // # of double-diags + + size_t locol_; // leftmost column committed + size_t hicol_; // rightmost column committed + + // Map for committing scores from vector columns to checkpointed diagonals + EList commitMap_; + bool firstCommit_; + + EList qdiag1s_; // checkpoint H/E/F values for diagonal 1 + EList qdiag2s_; // checkpoint H/E/F values for diagonal 2 + + EList qrows_; // checkpoint H/E/F values for rows + + // We store columns in this way to reduce overhead of populating them + bool is8_; // true -> fill used 8-bit cells + size_t niter_; // # __m128i words per column + EList_m128i qcols_; // checkpoint E/F/H values for select columns + + bool debug_; // get debug checkpoints? (i.e. fill qcolsD_?) 
+ EList_m128i qcolsD_; // checkpoint E/F/H values for all columns (debug) +}; + +#endif diff --git a/sstring.cpp b/sstring.cpp new file mode 100644 index 0000000..3b26587 --- /dev/null +++ b/sstring.cpp @@ -0,0 +1,202 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifdef MAIN_SSTRING + +#include +#include +#include "ds.h" +#include "sstring.h" + +using namespace std; + +int main(void) { + cerr << "Test inter-class comparison operators..."; + { + SString s(2); + s.set('a', 0); + s.set('b', 1); + assert(sstr_eq(s, (const char *)"ab")); + assert(!sstr_neq(s, (const char *)"ab")); + assert(!sstr_lt(s, (const char *)"ab")); + assert(!sstr_gt(s, (const char *)"ab")); + assert(sstr_leq(s, (const char *)"ab")); + assert(sstr_geq(s, (const char *)"ab")); + + SStringExpandable s2; + s2.append('a'); + s2.append('b'); + assert(sstr_eq(s, s2)); + assert(sstr_eq(s2, (const char *)"ab")); + assert(!sstr_neq(s, s2)); + assert(!sstr_neq(s2, (const char *)"ab")); + assert(!sstr_lt(s, s2)); + assert(!sstr_lt(s2, (const char *)"ab")); + assert(!sstr_gt(s, s2)); + assert(!sstr_gt(s2, (const char *)"ab")); + assert(sstr_leq(s, s2)); + assert(sstr_leq(s2, (const char *)"ab")); + assert(sstr_geq(s, s2)); + assert(sstr_geq(s2, (const char *)"ab")); + + SStringFixed s3; + s3.append('a'); + s3.append('b'); + assert(sstr_eq(s, s3)); + 
assert(sstr_eq(s2, s3)); + assert(sstr_eq(s3, (const char *)"ab")); + assert(!sstr_neq(s, s3)); + assert(!sstr_neq(s2, s3)); + assert(!sstr_neq(s3, (const char *)"ab")); + assert(!sstr_lt(s, s3)); + assert(!sstr_lt(s2, s3)); + assert(!sstr_lt(s3, (const char *)"ab")); + assert(!sstr_gt(s, s3)); + assert(!sstr_gt(s2, s3)); + assert(!sstr_gt(s3, (const char *)"ab")); + assert(sstr_geq(s, s3)); + assert(sstr_geq(s2, s3)); + assert(sstr_geq(s3, (const char *)"ab")); + assert(sstr_leq(s, s3)); + assert(sstr_leq(s2, s3)); + assert(sstr_leq(s3, (const char *)"ab")); + } + cerr << "PASSED" << endl; + + cerr << "Test flag for whether to consider end-of-word < other chars ..."; + { + SString ss("String"); + SString sl("String1"); + assert(sstr_lt(ss, sl)); + assert(sstr_gt(ss, sl, false)); + assert(sstr_leq(ss, sl)); + assert(sstr_geq(ss, sl, false)); + } + cerr << "PASSED" << endl; + + cerr << "Test toZBuf and toZBufXForm ..."; + { + SString s(10); + for(int i = 0; i < 10; i++) { + s[i] = (uint32_t)i; + } + assert(strcmp(s.toZBufXForm("0123456789"), "0123456789") == 0); + } + cerr << "PASSED" << endl; + + cerr << "Test S2bDnaString ..."; + { + const char *str = + "ACGTACGTAC" "ACGTACGTAC" "ACGTACGTAC" + "ACGTACGTAC" "ACGTACGTAC" "ACGTACGTAC"; + const char *gs = + "GGGGGGGGGG" "GGGGGGGGGG" "GGGGGGGGGG" + "GGGGGGGGGG" "GGGGGGGGGG" "GGGGGGGGGG"; + for(size_t i = 0; i < 60; i++) { + S2bDnaString s(str, i, true); + S2bDnaString sr; + BTDnaString s2(str, i, true); + assert(sstr_eq(s, s2)); + if(i >= 10) { + BTDnaString s3; + s.windowGetDna(s3, true, false, 3, 4); + assert(sstr_eq(s3.toZBuf(), (const char*)"TACG")); + s.windowGetDna(s3, false, false, 3, 4); + assert(sstr_eq(s3.toZBuf(), (const char*)"CGTA")); + assert_eq('A', s.toChar(0)); + assert_eq('G', s.toChar(2)); + assert_eq('A', s.toChar(4)); + assert_eq('G', s.toChar(6)); + assert_eq('A', s.toChar(8)); + + s.reverseWindow(1, 8); + s2.reverseWindow(1, 8); + + assert_eq('A', s.toChar(1)); + assert_eq('T', s.toChar(2)); + 
assert_eq('G', s.toChar(3)); + assert_eq('C', s.toChar(4)); + assert_eq('A', s.toChar(5)); + assert_eq('T', s.toChar(6)); + assert_eq('G', s.toChar(7)); + assert_eq('C', s.toChar(8)); + assert(sstr_eq(s, s2)); + + s.reverseWindow(1, 8); + s2.reverseWindow(1, 8); + assert(sstr_eq(s, s2)); + } + if(i > 1) { + s.reverse(); + sr.installReverseChars(str, i); + s2.reverse(); + assert(sstr_eq(s, s2)); + assert(sstr_eq(sr, s2)); + s.reverse(); + sr.reverse(); + assert(sstr_neq(s, s2)); + assert(sstr_neq(sr, s2)); + s.fill(2); + s2.reverse(); + assert(sstr_leq(s, gs)); + assert(sstr_gt(s, s2)); + assert(sstr_gt(s, sr)); + s2.fill(2); + sr.fill(2); + assert(sstr_eq(s, s2)); + assert(sstr_eq(s, sr)); + } + } + S2bDnaString s(str, true); + S2bDnaString sr; + BTDnaString s2(str, true); + assert(sstr_eq(s2.toZBuf(), str)); + assert(sstr_eq(s, s2)); + s.reverse(); + sr.installReverseChars(str); + s2.reverse(); + assert(sstr_eq(s, s2)); + assert(sstr_eq(sr, s2)); + s.reverse(); + sr.reverse(); + assert(sstr_neq(s, s2)); + assert(sstr_neq(sr, s2)); + } + cerr << "PASSED" << endl; + + cerr << "Test operator=() ..."; + { + S2bDnaString s; + s.installChars(string("gtcagtca")); + assert(sstr_eq(s.toZBuf(), (const char *)"GTCAGTCA")); + } + cerr << "PASSED" << endl; + + cerr << "Conversions from string ..."; + { + SStringExpandable se(string("hello")); + EList > sel; + sel.push_back(SStringExpandable(string("hello"))); + } + cerr << "PASSED" << endl; + + cerr << "PASSED" << endl; +} + +#endif /*def MAIN_SSTRING*/ diff --git a/sstring.h b/sstring.h new file mode 100644 index 0000000..c907901 --- /dev/null +++ b/sstring.h @@ -0,0 +1,3454 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef SSTRING_H_
#define SSTRING_H_

#include <string.h>
#include <iostream>
#include "assert_helpers.h"
#include "alphabet.h"
#include "random_source.h"

/**
 * Four kinds of strings defined here:
 *
 * SString:
 *   A fixed-length string using heap memory with size set at construction time
 *   or when install() member is called.
 *
 * S2bDnaString:
 *   Like SString, but stores a list of uint32_t words where each word is
 *   divided into 16 2-bit slots interpreted as holding one A/C/G/T nucleotide
 *   each.
 *
 * TODO: S3bDnaString allowing N.  S4bDnaString allowing nucleotide masks.
 *
 * SStringExpandable:
 *   A string using heap memory where the size of the backing store is
 *   automatically resized as needed.  Supports operations like append, insert,
 *   erase, etc.
 *
 * SStringFixed:
 *   A fixed-length string using stack memory where size is set at compile
 *   time.
 *
 * All string classes have some extra facilities that make it easy to print the
 * string, including when the string uses an encoded alphabet.  See toZBuf()
 * and toZBufXForm().
 *
 * Global lt, eq, and gt template functions are supplied.  They are capable of
 * doing lexicographical comparisons between any of the categories of strings
 * defined here.
+ */ + +template +class Class_sstr_len { +public: + static inline size_t sstr_len(const T& s) { + return s.length(); + } +}; + +template +class Class_sstr_len { +public: + static inline size_t sstr_len(const char s[N]) { + return strlen(s); + } +}; + +template<> +class Class_sstr_len { +public: + static inline size_t sstr_len(const char *s) { + return strlen(s); + } +}; + +template<> +class Class_sstr_len { +public: + static inline size_t sstr_len(const unsigned char *s) { + return strlen((const char *)s); + } +}; + +template +static inline bool sstr_eq(const T1& s1, const T2& s2) { + size_t len1 = Class_sstr_len::sstr_len(s1); + size_t len2 = Class_sstr_len::sstr_len(s2); + if(len1 != len2) return false; + for(size_t i = 0; i < len1; i++) { + if(s1[i] != s2[i]) return false; + } + return true; +} + +template +static inline bool sstr_neq(const T1& s1, const T2& s2) { + return !sstr_eq(s1, s2); +} + +/** + * Return true iff the given suffix of s1 is equal to the given suffix of s2 up + * to upto characters. + */ +template +static inline bool sstr_suf_upto_eq( + const T1& s1, size_t suf1, + const T2& s2, size_t suf2, + size_t upto, + bool endlt = true) +{ + assert_leq(suf1, Class_sstr_len::sstr_len(s1)); + assert_leq(suf2, Class_sstr_len::sstr_len(s2)); + size_t len1 = Class_sstr_len::sstr_len(s1) - suf1; + size_t len2 = Class_sstr_len::sstr_len(s2) - suf2; + if(len1 > upto) len1 = upto; + if(len2 > upto) len2 = upto; + if(len1 != len2) return false; + for(size_t i = 0; i < len1; i++) { + if(s1[suf1+i] != s2[suf2+i]) { + return false; + } + } + return true; +} + +/** + * Return true iff the given suffix of s1 is equal to the given suffix of s2 up + * to upto characters. + */ +template +static inline bool sstr_suf_upto_neq( + const T1& s1, size_t suf1, + const T2& s2, size_t suf2, + size_t upto, + bool endlt = true) +{ + return !sstr_suf_upto_eq(s1, suf1, s2, suf2, upto, endlt); +} + +/** + * Return true iff s1 is less than s2. 
+ */ +template +static inline bool sstr_lt(const T1& s1, const T2& s2, bool endlt = true) { + size_t len1 = Class_sstr_len::sstr_len(s1); + size_t len2 = Class_sstr_len::sstr_len(s2); + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[i] < s2[i]) { + return true; + } else if(s1[i] > s2[i]) { + return false; + } + } + if(len1 == len2) return false; + return (len1 < len2) == endlt; +} + +/** + * Return true iff the given suffix of s1 is less than the given suffix of s2. + */ +template +static inline bool sstr_suf_lt( + const T1& s1, size_t suf1, + const T2& s2, size_t suf2, + bool endlt = true) +{ + assert_leq(suf1, Class_sstr_len::sstr_len(s1)); + assert_leq(suf2, Class_sstr_len::sstr_len(s2)); + size_t len1 = Class_sstr_len::sstr_len(s1) - suf1; + size_t len2 = Class_sstr_len::sstr_len(s2) - suf2; + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[suf1+i] < s2[suf2+i]) { + return true; + } else if(s1[suf1+i] > s2[suf2+i]) { + return false; + } + } + if(len1 == len2) return false; + return (len1 < len2) == endlt; +} + +/** + * Return true iff the given suffix of s1 is less than the given suffix of s2. + * Treat s1 and s2 as though they have lengths len1/len2. + */ +template +static inline bool sstr_suf_lt( + const T1& s1, size_t suf1, size_t len1, + const T2& s2, size_t suf2, size_t len2, + bool endlt = true) +{ + assert_leq(suf1, len1); + assert_leq(suf2, len2); + size_t left1 = len1 - suf1; + size_t left2 = len2 - suf2; + size_t minleft = (left1 < left2 ? left1 : left2); + for(size_t i = 0; i < minleft; i++) { + if(s1[suf1+i] < s2[suf2+i]) { + return true; + } else if(s1[suf1+i] > s2[suf2+i]) { + return false; + } + } + if(left1 == left2) return false; + return (left1 < left2) == endlt; +} + +/** + * Return true iff the given suffix of s1 is less than the given suffix of s2 + * up to upto characters. 
+ */ +template +static inline bool sstr_suf_upto_lt( + const T1& s1, size_t suf1, + const T2& s2, size_t suf2, + size_t upto, + bool endlt = true) +{ + assert_leq(suf1, Class_sstr_len::sstr_len(s1)); + assert_leq(suf2, Class_sstr_len::sstr_len(s2)); + size_t len1 = Class_sstr_len::sstr_len(s1) - suf1; + size_t len2 = Class_sstr_len::sstr_len(s2) - suf2; + if(len1 > upto) len1 = upto; + if(len2 > upto) len2 = upto; + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[suf1+i] < s2[suf2+i]) { + return true; + } else if(s1[suf1+i] > s2[suf2+i]) { + return false; + } + } + if(len1 == len2) return false; + return (len1 < len2) == endlt; +} + +/** + * Return true iff the given prefix of s1 is less than the given prefix of s2. + */ +template +static inline bool sstr_pre_lt( + const T1& s1, size_t pre1, + const T2& s2, size_t pre2, + bool endlt = true) +{ + assert_leq(pre1, Class_sstr_len::sstr_len(s1)); + assert_leq(pre2, Class_sstr_len::sstr_len(s2)); + size_t len1 = pre1; + size_t len2 = pre2; + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[i] < s2[i]) { + return true; + } else if(s1[i] > s2[i]) { + return false; + } + } + if(len1 == len2) return false; + return (len1 < len2) == endlt; +} + +/** + * Return true iff s1 is less than or equal to s2. + */ +template +static inline bool sstr_leq(const T1& s1, const T2& s2, bool endlt = true) { + size_t len1 = Class_sstr_len::sstr_len(s1); + size_t len2 = Class_sstr_len::sstr_len(s2); + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[i] < s2[i]) { + return true; + } else if(s1[i] > s2[i]) { + return false; + } + } + if(len1 == len2) return true; + return (len1 < len2) == endlt; +} + +/** + * Return true iff the given suffix of s1 is less than or equal to the given + * suffix of s2. 
+ */ +template +static inline bool sstr_suf_leq( + const T1& s1, size_t suf1, + const T2& s2, size_t suf2, + bool endlt = true) +{ + assert_leq(suf1, Class_sstr_len::sstr_len(s1)); + assert_leq(suf2, Class_sstr_len::sstr_len(s2)); + size_t len1 = Class_sstr_len::sstr_len(s1) - suf1; + size_t len2 = Class_sstr_len::sstr_len(s2) - suf2; + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[suf1+i] < s2[suf2+i]) { + return true; + } else if(s1[suf1+i] > s2[suf2+i]) { + return false; + } + } + if(len1 == len2) return true; + return (len1 < len2) == endlt; +} + +/** + * Return true iff the given prefix of s1 is less than or equal to the given + * prefix of s2. + */ +template +static inline bool sstr_pre_leq( + const T1& s1, size_t pre1, + const T2& s2, size_t pre2, + bool endlt = true) +{ + assert_leq(pre1, Class_sstr_len::sstr_len(s1)); + assert_leq(pre2, Class_sstr_len::sstr_len(s2)); + size_t len1 = pre1; + size_t len2 = pre2; + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[i] < s2[i]) { + return true; + } else if(s1[i] > s2[i]) { + return false; + } + } + if(len1 == len2) return true; + return (len1 < len2) == endlt; +} + +/** + * Return true iff s1 is greater than s2. + */ +template +static inline bool sstr_gt(const T1& s1, const T2& s2, bool endlt = true) { + size_t len1 = Class_sstr_len::sstr_len(s1); + size_t len2 = Class_sstr_len::sstr_len(s2); + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[i] > s2[i]) { + return true; + } else if(s1[i] < s2[i]) { + return false; + } + } + if(len1 == len2) return false; + return (len1 > len2) == endlt; +} + +/** + * Return true iff the given suffix of s1 is greater than the given suffix of + * s2. 
+ */ +template +static inline bool sstr_suf_gt( + const T1& s1, size_t suf1, + const T2& s2, size_t suf2, + bool endlt = true) +{ + assert_leq(suf1, Class_sstr_len::sstr_len(s1)); + assert_leq(suf2, Class_sstr_len::sstr_len(s2)); + size_t len1 = Class_sstr_len::sstr_len(s1) - suf1; + size_t len2 = Class_sstr_len::sstr_len(s2) - suf2; + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[suf1+i] > s2[suf2+i]) { + return true; + } else if(s1[suf1+i] < s2[suf2+i]) { + return false; + } + } + if(len1 == len2) return false; + return (len1 > len2) == endlt; +} + +/** + * Return true iff the given prefix of s1 is greater than the given prefix of + * s2. + */ +template +static inline bool sstr_pre_gt( + const T1& s1, size_t pre1, + const T2& s2, size_t pre2, + bool endlt = true) +{ + assert_leq(pre1, Class_sstr_len::sstr_len(s1)); + assert_leq(pre2, Class_sstr_len::sstr_len(s2)); + size_t len1 = pre1; + size_t len2 = pre2; + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[i] > s2[i]) { + return true; + } else if(s1[i] < s2[i]) { + return false; + } + } + if(len1 == len2) return false; + return (len1 > len2) == endlt; +} + +/** + * Return true iff s1 is greater than or equal to s2. + */ +template +static inline bool sstr_geq(const T1& s1, const T2& s2, bool endlt = true) { + size_t len1 = Class_sstr_len::sstr_len(s1); + size_t len2 = Class_sstr_len::sstr_len(s2); + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[i] > s2[i]) { + return true; + } else if(s1[i] < s2[i]) { + return false; + } + } + if(len1 == len2) return true; + return (len1 > len2) == endlt; +} + +/** + * Return true iff the given suffix of s1 is greater than or equal to the given + * suffix of s2. 
+ */ +template +static inline bool sstr_suf_geq( + const T1& s1, size_t suf1, + const T2& s2, size_t suf2, + bool endlt = true) +{ + assert_leq(suf1, Class_sstr_len::sstr_len(s1)); + assert_leq(suf2, Class_sstr_len::sstr_len(s2)); + size_t len1 = Class_sstr_len::sstr_len(s1) - suf1; + size_t len2 = Class_sstr_len::sstr_len(s2) - suf2; + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[suf1+i] > s2[suf2+i]) { + return true; + } else if(s1[suf1+i] < s2[suf2+i]) { + return false; + } + } + if(len1 == len2) return true; + return (len1 > len2) == endlt; +} + +/** + * Return true iff the given prefix of s1 is greater than or equal to the given + * prefix of s2. + */ +template +static inline bool sstr_pre_geq( + const T1& s1, size_t pre1, + const T2& s2, size_t pre2, + bool endlt = true) +{ + assert_leq(pre1, Class_sstr_len::sstr_len(s1)); + assert_leq(pre2, Class_sstr_len::sstr_len(s2)); + size_t len1 = pre1; + size_t len2 = pre2; + size_t minlen = (len1 < len2 ? len1 : len2); + for(size_t i = 0; i < minlen; i++) { + if(s1[i] > s2[i]) { + return true; + } else if(s1[i] < s2[i]) { + return false; + } + } + if(len1 == len2) return true; + return (len1 > len2) == endlt; +} + +template +static inline const char * sstr_to_cstr(const T& s) { + return s.toZBuf(); +} + +template<> +inline const char * sstr_to_cstr >( + const std::basic_string& s) +{ + return s.c_str(); +} + +/** + * Simple string class with backing memory whose size is managed by the user + * using the constructor and install() member function. No behind-the-scenes + * reallocation or copying takes place. + */ +template +class SString { + +public: + + explicit SString() : + cs_(NULL), + printcs_(NULL), + len_(0) + { } + + explicit SString(size_t sz) : + cs_(NULL), + printcs_(NULL), + len_(0) + { + resize(sz); + } + + /** + * Create an SStringExpandable from another SStringExpandable. 
+ */ + SString(const SString& o) : + cs_(NULL), + printcs_(NULL), + len_(0) + { + *this = o; + } + + /** + * Create an SStringExpandable from a std::basic_string of the + * appropriate type. + */ + explicit SString(const std::basic_string& str) : + cs_(NULL), + printcs_(NULL), + len_(0) + { + install(str.c_str(), str.length()); + } + + /** + * Create an SStringExpandable from an array and size. + */ + explicit SString(const T* b, size_t sz) : + cs_(NULL), + printcs_(NULL), + len_(0) + { + install(b, sz); + } + + /** + * Create an SStringExpandable from a zero-terminated array. + */ + explicit SString(const T* b) : + cs_(NULL), + printcs_(NULL), + len_(0) + { + install(b, strlen(b)); + } + + /** + * Destroy the expandable string object. + */ + virtual ~SString() { + if(cs_ != NULL) { + delete[] cs_; + cs_ = NULL; + } + if(printcs_ != NULL) { + delete[] printcs_; + printcs_ = NULL; + } + len_ = 0; + } + + /** + * Assignment to other SString. + */ + SString& operator=(const SString& o) { + install(o.cs_, o.len_); + return *this; + } + + /** + * Assignment to other SString. + */ + SString& operator=(const std::basic_string& o) { + install(o); + return *this; + } + + /** + * Resizes the string without preserving its contents. + */ + void resize(size_t sz) { + if(cs_ != NULL) { + delete cs_; + cs_ = NULL; + } + if(printcs_ != NULL) { + delete printcs_; + printcs_ = NULL; + } + if(sz != 0) { + cs_ = new T[sz+1]; + } + len_ = sz; + } + + /** + * Return ith character from the left of either the forward or the + * reverse version of the read. + */ + T windowGet( + size_t i, + bool fw, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = len_; + assert_lt(i, len); + assert_leq(len, len_ - depth); + return fw ? cs_[depth+i] : cs_[depth+len-i-1]; + } + + /** + * Return ith character from the left of either the forward or the + * reverse-complement version of the read. 
+ */ + void windowGet( + T& ret, + bool fw, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = len_; + assert_leq(len, len_ - depth); + ret.resize(len); + for(size_t i = 0; i < len; i++) { + ret.set(fw ? cs_[depth+i] : cs_[depth+len-i-1], i); + } + } + + /** + * Set character at index 'idx' to 'c'. + */ + inline void set(int c, size_t idx) { + assert_lt(idx, len_); + cs_[idx] = c; + } + + /** + * Retrieve constant version of element i. + */ + inline const T& operator[](size_t i) const { + assert_lt(i, len_); + return cs_[i]; + } + + /** + * Retrieve mutable version of element i. + */ + inline T& operator[](size_t i) { + assert_lt(i, len_); + return cs_[i]; + } + + /** + * Retrieve constant version of element i. + */ + inline const T& get(size_t i) const { + assert_lt(i, len_); + return cs_[i]; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string. memcpy is used, not + * operator=. + */ + virtual void install(const T* b, size_t sz) { + if(sz == 0) return; + resize(sz); + memcpy(cs_, b, sz * sizeof(T)); + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string. memcpy is used, not + * operator=. + */ + virtual void install(const std::basic_string& b) { + size_t sz = b.length(); + if(sz == 0) return; + resize(sz); + memcpy(cs_, b.c_str(), sz * sizeof(T)); + } + + /** + * Copy all bytes from zero-terminated buffer 'b' into this string. + */ + void install(const T* b) { + install(b, strlen(b)); + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reversing them + * in the process. + */ + void installReverse(const char* b, size_t sz) { + if(sz == 0) return; + resize(sz); + for(size_t i = 0; i < sz; i++) { + cs_[i] = b[sz-i-1]; + } + len_ = sz; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reversing them + * in the process. + */ + void installReverse(const SString& b) { + installReverse(b.cs_, b.len_); + } + + /** + * Return true iff the two strings are equal. 
+ */ + bool operator==(const SString& o) { + return sstr_eq(*this, o); + } + + /** + * Return true iff the two strings are not equal. + */ + bool operator!=(const SString& o) { + return sstr_neq(*this, o); + } + + /** + * Return true iff this string is less than given string. + */ + bool operator<(const SString& o) { + return sstr_lt(*this, o); + } + + /** + * Return true iff this string is greater than given string. + */ + bool operator>(const SString& o) { + return sstr_gt(*this, o); + } + + /** + * Return true iff this string is less than or equal to given string. + */ + bool operator<=(const SString& o) { + return sstr_leq(*this, o); + } + + /** + * Return true iff this string is greater than or equal to given string. + */ + bool operator>=(const SString& o) { + return sstr_geq(*this, o); + } + + /** + * Reverse the buffer in place. + */ + void reverse() { + for(size_t i = 0; i < (len_ >> 1); i++) { + T tmp = get(i); + set(get(len_-i-1), i); + set(tmp, len_-i-1); + } + } + + /** + * Reverse the buffer in place. + */ + void reverseComplement(int* rcmap) { + size_t mid = len_ >> 1; + for(size_t i = 0; i < (len_ >> 1); i++) { + T tmp = get(i); + set(rcmap[get(len_-i-1)], i); + set(rcmap[tmp], len_-i-1); + } + if (len_ % 2) { + set(rcmap[get(mid)], mid); + } + } + + /** + * Reverse a substring of the buffer in place. + */ + void reverseWindow(size_t off, size_t len) { + assert_leq(off, len_); + assert_leq(off + len, len_); + size_t mid = len >> 1; + for(size_t i = 0; i < mid; i++) { + T tmp = get(off+i); + set(get(off+len-i-1), off+i); + set(tmp, off+len-i-1); + } + } + + /** + * Set the first len elements of the buffer to el. + */ + void fill(size_t len, const T& el) { + assert_leq(len, len_); + for(size_t i = 0; i < len; i++) { + set(el, i); + } + } + + /** + * Set all elements of the buffer to el. + */ + void fill(const T& el) { + fill(len_, el); + } + + /** + * Return the length of the string. 
+ */ + inline size_t length() const { return len_; } + + /** + * Clear the buffer. + */ + void clear() { len_ = 0; } + + /** + * Return true iff the buffer is empty. + */ + inline bool empty() const { return len_ == 0; } + + /** + * Put a terminator in the 'len_'th element and then return a + * pointer to the buffer. Useful for printing. + */ + const char* toZBufXForm(const char *xform) const { + ASSERT_ONLY(size_t xformElts = strlen(xform)); + // Lazily allocate space for print buffer + if(printcs_ == NULL) { + const_cast(printcs_) = new char[len_+1]; + } + char* printcs = const_cast(printcs_); + assert(printcs != NULL); + for(size_t i = 0; i < len_; i++) { + assert_lt(cs_[i], (int)xformElts); + printcs[i] = xform[cs_[i]]; + } + printcs[len_] = 0; + return printcs_; + } + + /** + * Put a terminator in the 'len_'th element and then return a + * pointer to the buffer. Useful for printing. + */ + virtual const T* toZBuf() const { + const_cast(cs_)[len_] = 0; + return cs_; + } + + /** + * Return a const version of the raw buffer. + */ + const T* buf() const { return cs_; } + + /** + * Return a writeable version of the raw buffer. + */ + T* wbuf() { return cs_; } + +protected: + + T *cs_; // +1 so that we have the option of dropping in a terminating "\0" + char *printcs_; // +1 so that we have the option of dropping in a terminating "\0" + size_t len_; // # elements +}; + +/** + * Simple string class with backing memory whose size is managed by the user + * using the constructor and install() member function. No behind-the-scenes + * reallocation or copying takes place. + */ +class S2bDnaString { + +public: + + explicit S2bDnaString() : + cs_(NULL), + printcs_(NULL), + len_(0) + { } + + explicit S2bDnaString(size_t sz) : + cs_(NULL), + printcs_(NULL), + len_(0) + { + resize(sz); + } + + /** + * Copy another object of the same class. 
+ */ + S2bDnaString(const S2bDnaString& o) : + cs_(NULL), + printcs_(NULL), + len_(0) + { + *this = o; + } + + /** + * Create an SStringExpandable from a std::basic_string of the + * appropriate type. + */ + explicit S2bDnaString( + const std::basic_string& str, + bool chars = false, + bool colors = false) : + cs_(NULL), + printcs_(NULL), + len_(0) + { + if(chars) { + if(colors) { + installColors(str.c_str(), str.length()); + } else { + installChars(str.c_str(), str.length()); + } + } else { + install(str.c_str(), str.length()); + } + } + + /** + * Create an SStringExpandable from an array and size. + */ + explicit S2bDnaString( + const char* b, + size_t sz, + bool chars = false, + bool colors = false) : + cs_(NULL), + printcs_(NULL), + len_(0) + { + if(chars) { + if(colors) { + installColors(b, sz); + } else { + installChars(b, sz); + } + } else { + install(b, sz); + } + } + + /** + * Create an SStringFixed from a zero-terminated string. + */ + explicit S2bDnaString( + const char* b, + bool chars = false, + bool colors = false) : + cs_(NULL), + printcs_(NULL), + len_(0) + { + if(chars) { + if(colors) { + installColors(b, strlen(b)); + } else { + installChars(b, strlen(b)); + } + } else { + install(b, strlen(b)); + } + } + + /** + * Destroy the expandable string object. + */ + virtual ~S2bDnaString() { + if(cs_ != NULL) { + delete[] cs_; + cs_ = NULL; + } + if(printcs_ != NULL) { + delete[] printcs_; + printcs_ = NULL; + } + len_ = 0; + } + + /** + * Assignment to other SString. + */ + template + S2bDnaString& operator=(const T& o) { + install(o.c_str(), o.length()); + return *this; + } + + /** + * Assignment from a std::basic_string + */ + template + S2bDnaString& operator=(const std::basic_string& o) { + install(o); + return *this; + } + + /** + * Resizes the string without preserving its contents. 
+ */ + void resize(size_t sz) { + if(cs_ != NULL) { + delete cs_; + cs_ = NULL; + } + if(printcs_ != NULL) { + delete printcs_; + printcs_ = NULL; + } + len_ = sz; + if(sz != 0) { + cs_ = new uint32_t[nwords()]; + } + } + + /** + * Return DNA character corresponding to element 'idx'. + */ + char toChar(size_t idx) const { + int c = (int)get(idx); + assert_range(0, 3, c); + return "ACGT"[c]; + } + + /** + * Return color character corresponding to element 'idx'. + */ + char toColor(size_t idx) const { + int c = (int)get(idx); + assert_range(0, 3, c); + return "0123"[c]; + } + + /** + * Return ith character from the left of either the forward or the + * reverse version of the read. + */ + char windowGet( + size_t i, + bool fw, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = len_; + assert_lt(i, len); + assert_leq(len, len_ - depth); + return fw ? get(depth+i) : get(depth+len-i-1); + } + + /** + * Return ith character from the left of either the forward or the + * reverse-complement version of the read. + */ + template + void windowGet( + T& ret, + bool fw, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = len_; + assert_leq(len, len_ - depth); + ret.resize(len); + for(size_t i = 0; i < len; i++) { + ret.set((fw ? get(depth+i) : get(depth+len-i-1)), i); + } + } + + /** + * Return length in 32-bit words. + */ + size_t nwords() const { + return (len_ + 15) >> 4; + } + + /** + * Set character at index 'idx' to 'c'. + */ + void set(int c, size_t idx) { + assert_lt(idx, len_); + assert_range(0, 3, c); + size_t word = idx >> 4; + size_t bpoff = (idx & 15) << 1; + cs_[word] = cs_[word] & ~(uint32_t)(3 << bpoff); + cs_[word] = cs_[word] | (uint32_t)(c << bpoff); + } + + /** + * Set character at index 'idx' to DNA char 'c'. + */ + void setChar(int c, size_t idx) { + assert_in(toupper(c), "ACGT"); + int bp = asc2dna[c]; + set(bp, idx); + } + + /** + * Set character at index 'idx' to color char 'c'. 
+ */ + void setColor(int c, size_t idx) { + assert_in(toupper(c), "0123"); + int co = asc2col[c]; + set(co, idx); + } + + /** + * Set the ith 32-bit word to given word. + */ + void setWord(uint32_t w, size_t i) { + assert_lt(i, nwords()); + cs_[i] = w; + } + + /** + * Retrieve constant version of element i. + */ + char operator[](size_t i) const { + assert_lt(i, len_); + return get(i); + } + + /** + * Retrieve constant version of element i. + */ + char get(size_t i) const { + assert_lt(i, len_); + size_t word = i >> 4; + size_t bpoff = (i & 15) << 1; + return (char)((cs_[word] >> bpoff) & 3); + } + + /** + * Copy packed words from string 'b' into this packed string. + */ + void install(const uint32_t* b, size_t sz) { + if(sz == 0) return; + resize(sz); + memcpy(cs_, b, sizeof(uint32_t)*nwords()); + } + + /** + * Copy 'sz' DNA characters encoded as integers from buffer 'b' into this + * packed string. + */ + void install(const char* b, size_t sz) { + if(sz == 0) return; + resize(sz); + size_t wordi = 0; + for(size_t i = 0; i < sz; i += 16) { + uint32_t word = 0; + for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) { + uint32_t bp = (int)b[i+j]; + uint32_t shift = (uint32_t)j << 1; + assert_range(0, 3, (int)bp); + word |= (bp << shift); + } + cs_[wordi++] = word; + } + } + + /** + * Copy 'sz' DNA characters from buffer 'b' into this packed string. + */ + void installChars(const char* b, size_t sz) { + if(sz == 0) return; + resize(sz); + size_t wordi = 0; + for(size_t i = 0; i < sz; i += 16) { + uint32_t word = 0; + for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) { + char c = b[i+j]; + assert_in(toupper(c), "ACGT"); + int bp = asc2dna[(int)c]; + assert_range(0, 3, (int)bp); + uint32_t shift = (uint32_t)j << 1; + word |= (bp << shift); + } + cs_[wordi++] = word; + } + } + + /** + * Copy 'sz' color characters from buffer 'b' into this packed string. 
+ */ + void installColors(const char* b, size_t sz) { + if(sz == 0) return; + resize(sz); + size_t wordi = 0; + for(size_t i = 0; i < sz; i += 16) { + uint32_t word = 0; + for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) { + char c = b[i+j]; + assert_in(c, "0123"); + int bp = asc2col[(int)c]; + assert_range(0, 3, (int)bp); + uint32_t shift = (uint32_t)j << 1; + word |= (bp << shift); + } + cs_[wordi++] = word; + } + } + + /** + * Copy 'sz' DNA characters from buffer 'b' into this packed string. + */ + void install(const char* b) { + install(b, strlen(b)); + } + + /** + * Copy 'sz' DNA characters from buffer 'b' into this packed string. + */ + void installChars(const char* b) { + installChars(b, strlen(b)); + } + + /** + * Copy 'sz' DNA characters from buffer 'b' into this packed string. + */ + void installColors(const char* b) { + installColors(b, strlen(b)); + } + + /** + * Copy 'sz' DNA characters from buffer 'b' into this packed string. + */ + void install(const std::basic_string& b) { + install(b.c_str(), b.length()); + } + + /** + * Copy 'sz' DNA characters from buffer 'b' into this packed string. + */ + void installChars(const std::basic_string& b) { + installChars(b.c_str(), b.length()); + } + + /** + * Copy 'sz' DNA characters from buffer 'b' into this packed string. + */ + void installColors(const std::basic_string& b) { + installColors(b.c_str(), b.length()); + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reversing them + * in the process. + */ + void installReverse(const char* b, size_t sz) { + resize(sz); + if(sz == 0) return; + size_t wordi = 0; + size_t bpi = 0; + cs_[0] = 0; + for(size_t i =sz; i > 0; i--) { + assert_range(0, 3, (int)b[i-1]); + cs_[wordi] |= ((int)b[i-1] << (bpi<<1)); + if(bpi == 15) { + wordi++; + cs_[wordi] = 0; + bpi = 0; + } else bpi++; + } + } + + /** + * Copy all chars from buffer of DNA characters 'b' into this string, + * reversing them in the process. 
+ */ + void installReverse(const char* b) { + installReverse(b, strlen(b)); + } + + /** + * Copy 'sz' bytes from buffer of DNA characters 'b' into this string, + * reversing them in the process. + */ + void installReverseChars(const char* b, size_t sz) { + resize(sz); + if(sz == 0) return; + size_t wordi = 0; + size_t bpi = 0; + cs_[0] = 0; + for(size_t i =sz; i > 0; i--) { + char c = b[i-1]; + assert_in(toupper(c), "ACGT"); + int bp = asc2dna[(int)c]; + assert_range(0, 3, bp); + cs_[wordi] |= (bp << (bpi<<1)); + if(bpi == 15) { + wordi++; + cs_[wordi] = 0; + bpi = 0; + } else bpi++; + } + } + + /** + * Copy all chars from buffer of DNA characters 'b' into this string, + * reversing them in the process. + */ + void installReverseChars(const char* b) { + installReverseChars(b, strlen(b)); + } + + /** + * Copy 'sz' bytes from buffer of color characters 'b' into this string, + * reversing them in the process. + */ + void installReverseColors(const char* b, size_t sz) { + resize(sz); + if(sz == 0) return; + size_t wordi = 0; + size_t bpi = 0; + cs_[0] = 0; + for(size_t i =sz; i > 0; i--) { + char c = b[i-1]; + assert_in(c, "0123"); + int bp = asc2col[(int)c]; + assert_range(0, 3, bp); + cs_[wordi] |= (bp << (bpi<<1)); + if(bpi == 15) { + wordi++; + cs_[wordi] = 0; + bpi = 0; + } else bpi++; + } + } + + /** + * Copy all chars from buffer of color characters 'b' into this string, + * reversing them in the process. + */ + void installReverseColors(const char* b) { + installReverseColors(b, strlen(b)); + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reversing them + * in the process. 
+ */ + void installReverse(const S2bDnaString& b) { + resize(b.len_); + if(b.len_ == 0) return; + size_t wordi = 0; + size_t bpi = 0; + size_t wordb = b.nwords()-1; + size_t bpb = (b.len_-1) & 15; + cs_[0] = 0; + for(size_t i = b.len_; i > 0; i--) { + int bbp = (int)((b[wordb] >> (bpb << 1)) & 3); + assert_range(0, 3, bbp); + cs_[wordi] |= (bbp << (bpi << 1)); + if(bpi == 15) { + wordi++; + cs_[wordi] = 0; + bpi = 0; + } else bpi++; + if(bpb == 0) { + wordb--; + bpi = 15; + } else bpi--; + } + } + + /** + * Return true iff the two strings are equal. + */ + bool operator==(const S2bDnaString& o) { + return sstr_eq(*this, o); + } + + /** + * Return true iff the two strings are not equal. + */ + bool operator!=(const S2bDnaString& o) { + return sstr_neq(*this, o); + } + + /** + * Return true iff this string is less than given string. + */ + bool operator<(const S2bDnaString& o) { + return sstr_lt(*this, o); + } + + /** + * Return true iff this string is greater than given string. + */ + bool operator>(const S2bDnaString& o) { + return sstr_gt(*this, o); + } + + /** + * Return true iff this string is less than or equal to given string. + */ + bool operator<=(const S2bDnaString& o) { + return sstr_leq(*this, o); + } + + /** + * Return true iff this string is greater than or equal to given string. + */ + bool operator>=(const S2bDnaString& o) { + return sstr_geq(*this, o); + } + + /** + * Reverse the 2-bit encoded DNA string in-place. 
+ */ + void reverse() { + if(len_ <= 1) return; + size_t wordf = nwords()-1; + size_t bpf = (len_-1) & 15; + size_t wordi = 0; + size_t bpi = 0; + while(wordf > wordi || (wordf == wordi && bpf > bpi)) { + int f = (cs_[wordf] >> (bpf << 1)) & 3; + int i = (cs_[wordi] >> (bpi << 1)) & 3; + cs_[wordf] &= ~(uint32_t)(3 << (bpf << 1)); + cs_[wordi] &= ~(uint32_t)(3 << (bpi << 1)); + cs_[wordf] |= (uint32_t)(i << (bpf << 1)); + cs_[wordi] |= (uint32_t)(f << (bpi << 1)); + if(bpf == 0) { + bpf = 15; + wordf--; + } else bpf--; + if(bpi == 15) { + bpi = 0; + wordi++; + } else bpi++; + } + } + + /** + * Reverse a substring of the buffer in place. + */ + void reverseWindow(size_t off, size_t len) { + assert_leq(off, len_); + assert_leq(off+len, len_); + if(len <= 1) return; + size_t wordf = (off+len-1) >> 4; + size_t bpf = (off+len-1) & 15; + size_t wordi = (off ) >> 4; + size_t bpi = (off ) & 15; + while(wordf > wordi || (wordf == wordi && bpf > bpi)) { + int f = (cs_[wordf] >> (bpf << 1)) & 3; + int i = (cs_[wordi] >> (bpi << 1)) & 3; + cs_[wordf] &= ~(uint32_t)(3 << (bpf << 1)); + cs_[wordi] &= ~(uint32_t)(3 << (bpi << 1)); + cs_[wordf] |= (uint32_t)(i << (bpf << 1)); + cs_[wordi] |= (uint32_t)(f << (bpi << 1)); + if(bpf == 0) { + bpf = 15; + wordf--; + } else bpf--; + if(bpi == 15) { + bpi = 0; + wordi++; + } else bpi++; + } + } + + + /** + * Set the first len elements of the buffer to el. 
+ */ + void fill(size_t len, char el) { + assert_leq(len, len_); + assert_range(0, 3, (int)el); + size_t word = 0; + if(len > 32) { + // Copy el throughout block + uint32_t bl = (uint32_t)el; + bl |= (bl << 2); + bl |= (bl << 4); + bl |= (bl << 8); + bl |= (bl << 16); + // Fill with blocks + size_t blen = len >> 4; + for(; word < blen; word++) { + cs_[word] = bl; + } + len = len & 15; + } + size_t bp = 0; + for(size_t i = 0; i < len; i++) { + cs_[word] &= ~(uint32_t)(3 << (bp << 1)); + cs_[word] |= (uint32_t)(el << (bp << 1)); + if(bp == 15) { + word++; + bp = 0; + } else bp++; + } + } + + /** + * Set all elements of the buffer to el. + */ + void fill(char el) { + fill(len_, el); + } + + /** + * Return the ith character in the window defined by fw, color, depth and + * len. + */ + char windowGetDna( + size_t i, + bool fw, + bool color, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = len_; + assert_lt(i, len); + assert_leq(len, len_ - depth); + if(fw) { + return get(depth+i); + } else { + return + color ? + get(depth+len-i-1) : + compDna(get(depth+len-i-1)); + } + } + + /** + * Fill the given DNA buffer with the substring specified by fw, + * color, depth and len. + */ + template + void windowGetDna( + T& buf, + bool fw, + bool color, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = len_; + assert_leq(len, len_ - depth); + buf.resize(len); + for(size_t i = 0; i < len; i++) { + buf.set( + (fw ? + get(depth+i) : + (color ? + get(depth+len-i-1) : + compDna(get(depth+len-i-1)))), i); + } + } + + /** + * Return the length of the string. + */ + inline size_t length() const { return len_; } + + /** + * Clear the buffer. + */ + void clear() { len_ = 0; } + + /** + * Return true iff the buffer is empty. + */ + inline bool empty() const { return len_ == 0; } + + /** + * Return a const version of the raw buffer. + */ + const uint32_t* buf() const { return cs_; } + + /** + * Return a writeable version of the raw buffer. 
+ */ + uint32_t* wbuf() { return cs_; } + + /** + * Note: the size of the string once it's stored in the print buffer is 4 + * times as large as the string as stored in compact 2-bit-per-char words. + */ + const char* toZBuf() const { + if(printcs_ == NULL) { + const_cast(printcs_) = new char[len_+1]; + } + char *printcs = const_cast(printcs_); + size_t word = 0, bp = 0; + for(size_t i = 0; i < len_; i++) { + int c = (cs_[word] >> (bp << 1)) & 3; + printcs[i] = "ACGT"[c]; + if(bp == 15) { + word++; + bp = 0; + } else bp++; + } + printcs[len_] = '\0'; + return printcs_; + } + +protected: + + uint32_t *cs_; // 2-bit packed words + char *printcs_; + size_t len_; // # elements +}; + +/** + * Simple string class with backing memory that automatically expands as needed. + */ +template +class SStringExpandable { + +public: + + explicit SStringExpandable() : + cs_(NULL), + printcs_(NULL), + len_(0), + sz_(0) + { } + + explicit SStringExpandable(size_t sz) : + cs_(NULL), + printcs_(NULL), + len_(0), + sz_(0) + { + expandNoCopy(sz); + } + + /** + * Create an SStringExpandable from another SStringExpandable. + */ + SStringExpandable(const SStringExpandable& o) : + cs_(NULL), + printcs_(NULL), + len_(0), + sz_(0) + { + *this = o; + } + + /** + * Create an SStringExpandable from a std::basic_string of the + * appropriate type. + */ + explicit SStringExpandable(const std::basic_string& str) : + cs_(NULL), + printcs_(NULL), + len_(0), + sz_(0) + { + install(str.c_str(), str.length()); + } + + /** + * Create an SStringExpandable from an array and size. + */ + explicit SStringExpandable(const T* b, size_t sz) : + cs_(NULL), + printcs_(NULL), + len_(0), + sz_(0) + { + install(b, sz); + } + + /** + * Create an SStringExpandable from a zero-terminated array. + */ + explicit SStringExpandable(const T* b) : + cs_(NULL), + printcs_(NULL), + len_(0), + sz_(0) + { + install(b, strlen(b)); + } + + /** + * Destroy the expandable string object. 
+ */ + virtual ~SStringExpandable() { + if(cs_ != NULL) { + delete[] cs_; + cs_ = NULL; + } + if(printcs_ != NULL) { + delete[] printcs_; + printcs_ = NULL; + } + sz_ = len_ = 0; + } + + /** + * Return ith character from the left of either the forward or the + * reverse-complement version of the read. + */ + T windowGet( + size_t i, + bool fw, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = len_; + assert_lt(i, len); + assert_leq(len, len_ - depth); + return fw ? cs_[depth+i] : cs_[depth+len-i-1]; + } + + /** + * Return ith character from the left of either the forward or the + * reverse-complement version of the read. + */ + void windowGet( + T& ret, + bool fw, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = len_; + assert_leq(len, len_ - depth); + for(size_t i = 0; i < len; i++) { + ret.append(fw ? cs_[depth+i] : cs_[depth+len-i-1]); + } + } + + /** + * Assignment to other SStringFixed. + */ + SStringExpandable& operator=(const SStringExpandable& o) { + install(o.cs_, o.len_); + return *this; + } + + /** + * Assignment from a std::basic_string + */ + SStringExpandable& operator=(const std::basic_string& o) { + install(o.c_str(), o.length()); + return *this; + } + + /** + * Insert char c before position 'idx'; slide subsequent chars down. + */ + void insert(const T& c, size_t idx) { + assert_lt(idx, len_); + if(sz_ < len_ + 1) expandCopy((len_ + 1 + S) * M); + len_++; + // Move everyone down by 1 + // len_ is the *new* length + for(size_t i = len_; i > idx+1; i--) { + cs_[i-1] = cs_[i-2]; + } + cs_[idx] = c; + } + + /** + * Set character at index 'idx' to 'c'. + */ + void set(int c, size_t idx) { + assert_lt(idx, len_); + cs_[idx] = c; + } + + /** + * Append char c. + */ + void append(const T& c) { + if(sz_ < len_ + 1) expandCopy((len_ + 1 + S) * M); + cs_[len_++] = c; + } + + /** + * Delete char at position 'idx'; slide subsequent chars up. 
+ */ + void remove(size_t idx) { + assert_lt(idx, len_); + assert_gt(len_, 0); + for(size_t i = idx; i < len_-1; i++) { + cs_[i] = cs_[i+1]; + } + len_--; + } + + /** + * Retrieve constant version of element i. + */ + const T& operator[](size_t i) const { + assert_lt(i, len_); + return cs_[i]; + } + + /** + * Retrieve mutable version of element i. + */ + T& operator[](size_t i) { + assert_lt(i, len_); + return cs_[i]; + } + + /** + * Retrieve constant version of element i. + */ + const T& get(size_t i) const { + assert_lt(i, len_); + return cs_[i]; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string. + */ + virtual void install(const T* b, size_t sz) { + if(sz_ < sz) expandNoCopy((sz + S) * M); + memcpy(cs_, b, sz * sizeof(T)); + len_ = sz; + } + + + /** + * Copy all bytes from zero-terminated buffer 'b' into this string. + */ + void install(const T* b) { install(b, strlen(b)); } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reversing them + * in the process. + */ + void installReverse(const char* b, size_t sz) { + if(sz_ < sz) expandNoCopy((sz + S) * M); + for(size_t i = 0; i < sz; i++) { + cs_[i] = b[sz-i-1]; + } + len_ = sz; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reversing them + * in the process. + */ + void installReverse(const SStringExpandable& b) { + if(sz_ < b.len_) expandNoCopy((b.len_ + S) * M); + for(size_t i = 0; i < b.len_; i++) { + cs_[i] = b.cs_[b.len_ - i - 1]; + } + len_ = b.len_; + } + + /** + * Return true iff the two strings are equal. + */ + bool operator==(const SStringExpandable& o) { + return sstr_eq(*this, o); + } + + /** + * Return true iff the two strings are not equal. + */ + bool operator!=(const SStringExpandable& o) { + return sstr_neq(*this, o); + } + + /** + * Return true iff this string is less than given string. + */ + bool operator<(const SStringExpandable& o) { + return sstr_lt(*this, o); + } + + /** + * Return true iff this string is greater than given string. 
+ */ + bool operator>(const SStringExpandable& o) { + return sstr_gt(*this, o); + } + + /** + * Return true iff this string is less than or equal to given string. + */ + bool operator<=(const SStringExpandable& o) { + return sstr_leq(*this, o); + } + + /** + * Return true iff this string is greater than or equal to given string. + */ + bool operator>=(const SStringExpandable& o) { + return sstr_geq(*this, o); + } + + /** + * Reverse the buffer in place. + */ + void reverse() { + for(size_t i = 0; i < (len_ >> 1); i++) { + T tmp = get(i); + set(get(len_-i-1), i); + set(tmp, len_-i-1); + } + } + + /** + * Reverse a substring of the buffer in place. + */ + void reverseWindow(size_t off, size_t len) { + assert_leq(off, len_); + assert_leq(off + len, len_); + size_t mid = len >> 1; + for(size_t i = 0; i < mid; i++) { + T tmp = get(off+i); + set(get(off+len-i-1), off+i); + set(tmp, off+len-i-1); + } + } + + /** + * Simply resize the buffer. If the buffer is resized to be + * longer, the newly-added elements will contain garbage and should + * be initialized immediately. + */ + void resize(size_t len) { + if(sz_ < len) expandCopy((len + S) * M); + len_ = len; + } + + /** + * Simply resize the buffer. If the buffer is resized to be + * longer, new elements will be initialized with 'el'. + */ + void resize(size_t len, const T& el) { + if(sz_ < len) expandCopy((len + S) * M); + if(len > len_) { + for(size_t i = len_; i < len; i++) { + cs_[i] = el; + } + } + len_ = len; + } + + /** + * Set the first len elements of the buffer to el. + */ + void fill(size_t len, const T& el) { + assert_leq(len, len_); + for(size_t i = 0; i < len; i++) { + cs_[i] = el; + } + } + + /** + * Set all elements of the buffer to el. + */ + void fill(const T& el) { + fill(len_, el); + } + + /** + * Trim len characters from the beginning of the string. 
+ */ + void trimBegin(size_t len) { + assert_leq(len, len_); + if(len == len_) { + len_ = 0; return; + } + for(size_t i = 0; i < len_-len; i++) { + cs_[i] = cs_[i+len]; + } + len_ -= len; + } + + /** + * Trim len characters from the end of the string. + */ + void trimEnd(size_t len) { + if(len >= len_) len_ = 0; + else len_ -= len; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string. + */ + void append(const T* b, size_t sz) { + if(sz_ < len_ + sz) expandCopy((len_ + sz + S) * M); + memcpy(cs_ + len_, b, sz * sizeof(T)); + len_ += sz; + } + + /** + * Copy bytes from zero-terminated buffer 'b' into this string. + */ + void append(const T* b) { + append(b, strlen(b)); + } + + /** + * Return the length of the string. + */ + size_t length() const { return len_; } + + /** + * Clear the buffer. + */ + void clear() { len_ = 0; } + + /** + * Return true iff the buffer is empty. + */ + bool empty() const { return len_ == 0; } + + /** + * Put a terminator in the 'len_'th element and then return a + * pointer to the buffer. Useful for printing. + */ + const char* toZBufXForm(const char *xform) const { + ASSERT_ONLY(size_t xformElts = strlen(xform)); + if(empty()) { + const_cast(zero_) = 0; + return &zero_; + } + char* printcs = const_cast(printcs_); + // Lazily allocate space for print buffer + for(size_t i = 0; i < len_; i++) { + assert_lt(cs_[i], (int)xformElts); + printcs[i] = xform[(int)cs_[i]]; + } + printcs[len_] = 0; + return printcs_; + } + + /** + * Put a terminator in the 'len_'th element and then return a + * pointer to the buffer. Useful for printing. + */ + virtual const T* toZBuf() const { + if(empty()) { + const_cast(zeroT_) = 0; + return &zeroT_; + } + assert_leq(len_, sz_); + const_cast(cs_)[len_] = 0; + return cs_; + } + + /** + * Return true iff this DNA string matches the given nucleotide + * character string. 
+ */ + bool eq(const char *str) const { + const char *self = toZBuf(); + return strcmp(str, self) == 0; + } + + /** + * Return a const version of the raw buffer. + */ + const T* buf() const { return cs_; } + + /** + * Return a writeable version of the raw buffer. + */ + T* wbuf() { return cs_; } + +protected: + /** + * Allocate new, bigger buffer and copy old contents into it. If + * requested size can be accommodated by current buffer, do nothing. + */ + void expandCopy(size_t sz) { + if(sz_ >= sz) return; // done! + T *tmp = new T[sz + 1]; + char *ptmp = new char[sz + 1]; + if(cs_ != NULL) { + memcpy(tmp, cs_, sizeof(T)*len_); + delete[] cs_; + } + if(printcs_ != NULL) { + memcpy(ptmp, printcs_, sizeof(char)*len_); + delete[] printcs_; + } + cs_ = tmp; + printcs_ = ptmp; + sz_ = sz; + } + + /** + * Allocate new, bigger buffer. If requested size can be + * accommodated by current buffer, do nothing. + */ + void expandNoCopy(size_t sz) { + if(sz_ >= sz) return; // done! + if(cs_ != NULL) delete[] cs_; + if(printcs_ != NULL) delete[] printcs_; + cs_ = new T[sz + 1]; + printcs_ = new char[sz + 1]; + sz_ = sz; + } + + T *cs_; // +1 so that we have the option of dropping in a terminating "\0" + char *printcs_; // +1 so that we have the option of dropping in a terminating "\0" + char zero_; // 0 terminator for empty string + T zeroT_; // 0 terminator for empty string + size_t len_; // # filled-in elements + size_t sz_; // size capacity of cs_ +}; + +/** + * Simple string class with in-object storage. + * + * All copies induced by, e.g., operator=, the copy constructor, + * install() and append(), are shallow (using memcpy/sizeof). If deep + * copies are needed, use a different class. + * + * Reading from an uninitialized element results in an assert as long + * as NDEBUG is not defined. If NDEBUG is defined, the result is + * undefined. 
+ */ +template +class SStringFixed { +public: + explicit SStringFixed() : len_(0) { } + + /** + * Create an SStringFixed from another SStringFixed. + */ + SStringFixed(const SStringFixed& o) { + *this = o; + } + + /** + * Create an SStringFixed from another SStringFixed. + */ + explicit SStringFixed(const std::basic_string& str) { + install(str.c_str(), str.length()); + } + + /** + * Create an SStringFixed from an array and size. + */ + explicit SStringFixed(const T* b, size_t sz) { + install(b, sz); + } + + /** + * Create an SStringFixed from a zero-terminated string. + */ + explicit SStringFixed(const T* b) { + install(b, strlen(b)); + } + + virtual ~SStringFixed() { } // C++ needs this + + /** + * Retrieve constant version of element i. + */ + inline const T& operator[](size_t i) const { + return get(i); + } + + /** + * Retrieve mutable version of element i. + */ + inline T& operator[](size_t i) { + return get(i); + } + + /** + * Retrieve constant version of element i. + */ + inline const T& get(size_t i) const { + assert_lt(i, len_); + return cs_[i]; + } + + /** + * Retrieve mutable version of element i. + */ + inline T& get(size_t i) { + assert_lt(i, len_); + return cs_[i]; + } + + /** + * Return ith character from the left of either the forward or the + * reverse-complement version of the read. + */ + T windowGet( + size_t i, + bool fw, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = len_; + assert_lt(i, len); + assert_leq(len, len_ - depth); + return fw ? cs_[depth+i] : cs_[depth+len-i-1]; + } + + /** + * Return ith character from the left of either the forward or the + * reverse-complement version of the read. + */ + void windowGet( + T& ret, + bool fw, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = len_; + assert_leq(len, len_ - depth); + for(size_t i = 0; i < len; i++) { + ret.append(fw ? cs_[depth+i] : cs_[depth+len-i-1]); + } + } + + /** + * Assignment to other SStringFixed. 
+ */ + SStringFixed& operator=(const SStringFixed& o) { + install(o.cs_, o.len_); + return *this; + } + + /** + * Assignment from a std::basic_string + */ + SStringFixed& operator=(const std::basic_string& o) { + install(o); + return *this; + } + + /** + * Insert char c before position 'idx'; slide subsequent chars down. + */ + void insert(const T& c, size_t idx) { + assert_lt(len_, S); + assert_lt(idx, len_); + // Move everyone down by 1 + for(int i = len_; i > idx; i--) { + cs_[i] = cs_[i-1]; + } + cs_[idx] = c; + len_++; + } + + /** + * Set character at index 'idx' to 'c'. + */ + void set(int c, size_t idx) { + assert_lt(idx, len_); + cs_[idx] = c; + } + + /** + * Append char c. + */ + void append(const T& c) { + assert_lt(len_, S); + cs_[len_++] = c; + } + + /** + * Delete char at position 'idx'; slide subsequent chars up. + */ + void remove(size_t idx) { + assert_lt(idx, len_); + assert_gt(len_, 0); + for(size_t i = idx; i < len_-1; i++) { + cs_[i] = cs_[i+1]; + } + len_--; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string. + */ + virtual void install(const T* b, size_t sz) { + assert_leq(sz, S); + memcpy(cs_, b, sz * sizeof(T)); + len_ = sz; + } + + /** + * Copy all bytes from zero-terminated buffer 'b' into this string. + */ + void install(const T* b) { install(b, strlen(b)); } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reversing them + * in the process. + */ + void installReverse(const char* b, size_t sz) { + assert_leq(sz, S); + for(size_t i = 0; i < sz; i++) { + cs_[i] = b[sz-i-1]; + } + len_ = sz; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reversing them + * in the process. + */ + void installReverse(const SStringFixed& b) { + assert_leq(b.len_, S); + for(size_t i = 0; i < b.len_; i++) { + cs_[i] = b.cs_[b.len_ - i - 1]; + } + len_ = b.len_; + } + + /** + * Return true iff the two strings are equal. 
+ */ + bool operator==(const SStringFixed& o) { + return sstr_eq(*this, o); + } + + /** + * Return true iff the two strings are not equal. + */ + bool operator!=(const SStringFixed& o) { + return sstr_neq(*this, o); + } + + /** + * Return true iff this string is less than given string. + */ + bool operator<(const SStringFixed& o) { + return sstr_lt(*this, o); + } + + /** + * Return true iff this string is greater than given string. + */ + bool operator>(const SStringFixed& o) { + return sstr_gt(*this, o); + } + + /** + * Return true iff this string is less than or equal to given string. + */ + bool operator<=(const SStringFixed& o) { + return sstr_leq(*this, o); + } + + /** + * Return true iff this string is greater than or equal to given string. + */ + bool operator>=(const SStringFixed& o) { + return sstr_geq(*this, o); + } + + /** + * Reverse the buffer in place. + */ + void reverse() { + for(size_t i = 0; i < (len_ >> 1); i++) { + T tmp = get(i); + set(get(len_-i-1), i); + set(tmp, len_-i-1); + } + } + + /** + * Reverse a substring of the buffer in place. + */ + void reverseWindow(size_t off, size_t len) { + assert_leq(off, len_); + assert_leq(off + len, len_); + size_t mid = len >> 1; + for(size_t i = 0; i < mid; i++) { + T tmp = get(off+i); + set(get(off+len-i-1), off+i); + set(tmp, off+len-i-1); + } + } + + /** + * Simply resize the buffer. If the buffer is resized to be + * longer, the newly-added elements will contain garbage and should + * be initialized immediately. + */ + void resize(size_t len) { + assert_lt(len, S); + len_ = len; + } + + /** + * Simply resize the buffer. If the buffer is resized to be + * longer, new elements will be initialized with 'el'. + */ + void resize(size_t len, const T& el) { + assert_lt(len, S); + if(len > len_) { + for(size_t i = len_; i < len; i++) { + cs_[i] = el; + } + } + len_ = len; + } + + /** + * Set the first len elements of the buffer to el. 
+ */ + void fill(size_t len, const T& el) { + assert_leq(len, len_); + for(size_t i = 0; i < len; i++) { + cs_[i] = el; + } + } + + /** + * Set all elements of the buffer to el. + */ + void fill(const T& el) { + fill(len_, el); + } + + /** + * Trim len characters from the beginning of the string. + */ + void trimBegin(size_t len) { + assert_leq(len, len_); + if(len == len_) { + len_ = 0; return; + } + for(size_t i = 0; i < len_-len; i++) { + cs_[i] = cs_[i+len]; + } + len_ -= len; + } + + /** + * Trim len characters from the end of the string. + */ + void trimEnd(size_t len) { + if(len >= len_) len_ = 0; + else len_ -= len; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string. + */ + void append(const T* b, size_t sz) { + assert_leq(sz + len_, S); + memcpy(cs_ + len_, b, sz * sizeof(T)); + len_ += sz; + } + + /** + * Copy bytes from zero-terminated buffer 'b' into this string. + */ + void append(const T* b) { + append(b, strlen(b)); + } + + /** + * Return the length of the string. + */ + size_t length() const { return len_; } + + /** + * Clear the buffer. + */ + void clear() { len_ = 0; } + + /** + * Return true iff the buffer is empty. + */ + bool empty() const { return len_ == 0; } + + /** + * Put a terminator in the 'len_'th element and then return a + * pointer to the buffer. Useful for printing. + */ + virtual const T* toZBuf() const { + const_cast(cs_)[len_] = 0; + return cs_; + } + + /** + * Return true iff this DNA string matches the given nucleotide + * character string. + */ + bool eq(const char *str) const { + const char *self = toZBuf(); + return strcmp(str, self) == 0; + } + + /** + * Put a terminator in the 'len_'th element and then return a + * pointer to the buffer. Useful for printing. 
+ */ + const char* toZBufXForm(const char *xform) const { + ASSERT_ONLY(size_t xformElts = strlen(xform)); + char* printcs = const_cast(printcs_); + for(size_t i = 0; i < len_; i++) { + assert_lt(cs_[i], (int)xformElts); + printcs[i] = xform[cs_[i]]; + } + printcs[len_] = 0; + return printcs_; + } + + /** + * Return a const version of the raw buffer. + */ + const T* buf() const { return cs_; } + + /** + * Return a writeable version of the raw buffer. + */ + T* wbuf() { return cs_; } + +protected: + T cs_[S+1]; // +1 so that we have the option of dropping in a terminating "\0" + char printcs_[S+1]; // +1 so that we have the option of dropping in a terminating "\0" + size_t len_; +}; + +// +// Stream put operators +// + +template +std::ostream& operator<< (std::ostream& os, const SStringExpandable& str) { + os << str.toZBuf(); + return os; +} + +template +std::ostream& operator<< (std::ostream& os, const SStringFixed& str) { + os << str.toZBuf(); + return os; +} + +extern uint8_t asc2dna[]; +extern uint8_t asc2col[]; + +extern uint8_t asc2dna_3N[2][256]; + + + +/** + * Encapsulates a fixed-length DNA string with characters encoded as + * chars. Only capable of encoding A, C, G, T and N. The length is + * specified via the template parameter S. + */ +template +class SDnaStringFixed : public SStringFixed { +public: + + explicit SDnaStringFixed() : SStringFixed() { } + + /** + * Create an SStringFixed from another SStringFixed. + */ + SDnaStringFixed(const SDnaStringFixed& o) : + SStringFixed(o) { } + + /** + * Create an SStringFixed from a C++ basic_string. + */ + explicit SDnaStringFixed(const std::basic_string& str) : + SStringFixed(str) { } + + /** + * Create an SStringFixed from an array and size. + */ + explicit SDnaStringFixed(const char* b, size_t sz) : + SStringFixed(b, sz) { } + + /** + * Create an SStringFixed from a zero-terminated string. 
+ */ + explicit SDnaStringFixed( + const char* b, + bool chars = false, + bool colors = false) : + SStringFixed() + { + if(chars) { + if(colors) { + installColors(b, strlen(b)); + } else { + installChars(b, strlen(b)); + } + } else { + install(b, strlen(b)); + } + } + + virtual ~SDnaStringFixed() { } // C++ needs this + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reverse- + * complementing them in the process, assuming an encoding where + * 0=A, 1=C, 2=G, 3=T, 4=N. + */ + void installReverseComp(const char* b, size_t sz) { + assert_leq(sz, S); + for(size_t i = 0; i < sz; i++) { + this->cs_[i] = (b[sz-i-1] == 4 ? 4 : b[sz-i-1] ^ 3); + } + this->len_ = sz; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reverse- + * complementing them in the process, assuming an encoding where + * 0=A, 1=C, 2=G, 3=T, 4=N. + */ + void installReverseComp(const SDnaStringFixed& b) { + assert_leq(b.len_, S); + for(size_t i = 0; i < b.len_; i++) { + this->cs_[i] = (b.cs_[b.len_-i-1] == 4 ? 4 : b.cs_[b.len_-i-1] ^ 3); + } + this->len_ = b.len_; + } + + /** + * Either reverse or reverse-complement (depending on "color") this + * DNA buffer in-place. + */ + void reverseComp(bool color = false) { + if(color) { + this->reverse(); + } else { + for(size_t i = 0; i < (this->len_ >> 1); i++) { + char tmp1 = (this->cs_[i] == 4 ? 4 : this->cs_[i] ^ 3); + char tmp2 = (this->cs_[this->len_-i-1] == 4 ? 4 : this->cs_[this->len_-i-1] ^ 3); + this->cs_[i] = tmp2; + this->cs_[this->len_-i-1] = tmp1; + } + // Do middle element iff there are an odd number + if((this->len_ & 1) != 0) { + char tmp = this->cs_[this->len_ >> 1]; + tmp = (tmp == 4 ? 4 : tmp ^ 3); + this->cs_[this->len_ >> 1] = tmp; + } + } + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string. 
+ */ + virtual void install(const char* b, size_t sz) { + assert_leq(sz, S); + memcpy(this->cs_, b, sz); +#ifndef NDEBUG + for(size_t i = 0; i < sz; i++) { + assert_leq(this->cs_[i], 4); + assert_geq(this->cs_[i], 0); + } +#endif + this->len_ = sz; + } + + /** + * Copy buffer 'b' of ASCII DNA characters into normal DNA + * characters. + */ + virtual void installChars(const char* b, size_t sz) { + assert_leq(sz, S); + for(size_t i = 0; i < sz; i++) { + assert_in(toupper(b[i]), "ACGTN-"); + this->cs_[i] = asc2dna[(int)b[i]]; + assert_geq(this->cs_[i], 0); + assert_leq(this->cs_[i], 4); + } + this->len_ = sz; + } + + /** + * Copy buffer 'b' of ASCII color characters into normal DNA + * characters. + */ + virtual void installColors(const char* b, size_t sz) { + assert_leq(sz, S); + for(size_t i = 0; i < sz; i++) { + assert_in(b[i], "0123."); + this->cs_[i] = asc2col[(int)b[i]]; + assert_geq(this->cs_[i], 0); + assert_leq(this->cs_[i], 4); + } + this->len_ = sz; + } + + /** + * Copy C++ string of ASCII DNA characters into normal DNA + * characters. + */ + virtual void installChars(const std::basic_string& str) { + installChars(str.c_str(), str.length()); + } + + /** + * Copy C++ string of ASCII color characters into normal DNA + * characters. + */ + virtual void installColors(const std::basic_string& str) { + installColors(str.c_str(), str.length()); + } + + /** + * Set DNA character at index 'idx' to 'c'. + */ + void set(int c, size_t idx) { + assert_lt(idx, this->len_); + assert_leq(c, 4); + assert_geq(c, 0); + this->cs_[idx] = c; + } + + /** + * Append DNA char c. + */ + void append(const char& c) { + assert_lt(this->len_, S); + assert_leq(c, 4); + assert_geq(c, 0); + this->cs_[this->len_++] = c; + } + + /** + * Set DNA character at index 'idx' to 'c'. + */ + void setChar(char c, size_t idx) { + assert_lt(idx, this->len_); + assert_in(toupper(c), "ACGTN"); + this->cs_[idx] = asc2dna[(int)c]; + } + + /** + * Append DNA character. 
+ */ + void appendChar(char c) { + assert_lt(this->len_, S); + assert_in(toupper(c), "ACGTN"); + this->cs_[this->len_++] = asc2dna[(int)c]; + } + + /** + * Return DNA character corresponding to element 'idx'. + */ + char toChar(size_t idx) const { + assert_geq((int)this->cs_[idx], 0); + assert_leq((int)this->cs_[idx], 4); + return "ACGTN"[(int)this->cs_[idx]]; + } + + /** + * Retrieve constant version of element i. + */ + const char& operator[](size_t i) const { + return this->get(i); + } + + /** + * Retrieve constant version of element i. + */ + const char& get(size_t i) const { + assert_lt(i, this->len_); + assert_leq(this->cs_[i], 4); + assert_geq(this->cs_[i], 0); + return this->cs_[i]; + } + + /** + * Return the ith character in the window defined by fw, color, + * depth and len. + */ + char windowGetDna( + size_t i, + bool fw, + bool color, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = this->len_; + assert_lt(i, len); + assert_leq(len, this->len_ - depth); + if(fw) return this->cs_[depth+i]; + else return color ? this->cs_[depth+len-i-1] : + compDna(this->cs_[depth+len-i-1]); + } + + /** + * Fill the given DNA buffer with the substring specified by fw, + * color, depth and len. + */ + void windowGetDna( + SDnaStringFixed& buf, + bool fw, + bool color, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = this->len_; + assert_leq(len, this->len_ - depth); + for(size_t i = 0; i < len; i++) { + buf.append(fw ? this->cs_[depth+i] : + (color ? this->cs_[depth+len-i-1] : + compDna(this->cs_[depth+len-i-1]))); + } + } + + /** + * Put a terminator in the 'len_'th element and then return a + * pointer to the buffer. Useful for printing. + */ + virtual const char* toZBuf() const { return this->toZBufXForm("ACGTN"); } +}; + +/** + * Encapsulates a fixed-length DNA string with characters encoded as + * chars. Only capable of encoding A, C, G, T and N. The length is + * specified via the template parameter S. 
+ */ + +template +class SDnaStringExpandable : public SStringExpandable { +public: + + explicit SDnaStringExpandable() : SStringExpandable() { } + + /** + * Create an SStringFixed from another SStringFixed. + */ + SDnaStringExpandable(const SDnaStringExpandable& o) : + SStringExpandable(o) { } + + /** + * Create an SStringFixed from a C++ basic_string. + */ + explicit SDnaStringExpandable( + const std::basic_string& str, + bool chars = false, + bool colors = false) : + SStringExpandable() + { + if(chars) { + if(colors) { + installColors(str); + } else { + installChars(str); + } + } else { + install(str); + } + } + + /** + * Create an SStringFixed from an array and size. + */ + explicit SDnaStringExpandable( + const char* b, + size_t sz, + bool chars = false, + bool colors = false) : + SStringExpandable() + { + if(chars) { + if(colors) { + installColors(b, sz); + } else { + installChars(b, sz); + } + } else { + install(b, sz); + } + } + + /** + * Create an SStringFixed from a zero-terminated string. + */ + explicit SDnaStringExpandable( + const char* b, + bool chars = false, + bool colors = false) : + SStringExpandable() + { + install(b, chars, colors); + } + + virtual ~SDnaStringExpandable() { } // C++ needs this + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reverse- + * complementing them in the process, assuming an encoding where + * 0=A, 1=C, 2=G, 3=T, 4=N. + */ + void installReverseComp(const char* b, size_t sz) { + if(this->sz_ < sz) this->expandCopy((sz + S) * M); + for(size_t i = 0; i < sz; i++) { + this->cs_[i] = (b[sz-i-1] == 4 ? 4 : b[sz-i-1] ^ 3); + } + this->len_ = sz; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reverse- + * complementing them in the process, assuming an encoding where + * 0=A, 1=C, 2=G, 3=T, 4=N. 
+ */ + void installReverseComp(const SDnaStringExpandable& b) { + if(this->sz_ < b.len_) this->expandCopy((b.len_ + S) * M); + for(size_t i = 0; i < b.len_; i++) { + this->cs_[i] = (b.cs_[b.len_-i-1] == 4 ? 4 : b.cs_[b.len_-i-1] ^ 3); + } + this->len_ = b.len_; + } + + /** + * Either reverse or reverse-complement (depending on "color") this + * DNA buffer in-place. + */ + void reverseComp(bool color = false) { + if(color) { + this->reverse(); + } else { + for(size_t i = 0; i < (this->len_ >> 1); i++) { + char tmp1 = (this->cs_[i] == 4 ? 4 : this->cs_[i] ^ 3); + char tmp2 = (this->cs_[this->len_-i-1] == 4 ? 4 : this->cs_[this->len_-i-1] ^ 3); + this->cs_[i] = tmp2; + this->cs_[this->len_-i-1] = tmp1; + } + // Do middle element iff there are an odd number + if((this->len_ & 1) != 0) { + char tmp = this->cs_[this->len_ >> 1]; + tmp = (tmp == 4 ? 4 : tmp ^ 3); + this->cs_[this->len_ >> 1] = tmp; + } + } + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string. + */ + virtual void install( + const char* b, + bool chars = false, + bool colors = false) + { + if(chars) { + if(colors) { + installColors(b, strlen(b)); + } else { + installChars(b, strlen(b)); + } + } else { + install(b, strlen(b)); + } + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string. + */ + virtual void install(const char* b, size_t sz) { + if(this->sz_ < sz) this->expandCopy((sz + S) * M); + memcpy(this->cs_, b, sz); +#ifndef NDEBUG + for(size_t i = 0; i < sz; i++) { + assert_range(0, 4, (int)this->cs_[i]); + } +#endif + this->len_ = sz; + } + + /** + * Copy buffer 'b' of ASCII DNA characters into normal DNA + * characters. 
+ */ + virtual void installChars(const char* b, size_t sz) { + if(this->sz_ < sz) this->expandCopy((sz + S) * M); + for(size_t i = 0; i < sz; i++) { + assert_in(toupper(b[i]), "ACGTN-"); + this->cs_[i] = asc2dna[(int)b[i]]; + assert_range(0, 4, (int)this->cs_[i]); + } + this->len_ = sz; + } + + /** + * Copy buffer 'b' of ASCII color characters into normal DNA + * characters. + */ + virtual void installColors(const char* b, size_t sz) { + if(this->sz_ < sz) this->expandCopy((sz + S) * M); + for(size_t i = 0; i < sz; i++) { + assert_in(b[i], "0123."); + this->cs_[i] = asc2col[(int)b[i]]; + assert_range(0, 4, (int)this->cs_[i]); + } + this->len_ = sz; + } + + /** + * Copy C++ string of ASCII DNA characters into normal DNA + * characters. + */ + virtual void installChars(const std::basic_string& str) { + installChars(str.c_str(), str.length()); + } + + /** + * Copy C++ string of ASCII color characters into normal DNA + * characters. + */ + virtual void installColors(const std::basic_string& str) { + installColors(str.c_str(), str.length()); + } + + /** + * Set DNA character at index 'idx' to 'c'. + */ + void set(int c, size_t idx) { + assert_lt(idx, this->len_); + assert_range(0, 4, c); + this->cs_[idx] = c; + } + + /** + * Append DNA char c. + */ + void append(const char& c) { + if(this->sz_ < this->len_ + 1) { + this->expandCopy((this->len_ + 1 + S) * M); + } + assert_range(0, 4, (int)c); + this->cs_[this->len_++] = c; + } + + /** + * Set DNA character at index 'idx' to 'c'. + */ + void setChar(char c, size_t idx) { + assert_lt(idx, this->len_); + assert_in(toupper(c), "ACGTN"); + this->cs_[idx] = asc2dna[(int)c]; + } + + /** + * Append DNA character. + */ + void appendChar(char c) { + if(this->sz_ < this->len_ + 1) { + this->expandCopy((this->len_ + 1 + S) * M); + } + assert_in(toupper(c), "ACGTN"); + this->cs_[this->len_++] = asc2dna[(int)c]; + } + + /** + * Return DNA character corresponding to element 'idx'. 
+ */ + char toChar(size_t idx) const { + assert_range(0, 4, (int)this->cs_[idx]); + return "ACGTN"[(int)this->cs_[idx]]; + } + + /** + * Retrieve constant version of element i. + */ + inline const char& operator[](size_t i) const { + return this->get(i); + } + + /** + * Retrieve constant version of element i. + */ + inline const char& get(size_t i) const { + assert_lt(i, this->len_); + assert_range(0, 4, (int)this->cs_[i]); + return this->cs_[i]; + } + + /** + * Return the ith character in the window defined by fw, color, + * depth and len. + */ + char windowGetDna( + size_t i, + bool fw, + bool color, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = this->len_; + assert_lt(i, len); + assert_leq(len, this->len_ - depth); + if(fw) return this->cs_[depth+i]; + else return color ? this->cs_[depth+len-i-1] : + compDna(this->cs_[depth+len-i-1]); + } + + /** + * Fill the given DNA buffer with the substring specified by fw, + * color, depth and len. + */ + void windowGetDna( + SDnaStringExpandable& buf, + bool fw, + bool color, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = this->len_; + assert_leq(len, this->len_ - depth); + for(size_t i = 0; i < len; i++) { + buf.append(fw ? this->cs_[depth+i] : + (color ? this->cs_[depth+len-i-1] : + compDna(this->cs_[depth+len-i-1]))); + } + } + + /** + * Put a terminator in the 'len_'th element and then return a + * pointer to the buffer. Useful for printing. + */ + virtual const char* toZBuf() const { return this->toZBufXForm("ACGTN"); } +}; + +/** + * Encapsulates an expandable DNA string with characters encoded as + * char-sized masks. Encodes A, C, G, T, and all IUPAC, as well as the + * empty mask indicating "matches nothing." + */ +template +class SDnaMaskString : public SStringExpandable { +public: + + explicit SDnaMaskString() : SStringExpandable() { } + + /** + * Create an SStringFixed from another SStringFixed. 
+ */ + SDnaMaskString(const SDnaMaskString& o) : + SStringExpandable(o) { } + + /** + * Create an SStringFixed from a C++ basic_string. + */ + explicit SDnaMaskString(const std::basic_string& str) : + SStringExpandable(str) { } + + /** + * Create an SStringFixed from an array and size. + */ + explicit SDnaMaskString(const char* b, size_t sz) : + SStringExpandable(b, sz) { } + + /** + * Create an SStringFixed from a zero-terminated string. + */ + explicit SDnaMaskString(const char* b, bool chars = false) : + SStringExpandable() + { + if(chars) { + installChars(b, strlen(b)); + } else { + install(b, strlen(b)); + } + } + + virtual ~SDnaMaskString() { } // C++ needs this + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reverse- + * complementing them in the process, assuming an encoding where + * 0=A, 1=C, 2=G, 3=T, 4=N. + */ + void installReverseComp(const char* b, size_t sz) { + while(this->sz_ < sz) { + this->expandNoCopy((sz + S) * M); + } + for(size_t i = 0; i < sz; i++) { + this->cs_[i] = maskcomp[(int)b[sz-i-1]]; + } + this->len_ = sz; + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string, reverse- + * complementing them in the process, assuming an encoding where + * 0=A, 1=C, 2=G, 3=T, 4=N. + */ + void installReverseComp(const SDnaMaskString& b) { + while(this->sz_ < b.len_) { + this->expandNoCopy((b.len_ + S) * M); + } + for(size_t i = 0; i < b.len_; i++) { + this->cs_[i] = maskcomp[(int)b.cs_[b.len_-i-1]]; + } + this->len_ = b.len_; + } + + /** + * Either reverse or reverse-complement (depending on "color") this + * DNA buffer in-place. 
+ */ + void reverseComp(bool color = false) { + if(color) { + this->reverse(); + } else { + for(size_t i = 0; i < (this->len_ >> 1); i++) { + char tmp1 = maskcomp[(int)this->cs_[i]]; + char tmp2 = maskcomp[(int)this->cs_[this->len_-i-1]]; + this->cs_[i] = tmp2; + this->cs_[this->len_-i-1] = tmp1; + } + // Do middle element iff there are an odd number + if((this->len_ & 1) != 0) { + char tmp = this->cs_[this->len_ >> 1]; + tmp = maskcomp[(int)tmp]; + this->cs_[this->len_ >> 1] = tmp; + } + } + } + + /** + * Copy 'sz' bytes from buffer 'b' into this string. + */ + virtual void install(const char* b, size_t sz) { + while(this->sz_ < sz) { + this->expandNoCopy((sz + S) * M); + } + memcpy(this->cs_, b, sz); +#ifndef NDEBUG + for(size_t i = 0; i < sz; i++) { + assert_range((int)this->cs_[i], 0, 15); + } +#endif + this->len_ = sz; + } + + /** + * Copy buffer 'b' of ASCII DNA characters into DNA masks. + */ + virtual void installChars(const char* b, size_t sz) { + while(this->sz_ < sz) { + this->expandNoCopy((sz + S) * M); + } + for(size_t i = 0; i < sz; i++) { + assert_in(b[i], iupacs); + this->cs_[i] = asc2dnamask[(int)b[i]]; + assert_range((int)this->cs_[i], 0, 15); + } + this->len_ = sz; + } + + /** + * Copy C++ string of ASCII DNA characters into normal DNA + * characters. + */ + virtual void installChars(const std::basic_string& str) { + installChars(str.c_str(), str.length()); + } + + /** + * Set DNA character at index 'idx' to 'c'. + */ + void set(int c, size_t idx) { + assert_lt(idx, this->len_); + assert_range(c, 0, 15); + this->cs_[idx] = c; + } + + /** + * Append DNA char c. + */ + void append(const char& c) { + while(this->sz_ < this->len_+1) { + this->expandNoCopy((this->len_ + 1 + S) * M); + } + assert_range((int)c, 0, 15); + this->cs_[this->len_++] = c; + } + + /** + * Set DNA character at index 'idx' to 'c'. 
+ */ + void setChar(char c, size_t idx) { + assert_lt(idx, this->len_); + assert_in(toupper(c), iupacs); + this->cs_[idx] = asc2dnamask[(int)c]; + } + + /** + * Append DNA character. + */ + void appendChar(char c) { + while(this->sz_ < this->len_+1) { + expandNoCopy((this->len_ + 1 + S) * M); + } + assert_in(toupper(c), iupacs); + this->cs_[this->len_++] = asc2dnamask[(int)c]; + } + + /** + * Return DNA character corresponding to element 'idx'. + */ + char toChar(size_t idx) const { + assert_range((int)this->cs_[idx], 0, 15); + return mask2iupac[(int)this->cs_[idx]]; + } + + /** + * Retrieve constant version of element i. + */ + const char& operator[](size_t i) const { + return this->get(i); + } + + /** + * Retrieve mutable version of element i. + */ + char& operator[](size_t i) { + return this->get(i); + } + + /** + * Retrieve constant version of element i. + */ + const char& get(size_t i) const { + assert_lt(i, this->len_); + assert_range((int)this->cs_[i], 0, 15); + return this->cs_[i]; + } + + /** + * Retrieve mutable version of element i. + */ + char& get(size_t i) { + assert_lt(i, this->len_); + assert_range((int)this->cs_[i], 0, 15); + return this->cs_[i]; + } + + /** + * Return the ith character in the window defined by fw, color, + * depth and len. + */ + char windowGetDna( + size_t i, + bool fw, + bool color, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = this->len_; + assert_lt(i, len); + assert_leq(len, this->len_ - depth); + if(fw) return this->cs_[depth+i]; + else return color ? this->cs_[depth+len-i-1] : + maskcomp[this->cs_[depth+len-i-1]]; + } + + /** + * Fill the given DNA buffer with the substring specified by fw, + * color, depth and len. + */ + void windowGetDna( + SDnaStringFixed& buf, + bool fw, + bool color, + size_t depth = 0, + size_t len = 0) const + { + if(len == 0) len = this->len_; + assert_leq(len, this->len_ - depth); + for(size_t i = 0; i < len; i++) { + buf.append(fw ? this->cs_[depth+i] : + (color ? 
this->cs_[depth+len-i-1] : + maskcomp[this->cs_[depth+len-i-1]])); + } + } + + /** + * Sample a random substring of the given length from this DNA + * string and install the result in 'dst'. + */ + template + void randSubstr( + RandomSource& rnd, // pseudo-random generator + T& dst, // put sampled substring here + size_t len, // length of substring to extract + bool watson = true, // true -> possibly extract from Watson strand + bool crick = true) // true -> possibly extract from Crick strand + { + assert(watson || crick); + assert_geq(this->len_, len); + size_t poss = this->len_ - len + 1; + assert_gt(poss, 0); + uint32_t rndoff = (uint32_t)(rnd.nextU32() % poss); + bool fw; + if (watson && !crick) fw = true; + else if(!watson && crick) fw = false; + else { + fw = rnd.nextBool(); + } + if(fw) { + // Install Watson substring + for(size_t i = 0; i < len; i++) { + dst[i] = this->cs_[i + rndoff]; + } + } else { + // Install Crick substring + for(size_t i = 0; i < len; i++) { + dst[i] = maskcomp[(int)this->cs_[i + rndoff + (len - i - 1)]]; + } + } + } + + /** + * Put a terminator in the 'len_'th element and then return a + * pointer to the buffer. Useful for printing. + */ + virtual const char* toZBuf() const { return this->toZBufXForm(iupacs); } +}; + +typedef SStringExpandable BTString; +typedef SDnaStringExpandable<1024, 2> BTDnaString; +typedef SDnaMaskString<32, 2> BTDnaMask; + +#endif /* SSTRING_H_ */ diff --git a/str_util.h b/str_util.h new file mode 100644 index 0000000..48dae17 --- /dev/null +++ b/str_util.h @@ -0,0 +1,47 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
/**
 * Given a string, return an int hash for it.
 *
 * The hash folds each character into a running value that is multiplied
 * by a factor 'a'; 'a' itself is multiplied by the constant 'b' after
 * every character.  Both products are expected to wrap around, so all
 * arithmetic is done on unsigned ints, where wrap-around is well
 * defined; the same computation on signed ints is undefined behaviour
 * as soon as it overflows.  The result is converted to int only at the
 * very end.
 *
 * @param s string to hash
 * @return  hash value; 0 for the empty string
 */
static inline int
hash_string(const std::string& s) {
	unsigned int ret = 0;
	unsigned int a = 63689;
	const unsigned int b = 378551;
	for(size_t i = 0; i < s.length(); i++) {
		// Go through int first so that a negative char value is
		// sign-extended before it is added.
		ret = (ret * a) + (unsigned int)(int)s[i];
		if(a == 0) {
			a += b;
		} else {
			a *= b;
		}
		// Never let the multiplier collapse to zero
		if(a == 0) {
			a += b;
		}
	}
	return (int)ret;
}
/*
 * Feature-flag bit masks.  Each group is meant to be tested against the
 * register named in the comment that introduces it, after a cpuid call
 * has filled that register.
 * NOTE(review): the cpuid leaf each group belongs to is not stated in
 * this file -- confirm against the CPU vendor documentation.
 */

/* %ecx */
#define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1)
#define bit_SSSE3 (1 << 9)
#define bit_FMA (1 << 12)
#define bit_CMPXCHG16B (1 << 13)
#define bit_SSE4_1 (1 << 19)
#define bit_SSE4_2 (1 << 20)
#define bit_MOVBE (1 << 22)
#define bit_POPCNT (1 << 23)
#define bit_AES (1 << 25)
#define bit_XSAVE (1 << 26)
#define bit_OSXSAVE (1 << 27)
#define bit_AVX (1 << 28)
#define bit_F16C (1 << 29)
#define bit_RDRND (1 << 30)

/* %edx */
#define bit_CMPXCHG8B (1 << 8)
#define bit_CMOV (1 << 15)
#define bit_MMX (1 << 23)
#define bit_FXSAVE (1 << 24)
#define bit_SSE (1 << 25)
#define bit_SSE2 (1 << 26)

/* Extended Features */
/* %ecx */
#define bit_LAHF_LM (1 << 0)
#define bit_ABM (1 << 5)
#define bit_SSE4a (1 << 6)
#define bit_XOP (1 << 11)
#define bit_LWP (1 << 15)
#define bit_FMA4 (1 << 16)
#define bit_TBM (1 << 21)

/* %edx */
#define bit_LM (1 << 29)
#define bit_3DNOWP (1 << 30)
/* NOTE(review): shifts a signed 1 into the sign bit of a 32-bit int. */
#define bit_3DNOW (1 << 31)

/* Extended Features (%eax == 7) */
#define bit_FSGSBASE (1 << 0)
#define bit_BMI (1 << 3)

/*
 * __cpuid(level, a, b, c, d)
 *   Execute cpuid with 'level' in %eax and store the resulting
 *   %eax/%ebx/%ecx/%edx in the lvalues a/b/c/d.
 * __cpuid_count(level, count, a, b, c, d)
 *   Same, but additionally passes 'count' in %ecx.
 *
 * Three variants follow.  For 32-bit PIC code %ebx is exchanged with a
 * scratch register around the cpuid instruction instead of being named
 * as an output, because %ebx may be the PIC register; that case comes in
 * two spellings, one using the {att|intel} dialect alternatives
 * (GCC >= 3) and one plain AT&T.  Everywhere else %ebx is used directly.
 */
#if defined(__i386__) && defined(__PIC__)
/* %ebx may be the PIC register. */
#if __GNUC__ >= 3
#define __cpuid(level, a, b, c, d) \
  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
           "cpuid\n\t" \
           "xchg{l}\t{%%}ebx, %1\n\t" \
           : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
           : "0" (level))

#define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
           "cpuid\n\t" \
           "xchg{l}\t{%%}ebx, %1\n\t" \
           : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
           : "0" (level), "2" (count))
#else
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
   nor alternatives in i386 code. */
#define __cpuid(level, a, b, c, d) \
  __asm__ ("xchgl\t%%ebx, %1\n\t" \
           "cpuid\n\t" \
           "xchgl\t%%ebx, %1\n\t" \
           : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
           : "0" (level))

#define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("xchgl\t%%ebx, %1\n\t" \
           "cpuid\n\t" \
           "xchgl\t%%ebx, %1\n\t" \
           : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
           : "0" (level), "2" (count))
#endif
#else
#define __cpuid(level, a, b, c, d) \
  __asm__ ("cpuid\n\t" \
           : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
           : "0" (level))

#define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("cpuid\n\t" \
           : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
           : "0" (level), "2" (count))
#endif

/* Return highest supported input value for cpuid instruction.  ext can
   be either 0x0 or 0x80000000 to return highest supported value for
   basic or extended cpuid information.  Function returns 0 if cpuid
   is not supported or whatever cpuid returns in eax register.  If sig
   pointer is non-null, then first four bytes of the signature
   (as found in ebx register) are returned in location pointed by sig.  */

static __inline unsigned int
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
{
  unsigned int __eax, __ebx, __ecx, __edx;

#ifndef __x86_64__
  /* Probe for cpuid: read the flags register, flip bit 0x00200000, write
     the value back, then read the flags again.  __ebx keeps the original
     flags and __eax ends up with the re-read value; the last popf
     restores the original flags.  */
#if __GNUC__ >= 3
  /* See if we can use cpuid.  On AMD64 we always can.  */
  __asm__ ("pushf{l|d}\n\t"
           "pushf{l|d}\n\t"
           "pop{l}\t%0\n\t"
           "mov{l}\t{%0, %1|%1, %0}\n\t"
           "xor{l}\t{%2, %0|%0, %2}\n\t"
           "push{l}\t%0\n\t"
           "popf{l|d}\n\t"
           "pushf{l|d}\n\t"
           "pop{l}\t%0\n\t"
           "popf{l|d}\n\t"
           : "=&r" (__eax), "=&r" (__ebx)
           : "i" (0x00200000));
#else
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
   nor alternatives in i386 code.  */
  __asm__ ("pushfl\n\t"
           "pushfl\n\t"
           "popl\t%0\n\t"
           "movl\t%0, %1\n\t"
           "xorl\t%2, %0\n\t"
           "pushl\t%0\n\t"
           "popfl\n\t"
           "pushfl\n\t"
           "popl\t%0\n\t"
           "popfl\n\t"
           : "=&r" (__eax), "=&r" (__ebx)
           : "i" (0x00200000));
#endif

  /* The bit did not change, so the flip was refused: no cpuid.  */
  if (!((__eax ^ __ebx) & 0x00200000))
    return 0;
#endif

  /* Host supports cpuid.  Return highest supported cpuid input value.  */
  __cpuid (__ext, __eax, __ebx, __ecx, __edx);

  if (__sig)
    *__sig = __ebx;

  return __eax;
}

/* Return cpuid data for requested cpuid level, as found in returned
   eax, ebx, ecx and edx registers.  The function checks if cpuid is
   supported and returns 1 for valid cpuid information or 0 for
   unsupported cpuid level.  All pointers are required to be non-null.  */

static __inline int
__get_cpuid (unsigned int __level,
             unsigned int *__eax, unsigned int *__ebx,
             unsigned int *__ecx, unsigned int *__edx)
{
  /* Top bit of the level selects the basic (0x0) or extended
     (0x80000000) range whose maximum is queried below.  */
  unsigned int __ext = __level & 0x80000000;

  if (__get_cpuid_max (__ext, 0) < __level)
    return 0;

  __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
  return 1;
}
+ */ + +#ifndef THREADING_H_ +#define THREADING_H_ + +#include +#include "tinythread.h" +#include "fast_mutex.h" + +#ifdef NO_SPINLOCK +# define MUTEX_T tthread::mutex +#else +# define MUTEX_T tthread::fast_mutex +#endif /* NO_SPINLOCK */ + + +/** + * Wrap a lock; obtain lock upon construction, release upon destruction. + */ +class ThreadSafe { +public: + ThreadSafe(MUTEX_T* ptr_mutex, bool locked = true) { + if(locked) { + this->ptr_mutex = ptr_mutex; + ptr_mutex->lock(); + } + else + this->ptr_mutex = NULL; + } + + ~ThreadSafe() { + if (ptr_mutex != NULL) + ptr_mutex->unlock(); + } + +private: + MUTEX_T *ptr_mutex; +}; + +#endif diff --git a/timer.h b/timer.h new file mode 100644 index 0000000..5d0c844 --- /dev/null +++ b/timer.h @@ -0,0 +1,87 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef TIMER_H_ +#define TIMER_H_ + +#include +#include +#include +#include + +using namespace std; + +/** + * Use time() call to keep track of elapsed time between creation and + * destruction. If verbose is true, Timer will print a message showing + * elapsed time to the given output stream upon destruction. 
/**
 * Use time() call to keep track of elapsed time between creation and
 * destruction.  If verbose is true, Timer will print a message showing
 * elapsed time to the given output stream upon destruction.
 *
 * The stream is held by reference and the message by pointer; both must
 * outlive the Timer.
 */
class Timer {
public:
	/**
	 * @param out     stream the destructor reports to
	 * @param msg     text printed in front of the elapsed time
	 * @param verbose false suppresses the report at destruction
	 */
	Timer(std::ostream& out = std::cout, const char *msg = "", bool verbose = true) :
		_t(std::time(0)), _out(out), _msg(msg), _verbose(verbose) { }

	/// Optionally print message
	~Timer() {
		if(_verbose) write(_out);
	}

	/// Return elapsed time (whole seconds) since Timer object was created
	std::time_t elapsed() const {
		return std::time(0) - _t;
	}

	/**
	 * Print the message supplied at construction time followed by the
	 * time elapsed formatted HH:MM:SS and a newline.
	 */
	void write(std::ostream& out) const {
		const std::time_t passed  = elapsed();
		const std::time_t hours   = (passed / 60) / 60;
		const std::time_t minutes = (passed / 60) % 60;
		const std::time_t seconds = (passed % 60);
		// Build the whole line first so that it reaches 'out' in a
		// single insertion.
		std::ostringstream oss;
		oss << _msg << std::setfill('0') << std::setw(2) << hours << ":"
		            << std::setfill('0') << std::setw(2) << minutes << ":"
		            << std::setfill('0') << std::setw(2) << seconds << '\n';
		out << oss.str();
	}

private:
	std::time_t   _t;       // creation time
	std::ostream& _out;     // where the destructor reports
	const char   *_msg;     // prefix for the report (not owned)
	bool          _verbose; // report at destruction?
};

/**
 * Write the current local time of day as HH:MM:SS to 'os', followed by
 * a newline if 'nl' is true.  If the time cannot be converted to local
 * time, "??:??:??" is written instead.
 */
static inline void logTime(std::ostream& os, bool nl = true) {
	std::time_t now;
	std::time(&now);
	const std::tm *current = std::localtime(&now);
	std::ostringstream oss;
	if(current == 0) {
		// localtime() reports failure with a null pointer
		oss << "??:??:??";
	} else {
		// localtime() hands out a buffer that later calls may overwrite;
		// take a private copy before formatting.
		const std::tm snapshot = *current;
		oss << std::setfill('0') << std::setw(2)
		    << snapshot.tm_hour << ":"
		    << std::setfill('0') << std::setw(2)
		    << snapshot.tm_min << ":"
		    << std::setfill('0') << std::setw(2)
		    << snapshot.tm_sec;
	}
	if(nl) oss << '\n';
	os << oss.str();
}
The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +*/ + +#include +#include "tinythread.h" + +#if defined(_TTHREAD_POSIX_) + #include + #include +#elif defined(_TTHREAD_WIN32_) + #include +#endif + + +namespace tthread { + +//------------------------------------------------------------------------------ +// condition_variable +//------------------------------------------------------------------------------ +// NOTE 1: The Win32 implementation of the condition_variable class is based on +// the corresponding implementation in GLFW, which in turn is based on a +// description by Douglas C. Schmidt and Irfan Pyarali: +// http://www.cs.wustl.edu/~schmidt/win32-cv-1.html +// +// NOTE 2: Windows Vista actually has native support for condition variables +// (InitializeConditionVariable, WakeConditionVariable, etc), but we want to +// be portable with pre-Vista Windows versions, so TinyThread++ does not use +// Vista condition variables. 
+//------------------------------------------------------------------------------ + +#if defined(_TTHREAD_WIN32_) + #define _CONDITION_EVENT_ONE 0 + #define _CONDITION_EVENT_ALL 1 +#endif + +#if defined(_TTHREAD_WIN32_) +condition_variable::condition_variable() : mWaitersCount(0) +{ + mEvents[_CONDITION_EVENT_ONE] = CreateEvent(NULL, FALSE, FALSE, NULL); + mEvents[_CONDITION_EVENT_ALL] = CreateEvent(NULL, TRUE, FALSE, NULL); + InitializeCriticalSection(&mWaitersCountLock); +} +#endif + +#if defined(_TTHREAD_WIN32_) +condition_variable::~condition_variable() +{ + CloseHandle(mEvents[_CONDITION_EVENT_ONE]); + CloseHandle(mEvents[_CONDITION_EVENT_ALL]); + DeleteCriticalSection(&mWaitersCountLock); +} +#endif + +#if defined(_TTHREAD_WIN32_) +void condition_variable::_wait() +{ + // Wait for either event to become signaled due to notify_one() or + // notify_all() being called + int result = WaitForMultipleObjects(2, mEvents, FALSE, INFINITE); + + // Check if we are the last waiter + EnterCriticalSection(&mWaitersCountLock); + -- mWaitersCount; + bool lastWaiter = (result == (WAIT_OBJECT_0 + _CONDITION_EVENT_ALL)) && + (mWaitersCount == 0); + LeaveCriticalSection(&mWaitersCountLock); + + // If we are the last waiter to be notified to stop waiting, reset the event + if(lastWaiter) + ResetEvent(mEvents[_CONDITION_EVENT_ALL]); +} +#endif + +#if defined(_TTHREAD_WIN32_) +void condition_variable::notify_one() +{ + // Are there any waiters? + EnterCriticalSection(&mWaitersCountLock); + bool haveWaiters = (mWaitersCount > 0); + LeaveCriticalSection(&mWaitersCountLock); + + // If we have any waiting threads, send them a signal + if(haveWaiters) + SetEvent(mEvents[_CONDITION_EVENT_ONE]); +} +#endif + +#if defined(_TTHREAD_WIN32_) +void condition_variable::notify_all() +{ + // Are there any waiters? 
+ EnterCriticalSection(&mWaitersCountLock); + bool haveWaiters = (mWaitersCount > 0); + LeaveCriticalSection(&mWaitersCountLock); + + // If we have any waiting threads, send them a signal + if(haveWaiters) + SetEvent(mEvents[_CONDITION_EVENT_ALL]); +} +#endif + + +//------------------------------------------------------------------------------ +// POSIX pthread_t to unique thread::id mapping logic. +// Note: Here we use a global thread safe std::map to convert instances of +// pthread_t to small thread identifier numbers (unique within one process). +// This method should be portable across different POSIX implementations. +//------------------------------------------------------------------------------ + +#if defined(_TTHREAD_POSIX_) +static thread::id _pthread_t_to_ID(const pthread_t &aHandle) +{ + static mutex idMapLock; + static std::map idMap; + static unsigned long int idCount(1); + + lock_guard guard(idMapLock); + if(idMap.find(aHandle) == idMap.end()) + idMap[aHandle] = idCount ++; + return thread::id(idMap[aHandle]); +} +#endif // _TTHREAD_POSIX_ + + +//------------------------------------------------------------------------------ +// thread +//------------------------------------------------------------------------------ + +/// Information to pass to the new thread (what to run). +struct _thread_start_info { + void (*mFunction)(void *); ///< Pointer to the function to be executed. + void * mArg; ///< Function argument for the thread function. + thread * mThread; ///< Pointer to the thread object. +}; + +// Thread wrapper function. +#if defined(_TTHREAD_WIN32_) +unsigned WINAPI thread::wrapper_function(void * aArg) +#elif defined(_TTHREAD_POSIX_) +void * thread::wrapper_function(void * aArg) +#endif +{ + // Get thread startup information + _thread_start_info * ti = (_thread_start_info *) aArg; + + try + { + // Call the actual client thread function + ti->mFunction(ti->mArg); + } + catch(...) 
+ { + // Uncaught exceptions will terminate the application (default behavior + // according to C++11) + std::terminate(); + } + + // The thread is no longer executing + lock_guard guard(ti->mThread->mDataMutex); + ti->mThread->mNotAThread = true; + + // The thread is responsible for freeing the startup information + delete ti; + + return 0; +} + +thread::thread(void (*aFunction)(void *), void * aArg) +{ + // Serialize access to this thread structure + lock_guard guard(mDataMutex); + + // Fill out the thread startup information (passed to the thread wrapper, + // which will eventually free it) + _thread_start_info * ti = new _thread_start_info; + ti->mFunction = aFunction; + ti->mArg = aArg; + ti->mThread = this; + + // The thread is now alive + mNotAThread = false; + + // Create the thread +#if defined(_TTHREAD_WIN32_) + mHandle = (HANDLE) _beginthreadex(0, 0, wrapper_function, (void *) ti, 0, &mWin32ThreadID); +#elif defined(_TTHREAD_POSIX_) + int err = 0; + pthread_attr_t stackSizeAttribute; + size_t stackSize = 0; + + err = pthread_attr_init(&stackSizeAttribute); + if(err) throw "Error: pthread_attr_init"; + + err = pthread_attr_getstacksize(&stackSizeAttribute, &stackSize); + if(err) throw "Error: pthread_attr_getstacksize"; + + size_t REQUIRED_STACK_SIZE = 4 * 1024 * 1024; + if(stackSize < REQUIRED_STACK_SIZE) { + err = pthread_attr_setstacksize(&stackSizeAttribute, REQUIRED_STACK_SIZE); + if(err) throw "Error: pthread_attr_setstacksize"; + } + err = pthread_attr_getstacksize(&stackSizeAttribute, &stackSize); + + if(pthread_create(&mHandle, &stackSizeAttribute, wrapper_function, (void *) ti) != 0) + mHandle = 0; +#endif + + // Did we fail to create the thread? 
+ if(!mHandle) + { + mNotAThread = true; + delete ti; + } +} + +thread::~thread() +{ + if(joinable()) + std::terminate(); +} + +void thread::join() +{ + if(joinable()) + { +#if defined(_TTHREAD_WIN32_) + WaitForSingleObject(mHandle, INFINITE); + CloseHandle(mHandle); +#elif defined(_TTHREAD_POSIX_) + pthread_join(mHandle, NULL); +#endif + } +} + +bool thread::joinable() const +{ + mDataMutex.lock(); + bool result = !mNotAThread; + mDataMutex.unlock(); + return result; +} + +void thread::detach() +{ + mDataMutex.lock(); + if(!mNotAThread) + { +#if defined(_TTHREAD_WIN32_) + CloseHandle(mHandle); +#elif defined(_TTHREAD_POSIX_) + pthread_detach(mHandle); +#endif + mNotAThread = true; + } + mDataMutex.unlock(); +} + +thread::id thread::get_id() const +{ + if(!joinable()) + return id(); +#if defined(_TTHREAD_WIN32_) + return id((unsigned long int) mWin32ThreadID); +#elif defined(_TTHREAD_POSIX_) + return _pthread_t_to_ID(mHandle); +#endif +} + +unsigned thread::hardware_concurrency() +{ +#if defined(_TTHREAD_WIN32_) + SYSTEM_INFO si; + GetSystemInfo(&si); + return (int) si.dwNumberOfProcessors; +#elif defined(_SC_NPROCESSORS_ONLN) + return (int) sysconf(_SC_NPROCESSORS_ONLN); +#elif defined(_SC_NPROC_ONLN) + return (int) sysconf(_SC_NPROC_ONLN); +#else + // The standard requires this function to return zero if the number of + // hardware cores could not be determined. 
+ return 0; +#endif +} + + +//------------------------------------------------------------------------------ +// this_thread +//------------------------------------------------------------------------------ + +thread::id this_thread::get_id() +{ +#if defined(_TTHREAD_WIN32_) + return thread::id((unsigned long int) GetCurrentThreadId()); +#elif defined(_TTHREAD_POSIX_) + return _pthread_t_to_ID(pthread_self()); +#endif +} + +} diff --git a/tinythread.h b/tinythread.h new file mode 100644 index 0000000..aed7b58 --- /dev/null +++ b/tinythread.h @@ -0,0 +1,714 @@ +/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; -*- +Copyright (c) 2010-2012 Marcus Geelnard + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +*/ + +#ifndef _TINYTHREAD_H_ +#define _TINYTHREAD_H_ + +/// @file +/// @mainpage TinyThread++ API Reference +/// +/// @section intro_sec Introduction +/// TinyThread++ is a minimal, portable implementation of basic threading +/// classes for C++. +/// +/// They closely mimic the functionality and naming of the C++11 standard, and +/// should be easily replaceable with the corresponding std:: variants. 
///
/// @section port_sec Portability
/// The Win32 variant uses the native Win32 API for implementing the thread
/// classes, while for other systems, the POSIX threads API (pthread) is used.
///
/// @section class_sec Classes
/// In order to mimic the threading API of the C++11 standard, subsets of
/// several classes are provided. The fundamental classes are:
/// @li tthread::thread
/// @li tthread::mutex
/// @li tthread::recursive_mutex
/// @li tthread::condition_variable
/// @li tthread::lock_guard
/// @li tthread::fast_mutex
///
/// @section misc_sec Miscellaneous
/// The following special keywords are available: #thread_local.
///
/// For more detailed information (including additional classes), browse the
/// different sections of this documentation. A good place to start is:
/// tinythread.h.

// Which platform are we on?
// Exactly one of _TTHREAD_WIN32_ / _TTHREAD_POSIX_ ends up defined; the guard
// lets another header (or the build) make the decision first.
#if !defined(_TTHREAD_PLATFORM_DEFINED_)
  #if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
    #define _TTHREAD_WIN32_
  #else
    #define _TTHREAD_POSIX_
  #endif
  #define _TTHREAD_PLATFORM_DEFINED_
#endif

// Platform specific includes
#if defined(_TTHREAD_WIN32_)
  // Pull in windows.h in "lean" mode, but only undo WIN32_LEAN_AND_MEAN if it
  // was this header that defined it.
  #ifndef WIN32_LEAN_AND_MEAN
    #define WIN32_LEAN_AND_MEAN
    #define __UNDEF_LEAN_AND_MEAN
  #endif
  #include <windows.h>   // CRITICAL_SECTION, HANDLE, Sleep(), ...
  #ifdef __UNDEF_LEAN_AND_MEAN
    #undef WIN32_LEAN_AND_MEAN
    #undef __UNDEF_LEAN_AND_MEAN
  #endif
#else
  #include <pthread.h>   // pthread_mutex_*, pthread_cond_*, pthread_t
  #include <signal.h>    // restored per upstream TinyThread++; nothing in this
                         // header visibly needs it - TODO confirm
  #include <sched.h>     // sched_yield()
  #include <unistd.h>    // usleep()
#endif

// Generic includes
#include <ostream>       // std::ostream for thread::id's operator<<

/// TinyThread++ version (major number).
#define TINYTHREAD_VERSION_MAJOR 1
/// TinyThread++ version (minor number).
#define TINYTHREAD_VERSION_MINOR 1
/// TinyThread++ version (full version).
#define TINYTHREAD_VERSION (TINYTHREAD_VERSION_MAJOR * 100 + TINYTHREAD_VERSION_MINOR)

// Do we have a fully featured C++11 compiler?
#if (__cplusplus > 199711L) || (defined(__STDCXX_VERSION__) && (__STDCXX_VERSION__ >= 201001L))
  #define _TTHREAD_CPP11_
#endif

// ...at least partial C++11?
#if defined(_TTHREAD_CPP11_) || defined(__GXX_EXPERIMENTAL_CXX0X__) || defined(__GXX_EXPERIMENTAL_CPP0X__)
  #define _TTHREAD_CPP11_PARTIAL_
#endif

// Macro for disabling assignments of objects.
// With (partial) C++11 the copy operations are deleted; otherwise they are
// only declared, so any use fails at link time (or at compile time when the
// macro is placed in a private section).
#ifdef _TTHREAD_CPP11_PARTIAL_
  #define _TTHREAD_DISABLE_ASSIGNMENT(name) \
      name(const name&) = delete; \
      name& operator=(const name&) = delete;
#else
  #define _TTHREAD_DISABLE_ASSIGNMENT(name) \
      name(const name&); \
      name& operator=(const name&);
#endif

/// @def thread_local
/// Thread local storage keyword.
/// A variable that is declared with the @c thread_local keyword makes the
/// value of the variable local to each thread (known as thread-local storage,
/// or TLS). Example usage:
/// @code
/// // This variable is local to each thread.
/// thread_local int variable;
/// @endcode
/// @note The @c thread_local keyword is a macro that maps to the corresponding
/// compiler directive (e.g. @c __declspec(thread)). While the C++11 standard
/// allows for non-trivial types (e.g. classes with constructors and
/// destructors) to be declared with the @c thread_local keyword, most pre-C++11
/// compilers only allow for trivial types (e.g. @c int). So, to guarantee
/// portable code, only use trivial types for thread local storage.
/// @note This directive is currently not supported on Mac OS X (it will give
/// a compiler error), since compile-time TLS is not supported in the Mac OS X
/// executable format. Also, some older versions of MinGW (before GCC 4.x) do
/// not support this directive.
/// @hideinitializer

// Only emulate the keyword when the compiler is not C++11 and nobody else
// has already defined a thread_local macro.
#if !defined(_TTHREAD_CPP11_) && !defined(thread_local)
 #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_CC) || defined(__IBMCPP__)
  #define thread_local __thread
 #else
  #define thread_local __declspec(thread)
 #endif
#endif


/// Main name space for TinyThread++.
/// This namespace is more or less equivalent to the @c std namespace for the
/// C++11 thread classes.
For instance, the tthread::mutex class corresponds to +/// the std::mutex class. +namespace tthread { + +/// Mutex class. +/// This is a mutual exclusion object for synchronizing access to shared +/// memory areas for several threads. The mutex is non-recursive (i.e. a +/// program may deadlock if the thread that owns a mutex object calls lock() +/// on that object). +/// @see recursive_mutex +class mutex { + public: + /// Constructor. + mutex() +#if defined(_TTHREAD_WIN32_) + : mAlreadyLocked(false) +#endif + { +#if defined(_TTHREAD_WIN32_) + InitializeCriticalSection(&mHandle); +#else + pthread_mutex_init(&mHandle, NULL); +#endif + } + + /// Destructor. + ~mutex() + { +#if defined(_TTHREAD_WIN32_) + DeleteCriticalSection(&mHandle); +#else + pthread_mutex_destroy(&mHandle); +#endif + } + + /// Lock the mutex. + /// The method will block the calling thread until a lock on the mutex can + /// be obtained. The mutex remains locked until @c unlock() is called. + /// @see lock_guard + inline void lock() + { +#if defined(_TTHREAD_WIN32_) + EnterCriticalSection(&mHandle); + while(mAlreadyLocked) Sleep(1000); // Simulate deadlock... + mAlreadyLocked = true; +#else + pthread_mutex_lock(&mHandle); +#endif + } + + /// Try to lock the mutex. + /// The method will try to lock the mutex. If it fails, the function will + /// return immediately (non-blocking). + /// @return @c true if the lock was acquired, or @c false if the lock could + /// not be acquired. + inline bool try_lock() + { +#if defined(_TTHREAD_WIN32_) + bool ret = (TryEnterCriticalSection(&mHandle) ? true : false); + if(ret && mAlreadyLocked) + { + LeaveCriticalSection(&mHandle); + ret = false; + } + return ret; +#else + return (pthread_mutex_trylock(&mHandle) == 0) ? true : false; +#endif + } + + /// Unlock the mutex. + /// If any threads are waiting for the lock on this mutex, one of them will + /// be unblocked. 
+ inline void unlock() + { +#if defined(_TTHREAD_WIN32_) + mAlreadyLocked = false; + LeaveCriticalSection(&mHandle); +#else + pthread_mutex_unlock(&mHandle); +#endif + } + + _TTHREAD_DISABLE_ASSIGNMENT(mutex) + + private: +#if defined(_TTHREAD_WIN32_) + CRITICAL_SECTION mHandle; + bool mAlreadyLocked; +#else + pthread_mutex_t mHandle; +#endif + + friend class condition_variable; +}; + +/// Recursive mutex class. +/// This is a mutual exclusion object for synchronizing access to shared +/// memory areas for several threads. The mutex is recursive (i.e. a thread +/// may lock the mutex several times, as long as it unlocks the mutex the same +/// number of times). +/// @see mutex +class recursive_mutex { + public: + /// Constructor. + recursive_mutex() + { +#if defined(_TTHREAD_WIN32_) + InitializeCriticalSection(&mHandle); +#else + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&mHandle, &attr); +#endif + } + + /// Destructor. + ~recursive_mutex() + { +#if defined(_TTHREAD_WIN32_) + DeleteCriticalSection(&mHandle); +#else + pthread_mutex_destroy(&mHandle); +#endif + } + + /// Lock the mutex. + /// The method will block the calling thread until a lock on the mutex can + /// be obtained. The mutex remains locked until @c unlock() is called. + /// @see lock_guard + inline void lock() + { +#if defined(_TTHREAD_WIN32_) + EnterCriticalSection(&mHandle); +#else + pthread_mutex_lock(&mHandle); +#endif + } + + /// Try to lock the mutex. + /// The method will try to lock the mutex. If it fails, the function will + /// return immediately (non-blocking). + /// @return @c true if the lock was acquired, or @c false if the lock could + /// not be acquired. + inline bool try_lock() + { +#if defined(_TTHREAD_WIN32_) + return TryEnterCriticalSection(&mHandle) ? true : false; +#else + return (pthread_mutex_trylock(&mHandle) == 0) ? true : false; +#endif + } + + /// Unlock the mutex. 
/// Lock guard class.
/// The constructor locks the mutex, and the destructor unlocks the mutex, so
/// the mutex will automatically be unlocked when the lock guard goes out of
/// scope. Example usage:
/// @code
/// mutex m;
/// int counter;
///
/// void increment()
/// {
///   lock_guard<mutex> guard(m);
///   ++ counter;
/// }
/// @endcode
/// @tparam T Mutex type; it must provide @c lock() and @c unlock().
/// @note The class does not disable copying. A copy shares the same mutex
/// pointer and would unlock it a second time on destruction, so do not copy
/// guards.

template <class T>
class lock_guard {
  public:
    typedef T mutex_type;

    /// Construct a guard that owns no mutex; its destructor does nothing.
    lock_guard() : mMutex(0) {}

    /// The constructor locks the mutex.
    explicit lock_guard(mutex_type &aMutex) : mMutex(&aMutex)
    {
      mMutex->lock();
    }

    /// The destructor unlocks the mutex.
    ~lock_guard()
    {
      if(mMutex)
        mMutex->unlock();
    }

  private:
    mutex_type * mMutex;   ///< Guarded mutex, or null for an empty guard.
};
+#if defined(_TTHREAD_WIN32_) + condition_variable(); +#else + condition_variable() + { + pthread_cond_init(&mHandle, NULL); + } +#endif + + /// Destructor. +#if defined(_TTHREAD_WIN32_) + ~condition_variable(); +#else + ~condition_variable() + { + pthread_cond_destroy(&mHandle); + } +#endif + + /// Wait for the condition. + /// The function will block the calling thread until the condition variable + /// is woken by @c notify_one(), @c notify_all() or a spurious wake up. + /// @param[in] aMutex A mutex that will be unlocked when the wait operation + /// starts, an locked again as soon as the wait operation is finished. + template + inline void wait(_mutexT &aMutex) + { +#if defined(_TTHREAD_WIN32_) + // Increment number of waiters + EnterCriticalSection(&mWaitersCountLock); + ++ mWaitersCount; + LeaveCriticalSection(&mWaitersCountLock); + + // Release the mutex while waiting for the condition (will decrease + // the number of waiters when done)... + aMutex.unlock(); + _wait(); + aMutex.lock(); +#else + pthread_cond_wait(&mHandle, &aMutex.mHandle); +#endif + } + + /// Notify one thread that is waiting for the condition. + /// If at least one thread is blocked waiting for this condition variable, + /// one will be woken up. + /// @note Only threads that started waiting prior to this call will be + /// woken up. +#if defined(_TTHREAD_WIN32_) + void notify_one(); +#else + inline void notify_one() + { + pthread_cond_signal(&mHandle); + } +#endif + + /// Notify all threads that are waiting for the condition. + /// All threads that are blocked waiting for this condition variable will + /// be woken up. + /// @note Only threads that started waiting prior to this call will be + /// woken up. 
+#if defined(_TTHREAD_WIN32_) + void notify_all(); +#else + inline void notify_all() + { + pthread_cond_broadcast(&mHandle); + } +#endif + + _TTHREAD_DISABLE_ASSIGNMENT(condition_variable) + + private: +#if defined(_TTHREAD_WIN32_) + void _wait(); + HANDLE mEvents[2]; ///< Signal and broadcast event HANDLEs. + unsigned int mWaitersCount; ///< Count of the number of waiters. + CRITICAL_SECTION mWaitersCountLock; ///< Serialize access to mWaitersCount. +#else + pthread_cond_t mHandle; +#endif +}; + + +/// Thread class. +class thread { + public: +#if defined(_TTHREAD_WIN32_) + typedef HANDLE native_handle_type; +#else + typedef pthread_t native_handle_type; +#endif + + class id; + + /// Default constructor. + /// Construct a @c thread object without an associated thread of execution + /// (i.e. non-joinable). + thread() : mHandle(0), mNotAThread(true) +#if defined(_TTHREAD_WIN32_) + , mWin32ThreadID(0) +#endif + {} + + /// Thread starting constructor. + /// Construct a @c thread object with a new thread of execution. + /// @param[in] aFunction A function pointer to a function of type: + /// void fun(void * arg) + /// @param[in] aArg Argument to the thread function. + /// @note This constructor is not fully compatible with the standard C++ + /// thread class. It is more similar to the pthread_create() (POSIX) and + /// CreateThread() (Windows) functions. + thread(void (*aFunction)(void *), void * aArg); + + /// Destructor. + /// @note If the thread is joinable upon destruction, @c std::terminate() + /// will be called, which terminates the process. It is always wise to do + /// @c join() before deleting a thread object. + ~thread(); + + /// Wait for the thread to finish (join execution flows). + /// After calling @c join(), the thread object is no longer associated with + /// a thread of execution (i.e. it is not joinable, and you may not join + /// with it nor detach from it). + void join(); + + /// Check if the thread is joinable. 
+ /// A thread object is joinable if it has an associated thread of execution. + bool joinable() const; + + /// Detach from the thread. + /// After calling @c detach(), the thread object is no longer assicated with + /// a thread of execution (i.e. it is not joinable). The thread continues + /// execution without the calling thread blocking, and when the thread + /// ends execution, any owned resources are released. + void detach(); + + /// Return the thread ID of a thread object. + id get_id() const; + + /// Get the native handle for this thread. + /// @note Under Windows, this is a @c HANDLE, and under POSIX systems, this + /// is a @c pthread_t. + inline native_handle_type native_handle() + { + return mHandle; + } + + /// Determine the number of threads which can possibly execute concurrently. + /// This function is useful for determining the optimal number of threads to + /// use for a task. + /// @return The number of hardware thread contexts in the system. + /// @note If this value is not defined, the function returns zero (0). + static unsigned hardware_concurrency(); + + _TTHREAD_DISABLE_ASSIGNMENT(thread) + + private: + native_handle_type mHandle; ///< Thread handle. + mutable mutex mDataMutex; ///< Serializer for access to the thread private data. + bool mNotAThread; ///< True if this object is not a thread of execution. +#if defined(_TTHREAD_WIN32_) + unsigned int mWin32ThreadID; ///< Unique thread ID (filled out by _beginthreadex). +#endif + + // This is the internal thread wrapper function. +#if defined(_TTHREAD_WIN32_) + static unsigned WINAPI wrapper_function(void * aArg); +#else + static void * wrapper_function(void * aArg); +#endif +}; + +/// Thread ID. +/// The thread ID is a unique identifier for each thread. +/// @see thread::get_id() +class thread::id { + public: + /// Default constructor. + /// The default constructed ID is that of thread without a thread of + /// execution. 
+ id() : mId(0) {}; + + id(unsigned long int aId) : mId(aId) {}; + + id(const id& aId) : mId(aId.mId) {}; + + inline id & operator=(const id &aId) + { + mId = aId.mId; + return *this; + } + + inline friend bool operator==(const id &aId1, const id &aId2) + { + return (aId1.mId == aId2.mId); + } + + inline friend bool operator!=(const id &aId1, const id &aId2) + { + return (aId1.mId != aId2.mId); + } + + inline friend bool operator<=(const id &aId1, const id &aId2) + { + return (aId1.mId <= aId2.mId); + } + + inline friend bool operator<(const id &aId1, const id &aId2) + { + return (aId1.mId < aId2.mId); + } + + inline friend bool operator>=(const id &aId1, const id &aId2) + { + return (aId1.mId >= aId2.mId); + } + + inline friend bool operator>(const id &aId1, const id &aId2) + { + return (aId1.mId > aId2.mId); + } + + inline friend std::ostream& operator <<(std::ostream &os, const id &obj) + { + os << obj.mId; + return os; + } + + private: + unsigned long int mId; +}; + + +// Related to - minimal to be able to support chrono. +typedef long long __intmax_t; + +/// Minimal implementation of the @c ratio class. This class provides enough +/// functionality to implement some basic @c chrono classes. +template <__intmax_t N, __intmax_t D = 1> class ratio { + public: + static double _as_double() { return double(N) / double(D); } +}; + +/// Minimal implementation of the @c chrono namespace. +/// The @c chrono namespace provides types for specifying time intervals. +namespace chrono { + /// Duration template class. This class provides enough functionality to + /// implement @c this_thread::sleep_for(). + template > class duration { + private: + _Rep rep_; + public: + typedef _Rep rep; + typedef _Period period; + + /// Construct a duration object with the given duration. + template + explicit duration(const _Rep2& r) : rep_(r) {}; + + /// Return the value of the duration object. + rep count() const + { + return rep_; + } + }; + + // Standard duration types. 
+ typedef duration<__intmax_t, ratio<1, 1000000000> > nanoseconds; ///< Duration with the unit nanoseconds. + typedef duration<__intmax_t, ratio<1, 1000000> > microseconds; ///< Duration with the unit microseconds. + typedef duration<__intmax_t, ratio<1, 1000> > milliseconds; ///< Duration with the unit milliseconds. + typedef duration<__intmax_t> seconds; ///< Duration with the unit seconds. + typedef duration<__intmax_t, ratio<60> > minutes; ///< Duration with the unit minutes. + typedef duration<__intmax_t, ratio<3600> > hours; ///< Duration with the unit hours. +} + +/// The namespace @c this_thread provides methods for dealing with the +/// calling thread. +namespace this_thread { + /// Return the thread ID of the calling thread. + thread::id get_id(); + + /// Yield execution to another thread. + /// Offers the operating system the opportunity to schedule another thread + /// that is ready to run on the current processor. + inline void yield() + { +#if defined(_TTHREAD_WIN32_) + Sleep(0); +#else + sched_yield(); +#endif + } + + /// Blocks the calling thread for a period of time. + /// @param[in] aTime Minimum time to put the thread to sleep. + /// Example usage: + /// @code + /// // Sleep for 100 milliseconds + /// this_thread::sleep_for(chrono::milliseconds(100)); + /// @endcode + /// @note Supported duration types are: nanoseconds, microseconds, + /// milliseconds, seconds, minutes and hours. 
+ template void sleep_for(const chrono::duration<_Rep, _Period>& aTime) + { +#if defined(_TTHREAD_WIN32_) + Sleep(int(double(aTime.count()) * (1000.0 * _Period::_as_double()) + 0.5)); +#else + usleep(int(double(aTime.count()) * (1000000.0 * _Period::_as_double()) + 0.5)); +#endif + } +} + +} + +// Define/macro cleanup +#undef _TTHREAD_DISABLE_ASSIGNMENT + +#endif // _TINYTHREAD_H_ diff --git a/tokenize.h b/tokenize.h new file mode 100644 index 0000000..a5e521c --- /dev/null +++ b/tokenize.h @@ -0,0 +1,62 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef TOKENIZE_H_ +#define TOKENIZE_H_ + +#include +#include +#include + +using namespace std; + +/** + * Split string s according to given delimiters. Mostly borrowed + * from C++ Programming HOWTO 7.3. 
/**
 * Split string s at any character contained in delims and append the
 * pieces to ss.  Mostly borrowed from C++ Programming HOWTO 7.3.
 *
 * A run of consecutive delimiters counts as one separator, so no empty
 * tokens are produced between them; a delimiter at the very start of s
 * does yield one leading empty token, and an empty s yields a single
 * empty token.
 *
 * @param s      string to split
 * @param delims set of delimiter characters
 * @param ss     container receiving the tokens via push_back()
 * @param max    once ss holds max - 1 elements, the next token takes the
 *               whole remainder of s and splitting stops.  The count is
 *               ss.size(), so elements already in ss are included.
 */
template <typename T>
static inline void tokenize(
    const std::string& s,
    const std::string& delims,
    T& ss,
    size_t max = std::numeric_limits<size_t>::max())
{
    std::string::size_type lastPos = 0;
    std::string::size_type pos = s.find_first_of(delims, lastPos);
    while (std::string::npos != pos || std::string::npos != lastPos) {
        // Test the limit before pushing, so that it is also honoured for
        // the very first token (max == 1).
        if(ss.size() == (max - 1)) {
            pos = std::string::npos;
        }
        ss.push_back(s.substr(lastPos, pos - lastPos));
        // Skip the delimiter run, then locate the end of the next token.
        lastPos = s.find_first_not_of(delims, pos);
        pos = s.find_first_of(delims, lastPos);
    }
}

/**
 * Split string s at every occurrence of the single character delim and
 * append the pieces to ss.  Unlike the overload above, consecutive
 * delimiters produce empty tokens (std::getline semantics).
 */
template <typename T>
static inline void tokenize(const std::string& s, char delim, T& ss) {
    std::string token;
    std::istringstream iss(s);
    while(std::getline(iss, token, delim)) {
        ss.push_back(token);
    }
}
/**
 * Bundle of settings that steer spliced / transcriptome-aware alignment:
 * intron length bounds, minimum anchor lengths and a handful of mode flags.
 */
class TranscriptomePolicy {

public:

    /** Create a policy in the reset() state. */
    TranscriptomePolicy() { reset(); }

    /** Create a policy from explicit values; see init() for the fields. */
    TranscriptomePolicy(
        size_t minIntronLen,
        size_t maxIntronLen,
        uint32_t minAnchorLen = 7,
        uint32_t minAnchorLen_noncan = 14,
        bool no_spliced_alignment = false,
        bool transcriptome_mapping_only = false,
        bool transcriptome_assembly = false,
        bool xs_only = false,
        bool avoid_pseudogene = false) :
        minIntronLen_(minIntronLen),
        maxIntronLen_(maxIntronLen),
        minAnchorLen_(minAnchorLen),
        minAnchorLen_noncan_(minAnchorLen_noncan),
        no_spliced_alignment_(no_spliced_alignment),
        transcriptome_mapping_only_(transcriptome_mapping_only),
        transcriptome_assembly_(transcriptome_assembly),
        xs_only_(xs_only),
        avoid_pseudogene_(avoid_pseudogene)
    { }

    /**
     * Return to the baseline state: both intron bounds and the canonical
     * anchor length are zero, every other field takes init()'s default.
     */
    void reset() {
        init(0, 0, 0);
    }

    /**
     * Overwrite every field of the policy.
     */
    void init(
        size_t minIntronLen,
        size_t maxIntronLen,
        uint32_t minAnchorLen = 7,
        uint32_t minAnchorLen_noncan = 14,
        bool no_spliced_alignment = false,
        bool transcriptome_mapping_only = false,
        bool transcriptome_assembly = false,
        bool xs_only = false,
        bool avoid_pseudogene = false)
    {
        // Flags first, then lengths; the assignments are independent.
        no_spliced_alignment_       = no_spliced_alignment;
        transcriptome_mapping_only_ = transcriptome_mapping_only;
        transcriptome_assembly_     = transcriptome_assembly;
        xs_only_                    = xs_only;
        avoid_pseudogene_           = avoid_pseudogene;

        minAnchorLen_        = minAnchorLen;
        minAnchorLen_noncan_ = minAnchorLen_noncan;
        minIntronLen_        = minIntronLen;
        maxIntronLen_        = maxIntronLen;
    }

    // Read-only accessors, one per field.
    size_t   minIntronLen() const               { return minIntronLen_; }
    size_t   maxIntronLen() const               { return maxIntronLen_; }
    uint32_t minAnchorLen() const               { return minAnchorLen_; }
    uint32_t minAnchorLen_noncan() const        { return minAnchorLen_noncan_; }
    bool     no_spliced_alignment() const       { return no_spliced_alignment_; }
    bool     transcriptome_mapping_only() const { return transcriptome_mapping_only_; }
    bool     transcriptome_assembly() const     { return transcriptome_assembly_; }
    bool     xs_only() const                    { return xs_only_; }
    bool     avoid_pseudogene() const           { return avoid_pseudogene_; }

private:
    size_t   minIntronLen_;
    size_t   maxIntronLen_;

    // Minimum anchor length required for canonical splice sites
    uint32_t minAnchorLen_;
    // Minimum anchor length required for non-canonical splice sites
    uint32_t minAnchorLen_noncan_;

    bool     no_spliced_alignment_;
    bool     transcriptome_mapping_only_;
    bool     transcriptome_assembly_;
    bool     xs_only_;
    bool     avoid_pseudogene_;
};
+const TMapq unp_sec_perf[11] = { + 2, 16, 23, 30, 31, 32, 34, 36, 38, 40, 42 +}; + +// The best alignment has a non-perfect score, and we stratify both by best +// alignment score (specifically, the maximum score minus the best "best") +// and by the distance between the best and second-best alignment scores +// ("difference"). Each is stratified into 10 bins. Each row is a +// difference (smaller elts = smaller differences) and each column is a +// best score (smaller elts = higher best alignment scores). +const TMapq unp_sec[11][11] = { + { 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0}, + { 20, 14, 7, 3, 2, 1, 0, 0, 0, 0, 0}, + { 20, 16, 10, 6, 3, 1, 0, 0, 0, 0, 0}, + { 20, 17, 13, 9, 3, 1, 1, 0, 0, 0, 0}, + { 21, 19, 15, 9, 5, 2, 2, 0, 0, 0, 0}, + { 22, 21, 16, 11, 10, 5, 0, 0, 0, 0, 0}, + { 23, 22, 19, 16, 11, 0, 0, 0, 0, 0, 0}, + { 24, 25, 21, 30, 0, 0, 0, 0, 0, 0, 0}, + { 30, 26, 29, 0, 0, 0, 0, 0, 0, 0, 0}, + { 30, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +}; + +// +// Paired mapping quality: +// + +// There is no valid second-best alignment and the best alignment has a +// perfect score. +const TMapq pair_nosec_perf = 44; diff --git a/unique.h b/unique.h new file mode 100644 index 0000000..187e88e --- /dev/null +++ b/unique.h @@ -0,0 +1,531 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
+ */ + +/* + * unique.h + * + * Encapsulates objects and routines for determining whether and to + * what extend the best alignment for a read is "unique." In the + * simplest scenario, uniqueness is determined by whether we found only + * one alignment. More complex scenarios might assign a uniqueness + * score based that is a function of (of a summarized version of): all + * the alignments found and their scores. + * + * Since mapping quality is related to uniqueness, objects and + * routings for calculating mapping quality are also included here. + */ + +#ifndef UNIQUE_H_ +#define UNIQUE_H_ + +#include +#include "aligner_result.h" +#include "simple_func.h" +#include "util.h" +#include "scoring.h" + +typedef int64_t TMapq; + +/** + * Class that returns yes-or-no answers to the question of whether a + */ +class Uniqueness { +public: + + /** + * Given an AlnSetSumm, determine if the best alignment is "unique" + * according to some definition. + */ + static bool bestIsUnique( + const AlnSetSumm& s, + const AlnFlags& flags, + bool mate1, + size_t rdlen, + size_t ordlen, + char *inps) + { + assert(!s.empty()); + return !VALID_AL_SCORE(s.secbest(mate1)); + } +}; + +/** + * Collection of routines for calculating mapping quality. + */ +class Mapq { + +public: + + virtual ~Mapq() { } + + virtual TMapq mapq( + const AlnSetSumm& s, + const AlnFlags& flags, + bool mate1, + size_t rdlen, + size_t ordlen, + char *inps) const = 0; +}; + +extern const TMapq unp_nosec_perf; +extern const TMapq unp_nosec[11]; +extern const TMapq unp_sec_perf[11]; +extern const TMapq unp_sec[11][11]; +extern const TMapq pair_nosec_perf; + +/** + * V3 of the MAPQ calculator + */ +class BowtieMapq3 : public Mapq { + +public: + + BowtieMapq3( + const SimpleFunc& scoreMin, + const Scoring& sc) : + scoreMin_(scoreMin), + sc_(sc) + { } + + virtual ~BowtieMapq3() { } + + /** + * Given an AlnSetSumm, return a mapping quality calculated. 
+ */ + virtual TMapq mapq( + const AlnSetSumm& s, + const AlnFlags& flags, + bool mate1, + size_t rdlen, + size_t ordlen, + char *inps) // put string representation of inputs here + const + { + if(s.paired()) { + return pair_nosec_perf; + } else { + bool hasSecbest = VALID_AL_SCORE(s.secbest(mate1)); + if(!flags.canMax() && !s.exhausted(mate1) && !hasSecbest) { + return 255; + } + TAlScore scMax = (TAlScore)sc_.perfectScore(rdlen); + TAlScore scMin = scoreMin_.f((float)rdlen); + assert_geq(scMax, scMin); + TAlScore best = scMax - s.best(mate1).score(); // best score (lower=better) + size_t best_bin = (size_t)((double)best * (10.0 / (double)(scMax - scMin)) + 0.5); + assert_geq(best_bin, 0); + assert_lt(best_bin, 11); + if(hasSecbest) { + assert_geq(s.best(mate1).score(), s.secbest(mate1).score()); + size_t diff = s.best(mate1).score() - s.secbest(mate1).score(); + size_t diff_bin = (size_t)((double)diff * (10.0 / (double)(scMax - scMin)) + 0.5); + assert_geq(diff_bin, 0); + assert_lt(diff_bin, 11); + // A valid second-best alignment was found + if(best == scMax) { + // Best alignment has perfect score + return unp_sec_perf[best_bin]; + } else { + // Best alignment has less than perfect score + return unp_sec[diff_bin][best_bin]; + } + } else { + // No valid second-best alignment was found + if(best == scMax) { + // Best alignment has perfect score + return unp_nosec_perf; + } else { + // Best alignment has less than perfect score + return unp_nosec[best_bin]; + } + } + } + } + +protected: + + SimpleFunc scoreMin_; + const Scoring& sc_; +}; + +/** + * V2 of the MAPQ calculator + */ +class BowtieMapq2 : public Mapq { + +public: + + BowtieMapq2( + const SimpleFunc& scoreMin, + const Scoring& sc) : + scoreMin_(scoreMin), + sc_(sc) + { } + + virtual ~BowtieMapq2() { } + + /** + * Given an AlnSetSumm, return a mapping quality calculated. 
+ */ + virtual TMapq mapq( + const AlnSetSumm& s, + const AlnFlags& flags, + bool mate1, + size_t rdlen, + size_t ordlen, + char *inps) // put string representation of inputs here + const + { + // Did the read have a second-best alignment? + bool hasSecbest = s.paired() ? + VALID_AL_SCORE(s.secbestPaired()) : + VALID_AL_SCORE(s.secbest(mate1)); + + // for hisat + bool equalSecbest = false; + if(hasSecbest) { + if(s.paired()) { + assert_geq(s.bestPaired(), s.secbestPaired()); + equalSecbest = s.bestPaired() == s.secbestPaired(); + } else { + assert_geq(s.best(mate1), s.secbest(mate1)); + equalSecbest = s.best(mate1) == s.secbest(mate1); + } + } + + // This corresponds to a scenario where we found one and only one + // alignment but didn't really look for a second one + if(!flags.canMax() && + !s.exhausted(mate1) && + (!hasSecbest || !equalSecbest)) { + return 60; + } + // scPer = score of a perfect match + TAlScore scPer = (TAlScore)sc_.perfectScore(rdlen); + if(s.paired()) { + scPer += (TAlScore)sc_.perfectScore(ordlen); + } + // scMin = score of a just barely valid match + TAlScore scMin = scoreMin_.f((float)rdlen); + if(s.paired()) { + scMin += scoreMin_.f((float)ordlen); + } + TAlScore secbest = scMin-1; + TAlScore diff = (scPer - scMin); // scores can vary by up to this much + TMapq ret = 0; + TAlScore best = s.paired() ? + s.bestPaired().score() : s.best(mate1).score(); + // best score but normalized so that 0 = worst valid score + TAlScore bestOver = best - scMin; + if(sc_.monotone) { + // End-to-end alignment + if(!hasSecbest) { + if (bestOver >= diff * (double)0.8f) ret = 42; + else if(bestOver >= diff * (double)0.7f) ret = 40; + else if(bestOver >= diff * (double)0.6f) ret = 24; + else if(bestOver >= diff * (double)0.5f) ret = 23; + else if(bestOver >= diff * (double)0.4f) ret = 8; + else if(bestOver >= diff * (double)0.3f) ret = 3; + else ret = 0; + } else { + secbest = s.paired() ? 
+ s.secbestPaired().score() : s.secbest(mate1).score(); + TAlScore bestdiff = abs(abs(static_cast(best))-abs(static_cast(secbest))); + if(bestdiff >= diff * (double)0.9f) { + if(bestOver == diff) { + ret = 39; + } else { + ret = 33; + } + } else if(bestdiff >= diff * (double)0.8f) { + if(bestOver == diff) { + ret = 38; + } else { + ret = 27; + } + } else if(bestdiff >= diff * (double)0.7f) { + if(bestOver == diff) { + ret = 37; + } else { + ret = 26; + } + } else if(bestdiff >= diff * (double)0.6f) { + if(bestOver == diff) { + ret = 36; + } else { + ret = 22; + } + } else if(bestdiff >= diff * (double)0.5f) { + // Top third is still pretty good + if (bestOver == diff) { + ret = 35; + } else if(bestOver >= diff * (double)0.84f) { + ret = 25; + } else if(bestOver >= diff * (double)0.68f) { + ret = 16; + } else { + ret = 5; + } + } else if(bestdiff >= diff * (double)0.4f) { + // Top third is still pretty good + if (bestOver == diff) { + ret = 34; + } else if(bestOver >= diff * (double)0.84f) { + ret = 21; + } else if(bestOver >= diff * (double)0.68f) { + ret = 14; + } else { + ret = 4; + } + } else if(bestdiff >= diff * (double)0.3f) { + // Top third is still pretty good + if (bestOver == diff) { + ret = 32; + } else if(bestOver >= diff * (double)0.88f) { + ret = 18; + } else if(bestOver >= diff * (double)0.67f) { + ret = 15; + } else { + ret = 3; + } + } else if(bestdiff >= diff * (double)0.2f) { + // Top third is still pretty good + if (bestOver == diff) { + ret = 31; + } else if(bestOver >= diff * (double)0.88f) { + ret = 17; + } else if(bestOver >= diff * (double)0.67f) { + ret = 11; + } else { + ret = 0; + } + } else if(bestdiff >= diff * (double)0.1f) { + // Top third is still pretty good + if (bestOver == diff) { + ret = 30; + } else if(bestOver >= diff * (double)0.88f) { + ret = 12; + } else if(bestOver >= diff * (double)0.67f) { + ret = 7; + } else { + ret = 0; + } + } else if(bestdiff > 0) { + // Top third is still pretty good + if(bestOver >= diff * 
(double)0.67f) { + ret = 6; + } else { + ret = 2; + } + } else { + assert_eq(bestdiff, 0); + // Top third is still pretty good + if(bestOver >= diff * (double)0.67f) { + ret = 1; + } else { + ret = 0; + } + } + } + } else { + // Local alignment + if(!hasSecbest) { + if (bestOver >= diff * (double)0.8f) ret = 44; + else if(bestOver >= diff * (double)0.7f) ret = 42; + else if(bestOver >= diff * (double)0.6f) ret = 41; + else if(bestOver >= diff * (double)0.5f) ret = 36; + else if(bestOver >= diff * (double)0.4f) ret = 28; + else if(bestOver >= diff * (double)0.3f) ret = 24; + else ret = 22; + } else { + secbest = s.paired() ? + s.secbestPaired().score() : s.secbest(mate1).score(); + TAlScore bestdiff = abs(abs(static_cast(best))-abs(static_cast(secbest))); + if (bestdiff >= diff * (double)0.9f) ret = 40; + else if(bestdiff >= diff * (double)0.8f) ret = 39; + else if(bestdiff >= diff * (double)0.7f) ret = 38; + else if(bestdiff >= diff * (double)0.6f) ret = 37; + else if(bestdiff >= diff * (double)0.5f) { + if (bestOver == diff) ret = 35; + else if(bestOver >= diff * (double)0.50f) ret = 25; + else ret = 20; + } else if(bestdiff >= diff * (double)0.4f) { + if (bestOver == diff) ret = 34; + else if(bestOver >= diff * (double)0.50f) ret = 21; + else ret = 19; + } else if(bestdiff >= diff * (double)0.3f) { + if (bestOver == diff) ret = 33; + else if(bestOver >= diff * (double)0.5f) ret = 18; + else ret = 16; + } else if(bestdiff >= diff * (double)0.2f) { + if (bestOver == diff) ret = 32; + else if(bestOver >= diff * (double)0.5f) ret = 17; + else ret = 12; + } else if(bestdiff >= diff * (double)0.1f) { + if (bestOver == diff) ret = 31; + else if(bestOver >= diff * (double)0.5f) ret = 14; + else ret = 9; + } else if(bestdiff > 0) { + if(bestOver >= diff * (double)0.5f) ret = 11; + else ret = 2; + } else { + assert_eq(bestdiff, 0); + if(bestOver >= diff * (double)0.5f) ret = 1; + else ret = 0; + } + } + } + // Note: modifications to inps must be synchronized + //if(inps != 
NULL) { + // inps = itoa10(best, inps); + // *inps++ = ','; + // inps = itoa10(secbest, inps); + // *inps++ = ','; + // inps = itoa10(ret, inps); + //} + return ret; + } + +protected: + + SimpleFunc scoreMin_; + const Scoring& sc_; +}; + +/** + * TODO: Do BowtieMapq on a per-thread basis prior to the mutex'ed output + * function. + * + * topCoeff :: top_coeff + * botCoeff :: bot_coeff + * mx :: mapqMax + * horiz :: mapqHorizon (sort of) + * + * sc1 <- tab$sc1 + * sc2 <- tab$sc2 + * mapq <- rep(mx, length(sc1)) + * diff_top <- ifelse(sc1 != best & sc2 != best, abs(best - abs(pmax(sc1, sc2))), 0) + * mapq <- mapq - diff_top * top_coeff + * diff_bot <- ifelse(sc2 != horiz, abs(abs(sc2) - abs(horiz)), 0) + * mapq <- mapq - diff_bot * bot_coeff + * mapq <- round(pmax(0, pmin(mx, mapq))) + * tab$mapq <- mapq + */ +class BowtieMapq : public Mapq { + +public: + + BowtieMapq( + const SimpleFunc& scoreMin, + const Scoring& sc) : + scoreMin_(scoreMin), + sc_(sc) + { } + + virtual ~BowtieMapq() { } + + /** + * Given an AlnSetSumm, return a mapping quality calculated. + */ + virtual TMapq mapq( + const AlnSetSumm& s, + const AlnFlags& flags, + bool mate1, + size_t rdlen, + size_t ordlen, + char *inps) // put string representation of inputs here + const + { + bool hasSecbest = VALID_AL_SCORE(s.secbest(mate1)); + if(!flags.canMax() && !s.exhausted(mate1) && !hasSecbest) { + return 255; + } + TAlScore scPer = (TAlScore)sc_.perfectScore(rdlen); + TAlScore scMin = scoreMin_.f((float)rdlen); + TAlScore secbest = scMin-1; + TAlScore diff = (scPer - scMin); + float sixth_2 = (float)(scPer - diff * (double)0.1666f * 2); + float sixth_3 = (float)(scPer - diff * (double)0.1666f * 3); + TMapq ret = 0; + TAlScore best = s.best(mate1).score(); + if(!hasSecbest) { + // Top third? + if(best >= sixth_2) { + ret = 37; + } + // Otherwise in top half? + else if(best >= sixth_3) { + ret = 25; + } + // Otherwise has no second-best? 
+ else { + ret = 10; + } + } else { + secbest = s.secbest(mate1).score(); + TAlScore bestdiff = abs(abs(static_cast(best))-abs(static_cast(secbest))); + if(bestdiff >= diff * 0.1666 * 5) { + ret = 6; + } else if(bestdiff >= diff * 0.1666 * 4) { + ret = 5; + } else if(bestdiff >= diff * 0.1666 * 3) { + ret = 4; + } else if(bestdiff >= diff * 0.1666 * 2) { + ret = 3; + } else if(bestdiff >= diff * 0.1666 * 1) { + ret = 2; + } else { + ret = 1; + } + } + // Note: modifications to inps must be synchronized + //if(inps != NULL) { + // inps = itoa10(best, inps); + // *inps++ = ','; + // inps = itoa10(secbest, inps); + // *inps++ = ','; + // inps = itoa10(ret, inps); + //} + return ret; + } + +protected: + + SimpleFunc scoreMin_; + const Scoring& sc_; +}; + +/** + * Create and return new MAPQ calculating object. + */ +static inline Mapq *new_mapq( + int version, + const SimpleFunc& scoreMin, + const Scoring& sc) +{ + if(version == 3) { + return new BowtieMapq3(scoreMin, sc); + } else if(version == 2) { + return new BowtieMapq2(scoreMin, sc); + } else { + return new BowtieMapq(scoreMin, sc); + } +} + +#endif /*ndef UNIQUE_H_*/ diff --git a/util.h b/util.h new file mode 100644 index 0000000..f9c792c --- /dev/null +++ b/util.h @@ -0,0 +1,53 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . 
// */  (closes the license comment that opens at the end of the previous line of this dump)

#ifndef UTIL_H_
#define UTIL_H_

// NOTE(review): the header names on the two original #include lines were lost
// in this dump; these are the headers the code below needs.
#include <algorithm>
#include <limits>

/**
 * C++ version char* style "itoa":
 *
 * Writes the base-10 representation of `value` into `result` as a
 * NUL-terminated string and returns a pointer to the terminator.
 *
 * T must be an integer type. `result` must have room for every digit, an
 * optional '-' and the terminator; no bounds checking is performed.
 */
template<typename T>
char* itoa10(const T& value, char* result) {
    char* out = result;
    T quotient = value;
    // Emit digits from least to most significant; they are reversed below.
    // The magnitude is taken per digit instead of negating the whole value,
    // so the most negative value of a signed type does not overflow.
    do {
        int digit = static_cast<int>(quotient % 10);
        if(digit < 0) digit = -digit;
        *out++ = "0123456789"[digit];
        quotient /= 10;
    } while(quotient != 0);
    if(std::numeric_limits<T>::is_signed) {
        // Written this way to avoid a compiler warning when T is unsigned
        if(value <= 0 && value != 0) *out++ = '-';
    }
    std::reverse(result, out);
    *out = 0; // terminator
    return out;
}

#endif /*ndef UTIL_H_*/
// --- unified-diff residue for the next file in this dump (kept, commented out) ---
// diff --git a/utility_3n.cpp b/utility_3n.cpp
// new file mode 100644
// index 0000000..17d3782
// --- /dev/null
// +++ b/utility_3n.cpp
// @@ -0,0 +1,80 @@
// /*
//  * Copyright 2021, Yun (Leo) Zhang
//  *
//  * This file is part of HISAT-3N.
//  *
//  * HISAT-3N is free software: you can redistribute it and/or modify
//  * it under the terms of the GNU General Public License as published by
//  * the Free Software Foundation, either version 3 of the License, or
//  * (at your option) any later version.
//  *
//  * HISAT-3N is distributed in the hope that it will be useful,
//  * but WITHOUT ANY WARRANTY; without even the implied warranty of
//  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//  * GNU General Public License for more details.
//  *
//  * You should have received a copy of the GNU General Public License
//  * along with HISAT-3N. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include +#include "utility_3n.h" +#include "alphabet.h" + +void getConversion(char usrInputFrom, char usrInputTo, char& convertFrom, char& convertTo) { + if ((usrInputFrom == 'A' && usrInputTo == 'T') || + (usrInputFrom == 'A' && usrInputTo == 'C') || + (usrInputFrom == 'C' && usrInputTo == 'G') || + (usrInputFrom == 'C' && usrInputTo == 'T') || + (usrInputFrom == 'G' && usrInputTo == 'A') || + (usrInputFrom == 'T' && usrInputTo == 'G')) { + convertFrom = usrInputFrom; + convertTo = usrInputTo; + return; + } + if ((usrInputFrom == 'C' && usrInputTo == 'A') || + (usrInputFrom == 'G' && usrInputTo == 'C') || + (usrInputFrom == 'T' && usrInputTo == 'A') || + (usrInputFrom == 'T' && usrInputTo == 'C') || + (usrInputFrom == 'A' && usrInputTo == 'G') || + (usrInputFrom == 'G' && usrInputTo == 'T')) { + convertFrom = usrInputTo; + convertTo = usrInputFrom; + return; + } + cerr << "Un-identified --base-change type: " << usrInputFrom << "," << usrInputTo << endl; + throw 1; +} + +bool fileExist (string name) { + return ( access( name.c_str(), F_OK ) != -1 ); +} + +void ConvertMatrix3N:: convertMatrix() { + restoreNormal(); + for (int i = 0; i < 4; i++) { + char base = allBase[i]; + char lowerBase = allBaseLower[i]; + if (convertFrom == base) { + asc2dna[base] = charToInt(convertTo); + asc2dna[lowerBase] = charToInt(convertTo); + } else if (complement(convertFrom) == base) { + asc2dnacomp[base] = convertTo; + asc2dnacomp[lowerBase] = convertTo; + dnacomp[i] = charToInt(convertTo); + } + } +} + +void ConvertMatrix3N::restoreNormal() { + for (int i = 0; i < 4; i++) { + char base = allBase[i]; + char lowerBase = allBaseLower[i]; + asc2dna[base] = charToInt(base); + asc2dna[lowerBase] = charToInt(base); + asc2dnacomp[base] = complement(base); + asc2dnacomp[lowerBase] = complement(base); + dnacomp[i] = charToInt(complement(base)); + } +} + diff --git a/utility_3n.h b/utility_3n.h new file mode 100644 index 0000000..06f5533 --- /dev/null +++ b/utility_3n.h @@ -0,0 +1,113 
// @@  (end of the unified-diff hunk header that starts on the previous line of this dump)
/*
 * Copyright 2020, Yun (Leo) Zhang
 *
 * This file is part of HISAT-3N.
 *
 * HISAT-3N is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HISAT-3N is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HISAT-3N. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HISAT2_UTILITY_3N_H
#define HISAT2_UTILITY_3N_H

// NOTE(review): the header name on the original #include line was lost in
// this dump; <string> is what the declarations below need.
#include <string>


using namespace std;

/**
 * this is the class to convert asc2dna, asc2dnacomp, and dnacomp matrix for hisat-3n conversion.
 * always save the conversion as convertFrom and convertTo.
 * this class is to convert matrix for hisat2-build with (--3N) or hisat2-repeat with (--3N).
 * hisat2 with (--base-change C,T) will not use this class.
 */
class ConvertMatrix3N {
    char convertFrom = 'A';
    char convertTo = 'A';
    string allBase = "ACGT";
    string allBaseLower = "acgt";

    /**
     * helper function to convert a character to its int representation:
     * the index of inputChar in "ACGT", or -1 when it is not one of them.
     */
    int charToInt(char inputChar) { return static_cast<int>(allBase.find(inputChar)); }

    /**
     * return the complement nucleotide (the letter at the mirrored index
     * in "ACGT").
     */
    int complement(char inputChar) { return allBase[3-charToInt(inputChar)]; }

    /**
     * convert asc2dna, asc2dnacomp, and dnacomp matrix according to convertFrom and convertTo variable.
     * always restore the matrix to original (4 letter) matrix first.
     */
    void convertMatrix();
public:
    ConvertMatrix3N() { }

    /**
     * save the conversion information then convert the matrix.
     */
    void convert(char from, char to) {
        convertFrom = from;
        convertTo = to;
        convertMatrix();
    }

    /**
     * change convertFrom and convertTo to their complement nucleotides, then convert the matrix.
     */
    void inverseConversion() {
        convertFrom = complement(convertFrom);
        convertTo = complement(convertTo);
        convertMatrix();
    }

    /**
     * restore the asc2dna, asc2dnacomp, and dnacomp matrix to original (4 letter).
     * do not change the convertFrom and convertTo variable.
     */
    void restoreNormal();

    /**
     * convert the matrix according to the convertFrom and convertTo variable.
     */
    void restoreConversion() {
        convertMatrix();
    }
};

/**
 * the simple data structure to store cigar information.
 */
class Cigar {
    // Default values so that a default-constructed Cigar never exposes
    // indeterminate members through getLen()/getLabel().
    int len = 0;
    char label = '\0';
public:
    Cigar() { }

    Cigar(int inputLen, char inputLabel): len(inputLen), label(inputLabel) {
    }

    /** mutable access to the stored length. */
    int& getLen() { return len; }

    /** mutable access to the stored label character. */
    char& getLabel() { return label; }
};

extern void getConversion(char usrInputFrom, char usrInputTo, char& convertFrom, char& convertTo);
extern bool fileExist (string name);


#endif //HISAT2_UTILITY_3N_H
// --- unified-diff residue for the next file in this dump (kept, commented out) ---
// diff --git a/utility_3n_table.h b/utility_3n_table.h
// new file mode 100644
// index 0000000..33a086e
// --- /dev/null
// +++ b/utility_3n_table.h
// @@ -0,0 +1,327 @@
// /*
//  * Copyright 2020, Yun (Leo) Zhang
//  *
//  * This file is part of HISAT-3N.
//  *
//  * HISAT-3N is free software: you can redistribute it and/or modify
//  * it under the terms of the GNU General Public License as published by
//  * the Free Software Foundation, either version 3 of the License, or
//  * (at your option) any later version.
//  *
//  * HISAT-3N is distributed in the hope that it will be useful,
//  * but WITHOUT ANY WARRANTY; without even the implied warranty of
//  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//  * GNU General Public License for more details.
//  *
//  * You should have received a copy of the GNU General Public License
//  * along with HISAT-3N. If not, see <http://www.gnu.org/licenses/>.
// */  (closes the license comment that opens at the end of the previous line of this dump)

#ifndef UTILITY_3N_TABLE_H
#define UTILITY_3N_TABLE_H

// NOTE(review): the header names on the three original #include lines were
// lost in this dump. The list below is what the declarations in this header
// use.
#include <algorithm>
#include <cassert>
#include <cctype>
#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <vector>

using namespace std;

/**
 * return complement of input base.
 * Indexed by ASCII code; entries for characters without a complement are 0.
 */
char asc2dnacomp[] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'-', 0, 0,
    /*  48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0,'T','V','G','H', 0, 0,'C','D', 0, 0,'M', 0,'K','N', 0,
    /*           A   B   C   D           G   H           K       M   N     */
    /*  80 */ 0, 0,'Y','S','A', 0,'B','W', 0,'R', 0, 0, 0, 0, 0, 0,
    /*               R   S   T       V   W       Y                         */
    /*  96 */ 0,'T','V','G','H', 0, 0,'C','D', 0, 0,'M', 0,'K','N', 0,
    /*           a   b   c   d           g   h           k       m   n     */
    /* 112 */ 0, 0,'Y','S','A', 0,'B','W', 0,'R', 0, 0, 0, 0, 0, 0,
    /*               r   s   t       v   w       y                         */
    /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/**
 * the simple data structure to bind quality score and position (on reference) together.
 */
class PosQuality {
public:
    int readPos; // 0-based
    int refPos;  // 0-based
    char qual;
    bool converted;
    bool remove;

    /**
     * Start with both positions set to inputPos and the entry flagged for
     * removal; qual/converted get neutral values until setQual() is called.
     */
    PosQuality(int& inputPos) :
        readPos(inputPos),
        refPos(inputPos),
        qual('\0'),
        converted(false),
        remove(true)
    { }

    /**
     * Store the quality and conversion flag and clear the removal flag.
     */
    void setQual (char& inputQual, bool inputConverted) {
        qual = inputQual;
        converted = inputConverted;
        remove = false;
    }
};

/**
 * the base class for string we need to search.
 * `start` is the index at which the next search begins.
 */
class string_search {
public:
    int start = 0;
    string s;
    int stringLen = 0;

    /** forget the stored string. */
    void initialize() {
        start = 0;
        stringLen = 0;
        s.clear();
    }

    /** store a new string and rewind the search position. */
    void loadString(string intputString) {
        s = intputString;
        stringLen = s.size();
        start = 0;
    }
};


/**
 * to store CIGAR string and search segments in it.
 */
class CIGAR : public string_search{
public:

    /**
     * Read the next "<length><op>" segment.
     *
     * @param len    receives the length (reset to 0 when nothing is read)
     * @param symbol receives the operation character
     * @return true when a segment was read; false at the end of the string
     *         or when the remainder is not "<digits><op>" (for example "*"
     *         or trailing digits). In the malformed case the search position
     *         is moved to the end, so later calls also return false.
     *
     * The operation is the first non-digit character, so '=' is accepted in
     * addition to letters. The scan never reads past the end of the string.
     */
    bool getNextSegment(int& len, char& symbol) {
        if (start >= stringLen) {
            return false;
        }
        len = 0;
        int opIndex = start;
        while (opIndex < stringLen && isdigit(static_cast<unsigned char>(s[opIndex]))) {
            opIndex++;
        }
        if (opIndex == start || opIndex >= stringLen) {
            // no length digits, or digits without an operation character
            start = stringLen;
            return false;
        }
        len = stoi(s.substr(start, opIndex - start));
        symbol = s[opIndex];
        start = opIndex + 1;
        return true;
    }
};

/**
 * to store MD tag and search segments in it.
 */
class MD_tag : public string_search {
public:

    /**
     * Read the next segment of the MD tag into seg.
     *
     * A segment is one of: a run of digits (zeros seen while the segment is
     * still empty are skipped), a single letter, or '^' followed by letters.
     * Returns false when the string is used up and nothing was collected.
     */
    bool getNextSegment(string& seg) {
        if (start >= stringLen) {
            return false;
        }
        seg.clear();
        int currentIndex = start;
        bool deletion = false;

        while (true) {
            if (currentIndex >= stringLen) {
                start = currentIndex + 1;
                return !seg.empty();
            }
            const char current = s[currentIndex];
            if (seg.empty() && current == '0') {
                currentIndex++;
                continue;
            }
            if (isalpha(static_cast<unsigned char>(current))) {
                if (seg.empty()) {
                    // a lone letter is a complete segment
                    seg = current;
                    start = currentIndex+1;
                    return true;
                } else {
                    if (deletion) {
                        seg += current;
                    } else {
                        // letter after a number: the number ends here
                        start = currentIndex;
                        return true;
                    }
                }
            } else if (current == '^') {
                if (seg.empty()) {
                    seg = current;
                    deletion = true;
                } else {
                    start = currentIndex;
                    return true;
                }
            } else { // number
                if (seg.empty()) {
                    seg = current;
                } else {
                    if (deletion || isalpha(static_cast<unsigned char>(seg.back()))) {
                        start = currentIndex;
                        return true;
                    } else {
                        seg += current;
                    }
                }
            }
            currentIndex++;
        }
    }
};

/**
 * simple safe queue
 *
 * Every public member holds the mutex for the duration of the call. The lock
 * is released by a guard object, so it is also released when copying a T
 * throws. front() and pop() must not be called on an empty queue.
 */
template <typename T>
class SafeQueue {
private:
    mutex mutex_;
    queue<T> queue_;

    // Text before the first tab of *line (the whole line when there is none).
    string getReadName(string* line){
        int startPosition = 0;
        int endPosition;

        endPosition = line->find("\t", startPosition);
        string readName = line->substr(startPosition, endPosition - startPosition);
        return readName;
    }

public:
    void pop() {
        lock_guard<mutex> guard(mutex_);
        queue_.pop();
    }

    T front() {
        lock_guard<mutex> guard(mutex_);
        T value = queue_.front();
        return value;
    }

    int size() {
        lock_guard<mutex> guard(mutex_);
        return static_cast<int>(queue_.size());
    }

    /**
     * return true if the queue is not empty and pop front and get value.
     * return false if the queue is empty.
     */
    bool popFront(T& value) {
        lock_guard<mutex> guard(mutex_);
        if (queue_.empty()) {
            return false;
        }
        value = queue_.front();
        queue_.pop();
        return true;
    }

    void push(T value) {
        lock_guard<mutex> guard(mutex_);
        queue_.push(value);
    }

    bool empty() {
        lock_guard<mutex> guard(mutex_);
        return queue_.empty();
    }
};

/**
 * store one chromosome and it's stream position
 */
class ChromosomeFilePosition {
public:
    string chromosome;
    streampos linePos;
    ChromosomeFilePosition(string inputChromosome, streampos inputPos) :
        chromosome(inputChromosome),
        linePos(inputPos)
    { }

    /** order by chromosome name. */
    bool operator < (const ChromosomeFilePosition& in) const{
        return chromosome < in.chromosome;
    }
};

/**
 * store all chromosome and it's stream position
 */
class ChromosomeFilePositions {
public:
    vector<ChromosomeFilePosition> pos;

    /**
     * add the chromosome name and its streamPos to pos.
     * No duplicate check is made.
     */
    void append (string &chromosome, streampos& linePos) {
        pos.push_back(ChromosomeFilePosition(chromosome, linePos));
    }

    /**
     * make binary search on pos[start..end] for target chromosome name.
     * pos must already be ordered by name (see sort()).
     * Prints a message to stderr and throws the int 1 when the name is not
     * found.
     */
    int findChromosome(string &targetChromosome, int start, int end) {
        while (start <= end) {
            int middle = start + (end - start) / 2;
            if (pos[middle].chromosome == targetChromosome) {
                return middle;
            }
            if (pos[middle].chromosome > targetChromosome) {
                end = middle - 1;
            } else {
                start = middle + 1;
            }
        }
        // cannot find the chromosome! throw!
        cerr << "Cannot find the chromosome: " << targetChromosome << " in reference file." << endl;
        throw 1;
    }

    /**
     * given targetChromosome name, return its streampos
     */
    streampos getChromosomePosInRefFile(string &targetChromosome)
    {
        // Convert before subtracting so an empty list gives end == -1
        // instead of a wrapped unsigned value.
        int last = static_cast<int>(pos.size()) - 1;
        int index = findChromosome(targetChromosome, 0, last);
        assert(pos[index].chromosome == targetChromosome);
        return pos[index].linePos;
    }

    /**
     * sort the pos by chromosome name
     */
    void sort()
    {
        std::sort(pos.begin(), pos.end());
    }
};
#endif //UTILITY_3N_TABLE_H
// --- unified-diff residue for the next file in this dump (kept, commented out) ---
// diff --git a/word_io.h b/word_io.h
// new file mode 100644
// index 0000000..54a5823
// --- /dev/null
// +++ b/word_io.h
// @@ -0,0 +1,393 @@
// /*
//  * Copyright 2011, Ben Langmead
//  *
//  * This file is part of Bowtie 2.
//  *
//  * Bowtie 2 is free software: you can redistribute it and/or modify
//  * it under the terms of the GNU General Public License as published by
//  * the Free Software Foundation, either version 3 of the License, or
//  * (at your option) any later version.
//  *
//  * Bowtie 2 is distributed in the hope that it will be useful,
//  * but WITHOUT ANY WARRANTY; without even the implied warranty of
//  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//  * GNU General Public License for more details.
//  *
//  * You should have received a copy of the GNU General Public License
//  * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef WORD_IO_H_ +#define WORD_IO_H_ + +#include +#include +#include +#include +#include "assert_helpers.h" +#include "endian_swap.h" + +/** + * Write a 32-bit unsigned to an output stream being careful to + * re-endianize if caller-requested endianness differs from current + * host. + */ +static inline void writeU32(std::ostream& out, uint32_t x, bool toBigEndian) { + uint32_t y = endianizeU32(x, toBigEndian); + out.write((const char*)&y, 4); +} + +/** + * Write a 32-bit unsigned to an output stream using the native + * endianness. + */ +static inline void writeU32(std::ostream& out, uint32_t x) { + out.write((const char*)&x, 4); +} + +/** + * Write a 32-bit signed int to an output stream being careful to + * re-endianize if caller-requested endianness differs from current + * host. + */ +static inline void writeI32(std::ostream& out, int32_t x, bool toBigEndian) { + int32_t y = endianizeI32(x, toBigEndian); + out.write((const char*)&y, 4); +} + +/** + * Write a 32-bit unsigned to an output stream using the native + * endianness. + */ +static inline void writeI32(std::ostream& out, int32_t x) { + out.write((const char*)&x, 4); +} + +/** + * Write a 16-bit unsigned to an output stream being careful to + * re-endianize if caller-requested endianness differs from current + * host. + */ +static inline void writeU16(std::ostream& out, uint16_t x, bool toBigEndian) { + uint16_t y = endianizeU16(x, toBigEndian); + out.write((const char*)&y, 2); +} + +/** + * Write a 16-bit unsigned to an output stream using the native + * endianness. + */ +static inline void writeU16(std::ostream& out, uint16_t x) { + out.write((const char*)&x, 2); +} + +/** + * Write a 16-bit signed int to an output stream being careful to + * re-endianize if caller-requested endianness differs from current + * host. 
+ */ +static inline void writeI16(std::ostream& out, int16_t x, bool toBigEndian) { + int16_t y = endianizeI16(x, toBigEndian); + out.write((const char*)&y, 2); +} + +/** + * Write a 16-bit unsigned to an output stream using the native + * endianness. + */ +static inline void writeI16(std::ostream& out, int16_t x) { + out.write((const char*)&x, 2); +} + +/** + * Write a 8-bit unsigned to an output stream. + */ +static inline void writeU8(std::ostream& out, uint8_t x) { + out.write((const char*)&x, 1); +} + +/** + * Read a 32-bit unsigned from an input stream, inverting endianness + * if necessary. + */ +static inline uint32_t readU32(std::istream& in, bool swap) { + uint32_t x; + in.read((char *)&x, 4); + assert_eq(4, in.gcount()); + if(swap) { + return endianSwapU32(x); + } else { + return x; + } +} + +/** + * Read a 32-bit unsigned from a file descriptor, optionally inverting + * endianness. + */ +#ifdef BOWTIE_MM +static inline uint32_t readU32(int in, bool swap) { + uint32_t x; + if(read(in, (void *)&x, 4) != 4) { + assert(false); + } + if(swap) { + return endianSwapU32(x); + } else { + return x; + } +} +#endif + +/** + * Read a 32-bit unsigned from a FILE*, optionally inverting + * endianness. + */ +static inline uint32_t readU32(FILE* in, bool swap) { + uint32_t x; + if(fread((void *)&x, 1, 4, in) != 4) { + assert(false); + } + if(swap) { + return endianSwapU32(x); + } else { + return x; + } +} + + +/** + * Read a 32-bit signed from an input stream, inverting endianness + * if necessary. + */ +static inline int32_t readI32(std::istream& in, bool swap) { + int32_t x; + in.read((char *)&x, 4); + assert_eq(4, in.gcount()); + if(swap) { + return endianSwapI32(x); + } else { + return x; + } +} + +/** + * Read a 32-bit unsigned from a file descriptor, optionally inverting + * endianness. 
+ */ +#ifdef BOWTIE_MM +static inline uint32_t readI32(int in, bool swap) { + int32_t x; + if(read(in, (void *)&x, 4) != 4) { + assert(false); + } + if(swap) { + return endianSwapI32(x); + } else { + return x; + } +} +#endif + +/** + * Read a 32-bit unsigned from a FILE*, optionally inverting + * endianness. + */ +static inline uint32_t readI32(FILE* in, bool swap) { + int32_t x; + if(fread((void *)&x, 1, 4, in) != 4) { + assert(false); + } + if(swap) { + return endianSwapI32(x); + } else { + return x; + } +} + + +/** + * Read a 16-bit unsigned from an input stream, inverting endianness + * if necessary. + */ +static inline uint16_t readU16(std::istream& in, bool swap) { + uint16_t x; + in.read((char *)&x, 2); + assert_eq(2, in.gcount()); + if(swap) { + return endianSwapU16(x); + } else { + return x; + } +} + +/** + * Read a 16-bit unsigned from a file descriptor, optionally inverting + * endianness. + */ +#ifdef BOWTIE_MM +static inline uint16_t readU16(int in, bool swap) { + uint16_t x; + if(read(in, (void *)&x, 2) != 2) { + assert(false); + } + if(swap) { + return endianSwapU16(x); + } else { + return x; + } +} +#endif + +/** + * Read a 16-bit unsigned from a FILE*, optionally inverting + * endianness. + */ +static inline uint16_t readU16(FILE* in, bool swap) { + uint16_t x; + if(fread((void *)&x, 1, 2, in) != 2) { + assert(false); + } + if(swap) { + return endianSwapU32(x); + } else { + return x; + } +} + + +/** + * Read a 16-bit signed from an input stream, inverting endianness + * if necessary. + */ +static inline int32_t readI16(std::istream& in, bool swap) { + int16_t x; + in.read((char *)&x, 2); + assert_eq(2, in.gcount()); + if(swap) { + return endianSwapI16(x); + } else { + return x; + } +} + +/** + * Read a 16-bit unsigned from a file descriptor, optionally inverting + * endianness. 
+ */ +#ifdef BOWTIE_MM +static inline uint16_t readI16(int in, bool swap) { + int16_t x; + if(read(in, (void *)&x, 2) != 2) { + assert(false); + } + if(swap) { + return endianSwapI16(x); + } else { + return x; + } +} +#endif + +/** + * Read a 16-bit unsigned from a FILE*, optionally inverting + * endianness. + */ +static inline uint16_t readI16(FILE* in, bool swap) { + int16_t x; + if(fread((void *)&x, 1, 2, in) != 2) { + assert(false); + } + if(swap) { + return endianSwapI16(x); + } else { + return x; + } +} + +/** + * Read a 8-bit unsigned from an input stream + */ +static inline uint8_t readU8(std::istream& in) { + uint8_t x; + in.read((char *)&x, 1); + assert_eq(1, in.gcount()); + return x; +} + +template +void writeIndex(std::ostream& out, index_t x, bool toBigEndian) { + index_t y = endianizeIndex(x, toBigEndian); + out.write((const char*)&y, sizeof(index_t)); +} + +/** + * Read a unsigned from an input stream, inverting endianness + * if necessary. + */ +template +static inline index_t readIndex(std::istream& in, bool swap) { + index_t x; + in.read((char *)&x, sizeof(index_t)); + assert_eq(sizeof(index_t), in.gcount()); + if(swap) { + return endianSwapIndex(x); + } else { + return x; + } +} + +/** + * Read a unsigned from a file descriptor, optionally inverting + * endianness. + */ +#ifdef BOWTIE_MM +template +static inline index_t readIndex(int in, bool swap) { + index_t x; + if(read(in, (void *)&x, sizeof(index_t)) != sizeof(index_t)) { + assert(false); + } + if(swap) { + if(sizeof(index_t) == 8) { + assert(false); + return 0; + } else if(sizeof(index_t) == 4) { + return endianSwapU32(x); + } else { + assert_eq(sizeof(index_t), 2); + return endianSwapU16(x); + } + } else { + return x; + } +} +#endif + +/** + * Read a unsigned from a FILE*, optionally inverting + * endianness. 
+ */ +template +static inline index_t readIndex(FILE* in, bool swap) { + index_t x; + if(fread((void *)&x, 1, sizeof(index_t), in) != sizeof(index_t)) { + assert(false); + } + if(swap) { + if(sizeof(index_t) == 8) { + assert(false); + return 0; + } else if(sizeof(index_t) == 4) { + return endianSwapU32(x); + } else { + assert_eq(sizeof(index_t), 2); + return endianSwapU16(x); + } + } else { + return x; + } +} + + +#endif /*WORD_IO_H_*/ diff --git a/zbox.h b/zbox.h new file mode 100644 index 0000000..6ef1456 --- /dev/null +++ b/zbox.h @@ -0,0 +1,97 @@ +/* + * Copyright 2011, Ben Langmead + * + * This file is part of Bowtie 2. + * + * Bowtie 2 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Bowtie 2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Bowtie 2. If not, see . + */ + +#ifndef ZBOX_H_ +#define ZBOX_H_ + +#include "btypes.h" + +/** + * Fill z with Z-box information for s. String z will not be resized + * and will only be filled up to its size cap. This is the linear-time + * algorithm from Gusfield. An optional sanity-check uses a naive + * algorithm to double-check results. 
+ */ +template +void calcZ(const T& s, + TIndexOffU off, + EList& z, + bool verbose = false, + bool sanityCheck = false) +{ + size_t lCur = 0, rCur = 0; + size_t zlen = z.size(); + size_t slen = s.length(); + assert_gt(zlen, 0); + assert_eq(z[0], 0); + //assert_leq(zlen, slen); + for (size_t k = 1; k < zlen && k+off < slen; k++) { + assert_lt(lCur, k); + assert(z[lCur] == 0 || z[lCur] == rCur - lCur + 1); + if(k > rCur) { + // compare starting at k with prefix starting at 0 + size_t ki = k; + while(off+ki < s.length() && s[off+ki] == s[off+ki-k]) ki++; + z[k] = (TIndexOffU)(ki - k); + assert_lt(off+z[k], slen); + if(z[k] > 0) { + lCur = k; + rCur = k + z[k] - 1; + } + } else { + // position k is contained in a Z-box + size_t betaLen = rCur - k + 1; + size_t kPrime = k - lCur; + assert_eq(s[off+k], s[off+kPrime]); + if(z[kPrime] < betaLen) { + z[k] = z[kPrime]; + assert_lt(off+z[k], slen); + // lCur, rCur unchanged + } else if (z[kPrime] > 0) { + int q = 0; + while (off+q+rCur+1 < s.length() && s[off+q+rCur+1] == s[off+betaLen+q]) q++; + z[k] = (TIndexOffU)(betaLen + q); + assert_lt(off+z[k], slen); + rCur = rCur + q; + assert_geq(k, lCur); + lCur = k; + } else { + z[k] = 0; + assert_lt(off+z[k], slen); + // lCur, rCur unchanged + } + } + } +#ifndef NDEBUG + if(sanityCheck) { + // Recalculate Z-boxes using naive quadratic-time algorithm and + // compare to linear-time result + assert_eq(0, z[0]); + for(size_t i = 1; i < z.size(); i++) { + size_t j; + for(j = i; off+j < s.length(); j++) { + if(s[off+j] != s[off+j-i]) break; + } + assert_eq(j-i, z[i]); + } + } +#endif +} + +#endif /*ZBOX_H_*/